done
This commit is contained in:
15
lib/python3.11/site-packages/pandas/core/groupby/__init__.py
Normal file
15
lib/python3.11/site-packages/pandas/core/groupby/__init__.py
Normal file
@ -0,0 +1,15 @@
|
||||
from pandas.core.groupby.generic import (
|
||||
DataFrameGroupBy,
|
||||
NamedAgg,
|
||||
SeriesGroupBy,
|
||||
)
|
||||
from pandas.core.groupby.groupby import GroupBy
|
||||
from pandas.core.groupby.grouper import Grouper
|
||||
|
||||
# Names re-exported as the public interface of this package.
__all__ = ["DataFrameGroupBy", "NamedAgg", "SeriesGroupBy", "GroupBy", "Grouper"]
|
121
lib/python3.11/site-packages/pandas/core/groupby/base.py
Normal file
121
lib/python3.11/site-packages/pandas/core/groupby/base.py
Normal file
@ -0,0 +1,121 @@
|
||||
"""
|
||||
Provide basic components for groupby.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import dataclasses
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Hashable
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True, order=True)
class OutputKey:
    """
    Immutable, orderable pairing of an output label with its position.

    Instances are compared field by field in declaration order
    (``label`` first, then ``position``) and cannot be mutated after
    construction.
    """

    # label identifying the output
    label: Hashable
    # integer position of the output
    position: int
|
||||
|
||||
|
||||
# Plotting entry points are special-cased so that catching an exception while
# forwarding a method from NDFrame does not produce the plot twice.
plotting_methods = frozenset({"plot", "hist"})
|
||||
|
||||
# Cythonized transformations (or canned "agg+broadcast" operations) whose
# result needs no postprocessing by transform.
cythonized_kernels = frozenset({"cumprod", "cumsum", "shift", "cummin", "cummax"})
|
||||
|
||||
# Names of the aggregation/reduction kernels: each one collapses a group
# to a single value.
reduction_kernels = frozenset(
    {
        "all",
        "any",
        "corrwith",
        "count",
        "first",
        "idxmax",
        "idxmin",
        "last",
        "max",
        "mean",
        "median",
        "min",
        "nunique",
        "prod",
        # `quantile` counts as a reduction only for as long as its
        # signature accepts a single quantile value; GH#27526 might
        # change that.
        "quantile",
        "sem",
        "size",
        "skew",
        "std",
        "sum",
        "var",
    }
)
|
||||
|
||||
# Names of the transformation kernels. A transformation produces, for each
# group, a result with the same shape as that group.
transformation_kernels = frozenset(
    {
        "bfill",
        "cumcount",
        "cummax",
        "cummin",
        "cumprod",
        "cumsum",
        "diff",
        "ffill",
        "fillna",
        "ngroup",
        "pct_change",
        "rank",
        "shift",
    }
)
|
||||
|
||||
# Remaining public groupby names that are neither reductions nor
# transformations (i.e. belong in neither of the kernel sets).
groupby_other_methods = frozenset(
    {
        "agg",
        "aggregate",
        "apply",
        "boxplot",
        # corr and cov yield ngroups*ncolumns rows, which makes them
        # neither a transformation nor a reduction
        "corr",
        "cov",
        "describe",
        "dtypes",
        "expanding",
        "ewm",
        "filter",
        "get_group",
        "groups",
        "head",
        "hist",
        "indices",
        "ndim",
        "ngroups",
        "nth",
        "ohlc",
        "pipe",
        "plot",
        "resample",
        "rolling",
        "tail",
        "take",
        "transform",
        "sample",
        "value_counts",
    }
)
|
||||
# Every `name` accepted by `groupby.transform(name)`.
# NOTE: never edit this set directly; add new entries to the appropriate
# kernel set it is derived from.
transform_kernel_allowlist = reduction_kernels.union(transformation_kernels)
|
@ -0,0 +1,87 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.core.algorithms import unique1d
|
||||
from pandas.core.arrays.categorical import (
|
||||
Categorical,
|
||||
CategoricalDtype,
|
||||
recode_for_categories,
|
||||
)
|
||||
|
||||
|
||||
def recode_for_groupby(
    c: Categorical, sort: bool, observed: bool
) -> tuple[Categorical, Categorical | None]:
    """
    Code the categories to ensure we can groupby for categoricals.

    If observed=True, we return a new Categorical with the observed
    categories only (sorted by original code when sort=True, otherwise in
    order of first appearance).

    If observed=False and sort=True, ``c`` itself is returned unchanged.

    If observed=False and sort=False, return a new Categorical whose
    categories are those appearing in the data, in order of first
    appearance, followed by any categories not appearing in the data.

    This method is needed solely to ensure the categorical index of the
    GroupBy result has categories in the order of appearance in the data
    (GH-8868).

    Parameters
    ----------
    c : Categorical
    sort : bool
        The value of the sort parameter groupby was called with.
    observed : bool
        Account only for the observed values

    Returns
    -------
    Categorical
        The (possibly recoded) categorical to group on.
    Categorical or None
        If we are observed, return the original categorical, otherwise None
    """
    # we only care about observed values
    if observed:
        # In cases with c.ordered, this is equivalent to
        #  return c.remove_unused_categories(), c

        unique_codes = unique1d(c.codes)

        # code -1 marks a missing value and is not a category
        take_codes = unique_codes[unique_codes != -1]
        if sort:
            take_codes = np.sort(take_codes)

        # we recode according to the uniques
        categories = c.categories.take(take_codes)
        codes = recode_for_categories(c.codes, c.categories, categories)

        # return a new categorical that maps our new codes
        # and categories
        dtype = CategoricalDtype(categories, ordered=c.ordered)
        return Categorical._simple_new(codes, dtype=dtype), c

    # Already sorted according to c.categories; all is fine
    if sort:
        return c, None

    # From here on ``sort`` is necessarily False, so the codes are kept in
    # as-encountered order (GH-8868) and never sorted.

    # xref GH:46909: Re-ordering codes faster than using (set|add|reorder)_categories
    all_codes = np.arange(c.categories.nunique())
    # GH 38140: exclude nan from indexer for categories
    unique_notnan_codes = unique1d(c.codes[c.codes != -1])
    if len(all_codes) > len(unique_notnan_codes):
        # GH 13179: All categories need to be present, even if missing from the data
        missing_codes = np.setdiff1d(all_codes, unique_notnan_codes, assume_unique=True)
        take_codes = np.concatenate((unique_notnan_codes, missing_codes))
    else:
        take_codes = unique_notnan_codes

    return Categorical(c, c.unique().categories.take(take_codes)), None
|
2852
lib/python3.11/site-packages/pandas/core/groupby/generic.py
Normal file
2852
lib/python3.11/site-packages/pandas/core/groupby/generic.py
Normal file
File diff suppressed because it is too large
Load Diff
6003
lib/python3.11/site-packages/pandas/core/groupby/groupby.py
Normal file
6003
lib/python3.11/site-packages/pandas/core/groupby/groupby.py
Normal file
File diff suppressed because it is too large
Load Diff
1102
lib/python3.11/site-packages/pandas/core/groupby/grouper.py
Normal file
1102
lib/python3.11/site-packages/pandas/core/groupby/grouper.py
Normal file
File diff suppressed because it is too large
Load Diff
304
lib/python3.11/site-packages/pandas/core/groupby/indexing.py
Normal file
304
lib/python3.11/site-packages/pandas/core/groupby/indexing.py
Normal file
@ -0,0 +1,304 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Iterable
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Literal,
|
||||
cast,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.util._decorators import (
|
||||
cache_readonly,
|
||||
doc,
|
||||
)
|
||||
|
||||
from pandas.core.dtypes.common import (
|
||||
is_integer,
|
||||
is_list_like,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import PositionalIndexer
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
from pandas.core.groupby import groupby
|
||||
|
||||
|
||||
class GroupByIndexingMixin:
    """
    Mixin for adding ._positional_selector to GroupBy.

    The host class must provide ``_cumcount_array(ascending=...)``; every
    mask built here is derived from the arrays that method returns.
    """

    @cache_readonly
    def _positional_selector(self) -> GroupByPositionalSelector:
        """
        Return positional selection for each group.

        ``groupby._positional_selector[i:j]`` is similar to
        ``groupby.apply(lambda x: x.iloc[i:j])``
        but much faster and preserves the original index and order.

        ``_positional_selector[]`` is compatible with and extends :meth:`~GroupBy.head`
        and :meth:`~GroupBy.tail`. For example:

        - ``head(5)``
        - ``_positional_selector[5:-5]``
        - ``tail(5)``

        together return all the rows.

        Allowed inputs for the index are:

        - An integer valued iterable, e.g. ``range(2, 4)``.
        - A comma separated list of integers and slices, e.g. ``5``, ``2, 4``, ``2:4``.

        The output format is the same as :meth:`~GroupBy.head` and
        :meth:`~GroupBy.tail`, namely
        a subset of the ``DataFrame`` or ``Series`` with the index and order preserved.

        Returns
        -------
        Series
            The filtered subset of the original Series.
        DataFrame
            The filtered subset of the original DataFrame.

        See Also
        --------
        DataFrame.iloc : Purely integer-location based indexing for selection by
            position.
        GroupBy.head : Return first n rows of each group.
        GroupBy.tail : Return last n rows of each group.
        GroupBy.nth : Take the nth row from each group if n is an int, or a
            subset of rows, if n is a list of ints.

        Notes
        -----
        - The slice step cannot be negative.
        - If the index specification results in overlaps, the item is not duplicated.
        - If the index specification changes the order of items, then
          they are returned in their original order.
          By contrast, ``DataFrame.iloc`` can change the row order.
        - ``groupby()`` parameters such as as_index and dropna are ignored.

        The differences between ``_positional_selector[]`` and :meth:`~GroupBy.nth`
        with ``as_index=False`` are:

        - Input to ``_positional_selector`` can include
          one or more slices whereas ``nth``
          just handles an integer or a list of integers.
        - ``_positional_selector`` can accept a slice relative to the
          last row of each group.
        - ``_positional_selector`` does not have an equivalent to the
          ``nth()`` ``dropna`` parameter.

        Examples
        --------
        >>> df = pd.DataFrame([["a", 1], ["a", 2], ["a", 3], ["b", 4], ["b", 5]],
        ...                   columns=["A", "B"])
        >>> df.groupby("A")._positional_selector[1:2]
           A  B
        1  a  2
        4  b  5

        >>> df.groupby("A")._positional_selector[1, -1]
           A  B
        1  a  2
        2  a  3
        4  b  5
        """
        if TYPE_CHECKING:
            # pylint: disable-next=used-before-assignment
            groupby_self = cast(groupby.GroupBy, self)
        else:
            groupby_self = self

        return GroupByPositionalSelector(groupby_self)

    def _make_mask_from_positional_indexer(
        self,
        arg: PositionalIndexer | tuple,
    ) -> np.ndarray:
        """
        Build the boolean row mask selected by a positional indexer.

        Parameters
        ----------
        arg : int, slice, list-like of int, or list-like of int and slice
            Positions are relative to each group; negative values count
            from the end of the group.

        Returns
        -------
        np.ndarray
            Boolean mask with the same length as ``_ascending_count``.

        Raises
        ------
        TypeError
            If ``arg`` is not an integer, a slice or list-like.
        ValueError
            If a list-like holds something other than integers and slices,
            or if a slice has a negative step.
        """
        if is_list_like(arg):
            # Materialize exactly once. ``arg`` may be a one-shot iterable
            # (generator/iterator); validating it with ``all`` and then
            # iterating it again would otherwise see an exhausted (or
            # partially consumed) iterator and silently select nothing.
            items = list(cast(Iterable, arg))
            if all(is_integer(i) for i in items):
                mask = self._make_mask_from_list(cast(Iterable[int], items))
            else:
                mask = self._make_mask_from_tuple(tuple(items))

        elif isinstance(arg, slice):
            mask = self._make_mask_from_slice(arg)
        elif is_integer(arg):
            mask = self._make_mask_from_int(cast(int, arg))
        else:
            raise TypeError(
                f"Invalid index {type(arg)}. "
                "Must be integer, list-like, slice or a tuple of "
                "integers and slices"
            )

        if isinstance(mask, bool):
            # The helpers return a plain bool when no array term was combined
            # in; expand it to a full-length all-True / all-False array.
            if mask:
                mask = self._ascending_count >= 0
            else:
                mask = self._ascending_count < 0

        return cast(np.ndarray, mask)

    def _make_mask_from_int(self, arg: int) -> np.ndarray:
        """
        Mask of the rows at position ``arg`` within each group.

        Non-negative positions are matched against ``_ascending_count``;
        negative positions are matched against ``_descending_count``.
        """
        if arg >= 0:
            return self._ascending_count == arg
        else:
            return self._descending_count == (-arg - 1)

    def _make_mask_from_list(self, args: Iterable[int]) -> bool | np.ndarray:
        """
        Mask of the rows whose per-group position is any of ``args``.

        Returns the plain bool ``False`` when ``args`` is empty.
        """
        # ``args`` is traversed twice below, so make sure it is re-iterable.
        args = list(args)
        positive = [arg for arg in args if arg >= 0]
        negative = [-arg - 1 for arg in args if arg < 0]

        mask: bool | np.ndarray = False

        if positive:
            mask |= np.isin(self._ascending_count, positive)

        if negative:
            mask |= np.isin(self._descending_count, negative)

        return mask

    def _make_mask_from_tuple(self, args: tuple) -> bool | np.ndarray:
        """
        Union of the masks of each integer or slice in ``args``.

        Raises
        ------
        ValueError
            If an element is neither an integer nor a slice.
        """
        mask: bool | np.ndarray = False

        for arg in args:
            if is_integer(arg):
                mask |= self._make_mask_from_int(cast(int, arg))
            elif isinstance(arg, slice):
                mask |= self._make_mask_from_slice(arg)
            else:
                raise ValueError(
                    f"Invalid argument {type(arg)}. Should be int or slice."
                )

        return mask

    def _make_mask_from_slice(self, arg: slice) -> bool | np.ndarray:
        """
        Mask of the rows selected by applying the slice ``arg`` to each group.

        Returns the plain bool ``True`` for a slice that imposes no
        constraint (no start, no stop and a step of ``None`` or at most 1).

        Raises
        ------
        ValueError
            If the slice step is negative.
        """
        start = arg.start
        stop = arg.stop
        step = arg.step

        if step is not None and step < 0:
            raise ValueError(f"Invalid step {step}. Must be non-negative")

        mask: bool | np.ndarray = True

        if step is None:
            step = 1

        if start is None:
            if step > 1:
                mask &= self._ascending_count % step == 0

        elif start >= 0:
            mask &= self._ascending_count >= start

            if step > 1:
                # stepping is counted from ``start``, not from the group start
                mask &= (self._ascending_count - start) % step == 0

        else:
            # Negative start: keep only rows close enough to the group end.
            mask &= self._descending_count < -start

            # Offset of each row from the slice start.
            # NOTE(review): ``limit_array`` appears to flag rows of groups
            # with fewer than ``-start`` rows, where the slice begins at the
            # first row and the offset is the ascending count - this assumes
            # the two counts are zero-based positions from each end of the
            # group; confirm against ``_cumcount_array``.
            offset_array = self._descending_count + start + 1
            limit_array = (
                self._ascending_count + self._descending_count + (start + 1)
            ) < 0
            offset_array = np.where(limit_array, self._ascending_count, offset_array)

            mask &= offset_array % step == 0

        if stop is not None:
            if stop >= 0:
                mask &= self._ascending_count < stop
            else:
                mask &= self._descending_count >= -stop

        return mask

    @cache_readonly
    def _ascending_count(self) -> np.ndarray:
        """Cached result of ``self._cumcount_array()``."""
        if TYPE_CHECKING:
            groupby_self = cast(groupby.GroupBy, self)
        else:
            groupby_self = self

        return groupby_self._cumcount_array()

    @cache_readonly
    def _descending_count(self) -> np.ndarray:
        """Cached result of ``self._cumcount_array(ascending=False)``."""
        if TYPE_CHECKING:
            groupby_self = cast(groupby.GroupBy, self)
        else:
            groupby_self = self

        return groupby_self._cumcount_array(ascending=False)
|
||||
|
||||
|
||||
# The class docstring is supplied by ``@doc`` from
# ``GroupByIndexingMixin._positional_selector``; none is declared here.
@doc(GroupByIndexingMixin._positional_selector)
class GroupByPositionalSelector:
    def __init__(self, groupby_object: groupby.GroupBy) -> None:
        self.groupby_object = groupby_object

    def __getitem__(self, arg: PositionalIndexer | tuple) -> DataFrame | Series:
        """
        Select rows of each group by position.

        This is the implementation behind ``GroupBy._positional_selector``.

        Parameters
        ----------
        arg : PositionalIndexer | tuple
            One of:

            - an int
            - an iterable of ints, such as a list or a range
            - a slice whose step is None or positive
            - a tuple mixing integers and slices

        Returns
        -------
        Series
            The filtered subset of the original groupby Series.
        DataFrame
            The filtered subset of the original groupby DataFrame.

        See Also
        --------
        DataFrame.iloc : Integer-location based indexing for selection by position.
        GroupBy.head : Return first n rows of each group.
        GroupBy.tail : Return last n rows of each group.
        GroupBy._positional_selector : Return positional selection for each group.
        GroupBy.nth : Take the nth row from each group if n is an int, or a
            subset of rows, if n is a list of ints.
        """
        grouped = self.groupby_object
        # translate the indexer into a row mask, then apply it
        return grouped._mask_selected_obj(
            grouped._make_mask_from_positional_indexer(arg)
        )
|
||||
|
||||
|
||||
class GroupByNthSelector:
    """
    Stand-in for ``GroupBy.nth`` that supports both call and index syntax.

    Either form forwards to ``_nth`` on the wrapped groupby object.
    """

    def __init__(self, groupby_object: groupby.GroupBy) -> None:
        self.groupby_object = groupby_object

    def __call__(
        self,
        n: PositionalIndexer | tuple,
        dropna: Literal["any", "all", None] = None,
    ) -> DataFrame | Series:
        # call form: ``nth(n, dropna)``
        nth_impl = self.groupby_object._nth
        return nth_impl(n, dropna)

    def __getitem__(self, n: PositionalIndexer | tuple) -> DataFrame | Series:
        # index form: ``nth[n]``; ``dropna`` is left at ``_nth``'s own default
        nth_impl = self.groupby_object._nth
        return nth_impl(n)
|
181
lib/python3.11/site-packages/pandas/core/groupby/numba_.py
Normal file
181
lib/python3.11/site-packages/pandas/core/groupby/numba_.py
Normal file
@ -0,0 +1,181 @@
|
||||
"""Common utilities for Numba operations with groupby ops"""
|
||||
from __future__ import annotations
|
||||
|
||||
import functools
|
||||
import inspect
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Callable,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.compat._optional import import_optional_dependency
|
||||
|
||||
from pandas.core.util.numba_ import (
|
||||
NumbaUtilError,
|
||||
jit_user_function,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import Scalar
|
||||
|
||||
|
||||
def validate_udf(func: Callable) -> None:
    """
    Validate user defined function for ops when using Numba with groupby ops.

    The first signature arguments should include:

    def f(values, index, ...):
        ...

    Parameters
    ----------
    func : function
        user defined function

    Returns
    -------
    None

    Raises
    ------
    NotImplementedError
        If ``func`` is not callable.
    NumbaUtilError
        If the first two parameters of ``func`` are not named ``values``
        and ``index``, in that order.
    """
    if not callable(func):
        raise NotImplementedError(
            "Numba engine can only be used with a single function."
        )
    udf_signature = list(inspect.signature(func).parameters.keys())
    expected_args = ["values", "index"]
    min_number_args = len(expected_args)
    # A signature shorter than ``expected_args`` can never equal it, so the
    # slice comparison alone also covers the "too few arguments" case.
    if udf_signature[:min_number_args] != expected_args:
        # Not every callable has ``__name__`` (e.g. ``functools.partial``
        # objects or instances defining ``__call__``); fall back to the repr
        # so the intended NumbaUtilError is raised, not an AttributeError.
        func_name = getattr(func, "__name__", repr(func))
        raise NumbaUtilError(
            f"The first {min_number_args} arguments to {func_name} must be "
            f"{expected_args}"
        )
|
||||
|
||||
|
||||
@functools.cache
def generate_numba_agg_func(
    func: Callable[..., Scalar],
    nopython: bool,
    nogil: bool,
    parallel: bool,
) -> Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int, Any], np.ndarray]:
    """
    Generate a numba jitted agg function specified by values from engine_kwargs.

    1. jit the user's function
    2. Return a groupby agg function with the jitted function inline

    Configurations specified in engine_kwargs apply to both the user's
    function _AND_ the groupby evaluation loop.

    The returned function is memoized by ``functools.cache`` on the
    ``(func, nopython, nogil, parallel)`` arguments.

    Parameters
    ----------
    func : function
        function to be applied to each group and will be JITed
    nopython : bool
        nopython to be passed into numba.jit
    nogil : bool
        nogil to be passed into numba.jit
    parallel : bool
        parallel to be passed into numba.jit

    Returns
    -------
    Numba function
        Called as ``group_agg(values, index, begin, end, num_columns, *args)``;
        returns an array of shape ``(len(begin), num_columns)``.
    """
    numba_func = jit_user_function(func)
    # numba is an optional dependency: it is only resolved at call time,
    # while type checkers see a regular import.
    if TYPE_CHECKING:
        import numba
    else:
        numba = import_optional_dependency("numba")

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def group_agg(
        values: np.ndarray,
        index: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        num_columns: int,
        *args: Any,
    ) -> np.ndarray:
        # begin[i]:end[i] is the row range of group i
        assert len(begin) == len(end)
        num_groups = len(begin)

        # one output row per group, one output column per input column
        result = np.empty((num_groups, num_columns))
        for i in numba.prange(num_groups):
            group_index = index[begin[i] : end[i]]
            for j in numba.prange(num_columns):
                group = values[begin[i] : end[i], j]
                # the user function reduces one column of one group to a scalar
                result[i, j] = numba_func(group, group_index, *args)
        return result

    return group_agg
|
||||
|
||||
|
||||
@functools.cache
def generate_numba_transform_func(
    func: Callable[..., np.ndarray],
    nopython: bool,
    nogil: bool,
    parallel: bool,
) -> Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int, Any], np.ndarray]:
    """
    Generate a numba jitted transform function specified by values from engine_kwargs.

    1. jit the user's function
    2. Return a groupby transform function with the jitted function inline

    Configurations specified in engine_kwargs apply to both the user's
    function _AND_ the groupby evaluation loop.

    The returned function is memoized by ``functools.cache`` on the
    ``(func, nopython, nogil, parallel)`` arguments.

    Parameters
    ----------
    func : function
        function to be applied to each group and will be JITed
    nopython : bool
        nopython to be passed into numba.jit
    nogil : bool
        nogil to be passed into numba.jit
    parallel : bool
        parallel to be passed into numba.jit

    Returns
    -------
    Numba function
        Called as
        ``group_transform(values, index, begin, end, num_columns, *args)``;
        returns an array of shape ``(len(values), num_columns)``.
    """
    numba_func = jit_user_function(func)
    # numba is an optional dependency: it is only resolved at call time,
    # while type checkers see a regular import.
    if TYPE_CHECKING:
        import numba
    else:
        numba = import_optional_dependency("numba")

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def group_transform(
        values: np.ndarray,
        index: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        num_columns: int,
        *args: Any,
    ) -> np.ndarray:
        # begin[i]:end[i] is the row range of group i
        assert len(begin) == len(end)
        num_groups = len(begin)

        # one output row per input row, one output column per input column
        result = np.empty((len(values), num_columns))
        for i in numba.prange(num_groups):
            group_index = index[begin[i] : end[i]]
            for j in numba.prange(num_columns):
                group = values[begin[i] : end[i], j]
                # the user function's result is written back over the rows
                # of the group it was computed from
                result[begin[i] : end[i], j] = numba_func(group, group_index, *args)
        return result

    return group_transform
|
1208
lib/python3.11/site-packages/pandas/core/groupby/ops.py
Normal file
1208
lib/python3.11/site-packages/pandas/core/groupby/ops.py
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user