done
85 lib/python3.11/site-packages/pandas/core/internals/__init__.py Normal file
@@ -0,0 +1,85 @@
from pandas.core.internals.api import make_block  # 2023-09-18 pyarrow uses this
from pandas.core.internals.array_manager import (
    ArrayManager,
    SingleArrayManager,
)
from pandas.core.internals.base import (
    DataManager,
    SingleDataManager,
)
from pandas.core.internals.concat import concatenate_managers
from pandas.core.internals.managers import (
    BlockManager,
    SingleBlockManager,
)

__all__ = [
    "Block",  # pylint: disable=undefined-all-variable
    "DatetimeTZBlock",  # pylint: disable=undefined-all-variable
    "ExtensionBlock",  # pylint: disable=undefined-all-variable
    "make_block",
    "DataManager",
    "ArrayManager",
    "BlockManager",
    "SingleDataManager",
    "SingleBlockManager",
    "SingleArrayManager",
    "concatenate_managers",
]


def __getattr__(name: str):
    # GH#55139
    import warnings

    if name == "create_block_manager_from_blocks":
        # GH#33892
        warnings.warn(
            f"{name} is deprecated and will be removed in a future version. "
            "Use public APIs instead.",
            DeprecationWarning,
            # https://github.com/pandas-dev/pandas/pull/55139#pullrequestreview-1720690758
            # on hard-coding stacklevel
            stacklevel=2,
        )
        from pandas.core.internals.managers import create_block_manager_from_blocks

        return create_block_manager_from_blocks

    if name in [
        "NumericBlock",
        "ObjectBlock",
        "Block",
        "ExtensionBlock",
        "DatetimeTZBlock",
    ]:
        warnings.warn(
            f"{name} is deprecated and will be removed in a future version. "
            "Use public APIs instead.",
            DeprecationWarning,
            # https://github.com/pandas-dev/pandas/pull/55139#pullrequestreview-1720690758
            # on hard-coding stacklevel
            stacklevel=2,
        )
        if name == "NumericBlock":
            from pandas.core.internals.blocks import NumericBlock

            return NumericBlock
        elif name == "DatetimeTZBlock":
            from pandas.core.internals.blocks import DatetimeTZBlock

            return DatetimeTZBlock
        elif name == "ExtensionBlock":
            from pandas.core.internals.blocks import ExtensionBlock

            return ExtensionBlock
        elif name == "Block":
            from pandas.core.internals.blocks import Block

            return Block
        else:
            from pandas.core.internals.blocks import ObjectBlock

            return ObjectBlock

    raise AttributeError(f"module 'pandas.core.internals' has no attribute '{name}'")
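The module-level `__getattr__` above relies on PEP 562: it runs only when normal attribute lookup fails, so the deprecated names stay importable without being defined at module scope. A minimal self-contained sketch of the same pattern (module and class names here are hypothetical, not pandas APIs):

# mylib/shims.py -- hypothetical module illustrating the PEP 562 shim used above
import warnings


def __getattr__(name: str):
    if name == "OldThing":
        warnings.warn(
            "OldThing is deprecated; import NewThing instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        from mylib.new_home import NewThing  # hypothetical new location

        return NewThing
    raise AttributeError(f"module 'mylib.shims' has no attribute '{name}'")

With this in place, `from mylib.shims import OldThing` keeps working but emits a DeprecationWarning pointing at the caller.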
156 lib/python3.11/site-packages/pandas/core/internals/api.py Normal file
@@ -0,0 +1,156 @@
"""
|
||||
This is a pseudo-public API for downstream libraries. We ask that downstream
|
||||
authors
|
||||
|
||||
1) Try to avoid using internals directly altogether, and failing that,
|
||||
2) Use only functions exposed here (or in core.internals)
|
||||
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs.internals import BlockPlacement
|
||||
|
||||
from pandas.core.dtypes.common import pandas_dtype
|
||||
from pandas.core.dtypes.dtypes import (
|
||||
DatetimeTZDtype,
|
||||
PeriodDtype,
|
||||
)
|
||||
|
||||
from pandas.core.arrays import DatetimeArray
|
||||
from pandas.core.construction import extract_array
|
||||
from pandas.core.internals.blocks import (
|
||||
check_ndim,
|
||||
ensure_block_shape,
|
||||
extract_pandas_array,
|
||||
get_block_type,
|
||||
maybe_coerce_values,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import Dtype
|
||||
|
||||
from pandas.core.internals.blocks import Block
|
||||
|
||||
|
||||
def make_block(
|
||||
values, placement, klass=None, ndim=None, dtype: Dtype | None = None
|
||||
) -> Block:
|
||||
"""
|
||||
This is a pseudo-public analogue to blocks.new_block.
|
||||
|
||||
We ask that downstream libraries use this rather than any fully-internal
|
||||
APIs, including but not limited to:
|
||||
|
||||
- core.internals.blocks.make_block
|
||||
- Block.make_block
|
||||
- Block.make_block_same_class
|
||||
- Block.__init__
|
||||
"""
|
||||
if dtype is not None:
|
||||
dtype = pandas_dtype(dtype)
|
||||
|
||||
values, dtype = extract_pandas_array(values, dtype, ndim)
|
||||
|
||||
from pandas.core.internals.blocks import (
|
||||
DatetimeTZBlock,
|
||||
ExtensionBlock,
|
||||
)
|
||||
|
||||
if klass is ExtensionBlock and isinstance(values.dtype, PeriodDtype):
|
||||
# GH-44681 changed PeriodArray to be stored in the 2D
|
||||
# NDArrayBackedExtensionBlock instead of ExtensionBlock
|
||||
# -> still allow ExtensionBlock to be passed in this case for back compat
|
||||
klass = None
|
||||
|
||||
if klass is None:
|
||||
dtype = dtype or values.dtype
|
||||
klass = get_block_type(dtype)
|
||||
|
||||
elif klass is DatetimeTZBlock and not isinstance(values.dtype, DatetimeTZDtype):
|
||||
# pyarrow calls get here
|
||||
values = DatetimeArray._simple_new(
|
||||
# error: Argument "dtype" to "_simple_new" of "DatetimeArray" has
|
||||
# incompatible type "Union[ExtensionDtype, dtype[Any], None]";
|
||||
# expected "Union[dtype[datetime64], DatetimeTZDtype]"
|
||||
values,
|
||||
dtype=dtype, # type: ignore[arg-type]
|
||||
)
|
||||
|
||||
if not isinstance(placement, BlockPlacement):
|
||||
placement = BlockPlacement(placement)
|
||||
|
||||
ndim = maybe_infer_ndim(values, placement, ndim)
|
||||
if isinstance(values.dtype, (PeriodDtype, DatetimeTZDtype)):
|
||||
# GH#41168 ensure we can pass 1D dt64tz values
|
||||
# More generally, any EA dtype that isn't is_1d_only_ea_dtype
|
||||
values = extract_array(values, extract_numpy=True)
|
||||
values = ensure_block_shape(values, ndim)
|
||||
|
||||
check_ndim(values, placement, ndim)
|
||||
values = maybe_coerce_values(values)
|
||||
return klass(values, ndim=ndim, placement=placement)
|
||||
|
||||
|
||||
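As a rough sketch of how a downstream caller uses this entry point (written against the 2.x layout shown here; relying on internals at all is discouraged by the docstring above):

import numpy as np

from pandas.core.internals.api import make_block

# Two float64 columns of three rows each; `placement` gives the column
# positions the block occupies in its eventual DataFrame.
vals = np.arange(6, dtype="float64").reshape(2, 3)
blk = make_block(vals, placement=[0, 1])  # klass, ndim, and dtype are inferred
blk.dtype, blk.shape  # (dtype('float64'), (2, 3))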
def maybe_infer_ndim(values, placement: BlockPlacement, ndim: int | None) -> int:
    """
    If `ndim` is not provided, infer it from placement and values.
    """
    if ndim is None:
        # GH#38134 Block constructor now assumes ndim is not None
        if not isinstance(values.dtype, np.dtype):
            if len(placement) != 1:
                ndim = 1
            else:
                ndim = 2
        else:
            ndim = values.ndim
    return ndim
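In short: numpy-dtype values report their own ndim, while extension arrays (1D-only here) count as a single 2D column unless the placement spans several positions. A small illustration (again assuming this 2.x layout):

import numpy as np
import pandas as pd

from pandas._libs.internals import BlockPlacement
from pandas.core.internals.api import maybe_infer_ndim

bp = BlockPlacement(slice(0, 1))  # one column
maybe_infer_ndim(np.arange(3), bp, None)                        # 1: ndarray reports values.ndim
maybe_infer_ndim(pd.array([1, 2, 3], dtype="Int64"), bp, None)  # 2: EA treated as one 2D column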
def __getattr__(name: str):
    # GH#55139
    import warnings

    if name in [
        "Block",
        "ExtensionBlock",
        "DatetimeTZBlock",
        "create_block_manager_from_blocks",
    ]:
        # GH#33892
        warnings.warn(
            f"{name} is deprecated and will be removed in a future version. "
            "Use public APIs instead.",
            DeprecationWarning,
            # https://github.com/pandas-dev/pandas/pull/55139#pullrequestreview-1720690758
            # on hard-coding stacklevel
            stacklevel=2,
        )

        if name == "create_block_manager_from_blocks":
            from pandas.core.internals.managers import create_block_manager_from_blocks

            return create_block_manager_from_blocks

        elif name == "Block":
            from pandas.core.internals.blocks import Block

            return Block

        elif name == "DatetimeTZBlock":
            from pandas.core.internals.blocks import DatetimeTZBlock

            return DatetimeTZBlock

        elif name == "ExtensionBlock":
            from pandas.core.internals.blocks import ExtensionBlock

            return ExtensionBlock

    raise AttributeError(
        f"module 'pandas.core.internals.api' has no attribute '{name}'"
    )
1340 lib/python3.11/site-packages/pandas/core/internals/array_manager.py Normal file
File diff suppressed because it is too large
407 lib/python3.11/site-packages/pandas/core/internals/base.py Normal file
@@ -0,0 +1,407 @@
"""
|
||||
Base class for the internal managers. Both BlockManager and ArrayManager
|
||||
inherit from this class.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Literal,
|
||||
cast,
|
||||
final,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._config import (
|
||||
using_copy_on_write,
|
||||
warn_copy_on_write,
|
||||
)
|
||||
|
||||
from pandas._libs import (
|
||||
algos as libalgos,
|
||||
lib,
|
||||
)
|
||||
from pandas.errors import AbstractMethodError
|
||||
from pandas.util._validators import validate_bool_kwarg
|
||||
|
||||
from pandas.core.dtypes.cast import (
|
||||
find_common_type,
|
||||
np_can_hold_element,
|
||||
)
|
||||
from pandas.core.dtypes.dtypes import (
|
||||
ExtensionDtype,
|
||||
SparseDtype,
|
||||
)
|
||||
|
||||
from pandas.core.base import PandasObject
|
||||
from pandas.core.construction import extract_array
|
||||
from pandas.core.indexes.api import (
|
||||
Index,
|
||||
default_index,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import (
|
||||
ArrayLike,
|
||||
AxisInt,
|
||||
DtypeObj,
|
||||
Self,
|
||||
Shape,
|
||||
)
|
||||
|
||||
|
||||
class _AlreadyWarned:
|
||||
def __init__(self):
|
||||
# This class is used on the manager level to the block level to
|
||||
# ensure that we warn only once. The block method can update the
|
||||
# warned_already option without returning a value to keep the
|
||||
# interface consistent. This is only a temporary solution for
|
||||
# CoW warnings.
|
||||
self.warned_already = False
|
||||
|
||||
|
||||
class DataManager(PandasObject):
|
||||
# TODO share more methods/attributes
|
||||
|
||||
axes: list[Index]
|
||||
|
||||
@property
|
||||
def items(self) -> Index:
|
||||
raise AbstractMethodError(self)
|
||||
|
||||
@final
|
||||
def __len__(self) -> int:
|
||||
return len(self.items)
|
||||
|
||||
@property
|
||||
def ndim(self) -> int:
|
||||
return len(self.axes)
|
||||
|
||||
@property
|
||||
def shape(self) -> Shape:
|
||||
return tuple(len(ax) for ax in self.axes)
|
||||
|
||||
@final
|
||||
def _validate_set_axis(self, axis: AxisInt, new_labels: Index) -> None:
|
||||
# Caller is responsible for ensuring we have an Index object.
|
||||
old_len = len(self.axes[axis])
|
||||
new_len = len(new_labels)
|
||||
|
||||
if axis == 1 and len(self.items) == 0:
|
||||
# If we are setting the index on a DataFrame with no columns,
|
||||
# it is OK to change the length.
|
||||
pass
|
||||
|
||||
elif new_len != old_len:
|
||||
raise ValueError(
|
||||
f"Length mismatch: Expected axis has {old_len} elements, new "
|
||||
f"values have {new_len} elements"
|
||||
)
|
||||
|
||||
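At the user level this check is what raises when an axis is assigned labels of the wrong length, for example:

import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
df.columns = ["x"]  # ValueError: Length mismatch: Expected axis has 2 elements,
                    # new values have 1 elements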
    def reindex_indexer(
        self,
        new_axis,
        indexer,
        axis: AxisInt,
        fill_value=None,
        allow_dups: bool = False,
        copy: bool = True,
        only_slice: bool = False,
    ) -> Self:
        raise AbstractMethodError(self)

    @final
    def reindex_axis(
        self,
        new_index: Index,
        axis: AxisInt,
        fill_value=None,
        only_slice: bool = False,
    ) -> Self:
        """
        Conform data manager to new index.
        """
        new_index, indexer = self.axes[axis].reindex(new_index)

        return self.reindex_indexer(
            new_index,
            indexer,
            axis=axis,
            fill_value=fill_value,
            copy=False,
            only_slice=only_slice,
        )

    def _equal_values(self, other: Self) -> bool:
        """
        To be implemented by the subclasses. Only check the column values
        assuming shape and indexes have already been checked.
        """
        raise AbstractMethodError(self)

    @final
    def equals(self, other: object) -> bool:
        """
        Implementation for DataFrame.equals
        """
        if not isinstance(other, type(self)):
            return False

        self_axes, other_axes = self.axes, other.axes
        if len(self_axes) != len(other_axes):
            return False
        if not all(ax1.equals(ax2) for ax1, ax2 in zip(self_axes, other_axes)):
            return False

        return self._equal_values(other)
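`DataFrame.equals` bottoms out in this `equals`/`_equal_values` pair: axes are compared first, then column values blockwise (NaNs in matching positions compare equal). For example:

import pandas as pd

df1 = pd.DataFrame({"a": [1.0, float("nan")]})
df2 = pd.DataFrame({"a": [1.0, float("nan")]})
df1.equals(df2)                              # True
df1.equals(df2.rename(columns={"a": "b"}))   # False: axes differ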
    def apply(
        self,
        f,
        align_keys: list[str] | None = None,
        **kwargs,
    ) -> Self:
        raise AbstractMethodError(self)

    def apply_with_block(
        self,
        f,
        align_keys: list[str] | None = None,
        **kwargs,
    ) -> Self:
        raise AbstractMethodError(self)

    @final
    def isna(self, func) -> Self:
        return self.apply("apply", func=func)

    @final
    def fillna(self, value, limit: int | None, inplace: bool, downcast) -> Self:
        if limit is not None:
            # Do this validation even if we go through one of the no-op paths
            limit = libalgos.validate_limit(None, limit=limit)

        return self.apply_with_block(
            "fillna",
            value=value,
            limit=limit,
            inplace=inplace,
            downcast=downcast,
            using_cow=using_copy_on_write(),
            already_warned=_AlreadyWarned(),
        )

    @final
    def where(self, other, cond, align: bool) -> Self:
        if align:
            align_keys = ["other", "cond"]
        else:
            align_keys = ["cond"]
            other = extract_array(other, extract_numpy=True)

        return self.apply_with_block(
            "where",
            align_keys=align_keys,
            other=other,
            cond=cond,
            using_cow=using_copy_on_write(),
        )

    @final
    def putmask(self, mask, new, align: bool = True, warn: bool = True) -> Self:
        if align:
            align_keys = ["new", "mask"]
        else:
            align_keys = ["mask"]
            new = extract_array(new, extract_numpy=True)

        already_warned = None
        if warn_copy_on_write():
            already_warned = _AlreadyWarned()
            if not warn:
                already_warned.warned_already = True

        return self.apply_with_block(
            "putmask",
            align_keys=align_keys,
            mask=mask,
            new=new,
            using_cow=using_copy_on_write(),
            already_warned=already_warned,
        )
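`where` and `putmask` are the aligned and mask-only halves of conditional replacement; the user-facing `DataFrame.where`/`mask` end up here. A quick user-level illustration:

import pandas as pd

df = pd.DataFrame({"a": [1, -2, 3]})
df.where(df > 0)          # negative entries become NaN (int64 upcast to float64)
df.mask(df < 0, other=0)  # inverted condition, filling with 0 instead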
    @final
    def round(self, decimals: int, using_cow: bool = False) -> Self:
        return self.apply_with_block(
            "round",
            decimals=decimals,
            using_cow=using_cow,
        )

    @final
    def replace(self, to_replace, value, inplace: bool) -> Self:
        inplace = validate_bool_kwarg(inplace, "inplace")
        # NDFrame.replace ensures the not-is_list_likes here
        assert not lib.is_list_like(to_replace)
        assert not lib.is_list_like(value)
        return self.apply_with_block(
            "replace",
            to_replace=to_replace,
            value=value,
            inplace=inplace,
            using_cow=using_copy_on_write(),
            already_warned=_AlreadyWarned(),
        )

    @final
    def replace_regex(self, **kwargs) -> Self:
        return self.apply_with_block(
            "_replace_regex",
            **kwargs,
            using_cow=using_copy_on_write(),
            already_warned=_AlreadyWarned(),
        )

    @final
    def replace_list(
        self,
        src_list: list[Any],
        dest_list: list[Any],
        inplace: bool = False,
        regex: bool = False,
    ) -> Self:
        """do a list replace"""
        inplace = validate_bool_kwarg(inplace, "inplace")

        bm = self.apply_with_block(
            "replace_list",
            src_list=src_list,
            dest_list=dest_list,
            inplace=inplace,
            regex=regex,
            using_cow=using_copy_on_write(),
            already_warned=_AlreadyWarned(),
        )
        bm._consolidate_inplace()
        return bm

    def interpolate(self, inplace: bool, **kwargs) -> Self:
        return self.apply_with_block(
            "interpolate",
            inplace=inplace,
            **kwargs,
            using_cow=using_copy_on_write(),
            already_warned=_AlreadyWarned(),
        )

    def pad_or_backfill(self, inplace: bool, **kwargs) -> Self:
        return self.apply_with_block(
            "pad_or_backfill",
            inplace=inplace,
            **kwargs,
            using_cow=using_copy_on_write(),
            already_warned=_AlreadyWarned(),
        )

    def shift(self, periods: int, fill_value) -> Self:
        if fill_value is lib.no_default:
            fill_value = None

        return self.apply_with_block("shift", periods=periods, fill_value=fill_value)

    # --------------------------------------------------------------------
    # Consolidation: No-ops for all but BlockManager

    def is_consolidated(self) -> bool:
        return True

    def consolidate(self) -> Self:
        return self

    def _consolidate_inplace(self) -> None:
        return
class SingleDataManager(DataManager):
    @property
    def ndim(self) -> Literal[1]:
        return 1

    @final
    @property
    def array(self) -> ArrayLike:
        """
        Quick access to the backing array of the Block or SingleArrayManager.
        """
        # error: "SingleDataManager" has no attribute "arrays"; maybe "array"
        return self.arrays[0]  # type: ignore[attr-defined]

    def setitem_inplace(self, indexer, value, warn: bool = True) -> None:
        """
        Set values with indexer.

        For Single[Block/Array]Manager, this backs s[indexer] = value

        This is an inplace version of `setitem()`, mutating the manager/values
        in place, not returning a new Manager (and Block), and thus never changing
        the dtype.
        """
        arr = self.array

        # EAs will do this validation in their own __setitem__ methods.
        if isinstance(arr, np.ndarray):
            # Note: checking for ndarray instead of np.dtype means we exclude
            # dt64/td64, which do their own validation.
            value = np_can_hold_element(arr.dtype, value)

        if isinstance(value, np.ndarray) and value.ndim == 1 and len(value) == 1:
            # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615
            value = value[0, ...]

        arr[indexer] = value

    def grouped_reduce(self, func):
        arr = self.array
        res = func(arr)
        index = default_index(len(res))

        mgr = type(self).from_array(res, index)
        return mgr

    @classmethod
    def from_array(cls, arr: ArrayLike, index: Index):
        raise AbstractMethodError(cls)
def interleaved_dtype(dtypes: list[DtypeObj]) -> DtypeObj | None:
    """
    Find the common dtype for `dtypes`.

    Parameters
    ----------
    dtypes : list[DtypeObj]

    Returns
    -------
    dtype : np.dtype, ExtensionDtype, or None
        None is returned when `dtypes` is empty.
    """
    if not len(dtypes):
        return None

    return find_common_type(dtypes)


def ensure_np_dtype(dtype: DtypeObj) -> np.dtype:
    # TODO: https://github.com/pandas-dev/pandas/issues/22791
    # Give EAs some input on what happens here. Sparse needs this.
    if isinstance(dtype, SparseDtype):
        dtype = dtype.subtype
        dtype = cast(np.dtype, dtype)
    elif isinstance(dtype, ExtensionDtype):
        dtype = np.dtype("object")
    elif dtype == np.dtype(str):
        dtype = np.dtype("object")
    return dtype
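`interleaved_dtype` is a thin guard around `find_common_type`; it answers what dtype a dense interleaving of mixed blocks would need. For example:

import numpy as np

from pandas.core.dtypes.cast import find_common_type

find_common_type([np.dtype("int64"), np.dtype("float64")])  # dtype('float64')
find_common_type([np.dtype("int64"), np.dtype("O")])        # dtype('O')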
2923 lib/python3.11/site-packages/pandas/core/internals/blocks.py Normal file
File diff suppressed because it is too large
598 lib/python3.11/site-packages/pandas/core/internals/concat.py Normal file
@@ -0,0 +1,598 @@
from __future__ import annotations

from typing import (
    TYPE_CHECKING,
    cast,
)
import warnings

import numpy as np

from pandas._libs import (
    NaT,
    algos as libalgos,
    internals as libinternals,
    lib,
)
from pandas._libs.missing import NA
from pandas.util._decorators import cache_readonly
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.cast import (
    ensure_dtype_can_hold_na,
    find_common_type,
)
from pandas.core.dtypes.common import (
    is_1d_only_ea_dtype,
    is_scalar,
    needs_i8_conversion,
)
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.dtypes import (
    ExtensionDtype,
    SparseDtype,
)
from pandas.core.dtypes.missing import (
    is_valid_na_for_dtype,
    isna,
    isna_all,
)

from pandas.core.construction import ensure_wrapped_if_datetimelike
from pandas.core.internals.array_manager import ArrayManager
from pandas.core.internals.blocks import (
    ensure_block_shape,
    new_block_2d,
)
from pandas.core.internals.managers import (
    BlockManager,
    make_na_array,
)

if TYPE_CHECKING:
    from collections.abc import Sequence

    from pandas._typing import (
        ArrayLike,
        AxisInt,
        DtypeObj,
        Manager2D,
        Shape,
    )

    from pandas import Index
    from pandas.core.internals.blocks import (
        Block,
        BlockPlacement,
    )
def _concatenate_array_managers(
    mgrs: list[ArrayManager], axes: list[Index], concat_axis: AxisInt
) -> Manager2D:
    """
    Concatenate array managers into one.

    Parameters
    ----------
    mgrs : list of ArrayManager
    axes : list of Index
    concat_axis : int

    Returns
    -------
    ArrayManager
    """
    if concat_axis == 1:
        return mgrs[0].concat_vertical(mgrs, axes)
    else:
        # concatting along the columns -> combine reindexed arrays in a single manager
        assert concat_axis == 0
        return mgrs[0].concat_horizontal(mgrs, axes)
def concatenate_managers(
    mgrs_indexers, axes: list[Index], concat_axis: AxisInt, copy: bool
) -> Manager2D:
    """
    Concatenate block managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    BlockManager
    """

    needs_copy = copy and concat_axis == 0

    # TODO(ArrayManager) this assumes that all managers are of the same type
    if isinstance(mgrs_indexers[0][0], ArrayManager):
        mgrs = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers, needs_copy)
        # error: Argument 1 to "_concatenate_array_managers" has incompatible
        # type "List[BlockManager]"; expected "List[Union[ArrayManager,
        # SingleArrayManager, BlockManager, SingleBlockManager]]"
        return _concatenate_array_managers(
            mgrs, axes, concat_axis  # type: ignore[arg-type]
        )

    # Assertions disabled for performance
    # for tup in mgrs_indexers:
    #     # caller is responsible for ensuring this
    #     indexers = tup[1]
    #     assert concat_axis not in indexers

    if concat_axis == 0:
        mgrs = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers, needs_copy)
        return mgrs[0].concat_horizontal(mgrs, axes)

    if len(mgrs_indexers) > 0 and mgrs_indexers[0][0].nblocks > 0:
        first_dtype = mgrs_indexers[0][0].blocks[0].dtype
        if first_dtype in [np.float64, np.float32]:
            # TODO: support more dtypes here. This will be simpler once
            # JoinUnit.is_na behavior is deprecated.
            if (
                all(_is_homogeneous_mgr(mgr, first_dtype) for mgr, _ in mgrs_indexers)
                and len(mgrs_indexers) > 1
            ):
                # Fastpath!
                # Length restriction is just to avoid having to worry about 'copy'
                shape = tuple(len(x) for x in axes)
                nb = _concat_homogeneous_fastpath(mgrs_indexers, shape, first_dtype)
                return BlockManager((nb,), axes)

    mgrs = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers, needs_copy)

    if len(mgrs) == 1:
        mgr = mgrs[0]
        out = mgr.copy(deep=False)
        out.axes = axes
        return out

    concat_plan = _get_combined_plan(mgrs)

    blocks = []
    values: ArrayLike

    for placement, join_units in concat_plan:
        unit = join_units[0]
        blk = unit.block

        if _is_uniform_join_units(join_units):
            vals = [ju.block.values for ju in join_units]

            if not blk.is_extension:
                # _is_uniform_join_units ensures a single dtype, so
                # we can use np.concatenate, which is more performant
                # than concat_compat
                # error: Argument 1 to "concatenate" has incompatible type
                # "List[Union[ndarray[Any, Any], ExtensionArray]]";
                # expected "Union[_SupportsArray[dtype[Any]],
                # _NestedSequence[_SupportsArray[dtype[Any]]]]"
                values = np.concatenate(vals, axis=1)  # type: ignore[arg-type]
            elif is_1d_only_ea_dtype(blk.dtype):
                # TODO(EA2D): special-casing not needed with 2D EAs
                values = concat_compat(vals, axis=0, ea_compat_axis=True)
                values = ensure_block_shape(values, ndim=2)
            else:
                values = concat_compat(vals, axis=1)

            values = ensure_wrapped_if_datetimelike(values)

            fastpath = blk.values.dtype == values.dtype
        else:
            values = _concatenate_join_units(join_units, copy=copy)
            fastpath = False

        if fastpath:
            b = blk.make_block_same_class(values, placement=placement)
        else:
            b = new_block_2d(values, placement=placement)

        blocks.append(b)

    return BlockManager(tuple(blocks), axes)
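All of the above is what runs underneath `pd.concat` for BlockManager-backed frames: single-block float frames of matching dtype are candidates for the homogeneous fastpath, while mixed dtypes fall through to the JoinUnit machinery below. Seen from the outside (which path is taken is an internal detail of this version):

import pandas as pd

a = pd.DataFrame({"x": [1.0, 2.0]})
b = pd.DataFrame({"x": [3.0, 4.0]})
pd.concat([a, b], ignore_index=True)         # both float64: fastpath candidate

c = pd.DataFrame({"x": [1, 2]})              # int64
pd.concat([a, c], ignore_index=True).dtypes  # x: float64, via find_common_type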
def _maybe_reindex_columns_na_proxy(
    axes: list[Index],
    mgrs_indexers: list[tuple[BlockManager, dict[int, np.ndarray]]],
    needs_copy: bool,
) -> list[BlockManager]:
    """
    Reindex along columns so that all of the BlockManagers being concatenated
    have matching columns.

    Columns added in this reindexing have dtype=np.void, indicating they
    should be ignored when choosing a column's final dtype.
    """
    new_mgrs = []

    for mgr, indexers in mgrs_indexers:
        # For axis=0 (i.e. columns) we use_na_proxy and only_slice, so this
        # is a cheap reindexing.
        for i, indexer in indexers.items():
            mgr = mgr.reindex_indexer(
                axes[i],
                indexers[i],
                axis=i,
                copy=False,
                only_slice=True,  # only relevant for i==0
                allow_dups=True,
                use_na_proxy=True,  # only relevant for i==0
            )
        if needs_copy and not indexers:
            mgr = mgr.copy()

        new_mgrs.append(mgr)
    return new_mgrs


def _is_homogeneous_mgr(mgr: BlockManager, first_dtype: DtypeObj) -> bool:
    """
    Check if this Manager can be treated as a single ndarray.
    """
    if mgr.nblocks != 1:
        return False
    blk = mgr.blocks[0]
    if not (blk.mgr_locs.is_slice_like and blk.mgr_locs.as_slice.step == 1):
        return False

    return blk.dtype == first_dtype
def _concat_homogeneous_fastpath(
    mgrs_indexers, shape: Shape, first_dtype: np.dtype
) -> Block:
    """
    With single-Block managers with homogeneous dtypes (that can already hold nan),
    we avoid [...]
    """
    # assumes
    #  all(_is_homogeneous_mgr(mgr, first_dtype) for mgr, _ in mgrs_indexers)

    if all(not indexers for _, indexers in mgrs_indexers):
        # https://github.com/pandas-dev/pandas/pull/52685#issuecomment-1523287739
        arrs = [mgr.blocks[0].values.T for mgr, _ in mgrs_indexers]
        arr = np.concatenate(arrs).T
        bp = libinternals.BlockPlacement(slice(shape[0]))
        nb = new_block_2d(arr, bp)
        return nb

    arr = np.empty(shape, dtype=first_dtype)

    if first_dtype == np.float64:
        take_func = libalgos.take_2d_axis0_float64_float64
    else:
        take_func = libalgos.take_2d_axis0_float32_float32

    start = 0
    for mgr, indexers in mgrs_indexers:
        mgr_len = mgr.shape[1]
        end = start + mgr_len

        if 0 in indexers:
            take_func(
                mgr.blocks[0].values,
                indexers[0],
                arr[:, start:end],
            )
        else:
            # No reindexing necessary, we can copy values directly
            arr[:, start:end] = mgr.blocks[0].values

        start += mgr_len

    bp = libinternals.BlockPlacement(slice(shape[0]))
    nb = new_block_2d(arr, bp)
    return nb
def _get_combined_plan(
    mgrs: list[BlockManager],
) -> list[tuple[BlockPlacement, list[JoinUnit]]]:
    plan = []

    max_len = mgrs[0].shape[0]

    blknos_list = [mgr.blknos for mgr in mgrs]
    pairs = libinternals.get_concat_blkno_indexers(blknos_list)
    for ind, (blknos, bp) in enumerate(pairs):
        # assert bp.is_slice_like
        # assert len(bp) > 0

        units_for_bp = []
        for k, mgr in enumerate(mgrs):
            blkno = blknos[k]

            nb = _get_block_for_concat_plan(mgr, bp, blkno, max_len=max_len)
            unit = JoinUnit(nb)
            units_for_bp.append(unit)

        plan.append((bp, units_for_bp))

    return plan


def _get_block_for_concat_plan(
    mgr: BlockManager, bp: BlockPlacement, blkno: int, *, max_len: int
) -> Block:
    blk = mgr.blocks[blkno]
    # Assertions disabled for performance:
    #  assert bp.is_slice_like
    #  assert blkno != -1
    #  assert (mgr.blknos[bp] == blkno).all()

    if len(bp) == len(blk.mgr_locs) and (
        blk.mgr_locs.is_slice_like and blk.mgr_locs.as_slice.step == 1
    ):
        nb = blk
    else:
        ax0_blk_indexer = mgr.blklocs[bp.indexer]

        slc = lib.maybe_indices_to_slice(ax0_blk_indexer, max_len)
        # TODO: in all extant test cases 2023-04-08 we have a slice here.
        #  Will this always be the case?
        if isinstance(slc, slice):
            nb = blk.slice_block_columns(slc)
        else:
            nb = blk.take_block_columns(slc)

    # assert nb.shape == (len(bp), mgr.shape[1])
    return nb
class JoinUnit:
    def __init__(self, block: Block) -> None:
        self.block = block

    def __repr__(self) -> str:
        return f"{type(self).__name__}({repr(self.block)})"

    def _is_valid_na_for(self, dtype: DtypeObj) -> bool:
        """
        Check that we are all-NA of a type/dtype that is compatible with this dtype.
        Augments `self.is_na` with an additional check of the type of NA values.
        """
        if not self.is_na:
            return False

        blk = self.block
        if blk.dtype.kind == "V":
            return True

        if blk.dtype == object:
            values = blk.values
            return all(is_valid_na_for_dtype(x, dtype) for x in values.ravel(order="K"))

        na_value = blk.fill_value
        if na_value is NaT and blk.dtype != dtype:
            # e.g. we are dt64 and other is td64
            #  fill_values match but we should not cast blk.values to dtype
            # TODO: this will need updating if we ever have non-nano dt64/td64
            return False

        if na_value is NA and needs_i8_conversion(dtype):
            # FIXME: kludge; test_append_empty_frame_with_timedelta64ns_nat
            #  e.g. blk.dtype == "Int64" and dtype is td64, we dont want
            #  to consider these as matching
            return False

        # TODO: better to use can_hold_element?
        return is_valid_na_for_dtype(na_value, dtype)

    @cache_readonly
    def is_na(self) -> bool:
        blk = self.block
        if blk.dtype.kind == "V":
            return True

        if not blk._can_hold_na:
            return False

        values = blk.values
        if values.size == 0:
            # GH#39122 this case will return False once deprecation is enforced
            return True

        if isinstance(values.dtype, SparseDtype):
            return False

        if values.ndim == 1:
            # TODO(EA2D): no need for special case with 2D EAs
            val = values[0]
            if not is_scalar(val) or not isna(val):
                # ideally isna_all would do this short-circuiting
                return False
            return isna_all(values)
        else:
            val = values[0][0]
            if not is_scalar(val) or not isna(val):
                # ideally isna_all would do this short-circuiting
                return False
            return all(isna_all(row) for row in values)

    @cache_readonly
    def is_na_after_size_and_isna_all_deprecation(self) -> bool:
        """
        Will self.is_na be True after values.size == 0 deprecation and isna_all
        deprecation are enforced?
        """
        blk = self.block
        if blk.dtype.kind == "V":
            return True
        return False

    def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
        values: ArrayLike

        if upcasted_na is None and self.block.dtype.kind != "V":
            # No upcasting is necessary
            return self.block.values
        else:
            fill_value = upcasted_na

            if self._is_valid_na_for(empty_dtype):
                # note: always holds when self.block.dtype.kind == "V"
                blk_dtype = self.block.dtype

                if blk_dtype == np.dtype("object"):
                    # we want to avoid filling with np.nan if we are
                    # using None; we already know that we are all
                    # nulls
                    values = cast(np.ndarray, self.block.values)
                    if values.size and values[0, 0] is None:
                        fill_value = None

                return make_na_array(empty_dtype, self.block.shape, fill_value)

            return self.block.values
def _concatenate_join_units(join_units: list[JoinUnit], copy: bool) -> ArrayLike:
    """
    Concatenate values from several join units along axis=1.
    """
    empty_dtype, empty_dtype_future = _get_empty_dtype(join_units)

    has_none_blocks = any(unit.block.dtype.kind == "V" for unit in join_units)
    upcasted_na = _dtype_to_na_value(empty_dtype, has_none_blocks)

    to_concat = [
        ju.get_reindexed_values(empty_dtype=empty_dtype, upcasted_na=upcasted_na)
        for ju in join_units
    ]

    if any(is_1d_only_ea_dtype(t.dtype) for t in to_concat):
        # TODO(EA2D): special case not needed if all EAs used HybridBlocks

        # error: No overload variant of "__getitem__" of "ExtensionArray" matches
        # argument type "Tuple[int, slice]"
        to_concat = [
            t
            if is_1d_only_ea_dtype(t.dtype)
            else t[0, :]  # type: ignore[call-overload]
            for t in to_concat
        ]
        concat_values = concat_compat(to_concat, axis=0, ea_compat_axis=True)
        concat_values = ensure_block_shape(concat_values, 2)

    else:
        concat_values = concat_compat(to_concat, axis=1)

    if empty_dtype != empty_dtype_future:
        if empty_dtype == concat_values.dtype:
            # GH#39122, GH#40893
            warnings.warn(
                "The behavior of DataFrame concatenation with empty or all-NA "
                "entries is deprecated. In a future version, this will no longer "
                "exclude empty or all-NA columns when determining the result dtypes. "
                "To retain the old behavior, exclude the relevant entries before "
                "the concat operation.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
    return concat_values
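This deprecation is observable from plain `pd.concat`: an all-NA object column currently defers to the other operand's dtype but will stop doing so. On a 2.1.x build, something like the following emits the warning above:

import pandas as pd

floats = pd.DataFrame({"a": [1.0, 2.0]})
all_na = pd.DataFrame({"a": [None, None]})  # object dtype, all-NA
out = pd.concat([floats, all_na])           # FutureWarning; dtype float64 today,
out.dtypes                                  # object once the deprecation is enforced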
def _dtype_to_na_value(dtype: DtypeObj, has_none_blocks: bool):
    """
    Find the NA value to go with this dtype.
    """
    if isinstance(dtype, ExtensionDtype):
        return dtype.na_value
    elif dtype.kind in "mM":
        return dtype.type("NaT")
    elif dtype.kind in "fc":
        return dtype.type("NaN")
    elif dtype.kind == "b":
        # different from missing.na_value_for_dtype
        return None
    elif dtype.kind in "iu":
        if not has_none_blocks:
            # different from missing.na_value_for_dtype
            return None
        return np.nan
    elif dtype.kind == "O":
        return np.nan
    raise NotImplementedError
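The `dtype.type(...)` calls above produce a correctly-typed NA scalar; the kind codes ('mM' datetimelike, 'fc' float/complex, 'iu' integer, 'b' bool, 'O' object) are numpy dtype kinds. For example:

import numpy as np

np.dtype("datetime64[ns]").type("NaT")   # numpy.datetime64('NaT')
np.dtype("timedelta64[ns]").type("NaT")  # numpy.timedelta64('NaT')
np.dtype("float64").type("NaN")          # nan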
def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> tuple[DtypeObj, DtypeObj]:
    """
    Return the dtype to use when concatenating the specified units, together
    with the dtype that will apply once the empty/all-NA deprecation is enforced.

    Returns
    -------
    dtype : DtypeObj
    dtype_future : DtypeObj
    """
    if lib.dtypes_all_equal([ju.block.dtype for ju in join_units]):
        empty_dtype = join_units[0].block.dtype
        return empty_dtype, empty_dtype

    has_none_blocks = any(unit.block.dtype.kind == "V" for unit in join_units)

    dtypes = [unit.block.dtype for unit in join_units if not unit.is_na]
    if not len(dtypes):
        dtypes = [
            unit.block.dtype for unit in join_units if unit.block.dtype.kind != "V"
        ]

    dtype = find_common_type(dtypes)
    if has_none_blocks:
        dtype = ensure_dtype_can_hold_na(dtype)

    dtype_future = dtype
    if len(dtypes) != len(join_units):
        dtypes_future = [
            unit.block.dtype
            for unit in join_units
            if not unit.is_na_after_size_and_isna_all_deprecation
        ]
        if not len(dtypes_future):
            dtypes_future = [
                unit.block.dtype for unit in join_units if unit.block.dtype.kind != "V"
            ]

        if len(dtypes) != len(dtypes_future):
            dtype_future = find_common_type(dtypes_future)
            if has_none_blocks:
                dtype_future = ensure_dtype_can_hold_na(dtype_future)

    return dtype, dtype_future
def _is_uniform_join_units(join_units: list[JoinUnit]) -> bool:
    """
    Check if the join units consist of blocks of uniform type that can
    be concatenated using Block.concat_same_type instead of the generic
    _concatenate_join_units (which uses `concat_compat`).

    """
    first = join_units[0].block
    if first.dtype.kind == "V":
        return False
    return (
        # exclude cases where a) ju.block is None or b) we have e.g. Int64+int64
        all(type(ju.block) is type(first) for ju in join_units)
        and
        # e.g. DatetimeLikeBlock can be dt64 or td64, but these are not uniform
        all(
            ju.block.dtype == first.dtype
            # GH#42092 we only want the dtype_equal check for non-numeric blocks
            #  (for now, may change but that would need a deprecation)
            or ju.block.dtype.kind in "iub"
            for ju in join_units
        )
        and
        # no blocks that would get missing values (can lead to type upcasts)
        # unless we're an extension dtype.
        all(not ju.is_na or ju.block.is_extension for ju in join_units)
    )
1073 lib/python3.11/site-packages/pandas/core/internals/construction.py Normal file
File diff suppressed because it is too large
2375 lib/python3.11/site-packages/pandas/core/internals/managers.py Normal file
File diff suppressed because it is too large
154 lib/python3.11/site-packages/pandas/core/internals/ops.py Normal file
@@ -0,0 +1,154 @@
from __future__ import annotations

from typing import (
    TYPE_CHECKING,
    NamedTuple,
)

from pandas.core.dtypes.common import is_1d_only_ea_dtype

if TYPE_CHECKING:
    from collections.abc import Iterator

    from pandas._libs.internals import BlockPlacement
    from pandas._typing import ArrayLike

    from pandas.core.internals.blocks import Block
    from pandas.core.internals.managers import BlockManager


class BlockPairInfo(NamedTuple):
    lvals: ArrayLike
    rvals: ArrayLike
    locs: BlockPlacement
    left_ea: bool
    right_ea: bool
    rblk: Block


def _iter_block_pairs(
    left: BlockManager, right: BlockManager
) -> Iterator[BlockPairInfo]:
    # At this point we have already checked the parent DataFrames for
    #  assert rframe._indexed_same(lframe)

    for blk in left.blocks:
        locs = blk.mgr_locs
        blk_vals = blk.values

        left_ea = blk_vals.ndim == 1

        rblks = right._slice_take_blocks_ax0(locs.indexer, only_slice=True)

        # Assertions are disabled for performance, but should hold:
        # if left_ea:
        #     assert len(locs) == 1, locs
        #     assert len(rblks) == 1, rblks
        #     assert rblks[0].shape[0] == 1, rblks[0].shape

        for rblk in rblks:
            right_ea = rblk.values.ndim == 1

            lvals, rvals = _get_same_shape_values(blk, rblk, left_ea, right_ea)
            info = BlockPairInfo(lvals, rvals, locs, left_ea, right_ea, rblk)
            yield info
def operate_blockwise(
    left: BlockManager, right: BlockManager, array_op
) -> BlockManager:
    # At this point we have already checked the parent DataFrames for
    #  assert rframe._indexed_same(lframe)

    res_blks: list[Block] = []
    for lvals, rvals, locs, left_ea, right_ea, rblk in _iter_block_pairs(left, right):
        res_values = array_op(lvals, rvals)
        if (
            left_ea
            and not right_ea
            and hasattr(res_values, "reshape")
            and not is_1d_only_ea_dtype(res_values.dtype)
        ):
            res_values = res_values.reshape(1, -1)
        nbs = rblk._split_op_result(res_values)

        # Assertions are disabled for performance, but should hold:
        # if right_ea or left_ea:
        #     assert len(nbs) == 1
        # else:
        #     assert res_values.shape == lvals.shape, (res_values.shape, lvals.shape)

        _reset_block_mgr_locs(nbs, locs)

        res_blks.extend(nbs)

    # Assertions are disabled for performance, but should hold:
    #  slocs = {y for nb in res_blks for y in nb.mgr_locs.as_array}
    #  nlocs = sum(len(nb.mgr_locs.as_array) for nb in res_blks)
    #  assert nlocs == len(left.items), (nlocs, len(left.items))
    #  assert len(slocs) == nlocs, (len(slocs), nlocs)
    #  assert slocs == set(range(nlocs)), slocs

    new_mgr = type(right)(tuple(res_blks), axes=right.axes, verify_integrity=False)
    return new_mgr
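In this layout, frame-with-frame arithmetic on identically-indexed DataFrames is what reaches `operate_blockwise`, with `array_op` supplied by the ops layer. From the outside:

import pandas as pd

df1 = pd.DataFrame({"a": [1, 2], "b": [1.5, 2.5]})
df2 = pd.DataFrame({"a": [10, 20], "b": [0.5, 0.5]})
df1 + df2  # evaluated block-by-block: the int64 blocks pair up, as do the float64 blocks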
def _reset_block_mgr_locs(nbs: list[Block], locs) -> None:
    """
    Reset mgr_locs to correspond to our original DataFrame.
    """
    for nb in nbs:
        nblocs = locs[nb.mgr_locs.indexer]
        nb.mgr_locs = nblocs
        # Assertions are disabled for performance, but should hold:
        #  assert len(nblocs) == nb.shape[0], (len(nblocs), nb.shape)
        #  assert all(x in locs.as_array for x in nb.mgr_locs.as_array)


def _get_same_shape_values(
    lblk: Block, rblk: Block, left_ea: bool, right_ea: bool
) -> tuple[ArrayLike, ArrayLike]:
    """
    Slice lblk.values to align with rblk. Squeeze if we have EAs.
    """
    lvals = lblk.values
    rvals = rblk.values

    # Require that the indexing into lvals be slice-like
    assert rblk.mgr_locs.is_slice_like, rblk.mgr_locs

    # TODO(EA2D): with 2D EAs only this first clause would be needed
    if not (left_ea or right_ea):
        # error: No overload variant of "__getitem__" of "ExtensionArray" matches
        # argument type "Tuple[Union[ndarray, slice], slice]"
        lvals = lvals[rblk.mgr_locs.indexer, :]  # type: ignore[call-overload]
        assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape)
    elif left_ea and right_ea:
        assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape)
    elif right_ea:
        # lvals are 2D, rvals are 1D

        # error: No overload variant of "__getitem__" of "ExtensionArray" matches
        # argument type "Tuple[Union[ndarray, slice], slice]"
        lvals = lvals[rblk.mgr_locs.indexer, :]  # type: ignore[call-overload]
        assert lvals.shape[0] == 1, lvals.shape
        lvals = lvals[0, :]
    else:
        # lvals are 1D, rvals are 2D
        assert rvals.shape[0] == 1, rvals.shape
        # error: No overload variant of "__getitem__" of "ExtensionArray" matches
        # argument type "Tuple[int, slice]"
        rvals = rvals[0, :]  # type: ignore[call-overload]

    return lvals, rvals
def blockwise_all(left: BlockManager, right: BlockManager, op) -> bool:
    """
    Blockwise `all` reduction.
    """
    for info in _iter_block_pairs(left, right):
        res = op(info.lvals, info.rvals)
        if not res:
            return False
    return True
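A hedged sketch of driving `blockwise_all` directly (assuming the pandas 2.1.x internals shown here; `array_equivalent` is the NaN-aware comparator that `BlockManager._equal_values` pairs with it for `equals`):

import pandas as pd

from pandas.core.dtypes.missing import array_equivalent
from pandas.core.internals.ops import blockwise_all

df1 = pd.DataFrame({"a": [1, 2], "b": [3.0, float("nan")]})
df2 = df1.copy()
blockwise_all(df1._mgr, df2._mgr, array_equivalent)  # True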