Commit diff: new files added under lib/python3.11/site-packages/pandas/core/indexes/.

lib/python3.11/site-packages/pandas/core/indexes/accessors.py (new file, +643 lines)
@@ -0,0 +1,643 @@
"""
datetimelike delegation
"""
from __future__ import annotations

from typing import (
    TYPE_CHECKING,
    cast,
)
import warnings

import numpy as np

from pandas._libs import lib
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.common import (
    is_integer_dtype,
    is_list_like,
)
from pandas.core.dtypes.dtypes import (
    ArrowDtype,
    CategoricalDtype,
    DatetimeTZDtype,
    PeriodDtype,
)
from pandas.core.dtypes.generic import ABCSeries

from pandas.core.accessor import (
    PandasDelegate,
    delegate_names,
)
from pandas.core.arrays import (
    DatetimeArray,
    PeriodArray,
    TimedeltaArray,
)
from pandas.core.arrays.arrow.array import ArrowExtensionArray
from pandas.core.base import (
    NoNewAttributesMixin,
    PandasObject,
)
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.timedeltas import TimedeltaIndex

if TYPE_CHECKING:
    from pandas import (
        DataFrame,
        Series,
    )


class Properties(PandasDelegate, PandasObject, NoNewAttributesMixin):
    _hidden_attrs = PandasObject._hidden_attrs | {
        "orig",
        "name",
    }

    def __init__(self, data: Series, orig) -> None:
        if not isinstance(data, ABCSeries):
            raise TypeError(
                f"cannot convert an object of type {type(data)} to a datetimelike index"
            )

        self._parent = data
        self.orig = orig
        self.name = getattr(data, "name", None)
        self._freeze()

    def _get_values(self):
        data = self._parent
        if lib.is_np_dtype(data.dtype, "M"):
            return DatetimeIndex(data, copy=False, name=self.name)

        elif isinstance(data.dtype, DatetimeTZDtype):
            return DatetimeIndex(data, copy=False, name=self.name)

        elif lib.is_np_dtype(data.dtype, "m"):
            return TimedeltaIndex(data, copy=False, name=self.name)

        elif isinstance(data.dtype, PeriodDtype):
            return PeriodArray(data, copy=False)

        raise TypeError(
            f"cannot convert an object of type {type(data)} to a datetimelike index"
        )

    def _delegate_property_get(self, name: str):
        from pandas import Series

        values = self._get_values()

        result = getattr(values, name)

        # maybe need to upcast (ints)
        if isinstance(result, np.ndarray):
            if is_integer_dtype(result):
                result = result.astype("int64")
        elif not is_list_like(result):
            return result

        result = np.asarray(result)

        if self.orig is not None:
            index = self.orig.index
        else:
            index = self._parent.index
        # return the result as a Series
        result = Series(result, index=index, name=self.name).__finalize__(self._parent)

        # setting this object will show a SettingWithCopyWarning/Error
        result._is_copy = (
            "modifications to a property of a datetimelike "
            "object are not supported and are discarded. "
            "Change values on the original."
        )

        return result

    def _delegate_property_set(self, name: str, value, *args, **kwargs):
        raise ValueError(
            "modifications to a property of a datetimelike object are not supported. "
            "Change values on the original."
        )

    def _delegate_method(self, name: str, *args, **kwargs):
        from pandas import Series

        values = self._get_values()

        method = getattr(values, name)
        result = method(*args, **kwargs)

        if not is_list_like(result):
            return result

        result = Series(result, index=self._parent.index, name=self.name).__finalize__(
            self._parent
        )

        # setting this object will show a SettingWithCopyWarning/Error
        result._is_copy = (
            "modifications to a method of a datetimelike "
            "object are not supported and are discarded. "
            "Change values on the original."
        )

        return result

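# [Editor's note: illustrative sketch, not part of the committed file.]
# The delegation above is what ``Series.dt`` resolves through: scalar results
# pass straight through, array-like results are re-wrapped as a Series aligned
# with the parent index, and property assignment is rejected. Assuming
# pandas 2.x semantics:
#
#     >>> import pandas as pd
#     >>> s = pd.Series(pd.date_range("2000-01-01", periods=3, freq="D"))
#     >>> s.dt.quarter
#     0    1
#     1    1
#     2    1
#     dtype: int32
#     >>> s.dt.quarter = 2
#     Traceback (most recent call last):
#     ...
#     ValueError: modifications to a property of a datetimelike object are not supported. Change values on the original.
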

@delegate_names(
    delegate=ArrowExtensionArray,
    accessors=TimedeltaArray._datetimelike_ops,
    typ="property",
    accessor_mapping=lambda x: f"_dt_{x}",
    raise_on_missing=False,
)
@delegate_names(
    delegate=ArrowExtensionArray,
    accessors=TimedeltaArray._datetimelike_methods,
    typ="method",
    accessor_mapping=lambda x: f"_dt_{x}",
    raise_on_missing=False,
)
@delegate_names(
    delegate=ArrowExtensionArray,
    accessors=DatetimeArray._datetimelike_ops,
    typ="property",
    accessor_mapping=lambda x: f"_dt_{x}",
    raise_on_missing=False,
)
@delegate_names(
    delegate=ArrowExtensionArray,
    accessors=DatetimeArray._datetimelike_methods,
    typ="method",
    accessor_mapping=lambda x: f"_dt_{x}",
    raise_on_missing=False,
)
class ArrowTemporalProperties(PandasDelegate, PandasObject, NoNewAttributesMixin):
    def __init__(self, data: Series, orig) -> None:
        if not isinstance(data, ABCSeries):
            raise TypeError(
                f"cannot convert an object of type {type(data)} to a datetimelike index"
            )

        self._parent = data
        self._orig = orig
        self._freeze()

    def _delegate_property_get(self, name: str):
        if not hasattr(self._parent.array, f"_dt_{name}"):
            raise NotImplementedError(
                f"dt.{name} is not supported for {self._parent.dtype}"
            )
        result = getattr(self._parent.array, f"_dt_{name}")

        if not is_list_like(result):
            return result

        if self._orig is not None:
            index = self._orig.index
        else:
            index = self._parent.index
        # return the result as a Series, which is by definition a copy
        result = type(self._parent)(
            result, index=index, name=self._parent.name
        ).__finalize__(self._parent)

        return result

    def _delegate_method(self, name: str, *args, **kwargs):
        if not hasattr(self._parent.array, f"_dt_{name}"):
            raise NotImplementedError(
                f"dt.{name} is not supported for {self._parent.dtype}"
            )

        result = getattr(self._parent.array, f"_dt_{name}")(*args, **kwargs)

        if self._orig is not None:
            index = self._orig.index
        else:
            index = self._parent.index
        # return the result as a Series, which is by definition a copy
        result = type(self._parent)(
            result, index=index, name=self._parent.name
        ).__finalize__(self._parent)

        return result

    def to_pytimedelta(self):
        return cast(ArrowExtensionArray, self._parent.array)._dt_to_pytimedelta()

    def to_pydatetime(self):
        # GH#20306
        warnings.warn(
            f"The behavior of {type(self).__name__}.to_pydatetime is deprecated, "
            "in a future version this will return a Series containing python "
            "datetime objects instead of an ndarray. To retain the old behavior, "
            "call `np.array` on the result",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        return cast(ArrowExtensionArray, self._parent.array)._dt_to_pydatetime()

    def isocalendar(self) -> DataFrame:
        from pandas import DataFrame

        result = (
            cast(ArrowExtensionArray, self._parent.array)
            ._dt_isocalendar()
            ._pa_array.combine_chunks()
        )
        iso_calendar_df = DataFrame(
            {
                col: type(self._parent.array)(result.field(i))  # type: ignore[call-arg]
                for i, col in enumerate(["year", "week", "day"])
            }
        )
        return iso_calendar_df

    @property
    def components(self) -> DataFrame:
        from pandas import DataFrame

        components_df = DataFrame(
            {
                col: getattr(self._parent.array, f"_dt_{col}")
                for col in [
                    "days",
                    "hours",
                    "minutes",
                    "seconds",
                    "milliseconds",
                    "microseconds",
                    "nanoseconds",
                ]
            }
        )
        return components_df

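# [Editor's note: illustrative sketch, not part of the committed file.]
# The four stacked ``delegate_names`` calls above map every datetime and
# timedelta accessor name onto the ``_dt_*`` methods of ArrowExtensionArray,
# giving ArrowDtype-backed Series the same ``.dt`` surface. Assuming pyarrow
# is installed; the result dtype follows pyarrow's compute kernels:
#
#     >>> import pandas as pd
#     >>> s = pd.Series(
#     ...     pd.date_range("2000-01-01", periods=2, freq="D")
#     ... ).astype("timestamp[ns][pyarrow]")
#     >>> s.dt.year  # resolved via ArrowExtensionArray._dt_year
#     0    2000
#     1    2000
#     dtype: int64[pyarrow]
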

@delegate_names(
    delegate=DatetimeArray,
    accessors=DatetimeArray._datetimelike_ops + ["unit"],
    typ="property",
)
@delegate_names(
    delegate=DatetimeArray,
    accessors=DatetimeArray._datetimelike_methods + ["as_unit"],
    typ="method",
)
class DatetimeProperties(Properties):
    """
    Accessor object for datetimelike properties of the Series values.

    Examples
    --------
    >>> seconds_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="s"))
    >>> seconds_series
    0   2000-01-01 00:00:00
    1   2000-01-01 00:00:01
    2   2000-01-01 00:00:02
    dtype: datetime64[ns]
    >>> seconds_series.dt.second
    0    0
    1    1
    2    2
    dtype: int32

    >>> hours_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="h"))
    >>> hours_series
    0   2000-01-01 00:00:00
    1   2000-01-01 01:00:00
    2   2000-01-01 02:00:00
    dtype: datetime64[ns]
    >>> hours_series.dt.hour
    0    0
    1    1
    2    2
    dtype: int32

    >>> quarters_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="QE"))
    >>> quarters_series
    0   2000-03-31
    1   2000-06-30
    2   2000-09-30
    dtype: datetime64[ns]
    >>> quarters_series.dt.quarter
    0    1
    1    2
    2    3
    dtype: int32

    Returns a Series indexed like the original Series.
    Raises TypeError if the Series does not contain datetimelike values.
    """

    def to_pydatetime(self) -> np.ndarray:
        """
        Return the data as an array of :class:`datetime.datetime` objects.

        .. deprecated:: 2.1.0

            The current behavior of dt.to_pydatetime is deprecated.
            In a future version this will return a Series containing python
            datetime objects instead of a ndarray.

        Timezone information is retained if present.

        .. warning::

            Python's datetime uses microsecond resolution, which is lower than
            pandas (nanosecond). The values are truncated.

        Returns
        -------
        numpy.ndarray
            Object dtype array containing native Python datetime objects.

        See Also
        --------
        datetime.datetime : Standard library value for a datetime.

        Examples
        --------
        >>> s = pd.Series(pd.date_range('20180310', periods=2))
        >>> s
        0   2018-03-10
        1   2018-03-11
        dtype: datetime64[ns]

        >>> s.dt.to_pydatetime()
        array([datetime.datetime(2018, 3, 10, 0, 0),
               datetime.datetime(2018, 3, 11, 0, 0)], dtype=object)

        pandas' nanosecond precision is truncated to microseconds.

        >>> s = pd.Series(pd.date_range('20180310', periods=2, freq='ns'))
        >>> s
        0   2018-03-10 00:00:00.000000000
        1   2018-03-10 00:00:00.000000001
        dtype: datetime64[ns]

        >>> s.dt.to_pydatetime()
        array([datetime.datetime(2018, 3, 10, 0, 0),
               datetime.datetime(2018, 3, 10, 0, 0)], dtype=object)
        """
        # GH#20306
        warnings.warn(
            f"The behavior of {type(self).__name__}.to_pydatetime is deprecated, "
            "in a future version this will return a Series containing python "
            "datetime objects instead of an ndarray. To retain the old behavior, "
            "call `np.array` on the result",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        return self._get_values().to_pydatetime()

    @property
    def freq(self):
        return self._get_values().inferred_freq

    def isocalendar(self) -> DataFrame:
        """
        Calculate year, week, and day according to the ISO 8601 standard.

        Returns
        -------
        DataFrame
            With columns year, week and day.

        See Also
        --------
        Timestamp.isocalendar : Function return a 3-tuple containing ISO year,
            week number, and weekday for the given Timestamp object.
        datetime.date.isocalendar : Return a named tuple object with
            three components: year, week and weekday.

        Examples
        --------
        >>> ser = pd.to_datetime(pd.Series(["2010-01-01", pd.NaT]))
        >>> ser.dt.isocalendar()
           year  week  day
        0  2009    53    5
        1  <NA>  <NA>  <NA>
        >>> ser.dt.isocalendar().week
        0      53
        1    <NA>
        Name: week, dtype: UInt32
        """
        return self._get_values().isocalendar().set_index(self._parent.index)

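# [Editor's note: illustrative sketch, not part of the committed file.]
# Per the FutureWarning above, callers that need an ndarray regardless of
# pandas version can normalize the result as the message suggests:
#
#     >>> import numpy as np
#     >>> import pandas as pd
#     >>> s = pd.Series(pd.date_range("2018-03-10", periods=2))
#     >>> arr = np.array(s.dt.to_pydatetime())  # ndarray under old and new behavior
#     >>> arr.dtype
#     dtype('O')
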

@delegate_names(
    delegate=TimedeltaArray, accessors=TimedeltaArray._datetimelike_ops, typ="property"
)
@delegate_names(
    delegate=TimedeltaArray,
    accessors=TimedeltaArray._datetimelike_methods,
    typ="method",
)
class TimedeltaProperties(Properties):
    """
    Accessor object for datetimelike properties of the Series values.

    Returns a Series indexed like the original Series.
    Raises TypeError if the Series does not contain datetimelike values.

    Examples
    --------
    >>> seconds_series = pd.Series(
    ...     pd.timedelta_range(start="1 second", periods=3, freq="s")
    ... )
    >>> seconds_series
    0   0 days 00:00:01
    1   0 days 00:00:02
    2   0 days 00:00:03
    dtype: timedelta64[ns]
    >>> seconds_series.dt.seconds
    0    1
    1    2
    2    3
    dtype: int32
    """

    def to_pytimedelta(self) -> np.ndarray:
        """
        Return an array of native :class:`datetime.timedelta` objects.

        Python's standard `datetime` library uses a different representation
        for timedeltas. This method converts a Series of pandas Timedeltas
        to `datetime.timedelta` format with the same length as the original
        Series.

        Returns
        -------
        numpy.ndarray
            1D array containing data with `datetime.timedelta` type.

        See Also
        --------
        datetime.timedelta : A duration expressing the difference
            between two date, time, or datetime instances.

        Examples
        --------
        >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit="d"))
        >>> s
        0   0 days
        1   1 days
        2   2 days
        3   3 days
        4   4 days
        dtype: timedelta64[ns]

        >>> s.dt.to_pytimedelta()
        array([datetime.timedelta(0), datetime.timedelta(days=1),
               datetime.timedelta(days=2), datetime.timedelta(days=3),
               datetime.timedelta(days=4)], dtype=object)
        """
        return self._get_values().to_pytimedelta()

    @property
    def components(self):
        """
        Return a DataFrame of the components of the Timedeltas.

        Returns
        -------
        DataFrame

        Examples
        --------
        >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit='s'))
        >>> s
        0   0 days 00:00:00
        1   0 days 00:00:01
        2   0 days 00:00:02
        3   0 days 00:00:03
        4   0 days 00:00:04
        dtype: timedelta64[ns]
        >>> s.dt.components
           days  hours  minutes  seconds  milliseconds  microseconds  nanoseconds
        0     0      0        0        0             0             0            0
        1     0      0        0        1             0             0            0
        2     0      0        0        2             0             0            0
        3     0      0        0        3             0             0            0
        4     0      0        0        4             0             0            0
        """
        return (
            self._get_values()
            .components.set_index(self._parent.index)
            .__finalize__(self._parent)
        )

    @property
    def freq(self):
        return self._get_values().inferred_freq

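# [Editor's note: illustrative sketch, not part of the committed file.]
# The delegated ``total_seconds`` method complements ``components``: the
# former collapses each Timedelta to a single float, the latter splits it
# into integer fields:
#
#     >>> import pandas as pd
#     >>> s = pd.Series(pd.to_timedelta(["1 days 00:00:02"]))
#     >>> s.dt.total_seconds()
#     0    86402.0
#     dtype: float64
#     >>> s.dt.components[["days", "seconds"]]
#        days  seconds
#     0     1        2
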

@delegate_names(
    delegate=PeriodArray, accessors=PeriodArray._datetimelike_ops, typ="property"
)
@delegate_names(
    delegate=PeriodArray, accessors=PeriodArray._datetimelike_methods, typ="method"
)
class PeriodProperties(Properties):
    """
    Accessor object for datetimelike properties of the Series values.

    Returns a Series indexed like the original Series.
    Raises TypeError if the Series does not contain datetimelike values.

    Examples
    --------
    >>> seconds_series = pd.Series(
    ...     pd.period_range(
    ...         start="2000-01-01 00:00:00", end="2000-01-01 00:00:03", freq="s"
    ...     )
    ... )
    >>> seconds_series
    0    2000-01-01 00:00:00
    1    2000-01-01 00:00:01
    2    2000-01-01 00:00:02
    3    2000-01-01 00:00:03
    dtype: period[s]
    >>> seconds_series.dt.second
    0    0
    1    1
    2    2
    3    3
    dtype: int64

    >>> hours_series = pd.Series(
    ...     pd.period_range(start="2000-01-01 00:00", end="2000-01-01 03:00", freq="h")
    ... )
    >>> hours_series
    0    2000-01-01 00:00
    1    2000-01-01 01:00
    2    2000-01-01 02:00
    3    2000-01-01 03:00
    dtype: period[h]
    >>> hours_series.dt.hour
    0    0
    1    1
    2    2
    3    3
    dtype: int64

    >>> quarters_series = pd.Series(
    ...     pd.period_range(start="2000-01-01", end="2000-12-31", freq="Q-DEC")
    ... )
    >>> quarters_series
    0    2000Q1
    1    2000Q2
    2    2000Q3
    3    2000Q4
    dtype: period[Q-DEC]
    >>> quarters_series.dt.quarter
    0    1
    1    2
    2    3
    3    4
    dtype: int64
    """


class CombinedDatetimelikeProperties(
    DatetimeProperties, TimedeltaProperties, PeriodProperties
):
    def __new__(cls, data: Series):  # pyright: ignore[reportInconsistentConstructor]
        # CombinedDatetimelikeProperties isn't really instantiated. Instead
        # we need to choose which parent (datetime or timedelta) is
        # appropriate. Since we're checking the dtypes anyway, we'll just
        # do all the validation here.

        if not isinstance(data, ABCSeries):
            raise TypeError(
                f"cannot convert an object of type {type(data)} to a datetimelike index"
            )

        orig = data if isinstance(data.dtype, CategoricalDtype) else None
        if orig is not None:
            data = data._constructor(
                orig.array,
                name=orig.name,
                copy=False,
                dtype=orig._values.categories.dtype,
                index=orig.index,
            )

        if isinstance(data.dtype, ArrowDtype) and data.dtype.kind in "Mm":
            return ArrowTemporalProperties(data, orig)
        if lib.is_np_dtype(data.dtype, "M"):
            return DatetimeProperties(data, orig)
        elif isinstance(data.dtype, DatetimeTZDtype):
            return DatetimeProperties(data, orig)
        elif lib.is_np_dtype(data.dtype, "m"):
            return TimedeltaProperties(data, orig)
        elif isinstance(data.dtype, PeriodDtype):
            return PeriodProperties(data, orig)

        raise AttributeError("Can only use .dt accessor with datetimelike values")
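
Editor's note: ``CombinedDatetimelikeProperties`` is the class pandas registers
as the ``Series.dt`` accessor; its ``__new__`` dispatches to the concrete
accessor for the dtype. A minimal sketch of that dispatch, assuming pandas 2.x:

    >>> import pandas as pd
    >>> type(pd.Series(pd.date_range("2000", periods=2)).dt).__name__
    'DatetimeProperties'
    >>> type(pd.Series(pd.timedelta_range(start="1 day", periods=2)).dt).__name__
    'TimedeltaProperties'
    >>> pd.Series([1, 2]).dt
    Traceback (most recent call last):
    ...
    AttributeError: Can only use .dt accessor with datetimelike values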

lib/python3.11/site-packages/pandas/core/indexes/api.py (new file, +388 lines)
@@ -0,0 +1,388 @@
from __future__ import annotations

import textwrap
from typing import (
    TYPE_CHECKING,
    cast,
)

import numpy as np

from pandas._libs import (
    NaT,
    lib,
)
from pandas.errors import InvalidIndexError

from pandas.core.dtypes.cast import find_common_type

from pandas.core.algorithms import safe_sort
from pandas.core.indexes.base import (
    Index,
    _new_Index,
    ensure_index,
    ensure_index_from_sequences,
    get_unanimous_names,
)
from pandas.core.indexes.category import CategoricalIndex
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.interval import IntervalIndex
from pandas.core.indexes.multi import MultiIndex
from pandas.core.indexes.period import PeriodIndex
from pandas.core.indexes.range import RangeIndex
from pandas.core.indexes.timedeltas import TimedeltaIndex

if TYPE_CHECKING:
    from pandas._typing import Axis
_sort_msg = textwrap.dedent(
    """\
    Sorting because non-concatenation axis is not aligned. A future version
    of pandas will change to not sort by default.

    To accept the future behavior, pass 'sort=False'.

    To retain the current behavior and silence the warning, pass 'sort=True'.
    """
)


__all__ = [
    "Index",
    "MultiIndex",
    "CategoricalIndex",
    "IntervalIndex",
    "RangeIndex",
    "InvalidIndexError",
    "TimedeltaIndex",
    "PeriodIndex",
    "DatetimeIndex",
    "_new_Index",
    "NaT",
    "ensure_index",
    "ensure_index_from_sequences",
    "get_objs_combined_axis",
    "union_indexes",
    "get_unanimous_names",
    "all_indexes_same",
    "default_index",
    "safe_sort_index",
]


def get_objs_combined_axis(
    objs,
    intersect: bool = False,
    axis: Axis = 0,
    sort: bool = True,
    copy: bool = False,
) -> Index:
    """
    Extract combined index: return intersection or union (depending on the
    value of "intersect") of indexes on given axis, or None if all objects
    lack indexes (e.g. they are numpy arrays).

    Parameters
    ----------
    objs : list
        Series or DataFrame objects, may be a mix of the two.
    intersect : bool, default False
        If True, calculate the intersection between indexes. Otherwise,
        calculate the union.
    axis : {0 or 'index', 1 or 'columns'}, default 0
        The axis to extract indexes from.
    sort : bool, default True
        Whether the result index should come out sorted or not.
    copy : bool, default False
        If True, return a copy of the combined index.

    Returns
    -------
    Index
    """
    obs_idxes = [obj._get_axis(axis) for obj in objs]
    return _get_combined_index(obs_idxes, intersect=intersect, sort=sort, copy=copy)


def _get_distinct_objs(objs: list[Index]) -> list[Index]:
    """
    Return a list with distinct elements of "objs" (different ids).
    Preserves order.
    """
    ids: set[int] = set()
    res = []
    for obj in objs:
        if id(obj) not in ids:
            ids.add(id(obj))
            res.append(obj)
    return res

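# [Editor's note: illustrative sketch, not part of the committed file.]
# ``get_objs_combined_axis`` is the helper ``pd.concat`` uses to align the
# non-concatenation axis; ``intersect=False``/``True`` corresponds to
# ``join="outer"`` / ``join="inner"``:
#
#     >>> import pandas as pd
#     >>> a = pd.DataFrame({"x": [1, 2]}, index=["a", "b"])
#     >>> b = pd.DataFrame({"y": [3, 4]}, index=["b", "c"])
#     >>> pd.concat([a, b], axis=1).index.tolist()
#     ['a', 'b', 'c']
#     >>> pd.concat([a, b], axis=1, join="inner").index.tolist()
#     ['b']
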

def _get_combined_index(
    indexes: list[Index],
    intersect: bool = False,
    sort: bool = False,
    copy: bool = False,
) -> Index:
    """
    Return the union or intersection of indexes.

    Parameters
    ----------
    indexes : list of Index or list objects
        When intersect=True, do not accept list of lists.
    intersect : bool, default False
        If True, calculate the intersection between indexes. Otherwise,
        calculate the union.
    sort : bool, default False
        Whether the result index should come out sorted or not.
    copy : bool, default False
        If True, return a copy of the combined index.

    Returns
    -------
    Index
    """
    # TODO: handle index names!
    indexes = _get_distinct_objs(indexes)
    if len(indexes) == 0:
        index = Index([])
    elif len(indexes) == 1:
        index = indexes[0]
    elif intersect:
        index = indexes[0]
        for other in indexes[1:]:
            index = index.intersection(other)
    else:
        index = union_indexes(indexes, sort=False)
        index = ensure_index(index)

    if sort:
        index = safe_sort_index(index)
    # GH 29879
    if copy:
        index = index.copy()

    return index


def safe_sort_index(index: Index) -> Index:
    """
    Returns the sorted index.

    We keep the dtypes and the name attributes.

    Parameters
    ----------
    index : an Index

    Returns
    -------
    Index
    """
    if index.is_monotonic_increasing:
        return index

    try:
        array_sorted = safe_sort(index)
    except TypeError:
        pass
    else:
        if isinstance(array_sorted, Index):
            return array_sorted

        array_sorted = cast(np.ndarray, array_sorted)
        if isinstance(index, MultiIndex):
            index = MultiIndex.from_tuples(array_sorted, names=index.names)
        else:
            index = Index(array_sorted, name=index.name, dtype=index.dtype)

    return index

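# [Editor's note: illustrative sketch, not part of the committed file.]
# The ``except TypeError: pass`` above makes sorting best-effort: an index
# whose elements cannot be compared is returned unsorted instead of raising.
# The public equivalent surfaces the error that branch swallows:
#
#     >>> import pandas as pd
#     >>> pd.Index([1, "a", 2], dtype=object).sort_values()
#     Traceback (most recent call last):
#     ...
#     TypeError: '<' not supported between instances of 'str' and 'int'
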

def union_indexes(indexes, sort: bool | None = True) -> Index:
    """
    Return the union of indexes.

    The behavior of sort and names is not consistent.

    Parameters
    ----------
    indexes : list of Index or list objects
    sort : bool, default True
        Whether the result index should come out sorted or not.

    Returns
    -------
    Index
    """
    if len(indexes) == 0:
        raise AssertionError("Must have at least 1 Index to union")
    if len(indexes) == 1:
        result = indexes[0]
        if isinstance(result, list):
            if not sort:
                result = Index(result)
            else:
                result = Index(sorted(result))
        return result

    indexes, kind = _sanitize_and_check(indexes)

    def _unique_indices(inds, dtype) -> Index:
        """
        Concatenate indices and remove duplicates.

        Parameters
        ----------
        inds : list of Index or list objects
        dtype : dtype to set for the resulting Index

        Returns
        -------
        Index
        """
        if all(isinstance(ind, Index) for ind in inds):
            inds = [ind.astype(dtype, copy=False) for ind in inds]
            result = inds[0].unique()
            other = inds[1].append(inds[2:])
            diff = other[result.get_indexer_for(other) == -1]
            if len(diff):
                result = result.append(diff.unique())
            if sort:
                result = result.sort_values()
            return result

        def conv(i):
            if isinstance(i, Index):
                i = i.tolist()
            return i

        return Index(
            lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort),
            dtype=dtype,
        )

    def _find_common_index_dtype(inds):
        """
        Finds a common type for the indexes to pass through to resulting index.

        Parameters
        ----------
        inds : list of Index or list objects

        Returns
        -------
        The common type or None if no indexes were given
        """
        # note: iterate over the argument rather than the enclosing
        # ``indexes`` so the parameter is actually used
        dtypes = [idx.dtype for idx in inds if isinstance(idx, Index)]
        if dtypes:
            dtype = find_common_type(dtypes)
        else:
            dtype = None

        return dtype

    if kind == "special":
        result = indexes[0]

        dtis = [x for x in indexes if isinstance(x, DatetimeIndex)]
        dti_tzs = [x for x in dtis if x.tz is not None]
        if len(dti_tzs) not in [0, len(dtis)]:
            # TODO: this behavior is not tested (so may not be desired),
            #  but is kept in order to keep behavior the same when
            #  deprecating union_many
            #  test_frame_from_dict_with_mixed_indexes
            raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")

        if len(dtis) == len(indexes):
            sort = True
            result = indexes[0]

        elif len(dtis) > 1:
            # If we have mixed timezones, our casting behavior may depend on
            #  the order of indexes, which we don't want.
            sort = False

            # TODO: what about Categorical[dt64]?
            #  test_frame_from_dict_with_mixed_indexes
            indexes = [x.astype(object, copy=False) for x in indexes]
            result = indexes[0]

        for other in indexes[1:]:
            result = result.union(other, sort=None if sort else False)
        return result

    elif kind == "array":
        dtype = _find_common_index_dtype(indexes)
        index = indexes[0]
        if not all(index.equals(other) for other in indexes[1:]):
            index = _unique_indices(indexes, dtype)

        name = get_unanimous_names(*indexes)[0]
        if name != index.name:
            index = index.rename(name)
        return index
    else:  # kind='list'
        dtype = _find_common_index_dtype(indexes)
        return _unique_indices(indexes, dtype)


def _sanitize_and_check(indexes):
    """
    Verify the type of indexes and convert lists to Index.

    Cases:

    - [list, list, ...]: Return ([list, list, ...], 'list')
    - [list, Index, ...]: Return _sanitize_and_check([Index, Index, ...])
      Lists are sorted and converted to Index.
    - [Index, Index, ...]: Return ([Index, Index, ...], TYPE)
      TYPE = 'special' if at least one special type, 'array' otherwise.

    Parameters
    ----------
    indexes : list of Index or list objects

    Returns
    -------
    sanitized_indexes : list of Index or list objects
    type : {'list', 'array', 'special'}
    """
    kinds = list({type(index) for index in indexes})

    if list in kinds:
        if len(kinds) > 1:
            indexes = [
                Index(list(x)) if not isinstance(x, Index) else x for x in indexes
            ]
            kinds.remove(list)
        else:
            return indexes, "list"

    if len(kinds) > 1 or Index not in kinds:
        return indexes, "special"
    else:
        return indexes, "array"


def all_indexes_same(indexes) -> bool:
    """
    Determine if all indexes contain the same elements.

    Parameters
    ----------
    indexes : iterable of Index objects

    Returns
    -------
    bool
        True if all indexes contain the same elements, False otherwise.
    """
    itr = iter(indexes)
    first = next(itr)
    return all(first.equals(index) for index in itr)


def default_index(n: int) -> RangeIndex:
    rng = range(n)
    return RangeIndex._simple_new(rng, name=None)
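
Editor's note: ``union_indexes`` drives the "special"/"array"/"list" cases that
``pd.concat`` and DataFrame construction rely on. A minimal sketch of the
observable behavior, assuming pandas 2.x:

    >>> import pandas as pd
    >>> df = pd.DataFrame(
    ...     {"a": pd.Series(1, index=["x", "z"]), "b": pd.Series(2, index=["y", "x"])}
    ... )
    >>> df.index.tolist()  # union of the two Series indexes, sorted
    ['x', 'y', 'z']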

lib/python3.11/site-packages/pandas/core/indexes/base.py (new file, +7943 lines)
@@ -0,0 +1,7943 @@
(file diff suppressed because it is too large)

lib/python3.11/site-packages/pandas/core/indexes/category.py (new file, +513 lines)
@@ -0,0 +1,513 @@
from __future__ import annotations

from typing import (
    TYPE_CHECKING,
    Any,
    Literal,
    cast,
)

import numpy as np

from pandas._libs import index as libindex
from pandas.util._decorators import (
    cache_readonly,
    doc,
)

from pandas.core.dtypes.common import is_scalar
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.dtypes import CategoricalDtype
from pandas.core.dtypes.missing import (
    is_valid_na_for_dtype,
    isna,
)

from pandas.core.arrays.categorical import (
    Categorical,
    contains,
)
from pandas.core.construction import extract_array
from pandas.core.indexes.base import (
    Index,
    maybe_extract_name,
)
from pandas.core.indexes.extension import (
    NDArrayBackedExtensionIndex,
    inherit_names,
)

if TYPE_CHECKING:
    from collections.abc import Hashable

    from pandas._typing import (
        Dtype,
        DtypeObj,
        Self,
        npt,
    )


@inherit_names(
    [
        "argsort",
        "tolist",
        "codes",
        "categories",
        "ordered",
        "_reverse_indexer",
        "searchsorted",
        "min",
        "max",
    ],
    Categorical,
)
@inherit_names(
    [
        "rename_categories",
        "reorder_categories",
        "add_categories",
        "remove_categories",
        "remove_unused_categories",
        "set_categories",
        "as_ordered",
        "as_unordered",
    ],
    Categorical,
    wrap=True,
)
class CategoricalIndex(NDArrayBackedExtensionIndex):
    """
    Index based on an underlying :class:`Categorical`.

    CategoricalIndex, like Categorical, can only take on a limited,
    and usually fixed, number of possible values (`categories`). Also,
    like Categorical, it might have an order, but numerical operations
    (additions, divisions, ...) are not possible.

    Parameters
    ----------
    data : array-like (1-dimensional)
        The values of the categorical. If `categories` are given, values not in
        `categories` will be replaced with NaN.
    categories : index-like, optional
        The categories for the categorical. Items need to be unique.
        If the categories are not given here (and also not in `dtype`), they
        will be inferred from the `data`.
    ordered : bool, optional
        Whether or not this categorical is treated as an ordered
        categorical. If not given here or in `dtype`, the resulting
        categorical will be unordered.
    dtype : CategoricalDtype or "category", optional
        If :class:`CategoricalDtype`, cannot be used together with
        `categories` or `ordered`.
    copy : bool, default False
        Make a copy of input ndarray.
    name : object, optional
        Name to be stored in the index.

    Attributes
    ----------
    codes
    categories
    ordered

    Methods
    -------
    rename_categories
    reorder_categories
    add_categories
    remove_categories
    remove_unused_categories
    set_categories
    as_ordered
    as_unordered
    map

    Raises
    ------
    ValueError
        If the categories do not validate.
    TypeError
        If an explicit ``ordered=True`` is given but no `categories` and the
        `values` are not sortable.

    See Also
    --------
    Index : The base pandas Index type.
    Categorical : A categorical array.
    CategoricalDtype : Type for categorical data.

    Notes
    -----
    See the `user guide
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#categoricalindex>`__
    for more.

    Examples
    --------
    >>> pd.CategoricalIndex(["a", "b", "c", "a", "b", "c"])
    CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'],
                     categories=['a', 'b', 'c'], ordered=False, dtype='category')

    ``CategoricalIndex`` can also be instantiated from a ``Categorical``:

    >>> c = pd.Categorical(["a", "b", "c", "a", "b", "c"])
    >>> pd.CategoricalIndex(c)
    CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'],
                     categories=['a', 'b', 'c'], ordered=False, dtype='category')

    Ordered ``CategoricalIndex`` can have a min and max value.

    >>> ci = pd.CategoricalIndex(
    ...     ["a", "b", "c", "a", "b", "c"], ordered=True, categories=["c", "b", "a"]
    ... )
    >>> ci
    CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'],
                     categories=['c', 'b', 'a'], ordered=True, dtype='category')
    >>> ci.min()
    'c'
    """

    _typ = "categoricalindex"
    _data_cls = Categorical

    @property
    def _can_hold_strings(self):
        return self.categories._can_hold_strings

    @cache_readonly
    def _should_fallback_to_positional(self) -> bool:
        return self.categories._should_fallback_to_positional

    codes: np.ndarray
    categories: Index
    ordered: bool | None
    _data: Categorical
    _values: Categorical

    @property
    def _engine_type(self) -> type[libindex.IndexEngine]:
        # self.codes can have dtype int8, int16, int32 or int64, so we need
        # to return the corresponding engine type (libindex.Int8Engine, etc.).
        return {
            np.int8: libindex.Int8Engine,
            np.int16: libindex.Int16Engine,
            np.int32: libindex.Int32Engine,
            np.int64: libindex.Int64Engine,
        }[self.codes.dtype.type]

    # --------------------------------------------------------------------
    # Constructors

    def __new__(
        cls,
        data=None,
        categories=None,
        ordered=None,
        dtype: Dtype | None = None,
        copy: bool = False,
        name: Hashable | None = None,
    ) -> Self:
        name = maybe_extract_name(name, data, cls)

        if is_scalar(data):
            # GH#38944 include None here, which pre-2.0 subbed in []
            cls._raise_scalar_data_error(data)

        data = Categorical(
            data, categories=categories, ordered=ordered, dtype=dtype, copy=copy
        )

        return cls._simple_new(data, name=name)

    # --------------------------------------------------------------------

    def _is_dtype_compat(self, other: Index) -> Categorical:
        """
        *this is an internal non-public method*

        provide a comparison between the dtype of self and other (coercing if
        needed)

        Parameters
        ----------
        other : Index

        Returns
        -------
        Categorical

        Raises
        ------
        TypeError if the dtypes are not compatible
        """
        if isinstance(other.dtype, CategoricalDtype):
            cat = extract_array(other)
            cat = cast(Categorical, cat)
            if not cat._categories_match_up_to_permutation(self._values):
                raise TypeError(
                    "categories must match existing categories when appending"
                )

        elif other._is_multi:
            # preempt raising NotImplementedError in isna call
            raise TypeError("MultiIndex is not dtype-compatible with CategoricalIndex")
        else:
            values = other

            cat = Categorical(other, dtype=self.dtype)
            other = CategoricalIndex(cat)
            if not other.isin(values).all():
                raise TypeError(
                    "cannot append a non-category item to a CategoricalIndex"
                )
            cat = other._values

            if not ((cat == values) | (isna(cat) & isna(values))).all():
                # GH#37667 see test_equals_non_category
                raise TypeError(
                    "categories must match existing categories when appending"
                )

        return cat

    def equals(self, other: object) -> bool:
        """
        Determine if two CategoricalIndex objects contain the same elements.

        Returns
        -------
        bool
            ``True`` if two :class:`pandas.CategoricalIndex` objects have equal
            elements, ``False`` otherwise.

        Examples
        --------
        >>> ci = pd.CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'])
        >>> ci2 = pd.CategoricalIndex(pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c']))
        >>> ci.equals(ci2)
        True

        The order of elements matters.

        >>> ci3 = pd.CategoricalIndex(['c', 'b', 'a', 'a', 'b', 'c'])
        >>> ci.equals(ci3)
        False

        The orderedness also matters.

        >>> ci4 = ci.as_ordered()
        >>> ci.equals(ci4)
        False

        The categories matter, but the order of the categories matters only when
        ``ordered=True``.

        >>> ci5 = ci.set_categories(['a', 'b', 'c', 'd'])
        >>> ci.equals(ci5)
        False

        >>> ci6 = ci.set_categories(['b', 'c', 'a'])
        >>> ci.equals(ci6)
        True
        >>> ci_ordered = pd.CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'],
        ...                                  ordered=True)
        >>> ci2_ordered = ci_ordered.set_categories(['b', 'c', 'a'])
        >>> ci_ordered.equals(ci2_ordered)
        False
        """
        if self.is_(other):
            return True

        if not isinstance(other, Index):
            return False

        try:
            other = self._is_dtype_compat(other)
        except (TypeError, ValueError):
            return False

        return self._data.equals(other)

    # --------------------------------------------------------------------
    # Rendering Methods

    @property
    def _formatter_func(self):
        return self.categories._formatter_func

    def _format_attrs(self):
        """
        Return a list of tuples of the (attr, formatted_value)
        """
        attrs: list[tuple[str, str | int | bool | None]]

        attrs = [
            (
                "categories",
                f"[{', '.join(self._data._repr_categories())}]",
            ),
            ("ordered", self.ordered),
        ]
        extra = super()._format_attrs()
        return attrs + extra

    # --------------------------------------------------------------------

    @property
    def inferred_type(self) -> str:
        return "categorical"

    @doc(Index.__contains__)
    def __contains__(self, key: Any) -> bool:
        # if key is a NaN, check if any NaN is in self.
        if is_valid_na_for_dtype(key, self.categories.dtype):
            return self.hasnans

        return contains(self, key, container=self._engine)

    def reindex(
        self, target, method=None, level=None, limit: int | None = None, tolerance=None
    ) -> tuple[Index, npt.NDArray[np.intp] | None]:
        """
        Create index with target's values (move/add/delete values as necessary)

        Returns
        -------
        new_index : pd.Index
            Resulting index
        indexer : np.ndarray[np.intp] or None
            Indices of output values in original index
        """
        if method is not None:
            raise NotImplementedError(
                "argument method is not implemented for CategoricalIndex.reindex"
            )
        if level is not None:
            raise NotImplementedError(
                "argument level is not implemented for CategoricalIndex.reindex"
            )
        if limit is not None:
            raise NotImplementedError(
                "argument limit is not implemented for CategoricalIndex.reindex"
            )
        return super().reindex(target)

    # --------------------------------------------------------------------
    # Indexing Methods

    def _maybe_cast_indexer(self, key) -> int:
        # GH#41933: we have to do this instead of self._data._validate_scalar
        #  because this will correctly get partial-indexing on Interval categories
        try:
            return self._data._unbox_scalar(key)
        except KeyError:
            if is_valid_na_for_dtype(key, self.categories.dtype):
                return -1
            raise

    def _maybe_cast_listlike_indexer(self, values) -> CategoricalIndex:
        if isinstance(values, CategoricalIndex):
            values = values._data
        if isinstance(values, Categorical):
            # Indexing on codes is more efficient if categories are the same,
            #  so we can apply some optimizations based on the degree of
            #  dtype-matching.
            cat = self._data._encode_with_my_categories(values)
            codes = cat._codes
        else:
            codes = self.categories.get_indexer(values)
            codes = codes.astype(self.codes.dtype, copy=False)
            cat = self._data._from_backing_data(codes)
        return type(self)._simple_new(cat)

    # --------------------------------------------------------------------

    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
        return self.categories._is_comparable_dtype(dtype)

    def map(self, mapper, na_action: Literal["ignore"] | None = None):
        """
        Map values using an input mapping or function.

        Maps the values (their categories, not the codes) of the index to new
        categories. If the mapping correspondence is one-to-one the result is a
        :class:`~pandas.CategoricalIndex` which has the same order property as
        the original, otherwise an :class:`~pandas.Index` is returned.

        If a `dict` or :class:`~pandas.Series` is used any unmapped category is
        mapped to `NaN`. Note that if this happens an :class:`~pandas.Index`
        will be returned.

        Parameters
        ----------
        mapper : function, dict, or Series
            Mapping correspondence.

        Returns
        -------
        pandas.CategoricalIndex or pandas.Index
            Mapped index.

        See Also
        --------
        Index.map : Apply a mapping correspondence on an
            :class:`~pandas.Index`.
        Series.map : Apply a mapping correspondence on a
            :class:`~pandas.Series`.
        Series.apply : Apply more complex functions on a
            :class:`~pandas.Series`.

        Examples
        --------
        >>> idx = pd.CategoricalIndex(['a', 'b', 'c'])
        >>> idx
        CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'],
                         ordered=False, dtype='category')
        >>> idx.map(lambda x: x.upper())
        CategoricalIndex(['A', 'B', 'C'], categories=['A', 'B', 'C'],
                         ordered=False, dtype='category')
        >>> idx.map({'a': 'first', 'b': 'second', 'c': 'third'})
        CategoricalIndex(['first', 'second', 'third'], categories=['first',
                         'second', 'third'], ordered=False, dtype='category')

        If the mapping is one-to-one the ordering of the categories is
        preserved:

        >>> idx = pd.CategoricalIndex(['a', 'b', 'c'], ordered=True)
        >>> idx
        CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'],
                         ordered=True, dtype='category')
        >>> idx.map({'a': 3, 'b': 2, 'c': 1})
        CategoricalIndex([3, 2, 1], categories=[3, 2, 1], ordered=True,
                         dtype='category')

        If the mapping is not one-to-one an :class:`~pandas.Index` is returned:

        >>> idx.map({'a': 'first', 'b': 'second', 'c': 'first'})
        Index(['first', 'second', 'first'], dtype='object')

        If a `dict` is used, all unmapped categories are mapped to `NaN` and
        the result is an :class:`~pandas.Index`:

        >>> idx.map({'a': 'first', 'b': 'second'})
        Index(['first', 'second', nan], dtype='object')
        """
        mapped = self._values.map(mapper, na_action=na_action)
        return Index(mapped, name=self.name)

    def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
        # if calling index is category, don't check dtype of others
        try:
            cat = Categorical._concat_same_type(
                [self._is_dtype_compat(c) for c in to_concat]
            )
        except TypeError:
            # not all to_concat elements are among our categories (or NA)

            res = concat_compat([x._values for x in to_concat])
            return Index(res, name=name)
        else:
            return type(self)._simple_new(cat, name=name)
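
Editor's note: the ``_engine_type`` table above picks a hash engine matching
the width of the category codes; small category sets use int8 codes. A quick
check of that premise through public attributes, assuming pandas 2.x:

    >>> import pandas as pd
    >>> pd.CategoricalIndex(["a", "b", "c"]).codes.dtype
    dtype('int8')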

lib/python3.11/site-packages/pandas/core/indexes/datetimelike.py (new file, +843 lines)
@@ -0,0 +1,843 @@
"""
Base and utility classes for tseries type pandas objects.
"""
from __future__ import annotations

from abc import (
    ABC,
    abstractmethod,
)
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    cast,
    final,
)
import warnings

import numpy as np

from pandas._config import using_copy_on_write

from pandas._libs import (
    NaT,
    Timedelta,
    lib,
)
from pandas._libs.tslibs import (
    BaseOffset,
    Resolution,
    Tick,
    parsing,
    to_offset,
)
from pandas._libs.tslibs.dtypes import freq_to_period_freqstr
from pandas.compat.numpy import function as nv
from pandas.errors import (
    InvalidIndexError,
    NullFrequencyError,
)
from pandas.util._decorators import (
    Appender,
    cache_readonly,
    doc,
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.common import (
    is_integer,
    is_list_like,
)
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.dtypes import CategoricalDtype

from pandas.core.arrays import (
    DatetimeArray,
    ExtensionArray,
    PeriodArray,
    TimedeltaArray,
)
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
import pandas.core.common as com
import pandas.core.indexes.base as ibase
from pandas.core.indexes.base import (
    Index,
    _index_shared_docs,
)
from pandas.core.indexes.extension import NDArrayBackedExtensionIndex
from pandas.core.indexes.range import RangeIndex
from pandas.core.tools.timedeltas import to_timedelta

if TYPE_CHECKING:
    from collections.abc import Sequence
    from datetime import datetime

    from pandas._typing import (
        Axis,
        Self,
        npt,
    )

    from pandas import CategoricalIndex

_index_doc_kwargs = dict(ibase._index_doc_kwargs)


class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex, ABC):
    """
    Common ops mixin to support a unified interface for datetimelike Index types.
    """

    _can_hold_strings = False
    _data: DatetimeArray | TimedeltaArray | PeriodArray

    @doc(DatetimeLikeArrayMixin.mean)
    def mean(self, *, skipna: bool = True, axis: int | None = 0):
        return self._data.mean(skipna=skipna, axis=axis)

    @property
    def freq(self) -> BaseOffset | None:
        return self._data.freq

    @freq.setter
    def freq(self, value) -> None:
        # error: Property "freq" defined in "PeriodArray" is read-only [misc]
        self._data.freq = value  # type: ignore[misc]

    @property
    def asi8(self) -> npt.NDArray[np.int64]:
        return self._data.asi8

    @property
    @doc(DatetimeLikeArrayMixin.freqstr)
    def freqstr(self) -> str:
        from pandas import PeriodIndex

        if self._data.freqstr is not None and isinstance(
            self._data, (PeriodArray, PeriodIndex)
        ):
            freq = freq_to_period_freqstr(self._data.freq.n, self._data.freq.name)
            return freq
        else:
            return self._data.freqstr  # type: ignore[return-value]

    @cache_readonly
    @abstractmethod
    def _resolution_obj(self) -> Resolution:
        ...

    @cache_readonly
    @doc(DatetimeLikeArrayMixin.resolution)
    def resolution(self) -> str:
        return self._data.resolution

    # ------------------------------------------------------------------------

    @cache_readonly
    def hasnans(self) -> bool:
        return self._data._hasna

    def equals(self, other: Any) -> bool:
        """
        Determines if two Index objects contain the same elements.
        """
        if self.is_(other):
            return True

        if not isinstance(other, Index):
            return False
        elif other.dtype.kind in "iufc":
            return False
        elif not isinstance(other, type(self)):
            should_try = False
            inferable = self._data._infer_matches
            if other.dtype == object:
                should_try = other.inferred_type in inferable
            elif isinstance(other.dtype, CategoricalDtype):
                other = cast("CategoricalIndex", other)
                should_try = other.categories.inferred_type in inferable

            if should_try:
                try:
                    other = type(self)(other)
                except (ValueError, TypeError, OverflowError):
                    # e.g.
                    #  ValueError -> cannot parse str entry, or OutOfBoundsDatetime
                    #  TypeError -> trying to convert IntervalIndex to DatetimeIndex
                    #  OverflowError -> Index([very_large_timedeltas])
                    return False

        if self.dtype != other.dtype:
            # have different timezone
            return False

        return np.array_equal(self.asi8, other.asi8)

    @Appender(Index.__contains__.__doc__)
    def __contains__(self, key: Any) -> bool:
        hash(key)
        try:
            self.get_loc(key)
        except (KeyError, TypeError, ValueError, InvalidIndexError):
            return False
        return True

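    # [Editor's note: illustrative sketch, not part of the committed file.]
    # ``__contains__`` above funnels membership tests through ``get_loc`` and
    # swallows lookup errors, so string keys that parse as dates work and bad
    # keys are simply False:
    #
    #     >>> import pandas as pd
    #     >>> dti = pd.date_range("2016-01-01", periods=3, freq="D")
    #     >>> "2016-01-02" in dti
    #     True
    #     >>> "not-a-date" in dti
    #     False
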
    def _convert_tolerance(self, tolerance, target):
        tolerance = np.asarray(to_timedelta(tolerance).to_numpy())
        return super()._convert_tolerance(tolerance, target)

    # --------------------------------------------------------------------
    # Rendering Methods
    _default_na_rep = "NaT"

    def format(
        self,
        name: bool = False,
        formatter: Callable | None = None,
        na_rep: str = "NaT",
        date_format: str | None = None,
    ) -> list[str]:
        """
        Render a string representation of the Index.
        """
        warnings.warn(
            # GH#55413
            f"{type(self).__name__}.format is deprecated and will be removed "
            "in a future version. Convert using index.astype(str) or "
            "index.map(formatter) instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        header = []
        if name:
            header.append(
                ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
                if self.name is not None
                else ""
            )

        if formatter is not None:
            return header + list(self.map(formatter))

        return self._format_with_header(
            header=header, na_rep=na_rep, date_format=date_format
        )

    def _format_with_header(
        self, *, header: list[str], na_rep: str, date_format: str | None = None
    ) -> list[str]:
        # TODO: not reached in tests 2023-10-11
        # matches base class except for whitespace padding and date_format
        return header + list(
            self._get_values_for_csv(na_rep=na_rep, date_format=date_format)
        )

    @property
    def _formatter_func(self):
        return self._data._formatter()

    def _format_attrs(self):
        """
        Return a list of tuples of the (attr, formatted_value).
        """
        attrs = super()._format_attrs()
        for attrib in self._attributes:
            # iterating over _attributes prevents us from doing this for PeriodIndex
            if attrib == "freq":
                freq = self.freqstr
                if freq is not None:
                    freq = repr(freq)  # e.g. D -> 'D'
                attrs.append(("freq", freq))
        return attrs

    @Appender(Index._summary.__doc__)
    def _summary(self, name=None) -> str:
        result = super()._summary(name=name)
        if self.freq:
            result += f"\nFreq: {self.freqstr}"

        return result

    # --------------------------------------------------------------------
    # Indexing Methods

    @final
    def _can_partial_date_slice(self, reso: Resolution) -> bool:
        # e.g. test_getitem_setitem_periodindex
        # History of conversation GH#3452, GH#3931, GH#2369, GH#14826
        return reso > self._resolution_obj
        # NB: for DTI/PI, not TDI

    def _parsed_string_to_bounds(self, reso: Resolution, parsed):
        raise NotImplementedError

    def _parse_with_reso(self, label: str):
        # overridden by TimedeltaIndex
        try:
            if self.freq is None or hasattr(self.freq, "rule_code"):
                freq = self.freq
        except NotImplementedError:
            freq = getattr(self, "freqstr", getattr(self, "inferred_freq", None))

        freqstr: str | None
        if freq is not None and not isinstance(freq, str):
            freqstr = freq.rule_code
        else:
            freqstr = freq

        if isinstance(label, np.str_):
            # GH#45580
            label = str(label)

        parsed, reso_str = parsing.parse_datetime_string_with_reso(label, freqstr)
        reso = Resolution.from_attrname(reso_str)
        return parsed, reso

    def _get_string_slice(self, key: str):
        # overridden by TimedeltaIndex
        parsed, reso = self._parse_with_reso(key)
        try:
            return self._partial_date_slice(reso, parsed)
        except KeyError as err:
            raise KeyError(key) from err

    @final
    def _partial_date_slice(
        self,
        reso: Resolution,
        parsed: datetime,
    ) -> slice | npt.NDArray[np.intp]:
        """
        Parameters
        ----------
        reso : Resolution
        parsed : datetime

        Returns
        -------
        slice or ndarray[intp]
        """
        if not self._can_partial_date_slice(reso):
            raise ValueError

        t1, t2 = self._parsed_string_to_bounds(reso, parsed)
        vals = self._data._ndarray
        unbox = self._data._unbox

        if self.is_monotonic_increasing:
            if len(self) and (
                (t1 < self[0] and t2 < self[0]) or (t1 > self[-1] and t2 > self[-1])
            ):
                # we are out of range
                raise KeyError

            # TODO: does this depend on being monotonic _increasing_?

            # a monotonic (sorted) series can be sliced
            left = vals.searchsorted(unbox(t1), side="left")
            right = vals.searchsorted(unbox(t2), side="right")
            return slice(left, right)

        else:
            lhs_mask = vals >= unbox(t1)
            rhs_mask = vals <= unbox(t2)

            # try to find the dates
            return (lhs_mask & rhs_mask).nonzero()[0]

|
||||
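
    # Illustrative example (added for exposition, not part of the pandas
    # source): on a monotonic hourly index, a day-resolution string resolves
    # to a contiguous slice via the bounds computed above:
    #
    #   >>> import pandas as pd
    #   >>> dti = pd.date_range("2016-01-01", periods=48, freq="h")
    #   >>> ser = pd.Series(range(48), index=dti)
    #   >>> len(ser["2016-01-01"])  # reso "day" > index resolution "hour"
    #   24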

    def _maybe_cast_slice_bound(self, label, side: str):
        """
        If label is a string, cast it to scalar type according to resolution.

        Parameters
        ----------
        label : object
        side : {'left', 'right'}

        Returns
        -------
        label : object

        Notes
        -----
        Value of `side` parameter should be validated in caller.
        """
        if isinstance(label, str):
            try:
                parsed, reso = self._parse_with_reso(label)
            except ValueError as err:
                # DTI -> parsing.DateParseError
                # TDI -> 'unit abbreviation w/o a number'
                # PI -> string cannot be parsed as datetime-like
                self._raise_invalid_indexer("slice", label, err)

            lower, upper = self._parsed_string_to_bounds(reso, parsed)
            return lower if side == "left" else upper
        elif not isinstance(label, self._data._recognized_scalars):
            self._raise_invalid_indexer("slice", label)

        return label
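
    # Illustrative example (not part of the pandas source): string slice
    # bounds are cast to the appropriate endpoint, so a coarse label covers
    # its whole span:
    #
    #   >>> ser = pd.Series(
    #   ...     range(48), index=pd.date_range("2016-01-01", periods=48, freq="h")
    #   ... )
    #   >>> len(ser["2016-01-01":"2016-01-01"])  # both bounds cast to day edges
    #   24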

    # --------------------------------------------------------------------
    # Arithmetic Methods

    def shift(self, periods: int = 1, freq=None) -> Self:
        """
        Shift index by desired number of time frequency increments.

        This method is for shifting the values of datetime-like indexes
        by a specified time increment a given number of times.

        Parameters
        ----------
        periods : int, default 1
            Number of periods (or increments) to shift by,
            can be positive or negative.
        freq : pandas.DateOffset, pandas.Timedelta or string, optional
            Frequency increment to shift by.
            If None, the index is shifted by its own `freq` attribute.
            Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.

        Returns
        -------
        pandas.DatetimeIndex
            Shifted index.

        See Also
        --------
        Index.shift : Shift values of Index.
        PeriodIndex.shift : Shift values of PeriodIndex.
        """
        raise NotImplementedError
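
    # Illustrative example (not part of the pandas source) of the shift
    # semantics implemented by subclasses:
    #
    #   >>> idx = pd.date_range("2020-01-01", periods=3, freq="D")
    #   >>> idx.shift(2)
    #   DatetimeIndex(['2020-01-03', '2020-01-04', '2020-01-05'],
    #                 dtype='datetime64[ns]', freq='D')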

    # --------------------------------------------------------------------

    @doc(Index._maybe_cast_listlike_indexer)
    def _maybe_cast_listlike_indexer(self, keyarr):
        try:
            res = self._data._validate_listlike(keyarr, allow_object=True)
        except (ValueError, TypeError):
            if not isinstance(keyarr, ExtensionArray):
                # e.g. we don't want to cast DTA to ndarray[object]
                res = com.asarray_tuplesafe(keyarr)
                # TODO: com.asarray_tuplesafe shouldn't cast e.g. DatetimeArray
            else:
                res = keyarr
        return Index(res, dtype=res.dtype)


class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin, ABC):
    """
    Mixin class for methods shared by DatetimeIndex and TimedeltaIndex,
    but not PeriodIndex
    """

    _data: DatetimeArray | TimedeltaArray
    _comparables = ["name", "freq"]
    _attributes = ["name", "freq"]

    # Compat for frequency inference, see GH#23789
    _is_monotonic_increasing = Index.is_monotonic_increasing
    _is_monotonic_decreasing = Index.is_monotonic_decreasing
    _is_unique = Index.is_unique

    @property
    def unit(self) -> str:
        return self._data.unit

    def as_unit(self, unit: str) -> Self:
        """
        Convert to a dtype with the given unit resolution.

        Parameters
        ----------
        unit : {'s', 'ms', 'us', 'ns'}

        Returns
        -------
        same type as self

        Examples
        --------
        For :class:`pandas.DatetimeIndex`:

        >>> idx = pd.DatetimeIndex(['2020-01-02 01:02:03.004005006'])
        >>> idx
        DatetimeIndex(['2020-01-02 01:02:03.004005006'],
                      dtype='datetime64[ns]', freq=None)
        >>> idx.as_unit('s')
        DatetimeIndex(['2020-01-02 01:02:03'], dtype='datetime64[s]', freq=None)

        For :class:`pandas.TimedeltaIndex`:

        >>> tdelta_idx = pd.to_timedelta(['1 day 3 min 2 us 42 ns'])
        >>> tdelta_idx
        TimedeltaIndex(['1 days 00:03:00.000002042'],
                       dtype='timedelta64[ns]', freq=None)
        >>> tdelta_idx.as_unit('s')
        TimedeltaIndex(['1 days 00:03:00'], dtype='timedelta64[s]', freq=None)
        """
        arr = self._data.as_unit(unit)
        return type(self)._simple_new(arr, name=self.name)

    def _with_freq(self, freq):
        arr = self._data._with_freq(freq)
        return type(self)._simple_new(arr, name=self._name)

    @property
    def values(self) -> np.ndarray:
        # NB: For Datetime64TZ this is lossy
        data = self._data._ndarray
        if using_copy_on_write():
            data = data.view()
            data.flags.writeable = False
        return data

    @doc(DatetimeIndexOpsMixin.shift)
    def shift(self, periods: int = 1, freq=None) -> Self:
        if freq is not None and freq != self.freq:
            if isinstance(freq, str):
                freq = to_offset(freq)
            offset = periods * freq
            return self + offset

        if periods == 0 or len(self) == 0:
            # GH#14811 empty case
            return self.copy()

        if self.freq is None:
            raise NullFrequencyError("Cannot shift with no freq")

        start = self[0] + periods * self.freq
        end = self[-1] + periods * self.freq

        # Note: in the DatetimeTZ case, _generate_range will infer the
        # appropriate timezone from `start` and `end`, so tz does not need
        # to be passed explicitly.
        result = self._data._generate_range(
            start=start, end=end, periods=None, freq=self.freq, unit=self.unit
        )
        return type(self)._simple_new(result, name=self.name)
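
    # Illustrative example (not part of the pandas source): with an explicit
    # freq the shift is elementwise addition of `periods * freq`, so the
    # original freq need not be preserved:
    #
    #   >>> idx = pd.DatetimeIndex(["2020-01-01", "2020-01-10"])
    #   >>> idx.shift(1, freq="h")
    #   DatetimeIndex(['2020-01-01 01:00:00', '2020-01-10 01:00:00'],
    #                 dtype='datetime64[ns]', freq=None)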

    @cache_readonly
    @doc(DatetimeLikeArrayMixin.inferred_freq)
    def inferred_freq(self) -> str | None:
        return self._data.inferred_freq

    # --------------------------------------------------------------------
    # Set Operation Methods

    @cache_readonly
    def _as_range_index(self) -> RangeIndex:
        # Convert our i8 representations to RangeIndex
        # Caller is responsible for checking isinstance(self.freq, Tick)
        freq = cast(Tick, self.freq)
        tick = Timedelta(freq).as_unit("ns")._value
        rng = range(self[0]._value, self[-1]._value + tick, tick)
        return RangeIndex(rng)
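
    # Illustrative example (not part of the pandas source): for a Tick freq
    # the i8 values form an arithmetic progression, which is what lets the
    # set operations below delegate to RangeIndex:
    #
    #   >>> dti = pd.date_range("2020-01-01", periods=3, freq="D")
    #   >>> dti._as_range_index.step  # private, shown for exposition only
    #   86400000000000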

    def _can_range_setop(self, other) -> bool:
        return isinstance(self.freq, Tick) and isinstance(other.freq, Tick)

    def _wrap_range_setop(self, other, res_i8) -> Self:
        new_freq = None
        if not len(res_i8):
            # RangeIndex defaults to step=1, which we don't want.
            new_freq = self.freq
        elif isinstance(res_i8, RangeIndex):
            new_freq = to_offset(Timedelta(res_i8.step))

        # TODO(GH#41493): we cannot just do
        #  type(self._data)(res_i8.values, dtype=self.dtype, freq=new_freq)
        # because test_setops_preserve_freq fails with _validate_frequency raising.
        # This raising is incorrect, as 'on_freq' is incorrect. This will
        # be fixed by GH#41493
        res_values = res_i8.values.view(self._data._ndarray.dtype)
        result = type(self._data)._simple_new(
            # error: Argument "dtype" to "_simple_new" of "DatetimeArray" has
            # incompatible type "Union[dtype[Any], ExtensionDtype]"; expected
            # "Union[dtype[datetime64], DatetimeTZDtype]"
            res_values,
            dtype=self.dtype,  # type: ignore[arg-type]
            freq=new_freq,  # type: ignore[arg-type]
        )
        return cast("Self", self._wrap_setop_result(other, result))

    def _range_intersect(self, other, sort) -> Self:
        # Dispatch to RangeIndex intersection logic.
        left = self._as_range_index
        right = other._as_range_index
        res_i8 = left.intersection(right, sort=sort)
        return self._wrap_range_setop(other, res_i8)

    def _range_union(self, other, sort) -> Self:
        # Dispatch to RangeIndex union logic.
        left = self._as_range_index
        right = other._as_range_index
        res_i8 = left.union(right, sort=sort)
        return self._wrap_range_setop(other, res_i8)

    def _intersection(self, other: Index, sort: bool = False) -> Index:
        """
        intersection specialized to the case with matching dtypes and both non-empty.
        """
        other = cast("DatetimeTimedeltaMixin", other)

        if self._can_range_setop(other):
            return self._range_intersect(other, sort=sort)

        if not self._can_fast_intersect(other):
            result = Index._intersection(self, other, sort=sort)
            # We need to invalidate the freq because Index._intersection
            #  uses _shallow_copy on a view of self._data, which will preserve
            #  self.freq if we're not careful.
            # At this point we should have result.dtype == self.dtype
            #  and type(result) is type(self._data)
            result = self._wrap_setop_result(other, result)
            return result._with_freq(None)._with_freq("infer")

        else:
            return self._fast_intersect(other, sort)

    def _fast_intersect(self, other, sort):
        # to make our life easier, "sort" the two ranges
        if self[0] <= other[0]:
            left, right = self, other
        else:
            left, right = other, self

        # after sorting, the intersection always starts with the right index
        # and ends with the index whose last element is smallest
        end = min(left[-1], right[-1])
        start = right[0]

        if end < start:
            result = self[:0]
        else:
            lslice = slice(*left.slice_locs(start, end))
            result = left._values[lslice]

        return result

    def _can_fast_intersect(self, other: Self) -> bool:
        # Note: we only get here with len(self) > 0 and len(other) > 0
        if self.freq is None:
            return False

        elif other.freq != self.freq:
            return False

        elif not self.is_monotonic_increasing:
            # Because freq is not None, we must then be monotonic decreasing
            return False

        # this along with matching freqs ensure that we "line up",
        #  so intersection will preserve freq
        # Note we are assuming away Ticks, as those go through _range_intersect
        # GH#42104
        return self.freq.n == 1

    def _can_fast_union(self, other: Self) -> bool:
        # Assumes that type(self) == type(other), as per the annotation
        # The ability to fast_union also implies that `freq` should be
        #  retained on union.
        freq = self.freq

        if freq is None or freq != other.freq:
            return False

        if not self.is_monotonic_increasing:
            # Because freq is not None, we must then be monotonic decreasing
            # TODO: do union on the reversed indexes?
            return False

        if len(self) == 0 or len(other) == 0:
            # only reached via union_many
            return True

        # to make our life easier, "sort" the two ranges
        if self[0] <= other[0]:
            left, right = self, other
        else:
            left, right = other, self

        right_start = right[0]
        left_end = left[-1]

        # Only need to "adjoin", not overlap
        return (right_start == left_end + freq) or right_start in left
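
    # Illustrative example (not part of the pandas source): two ranges with a
    # matching non-Tick freq that adjoin can be unioned without re-inferring
    # the freq (Tick freqs go through _range_union instead):
    #
    #   >>> a = pd.date_range("2020-01-01", periods=3, freq="MS")
    #   >>> b = pd.date_range("2020-04-01", periods=3, freq="MS")
    #   >>> a.union(b).freq  # b starts exactly one freq after a ends
    #   <MonthBegin>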

    def _fast_union(self, other: Self, sort=None) -> Self:
        # Caller is responsible for ensuring self and other are non-empty

        # to make our life easier, "sort" the two ranges
        if self[0] <= other[0]:
            left, right = self, other
        elif sort is False:
            # TDIs are not in the "correct" order and we don't want
            #  to sort but want to remove overlaps
            left, right = self, other
            left_start = left[0]
            loc = right.searchsorted(left_start, side="left")
            right_chunk = right._values[:loc]
            dates = concat_compat((left._values, right_chunk))
            result = type(self)._simple_new(dates, name=self.name)
            return result
        else:
            left, right = other, self

        left_end = left[-1]
        right_end = right[-1]

        # concatenate
        if left_end < right_end:
            loc = right.searchsorted(left_end, side="right")
            right_chunk = right._values[loc:]
            dates = concat_compat([left._values, right_chunk])
            # The can_fast_union check ensures that the result.freq
            #  should match self.freq
            assert isinstance(dates, type(self._data))
            # error: Item "ExtensionArray" of "ExtensionArray |
            # ndarray[Any, Any]" has no attribute "_freq"
            assert dates._freq == self.freq  # type: ignore[union-attr]
            result = type(self)._simple_new(dates)
            return result
        else:
            return left

    def _union(self, other, sort):
        # We are called by `union`, which is responsible for this validation
        assert isinstance(other, type(self))
        assert self.dtype == other.dtype

        if self._can_range_setop(other):
            return self._range_union(other, sort=sort)

        if self._can_fast_union(other):
            result = self._fast_union(other, sort=sort)
            # in the case with sort=None, the _can_fast_union check ensures
            #  that result.freq == self.freq
            return result
        else:
            return super()._union(other, sort)._with_freq("infer")

    # --------------------------------------------------------------------
    # Join Methods

    def _get_join_freq(self, other):
        """
        Get the freq to attach to the result of a join operation.
        """
        freq = None
        if self._can_fast_union(other):
            freq = self.freq
        return freq

    def _wrap_joined_index(
        self, joined, other, lidx: npt.NDArray[np.intp], ridx: npt.NDArray[np.intp]
    ):
        assert other.dtype == self.dtype, (other.dtype, self.dtype)
        result = super()._wrap_joined_index(joined, other, lidx, ridx)
        result._data._freq = self._get_join_freq(other)
        return result

    def _get_engine_target(self) -> np.ndarray:
        # engine methods and libjoin methods need dt64/td64 values cast to i8
        return self._data._ndarray.view("i8")

    def _from_join_target(self, result: np.ndarray):
        # view e.g. i8 back to M8[ns]
        result = result.view(self._data._ndarray.dtype)
        return self._data._from_backing_data(result)

    # --------------------------------------------------------------------
    # List-like Methods

    def _get_delete_freq(self, loc: int | slice | Sequence[int]):
        """
        Find the `freq` for self.delete(loc).
        """
        freq = None
        if self.freq is not None:
            if is_integer(loc):
                if loc in (0, -len(self), -1, len(self) - 1):
                    freq = self.freq
            else:
                if is_list_like(loc):
                    # error: Incompatible types in assignment (expression has
                    # type "Union[slice, ndarray]", variable has type
                    # "Union[int, slice, Sequence[int]]")
                    loc = lib.maybe_indices_to_slice(  # type: ignore[assignment]
                        np.asarray(loc, dtype=np.intp), len(self)
                    )
                if isinstance(loc, slice) and loc.step in (1, None):
                    if loc.start in (0, None) or loc.stop in (len(self), None):
                        freq = self.freq
        return freq
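
    # Illustrative example (not part of the pandas source): deleting from
    # either end keeps the freq, deleting from the middle drops it:
    #
    #   >>> idx = pd.date_range("2020-01-01", periods=4, freq="D")
    #   >>> idx.delete(0).freq
    #   <Day>
    #   >>> idx.delete(1).freq is None
    #   True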

    def _get_insert_freq(self, loc: int, item):
        """
        Find the `freq` for self.insert(loc, item).
        """
        value = self._data._validate_scalar(item)
        item = self._data._box_func(value)

        freq = None
        if self.freq is not None:
            # freq can be preserved on edge cases
            if self.size:
                if item is NaT:
                    pass
                elif loc in (0, -len(self)) and item + self.freq == self[0]:
                    freq = self.freq
                elif (loc == len(self)) and item - self.freq == self[-1]:
                    freq = self.freq
            else:
                # Adding a single item to an empty index may preserve freq
                if isinstance(self.freq, Tick):
                    # all TimedeltaIndex cases go through here; is_on_offset
                    #  would raise TypeError
                    freq = self.freq
                elif self.freq.is_on_offset(item):
                    freq = self.freq
        return freq
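
    # Illustrative example (not part of the pandas source): inserting an item
    # that lands exactly one freq before the start (or after the end)
    # preserves the freq:
    #
    #   >>> idx = pd.date_range("2020-01-02", periods=3, freq="D")
    #   >>> idx.insert(0, pd.Timestamp("2020-01-01")).freq
    #   <Day>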

    @doc(NDArrayBackedExtensionIndex.delete)
    def delete(self, loc) -> Self:
        result = super().delete(loc)
        result._data._freq = self._get_delete_freq(loc)
        return result

    @doc(NDArrayBackedExtensionIndex.insert)
    def insert(self, loc: int, item):
        result = super().insert(loc, item)
        if isinstance(result, type(self)):
            # i.e. parent class method did not cast
            result._data._freq = self._get_insert_freq(loc, item)
        return result

    # --------------------------------------------------------------------
    # NDArray-Like Methods

    @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(
        self,
        indices,
        axis: Axis = 0,
        allow_fill: bool = True,
        fill_value=None,
        **kwargs,
    ) -> Self:
        nv.validate_take((), kwargs)
        indices = np.asarray(indices, dtype=np.intp)

        result = NDArrayBackedExtensionIndex.take(
            self, indices, axis, allow_fill, fill_value, **kwargs
        )

        maybe_slice = lib.maybe_indices_to_slice(indices, len(self))
        if isinstance(maybe_slice, slice):
            freq = self._data._get_getitem_freq(maybe_slice)
            result._data._freq = freq
        return result
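
    # Illustrative example (not part of the pandas source): `take` re-derives
    # the freq only when the indices collapse to a slice:
    #
    #   >>> idx = pd.date_range("2020-01-01", periods=5, freq="D")
    #   >>> idx.take([0, 1, 2]).freq
    #   <Day>
    #   >>> idx.take([0, 2, 3]).freq is None
    #   True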
1127
lib/python3.11/site-packages/pandas/core/indexes/datetimes.py
Normal file
File diff suppressed because it is too large
172
lib/python3.11/site-packages/pandas/core/indexes/extension.py
Normal file
@ -0,0 +1,172 @@
"""
Shared methods for Index subclasses backed by ExtensionArray.
"""
from __future__ import annotations

from typing import (
    TYPE_CHECKING,
    Callable,
    TypeVar,
)

from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.generic import ABCDataFrame

from pandas.core.indexes.base import Index

if TYPE_CHECKING:
    import numpy as np

    from pandas._typing import (
        ArrayLike,
        npt,
    )

    from pandas.core.arrays import IntervalArray
    from pandas.core.arrays._mixins import NDArrayBackedExtensionArray

_ExtensionIndexT = TypeVar("_ExtensionIndexT", bound="ExtensionIndex")


def _inherit_from_data(
    name: str, delegate: type, cache: bool = False, wrap: bool = False
):
    """
    Make an alias for a method of the underlying ExtensionArray.

    Parameters
    ----------
    name : str
        Name of an attribute the class should inherit from its EA parent.
    delegate : class
    cache : bool, default False
        Whether to convert wrapped properties into cache_readonly
    wrap : bool, default False
        Whether to wrap the inherited result in an Index.

    Returns
    -------
    attribute, method, property, or cache_readonly
    """
    attr = getattr(delegate, name)

    if isinstance(attr, property) or type(attr).__name__ == "getset_descriptor":
        # getset_descriptor i.e. property defined in cython class
        if cache:

            def cached(self):
                return getattr(self._data, name)

            cached.__name__ = name
            cached.__doc__ = attr.__doc__
            method = cache_readonly(cached)

        else:

            def fget(self):
                result = getattr(self._data, name)
                if wrap:
                    if isinstance(result, type(self._data)):
                        return type(self)._simple_new(result, name=self.name)
                    elif isinstance(result, ABCDataFrame):
                        return result.set_index(self)
                    return Index(result, name=self.name, dtype=result.dtype)
                return result

            def fset(self, value) -> None:
                setattr(self._data, name, value)

            fget.__name__ = name
            fget.__doc__ = attr.__doc__

            method = property(fget, fset)

    elif not callable(attr):
        # just a normal attribute, no wrapping
        method = attr

    else:
        # error: Incompatible redefinition (redefinition with type "Callable[[Any,
        # VarArg(Any), KwArg(Any)], Any]", original type "property")
        def method(self, *args, **kwargs):  # type: ignore[misc]
            if "inplace" in kwargs:
                raise ValueError(f"cannot use inplace with {type(self).__name__}")
            result = attr(self._data, *args, **kwargs)
            if wrap:
                if isinstance(result, type(self._data)):
                    return type(self)._simple_new(result, name=self.name)
                elif isinstance(result, ABCDataFrame):
                    return result.set_index(self)
                return Index(result, name=self.name, dtype=result.dtype)
            return result

        # error: "property" has no attribute "__name__"
        method.__name__ = name  # type: ignore[attr-defined]
        method.__doc__ = attr.__doc__
    return method


def inherit_names(
    names: list[str], delegate: type, cache: bool = False, wrap: bool = False
) -> Callable[[type[_ExtensionIndexT]], type[_ExtensionIndexT]]:
    """
    Class decorator to pin attributes from an ExtensionArray to an Index subclass.

    Parameters
    ----------
    names : List[str]
    delegate : class
    cache : bool, default False
    wrap : bool, default False
        Whether to wrap the inherited result in an Index.
    """

    def wrapper(cls: type[_ExtensionIndexT]) -> type[_ExtensionIndexT]:
        for name in names:
            meth = _inherit_from_data(name, delegate, cache=cache, wrap=wrap)
            setattr(cls, name, meth)

        return cls

    return wrapper
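
# Illustrative example (not part of the pandas source): `inherit_names` is
# applied as an ordinary class decorator, e.g.
#
#   @inherit_names(["days", "seconds"], TimedeltaArray, wrap=True)
#   class MyIndex(ExtensionIndex):
#       ...
#
# which pins the named attributes of the array class onto the Index subclass,
# wrapping each result in an Index because wrap=True.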


class ExtensionIndex(Index):
    """
    Index subclass for indexes backed by ExtensionArray.
    """

    # The base class already passes through to _data:
    #  size, __len__, dtype

    _data: IntervalArray | NDArrayBackedExtensionArray

    # ---------------------------------------------------------------------

    def _validate_fill_value(self, value):
        """
        Convert value to be insertable to underlying array.
        """
        return self._data._validate_setitem_value(value)

    @cache_readonly
    def _isnan(self) -> npt.NDArray[np.bool_]:
        # error: Incompatible return value type (got "ExtensionArray", expected
        # "ndarray")
        return self._data.isna()  # type: ignore[return-value]


class NDArrayBackedExtensionIndex(ExtensionIndex):
    """
    Index subclass for indexes backed by NDArrayBackedExtensionArray.
    """

    _data: NDArrayBackedExtensionArray

    def _get_engine_target(self) -> np.ndarray:
        return self._data._ndarray

    def _from_join_target(self, result: np.ndarray) -> ArrayLike:
        assert result.dtype == self._data._ndarray.dtype
        return self._data._from_backing_data(result)
120
lib/python3.11/site-packages/pandas/core/indexes/frozen.py
Normal file
@ -0,0 +1,120 @@
"""
|
||||
frozen (immutable) data structures to support MultiIndexing
|
||||
|
||||
These are used for:
|
||||
|
||||
- .names (FrozenList)
|
||||
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
NoReturn,
|
||||
)
|
||||
|
||||
from pandas.core.base import PandasObject
|
||||
|
||||
from pandas.io.formats.printing import pprint_thing
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import Self
|
||||
|
||||
|
||||
class FrozenList(PandasObject, list):
|
||||
"""
|
||||
Container that doesn't allow setting item *but*
|
||||
because it's technically hashable, will be used
|
||||
for lookups, appropriately, etc.
|
||||
"""
|
||||
|
||||
# Side note: This has to be of type list. Otherwise,
|
||||
# it messes up PyTables type checks.
|
||||
|
||||
def union(self, other) -> FrozenList:
|
||||
"""
|
||||
Returns a FrozenList with other concatenated to the end of self.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
other : array-like
|
||||
The array-like whose elements we are concatenating.
|
||||
|
||||
Returns
|
||||
-------
|
||||
FrozenList
|
||||
The collection difference between self and other.
|
||||
"""
|
||||
if isinstance(other, tuple):
|
||||
other = list(other)
|
||||
return type(self)(super().__add__(other))
|
||||
|
||||
def difference(self, other) -> FrozenList:
|
||||
"""
|
||||
Returns a FrozenList with elements from other removed from self.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
other : array-like
|
||||
The array-like whose elements we are removing self.
|
||||
|
||||
Returns
|
||||
-------
|
||||
FrozenList
|
||||
The collection difference between self and other.
|
||||
"""
|
||||
other = set(other)
|
||||
temp = [x for x in self if x not in other]
|
||||
return type(self)(temp)
|
||||
|
||||
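
    # Illustrative example (not part of the pandas source):
    #
    #   >>> fl = FrozenList(["a", "b", "c"])
    #   >>> fl.union(["d"])
    #   FrozenList(['a', 'b', 'c', 'd'])
    #   >>> fl.difference(["b"])
    #   FrozenList(['a', 'c'])
    #   >>> fl.append("d")  # raises TypeError: 'FrozenList' does not
    #   ...                 # support mutable operations.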

    # TODO: Consider deprecating these in favor of `union` (xref gh-15506)
    # error: Incompatible types in assignment (expression has type
    # "Callable[[FrozenList, Any], FrozenList]", base class "list" defined the
    # type as overloaded function)
    __add__ = __iadd__ = union  # type: ignore[assignment]

    def __getitem__(self, n):
        if isinstance(n, slice):
            return type(self)(super().__getitem__(n))
        return super().__getitem__(n)

    def __radd__(self, other) -> Self:
        if isinstance(other, tuple):
            other = list(other)
        return type(self)(other + list(self))

    def __eq__(self, other: object) -> bool:
        if isinstance(other, (tuple, FrozenList)):
            other = list(other)
        return super().__eq__(other)

    __req__ = __eq__

    def __mul__(self, other) -> Self:
        return type(self)(super().__mul__(other))

    __imul__ = __mul__

    def __reduce__(self):
        return type(self), (list(self),)

    # error: Signature of "__hash__" incompatible with supertype "list"
    def __hash__(self) -> int:  # type: ignore[override]
        return hash(tuple(self))

    def _disabled(self, *args, **kwargs) -> NoReturn:
        """
        This method will not function because object is immutable.
        """
        raise TypeError(f"'{type(self).__name__}' does not support mutable operations.")

    def __str__(self) -> str:
        return pprint_thing(self, quote_strings=True, escape_chars=("\t", "\r", "\n"))

    def __repr__(self) -> str:
        return f"{type(self).__name__}({str(self)})"

    __setitem__ = __setslice__ = _disabled  # type: ignore[assignment]
    __delitem__ = __delslice__ = _disabled
    pop = append = extend = _disabled
    remove = sort = insert = _disabled  # type: ignore[assignment]
1137
lib/python3.11/site-packages/pandas/core/indexes/interval.py
Normal file
File diff suppressed because it is too large
4176
lib/python3.11/site-packages/pandas/core/indexes/multi.py
Normal file
File diff suppressed because it is too large
614
lib/python3.11/site-packages/pandas/core/indexes/period.py
Normal file
@ -0,0 +1,614 @@
from __future__ import annotations

from datetime import (
    datetime,
    timedelta,
)
from typing import TYPE_CHECKING
import warnings

import numpy as np

from pandas._libs import index as libindex
from pandas._libs.tslibs import (
    BaseOffset,
    NaT,
    Period,
    Resolution,
    Tick,
)
from pandas._libs.tslibs.dtypes import OFFSET_TO_PERIOD_FREQSTR
from pandas.util._decorators import (
    cache_readonly,
    doc,
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.common import is_integer
from pandas.core.dtypes.dtypes import PeriodDtype
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.dtypes.missing import is_valid_na_for_dtype

from pandas.core.arrays.period import (
    PeriodArray,
    period_array,
    raise_on_incompatible,
    validate_dtype_freq,
)
import pandas.core.common as com
import pandas.core.indexes.base as ibase
from pandas.core.indexes.base import maybe_extract_name
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
from pandas.core.indexes.datetimes import (
    DatetimeIndex,
    Index,
)
from pandas.core.indexes.extension import inherit_names

if TYPE_CHECKING:
    from collections.abc import Hashable

    from pandas._typing import (
        Dtype,
        DtypeObj,
        Self,
        npt,
    )


_index_doc_kwargs = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update({"target_klass": "PeriodIndex or list of Periods"})
_shared_doc_kwargs = {
    "klass": "PeriodArray",
}

# --- Period index sketch


def _new_PeriodIndex(cls, **d):
    # GH13277 for unpickling
    values = d.pop("data")
    if values.dtype == "int64":
        freq = d.pop("freq", None)
        dtype = PeriodDtype(freq)
        values = PeriodArray(values, dtype=dtype)
        return cls._simple_new(values, **d)
    else:
        return cls(values, **d)


@inherit_names(
    ["strftime", "start_time", "end_time"] + PeriodArray._field_ops,
    PeriodArray,
    wrap=True,
)
@inherit_names(["is_leap_year"], PeriodArray)
class PeriodIndex(DatetimeIndexOpsMixin):
    """
    Immutable ndarray holding ordinal values indicating regular periods in time.

    Index keys are boxed to Period objects which carry the metadata (e.g.,
    frequency information).

    Parameters
    ----------
    data : array-like (1d int np.ndarray or PeriodArray), optional
        Optional period-like data to construct index with.
    copy : bool
        Make a copy of input ndarray.
    freq : str or period object, optional
        One of pandas period strings or corresponding objects.
    year : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    month : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    quarter : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    day : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    hour : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    minute : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    second : int, array, or Series, default None

        .. deprecated:: 2.2.0
           Use PeriodIndex.from_fields instead.
    dtype : str or PeriodDtype, default None

    Attributes
    ----------
    day
    dayofweek
    day_of_week
    dayofyear
    day_of_year
    days_in_month
    daysinmonth
    end_time
    freq
    freqstr
    hour
    is_leap_year
    minute
    month
    quarter
    qyear
    second
    start_time
    week
    weekday
    weekofyear
    year

    Methods
    -------
    asfreq
    strftime
    to_timestamp
    from_fields
    from_ordinals

    See Also
    --------
    Index : The base pandas Index type.
    Period : Represents a period of time.
    DatetimeIndex : Index with datetime64 data.
    TimedeltaIndex : Index of timedelta64 data.
    period_range : Create a fixed-frequency PeriodIndex.

    Examples
    --------
    >>> idx = pd.PeriodIndex.from_fields(year=[2000, 2002], quarter=[1, 3])
    >>> idx
    PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]')
    """

    _typ = "periodindex"

    _data: PeriodArray
    freq: BaseOffset
    dtype: PeriodDtype

    _data_cls = PeriodArray
    _supports_partial_string_indexing = True

    @property
    def _engine_type(self) -> type[libindex.PeriodEngine]:
        return libindex.PeriodEngine

    @cache_readonly
    def _resolution_obj(self) -> Resolution:
        # for compat with DatetimeIndex
        return self.dtype._resolution_obj

    # --------------------------------------------------------------------
    # methods that dispatch to array and wrap result in Index
    # These are defined here instead of via inherit_names for mypy

    @doc(
        PeriodArray.asfreq,
        other="pandas.arrays.PeriodArray",
        other_name="PeriodArray",
        **_shared_doc_kwargs,
    )
    def asfreq(self, freq=None, how: str = "E") -> Self:
        arr = self._data.asfreq(freq, how)
        return type(self)._simple_new(arr, name=self.name)

    @doc(PeriodArray.to_timestamp)
    def to_timestamp(self, freq=None, how: str = "start") -> DatetimeIndex:
        arr = self._data.to_timestamp(freq, how)
        return DatetimeIndex._simple_new(arr, name=self.name)

    @property
    @doc(PeriodArray.hour.fget)
    def hour(self) -> Index:
        return Index(self._data.hour, name=self.name)

    @property
    @doc(PeriodArray.minute.fget)
    def minute(self) -> Index:
        return Index(self._data.minute, name=self.name)

    @property
    @doc(PeriodArray.second.fget)
    def second(self) -> Index:
        return Index(self._data.second, name=self.name)

    # ------------------------------------------------------------------------
    # Index Constructors

    def __new__(
        cls,
        data=None,
        ordinal=None,
        freq=None,
        dtype: Dtype | None = None,
        copy: bool = False,
        name: Hashable | None = None,
        **fields,
    ) -> Self:
        valid_field_set = {
            "year",
            "month",
            "day",
            "quarter",
            "hour",
            "minute",
            "second",
        }

        refs = None
        if not copy and isinstance(data, (Index, ABCSeries)):
            refs = data._references

        if not set(fields).issubset(valid_field_set):
            argument = next(iter(set(fields) - valid_field_set))
            raise TypeError(f"__new__() got an unexpected keyword argument {argument}")
        elif len(fields):
            # GH#55960
            warnings.warn(
                "Constructing PeriodIndex from fields is deprecated. Use "
                "PeriodIndex.from_fields instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

        if ordinal is not None:
            # GH#55960
            warnings.warn(
                "The 'ordinal' keyword in PeriodIndex is deprecated and will "
                "be removed in a future version. Use PeriodIndex.from_ordinals "
                "instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

        name = maybe_extract_name(name, data, cls)

        if data is None and ordinal is None:
            # range-based.
            if not fields:
                # test_pickle_compat_construction
                cls._raise_scalar_data_error(None)
            data = cls.from_fields(**fields, freq=freq)._data
            copy = False

        elif fields:
            if data is not None:
                raise ValueError("Cannot pass both data and fields")
            raise ValueError("Cannot pass both ordinal and fields")

        else:
            freq = validate_dtype_freq(dtype, freq)

            # PeriodIndex allow PeriodIndex(period_index, freq=different)
            # Let's not encourage that kind of behavior in PeriodArray.

            if freq and isinstance(data, cls) and data.freq != freq:
                # TODO: We can do some of these with no-copy / coercion?
                # e.g. D -> 2D seems to be OK
                data = data.asfreq(freq)

            if data is None and ordinal is not None:
                ordinal = np.asarray(ordinal, dtype=np.int64)
                dtype = PeriodDtype(freq)
                data = PeriodArray(ordinal, dtype=dtype)
            elif data is not None and ordinal is not None:
                raise ValueError("Cannot pass both data and ordinal")
            else:
                # don't pass copy here, since we copy later.
                data = period_array(data=data, freq=freq)

        if copy:
            data = data.copy()

        return cls._simple_new(data, name=name, refs=refs)

    @classmethod
    def from_fields(
        cls,
        *,
        year=None,
        quarter=None,
        month=None,
        day=None,
        hour=None,
        minute=None,
        second=None,
        freq=None,
    ) -> Self:
        fields = {
            "year": year,
            "quarter": quarter,
            "month": month,
            "day": day,
            "hour": hour,
            "minute": minute,
            "second": second,
        }
        fields = {key: value for key, value in fields.items() if value is not None}
        arr = PeriodArray._from_fields(fields=fields, freq=freq)
        return cls._simple_new(arr)

    @classmethod
    def from_ordinals(cls, ordinals, *, freq, name=None) -> Self:
        ordinals = np.asarray(ordinals, dtype=np.int64)
        dtype = PeriodDtype(freq)
        data = PeriodArray._simple_new(ordinals, dtype=dtype)
        return cls._simple_new(data, name=name)
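
    # Illustrative example (not part of the pandas source): the two
    # non-deprecated constructors replace the old field/ordinal keywords:
    #
    #   >>> pd.PeriodIndex.from_fields(year=[2020, 2021], month=[1, 2], freq="M")
    #   PeriodIndex(['2020-01', '2021-02'], dtype='period[M]')
    #   >>> pd.PeriodIndex.from_ordinals([600, 601], freq="M")  # months since 1970-01
    #   PeriodIndex(['2020-01', '2020-02'], dtype='period[M]')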

    # ------------------------------------------------------------------------
    # Data

    @property
    def values(self) -> npt.NDArray[np.object_]:
        return np.asarray(self, dtype=object)

    def _maybe_convert_timedelta(self, other) -> int | npt.NDArray[np.int64]:
        """
        Convert timedelta-like input to an integer multiple of self.freq

        Parameters
        ----------
        other : timedelta, np.timedelta64, DateOffset, int, np.ndarray

        Returns
        -------
        converted : int, np.ndarray[int64]

        Raises
        ------
        IncompatibleFrequency : if the input cannot be written as a multiple
            of self.freq.  Note IncompatibleFrequency subclasses ValueError.
        """
        if isinstance(other, (timedelta, np.timedelta64, Tick, np.ndarray)):
            if isinstance(self.freq, Tick):
                # _check_timedeltalike_freq_compat will raise if incompatible
                delta = self._data._check_timedeltalike_freq_compat(other)
                return delta
        elif isinstance(other, BaseOffset):
            if other.base == self.freq.base:
                return other.n

            raise raise_on_incompatible(self, other)
        elif is_integer(other):
            assert isinstance(other, int)
            return other

        # raise when input doesn't have freq
        raise raise_on_incompatible(self, None)
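
    # Illustrative example (not part of the pandas source): for an hourly
    # PeriodIndex, a whole multiple of the freq converts to an integer, while
    # e.g. a 90-minute Timedelta raises IncompatibleFrequency:
    #
    #   >>> pi = pd.period_range("2020-01-01", periods=3, freq="h")
    #   >>> pi._maybe_convert_timedelta(pd.Timedelta(hours=2))  # private
    #   2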

    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
        """
        Can we compare values of the given dtype to our own?
        """
        return self.dtype == dtype

    # ------------------------------------------------------------------------
    # Index Methods

    def asof_locs(self, where: Index, mask: npt.NDArray[np.bool_]) -> np.ndarray:
        """
        where : array of timestamps
        mask : np.ndarray[bool]
            Array of booleans where data is not NA.
        """
        if isinstance(where, DatetimeIndex):
            where = PeriodIndex(where._values, freq=self.freq)
        elif not isinstance(where, PeriodIndex):
            raise TypeError("asof_locs `where` must be DatetimeIndex or PeriodIndex")

        return super().asof_locs(where, mask)

    @property
    def is_full(self) -> bool:
        """
        Returns True if this PeriodIndex is range-like in that all Periods
        between start and end are present, in order.
        """
        if len(self) == 0:
            return True
        if not self.is_monotonic_increasing:
            raise ValueError("Index is not monotonic")
        values = self.asi8
        return bool(((values[1:] - values[:-1]) < 2).all())

    @property
    def inferred_type(self) -> str:
        # b/c data is represented as ints make sure we can't have ambiguous
        # indexing
        return "period"

    # ------------------------------------------------------------------------
    # Indexing Methods

    def _convert_tolerance(self, tolerance, target):
        # Returned tolerance must be in dtype/units so that
        #  `|self._get_engine_target() - target._engine_target()| <= tolerance`
        #  is meaningful.  Since PeriodIndex returns int64 for engine_target,
        #  we may need to convert timedelta64 tolerance to int64.
        tolerance = super()._convert_tolerance(tolerance, target)

        if self.dtype == target.dtype:
            # convert tolerance to i8
            tolerance = self._maybe_convert_timedelta(tolerance)

        return tolerance

    def get_loc(self, key):
        """
        Get integer location for requested label.

        Parameters
        ----------
        key : Period, NaT, str, or datetime
            String or datetime key must be parsable as Period.

        Returns
        -------
        loc : int or ndarray[int64]

        Raises
        ------
        KeyError
            Key is not present in the index.
        TypeError
            If key is listlike or otherwise not hashable.
        """
        orig_key = key

        self._check_indexing_error(key)

        if is_valid_na_for_dtype(key, self.dtype):
            key = NaT

        elif isinstance(key, str):
            try:
                parsed, reso = self._parse_with_reso(key)
            except ValueError as err:
                # A string with invalid format
                raise KeyError(f"Cannot interpret '{key}' as period") from err

            if self._can_partial_date_slice(reso):
                try:
                    return self._partial_date_slice(reso, parsed)
                except KeyError as err:
                    raise KeyError(key) from err

            if reso == self._resolution_obj:
                # the reso < self._resolution_obj case goes
                #  through _get_string_slice
                key = self._cast_partial_indexing_scalar(parsed)
            else:
                raise KeyError(key)

        elif isinstance(key, Period):
            self._disallow_mismatched_indexing(key)

        elif isinstance(key, datetime):
            key = self._cast_partial_indexing_scalar(key)

        else:
            # in particular integer, which Period constructor would cast to string
            raise KeyError(key)

        try:
            return Index.get_loc(self, key)
        except KeyError as err:
            raise KeyError(orig_key) from err

    def _disallow_mismatched_indexing(self, key: Period) -> None:
        if key._dtype != self.dtype:
            raise KeyError(key)

    def _cast_partial_indexing_scalar(self, label: datetime) -> Period:
        try:
            period = Period(label, freq=self.freq)
        except ValueError as err:
            # we cannot construct the Period
            raise KeyError(label) from err
        return period

    @doc(DatetimeIndexOpsMixin._maybe_cast_slice_bound)
    def _maybe_cast_slice_bound(self, label, side: str):
        if isinstance(label, datetime):
            label = self._cast_partial_indexing_scalar(label)

        return super()._maybe_cast_slice_bound(label, side)

    def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
        freq = OFFSET_TO_PERIOD_FREQSTR.get(reso.attr_abbrev, reso.attr_abbrev)
        iv = Period(parsed, freq=freq)
        return (iv.asfreq(self.freq, how="start"), iv.asfreq(self.freq, how="end"))

    @doc(DatetimeIndexOpsMixin.shift)
    def shift(self, periods: int = 1, freq=None) -> Self:
        if freq is not None:
            raise TypeError(
                f"`freq` argument is not supported for {type(self).__name__}.shift"
            )
        return self + periods


def period_range(
    start=None,
    end=None,
    periods: int | None = None,
    freq=None,
    name: Hashable | None = None,
) -> PeriodIndex:
    """
    Return a fixed frequency PeriodIndex.

    The day (calendar) is the default frequency.

    Parameters
    ----------
    start : str, datetime, date, pandas.Timestamp, or period-like, default None
        Left bound for generating periods.
    end : str, datetime, date, pandas.Timestamp, or period-like, default None
        Right bound for generating periods.
    periods : int, default None
        Number of periods to generate.
    freq : str or DateOffset, optional
        Frequency alias. By default the freq is taken from `start` or `end`
        if those are Period objects. Otherwise, the default is ``"D"`` for
        daily frequency.
    name : str, default None
        Name of the resulting PeriodIndex.

    Returns
    -------
    PeriodIndex

    Notes
    -----
    Of the three parameters: ``start``, ``end``, and ``periods``, exactly two
    must be specified.

    To learn more about the frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M')
    PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06',
                 '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12',
                 '2018-01'],
                dtype='period[M]')

    If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor
    endpoints for a ``PeriodIndex`` with frequency matching that of the
    ``period_range`` constructor.

    >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'),
    ...                 end=pd.Period('2017Q2', freq='Q'), freq='M')
    PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'],
                dtype='period[M]')
    """
    if com.count_not_none(start, end, periods) != 2:
        raise ValueError(
            "Of the three parameters: start, end, and periods, "
            "exactly two must be specified"
        )
    if freq is None and (not isinstance(start, Period) and not isinstance(end, Period)):
        freq = "D"

    data, freq = PeriodArray._generate_range(start, end, periods, freq)
    dtype = PeriodDtype(freq)
    data = PeriodArray(data, dtype=dtype)
    return PeriodIndex(data, name=name)
1187
lib/python3.11/site-packages/pandas/core/indexes/range.py
Normal file
File diff suppressed because it is too large
356
lib/python3.11/site-packages/pandas/core/indexes/timedeltas.py
Normal file
@ -0,0 +1,356 @@
""" implement the TimedeltaIndex """
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
import warnings
|
||||
|
||||
from pandas._libs import (
|
||||
index as libindex,
|
||||
lib,
|
||||
)
|
||||
from pandas._libs.tslibs import (
|
||||
Resolution,
|
||||
Timedelta,
|
||||
to_offset,
|
||||
)
|
||||
from pandas._libs.tslibs.timedeltas import disallow_ambiguous_unit
|
||||
from pandas.util._exceptions import find_stack_level
|
||||
|
||||
from pandas.core.dtypes.common import (
|
||||
is_scalar,
|
||||
pandas_dtype,
|
||||
)
|
||||
from pandas.core.dtypes.generic import ABCSeries
|
||||
|
||||
from pandas.core.arrays.timedeltas import TimedeltaArray
|
||||
import pandas.core.common as com
|
||||
from pandas.core.indexes.base import (
|
||||
Index,
|
||||
maybe_extract_name,
|
||||
)
|
||||
from pandas.core.indexes.datetimelike import DatetimeTimedeltaMixin
|
||||
from pandas.core.indexes.extension import inherit_names
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import DtypeObj
|
||||
|
||||
|
||||
@inherit_names(
|
||||
["__neg__", "__pos__", "__abs__", "total_seconds", "round", "floor", "ceil"]
|
||||
+ TimedeltaArray._field_ops,
|
||||
TimedeltaArray,
|
||||
wrap=True,
|
||||
)
|
||||
@inherit_names(
|
||||
[
|
||||
"components",
|
||||
"to_pytimedelta",
|
||||
"sum",
|
||||
"std",
|
||||
"median",
|
||||
],
|
||||
TimedeltaArray,
|
||||
)
|
||||
class TimedeltaIndex(DatetimeTimedeltaMixin):
|
||||
"""
|
||||
Immutable Index of timedelta64 data.
|
||||
|
||||
Represented internally as int64, and scalars returned Timedelta objects.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : array-like (1-dimensional), optional
|
||||
Optional timedelta-like data to construct index with.
|
||||
unit : {'D', 'h', 'm', 's', 'ms', 'us', 'ns'}, optional
|
||||
The unit of ``data``.
|
||||
|
||||
.. deprecated:: 2.2.0
|
||||
Use ``pd.to_timedelta`` instead.
|
||||
|
||||
freq : str or pandas offset object, optional
|
||||
One of pandas date offset strings or corresponding objects. The string
|
||||
``'infer'`` can be passed in order to set the frequency of the index as
|
||||
the inferred frequency upon creation.
|
||||
dtype : numpy.dtype or str, default None
|
||||
Valid ``numpy`` dtypes are ``timedelta64[ns]``, ``timedelta64[us]``,
|
||||
``timedelta64[ms]``, and ``timedelta64[s]``.
|
||||
copy : bool
|
||||
Make a copy of input array.
|
||||
name : object
|
||||
Name to be stored in the index.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
days
|
||||
seconds
|
||||
microseconds
|
||||
nanoseconds
|
||||
components
|
||||
inferred_freq
|
||||
|
||||
Methods
|
||||
-------
|
||||
to_pytimedelta
|
||||
to_series
|
||||
round
|
||||
floor
|
||||
ceil
|
||||
to_frame
|
||||
mean
|
||||
|
||||
See Also
|
||||
--------
|
||||
Index : The base pandas Index type.
|
||||
Timedelta : Represents a duration between two dates or times.
|
||||
DatetimeIndex : Index of datetime64 data.
|
||||
PeriodIndex : Index of Period data.
|
||||
timedelta_range : Create a fixed-frequency TimedeltaIndex.
|
||||
|
||||
Notes
|
||||
-----
|
||||
To learn more about the frequency strings, please see `this link
|
||||
<https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> pd.TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'])
|
||||
TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'],
|
||||
dtype='timedelta64[ns]', freq=None)
|
||||
|
||||
We can also let pandas infer the frequency when possible.
|
||||
|
||||
>>> pd.TimedeltaIndex(np.arange(5) * 24 * 3600 * 1e9, freq='infer')
|
||||
TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'],
|
||||
dtype='timedelta64[ns]', freq='D')
|
||||
"""
|
||||
|
||||
_typ = "timedeltaindex"
|
||||
|
||||
_data_cls = TimedeltaArray
|
||||
|
||||
@property
|
||||
def _engine_type(self) -> type[libindex.TimedeltaEngine]:
|
||||
return libindex.TimedeltaEngine
|
||||
|
||||
_data: TimedeltaArray
|
||||
|
||||
# Use base class method instead of DatetimeTimedeltaMixin._get_string_slice
|
||||
_get_string_slice = Index._get_string_slice
|
||||
|
||||
# error: Signature of "_resolution_obj" incompatible with supertype
|
||||
# "DatetimeIndexOpsMixin"
|
||||
@property
|
||||
def _resolution_obj(self) -> Resolution | None: # type: ignore[override]
|
||||
return self._data._resolution_obj
|
||||
|
||||
# -------------------------------------------------------------------
|
||||
# Constructors
|
||||
|
||||
def __new__(
|
||||
cls,
|
||||
data=None,
|
||||
unit=lib.no_default,
|
||||
freq=lib.no_default,
|
||||
closed=lib.no_default,
|
||||
dtype=None,
|
||||
copy: bool = False,
|
||||
name=None,
|
||||
):
|
||||
if closed is not lib.no_default:
|
||||
# GH#52628
|
||||
warnings.warn(
|
||||
f"The 'closed' keyword in {cls.__name__} construction is "
|
||||
"deprecated and will be removed in a future version.",
|
||||
FutureWarning,
|
||||
stacklevel=find_stack_level(),
|
||||
)
|
||||
|
||||
if unit is not lib.no_default:
|
||||
# GH#55499
|
||||
warnings.warn(
|
||||
f"The 'unit' keyword in {cls.__name__} construction is "
|
||||
"deprecated and will be removed in a future version. "
|
||||
"Use pd.to_timedelta instead.",
|
||||
FutureWarning,
|
||||
stacklevel=find_stack_level(),
|
||||
)
|
||||
else:
|
||||
unit = None
|
||||
|
||||
name = maybe_extract_name(name, data, cls)
|
||||
|
||||
if is_scalar(data):
|
||||
cls._raise_scalar_data_error(data)
|
||||
|
||||
disallow_ambiguous_unit(unit)
|
||||
if dtype is not None:
|
||||
dtype = pandas_dtype(dtype)
|
||||
|
||||
if (
|
||||
isinstance(data, TimedeltaArray)
|
||||
and freq is lib.no_default
|
||||
and (dtype is None or dtype == data.dtype)
|
||||
):
|
||||
if copy:
|
||||
data = data.copy()
|
||||
return cls._simple_new(data, name=name)
|
||||
|
||||
if (
|
||||
isinstance(data, TimedeltaIndex)
|
||||
and freq is lib.no_default
|
||||
and name is None
|
||||
and (dtype is None or dtype == data.dtype)
|
||||
):
|
||||
if copy:
|
||||
return data.copy()
|
||||
else:
|
||||
return data._view()
|
||||
|
||||
# - Cases checked above all return/raise before reaching here - #
|
||||
|
||||
tdarr = TimedeltaArray._from_sequence_not_strict(
|
||||
data, freq=freq, unit=unit, dtype=dtype, copy=copy
|
||||
)
|
||||
refs = None
|
||||
if not copy and isinstance(data, (ABCSeries, Index)):
|
||||
refs = data._references
|
||||
|
||||
return cls._simple_new(tdarr, name=name, refs=refs)
|
||||
|
||||
# -------------------------------------------------------------------
|
||||
|
||||
def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
|
||||
"""
|
||||
Can we compare values of the given dtype to our own?
|
||||
"""
|
||||
return lib.is_np_dtype(dtype, "m") # aka self._data._is_recognized_dtype
|
||||
|
||||
# -------------------------------------------------------------------
|
||||
# Indexing Methods
|
||||
|
||||
def get_loc(self, key):
|
||||
"""
|
||||
Get integer location for requested label
|
||||
|
||||
Returns
|
||||
-------
|
||||
loc : int, slice, or ndarray[int]
|
||||
"""
|
||||
self._check_indexing_error(key)
|
||||
|
||||
try:
|
||||
key = self._data._validate_scalar(key, unbox=False)
|
||||
except TypeError as err:
|
||||
raise KeyError(key) from err
|
||||
|
||||
return Index.get_loc(self, key)

    def _parse_with_reso(self, label: str):
        # the "with_reso" is a no-op for TimedeltaIndex
        parsed = Timedelta(label)
        return parsed, None

    def _parsed_string_to_bounds(self, reso, parsed: Timedelta):
        # reso is unused, included to match signature of DTI/PI
        lbound = parsed.round(parsed.resolution_string)
        rbound = lbound + to_offset(parsed.resolution_string) - Timedelta(1, "ns")
        return lbound, rbound
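
    # Worked example (a sketch, assuming nanosecond precision): a label such
    # as "1 day" has resolution_string "D", so it is rounded to the day
    # boundary and the right bound is one day later minus 1 nanosecond:
    #
    #     lbound = Timedelta("1 days 00:00:00")
    #     rbound = Timedelta("1 days 23:59:59.999999999")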

    # -------------------------------------------------------------------

    @property
    def inferred_type(self) -> str:
        return "timedelta64"


def timedelta_range(
    start=None,
    end=None,
    periods: int | None = None,
    freq=None,
    name=None,
    closed=None,
    *,
    unit: str | None = None,
) -> TimedeltaIndex:
    """
    Return a fixed frequency TimedeltaIndex with day as the default.

    Parameters
    ----------
    start : str or timedelta-like, default None
        Left bound for generating timedeltas.
    end : str or timedelta-like, default None
        Right bound for generating timedeltas.
    periods : int, default None
        Number of periods to generate.
    freq : str, Timedelta, datetime.timedelta, or DateOffset, default 'D'
        Frequency strings can have multiples, e.g. '5h'.
    name : str, default None
        Name of the resulting TimedeltaIndex.
    closed : str, default None
        Make the interval closed with respect to the given frequency to
        the 'left', 'right', or both sides (None).
    unit : str, default None
        Specify the desired resolution of the result.

        .. versionadded:: 2.0.0

    Returns
    -------
    TimedeltaIndex

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``TimedeltaIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end`` (closed on both sides).

    To learn more about the frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    >>> pd.timedelta_range(start='1 day', periods=4)
    TimedeltaIndex(['1 days', '2 days', '3 days', '4 days'],
                   dtype='timedelta64[ns]', freq='D')

    The ``closed`` parameter specifies which endpoint is included. The default
    behavior is to include both endpoints.

    >>> pd.timedelta_range(start='1 day', periods=4, closed='right')
    TimedeltaIndex(['2 days', '3 days', '4 days'],
                   dtype='timedelta64[ns]', freq='D')

    The ``freq`` parameter specifies the frequency of the TimedeltaIndex.
    Only fixed frequencies can be passed, non-fixed frequencies such as
    'M' (month end) will raise.

    >>> pd.timedelta_range(start='1 day', end='2 days', freq='6h')
    TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00',
                    '1 days 18:00:00', '2 days 00:00:00'],
                   dtype='timedelta64[ns]', freq='6h')

    Specify ``start``, ``end``, and ``periods``; the frequency is generated
    automatically (linearly spaced).

    >>> pd.timedelta_range(start='1 day', end='5 days', periods=4)
    TimedeltaIndex(['1 days 00:00:00', '2 days 08:00:00', '3 days 16:00:00',
                    '5 days 00:00:00'],
                   dtype='timedelta64[ns]', freq=None)

    **Specify a unit**

    >>> pd.timedelta_range("1 Day", periods=3, freq="100000D", unit="s")
    TimedeltaIndex(['1 days', '100001 days', '200001 days'],
                   dtype='timedelta64[s]', freq='100000D')
    """
    if freq is None and com.any_none(periods, start, end):
        freq = "D"

    freq = to_offset(freq)
    tdarr = TimedeltaArray._generate_range(
        start, end, periods, freq, closed=closed, unit=unit
    )
    return TimedeltaIndex._simple_new(tdarr, name=name)
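
# Hedged usage note (illustrative comment, not part of pandas): the freq
# default above implements the docstring's "three of four" rule; with freq
# omitted and only start/periods given, it falls back to daily steps:
#
#     >>> import pandas as pd
#     >>> pd.timedelta_range(start="1 day", periods=2)
#     TimedeltaIndex(['1 days', '2 days'], dtype='timedelta64[ns]', freq='D')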