commit 2fc0d000b6 (parent e1b817252c), 2025-09-07 22:09:54 +02:00
7796 changed files with 2159515 additions and 933 deletions

View File

@@ -0,0 +1,367 @@
from __future__ import annotations
import os
import warnings
__docformat__ = "restructuredtext"
# Let users know if they're missing any of our hard dependencies
_hard_dependencies = ("numpy", "pytz", "dateutil")
_missing_dependencies = []
for _dependency in _hard_dependencies:
try:
__import__(_dependency)
except ImportError as _e: # pragma: no cover
_missing_dependencies.append(f"{_dependency}: {_e}")
if _missing_dependencies: # pragma: no cover
raise ImportError(
"Unable to import required dependencies:\n" + "\n".join(_missing_dependencies)
)
del _hard_dependencies, _dependency, _missing_dependencies
try:
# numpy compat
from pandas.compat import (
is_numpy_dev as _is_numpy_dev, # pyright: ignore[reportUnusedImport] # noqa: F401
)
except ImportError as _err: # pragma: no cover
_module = _err.name
raise ImportError(
f"C extension: {_module} not built. If you want to import "
"pandas from the source directory, you may need to run "
"'python setup.py build_ext' to build the C extensions first."
) from _err
from pandas._config import (
get_option,
set_option,
reset_option,
describe_option,
option_context,
options,
)
# let init-time option registration happen
import pandas.core.config_init # pyright: ignore[reportUnusedImport] # noqa: F401
from pandas.core.api import (
# dtype
ArrowDtype,
Int8Dtype,
Int16Dtype,
Int32Dtype,
Int64Dtype,
UInt8Dtype,
UInt16Dtype,
UInt32Dtype,
UInt64Dtype,
Float32Dtype,
Float64Dtype,
CategoricalDtype,
PeriodDtype,
IntervalDtype,
DatetimeTZDtype,
StringDtype,
BooleanDtype,
# missing
NA,
isna,
isnull,
notna,
notnull,
# indexes
Index,
CategoricalIndex,
RangeIndex,
MultiIndex,
IntervalIndex,
TimedeltaIndex,
DatetimeIndex,
PeriodIndex,
IndexSlice,
# tseries
NaT,
Period,
period_range,
Timedelta,
timedelta_range,
Timestamp,
date_range,
bdate_range,
Interval,
interval_range,
DateOffset,
# conversion
to_numeric,
to_datetime,
to_timedelta,
# misc
Flags,
Grouper,
factorize,
unique,
value_counts,
NamedAgg,
array,
Categorical,
set_eng_float_format,
Series,
DataFrame,
)
from pandas.core.dtypes.dtypes import SparseDtype
from pandas.tseries.api import infer_freq
from pandas.tseries import offsets
from pandas.core.computation.api import eval
from pandas.core.reshape.api import (
concat,
lreshape,
melt,
wide_to_long,
merge,
merge_asof,
merge_ordered,
crosstab,
pivot,
pivot_table,
get_dummies,
from_dummies,
cut,
qcut,
)
from pandas import api, arrays, errors, io, plotting, tseries
from pandas import testing
from pandas.util._print_versions import show_versions
from pandas.io.api import (
# excel
ExcelFile,
ExcelWriter,
read_excel,
# parsers
read_csv,
read_fwf,
read_table,
# pickle
read_pickle,
to_pickle,
# pytables
HDFStore,
read_hdf,
# sql
read_sql,
read_sql_query,
read_sql_table,
# misc
read_clipboard,
read_parquet,
read_orc,
read_feather,
read_gbq,
read_html,
read_xml,
read_json,
read_stata,
read_sas,
read_spss,
)
from pandas.io.json._normalize import json_normalize
from pandas.util._tester import test
# use the closest tagged version if possible
_built_with_meson = False
try:
from pandas._version_meson import ( # pyright: ignore [reportMissingImports]
__version__,
__git_version__,
)
_built_with_meson = True
except ImportError:
from pandas._version import get_versions
v = get_versions()
__version__ = v.get("closest-tag", v["version"])
__git_version__ = v.get("full-revisionid")
del get_versions, v
# GH#55043 - deprecation of the data_manager option
if "PANDAS_DATA_MANAGER" in os.environ:
warnings.warn(
"The env variable PANDAS_DATA_MANAGER is set. The data_manager option is "
"deprecated and will be removed in a future version. Only the BlockManager "
"will be available. Unset this environment variable to silence this warning.",
FutureWarning,
stacklevel=2,
)
del warnings, os
# module level doc-string
__doc__ = """
pandas - a powerful data analysis and manipulation library for Python
=====================================================================
**pandas** is a Python package providing fast, flexible, and expressive data
structures designed to make working with "relational" or "labeled" data both
easy and intuitive. It aims to be the fundamental high-level building block for
doing practical, **real world** data analysis in Python. Additionally, it has
the broader goal of becoming **the most powerful and flexible open source data
analysis / manipulation tool available in any language**. It is already well on
its way toward this goal.
Main Features
-------------
Here are just a few of the things that pandas does well:
- Easy handling of missing data in floating point as well as non-floating
point data.
- Size mutability: columns can be inserted and deleted from DataFrame and
higher dimensional objects
- Automatic and explicit data alignment: objects can be explicitly aligned
to a set of labels, or the user can simply ignore the labels and let
`Series`, `DataFrame`, etc. automatically align the data for you in
computations.
- Powerful, flexible group by functionality to perform split-apply-combine
operations on data sets, for both aggregating and transforming data.
- Make it easy to convert ragged, differently-indexed data in other Python
and NumPy data structures into DataFrame objects.
- Intelligent label-based slicing, fancy indexing, and subsetting of large
data sets.
- Intuitive merging and joining data sets.
- Flexible reshaping and pivoting of data sets.
- Hierarchical labeling of axes (possible to have multiple labels per tick).
- Robust IO tools for loading data from flat files (CSV and delimited),
Excel files, databases, and saving/loading data from the ultrafast HDF5
format.
- Time series-specific functionality: date range generation and frequency
conversion, moving window statistics, date shifting and lagging.
"""
# Use __all__ to let type checkers know what is part of the public API.
# Pandas is not (yet) a py.typed library: the public API is determined
# based on the documentation.
__all__ = [
"ArrowDtype",
"BooleanDtype",
"Categorical",
"CategoricalDtype",
"CategoricalIndex",
"DataFrame",
"DateOffset",
"DatetimeIndex",
"DatetimeTZDtype",
"ExcelFile",
"ExcelWriter",
"Flags",
"Float32Dtype",
"Float64Dtype",
"Grouper",
"HDFStore",
"Index",
"IndexSlice",
"Int16Dtype",
"Int32Dtype",
"Int64Dtype",
"Int8Dtype",
"Interval",
"IntervalDtype",
"IntervalIndex",
"MultiIndex",
"NA",
"NaT",
"NamedAgg",
"Period",
"PeriodDtype",
"PeriodIndex",
"RangeIndex",
"Series",
"SparseDtype",
"StringDtype",
"Timedelta",
"TimedeltaIndex",
"Timestamp",
"UInt16Dtype",
"UInt32Dtype",
"UInt64Dtype",
"UInt8Dtype",
"api",
"array",
"arrays",
"bdate_range",
"concat",
"crosstab",
"cut",
"date_range",
"describe_option",
"errors",
"eval",
"factorize",
"get_dummies",
"from_dummies",
"get_option",
"infer_freq",
"interval_range",
"io",
"isna",
"isnull",
"json_normalize",
"lreshape",
"melt",
"merge",
"merge_asof",
"merge_ordered",
"notna",
"notnull",
"offsets",
"option_context",
"options",
"period_range",
"pivot",
"pivot_table",
"plotting",
"qcut",
"read_clipboard",
"read_csv",
"read_excel",
"read_feather",
"read_fwf",
"read_gbq",
"read_hdf",
"read_html",
"read_json",
"read_orc",
"read_parquet",
"read_pickle",
"read_sas",
"read_spss",
"read_sql",
"read_sql_query",
"read_sql_table",
"read_stata",
"read_table",
"read_xml",
"reset_option",
"set_eng_float_format",
"set_option",
"show_versions",
"test",
"testing",
"timedelta_range",
"to_datetime",
"to_numeric",
"to_pickle",
"to_timedelta",
"tseries",
"unique",
"value_counts",
"wide_to_long",
]
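Since __all__ above is maintained by hand rather than generated, it can drift from the actual module namespace. A minimal smoke test, as a sketch assuming pandas is installed and importable:

import pandas as pd

# every name declared public should resolve as an attribute of the package
missing = [name for name in pd.__all__ if not hasattr(pd, name)]
assert not missing, f"in __all__ but not importable: {missing}"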

View File

@@ -0,0 +1,57 @@
"""
pandas._config is considered explicitly upstream of everything else in pandas,
and should have no intra-pandas dependencies.
Importing `dates` and `display` ensures that keys needed by _libs
are initialized.
"""
__all__ = [
"config",
"detect_console_encoding",
"get_option",
"set_option",
"reset_option",
"describe_option",
"option_context",
"options",
"using_copy_on_write",
"warn_copy_on_write",
]
from pandas._config import config
from pandas._config import dates # pyright: ignore[reportUnusedImport] # noqa: F401
from pandas._config.config import (
_global_config,
describe_option,
get_option,
option_context,
options,
reset_option,
set_option,
)
from pandas._config.display import detect_console_encoding
def using_copy_on_write() -> bool:
_mode_options = _global_config["mode"]
return (
_mode_options["copy_on_write"] is True
and _mode_options["data_manager"] == "block"
)
def warn_copy_on_write() -> bool:
_mode_options = _global_config["mode"]
return (
_mode_options["copy_on_write"] == "warn"
and _mode_options["data_manager"] == "block"
)
def using_nullable_dtypes() -> bool:
_mode_options = _global_config["mode"]
return _mode_options["nullable_dtypes"]
def using_string_dtype() -> bool:
_mode_options = _global_config["future"]
return _mode_options["infer_string"]
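A quick sketch of how these helpers interact with the public option API; it assumes the "mode.copy_on_write" option registered in pandas.core.config_init accepts True/False/"warn", as the checks above suggest:

import pandas as pd
from pandas._config import using_copy_on_write, warn_copy_on_write

pd.set_option("mode.copy_on_write", True)
assert using_copy_on_write()      # data_manager defaults to "block"

pd.set_option("mode.copy_on_write", "warn")
assert warn_copy_on_write()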

View File

@@ -0,0 +1,948 @@
"""
The config module holds package-wide configurables and provides
a uniform API for working with them.
Overview
========
This module supports the following requirements:
- options are referenced using keys in dot.notation, e.g. "x.y.option_z".
- keys are case-insensitive.
- functions should accept partial/regex keys, when unambiguous.
- options can be registered by modules at import time.
- options can be registered at init-time (via core.config_init)
- options have a default value, and (optionally) a description and
validation function associated with them.
- options can be deprecated, in which case referencing them
should produce a warning.
- deprecated options can optionally be rerouted to a replacement
so that accessing a deprecated option reroutes to a differently
named option.
- options can be reset to their default value.
- all options can be reset to their default value at once.
- all options in a certain sub-namespace can be reset at once.
- the user can set / get / reset or ask for the description of an option.
- a developer can register and mark an option as deprecated.
- you can register a callback to be invoked when the option value
is set or reset. Changing the stored value is considered misuse, but
is not verboten.
Implementation
==============
- Data is stored using nested dictionaries, and should be accessed
through the provided API.
- "Registered options" and "Deprecated options" have metadata associated
with them, which are stored in auxiliary dictionaries keyed on the
fully-qualified key, e.g. "x.y.z.option".
- the config_init module is imported by the package's __init__.py file.
Placing any register_option() calls there will ensure those options
are available as soon as pandas is loaded. If you use register_option
in a module, it will only be available after that module is imported,
which you should be aware of.
- `config_prefix` is a context manager (for use with the `with` keyword)
which can save developers some typing; see the docstring.
"""
from __future__ import annotations
from contextlib import (
ContextDecorator,
contextmanager,
)
import re
from typing import (
TYPE_CHECKING,
Any,
Callable,
Generic,
NamedTuple,
cast,
)
import warnings
from pandas._typing import (
F,
T,
)
from pandas.util._exceptions import find_stack_level
if TYPE_CHECKING:
from collections.abc import (
Generator,
Iterable,
)
class DeprecatedOption(NamedTuple):
key: str
msg: str | None
rkey: str | None
removal_ver: str | None
class RegisteredOption(NamedTuple):
key: str
defval: object
doc: str
validator: Callable[[object], Any] | None
cb: Callable[[str], Any] | None
# holds deprecated option metadata
_deprecated_options: dict[str, DeprecatedOption] = {}
# holds registered option metadata
_registered_options: dict[str, RegisteredOption] = {}
# holds the current values for registered options
_global_config: dict[str, Any] = {}
# keys which have a special meaning
_reserved_keys: list[str] = ["all"]
class OptionError(AttributeError, KeyError):
"""
Exception raised for pandas.options.
Backwards compatible with KeyError checks.
Examples
--------
>>> pd.options.context
Traceback (most recent call last):
OptionError: No such option
"""
#
# User API
def _get_single_key(pat: str, silent: bool) -> str:
keys = _select_options(pat)
if len(keys) == 0:
if not silent:
_warn_if_deprecated(pat)
raise OptionError(f"No such keys(s): {repr(pat)}")
if len(keys) > 1:
raise OptionError("Pattern matched multiple keys")
key = keys[0]
if not silent:
_warn_if_deprecated(key)
key = _translate_key(key)
return key
def _get_option(pat: str, silent: bool = False) -> Any:
key = _get_single_key(pat, silent)
# walk the nested dict
root, k = _get_root(key)
return root[k]
def _set_option(*args, **kwargs) -> None:
# must be called with at least one (pattern, value) pair
nargs = len(args)
if not nargs or nargs % 2 != 0:
raise ValueError("Must provide an even number of non-keyword arguments")
# default to False
silent = kwargs.pop("silent", False)
if kwargs:
kwarg = next(iter(kwargs.keys()))
raise TypeError(f'_set_option() got an unexpected keyword argument "{kwarg}"')
for k, v in zip(args[::2], args[1::2]):
key = _get_single_key(k, silent)
o = _get_registered_option(key)
if o and o.validator:
o.validator(v)
# walk the nested dict
root, k_root = _get_root(key)
root[k_root] = v
if o.cb:
if silent:
with warnings.catch_warnings(record=True):
o.cb(key)
else:
o.cb(key)
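_set_option takes any even number of positional (pattern, value) pairs, so several options can be changed in one call; in this snapshot the public wrapper behaves the same way. Sketch:

import pandas as pd

# one call, two options; each pattern must match exactly one key
pd.set_option("display.max_rows", 100, "display.max_columns", 20)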
def _describe_option(pat: str = "", _print_desc: bool = True) -> str | None:
keys = _select_options(pat)
if len(keys) == 0:
raise OptionError("No such keys(s)")
s = "\n".join([_build_option_description(k) for k in keys])
if _print_desc:
print(s)
return None
return s
def _reset_option(pat: str, silent: bool = False) -> None:
keys = _select_options(pat)
if len(keys) == 0:
raise OptionError("No such keys(s)")
if len(keys) > 1 and len(pat) < 4 and pat != "all":
raise ValueError(
"You must specify at least 4 characters when "
"resetting multiple keys, use the special keyword "
'"all" to reset all the options to their default value'
)
for k in keys:
_set_option(k, _registered_options[k].defval, silent=silent)
def get_default_val(pat: str):
key = _get_single_key(pat, silent=True)
return _get_registered_option(key).defval
class DictWrapper:
"""provide attribute-style access to a nested dict"""
d: dict[str, Any]
def __init__(self, d: dict[str, Any], prefix: str = "") -> None:
object.__setattr__(self, "d", d)
object.__setattr__(self, "prefix", prefix)
def __setattr__(self, key: str, val: Any) -> None:
prefix = object.__getattribute__(self, "prefix")
if prefix:
prefix += "."
prefix += key
# you can't set new keys,
# and you can't overwrite subtrees
if key in self.d and not isinstance(self.d[key], dict):
_set_option(prefix, val)
else:
raise OptionError("You can only set the value of existing options")
def __getattr__(self, key: str):
prefix = object.__getattribute__(self, "prefix")
if prefix:
prefix += "."
prefix += key
try:
v = object.__getattribute__(self, "d")[key]
except KeyError as err:
raise OptionError("No such option") from err
if isinstance(v, dict):
return DictWrapper(v, prefix)
else:
return _get_option(prefix)
def __dir__(self) -> list[str]:
return list(self.d.keys())
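DictWrapper is what backs ``pd.options``: attribute reads walk the nested config dict, and assignments are routed through _set_option so validators and callbacks still run. Sketch:

import pandas as pd

pd.options.display.max_rows = 25          # goes through _set_option
assert pd.get_option("display.max_rows") == 25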
# For user convenience, we'd like to have the available options described
# in the docstring. For dev convenience we'd like to generate the docstrings
# dynamically instead of maintaining them by hand. To this end, we use the
# class below, which wraps functions inside a callable and converts
# __doc__ into a property function. The docstrings below are templates
# using the py2.6+ advanced formatting syntax to plug in a concise list
# of options, and option descriptions.
class CallableDynamicDoc(Generic[T]):
def __init__(self, func: Callable[..., T], doc_tmpl: str) -> None:
self.__doc_tmpl__ = doc_tmpl
self.__func__ = func
def __call__(self, *args, **kwds) -> T:
return self.__func__(*args, **kwds)
# error: Signature of "__doc__" incompatible with supertype "object"
@property
def __doc__(self) -> str: # type: ignore[override]
opts_desc = _describe_option("all", _print_desc=False)
opts_list = pp_options_list(list(_registered_options.keys()))
return self.__doc_tmpl__.format(opts_desc=opts_desc, opts_list=opts_list)
_get_option_tmpl = """
get_option(pat)
Retrieves the value of the specified option.
Available options:
{opts_list}
Parameters
----------
pat : str
Regexp which should match a single option.
Note: partial matches are supported for convenience, but unless you use the
full option name (e.g. x.y.z.option_name), your code may break in future
versions if new options with similar names are introduced.
Returns
-------
result : the value of the option
Raises
------
OptionError : if no such option exists
Notes
-----
Please reference the :ref:`User Guide <options>` for more information.
The available options with their descriptions:
{opts_desc}
Examples
--------
>>> pd.get_option('display.max_columns') # doctest: +SKIP
4
"""
_set_option_tmpl = """
set_option(pat, value)
Sets the value of the specified option.
Available options:
{opts_list}
Parameters
----------
pat : str
Regexp which should match a single option.
Note: partial matches are supported for convenience, but unless you use the
full option name (e.g. x.y.z.option_name), your code may break in future
versions if new options with similar names are introduced.
value : object
New value of option.
Returns
-------
None
Raises
------
OptionError if no such option exists
Notes
-----
Please reference the :ref:`User Guide <options>` for more information.
The available options with their descriptions:
{opts_desc}
Examples
--------
>>> pd.set_option('display.max_columns', 4)
>>> df = pd.DataFrame([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])
>>> df
0 1 ... 3 4
0 1 2 ... 4 5
1 6 7 ... 9 10
[2 rows x 5 columns]
>>> pd.reset_option('display.max_columns')
"""
_describe_option_tmpl = """
describe_option(pat, _print_desc=True)
Prints the description for one or more registered options.
Call with no arguments to get a listing for all registered options.
Available options:
{opts_list}
Parameters
----------
pat : str
Regexp pattern. All matching keys will have their description displayed.
_print_desc : bool, default True
If True (default) the description(s) will be printed to stdout.
Otherwise, the description(s) will be returned as a unicode string
(for testing).
Returns
-------
None by default, the description(s) as a unicode string if _print_desc
is False
Notes
-----
Please reference the :ref:`User Guide <options>` for more information.
The available options with their descriptions:
{opts_desc}
Examples
--------
>>> pd.describe_option('display.max_columns') # doctest: +SKIP
display.max_columns : int
If max_cols is exceeded, switch to truncate view...
"""
_reset_option_tmpl = """
reset_option(pat)
Reset one or more options to their default value.
Pass "all" as argument to reset all options.
Available options:
{opts_list}
Parameters
----------
pat : str/regex
If specified, only options matching `prefix*` will be reset.
Note: partial matches are supported for convenience, but unless you
use the full option name (e.g. x.y.z.option_name), your code may break
in future versions if new options with similar names are introduced.
Returns
-------
None
Notes
-----
Please reference the :ref:`User Guide <options>` for more information.
The available options with their descriptions:
{opts_desc}
Examples
--------
>>> pd.reset_option('display.max_columns') # doctest: +SKIP
"""
# bind the functions with their docstrings into a Callable
# and use that as the functions exposed in pd.api
get_option = CallableDynamicDoc(_get_option, _get_option_tmpl)
set_option = CallableDynamicDoc(_set_option, _set_option_tmpl)
reset_option = CallableDynamicDoc(_reset_option, _reset_option_tmpl)
describe_option = CallableDynamicDoc(_describe_option, _describe_option_tmpl)
options = DictWrapper(_global_config)
#
# Functions for use by pandas developers, in addition to the user API
class option_context(ContextDecorator):
"""
Context manager to temporarily set options in the `with` statement context.
You need to invoke as ``option_context(pat, val, [(pat, val), ...])``.
Examples
--------
>>> from pandas import option_context
>>> with option_context('display.max_rows', 10, 'display.max_columns', 5):
... pass
"""
def __init__(self, *args) -> None:
if len(args) % 2 != 0 or len(args) < 2:
raise ValueError(
"Need to invoke as option_context(pat, val, [(pat, val), ...])."
)
self.ops = list(zip(args[::2], args[1::2]))
def __enter__(self) -> None:
self.undo = [(pat, _get_option(pat)) for pat, val in self.ops]
for pat, val in self.ops:
_set_option(pat, val, silent=True)
def __exit__(self, *args) -> None:
if self.undo:
for pat, val in self.undo:
_set_option(pat, val, silent=True)
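Since option_context subclasses ContextDecorator, it also works as a decorator; the saved options are restored when the wrapped function returns. Sketch:

import pandas as pd

@pd.option_context("display.precision", 2)
def show(df):
    print(df)  # floats render with 2 decimal places only inside this call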
def register_option(
key: str,
defval: object,
doc: str = "",
validator: Callable[[object], Any] | None = None,
cb: Callable[[str], Any] | None = None,
) -> None:
"""
Register an option in the package-wide pandas config object
Parameters
----------
key : str
Fully-qualified key, e.g. "x.y.option_z".
defval : object
Default value of the option.
doc : str
Description of the option.
validator : Callable, optional
Function of a single argument, should raise `ValueError` if
called with a value which is not a legal value for the option.
cb : Callable, optional
A function of a single argument "key", which is called
immediately after an option value is set/reset. `key` is
the full name of the option.
Raises
------
ValueError if `validator` is specified and `defval` is not a valid value.
"""
import keyword
import tokenize
key = key.lower()
if key in _registered_options:
raise OptionError(f"Option '{key}' has already been registered")
if key in _reserved_keys:
raise OptionError(f"Option '{key}' is a reserved key")
# the default value should be legal
if validator:
validator(defval)
# walk the nested dict, creating dicts as needed along the path
path = key.split(".")
for k in path:
if not re.match("^" + tokenize.Name + "$", k):
raise ValueError(f"{k} is not a valid identifier")
if keyword.iskeyword(k):
raise ValueError(f"{k} is a python keyword")
cursor = _global_config
msg = "Path prefix to option '{option}' is already an option"
for i, p in enumerate(path[:-1]):
if not isinstance(cursor, dict):
raise OptionError(msg.format(option=".".join(path[:i])))
if p not in cursor:
cursor[p] = {}
cursor = cursor[p]
if not isinstance(cursor, dict):
raise OptionError(msg.format(option=".".join(path[:-1])))
cursor[path[-1]] = defval # initialize
# save the option metadata
_registered_options[key] = RegisteredOption(
key=key, defval=defval, doc=doc, validator=validator, cb=cb
)
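A sketch of registering an option with a validator and a change callback; the "demo.verbose" key and the callback are illustrative, not part of pandas:

import pandas._config.config as cf

def _on_change(key: str) -> None:
    # called after every set/reset with the full option name
    print(f"{key} -> {cf.get_option(key)}")

cf.register_option("demo.verbose", False, "Enable chatty output.",
                   validator=cf.is_bool, cb=_on_change)
cf.set_option("demo.verbose", True)  # validator runs first, then _on_change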
def deprecate_option(
key: str,
msg: str | None = None,
rkey: str | None = None,
removal_ver: str | None = None,
) -> None:
"""
Mark option `key` as deprecated. If code attempts to access this option,
a warning will be produced, using `msg` if given, or a default message
if not.
If `rkey` is given, any access to the key will be re-routed to `rkey`.
Neither the existence of `key` nor that of `rkey` is checked. If they
do not exist, any subsequent access will fail as usual, after the
deprecation warning is given.
Parameters
----------
key : str
Name of the option to be deprecated.
Must be a fully-qualified option name (e.g. "x.y.z.rkey").
msg : str, optional
Warning message to output when the key is referenced.
If no message is given, a default message will be emitted.
rkey : str, optional
Name of an option to reroute access to.
If specified, any referenced `key` will be
re-routed to `rkey` including set/get/reset.
rkey must be a fully-qualified option name (e.g. "x.y.z.rkey").
Used by the default message if no `msg` is specified.
removal_ver : str, optional
Specifies the version in which this option will
be removed. Used by the default message if no `msg` is specified.
Raises
------
OptionError
If the specified key has already been deprecated.
"""
key = key.lower()
if key in _deprecated_options:
raise OptionError(f"Option '{key}' has already been defined as deprecated.")
_deprecated_options[key] = DeprecatedOption(key, msg, rkey, removal_ver)
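A sketch of rerouting: for access to be rerouted rather than fail, the deprecated key itself must still be registered. The "demo.old"/"demo.new" names are illustrative:

import warnings
import pandas._config.config as cf

cf.register_option("demo.new", 0, "replacement option", validator=cf.is_int)
cf.register_option("demo.old", 0, "deprecated alias", validator=cf.is_int)
cf.deprecate_option("demo.old", rkey="demo.new", removal_ver="3.0")

cf.set_option("demo.new", 5)
with warnings.catch_warnings():
    warnings.simplefilter("ignore", FutureWarning)
    assert cf.get_option("demo.old") == 5  # rerouted to demo.new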
#
# functions internal to the module
def _select_options(pat: str) -> list[str]:
"""
Returns a list of keys matching `pat`.
If pat == "all", returns all registered options.
"""
# short-circuit for exact key
if pat in _registered_options:
return [pat]
# else look through all of them
keys = sorted(_registered_options.keys())
if pat == "all": # reserved key
return keys
return [k for k in keys if re.search(pat, k, re.I)]
def _get_root(key: str) -> tuple[dict[str, Any], str]:
path = key.split(".")
cursor = _global_config
for p in path[:-1]:
cursor = cursor[p]
return cursor, path[-1]
def _is_deprecated(key: str) -> bool:
"""Returns True if the given option has been deprecated"""
key = key.lower()
return key in _deprecated_options
def _get_deprecated_option(key: str):
"""
Retrieves the metadata for a deprecated option, if `key` is deprecated.
Returns
-------
DeprecatedOption (namedtuple) if key is deprecated, None otherwise
"""
try:
d = _deprecated_options[key]
except KeyError:
return None
else:
return d
def _get_registered_option(key: str):
"""
Retrieves the option metadata if `key` is a registered option.
Returns
-------
RegisteredOption (namedtuple) if key is a registered option, None otherwise
"""
return _registered_options.get(key)
def _translate_key(key: str) -> str:
"""
If key is deprecated and a replacement key is defined, return the
replacement key; otherwise return `key` as-is.
"""
d = _get_deprecated_option(key)
if d:
return d.rkey or key
else:
return key
def _warn_if_deprecated(key: str) -> bool:
"""
Checks if `key` is a deprecated option and if so, emits a FutureWarning.
Returns
-------
bool - True if `key` is deprecated, False otherwise.
"""
d = _get_deprecated_option(key)
if d:
if d.msg:
warnings.warn(
d.msg,
FutureWarning,
stacklevel=find_stack_level(),
)
else:
msg = f"'{key}' is deprecated"
if d.removal_ver:
msg += f" and will be removed in {d.removal_ver}"
if d.rkey:
msg += f", please use '{d.rkey}' instead."
else:
msg += ", please refrain from using it."
warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
return True
return False
def _build_option_description(k: str) -> str:
"""Builds a formatted description of a registered option and prints it"""
o = _get_registered_option(k)
d = _get_deprecated_option(k)
s = f"{k} "
if o.doc:
s += "\n".join(o.doc.strip().split("\n"))
else:
s += "No description available."
if o:
s += f"\n [default: {o.defval}] [currently: {_get_option(k, True)}]"
if d:
rkey = d.rkey or ""
s += "\n (Deprecated"
s += f", use `{rkey}` instead."
s += ")"
return s
def pp_options_list(keys: Iterable[str], width: int = 80, _print: bool = False):
"""Builds a concise listing of available options, grouped by prefix"""
from itertools import groupby
from textwrap import wrap
def pp(name: str, ks: Iterable[str]) -> list[str]:
pfx = "- " + name + ".[" if name else ""
ls = wrap(
", ".join(ks),
width,
initial_indent=pfx,
subsequent_indent=" ",
break_long_words=False,
)
if ls and ls[-1] and name:
ls[-1] = ls[-1] + "]"
return ls
ls: list[str] = []
singles = [x for x in sorted(keys) if x.find(".") < 0]
if singles:
ls += pp("", singles)
keys = [x for x in keys if x.find(".") >= 0]
for k, g in groupby(sorted(keys), lambda x: x[: x.rfind(".")]):
ks = [x[len(k) + 1 :] for x in list(g)]
ls += pp(k, ks)
s = "\n".join(ls)
if _print:
print(s)
else:
return s
#
# helpers
@contextmanager
def config_prefix(prefix: str) -> Generator[None, None, None]:
"""
Context manager for multiple invocations of API with a common prefix.
Supported API functions: (register / get / set)_option.
Warning: This is not thread-safe, and won't work properly if you import
the API functions into your module using the "from x import y" construct.
Example
-------
import pandas._config.config as cf
with cf.config_prefix("display.font"):
cf.register_option("color", "red")
cf.register_option("size", "5 pt")
cf.set_option("size", "6 pt")
cf.get_option("size")
...
will register options "display.font.color" and "display.font.size", set the
value of "display.font.size", and so on.
"""
# Note: reset_option relies on set_option, and on key directly;
# it does not fit into this monkey-patching scheme
global register_option, get_option, set_option
def wrap(func: F) -> F:
def inner(key: str, *args, **kwds):
pkey = f"{prefix}.{key}"
return func(pkey, *args, **kwds)
return cast(F, inner)
_register_option = register_option
_get_option = get_option
_set_option = set_option
set_option = wrap(set_option)
get_option = wrap(get_option)
register_option = wrap(register_option)
try:
yield
finally:
set_option = _set_option
get_option = _get_option
register_option = _register_option
# These factories and methods are handy for use as the validator
# arg in register_option
def is_type_factory(_type: type[Any]) -> Callable[[Any], None]:
"""
Parameters
----------
`_type` - a type to be compared against (e.g. type(x) == `_type`)
Returns
-------
validator - a function of a single argument x, which raises
ValueError if type(x) is not equal to `_type`
"""
def inner(x) -> None:
if type(x) != _type:
raise ValueError(f"Value must have type '{_type}'")
return inner
def is_instance_factory(_type) -> Callable[[Any], None]:
"""
Parameters
----------
`_type` - the type to be checked against
Returns
-------
validator - a function of a single argument x, which raises
ValueError if x is not an instance of `_type`
"""
if isinstance(_type, (tuple, list)):
_type = tuple(_type)
type_repr = "|".join(map(str, _type))
else:
type_repr = f"'{_type}'"
def inner(x) -> None:
if not isinstance(x, _type):
raise ValueError(f"Value must be an instance of {type_repr}")
return inner
def is_one_of_factory(legal_values) -> Callable[[Any], None]:
callables = [c for c in legal_values if callable(c)]
legal_values = [c for c in legal_values if not callable(c)]
def inner(x) -> None:
if x not in legal_values:
if not any(c(x) for c in callables):
uvals = [str(lval) for lval in legal_values]
pp_values = "|".join(uvals)
msg = f"Value must be one of {pp_values}"
if len(callables):
msg += " or a callable"
raise ValueError(msg)
return inner
def is_nonnegative_int(value: object) -> None:
"""
Verify that value is None or a nonnegative int.
Parameters
----------
value : None or int
The `value` to be checked.
Raises
------
ValueError
When the value is neither None nor a nonnegative integer
"""
if value is None:
return
elif isinstance(value, int):
if value >= 0:
return
msg = "Value must be a nonnegative integer or None"
raise ValueError(msg)
# common type validators, for convenience
# usage: register_option(... , validator = is_int)
is_int = is_type_factory(int)
is_bool = is_type_factory(bool)
is_float = is_type_factory(float)
is_str = is_type_factory(str)
is_text = is_instance_factory((str, bytes))
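These validators plug straight into register_option; a sketch with is_one_of_factory (the "demo.engine" option name is illustrative):

import pandas._config.config as cf

cf.register_option("demo.engine", "fast",
                   validator=cf.is_one_of_factory(["fast", "safe"]))
cf.set_option("demo.engine", "safe")    # passes the validator
# cf.set_option("demo.engine", "slow")  # would raise ValueError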
def is_callable(obj) -> bool:
"""
Parameters
----------
`obj` - the object to be checked
Returns
-------
validator - returns True if the object is callable,
raises ValueError otherwise.
"""
if not callable(obj):
raise ValueError("Value must be a callable")
return True

View File

@@ -0,0 +1,25 @@
"""
Config for datetime formatting.
"""
from __future__ import annotations
from pandas._config import config as cf
pc_date_dayfirst_doc = """
: boolean
When True, prints and parses dates with the day first, e.g. 20/01/2005
"""
pc_date_yearfirst_doc = """
: boolean
When True, prints and parses dates with the year first, e.g. 2005/01/20
"""
with cf.config_prefix("display"):
# Needed upstream of `_libs` because these are used in tslibs.parsing
cf.register_option(
"date_dayfirst", False, pc_date_dayfirst_doc, validator=cf.is_bool
)
cf.register_option(
"date_yearfirst", False, pc_date_yearfirst_doc, validator=cf.is_bool
)

View File

@@ -0,0 +1,62 @@
"""
Unopinionated display configuration.
"""
from __future__ import annotations
import locale
import sys
from pandas._config import config as cf
# -----------------------------------------------------------------------------
# Global formatting options
_initial_defencoding: str | None = None
def detect_console_encoding() -> str:
"""
Try to find the most capable encoding supported by the console.
Slightly modified from the way IPython handles the same issue.
"""
global _initial_defencoding
encoding = None
try:
encoding = sys.stdout.encoding or sys.stdin.encoding
except (AttributeError, OSError):
pass
# try again for something better
if not encoding or "ascii" in encoding.lower():
try:
encoding = locale.getpreferredencoding()
except locale.Error:
# can be raised by locale.setlocale(), which is
# called by getpreferredencoding
# (on some systems, see stdlib locale docs)
pass
# when all else fails, fall back to the default; this will usually be "ascii"
if not encoding or "ascii" in encoding.lower():
encoding = sys.getdefaultencoding()
# GH#3360, save the reported defencoding at import time
# MPL backends may change it. Make available for debugging.
if not _initial_defencoding:
_initial_defencoding = sys.getdefaultencoding()
return encoding
pc_encoding_doc = """
: str/unicode
Defaults to the detected encoding of the console.
Specifies the encoding to be used for strings returned by to_string;
these are generally strings meant to be displayed on the console.
"""
with cf.config_prefix("display"):
cf.register_option(
"encoding", detect_console_encoding(), pc_encoding_doc, validator=cf.is_text
)

View File

@@ -0,0 +1,172 @@
"""
Helpers for configuring locale settings.
Name `localization` is chosen to avoid overlap with the builtin `locale` module.
"""
from __future__ import annotations
from contextlib import contextmanager
import locale
import platform
import re
import subprocess
from typing import TYPE_CHECKING
from pandas._config.config import options
if TYPE_CHECKING:
from collections.abc import Generator
@contextmanager
def set_locale(
new_locale: str | tuple[str, str], lc_var: int = locale.LC_ALL
) -> Generator[str | tuple[str, str], None, None]:
"""
Context manager for temporarily setting a locale.
Parameters
----------
new_locale : str or tuple
A string of the form <language_country>.<encoding>. For example to set
the current locale to US English with a UTF8 encoding, you would pass
"en_US.UTF-8".
lc_var : int, default `locale.LC_ALL`
The category of the locale being set.
Notes
-----
This is useful when you want to run a particular block of code under a
particular locale, without globally setting the locale. This probably isn't
thread-safe.
"""
# getlocale is not always compliant with setlocale, use setlocale. GH#46595
current_locale = locale.setlocale(lc_var)
try:
locale.setlocale(lc_var, new_locale)
normalized_code, normalized_encoding = locale.getlocale()
if normalized_code is not None and normalized_encoding is not None:
yield f"{normalized_code}.{normalized_encoding}"
else:
yield new_locale
finally:
locale.setlocale(lc_var, current_locale)
def can_set_locale(lc: str, lc_var: int = locale.LC_ALL) -> bool:
"""
Check to see if we can set a locale, and subsequently get the locale,
without raising an Exception.
Parameters
----------
lc : str
The locale to attempt to set.
lc_var : int, default `locale.LC_ALL`
The category of the locale being set.
Returns
-------
bool
Whether the passed locale can be set
"""
try:
with set_locale(lc, lc_var=lc_var):
pass
except (ValueError, locale.Error):
# horrible name for an Exception subclass
return False
else:
return True
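Typical guarded use of the two helpers above; "de_DE.UTF-8" is just an example locale and may not be installed on a given system:

import locale
from pandas._config.localization import can_set_locale, set_locale

if can_set_locale("de_DE.UTF-8"):
    with set_locale("de_DE.UTF-8", lc_var=locale.LC_TIME):
        pass  # date formatting in this block follows the German locale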
def _valid_locales(locales: list[str] | str, normalize: bool) -> list[str]:
"""
Return a list of normalized locales that do not throw an ``Exception``
when set.
Parameters
----------
locales : list of str
A list of locale names.
normalize : bool
Whether to call ``locale.normalize`` on each locale.
Returns
-------
valid_locales : list
A list of valid locales.
"""
return [
loc
for loc in (
locale.normalize(loc.strip()) if normalize else loc.strip()
for loc in locales
)
if can_set_locale(loc)
]
def get_locales(
prefix: str | None = None,
normalize: bool = True,
) -> list[str]:
"""
Get all the locales that are available on the system.
Parameters
----------
prefix : str
If not ``None`` then return only those locales with the prefix
provided. For example to get all English language locales (those that
start with ``"en"``), pass ``prefix="en"``.
normalize : bool
Call ``locale.normalize`` on the resulting list of available locales.
If ``True``, only locales that can be set without throwing an
``Exception`` are returned.
Returns
-------
locales : list of strings
A list of locale strings that can be set with ``locale.setlocale()``.
For example::
locale.setlocale(locale.LC_ALL, locale_string)
On error, returns an empty list (no locales available, e.g. on Windows)
"""
if platform.system() in ("Linux", "Darwin"):
raw_locales = subprocess.check_output(["locale", "-a"])
else:
# Other platforms (e.g. Windows) don't define "locale -a"
# Note: is_platform_windows causes a circular import here
return []
try:
# raw_locales is a "\n"-separated list of locales;
# it may contain non-decodable parts, so split,
# extract what we can, and then rejoin.
split_raw_locales = raw_locales.split(b"\n")
out_locales = []
for x in split_raw_locales:
try:
out_locales.append(str(x, encoding=options.display.encoding))
except UnicodeError:
# 'locale -a' is used to populate 'raw_locales' and on
# Redhat 7 Linux (and maybe others) prints locale names
# using windows-1252 encoding. Bug only triggered by
# a few special characters and when there is an
# extensive list of installed locales.
out_locales.append(str(x, encoding="windows-1252"))
except TypeError:
pass
if prefix is None:
return _valid_locales(out_locales, normalize)
pattern = re.compile(f"{prefix}.*")
found = pattern.findall("\n".join(out_locales))
return _valid_locales(found, normalize)
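For example, listing only English-language locales via the prefix filter:

from pandas._config.localization import get_locales

english = get_locales(prefix="en")  # [] where "locale -a" is unavailable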

View File

@@ -0,0 +1,27 @@
__all__ = [
"NaT",
"NaTType",
"OutOfBoundsDatetime",
"Period",
"Timedelta",
"Timestamp",
"iNaT",
"Interval",
]
# The imports below need to happen first to ensure the pandas top-level
# module gets monkeypatched with the pandas_datetime_CAPI;
# see pandas_datetime_exec in pd_datetime.c
import pandas._libs.pandas_parser # isort: skip # type: ignore[reportUnusedImport]
import pandas._libs.pandas_datetime # noqa: F401 # isort: skip # type: ignore[reportUnusedImport]
from pandas._libs.interval import Interval
from pandas._libs.tslibs import (
NaT,
NaTType,
OutOfBoundsDatetime,
Period,
Timedelta,
Timestamp,
iNaT,
)

View File

@@ -0,0 +1,416 @@
from typing import Any
import numpy as np
from pandas._typing import npt
class Infinity:
def __eq__(self, other) -> bool: ...
def __ne__(self, other) -> bool: ...
def __lt__(self, other) -> bool: ...
def __le__(self, other) -> bool: ...
def __gt__(self, other) -> bool: ...
def __ge__(self, other) -> bool: ...
class NegInfinity:
def __eq__(self, other) -> bool: ...
def __ne__(self, other) -> bool: ...
def __lt__(self, other) -> bool: ...
def __le__(self, other) -> bool: ...
def __gt__(self, other) -> bool: ...
def __ge__(self, other) -> bool: ...
def unique_deltas(
arr: np.ndarray, # const int64_t[:]
) -> np.ndarray: ... # np.ndarray[np.int64, ndim=1]
def is_lexsorted(list_of_arrays: list[npt.NDArray[np.int64]]) -> bool: ...
def groupsort_indexer(
index: np.ndarray, # const int64_t[:]
ngroups: int,
) -> tuple[
np.ndarray, # ndarray[int64_t, ndim=1]
np.ndarray, # ndarray[int64_t, ndim=1]
]: ...
def kth_smallest(
arr: np.ndarray, # numeric[:]
k: int,
) -> Any: ... # numeric
# ----------------------------------------------------------------------
# Pairwise correlation/covariance
def nancorr(
mat: npt.NDArray[np.float64], # const float64_t[:, :]
cov: bool = ...,
minp: int | None = ...,
) -> npt.NDArray[np.float64]: ... # ndarray[float64_t, ndim=2]
def nancorr_spearman(
mat: npt.NDArray[np.float64], # ndarray[float64_t, ndim=2]
minp: int = ...,
) -> npt.NDArray[np.float64]: ... # ndarray[float64_t, ndim=2]
# ----------------------------------------------------------------------
def validate_limit(nobs: int | None, limit=...) -> int: ...
def get_fill_indexer(
mask: npt.NDArray[np.bool_],
limit: int | None = None,
) -> npt.NDArray[np.intp]: ...
def pad(
old: np.ndarray, # ndarray[numeric_object_t]
new: np.ndarray, # ndarray[numeric_object_t]
limit=...,
) -> npt.NDArray[np.intp]: ... # np.ndarray[np.intp, ndim=1]
def pad_inplace(
values: np.ndarray, # numeric_object_t[:]
mask: np.ndarray, # uint8_t[:]
limit=...,
) -> None: ...
def pad_2d_inplace(
values: np.ndarray, # numeric_object_t[:, :]
mask: np.ndarray, # const uint8_t[:, :]
limit=...,
) -> None: ...
def backfill(
old: np.ndarray, # ndarray[numeric_object_t]
new: np.ndarray, # ndarray[numeric_object_t]
limit=...,
) -> npt.NDArray[np.intp]: ... # np.ndarray[np.intp, ndim=1]
def backfill_inplace(
values: np.ndarray, # numeric_object_t[:]
mask: np.ndarray, # uint8_t[:]
limit=...,
) -> None: ...
def backfill_2d_inplace(
values: np.ndarray, # numeric_object_t[:, :]
mask: np.ndarray, # const uint8_t[:, :]
limit=...,
) -> None: ...
def is_monotonic(
arr: np.ndarray, # ndarray[numeric_object_t, ndim=1]
timelike: bool,
) -> tuple[bool, bool, bool]: ...
# ----------------------------------------------------------------------
# rank_1d, rank_2d
# ----------------------------------------------------------------------
def rank_1d(
values: np.ndarray, # ndarray[numeric_object_t, ndim=1]
labels: np.ndarray | None = ..., # const int64_t[:]=None
is_datetimelike: bool = ...,
ties_method=...,
ascending: bool = ...,
pct: bool = ...,
na_option=...,
mask: npt.NDArray[np.bool_] | None = ...,
) -> np.ndarray: ... # np.ndarray[float64_t, ndim=1]
def rank_2d(
in_arr: np.ndarray, # ndarray[numeric_object_t, ndim=2]
axis: int = ...,
is_datetimelike: bool = ...,
ties_method=...,
ascending: bool = ...,
na_option=...,
pct: bool = ...,
) -> np.ndarray: ... # np.ndarray[float64_t, ndim=1]
def diff_2d(
arr: np.ndarray, # ndarray[diff_t, ndim=2]
out: np.ndarray, # ndarray[out_t, ndim=2]
periods: int,
axis: int,
datetimelike: bool = ...,
) -> None: ...
def ensure_platform_int(arr: object) -> npt.NDArray[np.intp]: ...
def ensure_object(arr: object) -> npt.NDArray[np.object_]: ...
def ensure_float64(arr: object) -> npt.NDArray[np.float64]: ...
def ensure_int8(arr: object) -> npt.NDArray[np.int8]: ...
def ensure_int16(arr: object) -> npt.NDArray[np.int16]: ...
def ensure_int32(arr: object) -> npt.NDArray[np.int32]: ...
def ensure_int64(arr: object) -> npt.NDArray[np.int64]: ...
def ensure_uint64(arr: object) -> npt.NDArray[np.uint64]: ...
def take_1d_int8_int8(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int8_int32(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int8_int64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int8_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int16_int16(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int16_int32(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int16_int64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int16_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int32_int32(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int32_int64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int32_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int64_int64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int64_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_float32_float32(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_float32_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_float64_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_object_object(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_bool_bool(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_bool_object(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int8_int8(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int8_int32(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int8_int64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int8_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int16_int16(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int16_int32(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int16_int64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int16_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int32_int32(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int32_int64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int32_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int64_int64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int64_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_float32_float32(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_float32_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_float64_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_object_object(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_bool_bool(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_bool_object(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int8_int8(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int8_int32(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int8_int64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int8_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int16_int16(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int16_int32(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int16_int64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int16_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int32_int32(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int32_int64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int32_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int64_int64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int64_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_float32_float32(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_float32_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_float64_float64(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_object_object(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_bool_bool(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_bool_object(
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_multi_int8_int8(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_int8_int32(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_int8_int64(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_int8_float64(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_int16_int16(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_int16_int32(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_int16_int64(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_int16_float64(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_int32_int32(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_int32_int64(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_int32_float64(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_int64_float64(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_float32_float32(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_float32_float64(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_float64_float64(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_object_object(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_bool_bool(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_bool_object(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
def take_2d_multi_int64_int64(
values: np.ndarray,
indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
out: np.ndarray,
fill_value=...,
) -> None: ...
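The stubs above describe the compiled pandas._libs.algos extension; a small sketch exercising two of the declared signatures (private API, so subject to change):

import numpy as np
from pandas._libs import algos

arr = np.array([1, 2, 2, 3], dtype=np.int64)
algos.is_monotonic(arr, timelike=False)  # three monotonicity flags
algos.ensure_platform_int(arr)           # casts to np.intp if needed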

View File

@@ -0,0 +1,40 @@
from typing import Sequence
import numpy as np
from pandas._typing import (
AxisInt,
DtypeObj,
Self,
Shape,
)
class NDArrayBacked:
_dtype: DtypeObj
_ndarray: np.ndarray
def __init__(self, values: np.ndarray, dtype: DtypeObj) -> None: ...
@classmethod
def _simple_new(cls, values: np.ndarray, dtype: DtypeObj): ...
def _from_backing_data(self, values: np.ndarray): ...
def __setstate__(self, state): ...
def __len__(self) -> int: ...
@property
def shape(self) -> Shape: ...
@property
def ndim(self) -> int: ...
@property
def size(self) -> int: ...
@property
def nbytes(self) -> int: ...
def copy(self, order=...): ...
def delete(self, loc, axis=...): ...
def swapaxes(self, axis1, axis2): ...
def repeat(self, repeats: int | Sequence[int], axis: int | None = ...): ...
def reshape(self, *args, **kwargs): ...
def ravel(self, order=...): ...
@property
def T(self): ...
@classmethod
def _concat_same_type(
cls, to_concat: Sequence[Self], axis: AxisInt = ...
) -> Self: ...

View File

@@ -0,0 +1,5 @@
def read_float_with_byteswap(data: bytes, offset: int, byteswap: bool) -> float: ...
def read_double_with_byteswap(data: bytes, offset: int, byteswap: bool) -> float: ...
def read_uint16_with_byteswap(data: bytes, offset: int, byteswap: bool) -> int: ...
def read_uint32_with_byteswap(data: bytes, offset: int, byteswap: bool) -> int: ...
def read_uint64_with_byteswap(data: bytes, offset: int, byteswap: bool) -> int: ...

View File

@@ -0,0 +1,216 @@
from typing import Literal
import numpy as np
from pandas._typing import npt
def group_median_float64(
out: np.ndarray, # ndarray[float64_t, ndim=2]
counts: npt.NDArray[np.int64],
values: np.ndarray, # ndarray[float64_t, ndim=2]
labels: npt.NDArray[np.int64],
min_count: int = ..., # Py_ssize_t
mask: np.ndarray | None = ...,
result_mask: np.ndarray | None = ...,
) -> None: ...
def group_cumprod(
out: np.ndarray, # float64_t[:, ::1]
values: np.ndarray, # const float64_t[:, :]
labels: np.ndarray, # const int64_t[:]
ngroups: int,
is_datetimelike: bool,
skipna: bool = ...,
mask: np.ndarray | None = ...,
result_mask: np.ndarray | None = ...,
) -> None: ...
def group_cumsum(
out: np.ndarray, # int64float_t[:, ::1]
values: np.ndarray, # ndarray[int64float_t, ndim=2]
labels: np.ndarray, # const int64_t[:]
ngroups: int,
is_datetimelike: bool,
skipna: bool = ...,
mask: np.ndarray | None = ...,
result_mask: np.ndarray | None = ...,
) -> None: ...
def group_shift_indexer(
out: np.ndarray, # int64_t[::1]
labels: np.ndarray, # const int64_t[:]
ngroups: int,
periods: int,
) -> None: ...
def group_fillna_indexer(
out: np.ndarray, # ndarray[intp_t]
labels: np.ndarray, # ndarray[int64_t]
sorted_labels: npt.NDArray[np.intp],
mask: npt.NDArray[np.uint8],
limit: int, # int64_t
dropna: bool,
) -> None: ...
def group_any_all(
out: np.ndarray, # uint8_t[::1]
values: np.ndarray, # const uint8_t[::1]
labels: np.ndarray, # const int64_t[:]
mask: np.ndarray, # const uint8_t[::1]
val_test: Literal["any", "all"],
skipna: bool,
result_mask: np.ndarray | None,
) -> None: ...
def group_sum(
out: np.ndarray, # complexfloatingintuint_t[:, ::1]
counts: np.ndarray, # int64_t[::1]
values: np.ndarray, # ndarray[complexfloatingintuint_t, ndim=2]
labels: np.ndarray, # const intp_t[:]
mask: np.ndarray | None,
result_mask: np.ndarray | None = ...,
min_count: int = ...,
is_datetimelike: bool = ...,
) -> None: ...
def group_prod(
out: np.ndarray, # int64float_t[:, ::1]
counts: np.ndarray, # int64_t[::1]
values: np.ndarray, # ndarray[int64float_t, ndim=2]
labels: np.ndarray, # const intp_t[:]
mask: np.ndarray | None,
result_mask: np.ndarray | None = ...,
min_count: int = ...,
) -> None: ...
def group_var(
out: np.ndarray, # floating[:, ::1]
counts: np.ndarray, # int64_t[::1]
values: np.ndarray, # ndarray[floating, ndim=2]
labels: np.ndarray, # const intp_t[:]
min_count: int = ..., # Py_ssize_t
ddof: int = ..., # int64_t
mask: np.ndarray | None = ...,
result_mask: np.ndarray | None = ...,
is_datetimelike: bool = ...,
name: str = ...,
) -> None: ...
def group_skew(
out: np.ndarray, # float64_t[:, ::1]
counts: np.ndarray, # int64_t[::1]
values: np.ndarray, # ndarray[float64_T, ndim=2]
labels: np.ndarray, # const intp_t[::1]
mask: np.ndarray | None = ...,
result_mask: np.ndarray | None = ...,
skipna: bool = ...,
) -> None: ...
def group_mean(
out: np.ndarray, # floating[:, ::1]
counts: np.ndarray, # int64_t[::1]
values: np.ndarray, # ndarray[floating, ndim=2]
labels: np.ndarray, # const intp_t[:]
min_count: int = ..., # Py_ssize_t
is_datetimelike: bool = ..., # bint
mask: np.ndarray | None = ...,
result_mask: np.ndarray | None = ...,
) -> None: ...
def group_ohlc(
out: np.ndarray, # floatingintuint_t[:, ::1]
counts: np.ndarray, # int64_t[::1]
values: np.ndarray, # ndarray[floatingintuint_t, ndim=2]
labels: np.ndarray, # const intp_t[:]
min_count: int = ...,
mask: np.ndarray | None = ...,
result_mask: np.ndarray | None = ...,
) -> None: ...
def group_quantile(
out: npt.NDArray[np.float64],
values: np.ndarray, # ndarray[numeric, ndim=1]
labels: npt.NDArray[np.intp],
mask: npt.NDArray[np.uint8],
qs: npt.NDArray[np.float64], # const
starts: npt.NDArray[np.int64],
ends: npt.NDArray[np.int64],
interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"],
result_mask: np.ndarray | None,
is_datetimelike: bool,
) -> None: ...
def group_last(
out: np.ndarray, # rank_t[:, ::1]
counts: np.ndarray, # int64_t[::1]
values: np.ndarray, # ndarray[rank_t, ndim=2]
labels: np.ndarray, # const int64_t[:]
mask: npt.NDArray[np.bool_] | None,
result_mask: npt.NDArray[np.bool_] | None = ...,
min_count: int = ..., # Py_ssize_t
is_datetimelike: bool = ...,
skipna: bool = ...,
) -> None: ...
def group_nth(
out: np.ndarray, # rank_t[:, ::1]
counts: np.ndarray, # int64_t[::1]
values: np.ndarray, # ndarray[rank_t, ndim=2]
labels: np.ndarray, # const int64_t[:]
mask: npt.NDArray[np.bool_] | None,
result_mask: npt.NDArray[np.bool_] | None = ...,
min_count: int = ..., # int64_t
rank: int = ..., # int64_t
is_datetimelike: bool = ...,
skipna: bool = ...,
) -> None: ...
def group_rank(
out: np.ndarray, # float64_t[:, ::1]
values: np.ndarray, # ndarray[rank_t, ndim=2]
labels: np.ndarray, # const int64_t[:]
ngroups: int,
is_datetimelike: bool,
ties_method: Literal["average", "min", "max", "first", "dense"] = ...,
ascending: bool = ...,
pct: bool = ...,
na_option: Literal["keep", "top", "bottom"] = ...,
mask: npt.NDArray[np.bool_] | None = ...,
) -> None: ...
def group_max(
out: np.ndarray, # groupby_t[:, ::1]
counts: np.ndarray, # int64_t[::1]
values: np.ndarray, # ndarray[groupby_t, ndim=2]
labels: np.ndarray, # const int64_t[:]
min_count: int = ...,
is_datetimelike: bool = ...,
mask: np.ndarray | None = ...,
result_mask: np.ndarray | None = ...,
) -> None: ...
def group_min(
out: np.ndarray, # groupby_t[:, ::1]
counts: np.ndarray, # int64_t[::1]
values: np.ndarray, # ndarray[groupby_t, ndim=2]
labels: np.ndarray, # const int64_t[:]
min_count: int = ...,
is_datetimelike: bool = ...,
mask: np.ndarray | None = ...,
result_mask: np.ndarray | None = ...,
) -> None: ...
def group_idxmin_idxmax(
out: npt.NDArray[np.intp],
counts: npt.NDArray[np.int64],
values: np.ndarray, # ndarray[groupby_t, ndim=2]
labels: npt.NDArray[np.intp],
min_count: int = ...,
is_datetimelike: bool = ...,
mask: np.ndarray | None = ...,
name: str = ...,
skipna: bool = ...,
result_mask: np.ndarray | None = ...,
) -> None: ...
def group_cummin(
out: np.ndarray, # groupby_t[:, ::1]
values: np.ndarray, # ndarray[groupby_t, ndim=2]
labels: np.ndarray, # const int64_t[:]
ngroups: int,
is_datetimelike: bool,
mask: np.ndarray | None = ...,
result_mask: np.ndarray | None = ...,
skipna: bool = ...,
) -> None: ...
def group_cummax(
out: np.ndarray, # groupby_t[:, ::1]
values: np.ndarray, # ndarray[groupby_t, ndim=2]
labels: np.ndarray, # const int64_t[:]
ngroups: int,
is_datetimelike: bool,
mask: np.ndarray | None = ...,
result_mask: np.ndarray | None = ...,
skipna: bool = ...,
) -> None: ...
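# Hypothetical sketch of calling one of the kernels above directly; these
# are internal Cython routines normally driven by DataFrame.groupby, and
# the module path (pandas._libs.groupby) and buffer shapes are assumptions
# taken from the signature comments above.
import numpy as np
from pandas._libs import groupby as libgroupby

values = np.array([[1.0], [2.0], [3.0]])       # ndarray[float64, ndim=2]
labels = np.array([0, 0, 1], dtype=np.intp)    # group id per row
out = np.zeros((2, 1), dtype=np.float64)       # one row per group
counts = np.zeros(2, dtype=np.int64)
libgroupby.group_sum(out, counts, values, labels, mask=None)
# out -> [[3.0], [3.0]]; counts -> [2, 1]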

View File

@ -0,0 +1,9 @@
import numpy as np
from pandas._typing import npt
def hash_object_array(
arr: npt.NDArray[np.object_],
key: str,
encoding: str = ...,
) -> npt.NDArray[np.uint64]: ...
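# Sketch of the object-array hasher above (module path assumed to be
# pandas._libs.hashing). The key must be 16 characters; "0123456789123456"
# mirrors pandas' default hash key.
import numpy as np
from pandas._libs.hashing import hash_object_array

arr = np.array(["a", "b", "a"], dtype=object)
hashes = hash_object_array(arr, key="0123456789123456")
assert hashes.dtype == np.uint64 and hashes[0] == hashes[2]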

View File

@ -0,0 +1,252 @@
from typing import (
Any,
Hashable,
Literal,
)
import numpy as np
from pandas._typing import npt
def unique_label_indices(
labels: np.ndarray, # const int64_t[:]
) -> np.ndarray: ...
class Factorizer:
count: int
uniques: Any
def __init__(self, size_hint: int) -> None: ...
def get_count(self) -> int: ...
def factorize(
self,
values: np.ndarray,
na_sentinel=...,
na_value=...,
mask=...,
) -> npt.NDArray[np.intp]: ...
class ObjectFactorizer(Factorizer):
table: PyObjectHashTable
uniques: ObjectVector
class Int64Factorizer(Factorizer):
table: Int64HashTable
uniques: Int64Vector
class UInt64Factorizer(Factorizer):
table: UInt64HashTable
uniques: UInt64Vector
class Int32Factorizer(Factorizer):
table: Int32HashTable
uniques: Int32Vector
class UInt32Factorizer(Factorizer):
table: UInt32HashTable
uniques: UInt32Vector
class Int16Factorizer(Factorizer):
table: Int16HashTable
uniques: Int16Vector
class UInt16Factorizer(Factorizer):
table: UInt16HashTable
uniques: UInt16Vector
class Int8Factorizer(Factorizer):
table: Int8HashTable
uniques: Int8Vector
class UInt8Factorizer(Factorizer):
table: UInt8HashTable
uniques: UInt8Vector
class Float64Factorizer(Factorizer):
table: Float64HashTable
uniques: Float64Vector
class Float32Factorizer(Factorizer):
table: Float32HashTable
uniques: Float32Vector
class Complex64Factorizer(Factorizer):
table: Complex64HashTable
uniques: Complex64Vector
class Complex128Factorizer(Factorizer):
table: Complex128HashTable
uniques: Complex128Vector
class Int64Vector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.int64]: ...
class Int32Vector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.int32]: ...
class Int16Vector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.int16]: ...
class Int8Vector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.int8]: ...
class UInt64Vector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.uint64]: ...
class UInt32Vector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.uint32]: ...
class UInt16Vector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.uint16]: ...
class UInt8Vector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.uint8]: ...
class Float64Vector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.float64]: ...
class Float32Vector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.float32]: ...
class Complex128Vector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.complex128]: ...
class Complex64Vector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.complex64]: ...
class StringVector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.object_]: ...
class ObjectVector:
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.object_]: ...
class HashTable:
# NB: The base HashTable class does _not_ actually have these methods;
# we are putting them here for the sake of mypy to avoid
# reproducing them in each subclass below.
def __init__(self, size_hint: int = ..., uses_mask: bool = ...) -> None: ...
def __len__(self) -> int: ...
def __contains__(self, key: Hashable) -> bool: ...
def sizeof(self, deep: bool = ...) -> int: ...
def get_state(self) -> dict[str, int]: ...
# TODO: `val/key` type is subclass-specific
def get_item(self, val): ... # TODO: return type?
def set_item(self, key, val) -> None: ...
def get_na(self): ... # TODO: return type?
def set_na(self, val) -> None: ...
def map_locations(
self,
values: np.ndarray, # np.ndarray[subclass-specific]
mask: npt.NDArray[np.bool_] | None = ...,
) -> None: ...
def lookup(
self,
values: np.ndarray, # np.ndarray[subclass-specific]
mask: npt.NDArray[np.bool_] | None = ...,
) -> npt.NDArray[np.intp]: ...
def get_labels(
self,
values: np.ndarray, # np.ndarray[subclass-specific]
uniques, # SubclassTypeVector
count_prior: int = ...,
na_sentinel: int = ...,
na_value: object = ...,
mask=...,
) -> npt.NDArray[np.intp]: ...
def unique(
self,
values: np.ndarray, # np.ndarray[subclass-specific]
return_inverse: bool = ...,
mask=...,
) -> (
tuple[
np.ndarray, # np.ndarray[subclass-specific]
npt.NDArray[np.intp],
]
| np.ndarray
): ... # np.ndarray[subclass-specific]
def factorize(
self,
values: np.ndarray, # np.ndarray[subclass-specific]
na_sentinel: int = ...,
na_value: object = ...,
mask=...,
ignore_na: bool = True,
) -> tuple[np.ndarray, npt.NDArray[np.intp]]: ... # np.ndarray[subclass-specific]
class Complex128HashTable(HashTable): ...
class Complex64HashTable(HashTable): ...
class Float64HashTable(HashTable): ...
class Float32HashTable(HashTable): ...
class Int64HashTable(HashTable):
# Only Int64HashTable has get_labels_groupby, map_keys_to_values
def get_labels_groupby(
self,
values: npt.NDArray[np.int64], # const int64_t[:]
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.int64]]: ...
def map_keys_to_values(
self,
keys: npt.NDArray[np.int64],
values: npt.NDArray[np.int64], # const int64_t[:]
) -> None: ...
class Int32HashTable(HashTable): ...
class Int16HashTable(HashTable): ...
class Int8HashTable(HashTable): ...
class UInt64HashTable(HashTable): ...
class UInt32HashTable(HashTable): ...
class UInt16HashTable(HashTable): ...
class UInt8HashTable(HashTable): ...
class StringHashTable(HashTable): ...
class PyObjectHashTable(HashTable): ...
class IntpHashTable(HashTable): ...
def duplicated(
values: np.ndarray,
keep: Literal["last", "first", False] = ...,
mask: npt.NDArray[np.bool_] | None = ...,
) -> npt.NDArray[np.bool_]: ...
def mode(
values: np.ndarray, dropna: bool, mask: npt.NDArray[np.bool_] | None = ...
) -> np.ndarray: ...
def value_count(
values: np.ndarray,
dropna: bool,
mask: npt.NDArray[np.bool_] | None = ...,
) -> tuple[np.ndarray, npt.NDArray[np.int64], int]: ... # np.ndarray[same-as-values]
# arr and values should have same dtype
def ismember(
arr: np.ndarray,
values: np.ndarray,
) -> npt.NDArray[np.bool_]: ...
def object_hash(obj) -> int: ...
def objects_are_equal(a, b) -> bool: ...
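# Sketch of the HashTable API above (internal; assumed to live at
# pandas._libs.hashtable): factorize returns uniques in first-occurrence
# order plus integer labels into them.
import numpy as np
from pandas._libs import hashtable as ht

table = ht.Int64HashTable()
values = np.array([3, 1, 3, 2], dtype=np.int64)
uniques, labels = table.factorize(values)
# uniques -> [3, 1, 2]; labels -> [0, 1, 0, 2]
assert (uniques[labels] == values).all()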

View File

@ -0,0 +1,103 @@
import numpy as np
from pandas._typing import npt
from pandas import MultiIndex
from pandas.core.arrays import ExtensionArray
multiindex_nulls_shift: int
class IndexEngine:
over_size_threshold: bool
def __init__(self, values: np.ndarray) -> None: ...
def __contains__(self, val: object) -> bool: ...
# -> int | slice | np.ndarray[bool]
def get_loc(self, val: object) -> int | slice | np.ndarray: ...
def sizeof(self, deep: bool = ...) -> int: ...
def __sizeof__(self) -> int: ...
@property
def is_unique(self) -> bool: ...
@property
def is_monotonic_increasing(self) -> bool: ...
@property
def is_monotonic_decreasing(self) -> bool: ...
@property
def is_mapping_populated(self) -> bool: ...
def clear_mapping(self): ...
def get_indexer(self, values: np.ndarray) -> npt.NDArray[np.intp]: ...
def get_indexer_non_unique(
self,
targets: np.ndarray,
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
class MaskedIndexEngine(IndexEngine):
def __init__(self, values: object) -> None: ...
def get_indexer_non_unique(
self, targets: object
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
class Float64Engine(IndexEngine): ...
class Float32Engine(IndexEngine): ...
class Complex128Engine(IndexEngine): ...
class Complex64Engine(IndexEngine): ...
class Int64Engine(IndexEngine): ...
class Int32Engine(IndexEngine): ...
class Int16Engine(IndexEngine): ...
class Int8Engine(IndexEngine): ...
class UInt64Engine(IndexEngine): ...
class UInt32Engine(IndexEngine): ...
class UInt16Engine(IndexEngine): ...
class UInt8Engine(IndexEngine): ...
class ObjectEngine(IndexEngine): ...
class DatetimeEngine(Int64Engine): ...
class TimedeltaEngine(DatetimeEngine): ...
class PeriodEngine(Int64Engine): ...
class BoolEngine(UInt8Engine): ...
class MaskedFloat64Engine(MaskedIndexEngine): ...
class MaskedFloat32Engine(MaskedIndexEngine): ...
class MaskedComplex128Engine(MaskedIndexEngine): ...
class MaskedComplex64Engine(MaskedIndexEngine): ...
class MaskedInt64Engine(MaskedIndexEngine): ...
class MaskedInt32Engine(MaskedIndexEngine): ...
class MaskedInt16Engine(MaskedIndexEngine): ...
class MaskedInt8Engine(MaskedIndexEngine): ...
class MaskedUInt64Engine(MaskedIndexEngine): ...
class MaskedUInt32Engine(MaskedIndexEngine): ...
class MaskedUInt16Engine(MaskedIndexEngine): ...
class MaskedUInt8Engine(MaskedIndexEngine): ...
class MaskedBoolEngine(MaskedUInt8Engine): ...
class StringObjectEngine(ObjectEngine):
def __init__(self, values: object, na_value) -> None: ...
class BaseMultiIndexCodesEngine:
levels: list[np.ndarray]
offsets: np.ndarray # ndarray[uint64_t, ndim=1]
def __init__(
self,
levels: list[np.ndarray], # all entries hashable
labels: list[np.ndarray], # all entries integer-dtyped
offsets: np.ndarray, # np.ndarray[np.uint64, ndim=1]
) -> None: ...
def get_indexer(self, target: npt.NDArray[np.object_]) -> npt.NDArray[np.intp]: ...
def _extract_level_codes(self, target: MultiIndex) -> np.ndarray: ...
class ExtensionEngine:
def __init__(self, values: ExtensionArray) -> None: ...
def __contains__(self, val: object) -> bool: ...
def get_loc(self, val: object) -> int | slice | np.ndarray: ...
def get_indexer(self, values: np.ndarray) -> npt.NDArray[np.intp]: ...
def get_indexer_non_unique(
self,
targets: np.ndarray,
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
@property
def is_unique(self) -> bool: ...
@property
def is_monotonic_increasing(self) -> bool: ...
@property
def is_monotonic_decreasing(self) -> bool: ...
def sizeof(self, deep: bool = ...) -> int: ...
def clear_mapping(self): ...
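# Illustrative only: every pd.Index lazily builds one of the engines above
# and delegates lookups to it. `_engine` is a private attribute, so this is
# a peek under the hood, not a supported API.
import pandas as pd

idx = pd.Index([10, 20, 30])
engine = idx._engine                     # Int64Engine for this dtype
assert engine.is_unique and engine.is_monotonic_increasing
assert engine.get_loc(20) == 1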

View File

@ -0,0 +1,17 @@
from typing import (
Generic,
TypeVar,
)
from pandas.core.indexing import IndexingMixin
_IndexingMixinT = TypeVar("_IndexingMixinT", bound=IndexingMixin)
class NDFrameIndexerBase(Generic[_IndexingMixinT]):
name: str
# in practice obj is either a DataFrame or a Series
obj: _IndexingMixinT
def __init__(self, name: str, obj: _IndexingMixinT) -> None: ...
@property
def ndim(self) -> int: ...
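# Sketch: DataFrame.loc/.iloc are built on NDFrameIndexerBase, which simply
# records its name and the object it indexes (illustrative; internal detail).
import pandas as pd

df = pd.DataFrame({"a": [1, 2]})
assert df.loc.obj is df and df.loc.ndim == 2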

View File

@ -0,0 +1,94 @@
from typing import (
Iterator,
Sequence,
final,
overload,
)
import weakref
import numpy as np
from pandas._typing import (
ArrayLike,
Self,
npt,
)
from pandas import Index
from pandas.core.internals.blocks import Block as B
def slice_len(slc: slice, objlen: int = ...) -> int: ...
def get_concat_blkno_indexers(
blknos_list: list[npt.NDArray[np.intp]],
) -> list[tuple[npt.NDArray[np.intp], BlockPlacement]]: ...
def get_blkno_indexers(
blknos: np.ndarray, # int64_t[:]
group: bool = ...,
) -> list[tuple[int, slice | np.ndarray]]: ...
def get_blkno_placements(
blknos: np.ndarray,
group: bool = ...,
) -> Iterator[tuple[int, BlockPlacement]]: ...
def update_blklocs_and_blknos(
blklocs: npt.NDArray[np.intp],
blknos: npt.NDArray[np.intp],
loc: int,
nblocks: int,
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
@final
class BlockPlacement:
def __init__(self, val: int | slice | np.ndarray) -> None: ...
@property
def indexer(self) -> np.ndarray | slice: ...
@property
def as_array(self) -> np.ndarray: ...
@property
def as_slice(self) -> slice: ...
@property
def is_slice_like(self) -> bool: ...
@overload
def __getitem__(
self, loc: slice | Sequence[int] | npt.NDArray[np.intp]
) -> BlockPlacement: ...
@overload
def __getitem__(self, loc: int) -> int: ...
def __iter__(self) -> Iterator[int]: ...
def __len__(self) -> int: ...
def delete(self, loc) -> BlockPlacement: ...
def add(self, other) -> BlockPlacement: ...
def append(self, others: list[BlockPlacement]) -> BlockPlacement: ...
def tile_for_unstack(self, factor: int) -> npt.NDArray[np.intp]: ...
class Block:
_mgr_locs: BlockPlacement
ndim: int
values: ArrayLike
refs: BlockValuesRefs
def __init__(
self,
values: ArrayLike,
placement: BlockPlacement,
ndim: int,
refs: BlockValuesRefs | None = ...,
) -> None: ...
def slice_block_rows(self, slicer: slice) -> Self: ...
class BlockManager:
blocks: tuple[B, ...]
axes: list[Index]
_known_consolidated: bool
_is_consolidated: bool
_blknos: np.ndarray
_blklocs: np.ndarray
def __init__(
self, blocks: tuple[B, ...], axes: list[Index], verify_integrity=...
) -> None: ...
def get_slice(self, slobj: slice, axis: int = ...) -> Self: ...
def _rebuild_blknos_and_blklocs(self) -> None: ...
class BlockValuesRefs:
referenced_blocks: list[weakref.ref]
def __init__(self, blk: Block | None = ...) -> None: ...
def add_reference(self, blk: Block) -> None: ...
def add_index_reference(self, index: Index) -> None: ...
def has_reference(self) -> bool: ...
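# Sketch of BlockPlacement above (internal; module path assumed to be
# pandas._libs.internals): it normalizes an int, slice, or array into the
# row positions a Block occupies inside a BlockManager.
import numpy as np
from pandas._libs.internals import BlockPlacement

bp = BlockPlacement(slice(0, 3))
assert bp.is_slice_like and len(bp) == 3
assert list(bp) == [0, 1, 2]
assert isinstance(bp.as_array, np.ndarray)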

View File

@ -0,0 +1,174 @@
from typing import (
Any,
Generic,
TypeVar,
overload,
)
import numpy as np
import numpy.typing as npt
from pandas._typing import (
IntervalClosedType,
Timedelta,
Timestamp,
)
VALID_CLOSED: frozenset[str]
_OrderableScalarT = TypeVar("_OrderableScalarT", int, float)
_OrderableTimesT = TypeVar("_OrderableTimesT", Timestamp, Timedelta)
_OrderableT = TypeVar("_OrderableT", int, float, Timestamp, Timedelta)
class _LengthDescriptor:
@overload
def __get__(
self, instance: Interval[_OrderableScalarT], owner: Any
) -> _OrderableScalarT: ...
@overload
def __get__(
self, instance: Interval[_OrderableTimesT], owner: Any
) -> Timedelta: ...
class _MidDescriptor:
@overload
def __get__(self, instance: Interval[_OrderableScalarT], owner: Any) -> float: ...
@overload
def __get__(
self, instance: Interval[_OrderableTimesT], owner: Any
) -> _OrderableTimesT: ...
class IntervalMixin:
@property
def closed_left(self) -> bool: ...
@property
def closed_right(self) -> bool: ...
@property
def open_left(self) -> bool: ...
@property
def open_right(self) -> bool: ...
@property
def is_empty(self) -> bool: ...
def _check_closed_matches(self, other: IntervalMixin, name: str = ...) -> None: ...
class Interval(IntervalMixin, Generic[_OrderableT]):
@property
def left(self: Interval[_OrderableT]) -> _OrderableT: ...
@property
def right(self: Interval[_OrderableT]) -> _OrderableT: ...
@property
def closed(self) -> IntervalClosedType: ...
mid: _MidDescriptor
length: _LengthDescriptor
def __init__(
self,
left: _OrderableT,
right: _OrderableT,
closed: IntervalClosedType = ...,
) -> None: ...
def __hash__(self) -> int: ...
@overload
def __contains__(
self: Interval[Timedelta], key: Timedelta | Interval[Timedelta]
) -> bool: ...
@overload
def __contains__(
self: Interval[Timestamp], key: Timestamp | Interval[Timestamp]
) -> bool: ...
@overload
def __contains__(
self: Interval[_OrderableScalarT],
key: _OrderableScalarT | Interval[_OrderableScalarT],
) -> bool: ...
@overload
def __add__(
self: Interval[_OrderableTimesT], y: Timedelta
) -> Interval[_OrderableTimesT]: ...
@overload
def __add__(
self: Interval[int], y: _OrderableScalarT
) -> Interval[_OrderableScalarT]: ...
@overload
def __add__(self: Interval[float], y: float) -> Interval[float]: ...
@overload
def __radd__(
self: Interval[_OrderableTimesT], y: Timedelta
) -> Interval[_OrderableTimesT]: ...
@overload
def __radd__(
self: Interval[int], y: _OrderableScalarT
) -> Interval[_OrderableScalarT]: ...
@overload
def __radd__(self: Interval[float], y: float) -> Interval[float]: ...
@overload
def __sub__(
self: Interval[_OrderableTimesT], y: Timedelta
) -> Interval[_OrderableTimesT]: ...
@overload
def __sub__(
self: Interval[int], y: _OrderableScalarT
) -> Interval[_OrderableScalarT]: ...
@overload
def __sub__(self: Interval[float], y: float) -> Interval[float]: ...
@overload
def __rsub__(
self: Interval[_OrderableTimesT], y: Timedelta
) -> Interval[_OrderableTimesT]: ...
@overload
def __rsub__(
self: Interval[int], y: _OrderableScalarT
) -> Interval[_OrderableScalarT]: ...
@overload
def __rsub__(self: Interval[float], y: float) -> Interval[float]: ...
@overload
def __mul__(
self: Interval[int], y: _OrderableScalarT
) -> Interval[_OrderableScalarT]: ...
@overload
def __mul__(self: Interval[float], y: float) -> Interval[float]: ...
@overload
def __rmul__(
self: Interval[int], y: _OrderableScalarT
) -> Interval[_OrderableScalarT]: ...
@overload
def __rmul__(self: Interval[float], y: float) -> Interval[float]: ...
@overload
def __truediv__(
self: Interval[int], y: _OrderableScalarT
) -> Interval[_OrderableScalarT]: ...
@overload
def __truediv__(self: Interval[float], y: float) -> Interval[float]: ...
@overload
def __floordiv__(
self: Interval[int], y: _OrderableScalarT
) -> Interval[_OrderableScalarT]: ...
@overload
def __floordiv__(self: Interval[float], y: float) -> Interval[float]: ...
def overlaps(self: Interval[_OrderableT], other: Interval[_OrderableT]) -> bool: ...
def intervals_to_interval_bounds(
intervals: np.ndarray, validate_closed: bool = ...
) -> tuple[np.ndarray, np.ndarray, IntervalClosedType]: ...
class IntervalTree(IntervalMixin):
def __init__(
self,
left: np.ndarray,
right: np.ndarray,
closed: IntervalClosedType = ...,
leaf_size: int = ...,
) -> None: ...
@property
def mid(self) -> np.ndarray: ...
@property
def length(self) -> np.ndarray: ...
def get_indexer(self, target) -> npt.NDArray[np.intp]: ...
def get_indexer_non_unique(
self, target
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
_na_count: int
@property
def is_overlapping(self) -> bool: ...
@property
def is_monotonic_increasing(self) -> bool: ...
def clear_mapping(self) -> None: ...
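# The overloads above encode how Interval arithmetic preserves endpoint
# types; the public behavior is straightforward:
import pandas as pd

iv = pd.Interval(0, 5, closed="right")   # (0, 5]
assert 5 in iv and 0 not in iv
assert iv.length == 5 and iv.mid == 2.5
assert (iv + 2).overlaps(pd.Interval(6, 8))   # shifted to (2, 7]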

View File

@ -0,0 +1,79 @@
import numpy as np
from pandas._typing import npt
def inner_join(
left: np.ndarray, # const intp_t[:]
right: np.ndarray, # const intp_t[:]
max_groups: int,
sort: bool = ...,
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
def left_outer_join(
left: np.ndarray, # const intp_t[:]
right: np.ndarray, # const intp_t[:]
max_groups: int,
sort: bool = ...,
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
def full_outer_join(
left: np.ndarray, # const intp_t[:]
right: np.ndarray, # const intp_t[:]
max_groups: int,
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
def ffill_indexer(
indexer: np.ndarray, # const intp_t[:]
) -> npt.NDArray[np.intp]: ...
def left_join_indexer_unique(
left: np.ndarray, # ndarray[join_t]
right: np.ndarray, # ndarray[join_t]
) -> npt.NDArray[np.intp]: ...
def left_join_indexer(
left: np.ndarray, # ndarray[join_t]
right: np.ndarray, # ndarray[join_t]
) -> tuple[
np.ndarray, # np.ndarray[join_t]
npt.NDArray[np.intp],
npt.NDArray[np.intp],
]: ...
def inner_join_indexer(
left: np.ndarray, # ndarray[join_t]
right: np.ndarray, # ndarray[join_t]
) -> tuple[
np.ndarray, # np.ndarray[join_t]
npt.NDArray[np.intp],
npt.NDArray[np.intp],
]: ...
def outer_join_indexer(
left: np.ndarray, # ndarray[join_t]
right: np.ndarray, # ndarray[join_t]
) -> tuple[
np.ndarray, # np.ndarray[join_t]
npt.NDArray[np.intp],
npt.NDArray[np.intp],
]: ...
def asof_join_backward_on_X_by_Y(
left_values: np.ndarray, # ndarray[numeric_t]
right_values: np.ndarray, # ndarray[numeric_t]
left_by_values: np.ndarray, # const int64_t[:]
right_by_values: np.ndarray, # const int64_t[:]
allow_exact_matches: bool = ...,
tolerance: np.number | float | None = ...,
use_hashtable: bool = ...,
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
def asof_join_forward_on_X_by_Y(
left_values: np.ndarray, # ndarray[numeric_t]
right_values: np.ndarray, # ndarray[numeric_t]
left_by_values: np.ndarray, # const int64_t[:]
right_by_values: np.ndarray, # const int64_t[:]
allow_exact_matches: bool = ...,
tolerance: np.number | float | None = ...,
use_hashtable: bool = ...,
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
def asof_join_nearest_on_X_by_Y(
left_values: np.ndarray, # ndarray[numeric_t]
right_values: np.ndarray, # ndarray[numeric_t]
left_by_values: np.ndarray, # const int64_t[:]
right_by_values: np.ndarray, # const int64_t[:]
allow_exact_matches: bool = ...,
tolerance: np.number | float | None = ...,
use_hashtable: bool = ...,
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
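# Sketch of the join kernels above (internal; both inputs must be sorted;
# module path assumed to be pandas._libs.join). left_join_indexer returns
# the joined keys plus positional indexers, with -1 marking a missing
# right-hand match.
import numpy as np
from pandas._libs.join import left_join_indexer

left = np.array([1, 2, 4], dtype=np.int64)
right = np.array([2, 3, 4], dtype=np.int64)
joined, lidx, ridx = left_join_indexer(left, right)
# joined -> [1, 2, 4]; lidx -> [0, 1, 2]; ridx -> [-1, 0, 2]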

View File

@ -0,0 +1,23 @@
from typing import (
Any,
Callable,
)
def ujson_dumps(
obj: Any,
ensure_ascii: bool = ...,
double_precision: int = ...,
indent: int = ...,
orient: str = ...,
date_unit: str = ...,
iso_dates: bool = ...,
default_handler: None
| Callable[[Any], str | float | bool | list | dict | None] = ...,
) -> str: ...
def ujson_loads(
s: str,
precise_float: bool = ...,
numpy: bool = ...,
dtype: None = ...,
labelled: bool = ...,
) -> Any: ...
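# Sketch of the ujson bindings above (module path assumed to be
# pandas._libs.json); pandas wraps these in to_json/read_json, but they
# also round-trip plain Python objects.
from pandas._libs.json import ujson_dumps, ujson_loads

payload = ujson_dumps({"a": [1, 2.5]}, double_precision=10)
assert ujson_loads(payload) == {"a": [1, 2.5]}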

View File

@ -0,0 +1,216 @@
# TODO(npdtypes): Many types specified here can be made more specific/accurate;
# the more specific versions are specified in comments
from decimal import Decimal
from typing import (
Any,
Callable,
Final,
Generator,
Hashable,
Literal,
TypeAlias,
overload,
)
import numpy as np
from pandas._libs.interval import Interval
from pandas._libs.tslibs import Period
from pandas._typing import (
ArrayLike,
DtypeObj,
TypeGuard,
npt,
)
# placeholder until we can specify np.ndarray[object, ndim=2]
ndarray_obj_2d = np.ndarray
from enum import Enum
class _NoDefault(Enum):
no_default = ...
no_default: Final = _NoDefault.no_default
NoDefault: TypeAlias = Literal[_NoDefault.no_default]
i8max: int
u8max: int
def is_np_dtype(dtype: object, kinds: str | None = ...) -> TypeGuard[np.dtype]: ...
def item_from_zerodim(val: object) -> object: ...
def infer_dtype(value: object, skipna: bool = ...) -> str: ...
def is_iterator(obj: object) -> bool: ...
def is_scalar(val: object) -> bool: ...
def is_list_like(obj: object, allow_sets: bool = ...) -> bool: ...
def is_pyarrow_array(obj: object) -> bool: ...
def is_period(val: object) -> TypeGuard[Period]: ...
def is_interval(obj: object) -> TypeGuard[Interval]: ...
def is_decimal(obj: object) -> TypeGuard[Decimal]: ...
def is_complex(obj: object) -> TypeGuard[complex]: ...
def is_bool(obj: object) -> TypeGuard[bool | np.bool_]: ...
def is_integer(obj: object) -> TypeGuard[int | np.integer]: ...
def is_int_or_none(obj) -> bool: ...
def is_float(obj: object) -> TypeGuard[float]: ...
def is_interval_array(values: np.ndarray) -> bool: ...
def is_datetime64_array(values: np.ndarray, skipna: bool = True) -> bool: ...
def is_timedelta_or_timedelta64_array(
values: np.ndarray, skipna: bool = True
) -> bool: ...
def is_datetime_with_singletz_array(values: np.ndarray) -> bool: ...
def is_time_array(values: np.ndarray, skipna: bool = ...) -> bool: ...
def is_date_array(values: np.ndarray, skipna: bool = ...) -> bool: ...
def is_datetime_array(values: np.ndarray, skipna: bool = ...) -> bool: ...
def is_string_array(values: np.ndarray, skipna: bool = ...) -> bool: ...
def is_float_array(values: np.ndarray) -> bool: ...
def is_integer_array(values: np.ndarray, skipna: bool = ...) -> bool: ...
def is_bool_array(values: np.ndarray, skipna: bool = ...) -> bool: ...
def fast_multiget(
mapping: dict,
keys: np.ndarray, # object[:]
default=...,
) -> np.ndarray: ...
def fast_unique_multiple_list_gen(gen: Generator, sort: bool = ...) -> list: ...
def fast_unique_multiple_list(lists: list, sort: bool | None = ...) -> list: ...
def map_infer(
arr: np.ndarray,
f: Callable[[Any], Any],
convert: bool = ...,
ignore_na: bool = ...,
) -> np.ndarray: ...
@overload
def maybe_convert_objects(
objects: npt.NDArray[np.object_],
*,
try_float: bool = ...,
safe: bool = ...,
convert_numeric: bool = ...,
convert_non_numeric: Literal[False] = ...,
convert_string: Literal[False] = ...,
convert_to_nullable_dtype: Literal[False] = ...,
dtype_if_all_nat: DtypeObj | None = ...,
) -> npt.NDArray[np.object_ | np.number]: ...
@overload
def maybe_convert_objects(
objects: npt.NDArray[np.object_],
*,
try_float: bool = ...,
safe: bool = ...,
convert_numeric: bool = ...,
convert_non_numeric: bool = ...,
convert_string: bool = ...,
convert_to_nullable_dtype: Literal[True] = ...,
dtype_if_all_nat: DtypeObj | None = ...,
) -> ArrayLike: ...
@overload
def maybe_convert_objects(
objects: npt.NDArray[np.object_],
*,
try_float: bool = ...,
safe: bool = ...,
convert_numeric: bool = ...,
convert_non_numeric: bool = ...,
convert_string: bool = ...,
convert_to_nullable_dtype: bool = ...,
dtype_if_all_nat: DtypeObj | None = ...,
) -> ArrayLike: ...
@overload
def maybe_convert_numeric(
values: npt.NDArray[np.object_],
na_values: set,
convert_empty: bool = ...,
coerce_numeric: bool = ...,
convert_to_masked_nullable: Literal[False] = ...,
) -> tuple[np.ndarray, None]: ...
@overload
def maybe_convert_numeric(
values: npt.NDArray[np.object_],
na_values: set,
convert_empty: bool = ...,
coerce_numeric: bool = ...,
*,
convert_to_masked_nullable: Literal[True],
) -> tuple[np.ndarray, np.ndarray]: ...
# TODO: restrict `arr`?
def ensure_string_array(
arr,
na_value: object = ...,
convert_na_value: bool = ...,
copy: bool = ...,
skipna: bool = ...,
) -> npt.NDArray[np.object_]: ...
def convert_nans_to_NA(
arr: npt.NDArray[np.object_],
) -> npt.NDArray[np.object_]: ...
def fast_zip(ndarrays: list) -> npt.NDArray[np.object_]: ...
# TODO: can we be more specific about rows?
def to_object_array_tuples(rows: object) -> ndarray_obj_2d: ...
def tuples_to_object_array(
tuples: npt.NDArray[np.object_],
) -> ndarray_obj_2d: ...
# TODO: can we be more specific about rows?
def to_object_array(rows: object, min_width: int = ...) -> ndarray_obj_2d: ...
def dicts_to_array(dicts: list, columns: list) -> ndarray_obj_2d: ...
def maybe_booleans_to_slice(
mask: npt.NDArray[np.uint8],
) -> slice | npt.NDArray[np.uint8]: ...
def maybe_indices_to_slice(
indices: npt.NDArray[np.intp],
max_len: int,
) -> slice | npt.NDArray[np.intp]: ...
def is_all_arraylike(obj: list) -> bool: ...
# -----------------------------------------------------------------
# Functions which in reality take memoryviews
def memory_usage_of_objects(arr: np.ndarray) -> int: ... # object[:] # np.int64
def map_infer_mask(
arr: np.ndarray,
f: Callable[[Any], Any],
mask: np.ndarray, # const uint8_t[:]
convert: bool = ...,
na_value: Any = ...,
dtype: np.dtype = ...,
) -> np.ndarray: ...
def indices_fast(
index: npt.NDArray[np.intp],
labels: np.ndarray, # const int64_t[:]
keys: list,
sorted_labels: list[npt.NDArray[np.int64]],
) -> dict[Hashable, npt.NDArray[np.intp]]: ...
def generate_slices(
    labels: np.ndarray,  # const intp_t[:]
    ngroups: int,
) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: ...
def count_level_2d(
mask: np.ndarray, # ndarray[uint8_t, ndim=2, cast=True],
labels: np.ndarray, # const intp_t[:]
max_bin: int,
) -> np.ndarray: ... # np.ndarray[np.int64, ndim=2]
def get_level_sorter(
codes: np.ndarray, # const int64_t[:]
starts: np.ndarray, # const intp_t[:]
) -> np.ndarray: ... # np.ndarray[np.intp, ndim=1]
def generate_bins_dt64(
values: npt.NDArray[np.int64],
binner: np.ndarray, # const int64_t[:]
closed: object = ...,
hasnans: bool = ...,
) -> np.ndarray: ... # np.ndarray[np.int64, ndim=1]
def array_equivalent_object(
left: npt.NDArray[np.object_],
right: npt.NDArray[np.object_],
) -> bool: ...
def has_infs(arr: np.ndarray) -> bool: ... # const floating[:]
def has_only_ints_or_nan(arr: np.ndarray) -> bool: ... # const floating[:]
def get_reverse_indexer(
indexer: np.ndarray, # const intp_t[:]
length: int,
) -> npt.NDArray[np.intp]: ...
def is_bool_list(obj: list) -> bool: ...
def dtypes_all_equal(types: list[DtypeObj]) -> bool: ...
def is_range_indexer(
    left: np.ndarray,  # np.ndarray[np.int64, ndim=1]
    n: int,
) -> bool: ...
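# A few of the inference helpers above in action (internal but stable
# enough to illustrate; module path assumed to be pandas._libs.lib):
import numpy as np
from pandas._libs import lib

assert lib.infer_dtype(np.array([1, 2, 3]), skipna=True) == "integer"
assert lib.is_scalar(3.14) and not lib.is_scalar([3.14])
mixed = np.array([1, "x", None], dtype=object)
assert lib.maybe_convert_objects(mixed).dtype == object  # nothing to convert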

View File

@ -0,0 +1,16 @@
import numpy as np
from numpy import typing as npt
class NAType:
def __new__(cls, *args, **kwargs): ...
NA: NAType
def is_matching_na(
left: object, right: object, nan_matches_none: bool = ...
) -> bool: ...
def isposinf_scalar(val: object) -> bool: ...
def isneginf_scalar(val: object) -> bool: ...
def checknull(val: object, inf_as_na: bool = ...) -> bool: ...
def isnaobj(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ...
def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
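# Sketch of the null-checking helpers above (module path assumed to be
# pandas._libs.missing): checknull recognizes the NA singleton alongside
# None and NaN, while is_matching_na distinguishes kinds of missingness.
import numpy as np
from pandas._libs.missing import NA, checknull, is_matching_na

assert checknull(NA) and checknull(None) and checknull(float("nan"))
assert is_matching_na(np.nan, np.nan)
assert not is_matching_na(np.nan, NA)  # different "kinds" of NA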

View File

@ -0,0 +1,51 @@
from typing import (
Any,
Callable,
Iterable,
Literal,
TypeAlias,
overload,
)
import numpy as np
from pandas._typing import npt
_BinOp: TypeAlias = Callable[[Any, Any], Any]
_BoolOp: TypeAlias = Callable[[Any, Any], bool]
def scalar_compare(
values: np.ndarray, # object[:]
val: object,
op: _BoolOp, # {operator.eq, operator.ne, ...}
) -> npt.NDArray[np.bool_]: ...
def vec_compare(
left: npt.NDArray[np.object_],
right: npt.NDArray[np.object_],
op: _BoolOp, # {operator.eq, operator.ne, ...}
) -> npt.NDArray[np.bool_]: ...
def scalar_binop(
values: np.ndarray, # object[:]
val: object,
op: _BinOp, # binary operator
) -> np.ndarray: ...
def vec_binop(
left: np.ndarray, # object[:]
right: np.ndarray, # object[:]
op: _BinOp, # binary operator
) -> np.ndarray: ...
@overload
def maybe_convert_bool(
arr: npt.NDArray[np.object_],
true_values: Iterable | None = None,
false_values: Iterable | None = None,
convert_to_masked_nullable: Literal[False] = ...,
) -> tuple[np.ndarray, None]: ...
@overload
def maybe_convert_bool(
arr: npt.NDArray[np.object_],
true_values: Iterable = ...,
false_values: Iterable = ...,
*,
convert_to_masked_nullable: Literal[True],
) -> tuple[np.ndarray, np.ndarray]: ...
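# Sketch of the object-dtype comparison kernels above (module path assumed
# to be pandas._libs.ops): scalar_compare applies a comparison operator
# elementwise over an object array.
import operator
import numpy as np
from pandas._libs import ops

values = np.array(["a", "b", "a"], dtype=object)
result = ops.scalar_compare(values, "a", operator.eq)
# -> array([ True, False,  True])
assert result.dtype == np.bool_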

View File

@ -0,0 +1,5 @@
import numpy as np
def maybe_dispatch_ufunc_to_dunder_op(
self, ufunc: np.ufunc, method: str, *inputs, **kwargs
): ...

View File

@ -0,0 +1,77 @@
from typing import (
Hashable,
Literal,
)
import numpy as np
from pandas._typing import (
ArrayLike,
Dtype,
npt,
)
STR_NA_VALUES: set[str]
DEFAULT_BUFFER_HEURISTIC: int
def sanitize_objects(
values: npt.NDArray[np.object_],
na_values: set,
) -> int: ...
class TextReader:
unnamed_cols: set[str]
table_width: int # int64_t
leading_cols: int # int64_t
header: list[list[int]] # non-negative integers
def __init__(
self,
source,
delimiter: bytes | str = ..., # single-character only
header=...,
header_start: int = ..., # int64_t
header_end: int = ..., # uint64_t
index_col=...,
names=...,
tokenize_chunksize: int = ..., # int64_t
delim_whitespace: bool = ...,
converters=...,
skipinitialspace: bool = ...,
escapechar: bytes | str | None = ..., # single-character only
doublequote: bool = ...,
quotechar: str | bytes | None = ..., # at most 1 character
quoting: int = ...,
lineterminator: bytes | str | None = ..., # at most 1 character
comment=...,
decimal: bytes | str = ..., # single-character only
thousands: bytes | str | None = ..., # single-character only
dtype: Dtype | dict[Hashable, Dtype] = ...,
usecols=...,
error_bad_lines: bool = ...,
warn_bad_lines: bool = ...,
na_filter: bool = ...,
na_values=...,
na_fvalues=...,
keep_default_na: bool = ...,
true_values=...,
false_values=...,
allow_leading_cols: bool = ...,
skiprows=...,
skipfooter: int = ..., # int64_t
verbose: bool = ...,
float_precision: Literal["round_trip", "legacy", "high"] | None = ...,
skip_blank_lines: bool = ...,
encoding_errors: bytes | str = ...,
) -> None: ...
def set_noconvert(self, i: int) -> None: ...
def remove_noconvert(self, i: int) -> None: ...
def close(self) -> None: ...
def read(self, rows: int | None = ...) -> dict[int, ArrayLike]: ...
def read_low_memory(self, rows: int | None) -> list[dict[int, ArrayLike]]: ...
# _maybe_upcast, na_values are only exposed for testing
na_values: dict
def _maybe_upcast(
arr, use_dtype_backend: bool = ..., dtype_backend: str = ...
) -> np.ndarray: ...
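# Hypothetical direct use of the C tokenizer above (module path assumed to
# be pandas._libs.parsers); real code should go through pd.read_csv, which
# wraps TextReader. read() returns a dict of column position -> parsed array.
import io
from pandas._libs.parsers import TextReader

reader = TextReader(io.BytesIO(b"1,2\n3,4\n"), header=None)
chunks = reader.read()
# chunks -> {0: array([1, 3]), 1: array([2, 4])}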

View File

@ -0,0 +1,27 @@
from typing import (
Sequence,
overload,
)
from pandas._typing import (
AnyArrayLike,
DataFrame,
Index,
Series,
)
# note: this is a lie to make type checkers happy (they special
# case property). cache_readonly uses attribute names similar to
# property (fget) but it does not provide fset and fdel.
cache_readonly = property
class AxisProperty:
axis: int
def __init__(self, axis: int = ..., doc: str = ...) -> None: ...
@overload
def __get__(self, obj: DataFrame | Series, type) -> Index: ...
@overload
def __get__(self, obj: None, type) -> AxisProperty: ...
def __set__(
self, obj: DataFrame | Series, value: AnyArrayLike | Sequence
) -> None: ...
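# The descriptor above is what backs DataFrame.index / Series.index;
# reading returns the Index, and assignment goes through __set__, which
# validates the new labels:
import pandas as pd

s = pd.Series([1, 2], index=["x", "y"])
s.index = ["p", "q"]          # AxisProperty.__set__ checks the length
assert list(s.index) == ["p", "q"]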

View File

@ -0,0 +1,16 @@
import numpy as np
from pandas._typing import npt
def unstack(
values: np.ndarray, # reshape_t[:, :]
mask: np.ndarray, # const uint8_t[:]
stride: int,
length: int,
width: int,
new_values: np.ndarray, # reshape_t[:, :]
new_mask: np.ndarray, # uint8_t[:, :]
) -> None: ...
def explode(
values: npt.NDArray[np.object_],
) -> tuple[npt.NDArray[np.object_], npt.NDArray[np.int64]]: ...
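# Sketch of explode above (module path assumed to be pandas._libs.reshape):
# it flattens an object array of list-likes and reports how many output
# rows each input row produced (an empty list yields one NaN row).
import numpy as np
from pandas._libs.reshape import explode

values = np.array([[1, 2], [], [3]], dtype=object)
flat, counts = explode(values)
# flat -> [1, 2, nan, 3]; counts -> [2, 1, 1]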

View File

@ -0,0 +1,7 @@
from pandas.io.sas.sas7bdat import SAS7BDATReader
class Parser:
def __init__(self, parser: SAS7BDATReader) -> None: ...
def read(self, nrows: int) -> None: ...
def get_subheader_index(signature: bytes) -> int: ...

View File

@ -0,0 +1,51 @@
from typing import Sequence
import numpy as np
from pandas._typing import (
Self,
npt,
)
class SparseIndex:
length: int
npoints: int
def __init__(self) -> None: ...
@property
def ngaps(self) -> int: ...
@property
def nbytes(self) -> int: ...
@property
def indices(self) -> npt.NDArray[np.int32]: ...
def equals(self, other) -> bool: ...
def lookup(self, index: int) -> np.int32: ...
def lookup_array(self, indexer: npt.NDArray[np.int32]) -> npt.NDArray[np.int32]: ...
def to_int_index(self) -> IntIndex: ...
def to_block_index(self) -> BlockIndex: ...
def intersect(self, y_: SparseIndex) -> Self: ...
def make_union(self, y_: SparseIndex) -> Self: ...
class IntIndex(SparseIndex):
indices: npt.NDArray[np.int32]
def __init__(
self, length: int, indices: Sequence[int], check_integrity: bool = ...
) -> None: ...
class BlockIndex(SparseIndex):
nblocks: int
blocs: np.ndarray
blengths: np.ndarray
def __init__(
self, length: int, blocs: np.ndarray, blengths: np.ndarray
) -> None: ...
# Override to have correct parameters
def intersect(self, other: SparseIndex) -> Self: ...
def make_union(self, y: SparseIndex) -> Self: ...
def make_mask_object_ndarray(
arr: npt.NDArray[np.object_], fill_value
) -> npt.NDArray[np.bool_]: ...
def get_blocks(
indices: npt.NDArray[np.int32],
) -> tuple[npt.NDArray[np.int32], npt.NDArray[np.int32]]: ...
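# Sketch of the sparse index types above (module path assumed to be
# pandas._libs.sparse): IntIndex stores explicit positions, and
# intersect/make_union operate on those position sets.
import numpy as np
from pandas._libs.sparse import IntIndex

left = IntIndex(10, np.array([0, 2, 5], dtype=np.int32))
right = IntIndex(10, np.array([2, 5, 9], dtype=np.int32))
assert list(left.intersect(right).indices) == [2, 5]
assert left.ngaps == 7  # 10 positions, 3 of them stored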

View File

@ -0,0 +1,12 @@
def assert_dict_equal(a, b, compare_keys: bool = ...): ...
def assert_almost_equal(
a,
b,
rtol: float = ...,
atol: float = ...,
check_dtype: bool = ...,
obj=...,
lobj=...,
robj=...,
index_values=...,
): ...
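# Sketch: assert_almost_equal is the tolerance-based comparator behind
# pandas' testing helpers; it raises on mismatch. The default rtol/atol
# values are assumptions here.
from pandas._libs.testing import assert_almost_equal

assert_almost_equal(0.3000000001, 0.3)   # within default tolerance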

View File

@ -0,0 +1,37 @@
from datetime import tzinfo
import numpy as np
from pandas._typing import npt
def format_array_from_datetime(
values: npt.NDArray[np.int64],
tz: tzinfo | None = ...,
format: str | None = ...,
na_rep: str | float = ...,
reso: int = ..., # NPY_DATETIMEUNIT
) -> npt.NDArray[np.object_]: ...
def array_with_unit_to_datetime(
values: npt.NDArray[np.object_],
unit: str,
errors: str = ...,
) -> tuple[np.ndarray, tzinfo | None]: ...
def first_non_null(values: np.ndarray) -> int: ...
def array_to_datetime(
values: npt.NDArray[np.object_],
errors: str = ...,
dayfirst: bool = ...,
yearfirst: bool = ...,
utc: bool = ...,
creso: int = ...,
) -> tuple[np.ndarray, tzinfo | None]: ...
# returned ndarray may be object dtype or datetime64[ns]
def array_to_datetime_with_tz(
values: npt.NDArray[np.object_],
tz: tzinfo,
dayfirst: bool,
yearfirst: bool,
creso: int,
) -> npt.NDArray[np.int64]: ...
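# Sketch of array_to_datetime above (module path assumed to be
# pandas._libs.tslib): it parses an object array of strings into
# datetime64 values, returning the inferred tz (None when naive).
import numpy as np
from pandas._libs.tslib import array_to_datetime

arr = np.array(["2024-01-01", "2024-01-02"], dtype=object)
result, tz = array_to_datetime(arr)
assert tz is None and result.dtype.kind == "M"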

View File

@ -0,0 +1,87 @@
__all__ = [
"dtypes",
"localize_pydatetime",
"NaT",
"NaTType",
"iNaT",
"nat_strings",
"OutOfBoundsDatetime",
"OutOfBoundsTimedelta",
"IncompatibleFrequency",
"Period",
"Resolution",
"Timedelta",
"normalize_i8_timestamps",
"is_date_array_normalized",
"dt64arr_to_periodarr",
"delta_to_nanoseconds",
"ints_to_pydatetime",
"ints_to_pytimedelta",
"get_resolution",
"Timestamp",
"tz_convert_from_utc_single",
"tz_convert_from_utc",
"to_offset",
"Tick",
"BaseOffset",
"tz_compare",
"is_unitless",
"astype_overflowsafe",
"get_unit_from_dtype",
"periods_per_day",
"periods_per_second",
"guess_datetime_format",
"add_overflowsafe",
"get_supported_dtype",
"is_supported_dtype",
]
from pandas._libs.tslibs import dtypes # pylint: disable=import-self
from pandas._libs.tslibs.conversion import localize_pydatetime
from pandas._libs.tslibs.dtypes import (
Resolution,
periods_per_day,
periods_per_second,
)
from pandas._libs.tslibs.nattype import (
NaT,
NaTType,
iNaT,
nat_strings,
)
from pandas._libs.tslibs.np_datetime import (
OutOfBoundsDatetime,
OutOfBoundsTimedelta,
add_overflowsafe,
astype_overflowsafe,
get_supported_dtype,
is_supported_dtype,
is_unitless,
py_get_unit_from_dtype as get_unit_from_dtype,
)
from pandas._libs.tslibs.offsets import (
BaseOffset,
Tick,
to_offset,
)
from pandas._libs.tslibs.parsing import guess_datetime_format
from pandas._libs.tslibs.period import (
IncompatibleFrequency,
Period,
)
from pandas._libs.tslibs.timedeltas import (
Timedelta,
delta_to_nanoseconds,
ints_to_pytimedelta,
)
from pandas._libs.tslibs.timestamps import Timestamp
from pandas._libs.tslibs.timezones import tz_compare
from pandas._libs.tslibs.tzconversion import tz_convert_from_utc_single
from pandas._libs.tslibs.vectorized import (
dt64arr_to_periodarr,
get_resolution,
ints_to_pydatetime,
is_date_array_normalized,
normalize_i8_timestamps,
tz_convert_from_utc,
)
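# The re-exports above give a single import surface for the timestamp
# internals; for example:
from pandas._libs.tslibs import Timestamp, to_offset

assert Timestamp("2024-01-01") + to_offset("2D") == Timestamp("2024-01-03")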

View File

@ -0,0 +1,12 @@
DAYS: list[str]
MONTH_ALIASES: dict[int, str]
MONTH_NUMBERS: dict[str, int]
MONTHS: list[str]
int_to_weekday: dict[int, str]
def get_firstbday(year: int, month: int) -> int: ...
def get_lastbday(year: int, month: int) -> int: ...
def get_day_of_year(year: int, month: int, day: int) -> int: ...
def get_iso_calendar(year: int, month: int, day: int) -> tuple[int, int, int]: ...
def get_week_of_year(year: int, month: int, day: int) -> int: ...
def get_days_in_month(year: int, month: int) -> int: ...
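# Sketch of the calendar helpers above:
from pandas._libs.tslibs.ccalendar import get_days_in_month, get_iso_calendar

assert get_days_in_month(2024, 2) == 29              # leap year
assert get_iso_calendar(2024, 1, 1) == (2024, 1, 1)  # ISO year, week, weekday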

View File

@ -0,0 +1,14 @@
from datetime import (
datetime,
tzinfo,
)
import numpy as np
DT64NS_DTYPE: np.dtype
TD64NS_DTYPE: np.dtype
def localize_pydatetime(dt: datetime, tz: tzinfo | None) -> datetime: ...
def cast_from_unit_vectorized(
values: np.ndarray, unit: str, out_unit: str = ...
) -> np.ndarray: ...
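# Sketch of localize_pydatetime above: it attaches a timezone to a naive
# datetime, deferring to tz.localize for pytz-style zones.
from datetime import datetime, timezone
from pandas._libs.tslibs.conversion import localize_pydatetime

aware = localize_pydatetime(datetime(2024, 1, 1, 12), timezone.utc)
assert aware.tzinfo is timezone.utc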

View File

@ -0,0 +1,83 @@
from enum import Enum
OFFSET_TO_PERIOD_FREQSTR: dict[str, str]
def periods_per_day(reso: int = ...) -> int: ...
def periods_per_second(reso: int) -> int: ...
def abbrev_to_npy_unit(abbrev: str | None) -> int: ...
def freq_to_period_freqstr(freq_n: int, freq_name: str) -> str: ...
class PeriodDtypeBase:
_dtype_code: int # PeriodDtypeCode
_n: int
# actually __cinit__
def __new__(cls, code: int, n: int): ...
@property
def _freq_group_code(self) -> int: ...
@property
def _resolution_obj(self) -> Resolution: ...
def _get_to_timestamp_base(self) -> int: ...
@property
def _freqstr(self) -> str: ...
def __hash__(self) -> int: ...
def _is_tick_like(self) -> bool: ...
@property
def _creso(self) -> int: ...
@property
def _td64_unit(self) -> str: ...
class FreqGroup(Enum):
FR_ANN: int
FR_QTR: int
FR_MTH: int
FR_WK: int
FR_BUS: int
FR_DAY: int
FR_HR: int
FR_MIN: int
FR_SEC: int
FR_MS: int
FR_US: int
FR_NS: int
FR_UND: int
@staticmethod
def from_period_dtype_code(code: int) -> FreqGroup: ...
class Resolution(Enum):
RESO_NS: int
RESO_US: int
RESO_MS: int
RESO_SEC: int
RESO_MIN: int
RESO_HR: int
RESO_DAY: int
RESO_MTH: int
RESO_QTR: int
RESO_YR: int
def __lt__(self, other: Resolution) -> bool: ...
def __ge__(self, other: Resolution) -> bool: ...
@property
def attrname(self) -> str: ...
@classmethod
def from_attrname(cls, attrname: str) -> Resolution: ...
@classmethod
def get_reso_from_freqstr(cls, freq: str) -> Resolution: ...
@property
def attr_abbrev(self) -> str: ...
class NpyDatetimeUnit(Enum):
NPY_FR_Y: int
NPY_FR_M: int
NPY_FR_W: int
NPY_FR_D: int
NPY_FR_h: int
NPY_FR_m: int
NPY_FR_s: int
NPY_FR_ms: int
NPY_FR_us: int
NPY_FR_ns: int
NPY_FR_ps: int
NPY_FR_fs: int
NPY_FR_as: int
NPY_FR_GENERIC: int
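# Sketch of Resolution above: members order by coarseness, and
# from_attrname maps datetime field names back to members.
from pandas._libs.tslibs.dtypes import Resolution

assert Resolution.from_attrname("second") is Resolution.RESO_SEC
assert Resolution.RESO_SEC < Resolution.RESO_MIN   # finer < coarser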

View File

@ -0,0 +1,62 @@
import numpy as np
from pandas._typing import npt
def build_field_sarray(
dtindex: npt.NDArray[np.int64], # const int64_t[:]
reso: int, # NPY_DATETIMEUNIT
) -> np.ndarray: ...
def month_position_check(fields, weekdays) -> str | None: ...
def get_date_name_field(
dtindex: npt.NDArray[np.int64], # const int64_t[:]
field: str,
locale: str | None = ...,
reso: int = ..., # NPY_DATETIMEUNIT
) -> npt.NDArray[np.object_]: ...
def get_start_end_field(
dtindex: npt.NDArray[np.int64],
field: str,
freqstr: str | None = ...,
month_kw: int = ...,
reso: int = ..., # NPY_DATETIMEUNIT
) -> npt.NDArray[np.bool_]: ...
def get_date_field(
dtindex: npt.NDArray[np.int64], # const int64_t[:]
field: str,
reso: int = ..., # NPY_DATETIMEUNIT
) -> npt.NDArray[np.int32]: ...
def get_timedelta_field(
tdindex: npt.NDArray[np.int64], # const int64_t[:]
field: str,
reso: int = ..., # NPY_DATETIMEUNIT
) -> npt.NDArray[np.int32]: ...
def get_timedelta_days(
tdindex: npt.NDArray[np.int64], # const int64_t[:]
reso: int = ..., # NPY_DATETIMEUNIT
) -> npt.NDArray[np.int64]: ...
def isleapyear_arr(
years: np.ndarray,
) -> npt.NDArray[np.bool_]: ...
def build_isocalendar_sarray(
dtindex: npt.NDArray[np.int64], # const int64_t[:]
reso: int, # NPY_DATETIMEUNIT
) -> np.ndarray: ...
def _get_locale_names(name_type: str, locale: str | None = ...): ...
class RoundTo:
@property
def MINUS_INFTY(self) -> int: ...
@property
def PLUS_INFTY(self) -> int: ...
@property
def NEAREST_HALF_EVEN(self) -> int: ...
@property
def NEAREST_HALF_PLUS_INFTY(self) -> int: ...
@property
def NEAREST_HALF_MINUS_INFTY(self) -> int: ...
def round_nsint64(
values: npt.NDArray[np.int64],
mode: RoundTo,
nanos: int,
) -> npt.NDArray[np.int64]: ...
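# Sketch of get_date_field above: it extracts a calendar field from the
# int64 (epoch-nanosecond) view of a datetime64[ns] array; the "M" field
# code (month) is an assumption based on pandas' internal usage.
import numpy as np
from pandas._libs.tslibs.fields import get_date_field

stamps = np.array(["2024-01-31", "2024-02-01"], dtype="M8[ns]").view("i8")
months = get_date_field(stamps, "M")
# -> array([1, 2], dtype=int32)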

View File

@ -0,0 +1,141 @@
from datetime import (
datetime,
timedelta,
tzinfo as _tzinfo,
)
import typing
import numpy as np
from pandas._libs.tslibs.period import Period
from pandas._typing import Self
NaT: NaTType
iNaT: int
nat_strings: set[str]
_NaTComparisonTypes: typing.TypeAlias = (
datetime | timedelta | Period | np.datetime64 | np.timedelta64
)
class _NatComparison:
def __call__(self, other: _NaTComparisonTypes) -> bool: ...
class NaTType:
_value: np.int64
@property
def value(self) -> int: ...
@property
def asm8(self) -> np.datetime64: ...
def to_datetime64(self) -> np.datetime64: ...
def to_numpy(
self, dtype: np.dtype | str | None = ..., copy: bool = ...
) -> np.datetime64 | np.timedelta64: ...
@property
def is_leap_year(self) -> bool: ...
@property
def is_month_start(self) -> bool: ...
@property
def is_quarter_start(self) -> bool: ...
@property
def is_year_start(self) -> bool: ...
@property
def is_month_end(self) -> bool: ...
@property
def is_quarter_end(self) -> bool: ...
@property
def is_year_end(self) -> bool: ...
@property
def day_of_year(self) -> float: ...
@property
def dayofyear(self) -> float: ...
@property
def days_in_month(self) -> float: ...
@property
def daysinmonth(self) -> float: ...
@property
def day_of_week(self) -> float: ...
@property
def dayofweek(self) -> float: ...
@property
def week(self) -> float: ...
@property
def weekofyear(self) -> float: ...
def day_name(self) -> float: ...
def month_name(self) -> float: ...
def weekday(self) -> float: ...
def isoweekday(self) -> float: ...
def total_seconds(self) -> float: ...
def today(self, *args, **kwargs) -> NaTType: ...
def now(self, *args, **kwargs) -> NaTType: ...
def to_pydatetime(self) -> NaTType: ...
def date(self) -> NaTType: ...
def round(self) -> NaTType: ...
def floor(self) -> NaTType: ...
def ceil(self) -> NaTType: ...
@property
def tzinfo(self) -> None: ...
@property
def tz(self) -> None: ...
def tz_convert(self, tz: _tzinfo | str | None) -> NaTType: ...
def tz_localize(
self,
tz: _tzinfo | str | None,
ambiguous: str = ...,
nonexistent: str = ...,
) -> NaTType: ...
def replace(
self,
year: int | None = ...,
month: int | None = ...,
day: int | None = ...,
hour: int | None = ...,
minute: int | None = ...,
second: int | None = ...,
microsecond: int | None = ...,
nanosecond: int | None = ...,
tzinfo: _tzinfo | None = ...,
fold: int | None = ...,
) -> NaTType: ...
@property
def year(self) -> float: ...
@property
def quarter(self) -> float: ...
@property
def month(self) -> float: ...
@property
def day(self) -> float: ...
@property
def hour(self) -> float: ...
@property
def minute(self) -> float: ...
@property
def second(self) -> float: ...
@property
def millisecond(self) -> float: ...
@property
def microsecond(self) -> float: ...
@property
def nanosecond(self) -> float: ...
# inject Timedelta properties
@property
def days(self) -> float: ...
@property
def microseconds(self) -> float: ...
@property
def nanoseconds(self) -> float: ...
# inject Period properties
@property
def qyear(self) -> float: ...
def __eq__(self, other: object) -> bool: ...
def __ne__(self, other: object) -> bool: ...
__lt__: _NatComparison
__le__: _NatComparison
__gt__: _NatComparison
__ge__: _NatComparison
def __sub__(self, other: Self | timedelta | datetime) -> Self: ...
def __rsub__(self, other: Self | timedelta | datetime) -> Self: ...
def __add__(self, other: Self | timedelta | datetime) -> Self: ...
def __radd__(self, other: Self | timedelta | datetime) -> Self: ...
def __hash__(self) -> int: ...
def as_unit(self, unit: str, round_ok: bool = ...) -> NaTType: ...
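# The float-returning properties above encode that NaT's fields are NaN;
# NaT also compares unequal to everything, including itself:
import pandas as pd

assert (pd.NaT == pd.NaT) is False
assert pd.NaT != pd.NaT
assert pd.isna(pd.NaT + pd.Timedelta("1D"))   # arithmetic propagates NaT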

View File

@ -0,0 +1,27 @@
import numpy as np
from pandas._typing import npt
class OutOfBoundsDatetime(ValueError): ...
class OutOfBoundsTimedelta(ValueError): ...
# only exposed for testing
def py_get_unit_from_dtype(dtype: np.dtype) -> int: ...
def py_td64_to_tdstruct(td64: int, unit: int) -> dict: ...
def astype_overflowsafe(
values: np.ndarray,
dtype: np.dtype,
copy: bool = ...,
round_ok: bool = ...,
is_coerce: bool = ...,
) -> np.ndarray: ...
def is_unitless(dtype: np.dtype) -> bool: ...
def compare_mismatched_resolutions(
left: np.ndarray, right: np.ndarray, op
) -> npt.NDArray[np.bool_]: ...
def add_overflowsafe(
left: npt.NDArray[np.int64],
right: npt.NDArray[np.int64],
) -> npt.NDArray[np.int64]: ...
def get_supported_dtype(dtype: np.dtype) -> np.dtype: ...
def is_supported_dtype(dtype: np.dtype) -> bool: ...
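# Sketch of astype_overflowsafe above: casting to a finer unit raises
# instead of silently wrapping when values exceed the target range.
import numpy as np
from pandas._libs.tslibs.np_datetime import (
    OutOfBoundsDatetime,
    astype_overflowsafe,
)

arr = np.array(["2500-01-01"], dtype="M8[s]")
try:
    astype_overflowsafe(arr, np.dtype("M8[ns]"))
except OutOfBoundsDatetime:
    pass  # 2500-01-01 is outside the datetime64[ns] range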

View File

@ -0,0 +1,287 @@
from datetime import (
datetime,
time,
timedelta,
)
from typing import (
Any,
Collection,
Literal,
TypeVar,
overload,
)
import numpy as np
from pandas._libs.tslibs.nattype import NaTType
from pandas._typing import (
OffsetCalendar,
Self,
npt,
)
from .timedeltas import Timedelta
_BaseOffsetT = TypeVar("_BaseOffsetT", bound=BaseOffset)
_DatetimeT = TypeVar("_DatetimeT", bound=datetime)
_TimedeltaT = TypeVar("_TimedeltaT", bound=timedelta)
_relativedelta_kwds: set[str]
prefix_mapping: dict[str, type]
class ApplyTypeError(TypeError): ...
class BaseOffset:
n: int
normalize: bool
def __init__(self, n: int = ..., normalize: bool = ...) -> None: ...
def __eq__(self, other) -> bool: ...
def __ne__(self, other) -> bool: ...
def __hash__(self) -> int: ...
@property
def kwds(self) -> dict: ...
@property
def base(self) -> BaseOffset: ...
@overload
def __add__(self, other: npt.NDArray[np.object_]) -> npt.NDArray[np.object_]: ...
@overload
def __add__(self, other: BaseOffset) -> Self: ...
@overload
def __add__(self, other: _DatetimeT) -> _DatetimeT: ...
@overload
def __add__(self, other: _TimedeltaT) -> _TimedeltaT: ...
@overload
def __radd__(self, other: npt.NDArray[np.object_]) -> npt.NDArray[np.object_]: ...
@overload
def __radd__(self, other: BaseOffset) -> Self: ...
@overload
def __radd__(self, other: _DatetimeT) -> _DatetimeT: ...
@overload
def __radd__(self, other: _TimedeltaT) -> _TimedeltaT: ...
@overload
def __radd__(self, other: NaTType) -> NaTType: ...
def __sub__(self, other: BaseOffset) -> Self: ...
@overload
def __rsub__(self, other: npt.NDArray[np.object_]) -> npt.NDArray[np.object_]: ...
@overload
def __rsub__(self, other: BaseOffset): ...
@overload
def __rsub__(self, other: _DatetimeT) -> _DatetimeT: ...
@overload
def __rsub__(self, other: _TimedeltaT) -> _TimedeltaT: ...
@overload
def __mul__(self, other: np.ndarray) -> np.ndarray: ...
@overload
def __mul__(self, other: int): ...
@overload
def __rmul__(self, other: np.ndarray) -> np.ndarray: ...
@overload
def __rmul__(self, other: int) -> Self: ...
def __neg__(self) -> Self: ...
def copy(self) -> Self: ...
@property
def name(self) -> str: ...
@property
def rule_code(self) -> str: ...
@property
def freqstr(self) -> str: ...
def _apply(self, other): ...
def _apply_array(self, dtarr: np.ndarray) -> np.ndarray: ...
def rollback(self, dt: datetime) -> datetime: ...
def rollforward(self, dt: datetime) -> datetime: ...
def is_on_offset(self, dt: datetime) -> bool: ...
def __setstate__(self, state) -> None: ...
def __getstate__(self): ...
@property
def nanos(self) -> int: ...
def is_anchored(self) -> bool: ...
def _get_offset(name: str) -> BaseOffset: ...
class SingleConstructorOffset(BaseOffset):
@classmethod
def _from_name(cls, suffix: None = ...): ...
def __reduce__(self): ...
@overload
def to_offset(freq: None, is_period: bool = ...) -> None: ...
@overload
def to_offset(freq: _BaseOffsetT, is_period: bool = ...) -> _BaseOffsetT: ...
@overload
def to_offset(freq: timedelta | str, is_period: bool = ...) -> BaseOffset: ...
class Tick(SingleConstructorOffset):
_creso: int
_prefix: str
def __init__(self, n: int = ..., normalize: bool = ...) -> None: ...
@property
def delta(self) -> Timedelta: ...
@property
def nanos(self) -> int: ...
def delta_to_tick(delta: timedelta) -> Tick: ...
class Day(Tick): ...
class Hour(Tick): ...
class Minute(Tick): ...
class Second(Tick): ...
class Milli(Tick): ...
class Micro(Tick): ...
class Nano(Tick): ...
class RelativeDeltaOffset(BaseOffset):
def __init__(self, n: int = ..., normalize: bool = ..., **kwds: Any) -> None: ...
class BusinessMixin(SingleConstructorOffset):
def __init__(
self, n: int = ..., normalize: bool = ..., offset: timedelta = ...
) -> None: ...
class BusinessDay(BusinessMixin): ...
class BusinessHour(BusinessMixin):
def __init__(
self,
n: int = ...,
normalize: bool = ...,
start: str | time | Collection[str | time] = ...,
end: str | time | Collection[str | time] = ...,
offset: timedelta = ...,
) -> None: ...
class WeekOfMonthMixin(SingleConstructorOffset):
def __init__(
self, n: int = ..., normalize: bool = ..., weekday: int = ...
) -> None: ...
class YearOffset(SingleConstructorOffset):
def __init__(
self, n: int = ..., normalize: bool = ..., month: int | None = ...
) -> None: ...
class BYearEnd(YearOffset): ...
class BYearBegin(YearOffset): ...
class YearEnd(YearOffset): ...
class YearBegin(YearOffset): ...
class QuarterOffset(SingleConstructorOffset):
def __init__(
self, n: int = ..., normalize: bool = ..., startingMonth: int | None = ...
) -> None: ...
class BQuarterEnd(QuarterOffset): ...
class BQuarterBegin(QuarterOffset): ...
class QuarterEnd(QuarterOffset): ...
class QuarterBegin(QuarterOffset): ...
class MonthOffset(SingleConstructorOffset): ...
class MonthEnd(MonthOffset): ...
class MonthBegin(MonthOffset): ...
class BusinessMonthEnd(MonthOffset): ...
class BusinessMonthBegin(MonthOffset): ...
class SemiMonthOffset(SingleConstructorOffset):
def __init__(
self, n: int = ..., normalize: bool = ..., day_of_month: int | None = ...
) -> None: ...
class SemiMonthEnd(SemiMonthOffset): ...
class SemiMonthBegin(SemiMonthOffset): ...
class Week(SingleConstructorOffset):
def __init__(
self, n: int = ..., normalize: bool = ..., weekday: int | None = ...
) -> None: ...
class WeekOfMonth(WeekOfMonthMixin):
def __init__(
self, n: int = ..., normalize: bool = ..., week: int = ..., weekday: int = ...
) -> None: ...
class LastWeekOfMonth(WeekOfMonthMixin): ...
class FY5253Mixin(SingleConstructorOffset):
def __init__(
self,
n: int = ...,
normalize: bool = ...,
weekday: int = ...,
startingMonth: int = ...,
variation: Literal["nearest", "last"] = ...,
) -> None: ...
class FY5253(FY5253Mixin): ...
class FY5253Quarter(FY5253Mixin):
def __init__(
self,
n: int = ...,
normalize: bool = ...,
weekday: int = ...,
startingMonth: int = ...,
qtr_with_extra_week: int = ...,
variation: Literal["nearest", "last"] = ...,
) -> None: ...
class Easter(SingleConstructorOffset): ...
class _CustomBusinessMonth(BusinessMixin):
def __init__(
self,
n: int = ...,
normalize: bool = ...,
weekmask: str = ...,
holidays: list | None = ...,
calendar: OffsetCalendar | None = ...,
offset: timedelta = ...,
) -> None: ...
class CustomBusinessDay(BusinessDay):
def __init__(
self,
n: int = ...,
normalize: bool = ...,
weekmask: str = ...,
holidays: list | None = ...,
calendar: OffsetCalendar | None = ...,
offset: timedelta = ...,
) -> None: ...
class CustomBusinessHour(BusinessHour):
def __init__(
self,
n: int = ...,
normalize: bool = ...,
weekmask: str = ...,
holidays: list | None = ...,
calendar: OffsetCalendar | None = ...,
start: str | time | Collection[str | time] = ...,
end: str | time | Collection[str | time] = ...,
offset: timedelta = ...,
) -> None: ...
class CustomBusinessMonthEnd(_CustomBusinessMonth): ...
class CustomBusinessMonthBegin(_CustomBusinessMonth): ...
class OffsetMeta(type): ...
class DateOffset(RelativeDeltaOffset, metaclass=OffsetMeta): ...
BDay = BusinessDay
BMonthEnd = BusinessMonthEnd
BMonthBegin = BusinessMonthBegin
CBMonthEnd = CustomBusinessMonthEnd
CBMonthBegin = CustomBusinessMonthBegin
CDay = CustomBusinessDay
def roll_qtrday(
other: datetime, n: int, month: int, day_opt: str, modby: int
) -> int: ...
INVALID_FREQ_ERR_MSG: Literal["Invalid frequency: {0}"]
def shift_months(
dtindex: npt.NDArray[np.int64],
months: int,
day_opt: str | None = ...,
reso: int = ...,
) -> npt.NDArray[np.int64]: ...
_offset_map: dict[str, BaseOffset]
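A brief usage sketch of the public offset API these stubs describe (illustrative only; BDay is the alias defined above):

import pandas as pd
from pandas.tseries.offsets import BDay, MonthEnd

ts = pd.Timestamp("2024-01-05")  # a Friday
ts + BDay(1)      # Timestamp('2024-01-08 00:00:00') -- skips the weekend
ts + MonthEnd(1)  # Timestamp('2024-01-31 00:00:00')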

View File

@ -0,0 +1,33 @@
from datetime import datetime
import numpy as np
from pandas._typing import npt
class DateParseError(ValueError): ...
def py_parse_datetime_string(
date_string: str,
dayfirst: bool = ...,
yearfirst: bool = ...,
) -> datetime: ...
def parse_datetime_string_with_reso(
date_string: str,
freq: str | None = ...,
dayfirst: bool | None = ...,
yearfirst: bool | None = ...,
) -> tuple[datetime, str]: ...
def _does_string_look_like_datetime(py_string: str) -> bool: ...
def quarter_to_myear(year: int, quarter: int, freq: str) -> tuple[int, int]: ...
def try_parse_dates(
values: npt.NDArray[np.object_], # object[:]
parser,
) -> npt.NDArray[np.object_]: ...
def guess_datetime_format(
dt_str: str,
dayfirst: bool | None = ...,
) -> str | None: ...
def concat_date_cols(
date_cols: tuple,
) -> npt.NDArray[np.object_]: ...
def get_rule_month(source: str) -> str: ...
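Of these parsing helpers, guess_datetime_format is also re-exported publicly (as pandas.tseries.api.guess_datetime_format in recent pandas versions); a minimal sketch:

from pandas.tseries.api import guess_datetime_format

guess_datetime_format("2024-01-31")                 # '%Y-%m-%d'
guess_datetime_format("31/01/2024", dayfirst=True)  # '%d/%m/%Y'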

View File

@ -0,0 +1,135 @@
from datetime import timedelta
from typing import Literal
import numpy as np
from pandas._libs.tslibs.dtypes import PeriodDtypeBase
from pandas._libs.tslibs.nattype import NaTType
from pandas._libs.tslibs.offsets import BaseOffset
from pandas._libs.tslibs.timestamps import Timestamp
from pandas._typing import (
Frequency,
npt,
)
INVALID_FREQ_ERR_MSG: str
DIFFERENT_FREQ: str
class IncompatibleFrequency(ValueError): ...
def periodarr_to_dt64arr(
periodarr: npt.NDArray[np.int64], # const int64_t[:]
freq: int,
) -> npt.NDArray[np.int64]: ...
def period_asfreq_arr(
arr: npt.NDArray[np.int64],
freq1: int,
freq2: int,
end: bool,
) -> npt.NDArray[np.int64]: ...
def get_period_field_arr(
field: str,
arr: npt.NDArray[np.int64], # const int64_t[:]
freq: int,
) -> npt.NDArray[np.int64]: ...
def from_ordinals(
values: npt.NDArray[np.int64], # const int64_t[:]
freq: timedelta | BaseOffset | str,
) -> npt.NDArray[np.int64]: ...
def extract_ordinals(
values: npt.NDArray[np.object_],
freq: Frequency | int,
) -> npt.NDArray[np.int64]: ...
def extract_freq(
values: npt.NDArray[np.object_],
) -> BaseOffset: ...
def period_array_strftime(
values: npt.NDArray[np.int64],
dtype_code: int,
na_rep,
date_format: str | None,
) -> npt.NDArray[np.object_]: ...
# exposed for tests
def period_asfreq(ordinal: int, freq1: int, freq2: int, end: bool) -> int: ...
def period_ordinal(
y: int, m: int, d: int, h: int, min: int, s: int, us: int, ps: int, freq: int
) -> int: ...
def freq_to_dtype_code(freq: BaseOffset) -> int: ...
def validate_end_alias(how: str) -> Literal["E", "S"]: ...
class PeriodMixin:
@property
def end_time(self) -> Timestamp: ...
@property
def start_time(self) -> Timestamp: ...
def _require_matching_freq(self, other: BaseOffset, base: bool = ...) -> None: ...
class Period(PeriodMixin):
ordinal: int # int64_t
freq: BaseOffset
_dtype: PeriodDtypeBase
# error: "__new__" must return a class instance (got "Union[Period, NaTType]")
def __new__( # type: ignore[misc]
cls,
value=...,
freq: int | str | BaseOffset | None = ...,
ordinal: int | None = ...,
year: int | None = ...,
month: int | None = ...,
quarter: int | None = ...,
day: int | None = ...,
hour: int | None = ...,
minute: int | None = ...,
second: int | None = ...,
) -> Period | NaTType: ...
@classmethod
def _maybe_convert_freq(cls, freq) -> BaseOffset: ...
@classmethod
def _from_ordinal(cls, ordinal: int, freq: BaseOffset) -> Period: ...
@classmethod
def now(cls, freq: Frequency) -> Period: ...
def strftime(self, fmt: str | None) -> str: ...
def to_timestamp(
self,
freq: str | BaseOffset | None = ...,
how: str = ...,
) -> Timestamp: ...
def asfreq(self, freq: str | BaseOffset, how: str = ...) -> Period: ...
@property
def freqstr(self) -> str: ...
@property
def is_leap_year(self) -> bool: ...
@property
def daysinmonth(self) -> int: ...
@property
def days_in_month(self) -> int: ...
@property
def qyear(self) -> int: ...
@property
def quarter(self) -> int: ...
@property
def day_of_year(self) -> int: ...
@property
def weekday(self) -> int: ...
@property
def day_of_week(self) -> int: ...
@property
def week(self) -> int: ...
@property
def weekofyear(self) -> int: ...
@property
def second(self) -> int: ...
@property
def minute(self) -> int: ...
@property
def hour(self) -> int: ...
@property
def day(self) -> int: ...
@property
def month(self) -> int: ...
@property
def year(self) -> int: ...
def __sub__(self, other) -> Period | BaseOffset: ...
def __add__(self, other) -> Period: ...
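A short sketch of the Period API declared above (illustrative):

import pandas as pd

p = pd.Period("2024-02", freq="M")
p.asfreq("D", how="end")     # Period('2024-02-29', 'D') -- 2024 is a leap year
p.to_timestamp(how="start")  # Timestamp('2024-02-01 00:00:00')
p + 1                        # Period('2024-03', 'M')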

View File

@ -0,0 +1,14 @@
import numpy as np
from pandas._typing import npt
def array_strptime(
values: npt.NDArray[np.object_],
fmt: str | None,
exact: bool = ...,
errors: str = ...,
utc: bool = ...,
creso: int = ..., # NPY_DATETIMEUNIT
) -> tuple[np.ndarray, np.ndarray]: ...
# first ndarray is M8[ns], second is object ndarray of tzinfo | None
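array_strptime itself is internal; roughly, it is the engine behind pd.to_datetime when an explicit format is given. A sketch of the public entry point:

import pandas as pd

pd.to_datetime(["31-01-2024", "29-02-2024"], format="%d-%m-%Y")
# DatetimeIndex(['2024-01-31', '2024-02-29'], dtype='datetime64[ns]', freq=None)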

View File

@ -0,0 +1,174 @@
from datetime import timedelta
from typing import (
ClassVar,
Literal,
TypeAlias,
TypeVar,
overload,
)
import numpy as np
from pandas._libs.tslibs import (
NaTType,
Tick,
)
from pandas._typing import (
Frequency,
Self,
npt,
)
# This should be kept consistent with the keys in the dict timedelta_abbrevs
# in pandas/_libs/tslibs/timedeltas.pyx
UnitChoices: TypeAlias = Literal[
"Y",
"y",
"M",
"W",
"w",
"D",
"d",
"days",
"day",
"hours",
"hour",
"hr",
"h",
"m",
"minute",
"min",
"minutes",
"T",
"t",
"s",
"seconds",
"sec",
"second",
"ms",
"milliseconds",
"millisecond",
"milli",
"millis",
"L",
"l",
"us",
"microseconds",
"microsecond",
"µs",
"micro",
"micros",
"u",
"ns",
"nanoseconds",
"nano",
"nanos",
"nanosecond",
"n",
]
_S = TypeVar("_S", bound=timedelta)
def get_unit_for_round(freq, creso: int) -> int: ...
def disallow_ambiguous_unit(unit: str | None) -> None: ...
def ints_to_pytimedelta(
m8values: npt.NDArray[np.timedelta64],
box: bool = ...,
) -> npt.NDArray[np.object_]: ...
def array_to_timedelta64(
values: npt.NDArray[np.object_],
unit: str | None = ...,
errors: str = ...,
) -> np.ndarray: ... # np.ndarray[m8ns]
def parse_timedelta_unit(unit: str | None) -> UnitChoices: ...
def delta_to_nanoseconds(
delta: np.timedelta64 | timedelta | Tick,
reso: int = ..., # NPY_DATETIMEUNIT
round_ok: bool = ...,
) -> int: ...
def floordiv_object_array(
left: np.ndarray, right: npt.NDArray[np.object_]
) -> np.ndarray: ...
def truediv_object_array(
left: np.ndarray, right: npt.NDArray[np.object_]
) -> np.ndarray: ...
class Timedelta(timedelta):
_creso: int
min: ClassVar[Timedelta]
max: ClassVar[Timedelta]
resolution: ClassVar[Timedelta]
value: int # np.int64
_value: int # np.int64
# error: "__new__" must return a class instance (got "Union[Timestamp, NaTType]")
def __new__( # type: ignore[misc]
cls: type[_S],
value=...,
unit: str | None = ...,
**kwargs: float | np.integer | np.floating,
) -> _S | NaTType: ...
@classmethod
def _from_value_and_reso(cls, value: np.int64, reso: int) -> Timedelta: ...
@property
def days(self) -> int: ...
@property
def seconds(self) -> int: ...
@property
def microseconds(self) -> int: ...
def total_seconds(self) -> float: ...
def to_pytimedelta(self) -> timedelta: ...
def to_timedelta64(self) -> np.timedelta64: ...
@property
def asm8(self) -> np.timedelta64: ...
# TODO: round/floor/ceil could return NaT?
def round(self, freq: Frequency) -> Self: ...
def floor(self, freq: Frequency) -> Self: ...
def ceil(self, freq: Frequency) -> Self: ...
@property
def resolution_string(self) -> str: ...
def __add__(self, other: timedelta) -> Timedelta: ...
def __radd__(self, other: timedelta) -> Timedelta: ...
def __sub__(self, other: timedelta) -> Timedelta: ...
def __rsub__(self, other: timedelta) -> Timedelta: ...
def __neg__(self) -> Timedelta: ...
def __pos__(self) -> Timedelta: ...
def __abs__(self) -> Timedelta: ...
def __mul__(self, other: float) -> Timedelta: ...
def __rmul__(self, other: float) -> Timedelta: ...
# error: Signature of "__floordiv__" incompatible with supertype "timedelta"
@overload # type: ignore[override]
def __floordiv__(self, other: timedelta) -> int: ...
@overload
def __floordiv__(self, other: float) -> Timedelta: ...
@overload
def __floordiv__(
self, other: npt.NDArray[np.timedelta64]
) -> npt.NDArray[np.intp]: ...
@overload
def __floordiv__(
self, other: npt.NDArray[np.number]
) -> npt.NDArray[np.timedelta64] | Timedelta: ...
@overload
def __rfloordiv__(self, other: timedelta | str) -> int: ...
@overload
def __rfloordiv__(self, other: None | NaTType) -> NaTType: ...
@overload
def __rfloordiv__(self, other: np.ndarray) -> npt.NDArray[np.timedelta64]: ...
@overload
def __truediv__(self, other: timedelta) -> float: ...
@overload
def __truediv__(self, other: float) -> Timedelta: ...
def __mod__(self, other: timedelta) -> Timedelta: ...
def __divmod__(self, other: timedelta) -> tuple[int, Timedelta]: ...
def __le__(self, other: timedelta) -> bool: ...
def __lt__(self, other: timedelta) -> bool: ...
def __ge__(self, other: timedelta) -> bool: ...
def __gt__(self, other: timedelta) -> bool: ...
def __hash__(self) -> int: ...
def isoformat(self) -> str: ...
def to_numpy(
self, dtype: npt.DTypeLike = ..., copy: bool = False
) -> np.timedelta64: ...
def view(self, dtype: npt.DTypeLike) -> object: ...
@property
def unit(self) -> str: ...
def as_unit(self, unit: str, round_ok: bool = ...) -> Timedelta: ...
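A minimal sketch of the Timedelta API above (note that round uses round-half-to-even):

import pandas as pd

td = pd.Timedelta("1 days 02:30:00")
td.total_seconds()  # 95400.0
td.round("h")       # Timedelta('1 days 02:00:00') -- 26.5h ties to the even 26h
td.as_unit("s")     # same value, stored at second resolution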

View File

@ -0,0 +1,241 @@
from datetime import (
date as _date,
datetime,
time as _time,
timedelta,
tzinfo as _tzinfo,
)
from time import struct_time
from typing import (
ClassVar,
Literal,
TypeAlias,
overload,
)
import numpy as np
from pandas._libs.tslibs import (
BaseOffset,
NaTType,
Period,
Tick,
Timedelta,
)
from pandas._typing import (
Self,
TimestampNonexistent,
)
_TimeZones: TypeAlias = str | _tzinfo | None | int
def integer_op_not_supported(obj: object) -> TypeError: ...
class Timestamp(datetime):
_creso: int
min: ClassVar[Timestamp]
max: ClassVar[Timestamp]
resolution: ClassVar[Timedelta]
_value: int # np.int64
# error: "__new__" must return a class instance (got "Union[Timestamp, NaTType]")
def __new__( # type: ignore[misc]
cls: type[Self],
ts_input: np.integer | float | str | _date | datetime | np.datetime64 = ...,
year: int | None = ...,
month: int | None = ...,
day: int | None = ...,
hour: int | None = ...,
minute: int | None = ...,
second: int | None = ...,
microsecond: int | None = ...,
tzinfo: _tzinfo | None = ...,
*,
nanosecond: int | None = ...,
tz: _TimeZones = ...,
unit: str | int | None = ...,
fold: int | None = ...,
) -> Self | NaTType: ...
@classmethod
def _from_value_and_reso(
cls, value: int, reso: int, tz: _TimeZones
) -> Timestamp: ...
@property
def value(self) -> int: ... # np.int64
@property
def year(self) -> int: ...
@property
def month(self) -> int: ...
@property
def day(self) -> int: ...
@property
def hour(self) -> int: ...
@property
def minute(self) -> int: ...
@property
def second(self) -> int: ...
@property
def microsecond(self) -> int: ...
@property
def nanosecond(self) -> int: ...
@property
def tzinfo(self) -> _tzinfo | None: ...
@property
def tz(self) -> _tzinfo | None: ...
@property
def fold(self) -> int: ...
@classmethod
def fromtimestamp(cls, ts: float, tz: _TimeZones = ...) -> Self: ...
@classmethod
def utcfromtimestamp(cls, ts: float) -> Self: ...
@classmethod
def today(cls, tz: _TimeZones = ...) -> Self: ...
@classmethod
def fromordinal(
cls,
ordinal: int,
tz: _TimeZones = ...,
) -> Self: ...
@classmethod
def now(cls, tz: _TimeZones = ...) -> Self: ...
@classmethod
def utcnow(cls) -> Self: ...
# error: Signature of "combine" incompatible with supertype "datetime"
@classmethod
def combine( # type: ignore[override]
cls, date: _date, time: _time
) -> datetime: ...
@classmethod
def fromisoformat(cls, date_string: str) -> Self: ...
def strftime(self, format: str) -> str: ...
def __format__(self, fmt: str) -> str: ...
def toordinal(self) -> int: ...
def timetuple(self) -> struct_time: ...
def timestamp(self) -> float: ...
def utctimetuple(self) -> struct_time: ...
def date(self) -> _date: ...
def time(self) -> _time: ...
def timetz(self) -> _time: ...
# LSP violation: nanosecond is not present in datetime.datetime.replace
# and has positional args following it
def replace( # type: ignore[override]
self,
year: int | None = ...,
month: int | None = ...,
day: int | None = ...,
hour: int | None = ...,
minute: int | None = ...,
second: int | None = ...,
microsecond: int | None = ...,
nanosecond: int | None = ...,
tzinfo: _tzinfo | type[object] | None = ...,
fold: int | None = ...,
) -> Self: ...
# LSP violation: datetime.datetime.astimezone has a default value for tz
def astimezone(self, tz: _TimeZones) -> Self: ... # type: ignore[override]
def ctime(self) -> str: ...
def isoformat(self, sep: str = ..., timespec: str = ...) -> str: ...
@classmethod
def strptime(
# Note: strptime is actually disabled and raises NotImplementedError
cls,
date_string: str,
format: str,
) -> Self: ...
def utcoffset(self) -> timedelta | None: ...
def tzname(self) -> str | None: ...
def dst(self) -> timedelta | None: ...
def __le__(self, other: datetime) -> bool: ... # type: ignore[override]
def __lt__(self, other: datetime) -> bool: ... # type: ignore[override]
def __ge__(self, other: datetime) -> bool: ... # type: ignore[override]
def __gt__(self, other: datetime) -> bool: ... # type: ignore[override]
# error: Signature of "__add__" incompatible with supertype "date"/"datetime"
@overload # type: ignore[override]
def __add__(self, other: np.ndarray) -> np.ndarray: ...
@overload
def __add__(self, other: timedelta | np.timedelta64 | Tick) -> Self: ...
def __radd__(self, other: timedelta) -> Self: ...
@overload # type: ignore[override]
def __sub__(self, other: datetime) -> Timedelta: ...
@overload
def __sub__(self, other: timedelta | np.timedelta64 | Tick) -> Self: ...
def __hash__(self) -> int: ...
def weekday(self) -> int: ...
def isoweekday(self) -> int: ...
# Return type "Tuple[int, int, int]" of "isocalendar" incompatible with return
# type "_IsoCalendarDate" in supertype "date"
def isocalendar(self) -> tuple[int, int, int]: ... # type: ignore[override]
@property
def is_leap_year(self) -> bool: ...
@property
def is_month_start(self) -> bool: ...
@property
def is_quarter_start(self) -> bool: ...
@property
def is_year_start(self) -> bool: ...
@property
def is_month_end(self) -> bool: ...
@property
def is_quarter_end(self) -> bool: ...
@property
def is_year_end(self) -> bool: ...
def to_pydatetime(self, warn: bool = ...) -> datetime: ...
def to_datetime64(self) -> np.datetime64: ...
def to_period(self, freq: BaseOffset | str | None = None) -> Period: ...
def to_julian_date(self) -> np.float64: ...
@property
def asm8(self) -> np.datetime64: ...
def tz_convert(self, tz: _TimeZones) -> Self: ...
# TODO: could return NaT?
def tz_localize(
self,
tz: _TimeZones,
ambiguous: bool | Literal["raise", "NaT"] = ...,
nonexistent: TimestampNonexistent = ...,
) -> Self: ...
def normalize(self) -> Self: ...
# TODO: round/floor/ceil could return NaT?
def round(
self,
freq: str,
ambiguous: bool | Literal["raise", "NaT"] = ...,
nonexistent: TimestampNonexistent = ...,
) -> Self: ...
def floor(
self,
freq: str,
ambiguous: bool | Literal["raise", "NaT"] = ...,
nonexistent: TimestampNonexistent = ...,
) -> Self: ...
def ceil(
self,
freq: str,
ambiguous: bool | Literal["raise", "NaT"] = ...,
nonexistent: TimestampNonexistent = ...,
) -> Self: ...
def day_name(self, locale: str | None = ...) -> str: ...
def month_name(self, locale: str | None = ...) -> str: ...
@property
def day_of_week(self) -> int: ...
@property
def dayofweek(self) -> int: ...
@property
def day_of_year(self) -> int: ...
@property
def dayofyear(self) -> int: ...
@property
def quarter(self) -> int: ...
@property
def week(self) -> int: ...
def to_numpy(
self, dtype: np.dtype | None = ..., copy: bool = ...
) -> np.datetime64: ...
@property
def _date_repr(self) -> str: ...
@property
def days_in_month(self) -> int: ...
@property
def daysinmonth(self) -> int: ...
@property
def unit(self) -> str: ...
def as_unit(self, unit: str, round_ok: bool = ...) -> Timestamp: ...
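A minimal sketch of the Timestamp API above (illustrative):

import pandas as pd

ts = pd.Timestamp("2024-01-15 01:30", tz="US/Eastern")
ts.tz_convert("UTC")  # Timestamp('2024-01-15 06:30:00+0000', tz='UTC')
ts.floor("15min")     # Timestamp('2024-01-15 01:30:00-0500', tz='US/Eastern')
ts.day_name()         # 'Monday'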

View File

@ -0,0 +1,21 @@
from datetime import (
datetime,
tzinfo,
)
from typing import Callable
import numpy as np
# imported from dateutil.tz
dateutil_gettz: Callable[[str], tzinfo]
def tz_standardize(tz: tzinfo) -> tzinfo: ...
def tz_compare(start: tzinfo | None, end: tzinfo | None) -> bool: ...
def infer_tzinfo(
start: datetime | None,
end: datetime | None,
) -> tzinfo | None: ...
def maybe_get_tz(tz: str | int | np.int64 | tzinfo | None) -> tzinfo | None: ...
def get_timezone(tz: tzinfo) -> tzinfo | str: ...
def is_utc(tz: tzinfo | None) -> bool: ...
def is_fixed_offset(tz: tzinfo) -> bool: ...
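These helpers are private (the import path may change between versions); a rough behavioral sketch:

from pandas._libs.tslibs.timezones import maybe_get_tz, tz_compare

eastern = maybe_get_tz("US/Eastern")
tz_compare(eastern, maybe_get_tz("US/Eastern"))  # True
tz_compare(eastern, None)                        # False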

View File

@ -0,0 +1,21 @@
from datetime import (
timedelta,
tzinfo,
)
from typing import Iterable
import numpy as np
from pandas._typing import npt
# tz_convert_from_utc_single exposed for testing
def tz_convert_from_utc_single(
utc_val: np.int64, tz: tzinfo, creso: int = ...
) -> np.int64: ...
def tz_localize_to_utc(
vals: npt.NDArray[np.int64],
tz: tzinfo | None,
ambiguous: str | bool | Iterable[bool] | None = ...,
nonexistent: str | timedelta | np.timedelta64 | None = ...,
creso: int = ..., # NPY_DATETIMEUNIT
) -> npt.NDArray[np.int64]: ...

View File

@ -0,0 +1,43 @@
"""
For cython types that cannot be represented precisely, closest-available
python equivalents are used, and the precise types kept as adjacent comments.
"""
from datetime import tzinfo
import numpy as np
from pandas._libs.tslibs.dtypes import Resolution
from pandas._typing import npt
def dt64arr_to_periodarr(
stamps: npt.NDArray[np.int64],
freq: int,
tz: tzinfo | None,
reso: int = ..., # NPY_DATETIMEUNIT
) -> npt.NDArray[np.int64]: ...
def is_date_array_normalized(
stamps: npt.NDArray[np.int64],
tz: tzinfo | None,
reso: int, # NPY_DATETIMEUNIT
) -> bool: ...
def normalize_i8_timestamps(
stamps: npt.NDArray[np.int64],
tz: tzinfo | None,
reso: int, # NPY_DATETIMEUNIT
) -> npt.NDArray[np.int64]: ...
def get_resolution(
stamps: npt.NDArray[np.int64],
tz: tzinfo | None = ...,
reso: int = ..., # NPY_DATETIMEUNIT
) -> Resolution: ...
def ints_to_pydatetime(
stamps: npt.NDArray[np.int64],
tz: tzinfo | None = ...,
box: str = ...,
reso: int = ..., # NPY_DATETIMEUNIT
) -> npt.NDArray[np.object_]: ...
def tz_convert_from_utc(
stamps: npt.NDArray[np.int64],
tz: tzinfo | None,
reso: int = ..., # NPY_DATETIMEUNIT
) -> npt.NDArray[np.int64]: ...

View File

@ -0,0 +1,127 @@
from typing import (
Any,
Callable,
Literal,
)
import numpy as np
from pandas._typing import (
WindowingRankType,
npt,
)
def roll_sum(
values: np.ndarray, # const float64_t[:]
start: np.ndarray, # np.ndarray[np.int64]
end: np.ndarray, # np.ndarray[np.int64]
minp: int, # int64_t
) -> np.ndarray: ... # np.ndarray[float]
def roll_mean(
values: np.ndarray, # const float64_t[:]
start: np.ndarray, # np.ndarray[np.int64]
end: np.ndarray, # np.ndarray[np.int64]
minp: int, # int64_t
) -> np.ndarray: ... # np.ndarray[float]
def roll_var(
values: np.ndarray, # const float64_t[:]
start: np.ndarray, # np.ndarray[np.int64]
end: np.ndarray, # np.ndarray[np.int64]
minp: int, # int64_t
ddof: int = ...,
) -> np.ndarray: ... # np.ndarray[float]
def roll_skew(
values: np.ndarray, # np.ndarray[np.float64]
start: np.ndarray, # np.ndarray[np.int64]
end: np.ndarray, # np.ndarray[np.int64]
minp: int, # int64_t
) -> np.ndarray: ... # np.ndarray[float]
def roll_kurt(
values: np.ndarray, # np.ndarray[np.float64]
start: np.ndarray, # np.ndarray[np.int64]
end: np.ndarray, # np.ndarray[np.int64]
minp: int, # int64_t
) -> np.ndarray: ... # np.ndarray[float]
def roll_median_c(
values: np.ndarray, # np.ndarray[np.float64]
start: np.ndarray, # np.ndarray[np.int64]
end: np.ndarray, # np.ndarray[np.int64]
minp: int, # int64_t
) -> np.ndarray: ... # np.ndarray[float]
def roll_max(
values: np.ndarray, # np.ndarray[np.float64]
start: np.ndarray, # np.ndarray[np.int64]
end: np.ndarray, # np.ndarray[np.int64]
minp: int, # int64_t
) -> np.ndarray: ... # np.ndarray[float]
def roll_min(
values: np.ndarray, # np.ndarray[np.float64]
start: np.ndarray, # np.ndarray[np.int64]
end: np.ndarray, # np.ndarray[np.int64]
minp: int, # int64_t
) -> np.ndarray: ... # np.ndarray[float]
def roll_quantile(
values: np.ndarray, # const float64_t[:]
start: np.ndarray, # np.ndarray[np.int64]
end: np.ndarray, # np.ndarray[np.int64]
minp: int, # int64_t
quantile: float, # float64_t
interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"],
) -> np.ndarray: ... # np.ndarray[float]
def roll_rank(
values: np.ndarray,
start: np.ndarray,
end: np.ndarray,
minp: int,
percentile: bool,
method: WindowingRankType,
ascending: bool,
) -> np.ndarray: ... # np.ndarray[float]
def roll_apply(
obj: object,
start: np.ndarray, # np.ndarray[np.int64]
end: np.ndarray, # np.ndarray[np.int64]
minp: int, # int64_t
function: Callable[..., Any],
raw: bool,
args: tuple[Any, ...],
kwargs: dict[str, Any],
) -> npt.NDArray[np.float64]: ...
def roll_weighted_sum(
values: np.ndarray, # const float64_t[:]
weights: np.ndarray, # const float64_t[:]
minp: int,
) -> np.ndarray: ... # np.ndarray[np.float64]
def roll_weighted_mean(
values: np.ndarray, # const float64_t[:]
weights: np.ndarray, # const float64_t[:]
minp: int,
) -> np.ndarray: ... # np.ndarray[np.float64]
def roll_weighted_var(
values: np.ndarray, # const float64_t[:]
weights: np.ndarray, # const float64_t[:]
minp: int, # int64_t
ddof: int, # unsigned int
) -> np.ndarray: ... # np.ndarray[np.float64]
def ewm(
vals: np.ndarray, # const float64_t[:]
start: np.ndarray, # const int64_t[:]
end: np.ndarray, # const int64_t[:]
minp: int,
com: float, # float64_t
adjust: bool,
ignore_na: bool,
deltas: np.ndarray | None = None, # const float64_t[:]
normalize: bool = True,
) -> np.ndarray: ... # np.ndarray[np.float64]
def ewmcov(
input_x: np.ndarray, # const float64_t[:]
start: np.ndarray, # const int64_t[:]
end: np.ndarray, # const int64_t[:]
minp: int,
input_y: np.ndarray, # const float64_t[:]
com: float, # float64_t
adjust: bool,
ignore_na: bool,
bias: bool,
) -> np.ndarray: ... # np.ndarray[np.float64]
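These cython kernels back the public rolling/ewm API; a sketch of the user-facing equivalents (the minp argument corresponds to min_periods):

import pandas as pd

s = pd.Series([1.0, 2.0, 3.0, 4.0])
s.rolling(window=2).mean()                # ultimately dispatches to roll_mean
s.rolling(window=3, min_periods=1).sum()  # roll_sum with minp=1
s.ewm(com=0.5).mean()                     # the ewm kernel above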

View File

@ -0,0 +1,12 @@
import numpy as np
from pandas._typing import npt
def calculate_variable_window_bounds(
num_values: int, # int64_t
window_size: int, # int64_t
min_periods,
center: bool,
closed: str | None,
index: np.ndarray, # const int64_t[:]
) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: ...
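calculate_variable_window_bounds backs offset-based (variable-width) windows; a sketch of the public trigger:

import pandas as pd

idx = pd.date_range("2024-01-01", periods=5, freq="h")
s = pd.Series(range(5), index=idx, dtype="float64")
s.rolling("2h").sum()  # window edges come from the index, not a fixed count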

View File

@ -0,0 +1,20 @@
import numpy as np
from pandas._typing import ArrayLike
def write_csv_rows(
data: list[ArrayLike],
data_index: np.ndarray,
nlevels: int,
cols: np.ndarray,
writer: object, # _csv.writer
) -> None: ...
def convert_json_to_lines(arr: str) -> str: ...
def max_len_string_array(
arr: np.ndarray, # pandas_string[:]
) -> int: ...
def word_len(val: object) -> int: ...
def string_array_replace_from_nan_rep(
arr: np.ndarray, # np.ndarray[object, ndim=1]
nan_rep: object,
) -> None: ...

View File

@ -0,0 +1,635 @@
from __future__ import annotations
from decimal import Decimal
import operator
import os
from sys import byteorder
from typing import (
TYPE_CHECKING,
Callable,
ContextManager,
)
import warnings
import numpy as np
from pandas._config import using_string_dtype
from pandas._config.localization import (
can_set_locale,
get_locales,
set_locale,
)
from pandas.compat import pa_version_under10p1
import pandas as pd
from pandas import (
ArrowDtype,
DataFrame,
Index,
MultiIndex,
RangeIndex,
Series,
)
from pandas._testing._io import (
round_trip_localpath,
round_trip_pathlib,
round_trip_pickle,
write_to_compressed,
)
from pandas._testing._warnings import (
assert_produces_warning,
maybe_produces_warning,
)
from pandas._testing.asserters import (
assert_almost_equal,
assert_attr_equal,
assert_categorical_equal,
assert_class_equal,
assert_contains_all,
assert_copy,
assert_datetime_array_equal,
assert_dict_equal,
assert_equal,
assert_extension_array_equal,
assert_frame_equal,
assert_index_equal,
assert_indexing_slices_equivalent,
assert_interval_array_equal,
assert_is_sorted,
assert_is_valid_plot_return_object,
assert_metadata_equivalent,
assert_numpy_array_equal,
assert_period_array_equal,
assert_series_equal,
assert_sp_array_equal,
assert_timedelta_array_equal,
raise_assert_detail,
)
from pandas._testing.compat import (
get_dtype,
get_obj,
)
from pandas._testing.contexts import (
assert_cow_warning,
decompress_file,
ensure_clean,
raises_chained_assignment_error,
set_timezone,
use_numexpr,
with_csv_dialect,
)
from pandas.core.arrays import (
ArrowExtensionArray,
BaseMaskedArray,
NumpyExtensionArray,
)
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
from pandas.core.construction import extract_array
if TYPE_CHECKING:
from pandas._typing import (
Dtype,
NpDtype,
)
UNSIGNED_INT_NUMPY_DTYPES: list[NpDtype] = ["uint8", "uint16", "uint32", "uint64"]
UNSIGNED_INT_EA_DTYPES: list[Dtype] = ["UInt8", "UInt16", "UInt32", "UInt64"]
SIGNED_INT_NUMPY_DTYPES: list[NpDtype] = [int, "int8", "int16", "int32", "int64"]
SIGNED_INT_EA_DTYPES: list[Dtype] = ["Int8", "Int16", "Int32", "Int64"]
ALL_INT_NUMPY_DTYPES = UNSIGNED_INT_NUMPY_DTYPES + SIGNED_INT_NUMPY_DTYPES
ALL_INT_EA_DTYPES = UNSIGNED_INT_EA_DTYPES + SIGNED_INT_EA_DTYPES
ALL_INT_DTYPES: list[Dtype] = [*ALL_INT_NUMPY_DTYPES, *ALL_INT_EA_DTYPES]
FLOAT_NUMPY_DTYPES: list[NpDtype] = [float, "float32", "float64"]
FLOAT_EA_DTYPES: list[Dtype] = ["Float32", "Float64"]
ALL_FLOAT_DTYPES: list[Dtype] = [*FLOAT_NUMPY_DTYPES, *FLOAT_EA_DTYPES]
COMPLEX_DTYPES: list[Dtype] = [complex, "complex64", "complex128"]
if using_string_dtype():
STRING_DTYPES: list[Dtype] = ["U"]
else:
STRING_DTYPES: list[Dtype] = [str, "str", "U"] # type: ignore[no-redef]
COMPLEX_FLOAT_DTYPES: list[Dtype] = [*COMPLEX_DTYPES, *FLOAT_NUMPY_DTYPES]
DATETIME64_DTYPES: list[Dtype] = ["datetime64[ns]", "M8[ns]"]
TIMEDELTA64_DTYPES: list[Dtype] = ["timedelta64[ns]", "m8[ns]"]
BOOL_DTYPES: list[Dtype] = [bool, "bool"]
BYTES_DTYPES: list[Dtype] = [bytes, "bytes"]
OBJECT_DTYPES: list[Dtype] = [object, "object"]
ALL_REAL_NUMPY_DTYPES = FLOAT_NUMPY_DTYPES + ALL_INT_NUMPY_DTYPES
ALL_REAL_EXTENSION_DTYPES = FLOAT_EA_DTYPES + ALL_INT_EA_DTYPES
ALL_REAL_DTYPES: list[Dtype] = [*ALL_REAL_NUMPY_DTYPES, *ALL_REAL_EXTENSION_DTYPES]
ALL_NUMERIC_DTYPES: list[Dtype] = [*ALL_REAL_DTYPES, *COMPLEX_DTYPES]
ALL_NUMPY_DTYPES = (
ALL_REAL_NUMPY_DTYPES
+ COMPLEX_DTYPES
+ STRING_DTYPES
+ DATETIME64_DTYPES
+ TIMEDELTA64_DTYPES
+ BOOL_DTYPES
+ OBJECT_DTYPES
+ BYTES_DTYPES
)
NARROW_NP_DTYPES = [
np.float16,
np.float32,
np.int8,
np.int16,
np.int32,
np.uint8,
np.uint16,
np.uint32,
]
PYTHON_DATA_TYPES = [
str,
int,
float,
complex,
list,
tuple,
range,
dict,
set,
frozenset,
bool,
bytes,
bytearray,
memoryview,
]
ENDIAN = {"little": "<", "big": ">"}[byteorder]
NULL_OBJECTS = [None, np.nan, pd.NaT, float("nan"), pd.NA, Decimal("NaN")]
NP_NAT_OBJECTS = [
cls("NaT", unit)
for cls in [np.datetime64, np.timedelta64]
for unit in [
"Y",
"M",
"W",
"D",
"h",
"m",
"s",
"ms",
"us",
"ns",
"ps",
"fs",
"as",
]
]
if not pa_version_under10p1:
import pyarrow as pa
UNSIGNED_INT_PYARROW_DTYPES = [pa.uint8(), pa.uint16(), pa.uint32(), pa.uint64()]
SIGNED_INT_PYARROW_DTYPES = [pa.int8(), pa.int16(), pa.int32(), pa.int64()]
ALL_INT_PYARROW_DTYPES = UNSIGNED_INT_PYARROW_DTYPES + SIGNED_INT_PYARROW_DTYPES
ALL_INT_PYARROW_DTYPES_STR_REPR = [
str(ArrowDtype(typ)) for typ in ALL_INT_PYARROW_DTYPES
]
# pa.float16 doesn't seem supported
# https://github.com/apache/arrow/blob/master/python/pyarrow/src/arrow/python/helpers.cc#L86
FLOAT_PYARROW_DTYPES = [pa.float32(), pa.float64()]
FLOAT_PYARROW_DTYPES_STR_REPR = [
str(ArrowDtype(typ)) for typ in FLOAT_PYARROW_DTYPES
]
DECIMAL_PYARROW_DTYPES = [pa.decimal128(7, 3)]
STRING_PYARROW_DTYPES = [pa.string()]
BINARY_PYARROW_DTYPES = [pa.binary()]
TIME_PYARROW_DTYPES = [
pa.time32("s"),
pa.time32("ms"),
pa.time64("us"),
pa.time64("ns"),
]
DATE_PYARROW_DTYPES = [pa.date32(), pa.date64()]
DATETIME_PYARROW_DTYPES = [
pa.timestamp(unit=unit, tz=tz)
for unit in ["s", "ms", "us", "ns"]
for tz in [None, "UTC", "US/Pacific", "US/Eastern"]
]
TIMEDELTA_PYARROW_DTYPES = [pa.duration(unit) for unit in ["s", "ms", "us", "ns"]]
BOOL_PYARROW_DTYPES = [pa.bool_()]
# TODO: Add container like pyarrow types:
# https://arrow.apache.org/docs/python/api/datatypes.html#factory-functions
ALL_PYARROW_DTYPES = (
ALL_INT_PYARROW_DTYPES
+ FLOAT_PYARROW_DTYPES
+ DECIMAL_PYARROW_DTYPES
+ STRING_PYARROW_DTYPES
+ BINARY_PYARROW_DTYPES
+ TIME_PYARROW_DTYPES
+ DATE_PYARROW_DTYPES
+ DATETIME_PYARROW_DTYPES
+ TIMEDELTA_PYARROW_DTYPES
+ BOOL_PYARROW_DTYPES
)
ALL_REAL_PYARROW_DTYPES_STR_REPR = (
ALL_INT_PYARROW_DTYPES_STR_REPR + FLOAT_PYARROW_DTYPES_STR_REPR
)
else:
FLOAT_PYARROW_DTYPES_STR_REPR = []
ALL_INT_PYARROW_DTYPES_STR_REPR = []
ALL_PYARROW_DTYPES = []
ALL_REAL_PYARROW_DTYPES_STR_REPR = []
ALL_REAL_NULLABLE_DTYPES = (
FLOAT_NUMPY_DTYPES + ALL_REAL_EXTENSION_DTYPES + ALL_REAL_PYARROW_DTYPES_STR_REPR
)
arithmetic_dunder_methods = [
"__add__",
"__radd__",
"__sub__",
"__rsub__",
"__mul__",
"__rmul__",
"__floordiv__",
"__rfloordiv__",
"__truediv__",
"__rtruediv__",
"__pow__",
"__rpow__",
"__mod__",
"__rmod__",
]
comparison_dunder_methods = ["__eq__", "__ne__", "__le__", "__lt__", "__ge__", "__gt__"]
# -----------------------------------------------------------------------------
# Comparators
def box_expected(expected, box_cls, transpose: bool = True):
"""
Helper function to wrap the expected output of a test in a given box_class.
Parameters
----------
expected : np.ndarray, Index, Series
box_cls : {Index, Series, DataFrame}
Returns
-------
subclass of box_cls
"""
if box_cls is pd.array:
if isinstance(expected, RangeIndex):
# pd.array would return an IntegerArray
expected = NumpyExtensionArray(np.asarray(expected._values))
else:
expected = pd.array(expected, copy=False)
elif box_cls is Index:
with warnings.catch_warnings():
warnings.filterwarnings("ignore", "Dtype inference", category=FutureWarning)
expected = Index(expected)
elif box_cls is Series:
with warnings.catch_warnings():
warnings.filterwarnings("ignore", "Dtype inference", category=FutureWarning)
expected = Series(expected)
elif box_cls is DataFrame:
with warnings.catch_warnings():
warnings.filterwarnings("ignore", "Dtype inference", category=FutureWarning)
expected = Series(expected).to_frame()
if transpose:
# for vector operations, we need a DataFrame to be a single-row,
# not a single-column, in order to operate against non-DataFrame
# vectors of the same length. But convert to two rows to avoid
# single-row special cases in datetime arithmetic
expected = expected.T
expected = pd.concat([expected] * 2, ignore_index=True)
elif box_cls is np.ndarray or box_cls is np.array:
expected = np.array(expected)
elif box_cls is to_array:
expected = to_array(expected)
else:
raise NotImplementedError(box_cls)
return expected
def to_array(obj):
"""
Similar to pd.array, but does not cast numpy dtypes to nullable dtypes.
"""
# temporary implementation until we get pd.array in place
dtype = getattr(obj, "dtype", None)
if dtype is None:
return np.asarray(obj)
return extract_array(obj, extract_numpy=True)
class SubclassedSeries(Series):
_metadata = ["testattr", "name"]
@property
def _constructor(self):
# For testing, those properties return a generic callable, and not
# the actual class. In this case that is equivalent, but it is to
# ensure we don't rely on the property returning a class
# See https://github.com/pandas-dev/pandas/pull/46018 and
# https://github.com/pandas-dev/pandas/issues/32638 and linked issues
return lambda *args, **kwargs: SubclassedSeries(*args, **kwargs)
@property
def _constructor_expanddim(self):
return lambda *args, **kwargs: SubclassedDataFrame(*args, **kwargs)
class SubclassedDataFrame(DataFrame):
_metadata = ["testattr"]
@property
def _constructor(self):
return lambda *args, **kwargs: SubclassedDataFrame(*args, **kwargs)
@property
def _constructor_sliced(self):
return lambda *args, **kwargs: SubclassedSeries(*args, **kwargs)
def convert_rows_list_to_csv_str(rows_list: list[str]) -> str:
"""
Convert list of CSV rows to single CSV-formatted string for current OS.
This method is used for creating expected value of to_csv() method.
Parameters
----------
rows_list : List[str]
        Each element represents a row of the CSV.
Returns
-------
str
Expected output of to_csv() in current OS.
"""
sep = os.linesep
return sep.join(rows_list) + sep
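# Example (on POSIX, where os.linesep == "\n"):
# >>> convert_rows_list_to_csv_str(["a,b", "1,2"])
# 'a,b\n1,2\n'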
def external_error_raised(expected_exception: type[Exception]) -> ContextManager:
"""
Helper function to mark pytest.raises that have an external error message.
Parameters
----------
expected_exception : Exception
Expected error to raise.
Returns
-------
Callable
Regular `pytest.raises` function with `match` equal to `None`.
"""
import pytest
return pytest.raises(expected_exception, match=None)
cython_table = pd.core.common._cython_table.items()
def get_cython_table_params(ndframe, func_names_and_expected):
"""
    Combine a frame with function names, their com._cython_table
    equivalents, and the expected results.
Parameters
----------
ndframe : DataFrame or Series
    func_names_and_expected : Sequence of (func_name, expected) pairs
        The first item is the name of an NDFrame method ('sum', 'prod', etc.).
        The second item is the expected return value.
Returns
-------
list
List of three items (DataFrame, function, expected result)
"""
results = []
for func_name, expected in func_names_and_expected:
results.append((ndframe, func_name, expected))
results += [
(ndframe, func, expected)
for func, name in cython_table
if name == func_name
]
return results
def get_op_from_name(op_name: str) -> Callable:
"""
The operator function for a given op name.
Parameters
----------
op_name : str
The op name, in form of "add" or "__add__".
Returns
-------
function
A function performing the operation.
"""
short_opname = op_name.strip("_")
try:
op = getattr(operator, short_opname)
except AttributeError:
# Assume it is the reverse operator
rop = getattr(operator, short_opname[1:])
op = lambda x, y: rop(y, x)
return op
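# Example: reversed dunder names resolve to a flipped operator, e.g.
# >>> get_op_from_name("__rsub__")(1, 5)
# 4
# because the returned callable computes operator.sub(5, 1).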
# -----------------------------------------------------------------------------
# Indexing test helpers
def getitem(x):
return x
def setitem(x):
return x
def loc(x):
return x.loc
def iloc(x):
return x.iloc
def at(x):
return x.at
def iat(x):
return x.iat
# -----------------------------------------------------------------------------
_UNITS = ["s", "ms", "us", "ns"]
def get_finest_unit(left: str, right: str):
"""
    Find the finest (highest-resolution) of two datetime64 units.
"""
if _UNITS.index(left) >= _UNITS.index(right):
return left
return right
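# Example:
# >>> get_finest_unit("ms", "us")
# 'us'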
def shares_memory(left, right) -> bool:
"""
Pandas-compat for np.shares_memory.
"""
if isinstance(left, np.ndarray) and isinstance(right, np.ndarray):
return np.shares_memory(left, right)
elif isinstance(left, np.ndarray):
# Call with reversed args to get to unpacking logic below.
return shares_memory(right, left)
if isinstance(left, RangeIndex):
return False
if isinstance(left, MultiIndex):
return shares_memory(left._codes, right)
if isinstance(left, (Index, Series)):
if isinstance(right, (Index, Series)):
return shares_memory(left._values, right._values)
return shares_memory(left._values, right)
if isinstance(left, NDArrayBackedExtensionArray):
return shares_memory(left._ndarray, right)
if isinstance(left, pd.core.arrays.SparseArray):
return shares_memory(left.sp_values, right)
if isinstance(left, pd.core.arrays.IntervalArray):
return shares_memory(left._left, right) or shares_memory(left._right, right)
if isinstance(left, ArrowExtensionArray):
if isinstance(right, ArrowExtensionArray):
# https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669
left_pa_data = left._pa_array
right_pa_data = right._pa_array
left_buf1 = left_pa_data.chunk(0).buffers()[1]
right_buf1 = right_pa_data.chunk(0).buffers()[1]
return left_buf1.address == right_buf1.address
else:
            # if we have one ArrowExtensionArray and one other array, assume
# they can only share memory if they share the same numpy buffer
return np.shares_memory(left, right)
if isinstance(left, BaseMaskedArray) and isinstance(right, BaseMaskedArray):
# By convention, we'll say these share memory if they share *either*
# the _data or the _mask
return np.shares_memory(left._data, right._data) or np.shares_memory(
left._mask, right._mask
)
if isinstance(left, DataFrame) and len(left._mgr.arrays) == 1:
arr = left._mgr.arrays[0]
return shares_memory(arr, right)
raise NotImplementedError(type(left), type(right))
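# Example:
# >>> import numpy as np
# >>> arr = np.arange(4)
# >>> shares_memory(arr, arr[:2])  # a plain slice is a view onto the same buffer
# True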
__all__ = [
"ALL_INT_EA_DTYPES",
"ALL_INT_NUMPY_DTYPES",
"ALL_NUMPY_DTYPES",
"ALL_REAL_NUMPY_DTYPES",
"assert_almost_equal",
"assert_attr_equal",
"assert_categorical_equal",
"assert_class_equal",
"assert_contains_all",
"assert_copy",
"assert_datetime_array_equal",
"assert_dict_equal",
"assert_equal",
"assert_extension_array_equal",
"assert_frame_equal",
"assert_index_equal",
"assert_indexing_slices_equivalent",
"assert_interval_array_equal",
"assert_is_sorted",
"assert_is_valid_plot_return_object",
"assert_metadata_equivalent",
"assert_numpy_array_equal",
"assert_period_array_equal",
"assert_produces_warning",
"assert_series_equal",
"assert_sp_array_equal",
"assert_timedelta_array_equal",
"assert_cow_warning",
"at",
"BOOL_DTYPES",
"box_expected",
"BYTES_DTYPES",
"can_set_locale",
"COMPLEX_DTYPES",
"convert_rows_list_to_csv_str",
"DATETIME64_DTYPES",
"decompress_file",
"ENDIAN",
"ensure_clean",
"external_error_raised",
"FLOAT_EA_DTYPES",
"FLOAT_NUMPY_DTYPES",
"get_cython_table_params",
"get_dtype",
"getitem",
"get_locales",
"get_finest_unit",
"get_obj",
"get_op_from_name",
"iat",
"iloc",
"loc",
"maybe_produces_warning",
"NARROW_NP_DTYPES",
"NP_NAT_OBJECTS",
"NULL_OBJECTS",
"OBJECT_DTYPES",
"raise_assert_detail",
"raises_chained_assignment_error",
"round_trip_localpath",
"round_trip_pathlib",
"round_trip_pickle",
"setitem",
"set_locale",
"set_timezone",
"shares_memory",
"SIGNED_INT_EA_DTYPES",
"SIGNED_INT_NUMPY_DTYPES",
"STRING_DTYPES",
"SubclassedDataFrame",
"SubclassedSeries",
"TIMEDELTA64_DTYPES",
"to_array",
"UNSIGNED_INT_EA_DTYPES",
"UNSIGNED_INT_NUMPY_DTYPES",
"use_numexpr",
"with_csv_dialect",
"write_to_compressed",
]

View File

@ -0,0 +1,93 @@
"""
Hypothesis data generator helpers.
"""
from datetime import datetime
from hypothesis import strategies as st
from hypothesis.extra.dateutil import timezones as dateutil_timezones
from hypothesis.extra.pytz import timezones as pytz_timezones
from pandas.compat import is_platform_windows
import pandas as pd
from pandas.tseries.offsets import (
BMonthBegin,
BMonthEnd,
BQuarterBegin,
BQuarterEnd,
BYearBegin,
BYearEnd,
MonthBegin,
MonthEnd,
QuarterBegin,
QuarterEnd,
YearBegin,
YearEnd,
)
OPTIONAL_INTS = st.lists(st.one_of(st.integers(), st.none()), max_size=10, min_size=3)
OPTIONAL_FLOATS = st.lists(st.one_of(st.floats(), st.none()), max_size=10, min_size=3)
OPTIONAL_TEXT = st.lists(st.one_of(st.none(), st.text()), max_size=10, min_size=3)
OPTIONAL_DICTS = st.lists(
st.one_of(st.none(), st.dictionaries(st.text(), st.integers())),
max_size=10,
min_size=3,
)
OPTIONAL_LISTS = st.lists(
st.one_of(st.none(), st.lists(st.text(), max_size=10, min_size=3)),
max_size=10,
min_size=3,
)
OPTIONAL_ONE_OF_ALL = st.one_of(
OPTIONAL_DICTS, OPTIONAL_FLOATS, OPTIONAL_INTS, OPTIONAL_LISTS, OPTIONAL_TEXT
)
if is_platform_windows():
DATETIME_NO_TZ = st.datetimes(min_value=datetime(1900, 1, 1))
else:
DATETIME_NO_TZ = st.datetimes()
DATETIME_JAN_1_1900_OPTIONAL_TZ = st.datetimes(
min_value=pd.Timestamp(
1900, 1, 1
).to_pydatetime(), # pyright: ignore[reportGeneralTypeIssues]
max_value=pd.Timestamp(
1900, 1, 1
).to_pydatetime(), # pyright: ignore[reportGeneralTypeIssues]
timezones=st.one_of(st.none(), dateutil_timezones(), pytz_timezones()),
)
DATETIME_IN_PD_TIMESTAMP_RANGE_NO_TZ = st.datetimes(
min_value=pd.Timestamp.min.to_pydatetime(warn=False),
max_value=pd.Timestamp.max.to_pydatetime(warn=False),
)
INT_NEG_999_TO_POS_999 = st.integers(-999, 999)
# The strategy for each type is registered in conftest.py, as they don't carry
# enough runtime information (e.g. type hints) to infer how to build them.
YQM_OFFSET = st.one_of(
*map(
st.from_type,
[
MonthBegin,
MonthEnd,
BMonthBegin,
BMonthEnd,
QuarterBegin,
QuarterEnd,
BQuarterBegin,
BQuarterEnd,
YearBegin,
YearEnd,
BYearBegin,
BYearEnd,
],
)
)
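A sketch of how these strategies are consumed in tests (illustrative; the conftest.py registration mentioned above is what lets from_type build the offsets with bounded parameters):

from hypothesis import given

@given(offset=YQM_OFFSET)
def test_add_offset_returns_timestamp(offset):
    assert isinstance(pd.Timestamp("2024-06-15") + offset, pd.Timestamp)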

View File

@ -0,0 +1,170 @@
from __future__ import annotations
import gzip
import io
import pathlib
import tarfile
from typing import (
TYPE_CHECKING,
Any,
Callable,
)
import uuid
import zipfile
from pandas.compat import (
get_bz2_file,
get_lzma_file,
)
from pandas.compat._optional import import_optional_dependency
import pandas as pd
from pandas._testing.contexts import ensure_clean
if TYPE_CHECKING:
from pandas._typing import (
FilePath,
ReadPickleBuffer,
)
from pandas import (
DataFrame,
Series,
)
# ------------------------------------------------------------------
# File-IO
def round_trip_pickle(
obj: Any, path: FilePath | ReadPickleBuffer | None = None
) -> DataFrame | Series:
"""
Pickle an object and then read it again.
Parameters
----------
obj : any object
The object to pickle and then re-read.
path : str, path object or file-like object, default None
The path where the pickled object is written and then read.
Returns
-------
pandas object
The original object that was pickled and then re-read.
"""
_path = path
if _path is None:
_path = f"__{uuid.uuid4()}__.pickle"
with ensure_clean(_path) as temp_path:
pd.to_pickle(obj, temp_path)
return pd.read_pickle(temp_path)
def round_trip_pathlib(writer, reader, path: str | None = None):
"""
Write an object to file specified by a pathlib.Path and read it back
Parameters
----------
writer : callable bound to pandas object
IO writing function (e.g. DataFrame.to_csv )
reader : callable
IO reading function (e.g. pd.read_csv )
path : str, default None
The path where the object is written and then read.
Returns
-------
pandas object
The original object that was serialized and then re-read.
"""
Path = pathlib.Path
if path is None:
path = "___pathlib___"
with ensure_clean(path) as path:
writer(Path(path)) # type: ignore[arg-type]
obj = reader(Path(path)) # type: ignore[arg-type]
return obj
def round_trip_localpath(writer, reader, path: str | None = None):
"""
Write an object to file specified by a py.path LocalPath and read it back.
Parameters
----------
writer : callable bound to pandas object
IO writing function (e.g. DataFrame.to_csv )
reader : callable
IO reading function (e.g. pd.read_csv )
path : str, default None
The path where the object is written and then read.
Returns
-------
pandas object
The original object that was serialized and then re-read.
"""
import pytest
LocalPath = pytest.importorskip("py.path").local
if path is None:
path = "___localpath___"
with ensure_clean(path) as path:
writer(LocalPath(path))
obj = reader(LocalPath(path))
return obj
def write_to_compressed(compression, path, data, dest: str = "test") -> None:
"""
Write data to a compressed file.
Parameters
----------
compression : {'gzip', 'bz2', 'zip', 'xz', 'zstd'}
The compression type to use.
path : str
The file path to write the data.
data : str
The data to write.
dest : str, default "test"
The destination file (for ZIP only)
Raises
------
ValueError : An invalid compression value was passed in.
"""
args: tuple[Any, ...] = (data,)
mode = "wb"
method = "write"
compress_method: Callable
if compression == "zip":
compress_method = zipfile.ZipFile
mode = "w"
args = (dest, data)
method = "writestr"
elif compression == "tar":
compress_method = tarfile.TarFile
mode = "w"
file = tarfile.TarInfo(name=dest)
        buf = io.BytesIO(data)  # avoid shadowing the builtin "bytes"
        file.size = len(data)
        args = (file, buf)
method = "addfile"
elif compression == "gzip":
compress_method = gzip.GzipFile
elif compression == "bz2":
compress_method = get_bz2_file()
elif compression == "zstd":
compress_method = import_optional_dependency("zstandard").open
elif compression == "xz":
compress_method = get_lzma_file()
else:
raise ValueError(f"Unrecognized compression type: {compression}")
with compress_method(path, mode=mode) as f:
getattr(f, method)(*args)
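A sketch of a gzip round-trip through this helper (illustrative; pd and ensure_clean are already imported above, and non-zip/tar compressors expect bytes):

with ensure_clean("__tmp.csv.gz") as path:
    write_to_compressed("gzip", path, b"a,b\n1,2\n")
    df = pd.read_csv(path, compression="gzip")  # DataFrame with columns a, b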

View File

@ -0,0 +1,232 @@
from __future__ import annotations
from contextlib import (
contextmanager,
nullcontext,
)
import inspect
import re
import sys
from typing import (
TYPE_CHECKING,
Literal,
cast,
)
import warnings
from pandas.compat import PY311
if TYPE_CHECKING:
from collections.abc import (
Generator,
Sequence,
)
@contextmanager
def assert_produces_warning(
expected_warning: type[Warning] | bool | tuple[type[Warning], ...] | None = Warning,
filter_level: Literal[
"error", "ignore", "always", "default", "module", "once"
] = "always",
check_stacklevel: bool = True,
raise_on_extra_warnings: bool = True,
match: str | None = None,
) -> Generator[list[warnings.WarningMessage], None, None]:
"""
Context manager for running code expected to either raise a specific warning,
multiple specific warnings, or not raise any warnings. Verifies that the code
raises the expected warning(s), and that it does not raise any other unexpected
warnings. It is basically a wrapper around ``warnings.catch_warnings``.
Parameters
----------
    expected_warning : {Warning, False, tuple[Warning, ...], None}, default Warning
        The type of warning expected. ``Warning`` is the base
        class for all warnings. To check for multiple types of warnings,
        pass them as a tuple. To check that no warning is returned,
        specify ``False`` or ``None``.
filter_level : str or None, default "always"
Specifies whether warnings are ignored, displayed, or turned
into errors.
Valid values are:
* "error" - turns matching warnings into exceptions
* "ignore" - discard the warning
* "always" - always emit a warning
* "default" - print the warning the first time it is generated
from each location
* "module" - print the warning the first time it is generated
from each module
* "once" - print the warning the first time it is generated
check_stacklevel : bool, default True
If True, displays the line that called the function containing
        the warning to show where the function is called. Otherwise, the
line that implements the function is displayed.
raise_on_extra_warnings : bool, default True
Whether extra warnings not of the type `expected_warning` should
cause the test to fail.
match : str, optional
Match warning message.
Examples
--------
>>> import warnings
>>> with assert_produces_warning():
... warnings.warn(UserWarning())
...
>>> with assert_produces_warning(False):
... warnings.warn(RuntimeWarning())
...
Traceback (most recent call last):
...
AssertionError: Caused unexpected warning(s): ['RuntimeWarning'].
>>> with assert_produces_warning(UserWarning):
... warnings.warn(RuntimeWarning())
Traceback (most recent call last):
...
AssertionError: Did not see expected warning of class 'UserWarning'.
    .. warning:: This is *not* thread-safe.
"""
__tracebackhide__ = True
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter(filter_level)
try:
yield w
finally:
if expected_warning:
expected_warning = cast(type[Warning], expected_warning)
_assert_caught_expected_warning(
caught_warnings=w,
expected_warning=expected_warning,
match=match,
check_stacklevel=check_stacklevel,
)
if raise_on_extra_warnings:
_assert_caught_no_extra_warnings(
caught_warnings=w,
expected_warning=expected_warning,
)
def maybe_produces_warning(warning: type[Warning], condition: bool, **kwargs):
"""
Return a context manager that possibly checks a warning based on the condition
"""
if condition:
return assert_produces_warning(warning, **kwargs)
else:
return nullcontext()
def _assert_caught_expected_warning(
*,
caught_warnings: Sequence[warnings.WarningMessage],
expected_warning: type[Warning],
match: str | None,
check_stacklevel: bool,
) -> None:
"""Assert that there was the expected warning among the caught warnings."""
saw_warning = False
matched_message = False
unmatched_messages = []
for actual_warning in caught_warnings:
if issubclass(actual_warning.category, expected_warning):
saw_warning = True
if check_stacklevel:
_assert_raised_with_correct_stacklevel(actual_warning)
if match is not None:
if re.search(match, str(actual_warning.message)):
matched_message = True
else:
unmatched_messages.append(actual_warning.message)
if not saw_warning:
raise AssertionError(
f"Did not see expected warning of class "
f"{repr(expected_warning.__name__)}"
)
if match and not matched_message:
raise AssertionError(
f"Did not see warning {repr(expected_warning.__name__)} "
f"matching '{match}'. The emitted warning messages are "
f"{unmatched_messages}"
)
def _assert_caught_no_extra_warnings(
*,
caught_warnings: Sequence[warnings.WarningMessage],
expected_warning: type[Warning] | bool | tuple[type[Warning], ...] | None,
) -> None:
"""Assert that no extra warnings apart from the expected ones are caught."""
extra_warnings = []
for actual_warning in caught_warnings:
if _is_unexpected_warning(actual_warning, expected_warning):
# GH#38630 pytest.filterwarnings does not suppress these.
if actual_warning.category == ResourceWarning:
# GH 44732: Don't make the CI flaky by filtering SSL-related
# ResourceWarning from dependencies
if "unclosed <ssl.SSLSocket" in str(actual_warning.message):
continue
# GH 44844: Matplotlib leaves font files open during the entire process
# upon import. Don't make CI flaky if ResourceWarning raised
# due to these open files.
if any("matplotlib" in mod for mod in sys.modules):
continue
if PY311 and actual_warning.category == EncodingWarning:
# EncodingWarnings are checked in the CI
# pyproject.toml errors on EncodingWarnings in pandas
# Ignore EncodingWarnings from other libraries
continue
extra_warnings.append(
(
actual_warning.category.__name__,
actual_warning.message,
actual_warning.filename,
actual_warning.lineno,
)
)
if extra_warnings:
raise AssertionError(f"Caused unexpected warning(s): {repr(extra_warnings)}")
def _is_unexpected_warning(
actual_warning: warnings.WarningMessage,
expected_warning: type[Warning] | bool | tuple[type[Warning], ...] | None,
) -> bool:
"""Check if the actual warning issued is unexpected."""
if actual_warning and not expected_warning:
return True
expected_warning = cast(type[Warning], expected_warning)
return bool(not issubclass(actual_warning.category, expected_warning))
def _assert_raised_with_correct_stacklevel(
actual_warning: warnings.WarningMessage,
) -> None:
# https://stackoverflow.com/questions/17407119/python-inspect-stack-is-slow
frame = inspect.currentframe()
for _ in range(4):
frame = frame.f_back # type: ignore[union-attr]
try:
caller_filename = inspect.getfile(frame) # type: ignore[arg-type]
finally:
# See note in
# https://docs.python.org/3/library/inspect.html#inspect.Traceback
del frame
msg = (
"Warning not set with correct stacklevel. "
f"File where warning is raised: {actual_warning.filename} != "
f"{caller_filename}. Warning message: {actual_warning.message}"
)
assert actual_warning.filename == caller_filename, msg

File diff suppressed because it is too large

View File

@ -0,0 +1,29 @@
"""
Helpers for sharing tests between DataFrame/Series
"""
from __future__ import annotations
from typing import TYPE_CHECKING
from pandas import DataFrame
if TYPE_CHECKING:
from pandas._typing import DtypeObj
def get_dtype(obj) -> DtypeObj:
if isinstance(obj, DataFrame):
# Note: we are assuming only one column
return obj.dtypes.iat[0]
else:
return obj.dtype
def get_obj(df: DataFrame, klass):
"""
For sharing tests using frame_or_series, either return the DataFrame
    unchanged or return its first column as a Series.
"""
if klass is DataFrame:
return df
return df._ixs(0, axis=1)

Some files were not shown because too many files have changed in this diff