@@ -0,0 +1,7 @@
from pandas.tests.extension.json.array import (
    JSONArray,
    JSONDtype,
    make_data,
)

__all__ = ["JSONArray", "JSONDtype", "make_data"]
@@ -0,0 +1,273 @@
| """ | ||||
| Test extension array for storing nested data in a pandas container. | ||||
|  | ||||
| The JSONArray stores lists of dictionaries. The storage mechanism is a list, | ||||
| not an ndarray. | ||||
|  | ||||
| Note | ||||
| ---- | ||||
| We currently store lists of UserDicts. Pandas has a few places | ||||
| internally that specifically check for dicts, and does non-scalar things | ||||
| in that case. We *want* the dictionaries to be treated as scalars, so we | ||||
| hack around pandas by using UserDicts. | ||||
| """ | ||||
from __future__ import annotations

from collections import (
    UserDict,
    abc,
)
import itertools
import numbers
import string
import sys
from typing import (
    TYPE_CHECKING,
    Any,
)
import warnings

import numpy as np

from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
from pandas.core.dtypes.common import (
    is_bool_dtype,
    is_list_like,
    pandas_dtype,
)

import pandas as pd
from pandas.api.extensions import (
    ExtensionArray,
    ExtensionDtype,
)
from pandas.core.indexers import unpack_tuple_and_ellipses

if TYPE_CHECKING:
    from collections.abc import Mapping

    from pandas._typing import type_t


class JSONDtype(ExtensionDtype):
    type = abc.Mapping
    name = "json"
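    # An empty UserDict serves as the NA sentinel; isna() compares each
    # element against it, so `{}` also counts as missing.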
    na_value: Mapping[str, Any] = UserDict()

    @classmethod
    def construct_array_type(cls) -> type_t[JSONArray]:
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        return JSONArray


class JSONArray(ExtensionArray):
    dtype = JSONDtype()
    __array_priority__ = 1000

    def __init__(self, values, dtype=None, copy=False) -> None:
        for val in values:
            if not isinstance(val, self.dtype.type):
                raise TypeError("All values must be of type " + str(self.dtype.type))
        self.data = values

        # Some aliases for common attribute names to ensure pandas supports
        # these
        self._items = self._data = self.data
        # those aliases are currently not working due to assumptions
        # in internal code (GH-20735)
        # self._values = self.values = self.data

    @classmethod
    def _from_sequence(cls, scalars, *, dtype=None, copy=False):
        return cls(scalars)

    @classmethod
    def _from_factorized(cls, values, original):
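        # `values` holds the frozen item-tuples produced by
        # _values_for_factorize(); the empty tuple is that method's NA
        # sentinel, so it is filtered out before rebuilding the mappings.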
        return cls([UserDict(x) for x in values if x != ()])

    def __getitem__(self, item):
        if isinstance(item, tuple):
            item = unpack_tuple_and_ellipses(item)

        if isinstance(item, numbers.Integral):
            return self.data[item]
        elif isinstance(item, slice) and item == slice(None):
            # Make sure we get a view
            return type(self)(self.data)
        elif isinstance(item, slice):
            # slice
            return type(self)(self.data[item])
        elif not is_list_like(item):
            # e.g. "foo" or 2.5
            # exception message copied from numpy
            raise IndexError(
                r"only integers, slices (`:`), ellipsis (`...`), numpy.newaxis "
                r"(`None`) and integer or boolean arrays are valid indices"
            )
        else:
            item = pd.api.indexers.check_array_indexer(self, item)
            if is_bool_dtype(item.dtype):
                return type(self)._from_sequence(
                    [x for x, m in zip(self, item) if m], dtype=self.dtype
                )
            # integer
            return type(self)([self.data[i] for i in item])

    def __setitem__(self, key, value) -> None:
        if isinstance(key, numbers.Integral):
            self.data[key] = value
        else:
            if not isinstance(value, (type(self), abc.Sequence)):
                # broadcast value
                value = itertools.cycle([value])

            if isinstance(key, np.ndarray) and key.dtype == "bool":
                # masking
                for i, (k, v) in enumerate(zip(key, value)):
                    if k:
                        assert isinstance(v, self.dtype.type)
                        self.data[i] = v
            else:
                for k, v in zip(key, value):
                    assert isinstance(v, self.dtype.type)
                    self.data[k] = v

    def __len__(self) -> int:
        return len(self.data)

    def __eq__(self, other):
        return NotImplemented

    def __ne__(self, other):
        return NotImplemented

    def __array__(self, dtype=None, copy=None):
        if copy is False:
            warnings.warn(
                "Starting with NumPy 2.0, the behavior of the 'copy' keyword has "
                "changed and passing 'copy=False' raises an error when returning "
                "a zero-copy NumPy array is not possible. pandas will follow "
                "this behavior starting with pandas 3.0.\nThis conversion to "
                "NumPy requires a copy, but 'copy=False' was passed. Consider "
                "using 'np.asarray(..)' instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

        if dtype is None:
            dtype = object
        if dtype == object:
            # on py38 builds it looks like numpy is inferring to a non-1D array
            return construct_1d_object_array_from_listlike(list(self))
        if copy is None:
            # Note: branch avoids `copy=None` for NumPy 1.x support
            return np.asarray(self.data, dtype=dtype)
        return np.asarray(self.data, dtype=dtype, copy=copy)

    @property
    def nbytes(self) -> int:
        return sys.getsizeof(self.data)

    def isna(self):
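        # `{} == UserDict()` is True, so empty mappings are reported as NA.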
        return np.array([x == self.dtype.na_value for x in self.data], dtype=bool)

    def take(self, indexer, allow_fill=False, fill_value=None):
        # re-implement here, since NumPy has trouble setting
        # sized objects like UserDicts into scalar slots of
        # an ndarray.
        indexer = np.asarray(indexer)
        msg = (
            "Index is out of bounds or cannot do a "
            "non-empty take from an empty array."
        )

        if allow_fill:
            if fill_value is None:
                fill_value = self.dtype.na_value
            # bounds check
            if (indexer < -1).any():
                raise ValueError
            try:
                output = [
                    self.data[loc] if loc != -1 else fill_value for loc in indexer
                ]
            except IndexError as err:
                raise IndexError(msg) from err
        else:
            try:
                output = [self.data[loc] for loc in indexer]
            except IndexError as err:
                raise IndexError(msg) from err

        return type(self)._from_sequence(output, dtype=self.dtype)

    def copy(self):
        return type(self)(self.data[:])

    def astype(self, dtype, copy=True):
        # NumPy has issues when all the dicts are the same length.
        # np.array([UserDict(...), UserDict(...)]) fails,
        # but np.array([{...}, {...}]) works, so cast.
        from pandas.core.arrays.string_ import StringDtype

        dtype = pandas_dtype(dtype)
        # needed to add this check for the Series constructor
        if isinstance(dtype, type(self.dtype)) and dtype == self.dtype:
            if copy:
                return self.copy()
            return self
        elif isinstance(dtype, StringDtype):
            arr_cls = dtype.construct_array_type()
            return arr_cls._from_sequence(self, dtype=dtype, copy=False)
        elif not copy:
            return np.asarray([dict(x) for x in self], dtype=dtype)
        else:
            return np.array([dict(x) for x in self], dtype=dtype, copy=copy)

    def unique(self):
        # Parent method doesn't work since np.array will try to infer
        # a 2-dim object.
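        # Dicts are unhashable, so dedupe via frozen item-tuples in a set,
        # then rebuild the dictionaries.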
        return type(self)([dict(x) for x in {tuple(d.items()) for d in self.data}])

    @classmethod
    def _concat_same_type(cls, to_concat):
        data = list(itertools.chain.from_iterable(x.data for x in to_concat))
        return cls(data)

    def _values_for_factorize(self):
        frozen = self._values_for_argsort()
        if len(frozen) == 0:
            # factorize_array expects 1-d array, this is a len-0 2-d array.
            frozen = frozen.ravel()
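        # The empty tuple is the NA sentinel; _from_factorized() filters it
        # back out when reconstructing the array.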
        return frozen, ()

    def _values_for_argsort(self):
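        # Item-tuples, unlike dicts, are hashable and orderable, which the
        # argsort/factorize machinery requires.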
        # Bypass NumPy's shape inference to get a (N,) array of tuples.
        frozen = [tuple(x.items()) for x in self]
        return construct_1d_object_array_from_listlike(frozen)

    def _pad_or_backfill(self, *, method, limit=None, copy=True):
        # GH#56616 - test EA method without limit_area argument
        return super()._pad_or_backfill(method=method, limit=limit, copy=copy)


def make_data():
    # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer
    rng = np.random.default_rng(2)
    return [
        UserDict(
            [
                (rng.choice(list(string.ascii_letters)), rng.integers(0, 100))
                for _ in range(rng.integers(0, 10))
            ]
        )
        for _ in range(100)
    ]
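

# Rough usage sketch (assuming the module is importable as in pandas' test
# suite; the output below is illustrative):
#
#   >>> arr = JSONArray(make_data())
#   >>> s = pd.Series(arr)
#   >>> s.dtype.name
#   'json'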
@@ -0,0 +1,490 @@
import collections
import operator
import sys

import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm
from pandas.tests.extension import base
from pandas.tests.extension.json.array import (
    JSONArray,
    JSONDtype,
    make_data,
)

# We intentionally don't run base.BaseSetitemTests because pandas'
# internals have trouble setting sequences of values into scalar positions.
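# Several base tests hash the array's values (e.g. value_counts and the
# groupby paths); UserDicts are unhashable, hence this reusable marker.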
unhashable = pytest.mark.xfail(reason="Unhashable")


@pytest.fixture
def dtype():
    return JSONDtype()


@pytest.fixture
def data():
    """Length-100 JSONArray for semantics test."""
    data = make_data()

    # Why the while loop? NumPy is unable to construct an ndarray from
    # equal-length UserDicts. Many of our operations involve coercing the
    # EA to an ndarray of objects. To avoid random test failures, we ensure
    # that our data is coercible to an ndarray. Several tests deal with only
    # the first two elements, so that's what we'll check.

    while len(data[0]) == len(data[1]):
        data = make_data()

    return JSONArray(data)


@pytest.fixture
def data_missing():
    """Length 2 array with [NA, Valid]"""
    return JSONArray([{}, {"a": 10}])


@pytest.fixture
def data_for_sorting():
    return JSONArray([{"b": 1}, {"c": 4}, {"a": 2, "c": 3}])


@pytest.fixture
def data_missing_for_sorting():
    return JSONArray([{"b": 1}, {}, {"a": 4}])


@pytest.fixture
def na_cmp():
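    # NA is an empty UserDict and two empty mappings compare equal, so a
    # plain == comparison suffices here.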
    return operator.eq


@pytest.fixture
def data_for_grouping():
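    # Follows the base fixture contract [B, B, NA, NA, A, A, B, C] with
    # A < B < C under the array's item-tuple ordering and {} as NA.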
    return JSONArray(
        [
            {"b": 1},
            {"b": 1},
            {},
            {},
            {"a": 0, "c": 2},
            {"a": 0, "c": 2},
            {"b": 1},
            {"c": 2},
        ]
    )


class TestJSONArray(base.ExtensionTests):
    @pytest.mark.xfail(
        reason="comparison method not implemented for JSONArray (GH-37867)"
    )
    def test_contains(self, data):
        # GH-37867
        super().test_contains(data)

    @pytest.mark.xfail(reason="not implemented constructor from dtype")
    def test_from_dtype(self, data):
        # construct from our dtype & string dtype
        super().test_from_dtype(data)

    @pytest.mark.xfail(reason="RecursionError, GH-33900")
    def test_series_constructor_no_data_with_index(self, dtype, na_value):
        # RecursionError: maximum recursion depth exceeded in comparison
        rec_limit = sys.getrecursionlimit()
        try:
            # Limit to avoid stack overflow on Windows CI
            sys.setrecursionlimit(100)
            super().test_series_constructor_no_data_with_index(dtype, na_value)
        finally:
            sys.setrecursionlimit(rec_limit)

    @pytest.mark.xfail(reason="RecursionError, GH-33900")
    def test_series_constructor_scalar_na_with_index(self, dtype, na_value):
        # RecursionError: maximum recursion depth exceeded in comparison
        rec_limit = sys.getrecursionlimit()
        try:
            # Limit to avoid stack overflow on Windows CI
            sys.setrecursionlimit(100)
            super().test_series_constructor_scalar_na_with_index(dtype, na_value)
        finally:
            sys.setrecursionlimit(rec_limit)

    @pytest.mark.xfail(reason="collection as scalar, GH-33901")
    def test_series_constructor_scalar_with_index(self, data, dtype):
        # TypeError: All values must be of type <class 'collections.abc.Mapping'>
        rec_limit = sys.getrecursionlimit()
        try:
            # Limit to avoid stack overflow on Windows CI
            sys.setrecursionlimit(100)
            super().test_series_constructor_scalar_with_index(data, dtype)
        finally:
            sys.setrecursionlimit(rec_limit)

    @pytest.mark.xfail(reason="Different definitions of NA")
    def test_stack(self):
        """
        The test does .astype(object).stack(future_stack=True). If we happen to have
        any missing values in `data`, then we'll end up with different
        rows since we consider `{}` NA, but `.astype(object)` doesn't.
        """
        super().test_stack()

    @pytest.mark.xfail(reason="dict for NA")
    def test_unstack(self, data, index):
        # The base test has NaN for the expected NA value.
        # this matches otherwise
        return super().test_unstack(data, index)

    @pytest.mark.xfail(reason="Setting a dict as a scalar")
    def test_fillna_series(self):
        """We treat dictionaries as a mapping in fillna, not a scalar."""
        super().test_fillna_series()

    @pytest.mark.xfail(reason="Setting a dict as a scalar")
    def test_fillna_frame(self):
        """We treat dictionaries as a mapping in fillna, not a scalar."""
        super().test_fillna_frame()

    @pytest.mark.parametrize(
        "limit_area, input_ilocs, expected_ilocs",
        [
            ("outside", [1, 0, 0, 0, 1], [1, 0, 0, 0, 1]),
            ("outside", [1, 0, 1, 0, 1], [1, 0, 1, 0, 1]),
            ("outside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 1]),
            ("outside", [0, 1, 0, 1, 0], [0, 1, 0, 1, 1]),
            ("inside", [1, 0, 0, 0, 1], [1, 1, 1, 1, 1]),
            ("inside", [1, 0, 1, 0, 1], [1, 1, 1, 1, 1]),
            ("inside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 0]),
            ("inside", [0, 1, 0, 1, 0], [0, 1, 1, 1, 0]),
        ],
    )
    def test_ffill_limit_area(
        self, data_missing, limit_area, input_ilocs, expected_ilocs
    ):
        # GH#56616
        msg = "JSONArray does not implement limit_area"
        with pytest.raises(NotImplementedError, match=msg):
            super().test_ffill_limit_area(
                data_missing, limit_area, input_ilocs, expected_ilocs
            )

    @unhashable
    def test_value_counts(self, all_data, dropna):
        super().test_value_counts(all_data, dropna)

    @unhashable
    def test_value_counts_with_normalize(self, data):
        super().test_value_counts_with_normalize(data)

    @unhashable
    def test_sort_values_frame(self):
        # TODO (EA.factorize): see if _values_for_factorize allows this.
        super().test_sort_values_frame()

    @pytest.mark.parametrize("ascending", [True, False])
    def test_sort_values(self, data_for_sorting, ascending, sort_by_key):
        super().test_sort_values(data_for_sorting, ascending, sort_by_key)

    @pytest.mark.parametrize("ascending", [True, False])
    def test_sort_values_missing(
        self, data_missing_for_sorting, ascending, sort_by_key
    ):
        super().test_sort_values_missing(
            data_missing_for_sorting, ascending, sort_by_key
        )

    @pytest.mark.xfail(reason="combine for JSONArray not supported")
    def test_combine_le(self, data_repeated):
        super().test_combine_le(data_repeated)

    @pytest.mark.xfail(
        reason="combine for JSONArray not supported - "
        "may pass depending on random data",
        strict=False,
        raises=AssertionError,
    )
    def test_combine_first(self, data):
        super().test_combine_first(data)

    @pytest.mark.xfail(reason="broadcasting error")
    def test_where_series(self, data, na_value):
        # Fails with
        # *** ValueError: operands could not be broadcast together
        # with shapes (4,) (4,) (0,)
        super().test_where_series(data, na_value)

    @pytest.mark.xfail(reason="Can't compare dicts.")
    def test_searchsorted(self, data_for_sorting):
        super().test_searchsorted(data_for_sorting)

    @pytest.mark.xfail(reason="Can't compare dicts.")
    def test_equals(self, data, na_value, as_series):
        super().test_equals(data, na_value, as_series)

    @pytest.mark.skip("fill-value is interpreted as a dict of values")
    def test_fillna_copy_frame(self, data_missing):
        super().test_fillna_copy_frame(data_missing)

    def test_equals_same_data_different_object(
        self, data, using_copy_on_write, request
    ):
        if using_copy_on_write:
            mark = pytest.mark.xfail(reason="Fails with CoW")
            request.applymarker(mark)
        super().test_equals_same_data_different_object(data)

    @pytest.mark.xfail(reason="failing on np.array(self, dtype=str)")
    def test_astype_str(self):
        """This currently fails in NumPy on np.array(self, dtype=str) with

        *** ValueError: setting an array element with a sequence
        """
        super().test_astype_str()

    @unhashable
    def test_groupby_extension_transform(self):
        """
        This currently fails in Series.name.setter, since the
        name must be hashable, but the value is a dictionary.
        I think this is what we want, i.e. `.name` should be the original
        values, and not the values for factorization.
        """
        super().test_groupby_extension_transform()

    @unhashable
    def test_groupby_extension_apply(self):
        """
        This fails in Index._do_unique_check with

        >   hash(val)
        E   TypeError: unhashable type: 'UserDict' with

        I suspect that once we support Index[ExtensionArray],
        we'll be able to dispatch unique.
        """
        super().test_groupby_extension_apply()

    @unhashable
    def test_groupby_extension_agg(self):
        """
        This fails when we get to tm.assert_series_equal when left.index
        contains dictionaries, which are not hashable.
        """
        super().test_groupby_extension_agg()

    @unhashable
    def test_groupby_extension_no_sort(self):
        """
        This fails when we get to tm.assert_series_equal when left.index
        contains dictionaries, which are not hashable.
        """
        super().test_groupby_extension_no_sort()

    def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request):
        if len(data[0]) != 1:
            mark = pytest.mark.xfail(reason="raises in coercing to Series")
            request.applymarker(mark)
        super().test_arith_frame_with_scalar(data, all_arithmetic_operators)

    def test_compare_array(self, data, comparison_op, request):
        if comparison_op.__name__ in ["eq", "ne"]:
            mark = pytest.mark.xfail(reason="Comparison methods not implemented")
            request.applymarker(mark)
        super().test_compare_array(data, comparison_op)

    @pytest.mark.xfail(reason="ValueError: Must have equal len keys and value")
    def test_setitem_loc_scalar_mixed(self, data):
        super().test_setitem_loc_scalar_mixed(data)

    @pytest.mark.xfail(reason="ValueError: Must have equal len keys and value")
    def test_setitem_loc_scalar_multiple_homogoneous(self, data):
        super().test_setitem_loc_scalar_multiple_homogoneous(data)

    @pytest.mark.xfail(reason="ValueError: Must have equal len keys and value")
    def test_setitem_iloc_scalar_mixed(self, data):
        super().test_setitem_iloc_scalar_mixed(data)

    @pytest.mark.xfail(reason="ValueError: Must have equal len keys and value")
    def test_setitem_iloc_scalar_multiple_homogoneous(self, data):
        super().test_setitem_iloc_scalar_multiple_homogoneous(data)

    @pytest.mark.parametrize(
        "mask",
        [
            np.array([True, True, True, False, False]),
            pd.array([True, True, True, False, False], dtype="boolean"),
            pd.array([True, True, True, pd.NA, pd.NA], dtype="boolean"),
        ],
        ids=["numpy-array", "boolean-array", "boolean-array-na"],
    )
    def test_setitem_mask(self, data, mask, box_in_series, request):
        if box_in_series:
            mark = pytest.mark.xfail(
                reason="cannot set using a list-like indexer with a different length"
            )
            request.applymarker(mark)
        elif not isinstance(mask, np.ndarray):
            mark = pytest.mark.xfail(reason="Issues unwanted DeprecationWarning")
            request.applymarker(mark)
        super().test_setitem_mask(data, mask, box_in_series)

    def test_setitem_mask_raises(self, data, box_in_series, request):
        if not box_in_series:
            mark = pytest.mark.xfail(reason="Fails to raise")
            request.applymarker(mark)

        super().test_setitem_mask_raises(data, box_in_series)

    @pytest.mark.xfail(
        reason="cannot set using a list-like indexer with a different length"
    )
    def test_setitem_mask_boolean_array_with_na(self, data, box_in_series):
        super().test_setitem_mask_boolean_array_with_na(data, box_in_series)

    @pytest.mark.parametrize(
        "idx",
        [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])],
        ids=["list", "integer-array", "numpy-array"],
    )
    def test_setitem_integer_array(self, data, idx, box_in_series, request):
        if box_in_series:
            mark = pytest.mark.xfail(
                reason="cannot set using a list-like indexer with a different length"
            )
            request.applymarker(mark)
        super().test_setitem_integer_array(data, idx, box_in_series)

    @pytest.mark.xfail(reason="list indices must be integers or slices, not NAType")
    @pytest.mark.parametrize(
        "idx, box_in_series",
        [
            ([0, 1, 2, pd.NA], False),
            pytest.param(
                [0, 1, 2, pd.NA], True, marks=pytest.mark.xfail(reason="GH-31948")
            ),
            (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
            (pd.array([0, 1, 2, pd.NA], dtype="Int64"), True),
        ],
        ids=["list-False", "list-True", "integer-array-False", "integer-array-True"],
    )
    def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series):
        super().test_setitem_integer_with_missing_raises(data, idx, box_in_series)

    @pytest.mark.xfail(reason="Fails to raise")
    def test_setitem_scalar_key_sequence_raise(self, data):
        super().test_setitem_scalar_key_sequence_raise(data)

    def test_setitem_with_expansion_dataframe_column(self, data, full_indexer, request):
        if "full_slice" in request.node.name:
            mark = pytest.mark.xfail(reason="slice is not iterable")
            request.applymarker(mark)
        super().test_setitem_with_expansion_dataframe_column(data, full_indexer)

    @pytest.mark.xfail(reason="slice is not iterable")
    def test_setitem_frame_2d_values(self, data):
        super().test_setitem_frame_2d_values(data)

    @pytest.mark.xfail(
        reason="cannot set using a list-like indexer with a different length"
    )
    @pytest.mark.parametrize("setter", ["loc", None])
    def test_setitem_mask_broadcast(self, data, setter):
        super().test_setitem_mask_broadcast(data, setter)

    @pytest.mark.xfail(
        reason="cannot set using a slice indexer with a different length"
    )
    def test_setitem_slice(self, data, box_in_series):
        super().test_setitem_slice(data, box_in_series)

    @pytest.mark.xfail(reason="slice object is not iterable")
    def test_setitem_loc_iloc_slice(self, data):
        super().test_setitem_loc_iloc_slice(data)

    @pytest.mark.xfail(reason="slice object is not iterable")
    def test_setitem_slice_mismatch_length_raises(self, data):
        super().test_setitem_slice_mismatch_length_raises(data)

    @pytest.mark.xfail(reason="slice object is not iterable")
    def test_setitem_slice_array(self, data):
        super().test_setitem_slice_array(data)

    @pytest.mark.xfail(reason="Fails to raise")
    def test_setitem_invalid(self, data, invalid_scalar):
        super().test_setitem_invalid(data, invalid_scalar)

    @pytest.mark.xfail(reason="only integer scalar arrays can be converted")
    def test_setitem_2d_values(self, data):
        super().test_setitem_2d_values(data)

    @pytest.mark.xfail(reason="data type 'json' not understood")
    @pytest.mark.parametrize("engine", ["c", "python"])
    def test_EA_types(self, engine, data, request):
        super().test_EA_types(engine, data, request)


def custom_assert_series_equal(left, right, *args, **kwargs):
    # NumPy doesn't handle an array of equal-length UserDicts.
    # The default assert_series_equal eventually does a
    # Series.values, which raises. We work around it by
    # converting the UserDicts to dicts.
    if left.dtype.name == "json":
        assert left.dtype == right.dtype
        left = pd.Series(
            JSONArray(left.values.astype(object)), index=left.index, name=left.name
        )
        right = pd.Series(
            JSONArray(right.values.astype(object)),
            index=right.index,
            name=right.name,
        )
    tm.assert_series_equal(left, right, *args, **kwargs)


def custom_assert_frame_equal(left, right, *args, **kwargs):
    obj_type = kwargs.get("obj", "DataFrame")
    tm.assert_index_equal(
        left.columns,
        right.columns,
        exact=kwargs.get("check_column_type", "equiv"),
        check_names=kwargs.get("check_names", True),
        check_exact=kwargs.get("check_exact", False),
        check_categorical=kwargs.get("check_categorical", True),
        obj=f"{obj_type}.columns",
    )

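    # Compare the json columns with the custom series helper, then drop them
    # and compare the remainder with the stock frame assertion.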
    jsons = left.columns[left.dtypes == "json"]

    for col in jsons:
        custom_assert_series_equal(left[col], right[col], *args, **kwargs)

    left = left.drop(columns=jsons)
    right = right.drop(columns=jsons)
    tm.assert_frame_equal(left, right, *args, **kwargs)


def test_custom_asserts():
    # This would always trigger the KeyError from trying to put
    # an array of equal-length UserDicts inside an ndarray.
    data = JSONArray(
        [
            collections.UserDict({"a": 1}),
            collections.UserDict({"b": 2}),
            collections.UserDict({"c": 3}),
        ]
    )
    a = pd.Series(data)
    custom_assert_series_equal(a, a)
    custom_assert_frame_equal(a.to_frame(), a.to_frame())

    b = pd.Series(data.take([0, 0, 1]))
    msg = r"Series are different"
    with pytest.raises(AssertionError, match=msg):
        custom_assert_series_equal(a, b)

    with pytest.raises(AssertionError, match=msg):
        custom_assert_frame_equal(a.to_frame(), b.to_frame())