done
This commit is contained in:
		| @ -0,0 +1,89 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     PeriodIndex, | ||||
|     Series, | ||||
|     date_range, | ||||
|     period_range, | ||||
|     timedelta_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class DropDuplicates:
    """
    Shared ``drop_duplicates``/``duplicated`` checks.

    Subclasses provide the ``idx`` fixture (a ten-element, named index).
    """

    def test_drop_duplicates_metadata(self, idx):
        # GH#10115
        deduped = idx.drop_duplicates()
        tm.assert_index_equal(idx, deduped)
        assert idx.freq == deduped.freq

        doubled = idx.append(idx)
        deduped = doubled.drop_duplicates()

        if isinstance(idx, PeriodIndex):
            # PeriodIndex is the one case where freq is not reset
            expected = idx
            assert deduped.freq == expected.freq
        else:
            assert doubled.freq is None
            assert deduped.freq is None
            expected = idx._with_freq(None)

        tm.assert_index_equal(deduped, expected)

    @pytest.mark.parametrize(
        "keep, expected, index",
        [
            (
                "first",
                np.array([False] * 10 + [True] * 5),
                np.arange(10, dtype=np.int64),
            ),
            (
                "last",
                np.array([True] * 5 + [False] * 10),
                np.arange(5, 15, dtype=np.int64),
            ),
            (
                False,
                np.array([True] * 5 + [False] * 5 + [True] * 5),
                np.arange(5, 10, dtype=np.int64),
            ),
        ],
    )
    def test_drop_duplicates(self, keep, expected, index, idx):
        # to check Index/Series compat
        # ``expected`` is the duplicated-mask; ``index`` holds the positions
        # that survive in the Series result.
        with_dups = idx.append(idx[:5])
        dup_mask = expected

        tm.assert_numpy_array_equal(with_dups.duplicated(keep=keep), dup_mask)

        survivors = with_dups[~dup_mask]
        tm.assert_index_equal(with_dups.drop_duplicates(keep=keep), survivors)

        ser_result = Series(with_dups).drop_duplicates(keep=keep)
        ser_expected = Series(survivors, index=index)
        tm.assert_series_equal(ser_result, ser_expected)
|  | ||||
|  | ||||
class TestDropDuplicatesPeriodIndex(DropDuplicates):
    """Run the shared drop_duplicates checks against PeriodIndex."""

    @pytest.fixture(params=["D", "3D", "h", "2h", "min", "2min", "s", "3s"])
    def freq(self, request):
        # one frequency alias per parametrized run
        alias = request.param
        return alias

    @pytest.fixture
    def idx(self, freq):
        range_kwargs = {"periods": 10, "freq": freq, "name": "idx"}
        return period_range("2011-01-01", **range_kwargs)
|  | ||||
|  | ||||
class TestDropDuplicatesDatetimeIndex(DropDuplicates):
    """Run the shared drop_duplicates checks against DatetimeIndex."""

    @pytest.fixture
    def idx(self, freq_sample):
        range_kwargs = {"freq": freq_sample, "periods": 10, "name": "idx"}
        return date_range("2011-01-01", **range_kwargs)
|  | ||||
|  | ||||
class TestDropDuplicatesTimedeltaIndex(DropDuplicates):
    """Run the shared drop_duplicates checks against TimedeltaIndex."""

    @pytest.fixture
    def idx(self, freq_sample):
        range_kwargs = {"periods": 10, "freq": freq_sample, "name": "idx"}
        return timedelta_range("1 day", **range_kwargs)
| @ -0,0 +1,181 @@ | ||||
| """ | ||||
| Tests shared for DatetimeIndex/TimedeltaIndex/PeriodIndex | ||||
| """ | ||||
| from datetime import ( | ||||
|     datetime, | ||||
|     timedelta, | ||||
| ) | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     CategoricalIndex, | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     PeriodIndex, | ||||
|     TimedeltaIndex, | ||||
|     date_range, | ||||
|     period_range, | ||||
|     timedelta_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class EqualsTests:
    """
    ``Index.equals`` checks shared by the datetime-like index test classes.

    Subclasses provide the ``index`` fixture.
    """

    def test_not_equals_numeric(self, index):
        # numeric indexes built from the same int64 values must not compare equal
        i8 = index.asi8
        numeric_variants = (
            Index(i8),
            Index(i8.astype("u8")),
            Index(i8).astype("f8"),
        )
        for other in numeric_variants:
            assert not index.equals(other)

    def test_equals(self, index):
        as_object = index.astype(object)
        equal_variants = (
            index,
            as_object,
            CategoricalIndex(index),
            CategoricalIndex(as_object),
        )
        for other in equal_variants:
            assert index.equals(other)

    def test_not_equals_non_arraylike(self, index):
        as_list = list(index)
        assert not index.equals(as_list)

    def test_not_equals_strings(self, index):
        other = Index([str(x) for x in index], dtype=object)
        for candidate in (other, CategoricalIndex(other)):
            assert not index.equals(candidate)

    def test_not_equals_misc_strs(self, index):
        letters = Index(list("abc"))
        assert not index.equals(letters)
|  | ||||
|  | ||||
class TestPeriodIndexEquals(EqualsTests):
    """``equals`` checks for PeriodIndex."""

    @pytest.fixture
    def index(self):
        return period_range("2013-01-01", periods=5, freq="D")

    # TODO: de-duplicate with other test_equals2 methods
    @pytest.mark.parametrize("freq", ["D", "M"])
    def test_equals2(self, freq):
        # GH#13107
        values = ["2011-01-01", "2011-01-02", "NaT"]

        idx = PeriodIndex(values, freq=freq)
        for same in (idx, idx.copy(), idx.astype(object)):
            assert idx.equals(same)
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        for non_index in (list(idx), pd.Series(idx)):
            assert not idx.equals(non_index)

        # same strings, different freq
        idx2 = PeriodIndex(values, freq="h")
        for other in (
            idx2,
            idx2.copy(),
            idx2.astype(object),
            list(idx2),
            pd.Series(idx2),
        ):
            assert not idx.equals(other)
        assert not idx.astype(object).equals(idx2)

        # same internal ordinals, different freq
        hourly_values = idx._values._simple_new(
            idx._values.asi8, dtype=pd.PeriodDtype("h")
        )
        idx3 = PeriodIndex._simple_new(hourly_values)
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        for other in (
            idx3,
            idx3.copy(),
            idx3.astype(object),
            list(idx3),
            pd.Series(idx3),
        ):
            assert not idx.equals(other)
        assert not idx.astype(object).equals(idx3)
|  | ||||
|  | ||||
class TestDatetimeIndexEquals(EqualsTests):
    """``equals`` checks for DatetimeIndex."""

    @pytest.fixture
    def index(self):
        return date_range("2013-01-01", periods=5)

    def test_equals2(self):
        # GH#13107
        values = ["2011-01-01", "2011-01-02", "NaT"]

        idx = DatetimeIndex(values)
        for same in (idx, idx.copy(), idx.astype(object)):
            assert idx.equals(same)
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        for non_index in (list(idx), pd.Series(idx)):
            assert not idx.equals(non_index)

        # same strings, tz-aware
        idx2 = DatetimeIndex(values, tz="US/Pacific")
        for other in (
            idx2,
            idx2.copy(),
            idx2.astype(object),
            list(idx2),
            pd.Series(idx2),
        ):
            assert not idx.equals(other)
        assert not idx.astype(object).equals(idx2)

        # same internal, different tz
        idx3 = DatetimeIndex(idx.asi8, tz="US/Pacific")
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        for other in (
            idx3,
            idx3.copy(),
            idx3.astype(object),
            list(idx3),
            pd.Series(idx3),
        ):
            assert not idx.equals(other)
        assert not idx.astype(object).equals(idx3)

        # check that we do not raise when comparing with OutOfBounds objects
        oob = Index([datetime(2500, 1, 1)] * 3, dtype=object)
        for dti in (idx, idx2, idx3):
            assert not dti.equals(oob)

        # check that we do not raise when comparing with OutOfBounds dt64
        oob2 = oob.map(np.datetime64)
        for dti in (idx, idx2, idx3):
            assert not dti.equals(oob2)

    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_not_equals_bday(self, freq):
        rng = date_range("2009-01-01", "2010-01-01", freq=freq)
        as_list = list(rng)
        assert not rng.equals(as_list)
|  | ||||
|  | ||||
class TestTimedeltaIndexEquals(EqualsTests):
    """``equals`` checks for TimedeltaIndex."""

    @pytest.fixture
    def index(self):
        return timedelta_range("1 day", periods=10)

    def test_equals2(self):
        # GH#13107
        idx = TimedeltaIndex(["1 days", "2 days", "NaT"])
        for same in (idx, idx.copy(), idx.astype(object)):
            assert idx.equals(same)
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        for non_index in (list(idx), pd.Series(idx)):
            assert not idx.equals(non_index)

        # same elements, different order
        idx2 = TimedeltaIndex(["2 days", "1 days", "NaT"])
        for other in (
            idx2,
            idx2.copy(),
            idx2.astype(object),
            list(idx2),
            pd.Series(idx2),
        ):
            assert not idx.equals(other)
        assert not idx.astype(object).equals(idx2)
        assert not idx.astype(object).equals(idx2.astype(object))

        # Check that we dont raise OverflowError on comparisons outside the
        #  implementation range GH#28532
        oob = Index([timedelta(days=10**6)] * 3, dtype=object)
        for tdi in (idx, idx2):
            assert not tdi.equals(oob)

        oob2 = Index([np.timedelta64(x) for x in oob], dtype=object)
        assert (oob == oob2).all()
        for tdi in (idx, idx2):
            assert not tdi.equals(oob2)

        oob3 = oob.map(np.timedelta64)
        assert (oob3 == oob).all()
        for tdi in (idx, idx2):
            assert not tdi.equals(oob3)
| @ -0,0 +1,45 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
# Dtypes exercised pairwise by test_get_indexer_non_unique_wrong_dtype:
# timedelta64, naive datetime64, tz-aware datetime64 and period.
dtlike_dtypes = [
    np.dtype("timedelta64[ns]"),
    np.dtype("datetime64[ns]"),
    pd.DatetimeTZDtype("ns", "Asia/Tokyo"),
    pd.PeriodDtype("ns"),
]


@pytest.mark.parametrize("ldtype", dtlike_dtypes)
@pytest.mark.parametrize("rdtype", dtlike_dtypes)
def test_get_indexer_non_unique_wrong_dtype(ldtype, rdtype):
    """
    Check ``get_indexer_non_unique`` between indexes built from the same
    int64 values but with (possibly) different datetime-like dtypes.

    When both sides use the very same dtype object every target element is
    found (twice, since the values are tiled); otherwise nothing matches and
    every target position is reported as missing.
    """
    vals = np.tile(3600 * 10**9 * np.arange(3, dtype=np.int64), 2)

    def construct(dtype):
        # Select the period case by type rather than by its position in
        # dtlike_dtypes, so reordering/extending the list cannot break this.
        if isinstance(dtype, pd.PeriodDtype):
            # PeriodArray will try to cast ints to strings
            return DatetimeIndex(vals).astype(dtype)
        return Index(vals, dtype=dtype)

    left = construct(ldtype)
    right = construct(rdtype)

    result = left.get_indexer_non_unique(right)

    if ldtype is rdtype:
        ex1 = np.array([0, 3, 1, 4, 2, 5] * 2, dtype=np.intp)
        ex2 = np.array([], dtype=np.intp)
        tm.assert_numpy_array_equal(result[0], ex1)
        tm.assert_numpy_array_equal(result[1], ex2)

    else:
        no_matches = np.array([-1] * 6, dtype=np.intp)
        missing = np.arange(6, dtype=np.intp)
        tm.assert_numpy_array_equal(result[0], no_matches)
        tm.assert_numpy_array_equal(result[1], missing)
| @ -0,0 +1,46 @@ | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     NaT, | ||||
|     date_range, | ||||
| ) | ||||
|  | ||||
|  | ||||
def test_is_monotonic_with_nat():
    """
    Monotonicity/uniqueness flags with and without NaT, checked on
    PeriodIndex, DatetimeIndex, TimedeltaIndex and each index's ``_engine``.
    """
    # GH#31437
    # PeriodIndex.is_monotonic_increasing should behave analogously to DatetimeIndex,
    #  in particular never be monotonic when we have NaT
    dti = date_range("2016-01-01", periods=3)
    pi = dti.to_period("D")
    tdi = Index(dti.view("timedelta64[ns]"))

    # No NaT: increasing, not decreasing, unique.
    for obj in [pi, pi._engine, dti, dti._engine, tdi, tdi._engine]:
        assert obj.is_monotonic_increasing
        assert not obj.is_monotonic_decreasing
        assert obj.is_unique

    # NaT inserted at the front: monotonic in neither direction.
    dti1 = dti.insert(0, NaT)
    pi1 = dti1.to_period("D")
    tdi1 = Index(dti1.view("timedelta64[ns]"))

    for obj in [pi1, pi1._engine, dti1, dti1._engine, tdi1, tdi1._engine]:
        assert not obj.is_monotonic_increasing
        assert not obj.is_monotonic_decreasing
        assert obj.is_unique

    # NaT appended at the end: monotonic in neither direction.
    dti2 = dti.insert(3, NaT)
    pi2 = dti2.to_period("h")
    tdi2 = Index(dti2.view("timedelta64[ns]"))

    for obj in [pi2, pi2._engine, dti2, dti2._engine, tdi2, tdi2._engine]:
        assert not obj.is_monotonic_increasing
        assert not obj.is_monotonic_decreasing
        assert obj.is_unique
| @ -0,0 +1,53 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     NaT, | ||||
|     PeriodIndex, | ||||
|     TimedeltaIndex, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class NATests:
    """
    NaT-handling checks shared by the datetime-like index test classes.

    Subclasses provide ``index_without_na``, a two-element index with no NaT.
    """

    def test_nat(self, index_without_na):
        empty_index = index_without_na[:0]

        # work on a deep copy so the fixture's own data stays NaT-free
        index_with_na = index_without_na.copy(deep=True)
        index_with_na._data[1] = NaT

        for index in (empty_index, index_with_na, index_without_na):
            assert index._na_value is NaT

        cases = (
            (index_without_na, [False, False], False),
            (index_with_na, [False, True], True),
        )
        for idx, isnan_mask, has_nans in cases:
            assert idx._can_hold_na

            tm.assert_numpy_array_equal(idx._isnan, np.array(isnan_mask))
            assert idx.hasnans is has_nans
|  | ||||
|  | ||||
class TestDatetimeIndexNA(NATests):
    """Run the shared NaT checks against DatetimeIndex."""

    @pytest.fixture
    def index_without_na(self, tz_naive_fixture):
        dates = ["2011-01-01", "2011-01-02"]
        return DatetimeIndex(dates, tz=tz_naive_fixture)
|  | ||||
|  | ||||
class TestTimedeltaIndexNA(NATests):
    """Run the shared NaT checks against TimedeltaIndex."""

    @pytest.fixture
    def index_without_na(self):
        deltas = ["1 days", "2 days"]
        return TimedeltaIndex(deltas)
|  | ||||
|  | ||||
class TestPeriodIndexNA(NATests):
    """Run the shared NaT checks against PeriodIndex."""

    @pytest.fixture
    def index_without_na(self):
        days = ["2011-01-01", "2011-01-02"]
        return PeriodIndex(days, freq="D")
| @ -0,0 +1,315 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     NaT, | ||||
|     PeriodIndex, | ||||
|     TimedeltaIndex, | ||||
|     timedelta_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def check_freq_ascending(ordered, orig, ascending):
    """
    Assert the freq of a sorted index whose original is range-like.

    "Range-like" means the original was generated (or could have been) by
    period_range/date_range/timedelta_range.  A PeriodIndex must keep its
    freq; a DatetimeIndex/TimedeltaIndex must keep the freq multiple when
    sorted ascending and have it negated when sorted descending.  Any other
    index type is not checked.
    """
    if isinstance(ordered, PeriodIndex):
        assert ordered.freq == orig.freq
        return

    if not isinstance(ordered, (DatetimeIndex, TimedeltaIndex)):
        return

    sign = 1 if ascending else -1
    assert ordered.freq.n == sign * orig.freq.n
|  | ||||
|  | ||||
def check_freq_nonmonotonic(ordered, orig):
    """
    Assert the freq of a sorted index whose original is _not_ range-like.

    "Not range-like" means the original could not have been generated by
    period_range/date_range/timedelta_range.  A PeriodIndex must keep its
    freq; a DatetimeIndex/TimedeltaIndex must come back with no freq.  Any
    other index type is not checked.
    """
    if isinstance(ordered, PeriodIndex):
        assert ordered.freq == orig.freq
        return

    if isinstance(ordered, (DatetimeIndex, TimedeltaIndex)):
        assert ordered.freq is None
|  | ||||
|  | ||||
class TestSortValues:
    """
    ``sort_values``/``argmin``/``argmax`` checks for DatetimeIndex,
    TimedeltaIndex and PeriodIndex, with a plain integer Index included in a
    few parametrizations as a compatibility reference.
    """

    @pytest.fixture(params=[DatetimeIndex, TimedeltaIndex, PeriodIndex])
    def non_monotonic_idx(self, request):
        """Three-element index with its largest value first and smallest second."""
        if request.param is DatetimeIndex:
            return DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
        elif request.param is PeriodIndex:
            dti = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
            return dti.to_period("D")
        else:
            return TimedeltaIndex(
                ["1 day 00:00:05", "1 day 00:00:01", "1 day 00:00:02"]
            )

    def test_argmin_argmax(self, non_monotonic_idx):
        assert non_monotonic_idx.argmin() == 1
        assert non_monotonic_idx.argmax() == 0

    def test_sort_values(self, non_monotonic_idx):
        idx = non_monotonic_idx
        ordered = idx.sort_values()
        assert ordered.is_monotonic_increasing
        ordered = idx.sort_values(ascending=False)
        assert ordered[::-1].is_monotonic_increasing

        ordered, dexer = idx.sort_values(return_indexer=True)
        assert ordered.is_monotonic_increasing
        tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0], dtype=np.intp))

        ordered, dexer = idx.sort_values(return_indexer=True, ascending=False)
        assert ordered[::-1].is_monotonic_increasing
        tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1], dtype=np.intp))

    def check_sort_values_with_freq(self, idx):
        """
        Shared checks for an already-sorted, three-element ``idx``.

        Ascending sort must return ``idx`` unchanged with indexer [0, 1, 2];
        descending sort must return ``idx[::-1]`` with indexer [2, 1, 0].
        The resulting freq is validated by ``check_freq_ascending``.
        """
        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, idx)
        check_freq_ascending(ordered, idx, True)

        ordered = idx.sort_values(ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        check_freq_ascending(ordered, idx, False)

        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, idx)
        tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2], dtype=np.intp))
        check_freq_ascending(ordered, idx, True)

        ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0], dtype=np.intp))
        check_freq_ascending(ordered, idx, False)

    @pytest.mark.parametrize("freq", ["D", "h"])
    def test_sort_values_with_freq_timedeltaindex(self, freq):
        # GH#10295
        idx = timedelta_range(start=f"1{freq}", periods=3, freq=freq).rename("idx")

        self.check_sort_values_with_freq(idx)

    @pytest.mark.parametrize(
        "idx",
        [
            DatetimeIndex(
                ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx"
            ),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                freq="h",
                name="tzidx",
                tz="Asia/Tokyo",
            ),
        ],
    )
    def test_sort_values_with_freq_datetimeindex(self, idx):
        self.check_sort_values_with_freq(idx)

    @pytest.mark.parametrize("freq", ["D", "2D", "4D"])
    def test_sort_values_with_freq_periodindex(self, freq):
        # here with_freq refers to being period_range-like
        idx = PeriodIndex(
            ["2011-01-01", "2011-01-02", "2011-01-03"], freq=freq, name="idx"
        )
        self.check_sort_values_with_freq(idx)

    @pytest.mark.parametrize(
        "idx",
        [
            PeriodIndex(["2011", "2012", "2013"], name="pidx", freq="Y"),
            Index([2011, 2012, 2013], name="idx"),  # for compatibility check
        ],
    )
    def test_sort_values_with_freq_periodindex2(self, idx):
        # here with_freq indicates this is period_range-like
        self.check_sort_values_with_freq(idx)

    def check_sort_values_without_freq(self, idx, expected):
        """
        Shared checks for an unsorted, five-element ``idx``.

        ``expected`` is ``idx`` sorted ascending with any NaT first.  The
        hard-coded indexers require the elements of ``idx`` to be arranged so
        that the ascending order is positions [0, 4, 3, 1, 2].  Calls that
        rely on the default ``na_position`` are skipped when ``idx`` has NaT.
        The resulting freq is validated by ``check_freq_nonmonotonic``.
        """
        ordered = idx.sort_values(na_position="first")
        tm.assert_index_equal(ordered, expected)
        check_freq_nonmonotonic(ordered, idx)

        if not idx.isna().any():
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, expected)
            check_freq_nonmonotonic(ordered, idx)

        ordered = idx.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        check_freq_nonmonotonic(ordered, idx)

        ordered, indexer = idx.sort_values(return_indexer=True, na_position="first")
        tm.assert_index_equal(ordered, expected)

        exp = np.array([0, 4, 3, 1, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(indexer, exp)
        check_freq_nonmonotonic(ordered, idx)

        if not idx.isna().any():
            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, expected)

            exp = np.array([0, 4, 3, 1, 2], dtype=np.intp)
            tm.assert_numpy_array_equal(indexer, exp)
            check_freq_nonmonotonic(ordered, idx)

        ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])

        exp = np.array([2, 1, 3, 0, 4], dtype=np.intp)
        tm.assert_numpy_array_equal(indexer, exp)
        check_freq_nonmonotonic(ordered, idx)

    def test_sort_values_without_freq_timedeltaindex(self):
        # GH#10295

        idx = TimedeltaIndex(
            ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1"
        )
        expected = TimedeltaIndex(
            ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1"
        )
        self.check_sort_values_without_freq(idx, expected)

    @pytest.mark.parametrize(
        "index_dates,expected_dates",
        [
            # The index name is fixed below, so one NaT-free case is enough;
            # a second identical entry would only repeat the same test.
            (
                ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
                ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
            (
                [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT],
                [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
        ],
    )
    def test_sort_values_without_freq_datetimeindex(
        self, index_dates, expected_dates, tz_naive_fixture
    ):
        tz = tz_naive_fixture

        # without freq
        idx = DatetimeIndex(index_dates, tz=tz, name="idx")
        expected = DatetimeIndex(expected_dates, tz=tz, name="idx")

        self.check_sort_values_without_freq(idx, expected)

    @pytest.mark.parametrize(
        "idx,expected",
        [
            (
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-03",
                        "2011-01-05",
                        "2011-01-02",
                        "2011-01-01",
                    ],
                    freq="D",
                    name="idx1",
                ),
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-01",
                        "2011-01-02",
                        "2011-01-03",
                        "2011-01-05",
                    ],
                    freq="D",
                    name="idx1",
                ),
            ),
            (
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-03",
                        "2011-01-05",
                        "2011-01-02",
                        "2011-01-01",
                    ],
                    freq="D",
                    name="idx2",
                ),
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-01",
                        "2011-01-02",
                        "2011-01-03",
                        "2011-01-05",
                    ],
                    freq="D",
                    name="idx2",
                ),
            ),
            (
                PeriodIndex(
                    [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT],
                    freq="D",
                    name="idx3",
                ),
                PeriodIndex(
                    [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
                    freq="D",
                    name="idx3",
                ),
            ),
            (
                PeriodIndex(
                    ["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="Y"
                ),
                PeriodIndex(
                    ["2011", "2011", "2012", "2013", "2015"], name="pidx", freq="Y"
                ),
            ),
            (
                # For compatibility check
                Index([2011, 2013, 2015, 2012, 2011], name="idx"),
                Index([2011, 2011, 2012, 2013, 2015], name="idx"),
            ),
        ],
    )
    def test_sort_values_without_freq_periodindex(self, idx, expected):
        # here without_freq means not generateable by period_range
        self.check_sort_values_without_freq(idx, expected)

    def test_sort_values_without_freq_periodindex_nat(self):
        # doesn't quite fit into check_sort_values_without_freq
        idx = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D")
        expected = PeriodIndex(["NaT", "2011", "2011", "2013"], name="pidx", freq="D")

        ordered = idx.sort_values(na_position="first")
        tm.assert_index_equal(ordered, expected)
        check_freq_nonmonotonic(ordered, idx)

        ordered = idx.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        check_freq_nonmonotonic(ordered, idx)
|  | ||||
|  | ||||
def test_order_stability_compat():
    """Descending sort of a PeriodIndex and of the equivalent int Index must
    produce the same indexer."""
    # GH#35922. sort_values is stable both for normal and datetime-like Index
    sort_kwargs = {"return_indexer": True, "ascending": False}

    pidx = PeriodIndex(["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="Y")
    iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx")

    # only the indexers are compared; the sorted indexes are discarded
    _, period_indexer = pidx.sort_values(**sort_kwargs)
    _, int_indexer = iidx.sort_values(**sort_kwargs)
    tm.assert_numpy_array_equal(period_indexer, int_indexer)
| @ -0,0 +1,103 @@ | ||||
| import numpy as np | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     NaT, | ||||
|     PeriodIndex, | ||||
|     Series, | ||||
|     TimedeltaIndex, | ||||
|     date_range, | ||||
|     period_range, | ||||
|     timedelta_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestValueCounts:
    """``value_counts``/``unique`` checks for datetime-like indexes."""

    # GH#7735

    def test_value_counts_unique_datetimeindex(self, tz_naive_fixture):
        tz = tz_naive_fixture
        orig = date_range("2011-01-01 09:00", freq="h", periods=10, tz=tz)
        self._check_value_counts_with_repeats(orig)

    def test_value_counts_unique_timedeltaindex(self):
        orig = timedelta_range("1 days 09:00:00", freq="h", periods=10)
        self._check_value_counts_with_repeats(orig)

    def test_value_counts_unique_periodindex(self):
        orig = period_range("2011-01-01 09:00", freq="h", periods=10)
        self._check_value_counts_with_repeats(orig)

    def _check_value_counts_with_repeats(self, orig):
        """
        Repeat the n-th element of ``orig`` n+1 times, then check that
        ``value_counts`` (on both Index and Series) lists the elements in
        reverse order with counts len(orig)..1, and that ``unique`` gives
        back ``orig``.
        """
        n = len(orig)

        # create repeated values, 'n'th element is repeated by n+1 times
        idx = type(orig)(np.repeat(orig._values, range(1, n + 1)), dtype=orig.dtype)

        exp_idx = orig[::-1]
        if not isinstance(exp_idx, PeriodIndex):
            exp_idx = exp_idx._with_freq(None)
        # counts are derived from len(orig) so they always match the repeats
        # built above rather than assuming a ten-element input
        expected = Series(range(n, 0, -1), index=exp_idx, dtype="int64", name="count")

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        tm.assert_index_equal(idx.unique(), orig)

    def test_value_counts_unique_datetimeindex2(self, tz_naive_fixture):
        tz = tz_naive_fixture
        idx = DatetimeIndex(
            [
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 08:00",
                "2013-01-01 08:00",
                NaT,
            ],
            tz=tz,
        )
        self._check_value_counts_dropna(idx)

    def test_value_counts_unique_timedeltaindex2(self):
        idx = TimedeltaIndex(
            [
                "1 days 09:00:00",
                "1 days 09:00:00",
                "1 days 09:00:00",
                "1 days 08:00:00",
                "1 days 08:00:00",
                NaT,
            ]
        )
        self._check_value_counts_dropna(idx)

    def test_value_counts_unique_periodindex2(self):
        idx = PeriodIndex(
            [
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 08:00",
                "2013-01-01 08:00",
                NaT,
            ],
            freq="h",
        )
        self._check_value_counts_dropna(idx)

    def _check_value_counts_dropna(self, idx):
        """
        Check ``value_counts`` with and without ``dropna`` for a six-element
        ``idx`` laid out as three equal values, two equal values, then NaT.
        """
        # positions 2 and 3 pick one representative of each non-NaT value
        exp_idx = idx[[2, 3]]
        expected = Series([3, 2], index=exp_idx, name="count")

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        # position -1 adds the NaT entry
        exp_idx = idx[[2, 3, -1]]
        expected = Series([3, 2, 1], index=exp_idx, name="count")

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)
		Reference in New Issue
	
	Block a user