done
This commit is contained in:
		
							
								
								
									
										748
									
								
								lib/python3.11/site-packages/pandas/tests/libs/test_hashtable.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										748
									
								
								lib/python3.11/site-packages/pandas/tests/libs/test_hashtable.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,748 @@ | ||||
| from collections.abc import Generator | ||||
| from contextlib import contextmanager | ||||
| import re | ||||
| import struct | ||||
| import tracemalloc | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs import hashtable as ht | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.algorithms import isin | ||||
|  | ||||
|  | ||||
@contextmanager
def activated_tracemalloc() -> Generator[None, None, None]:
    """Enable tracemalloc for the duration of the ``with`` block.

    Tracing is always stopped on exit, even if the body raises, so one
    failing test cannot leave tracing enabled for the rest of the session.
    """
    tracemalloc.start()
    try:
        yield
    finally:
        tracemalloc.stop()
|  | ||||
|  | ||||
def get_allocated_khash_memory():
    """Return the number of bytes currently allocated in the khash domain.

    Filters the tracemalloc snapshot down to allocations tagged with the
    hashtable trace domain, so only khash buffers are counted — regular
    Python allocations made by the test itself are excluded.
    """
    domain_filter = tracemalloc.DomainFilter(True, ht.get_hashtable_trace_domain())
    snapshot = tracemalloc.take_snapshot().filter_traces((domain_filter,))
    return sum(trace.size for trace in snapshot.traces)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "table_type, dtype",
    [
        (ht.PyObjectHashTable, np.object_),
        (ht.Complex128HashTable, np.complex128),
        (ht.Int64HashTable, np.int64),
        (ht.UInt64HashTable, np.uint64),
        (ht.Float64HashTable, np.float64),
        (ht.Complex64HashTable, np.complex64),
        (ht.Int32HashTable, np.int32),
        (ht.UInt32HashTable, np.uint32),
        (ht.Float32HashTable, np.float32),
        (ht.Int16HashTable, np.int16),
        (ht.UInt16HashTable, np.uint16),
        (ht.Int8HashTable, np.int8),
        (ht.UInt8HashTable, np.uint8),
        (ht.IntpHashTable, np.intp),
    ],
)
class TestHashTable:
    """Behavior shared by every typed khash-backed hash table."""

    def test_get_set_contains_len(self, table_type, dtype):
        """set_item/get_item/__contains__/__len__ round-trip, incl. overwrites."""
        index = 5
        # constructor argument is a size hint; the table still starts empty
        table = table_type(55)
        assert len(table) == 0
        assert index not in table

        table.set_item(index, 42)
        assert len(table) == 1
        assert index in table
        assert table.get_item(index) == 42

        table.set_item(index + 1, 41)
        assert index in table
        assert index + 1 in table
        assert len(table) == 2
        assert table.get_item(index) == 42
        assert table.get_item(index + 1) == 41

        # overwriting an existing key must not change the length
        table.set_item(index, 21)
        assert index in table
        assert index + 1 in table
        assert len(table) == 2
        assert table.get_item(index) == 21
        assert table.get_item(index + 1) == 41
        assert index + 2 not in table

        table.set_item(index + 1, 21)
        assert index in table
        assert index + 1 in table
        assert len(table) == 2
        assert table.get_item(index) == 21
        assert table.get_item(index + 1) == 21

        # a missing key raises KeyError carrying the key itself
        with pytest.raises(KeyError, match=str(index + 2)):
            table.get_item(index + 2)

    def test_get_set_contains_len_mask(self, table_type, dtype):
        """Same round-trip with uses_mask=True: NA is tracked via set_na/get_na."""
        if table_type == ht.PyObjectHashTable:
            pytest.skip("Mask not supported for object")
        index = 5
        table = table_type(55, uses_mask=True)
        assert len(table) == 0
        assert index not in table

        table.set_item(index, 42)
        assert len(table) == 1
        assert index in table
        assert table.get_item(index) == 42
        # no NA entry has been stored yet
        with pytest.raises(KeyError, match="NA"):
            table.get_na()

        table.set_item(index + 1, 41)
        table.set_na(41)
        assert pd.NA in table
        assert index in table
        assert index + 1 in table
        # the NA slot counts toward the length
        assert len(table) == 3
        assert table.get_item(index) == 42
        assert table.get_item(index + 1) == 41
        assert table.get_na() == 41

        # overwriting the NA slot must not change the length
        table.set_na(21)
        assert index in table
        assert index + 1 in table
        assert len(table) == 3
        assert table.get_item(index + 1) == 41
        assert table.get_na() == 21
        assert index + 2 not in table

        with pytest.raises(KeyError, match=str(index + 2)):
            table.get_item(index + 2)

    def test_map_keys_to_values(self, table_type, dtype, writable):
        """map_keys_to_values stores explicit key->value pairs."""
        # only Int64HashTable has this method
        if table_type == ht.Int64HashTable:
            N = 77
            table = table_type()
            keys = np.arange(N).astype(dtype)
            vals = np.arange(N).astype(np.int64) + N
            # GH#21688-style check: readonly buffers must be accepted too
            keys.flags.writeable = writable
            vals.flags.writeable = writable
            table.map_keys_to_values(keys, vals)
            for i in range(N):
                assert table.get_item(keys[i]) == i + N

    def test_map_locations(self, table_type, dtype, writable):
        """map_locations stores each key's position in the input array."""
        N = 8
        table = table_type()
        keys = (np.arange(N) + N).astype(dtype)
        keys.flags.writeable = writable
        table.map_locations(keys)
        for i in range(N):
            assert table.get_item(keys[i]) == i

    def test_map_locations_mask(self, table_type, dtype, writable):
        """Masked-out positions go to the NA slot instead of the key table."""
        if table_type == ht.PyObjectHashTable:
            pytest.skip("Mask not supported for object")
        N = 3
        table = table_type(uses_mask=True)
        keys = (np.arange(N) + N).astype(dtype)
        keys.flags.writeable = writable
        table.map_locations(keys, np.array([False, False, True]))
        for i in range(N - 1):
            assert table.get_item(keys[i]) == i

        # the masked key itself was never inserted as a regular key
        with pytest.raises(KeyError, match=re.escape(str(keys[N - 1]))):
            table.get_item(keys[N - 1])

        assert table.get_na() == 2

    def test_lookup(self, table_type, dtype, writable):
        """lookup returns the stored position for every present key."""
        N = 3
        table = table_type()
        keys = (np.arange(N) + N).astype(dtype)
        keys.flags.writeable = writable
        table.map_locations(keys)
        result = table.lookup(keys)
        expected = np.arange(N)
        tm.assert_numpy_array_equal(result.astype(np.int64), expected.astype(np.int64))

    def test_lookup_wrong(self, table_type, dtype):
        """lookup yields -1 for keys that are absent from the table."""
        if dtype in (np.int8, np.uint8):
            N = 100
        else:
            N = 512
        table = table_type()
        keys = (np.arange(N) + N).astype(dtype)
        table.map_locations(keys)
        wrong_keys = np.arange(N).astype(dtype)
        result = table.lookup(wrong_keys)
        assert np.all(result == -1)

    def test_lookup_mask(self, table_type, dtype, writable):
        """lookup with a mask resolves masked positions through the NA slot."""
        if table_type == ht.PyObjectHashTable:
            pytest.skip("Mask not supported for object")
        N = 3
        table = table_type(uses_mask=True)
        keys = (np.arange(N) + N).astype(dtype)
        mask = np.array([False, True, False])
        keys.flags.writeable = writable
        table.map_locations(keys, mask)
        result = table.lookup(keys, mask)
        expected = np.arange(N)
        tm.assert_numpy_array_equal(result.astype(np.int64), expected.astype(np.int64))

        # an unmasked query for a key that was only stored as NA must miss
        result = table.lookup(np.array([1 + N]).astype(dtype), np.array([False]))
        tm.assert_numpy_array_equal(
            result.astype(np.int64), np.array([-1], dtype=np.int64)
        )

    def test_unique(self, table_type, dtype, writable):
        """unique returns each distinct key exactly once."""
        if dtype in (np.int8, np.uint8):
            N = 88
        else:
            N = 1000
        table = table_type()
        expected = (np.arange(N) + N).astype(dtype)
        keys = np.repeat(expected, 5)
        keys.flags.writeable = writable
        unique = table.unique(keys)
        tm.assert_numpy_array_equal(unique, expected)

    def test_tracemalloc_works(self, table_type, dtype):
        """khash allocations are visible to tracemalloc and freed on delete."""
        if dtype in (np.int8, np.uint8):
            N = 256
        else:
            N = 30000
        keys = np.arange(N).astype(dtype)
        with activated_tracemalloc():
            table = table_type()
            table.map_locations(keys)
            used = get_allocated_khash_memory()
            my_size = table.sizeof()
            # everything the table allocated must be accounted for by sizeof()
            assert used == my_size
            del table
            # dropping the table must release all khash memory
            assert get_allocated_khash_memory() == 0

    def test_tracemalloc_for_empty(self, table_type, dtype):
        """Even an empty table's allocation is tracked and released."""
        with activated_tracemalloc():
            table = table_type()
            used = get_allocated_khash_memory()
            my_size = table.sizeof()
            assert used == my_size
            del table
            assert get_allocated_khash_memory() == 0

    def test_get_state(self, table_type, dtype):
        """get_state exposes khash internals: size, occupancy, bucket info."""
        table = table_type(1000)
        state = table.get_state()
        assert state["size"] == 0
        assert state["n_occupied"] == 0
        assert "n_buckets" in state
        assert "upper_bound" in state

    @pytest.mark.parametrize("N", range(1, 110))
    def test_no_reallocation(self, table_type, dtype, N):
        """Preallocating for N keys avoids growth and does not over-allocate."""
        keys = np.arange(N).astype(dtype)
        preallocated_table = table_type(N)
        n_buckets_start = preallocated_table.get_state()["n_buckets"]
        preallocated_table.map_locations(keys)
        n_buckets_end = preallocated_table.get_state()["n_buckets"]
        # original number of buckets was enough:
        assert n_buckets_start == n_buckets_end
        # check with clean table (not too much preallocated)
        clean_table = table_type()
        clean_table.map_locations(keys)
        assert n_buckets_start == clean_table.get_state()["n_buckets"]
|  | ||||
|  | ||||
class TestHashTableUnsorted:
    # TODO: moved from test_algos; may be redundancies with other tests
    def test_string_hashtable_set_item_signature(self):
        """StringHashTable rejects non-string keys with TypeError (GH#30419)."""
        # GH#30419 fix typing in StringHashTable.set_item to prevent segfault
        tbl = ht.StringHashTable()

        tbl.set_item("key", 1)
        assert tbl.get_item("key") == 1

        with pytest.raises(TypeError, match="'key' has incorrect type"):
            # key arg typed as string, not object
            tbl.set_item(4, 6)
        with pytest.raises(TypeError, match="'val' has incorrect type"):
            tbl.get_item(4)

    def test_lookup_nan(self, writable):
        """nan keys survive a map_locations/lookup round-trip on readonly data."""
        # GH#21688 ensure we can deal with readonly memory views
        xs = np.array([2.718, 3.14, np.nan, -7, 5, 2, 3])
        xs.setflags(write=writable)
        m = ht.Float64HashTable()
        m.map_locations(xs)
        tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs), dtype=np.intp))

    def test_add_signed_zeros(self):
        """0.0 and -0.0 must hash to the same bucket (GH#21866)."""
        # GH#21866 inconsistent hash-function for float64
        # default hash-function would lead to different hash-buckets
        # for 0.0 and -0.0 if there are more than 2^30 hash-buckets
        # but this would mean 16GB
        N = 4  # 12 * 10**8 would trigger the error, if you have enough memory
        m = ht.Float64HashTable(N)
        m.set_item(0.0, 0)
        m.set_item(-0.0, 0)
        assert len(m) == 1  # 0.0 and -0.0 are equivalent

    def test_add_different_nans(self):
        """Distinct nan bit patterns must collapse to one key (GH#21866)."""
        # GH#21866 inconsistent hash-function for float64
        # create different nans from bit-patterns:
        NAN1 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000000))[0]
        NAN2 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000001))[0]
        # self-inequality is the classic nan check
        assert NAN1 != NAN1
        assert NAN2 != NAN2
        # default hash function would lead to different hash-buckets
        # for NAN1 and NAN2 even if there are only 4 buckets:
        m = ht.Float64HashTable()
        m.set_item(NAN1, 0)
        m.set_item(NAN2, 0)
        assert len(m) == 1  # NAN1 and NAN2 are equivalent

    def test_lookup_overflow(self, writable):
        """uint64 keys beyond the int64 range are looked up correctly."""
        xs = np.array([1, 2, 2**63], dtype=np.uint64)
        # GH 21688 ensure we can deal with readonly memory views
        xs.setflags(write=writable)
        m = ht.UInt64HashTable()
        m.map_locations(xs)
        tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs), dtype=np.intp))

    @pytest.mark.parametrize("nvals", [0, 10])  # resizing to 0 is special case
    @pytest.mark.parametrize(
        "htable, uniques, dtype, safely_resizes",
        [
            (ht.PyObjectHashTable, ht.ObjectVector, "object", False),
            (ht.StringHashTable, ht.ObjectVector, "object", True),
            (ht.Float64HashTable, ht.Float64Vector, "float64", False),
            (ht.Int64HashTable, ht.Int64Vector, "int64", False),
            (ht.Int32HashTable, ht.Int32Vector, "int32", False),
            (ht.UInt64HashTable, ht.UInt64Vector, "uint64", False),
        ],
    )
    def test_vector_resize(
        self, writable, htable, uniques, dtype, safely_resizes, nvals
    ):
        """Appending to a uniques vector with an external view raises (GH#7157)."""
        # Test for memory errors after internal vector
        # reallocations (GH 7157)
        # Changed from using np.random.default_rng(2).rand to range
        # which could cause flaky CI failures when safely_resizes=False
        vals = np.array(range(1000), dtype=dtype)

        # GH 21688 ensures we can deal with read-only memory views
        vals.setflags(write=writable)

        # initialise instances; cannot initialise in parametrization,
        # as otherwise external views would be held on the array (which is
        # one of the things this test is checking)
        htable = htable()
        uniques = uniques()

        # get_labels may append to uniques
        htable.get_labels(vals[:nvals], uniques, 0, -1)
        # to_array() sets an external_view_exists flag on uniques.
        tmp = uniques.to_array()
        oldshape = tmp.shape

        # subsequent get_labels() calls can no longer append to it
        # (except for StringHashTables + ObjectVector)
        if safely_resizes:
            htable.get_labels(vals, uniques, 0, -1)
        else:
            with pytest.raises(ValueError, match="external reference.*"):
                htable.get_labels(vals, uniques, 0, -1)

        uniques.to_array()  # should not raise here
        assert tmp.shape == oldshape

    @pytest.mark.parametrize(
        "hashtable",
        [
            ht.PyObjectHashTable,
            ht.StringHashTable,
            ht.Float64HashTable,
            ht.Int64HashTable,
            ht.Int32HashTable,
            ht.UInt64HashTable,
        ],
    )
    def test_hashtable_large_sizehint(self, hashtable):
        """Construction must not raise for a size_hint beyond uint32 range."""
        # GH#22729 smoketest for not raising when passing a large size_hint
        size_hint = np.iinfo(np.uint32).max + 1
        hashtable(size_hint=size_hint)
|  | ||||
|  | ||||
class TestPyObjectHashTableWithNans:
    """Distinct-but-equal nan objects must collide to the same key slot."""

    def test_nan_float(self):
        """Two separate float('nan') objects act as one key."""
        first = float("nan")
        second = float("nan")
        assert first is not second
        table = ht.PyObjectHashTable()
        table.set_item(first, 42)
        assert table.get_item(second) == 42

    def test_nan_complex_both(self):
        """complex(nan, nan) objects act as one key."""
        first = complex(float("nan"), float("nan"))
        second = complex(float("nan"), float("nan"))
        assert first is not second
        table = ht.PyObjectHashTable()
        table.set_item(first, 42)
        assert table.get_item(second) == 42

    def test_nan_complex_real(self):
        """nan real parts collide only when the imaginary parts also match."""
        first = complex(float("nan"), 1)
        second = complex(float("nan"), 1)
        different = complex(float("nan"), 2)
        assert first is not second
        table = ht.PyObjectHashTable()
        table.set_item(first, 42)
        assert table.get_item(second) == 42
        # a different imaginary part must miss, and the KeyError carries it
        with pytest.raises(KeyError, match=None) as error:
            table.get_item(different)
        assert str(error.value) == str(different)

    def test_nan_complex_imag(self):
        """nan imaginary parts collide only when the real parts also match."""
        first = complex(1, float("nan"))
        second = complex(1, float("nan"))
        different = complex(2, float("nan"))
        assert first is not second
        table = ht.PyObjectHashTable()
        table.set_item(first, 42)
        assert table.get_item(second) == 42
        with pytest.raises(KeyError, match=None) as error:
            table.get_item(different)
        assert str(error.value) == str(different)

    def test_nan_in_tuple(self):
        """Tuples wrapping distinct nan objects act as one key."""
        first = (float("nan"),)
        second = (float("nan"),)
        assert first[0] is not second[0]
        table = ht.PyObjectHashTable()
        table.set_item(first, 42)
        assert table.get_item(second) == 42

    def test_nan_in_nested_tuple(self):
        """nan equivalence holds through arbitrary tuple nesting."""
        first = (1, (2, (float("nan"),)))
        second = (1, (2, (float("nan"),)))
        different = (1, 2)
        table = ht.PyObjectHashTable()
        table.set_item(first, 42)
        assert table.get_item(second) == 42
        with pytest.raises(KeyError, match=None) as error:
            table.get_item(different)
        assert str(error.value) == str(different)
|  | ||||
|  | ||||
def test_hash_equal_tuple_with_nans():
    """object_hash and objects_are_equal treat same-shaped nan tuples as equal."""
    left = (float("nan"), (float("nan"), float("nan")))
    right = (float("nan"), (float("nan"), float("nan")))
    assert ht.object_hash(left) == ht.object_hash(right)
    assert ht.objects_are_equal(left, right)
|  | ||||
|  | ||||
| def test_get_labels_groupby_for_Int64(writable): | ||||
|     table = ht.Int64HashTable() | ||||
|     vals = np.array([1, 2, -1, 2, 1, -1], dtype=np.int64) | ||||
|     vals.flags.writeable = writable | ||||
|     arr, unique = table.get_labels_groupby(vals) | ||||
|     expected_arr = np.array([0, 1, -1, 1, 0, -1], dtype=np.intp) | ||||
|     expected_unique = np.array([1, 2], dtype=np.int64) | ||||
|     tm.assert_numpy_array_equal(arr, expected_arr) | ||||
|     tm.assert_numpy_array_equal(unique, expected_unique) | ||||
|  | ||||
|  | ||||
def test_tracemalloc_works_for_StringHashTable():
    """A populated StringHashTable's khash allocations are fully tracked."""
    N = 1000
    keys = np.arange(N).astype(np.str_).astype(np.object_)
    with activated_tracemalloc():
        table = ht.StringHashTable()
        table.map_locations(keys)
        # everything the table allocated must be accounted for by sizeof()
        assert get_allocated_khash_memory() == table.sizeof()
        del table
        # dropping the table must release all khash memory
        assert get_allocated_khash_memory() == 0
|  | ||||
|  | ||||
def test_tracemalloc_for_empty_StringHashTable():
    """Even an empty StringHashTable's allocation is tracked and released."""
    with activated_tracemalloc():
        table = ht.StringHashTable()
        assert get_allocated_khash_memory() == table.sizeof()
        del table
        assert get_allocated_khash_memory() == 0
|  | ||||
|  | ||||
@pytest.mark.parametrize("N", range(1, 110))
def test_no_reallocation_StringHashTable(N):
    """Preallocating for N string keys must avoid any bucket growth."""
    keys = np.arange(N).astype(np.str_).astype(np.object_)
    table = ht.StringHashTable(N)
    buckets_before = table.get_state()["n_buckets"]
    table.map_locations(keys)
    # the preallocated bucket count was sufficient
    assert table.get_state()["n_buckets"] == buckets_before
    # a table grown from scratch ends up the same size, so the hint
    # did not over-allocate either
    fresh = ht.StringHashTable()
    fresh.map_locations(keys)
    assert fresh.get_state()["n_buckets"] == buckets_before
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "table_type, dtype",
    [
        (ht.Float64HashTable, np.float64),
        (ht.Float32HashTable, np.float32),
        (ht.Complex128HashTable, np.complex128),
        (ht.Complex64HashTable, np.complex64),
    ],
)
class TestHashTableWithNans:
    """All nan values must collapse to a single key in float/complex tables."""

    def test_get_set_contains_len(self, table_type, dtype):
        """A nan key supports insert, membership, and in-place overwrite."""
        key = float("nan")
        table = table_type()
        assert key not in table

        table.set_item(key, 42)
        assert len(table) == 1
        assert key in table
        assert table.get_item(key) == 42

        # overwriting through an equal (nan) key must not grow the table
        table.set_item(key, 41)
        assert len(table) == 1
        assert key in table
        assert table.get_item(key) == 41

    def test_map_locations(self, table_type, dtype):
        """map_locations on all-nan input keeps only the last position."""
        count = 10
        table = table_type()
        nans = np.full(count, np.nan, dtype=dtype)
        table.map_locations(nans)
        # every nan hashes to the same slot, so the last write wins
        assert len(table) == 1
        assert table.get_item(np.nan) == count - 1

    def test_unique(self, table_type, dtype):
        """unique reduces an all-nan array to a single nan value."""
        count = 1020
        table = table_type()
        nans = np.full(count, np.nan, dtype=dtype)
        result = table.unique(nans)
        assert len(result) == 1
        assert np.all(np.isnan(result))
|  | ||||
|  | ||||
def test_unique_for_nan_objects_floats():
    """Fifty distinct float nan objects collapse to a single unique value."""
    table = ht.PyObjectHashTable()
    # the comprehension matters: each element is a distinct nan object
    keys = np.array([float("nan") for _ in range(50)], dtype=np.object_)
    assert len(table.unique(keys)) == 1
|  | ||||
|  | ||||
def test_unique_for_nan_objects_complex():
    """Fifty distinct complex objects with nan real parts give one unique."""
    table = ht.PyObjectHashTable()
    keys = np.array([complex(float("nan"), 1.0) for _ in range(50)], dtype=np.object_)
    assert len(table.unique(keys)) == 1
|  | ||||
|  | ||||
def test_unique_for_nan_objects_tuple():
    """A repeated nan-bearing tuple plus one scalar give exactly two uniques."""
    table = ht.PyObjectHashTable()
    repeated = [(1.0, (float("nan"), 1.0)) for _ in range(50)]
    keys = np.array([1] + repeated, dtype=np.object_)
    assert len(table.unique(keys)) == 2
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "dtype",
    [
        np.object_,
        np.complex128,
        np.int64,
        np.uint64,
        np.float64,
        np.complex64,
        np.int32,
        np.uint32,
        np.float32,
        np.int16,
        np.uint16,
        np.int8,
        np.uint8,
        np.intp,
    ],
)
class TestHelpFunctions:
    """Module-level helpers (value_count, duplicated, ismember, mode) per dtype."""

    def test_value_count(self, dtype, writable):
        """value_count returns each distinct key with its occurrence count."""
        N = 43
        expected = (np.arange(N) + N).astype(dtype)
        values = np.repeat(expected, 5)
        values.flags.writeable = writable
        keys, counts, _ = ht.value_count(values, False)
        tm.assert_numpy_array_equal(np.sort(keys), expected)
        assert np.all(counts == 5)

    def test_value_count_mask(self, dtype):
        """Masked positions are tallied via the separate na_counter."""
        if dtype == np.object_:
            pytest.skip("mask not implemented for object dtype")
        values = np.array([1] * 5, dtype=dtype)
        mask = np.zeros((5,), dtype=np.bool_)
        mask[1] = True
        mask[4] = True
        keys, counts, na_counter = ht.value_count(values, False, mask=mask)
        # NOTE(review): len(keys) == 2 suggests an NA entry is emitted as a
        # key alongside the value 1 — confirm against value_count's contract
        assert len(keys) == 2
        assert na_counter == 2

    def test_value_count_stable(self, dtype, writable):
        """Keys come back in first-seen order (GH#12679)."""
        # GH12679
        values = np.array([2, 1, 5, 22, 3, -1, 8]).astype(dtype)
        values.flags.writeable = writable
        keys, counts, _ = ht.value_count(values, False)
        # all values are distinct, so stable ordering means keys == input
        tm.assert_numpy_array_equal(keys, values)
        assert np.all(counts == 1)

    def test_duplicated_first(self, dtype, writable):
        """duplicated flags every occurrence after the first as a duplicate."""
        N = 100
        values = np.repeat(np.arange(N).astype(dtype), 5)
        values.flags.writeable = writable
        result = ht.duplicated(values)
        expected = np.ones_like(values, dtype=np.bool_)
        # each value's first occurrence (every 5th slot) is not a duplicate
        expected[::5] = False
        tm.assert_numpy_array_equal(result, expected)

    def test_ismember_yes(self, dtype, writable):
        """ismember is all-True when every element appears in the lookup set."""
        N = 127
        arr = np.arange(N).astype(dtype)
        values = np.arange(N).astype(dtype)
        arr.flags.writeable = writable
        values.flags.writeable = writable
        result = ht.ismember(arr, values)
        expected = np.ones_like(values, dtype=np.bool_)
        tm.assert_numpy_array_equal(result, expected)

    def test_ismember_no(self, dtype):
        """ismember is all-False for disjoint value sets."""
        N = 17
        arr = np.arange(N).astype(dtype)
        values = (np.arange(N) + N).astype(dtype)
        result = ht.ismember(arr, values)
        expected = np.zeros_like(values, dtype=np.bool_)
        tm.assert_numpy_array_equal(result, expected)

    def test_mode(self, dtype, writable):
        """mode returns the single most frequent value."""
        if dtype in (np.int8, np.uint8):
            N = 53
        else:
            N = 11111
        values = np.repeat(np.arange(N).astype(dtype), 5)
        # bump 42's count to 6 so it strictly dominates all other values
        values[0] = 42
        values.flags.writeable = writable
        result = ht.mode(values, False)[0]
        assert result == 42

    def test_mode_stable(self, dtype, writable):
        """With all counts tied, mode preserves first-seen order of values."""
        values = np.array([2, 1, 5, 22, 3, -1, 8]).astype(dtype)
        values.flags.writeable = writable
        keys = ht.mode(values, False)[0]
        tm.assert_numpy_array_equal(keys, values)
|  | ||||
|  | ||||
| def test_modes_with_nans(): | ||||
|     # GH42688, nans aren't mangled | ||||
|     nulls = [pd.NA, np.nan, pd.NaT, None] | ||||
|     values = np.array([True] + nulls * 2, dtype=np.object_) | ||||
|     modes = ht.mode(values, False)[0] | ||||
|     assert modes.size == len(nulls) | ||||
|  | ||||
|  | ||||
| def test_unique_label_indices_intp(writable): | ||||
|     keys = np.array([1, 2, 2, 2, 1, 3], dtype=np.intp) | ||||
|     keys.flags.writeable = writable | ||||
|     result = ht.unique_label_indices(keys) | ||||
|     expected = np.array([0, 1, 5], dtype=np.intp) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
def test_unique_label_indices():
    """unique_label_indices matches np.unique's first indices; -1 is excluded."""
    labels = np.random.default_rng(2).integers(1, 1 << 10, 1 << 15).astype(np.intp)

    left = ht.unique_label_indices(labels)
    right = np.unique(labels, return_index=True)[1]
    tm.assert_numpy_array_equal(left, right, check_dtype=False)

    # -1 marks missing labels; its first index must be dropped from the result
    labels[np.random.default_rng(2).choice(len(labels), 10)] = -1
    left = ht.unique_label_indices(labels)
    right = np.unique(labels, return_index=True)[1][1:]
    tm.assert_numpy_array_equal(left, right, check_dtype=False)
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "dtype", | ||||
|     [ | ||||
|         np.float64, | ||||
|         np.float32, | ||||
|         np.complex128, | ||||
|         np.complex64, | ||||
|     ], | ||||
| ) | ||||
| class TestHelpFunctionsWithNans: | ||||
|     def test_value_count(self, dtype): | ||||
|         values = np.array([np.nan, np.nan, np.nan], dtype=dtype) | ||||
|         keys, counts, _ = ht.value_count(values, True) | ||||
|         assert len(keys) == 0 | ||||
|         keys, counts, _ = ht.value_count(values, False) | ||||
|         assert len(keys) == 1 and np.all(np.isnan(keys)) | ||||
|         assert counts[0] == 3 | ||||
|  | ||||
|     def test_duplicated_first(self, dtype): | ||||
|         values = np.array([np.nan, np.nan, np.nan], dtype=dtype) | ||||
|         result = ht.duplicated(values) | ||||
|         expected = np.array([False, True, True]) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_ismember_yes(self, dtype): | ||||
|         arr = np.array([np.nan, np.nan, np.nan], dtype=dtype) | ||||
|         values = np.array([np.nan, np.nan], dtype=dtype) | ||||
|         result = ht.ismember(arr, values) | ||||
|         expected = np.array([True, True, True], dtype=np.bool_) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_ismember_no(self, dtype): | ||||
|         arr = np.array([np.nan, np.nan, np.nan], dtype=dtype) | ||||
|         values = np.array([1], dtype=dtype) | ||||
|         result = ht.ismember(arr, values) | ||||
|         expected = np.array([False, False, False], dtype=np.bool_) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_mode(self, dtype): | ||||
|         values = np.array([42, np.nan, np.nan, np.nan], dtype=dtype) | ||||
|         assert ht.mode(values, True)[0] == 42 | ||||
|         assert np.isnan(ht.mode(values, False)[0]) | ||||
|  | ||||
|  | ||||
| def test_ismember_tuple_with_nans(): | ||||
|     # GH-41836 | ||||
|     values = [("a", float("nan")), ("b", 1)] | ||||
|     comps = [("a", float("nan"))] | ||||
|  | ||||
|     msg = "isin with argument that is not not a Series" | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         result = isin(values, comps) | ||||
|     expected = np.array([True, False], dtype=np.bool_) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
def test_float_complex_int_are_equal_as_objects():
    # 5, 5.0 and 5+0j hash and compare equal, so object-dtype membership
    # treats them as the same value
    needles = np.array(["a", 5, 5.0, 5.0 + 0j], dtype=object)
    haystack = np.asarray(list(range(129)))
    outcome = isin(needles, haystack)
    expected = np.array([False, True, True, True], dtype=np.bool_)
    tm.assert_numpy_array_equal(outcome, expected)
							
								
								
									
										390
									
								
								lib/python3.11/site-packages/pandas/tests/libs/test_join.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										390
									
								
								lib/python3.11/site-packages/pandas/tests/libs/test_join.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,390 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs import join as libjoin | ||||
| from pandas._libs.join import ( | ||||
|     inner_join, | ||||
|     left_outer_join, | ||||
| ) | ||||
|  | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestIndexer:
    """Sanity checks for the low-level cython join/indexer routines."""

    @pytest.mark.parametrize(
        "dtype", ["int32", "int64", "float32", "float64", "object"]
    )
    def test_outer_join_indexer(self, dtype):
        indexer = libjoin.outer_join_indexer

        lhs = np.arange(3, dtype=dtype)
        rhs = np.arange(2, 5, dtype=dtype)
        nothing = np.array([], dtype=dtype)

        joined, lidx, ridx = indexer(lhs, rhs)
        for out in (joined, lidx, ridx):
            assert isinstance(out, np.ndarray)
        tm.assert_numpy_array_equal(joined, np.arange(5, dtype=dtype))
        tm.assert_numpy_array_equal(lidx, np.array([0, 1, 2, -1, -1], dtype=np.intp))
        tm.assert_numpy_array_equal(ridx, np.array([-1, -1, 0, 1, 2], dtype=np.intp))

        # empty left side: everything comes from the right
        joined, lidx, ridx = indexer(nothing, rhs)
        tm.assert_numpy_array_equal(joined, rhs)
        tm.assert_numpy_array_equal(lidx, np.array([-1, -1, -1], dtype=np.intp))
        tm.assert_numpy_array_equal(ridx, np.array([0, 1, 2], dtype=np.intp))

        # empty right side: everything comes from the left
        joined, lidx, ridx = indexer(lhs, nothing)
        tm.assert_numpy_array_equal(joined, lhs)
        tm.assert_numpy_array_equal(lidx, np.array([0, 1, 2], dtype=np.intp))
        tm.assert_numpy_array_equal(ridx, np.array([-1, -1, -1], dtype=np.intp))

    def test_cython_left_outer_join(self):
        left = np.array([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.intp)
        right = np.array([1, 1, 0, 4, 2, 2, 1], dtype=np.intp)

        ls, rs = left_outer_join(left, right, 5)

        # positions into the mergesort-stable orderings of each side
        exp_li = np.array(
            [0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10]
        )
        exp_ri = np.array(
            [0, 0, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 4, 5, 4, 5, 4, 5, -1, -1]
        )

        exp_ls = left.argsort(kind="mergesort").take(exp_li)
        exp_ls[exp_li == -1] = -1
        exp_rs = right.argsort(kind="mergesort").take(exp_ri)
        exp_rs[exp_ri == -1] = -1

        tm.assert_numpy_array_equal(ls, exp_ls, check_dtype=False)
        tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False)

    def test_cython_right_outer_join(self):
        left = np.array([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.intp)
        right = np.array([1, 1, 0, 4, 2, 2, 1], dtype=np.intp)

        # a right outer join is a left outer join with the operands swapped
        rs, ls = left_outer_join(right, left, 5)

        exp_li = np.array(
            [0, 1, 2, 3, 4, 5, 3, 4, 5, 3, 4, 5, 6, 7, 8, 6, 7, 8, -1]
        )
        exp_ri = np.array(
            [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6]
        )

        exp_ls = left.argsort(kind="mergesort").take(exp_li)
        exp_ls[exp_li == -1] = -1
        exp_rs = right.argsort(kind="mergesort").take(exp_ri)
        exp_rs[exp_ri == -1] = -1

        tm.assert_numpy_array_equal(ls, exp_ls)
        tm.assert_numpy_array_equal(rs, exp_rs)

    def test_cython_inner_join(self):
        left = np.array([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.intp)
        right = np.array([1, 1, 0, 4, 2, 2, 1, 4], dtype=np.intp)

        ls, rs = inner_join(left, right, 5)

        # unmatched key 4 on the right is dropped by the inner join
        exp_li = np.array([0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 8, 8])
        exp_ri = np.array([0, 0, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 4, 5, 4, 5, 4, 5])

        exp_ls = left.argsort(kind="mergesort").take(exp_li)
        exp_ls[exp_li == -1] = -1
        exp_rs = right.argsort(kind="mergesort").take(exp_ri)
        exp_rs[exp_ri == -1] = -1

        tm.assert_numpy_array_equal(ls, exp_ls)
        tm.assert_numpy_array_equal(rs, exp_rs)
|  | ||||
|  | ||||
@pytest.mark.parametrize("readonly", [True, False])
def test_left_join_indexer_unique(readonly):
    a = np.array([1, 2, 3, 4, 5], dtype=np.int64)
    b = np.array([2, 2, 3, 4, 4], dtype=np.int64)
    if readonly:
        # GH#37312, GH#37264: the routine must also accept immutable inputs
        for arr in (a, b):
            arr.setflags(write=False)

    outcome = libjoin.left_join_indexer_unique(b, a)
    tm.assert_numpy_array_equal(outcome, np.array([1, 1, 2, 3, 3], dtype=np.intp))
|  | ||||
|  | ||||
def test_left_outer_join_bug():
    # regression test: unsorted left keys with sort=False must keep the
    # original left ordering
    left = np.array(
        [
            0, 1, 0, 1, 1, 2, 3, 1, 0, 2,
            1, 2, 0, 1, 1, 2, 3, 2, 3, 2,
            1, 1, 3, 0, 3, 2, 3, 0, 0, 2,
            3, 2, 0, 3, 1, 3, 0, 1, 3, 0,
            0, 1, 0, 3, 1, 0, 1, 0, 1, 1,
            0, 2, 2, 2, 2, 2, 0, 3, 1, 2,
            0, 0, 3, 1, 3, 2, 2, 0, 1, 3,
            0, 2, 3, 2, 3, 3, 2, 3, 3, 1,
            3, 2, 0, 0, 3, 1, 1, 1, 0, 2,
            3, 3, 1, 2, 0, 3, 1, 2, 0, 2,
        ],
        dtype=np.intp,
    )
    right = np.array([3, 1], dtype=np.intp)

    lidx, ridx = libjoin.left_outer_join(left, right, 4, sort=False)

    # every left row survives in order; the right side matches only keys 1 and 3
    exp_lidx = np.arange(len(left), dtype=np.intp)
    exp_ridx = np.full(len(left), -1, dtype=np.intp)
    exp_ridx[left == 1] = 1
    exp_ridx[left == 3] = 0

    tm.assert_numpy_array_equal(lidx, exp_lidx)
    tm.assert_numpy_array_equal(ridx, exp_ridx)
|  | ||||
|  | ||||
def test_inner_join_indexer():
    a = np.array([1, 2, 3, 4, 5], dtype=np.int64)
    b = np.array([0, 3, 5, 7, 9], dtype=np.int64)

    index, ares, bres = libjoin.inner_join_indexer(a, b)

    # intersection is {3, 5}: found at a[2], a[4] and b[1], b[2]
    tm.assert_almost_equal(index, np.array([3, 5], dtype=np.int64))
    tm.assert_almost_equal(ares, np.array([2, 4], dtype=np.intp))
    tm.assert_almost_equal(bres, np.array([1, 2], dtype=np.intp))

    # degenerate single-element inputs
    a = np.array([5], dtype=np.int64)
    b = np.array([5], dtype=np.int64)

    index, ares, bres = libjoin.inner_join_indexer(a, b)
    tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64))
    tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.intp))
    tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.intp))
|  | ||||
|  | ||||
def test_outer_join_indexer():
    a = np.array([1, 2, 3, 4, 5], dtype=np.int64)
    b = np.array([0, 3, 5, 7, 9], dtype=np.int64)

    index, ares, bres = libjoin.outer_join_indexer(a, b)

    # union of both key sets; -1 marks "no match on this side"
    tm.assert_almost_equal(index, np.array([0, 1, 2, 3, 4, 5, 7, 9], dtype=np.int64))
    tm.assert_almost_equal(ares, np.array([-1, 0, 1, 2, 3, 4, -1, -1], dtype=np.intp))
    tm.assert_almost_equal(bres, np.array([0, -1, -1, 1, -1, 2, 3, 4], dtype=np.intp))

    # degenerate single-element inputs
    a = np.array([5], dtype=np.int64)
    b = np.array([5], dtype=np.int64)

    index, ares, bres = libjoin.outer_join_indexer(a, b)
    tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64))
    tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.intp))
    tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.intp))
|  | ||||
|  | ||||
def test_left_join_indexer():
    a = np.array([1, 2, 3, 4, 5], dtype=np.int64)
    b = np.array([0, 3, 5, 7, 9], dtype=np.int64)

    index, ares, bres = libjoin.left_join_indexer(a, b)

    # left join keeps every left key; only 3 and 5 exist in b
    tm.assert_almost_equal(index, a)
    tm.assert_almost_equal(ares, np.array([0, 1, 2, 3, 4], dtype=np.intp))
    tm.assert_almost_equal(bres, np.array([-1, -1, 1, -1, 2], dtype=np.intp))

    # degenerate single-element inputs
    a = np.array([5], dtype=np.int64)
    b = np.array([5], dtype=np.int64)

    index, ares, bres = libjoin.left_join_indexer(a, b)
    tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64))
    tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.intp))
    tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.intp))
|  | ||||
|  | ||||
def test_left_join_indexer2():
    idx = np.array([1, 1, 2, 5], dtype=np.int64)
    idx2 = np.array([1, 2, 5, 7, 9], dtype=np.int64)

    res, lidx, ridx = libjoin.left_join_indexer(idx2, idx)

    # the duplicated 1 on the right fans out; 7 and 9 have no match
    tm.assert_almost_equal(res, np.array([1, 1, 2, 5, 7, 9], dtype=np.int64))
    tm.assert_almost_equal(lidx, np.array([0, 0, 1, 2, 3, 4], dtype=np.intp))
    tm.assert_almost_equal(ridx, np.array([0, 1, 2, 3, -1, -1], dtype=np.intp))
|  | ||||
|  | ||||
def test_outer_join_indexer2():
    idx = np.array([1, 1, 2, 5], dtype=np.int64)
    idx2 = np.array([1, 2, 5, 7, 9], dtype=np.int64)

    res, lidx, ridx = libjoin.outer_join_indexer(idx2, idx)

    # same expectations as the left join here: idx2 is a superset apart from
    # the duplicated 1
    tm.assert_almost_equal(res, np.array([1, 1, 2, 5, 7, 9], dtype=np.int64))
    tm.assert_almost_equal(lidx, np.array([0, 0, 1, 2, 3, 4], dtype=np.intp))
    tm.assert_almost_equal(ridx, np.array([0, 1, 2, 3, -1, -1], dtype=np.intp))
|  | ||||
|  | ||||
def test_inner_join_indexer2():
    idx = np.array([1, 1, 2, 5], dtype=np.int64)
    idx2 = np.array([1, 2, 5, 7, 9], dtype=np.int64)

    res, lidx, ridx = libjoin.inner_join_indexer(idx2, idx)

    # 7 and 9 are dropped; the duplicated 1 fans out
    tm.assert_almost_equal(res, np.array([1, 1, 2, 5], dtype=np.int64))
    tm.assert_almost_equal(lidx, np.array([0, 0, 1, 2], dtype=np.intp))
    tm.assert_almost_equal(ridx, np.array([0, 1, 2, 3], dtype=np.intp))
							
								
								
									
										299
									
								
								lib/python3.11/site-packages/pandas/tests/libs/test_lib.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										299
									
								
								lib/python3.11/site-packages/pandas/tests/libs/test_lib.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,299 @@ | ||||
| import pickle | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs import ( | ||||
|     Timedelta, | ||||
|     lib, | ||||
|     writers as libwriters, | ||||
| ) | ||||
| from pandas.compat import IS64 | ||||
|  | ||||
| from pandas import Index | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestMisc:
    def test_max_len_string_array(self):
        base = np.array(["foo", "b", np.nan], dtype="object")
        assert libwriters.max_len_string_array(base) == 3

        # unicode round-trip back to object dtype
        assert libwriters.max_len_string_array(base.astype("U").astype(object)) == 3

        # bytes for python3
        as_bytes = base.astype("S").astype(object)
        assert libwriters.max_len_string_array(as_bytes) == 3

        # non-object dtype is rejected
        with pytest.raises(TypeError, match="No matching signature found"):
            libwriters.max_len_string_array(as_bytes.astype("U"))

    def test_fast_unique_multiple_list_gen_sort(self):
        keys = [["p", "a"], ["n", "d"], ["a", "s"]]

        # sort=True returns the deduplicated values in sorted order
        out = lib.fast_unique_multiple_list_gen((sub for sub in keys), sort=True)
        tm.assert_numpy_array_equal(
            np.array(out), np.array(["a", "d", "n", "p", "s"])
        )

        # sort=False preserves first-seen order
        out = lib.fast_unique_multiple_list_gen((sub for sub in keys), sort=False)
        tm.assert_numpy_array_equal(
            np.array(out), np.array(["p", "a", "n", "d", "s"])
        )

    def test_fast_multiget_timedelta_resos(self):
        # This will become relevant for test_constructor_dict_timedelta64_index
        #  once Timedelta constructor preserves reso when passed a
        #  np.timedelta64 object
        td = Timedelta(days=1)
        keys = Index([td * n for n in range(3)])._values.astype(object)

        # equal hashes => identical lookups regardless of stored resolution
        expected = lib.fast_multiget({td: 1}, keys)
        result = lib.fast_multiget({td.as_unit("s"): 1}, keys)
        tm.assert_numpy_array_equal(result, expected)

        # case that can't be cast to td64ns
        td = Timedelta(np.timedelta64(146000, "D"))
        assert hash(td) == hash(td.as_unit("ms"))
        assert hash(td) == hash(td.as_unit("us"))
        keys = Index([td * n for n in range(3)])._values.astype(object)

        expected = lib.fast_multiget({td: 1}, keys)
        result = lib.fast_multiget({td.as_unit("ms"): 1}, keys)
        tm.assert_numpy_array_equal(result, expected)
|  | ||||
|  | ||||
| class TestIndexing: | ||||
|     def test_maybe_indices_to_slice_left_edge(self): | ||||
|         target = np.arange(100) | ||||
|  | ||||
|         # slice | ||||
|         indices = np.array([], dtype=np.intp) | ||||
|         maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) | ||||
|  | ||||
|         assert isinstance(maybe_slice, slice) | ||||
|         tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) | ||||
|  | ||||
|     @pytest.mark.parametrize("end", [1, 2, 5, 20, 99]) | ||||
|     @pytest.mark.parametrize("step", [1, 2, 4]) | ||||
|     def test_maybe_indices_to_slice_left_edge_not_slice_end_steps(self, end, step): | ||||
|         target = np.arange(100) | ||||
|         indices = np.arange(0, end, step, dtype=np.intp) | ||||
|         maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) | ||||
|  | ||||
|         assert isinstance(maybe_slice, slice) | ||||
|         tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) | ||||
|  | ||||
|         # reverse | ||||
|         indices = indices[::-1] | ||||
|         maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) | ||||
|  | ||||
|         assert isinstance(maybe_slice, slice) | ||||
|         tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "case", [[2, 1, 2, 0], [2, 2, 1, 0], [0, 1, 2, 1], [-2, 0, 2], [2, 0, -2]] | ||||
|     ) | ||||
|     def test_maybe_indices_to_slice_left_edge_not_slice(self, case): | ||||
|         # not slice | ||||
|         target = np.arange(100) | ||||
|         indices = np.array(case, dtype=np.intp) | ||||
|         maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) | ||||
|  | ||||
|         assert not isinstance(maybe_slice, slice) | ||||
|         tm.assert_numpy_array_equal(maybe_slice, indices) | ||||
|         tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) | ||||
|  | ||||
|     @pytest.mark.parametrize("start", [0, 2, 5, 20, 97, 98]) | ||||
|     @pytest.mark.parametrize("step", [1, 2, 4]) | ||||
|     def test_maybe_indices_to_slice_right_edge(self, start, step): | ||||
|         target = np.arange(100) | ||||
|  | ||||
|         # slice | ||||
|         indices = np.arange(start, 99, step, dtype=np.intp) | ||||
|         maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) | ||||
|  | ||||
|         assert isinstance(maybe_slice, slice) | ||||
|         tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) | ||||
|  | ||||
|         # reverse | ||||
|         indices = indices[::-1] | ||||
|         maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) | ||||
|  | ||||
|         assert isinstance(maybe_slice, slice) | ||||
|         tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) | ||||
|  | ||||
|     def test_maybe_indices_to_slice_right_edge_not_slice(self): | ||||
|         # not slice | ||||
|         target = np.arange(100) | ||||
|         indices = np.array([97, 98, 99, 100], dtype=np.intp) | ||||
|         maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) | ||||
|  | ||||
|         assert not isinstance(maybe_slice, slice) | ||||
|         tm.assert_numpy_array_equal(maybe_slice, indices) | ||||
|  | ||||
|         msg = "index 100 is out of bounds for axis (0|1) with size 100" | ||||
|  | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             target[indices] | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             target[maybe_slice] | ||||
|  | ||||
|         indices = np.array([100, 99, 98, 97], dtype=np.intp) | ||||
|         maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) | ||||
|  | ||||
|         assert not isinstance(maybe_slice, slice) | ||||
|         tm.assert_numpy_array_equal(maybe_slice, indices) | ||||
|  | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             target[indices] | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             target[maybe_slice] | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "case", [[99, 97, 99, 96], [99, 99, 98, 97], [98, 98, 97, 96]] | ||||
|     ) | ||||
|     def test_maybe_indices_to_slice_right_edge_cases(self, case): | ||||
|         target = np.arange(100) | ||||
|         indices = np.array(case, dtype=np.intp) | ||||
|         maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) | ||||
|  | ||||
|         assert not isinstance(maybe_slice, slice) | ||||
|         tm.assert_numpy_array_equal(maybe_slice, indices) | ||||
|         tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) | ||||
|  | ||||
|     @pytest.mark.parametrize("step", [1, 2, 4, 5, 8, 9]) | ||||
|     def test_maybe_indices_to_slice_both_edges(self, step): | ||||
|         target = np.arange(10) | ||||
|  | ||||
|         # slice | ||||
|         indices = np.arange(0, 9, step, dtype=np.intp) | ||||
|         maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) | ||||
|         assert isinstance(maybe_slice, slice) | ||||
|         tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) | ||||
|  | ||||
|         # reverse | ||||
|         indices = indices[::-1] | ||||
|         maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) | ||||
|         assert isinstance(maybe_slice, slice) | ||||
|         tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) | ||||
|  | ||||
|     @pytest.mark.parametrize("case", [[4, 2, 0, -2], [2, 2, 1, 0], [0, 1, 2, 1]]) | ||||
|     def test_maybe_indices_to_slice_both_edges_not_slice(self, case): | ||||
|         # not slice | ||||
|         target = np.arange(10) | ||||
|         indices = np.array(case, dtype=np.intp) | ||||
|         maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) | ||||
|         assert not isinstance(maybe_slice, slice) | ||||
|         tm.assert_numpy_array_equal(maybe_slice, indices) | ||||
|         tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) | ||||
|  | ||||
|     @pytest.mark.parametrize("start, end", [(2, 10), (5, 25), (65, 97)]) | ||||
|     @pytest.mark.parametrize("step", [1, 2, 4, 20]) | ||||
|     def test_maybe_indices_to_slice_middle(self, start, end, step): | ||||
|         target = np.arange(100) | ||||
|  | ||||
|         # slice | ||||
|         indices = np.arange(start, end, step, dtype=np.intp) | ||||
|         maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) | ||||
|  | ||||
|         assert isinstance(maybe_slice, slice) | ||||
|         tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) | ||||
|  | ||||
|         # reverse | ||||
|         indices = indices[::-1] | ||||
|         maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) | ||||
|  | ||||
|         assert isinstance(maybe_slice, slice) | ||||
|         tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "case", [[14, 12, 10, 12], [12, 12, 11, 10], [10, 11, 12, 11]] | ||||
|     ) | ||||
|     def test_maybe_indices_to_slice_middle_not_slice(self, case): | ||||
|         # not slice | ||||
|         target = np.arange(100) | ||||
|         indices = np.array(case, dtype=np.intp) | ||||
|         maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) | ||||
|  | ||||
|         assert not isinstance(maybe_slice, slice) | ||||
|         tm.assert_numpy_array_equal(maybe_slice, indices) | ||||
|         tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) | ||||
|  | ||||
|     def test_maybe_booleans_to_slice(self): | ||||
|         arr = np.array([0, 0, 1, 1, 1, 0, 1], dtype=np.uint8) | ||||
|         result = lib.maybe_booleans_to_slice(arr) | ||||
|         assert result.dtype == np.bool_ | ||||
|  | ||||
|         result = lib.maybe_booleans_to_slice(arr[:0]) | ||||
|         assert result == slice(0, 0) | ||||
|  | ||||
|     def test_get_reverse_indexer(self): | ||||
|         indexer = np.array([-1, -1, 1, 2, 0, -1, 3, 4], dtype=np.intp) | ||||
|         result = lib.get_reverse_indexer(indexer, 5) | ||||
|         expected = np.array([4, 2, 3, 6, 7], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("dtype", ["int64", "int32"]) | ||||
|     def test_is_range_indexer(self, dtype): | ||||
|         # GH#50592 | ||||
|         left = np.arange(0, 100, dtype=dtype) | ||||
|         assert lib.is_range_indexer(left, 100) | ||||
|  | ||||
|     @pytest.mark.skipif( | ||||
|         not IS64, | ||||
|         reason="2**31 is too big for Py_ssize_t on 32-bit. " | ||||
|         "It doesn't matter though since you cannot create an array that long on 32-bit", | ||||
|     ) | ||||
|     @pytest.mark.parametrize("dtype", ["int64", "int32"]) | ||||
|     def test_is_range_indexer_big_n(self, dtype): | ||||
|         # GH53616 | ||||
|         left = np.arange(0, 100, dtype=dtype) | ||||
|  | ||||
|         assert not lib.is_range_indexer(left, 2**31) | ||||
|  | ||||
|     @pytest.mark.parametrize("dtype", ["int64", "int32"]) | ||||
|     def test_is_range_indexer_not_equal(self, dtype): | ||||
|         # GH#50592 | ||||
|         left = np.array([1, 2], dtype=dtype) | ||||
|         assert not lib.is_range_indexer(left, 2) | ||||
|  | ||||
|     @pytest.mark.parametrize("dtype", ["int64", "int32"]) | ||||
|     def test_is_range_indexer_not_equal_shape(self, dtype): | ||||
|         # GH#50592 | ||||
|         left = np.array([0, 1, 2], dtype=dtype) | ||||
|         assert not lib.is_range_indexer(left, 2) | ||||
|  | ||||
|  | ||||
def test_cache_readonly_preserve_docstrings():
    # GH18197: the cache_readonly decorator must not drop the wrapped docstring
    assert Index.hasnans.__doc__ is not None
|  | ||||
|  | ||||
def test_no_default_pickle():
    # GH#40397: the no_default sentinel must survive pickling as the same object
    assert tm.round_trip_pickle(lib.no_default) is lib.no_default
|  | ||||
|  | ||||
def test_ensure_string_array_copy():
    # ensure the original array is not modified in case of copy=False with
    # pickle-roundtripped object dtype array
    # https://github.com/pandas-dev/pandas/issues/54654
    original = pickle.loads(pickle.dumps(np.array(["a", None], dtype=object)))
    converted = lib.ensure_string_array(original, copy=False)
    assert not np.shares_memory(original, converted)
    assert original[1] is None
    assert converted[1] is np.nan
							
								
								
									
										162
									
								
								lib/python3.11/site-packages/pandas/tests/libs/test_libalgos.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										162
									
								
								lib/python3.11/site-packages/pandas/tests/libs/test_libalgos.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,162 @@ | ||||
| from datetime import datetime | ||||
| from itertools import permutations | ||||
|  | ||||
| import numpy as np | ||||
|  | ||||
| from pandas._libs import algos as libalgos | ||||
|  | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def test_ensure_platform_int():
    # an intp array must be passed straight through without copying
    arr = np.arange(100, dtype=np.intp)
    assert libalgos.ensure_platform_int(arr) is arr
|  | ||||
|  | ||||
def test_is_lexsorted():
    # the primary key is descending, so the pair is definitely not lexsorted
    keys = [
        np.array([3] * 32 + [2] * 32 + [1] * 32 + [0] * 32, dtype="int64"),
        np.array(list(range(31))[::-1] * 4, dtype="int64"),
    ]
    assert not libalgos.is_lexsorted(keys)
|  | ||||
|  | ||||
def test_groupsort_indexer():
    a = np.random.default_rng(2).integers(0, 1000, 100).astype(np.intp)
    b = np.random.default_rng(2).integers(0, 1000, 100).astype(np.intp)

    # groupsort_indexer must match a stable argsort; np.argsort returns int
    # while groupsort_indexer always returns intp, hence the cast
    result = libalgos.groupsort_indexer(a, 1000)[0]
    expected = np.argsort(a, kind="mergesort").astype(np.intp)
    tm.assert_numpy_array_equal(result, expected)

    # a two-key grouping must agree with np.lexsort (same intp caveat)
    combined = a * 1000 + b
    result = libalgos.groupsort_indexer(combined, 1000000)[0]
    expected = np.lexsort((b, a)).astype(np.intp)
    tm.assert_numpy_array_equal(result, expected)
|  | ||||
|  | ||||
class TestPadBackfill:
    """Tests for the libalgos.pad / libalgos.backfill fill-indexer factories."""

    def test_backfill(self):
        old = np.array([1, 5, 10], dtype=np.int64)
        new = np.arange(12, dtype=np.int64)

        result = libalgos.backfill["int64_t"](old, new)
        expected = np.array([0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, -1], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

        # Corner case: every new value lies beyond the old values -> all -1.
        old = np.array([1, 4], dtype=np.int64)
        new = np.arange(5, 10, dtype=np.int64)
        result = libalgos.backfill["int64_t"](old, new)
        tm.assert_numpy_array_equal(result, np.array([-1] * 5, dtype=np.intp))

    def test_pad(self):
        old = np.array([1, 5, 10], dtype=np.int64)
        new = np.arange(12, dtype=np.int64)

        result = libalgos.pad["int64_t"](old, new)
        expected = np.array([-1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

        # Corner case: every new value precedes the old values -> all -1.
        old = np.array([5, 10], dtype=np.int64)
        new = np.arange(5, dtype=np.int64)
        result = libalgos.pad["int64_t"](old, new)
        tm.assert_numpy_array_equal(result, np.array([-1] * 5, dtype=np.intp))

    def test_pad_backfill_object_segfault(self):
        # Regression test: the object-dtype specializations used to crash
        # when one side was an empty array.
        empty = np.array([], dtype="O")
        nonempty = np.array([datetime(2010, 12, 31)], dtype="O")
        one_missing = np.array([-1], dtype=np.intp)
        no_rows = np.array([], dtype=np.intp)

        tm.assert_numpy_array_equal(
            libalgos.pad["object"](empty, nonempty), one_missing
        )
        tm.assert_numpy_array_equal(libalgos.pad["object"](nonempty, empty), no_rows)
        tm.assert_numpy_array_equal(
            libalgos.backfill["object"](empty, nonempty), one_missing
        )
        tm.assert_numpy_array_equal(
            libalgos.backfill["object"](nonempty, empty), no_rows
        )
|  | ||||
|  | ||||
class TestInfinity:
    """Ordering behavior of the libalgos.Infinity / NegInfinity sentinels."""

    def test_infinity_sort(self):
        # GH#13445: numpy's argsort can misbehave if an object compares less
        # than itself, so the sentinels define a self-consistent total order
        # that sits just outside the extended real line.
        pos = libalgos.Infinity()
        neg = libalgos.NegInfinity()

        ordered = [neg, float("-inf"), -1e100, 0, 1e100, float("inf"), pos]

        assert all(pos >= x for x in ordered)
        assert all(pos > x or x is pos for x in ordered)
        assert pos >= pos and pos == pos
        assert not pos < pos and not pos > pos
        # Distinct instances must still compare equal.
        assert libalgos.Infinity() == libalgos.Infinity()
        assert not libalgos.Infinity() != libalgos.Infinity()

        assert all(neg <= x for x in ordered)
        assert all(neg < x or x is neg for x in ordered)
        assert neg <= neg and neg == neg
        assert not neg < neg and not neg > neg
        assert libalgos.NegInfinity() == libalgos.NegInfinity()
        assert not libalgos.NegInfinity() != libalgos.NegInfinity()

        # Any permutation must sort back to the reference ordering.
        for shuffled in permutations(ordered):
            assert sorted(shuffled) == ordered

        # Smoke tests: argsort over arrays of sentinels must not blow up.
        np.array([libalgos.Infinity()] * 32).argsort()
        np.array([libalgos.NegInfinity()] * 32).argsort()

    def test_infinity_against_nan(self):
        # Every ordering/equality comparison against NaN is False; only the
        # inequality is True, matching IEEE-754 NaN semantics.
        for sentinel in (libalgos.Infinity(), libalgos.NegInfinity()):
            assert not sentinel > np.nan
            assert not sentinel >= np.nan
            assert not sentinel < np.nan
            assert not sentinel <= np.nan
            assert not sentinel == np.nan
            assert sentinel != np.nan
		Reference in New Issue
	
	Block a user