| @@ -0,0 +1,50 @@ | ||||
| from collections.abc import Generator | ||||
| from contextlib import contextmanager | ||||
| import pathlib | ||||
| import tempfile | ||||
|  | ||||
| import pytest | ||||
|  | ||||
| from pandas.io.pytables import HDFStore | ||||
|  | ||||
| tables = pytest.importorskip("tables") | ||||
| # set these parameters so we don't have file sharing | ||||
| tables.parameters.MAX_NUMEXPR_THREADS = 1 | ||||
| tables.parameters.MAX_BLOSC_THREADS = 1 | ||||
| tables.parameters.MAX_THREADS = 1 | ||||
|  | ||||
|  | ||||
| def safe_close(store): | ||||
|     try: | ||||
|         if store is not None: | ||||
|             store.close() | ||||
|     except OSError: | ||||
|         pass | ||||
|  | ||||
|  | ||||
| # context manager to ensure the temporary file is cleaned up | ||||
| @contextmanager | ||||
| def ensure_clean_store( | ||||
|     path, mode="a", complevel=None, complib=None, fletcher32=False | ||||
| ) -> Generator[HDFStore, None, None]: | ||||
|     with tempfile.TemporaryDirectory() as tmpdirname: | ||||
|         tmp_path = pathlib.Path(tmpdirname, path) | ||||
|         with HDFStore( | ||||
|             tmp_path, | ||||
|             mode=mode, | ||||
|             complevel=complevel, | ||||
|             complib=complib, | ||||
|             fletcher32=fletcher32, | ||||
|         ) as store: | ||||
|             yield store | ||||
|  | ||||
|  | ||||
| def _maybe_remove(store, key): | ||||
|     """ | ||||
|     For tests using tables, try removing the table to be sure there is | ||||
|     no content from previous tests using the same table name. | ||||
|     """ | ||||
|     try: | ||||
|         store.remove(key) | ||||
|     except (ValueError, KeyError): | ||||
|         pass | ||||
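A minimal sketch (a hypothetical test, assuming pandas is importable alongside these helpers) of how the files below combine them: ensure_clean_store yields an open HDFStore backed by a file in a throwaway temporary directory, and _maybe_remove clears any leftover node before a test reuses a key.

    import pandas as pd
    import pandas._testing as tm

    def test_roundtrip_sketch(setup_path):
        with ensure_clean_store(setup_path) as store:
            _maybe_remove(store, "df")  # drop any node left by an earlier test
            df = pd.DataFrame({"a": [1, 2, 3]})
            store.append("df", df, format="table")
            tm.assert_frame_equal(store.select("df"), df)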
| @@ -0,0 +1,9 @@ | ||||
| import uuid | ||||
|  | ||||
| import pytest | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def setup_path(): | ||||
|     """Fixture for setup path""" | ||||
|     return f"tmp.__{uuid.uuid4()}__.h5" | ||||
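For context: setup_path returns only a unique .h5 file name, not a full path. The tests below either join it with pytest's tmp_path fixture to get a concrete location, or hand it to ensure_clean_store, which builds its own temporary directory. A sketch of the first pattern (hypothetical test name):

    import pandas as pd
    from pandas import read_hdf

    def test_tmp_path_sketch(tmp_path, setup_path):
        df = pd.DataFrame({"a": [1, 2]})
        path = tmp_path / setup_path  # unique file inside the per-test tmp dir
        df.to_hdf(path, key="df")
        result = read_hdf(path, "df")  # round-trips back to an equal DataFrame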
lib/python3.11/site-packages/pandas/tests/io/pytables/test_append.py: new file, 1015 lines (file diff suppressed because it is too large)
| @@ -0,0 +1,214 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     DataFrame, | ||||
|     Series, | ||||
|     _testing as tm, | ||||
|     concat, | ||||
|     read_hdf, | ||||
| ) | ||||
| from pandas.tests.io.pytables.common import ( | ||||
|     _maybe_remove, | ||||
|     ensure_clean_store, | ||||
| ) | ||||
|  | ||||
| pytestmark = [pytest.mark.single_cpu] | ||||
|  | ||||
|  | ||||
| def test_categorical(setup_path): | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         # Basic | ||||
|         _maybe_remove(store, "s") | ||||
|         s = Series( | ||||
|             Categorical( | ||||
|                 ["a", "b", "b", "a", "a", "c"], | ||||
|                 categories=["a", "b", "c", "d"], | ||||
|                 ordered=False, | ||||
|             ) | ||||
|         ) | ||||
|         store.append("s", s, format="table") | ||||
|         result = store.select("s") | ||||
|         tm.assert_series_equal(s, result) | ||||
|  | ||||
|         _maybe_remove(store, "s_ordered") | ||||
|         s = Series( | ||||
|             Categorical( | ||||
|                 ["a", "b", "b", "a", "a", "c"], | ||||
|                 categories=["a", "b", "c", "d"], | ||||
|                 ordered=True, | ||||
|             ) | ||||
|         ) | ||||
|         store.append("s_ordered", s, format="table") | ||||
|         result = store.select("s_ordered") | ||||
|         tm.assert_series_equal(s, result) | ||||
|  | ||||
|         _maybe_remove(store, "df") | ||||
|         df = DataFrame({"s": s, "vals": [1, 2, 3, 4, 5, 6]}) | ||||
|         store.append("df", df, format="table") | ||||
|         result = store.select("df") | ||||
|         tm.assert_frame_equal(result, df) | ||||
|  | ||||
|         # Dtypes | ||||
|         _maybe_remove(store, "si") | ||||
|         s = Series([1, 1, 2, 2, 3, 4, 5]).astype("category") | ||||
|         store.append("si", s) | ||||
|         result = store.select("si") | ||||
|         tm.assert_series_equal(result, s) | ||||
|  | ||||
|         _maybe_remove(store, "si2") | ||||
|         s = Series([1, 1, np.nan, 2, 3, 4, 5]).astype("category") | ||||
|         store.append("si2", s) | ||||
|         result = store.select("si2") | ||||
|         tm.assert_series_equal(result, s) | ||||
|  | ||||
|         # Multiple | ||||
|         _maybe_remove(store, "df2") | ||||
|         df2 = df.copy() | ||||
|         df2["s2"] = Series(list("abcdefg")).astype("category") | ||||
|         store.append("df2", df2) | ||||
|         result = store.select("df2") | ||||
|         tm.assert_frame_equal(result, df2) | ||||
|  | ||||
|         # Make sure the metadata is OK | ||||
|         info = store.info() | ||||
|         assert "/df2   " in info | ||||
|         # df2._mgr.blocks[0] and df2._mgr.blocks[2] are Categorical | ||||
|         assert "/df2/meta/values_block_0/meta" in info | ||||
|         assert "/df2/meta/values_block_2/meta" in info | ||||
|  | ||||
|         # unordered | ||||
|         _maybe_remove(store, "s2") | ||||
|         s = Series( | ||||
|             Categorical( | ||||
|                 ["a", "b", "b", "a", "a", "c"], | ||||
|                 categories=["a", "b", "c", "d"], | ||||
|                 ordered=False, | ||||
|             ) | ||||
|         ) | ||||
|         store.append("s2", s, format="table") | ||||
|         result = store.select("s2") | ||||
|         tm.assert_series_equal(result, s) | ||||
|  | ||||
|         # Query | ||||
|         _maybe_remove(store, "df3") | ||||
|         store.append("df3", df, data_columns=["s"]) | ||||
|         expected = df[df.s.isin(["b", "c"])] | ||||
|         result = store.select("df3", where=['s in ["b","c"]']) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         expected = df[df.s.isin(["b", "c"])] | ||||
|         result = store.select("df3", where=['s = ["b","c"]']) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         expected = df[df.s.isin(["d"])] | ||||
|         result = store.select("df3", where=['s in ["d"]']) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         expected = df[df.s.isin(["f"])] | ||||
|         result = store.select("df3", where=['s in ["f"]']) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # Appending with same categories is ok | ||||
|         store.append("df3", df) | ||||
|  | ||||
|         df = concat([df, df]) | ||||
|         expected = df[df.s.isin(["b", "c"])] | ||||
|         result = store.select("df3", where=['s in ["b","c"]']) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # Appending must have the same categories | ||||
|         df3 = df.copy() | ||||
|         df3["s"] = df3["s"].cat.remove_unused_categories() | ||||
|  | ||||
|         msg = "cannot append a categorical with different categories to the existing" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             store.append("df3", df3) | ||||
|  | ||||
|         # Remove, and make sure the metadata is removed (it's a recursive | ||||
|         # removal, so it should be). | ||||
|         result = store.select("df3/meta/s/meta") | ||||
|         assert result is not None | ||||
|         store.remove("df3") | ||||
|  | ||||
|         with pytest.raises( | ||||
|             KeyError, match="'No object named df3/meta/s/meta in the file'" | ||||
|         ): | ||||
|             store.select("df3/meta/s/meta") | ||||
|  | ||||
|  | ||||
| def test_categorical_conversion(tmp_path, setup_path): | ||||
|     # GH13322 | ||||
|     # Check that read_hdf with categorical columns doesn't return rows | ||||
|     # if the where criterion isn't met. | ||||
|     obsids = ["ESP_012345_6789", "ESP_987654_3210"] | ||||
|     imgids = ["APF00006np", "APF0001imm"] | ||||
|     data = [4.3, 9.8] | ||||
|  | ||||
|     # Test without categories | ||||
|     df = DataFrame({"obsids": obsids, "imgids": imgids, "data": data}) | ||||
|  | ||||
|     # We are expecting an empty DataFrame matching types of df | ||||
|     expected = df.iloc[[], :] | ||||
|     path = tmp_path / setup_path | ||||
|     df.to_hdf(path, key="df", format="table", data_columns=True) | ||||
|     result = read_hdf(path, "df", where="obsids=B") | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     # Test with categories | ||||
|     df.obsids = df.obsids.astype("category") | ||||
|     df.imgids = df.imgids.astype("category") | ||||
|  | ||||
|     # We are expecting an empty DataFrame matching types of df | ||||
|     expected = df.iloc[[], :] | ||||
|     path = tmp_path / setup_path | ||||
|     df.to_hdf(path, key="df", format="table", data_columns=True) | ||||
|     result = read_hdf(path, "df", where="obsids=B") | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_categorical_nan_only_columns(tmp_path, setup_path): | ||||
|     # GH18413 | ||||
|     # Check that categorical columns containing only NaN values can be | ||||
|     # written and read back. | ||||
|     df = DataFrame( | ||||
|         { | ||||
|             "a": ["a", "b", "c", np.nan], | ||||
|             "b": [np.nan, np.nan, np.nan, np.nan], | ||||
|             "c": [1, 2, 3, 4], | ||||
|             "d": Series([None] * 4, dtype=object), | ||||
|         } | ||||
|     ) | ||||
|     df["a"] = df.a.astype("category") | ||||
|     df["b"] = df.b.astype("category") | ||||
|     df["d"] = df.b.astype("category") | ||||
|     expected = df | ||||
|     path = tmp_path / setup_path | ||||
|     df.to_hdf(path, key="df", format="table", data_columns=True) | ||||
|     result = read_hdf(path, "df") | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "where, df, expected", | ||||
|     [ | ||||
|         ('col=="q"', DataFrame({"col": ["a", "b", "s"]}), DataFrame({"col": []})), | ||||
|         ('col=="a"', DataFrame({"col": ["a", "b", "s"]}), DataFrame({"col": ["a"]})), | ||||
|     ], | ||||
| ) | ||||
| def test_convert_value( | ||||
|     tmp_path, setup_path, where: str, df: DataFrame, expected: DataFrame | ||||
| ): | ||||
|     # GH39420 | ||||
|     # Check that read_hdf with categorical columns can filter by a where condition. | ||||
|     df.col = df.col.astype("category") | ||||
|     max_widths = {"col": 1} | ||||
|     categorical_values = sorted(df.col.unique()) | ||||
|     expected.col = expected.col.astype("category") | ||||
|     expected.col = expected.col.cat.set_categories(categorical_values) | ||||
|  | ||||
|     path = tmp_path / setup_path | ||||
|     df.to_hdf(path, key="df", format="table", min_itemsize=max_widths) | ||||
|     result = read_hdf(path, where=where) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
| @@ -0,0 +1,75 @@ | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
| tables = pytest.importorskip("tables") | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def pytables_hdf5_file(tmp_path): | ||||
|     """ | ||||
|     Use PyTables to create a simple HDF5 file. | ||||
|     """ | ||||
|     table_schema = { | ||||
|         "c0": tables.Time64Col(pos=0), | ||||
|         "c1": tables.StringCol(5, pos=1), | ||||
|         "c2": tables.Int64Col(pos=2), | ||||
|     } | ||||
|  | ||||
|     t0 = 1_561_105_000.0 | ||||
|  | ||||
|     testsamples = [ | ||||
|         {"c0": t0, "c1": "aaaaa", "c2": 1}, | ||||
|         {"c0": t0 + 1, "c1": "bbbbb", "c2": 2}, | ||||
|         {"c0": t0 + 2, "c1": "ccccc", "c2": 10**5}, | ||||
|         {"c0": t0 + 3, "c1": "ddddd", "c2": 4_294_967_295}, | ||||
|     ] | ||||
|  | ||||
|     objname = "pandas_test_timeseries" | ||||
|  | ||||
|     path = tmp_path / "written_with_pytables.h5" | ||||
|     with tables.open_file(path, mode="w") as f: | ||||
|         t = f.create_table("/", name=objname, description=table_schema) | ||||
|         for sample in testsamples: | ||||
|             for key, value in sample.items(): | ||||
|                 t.row[key] = value | ||||
|             t.row.append() | ||||
|  | ||||
|     yield path, objname, pd.DataFrame(testsamples) | ||||
|  | ||||
|  | ||||
| class TestReadPyTablesHDF5: | ||||
|     """ | ||||
|     A group of tests which covers reading HDF5 files written by plain PyTables | ||||
|     (not written by pandas). | ||||
|  | ||||
|     Was introduced for regression-testing issue 11188. | ||||
|     """ | ||||
|  | ||||
|     def test_read_complete(self, pytables_hdf5_file): | ||||
|         path, objname, df = pytables_hdf5_file | ||||
|         result = pd.read_hdf(path, key=objname) | ||||
|         expected = df | ||||
|         tm.assert_frame_equal(result, expected, check_index_type=True) | ||||
|  | ||||
|     def test_read_with_start(self, pytables_hdf5_file): | ||||
|         path, objname, df = pytables_hdf5_file | ||||
|         # This is a regression test for pandas-dev/pandas/issues/11188 | ||||
|         result = pd.read_hdf(path, key=objname, start=1) | ||||
|         expected = df[1:].reset_index(drop=True) | ||||
|         tm.assert_frame_equal(result, expected, check_index_type=True) | ||||
|  | ||||
|     def test_read_with_stop(self, pytables_hdf5_file): | ||||
|         path, objname, df = pytables_hdf5_file | ||||
|         # This is a regression test for pandas-dev/pandas/issues/11188 | ||||
|         result = pd.read_hdf(path, key=objname, stop=1) | ||||
|         expected = df[:1].reset_index(drop=True) | ||||
|         tm.assert_frame_equal(result, expected, check_index_type=True) | ||||
|  | ||||
|     def test_read_with_startstop(self, pytables_hdf5_file): | ||||
|         path, objname, df = pytables_hdf5_file | ||||
|         # This is a regression test for pandas-dev/pandas/issues/11188 | ||||
|         result = pd.read_hdf(path, key=objname, start=1, stop=2) | ||||
|         expected = df[1:2].reset_index(drop=True) | ||||
|         tm.assert_frame_equal(result, expected, check_index_type=True) | ||||
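As the three slicing tests show, start and stop act as positional row bounds on the stored table, and the slice comes back with a fresh RangeIndex. In sketch form (i and j are hypothetical bounds):

    # read_hdf(path, key=objname, start=i, stop=j) is expected to match:
    full = pd.read_hdf(path, key=objname)
    expected = full[i:j].reset_index(drop=True)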
| @@ -0,0 +1,195 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.tests.io.pytables.common import ensure_clean_store | ||||
|  | ||||
| from pandas.io.pytables import read_hdf | ||||
|  | ||||
|  | ||||
| def test_complex_fixed(tmp_path, setup_path): | ||||
|     df = DataFrame( | ||||
|         np.random.default_rng(2).random((4, 5)).astype(np.complex64), | ||||
|         index=list("abcd"), | ||||
|         columns=list("ABCDE"), | ||||
|     ) | ||||
|  | ||||
|     path = tmp_path / setup_path | ||||
|     df.to_hdf(path, key="df") | ||||
|     reread = read_hdf(path, "df") | ||||
|     tm.assert_frame_equal(df, reread) | ||||
|  | ||||
|     df = DataFrame( | ||||
|         np.random.default_rng(2).random((4, 5)).astype(np.complex128), | ||||
|         index=list("abcd"), | ||||
|         columns=list("ABCDE"), | ||||
|     ) | ||||
|     path = tmp_path / setup_path | ||||
|     df.to_hdf(path, key="df") | ||||
|     reread = read_hdf(path, "df") | ||||
|     tm.assert_frame_equal(df, reread) | ||||
|  | ||||
|  | ||||
| def test_complex_table(tmp_path, setup_path): | ||||
|     df = DataFrame( | ||||
|         np.random.default_rng(2).random((4, 5)).astype(np.complex64), | ||||
|         index=list("abcd"), | ||||
|         columns=list("ABCDE"), | ||||
|     ) | ||||
|  | ||||
|     path = tmp_path / setup_path | ||||
|     df.to_hdf(path, key="df", format="table") | ||||
|     reread = read_hdf(path, key="df") | ||||
|     tm.assert_frame_equal(df, reread) | ||||
|  | ||||
|     df = DataFrame( | ||||
|         np.random.default_rng(2).random((4, 5)).astype(np.complex128), | ||||
|         index=list("abcd"), | ||||
|         columns=list("ABCDE"), | ||||
|     ) | ||||
|  | ||||
|     path = tmp_path / setup_path | ||||
|     df.to_hdf(path, key="df", format="table", mode="w") | ||||
|     reread = read_hdf(path, "df") | ||||
|     tm.assert_frame_equal(df, reread) | ||||
|  | ||||
|  | ||||
| def test_complex_mixed_fixed(tmp_path, setup_path): | ||||
|     complex64 = np.array( | ||||
|         [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64 | ||||
|     ) | ||||
|     complex128 = np.array( | ||||
|         [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128 | ||||
|     ) | ||||
|     df = DataFrame( | ||||
|         { | ||||
|             "A": [1, 2, 3, 4], | ||||
|             "B": ["a", "b", "c", "d"], | ||||
|             "C": complex64, | ||||
|             "D": complex128, | ||||
|             "E": [1.0, 2.0, 3.0, 4.0], | ||||
|         }, | ||||
|         index=list("abcd"), | ||||
|     ) | ||||
|     path = tmp_path / setup_path | ||||
|     df.to_hdf(path, key="df") | ||||
|     reread = read_hdf(path, "df") | ||||
|     tm.assert_frame_equal(df, reread) | ||||
|  | ||||
|  | ||||
| def test_complex_mixed_table(tmp_path, setup_path): | ||||
|     complex64 = np.array( | ||||
|         [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64 | ||||
|     ) | ||||
|     complex128 = np.array( | ||||
|         [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128 | ||||
|     ) | ||||
|     df = DataFrame( | ||||
|         { | ||||
|             "A": [1, 2, 3, 4], | ||||
|             "B": ["a", "b", "c", "d"], | ||||
|             "C": complex64, | ||||
|             "D": complex128, | ||||
|             "E": [1.0, 2.0, 3.0, 4.0], | ||||
|         }, | ||||
|         index=list("abcd"), | ||||
|     ) | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         store.append("df", df, data_columns=["A", "B"]) | ||||
|         result = store.select("df", where="A>2") | ||||
|         tm.assert_frame_equal(df.loc[df.A > 2], result) | ||||
|  | ||||
|     path = tmp_path / setup_path | ||||
|     df.to_hdf(path, key="df", format="table") | ||||
|     reread = read_hdf(path, "df") | ||||
|     tm.assert_frame_equal(df, reread) | ||||
|  | ||||
|  | ||||
| def test_complex_across_dimensions_fixed(tmp_path, setup_path): | ||||
|     complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) | ||||
|     s = Series(complex128, index=list("abcd")) | ||||
|     df = DataFrame({"A": s, "B": s}) | ||||
|  | ||||
|     objs = [s, df] | ||||
|     comps = [tm.assert_series_equal, tm.assert_frame_equal] | ||||
|     for obj, comp in zip(objs, comps): | ||||
|         path = tmp_path / setup_path | ||||
|         obj.to_hdf(path, key="obj", format="fixed") | ||||
|         reread = read_hdf(path, "obj") | ||||
|         comp(obj, reread) | ||||
|  | ||||
|  | ||||
| def test_complex_across_dimensions(tmp_path, setup_path): | ||||
|     complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) | ||||
|     s = Series(complex128, index=list("abcd")) | ||||
|     df = DataFrame({"A": s, "B": s}) | ||||
|  | ||||
|     path = tmp_path / setup_path | ||||
|     df.to_hdf(path, key="obj", format="table") | ||||
|     reread = read_hdf(path, "obj") | ||||
|     tm.assert_frame_equal(df, reread) | ||||
|  | ||||
|  | ||||
| def test_complex_indexing_error(setup_path): | ||||
|     complex128 = np.array( | ||||
|         [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128 | ||||
|     ) | ||||
|     df = DataFrame( | ||||
|         {"A": [1, 2, 3, 4], "B": ["a", "b", "c", "d"], "C": complex128}, | ||||
|         index=list("abcd"), | ||||
|     ) | ||||
|  | ||||
|     msg = ( | ||||
|         "Columns containing complex values can be stored " | ||||
|         "but cannot be indexed when using table format. " | ||||
|         "Either use fixed format, set index=False, " | ||||
|         "or do not include the columns containing complex " | ||||
|         "values to data_columns when initializing the table." | ||||
|     ) | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             store.append("df", df, data_columns=["C"]) | ||||
|  | ||||
|  | ||||
| def test_complex_series_error(tmp_path, setup_path): | ||||
|     complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) | ||||
|     s = Series(complex128, index=list("abcd")) | ||||
|  | ||||
|     msg = ( | ||||
|         "Columns containing complex values can be stored " | ||||
|         "but cannot be indexed when using table format. " | ||||
|         "Either use fixed format, set index=False, " | ||||
|         "or do not include the columns containing complex " | ||||
|         "values to data_columns when initializing the table." | ||||
|     ) | ||||
|  | ||||
|     path = tmp_path / setup_path | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         s.to_hdf(path, key="obj", format="t") | ||||
|  | ||||
|     path = tmp_path / setup_path | ||||
|     s.to_hdf(path, key="obj", format="t", index=False) | ||||
|     reread = read_hdf(path, "obj") | ||||
|     tm.assert_series_equal(s, reread) | ||||
|  | ||||
|  | ||||
| def test_complex_append(setup_path): | ||||
|     df = DataFrame( | ||||
|         { | ||||
|             "a": np.random.default_rng(2).standard_normal(100).astype(np.complex128), | ||||
|             "b": np.random.default_rng(2).standard_normal(100), | ||||
|         } | ||||
|     ) | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         store.append("df", df, data_columns=["b"]) | ||||
|         store.append("df", df) | ||||
|         result = store.select("df") | ||||
|         tm.assert_frame_equal(pd.concat([df, df], axis=0), result) | ||||
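The two error tests above encode the rule this file relies on: complex values can be stored in either format, but a complex column cannot be indexed, so it must not be passed as a data_column of a table. A sketch of the workarounds named in the error message (fixed_path and table_path are hypothetical paths; imports as at the top of this file):

    s = Series(np.full(4, 1 + 1j), index=list("abcd"))

    s.to_hdf(fixed_path, key="s")  # fixed format stores complex data fine
    s.to_hdf(table_path, key="s", format="table", index=False)  # unindexed table is fine
    # passing a complex column via data_columns raises TypeError instead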
| @@ -0,0 +1,256 @@ | ||||
| import datetime | ||||
| from io import BytesIO | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     CategoricalIndex, | ||||
|     DataFrame, | ||||
|     HDFStore, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     _testing as tm, | ||||
|     date_range, | ||||
|     read_hdf, | ||||
| ) | ||||
| from pandas.tests.io.pytables.common import ensure_clean_store | ||||
|  | ||||
| from pandas.io.pytables import ( | ||||
|     Term, | ||||
|     _maybe_adjust_name, | ||||
| ) | ||||
|  | ||||
| pytestmark = [pytest.mark.single_cpu] | ||||
|  | ||||
|  | ||||
| def test_pass_spec_to_storer(setup_path): | ||||
|     df = DataFrame( | ||||
|         1.1 * np.arange(120).reshape((30, 4)), | ||||
|         columns=Index(list("ABCD"), dtype=object), | ||||
|         index=Index([f"i-{i}" for i in range(30)], dtype=object), | ||||
|     ) | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         store.put("df", df) | ||||
|         msg = ( | ||||
|             "cannot pass a column specification when reading a Fixed format " | ||||
|             "store. this store must be selected in its entirety" | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             store.select("df", columns=["A"]) | ||||
|         msg = ( | ||||
|             "cannot pass a where specification when reading from a Fixed " | ||||
|             "format store. this store must be selected in its entirety" | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             store.select("df", where=[("columns=A")]) | ||||
|  | ||||
|  | ||||
| def test_table_index_incompatible_dtypes(setup_path): | ||||
|     df1 = DataFrame({"a": [1, 2, 3]}) | ||||
|     df2 = DataFrame({"a": [4, 5, 6]}, index=date_range("1/1/2000", periods=3)) | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         store.put("frame", df1, format="table") | ||||
|         msg = re.escape("incompatible kind in col [integer - datetime64[ns]]") | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             store.put("frame", df2, format="table", append=True) | ||||
|  | ||||
|  | ||||
| def test_unimplemented_dtypes_table_columns(setup_path): | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         dtypes = [("date", datetime.date(2001, 1, 2))] | ||||
|  | ||||
|         # currently unsupported dtypes | ||||
|         for n, f in dtypes: | ||||
|             df = DataFrame( | ||||
|                 1.1 * np.arange(120).reshape((30, 4)), | ||||
|                 columns=Index(list("ABCD"), dtype=object), | ||||
|                 index=Index([f"i-{i}" for i in range(30)], dtype=object), | ||||
|             ) | ||||
|             df[n] = f | ||||
|             msg = re.escape(f"[{n}] is not implemented as a table column") | ||||
|             with pytest.raises(TypeError, match=msg): | ||||
|                 store.append(f"df1_{n}", df) | ||||
|  | ||||
|     # frame | ||||
|     df = DataFrame( | ||||
|         1.1 * np.arange(120).reshape((30, 4)), | ||||
|         columns=Index(list("ABCD"), dtype=object), | ||||
|         index=Index([f"i-{i}" for i in range(30)], dtype=object), | ||||
|     ) | ||||
|     df["obj1"] = "foo" | ||||
|     df["obj2"] = "bar" | ||||
|     df["datetime1"] = datetime.date(2001, 1, 2) | ||||
|     df = df._consolidate() | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         # this fails because we have a date in the object block | ||||
|         msg = "|".join( | ||||
|             [ | ||||
|                 re.escape( | ||||
|                     "Cannot serialize the column [datetime1]\nbecause its data " | ||||
|                     "contents are not [string] but [date] object dtype" | ||||
|                 ), | ||||
|                 re.escape("[date] is not implemented as a table column"), | ||||
|             ] | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             store.append("df_unimplemented", df) | ||||
|  | ||||
|  | ||||
| def test_invalid_terms(tmp_path, setup_path): | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         df = DataFrame( | ||||
|             np.random.default_rng(2).standard_normal((10, 4)), | ||||
|             columns=Index(list("ABCD"), dtype=object), | ||||
|             index=date_range("2000-01-01", periods=10, freq="B"), | ||||
|         ) | ||||
|         df["string"] = "foo" | ||||
|         df.loc[df.index[0:4], "string"] = "bar" | ||||
|  | ||||
|         store.put("df", df, format="table") | ||||
|  | ||||
|         # some invalid terms | ||||
|         msg = re.escape("__init__() missing 1 required positional argument: 'where'") | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             Term() | ||||
|  | ||||
|         # more invalid | ||||
|         msg = re.escape( | ||||
|             "cannot process expression [df.index[3]], " | ||||
|             "[2000-01-06 00:00:00] is not a valid condition" | ||||
|         ) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             store.select("df", "df.index[3]") | ||||
|  | ||||
|         msg = "invalid syntax" | ||||
|         with pytest.raises(SyntaxError, match=msg): | ||||
|             store.select("df", "index>") | ||||
|  | ||||
|     # from the docs | ||||
|     path = tmp_path / setup_path | ||||
|     dfq = DataFrame( | ||||
|         np.random.default_rng(2).standard_normal((10, 4)), | ||||
|         columns=list("ABCD"), | ||||
|         index=date_range("20130101", periods=10), | ||||
|     ) | ||||
|     dfq.to_hdf(path, key="dfq", format="table", data_columns=True) | ||||
|  | ||||
|     # check ok | ||||
|     read_hdf(path, "dfq", where="index>Timestamp('20130104') & columns=['A', 'B']") | ||||
|     read_hdf(path, "dfq", where="A>0 or C>0") | ||||
|  | ||||
|     # catch the invalid reference | ||||
|     path = tmp_path / setup_path | ||||
|     dfq = DataFrame( | ||||
|         np.random.default_rng(2).standard_normal((10, 4)), | ||||
|         columns=list("ABCD"), | ||||
|         index=date_range("20130101", periods=10), | ||||
|     ) | ||||
|     dfq.to_hdf(path, key="dfq", format="table") | ||||
|  | ||||
|     msg = ( | ||||
|         r"The passed where expression: A>0 or C>0\n\s*" | ||||
|         r"contains an invalid variable reference\n\s*" | ||||
|         r"all of the variable references must be a reference to\n\s*" | ||||
|         r"an axis \(e.g. 'index' or 'columns'\), or a data_column\n\s*" | ||||
|         r"The currently defined references are: index,columns\n" | ||||
|     ) | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         read_hdf(path, "dfq", where="A>0 or C>0") | ||||
|  | ||||
|  | ||||
| def test_append_with_diff_col_name_types_raises_value_error(setup_path): | ||||
|     df = DataFrame(np.random.default_rng(2).standard_normal((10, 1))) | ||||
|     df2 = DataFrame({"a": np.random.default_rng(2).standard_normal(10)}) | ||||
|     df3 = DataFrame({(1, 2): np.random.default_rng(2).standard_normal(10)}) | ||||
|     df4 = DataFrame({("1", 2): np.random.default_rng(2).standard_normal(10)}) | ||||
|     df5 = DataFrame({("1", 2, object): np.random.default_rng(2).standard_normal(10)}) | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         name = "df_diff_valerror" | ||||
|         store.append(name, df) | ||||
|  | ||||
|         for d in (df2, df3, df4, df5): | ||||
|             msg = re.escape( | ||||
|                 "cannot match existing table structure for [0] on appending data" | ||||
|             ) | ||||
|             with pytest.raises(ValueError, match=msg): | ||||
|                 store.append(name, d) | ||||
|  | ||||
|  | ||||
| def test_invalid_complib(setup_path): | ||||
|     df = DataFrame( | ||||
|         np.random.default_rng(2).random((4, 5)), | ||||
|         index=list("abcd"), | ||||
|         columns=list("ABCDE"), | ||||
|     ) | ||||
|     with tm.ensure_clean(setup_path) as path: | ||||
|         msg = r"complib only supports \[.*\] compression." | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             df.to_hdf(path, key="df", complib="foolib") | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "idx", | ||||
|     [ | ||||
|         date_range("2019", freq="D", periods=3, tz="UTC"), | ||||
|         CategoricalIndex(list("abc")), | ||||
|     ], | ||||
| ) | ||||
| def test_to_hdf_multiindex_extension_dtype(idx, tmp_path, setup_path): | ||||
|     # GH 7775 | ||||
|     mi = MultiIndex.from_arrays([idx, idx]) | ||||
|     df = DataFrame(0, index=mi, columns=["a"]) | ||||
|     path = tmp_path / setup_path | ||||
|     with pytest.raises(NotImplementedError, match="Saving a MultiIndex"): | ||||
|         df.to_hdf(path, key="df") | ||||
|  | ||||
|  | ||||
| def test_unsupported_hdf_file_error(datapath): | ||||
|     # GH 9539 | ||||
|     data_path = datapath("io", "data", "legacy_hdf/incompatible_dataset.h5") | ||||
|     message = ( | ||||
|         r"Dataset\(s\) incompatible with Pandas data types, " | ||||
|         "not table, or no datasets found in HDF5 file." | ||||
|     ) | ||||
|  | ||||
|     with pytest.raises(ValueError, match=message): | ||||
|         read_hdf(data_path) | ||||
|  | ||||
|  | ||||
| def test_read_hdf_errors(setup_path, tmp_path): | ||||
|     df = DataFrame( | ||||
|         np.random.default_rng(2).random((4, 5)), | ||||
|         index=list("abcd"), | ||||
|         columns=list("ABCDE"), | ||||
|     ) | ||||
|  | ||||
|     path = tmp_path / setup_path | ||||
|     msg = r"File [\S]* does not exist" | ||||
|     with pytest.raises(OSError, match=msg): | ||||
|         read_hdf(path, "key") | ||||
|  | ||||
|     df.to_hdf(path, key="df") | ||||
|     store = HDFStore(path, mode="r") | ||||
|     store.close() | ||||
|  | ||||
|     msg = "The HDFStore must be open for reading." | ||||
|     with pytest.raises(OSError, match=msg): | ||||
|         read_hdf(store, "df") | ||||
|  | ||||
|  | ||||
| def test_read_hdf_generic_buffer_errors(): | ||||
|     msg = "Support for generic buffers has not been implemented." | ||||
|     with pytest.raises(NotImplementedError, match=msg): | ||||
|         read_hdf(BytesIO(b""), "df") | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("bad_version", [(1, 2), (1,), [], "12", "123"]) | ||||
| def test_maybe_adjust_name_bad_version_raises(bad_version): | ||||
|     msg = "Version is incorrect, expected sequence of 3 integers" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         _maybe_adjust_name("values_block_0", version=bad_version) | ||||
| @@ -0,0 +1,517 @@ | ||||
| import os | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.compat import ( | ||||
|     PY311, | ||||
|     is_ci_environment, | ||||
|     is_platform_linux, | ||||
|     is_platform_little_endian, | ||||
| ) | ||||
| from pandas.errors import ( | ||||
|     ClosedFileError, | ||||
|     PossibleDataLossError, | ||||
| ) | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     HDFStore, | ||||
|     Index, | ||||
|     Series, | ||||
|     _testing as tm, | ||||
|     date_range, | ||||
|     read_hdf, | ||||
| ) | ||||
| from pandas.tests.io.pytables.common import ( | ||||
|     _maybe_remove, | ||||
|     ensure_clean_store, | ||||
|     tables, | ||||
| ) | ||||
|  | ||||
| from pandas.io import pytables | ||||
| from pandas.io.pytables import Term | ||||
|  | ||||
| pytestmark = [pytest.mark.single_cpu] | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("mode", ["r", "r+", "a", "w"]) | ||||
| def test_mode(setup_path, tmp_path, mode, using_infer_string): | ||||
|     df = DataFrame( | ||||
|         np.random.default_rng(2).standard_normal((10, 4)), | ||||
|         columns=Index(list("ABCD"), dtype=object), | ||||
|         index=date_range("2000-01-01", periods=10, freq="B"), | ||||
|     ) | ||||
|     msg = r"[\S]* does not exist" | ||||
|     path = tmp_path / setup_path | ||||
|  | ||||
|     # constructor | ||||
|     if mode in ["r", "r+"]: | ||||
|         with pytest.raises(OSError, match=msg): | ||||
|             HDFStore(path, mode=mode) | ||||
|  | ||||
|     else: | ||||
|         with HDFStore(path, mode=mode) as store: | ||||
|             assert store._handle.mode == mode | ||||
|  | ||||
|     path = tmp_path / setup_path | ||||
|  | ||||
|     # context | ||||
|     if mode in ["r", "r+"]: | ||||
|         with pytest.raises(OSError, match=msg): | ||||
|             with HDFStore(path, mode=mode) as store: | ||||
|                 pass | ||||
|     else: | ||||
|         with HDFStore(path, mode=mode) as store: | ||||
|             assert store._handle.mode == mode | ||||
|  | ||||
|     path = tmp_path / setup_path | ||||
|  | ||||
|     # conv write | ||||
|     if mode in ["r", "r+"]: | ||||
|         with pytest.raises(OSError, match=msg): | ||||
|             df.to_hdf(path, key="df", mode=mode) | ||||
|         df.to_hdf(path, key="df", mode="w") | ||||
|     else: | ||||
|         df.to_hdf(path, key="df", mode=mode) | ||||
|  | ||||
|     # conv read | ||||
|     if mode in ["w"]: | ||||
|         msg = ( | ||||
|             "mode w is not allowed while performing a read. " | ||||
|             r"Allowed modes are r, r\+ and a." | ||||
|         ) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             read_hdf(path, "df", mode=mode) | ||||
|     else: | ||||
|         result = read_hdf(path, "df", mode=mode) | ||||
|         if using_infer_string: | ||||
|             df.columns = df.columns.astype("str") | ||||
|         tm.assert_frame_equal(result, df) | ||||
|  | ||||
|  | ||||
| def test_default_mode(tmp_path, setup_path, using_infer_string): | ||||
|     # read_hdf uses default mode | ||||
|     df = DataFrame( | ||||
|         np.random.default_rng(2).standard_normal((10, 4)), | ||||
|         columns=Index(list("ABCD"), dtype=object), | ||||
|         index=date_range("2000-01-01", periods=10, freq="B"), | ||||
|     ) | ||||
|     path = tmp_path / setup_path | ||||
|     df.to_hdf(path, key="df", mode="w") | ||||
|     result = read_hdf(path, "df") | ||||
|     expected = df.copy() | ||||
|     if using_infer_string: | ||||
|         expected.columns = expected.columns.astype("str") | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_reopen_handle(tmp_path, setup_path): | ||||
|     path = tmp_path / setup_path | ||||
|  | ||||
|     store = HDFStore(path, mode="a") | ||||
|     store["a"] = Series( | ||||
|         np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10) | ||||
|     ) | ||||
|  | ||||
|     msg = ( | ||||
|         r"Re-opening the file \[[\S]*\] with mode \[a\] will delete the " | ||||
|         "current file!" | ||||
|     ) | ||||
|     # invalid mode change | ||||
|     with pytest.raises(PossibleDataLossError, match=msg): | ||||
|         store.open("w") | ||||
|  | ||||
|     store.close() | ||||
|     assert not store.is_open | ||||
|  | ||||
|     # truncation ok here | ||||
|     store.open("w") | ||||
|     assert store.is_open | ||||
|     assert len(store) == 0 | ||||
|     store.close() | ||||
|     assert not store.is_open | ||||
|  | ||||
|     store = HDFStore(path, mode="a") | ||||
|     store["a"] = Series( | ||||
|         np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10) | ||||
|     ) | ||||
|  | ||||
|     # reopen as read | ||||
|     store.open("r") | ||||
|     assert store.is_open | ||||
|     assert len(store) == 1 | ||||
|     assert store._mode == "r" | ||||
|     store.close() | ||||
|     assert not store.is_open | ||||
|  | ||||
|     # reopen as append | ||||
|     store.open("a") | ||||
|     assert store.is_open | ||||
|     assert len(store) == 1 | ||||
|     assert store._mode == "a" | ||||
|     store.close() | ||||
|     assert not store.is_open | ||||
|  | ||||
|     # reopen as append (again) | ||||
|     store.open("a") | ||||
|     assert store.is_open | ||||
|     assert len(store) == 1 | ||||
|     assert store._mode == "a" | ||||
|     store.close() | ||||
|     assert not store.is_open | ||||
|  | ||||
|  | ||||
| def test_open_args(setup_path, using_infer_string): | ||||
|     with tm.ensure_clean(setup_path) as path: | ||||
|         df = DataFrame( | ||||
|             1.1 * np.arange(120).reshape((30, 4)), | ||||
|             columns=Index(list("ABCD"), dtype=object), | ||||
|             index=Index([f"i-{i}" for i in range(30)], dtype=object), | ||||
|         ) | ||||
|  | ||||
|         # create an in memory store | ||||
|         store = HDFStore( | ||||
|             path, mode="a", driver="H5FD_CORE", driver_core_backing_store=0 | ||||
|         ) | ||||
|         store["df"] = df | ||||
|         store.append("df2", df) | ||||
|  | ||||
|         expected = df.copy() | ||||
|         if using_infer_string: | ||||
|             expected.index = expected.index.astype("str") | ||||
|             expected.columns = expected.columns.astype("str") | ||||
|  | ||||
|         tm.assert_frame_equal(store["df"], expected) | ||||
|         tm.assert_frame_equal(store["df2"], expected) | ||||
|  | ||||
|         store.close() | ||||
|  | ||||
|     # the file should not have actually been written | ||||
|     assert not os.path.exists(path) | ||||
|  | ||||
|  | ||||
| def test_flush(setup_path): | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         store["a"] = Series(range(5)) | ||||
|         store.flush() | ||||
|         store.flush(fsync=True) | ||||
|  | ||||
|  | ||||
| def test_complibs_default_settings(tmp_path, setup_path, using_infer_string): | ||||
|     # GH15943 | ||||
|     df = DataFrame( | ||||
|         1.1 * np.arange(120).reshape((30, 4)), | ||||
|         columns=Index(list("ABCD"), dtype=object), | ||||
|         index=Index([f"i-{i}" for i in range(30)], dtype=object), | ||||
|     ) | ||||
|  | ||||
|     # Set complevel and check if complib is automatically set to | ||||
|     # default value | ||||
|     tmpfile = tmp_path / setup_path | ||||
|     df.to_hdf(tmpfile, key="df", complevel=9) | ||||
|     result = read_hdf(tmpfile, "df") | ||||
|     expected = df.copy() | ||||
|     if using_infer_string: | ||||
|         expected.index = expected.index.astype("str") | ||||
|         expected.columns = expected.columns.astype("str") | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     with tables.open_file(tmpfile, mode="r") as h5file: | ||||
|         for node in h5file.walk_nodes(where="/df", classname="Leaf"): | ||||
|             assert node.filters.complevel == 9 | ||||
|             assert node.filters.complib == "zlib" | ||||
|  | ||||
|     # Set complib and check to see if compression is disabled | ||||
|     tmpfile = tmp_path / setup_path | ||||
|     df.to_hdf(tmpfile, key="df", complib="zlib") | ||||
|     result = read_hdf(tmpfile, "df") | ||||
|     expected = df.copy() | ||||
|     if using_infer_string: | ||||
|         expected.index = expected.index.astype("str") | ||||
|         expected.columns = expected.columns.astype("str") | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     with tables.open_file(tmpfile, mode="r") as h5file: | ||||
|         for node in h5file.walk_nodes(where="/df", classname="Leaf"): | ||||
|             assert node.filters.complevel == 0 | ||||
|             assert node.filters.complib is None | ||||
|  | ||||
|     # Check if not setting complib or complevel results in no compression | ||||
|     tmpfile = tmp_path / setup_path | ||||
|     df.to_hdf(tmpfile, key="df") | ||||
|     result = read_hdf(tmpfile, "df") | ||||
|     expected = df.copy() | ||||
|     if using_infer_string: | ||||
|         expected.index = expected.index.astype("str") | ||||
|         expected.columns = expected.columns.astype("str") | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     with tables.open_file(tmpfile, mode="r") as h5file: | ||||
|         for node in h5file.walk_nodes(where="/df", classname="Leaf"): | ||||
|             assert node.filters.complevel == 0 | ||||
|             assert node.filters.complib is None | ||||
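In short, the assertions above pin down the default behavior: setting only complevel picks zlib as the compression library, while setting only complib (or setting neither) leaves compression disabled.

    # defaults encoded by the assertions above:
    # complevel=9 only            -> complib "zlib", complevel 9
    # complib="zlib" only         -> compression disabled (complevel 0)
    # neither complevel/complib   -> compression disabled (complevel 0)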
|  | ||||
|  | ||||
| def test_complibs_default_settings_override(tmp_path, setup_path): | ||||
|     # Check if file-wide defaults can be overridden on a per-table basis | ||||
|     df = DataFrame( | ||||
|         1.1 * np.arange(120).reshape((30, 4)), | ||||
|         columns=Index(list("ABCD"), dtype=object), | ||||
|         index=Index([f"i-{i}" for i in range(30)], dtype=object), | ||||
|     ) | ||||
|     tmpfile = tmp_path / setup_path | ||||
|     store = HDFStore(tmpfile) | ||||
|     store.append("dfc", df, complevel=9, complib="blosc") | ||||
|     store.append("df", df) | ||||
|     store.close() | ||||
|  | ||||
|     with tables.open_file(tmpfile, mode="r") as h5file: | ||||
|         for node in h5file.walk_nodes(where="/df", classname="Leaf"): | ||||
|             assert node.filters.complevel == 0 | ||||
|             assert node.filters.complib is None | ||||
|         for node in h5file.walk_nodes(where="/dfc", classname="Leaf"): | ||||
|             assert node.filters.complevel == 9 | ||||
|             assert node.filters.complib == "blosc" | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("lvl", range(10)) | ||||
| @pytest.mark.parametrize("lib", tables.filters.all_complibs) | ||||
| @pytest.mark.filterwarnings("ignore:object name is not a valid") | ||||
| @pytest.mark.skipif( | ||||
|     not PY311 and is_ci_environment() and is_platform_linux(), | ||||
|     reason="Segfaulting in a CI environment" | ||||
|     # with xfail, would sometimes raise UnicodeDecodeError | ||||
|     # invalid state byte | ||||
| ) | ||||
| def test_complibs(tmp_path, lvl, lib, request): | ||||
|     # GH14478 | ||||
|     if PY311 and is_platform_linux() and lib == "blosc2" and lvl != 0: | ||||
|         request.applymarker( | ||||
|             pytest.mark.xfail(reason=f"Fails for {lib} on Linux and PY > 3.11") | ||||
|         ) | ||||
|     df = DataFrame( | ||||
|         np.ones((30, 4)), columns=list("ABCD"), index=np.arange(30).astype(np.str_) | ||||
|     ) | ||||
|  | ||||
|     # Skip if lzo is not available on this platform | ||||
|     if not tables.which_lib_version("lzo"): | ||||
|         pytest.skip("lzo not available") | ||||
|     # Skip if bzip2 is not available on this platform | ||||
|     if not tables.which_lib_version("bzip2"): | ||||
|         pytest.skip("bzip2 not available") | ||||
|  | ||||
|     tmpfile = tmp_path / f"{lvl}_{lib}.h5" | ||||
|     gname = f"{lvl}_{lib}" | ||||
|  | ||||
|     # Write and read file to see if data is consistent | ||||
|     df.to_hdf(tmpfile, key=gname, complib=lib, complevel=lvl) | ||||
|     result = read_hdf(tmpfile, gname) | ||||
|     tm.assert_frame_equal(result, df) | ||||
|  | ||||
|     # Open file and check metadata for correct amount of compression | ||||
|     with tables.open_file(tmpfile, mode="r") as h5table: | ||||
|         for node in h5table.walk_nodes(where="/" + gname, classname="Leaf"): | ||||
|             assert node.filters.complevel == lvl | ||||
|             if lvl == 0: | ||||
|                 assert node.filters.complib is None | ||||
|             else: | ||||
|                 assert node.filters.complib == lib | ||||
|  | ||||
|  | ||||
| @pytest.mark.skipif( | ||||
|     not is_platform_little_endian(), reason="platform is not little endian" | ||||
| ) | ||||
| def test_encoding(setup_path): | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         df = DataFrame({"A": "foo", "B": "bar"}, index=range(5)) | ||||
|         df.loc[2, "A"] = np.nan | ||||
|         df.loc[3, "B"] = np.nan | ||||
|         _maybe_remove(store, "df") | ||||
|         store.append("df", df, encoding="ascii") | ||||
|         tm.assert_frame_equal(store["df"], df) | ||||
|  | ||||
|         expected = df.reindex(columns=["A"]) | ||||
|         result = store.select("df", Term("columns=A", encoding="ascii")) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "val", | ||||
|     [ | ||||
|         [b"E\xc9, 17", b"", b"a", b"b", b"c"], | ||||
|         [b"E\xc9, 17", b"a", b"b", b"c"], | ||||
|         [b"EE, 17", b"", b"a", b"b", b"c"], | ||||
|         [b"E\xc9, 17", b"\xf8\xfc", b"a", b"b", b"c"], | ||||
|         [b"", b"a", b"b", b"c"], | ||||
|         [b"\xf8\xfc", b"a", b"b", b"c"], | ||||
|         [b"A\xf8\xfc", b"", b"a", b"b", b"c"], | ||||
|         [np.nan, b"", b"b", b"c"], | ||||
|         [b"A\xf8\xfc", np.nan, b"", b"b", b"c"], | ||||
|     ], | ||||
| ) | ||||
| @pytest.mark.parametrize("dtype", ["category", None]) | ||||
| def test_latin_encoding(tmp_path, setup_path, dtype, val): | ||||
|     enc = "latin-1" | ||||
|     nan_rep = "" | ||||
|     key = "data" | ||||
|  | ||||
|     val = [x.decode(enc) if isinstance(x, bytes) else x for x in val] | ||||
|     ser = Series(val, dtype=dtype) | ||||
|  | ||||
|     store = tmp_path / setup_path | ||||
|     ser.to_hdf(store, key=key, format="table", encoding=enc, nan_rep=nan_rep) | ||||
|     retr = read_hdf(store, key) | ||||
|  | ||||
|     # TODO:(3.0): once Categorical replace deprecation is enforced, | ||||
|     #  we may be able to re-simplify the construction of s_nan | ||||
|     if dtype == "category": | ||||
|         if nan_rep in ser.cat.categories: | ||||
|             s_nan = ser.cat.remove_categories([nan_rep]) | ||||
|         else: | ||||
|             s_nan = ser | ||||
|     else: | ||||
|         s_nan = ser.replace(nan_rep, np.nan) | ||||
|  | ||||
|     tm.assert_series_equal(s_nan, retr) | ||||
|  | ||||
|  | ||||
| def test_multiple_open_close(tmp_path, setup_path): | ||||
|     # gh-4409: open & close multiple times | ||||
|  | ||||
|     path = tmp_path / setup_path | ||||
|  | ||||
|     df = DataFrame( | ||||
|         1.1 * np.arange(120).reshape((30, 4)), | ||||
|         columns=Index(list("ABCD"), dtype=object), | ||||
|         index=Index([f"i-{i}" for i in range(30)], dtype=object), | ||||
|     ) | ||||
|     df.to_hdf(path, key="df", mode="w", format="table") | ||||
|  | ||||
|     # single | ||||
|     store = HDFStore(path) | ||||
|     assert "CLOSED" not in store.info() | ||||
|     assert store.is_open | ||||
|  | ||||
|     store.close() | ||||
|     assert "CLOSED" in store.info() | ||||
|     assert not store.is_open | ||||
|  | ||||
|     path = tmp_path / setup_path | ||||
|  | ||||
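|     # Under PyTables' strict HDF5 file-open policy, a file that is already | ||||
|     # open cannot be opened a second time, so the strict branch expects the | ||||
|     # second HDFStore(path) call to raise. | ||||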
|     if pytables._table_file_open_policy_is_strict: | ||||
|         # multiples | ||||
|         store1 = HDFStore(path) | ||||
|         msg = ( | ||||
|             r"The file [\S]* is already opened\.  Please close it before " | ||||
|             r"reopening in write mode\." | ||||
|         ) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             HDFStore(path) | ||||
|  | ||||
|         store1.close() | ||||
|     else: | ||||
|         # multiples | ||||
|         store1 = HDFStore(path) | ||||
|         store2 = HDFStore(path) | ||||
|  | ||||
|         assert "CLOSED" not in store1.info() | ||||
|         assert "CLOSED" not in store2.info() | ||||
|         assert store1.is_open | ||||
|         assert store2.is_open | ||||
|  | ||||
|         store1.close() | ||||
|         assert "CLOSED" in store1.info() | ||||
|         assert not store1.is_open | ||||
|         assert "CLOSED" not in store2.info() | ||||
|         assert store2.is_open | ||||
|  | ||||
|         store2.close() | ||||
|         assert "CLOSED" in store1.info() | ||||
|         assert "CLOSED" in store2.info() | ||||
|         assert not store1.is_open | ||||
|         assert not store2.is_open | ||||
|  | ||||
|         # nested close | ||||
|         store = HDFStore(path, mode="w") | ||||
|         store.append("df", df) | ||||
|  | ||||
|         store2 = HDFStore(path) | ||||
|         store2.append("df2", df) | ||||
|         store2.close() | ||||
|         assert "CLOSED" in store2.info() | ||||
|         assert not store2.is_open | ||||
|  | ||||
|         store.close() | ||||
|         assert "CLOSED" in store.info() | ||||
|         assert not store.is_open | ||||
|  | ||||
|         # double closing | ||||
|         store = HDFStore(path, mode="w") | ||||
|         store.append("df", df) | ||||
|  | ||||
|         store2 = HDFStore(path) | ||||
|         store.close() | ||||
|         assert "CLOSED" in store.info() | ||||
|         assert not store.is_open | ||||
|  | ||||
|         store2.close() | ||||
|         assert "CLOSED" in store2.info() | ||||
|         assert not store2.is_open | ||||
|  | ||||
|     # ops on a closed store | ||||
|     path = tmp_path / setup_path | ||||
|  | ||||
|     df = DataFrame( | ||||
|         1.1 * np.arange(120).reshape((30, 4)), | ||||
|         columns=Index(list("ABCD"), dtype=object), | ||||
|         index=Index([f"i-{i}" for i in range(30)], dtype=object), | ||||
|     ) | ||||
|     df.to_hdf(path, key="df", mode="w", format="table") | ||||
|  | ||||
|     store = HDFStore(path) | ||||
|     store.close() | ||||
|  | ||||
|     msg = r"[\S]* file is not open!" | ||||
|     with pytest.raises(ClosedFileError, match=msg): | ||||
|         store.keys() | ||||
|  | ||||
|     with pytest.raises(ClosedFileError, match=msg): | ||||
|         "df" in store | ||||
|  | ||||
|     with pytest.raises(ClosedFileError, match=msg): | ||||
|         len(store) | ||||
|  | ||||
|     with pytest.raises(ClosedFileError, match=msg): | ||||
|         store["df"] | ||||
|  | ||||
|     with pytest.raises(ClosedFileError, match=msg): | ||||
|         store.select("df") | ||||
|  | ||||
|     with pytest.raises(ClosedFileError, match=msg): | ||||
|         store.get("df") | ||||
|  | ||||
|     with pytest.raises(ClosedFileError, match=msg): | ||||
|         store.append("df2", df) | ||||
|  | ||||
|     with pytest.raises(ClosedFileError, match=msg): | ||||
|         store.put("df3", df) | ||||
|  | ||||
|     with pytest.raises(ClosedFileError, match=msg): | ||||
|         store.get_storer("df2") | ||||
|  | ||||
|     with pytest.raises(ClosedFileError, match=msg): | ||||
|         store.remove("df2") | ||||
|  | ||||
|     with pytest.raises(ClosedFileError, match=msg): | ||||
|         store.select("df") | ||||
|  | ||||
|     msg = "'HDFStore' object has no attribute 'df'" | ||||
|     with pytest.raises(AttributeError, match=msg): | ||||
|         store.df | ||||
|  | ||||
|  | ||||
| def test_fspath(): | ||||
|     with tm.ensure_clean("foo.h5") as path: | ||||
|         with HDFStore(path) as store: | ||||
|             assert os.fspath(store) == str(path) | ||||
| @@ -0,0 +1,87 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     HDFStore, | ||||
|     Index, | ||||
|     Series, | ||||
|     date_range, | ||||
| ) | ||||
| from pandas.tests.io.pytables.common import ( | ||||
|     ensure_clean_store, | ||||
|     tables, | ||||
| ) | ||||
|  | ||||
| pytestmark = [pytest.mark.single_cpu] | ||||
|  | ||||
|  | ||||
| def test_keys(setup_path): | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         store["a"] = Series( | ||||
|             np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10) | ||||
|         ) | ||||
|         store["b"] = Series( | ||||
|             range(10), dtype="float64", index=[f"i_{i}" for i in range(10)] | ||||
|         ) | ||||
|         store["c"] = DataFrame( | ||||
|             1.1 * np.arange(120).reshape((30, 4)), | ||||
|             columns=Index(list("ABCD"), dtype=object), | ||||
|             index=Index([f"i-{i}" for i in range(30)], dtype=object), | ||||
|         ) | ||||
|  | ||||
|         assert len(store) == 3 | ||||
|         expected = {"/a", "/b", "/c"} | ||||
|         assert set(store.keys()) == expected | ||||
|         assert set(store) == expected | ||||
|  | ||||
|  | ||||
| def test_non_pandas_keys(tmp_path, setup_path): | ||||
|     class Table1(tables.IsDescription): | ||||
|         value1 = tables.Float32Col() | ||||
|  | ||||
|     class Table2(tables.IsDescription): | ||||
|         value2 = tables.Float32Col() | ||||
|  | ||||
|     class Table3(tables.IsDescription): | ||||
|         value3 = tables.Float32Col() | ||||
|  | ||||
|     path = tmp_path / setup_path | ||||
|     with tables.open_file(path, mode="w") as h5file: | ||||
|         group = h5file.create_group("/", "group") | ||||
|         h5file.create_table(group, "table1", Table1, "Table 1") | ||||
|         h5file.create_table(group, "table2", Table2, "Table 2") | ||||
|         h5file.create_table(group, "table3", Table3, "Table 3") | ||||
|     with HDFStore(path) as store: | ||||
|         assert len(store.keys(include="native")) == 3 | ||||
|         expected = {"/group/table1", "/group/table2", "/group/table3"} | ||||
|         assert set(store.keys(include="native")) == expected | ||||
|         assert set(store.keys(include="pandas")) == set() | ||||
|         for name in expected: | ||||
|             df = store.get(name) | ||||
|             assert len(df.columns) == 1 | ||||
|  | ||||
|  | ||||
| def test_keys_illegal_include_keyword_value(setup_path): | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         with pytest.raises( | ||||
|             ValueError, | ||||
|             match="`include` should be either 'pandas' or 'native' but is 'illegal'", | ||||
|         ): | ||||
|             store.keys(include="illegal") | ||||
|  | ||||
|  | ||||
| def test_keys_ignore_hdf_softlink(setup_path): | ||||
|     # GH 20523 | ||||
|     # Put a softlink into an HDF file and reread the keys | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         df = DataFrame({"A": range(5), "B": range(5)}) | ||||
|         store.put("df", df) | ||||
|  | ||||
|         assert store.keys() == ["/df"] | ||||
|  | ||||
|         store._handle.create_soft_link(store._handle.root, "symlink", "df") | ||||
|  | ||||
|         # Should ignore the softlink | ||||
|         assert store.keys() == ["/df"] | ||||
| @ -0,0 +1,419 @@ | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs.tslibs import Timestamp | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     HDFStore, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
|     _testing as tm, | ||||
|     concat, | ||||
|     date_range, | ||||
| ) | ||||
| from pandas.tests.io.pytables.common import ( | ||||
|     _maybe_remove, | ||||
|     ensure_clean_store, | ||||
| ) | ||||
| from pandas.util import _test_decorators as td | ||||
|  | ||||
| pytestmark = [pytest.mark.single_cpu] | ||||
|  | ||||
|  | ||||
| def test_format_type(tmp_path, setup_path): | ||||
|     df = DataFrame({"A": [1, 2]}) | ||||
|     with HDFStore(tmp_path / setup_path) as store: | ||||
|         store.put("a", df, format="fixed") | ||||
|         store.put("b", df, format="table") | ||||
|  | ||||
|         assert store.get_storer("a").format_type == "fixed" | ||||
|         assert store.get_storer("b").format_type == "table" | ||||
|  | ||||
|  | ||||
| def test_format_kwarg_in_constructor(tmp_path, setup_path): | ||||
|     # GH 13291 | ||||
|  | ||||
|     msg = "format is not a defined argument for HDFStore" | ||||
|  | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         HDFStore(tmp_path / setup_path, format="table") | ||||
|  | ||||
|  | ||||
| def test_api_default_format(tmp_path, setup_path): | ||||
|     # default_format option | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         df = DataFrame( | ||||
|             1.1 * np.arange(120).reshape((30, 4)), | ||||
|             columns=Index(list("ABCD")), | ||||
|             index=Index([f"i-{i}" for i in range(30)]), | ||||
|         ) | ||||
|  | ||||
|         with pd.option_context("io.hdf.default_format", "fixed"): | ||||
|             _maybe_remove(store, "df") | ||||
|             store.put("df", df) | ||||
|             assert not store.get_storer("df").is_table | ||||
|  | ||||
|             msg = "Can only append to Tables" | ||||
|             with pytest.raises(ValueError, match=msg): | ||||
|                 store.append("df2", df) | ||||
|  | ||||
|         with pd.option_context("io.hdf.default_format", "table"): | ||||
|             _maybe_remove(store, "df") | ||||
|             store.put("df", df) | ||||
|             assert store.get_storer("df").is_table | ||||
|  | ||||
|             _maybe_remove(store, "df2") | ||||
|             store.append("df2", df) | ||||
|             assert store.get_storer("df").is_table | ||||
|  | ||||
|     path = tmp_path / setup_path | ||||
|     df = DataFrame( | ||||
|         1.1 * np.arange(120).reshape((30, 4)), | ||||
|         columns=Index(list("ABCD")), | ||||
|         index=Index([f"i-{i}" for i in range(30)]), | ||||
|     ) | ||||
|  | ||||
|     with pd.option_context("io.hdf.default_format", "fixed"): | ||||
|         df.to_hdf(path, key="df") | ||||
|         with HDFStore(path) as store: | ||||
|             assert not store.get_storer("df").is_table | ||||
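|         # msg ("Can only append to Tables") is still in scope from above | ||||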
|         with pytest.raises(ValueError, match=msg): | ||||
|             df.to_hdf(path, key="df2", append=True) | ||||
|  | ||||
|     with pd.option_context("io.hdf.default_format", "table"): | ||||
|         df.to_hdf(path, key="df3") | ||||
|         with HDFStore(path) as store: | ||||
|             assert store.get_storer("df3").is_table | ||||
|         df.to_hdf(path, key="df4", append=True) | ||||
|         with HDFStore(path) as store: | ||||
|             assert store.get_storer("df4").is_table | ||||
|  | ||||
|  | ||||
| def test_put(setup_path): | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         ts = Series( | ||||
|             np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10) | ||||
|         ) | ||||
|         df = DataFrame( | ||||
|             np.random.default_rng(2).standard_normal((20, 4)), | ||||
|             columns=Index(list("ABCD")), | ||||
|             index=date_range("2000-01-01", periods=20, freq="B"), | ||||
|         ) | ||||
|         store["a"] = ts | ||||
|         store["b"] = df[:10] | ||||
|         store["foo/bar/bah"] = df[:10] | ||||
|         store["foo"] = df[:10] | ||||
|         store["/foo"] = df[:10] | ||||
|         store.put("c", df[:10], format="table") | ||||
|  | ||||
|         # not OK, not a table | ||||
|         msg = "Can only append to Tables" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             store.put("b", df[10:], append=True) | ||||
|  | ||||
|         # the node does not exist yet; check that _is_table_type returns | ||||
|         # False in this case | ||||
|         _maybe_remove(store, "f") | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             store.put("f", df[10:], append=True) | ||||
|  | ||||
|         # can't put to a table (use append instead) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             store.put("c", df[10:], append=True) | ||||
|  | ||||
|         # overwrite table | ||||
|         store.put("c", df[:10], format="table", append=False) | ||||
|         tm.assert_frame_equal(df[:10], store["c"]) | ||||
|  | ||||
|  | ||||
| def test_put_string_index(setup_path): | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         index = Index([f"I am a very long string index: {i}" for i in range(20)]) | ||||
|         s = Series(np.arange(20), index=index) | ||||
|         df = DataFrame({"A": s, "B": s}) | ||||
|  | ||||
|         store["a"] = s | ||||
|         tm.assert_series_equal(store["a"], s) | ||||
|  | ||||
|         store["b"] = df | ||||
|         tm.assert_frame_equal(store["b"], df) | ||||
|  | ||||
|         # mixed length | ||||
|         index = Index( | ||||
|             ["abcdefghijklmnopqrstuvwxyz1234567890"] | ||||
|             + [f"I am a very long string index: {i}" for i in range(20)] | ||||
|         ) | ||||
|         s = Series(np.arange(21), index=index) | ||||
|         df = DataFrame({"A": s, "B": s}) | ||||
|         store["a"] = s | ||||
|         tm.assert_series_equal(store["a"], s) | ||||
|  | ||||
|         store["b"] = df | ||||
|         tm.assert_frame_equal(store["b"], df) | ||||
|  | ||||
|  | ||||
| def test_put_compression(setup_path): | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         df = DataFrame( | ||||
|             np.random.default_rng(2).standard_normal((10, 4)), | ||||
|             columns=Index(list("ABCD")), | ||||
|             index=date_range("2000-01-01", periods=10, freq="B"), | ||||
|         ) | ||||
|  | ||||
|         store.put("c", df, format="table", complib="zlib") | ||||
|         tm.assert_frame_equal(store["c"], df) | ||||
|  | ||||
|         # can't compress if format='fixed' | ||||
|         msg = "Compression not supported on Fixed format stores" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             store.put("b", df, format="fixed", complib="zlib") | ||||
|  | ||||
|  | ||||
| @td.skip_if_windows | ||||
| def test_put_compression_blosc(setup_path): | ||||
|     df = DataFrame( | ||||
|         np.random.default_rng(2).standard_normal((10, 4)), | ||||
|         columns=Index(list("ABCD")), | ||||
|         index=date_range("2000-01-01", periods=10, freq="B"), | ||||
|     ) | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         # can't compress if format='fixed' | ||||
|         msg = "Compression not supported on Fixed format stores" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             store.put("b", df, format="fixed", complib="blosc") | ||||
|  | ||||
|         store.put("c", df, format="table", complib="blosc") | ||||
|         tm.assert_frame_equal(store["c"], df) | ||||
|  | ||||
|  | ||||
| def test_put_datetime_ser(setup_path): | ||||
|     # https://github.com/pandas-dev/pandas/pull/60663 | ||||
|     ser = Series(3 * [Timestamp("20010102").as_unit("ns")]) | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         store.put("ser", ser) | ||||
|         expected = ser.copy() | ||||
|         result = store.get("ser") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_put_mixed_type(setup_path, using_infer_string): | ||||
|     df = DataFrame( | ||||
|         np.random.default_rng(2).standard_normal((10, 4)), | ||||
|         columns=Index(list("ABCD")), | ||||
|         index=date_range("2000-01-01", periods=10, freq="B"), | ||||
|     ) | ||||
|     df["obj1"] = "foo" | ||||
|     df["obj2"] = "bar" | ||||
|     df["bool1"] = df["A"] > 0 | ||||
|     df["bool2"] = df["B"] > 0 | ||||
|     df["bool3"] = True | ||||
|     df["int1"] = 1 | ||||
|     df["int2"] = 2 | ||||
|     df["timestamp1"] = Timestamp("20010102").as_unit("ns") | ||||
|     df["timestamp2"] = Timestamp("20010103").as_unit("ns") | ||||
|     df["datetime1"] = Timestamp("20010102").as_unit("ns") | ||||
|     df["datetime2"] = Timestamp("20010103").as_unit("ns") | ||||
|     df.loc[df.index[3:6], ["obj1"]] = np.nan | ||||
|     df = df._consolidate() | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         _maybe_remove(store, "df") | ||||
|  | ||||
|         warning = None if using_infer_string else pd.errors.PerformanceWarning | ||||
|         with tm.assert_produces_warning(warning): | ||||
|             store.put("df", df) | ||||
|  | ||||
|         expected = store.get("df") | ||||
|         tm.assert_frame_equal(expected, df) | ||||
|  | ||||
|  | ||||
| def test_put_str_frame(setup_path, string_dtype_arguments): | ||||
|     # https://github.com/pandas-dev/pandas/pull/60663 | ||||
|     dtype = pd.StringDtype(*string_dtype_arguments) | ||||
|     df = DataFrame({"a": pd.array(["x", pd.NA, "y"], dtype=dtype)}) | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         _maybe_remove(store, "df") | ||||
|  | ||||
|         store.put("df", df) | ||||
|         expected_dtype = "str" if dtype.na_value is np.nan else "string" | ||||
|         expected = df.astype(expected_dtype) | ||||
|         result = store.get("df") | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_put_str_series(setup_path, string_dtype_arguments): | ||||
|     # https://github.com/pandas-dev/pandas/pull/60663 | ||||
|     dtype = pd.StringDtype(*string_dtype_arguments) | ||||
|     ser = Series(["x", pd.NA, "y"], dtype=dtype) | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         _maybe_remove(store, "df") | ||||
|  | ||||
|         store.put("ser", ser) | ||||
|         expected_dtype = "str" if dtype.na_value is np.nan else "string" | ||||
|         expected = ser.astype(expected_dtype) | ||||
|         result = store.get("ser") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("format", ["table", "fixed"]) | ||||
| @pytest.mark.parametrize( | ||||
|     "index", | ||||
|     [ | ||||
|         Index([str(i) for i in range(10)]), | ||||
|         Index(np.arange(10, dtype=float)), | ||||
|         Index(np.arange(10)), | ||||
|         date_range("2020-01-01", periods=10), | ||||
|         pd.period_range("2020-01-01", periods=10), | ||||
|     ], | ||||
| ) | ||||
| def test_store_index_types(setup_path, format, index): | ||||
|     # GH5386 | ||||
|     # test storing various index types | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         df = DataFrame( | ||||
|             np.random.default_rng(2).standard_normal((10, 2)), | ||||
|             columns=list("AB"), | ||||
|             index=index, | ||||
|         ) | ||||
|         _maybe_remove(store, "df") | ||||
|         store.put("df", df, format=format) | ||||
|         tm.assert_frame_equal(df, store["df"]) | ||||
|  | ||||
|  | ||||
| def test_column_multiindex(setup_path, using_infer_string): | ||||
|     # GH 4710 | ||||
|     # recreate multi-indexes properly | ||||
|  | ||||
|     index = MultiIndex.from_tuples( | ||||
|         [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")], names=["first", "second"] | ||||
|     ) | ||||
|     df = DataFrame(np.arange(12).reshape(3, 4), columns=index) | ||||
|     expected = df.set_axis(df.index.to_numpy()) | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         if using_infer_string: | ||||
|             # TODO(infer_string) make this work for string dtype | ||||
|             msg = "Saving a MultiIndex with an extension dtype is not supported." | ||||
|             with pytest.raises(NotImplementedError, match=msg): | ||||
|                 store.put("df", df) | ||||
|             return | ||||
|         store.put("df", df) | ||||
|         tm.assert_frame_equal( | ||||
|             store["df"], expected, check_index_type=True, check_column_type=True | ||||
|         ) | ||||
|  | ||||
|         store.put("df1", df, format="table") | ||||
|         tm.assert_frame_equal( | ||||
|             store["df1"], expected, check_index_type=True, check_column_type=True | ||||
|         ) | ||||
|  | ||||
|         msg = re.escape("cannot use a multi-index on axis [1] with data_columns ['A']") | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             store.put("df2", df, format="table", data_columns=["A"]) | ||||
|         msg = re.escape("cannot use a multi-index on axis [1] with data_columns True") | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             store.put("df3", df, format="table", data_columns=True) | ||||
|  | ||||
|     # appending a multi-column DataFrame to an existing table (see GH 6167) | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         store.append("df2", df) | ||||
|         store.append("df2", df) | ||||
|  | ||||
|         tm.assert_frame_equal(store["df2"], concat((df, df))) | ||||
|  | ||||
|     # non_index_axes name | ||||
|     df = DataFrame(np.arange(12).reshape(3, 4), columns=Index(list("ABCD"), name="foo")) | ||||
|     expected = df.set_axis(df.index.to_numpy()) | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         store.put("df1", df, format="table") | ||||
|         tm.assert_frame_equal( | ||||
|             store["df1"], expected, check_index_type=True, check_column_type=True | ||||
|         ) | ||||
|  | ||||
|  | ||||
| def test_store_multiindex(setup_path): | ||||
|     # validate multi-index names | ||||
|     # GH 5527 | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|  | ||||
|         def make_index(names=None): | ||||
|             dti = date_range("2013-12-01", "2013-12-02") | ||||
|             mi = MultiIndex.from_product([dti, range(2), range(3)], names=names) | ||||
|             return mi | ||||
|  | ||||
|         # no names | ||||
|         _maybe_remove(store, "df") | ||||
|         df = DataFrame(np.zeros((12, 2)), columns=["a", "b"], index=make_index()) | ||||
|         store.append("df", df) | ||||
|         tm.assert_frame_equal(store.select("df"), df) | ||||
|  | ||||
|         # partial names | ||||
|         _maybe_remove(store, "df") | ||||
|         df = DataFrame( | ||||
|             np.zeros((12, 2)), | ||||
|             columns=["a", "b"], | ||||
|             index=make_index(["date", None, None]), | ||||
|         ) | ||||
|         store.append("df", df) | ||||
|         tm.assert_frame_equal(store.select("df"), df) | ||||
|  | ||||
|         # series | ||||
|         _maybe_remove(store, "ser") | ||||
|         ser = Series(np.zeros(12), index=make_index(["date", None, None])) | ||||
|         store.append("ser", ser) | ||||
|         xp = Series(np.zeros(12), index=make_index(["date", "level_1", "level_2"])) | ||||
|         tm.assert_series_equal(store.select("ser"), xp) | ||||
|  | ||||
|         # dup with column | ||||
|         _maybe_remove(store, "df") | ||||
|         df = DataFrame( | ||||
|             np.zeros((12, 2)), | ||||
|             columns=["a", "b"], | ||||
|             index=make_index(["date", "a", "t"]), | ||||
|         ) | ||||
|         msg = "duplicate names/columns in the multi-index when storing as a table" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             store.append("df", df) | ||||
|  | ||||
|         # dup within level | ||||
|         _maybe_remove(store, "df") | ||||
|         df = DataFrame( | ||||
|             np.zeros((12, 2)), | ||||
|             columns=["a", "b"], | ||||
|             index=make_index(["date", "date", "date"]), | ||||
|         ) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             store.append("df", df) | ||||
|  | ||||
|         # fully named | ||||
|         _maybe_remove(store, "df") | ||||
|         df = DataFrame( | ||||
|             np.zeros((12, 2)), | ||||
|             columns=["a", "b"], | ||||
|             index=make_index(["date", "s", "t"]), | ||||
|         ) | ||||
|         store.append("df", df) | ||||
|         tm.assert_frame_equal(store.select("df"), df) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("format", ["fixed", "table"]) | ||||
| def test_store_periodindex(tmp_path, setup_path, format): | ||||
|     # GH 7796 | ||||
|     # test of PeriodIndex in HDFStore | ||||
|     df = DataFrame( | ||||
|         np.random.default_rng(2).standard_normal((5, 1)), | ||||
|         index=pd.period_range("20220101", freq="M", periods=5), | ||||
|     ) | ||||
|  | ||||
|     path = tmp_path / setup_path | ||||
|     df.to_hdf(path, key="df", mode="w", format=format) | ||||
|     expected = pd.read_hdf(path, "df") | ||||
|     tm.assert_frame_equal(df, expected) | ||||
| @ -0,0 +1,14 @@ | ||||
| import pytest | ||||
|  | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
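| # runs only when PyTables is not installed; to_hdf should then raise ImportError | ||||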
| @td.skip_if_installed("tables") | ||||
| def test_pytables_raises(): | ||||
|     df = pd.DataFrame({"A": [1, 2]}) | ||||
|     with pytest.raises(ImportError, match="tables"): | ||||
|         with tm.ensure_clean("foo.h5") as path: | ||||
|             df.to_hdf(path, key="df") | ||||
| @ -0,0 +1,417 @@ | ||||
| from contextlib import closing | ||||
| from pathlib import Path | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs.tslibs import Timestamp | ||||
| from pandas.compat import is_platform_windows | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     HDFStore, | ||||
|     Index, | ||||
|     Series, | ||||
|     _testing as tm, | ||||
|     date_range, | ||||
|     read_hdf, | ||||
| ) | ||||
| from pandas.tests.io.pytables.common import ( | ||||
|     _maybe_remove, | ||||
|     ensure_clean_store, | ||||
| ) | ||||
| from pandas.util import _test_decorators as td | ||||
|  | ||||
| from pandas.io.pytables import TableIterator | ||||
|  | ||||
| pytestmark = [pytest.mark.single_cpu] | ||||
|  | ||||
|  | ||||
| def test_read_missing_key_close_store(tmp_path, setup_path): | ||||
|     # GH 25766 | ||||
|     path = tmp_path / setup_path | ||||
|     df = DataFrame({"a": range(2), "b": range(2)}) | ||||
|     df.to_hdf(path, key="k1") | ||||
|  | ||||
|     with pytest.raises(KeyError, match="'No object named k2 in the file'"): | ||||
|         read_hdf(path, "k2") | ||||
|  | ||||
|     # smoke test: the file should be closed properly after the read | ||||
|     # raises KeyError, so a subsequent write succeeds | ||||
|     df.to_hdf(path, key="k2") | ||||
|  | ||||
|  | ||||
| def test_read_index_error_close_store(tmp_path, setup_path): | ||||
|     # GH 25766 | ||||
|     path = tmp_path / setup_path | ||||
|     df = DataFrame({"A": [], "B": []}, index=[]) | ||||
|     df.to_hdf(path, key="k1") | ||||
|  | ||||
|     with pytest.raises(IndexError, match=r"list index out of range"): | ||||
|         read_hdf(path, "k1", stop=0) | ||||
|  | ||||
|     # smoke test: the file should be closed properly after the read | ||||
|     # raises IndexError, so a subsequent write succeeds | ||||
|     df.to_hdf(path, key="k1") | ||||
|  | ||||
|  | ||||
| def test_read_missing_key_opened_store(tmp_path, setup_path): | ||||
|     # GH 28699 | ||||
|     path = tmp_path / setup_path | ||||
|     df = DataFrame({"a": range(2), "b": range(2)}) | ||||
|     df.to_hdf(path, key="k1") | ||||
|  | ||||
|     with HDFStore(path, "r") as store: | ||||
|         with pytest.raises(KeyError, match="'No object named k2 in the file'"): | ||||
|             read_hdf(store, "k2") | ||||
|  | ||||
|         # Test that the file is still open after a KeyError and that we can | ||||
|         # still read from it. | ||||
|         read_hdf(store, "k1") | ||||
|  | ||||
|  | ||||
| def test_read_column(setup_path): | ||||
|     df = DataFrame( | ||||
|         np.random.default_rng(2).standard_normal((10, 4)), | ||||
|         columns=Index(list("ABCD")), | ||||
|         index=date_range("2000-01-01", periods=10, freq="B"), | ||||
|     ) | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         _maybe_remove(store, "df") | ||||
|  | ||||
|         # GH 17912 | ||||
|         # HDFStore.select_column should raise a KeyError | ||||
|         # exception if the key is not a valid store | ||||
|         with pytest.raises(KeyError, match="No object named df in the file"): | ||||
|             store.select_column("df", "index") | ||||
|  | ||||
|         store.append("df", df) | ||||
|         # error | ||||
|         with pytest.raises( | ||||
|             KeyError, match=re.escape("'column [foo] not found in the table'") | ||||
|         ): | ||||
|             store.select_column("df", "foo") | ||||
|  | ||||
|         msg = re.escape("select_column() got an unexpected keyword argument 'where'") | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             store.select_column("df", "index", where=["index>5"]) | ||||
|  | ||||
|         # valid | ||||
|         result = store.select_column("df", "index") | ||||
|         tm.assert_almost_equal(result.values, Series(df.index).values) | ||||
|         assert isinstance(result, Series) | ||||
|  | ||||
|         # not a data indexable column | ||||
|         msg = re.escape( | ||||
|             "column [values_block_0] can not be extracted individually; " | ||||
|             "it is not data indexable" | ||||
|         ) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             store.select_column("df", "values_block_0") | ||||
|  | ||||
|         # a data column | ||||
|         df2 = df.copy() | ||||
|         df2["string"] = "foo" | ||||
|         store.append("df2", df2, data_columns=["string"]) | ||||
|         result = store.select_column("df2", "string") | ||||
|         tm.assert_almost_equal(result.values, df2["string"].values) | ||||
|  | ||||
|         # a data column with NaNs round-trips, NaNs included | ||||
|         df3 = df.copy() | ||||
|         df3["string"] = "foo" | ||||
|         df3.loc[df3.index[4:6], "string"] = np.nan | ||||
|         store.append("df3", df3, data_columns=["string"]) | ||||
|         result = store.select_column("df3", "string") | ||||
|         tm.assert_almost_equal(result.values, df3["string"].values) | ||||
|  | ||||
|         # start/stop | ||||
|         result = store.select_column("df3", "string", start=2) | ||||
|         tm.assert_almost_equal(result.values, df3["string"].values[2:]) | ||||
|  | ||||
|         result = store.select_column("df3", "string", start=-2) | ||||
|         tm.assert_almost_equal(result.values, df3["string"].values[-2:]) | ||||
|  | ||||
|         result = store.select_column("df3", "string", stop=2) | ||||
|         tm.assert_almost_equal(result.values, df3["string"].values[:2]) | ||||
|  | ||||
|         result = store.select_column("df3", "string", stop=-2) | ||||
|         tm.assert_almost_equal(result.values, df3["string"].values[:-2]) | ||||
|  | ||||
|         result = store.select_column("df3", "string", start=2, stop=-2) | ||||
|         tm.assert_almost_equal(result.values, df3["string"].values[2:-2]) | ||||
|  | ||||
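|         # start/stop follow Python slice semantics; values[-2:2] is empty here | ||||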
|         result = store.select_column("df3", "string", start=-2, stop=2) | ||||
|         tm.assert_almost_equal(result.values, df3["string"].values[-2:2]) | ||||
|  | ||||
|         # GH 10392 - make sure column name is preserved | ||||
|         df4 = DataFrame({"A": np.random.default_rng(2).standard_normal(10), "B": "foo"}) | ||||
|         store.append("df4", df4, data_columns=True) | ||||
|         expected = df4["B"] | ||||
|         result = store.select_column("df4", "B") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_pytables_native_read(datapath): | ||||
|     with ensure_clean_store( | ||||
|         datapath("io", "data", "legacy_hdf/pytables_native.h5"), mode="r" | ||||
|     ) as store: | ||||
|         d2 = store["detector/readout"] | ||||
|     assert isinstance(d2, DataFrame) | ||||
|  | ||||
|  | ||||
| @pytest.mark.skipif(is_platform_windows(), reason="native2 read fails oddly on windows") | ||||
| def test_pytables_native2_read(datapath): | ||||
|     with ensure_clean_store( | ||||
|         datapath("io", "data", "legacy_hdf", "pytables_native2.h5"), mode="r" | ||||
|     ) as store: | ||||
|         str(store) | ||||
|         d1 = store["detector"] | ||||
|     assert isinstance(d1, DataFrame) | ||||
|  | ||||
|  | ||||
| def test_legacy_table_fixed_format_read_py2(datapath): | ||||
|     # GH 24510 | ||||
|     # legacy table with fixed format written in Python 2 | ||||
|     with ensure_clean_store( | ||||
|         datapath("io", "data", "legacy_hdf", "legacy_table_fixed_py2.h5"), mode="r" | ||||
|     ) as store: | ||||
|         result = store.select("df") | ||||
|     expected = DataFrame( | ||||
|         [[1, 2, 3, "D"]], | ||||
|         columns=["A", "B", "C", "D"], | ||||
|         index=Index(["ABC"], name="INDEX_NAME"), | ||||
|     ) | ||||
|     tm.assert_frame_equal(expected, result) | ||||
|  | ||||
|  | ||||
| def test_legacy_table_fixed_format_read_datetime_py2(datapath): | ||||
|     # GH 31750 | ||||
|     # legacy table with fixed format and datetime64 column written in Python 2 | ||||
|     expected = DataFrame( | ||||
|         [[Timestamp("2020-02-06T18:00")]], | ||||
|         columns=["A"], | ||||
|         index=Index(["date"]), | ||||
|         dtype="M8[ns]", | ||||
|     ) | ||||
|     with ensure_clean_store( | ||||
|         datapath("io", "data", "legacy_hdf", "legacy_table_fixed_datetime_py2.h5"), | ||||
|         mode="r", | ||||
|     ) as store: | ||||
|         result = store.select("df") | ||||
|     tm.assert_frame_equal(expected, result) | ||||
|  | ||||
|  | ||||
| def test_legacy_table_read_py2(datapath): | ||||
|     # issue: 24925 | ||||
|     # legacy table written in Python 2 | ||||
|     with ensure_clean_store( | ||||
|         datapath("io", "data", "legacy_hdf", "legacy_table_py2.h5"), mode="r" | ||||
|     ) as store: | ||||
|         result = store.select("table") | ||||
|  | ||||
|     expected = DataFrame({"a": ["a", "b"], "b": [2, 3]}) | ||||
|     tm.assert_frame_equal(expected, result) | ||||
|  | ||||
|  | ||||
| def test_read_hdf_open_store(tmp_path, setup_path, using_infer_string): | ||||
|     # GH10330 | ||||
|     # No check for non-string path_or_buf, and no test of open store | ||||
|     df = DataFrame( | ||||
|         np.random.default_rng(2).random((4, 5)), | ||||
|         index=list("abcd"), | ||||
|         columns=list("ABCDE"), | ||||
|     ) | ||||
|     df.index.name = "letters" | ||||
|     df = df.set_index(keys="E", append=True) | ||||
|  | ||||
|     path = tmp_path / setup_path | ||||
|     if using_infer_string: | ||||
|         # TODO(infer_string) make this work for string dtype | ||||
|         msg = "Saving a MultiIndex with an extension dtype is not supported." | ||||
|         with pytest.raises(NotImplementedError, match=msg): | ||||
|             df.to_hdf(path, key="df", mode="w") | ||||
|         return | ||||
|     df.to_hdf(path, key="df", mode="w") | ||||
|     direct = read_hdf(path, "df") | ||||
|     with HDFStore(path, mode="r") as store: | ||||
|         indirect = read_hdf(store, "df") | ||||
|         tm.assert_frame_equal(direct, indirect) | ||||
|         assert store.is_open | ||||
|  | ||||
|  | ||||
| def test_read_hdf_index_not_view(tmp_path, setup_path): | ||||
|     # GH 37441 | ||||
|     # Ensure that the index of the DataFrame is not a view | ||||
|     # into the original recarray that pytables reads in | ||||
|     df = DataFrame( | ||||
|         np.random.default_rng(2).random((4, 5)), | ||||
|         index=[0, 1, 2, 3], | ||||
|         columns=list("ABCDE"), | ||||
|     ) | ||||
|  | ||||
|     path = tmp_path / setup_path | ||||
|     df.to_hdf(path, key="df", mode="w", format="table") | ||||
|  | ||||
|     df2 = read_hdf(path, "df") | ||||
|     assert df2.index._data.base is None | ||||
|     tm.assert_frame_equal(df, df2) | ||||
|  | ||||
|  | ||||
| def test_read_hdf_iterator(tmp_path, setup_path): | ||||
|     df = DataFrame( | ||||
|         np.random.default_rng(2).random((4, 5)), | ||||
|         index=list("abcd"), | ||||
|         columns=list("ABCDE"), | ||||
|     ) | ||||
|     df.index.name = "letters" | ||||
|     df = df.set_index(keys="E", append=True) | ||||
|  | ||||
|     path = tmp_path / setup_path | ||||
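|     # "t" is an accepted shorthand for format="table" | ||||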
|     df.to_hdf(path, key="df", mode="w", format="t") | ||||
|     direct = read_hdf(path, "df") | ||||
|     iterator = read_hdf(path, "df", iterator=True) | ||||
|     with closing(iterator.store): | ||||
|         assert isinstance(iterator, TableIterator) | ||||
|         indirect = next(iter(iterator)) | ||||
|     tm.assert_frame_equal(direct, indirect) | ||||
|  | ||||
|  | ||||
| def test_read_nokey(tmp_path, setup_path): | ||||
|     # GH10443 | ||||
|     df = DataFrame( | ||||
|         np.random.default_rng(2).random((4, 5)), | ||||
|         index=list("abcd"), | ||||
|         columns=list("ABCDE"), | ||||
|     ) | ||||
|  | ||||
|     # Categorical dtype is not supported by the "fixed" format, so there | ||||
|     # is no need to test that dtype in the dataframe here. | ||||
|     path = tmp_path / setup_path | ||||
|     df.to_hdf(path, key="df", mode="a") | ||||
|     reread = read_hdf(path) | ||||
|     tm.assert_frame_equal(df, reread) | ||||
|     df.to_hdf(path, key="df2", mode="a") | ||||
|  | ||||
|     msg = "key must be provided when HDF5 file contains multiple datasets." | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         read_hdf(path) | ||||
|  | ||||
|  | ||||
| def test_read_nokey_table(tmp_path, setup_path): | ||||
|     # GH13231 | ||||
|     df = DataFrame({"i": range(5), "c": Series(list("abacd"), dtype="category")}) | ||||
|  | ||||
|     path = tmp_path / setup_path | ||||
|     df.to_hdf(path, key="df", mode="a", format="table") | ||||
|     reread = read_hdf(path) | ||||
|     tm.assert_frame_equal(df, reread) | ||||
|     df.to_hdf(path, key="df2", mode="a", format="table") | ||||
|  | ||||
|     msg = "key must be provided when HDF5 file contains multiple datasets." | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         read_hdf(path) | ||||
|  | ||||
|  | ||||
| def test_read_nokey_empty(tmp_path, setup_path): | ||||
|     path = tmp_path / setup_path | ||||
|     store = HDFStore(path) | ||||
|     store.close() | ||||
|     msg = re.escape( | ||||
|         "Dataset(s) incompatible with Pandas data types, not table, or no " | ||||
|         "datasets found in HDF5 file." | ||||
|     ) | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         read_hdf(path) | ||||
|  | ||||
|  | ||||
| def test_read_from_pathlib_path(tmp_path, setup_path): | ||||
|     # GH11773 | ||||
|     expected = DataFrame( | ||||
|         np.random.default_rng(2).random((4, 5)), | ||||
|         index=list("abcd"), | ||||
|         columns=list("ABCDE"), | ||||
|     ) | ||||
|     filename = tmp_path / setup_path | ||||
|     path_obj = Path(filename) | ||||
|  | ||||
|     expected.to_hdf(path_obj, key="df", mode="a") | ||||
|     actual = read_hdf(path_obj, key="df") | ||||
|  | ||||
|     tm.assert_frame_equal(expected, actual) | ||||
|  | ||||
|  | ||||
| @td.skip_if_no("py.path") | ||||
| def test_read_from_py_localpath(tmp_path, setup_path): | ||||
|     # GH11773 | ||||
|     from py.path import local as LocalPath | ||||
|  | ||||
|     expected = DataFrame( | ||||
|         np.random.default_rng(2).random((4, 5)), | ||||
|         index=list("abcd"), | ||||
|         columns=list("ABCDE"), | ||||
|     ) | ||||
|     filename = tmp_path / setup_path | ||||
|     path_obj = LocalPath(filename) | ||||
|  | ||||
|     expected.to_hdf(path_obj, key="df", mode="a") | ||||
|     actual = read_hdf(path_obj, key="df") | ||||
|  | ||||
|     tm.assert_frame_equal(expected, actual) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("format", ["fixed", "table"]) | ||||
| def test_read_hdf_series_mode_r(tmp_path, format, setup_path): | ||||
|     # GH 16583 | ||||
|     # Tests that reading a Series saved to an HDF file | ||||
|     # still works if a mode='r' argument is supplied | ||||
|     series = Series(range(10), dtype=np.float64) | ||||
|     path = tmp_path / setup_path | ||||
|     series.to_hdf(path, key="data", format=format) | ||||
|     result = read_hdf(path, key="data", mode="r") | ||||
|     tm.assert_series_equal(result, series) | ||||
|  | ||||
|  | ||||
| @pytest.mark.filterwarnings(r"ignore:Period with BDay freq is deprecated:FutureWarning") | ||||
| @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") | ||||
| def test_read_py2_hdf_file_in_py3(datapath): | ||||
|     # GH 16781 | ||||
|  | ||||
|     # tests that a PeriodIndex DataFrame written in Python 2 is readable in Python 3 | ||||
|  | ||||
|     # the file was generated in Python 2.7 like so: | ||||
|     # | ||||
|     # df = DataFrame([1.,2,3], index=pd.PeriodIndex( | ||||
|     #              ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B')) | ||||
|     # df.to_hdf('periodindex_0.20.1_x86_64_darwin_2.7.13.h5', 'p') | ||||
|  | ||||
|     expected = DataFrame( | ||||
|         [1.0, 2, 3], | ||||
|         index=pd.PeriodIndex(["2015-01-01", "2015-01-02", "2015-01-05"], freq="B"), | ||||
|     ) | ||||
|  | ||||
|     with ensure_clean_store( | ||||
|         datapath( | ||||
|             "io", "data", "legacy_hdf", "periodindex_0.20.1_x86_64_darwin_2.7.13.h5" | ||||
|         ), | ||||
|         mode="r", | ||||
|     ) as store: | ||||
|         result = store["p"] | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_read_infer_string(tmp_path, setup_path): | ||||
|     # GH#54431 | ||||
|     df = DataFrame({"a": ["a", "b", None]}) | ||||
|     path = tmp_path / setup_path | ||||
|     df.to_hdf(path, key="data", format="table") | ||||
|     with pd.option_context("future.infer_string", True): | ||||
|         result = read_hdf(path, key="data", mode="r") | ||||
|     expected = DataFrame( | ||||
|         {"a": ["a", "b", None]}, | ||||
|         dtype=pd.StringDtype(na_value=np.nan), | ||||
|         columns=Index(["a"], dtype=pd.StringDtype(na_value=np.nan)), | ||||
|     ) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
| @ -0,0 +1,92 @@ | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     DatetimeIndex, | ||||
|     Series, | ||||
|     _testing as tm, | ||||
|     date_range, | ||||
|     errors, | ||||
|     read_hdf, | ||||
| ) | ||||
| from pandas.tests.io.pytables.common import ( | ||||
|     _maybe_remove, | ||||
|     ensure_clean_store, | ||||
| ) | ||||
|  | ||||
| pytestmark = [pytest.mark.single_cpu] | ||||
|  | ||||
|  | ||||
| def test_retain_index_attributes(setup_path, unit): | ||||
|     # GH 3499, losing frequency info on index recreation | ||||
|     dti = date_range("2000-1-1", periods=3, freq="h", unit=unit) | ||||
|     df = DataFrame({"A": Series(range(3), index=dti)}) | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         _maybe_remove(store, "data") | ||||
|         store.put("data", df, format="table") | ||||
|  | ||||
|         result = store.get("data") | ||||
|         tm.assert_frame_equal(df, result) | ||||
|  | ||||
|         for attr in ["freq", "tz", "name"]: | ||||
|             for idx in ["index", "columns"]: | ||||
|                 assert getattr(getattr(df, idx), attr, None) == getattr( | ||||
|                     getattr(result, idx), attr, None | ||||
|                 ) | ||||
|  | ||||
|         dti2 = date_range("2002-1-1", periods=3, freq="D", unit=unit) | ||||
|         # try to append a table with a different frequency | ||||
|         with tm.assert_produces_warning(errors.AttributeConflictWarning): | ||||
|             df2 = DataFrame({"A": Series(range(3), index=dti2)}) | ||||
|             store.append("data", df2) | ||||
|  | ||||
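|         # the conflicting append resets the stored index frequency | ||||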
|         assert store.get_storer("data").info["index"]["freq"] is None | ||||
|  | ||||
|         # this is ok | ||||
|         _maybe_remove(store, "df2") | ||||
|         dti3 = DatetimeIndex( | ||||
|             ["2001-01-01", "2001-01-02", "2002-01-01"], dtype=f"M8[{unit}]" | ||||
|         ) | ||||
|         df2 = DataFrame( | ||||
|             { | ||||
|                 "A": Series( | ||||
|                     range(3), | ||||
|                     index=dti3, | ||||
|                 ) | ||||
|             } | ||||
|         ) | ||||
|         store.append("df2", df2) | ||||
|         dti4 = date_range("2002-1-1", periods=3, freq="D", unit=unit) | ||||
|         df3 = DataFrame({"A": Series(range(3), index=dti4)}) | ||||
|         store.append("df2", df3) | ||||
|  | ||||
|  | ||||
| def test_retain_index_attributes2(tmp_path, setup_path): | ||||
|     path = tmp_path / setup_path | ||||
|  | ||||
|     with tm.assert_produces_warning(errors.AttributeConflictWarning): | ||||
|         df = DataFrame( | ||||
|             {"A": Series(range(3), index=date_range("2000-1-1", periods=3, freq="h"))} | ||||
|         ) | ||||
|         df.to_hdf(path, key="data", mode="w", append=True) | ||||
|         df2 = DataFrame( | ||||
|             {"A": Series(range(3), index=date_range("2002-1-1", periods=3, freq="D"))} | ||||
|         ) | ||||
|  | ||||
|         df2.to_hdf(path, key="data", append=True) | ||||
|  | ||||
|         idx = date_range("2000-1-1", periods=3, freq="h") | ||||
|         idx.name = "foo" | ||||
|         df = DataFrame({"A": Series(range(3), index=idx)}) | ||||
|         df.to_hdf(path, key="data", mode="w", append=True) | ||||
|  | ||||
|     assert read_hdf(path, key="data").index.name == "foo" | ||||
|  | ||||
|     with tm.assert_produces_warning(errors.AttributeConflictWarning): | ||||
|         idx2 = date_range("2001-1-1", periods=3, freq="h") | ||||
|         idx2.name = "bar" | ||||
|         df2 = DataFrame({"A": Series(range(3), index=idx2)}) | ||||
|         df2.to_hdf(path, key="data", append=True) | ||||
|  | ||||
|     assert read_hdf(path, "data").index.name is None | ||||
| @ -0,0 +1,587 @@ | ||||
| import datetime | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs.tslibs import Timestamp | ||||
| from pandas.compat import is_platform_windows | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     Series, | ||||
|     _testing as tm, | ||||
|     bdate_range, | ||||
|     date_range, | ||||
|     read_hdf, | ||||
| ) | ||||
| from pandas.tests.io.pytables.common import ( | ||||
|     _maybe_remove, | ||||
|     ensure_clean_store, | ||||
| ) | ||||
| from pandas.util import _test_decorators as td | ||||
|  | ||||
| pytestmark = [pytest.mark.single_cpu] | ||||
|  | ||||
|  | ||||
| def test_conv_read_write(): | ||||
|     with tm.ensure_clean() as path: | ||||
|  | ||||
|         def roundtrip(key, obj, **kwargs): | ||||
|             obj.to_hdf(path, key=key, **kwargs) | ||||
|             return read_hdf(path, key) | ||||
|  | ||||
|         o = Series( | ||||
|             np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10) | ||||
|         ) | ||||
|         tm.assert_series_equal(o, roundtrip("series", o)) | ||||
|  | ||||
|         o = Series(range(10), dtype="float64", index=[f"i_{i}" for i in range(10)]) | ||||
|         tm.assert_series_equal(o, roundtrip("string_series", o)) | ||||
|  | ||||
|         o = DataFrame( | ||||
|             1.1 * np.arange(120).reshape((30, 4)), | ||||
|             columns=Index(list("ABCD")), | ||||
|             index=Index([f"i-{i}" for i in range(30)]), | ||||
|         ) | ||||
|         tm.assert_frame_equal(o, roundtrip("frame", o)) | ||||
|  | ||||
|         # table | ||||
|         df = DataFrame({"A": range(5), "B": range(5)}) | ||||
|         df.to_hdf(path, key="table", append=True) | ||||
|         result = read_hdf(path, "table", where=["index>2"]) | ||||
|         tm.assert_frame_equal(df[df.index > 2], result) | ||||
|  | ||||
|  | ||||
| def test_long_strings(setup_path): | ||||
|     # GH6166 | ||||
|     data = ["a" * 50] * 10 | ||||
|     df = DataFrame({"a": data}, index=data) | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         store.append("df", df, data_columns=["a"]) | ||||
|  | ||||
|         result = store.select("df") | ||||
|         tm.assert_frame_equal(df, result) | ||||
|  | ||||
|  | ||||
| def test_api(tmp_path, setup_path): | ||||
|     # GH4584 | ||||
|     # API issue when to_hdf doesn't accept append AND format args | ||||
|     path = tmp_path / setup_path | ||||
|  | ||||
|     df = DataFrame(range(20)) | ||||
|     df.iloc[:10].to_hdf(path, key="df", append=True, format="table") | ||||
|     df.iloc[10:].to_hdf(path, key="df", append=True, format="table") | ||||
|     tm.assert_frame_equal(read_hdf(path, "df"), df) | ||||
|  | ||||
|     # append=False | ||||
|     df.iloc[:10].to_hdf(path, key="df", append=False, format="table") | ||||
|     df.iloc[10:].to_hdf(path, key="df", append=True, format="table") | ||||
|     tm.assert_frame_equal(read_hdf(path, "df"), df) | ||||
|  | ||||
|  | ||||
| def test_api_append(tmp_path, setup_path): | ||||
|     path = tmp_path / setup_path | ||||
|  | ||||
|     df = DataFrame(range(20)) | ||||
|     df.iloc[:10].to_hdf(path, key="df", append=True) | ||||
|     df.iloc[10:].to_hdf(path, key="df", append=True, format="table") | ||||
|     tm.assert_frame_equal(read_hdf(path, "df"), df) | ||||
|  | ||||
|     # append=False | ||||
|     df.iloc[:10].to_hdf(path, key="df", append=False, format="table") | ||||
|     df.iloc[10:].to_hdf(path, key="df", append=True) | ||||
|     tm.assert_frame_equal(read_hdf(path, "df"), df) | ||||
|  | ||||
|  | ||||
| def test_api_2(tmp_path, setup_path): | ||||
|     path = tmp_path / setup_path | ||||
|  | ||||
|     df = DataFrame(range(20)) | ||||
|     df.to_hdf(path, key="df", append=False, format="fixed") | ||||
|     tm.assert_frame_equal(read_hdf(path, "df"), df) | ||||
|  | ||||
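|     # "f" is an accepted shorthand for format="fixed" | ||||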
|     df.to_hdf(path, key="df", append=False, format="f") | ||||
|     tm.assert_frame_equal(read_hdf(path, "df"), df) | ||||
|  | ||||
|     df.to_hdf(path, key="df", append=False) | ||||
|     tm.assert_frame_equal(read_hdf(path, "df"), df) | ||||
|  | ||||
|     df.to_hdf(path, key="df") | ||||
|     tm.assert_frame_equal(read_hdf(path, "df"), df) | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         df = DataFrame(range(20)) | ||||
|  | ||||
|         _maybe_remove(store, "df") | ||||
|         store.append("df", df.iloc[:10], append=True, format="table") | ||||
|         store.append("df", df.iloc[10:], append=True, format="table") | ||||
|         tm.assert_frame_equal(store.select("df"), df) | ||||
|  | ||||
|         # append=False | ||||
|         _maybe_remove(store, "df") | ||||
|         store.append("df", df.iloc[:10], append=False, format="table") | ||||
|         store.append("df", df.iloc[10:], append=True, format="table") | ||||
|         tm.assert_frame_equal(store.select("df"), df) | ||||
|  | ||||
|         # formats | ||||
|         _maybe_remove(store, "df") | ||||
|         store.append("df", df.iloc[:10], append=False, format="table") | ||||
|         store.append("df", df.iloc[10:], append=True, format="table") | ||||
|         tm.assert_frame_equal(store.select("df"), df) | ||||
|  | ||||
|         _maybe_remove(store, "df") | ||||
|         store.append("df", df.iloc[:10], append=False, format="table") | ||||
|         store.append("df", df.iloc[10:], append=True, format=None) | ||||
|         tm.assert_frame_equal(store.select("df"), df) | ||||
|  | ||||
|  | ||||
| def test_api_invalid(tmp_path, setup_path): | ||||
|     path = tmp_path / setup_path | ||||
|     # Invalid. | ||||
|     df = DataFrame( | ||||
|         1.1 * np.arange(120).reshape((30, 4)), | ||||
|         columns=Index(list("ABCD")), | ||||
|         index=Index([f"i-{i}" for i in range(30)]), | ||||
|     ) | ||||
|  | ||||
|     msg = "Can only append to Tables" | ||||
|  | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         df.to_hdf(path, key="df", append=True, format="f") | ||||
|  | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         df.to_hdf(path, key="df", append=True, format="fixed") | ||||
|  | ||||
|     msg = r"invalid HDFStore format specified \[foo\]" | ||||
|  | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         df.to_hdf(path, key="df", append=True, format="foo") | ||||
|  | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         df.to_hdf(path, key="df", append=False, format="foo") | ||||
|  | ||||
|     # File path doesn't exist | ||||
|     path = "" | ||||
|     msg = f"File {path} does not exist" | ||||
|  | ||||
|     with pytest.raises(FileNotFoundError, match=msg): | ||||
|         read_hdf(path, "df") | ||||
|  | ||||
|  | ||||
| def test_get(setup_path): | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         store["a"] = Series( | ||||
|             np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10) | ||||
|         ) | ||||
|         left = store.get("a") | ||||
|         right = store["a"] | ||||
|         tm.assert_series_equal(left, right) | ||||
|  | ||||
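|         # a leading "/" addresses the same node | ||||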
|         left = store.get("/a") | ||||
|         right = store["/a"] | ||||
|         tm.assert_series_equal(left, right) | ||||
|  | ||||
|         with pytest.raises(KeyError, match="'No object named b in the file'"): | ||||
|             store.get("b") | ||||
|  | ||||
|  | ||||
| def test_put_integer(setup_path): | ||||
|     # non-date, non-string index | ||||
|     df = DataFrame(np.random.default_rng(2).standard_normal((50, 100))) | ||||
|     _check_roundtrip(df, tm.assert_frame_equal, setup_path) | ||||
|  | ||||
|  | ||||
| def test_table_values_dtypes_roundtrip(setup_path, using_infer_string): | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         df1 = DataFrame({"a": [1, 2, 3]}, dtype="f8") | ||||
|         store.append("df_f8", df1) | ||||
|         tm.assert_series_equal(df1.dtypes, store["df_f8"].dtypes) | ||||
|  | ||||
|         df2 = DataFrame({"a": [1, 2, 3]}, dtype="i8") | ||||
|         store.append("df_i8", df2) | ||||
|         tm.assert_series_equal(df2.dtypes, store["df_i8"].dtypes) | ||||
|  | ||||
|         # incompatible dtype | ||||
|         msg = re.escape( | ||||
|             "Cannot serialize the column [a] " | ||||
|             "because its data contents are not [float] " | ||||
|             "but [integer] object dtype" | ||||
|         ) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             store.append("df_i8", df1) | ||||
|  | ||||
|         # check creation/storage/retrieval of float32 (a bit hacky to | ||||
|         # actually create them, though) | ||||
|         df1 = DataFrame(np.array([[1], [2], [3]], dtype="f4"), columns=["A"]) | ||||
|         store.append("df_f4", df1) | ||||
|         tm.assert_series_equal(df1.dtypes, store["df_f4"].dtypes) | ||||
|         assert df1.dtypes.iloc[0] == "float32" | ||||
|  | ||||
|         # check with mixed dtypes | ||||
|         df1 = DataFrame( | ||||
|             { | ||||
|                 c: Series(np.random.default_rng(2).integers(5), dtype=c) | ||||
|                 for c in ["float32", "float64", "int32", "int64", "int16", "int8"] | ||||
|             } | ||||
|         ) | ||||
|         df1["string"] = "foo" | ||||
|         df1["float322"] = 1.0 | ||||
|         df1["float322"] = df1["float322"].astype("float32") | ||||
|         df1["bool"] = df1["float32"] > 0 | ||||
|         df1["time1"] = Timestamp("20130101") | ||||
|         df1["time2"] = Timestamp("20130102") | ||||
|  | ||||
|         store.append("df_mixed_dtypes1", df1) | ||||
|         result = store.select("df_mixed_dtypes1").dtypes.value_counts() | ||||
|         result.index = [str(i) for i in result.index] | ||||
|         str_dtype = "str" if using_infer_string else "object" | ||||
|         expected = Series( | ||||
|             { | ||||
|                 "float32": 2, | ||||
|                 "float64": 1, | ||||
|                 "int32": 1, | ||||
|                 "bool": 1, | ||||
|                 "int16": 1, | ||||
|                 "int8": 1, | ||||
|                 "int64": 1, | ||||
|                 str_dtype: 1, | ||||
|                 "datetime64[ns]": 2, | ||||
|             }, | ||||
|             name="count", | ||||
|         ) | ||||
|         result = result.sort_index() | ||||
|         expected = expected.sort_index() | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning") | ||||
| def test_series(setup_path): | ||||
|     s = Series(range(10), dtype="float64", index=[f"i_{i}" for i in range(10)]) | ||||
|     _check_roundtrip(s, tm.assert_series_equal, path=setup_path) | ||||
|  | ||||
|     ts = Series( | ||||
|         np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10) | ||||
|     ) | ||||
|     _check_roundtrip(ts, tm.assert_series_equal, path=setup_path) | ||||
|  | ||||
|     ts2 = Series(ts.index, Index(ts.index)) | ||||
|     _check_roundtrip(ts2, tm.assert_series_equal, path=setup_path) | ||||
|  | ||||
|     ts3 = Series(ts.values, Index(np.asarray(ts.index))) | ||||
|     _check_roundtrip( | ||||
|         ts3, tm.assert_series_equal, path=setup_path, check_index_type=False | ||||
|     ) | ||||
|  | ||||
|  | ||||
| def test_float_index(setup_path): | ||||
|     # GH #454 | ||||
|     index = np.random.default_rng(2).standard_normal(10) | ||||
|     s = Series(np.random.default_rng(2).standard_normal(10), index=index) | ||||
|     _check_roundtrip(s, tm.assert_series_equal, path=setup_path) | ||||
|  | ||||
|  | ||||
| def test_tuple_index(setup_path): | ||||
|     # GH #492 | ||||
|     col = np.arange(10) | ||||
|     idx = [(0.0, 1.0), (2.0, 3.0), (4.0, 5.0)] | ||||
|     data = np.random.default_rng(2).standard_normal(30).reshape((3, 10)) | ||||
|     DF = DataFrame(data, index=idx, columns=col) | ||||
|  | ||||
|     with tm.assert_produces_warning(pd.errors.PerformanceWarning): | ||||
|         _check_roundtrip(DF, tm.assert_frame_equal, path=setup_path) | ||||
|  | ||||
|  | ||||
| @pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning") | ||||
| def test_index_types(setup_path): | ||||
|     values = np.random.default_rng(2).standard_normal(2) | ||||
|  | ||||
|     func = lambda lhs, rhs: tm.assert_series_equal(lhs, rhs, check_index_type=True) | ||||
|  | ||||
|     ser = Series(values, [0, "y"]) | ||||
|     _check_roundtrip(ser, func, path=setup_path) | ||||
|  | ||||
|     ser = Series(values, [datetime.datetime.today(), 0]) | ||||
|     _check_roundtrip(ser, func, path=setup_path) | ||||
|  | ||||
|     ser = Series(values, ["y", 0]) | ||||
|     _check_roundtrip(ser, func, path=setup_path) | ||||
|  | ||||
|     ser = Series(values, [datetime.date.today(), "a"]) | ||||
|     _check_roundtrip(ser, func, path=setup_path) | ||||
|  | ||||
|     ser = Series(values, [0, "y"]) | ||||
|     _check_roundtrip(ser, func, path=setup_path) | ||||
|  | ||||
|     ser = Series(values, [datetime.datetime.today(), 0]) | ||||
|     _check_roundtrip(ser, func, path=setup_path) | ||||
|  | ||||
|     ser = Series(values, ["y", 0]) | ||||
|     _check_roundtrip(ser, func, path=setup_path) | ||||
|  | ||||
|     ser = Series(values, [datetime.date.today(), "a"]) | ||||
|     _check_roundtrip(ser, func, path=setup_path) | ||||
|  | ||||
|     ser = Series(values, [1.23, "b"]) | ||||
|     _check_roundtrip(ser, func, path=setup_path) | ||||
|  | ||||
|     ser = Series(values, [1, 1.53]) | ||||
|     _check_roundtrip(ser, func, path=setup_path) | ||||
|  | ||||
|     ser = Series(values, [1, 5]) | ||||
|     _check_roundtrip(ser, func, path=setup_path) | ||||
|  | ||||
|     dti = DatetimeIndex(["2012-01-01", "2012-01-02"], dtype="M8[ns]") | ||||
|     ser = Series(values, index=dti) | ||||
|     _check_roundtrip(ser, func, path=setup_path) | ||||
|  | ||||
|     ser.index = ser.index.as_unit("s") | ||||
|     _check_roundtrip(ser, func, path=setup_path) | ||||
|  | ||||
|  | ||||
| def test_timeseries_preepoch(setup_path, request): | ||||
|     dr = bdate_range("1/1/1940", "1/1/1960") | ||||
|     ts = Series(np.random.default_rng(2).standard_normal(len(dr)), index=dr) | ||||
|     try: | ||||
|         _check_roundtrip(ts, tm.assert_series_equal, path=setup_path) | ||||
|     except OverflowError: | ||||
|         if is_platform_windows(): | ||||
|             request.applymarker( | ||||
|                 pytest.mark.xfail("known failure on some windows platforms") | ||||
|             ) | ||||
|         raise | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "compression", [False, pytest.param(True, marks=td.skip_if_windows)] | ||||
| ) | ||||
| def test_frame(compression, setup_path): | ||||
|     df = DataFrame( | ||||
|         1.1 * np.arange(120).reshape((30, 4)), | ||||
|         columns=Index(list("ABCD")), | ||||
|         index=Index([f"i-{i}" for i in range(30)]), | ||||
|     ) | ||||
|  | ||||
|     # put in some random NAs | ||||
|     df.iloc[0, 0] = np.nan | ||||
|     df.iloc[5, 3] = np.nan | ||||
|  | ||||
|     _check_roundtrip_table( | ||||
|         df, tm.assert_frame_equal, path=setup_path, compression=compression | ||||
|     ) | ||||
|     _check_roundtrip( | ||||
|         df, tm.assert_frame_equal, path=setup_path, compression=compression | ||||
|     ) | ||||
|  | ||||
|     tdf = DataFrame( | ||||
|         np.random.default_rng(2).standard_normal((10, 4)), | ||||
|         columns=Index(list("ABCD")), | ||||
|         index=date_range("2000-01-01", periods=10, freq="B"), | ||||
|     ) | ||||
|     _check_roundtrip( | ||||
|         tdf, tm.assert_frame_equal, path=setup_path, compression=compression | ||||
|     ) | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         # not consolidated | ||||
|         df["foo"] = np.random.default_rng(2).standard_normal(len(df)) | ||||
|         store["df"] = df | ||||
|         recons = store["df"] | ||||
|         assert recons._mgr.is_consolidated() | ||||
|  | ||||
|     # empty | ||||
|     df2 = df[:0] | ||||
|     # Prevent df2 from having index with inferred_type as string | ||||
|     df2.index = Index([]) | ||||
|     _check_roundtrip(df2[:0], tm.assert_frame_equal, path=setup_path) | ||||
|  | ||||
|  | ||||
| def test_empty_series_frame(setup_path): | ||||
|     s0 = Series(dtype=object) | ||||
|     s1 = Series(name="myseries", dtype=object) | ||||
|     df0 = DataFrame() | ||||
|     df1 = DataFrame(index=["a", "b", "c"]) | ||||
|     df2 = DataFrame(columns=["d", "e", "f"]) | ||||
|  | ||||
|     _check_roundtrip(s0, tm.assert_series_equal, path=setup_path) | ||||
|     _check_roundtrip(s1, tm.assert_series_equal, path=setup_path) | ||||
|     _check_roundtrip(df0, tm.assert_frame_equal, path=setup_path) | ||||
|     _check_roundtrip(df1, tm.assert_frame_equal, path=setup_path) | ||||
|     _check_roundtrip(df2, tm.assert_frame_equal, path=setup_path) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("dtype", [np.int64, np.float64, object, "m8[ns]", "M8[ns]"]) | ||||
| def test_empty_series(dtype, setup_path): | ||||
|     s = Series(dtype=dtype) | ||||
|     _check_roundtrip(s, tm.assert_series_equal, path=setup_path) | ||||
|  | ||||
|  | ||||
| def test_can_serialize_dates(setup_path): | ||||
|     rng = [x.date() for x in bdate_range("1/1/2000", "1/30/2000")] | ||||
|     frame = DataFrame( | ||||
|         np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng | ||||
|     ) | ||||
|  | ||||
|     _check_roundtrip(frame, tm.assert_frame_equal, path=setup_path) | ||||
|  | ||||
|  | ||||
| def test_store_hierarchical( | ||||
|     setup_path, using_infer_string, multiindex_dataframe_random_data | ||||
| ): | ||||
|     frame = multiindex_dataframe_random_data | ||||
|  | ||||
|     if using_infer_string: | ||||
|         # TODO(infer_string) make this work for string dtype | ||||
|         msg = "Saving a MultiIndex with an extension dtype is not supported." | ||||
|         with pytest.raises(NotImplementedError, match=msg): | ||||
|             _check_roundtrip(frame, tm.assert_frame_equal, path=setup_path) | ||||
|         return | ||||
|     _check_roundtrip(frame, tm.assert_frame_equal, path=setup_path) | ||||
|     _check_roundtrip(frame.T, tm.assert_frame_equal, path=setup_path) | ||||
|     _check_roundtrip(frame["A"], tm.assert_series_equal, path=setup_path) | ||||
|  | ||||
|     # check that the names are stored | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         store["frame"] = frame | ||||
|         recons = store["frame"] | ||||
|         tm.assert_frame_equal(recons, frame) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "compression", [False, pytest.param(True, marks=td.skip_if_windows)] | ||||
| ) | ||||
| def test_store_mixed(compression, setup_path): | ||||
|     def _make_one(): | ||||
|         df = DataFrame( | ||||
|             1.1 * np.arange(120).reshape((30, 4)), | ||||
|             columns=Index(list("ABCD")), | ||||
|             index=Index([f"i-{i}" for i in range(30)]), | ||||
|         ) | ||||
|         df["obj1"] = "foo" | ||||
|         df["obj2"] = "bar" | ||||
|         df["bool1"] = df["A"] > 0 | ||||
|         df["bool2"] = df["B"] > 0 | ||||
|         df["int1"] = 1 | ||||
|         df["int2"] = 2 | ||||
|         return df._consolidate() | ||||
|  | ||||
|     df1 = _make_one() | ||||
|     df2 = _make_one() | ||||
|  | ||||
|     _check_roundtrip(df1, tm.assert_frame_equal, path=setup_path) | ||||
|     _check_roundtrip(df2, tm.assert_frame_equal, path=setup_path) | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         store["obj"] = df1 | ||||
|         tm.assert_frame_equal(store["obj"], df1) | ||||
|         store["obj"] = df2 | ||||
|         tm.assert_frame_equal(store["obj"], df2) | ||||
|  | ||||
|     # check that Series of all of these types can be stored | ||||
|     _check_roundtrip( | ||||
|         df1["obj1"], | ||||
|         tm.assert_series_equal, | ||||
|         path=setup_path, | ||||
|         compression=compression, | ||||
|     ) | ||||
|     _check_roundtrip( | ||||
|         df1["bool1"], | ||||
|         tm.assert_series_equal, | ||||
|         path=setup_path, | ||||
|         compression=compression, | ||||
|     ) | ||||
|     _check_roundtrip( | ||||
|         df1["int1"], | ||||
|         tm.assert_series_equal, | ||||
|         path=setup_path, | ||||
|         compression=compression, | ||||
|     ) | ||||
|  | ||||
|  | ||||
| def _check_roundtrip(obj, comparator, path, compression=False, **kwargs): | ||||
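|     """ | ||||
|     Write ``obj`` to a fresh HDFStore in the default fixed format, read it | ||||
|     back, and compare the result with ``comparator``. | ||||
|     """ | ||||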
|     options = {} | ||||
|     if compression: | ||||
|         options["complib"] = "blosc" | ||||
|  | ||||
|     with ensure_clean_store(path, "w", **options) as store: | ||||
|         store["obj"] = obj | ||||
|         retrieved = store["obj"] | ||||
|         comparator(retrieved, obj, **kwargs) | ||||
|  | ||||
|  | ||||
| def _check_roundtrip_table(obj, comparator, path, compression=False): | ||||
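|     """ | ||||
|     Like ``_check_roundtrip``, but writes ``obj`` with ``format="table"``. | ||||
|     """ | ||||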
|     options = {} | ||||
|     if compression: | ||||
|         options["complib"] = "blosc" | ||||
|  | ||||
|     with ensure_clean_store(path, "w", **options) as store: | ||||
|         store.put("obj", obj, format="table") | ||||
|         retrieved = store["obj"] | ||||
|  | ||||
|         comparator(retrieved, obj) | ||||
|  | ||||
|  | ||||
| def test_unicode_index(setup_path): | ||||
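|     # non-ASCII (Greek sigma) index labels should survive the roundtrip | ||||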
|     unicode_values = ["\u03c3", "\u03c3\u03c3"] | ||||
|  | ||||
|     s = Series( | ||||
|         np.random.default_rng(2).standard_normal(len(unicode_values)), | ||||
|         unicode_values, | ||||
|     ) | ||||
|     _check_roundtrip(s, tm.assert_series_equal, path=setup_path) | ||||
|  | ||||
|  | ||||
| def test_unicode_longer_encoded(setup_path): | ||||
|     # GH 11234 | ||||
|     char = "\u0394" | ||||
|     df = DataFrame({"A": [char]}) | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         store.put("df", df, format="table", encoding="utf-8") | ||||
|         result = store.get("df") | ||||
|         tm.assert_frame_equal(result, df) | ||||
|  | ||||
|     df = DataFrame({"A": ["a", char], "B": ["b", "b"]}) | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         store.put("df", df, format="table", encoding="utf-8") | ||||
|         result = store.get("df") | ||||
|         tm.assert_frame_equal(result, df) | ||||
|  | ||||
|  | ||||
| def test_store_datetime_mixed(setup_path): | ||||
|     df = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0], "c": ["a", "b", "c"]}) | ||||
|     ts = Series( | ||||
|         np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10) | ||||
|     ) | ||||
|     df["d"] = ts.index[:3] | ||||
|     _check_roundtrip(df, tm.assert_frame_equal, path=setup_path) | ||||
|  | ||||
|  | ||||
| def test_round_trip_equals(tmp_path, setup_path): | ||||
|     # GH 9330 | ||||
|     df = DataFrame({"B": [1, 2], "A": ["x", "y"]}) | ||||
|  | ||||
|     path = tmp_path / setup_path | ||||
|     df.to_hdf(path, key="df", format="table") | ||||
|     other = read_hdf(path, "df") | ||||
|     tm.assert_frame_equal(df, other) | ||||
|     assert df.equals(other) | ||||
|     assert other.equals(df) | ||||
|  | ||||
|  | ||||
| def test_infer_string_columns(tmp_path, setup_path): | ||||
|     # GH# | ||||
|     pytest.importorskip("pyarrow") | ||||
|     path = tmp_path / setup_path | ||||
|     with pd.option_context("future.infer_string", True): | ||||
|         df = DataFrame(1, columns=list("ABCD"), index=list(range(10))).set_index( | ||||
|             ["A", "B"] | ||||
|         ) | ||||
|         expected = df.copy() | ||||
|         df.to_hdf(path, key="df", format="table") | ||||
|  | ||||
|         result = read_hdf(path, "df") | ||||
|         tm.assert_frame_equal(result, expected) | ||||
 1046 lib/python3.11/site-packages/pandas/tests/io/pytables/test_select.py (new file; diff suppressed because it is too large)
 1129 lib/python3.11/site-packages/pandas/tests/io/pytables/test_store.py (new file; diff suppressed because it is too large)
 52 lib/python3.11/site-packages/pandas/tests/io/pytables/test_subclass.py (new file)
| @ -0,0 +1,52 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
| from pandas.io.pytables import ( | ||||
|     HDFStore, | ||||
|     read_hdf, | ||||
| ) | ||||
|  | ||||
| pytest.importorskip("tables") | ||||
|  | ||||
|  | ||||
| class TestHDFStoreSubclass: | ||||
|     # GH 33748 | ||||
|     def test_supported_for_subclass_dataframe(self, tmp_path): | ||||
|         data = {"a": [1, 2], "b": [3, 4]} | ||||
|         sdf = tm.SubclassedDataFrame(data, dtype=np.intp) | ||||
|  | ||||
|         expected = DataFrame(data, dtype=np.intp) | ||||
|  | ||||
|         path = tmp_path / "temp.h5" | ||||
|         sdf.to_hdf(path, key="df") | ||||
|         result = read_hdf(path, "df") | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         path = tmp_path / "temp.h5" | ||||
|         with HDFStore(path) as store: | ||||
|             store.put("df", sdf) | ||||
|         result = read_hdf(path, "df") | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_supported_for_subclass_series(self, tmp_path): | ||||
|         data = [1, 2, 3] | ||||
|         sser = tm.SubclassedSeries(data, dtype=np.intp) | ||||
|  | ||||
|         expected = Series(data, dtype=np.intp) | ||||
|  | ||||
|         path = tmp_path / "temp.h5" | ||||
|         sser.to_hdf(path, key="ser") | ||||
|         result = read_hdf(path, "ser") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         path = tmp_path / "temp.h5" | ||||
|         with HDFStore(path) as store: | ||||
|             store.put("ser", sser) | ||||
|         result = read_hdf(path, "ser") | ||||
|         tm.assert_series_equal(result, expected) | ||||
 72 lib/python3.11/site-packages/pandas/tests/io/pytables/test_time_series.py (new file)
| @ -0,0 +1,72 @@ | ||||
| import datetime | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     DatetimeIndex, | ||||
|     Series, | ||||
|     _testing as tm, | ||||
|     date_range, | ||||
|     period_range, | ||||
| ) | ||||
| from pandas.tests.io.pytables.common import ensure_clean_store | ||||
|  | ||||
| pytestmark = pytest.mark.single_cpu | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("unit", ["us", "ns"]) | ||||
| def test_store_datetime_fractional_secs(setup_path, unit): | ||||
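|     # the fractional-second component (123456 microseconds) must be preserved | ||||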
|     dt = datetime.datetime(2012, 1, 2, 3, 4, 5, 123456) | ||||
|     dti = DatetimeIndex([dt], dtype=f"M8[{unit}]") | ||||
|     series = Series([0], index=dti) | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         store["a"] = series | ||||
|         assert store["a"].index[0] == dt | ||||
|  | ||||
|  | ||||
| @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") | ||||
| def test_tseries_indices_series(setup_path): | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         idx = date_range("2020-01-01", periods=10) | ||||
|         ser = Series(np.random.default_rng(2).standard_normal(len(idx)), idx) | ||||
|         store["a"] = ser | ||||
|         result = store["a"] | ||||
|  | ||||
|         tm.assert_series_equal(result, ser) | ||||
|         assert result.index.freq == ser.index.freq | ||||
|         tm.assert_class_equal(result.index, ser.index, obj="series index") | ||||
|  | ||||
|         idx = period_range("2020-01-01", periods=10, freq="D") | ||||
|         ser = Series(np.random.default_rng(2).standard_normal(len(idx)), idx) | ||||
|         store["a"] = ser | ||||
|         result = store["a"] | ||||
|  | ||||
|         tm.assert_series_equal(result, ser) | ||||
|         assert result.index.freq == ser.index.freq | ||||
|         tm.assert_class_equal(result.index, ser.index, obj="series index") | ||||
|  | ||||
|  | ||||
| @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") | ||||
| def test_tseries_indices_frame(setup_path): | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         idx = date_range("2020-01-01", periods=10) | ||||
|         df = DataFrame( | ||||
|             np.random.default_rng(2).standard_normal((len(idx), 3)), index=idx | ||||
|         ) | ||||
|         store["a"] = df | ||||
|         result = store["a"] | ||||
|  | ||||
|         tm.assert_frame_equal(result, df) | ||||
|         assert result.index.freq == df.index.freq | ||||
|         tm.assert_class_equal(result.index, df.index, obj="dataframe index") | ||||
|  | ||||
|         idx = period_range("2020-01-01", periods=10, freq="D") | ||||
|         df = DataFrame( | ||||
|             np.random.default_rng(2).standard_normal((len(idx), 3)), index=idx | ||||
|         ) | ||||
|         store["a"] = df | ||||
|         result = store["a"] | ||||
|  | ||||
|         tm.assert_frame_equal(result, df) | ||||
|         assert result.index.freq == df.index.freq | ||||
|         tm.assert_class_equal(result.index, df.index, obj="dataframe index") | ||||
 378 lib/python3.11/site-packages/pandas/tests/io/pytables/test_timezones.py (new file)
| @ -0,0 +1,378 @@ | ||||
| from datetime import ( | ||||
|     date, | ||||
|     timedelta, | ||||
| ) | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs.tslibs.timezones import maybe_get_tz | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     DatetimeIndex, | ||||
|     Series, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.tests.io.pytables.common import ( | ||||
|     _maybe_remove, | ||||
|     ensure_clean_store, | ||||
| ) | ||||
|  | ||||
|  | ||||
| def _compare_with_tz(a, b): | ||||
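|     """Assert frame equality, then check the tz of each element pair.""" | ||||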
|     tm.assert_frame_equal(a, b) | ||||
|  | ||||
|     # compare the zones on each element | ||||
|     for c in a.columns: | ||||
|         for i in a.index: | ||||
|             a_e = a.loc[i, c] | ||||
|             b_e = b.loc[i, c] | ||||
|             if not (a_e == b_e and a_e.tz == b_e.tz): | ||||
|                 raise AssertionError(f"invalid tz comparison [{a_e}] [{b_e}]") | ||||
|  | ||||
|  | ||||
| # use maybe_get_tz instead of dateutil.tz.gettz to handle the windows | ||||
| # filename issues. | ||||
| gettz_dateutil = lambda x: maybe_get_tz("dateutil/" + x) | ||||
| gettz_pytz = lambda x: x | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("gettz", [gettz_dateutil, gettz_pytz]) | ||||
| def test_append_with_timezones(setup_path, gettz): | ||||
|     # as columns | ||||
|  | ||||
|     # Single-tzinfo, no DST transition | ||||
|     df_est = DataFrame( | ||||
|         { | ||||
|             "A": [ | ||||
|                 Timestamp("20130102 2:00:00", tz=gettz("US/Eastern")).as_unit("ns") | ||||
|                 + timedelta(hours=1) * i | ||||
|                 for i in range(5) | ||||
|             ] | ||||
|         } | ||||
|     ) | ||||
|  | ||||
|     # frame with all columns having same tzinfo, but different sides | ||||
|     #  of DST transition | ||||
|     df_crosses_dst = DataFrame( | ||||
|         { | ||||
|             "A": Timestamp("20130102", tz=gettz("US/Eastern")).as_unit("ns"), | ||||
|             "B": Timestamp("20130603", tz=gettz("US/Eastern")).as_unit("ns"), | ||||
|         }, | ||||
|         index=range(5), | ||||
|     ) | ||||
|  | ||||
|     df_mixed_tz = DataFrame( | ||||
|         { | ||||
|             "A": Timestamp("20130102", tz=gettz("US/Eastern")).as_unit("ns"), | ||||
|             "B": Timestamp("20130102", tz=gettz("EET")).as_unit("ns"), | ||||
|         }, | ||||
|         index=range(5), | ||||
|     ) | ||||
|  | ||||
|     df_different_tz = DataFrame( | ||||
|         { | ||||
|             "A": Timestamp("20130102", tz=gettz("US/Eastern")).as_unit("ns"), | ||||
|             "B": Timestamp("20130102", tz=gettz("CET")).as_unit("ns"), | ||||
|         }, | ||||
|         index=range(5), | ||||
|     ) | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         _maybe_remove(store, "df_tz") | ||||
|         store.append("df_tz", df_est, data_columns=["A"]) | ||||
|         result = store["df_tz"] | ||||
|         _compare_with_tz(result, df_est) | ||||
|         tm.assert_frame_equal(result, df_est) | ||||
|  | ||||
|         # select with tz aware | ||||
|         expected = df_est[df_est.A >= df_est.A[3]] | ||||
|         result = store.select("df_tz", where="A>=df_est.A[3]") | ||||
|         _compare_with_tz(result, expected) | ||||
|  | ||||
|         # ensure we include dates in DST and STD time here. | ||||
|         _maybe_remove(store, "df_tz") | ||||
|         store.append("df_tz", df_crosses_dst) | ||||
|         result = store["df_tz"] | ||||
|         _compare_with_tz(result, df_crosses_dst) | ||||
|         tm.assert_frame_equal(result, df_crosses_dst) | ||||
|  | ||||
|         msg = ( | ||||
|             r"invalid info for \[values_block_1\] for \[tz\], " | ||||
|             r"existing_value \[(dateutil/.*)?(US/Eastern|America/New_York)\] " | ||||
|             r"conflicts with new value \[(dateutil/.*)?EET\]" | ||||
|         ) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             store.append("df_tz", df_mixed_tz) | ||||
|  | ||||
|         # this is ok | ||||
|         _maybe_remove(store, "df_tz") | ||||
|         store.append("df_tz", df_mixed_tz, data_columns=["A", "B"]) | ||||
|         result = store["df_tz"] | ||||
|         _compare_with_tz(result, df_mixed_tz) | ||||
|         tm.assert_frame_equal(result, df_mixed_tz) | ||||
|  | ||||
|         # can't append with diff timezone | ||||
|         msg = ( | ||||
|             r"invalid info for \[B\] for \[tz\], " | ||||
|             r"existing_value \[(dateutil/.*)?EET\] " | ||||
|             r"conflicts with new value \[(dateutil/.*)?CET\]" | ||||
|         ) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             store.append("df_tz", df_different_tz) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("gettz", [gettz_dateutil, gettz_pytz]) | ||||
| def test_append_with_timezones_as_index(setup_path, gettz): | ||||
|     # GH#4098 example | ||||
|  | ||||
|     dti = date_range("2000-1-1", periods=3, freq="h", tz=gettz("US/Eastern")) | ||||
|     dti = dti._with_freq(None)  # freq doesn't round-trip | ||||
|  | ||||
|     df = DataFrame({"A": Series(range(3), index=dti)}) | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         _maybe_remove(store, "df") | ||||
|         store.put("df", df) | ||||
|         result = store.select("df") | ||||
|         tm.assert_frame_equal(result, df) | ||||
|  | ||||
|         _maybe_remove(store, "df") | ||||
|         store.append("df", df) | ||||
|         result = store.select("df") | ||||
|         tm.assert_frame_equal(result, df) | ||||
|  | ||||
|  | ||||
| def test_roundtrip_tz_aware_index(setup_path, unit): | ||||
|     # GH 17618 | ||||
|     ts = Timestamp("2000-01-01 01:00:00", tz="US/Eastern") | ||||
|     dti = DatetimeIndex([ts]).as_unit(unit) | ||||
|     df = DataFrame(data=[0], index=dti) | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         store.put("frame", df, format="fixed") | ||||
|         recons = store["frame"] | ||||
|         tm.assert_frame_equal(recons, df) | ||||
|  | ||||
|     value = recons.index[0]._value | ||||
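|     # 946706400000000000 ns since the epoch is 2000-01-01 06:00 UTC, | ||||
|     # i.e. 01:00 US/Eastern; coarser units scale the integer value down | ||||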
|     denom = {"ns": 1, "us": 1000, "ms": 10**6, "s": 10**9}[unit] | ||||
|     assert value == 946706400000000000 // denom | ||||
|  | ||||
|  | ||||
| def test_store_index_name_with_tz(setup_path): | ||||
|     # GH 13884 | ||||
|     df = DataFrame({"A": [1, 2]}) | ||||
|     df.index = DatetimeIndex([1234567890123456787, 1234567890123456788]) | ||||
|     df.index = df.index.tz_localize("UTC") | ||||
|     df.index.name = "foo" | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         store.put("frame", df, format="table") | ||||
|         recons = store["frame"] | ||||
|         tm.assert_frame_equal(recons, df) | ||||
|  | ||||
|  | ||||
| def test_tseries_select_index_column(setup_path): | ||||
|     # GH7777 | ||||
|     # selecting a UTC datetimeindex column did | ||||
|     # not preserve UTC tzinfo set before storing | ||||
|  | ||||
|     # check that no tz still works | ||||
|     rng = date_range("1/1/2000", "1/30/2000") | ||||
|     frame = DataFrame( | ||||
|         np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng | ||||
|     ) | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         store.append("frame", frame) | ||||
|         result = store.select_column("frame", "index") | ||||
|         assert rng.tz == DatetimeIndex(result.values).tz | ||||
|  | ||||
|     # check utc | ||||
|     rng = date_range("1/1/2000", "1/30/2000", tz="UTC") | ||||
|     frame = DataFrame( | ||||
|         np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng | ||||
|     ) | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         store.append("frame", frame) | ||||
|         result = store.select_column("frame", "index") | ||||
|         assert rng.tz == result.dt.tz | ||||
|  | ||||
|     # double check non-utc | ||||
|     rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern") | ||||
|     frame = DataFrame( | ||||
|         np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng | ||||
|     ) | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         store.append("frame", frame) | ||||
|         result = store.select_column("frame", "index") | ||||
|         assert rng.tz == result.dt.tz | ||||
|  | ||||
|  | ||||
| def test_timezones_fixed_format_frame_non_empty(setup_path): | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         # index | ||||
|         rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern") | ||||
|         rng = rng._with_freq(None)  # freq doesn't round-trip | ||||
|         df = DataFrame( | ||||
|             np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng | ||||
|         ) | ||||
|         store["df"] = df | ||||
|         result = store["df"] | ||||
|         tm.assert_frame_equal(result, df) | ||||
|  | ||||
|         # as data | ||||
|         # GH11411 | ||||
|         _maybe_remove(store, "df") | ||||
|         df = DataFrame( | ||||
|             { | ||||
|                 "A": rng, | ||||
|                 "B": rng.tz_convert("UTC").tz_localize(None), | ||||
|                 "C": rng.tz_convert("CET"), | ||||
|                 "D": range(len(rng)), | ||||
|             }, | ||||
|             index=rng, | ||||
|         ) | ||||
|         store["df"] = df | ||||
|         result = store["df"] | ||||
|         tm.assert_frame_equal(result, df) | ||||
|  | ||||
|  | ||||
| def test_timezones_fixed_format_empty(setup_path, tz_aware_fixture, frame_or_series): | ||||
|     # GH 20594 | ||||
|  | ||||
|     dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture) | ||||
|  | ||||
|     obj = Series(dtype=dtype, name="A") | ||||
|     if frame_or_series is DataFrame: | ||||
|         obj = obj.to_frame() | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         store["obj"] = obj | ||||
|         result = store["obj"] | ||||
|         tm.assert_equal(result, obj) | ||||
|  | ||||
|  | ||||
| def test_timezones_fixed_format_series_nonempty(setup_path, tz_aware_fixture): | ||||
|     # GH 20594 | ||||
|  | ||||
|     dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture) | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         s = Series([0], dtype=dtype) | ||||
|         store["s"] = s | ||||
|         result = store["s"] | ||||
|         tm.assert_series_equal(result, s) | ||||
|  | ||||
|  | ||||
| def test_fixed_offset_tz(setup_path): | ||||
|     rng = date_range("1/1/2000 00:00:00-07:00", "1/30/2000 00:00:00-07:00") | ||||
|     frame = DataFrame( | ||||
|         np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng | ||||
|     ) | ||||
|  | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         store["frame"] = frame | ||||
|         recons = store["frame"] | ||||
|         tm.assert_index_equal(recons.index, rng) | ||||
|         assert rng.tz == recons.index.tz | ||||
|  | ||||
|  | ||||
| @td.skip_if_windows | ||||
| def test_store_timezone(setup_path): | ||||
|     # GH2852 | ||||
|     # storing a frame indexed by datetime.date could shift values when the | ||||
|     # store was read back under a different local timezone | ||||
|  | ||||
|     # original method | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         today = date(2013, 9, 10) | ||||
|         df = DataFrame([1, 2, 3], index=[today, today, today]) | ||||
|         store["obj1"] = df | ||||
|         result = store["obj1"] | ||||
|         tm.assert_frame_equal(result, df) | ||||
|  | ||||
|     # with tz setting | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         with tm.set_timezone("EST5EDT"): | ||||
|             today = date(2013, 9, 10) | ||||
|             df = DataFrame([1, 2, 3], index=[today, today, today]) | ||||
|             store["obj1"] = df | ||||
|  | ||||
|         with tm.set_timezone("CST6CDT"): | ||||
|             result = store["obj1"] | ||||
|  | ||||
|         tm.assert_frame_equal(result, df) | ||||
|  | ||||
|  | ||||
| def test_legacy_datetimetz_object(datapath): | ||||
|     # legacy from < 0.17.0 | ||||
|     # 8260 | ||||
|     expected = DataFrame( | ||||
|         { | ||||
|             "A": Timestamp("20130102", tz="US/Eastern").as_unit("ns"), | ||||
|             "B": Timestamp("20130603", tz="CET").as_unit("ns"), | ||||
|         }, | ||||
|         index=range(5), | ||||
|     ) | ||||
|     with ensure_clean_store( | ||||
|         datapath("io", "data", "legacy_hdf", "datetimetz_object.h5"), mode="r" | ||||
|     ) as store: | ||||
|         result = store["df"] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_dst_transitions(setup_path): | ||||
|     # make sure we are not failing on transitions | ||||
|     with ensure_clean_store(setup_path) as store: | ||||
|         times = date_range( | ||||
|             "2013-10-26 23:00", | ||||
|             "2013-10-27 01:00", | ||||
|             tz="Europe/London", | ||||
|             freq="h", | ||||
|             ambiguous="infer", | ||||
|         ) | ||||
|         times = times._with_freq(None)  # freq doesn't round-trip | ||||
|  | ||||
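|         # check both on-the-hour stamps and stamps offset by ten minutes | ||||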
|         for i in [times, times + pd.Timedelta("10min")]: | ||||
|             _maybe_remove(store, "df") | ||||
|             df = DataFrame({"A": range(len(i)), "B": i}, index=i) | ||||
|             store.append("df", df) | ||||
|             result = store.select("df") | ||||
|             tm.assert_frame_equal(result, df) | ||||
|  | ||||
|  | ||||
| def test_read_with_where_tz_aware_index(tmp_path, setup_path): | ||||
|     # GH 11926 | ||||
|     periods = 10 | ||||
|     dts = date_range("20151201", periods=periods, freq="D", tz="UTC") | ||||
|     mi = pd.MultiIndex.from_arrays([dts, range(periods)], names=["DATE", "NO"]) | ||||
|     expected = DataFrame({"MYCOL": 0}, index=mi) | ||||
|  | ||||
|     key = "mykey" | ||||
|     path = tmp_path / setup_path | ||||
|     with pd.HDFStore(path) as store: | ||||
|         store.append(key, expected, format="table", append=True) | ||||
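|     # every stored date falls after 2015-11-30, so the where clause | ||||
|     # should select all rows | ||||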
|     result = pd.read_hdf(path, key, where="DATE > 20151130") | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_py2_created_with_datetimez(datapath): | ||||
|     # The test HDF5 file was created in Python 2, but could not be read in | ||||
|     # Python 3. | ||||
|     # | ||||
|     # GH26443 | ||||
|     index = DatetimeIndex(["2019-01-01T18:00"], dtype="M8[ns, America/New_York]") | ||||
|     expected = DataFrame({"data": 123}, index=index) | ||||
|     with ensure_clean_store( | ||||
|         datapath("io", "data", "legacy_hdf", "gh26443.h5"), mode="r" | ||||
|     ) as store: | ||||
|         result = store["key"] | ||||
|         tm.assert_frame_equal(result, expected) | ||||