done
This commit is contained in:
		
							
								
								
									
										38
									
								
								lib/python3.11/site-packages/pandas/tests/io/xml/conftest.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								lib/python3.11/site-packages/pandas/tests/io/xml/conftest.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,38 @@ | ||||
| from pathlib import Path | ||||
|  | ||||
| import pytest | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def xml_data_path(): | ||||
|     return Path(__file__).parent.parent / "data" / "xml" | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def xml_books(xml_data_path, datapath): | ||||
|     return datapath(xml_data_path / "books.xml") | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def xml_doc_ch_utf(xml_data_path, datapath): | ||||
|     return datapath(xml_data_path / "doc_ch_utf.xml") | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def xml_baby_names(xml_data_path, datapath): | ||||
|     return datapath(xml_data_path / "baby_names.xml") | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def kml_cta_rail_lines(xml_data_path, datapath): | ||||
|     return datapath(xml_data_path / "cta_rail_lines.kml") | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def xsl_flatten_doc(xml_data_path, datapath): | ||||
|     return datapath(xml_data_path / "flatten_doc.xsl") | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def xsl_row_field_output(xml_data_path, datapath): | ||||
|     return datapath(xml_data_path / "row_field_output.xsl") | ||||
							
								
								
									
										1375
									
								
								lib/python3.11/site-packages/pandas/tests/io/xml/test_to_xml.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1375
									
								
								lib/python3.11/site-packages/pandas/tests/io/xml/test_to_xml.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										2079
									
								
								lib/python3.11/site-packages/pandas/tests/io/xml/test_xml.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2079
									
								
								lib/python3.11/site-packages/pandas/tests/io/xml/test_xml.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -0,0 +1,485 @@ | ||||
| from __future__ import annotations | ||||
|  | ||||
| from io import StringIO | ||||
|  | ||||
| import pytest | ||||
|  | ||||
| from pandas.errors import ParserWarning | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     DatetimeIndex, | ||||
|     Series, | ||||
|     to_datetime, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
| from pandas.io.xml import read_xml | ||||
|  | ||||
|  | ||||
| @pytest.fixture(params=[pytest.param("lxml", marks=td.skip_if_no("lxml")), "etree"]) | ||||
| def parser(request): | ||||
|     return request.param | ||||
|  | ||||
|  | ||||
| @pytest.fixture( | ||||
|     params=[None, {"book": ["category", "title", "author", "year", "price"]}] | ||||
| ) | ||||
| def iterparse(request): | ||||
|     return request.param | ||||
|  | ||||
|  | ||||
| def read_xml_iterparse(data, **kwargs): | ||||
|     with tm.ensure_clean() as path: | ||||
|         with open(path, "w", encoding="utf-8") as f: | ||||
|             f.write(data) | ||||
|         return read_xml(path, **kwargs) | ||||
|  | ||||
|  | ||||
| xml_types = """\ | ||||
| <?xml version='1.0' encoding='utf-8'?> | ||||
| <data> | ||||
|   <row> | ||||
|     <shape>square</shape> | ||||
|     <degrees>00360</degrees> | ||||
|     <sides>4.0</sides> | ||||
|    </row> | ||||
|   <row> | ||||
|     <shape>circle</shape> | ||||
|     <degrees>00360</degrees> | ||||
|     <sides/> | ||||
|   </row> | ||||
|   <row> | ||||
|     <shape>triangle</shape> | ||||
|     <degrees>00180</degrees> | ||||
|     <sides>3.0</sides> | ||||
|   </row> | ||||
| </data>""" | ||||
|  | ||||
| xml_dates = """<?xml version='1.0' encoding='utf-8'?> | ||||
| <data> | ||||
|   <row> | ||||
|     <shape>square</shape> | ||||
|     <degrees>00360</degrees> | ||||
|     <sides>4.0</sides> | ||||
|     <date>2020-01-01</date> | ||||
|    </row> | ||||
|   <row> | ||||
|     <shape>circle</shape> | ||||
|     <degrees>00360</degrees> | ||||
|     <sides/> | ||||
|     <date>2021-01-01</date> | ||||
|   </row> | ||||
|   <row> | ||||
|     <shape>triangle</shape> | ||||
|     <degrees>00180</degrees> | ||||
|     <sides>3.0</sides> | ||||
|     <date>2022-01-01</date> | ||||
|   </row> | ||||
| </data>""" | ||||
|  | ||||
|  | ||||
| # DTYPE | ||||
|  | ||||
|  | ||||
| def test_dtype_single_str(parser): | ||||
|     df_result = read_xml(StringIO(xml_types), dtype={"degrees": "str"}, parser=parser) | ||||
|     df_iter = read_xml_iterparse( | ||||
|         xml_types, | ||||
|         parser=parser, | ||||
|         dtype={"degrees": "str"}, | ||||
|         iterparse={"row": ["shape", "degrees", "sides"]}, | ||||
|     ) | ||||
|  | ||||
|     df_expected = DataFrame( | ||||
|         { | ||||
|             "shape": ["square", "circle", "triangle"], | ||||
|             "degrees": ["00360", "00360", "00180"], | ||||
|             "sides": [4.0, float("nan"), 3.0], | ||||
|         } | ||||
|     ) | ||||
|  | ||||
|     tm.assert_frame_equal(df_result, df_expected) | ||||
|     tm.assert_frame_equal(df_iter, df_expected) | ||||
|  | ||||
|  | ||||
| def test_dtypes_all_str(parser): | ||||
|     df_result = read_xml(StringIO(xml_dates), dtype="string", parser=parser) | ||||
|     df_iter = read_xml_iterparse( | ||||
|         xml_dates, | ||||
|         parser=parser, | ||||
|         dtype="string", | ||||
|         iterparse={"row": ["shape", "degrees", "sides", "date"]}, | ||||
|     ) | ||||
|  | ||||
|     df_expected = DataFrame( | ||||
|         { | ||||
|             "shape": ["square", "circle", "triangle"], | ||||
|             "degrees": ["00360", "00360", "00180"], | ||||
|             "sides": ["4.0", None, "3.0"], | ||||
|             "date": ["2020-01-01", "2021-01-01", "2022-01-01"], | ||||
|         }, | ||||
|         dtype="string", | ||||
|     ) | ||||
|  | ||||
|     tm.assert_frame_equal(df_result, df_expected) | ||||
|     tm.assert_frame_equal(df_iter, df_expected) | ||||
|  | ||||
|  | ||||
| def test_dtypes_with_names(parser): | ||||
|     df_result = read_xml( | ||||
|         StringIO(xml_dates), | ||||
|         names=["Col1", "Col2", "Col3", "Col4"], | ||||
|         dtype={"Col2": "string", "Col3": "Int64", "Col4": "datetime64[ns]"}, | ||||
|         parser=parser, | ||||
|     ) | ||||
|     df_iter = read_xml_iterparse( | ||||
|         xml_dates, | ||||
|         parser=parser, | ||||
|         names=["Col1", "Col2", "Col3", "Col4"], | ||||
|         dtype={"Col2": "string", "Col3": "Int64", "Col4": "datetime64[ns]"}, | ||||
|         iterparse={"row": ["shape", "degrees", "sides", "date"]}, | ||||
|     ) | ||||
|  | ||||
|     df_expected = DataFrame( | ||||
|         { | ||||
|             "Col1": ["square", "circle", "triangle"], | ||||
|             "Col2": Series(["00360", "00360", "00180"]).astype("string"), | ||||
|             "Col3": Series([4.0, float("nan"), 3.0]).astype("Int64"), | ||||
|             "Col4": DatetimeIndex( | ||||
|                 ["2020-01-01", "2021-01-01", "2022-01-01"], dtype="M8[ns]" | ||||
|             ), | ||||
|         } | ||||
|     ) | ||||
|  | ||||
|     tm.assert_frame_equal(df_result, df_expected) | ||||
|     tm.assert_frame_equal(df_iter, df_expected) | ||||
|  | ||||
|  | ||||
| def test_dtype_nullable_int(parser): | ||||
|     df_result = read_xml(StringIO(xml_types), dtype={"sides": "Int64"}, parser=parser) | ||||
|     df_iter = read_xml_iterparse( | ||||
|         xml_types, | ||||
|         parser=parser, | ||||
|         dtype={"sides": "Int64"}, | ||||
|         iterparse={"row": ["shape", "degrees", "sides"]}, | ||||
|     ) | ||||
|  | ||||
|     df_expected = DataFrame( | ||||
|         { | ||||
|             "shape": ["square", "circle", "triangle"], | ||||
|             "degrees": [360, 360, 180], | ||||
|             "sides": Series([4.0, float("nan"), 3.0]).astype("Int64"), | ||||
|         } | ||||
|     ) | ||||
|  | ||||
|     tm.assert_frame_equal(df_result, df_expected) | ||||
|     tm.assert_frame_equal(df_iter, df_expected) | ||||
|  | ||||
|  | ||||
| def test_dtype_float(parser): | ||||
|     df_result = read_xml(StringIO(xml_types), dtype={"degrees": "float"}, parser=parser) | ||||
|     df_iter = read_xml_iterparse( | ||||
|         xml_types, | ||||
|         parser=parser, | ||||
|         dtype={"degrees": "float"}, | ||||
|         iterparse={"row": ["shape", "degrees", "sides"]}, | ||||
|     ) | ||||
|  | ||||
|     df_expected = DataFrame( | ||||
|         { | ||||
|             "shape": ["square", "circle", "triangle"], | ||||
|             "degrees": Series([360, 360, 180]).astype("float"), | ||||
|             "sides": [4.0, float("nan"), 3.0], | ||||
|         } | ||||
|     ) | ||||
|  | ||||
|     tm.assert_frame_equal(df_result, df_expected) | ||||
|     tm.assert_frame_equal(df_iter, df_expected) | ||||
|  | ||||
|  | ||||
| def test_wrong_dtype(xml_books, parser, iterparse): | ||||
|     with pytest.raises( | ||||
|         ValueError, match=('Unable to parse string "Everyday Italian" at position 0') | ||||
|     ): | ||||
|         read_xml( | ||||
|             xml_books, dtype={"title": "Int64"}, parser=parser, iterparse=iterparse | ||||
|         ) | ||||
|  | ||||
|  | ||||
| def test_both_dtype_converters(parser): | ||||
|     df_expected = DataFrame( | ||||
|         { | ||||
|             "shape": ["square", "circle", "triangle"], | ||||
|             "degrees": ["00360", "00360", "00180"], | ||||
|             "sides": [4.0, float("nan"), 3.0], | ||||
|         } | ||||
|     ) | ||||
|  | ||||
|     with tm.assert_produces_warning(ParserWarning, match="Both a converter and dtype"): | ||||
|         df_result = read_xml( | ||||
|             StringIO(xml_types), | ||||
|             dtype={"degrees": "str"}, | ||||
|             converters={"degrees": str}, | ||||
|             parser=parser, | ||||
|         ) | ||||
|         df_iter = read_xml_iterparse( | ||||
|             xml_types, | ||||
|             dtype={"degrees": "str"}, | ||||
|             converters={"degrees": str}, | ||||
|             parser=parser, | ||||
|             iterparse={"row": ["shape", "degrees", "sides"]}, | ||||
|         ) | ||||
|  | ||||
|         tm.assert_frame_equal(df_result, df_expected) | ||||
|         tm.assert_frame_equal(df_iter, df_expected) | ||||
|  | ||||
|  | ||||
| # CONVERTERS | ||||
|  | ||||
|  | ||||
| def test_converters_str(parser): | ||||
|     df_result = read_xml( | ||||
|         StringIO(xml_types), converters={"degrees": str}, parser=parser | ||||
|     ) | ||||
|     df_iter = read_xml_iterparse( | ||||
|         xml_types, | ||||
|         parser=parser, | ||||
|         converters={"degrees": str}, | ||||
|         iterparse={"row": ["shape", "degrees", "sides"]}, | ||||
|     ) | ||||
|  | ||||
|     df_expected = DataFrame( | ||||
|         { | ||||
|             "shape": ["square", "circle", "triangle"], | ||||
|             "degrees": ["00360", "00360", "00180"], | ||||
|             "sides": [4.0, float("nan"), 3.0], | ||||
|         } | ||||
|     ) | ||||
|  | ||||
|     tm.assert_frame_equal(df_result, df_expected) | ||||
|     tm.assert_frame_equal(df_iter, df_expected) | ||||
|  | ||||
|  | ||||
| def test_converters_date(parser): | ||||
|     convert_to_datetime = lambda x: to_datetime(x) | ||||
|     df_result = read_xml( | ||||
|         StringIO(xml_dates), converters={"date": convert_to_datetime}, parser=parser | ||||
|     ) | ||||
|     df_iter = read_xml_iterparse( | ||||
|         xml_dates, | ||||
|         parser=parser, | ||||
|         converters={"date": convert_to_datetime}, | ||||
|         iterparse={"row": ["shape", "degrees", "sides", "date"]}, | ||||
|     ) | ||||
|  | ||||
|     df_expected = DataFrame( | ||||
|         { | ||||
|             "shape": ["square", "circle", "triangle"], | ||||
|             "degrees": [360, 360, 180], | ||||
|             "sides": [4.0, float("nan"), 3.0], | ||||
|             "date": to_datetime(["2020-01-01", "2021-01-01", "2022-01-01"]), | ||||
|         } | ||||
|     ) | ||||
|  | ||||
|     tm.assert_frame_equal(df_result, df_expected) | ||||
|     tm.assert_frame_equal(df_iter, df_expected) | ||||
|  | ||||
|  | ||||
| def test_wrong_converters_type(xml_books, parser, iterparse): | ||||
|     with pytest.raises(TypeError, match=("Type converters must be a dict or subclass")): | ||||
|         read_xml( | ||||
|             xml_books, converters={"year", str}, parser=parser, iterparse=iterparse | ||||
|         ) | ||||
|  | ||||
|  | ||||
| def test_callable_func_converters(xml_books, parser, iterparse): | ||||
|     with pytest.raises(TypeError, match=("'float' object is not callable")): | ||||
|         read_xml( | ||||
|             xml_books, converters={"year": float()}, parser=parser, iterparse=iterparse | ||||
|         ) | ||||
|  | ||||
|  | ||||
| def test_callable_str_converters(xml_books, parser, iterparse): | ||||
|     with pytest.raises(TypeError, match=("'str' object is not callable")): | ||||
|         read_xml( | ||||
|             xml_books, converters={"year": "float"}, parser=parser, iterparse=iterparse | ||||
|         ) | ||||
|  | ||||
|  | ||||
| # PARSE DATES | ||||
|  | ||||
|  | ||||
| def test_parse_dates_column_name(parser): | ||||
|     df_result = read_xml(StringIO(xml_dates), parse_dates=["date"], parser=parser) | ||||
|     df_iter = read_xml_iterparse( | ||||
|         xml_dates, | ||||
|         parser=parser, | ||||
|         parse_dates=["date"], | ||||
|         iterparse={"row": ["shape", "degrees", "sides", "date"]}, | ||||
|     ) | ||||
|  | ||||
|     df_expected = DataFrame( | ||||
|         { | ||||
|             "shape": ["square", "circle", "triangle"], | ||||
|             "degrees": [360, 360, 180], | ||||
|             "sides": [4.0, float("nan"), 3.0], | ||||
|             "date": to_datetime(["2020-01-01", "2021-01-01", "2022-01-01"]), | ||||
|         } | ||||
|     ) | ||||
|  | ||||
|     tm.assert_frame_equal(df_result, df_expected) | ||||
|     tm.assert_frame_equal(df_iter, df_expected) | ||||
|  | ||||
|  | ||||
| def test_parse_dates_column_index(parser): | ||||
|     df_result = read_xml(StringIO(xml_dates), parse_dates=[3], parser=parser) | ||||
|     df_iter = read_xml_iterparse( | ||||
|         xml_dates, | ||||
|         parser=parser, | ||||
|         parse_dates=[3], | ||||
|         iterparse={"row": ["shape", "degrees", "sides", "date"]}, | ||||
|     ) | ||||
|  | ||||
|     df_expected = DataFrame( | ||||
|         { | ||||
|             "shape": ["square", "circle", "triangle"], | ||||
|             "degrees": [360, 360, 180], | ||||
|             "sides": [4.0, float("nan"), 3.0], | ||||
|             "date": to_datetime(["2020-01-01", "2021-01-01", "2022-01-01"]), | ||||
|         } | ||||
|     ) | ||||
|  | ||||
|     tm.assert_frame_equal(df_result, df_expected) | ||||
|     tm.assert_frame_equal(df_iter, df_expected) | ||||
|  | ||||
|  | ||||
| def test_parse_dates_true(parser): | ||||
|     df_result = read_xml(StringIO(xml_dates), parse_dates=True, parser=parser) | ||||
|  | ||||
|     df_iter = read_xml_iterparse( | ||||
|         xml_dates, | ||||
|         parser=parser, | ||||
|         parse_dates=True, | ||||
|         iterparse={"row": ["shape", "degrees", "sides", "date"]}, | ||||
|     ) | ||||
|  | ||||
|     df_expected = DataFrame( | ||||
|         { | ||||
|             "shape": ["square", "circle", "triangle"], | ||||
|             "degrees": [360, 360, 180], | ||||
|             "sides": [4.0, float("nan"), 3.0], | ||||
|             "date": ["2020-01-01", "2021-01-01", "2022-01-01"], | ||||
|         } | ||||
|     ) | ||||
|  | ||||
|     tm.assert_frame_equal(df_result, df_expected) | ||||
|     tm.assert_frame_equal(df_iter, df_expected) | ||||
|  | ||||
|  | ||||
| def test_parse_dates_dictionary(parser): | ||||
|     xml = """<?xml version='1.0' encoding='utf-8'?> | ||||
| <data> | ||||
|   <row> | ||||
|     <shape>square</shape> | ||||
|     <degrees>360</degrees> | ||||
|     <sides>4.0</sides> | ||||
|     <year>2020</year> | ||||
|     <month>12</month> | ||||
|     <day>31</day> | ||||
|    </row> | ||||
|   <row> | ||||
|     <shape>circle</shape> | ||||
|     <degrees>360</degrees> | ||||
|     <sides/> | ||||
|     <year>2021</year> | ||||
|     <month>12</month> | ||||
|     <day>31</day> | ||||
|   </row> | ||||
|   <row> | ||||
|     <shape>triangle</shape> | ||||
|     <degrees>180</degrees> | ||||
|     <sides>3.0</sides> | ||||
|     <year>2022</year> | ||||
|     <month>12</month> | ||||
|     <day>31</day> | ||||
|   </row> | ||||
| </data>""" | ||||
|  | ||||
|     df_result = read_xml( | ||||
|         StringIO(xml), parse_dates={"date_end": ["year", "month", "day"]}, parser=parser | ||||
|     ) | ||||
|     df_iter = read_xml_iterparse( | ||||
|         xml, | ||||
|         parser=parser, | ||||
|         parse_dates={"date_end": ["year", "month", "day"]}, | ||||
|         iterparse={"row": ["shape", "degrees", "sides", "year", "month", "day"]}, | ||||
|     ) | ||||
|  | ||||
|     df_expected = DataFrame( | ||||
|         { | ||||
|             "date_end": to_datetime(["2020-12-31", "2021-12-31", "2022-12-31"]), | ||||
|             "shape": ["square", "circle", "triangle"], | ||||
|             "degrees": [360, 360, 180], | ||||
|             "sides": [4.0, float("nan"), 3.0], | ||||
|         } | ||||
|     ) | ||||
|  | ||||
|     tm.assert_frame_equal(df_result, df_expected) | ||||
|     tm.assert_frame_equal(df_iter, df_expected) | ||||
|  | ||||
|  | ||||
| def test_day_first_parse_dates(parser): | ||||
|     xml = """\ | ||||
| <?xml version='1.0' encoding='utf-8'?> | ||||
| <data> | ||||
|   <row> | ||||
|     <shape>square</shape> | ||||
|     <degrees>00360</degrees> | ||||
|     <sides>4.0</sides> | ||||
|     <date>31/12/2020</date> | ||||
|    </row> | ||||
|   <row> | ||||
|     <shape>circle</shape> | ||||
|     <degrees>00360</degrees> | ||||
|     <sides/> | ||||
|     <date>31/12/2021</date> | ||||
|   </row> | ||||
|   <row> | ||||
|     <shape>triangle</shape> | ||||
|     <degrees>00180</degrees> | ||||
|     <sides>3.0</sides> | ||||
|     <date>31/12/2022</date> | ||||
|   </row> | ||||
| </data>""" | ||||
|  | ||||
|     df_expected = DataFrame( | ||||
|         { | ||||
|             "shape": ["square", "circle", "triangle"], | ||||
|             "degrees": [360, 360, 180], | ||||
|             "sides": [4.0, float("nan"), 3.0], | ||||
|             "date": to_datetime(["2020-12-31", "2021-12-31", "2022-12-31"]), | ||||
|         } | ||||
|     ) | ||||
|  | ||||
|     with tm.assert_produces_warning( | ||||
|         UserWarning, match="Parsing dates in %d/%m/%Y format" | ||||
|     ): | ||||
|         df_result = read_xml(StringIO(xml), parse_dates=["date"], parser=parser) | ||||
|         df_iter = read_xml_iterparse( | ||||
|             xml, | ||||
|             parse_dates=["date"], | ||||
|             parser=parser, | ||||
|             iterparse={"row": ["shape", "degrees", "sides", "date"]}, | ||||
|         ) | ||||
|  | ||||
|         tm.assert_frame_equal(df_result, df_expected) | ||||
|         tm.assert_frame_equal(df_iter, df_expected) | ||||
|  | ||||
|  | ||||
| def test_wrong_parse_dates_type(xml_books, parser, iterparse): | ||||
|     with pytest.raises( | ||||
|         TypeError, match=("Only booleans, lists, and dictionaries are accepted") | ||||
|     ): | ||||
|         read_xml(xml_books, parse_dates={"date"}, parser=parser, iterparse=iterparse) | ||||
		Reference in New Issue
	
	Block a user