done
This commit is contained in:
		| @ -0,0 +1,7 @@ | ||||
| import pytest | ||||
|  | ||||
|  | ||||
@pytest.fixture(params=[True, False])
def sort(request):
    """
    Boolean ``sort`` keyword for concat and DataFrame.append.

    Parametrized over ``True`` and ``False``, so every test requesting this
    fixture runs once per value.
    """
    sort_flag = request.param
    return sort_flag
| @ -0,0 +1,389 @@ | ||||
| import datetime as dt | ||||
| from itertools import combinations | ||||
|  | ||||
| import dateutil | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     Series, | ||||
|     Timestamp, | ||||
|     concat, | ||||
|     isna, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestAppend: | ||||
|     def test_append(self, sort, float_frame): | ||||
|         mixed_frame = float_frame.copy() | ||||
|         mixed_frame["foo"] = "bar" | ||||
|  | ||||
|         begin_index = float_frame.index[:5] | ||||
|         end_index = float_frame.index[5:] | ||||
|  | ||||
|         begin_frame = float_frame.reindex(begin_index) | ||||
|         end_frame = float_frame.reindex(end_index) | ||||
|  | ||||
|         appended = begin_frame._append(end_frame) | ||||
|         tm.assert_almost_equal(appended["A"], float_frame["A"]) | ||||
|  | ||||
|         del end_frame["A"] | ||||
|         partial_appended = begin_frame._append(end_frame, sort=sort) | ||||
|         assert "A" in partial_appended | ||||
|  | ||||
|         partial_appended = end_frame._append(begin_frame, sort=sort) | ||||
|         assert "A" in partial_appended | ||||
|  | ||||
|         # mixed type handling | ||||
|         appended = mixed_frame[:5]._append(mixed_frame[5:]) | ||||
|         tm.assert_frame_equal(appended, mixed_frame) | ||||
|  | ||||
|         # what to test here | ||||
|         mixed_appended = mixed_frame[:5]._append(float_frame[5:], sort=sort) | ||||
|         mixed_appended2 = float_frame[:5]._append(mixed_frame[5:], sort=sort) | ||||
|  | ||||
|         # all equal except 'foo' column | ||||
|         tm.assert_frame_equal( | ||||
|             mixed_appended.reindex(columns=["A", "B", "C", "D"]), | ||||
|             mixed_appended2.reindex(columns=["A", "B", "C", "D"]), | ||||
|         ) | ||||
|  | ||||
|     def test_append_empty(self, float_frame): | ||||
|         empty = DataFrame() | ||||
|  | ||||
|         appended = float_frame._append(empty) | ||||
|         tm.assert_frame_equal(float_frame, appended) | ||||
|         assert appended is not float_frame | ||||
|  | ||||
|         appended = empty._append(float_frame) | ||||
|         tm.assert_frame_equal(float_frame, appended) | ||||
|         assert appended is not float_frame | ||||
|  | ||||
|     def test_append_overlap_raises(self, float_frame): | ||||
|         msg = "Indexes have overlapping values" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             float_frame._append(float_frame, verify_integrity=True) | ||||
|  | ||||
|     def test_append_new_columns(self): | ||||
|         # see gh-6129: new columns | ||||
|         df = DataFrame({"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 4}}) | ||||
|         row = Series([5, 6, 7], index=["a", "b", "c"], name="z") | ||||
|         expected = DataFrame( | ||||
|             { | ||||
|                 "a": {"x": 1, "y": 2, "z": 5}, | ||||
|                 "b": {"x": 3, "y": 4, "z": 6}, | ||||
|                 "c": {"z": 7}, | ||||
|             } | ||||
|         ) | ||||
|         result = df._append(row) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_append_length0_frame(self, sort): | ||||
|         df = DataFrame(columns=["A", "B", "C"]) | ||||
|         df3 = DataFrame(index=[0, 1], columns=["A", "B"]) | ||||
|         df5 = df._append(df3, sort=sort) | ||||
|  | ||||
|         expected = DataFrame(index=[0, 1], columns=["A", "B", "C"]) | ||||
|         tm.assert_frame_equal(df5, expected) | ||||
|  | ||||
|     def test_append_records(self): | ||||
|         arr1 = np.zeros((2,), dtype=("i4,f4,S10")) | ||||
|         arr1[:] = [(1, 2.0, "Hello"), (2, 3.0, "World")] | ||||
|  | ||||
|         arr2 = np.zeros((3,), dtype=("i4,f4,S10")) | ||||
|         arr2[:] = [(3, 4.0, "foo"), (5, 6.0, "bar"), (7.0, 8.0, "baz")] | ||||
|  | ||||
|         df1 = DataFrame(arr1) | ||||
|         df2 = DataFrame(arr2) | ||||
|  | ||||
|         result = df1._append(df2, ignore_index=True) | ||||
|         expected = DataFrame(np.concatenate((arr1, arr2))) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     # rewrite sort fixture, since we also want to test default of None | ||||
|     def test_append_sorts(self, sort): | ||||
|         df1 = DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"]) | ||||
|         df2 = DataFrame({"a": [1, 2], "c": [3, 4]}, index=[2, 3]) | ||||
|  | ||||
|         result = df1._append(df2, sort=sort) | ||||
|  | ||||
|         # for None / True | ||||
|         expected = DataFrame( | ||||
|             {"b": [1, 2, None, None], "a": [1, 2, 1, 2], "c": [None, None, 3, 4]}, | ||||
|             columns=["a", "b", "c"], | ||||
|         ) | ||||
|         if sort is False: | ||||
|             expected = expected[["b", "a", "c"]] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_append_different_columns(self, sort): | ||||
|         df = DataFrame( | ||||
|             { | ||||
|                 "bools": np.random.default_rng(2).standard_normal(10) > 0, | ||||
|                 "ints": np.random.default_rng(2).integers(0, 10, 10), | ||||
|                 "floats": np.random.default_rng(2).standard_normal(10), | ||||
|                 "strings": ["foo", "bar"] * 5, | ||||
|             } | ||||
|         ) | ||||
|  | ||||
|         a = df[:5].loc[:, ["bools", "ints", "floats"]] | ||||
|         b = df[5:].loc[:, ["strings", "ints", "floats"]] | ||||
|  | ||||
|         appended = a._append(b, sort=sort) | ||||
|         assert isna(appended["strings"][0:4]).all() | ||||
|         assert isna(appended["bools"][5:]).all() | ||||
|  | ||||
|     def test_append_many(self, sort, float_frame): | ||||
|         chunks = [ | ||||
|             float_frame[:5], | ||||
|             float_frame[5:10], | ||||
|             float_frame[10:15], | ||||
|             float_frame[15:], | ||||
|         ] | ||||
|  | ||||
|         result = chunks[0]._append(chunks[1:]) | ||||
|         tm.assert_frame_equal(result, float_frame) | ||||
|  | ||||
|         chunks[-1] = chunks[-1].copy() | ||||
|         chunks[-1]["foo"] = "bar" | ||||
|         result = chunks[0]._append(chunks[1:], sort=sort) | ||||
|         tm.assert_frame_equal(result.loc[:, float_frame.columns], float_frame) | ||||
|         assert (result["foo"][15:] == "bar").all() | ||||
|         assert result["foo"][:15].isna().all() | ||||
|  | ||||
|     def test_append_preserve_index_name(self): | ||||
|         # #980 | ||||
|         df1 = DataFrame(columns=["A", "B", "C"]) | ||||
|         df1 = df1.set_index(["A"]) | ||||
|         df2 = DataFrame(data=[[1, 4, 7], [2, 5, 8], [3, 6, 9]], columns=["A", "B", "C"]) | ||||
|         df2 = df2.set_index(["A"]) | ||||
|  | ||||
|         msg = "The behavior of array concatenation with empty entries is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = df1._append(df2) | ||||
|         assert result.index.name == "A" | ||||
|  | ||||
    # Index objects used to parametrize the column-type tests of this class
    # (test_append_same_columns_type and test_append_different_columns_types).
    # Every entry has exactly three elements, matching the three-column frames
    # and three-element Series those tests build.
    indexes_can_append = [
        pd.RangeIndex(3),
        Index([4, 5, 6]),
        Index([4.5, 5.5, 6.5]),
        Index(list("abc")),
        pd.CategoricalIndex("A B C".split()),
        pd.CategoricalIndex("D E F".split(), ordered=True),
        pd.IntervalIndex.from_breaks([7, 8, 9, 10]),
        pd.DatetimeIndex(
            [
                dt.datetime(2013, 1, 3, 0, 0),
                dt.datetime(2013, 1, 3, 6, 10),
                dt.datetime(2013, 1, 3, 7, 12),
            ]
        ),
        pd.MultiIndex.from_arrays(["A B C".split(), "D E F".split()]),
    ]
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "index", indexes_can_append, ids=lambda x: type(x).__name__ | ||||
|     ) | ||||
|     def test_append_same_columns_type(self, index): | ||||
|         # GH18359 | ||||
|  | ||||
|         # df wider than ser | ||||
|         df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=index) | ||||
|         ser_index = index[:2] | ||||
|         ser = Series([7, 8], index=ser_index, name=2) | ||||
|         result = df._append(ser) | ||||
|         expected = DataFrame( | ||||
|             [[1, 2, 3.0], [4, 5, 6], [7, 8, np.nan]], index=[0, 1, 2], columns=index | ||||
|         ) | ||||
|         # integer dtype is preserved for columns present in ser.index | ||||
|         assert expected.dtypes.iloc[0].kind == "i" | ||||
|         assert expected.dtypes.iloc[1].kind == "i" | ||||
|  | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # ser wider than df | ||||
|         ser_index = index | ||||
|         index = index[:2] | ||||
|         df = DataFrame([[1, 2], [4, 5]], columns=index) | ||||
|         ser = Series([7, 8, 9], index=ser_index, name=2) | ||||
|         result = df._append(ser) | ||||
|         expected = DataFrame( | ||||
|             [[1, 2, np.nan], [4, 5, np.nan], [7, 8, 9]], | ||||
|             index=[0, 1, 2], | ||||
|             columns=ser_index, | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "df_columns, series_index", | ||||
|         combinations(indexes_can_append, r=2), | ||||
|         ids=lambda x: type(x).__name__, | ||||
|     ) | ||||
|     def test_append_different_columns_types(self, df_columns, series_index): | ||||
|         # GH18359 | ||||
|         # See also test 'test_append_different_columns_types_raises' below | ||||
|         # for errors raised when appending | ||||
|  | ||||
|         df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns) | ||||
|         ser = Series([7, 8, 9], index=series_index, name=2) | ||||
|  | ||||
|         result = df._append(ser) | ||||
|         idx_diff = ser.index.difference(df_columns) | ||||
|         combined_columns = Index(df_columns.tolist()).append(idx_diff) | ||||
|         expected = DataFrame( | ||||
|             [ | ||||
|                 [1.0, 2.0, 3.0, np.nan, np.nan, np.nan], | ||||
|                 [4, 5, 6, np.nan, np.nan, np.nan], | ||||
|                 [np.nan, np.nan, np.nan, 7, 8, 9], | ||||
|             ], | ||||
|             index=[0, 1, 2], | ||||
|             columns=combined_columns, | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_append_dtype_coerce(self, sort): | ||||
|         # GH 4993 | ||||
|         # appending with datetime will incorrectly convert datetime64 | ||||
|  | ||||
|         df1 = DataFrame( | ||||
|             index=[1, 2], | ||||
|             data=[dt.datetime(2013, 1, 1, 0, 0), dt.datetime(2013, 1, 2, 0, 0)], | ||||
|             columns=["start_time"], | ||||
|         ) | ||||
|         df2 = DataFrame( | ||||
|             index=[4, 5], | ||||
|             data=[ | ||||
|                 [dt.datetime(2013, 1, 3, 0, 0), dt.datetime(2013, 1, 3, 6, 10)], | ||||
|                 [dt.datetime(2013, 1, 4, 0, 0), dt.datetime(2013, 1, 4, 7, 10)], | ||||
|             ], | ||||
|             columns=["start_time", "end_time"], | ||||
|         ) | ||||
|  | ||||
|         expected = concat( | ||||
|             [ | ||||
|                 Series( | ||||
|                     [ | ||||
|                         pd.NaT, | ||||
|                         pd.NaT, | ||||
|                         dt.datetime(2013, 1, 3, 6, 10), | ||||
|                         dt.datetime(2013, 1, 4, 7, 10), | ||||
|                     ], | ||||
|                     name="end_time", | ||||
|                 ), | ||||
|                 Series( | ||||
|                     [ | ||||
|                         dt.datetime(2013, 1, 1, 0, 0), | ||||
|                         dt.datetime(2013, 1, 2, 0, 0), | ||||
|                         dt.datetime(2013, 1, 3, 0, 0), | ||||
|                         dt.datetime(2013, 1, 4, 0, 0), | ||||
|                     ], | ||||
|                     name="start_time", | ||||
|                 ), | ||||
|             ], | ||||
|             axis=1, | ||||
|             sort=sort, | ||||
|         ) | ||||
|         result = df1._append(df2, ignore_index=True, sort=sort) | ||||
|         if sort: | ||||
|             expected = expected[["end_time", "start_time"]] | ||||
|         else: | ||||
|             expected = expected[["start_time", "end_time"]] | ||||
|  | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_append_missing_column_proper_upcast(self, sort): | ||||
|         df1 = DataFrame({"A": np.array([1, 2, 3, 4], dtype="i8")}) | ||||
|         df2 = DataFrame({"B": np.array([True, False, True, False], dtype=bool)}) | ||||
|  | ||||
|         appended = df1._append(df2, ignore_index=True, sort=sort) | ||||
|         assert appended["A"].dtype == "f8" | ||||
|         assert appended["B"].dtype == "O" | ||||
|  | ||||
|     def test_append_empty_frame_to_series_with_dateutil_tz(self): | ||||
|         # GH 23682 | ||||
|         date = Timestamp("2018-10-24 07:30:00", tz=dateutil.tz.tzutc()) | ||||
|         ser = Series({"a": 1.0, "b": 2.0, "date": date}) | ||||
|         df = DataFrame(columns=["c", "d"]) | ||||
|         result_a = df._append(ser, ignore_index=True) | ||||
|         expected = DataFrame( | ||||
|             [[np.nan, np.nan, 1.0, 2.0, date]], columns=["c", "d", "a", "b", "date"] | ||||
|         ) | ||||
|         # These columns get cast to object after append | ||||
|         expected["c"] = expected["c"].astype(object) | ||||
|         expected["d"] = expected["d"].astype(object) | ||||
|         tm.assert_frame_equal(result_a, expected) | ||||
|  | ||||
|         expected = DataFrame( | ||||
|             [[np.nan, np.nan, 1.0, 2.0, date]] * 2, columns=["c", "d", "a", "b", "date"] | ||||
|         ) | ||||
|         expected["c"] = expected["c"].astype(object) | ||||
|         expected["d"] = expected["d"].astype(object) | ||||
|         result_b = result_a._append(ser, ignore_index=True) | ||||
|         tm.assert_frame_equal(result_b, expected) | ||||
|  | ||||
|         result = df._append([ser, ser], ignore_index=True) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_append_empty_tz_frame_with_datetime64ns(self, using_array_manager): | ||||
|         # https://github.com/pandas-dev/pandas/issues/35460 | ||||
|         df = DataFrame(columns=["a"]).astype("datetime64[ns, UTC]") | ||||
|  | ||||
|         # pd.NaT gets inferred as tz-naive, so append result is tz-naive | ||||
|         result = df._append({"a": pd.NaT}, ignore_index=True) | ||||
|         if using_array_manager: | ||||
|             expected = DataFrame({"a": [pd.NaT]}, dtype=object) | ||||
|         else: | ||||
|             expected = DataFrame({"a": [np.nan]}, dtype=object) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # also test with typed value to append | ||||
|         df = DataFrame(columns=["a"]).astype("datetime64[ns, UTC]") | ||||
|         other = Series({"a": pd.NaT}, dtype="datetime64[ns]") | ||||
|         result = df._append(other, ignore_index=True) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # mismatched tz | ||||
|         other = Series({"a": pd.NaT}, dtype="datetime64[ns, US/Pacific]") | ||||
|         result = df._append(other, ignore_index=True) | ||||
|         expected = DataFrame({"a": [pd.NaT]}).astype(object) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "dtype_str", ["datetime64[ns, UTC]", "datetime64[ns]", "Int64", "int64"] | ||||
|     ) | ||||
|     @pytest.mark.parametrize("val", [1, "NaT"]) | ||||
|     def test_append_empty_frame_with_timedelta64ns_nat( | ||||
|         self, dtype_str, val, using_array_manager | ||||
|     ): | ||||
|         # https://github.com/pandas-dev/pandas/issues/35460 | ||||
|         df = DataFrame(columns=["a"]).astype(dtype_str) | ||||
|  | ||||
|         other = DataFrame({"a": [np.timedelta64(val, "ns")]}) | ||||
|         result = df._append(other, ignore_index=True) | ||||
|  | ||||
|         expected = other.astype(object) | ||||
|         if isinstance(val, str) and dtype_str != "int64" and not using_array_manager: | ||||
|             # TODO: expected used to be `other.astype(object)` which is a more | ||||
|             #  reasonable result.  This was changed when tightening | ||||
|             #  assert_frame_equal's treatment of mismatched NAs to match the | ||||
|             #  existing behavior. | ||||
|             expected = DataFrame({"a": [np.nan]}, dtype=object) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "dtype_str", ["datetime64[ns, UTC]", "datetime64[ns]", "Int64", "int64"] | ||||
|     ) | ||||
|     @pytest.mark.parametrize("val", [1, "NaT"]) | ||||
|     def test_append_frame_with_timedelta64ns_nat(self, dtype_str, val): | ||||
|         # https://github.com/pandas-dev/pandas/issues/35460 | ||||
|         df = DataFrame({"a": pd.array([1], dtype=dtype_str)}) | ||||
|  | ||||
|         other = DataFrame({"a": [np.timedelta64(val, "ns")]}) | ||||
|         result = df._append(other, ignore_index=True) | ||||
|  | ||||
|         expected = DataFrame({"a": [df.iloc[0, 0], other.iloc[0, 0]]}, dtype=object) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
| @ -0,0 +1,753 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
# Sample values keyed by dtype name. Shared by the ``item`` and ``item2``
# fixtures so that both are parametrized over the same set of dtypes.
_DTYPE_SAMPLES = {
    "bool": [True, False, True],
    "int64": [1, 2, 3],
    "float64": [1.1, np.nan, 3.3],
    "category": Categorical(["X", "Y", "Z"]),
    "object": ["a", "b", "c"],
    "datetime64[ns]": [
        pd.Timestamp("2011-01-01"),
        pd.Timestamp("2011-01-02"),
        pd.Timestamp("2011-01-03"),
    ],
    "datetime64[ns, US/Eastern]": [
        pd.Timestamp("2011-01-01", tz="US/Eastern"),
        pd.Timestamp("2011-01-02", tz="US/Eastern"),
        pd.Timestamp("2011-01-03", tz="US/Eastern"),
    ],
    "timedelta64[ns]": [
        pd.Timedelta("1 days"),
        pd.Timedelta("2 days"),
        pd.Timedelta("3 days"),
    ],
    "period[M]": [
        pd.Period("2011-01", freq="M"),
        pd.Period("2011-02", freq="M"),
        pd.Period("2011-03", freq="M"),
    ],
}


@pytest.fixture(params=list(_DTYPE_SAMPLES.items()))
def item(request):
    """Return one ``(dtype name, sample values)`` pair per parametrized run."""
    key, data = request.param
    return key, data


@pytest.fixture(params=list(_DTYPE_SAMPLES.items()))
def item2(request):
    """
    Return a second ``(dtype name, sample values)`` pair.

    Parametrized independently of ``item``, so a test requesting both fixtures
    runs over every ordered pair of dtypes. When this fixture simply returned
    ``item``, the two were always identical and any test that skips equal
    dtypes never exercised a mixed-dtype pair.
    """
    key, data = request.param
    return key, data
|  | ||||
|  | ||||
| class TestConcatAppendCommon: | ||||
|     """ | ||||
|     Test common dtype coercion rules between concat and append. | ||||
|     """ | ||||
|  | ||||
|     def test_dtypes(self, item, index_or_series, using_infer_string): | ||||
|         # to confirm test case covers intended dtypes | ||||
|         typ, vals = item | ||||
|         obj = index_or_series(vals) | ||||
|         if typ == "object" and using_infer_string: | ||||
|             typ = "string" | ||||
|         if isinstance(obj, Index): | ||||
|             assert obj.dtype == typ | ||||
|         elif isinstance(obj, Series): | ||||
|             if typ.startswith("period"): | ||||
|                 assert obj.dtype == "Period[M]" | ||||
|             else: | ||||
|                 assert obj.dtype == typ | ||||
|  | ||||
|     def test_concatlike_same_dtypes(self, item): | ||||
|         # GH 13660 | ||||
|         typ1, vals1 = item | ||||
|  | ||||
|         vals2 = vals1 | ||||
|         vals3 = vals1 | ||||
|  | ||||
|         if typ1 == "category": | ||||
|             exp_data = Categorical(list(vals1) + list(vals2)) | ||||
|             exp_data3 = Categorical(list(vals1) + list(vals2) + list(vals3)) | ||||
|         else: | ||||
|             exp_data = vals1 + vals2 | ||||
|             exp_data3 = vals1 + vals2 + vals3 | ||||
|  | ||||
|         # ----- Index ----- # | ||||
|  | ||||
|         # index.append | ||||
|         res = Index(vals1).append(Index(vals2)) | ||||
|         exp = Index(exp_data) | ||||
|         tm.assert_index_equal(res, exp) | ||||
|  | ||||
|         # 3 elements | ||||
|         res = Index(vals1).append([Index(vals2), Index(vals3)]) | ||||
|         exp = Index(exp_data3) | ||||
|         tm.assert_index_equal(res, exp) | ||||
|  | ||||
|         # index.append name mismatch | ||||
|         i1 = Index(vals1, name="x") | ||||
|         i2 = Index(vals2, name="y") | ||||
|         res = i1.append(i2) | ||||
|         exp = Index(exp_data) | ||||
|         tm.assert_index_equal(res, exp) | ||||
|  | ||||
|         # index.append name match | ||||
|         i1 = Index(vals1, name="x") | ||||
|         i2 = Index(vals2, name="x") | ||||
|         res = i1.append(i2) | ||||
|         exp = Index(exp_data, name="x") | ||||
|         tm.assert_index_equal(res, exp) | ||||
|  | ||||
|         # cannot append non-index | ||||
|         with pytest.raises(TypeError, match="all inputs must be Index"): | ||||
|             Index(vals1).append(vals2) | ||||
|  | ||||
|         with pytest.raises(TypeError, match="all inputs must be Index"): | ||||
|             Index(vals1).append([Index(vals2), vals3]) | ||||
|  | ||||
|         # ----- Series ----- # | ||||
|  | ||||
|         # series.append | ||||
|         res = Series(vals1)._append(Series(vals2), ignore_index=True) | ||||
|         exp = Series(exp_data) | ||||
|         tm.assert_series_equal(res, exp, check_index_type=True) | ||||
|  | ||||
|         # concat | ||||
|         res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True) | ||||
|         tm.assert_series_equal(res, exp, check_index_type=True) | ||||
|  | ||||
|         # 3 elements | ||||
|         res = Series(vals1)._append([Series(vals2), Series(vals3)], ignore_index=True) | ||||
|         exp = Series(exp_data3) | ||||
|         tm.assert_series_equal(res, exp) | ||||
|  | ||||
|         res = pd.concat( | ||||
|             [Series(vals1), Series(vals2), Series(vals3)], | ||||
|             ignore_index=True, | ||||
|         ) | ||||
|         tm.assert_series_equal(res, exp) | ||||
|  | ||||
|         # name mismatch | ||||
|         s1 = Series(vals1, name="x") | ||||
|         s2 = Series(vals2, name="y") | ||||
|         res = s1._append(s2, ignore_index=True) | ||||
|         exp = Series(exp_data) | ||||
|         tm.assert_series_equal(res, exp, check_index_type=True) | ||||
|  | ||||
|         res = pd.concat([s1, s2], ignore_index=True) | ||||
|         tm.assert_series_equal(res, exp, check_index_type=True) | ||||
|  | ||||
|         # name match | ||||
|         s1 = Series(vals1, name="x") | ||||
|         s2 = Series(vals2, name="x") | ||||
|         res = s1._append(s2, ignore_index=True) | ||||
|         exp = Series(exp_data, name="x") | ||||
|         tm.assert_series_equal(res, exp, check_index_type=True) | ||||
|  | ||||
|         res = pd.concat([s1, s2], ignore_index=True) | ||||
|         tm.assert_series_equal(res, exp, check_index_type=True) | ||||
|  | ||||
|         # cannot append non-index | ||||
|         msg = ( | ||||
|             r"cannot concatenate object of type '.+'; " | ||||
|             "only Series and DataFrame objs are valid" | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             Series(vals1)._append(vals2) | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             Series(vals1)._append([Series(vals2), vals3]) | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             pd.concat([Series(vals1), vals2]) | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             pd.concat([Series(vals1), Series(vals2), vals3]) | ||||
|  | ||||
|     def test_concatlike_dtypes_coercion(self, item, item2, request): | ||||
|         # GH 13660 | ||||
|         typ1, vals1 = item | ||||
|         typ2, vals2 = item2 | ||||
|  | ||||
|         vals3 = vals2 | ||||
|  | ||||
|         # basically infer | ||||
|         exp_index_dtype = None | ||||
|         exp_series_dtype = None | ||||
|  | ||||
|         if typ1 == typ2: | ||||
|             pytest.skip("same dtype is tested in test_concatlike_same_dtypes") | ||||
|         elif typ1 == "category" or typ2 == "category": | ||||
|             pytest.skip("categorical type tested elsewhere") | ||||
|  | ||||
|         # specify expected dtype | ||||
|         if typ1 == "bool" and typ2 in ("int64", "float64"): | ||||
|             # series coerces to numeric based on numpy rule | ||||
|             # index doesn't because bool is object dtype | ||||
|             exp_series_dtype = typ2 | ||||
|             mark = pytest.mark.xfail(reason="GH#39187 casting to object") | ||||
|             request.applymarker(mark) | ||||
|         elif typ2 == "bool" and typ1 in ("int64", "float64"): | ||||
|             exp_series_dtype = typ1 | ||||
|             mark = pytest.mark.xfail(reason="GH#39187 casting to object") | ||||
|             request.applymarker(mark) | ||||
|         elif typ1 in {"datetime64[ns, US/Eastern]", "timedelta64[ns]"} or typ2 in { | ||||
|             "datetime64[ns, US/Eastern]", | ||||
|             "timedelta64[ns]", | ||||
|         }: | ||||
|             exp_index_dtype = object | ||||
|             exp_series_dtype = object | ||||
|  | ||||
|         exp_data = vals1 + vals2 | ||||
|         exp_data3 = vals1 + vals2 + vals3 | ||||
|  | ||||
|         # ----- Index ----- # | ||||
|  | ||||
|         # index.append | ||||
|         # GH#39817 | ||||
|         res = Index(vals1).append(Index(vals2)) | ||||
|         exp = Index(exp_data, dtype=exp_index_dtype) | ||||
|         tm.assert_index_equal(res, exp) | ||||
|  | ||||
|         # 3 elements | ||||
|         res = Index(vals1).append([Index(vals2), Index(vals3)]) | ||||
|         exp = Index(exp_data3, dtype=exp_index_dtype) | ||||
|         tm.assert_index_equal(res, exp) | ||||
|  | ||||
|         # ----- Series ----- # | ||||
|  | ||||
|         # series._append | ||||
|         # GH#39817 | ||||
|         res = Series(vals1)._append(Series(vals2), ignore_index=True) | ||||
|         exp = Series(exp_data, dtype=exp_series_dtype) | ||||
|         tm.assert_series_equal(res, exp, check_index_type=True) | ||||
|  | ||||
|         # concat | ||||
|         # GH#39817 | ||||
|         res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True) | ||||
|         tm.assert_series_equal(res, exp, check_index_type=True) | ||||
|  | ||||
|         # 3 elements | ||||
|         # GH#39817 | ||||
|         res = Series(vals1)._append([Series(vals2), Series(vals3)], ignore_index=True) | ||||
|         exp = Series(exp_data3, dtype=exp_series_dtype) | ||||
|         tm.assert_series_equal(res, exp) | ||||
|  | ||||
|         # GH#39817 | ||||
|         res = pd.concat( | ||||
|             [Series(vals1), Series(vals2), Series(vals3)], | ||||
|             ignore_index=True, | ||||
|         ) | ||||
|         tm.assert_series_equal(res, exp) | ||||
|  | ||||
|     def test_concatlike_common_coerce_to_pandas_object(self): | ||||
|         # GH 13626 | ||||
|         # result must be Timestamp/Timedelta, not datetime.datetime/timedelta | ||||
|         dti = pd.DatetimeIndex(["2011-01-01", "2011-01-02"]) | ||||
|         tdi = pd.TimedeltaIndex(["1 days", "2 days"]) | ||||
|  | ||||
|         exp = Index( | ||||
|             [ | ||||
|                 pd.Timestamp("2011-01-01"), | ||||
|                 pd.Timestamp("2011-01-02"), | ||||
|                 pd.Timedelta("1 days"), | ||||
|                 pd.Timedelta("2 days"), | ||||
|             ] | ||||
|         ) | ||||
|  | ||||
|         res = dti.append(tdi) | ||||
|         tm.assert_index_equal(res, exp) | ||||
|         assert isinstance(res[0], pd.Timestamp) | ||||
|         assert isinstance(res[-1], pd.Timedelta) | ||||
|  | ||||
|         dts = Series(dti) | ||||
|         tds = Series(tdi) | ||||
|         res = dts._append(tds) | ||||
|         tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) | ||||
|         assert isinstance(res.iloc[0], pd.Timestamp) | ||||
|         assert isinstance(res.iloc[-1], pd.Timedelta) | ||||
|  | ||||
|         res = pd.concat([dts, tds]) | ||||
|         tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) | ||||
|         assert isinstance(res.iloc[0], pd.Timestamp) | ||||
|         assert isinstance(res.iloc[-1], pd.Timedelta) | ||||
|  | ||||
|     def test_concatlike_datetimetz(self, tz_aware_fixture): | ||||
|         tz = tz_aware_fixture | ||||
|         # GH 7795 | ||||
|         dti1 = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz) | ||||
|         dti2 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"], tz=tz) | ||||
|  | ||||
|         exp = pd.DatetimeIndex( | ||||
|             ["2011-01-01", "2011-01-02", "2012-01-01", "2012-01-02"], tz=tz | ||||
|         ) | ||||
|  | ||||
|         res = dti1.append(dti2) | ||||
|         tm.assert_index_equal(res, exp) | ||||
|  | ||||
|         dts1 = Series(dti1) | ||||
|         dts2 = Series(dti2) | ||||
|         res = dts1._append(dts2) | ||||
|         tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) | ||||
|  | ||||
|         res = pd.concat([dts1, dts2]) | ||||
|         tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) | ||||
|  | ||||
|     @pytest.mark.parametrize("tz", ["UTC", "US/Eastern", "Asia/Tokyo", "EST5EDT"]) | ||||
|     def test_concatlike_datetimetz_short(self, tz): | ||||
|         # GH#7795 | ||||
|         ix1 = pd.date_range(start="2014-07-15", end="2014-07-17", freq="D", tz=tz) | ||||
|         ix2 = pd.DatetimeIndex(["2014-07-11", "2014-07-21"], tz=tz) | ||||
|         df1 = DataFrame(0, index=ix1, columns=["A", "B"]) | ||||
|         df2 = DataFrame(0, index=ix2, columns=["A", "B"]) | ||||
|  | ||||
|         exp_idx = pd.DatetimeIndex( | ||||
|             ["2014-07-15", "2014-07-16", "2014-07-17", "2014-07-11", "2014-07-21"], | ||||
|             tz=tz, | ||||
|         ).as_unit("ns") | ||||
|         exp = DataFrame(0, index=exp_idx, columns=["A", "B"]) | ||||
|  | ||||
|         tm.assert_frame_equal(df1._append(df2), exp) | ||||
|         tm.assert_frame_equal(pd.concat([df1, df2]), exp) | ||||
|  | ||||
|     def test_concatlike_datetimetz_to_object(self, tz_aware_fixture): | ||||
|         """Datetime data whose timezones do not match is combined as object dtype. | ||||
|  | ||||
|         Checked for Index.append, Series._append and pd.concat. | ||||
|         """ | ||||
|         tz = tz_aware_fixture | ||||
|         # GH 13660 | ||||
|  | ||||
|         # tz-aware combined with tz-naive coerces to object | ||||
|         dti1 = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz) | ||||
|         dti2 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"]) | ||||
|  | ||||
|         exp = Index( | ||||
|             [ | ||||
|                 pd.Timestamp("2011-01-01", tz=tz), | ||||
|                 pd.Timestamp("2011-01-02", tz=tz), | ||||
|                 pd.Timestamp("2012-01-01"), | ||||
|                 pd.Timestamp("2012-01-02"), | ||||
|             ], | ||||
|             dtype=object, | ||||
|         ) | ||||
|  | ||||
|         res = dti1.append(dti2) | ||||
|         tm.assert_index_equal(res, exp) | ||||
|  | ||||
|         # Series keep their original labels, hence index [0, 1, 0, 1] | ||||
|         dts1 = Series(dti1) | ||||
|         dts2 = Series(dti2) | ||||
|         res = dts1._append(dts2) | ||||
|         tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) | ||||
|  | ||||
|         res = pd.concat([dts1, dts2]) | ||||
|         tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) | ||||
|  | ||||
|         # fixture tz combined with an explicit "US/Pacific" tz | ||||
|         dti3 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"], tz="US/Pacific") | ||||
|  | ||||
|         exp = Index( | ||||
|             [ | ||||
|                 pd.Timestamp("2011-01-01", tz=tz), | ||||
|                 pd.Timestamp("2011-01-02", tz=tz), | ||||
|                 pd.Timestamp("2012-01-01", tz="US/Pacific"), | ||||
|                 pd.Timestamp("2012-01-02", tz="US/Pacific"), | ||||
|             ], | ||||
|             dtype=object, | ||||
|         ) | ||||
|  | ||||
|         res = dti1.append(dti3) | ||||
|         tm.assert_index_equal(res, exp) | ||||
|  | ||||
|         dts1 = Series(dti1) | ||||
|         dts3 = Series(dti3) | ||||
|         res = dts1._append(dts3) | ||||
|         tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) | ||||
|  | ||||
|         res = pd.concat([dts1, dts3]) | ||||
|         tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) | ||||
|  | ||||
|     def test_concatlike_common_period(self): | ||||
|         """Period data sharing freq "M" stays period-typed when combined.""" | ||||
|         # GH 13660 | ||||
|         pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M") | ||||
|         pi2 = pd.PeriodIndex(["2012-01", "2012-02"], freq="M") | ||||
|  | ||||
|         exp = pd.PeriodIndex(["2011-01", "2011-02", "2012-01", "2012-02"], freq="M") | ||||
|  | ||||
|         res = pi1.append(pi2) | ||||
|         tm.assert_index_equal(res, exp) | ||||
|  | ||||
|         # same check through Series._append and pd.concat; original labels | ||||
|         # are kept, hence index [0, 1, 0, 1] | ||||
|         ps1 = Series(pi1) | ||||
|         ps2 = Series(pi2) | ||||
|         res = ps1._append(ps2) | ||||
|         tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) | ||||
|  | ||||
|         res = pd.concat([ps1, ps2]) | ||||
|         tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) | ||||
|  | ||||
|     def test_concatlike_common_period_diff_freq_to_object(self): | ||||
|         """Period data with different freq ("M" vs "D") combines as object dtype.""" | ||||
|         # GH 13221 | ||||
|         pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M") | ||||
|         pi2 = pd.PeriodIndex(["2012-01-01", "2012-02-01"], freq="D") | ||||
|  | ||||
|         # each element keeps its own freq inside the object-dtype result | ||||
|         exp = Index( | ||||
|             [ | ||||
|                 pd.Period("2011-01", freq="M"), | ||||
|                 pd.Period("2011-02", freq="M"), | ||||
|                 pd.Period("2012-01-01", freq="D"), | ||||
|                 pd.Period("2012-02-01", freq="D"), | ||||
|             ], | ||||
|             dtype=object, | ||||
|         ) | ||||
|  | ||||
|         res = pi1.append(pi2) | ||||
|         tm.assert_index_equal(res, exp) | ||||
|  | ||||
|         ps1 = Series(pi1) | ||||
|         ps2 = Series(pi2) | ||||
|         res = ps1._append(ps2) | ||||
|         tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) | ||||
|  | ||||
|         res = pd.concat([ps1, ps2]) | ||||
|         tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) | ||||
|  | ||||
|     def test_concatlike_common_period_mixed_dt_to_object(self): | ||||
|         """Period and timedelta data combine as object dtype, in either order.""" | ||||
|         # GH 13221 | ||||
|         # different datetimelike | ||||
|         pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M") | ||||
|         tdi = pd.TimedeltaIndex(["1 days", "2 days"]) | ||||
|         exp = Index( | ||||
|             [ | ||||
|                 pd.Period("2011-01", freq="M"), | ||||
|                 pd.Period("2011-02", freq="M"), | ||||
|                 pd.Timedelta("1 days"), | ||||
|                 pd.Timedelta("2 days"), | ||||
|             ], | ||||
|             dtype=object, | ||||
|         ) | ||||
|  | ||||
|         res = pi1.append(tdi) | ||||
|         tm.assert_index_equal(res, exp) | ||||
|  | ||||
|         ps1 = Series(pi1) | ||||
|         tds = Series(tdi) | ||||
|         res = ps1._append(tds) | ||||
|         tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) | ||||
|  | ||||
|         res = pd.concat([ps1, tds]) | ||||
|         tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) | ||||
|  | ||||
|         # inverse: timedelta first, period second | ||||
|         exp = Index( | ||||
|             [ | ||||
|                 pd.Timedelta("1 days"), | ||||
|                 pd.Timedelta("2 days"), | ||||
|                 pd.Period("2011-01", freq="M"), | ||||
|                 pd.Period("2011-02", freq="M"), | ||||
|             ], | ||||
|             dtype=object, | ||||
|         ) | ||||
|  | ||||
|         res = tdi.append(pi1) | ||||
|         tm.assert_index_equal(res, exp) | ||||
|  | ||||
|         ps1 = Series(pi1) | ||||
|         tds = Series(tdi) | ||||
|         res = tds._append(ps1) | ||||
|         tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) | ||||
|  | ||||
|         res = pd.concat([tds, ps1]) | ||||
|         tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) | ||||
|  | ||||
|     def test_concat_categorical(self): | ||||
|         """Two categorical Series stay categorical only if their categories match. | ||||
|  | ||||
|         Each case is checked through both pd.concat and Series._append with | ||||
|         ignore_index=True. | ||||
|         """ | ||||
|         # GH 13524 | ||||
|  | ||||
|         # same categories -> category | ||||
|         s1 = Series([1, 2, np.nan], dtype="category") | ||||
|         s2 = Series([2, 1, 2], dtype="category") | ||||
|  | ||||
|         exp = Series([1, 2, np.nan, 2, 1, 2], dtype="category") | ||||
|         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) | ||||
|  | ||||
|         # partially different categories => not-category | ||||
|         s1 = Series([3, 2], dtype="category") | ||||
|         s2 = Series([2, 1], dtype="category") | ||||
|  | ||||
|         exp = Series([3, 2, 2, 1]) | ||||
|         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) | ||||
|  | ||||
|         # completely different categories (same dtype) => not-category | ||||
|         s1 = Series([10, 11, np.nan], dtype="category") | ||||
|         s2 = Series([np.nan, 1, 3, 2], dtype="category") | ||||
|  | ||||
|         # missing values are present, so the expected result is float64 | ||||
|         exp = Series([10, 11, np.nan, np.nan, 1, 3, 2], dtype=np.float64) | ||||
|         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) | ||||
|  | ||||
|     def test_union_categorical_same_categories_different_order(self): | ||||
|         """Same categories in a different order still concat to a categorical. | ||||
|  | ||||
|         The expected result uses the category order of the first operand. | ||||
|         """ | ||||
|         # https://github.com/pandas-dev/pandas/issues/19096 | ||||
|         a = Series(Categorical(["a", "b", "c"], categories=["a", "b", "c"])) | ||||
|         b = Series(Categorical(["a", "b", "c"], categories=["b", "a", "c"])) | ||||
|         result = pd.concat([a, b], ignore_index=True) | ||||
|         expected = Series( | ||||
|             Categorical(["a", "b", "c", "a", "b", "c"], categories=["a", "b", "c"]) | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_concat_categorical_coercion(self): | ||||
|         """A categorical Series combined with a plain Series is not categorical. | ||||
|  | ||||
|         Every case is checked in both operand orders and through both | ||||
|         pd.concat and Series._append with ignore_index=True. | ||||
|         """ | ||||
|         # GH 13524 | ||||
|  | ||||
|         # category + not-category => not-category | ||||
|         s1 = Series([1, 2, np.nan], dtype="category") | ||||
|         s2 = Series([2, 1, 2]) | ||||
|  | ||||
|         exp = Series([1, 2, np.nan, 2, 1, 2], dtype=np.float64) | ||||
|         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) | ||||
|  | ||||
|         # result shouldn't be affected by 1st elem dtype | ||||
|         exp = Series([2, 1, 2, 1, 2, np.nan], dtype=np.float64) | ||||
|         tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) | ||||
|  | ||||
|         # all values are not in category => not-category | ||||
|         s1 = Series([3, 2], dtype="category") | ||||
|         s2 = Series([2, 1]) | ||||
|  | ||||
|         exp = Series([3, 2, 2, 1]) | ||||
|         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) | ||||
|  | ||||
|         exp = Series([2, 1, 3, 2]) | ||||
|         tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) | ||||
|  | ||||
|         # completely different categories => not-category | ||||
|         s1 = Series([10, 11, np.nan], dtype="category") | ||||
|         s2 = Series([1, 3, 2]) | ||||
|  | ||||
|         exp = Series([10, 11, np.nan, 1, 3, 2], dtype=np.float64) | ||||
|         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) | ||||
|  | ||||
|         exp = Series([1, 3, 2, 10, 11, np.nan], dtype=np.float64) | ||||
|         tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) | ||||
|  | ||||
|         # different dtype => not-category | ||||
|         s1 = Series([10, 11, np.nan], dtype="category") | ||||
|         s2 = Series(["a", "b", "c"]) | ||||
|  | ||||
|         exp = Series([10, 11, np.nan, "a", "b", "c"]) | ||||
|         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) | ||||
|  | ||||
|         exp = Series(["a", "b", "c", 10, 11, np.nan]) | ||||
|         tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) | ||||
|  | ||||
|         # if normal series only contains NaN-likes => not-category | ||||
|         s1 = Series([10, 11], dtype="category") | ||||
|         s2 = Series([np.nan, np.nan, np.nan]) | ||||
|  | ||||
|         exp = Series([10, 11, np.nan, np.nan, np.nan]) | ||||
|         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) | ||||
|  | ||||
|         exp = Series([np.nan, np.nan, np.nan, 10, 11]) | ||||
|         tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) | ||||
|  | ||||
|     def test_concat_categorical_3elem_coercion(self): | ||||
|         """Two categorical Series plus one plain Series give a non-categorical result. | ||||
|  | ||||
|         Each case is checked with the plain Series last and first. | ||||
|         """ | ||||
|         # GH 13524 | ||||
|  | ||||
|         # mixed dtypes => not-category | ||||
|         s1 = Series([1, 2, np.nan], dtype="category") | ||||
|         s2 = Series([2, 1, 2], dtype="category") | ||||
|         s3 = Series([1, 2, 1, 2, np.nan]) | ||||
|  | ||||
|         exp = Series([1, 2, np.nan, 2, 1, 2, 1, 2, 1, 2, np.nan], dtype="float") | ||||
|         tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s1._append([s2, s3], ignore_index=True), exp) | ||||
|  | ||||
|         exp = Series([1, 2, 1, 2, np.nan, 1, 2, np.nan, 2, 1, 2], dtype="float") | ||||
|         tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s3._append([s1, s2], ignore_index=True), exp) | ||||
|  | ||||
|         # values are all in either category => not-category | ||||
|         s1 = Series([4, 5, 6], dtype="category") | ||||
|         s2 = Series([1, 2, 3], dtype="category") | ||||
|         s3 = Series([1, 3, 4]) | ||||
|  | ||||
|         exp = Series([4, 5, 6, 1, 2, 3, 1, 3, 4]) | ||||
|         tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s1._append([s2, s3], ignore_index=True), exp) | ||||
|  | ||||
|         exp = Series([1, 3, 4, 4, 5, 6, 1, 2, 3]) | ||||
|         tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s3._append([s1, s2], ignore_index=True), exp) | ||||
|  | ||||
|         # values of the plain Series are in neither category => not-category | ||||
|         s1 = Series([4, 5, 6], dtype="category") | ||||
|         s2 = Series([1, 2, 3], dtype="category") | ||||
|         s3 = Series([10, 11, 12]) | ||||
|  | ||||
|         exp = Series([4, 5, 6, 1, 2, 3, 10, 11, 12]) | ||||
|         tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s1._append([s2, s3], ignore_index=True), exp) | ||||
|  | ||||
|         exp = Series([10, 11, 12, 4, 5, 6, 1, 2, 3]) | ||||
|         tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s3._append([s1, s2], ignore_index=True), exp) | ||||
|  | ||||
|     def test_concat_categorical_multi_coercion(self): | ||||
|         """Six mixed categorical/plain Series give a non-categorical result. | ||||
|  | ||||
|         Checked in forward (s1..s6) and reverse (s6..s1) order. | ||||
|         """ | ||||
|         # GH 13524 | ||||
|  | ||||
|         s1 = Series([1, 3], dtype="category") | ||||
|         s2 = Series([3, 4], dtype="category") | ||||
|         s3 = Series([2, 3]) | ||||
|         s4 = Series([2, 2], dtype="category") | ||||
|         s5 = Series([1, np.nan]) | ||||
|         s6 = Series([1, 3, 2], dtype="category") | ||||
|  | ||||
|         # mixed dtype, values are all in categories => not-category | ||||
|         exp = Series([1, 3, 3, 4, 2, 3, 2, 2, 1, np.nan, 1, 3, 2]) | ||||
|         res = pd.concat([s1, s2, s3, s4, s5, s6], ignore_index=True) | ||||
|         tm.assert_series_equal(res, exp) | ||||
|         res = s1._append([s2, s3, s4, s5, s6], ignore_index=True) | ||||
|         tm.assert_series_equal(res, exp) | ||||
|  | ||||
|         # same inputs in reverse order | ||||
|         exp = Series([1, 3, 2, 1, np.nan, 2, 2, 2, 3, 3, 4, 1, 3]) | ||||
|         res = pd.concat([s6, s5, s4, s3, s2, s1], ignore_index=True) | ||||
|         tm.assert_series_equal(res, exp) | ||||
|         res = s6._append([s5, s4, s3, s2, s1], ignore_index=True) | ||||
|         tm.assert_series_equal(res, exp) | ||||
|  | ||||
|     def test_concat_categorical_ordered(self): | ||||
|         """Ordered categoricals over the same values stay ordered categorical.""" | ||||
|         # GH 13524 | ||||
|  | ||||
|         s1 = Series(Categorical([1, 2, np.nan], ordered=True)) | ||||
|         s2 = Series(Categorical([2, 1, 2], ordered=True)) | ||||
|  | ||||
|         # two operands | ||||
|         exp = Series(Categorical([1, 2, np.nan, 2, 1, 2], ordered=True)) | ||||
|         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) | ||||
|  | ||||
|         # three operands, s1 used twice | ||||
|         exp = Series(Categorical([1, 2, np.nan, 2, 1, 2, 1, 2, np.nan], ordered=True)) | ||||
|         tm.assert_series_equal(pd.concat([s1, s2, s1], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s1._append([s2, s1], ignore_index=True), exp) | ||||
|  | ||||
|     def test_concat_categorical_coercion_nan(self): | ||||
|         """Edge cases where one or both operands hold only NaN values. | ||||
|  | ||||
|         Category + plain Series gives a plain Series; only category + category | ||||
|         stays categorical. | ||||
|         """ | ||||
|         # GH 13524 | ||||
|  | ||||
|         # some edge cases | ||||
|         # category + not-category => not category | ||||
|         s1 = Series(np.array([np.nan, np.nan], dtype=np.float64), dtype="category") | ||||
|         s2 = Series([np.nan, 1]) | ||||
|  | ||||
|         exp = Series([np.nan, np.nan, np.nan, 1]) | ||||
|         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) | ||||
|  | ||||
|         s1 = Series([1, np.nan], dtype="category") | ||||
|         s2 = Series([np.nan, np.nan]) | ||||
|  | ||||
|         exp = Series([1, np.nan, np.nan, np.nan], dtype="float") | ||||
|         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) | ||||
|  | ||||
|         # mixed dtype, all nan-likes => not-category | ||||
|         s1 = Series([np.nan, np.nan], dtype="category") | ||||
|         s2 = Series([np.nan, np.nan]) | ||||
|  | ||||
|         # same expected result regardless of operand order | ||||
|         exp = Series([np.nan, np.nan, np.nan, np.nan]) | ||||
|         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) | ||||
|         tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) | ||||
|  | ||||
|         # all category nan-likes => category | ||||
|         s1 = Series([np.nan, np.nan], dtype="category") | ||||
|         s2 = Series([np.nan, np.nan], dtype="category") | ||||
|  | ||||
|         exp = Series([np.nan, np.nan, np.nan, np.nan], dtype="category") | ||||
|  | ||||
|         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) | ||||
|         tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) | ||||
|  | ||||
|     def test_concat_categorical_empty(self): | ||||
|         """Concat/append where at least one operand is an empty categorical Series. | ||||
|  | ||||
|         Combining an empty categorical with a non-empty Series is expected to | ||||
|         emit a FutureWarning matching ``msg``; the empty + empty cases are | ||||
|         asserted without a warning context. | ||||
|         """ | ||||
|         # GH 13524 | ||||
|  | ||||
|         s1 = Series([], dtype="category") | ||||
|         s2 = Series([1, 2], dtype="category") | ||||
|  | ||||
|         msg = "The behavior of array concatenation with empty entries is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2) | ||||
|             tm.assert_series_equal(s1._append(s2, ignore_index=True), s2) | ||||
|  | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2) | ||||
|             tm.assert_series_equal(s2._append(s1, ignore_index=True), s2) | ||||
|  | ||||
|         # both operands empty and categorical | ||||
|         s1 = Series([], dtype="category") | ||||
|         s2 = Series([], dtype="category") | ||||
|  | ||||
|         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2) | ||||
|         tm.assert_series_equal(s1._append(s2, ignore_index=True), s2) | ||||
|  | ||||
|         s1 = Series([], dtype="category") | ||||
|         s2 = Series([], dtype="object") | ||||
|  | ||||
|         # different dtype => not-category | ||||
|         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2) | ||||
|         tm.assert_series_equal(s1._append(s2, ignore_index=True), s2) | ||||
|         tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2) | ||||
|         tm.assert_series_equal(s2._append(s1, ignore_index=True), s2) | ||||
|  | ||||
|         s1 = Series([], dtype="category") | ||||
|         s2 = Series([np.nan, np.nan]) | ||||
|  | ||||
|         # empty Series is ignored | ||||
|         exp = Series([np.nan, np.nan]) | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) | ||||
|             tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) | ||||
|  | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) | ||||
|             tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) | ||||
|  | ||||
|     def test_categorical_concat_append(self): | ||||
|         """Frame-level concat/_append of a categorical column. | ||||
|  | ||||
|         Identical categories keep the categorical column; differing categories | ||||
|         are expected to give a plain (non-categorical) column. | ||||
|         """ | ||||
|         cat = Categorical(["a", "b"], categories=["a", "b"]) | ||||
|         vals = [1, 2] | ||||
|         df = DataFrame({"cats": cat, "vals": vals}) | ||||
|         cat2 = Categorical(["a", "b", "a", "b"], categories=["a", "b"]) | ||||
|         vals2 = [1, 2, 1, 2] | ||||
|         # no ignore_index here, so the original labels repeat: [0, 1, 0, 1] | ||||
|         exp = DataFrame({"cats": cat2, "vals": vals2}, index=Index([0, 1, 0, 1])) | ||||
|  | ||||
|         tm.assert_frame_equal(pd.concat([df, df]), exp) | ||||
|         tm.assert_frame_equal(df._append(df), exp) | ||||
|  | ||||
|         # GH 13524 can concat different categories | ||||
|         cat3 = Categorical(["a", "b"], categories=["a", "b", "c"]) | ||||
|         vals3 = [1, 2] | ||||
|         df_different_categories = DataFrame({"cats": cat3, "vals": vals3}) | ||||
|  | ||||
|         res = pd.concat([df, df_different_categories], ignore_index=True) | ||||
|         exp = DataFrame({"cats": list("abab"), "vals": [1, 2, 1, 2]}) | ||||
|         tm.assert_frame_equal(res, exp) | ||||
|  | ||||
|         res = df._append(df_different_categories, ignore_index=True) | ||||
|         tm.assert_frame_equal(res, exp) | ||||
| @ -0,0 +1,271 @@ | ||||
| from datetime import datetime | ||||
|  | ||||
| import numpy as np | ||||
|  | ||||
| from pandas.core.dtypes.dtypes import CategoricalDtype | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     DataFrame, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestCategoricalConcat: | ||||
|     def test_categorical_concat(self, sort): | ||||
|         """Row-wise concat where only the second frame has a categorical column. | ||||
|  | ||||
|         Rows coming from the first frame get missing values in that column, and | ||||
|         the column keeps the categorical dtype of the second frame. ``sort`` is | ||||
|         forwarded to pd.concat. | ||||
|         """ | ||||
|         # See GH 10177 | ||||
|         df1 = DataFrame( | ||||
|             np.arange(18, dtype="int64").reshape(6, 3), columns=["a", "b", "c"] | ||||
|         ) | ||||
|  | ||||
|         df2 = DataFrame(np.arange(14, dtype="int64").reshape(7, 2), columns=["a", "c"]) | ||||
|  | ||||
|         cat_values = ["one", "one", "two", "one", "two", "two", "one"] | ||||
|         df2["h"] = Series(Categorical(cat_values)) | ||||
|  | ||||
|         res = pd.concat((df1, df2), axis=0, ignore_index=True, sort=sort) | ||||
|         exp = DataFrame( | ||||
|             { | ||||
|                 "a": [0, 3, 6, 9, 12, 15, 0, 2, 4, 6, 8, 10, 12], | ||||
|                 # df2 has no "b" column, so its 7 rows are NaN here | ||||
|                 "b": [ | ||||
|                     1, | ||||
|                     4, | ||||
|                     7, | ||||
|                     10, | ||||
|                     13, | ||||
|                     16, | ||||
|                     np.nan, | ||||
|                     np.nan, | ||||
|                     np.nan, | ||||
|                     np.nan, | ||||
|                     np.nan, | ||||
|                     np.nan, | ||||
|                     np.nan, | ||||
|                 ], | ||||
|                 "c": [2, 5, 8, 11, 14, 17, 1, 3, 5, 7, 9, 11, 13], | ||||
|                 # df1 has no "h" column, so its 6 rows are missing | ||||
|                 "h": [None] * 6 + cat_values, | ||||
|             } | ||||
|         ) | ||||
|         exp["h"] = exp["h"].astype(df2["h"].dtype) | ||||
|         tm.assert_frame_equal(res, exp) | ||||
|  | ||||
|     def test_categorical_concat_dtypes(self, using_infer_string): | ||||
|         """Column-wise concat keeps a distinct dtype per column. | ||||
|  | ||||
|         Exactly one column each is expected to compare equal to the | ||||
|         object/"str", "int64" and "category" dtypes. | ||||
|         """ | ||||
|         # GH8143 | ||||
|         index = ["cat", "obj", "num"] | ||||
|         cat = Categorical(["a", "b", "c"]) | ||||
|         obj = Series(["a", "b", "c"]) | ||||
|         num = Series([1, 2, 3]) | ||||
|         df = pd.concat([Series(cat), obj, num], axis=1, keys=index) | ||||
|  | ||||
|         # the string column is compared against "str" when using_infer_string | ||||
|         # is truthy, otherwise against object | ||||
|         result = df.dtypes == (object if not using_infer_string else "str") | ||||
|         expected = Series([False, True, False], index=index) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = df.dtypes == "int64" | ||||
|         expected = Series([False, False, True], index=index) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = df.dtypes == "category" | ||||
|         expected = Series([True, False, False], index=index) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_concat_categoricalindex(self): | ||||
|         """Column-wise concat of Series indexed by CategoricalIndex. | ||||
|  | ||||
|         The expected row index is a CategoricalIndex over the same categories, | ||||
|         with NaN where a Series has no value for a label. | ||||
|         """ | ||||
|         # GH 16111, categories that aren't lexsorted | ||||
|         categories = [9, 0, 1, 2, 3] | ||||
|  | ||||
|         a = Series(1, index=pd.CategoricalIndex([9, 0], categories=categories)) | ||||
|         b = Series(2, index=pd.CategoricalIndex([0, 1], categories=categories)) | ||||
|         c = Series(3, index=pd.CategoricalIndex([1, 2], categories=categories)) | ||||
|  | ||||
|         result = pd.concat([a, b, c], axis=1) | ||||
|  | ||||
|         # expected labels follow the order [9, 0, 1, 2] | ||||
|         exp_idx = pd.CategoricalIndex([9, 0, 1, 2], categories=categories) | ||||
|         exp = DataFrame( | ||||
|             { | ||||
|                 0: [1, 1, np.nan, np.nan], | ||||
|                 1: [np.nan, 2, 2, np.nan], | ||||
|                 2: [np.nan, np.nan, 3, 3], | ||||
|             }, | ||||
|             columns=[0, 1, 2], | ||||
|             index=exp_idx, | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, exp) | ||||
|  | ||||
|     def test_categorical_concat_preserve(self): | ||||
|         """Category dtype is kept when the operands share categories. | ||||
|  | ||||
|         Covers Series with differing categories (plain result), a Series | ||||
|         concatenated with itself, and a frame with a categorical column. | ||||
|         """ | ||||
|         # GH 8641  series concat not preserving category dtype | ||||
|         # GH 13524 can concat different categories | ||||
|         s = Series(list("abc"), dtype="category") | ||||
|         s2 = Series(list("abd"), dtype="category") | ||||
|  | ||||
|         # differing categories: expected result is a plain Series | ||||
|         exp = Series(list("abcabd")) | ||||
|         res = pd.concat([s, s2], ignore_index=True) | ||||
|         tm.assert_series_equal(res, exp) | ||||
|  | ||||
|         exp = Series(list("abcabc"), dtype="category") | ||||
|         res = pd.concat([s, s], ignore_index=True) | ||||
|         tm.assert_series_equal(res, exp) | ||||
|  | ||||
|         # without ignore_index the original labels repeat | ||||
|         exp = Series(list("abcabc"), index=[0, 1, 2, 0, 1, 2], dtype="category") | ||||
|         res = pd.concat([s, s]) | ||||
|         tm.assert_series_equal(res, exp) | ||||
|  | ||||
|         a = Series(np.arange(6, dtype="int64")) | ||||
|         b = Series(list("aabbca")) | ||||
|  | ||||
|         # frame column with an explicit, non-alphabetical category order | ||||
|         df2 = DataFrame({"A": a, "B": b.astype(CategoricalDtype(list("cab")))}) | ||||
|         res = pd.concat([df2, df2]) | ||||
|         exp = DataFrame( | ||||
|             { | ||||
|                 "A": pd.concat([a, a]), | ||||
|                 "B": pd.concat([b, b]).astype(CategoricalDtype(list("cab"))), | ||||
|             } | ||||
|         ) | ||||
|         tm.assert_frame_equal(res, exp) | ||||
|  | ||||
|     def test_categorical_index_preserver(self): | ||||
|         """Concat of frames indexed by a categorical column. | ||||
|  | ||||
|         Matching categories keep the categorical index; mismatching categories | ||||
|         are expected to equal a concat of the same frames with object indexes. | ||||
|         """ | ||||
|         a = Series(np.arange(6, dtype="int64")) | ||||
|         b = Series(list("aabbca")) | ||||
|  | ||||
|         df2 = DataFrame( | ||||
|             {"A": a, "B": b.astype(CategoricalDtype(list("cab")))} | ||||
|         ).set_index("B") | ||||
|         result = pd.concat([df2, df2]) | ||||
|         expected = DataFrame( | ||||
|             { | ||||
|                 "A": pd.concat([a, a]), | ||||
|                 "B": pd.concat([b, b]).astype(CategoricalDtype(list("cab"))), | ||||
|             } | ||||
|         ).set_index("B") | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # wrong categories -> uses concat_compat, which casts to object | ||||
|         df3 = DataFrame( | ||||
|             {"A": a, "B": Categorical(b, categories=list("abe"))} | ||||
|         ).set_index("B") | ||||
|         result = pd.concat([df2, df3]) | ||||
|         # build the expectation by casting both indexes to object up front | ||||
|         expected = pd.concat( | ||||
|             [ | ||||
|                 df2.set_axis(df2.index.astype(object), axis=0), | ||||
|                 df3.set_axis(df3.index.astype(object), axis=0), | ||||
|             ] | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_categorical_tz(self): | ||||
|         """tz-aware datetimes concatenated with a categorical of strings. | ||||
|  | ||||
|         The expected Series is built from the raw Timestamps and strings with | ||||
|         no explicit dtype. | ||||
|         """ | ||||
|         # GH-23816 | ||||
|         a = Series(pd.date_range("2017-01-01", periods=2, tz="US/Pacific")) | ||||
|         b = Series(["a", "b"], dtype="category") | ||||
|         result = pd.concat([a, b], ignore_index=True) | ||||
|         expected = Series( | ||||
|             [ | ||||
|                 pd.Timestamp("2017-01-01", tz="US/Pacific"), | ||||
|                 pd.Timestamp("2017-01-02", tz="US/Pacific"), | ||||
|                 "a", | ||||
|                 "b", | ||||
|             ] | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_concat_categorical_datetime(self): | ||||
|         """Categorical datetime columns with different categories. | ||||
|  | ||||
|         The expected column is a plain (non-categorical) Series of datetimes. | ||||
|         """ | ||||
|         # GH-39443 | ||||
|         # each frame holds a single datetime, so the categories differ | ||||
|         df1 = DataFrame( | ||||
|             {"x": Series(datetime(2021, 1, 1), index=[0], dtype="category")} | ||||
|         ) | ||||
|         df2 = DataFrame( | ||||
|             {"x": Series(datetime(2021, 1, 2), index=[1], dtype="category")} | ||||
|         ) | ||||
|  | ||||
|         result = pd.concat([df1, df2]) | ||||
|         expected = DataFrame( | ||||
|             {"x": Series([datetime(2021, 1, 1), datetime(2021, 1, 2)])} | ||||
|         ) | ||||
|  | ||||
|         tm.assert_equal(result, expected) | ||||
|  | ||||
|     def test_concat_categorical_unchanged(self): | ||||
|         """Column-wise concat with a misaligned Series keeps the categorical column. | ||||
|  | ||||
|         The Series is indexed [0, 1, 3] against the frame's [0, 1, 2], so both | ||||
|         expected columns gain one missing value. | ||||
|         """ | ||||
|         # GH-12007 | ||||
|         # test fix for when concat on categorical and float | ||||
|         # coerces dtype categorical -> float | ||||
|         df = DataFrame(Series(["a", "b", "c"], dtype="category", name="A")) | ||||
|         ser = Series([0, 1, 2], index=[0, 1, 3], name="B") | ||||
|         result = pd.concat([df, ser], axis=1) | ||||
|         expected = DataFrame( | ||||
|             { | ||||
|                 "A": Series(["a", "b", "c", np.nan], dtype="category"), | ||||
|                 "B": Series([0, 1, np.nan, 2], dtype="float"), | ||||
|             } | ||||
|         ) | ||||
|         tm.assert_equal(result, expected) | ||||
|  | ||||
|     def test_categorical_concat_gh7864(self): | ||||
|         # GH 7864 | ||||
|         # make sure ordering is preserved | ||||
|         df = DataFrame({"id": [1, 2, 3, 4, 5, 6], "raw_grade": list("abbaae")}) | ||||
|         df["grade"] = Categorical(df["raw_grade"]) | ||||
|         df["grade"].cat.set_categories(["e", "a", "b"]) | ||||
|  | ||||
|         df1 = df[0:3] | ||||
|         df2 = df[3:] | ||||
|  | ||||
|         tm.assert_index_equal(df["grade"].cat.categories, df1["grade"].cat.categories) | ||||
|         tm.assert_index_equal(df["grade"].cat.categories, df2["grade"].cat.categories) | ||||
|  | ||||
|         dfx = pd.concat([df1, df2]) | ||||
|         tm.assert_index_equal(df["grade"].cat.categories, dfx["grade"].cat.categories) | ||||
|  | ||||
|         dfa = df1._append(df2) | ||||
|         tm.assert_index_equal(df["grade"].cat.categories, dfa["grade"].cat.categories) | ||||
|  | ||||
|     def test_categorical_index_upcast(self): | ||||
|         """Categorical indexes with different categories concat to a plain index. | ||||
|  | ||||
|         Checked for DataFrame and for Series. | ||||
|         """ | ||||
|         # GH 17629 | ||||
|         # test upcasting to object when concatenating on categorical indexes | ||||
|         # with non-identical categories | ||||
|  | ||||
|         a = DataFrame({"foo": [1, 2]}, index=Categorical(["foo", "bar"])) | ||||
|         b = DataFrame({"foo": [4, 3]}, index=Categorical(["baz", "bar"])) | ||||
|  | ||||
|         res = pd.concat([a, b]) | ||||
|         # expected index is built from a plain list of labels | ||||
|         exp = DataFrame({"foo": [1, 2, 4, 3]}, index=["foo", "bar", "baz", "bar"]) | ||||
|  | ||||
|         tm.assert_equal(res, exp) | ||||
|  | ||||
|         # same scenario with Series | ||||
|         a = Series([1, 2], index=Categorical(["foo", "bar"])) | ||||
|         b = Series([4, 3], index=Categorical(["baz", "bar"])) | ||||
|  | ||||
|         res = pd.concat([a, b]) | ||||
|         exp = Series([1, 2, 4, 3], index=["foo", "bar", "baz", "bar"]) | ||||
|  | ||||
|         tm.assert_equal(res, exp) | ||||
|  | ||||
|     def test_categorical_missing_from_one_frame(self): | ||||
|         """A categorical column present in only one frame stays categorical. | ||||
|  | ||||
|         Rows from the frame without the column are expected to be missing | ||||
|         (code -1) in the result. | ||||
|         """ | ||||
|         # GH 25412 | ||||
|         df1 = DataFrame({"f1": [1, 2, 3]}) | ||||
|         df2 = DataFrame({"f1": [2, 3, 1], "f2": Series([4, 4, 4]).astype("category")}) | ||||
|         result = pd.concat([df1, df2], sort=True) | ||||
|         dtype = CategoricalDtype([4]) | ||||
|         expected = DataFrame( | ||||
|             { | ||||
|                 "f1": [1, 2, 3, 2, 3, 1], | ||||
|                 # -1 marks a missing value; 0 points at the only category, 4 | ||||
|                 "f2": Categorical.from_codes([-1, -1, -1, 0, 0, 0], dtype=dtype), | ||||
|             }, | ||||
|             index=[0, 1, 2, 0, 1, 2], | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_categorical_same_categories_different_order(self): | ||||
|         """Unordered CategoricalIndexes with reordered categories keep their labels. | ||||
|  | ||||
|         The expected index is categorical with the first frame's category order. | ||||
|         """ | ||||
|         # https://github.com/pandas-dev/pandas/issues/24845 | ||||
|  | ||||
|         # c1 and c2 share the categories {"a", "b"} but list them differently | ||||
|         c1 = pd.CategoricalIndex(["a", "a"], categories=["a", "b"], ordered=False) | ||||
|         c2 = pd.CategoricalIndex(["b", "b"], categories=["b", "a"], ordered=False) | ||||
|         c3 = pd.CategoricalIndex( | ||||
|             ["a", "a", "b", "b"], categories=["a", "b"], ordered=False | ||||
|         ) | ||||
|  | ||||
|         df1 = DataFrame({"A": [1, 2]}, index=c1) | ||||
|         df2 = DataFrame({"A": [3, 4]}, index=c2) | ||||
|  | ||||
|         result = pd.concat((df1, df2)) | ||||
|         expected = DataFrame({"A": [1, 2, 3, 4]}, index=c3) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
| @ -0,0 +1,917 @@ | ||||
| from collections import ( | ||||
|     abc, | ||||
|     deque, | ||||
| ) | ||||
| from collections.abc import Iterator | ||||
| from datetime import datetime | ||||
| from decimal import Decimal | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._config import using_string_dtype | ||||
|  | ||||
| from pandas.errors import InvalidIndexError | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     PeriodIndex, | ||||
|     Series, | ||||
|     concat, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import SparseArray | ||||
| from pandas.tests.extension.decimal import to_decimal | ||||
|  | ||||
|  | ||||
| class TestConcatenate: | ||||
|     def test_append_concat(self): | ||||
|         # GH#1815 | ||||
|         d1 = date_range("12/31/1990", "12/31/1999", freq="YE-DEC") | ||||
|         d2 = date_range("12/31/2000", "12/31/2009", freq="YE-DEC") | ||||
|  | ||||
|         s1 = Series(np.random.default_rng(2).standard_normal(10), d1) | ||||
|         s2 = Series(np.random.default_rng(2).standard_normal(10), d2) | ||||
|  | ||||
|         s1 = s1.to_period() | ||||
|         s2 = s2.to_period() | ||||
|  | ||||
|         # drops index | ||||
|         result = concat([s1, s2]) | ||||
|         assert isinstance(result.index, PeriodIndex) | ||||
|         assert result.index[0] == s1.index[0] | ||||
|  | ||||
|     # test is not written to work with string dtype (checks .base) | ||||
|     @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") | ||||
|     def test_concat_copy(self, using_array_manager, using_copy_on_write): | ||||
|         df = DataFrame(np.random.default_rng(2).standard_normal((4, 3))) | ||||
|         df2 = DataFrame(np.random.default_rng(2).integers(0, 10, size=4).reshape(4, 1)) | ||||
|         df3 = DataFrame({5: "foo"}, index=range(4)) | ||||
|  | ||||
|         # These are actual copies. | ||||
|         result = concat([df, df2, df3], axis=1, copy=True) | ||||
|  | ||||
|         if not using_copy_on_write: | ||||
|             for arr in result._mgr.arrays: | ||||
|                 assert not any( | ||||
|                     np.shares_memory(arr, y) | ||||
|                     for x in [df, df2, df3] | ||||
|                     for y in x._mgr.arrays | ||||
|                 ) | ||||
|         else: | ||||
|             for arr in result._mgr.arrays: | ||||
|                 assert arr.base is not None | ||||
|  | ||||
|         # These are the same. | ||||
|         result = concat([df, df2, df3], axis=1, copy=False) | ||||
|  | ||||
|         for arr in result._mgr.arrays: | ||||
|             if arr.dtype.kind == "f": | ||||
|                 assert arr.base is df._mgr.arrays[0].base | ||||
|             elif arr.dtype.kind in ["i", "u"]: | ||||
|                 assert arr.base is df2._mgr.arrays[0].base | ||||
|             elif arr.dtype == object: | ||||
|                 if using_array_manager: | ||||
|                     # we get the same array object, which has no base | ||||
|                     assert arr is df3._mgr.arrays[0] | ||||
|                 else: | ||||
|                     assert arr.base is not None | ||||
|                 assert arr.base is not None | ||||
|  | ||||
|         # Float block was consolidated. | ||||
|         df4 = DataFrame(np.random.default_rng(2).standard_normal((4, 1))) | ||||
|         result = concat([df, df2, df3, df4], axis=1, copy=False) | ||||
|         for arr in result._mgr.arrays: | ||||
|             if arr.dtype.kind == "f": | ||||
|                 if using_array_manager or using_copy_on_write: | ||||
|                     # this is a view on some array in either df or df4 | ||||
|                     assert any( | ||||
|                         np.shares_memory(arr, other) | ||||
|                         for other in df._mgr.arrays + df4._mgr.arrays | ||||
|                     ) | ||||
|                 else: | ||||
|                     # the block was consolidated, so we got a copy anyway | ||||
|                     assert arr.base is None | ||||
|             elif arr.dtype.kind in ["i", "u"]: | ||||
|                 assert arr.base is df2._mgr.arrays[0].base | ||||
|             elif arr.dtype == object: | ||||
|                 # this is a view on df3 | ||||
|                 assert any(np.shares_memory(arr, other) for other in df3._mgr.arrays) | ||||
|  | ||||
|     def test_concat_with_group_keys(self): | ||||
|         # axis=0 | ||||
|         df = DataFrame(np.random.default_rng(2).standard_normal((3, 4))) | ||||
|         df2 = DataFrame(np.random.default_rng(2).standard_normal((4, 4))) | ||||
|  | ||||
|         result = concat([df, df2], keys=[0, 1]) | ||||
|         exp_index = MultiIndex.from_arrays( | ||||
|             [[0, 0, 0, 1, 1, 1, 1], [0, 1, 2, 0, 1, 2, 3]] | ||||
|         ) | ||||
|         expected = DataFrame(np.r_[df.values, df2.values], index=exp_index) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = concat([df, df], keys=[0, 1]) | ||||
|         exp_index2 = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]) | ||||
|         expected = DataFrame(np.r_[df.values, df.values], index=exp_index2) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # axis=1 | ||||
|         df = DataFrame(np.random.default_rng(2).standard_normal((4, 3))) | ||||
|         df2 = DataFrame(np.random.default_rng(2).standard_normal((4, 4))) | ||||
|  | ||||
|         result = concat([df, df2], keys=[0, 1], axis=1) | ||||
|         expected = DataFrame(np.c_[df.values, df2.values], columns=exp_index) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = concat([df, df], keys=[0, 1], axis=1) | ||||
|         expected = DataFrame(np.c_[df.values, df.values], columns=exp_index2) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_keys_specific_levels(self): | ||||
|         df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))) | ||||
|         pieces = [df.iloc[:, [0, 1]], df.iloc[:, [2]], df.iloc[:, [3]]] | ||||
|         level = ["three", "two", "one", "zero"] | ||||
|         result = concat( | ||||
|             pieces, | ||||
|             axis=1, | ||||
|             keys=["one", "two", "three"], | ||||
|             levels=[level], | ||||
|             names=["group_key"], | ||||
|         ) | ||||
|  | ||||
|         tm.assert_index_equal(result.columns.levels[0], Index(level, name="group_key")) | ||||
|         tm.assert_index_equal(result.columns.levels[1], Index([0, 1, 2, 3])) | ||||
|  | ||||
|         assert result.columns.names == ["group_key", None] | ||||
|  | ||||
|     @pytest.mark.parametrize("mapping", ["mapping", "dict"]) | ||||
|     def test_concat_mapping(self, mapping, non_dict_mapping_subclass): | ||||
|         constructor = dict if mapping == "dict" else non_dict_mapping_subclass | ||||
|         frames = constructor( | ||||
|             { | ||||
|                 "foo": DataFrame(np.random.default_rng(2).standard_normal((4, 3))), | ||||
|                 "bar": DataFrame(np.random.default_rng(2).standard_normal((4, 3))), | ||||
|                 "baz": DataFrame(np.random.default_rng(2).standard_normal((4, 3))), | ||||
|                 "qux": DataFrame(np.random.default_rng(2).standard_normal((4, 3))), | ||||
|             } | ||||
|         ) | ||||
|  | ||||
|         sorted_keys = list(frames.keys()) | ||||
|  | ||||
|         result = concat(frames) | ||||
|         expected = concat([frames[k] for k in sorted_keys], keys=sorted_keys) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = concat(frames, axis=1) | ||||
|         expected = concat([frames[k] for k in sorted_keys], keys=sorted_keys, axis=1) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         keys = ["baz", "foo", "bar"] | ||||
|         result = concat(frames, keys=keys) | ||||
|         expected = concat([frames[k] for k in keys], keys=keys) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_keys_and_levels(self): | ||||
|         df = DataFrame(np.random.default_rng(2).standard_normal((1, 3))) | ||||
|         df2 = DataFrame(np.random.default_rng(2).standard_normal((1, 4))) | ||||
|  | ||||
|         levels = [["foo", "baz"], ["one", "two"]] | ||||
|         names = ["first", "second"] | ||||
|         result = concat( | ||||
|             [df, df2, df, df2], | ||||
|             keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")], | ||||
|             levels=levels, | ||||
|             names=names, | ||||
|         ) | ||||
|         expected = concat([df, df2, df, df2]) | ||||
|         exp_index = MultiIndex( | ||||
|             levels=levels + [[0]], | ||||
|             codes=[[0, 0, 1, 1], [0, 1, 0, 1], [0, 0, 0, 0]], | ||||
|             names=names + [None], | ||||
|         ) | ||||
|         expected.index = exp_index | ||||
|  | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # no names | ||||
|         result = concat( | ||||
|             [df, df2, df, df2], | ||||
|             keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")], | ||||
|             levels=levels, | ||||
|         ) | ||||
|         assert result.index.names == (None,) * 3 | ||||
|  | ||||
|         # no levels | ||||
|         result = concat( | ||||
|             [df, df2, df, df2], | ||||
|             keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")], | ||||
|             names=["first", "second"], | ||||
|         ) | ||||
|         assert result.index.names == ("first", "second", None) | ||||
|         tm.assert_index_equal( | ||||
|             result.index.levels[0], Index(["baz", "foo"], name="first") | ||||
|         ) | ||||
|  | ||||
|     def test_concat_keys_levels_no_overlap(self): | ||||
|         # GH #1406 | ||||
|         df = DataFrame(np.random.default_rng(2).standard_normal((1, 3)), index=["a"]) | ||||
|         df2 = DataFrame(np.random.default_rng(2).standard_normal((1, 4)), index=["b"]) | ||||
|  | ||||
|         msg = "Values not found in passed level" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             concat([df, df], keys=["one", "two"], levels=[["foo", "bar", "baz"]]) | ||||
|  | ||||
|         msg = "Key one not in level" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             concat([df, df2], keys=["one", "two"], levels=[["foo", "bar", "baz"]]) | ||||
|  | ||||
|     def test_crossed_dtypes_weird_corner(self): | ||||
|         columns = ["A", "B", "C", "D"] | ||||
|         df1 = DataFrame( | ||||
|             { | ||||
|                 "A": np.array([1, 2, 3, 4], dtype="f8"), | ||||
|                 "B": np.array([1, 2, 3, 4], dtype="i8"), | ||||
|                 "C": np.array([1, 2, 3, 4], dtype="f8"), | ||||
|                 "D": np.array([1, 2, 3, 4], dtype="i8"), | ||||
|             }, | ||||
|             columns=columns, | ||||
|         ) | ||||
|  | ||||
|         df2 = DataFrame( | ||||
|             { | ||||
|                 "A": np.array([1, 2, 3, 4], dtype="i8"), | ||||
|                 "B": np.array([1, 2, 3, 4], dtype="f8"), | ||||
|                 "C": np.array([1, 2, 3, 4], dtype="i8"), | ||||
|                 "D": np.array([1, 2, 3, 4], dtype="f8"), | ||||
|             }, | ||||
|             columns=columns, | ||||
|         ) | ||||
|  | ||||
|         appended = concat([df1, df2], ignore_index=True) | ||||
|         expected = DataFrame( | ||||
|             np.concatenate([df1.values, df2.values], axis=0), columns=columns | ||||
|         ) | ||||
|         tm.assert_frame_equal(appended, expected) | ||||
|  | ||||
|         df = DataFrame(np.random.default_rng(2).standard_normal((1, 3)), index=["a"]) | ||||
|         df2 = DataFrame(np.random.default_rng(2).standard_normal((1, 4)), index=["b"]) | ||||
|         result = concat([df, df2], keys=["one", "two"], names=["first", "second"]) | ||||
|         assert result.index.names == ("first", "second") | ||||
|  | ||||
|     def test_with_mixed_tuples(self, sort): | ||||
|         # 10697 | ||||
|         # columns have mixed tuples, so handle properly | ||||
|         df1 = DataFrame({"A": "foo", ("B", 1): "bar"}, index=range(2)) | ||||
|         df2 = DataFrame({"B": "foo", ("B", 1): "bar"}, index=range(2)) | ||||
|  | ||||
|         # it works | ||||
|         concat([df1, df2], sort=sort) | ||||
|  | ||||
|     def test_concat_mixed_objs_columns(self): | ||||
|         """Column-wise concat of mixed Series/DataFrame inputs and its labels. | ||||
|  | ||||
|         Every input holds the same ten integers, so each expected frame is | ||||
|         that array repeated across columns; the cases differ only in the | ||||
|         expected column labels. | ||||
|         """ | ||||
|         # Test column-wise concat for mixed series/frames (axis=1) | ||||
|         # GH2385 | ||||
|  | ||||
|         index = date_range("01-Jan-2013", periods=10, freq="h") | ||||
|         arr = np.arange(10, dtype="int64") | ||||
|         s1 = Series(arr, index=index) | ||||
|         s2 = Series(arr, index=index) | ||||
|         df = DataFrame(arr.reshape(-1, 1), index=index) | ||||
|  | ||||
|         # two frames: the duplicate column label 0 is kept | ||||
|         expected = DataFrame( | ||||
|             np.repeat(arr, 2).reshape(-1, 2), index=index, columns=[0, 0] | ||||
|         ) | ||||
|         result = concat([df, df], axis=1) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # unnamed series only: columns are numbered 0, 1, ... | ||||
|         expected = DataFrame( | ||||
|             np.repeat(arr, 2).reshape(-1, 2), index=index, columns=[0, 1] | ||||
|         ) | ||||
|         result = concat([s1, s2], axis=1) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         expected = DataFrame( | ||||
|             np.repeat(arr, 3).reshape(-1, 3), index=index, columns=[0, 1, 2] | ||||
|         ) | ||||
|         result = concat([s1, s2, s1], axis=1) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # unnamed series mixed with a frame: the frame keeps its label 0 | ||||
|         expected = DataFrame( | ||||
|             np.repeat(arr, 5).reshape(-1, 5), index=index, columns=[0, 0, 1, 2, 3] | ||||
|         ) | ||||
|         result = concat([s1, df, s2, s2, s1], axis=1) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # with names | ||||
|         # (s1 is renamed in place; s2 is still unnamed here) | ||||
|         s1.name = "foo" | ||||
|         expected = DataFrame( | ||||
|             np.repeat(arr, 3).reshape(-1, 3), index=index, columns=["foo", 0, 0] | ||||
|         ) | ||||
|         result = concat([s1, df, s2], axis=1) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # both series named: their names become the column labels | ||||
|         s2.name = "bar" | ||||
|         expected = DataFrame( | ||||
|             np.repeat(arr, 3).reshape(-1, 3), index=index, columns=["foo", 0, "bar"] | ||||
|         ) | ||||
|         result = concat([s1, df, s2], axis=1) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # ignore index | ||||
|         # (with axis=1 this replaces the column labels, names included) | ||||
|         expected = DataFrame( | ||||
|             np.repeat(arr, 3).reshape(-1, 3), index=index, columns=[0, 1, 2] | ||||
|         ) | ||||
|         result = concat([s1, df, s2], axis=1, ignore_index=True) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_mixed_objs_index(self): | ||||
|         # Test row-wise concat for mixed series/frames with a common name | ||||
|         # GH2385, GH15047 | ||||
|  | ||||
|         index = date_range("01-Jan-2013", periods=10, freq="h") | ||||
|         arr = np.arange(10, dtype="int64") | ||||
|         s1 = Series(arr, index=index) | ||||
|         s2 = Series(arr, index=index) | ||||
|         df = DataFrame(arr.reshape(-1, 1), index=index) | ||||
|  | ||||
|         expected = DataFrame( | ||||
|             np.tile(arr, 3).reshape(-1, 1), index=index.tolist() * 3, columns=[0] | ||||
|         ) | ||||
|         result = concat([s1, df, s2]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_mixed_objs_index_names(self): | ||||
|         # Test row-wise concat for mixed series/frames with distinct names | ||||
|         # GH2385, GH15047 | ||||
|  | ||||
|         index = date_range("01-Jan-2013", periods=10, freq="h") | ||||
|         arr = np.arange(10, dtype="int64") | ||||
|         s1 = Series(arr, index=index, name="foo") | ||||
|         s2 = Series(arr, index=index, name="bar") | ||||
|         df = DataFrame(arr.reshape(-1, 1), index=index) | ||||
|  | ||||
|         expected = DataFrame( | ||||
|             np.kron(np.where(np.identity(3) == 1, 1, np.nan), arr).T, | ||||
|             index=index.tolist() * 3, | ||||
|             columns=["foo", 0, "bar"], | ||||
|         ) | ||||
|         result = concat([s1, df, s2]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # Rename all series to 0 when ignore_index=True | ||||
|         expected = DataFrame(np.tile(arr, 3).reshape(-1, 1), columns=[0]) | ||||
|         result = concat([s1, df, s2], ignore_index=True) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_dtype_coercion(self): | ||||
|         # 12411 | ||||
|         df = DataFrame({"date": [pd.Timestamp("20130101").tz_localize("UTC"), pd.NaT]}) | ||||
|  | ||||
|         result = concat([df.iloc[[0]], df.iloc[[1]]]) | ||||
|         tm.assert_series_equal(result.dtypes, df.dtypes) | ||||
|  | ||||
|         # 12045 | ||||
|         df = DataFrame({"date": [datetime(2012, 1, 1), datetime(1012, 1, 2)]}) | ||||
|         result = concat([df.iloc[[0]], df.iloc[[1]]]) | ||||
|         tm.assert_series_equal(result.dtypes, df.dtypes) | ||||
|  | ||||
|         # 11594 | ||||
|         df = DataFrame({"text": ["some words"] + [None] * 9}) | ||||
|         result = concat([df.iloc[[0]], df.iloc[[1]]]) | ||||
|         tm.assert_series_equal(result.dtypes, df.dtypes) | ||||
|  | ||||
|     def test_concat_single_with_key(self): | ||||
|         df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))) | ||||
|  | ||||
|         result = concat([df], keys=["foo"]) | ||||
|         expected = concat([df, df], keys=["foo", "bar"]) | ||||
|         tm.assert_frame_equal(result, expected[:10]) | ||||
|  | ||||
|     def test_concat_no_items_raises(self): | ||||
|         with pytest.raises(ValueError, match="No objects to concatenate"): | ||||
|             concat([]) | ||||
|  | ||||
|     def test_concat_exclude_none(self): | ||||
|         df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))) | ||||
|  | ||||
|         pieces = [df[:5], None, None, df[5:]] | ||||
|         result = concat(pieces) | ||||
|         tm.assert_frame_equal(result, df) | ||||
|         with pytest.raises(ValueError, match="All objects passed were None"): | ||||
|             concat([None, None]) | ||||
|  | ||||
|     def test_concat_keys_with_none(self): | ||||
|         # #1649 | ||||
|         df0 = DataFrame([[10, 20, 30], [10, 20, 30], [10, 20, 30]]) | ||||
|  | ||||
|         result = concat({"a": None, "b": df0, "c": df0[:2], "d": df0[:1], "e": df0}) | ||||
|         expected = concat({"b": df0, "c": df0[:2], "d": df0[:1], "e": df0}) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = concat( | ||||
|             [None, df0, df0[:2], df0[:1], df0], keys=["a", "b", "c", "d", "e"] | ||||
|         ) | ||||
|         expected = concat([df0, df0[:2], df0[:1], df0], keys=["b", "c", "d", "e"]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_bug_1719(self): | ||||
|         ts1 = Series( | ||||
|             np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10) | ||||
|         ) | ||||
|         ts2 = ts1.copy()[::2] | ||||
|  | ||||
|         # to join with union | ||||
|         # these two are of different length! | ||||
|         left = concat([ts1, ts2], join="outer", axis=1) | ||||
|         right = concat([ts2, ts1], join="outer", axis=1) | ||||
|  | ||||
|         assert len(left) == len(right) | ||||
|  | ||||
|     def test_concat_bug_2972(self): | ||||
|         ts0 = Series(np.zeros(5)) | ||||
|         ts1 = Series(np.ones(5)) | ||||
|         ts0.name = ts1.name = "same name" | ||||
|         result = concat([ts0, ts1], axis=1) | ||||
|  | ||||
|         expected = DataFrame({0: ts0, 1: ts1}) | ||||
|         expected.columns = ["same name", "same name"] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_bug_3602(self): | ||||
|         # GH 3602, duplicate columns | ||||
|         df1 = DataFrame( | ||||
|             { | ||||
|                 "firmNo": [0, 0, 0, 0], | ||||
|                 "prc": [6, 6, 6, 6], | ||||
|                 "stringvar": ["rrr", "rrr", "rrr", "rrr"], | ||||
|             } | ||||
|         ) | ||||
|         df2 = DataFrame( | ||||
|             {"C": [9, 10, 11, 12], "misc": [1, 2, 3, 4], "prc": [6, 6, 6, 6]} | ||||
|         ) | ||||
|         expected = DataFrame( | ||||
|             [ | ||||
|                 [0, 6, "rrr", 9, 1, 6], | ||||
|                 [0, 6, "rrr", 10, 2, 6], | ||||
|                 [0, 6, "rrr", 11, 3, 6], | ||||
|                 [0, 6, "rrr", 12, 4, 6], | ||||
|             ] | ||||
|         ) | ||||
|         expected.columns = ["firmNo", "prc", "stringvar", "C", "misc", "prc"] | ||||
|  | ||||
|         result = concat([df1, df2], axis=1) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_iterables(self): | ||||
|         # GH8645 check concat works with tuples, list, generators, and weird | ||||
|         # stuff like deque and custom iterables | ||||
|         df1 = DataFrame([1, 2, 3]) | ||||
|         df2 = DataFrame([4, 5, 6]) | ||||
|         expected = DataFrame([1, 2, 3, 4, 5, 6]) | ||||
|         tm.assert_frame_equal(concat((df1, df2), ignore_index=True), expected) | ||||
|         tm.assert_frame_equal(concat([df1, df2], ignore_index=True), expected) | ||||
|         tm.assert_frame_equal( | ||||
|             concat((df for df in (df1, df2)), ignore_index=True), expected | ||||
|         ) | ||||
|         tm.assert_frame_equal(concat(deque((df1, df2)), ignore_index=True), expected) | ||||
|  | ||||
|         class CustomIterator1: | ||||
|             def __len__(self) -> int: | ||||
|                 return 2 | ||||
|  | ||||
|             def __getitem__(self, index): | ||||
|                 try: | ||||
|                     return {0: df1, 1: df2}[index] | ||||
|                 except KeyError as err: | ||||
|                     raise IndexError from err | ||||
|  | ||||
|         tm.assert_frame_equal(concat(CustomIterator1(), ignore_index=True), expected) | ||||
|  | ||||
|         class CustomIterator2(abc.Iterable): | ||||
|             def __iter__(self) -> Iterator: | ||||
|                 yield df1 | ||||
|                 yield df2 | ||||
|  | ||||
|         tm.assert_frame_equal(concat(CustomIterator2(), ignore_index=True), expected) | ||||
|  | ||||
|     def test_concat_order(self): | ||||
|         # GH 17344, GH#47331 | ||||
|         dfs = [DataFrame(index=range(3), columns=["a", 1, None])] | ||||
|         dfs += [DataFrame(index=range(3), columns=[None, 1, "a"]) for _ in range(100)] | ||||
|  | ||||
|         result = concat(dfs, sort=True).columns | ||||
|         expected = Index([1, "a", None]) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_concat_different_extension_dtypes_upcasts(self): | ||||
|         a = Series(pd.array([1, 2], dtype="Int64")) | ||||
|         b = Series(to_decimal([1, 2])) | ||||
|  | ||||
|         result = concat([a, b], ignore_index=True) | ||||
|         expected = Series([1, 2, Decimal(1), Decimal(2)], dtype=object) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_concat_ordered_dict(self): | ||||
|         # GH 21510 | ||||
|         expected = concat( | ||||
|             [Series(range(3)), Series(range(4))], keys=["First", "Another"] | ||||
|         ) | ||||
|         result = concat({"First": Series(range(3)), "Another": Series(range(4))}) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_concat_duplicate_indices_raise(self): | ||||
|         # GH 45888: test raise for concat DataFrames with duplicate indices | ||||
|         # https://github.com/pandas-dev/pandas/issues/36263 | ||||
|         df1 = DataFrame( | ||||
|             np.random.default_rng(2).standard_normal(5), | ||||
|             index=[0, 1, 2, 3, 3], | ||||
|             columns=["a"], | ||||
|         ) | ||||
|         df2 = DataFrame( | ||||
|             np.random.default_rng(2).standard_normal(5), | ||||
|             index=[0, 1, 2, 2, 4], | ||||
|             columns=["b"], | ||||
|         ) | ||||
|         msg = "Reindexing only valid with uniquely valued Index objects" | ||||
|         with pytest.raises(InvalidIndexError, match=msg): | ||||
|             concat([df1, df2], axis=1) | ||||
|  | ||||
|  | ||||
| def test_concat_no_unnecessary_upcast(float_numpy_dtype, frame_or_series): | ||||
|     # GH 13247 | ||||
|     dims = frame_or_series(dtype=object).ndim | ||||
|     dt = float_numpy_dtype | ||||
|  | ||||
|     dfs = [ | ||||
|         frame_or_series(np.array([1], dtype=dt, ndmin=dims)), | ||||
|         frame_or_series(np.array([np.nan], dtype=dt, ndmin=dims)), | ||||
|         frame_or_series(np.array([5], dtype=dt, ndmin=dims)), | ||||
|     ] | ||||
|     x = concat(dfs) | ||||
|     assert x.values.dtype == dt | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("pdt", [Series, DataFrame]) | ||||
| def test_concat_will_upcast(pdt, any_signed_int_numpy_dtype): | ||||
|     dt = any_signed_int_numpy_dtype | ||||
|     dims = pdt().ndim | ||||
|     dfs = [ | ||||
|         pdt(np.array([1], dtype=dt, ndmin=dims)), | ||||
|         pdt(np.array([np.nan], ndmin=dims)), | ||||
|         pdt(np.array([5], dtype=dt, ndmin=dims)), | ||||
|     ] | ||||
|     x = concat(dfs) | ||||
|     assert x.values.dtype == "float64" | ||||
|  | ||||
|  | ||||
| def test_concat_empty_and_non_empty_frame_regression(): | ||||
|     # GH 18178 regression test | ||||
|     df1 = DataFrame({"foo": [1]}) | ||||
|     df2 = DataFrame({"foo": []}) | ||||
|     expected = DataFrame({"foo": [1.0]}) | ||||
|     result = concat([df1, df2]) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_concat_sparse(): | ||||
|     # GH 23557 | ||||
|     a = Series(SparseArray([0, 1, 2])) | ||||
|     expected = DataFrame(data=[[0, 0], [1, 1], [2, 2]]).astype( | ||||
|         pd.SparseDtype(np.int64, 0) | ||||
|     ) | ||||
|     result = concat([a, a], axis=1) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_concat_dense_sparse(): | ||||
|     # GH 30668 | ||||
|     dtype = pd.SparseDtype(np.float64, None) | ||||
|     a = Series(pd.arrays.SparseArray([1, None]), dtype=dtype) | ||||
|     b = Series([1], dtype=float) | ||||
|     expected = Series(data=[1, None, 1], index=[0, 1, 0]).astype(dtype) | ||||
|     result = concat([a, b], axis=0) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("keys", [["e", "f", "f"], ["f", "e", "f"]]) | ||||
| def test_duplicate_keys(keys): | ||||
|     # GH 33654 | ||||
|     df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) | ||||
|     s1 = Series([7, 8, 9], name="c") | ||||
|     s2 = Series([10, 11, 12], name="d") | ||||
|     result = concat([df, s1, s2], axis=1, keys=keys) | ||||
|     expected_values = [[1, 4, 7, 10], [2, 5, 8, 11], [3, 6, 9, 12]] | ||||
|     expected_columns = MultiIndex.from_tuples( | ||||
|         [(keys[0], "a"), (keys[0], "b"), (keys[1], "c"), (keys[2], "d")] | ||||
|     ) | ||||
|     expected = DataFrame(expected_values, columns=expected_columns) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_duplicate_keys_same_frame(): | ||||
|     # GH 43595 | ||||
|     keys = ["e", "e"] | ||||
|     df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) | ||||
|     result = concat([df, df], axis=1, keys=keys) | ||||
|     expected_values = [[1, 4, 1, 4], [2, 5, 2, 5], [3, 6, 3, 6]] | ||||
|     expected_columns = MultiIndex.from_tuples( | ||||
|         [(keys[0], "a"), (keys[0], "b"), (keys[1], "a"), (keys[1], "b")] | ||||
|     ) | ||||
|     expected = DataFrame(expected_values, columns=expected_columns) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.filterwarnings( | ||||
|     "ignore:Passing a BlockManager|Passing a SingleBlockManager:DeprecationWarning" | ||||
| ) | ||||
| @pytest.mark.parametrize( | ||||
|     "obj", | ||||
|     [ | ||||
|         tm.SubclassedDataFrame({"A": np.arange(0, 10)}), | ||||
|         tm.SubclassedSeries(np.arange(0, 10), name="A"), | ||||
|     ], | ||||
| ) | ||||
| def test_concat_preserves_subclass(obj): | ||||
|     # GH28330 -- preserve subclass | ||||
|  | ||||
|     result = concat([obj, obj]) | ||||
|     assert isinstance(result, type(obj)) | ||||
|  | ||||
|  | ||||
| def test_concat_frame_axis0_extension_dtypes(): | ||||
|     # preserve extension dtype (through common_dtype mechanism) | ||||
|     df1 = DataFrame({"a": pd.array([1, 2, 3], dtype="Int64")}) | ||||
|     df2 = DataFrame({"a": np.array([4, 5, 6])}) | ||||
|  | ||||
|     result = concat([df1, df2], ignore_index=True) | ||||
|     expected = DataFrame({"a": [1, 2, 3, 4, 5, 6]}, dtype="Int64") | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     result = concat([df2, df1], ignore_index=True) | ||||
|     expected = DataFrame({"a": [4, 5, 6, 1, 2, 3]}, dtype="Int64") | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_concat_preserves_extension_int64_dtype(): | ||||
|     # GH 24768 | ||||
|     df_a = DataFrame({"a": [-1]}, dtype="Int64") | ||||
|     df_b = DataFrame({"b": [1]}, dtype="Int64") | ||||
|     result = concat([df_a, df_b], ignore_index=True) | ||||
|     expected = DataFrame({"a": [-1, None], "b": [None, 1]}, dtype="Int64") | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "dtype1,dtype2,expected_dtype", | ||||
|     [ | ||||
|         ("bool", "bool", "bool"), | ||||
|         ("boolean", "bool", "boolean"), | ||||
|         ("bool", "boolean", "boolean"), | ||||
|         ("boolean", "boolean", "boolean"), | ||||
|     ], | ||||
| ) | ||||
| def test_concat_bool_types(dtype1, dtype2, expected_dtype): | ||||
|     # GH 42800 | ||||
|     ser1 = Series([True, False], dtype=dtype1) | ||||
|     ser2 = Series([False, True], dtype=dtype2) | ||||
|     result = concat([ser1, ser2], ignore_index=True) | ||||
|     expected = Series([True, False, False, True], dtype=expected_dtype) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     ("keys", "integrity"), | ||||
|     [ | ||||
|         (["red"] * 3, True), | ||||
|         (["red"] * 3, False), | ||||
|         (["red", "blue", "red"], False), | ||||
|         (["red", "blue", "red"], True), | ||||
|     ], | ||||
| ) | ||||
| def test_concat_repeated_keys(keys, integrity): | ||||
|     # GH: 20816 | ||||
|     # Every parametrized ``keys`` list repeats at least one key; each list is | ||||
|     # exercised with verify_integrity both enabled and disabled. | ||||
|     series_list = [Series({"a": 1}), Series({"b": 2}), Series({"c": 3})] | ||||
|     result = concat(series_list, keys=keys, verify_integrity=integrity) | ||||
|     # Expected index: each key paired with the single label of its Series. | ||||
|     tuples = list(zip(keys, ["a", "b", "c"])) | ||||
|     expected = Series([1, 2, 3], index=MultiIndex.from_tuples(tuples)) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_concat_null_object_with_dti(): | ||||
|     # GH#40841 | ||||
|     # ``right`` is indexed by a tz-aware (UTC) DatetimeIndex with one entry. | ||||
|     dti = pd.DatetimeIndex( | ||||
|         ["2021-04-08 21:21:14+00:00"], dtype="datetime64[ns, UTC]", name="Time (UTC)" | ||||
|     ) | ||||
|     right = DataFrame(data={"C": [0.5274]}, index=dti) | ||||
|  | ||||
|     # ``left`` is indexed by an object-dtype Index whose only label is None. | ||||
|     idx = Index([None], dtype="object", name="Maybe Time (UTC)") | ||||
|     left = DataFrame(data={"A": [None], "B": [np.nan]}, index=idx) | ||||
|  | ||||
|     result = concat([left, right], axis="columns") | ||||
|  | ||||
|     # The expected row index is object dtype and holds both labels; each | ||||
|     # frame's columns are missing on the row contributed by the other frame. | ||||
|     exp_index = Index([None, dti[0]], dtype=object) | ||||
|     expected = DataFrame( | ||||
|         { | ||||
|             "A": np.array([None, np.nan], dtype=object), | ||||
|             "B": [np.nan, np.nan], | ||||
|             "C": [np.nan, 0.5274], | ||||
|         }, | ||||
|         index=exp_index, | ||||
|     ) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_concat_multiindex_with_empty_rangeindex(): | ||||
|     # GH#41234 | ||||
|     # df1 has MultiIndex columns; df2 has one row but an empty RangeIndex of | ||||
|     # columns. | ||||
|     mi = MultiIndex.from_tuples([("B", 1), ("C", 1)]) | ||||
|     df1 = DataFrame([[1, 2]], columns=mi) | ||||
|     df2 = DataFrame(index=[1], columns=pd.RangeIndex(0)) | ||||
|  | ||||
|     result = concat([df1, df2]) | ||||
|     # The MultiIndex columns are expected to be kept, with df2's row all-NaN. | ||||
|     expected = DataFrame([[1, 2], [np.nan, np.nan]], columns=mi) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "data", | ||||
|     [ | ||||
|         Series(data=[1, 2]), | ||||
|         DataFrame( | ||||
|             data={ | ||||
|                 "col1": [1, 2], | ||||
|             } | ||||
|         ), | ||||
|         DataFrame(dtype=float), | ||||
|         Series(dtype=float), | ||||
|     ], | ||||
| ) | ||||
| def test_concat_drop_attrs(data): | ||||
|     # GH#41828 | ||||
|     # The two copies carry different values under the same attrs key (1 vs 2); | ||||
|     # the concatenated object is expected to end up with empty attrs. | ||||
|     # Cases cover populated and empty Series/DataFrame inputs. | ||||
|     df1 = data.copy() | ||||
|     df1.attrs = {1: 1} | ||||
|     df2 = data.copy() | ||||
|     df2.attrs = {1: 2} | ||||
|     df = concat([df1, df2]) | ||||
|     assert len(df.attrs) == 0 | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "data", | ||||
|     [ | ||||
|         Series(data=[1, 2]), | ||||
|         DataFrame( | ||||
|             data={ | ||||
|                 "col1": [1, 2], | ||||
|             } | ||||
|         ), | ||||
|         DataFrame(dtype=float), | ||||
|         Series(dtype=float), | ||||
|     ], | ||||
| ) | ||||
| def test_concat_retain_attrs(data): | ||||
|     # GH#41828 | ||||
|     # Both copies carry identical attrs ({1: 1}); the concatenated object is | ||||
|     # expected to expose the same value under that key. | ||||
|     # Cases cover populated and empty Series/DataFrame inputs. | ||||
|     df1 = data.copy() | ||||
|     df1.attrs = {1: 1} | ||||
|     df2 = data.copy() | ||||
|     df2.attrs = {1: 1} | ||||
|     df = concat([df1, df2]) | ||||
|     assert df.attrs[1] == 1 | ||||
|  | ||||
|  | ||||
| @td.skip_array_manager_invalid_test | ||||
| @pytest.mark.parametrize("df_dtype", ["float64", "int64", "datetime64[ns]"]) | ||||
| @pytest.mark.parametrize("empty_dtype", [None, "float64", "object"]) | ||||
| def test_concat_ignore_empty_object_float(empty_dtype, df_dtype): | ||||
|     # https://github.com/pandas-dev/pandas/issues/45637 | ||||
|     # ``empty`` has the same column labels as ``df`` but no rows; its dtype is | ||||
|     # varied independently of the dtype of ``df``. | ||||
|     df = DataFrame({"foo": [1, 2], "bar": [1, 2]}, dtype=df_dtype) | ||||
|     empty = DataFrame(columns=["foo", "bar"], dtype=empty_dtype) | ||||
|  | ||||
|     # A FutureWarning is expected for datetime64 frames, and for float64 frames | ||||
|     # when the empty frame is not float64; otherwise no warning is expected. | ||||
|     msg = "The behavior of DataFrame concatenation with empty or all-NA entries" | ||||
|     warn = None | ||||
|     if df_dtype == "datetime64[ns]" or ( | ||||
|         df_dtype == "float64" and empty_dtype != "float64" | ||||
|     ): | ||||
|         warn = FutureWarning | ||||
|     with tm.assert_produces_warning(warn, match=msg): | ||||
|         result = concat([empty, df]) | ||||
|     # By default the result is expected to equal ``df`` itself. | ||||
|     expected = df | ||||
|     if df_dtype == "int64": | ||||
|         # TODO what exact behaviour do we want for integer eventually? | ||||
|         # For now int64 input is expected to be cast: to float64 when the empty | ||||
|         # frame is float64, otherwise to object. | ||||
|         if empty_dtype == "float64": | ||||
|             expected = df.astype("float64") | ||||
|         else: | ||||
|             expected = df.astype("object") | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @td.skip_array_manager_invalid_test | ||||
| @pytest.mark.parametrize("df_dtype", ["float64", "int64", "datetime64[ns]"]) | ||||
| @pytest.mark.parametrize("empty_dtype", [None, "float64", "object"]) | ||||
| def test_concat_ignore_all_na_object_float(empty_dtype, df_dtype): | ||||
|     # Despite its name, ``empty`` is a single-row frame whose values are all NaN. | ||||
|     df = DataFrame({"foo": [1, 2], "bar": [1, 2]}, dtype=df_dtype) | ||||
|     empty = DataFrame({"foo": [np.nan], "bar": [np.nan]}, dtype=empty_dtype) | ||||
|  | ||||
|     if df_dtype == "int64": | ||||
|         # TODO what exact behaviour do we want for integer eventually? | ||||
|         # df_dtype is rewritten here to the dtype used for ``expected`` below. | ||||
|         if empty_dtype == "object": | ||||
|             df_dtype = "object" | ||||
|         else: | ||||
|             df_dtype = "float64" | ||||
|  | ||||
|     # A FutureWarning is expected when an explicit empty_dtype differs from the | ||||
|     # (possibly rewritten) df_dtype, and also for datetime64 frames. | ||||
|     msg = "The behavior of DataFrame concatenation with empty or all-NA entries" | ||||
|     warn = None | ||||
|     if empty_dtype != df_dtype and empty_dtype is not None: | ||||
|         warn = FutureWarning | ||||
|     elif df_dtype == "datetime64[ns]": | ||||
|         warn = FutureWarning | ||||
|  | ||||
|     with tm.assert_produces_warning(warn, match=msg): | ||||
|         result = concat([empty, df], ignore_index=True) | ||||
|  | ||||
|     # The all-NaN row is expected to come first, followed by the rows of ``df``. | ||||
|     expected = DataFrame({"foo": [np.nan, 1, 2], "bar": [np.nan, 1, 2]}, dtype=df_dtype) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @td.skip_array_manager_invalid_test | ||||
| def test_concat_ignore_empty_from_reindex(): | ||||
|     # https://github.com/pandas-dev/pandas/pull/43507#issuecomment-920375856 | ||||
|     df1 = DataFrame({"a": [1], "b": [pd.Timestamp("2012-01-01")]}) | ||||
|     df2 = DataFrame({"a": [2]}) | ||||
|  | ||||
|     aligned = df2.reindex(columns=df1.columns) | ||||
|  | ||||
|     msg = "The behavior of DataFrame concatenation with empty or all-NA entries" | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         result = concat([df1, aligned], ignore_index=True) | ||||
|     expected = df1 = DataFrame({"a": [1, 2], "b": [pd.Timestamp("2012-01-01"), pd.NaT]}) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_concat_mismatched_keys_length(): | ||||
|     # GH#43485 | ||||
|     # Four Series are concatenated with only three keys. | ||||
|     ser = Series(range(5)) | ||||
|     sers = [ser + n for n in range(4)] | ||||
|     keys = ["A", "B", "C"] | ||||
|  | ||||
|     # A FutureWarning with this message is expected on both axes, whether objs | ||||
|     # and keys are passed as lists or as generators. | ||||
|     msg = r"The behavior of pd.concat with len\(keys\) != len\(objs\) is deprecated" | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         concat(sers, keys=keys, axis=1) | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         concat(sers, keys=keys, axis=0) | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         concat((x for x in sers), keys=(y for y in keys), axis=1) | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         concat((x for x in sers), keys=(y for y in keys), axis=0) | ||||
|  | ||||
|  | ||||
| def test_concat_multiindex_with_category(): | ||||
|     # Both inputs are indexed by the two categorical columns "c1" and "c2", | ||||
|     # leaving "i2" as the only data column. | ||||
|     df1 = DataFrame( | ||||
|         { | ||||
|             "c1": Series(list("abc"), dtype="category"), | ||||
|             "c2": Series(list("eee"), dtype="category"), | ||||
|             "i2": Series([1, 2, 3]), | ||||
|         } | ||||
|     ) | ||||
|     df1 = df1.set_index(["c1", "c2"]) | ||||
|     df2 = DataFrame( | ||||
|         { | ||||
|             "c1": Series(list("abc"), dtype="category"), | ||||
|             "c2": Series(list("eee"), dtype="category"), | ||||
|             "i2": Series([4, 5, 6]), | ||||
|         } | ||||
|     ) | ||||
|     df2 = df2.set_index(["c1", "c2"]) | ||||
|     result = concat([df1, df2]) | ||||
|     # The expected frame is built the same way from the stacked values, so its | ||||
|     # index is again derived from categorical columns. | ||||
|     expected = DataFrame( | ||||
|         { | ||||
|             "c1": Series(list("abcabc"), dtype="category"), | ||||
|             "c2": Series(list("eeeeee"), dtype="category"), | ||||
|             "i2": Series([1, 2, 3, 4, 5, 6]), | ||||
|         } | ||||
|     ) | ||||
|     expected = expected.set_index(["c1", "c2"]) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_concat_ea_upcast(): | ||||
|     # GH#54848 | ||||
|     # One frame uses the "string" extension dtype, the other "Int64". | ||||
|     df1 = DataFrame(["a"], dtype="string") | ||||
|     df2 = DataFrame([1], dtype="Int64") | ||||
|     result = concat([df1, df2]) | ||||
|     # The expected frame is built from the raw values with no explicit dtype; | ||||
|     # both inputs contribute the row label 0, hence index=[0, 0]. | ||||
|     expected = DataFrame(["a", 1], index=[0, 0]) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_concat_none_with_timezone_timestamp(): | ||||
|     # GH#52093 | ||||
|     # Column "A" is None in the first frame and a tz-aware Timestamp in the | ||||
|     # second. | ||||
|     df1 = DataFrame([{"A": None}]) | ||||
|     df2 = DataFrame([{"A": pd.Timestamp("1990-12-20 00:00:00+00:00")}]) | ||||
|     msg = "The behavior of DataFrame concatenation with empty or all-NA entries" | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         result = concat([df1, df2], ignore_index=True) | ||||
|     # The result is expected to match a frame built directly from both values. | ||||
|     expected = DataFrame({"A": [None, pd.Timestamp("1990-12-20 00:00:00+00:00")]}) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
| @ -0,0 +1,230 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     Series, | ||||
|     concat, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestDataFrameConcat: | ||||
|     # Tests for concat applied to DataFrame inputs (plus a few Series cases in | ||||
|     # test_concat_axis_parameter). | ||||
|     def test_concat_multiple_frames_dtypes(self): | ||||
|         # GH#2759 | ||||
|         # Column-wise concat of a float64 frame and a float32 frame; every | ||||
|         # column is expected to keep its own dtype. | ||||
|         df1 = DataFrame(data=np.ones((10, 2)), columns=["foo", "bar"], dtype=np.float64) | ||||
|         df2 = DataFrame(data=np.ones((10, 2)), dtype=np.float32) | ||||
|         results = concat((df1, df2), axis=1).dtypes | ||||
|         expected = Series( | ||||
|             [np.dtype("float64")] * 2 + [np.dtype("float32")] * 2, | ||||
|             index=["foo", "bar", 0, 1], | ||||
|         ) | ||||
|         tm.assert_series_equal(results, expected) | ||||
|  | ||||
|     def test_concat_tuple_keys(self): | ||||
|         # GH#14438 | ||||
|         # Each key is a 2-tuple; the expected index entries are 3-tuples made of | ||||
|         # both key elements followed by the original row label. | ||||
|         df1 = DataFrame(np.ones((2, 2)), columns=list("AB")) | ||||
|         df2 = DataFrame(np.ones((3, 2)) * 2, columns=list("AB")) | ||||
|         results = concat((df1, df2), keys=[("bee", "bah"), ("bee", "boo")]) | ||||
|         expected = DataFrame( | ||||
|             { | ||||
|                 "A": { | ||||
|                     ("bee", "bah", 0): 1.0, | ||||
|                     ("bee", "bah", 1): 1.0, | ||||
|                     ("bee", "boo", 0): 2.0, | ||||
|                     ("bee", "boo", 1): 2.0, | ||||
|                     ("bee", "boo", 2): 2.0, | ||||
|                 }, | ||||
|                 "B": { | ||||
|                     ("bee", "bah", 0): 1.0, | ||||
|                     ("bee", "bah", 1): 1.0, | ||||
|                     ("bee", "boo", 0): 2.0, | ||||
|                     ("bee", "boo", 1): 2.0, | ||||
|                     ("bee", "boo", 2): 2.0, | ||||
|                 }, | ||||
|             } | ||||
|         ) | ||||
|         tm.assert_frame_equal(results, expected) | ||||
|  | ||||
|     def test_concat_named_keys(self): | ||||
|         # GH#14252 | ||||
|         df = DataFrame({"foo": [1, 2], "bar": [0.1, 0.2]}) | ||||
|         # Keys given as a named Index: the name is expected to label the outer | ||||
|         # level of the resulting MultiIndex. | ||||
|         index = Index(["a", "b"], name="baz") | ||||
|         concatted_named_from_keys = concat([df, df], keys=index) | ||||
|         expected_named = DataFrame( | ||||
|             {"foo": [1, 2, 1, 2], "bar": [0.1, 0.2, 0.1, 0.2]}, | ||||
|             index=pd.MultiIndex.from_product((["a", "b"], [0, 1]), names=["baz", None]), | ||||
|         ) | ||||
|         tm.assert_frame_equal(concatted_named_from_keys, expected_named) | ||||
|  | ||||
|         # Unnamed keys plus an explicit ``names`` argument: same expected result. | ||||
|         index_no_name = Index(["a", "b"], name=None) | ||||
|         concatted_named_from_names = concat([df, df], keys=index_no_name, names=["baz"]) | ||||
|         tm.assert_frame_equal(concatted_named_from_names, expected_named) | ||||
|  | ||||
|         # Unnamed keys and no ``names``: both levels are expected to be unnamed. | ||||
|         concatted_unnamed = concat([df, df], keys=index_no_name) | ||||
|         expected_unnamed = DataFrame( | ||||
|             {"foo": [1, 2, 1, 2], "bar": [0.1, 0.2, 0.1, 0.2]}, | ||||
|             index=pd.MultiIndex.from_product((["a", "b"], [0, 1]), names=[None, None]), | ||||
|         ) | ||||
|         tm.assert_frame_equal(concatted_unnamed, expected_unnamed) | ||||
|  | ||||
|     def test_concat_axis_parameter(self): | ||||
|         # GH#14369 | ||||
|         # Checks that the string and integer spellings of each axis give the | ||||
|         # same result, for DataFrame inputs and then for Series inputs. | ||||
|         df1 = DataFrame({"A": [0.1, 0.2]}, index=range(2)) | ||||
|         df2 = DataFrame({"A": [0.3, 0.4]}, index=range(2)) | ||||
|  | ||||
|         # Index/row/0 DataFrame | ||||
|         expected_index = DataFrame({"A": [0.1, 0.2, 0.3, 0.4]}, index=[0, 1, 0, 1]) | ||||
|  | ||||
|         concatted_index = concat([df1, df2], axis="index") | ||||
|         tm.assert_frame_equal(concatted_index, expected_index) | ||||
|  | ||||
|         concatted_row = concat([df1, df2], axis="rows") | ||||
|         tm.assert_frame_equal(concatted_row, expected_index) | ||||
|  | ||||
|         concatted_0 = concat([df1, df2], axis=0) | ||||
|         tm.assert_frame_equal(concatted_0, expected_index) | ||||
|  | ||||
|         # Columns/1 DataFrame | ||||
|         expected_columns = DataFrame( | ||||
|             [[0.1, 0.3], [0.2, 0.4]], index=[0, 1], columns=["A", "A"] | ||||
|         ) | ||||
|  | ||||
|         concatted_columns = concat([df1, df2], axis="columns") | ||||
|         tm.assert_frame_equal(concatted_columns, expected_columns) | ||||
|  | ||||
|         concatted_1 = concat([df1, df2], axis=1) | ||||
|         tm.assert_frame_equal(concatted_1, expected_columns) | ||||
|  | ||||
|         series1 = Series([0.1, 0.2]) | ||||
|         series2 = Series([0.3, 0.4]) | ||||
|  | ||||
|         # Index/row/0 Series | ||||
|         expected_index_series = Series([0.1, 0.2, 0.3, 0.4], index=[0, 1, 0, 1]) | ||||
|  | ||||
|         concatted_index_series = concat([series1, series2], axis="index") | ||||
|         tm.assert_series_equal(concatted_index_series, expected_index_series) | ||||
|  | ||||
|         concatted_row_series = concat([series1, series2], axis="rows") | ||||
|         tm.assert_series_equal(concatted_row_series, expected_index_series) | ||||
|  | ||||
|         concatted_0_series = concat([series1, series2], axis=0) | ||||
|         tm.assert_series_equal(concatted_0_series, expected_index_series) | ||||
|  | ||||
|         # Columns/1 Series | ||||
|         expected_columns_series = DataFrame( | ||||
|             [[0.1, 0.3], [0.2, 0.4]], index=[0, 1], columns=[0, 1] | ||||
|         ) | ||||
|  | ||||
|         concatted_columns_series = concat([series1, series2], axis="columns") | ||||
|         tm.assert_frame_equal(concatted_columns_series, expected_columns_series) | ||||
|  | ||||
|         concatted_1_series = concat([series1, series2], axis=1) | ||||
|         tm.assert_frame_equal(concatted_1_series, expected_columns_series) | ||||
|  | ||||
|         # Testing ValueError | ||||
|         with pytest.raises(ValueError, match="No axis named"): | ||||
|             concat([series1, series2], axis="something") | ||||
|  | ||||
|     def test_concat_numerical_names(self): | ||||
|         # GH#15262, GH#12223 | ||||
|         # The MultiIndex level names are the integers 1 and 2; they are | ||||
|         # expected to be kept when the first two and last two rows are joined. | ||||
|         df = DataFrame( | ||||
|             {"col": range(9)}, | ||||
|             dtype="int32", | ||||
|             index=( | ||||
|                 pd.MultiIndex.from_product( | ||||
|                     [["A0", "A1", "A2"], ["B0", "B1", "B2"]], names=[1, 2] | ||||
|                 ) | ||||
|             ), | ||||
|         ) | ||||
|         result = concat((df.iloc[:2, :], df.iloc[-2:, :])) | ||||
|         expected = DataFrame( | ||||
|             {"col": [0, 1, 7, 8]}, | ||||
|             dtype="int32", | ||||
|             index=pd.MultiIndex.from_tuples( | ||||
|                 [("A0", "B0"), ("A0", "B1"), ("A2", "B1"), ("A2", "B2")], names=[1, 2] | ||||
|             ), | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_astype_dup_col(self): | ||||
|         # GH#23049 | ||||
|         # Column-wise concat of a frame with itself yields two columns labelled | ||||
|         # "a"; astype("category") is then applied to that frame. | ||||
|         df = DataFrame([{"a": "b"}]) | ||||
|         df = concat([df, df], axis=1) | ||||
|  | ||||
|         result = df.astype("category") | ||||
|         expected = DataFrame( | ||||
|             np.array(["b", "b"]).reshape(1, 2), columns=["a", "a"] | ||||
|         ).astype("category") | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_dataframe_keys_bug(self, sort): | ||||
|         # t1 and t2 share the column name "value" and the index name "id", | ||||
|         # but t2 has one fewer row label. | ||||
|         t1 = DataFrame( | ||||
|             {"value": Series([1, 2, 3], index=Index(["a", "b", "c"], name="id"))} | ||||
|         ) | ||||
|         t2 = DataFrame({"value": Series([7, 8], index=Index(["a", "b"], name="id"))}) | ||||
|  | ||||
|         # it works | ||||
|         result = concat([t1, t2], axis=1, keys=["t1", "t2"], sort=sort) | ||||
|         # Only the resulting column labels are checked. | ||||
|         assert list(result.columns) == [("t1", "value"), ("t2", "value")] | ||||
|  | ||||
|     def test_concat_bool_with_int(self): | ||||
|         # GH#42092 we may want to change this to return object, but that | ||||
|         #  would need a deprecation | ||||
|         df1 = DataFrame(Series([True, False, True, True], dtype="bool")) | ||||
|         df2 = DataFrame(Series([1, 0, 1], dtype="int64")) | ||||
|  | ||||
|         result = concat([df1, df2]) | ||||
|         # Expected to match concatenation after casting the bool frame to int64. | ||||
|         expected = concat([df1.astype("int64"), df2]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_duplicates_in_index_with_keys(self): | ||||
|         # GH#42651 | ||||
|         # The row index contains the label 1 twice. | ||||
|         index = [1, 1, 3] | ||||
|         data = [1, 2, 3] | ||||
|  | ||||
|         df = DataFrame(data=data, index=index) | ||||
|         result = concat([df], keys=["A"], names=["ID", "date"]) | ||||
|         mi = pd.MultiIndex.from_product([["A"], index], names=["ID", "date"]) | ||||
|         expected = DataFrame(data=data, index=mi) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|         # The inner level itself is expected to hold each label only once. | ||||
|         tm.assert_index_equal(result.index.levels[1], Index([1, 3], name="date")) | ||||
|  | ||||
|     @pytest.mark.parametrize("ignore_index", [True, False]) | ||||
|     @pytest.mark.parametrize("order", ["C", "F"]) | ||||
|     @pytest.mark.parametrize("axis", [0, 1]) | ||||
|     def test_concat_copies(self, axis, order, ignore_index, using_copy_on_write): | ||||
|         # based on asv ConcatDataFrames | ||||
|         df = DataFrame(np.zeros((10, 5), dtype=np.float32, order=order)) | ||||
|  | ||||
|         res = concat([df] * 5, axis=axis, ignore_index=ignore_index, copy=True) | ||||
|  | ||||
|         # Memory sharing is only asserted when copy-on-write is not in use: no | ||||
|         # column array of the result may share memory with a column of ``df``. | ||||
|         if not using_copy_on_write: | ||||
|             for arr in res._iter_column_arrays(): | ||||
|                 for arr2 in df._iter_column_arrays(): | ||||
|                     assert not np.shares_memory(arr, arr2) | ||||
|  | ||||
|     def test_outer_sort_columns(self): | ||||
|         # GH#47127 | ||||
|         # Column labels mix strings with the integer 0; with sort=True and an | ||||
|         # outer join the expected column order is 0, "A", "B". | ||||
|         df1 = DataFrame({"A": [0], "B": [1], 0: 1}) | ||||
|         df2 = DataFrame({"A": [100]}) | ||||
|         result = concat([df1, df2], ignore_index=True, join="outer", sort=True) | ||||
|         expected = DataFrame({0: [1.0, np.nan], "A": [0, 100], "B": [1.0, np.nan]}) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_inner_sort_columns(self): | ||||
|         # GH#47127 | ||||
|         # Inner join keeps only the shared labels (0 and "A"), expected in that | ||||
|         # order when sort=True. | ||||
|         df1 = DataFrame({"A": [0], "B": [1], 0: 1}) | ||||
|         df2 = DataFrame({"A": [100], 0: 2}) | ||||
|         result = concat([df1, df2], ignore_index=True, join="inner", sort=True) | ||||
|         expected = DataFrame({0: [1, 2], "A": [0, 100]}) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_sort_columns_one_df(self): | ||||
|         # GH#47127 | ||||
|         # A single input frame is still expected to have its columns sorted. | ||||
|         df1 = DataFrame({"A": [100], 0: 2}) | ||||
|         result = concat([df1], ignore_index=True, join="inner", sort=True) | ||||
|         expected = DataFrame({0: [2], "A": [100]}) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
| @ -0,0 +1,606 @@ | ||||
| import datetime as dt | ||||
| from datetime import datetime | ||||
|  | ||||
| import dateutil | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
|     Timestamp, | ||||
|     concat, | ||||
|     date_range, | ||||
|     to_timedelta, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestDatetimeConcat: | ||||
|     # Tests for concat on datetime-like data: datetime64 columns, tz-aware and | ||||
|     # tz-naive indexes, and all-NaT inputs. | ||||
|     def test_concat_datetime64_block(self): | ||||
|         rng = date_range("1/1/2000", periods=10) | ||||
|  | ||||
|         df = DataFrame({"time": rng}) | ||||
|  | ||||
|         # Each half of the doubled frame is compared against the original range. | ||||
|         result = concat([df, df]) | ||||
|         assert (result.iloc[:10]["time"] == rng).all() | ||||
|         assert (result.iloc[10:]["time"] == rng).all() | ||||
|  | ||||
|     def test_concat_datetime_datetime64_frame(self): | ||||
|         # GH#2624 | ||||
|         # df2_obj is built from records whose "test" values mix an int and a | ||||
|         # str; df1 has a datetime64 "date" column and an integer "test" column. | ||||
|         rows = [] | ||||
|         rows.append([datetime(2010, 1, 1), 1]) | ||||
|         rows.append([datetime(2010, 1, 2), "hi"]) | ||||
|  | ||||
|         df2_obj = DataFrame.from_records(rows, columns=["date", "test"]) | ||||
|  | ||||
|         ind = date_range(start="2000/1/1", freq="D", periods=10) | ||||
|         df1 = DataFrame({"date": ind, "test": range(10)}) | ||||
|  | ||||
|         # it works! | ||||
|         # (smoke test: the result is not inspected) | ||||
|         concat([df1, df2_obj]) | ||||
|  | ||||
|     def test_concat_datetime_timezone(self): | ||||
|         # GH 18523 | ||||
|         # idx2 is built from the tz-aware endpoints of idx1. | ||||
|         idx1 = date_range("2011-01-01", periods=3, freq="h", tz="Europe/Paris") | ||||
|         idx2 = date_range(start=idx1[0], end=idx1[-1], freq="h") | ||||
|         df1 = DataFrame({"a": [1, 2, 3]}, index=idx1) | ||||
|         df2 = DataFrame({"b": [1, 2, 3]}, index=idx2) | ||||
|         result = concat([df1, df2], axis=1) | ||||
|  | ||||
|         # Expected: a Europe/Paris index with hourly freq and fully aligned rows. | ||||
|         exp_idx = DatetimeIndex( | ||||
|             [ | ||||
|                 "2011-01-01 00:00:00+01:00", | ||||
|                 "2011-01-01 01:00:00+01:00", | ||||
|                 "2011-01-01 02:00:00+01:00", | ||||
|             ], | ||||
|             dtype="M8[ns, Europe/Paris]", | ||||
|             freq="h", | ||||
|         ) | ||||
|         expected = DataFrame( | ||||
|             [[1, 1], [2, 2], [3, 3]], index=exp_idx, columns=["a", "b"] | ||||
|         ) | ||||
|  | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # Second case: indexes in two different time zones (Paris and Tokyo). | ||||
|         idx3 = date_range("2011-01-01", periods=3, freq="h", tz="Asia/Tokyo") | ||||
|         df3 = DataFrame({"b": [1, 2, 3]}, index=idx3) | ||||
|         result = concat([df1, df3], axis=1) | ||||
|  | ||||
|         # The expected index is written with +00:00 offsets and has six | ||||
|         # distinct instants, so no row has values from both frames. | ||||
|         exp_idx = DatetimeIndex( | ||||
|             [ | ||||
|                 "2010-12-31 15:00:00+00:00", | ||||
|                 "2010-12-31 16:00:00+00:00", | ||||
|                 "2010-12-31 17:00:00+00:00", | ||||
|                 "2010-12-31 23:00:00+00:00", | ||||
|                 "2011-01-01 00:00:00+00:00", | ||||
|                 "2011-01-01 01:00:00+00:00", | ||||
|             ] | ||||
|         ).as_unit("ns") | ||||
|  | ||||
|         expected = DataFrame( | ||||
|             [ | ||||
|                 [np.nan, 1], | ||||
|                 [np.nan, 2], | ||||
|                 [np.nan, 3], | ||||
|                 [1, np.nan], | ||||
|                 [2, np.nan], | ||||
|                 [3, np.nan], | ||||
|             ], | ||||
|             index=exp_idx, | ||||
|             columns=["a", "b"], | ||||
|         ) | ||||
|  | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # GH 13783: Concat after resample | ||||
|         result = concat([df1.resample("h").mean(), df2.resample("h").mean()], sort=True) | ||||
|         expected = DataFrame( | ||||
|             {"a": [1, 2, 3] + [np.nan] * 3, "b": [np.nan] * 3 + [1, 2, 3]}, | ||||
|             index=idx1.append(idx1), | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_datetimeindex_freq(self): | ||||
|         # GH 3232 | ||||
|         # Monotonic index result | ||||
|         dr = date_range("01-Jan-2013", periods=100, freq="50ms", tz="UTC") | ||||
|         data = list(range(100)) | ||||
|         expected = DataFrame(data, index=dr) | ||||
|         result = concat([expected[:50], expected[50:]]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # Non-monotonic index result | ||||
|         result = concat([expected[50:], expected[:50]]) | ||||
|         expected = DataFrame(data[50:] + data[:50], index=dr[50:].append(dr[:50])) | ||||
|         # freq is cleared on the expected index before comparing. | ||||
|         expected.index._data.freq = None | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_multiindex_datetime_object_index(self): | ||||
|         # https://github.com/pandas-dev/pandas/issues/11058 | ||||
|         # ``idx`` is an object-dtype Index of datetime.date values. | ||||
|         idx = Index( | ||||
|             [dt.date(2013, 1, 1), dt.date(2014, 1, 1), dt.date(2015, 1, 1)], | ||||
|             dtype="object", | ||||
|         ) | ||||
|  | ||||
|         # ``s`` uses the first two dates as its second index level. | ||||
|         s = Series( | ||||
|             ["a", "b"], | ||||
|             index=MultiIndex.from_arrays( | ||||
|                 [ | ||||
|                     [1, 2], | ||||
|                     idx[:-1], | ||||
|                 ], | ||||
|                 names=["first", "second"], | ||||
|             ), | ||||
|         ) | ||||
|         # ``s2`` uses the first and last dates. | ||||
|         s2 = Series( | ||||
|             ["a", "b"], | ||||
|             index=MultiIndex.from_arrays( | ||||
|                 [[1, 2], idx[::2]], | ||||
|                 names=["first", "second"], | ||||
|             ), | ||||
|         ) | ||||
|         mi = MultiIndex.from_arrays( | ||||
|             [[1, 2, 2], idx], | ||||
|             names=["first", "second"], | ||||
|         ) | ||||
|         # Sanity check: the date level of the expected index is object dtype. | ||||
|         assert mi.levels[1].dtype == object | ||||
|  | ||||
|         expected = DataFrame( | ||||
|             [["a", "a"], ["b", np.nan], [np.nan, "b"]], | ||||
|             index=mi, | ||||
|         ) | ||||
|         result = concat([s, s2], axis=1) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_NaT_series(self): | ||||
|         # GH 11693 | ||||
|         # test for merging NaT series with datetime series. | ||||
|         x = Series( | ||||
|             date_range("20151124 08:00", "20151124 09:00", freq="1h", tz="US/Eastern") | ||||
|         ) | ||||
|         # ``y`` is all-NaT but has the same tz-aware dtype as ``x``. | ||||
|         y = Series(pd.NaT, index=[0, 1], dtype="datetime64[ns, US/Eastern]") | ||||
|         expected = Series([x[0], x[1], pd.NaT, pd.NaT]) | ||||
|  | ||||
|         result = concat([x, y], ignore_index=True) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # all NaT with tz | ||||
|         expected = Series(pd.NaT, index=range(4), dtype="datetime64[ns, US/Eastern]") | ||||
|         result = concat([y, y], ignore_index=True) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_concat_NaT_series2(self): | ||||
|         # without tz | ||||
|         x = Series(date_range("20151124 08:00", "20151124 09:00", freq="1h")) | ||||
|         y = Series(date_range("20151124 10:00", "20151124 11:00", freq="1h")) | ||||
|         # Overwrite every value of ``y`` with NaT. | ||||
|         y[:] = pd.NaT | ||||
|         expected = Series([x[0], x[1], pd.NaT, pd.NaT]) | ||||
|         result = concat([x, y], ignore_index=True) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # all NaT without tz | ||||
|         x[:] = pd.NaT | ||||
|         expected = Series(pd.NaT, index=range(4), dtype="datetime64[ns]") | ||||
|         result = concat([x, y], ignore_index=True) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("tz", [None, "UTC"]) | ||||
|     def test_concat_NaT_dataframes(self, tz): | ||||
|         # GH 12396 | ||||
|  | ||||
|         # ``first`` is an all-NaT column; ``second`` holds real Timestamps | ||||
|         # created with the same tz argument. | ||||
|         dti = DatetimeIndex([pd.NaT, pd.NaT], tz=tz) | ||||
|         first = DataFrame({0: dti}) | ||||
|         second = DataFrame( | ||||
|             [[Timestamp("2015/01/01", tz=tz)], [Timestamp("2016/01/01", tz=tz)]], | ||||
|             index=[2, 3], | ||||
|         ) | ||||
|         expected = DataFrame( | ||||
|             [ | ||||
|                 pd.NaT, | ||||
|                 pd.NaT, | ||||
|                 Timestamp("2015/01/01", tz=tz), | ||||
|                 Timestamp("2016/01/01", tz=tz), | ||||
|             ] | ||||
|         ) | ||||
|  | ||||
|         result = concat([first, second], axis=0) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("tz1", [None, "UTC"]) | ||||
|     @pytest.mark.parametrize("tz2", [None, "UTC"]) | ||||
|     @pytest.mark.parametrize("item", [pd.NaT, Timestamp("20150101")]) | ||||
|     def test_concat_NaT_dataframes_all_NaT_axis_0( | ||||
|         self, tz1, tz2, item, using_array_manager | ||||
|     ): | ||||
|         # GH 12396 | ||||
|  | ||||
|         # tz-naive | ||||
|         # Each frame is built tz-naive and then localized with its own tz | ||||
|         # parameter (tz1 for ``first``, tz2 for ``second``). | ||||
|         first = DataFrame([[pd.NaT], [pd.NaT]]).apply(lambda x: x.dt.tz_localize(tz1)) | ||||
|         second = DataFrame([item]).apply(lambda x: x.dt.tz_localize(tz2)) | ||||
|  | ||||
|         result = concat([first, second], axis=0) | ||||
|         expected = DataFrame(Series([pd.NaT, pd.NaT, item], index=[0, 1, 0])) | ||||
|         expected = expected.apply(lambda x: x.dt.tz_localize(tz2)) | ||||
|         # With differing time zones the expected frame is object dtype. | ||||
|         if tz1 != tz2: | ||||
|             expected = expected.astype(object) | ||||
|             if item is pd.NaT and not using_array_manager: | ||||
|                 # GH#18463 | ||||
|                 # TODO: setting nan here is to keep the test passing as we | ||||
|                 #  make assert_frame_equal stricter, but is nan really the | ||||
|                 #  ideal behavior here? | ||||
|                 if tz1 is not None: | ||||
|                     expected.iloc[-1, 0] = np.nan | ||||
|                 else: | ||||
|                     expected.iloc[:-1, 0] = np.nan | ||||
|  | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("tz1", [None, "UTC"]) | ||||
|     @pytest.mark.parametrize("tz2", [None, "UTC"]) | ||||
|     def test_concat_NaT_dataframes_all_NaT_axis_1(self, tz1, tz2): | ||||
|         # GH 12396 | ||||
|  | ||||
|         # ``second`` has a single row, so its column is expected to be padded | ||||
|         # with NaT while keeping the tz2 localization. | ||||
|         first = DataFrame(Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1)) | ||||
|         second = DataFrame(Series([pd.NaT]).dt.tz_localize(tz2), columns=[1]) | ||||
|         expected = DataFrame( | ||||
|             { | ||||
|                 0: Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1), | ||||
|                 1: Series([pd.NaT, pd.NaT]).dt.tz_localize(tz2), | ||||
|             } | ||||
|         ) | ||||
|         result = concat([first, second], axis=1) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("tz1", [None, "UTC"]) | ||||
|     @pytest.mark.parametrize("tz2", [None, "UTC"]) | ||||
|     def test_concat_NaT_series_dataframe_all_NaT(self, tz1, tz2): | ||||
|         # GH 12396 | ||||
|  | ||||
|         # tz-naive | ||||
|         # An all-NaT Series (localized with tz1) is concatenated with a | ||||
|         # DataFrame of Timestamps created with tz2. | ||||
|         first = Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1) | ||||
|         second = DataFrame( | ||||
|             [ | ||||
|                 [Timestamp("2015/01/01", tz=tz2)], | ||||
|                 [Timestamp("2016/01/01", tz=tz2)], | ||||
|             ], | ||||
|             index=[2, 3], | ||||
|         ) | ||||
|  | ||||
|         expected = DataFrame( | ||||
|             [ | ||||
|                 pd.NaT, | ||||
|                 pd.NaT, | ||||
|                 Timestamp("2015/01/01", tz=tz2), | ||||
|                 Timestamp("2016/01/01", tz=tz2), | ||||
|             ] | ||||
|         ) | ||||
|         # With differing time zones the expected frame is object dtype. | ||||
|         if tz1 != tz2: | ||||
|             expected = expected.astype(object) | ||||
|  | ||||
|         result = concat([first, second]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| class TestTimezoneConcat: | ||||
|     def test_concat_tz_series(self): | ||||
|         # gh-11755: tz and no tz | ||||
|         x = Series(date_range("20151124 08:00", "20151124 09:00", freq="1h", tz="UTC")) | ||||
|         y = Series(date_range("2012-01-01", "2012-01-02")) | ||||
|         expected = Series([x[0], x[1], y[0], y[1]], dtype="object") | ||||
|         result = concat([x, y], ignore_index=True) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_concat_tz_series2(self): | ||||
|         # gh-11887: concat tz and object | ||||
|         x = Series(date_range("20151124 08:00", "20151124 09:00", freq="1h", tz="UTC")) | ||||
|         y = Series(["a", "b"]) | ||||
|         expected = Series([x[0], x[1], y[0], y[1]], dtype="object") | ||||
|         result = concat([x, y], ignore_index=True) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_concat_tz_series3(self, unit, unit2): | ||||
|         # see gh-12217 and gh-12306 | ||||
|         # Concatenating two UTC times | ||||
|         first = DataFrame([[datetime(2016, 1, 1)]], dtype=f"M8[{unit}]") | ||||
|         first[0] = first[0].dt.tz_localize("UTC") | ||||
|  | ||||
|         second = DataFrame([[datetime(2016, 1, 2)]], dtype=f"M8[{unit2}]") | ||||
|         second[0] = second[0].dt.tz_localize("UTC") | ||||
|  | ||||
|         result = concat([first, second]) | ||||
|         exp_unit = tm.get_finest_unit(unit, unit2) | ||||
|         assert result[0].dtype == f"datetime64[{exp_unit}, UTC]" | ||||
|  | ||||
|     def test_concat_tz_series4(self, unit, unit2): | ||||
|         # Concatenating two London times | ||||
|         first = DataFrame([[datetime(2016, 1, 1)]], dtype=f"M8[{unit}]") | ||||
|         first[0] = first[0].dt.tz_localize("Europe/London") | ||||
|  | ||||
|         second = DataFrame([[datetime(2016, 1, 2)]], dtype=f"M8[{unit2}]") | ||||
|         second[0] = second[0].dt.tz_localize("Europe/London") | ||||
|  | ||||
|         result = concat([first, second]) | ||||
|         exp_unit = tm.get_finest_unit(unit, unit2) | ||||
|         assert result[0].dtype == f"datetime64[{exp_unit}, Europe/London]" | ||||
|  | ||||
|     def test_concat_tz_series5(self, unit, unit2): | ||||
|         # Concatenating 2+1 London times | ||||
|         first = DataFrame( | ||||
|             [[datetime(2016, 1, 1)], [datetime(2016, 1, 2)]], dtype=f"M8[{unit}]" | ||||
|         ) | ||||
|         first[0] = first[0].dt.tz_localize("Europe/London") | ||||
|  | ||||
|         second = DataFrame([[datetime(2016, 1, 3)]], dtype=f"M8[{unit2}]") | ||||
|         second[0] = second[0].dt.tz_localize("Europe/London") | ||||
|  | ||||
|         result = concat([first, second]) | ||||
|         exp_unit = tm.get_finest_unit(unit, unit2) | ||||
|         assert result[0].dtype == f"datetime64[{exp_unit}, Europe/London]" | ||||
|  | ||||
|     def test_concat_tz_series6(self, unit, unit2): | ||||
|         # Concatenating 1+2 London times | ||||
|         first = DataFrame([[datetime(2016, 1, 1)]], dtype=f"M8[{unit}]") | ||||
|         first[0] = first[0].dt.tz_localize("Europe/London") | ||||
|  | ||||
|         second = DataFrame( | ||||
|             [[datetime(2016, 1, 2)], [datetime(2016, 1, 3)]], dtype=f"M8[{unit2}]" | ||||
|         ) | ||||
|         second[0] = second[0].dt.tz_localize("Europe/London") | ||||
|  | ||||
|         result = concat([first, second]) | ||||
|         exp_unit = tm.get_finest_unit(unit, unit2) | ||||
|         assert result[0].dtype == f"datetime64[{exp_unit}, Europe/London]" | ||||
|  | ||||
|     def test_concat_tz_series_tzlocal(self): | ||||
|         # see gh-13583 | ||||
|         x = [ | ||||
|             Timestamp("2011-01-01", tz=dateutil.tz.tzlocal()), | ||||
|             Timestamp("2011-02-01", tz=dateutil.tz.tzlocal()), | ||||
|         ] | ||||
|         y = [ | ||||
|             Timestamp("2012-01-01", tz=dateutil.tz.tzlocal()), | ||||
|             Timestamp("2012-02-01", tz=dateutil.tz.tzlocal()), | ||||
|         ] | ||||
|  | ||||
|         result = concat([Series(x), Series(y)], ignore_index=True) | ||||
|         tm.assert_series_equal(result, Series(x + y)) | ||||
|         assert result.dtype == "datetime64[ns, tzlocal()]" | ||||
|  | ||||
|     def test_concat_tz_series_with_datetimelike(self): | ||||
|         # see gh-12620: tz and timedelta | ||||
|         x = [ | ||||
|             Timestamp("2011-01-01", tz="US/Eastern"), | ||||
|             Timestamp("2011-02-01", tz="US/Eastern"), | ||||
|         ] | ||||
|         y = [pd.Timedelta("1 day"), pd.Timedelta("2 day")] | ||||
|         result = concat([Series(x), Series(y)], ignore_index=True) | ||||
|         tm.assert_series_equal(result, Series(x + y, dtype="object")) | ||||
|  | ||||
|         # tz and period | ||||
|         y = [pd.Period("2011-03", freq="M"), pd.Period("2011-04", freq="M")] | ||||
|         result = concat([Series(x), Series(y)], ignore_index=True) | ||||
|         tm.assert_series_equal(result, Series(x + y, dtype="object")) | ||||
|  | ||||
|     def test_concat_tz_frame(self): | ||||
|         df2 = DataFrame( | ||||
|             { | ||||
|                 "A": Timestamp("20130102", tz="US/Eastern"), | ||||
|                 "B": Timestamp("20130603", tz="CET"), | ||||
|             }, | ||||
|             index=range(5), | ||||
|         ) | ||||
|  | ||||
|         # concat | ||||
|         df3 = concat([df2.A.to_frame(), df2.B.to_frame()], axis=1) | ||||
|         tm.assert_frame_equal(df2, df3) | ||||
|  | ||||
|     def test_concat_multiple_tzs(self): | ||||
|         # GH#12467 | ||||
|         # combining datetime tz-aware and naive DataFrames | ||||
|         ts1 = Timestamp("2015-01-01", tz=None) | ||||
|         ts2 = Timestamp("2015-01-01", tz="UTC") | ||||
|         ts3 = Timestamp("2015-01-01", tz="EST") | ||||
|  | ||||
|         df1 = DataFrame({"time": [ts1]}) | ||||
|         df2 = DataFrame({"time": [ts2]}) | ||||
|         df3 = DataFrame({"time": [ts3]}) | ||||
|  | ||||
|         results = concat([df1, df2]).reset_index(drop=True) | ||||
|         expected = DataFrame({"time": [ts1, ts2]}, dtype=object) | ||||
|         tm.assert_frame_equal(results, expected) | ||||
|  | ||||
|         results = concat([df1, df3]).reset_index(drop=True) | ||||
|         expected = DataFrame({"time": [ts1, ts3]}, dtype=object) | ||||
|         tm.assert_frame_equal(results, expected) | ||||
|  | ||||
|         results = concat([df2, df3]).reset_index(drop=True) | ||||
|         expected = DataFrame({"time": [ts2, ts3]}) | ||||
|         tm.assert_frame_equal(results, expected) | ||||
|  | ||||
|     def test_concat_multiindex_with_tz(self): | ||||
|         # GH 6606 | ||||
|         df = DataFrame( | ||||
|             { | ||||
|                 "dt": DatetimeIndex( | ||||
|                     [ | ||||
|                         datetime(2014, 1, 1), | ||||
|                         datetime(2014, 1, 2), | ||||
|                         datetime(2014, 1, 3), | ||||
|                     ], | ||||
|                     dtype="M8[ns, US/Pacific]", | ||||
|                 ), | ||||
|                 "b": ["A", "B", "C"], | ||||
|                 "c": [1, 2, 3], | ||||
|                 "d": [4, 5, 6], | ||||
|             } | ||||
|         ) | ||||
|         df = df.set_index(["dt", "b"]) | ||||
|  | ||||
|         exp_idx1 = DatetimeIndex( | ||||
|             ["2014-01-01", "2014-01-02", "2014-01-03"] * 2, | ||||
|             dtype="M8[ns, US/Pacific]", | ||||
|             name="dt", | ||||
|         ) | ||||
|         exp_idx2 = Index(["A", "B", "C"] * 2, name="b") | ||||
|         exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2]) | ||||
|         expected = DataFrame( | ||||
|             {"c": [1, 2, 3] * 2, "d": [4, 5, 6] * 2}, index=exp_idx, columns=["c", "d"] | ||||
|         ) | ||||
|  | ||||
|         result = concat([df, df]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_tz_not_aligned(self): | ||||
|         # GH#22796 | ||||
|         ts = pd.to_datetime([1, 2]).tz_localize("UTC") | ||||
|         a = DataFrame({"A": ts}) | ||||
|         b = DataFrame({"A": ts, "B": ts}) | ||||
|         result = concat([a, b], sort=True, ignore_index=True) | ||||
|         expected = DataFrame( | ||||
|             {"A": list(ts) + list(ts), "B": [pd.NaT, pd.NaT] + list(ts)} | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "t1", | ||||
|         [ | ||||
|             "2015-01-01", | ||||
|             pytest.param( | ||||
|                 pd.NaT, | ||||
|                 marks=pytest.mark.xfail( | ||||
|                     reason="GH23037 incorrect dtype when concatenating" | ||||
|                 ), | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_concat_tz_NaT(self, t1): | ||||
|         # GH#22796 | ||||
|         # Concatenating tz-aware multicolumn DataFrames | ||||
|         ts1 = Timestamp(t1, tz="UTC") | ||||
|         ts2 = Timestamp("2015-01-01", tz="UTC") | ||||
|         ts3 = Timestamp("2015-01-01", tz="UTC") | ||||
|  | ||||
|         df1 = DataFrame([[ts1, ts2]]) | ||||
|         df2 = DataFrame([[ts3]]) | ||||
|  | ||||
|         result = concat([df1, df2]) | ||||
|         expected = DataFrame([[ts1, ts2], [ts3, pd.NaT]], index=[0, 0]) | ||||
|  | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_tz_with_empty(self): | ||||
|         # GH 9188 | ||||
|         result = concat( | ||||
|             [DataFrame(date_range("2000", periods=1, tz="UTC")), DataFrame()] | ||||
|         ) | ||||
|         expected = DataFrame(date_range("2000", periods=1, tz="UTC")) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
class TestPeriodConcat:
    """Tests for concatenating a daily-period Series with other Series."""

    def test_concat_period_series(self):
        """Two daily-period Series are expected to stay ``Period[D]``."""
        left = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D"))
        right = Series(pd.PeriodIndex(["2015-10-01", "2016-01-01"], freq="D"))

        result = concat([left, right], ignore_index=True)

        expected = Series(list(left) + list(right), dtype="Period[D]")
        tm.assert_series_equal(result, expected)

    def test_concat_period_multiple_freq_series(self):
        """Daily and monthly periods are expected to give object dtype."""
        daily = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D"))
        monthly = Series(pd.PeriodIndex(["2015-10-01", "2016-01-01"], freq="M"))

        result = concat([daily, monthly], ignore_index=True)

        expected = Series(list(daily) + list(monthly), dtype="object")
        tm.assert_series_equal(result, expected)
        assert result.dtype == "object"

    def test_concat_period_other_series(self):
        """Same dates with daily vs. monthly frequency give object dtype."""
        daily = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D"))
        monthly = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="M"))

        result = concat([daily, monthly], ignore_index=True)

        expected = Series(list(daily) + list(monthly), dtype="object")
        tm.assert_series_equal(result, expected)
        assert result.dtype == "object"

    def test_concat_period_other_series2(self):
        """Periods concatenated with datetimes give object dtype."""
        # non-period
        periods = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D"))
        stamps = Series(DatetimeIndex(["2015-11-01", "2015-12-01"]))

        result = concat([periods, stamps], ignore_index=True)

        expected = Series(list(periods) + list(stamps), dtype="object")
        tm.assert_series_equal(result, expected)
        assert result.dtype == "object"

    def test_concat_period_other_series3(self):
        """Periods concatenated with strings give object dtype."""
        periods = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D"))
        letters = Series(["A", "B"])

        result = concat([periods, letters], ignore_index=True)

        expected = Series(list(periods) + list(letters), dtype="object")
        tm.assert_series_equal(result, expected)
        assert result.dtype == "object"
|  | ||||
|  | ||||
def test_concat_timedelta64_block():
    """Concat a ten-row timedelta frame with itself and compare both halves."""
    deltas = to_timedelta(np.arange(10), unit="s")
    frame = DataFrame({"time": deltas})

    doubled = concat([frame, frame])

    # Rows 0-9 and rows 10-19 are each expected to equal the input frame.
    for half in (doubled.iloc[:10], doubled.iloc[10:]):
        tm.assert_frame_equal(half, frame)
|  | ||||
|  | ||||
def test_concat_multiindex_datetime_nat():
    """Column-wise concat of frames whose MultiIndex contains NaT (GH#44900)."""
    one_key = MultiIndex.from_tuples([(1, pd.NaT)])
    two_keys = MultiIndex.from_tuples([(1, pd.NaT), (2, pd.NaT)])

    left = DataFrame({"a": 1}, index=one_key)
    right = DataFrame({"b": 2}, index=two_keys)

    result = concat([left, right], axis="columns")

    # Column "a" is expected to be NaN for the key that only ``right`` has.
    expected = DataFrame(
        {"a": [1.0, np.nan], "b": 2},
        MultiIndex.from_tuples([(1, pd.NaT), (2, pd.NaT)]),
    )
    tm.assert_frame_equal(result, expected)
|  | ||||
|  | ||||
| def test_concat_float_datetime64(using_array_manager): | ||||
|     # GH#32934 | ||||
|     df_time = DataFrame({"A": pd.array(["2000"], dtype="datetime64[ns]")}) | ||||
|     df_float = DataFrame({"A": pd.array([1.0], dtype="float64")}) | ||||
|  | ||||
|     expected = DataFrame( | ||||
|         { | ||||
|             "A": [ | ||||
|                 pd.array(["2000"], dtype="datetime64[ns]")[0], | ||||
|                 pd.array([1.0], dtype="float64")[0], | ||||
|             ] | ||||
|         }, | ||||
|         index=[0, 0], | ||||
|     ) | ||||
|     result = concat([df_time, df_float]) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     expected = DataFrame({"A": pd.array([], dtype="object")}) | ||||
|     result = concat([df_time.iloc[:0], df_float.iloc[:0]]) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     expected = DataFrame({"A": pd.array([1.0], dtype="object")}) | ||||
|     result = concat([df_time.iloc[:0], df_float]) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     if not using_array_manager: | ||||
|         expected = DataFrame({"A": pd.array(["2000"], dtype="datetime64[ns]")}) | ||||
|         msg = "The behavior of DataFrame concatenation with empty or all-NA entries" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = concat([df_time, df_float.iloc[:0]]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|     else: | ||||
|         expected = DataFrame({"A": pd.array(["2000"], dtype="datetime64[ns]")}).astype( | ||||
|             {"A": "object"} | ||||
|         ) | ||||
|         result = concat([df_time, df_float.iloc[:0]]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
| @ -0,0 +1,299 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._config import using_string_dtype | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     RangeIndex, | ||||
|     Series, | ||||
|     concat, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestEmptyConcat: | ||||
|     def test_handle_empty_objects(self, sort, using_infer_string): | ||||
|         df = DataFrame( | ||||
|             np.random.default_rng(2).standard_normal((10, 4)), columns=list("abcd") | ||||
|         ) | ||||
|  | ||||
|         dfcopy = df[:5].copy() | ||||
|         dfcopy["foo"] = "bar" | ||||
|         empty = df[5:5] | ||||
|  | ||||
|         frames = [dfcopy, empty, empty, df[5:]] | ||||
|         concatted = concat(frames, axis=0, sort=sort) | ||||
|  | ||||
|         expected = df.reindex(columns=["a", "b", "c", "d", "foo"]) | ||||
|         expected["foo"] = expected["foo"].astype( | ||||
|             object if not using_infer_string else "str" | ||||
|         ) | ||||
|         expected.loc[0:4, "foo"] = "bar" | ||||
|  | ||||
|         tm.assert_frame_equal(concatted, expected) | ||||
|  | ||||
|         # empty as first element with time series | ||||
|         # GH3259 | ||||
|         df = DataFrame( | ||||
|             {"A": range(10000)}, index=date_range("20130101", periods=10000, freq="s") | ||||
|         ) | ||||
|         empty = DataFrame() | ||||
|         result = concat([df, empty], axis=1) | ||||
|         tm.assert_frame_equal(result, df) | ||||
|         result = concat([empty, df], axis=1) | ||||
|         tm.assert_frame_equal(result, df) | ||||
|  | ||||
|         result = concat([df, empty]) | ||||
|         tm.assert_frame_equal(result, df) | ||||
|         result = concat([empty, df]) | ||||
|         tm.assert_frame_equal(result, df) | ||||
|  | ||||
|     def test_concat_empty_series(self): | ||||
|         # GH 11082 | ||||
|         s1 = Series([1, 2, 3], name="x") | ||||
|         s2 = Series(name="y", dtype="float64") | ||||
|         res = concat([s1, s2], axis=1) | ||||
|         exp = DataFrame( | ||||
|             {"x": [1, 2, 3], "y": [np.nan, np.nan, np.nan]}, | ||||
|             index=RangeIndex(3), | ||||
|         ) | ||||
|         tm.assert_frame_equal(res, exp) | ||||
|  | ||||
|         s1 = Series([1, 2, 3], name="x") | ||||
|         s2 = Series(name="y", dtype="float64") | ||||
|         msg = "The behavior of array concatenation with empty entries is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             res = concat([s1, s2], axis=0) | ||||
|         # name will be reset | ||||
|         exp = Series([1, 2, 3]) | ||||
|         tm.assert_series_equal(res, exp) | ||||
|  | ||||
|         # empty Series with no name | ||||
|         s1 = Series([1, 2, 3], name="x") | ||||
|         s2 = Series(name=None, dtype="float64") | ||||
|         res = concat([s1, s2], axis=1) | ||||
|         exp = DataFrame( | ||||
|             {"x": [1, 2, 3], 0: [np.nan, np.nan, np.nan]}, | ||||
|             columns=["x", 0], | ||||
|             index=RangeIndex(3), | ||||
|         ) | ||||
|         tm.assert_frame_equal(res, exp) | ||||
|  | ||||
|     @pytest.mark.parametrize("tz", [None, "UTC"]) | ||||
|     @pytest.mark.parametrize("values", [[], [1, 2, 3]]) | ||||
|     def test_concat_empty_series_timelike(self, tz, values): | ||||
|         # GH 18447 | ||||
|  | ||||
|         first = Series([], dtype="M8[ns]").dt.tz_localize(tz) | ||||
|         dtype = None if values else np.float64 | ||||
|         second = Series(values, dtype=dtype) | ||||
|  | ||||
|         expected = DataFrame( | ||||
|             { | ||||
|                 0: Series([pd.NaT] * len(values), dtype="M8[ns]").dt.tz_localize(tz), | ||||
|                 1: values, | ||||
|             } | ||||
|         ) | ||||
|         result = concat([first, second], axis=1) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "left,right,expected", | ||||
|         [ | ||||
|             # booleans | ||||
|             (np.bool_, np.int32, np.object_),  # changed from int32 in 2.0 GH#39817 | ||||
|             (np.bool_, np.float32, np.object_), | ||||
|             # datetime-like | ||||
|             ("m8[ns]", np.bool_, np.object_), | ||||
|             ("m8[ns]", np.int64, np.object_), | ||||
|             ("M8[ns]", np.bool_, np.object_), | ||||
|             ("M8[ns]", np.int64, np.object_), | ||||
|             # categorical | ||||
|             ("category", "category", "category"), | ||||
|             ("category", "object", "object"), | ||||
|         ], | ||||
|     ) | ||||
|     def test_concat_empty_series_dtypes(self, left, right, expected): | ||||
|         # GH#39817, GH#45101 | ||||
|         result = concat([Series(dtype=left), Series(dtype=right)]) | ||||
|         assert result.dtype == expected | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "dtype", ["float64", "int8", "uint8", "bool", "m8[ns]", "M8[ns]"] | ||||
|     ) | ||||
|     def test_concat_empty_series_dtypes_match_roundtrips(self, dtype): | ||||
|         dtype = np.dtype(dtype) | ||||
|  | ||||
|         result = concat([Series(dtype=dtype)]) | ||||
|         assert result.dtype == dtype | ||||
|  | ||||
|         result = concat([Series(dtype=dtype), Series(dtype=dtype)]) | ||||
|         assert result.dtype == dtype | ||||
|  | ||||
|     @pytest.mark.parametrize("dtype", ["float64", "int8", "uint8", "m8[ns]", "M8[ns]"]) | ||||
|     @pytest.mark.parametrize( | ||||
|         "dtype2", | ||||
|         ["float64", "int8", "uint8", "m8[ns]", "M8[ns]"], | ||||
|     ) | ||||
|     def test_concat_empty_series_dtypes_roundtrips(self, dtype, dtype2): | ||||
|         # round-tripping with self & like self | ||||
|         if dtype == dtype2: | ||||
|             pytest.skip("same dtype is not applicable for test") | ||||
|  | ||||
|         def int_result_type(dtype, dtype2): | ||||
|             typs = {dtype.kind, dtype2.kind} | ||||
|             if not len(typs - {"i", "u", "b"}) and ( | ||||
|                 dtype.kind == "i" or dtype2.kind == "i" | ||||
|             ): | ||||
|                 return "i" | ||||
|             elif not len(typs - {"u", "b"}) and ( | ||||
|                 dtype.kind == "u" or dtype2.kind == "u" | ||||
|             ): | ||||
|                 return "u" | ||||
|             return None | ||||
|  | ||||
|         def float_result_type(dtype, dtype2): | ||||
|             typs = {dtype.kind, dtype2.kind} | ||||
|             if not len(typs - {"f", "i", "u"}) and ( | ||||
|                 dtype.kind == "f" or dtype2.kind == "f" | ||||
|             ): | ||||
|                 return "f" | ||||
|             return None | ||||
|  | ||||
|         def get_result_type(dtype, dtype2): | ||||
|             result = float_result_type(dtype, dtype2) | ||||
|             if result is not None: | ||||
|                 return result | ||||
|             result = int_result_type(dtype, dtype2) | ||||
|             if result is not None: | ||||
|                 return result | ||||
|             return "O" | ||||
|  | ||||
|         dtype = np.dtype(dtype) | ||||
|         dtype2 = np.dtype(dtype2) | ||||
|         expected = get_result_type(dtype, dtype2) | ||||
|         result = concat([Series(dtype=dtype), Series(dtype=dtype2)]).dtype | ||||
|         assert result.kind == expected | ||||
|  | ||||
|     def test_concat_empty_series_dtypes_triple(self): | ||||
|         assert ( | ||||
|             concat( | ||||
|                 [Series(dtype="M8[ns]"), Series(dtype=np.bool_), Series(dtype=np.int64)] | ||||
|             ).dtype | ||||
|             == np.object_ | ||||
|         ) | ||||
|  | ||||
|     def test_concat_empty_series_dtype_category_with_array(self): | ||||
|         # GH#18515 | ||||
|         assert ( | ||||
|             concat( | ||||
|                 [Series(np.array([]), dtype="category"), Series(dtype="float64")] | ||||
|             ).dtype | ||||
|             == "float64" | ||||
|         ) | ||||
|  | ||||
|     def test_concat_empty_series_dtypes_sparse(self): | ||||
|         result = concat( | ||||
|             [ | ||||
|                 Series(dtype="float64").astype("Sparse"), | ||||
|                 Series(dtype="float64").astype("Sparse"), | ||||
|             ] | ||||
|         ) | ||||
|         assert result.dtype == "Sparse[float64]" | ||||
|  | ||||
|         result = concat( | ||||
|             [Series(dtype="float64").astype("Sparse"), Series(dtype="float64")] | ||||
|         ) | ||||
|         expected = pd.SparseDtype(np.float64) | ||||
|         assert result.dtype == expected | ||||
|  | ||||
|         result = concat( | ||||
|             [Series(dtype="float64").astype("Sparse"), Series(dtype="object")] | ||||
|         ) | ||||
|         expected = pd.SparseDtype("object") | ||||
|         assert result.dtype == expected | ||||
|  | ||||
|     def test_concat_empty_df_object_dtype(self): | ||||
|         # GH 9149 | ||||
|         df_1 = DataFrame({"Row": [0, 1, 1], "EmptyCol": np.nan, "NumberCol": [1, 2, 3]}) | ||||
|         df_2 = DataFrame(columns=df_1.columns) | ||||
|         result = concat([df_1, df_2], axis=0) | ||||
|         expected = df_1.astype(object) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_empty_dataframe_dtypes(self): | ||||
|         df = DataFrame(columns=list("abc")) | ||||
|         df["a"] = df["a"].astype(np.bool_) | ||||
|         df["b"] = df["b"].astype(np.int32) | ||||
|         df["c"] = df["c"].astype(np.float64) | ||||
|  | ||||
|         result = concat([df, df]) | ||||
|         assert result["a"].dtype == np.bool_ | ||||
|         assert result["b"].dtype == np.int32 | ||||
|         assert result["c"].dtype == np.float64 | ||||
|  | ||||
|         result = concat([df, df.astype(np.float64)]) | ||||
|         assert result["a"].dtype == np.object_ | ||||
|         assert result["b"].dtype == np.float64 | ||||
|         assert result["c"].dtype == np.float64 | ||||
|  | ||||
|     # triggers warning about empty entries | ||||
|     @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") | ||||
|     def test_concat_inner_join_empty(self): | ||||
|         # GH 15328 | ||||
|         df_empty = DataFrame() | ||||
|         df_a = DataFrame({"a": [1, 2]}, index=[0, 1], dtype="int64") | ||||
|         df_expected = DataFrame({"a": []}, index=RangeIndex(0), dtype="int64") | ||||
|  | ||||
|         result = concat([df_a, df_empty], axis=1, join="inner") | ||||
|         tm.assert_frame_equal(result, df_expected) | ||||
|  | ||||
|         result = concat([df_a, df_empty], axis=1, join="outer") | ||||
|         tm.assert_frame_equal(result, df_a) | ||||
|  | ||||
|     def test_empty_dtype_coerce(self): | ||||
|         # xref to #12411 | ||||
|         # xref to #12045 | ||||
|         # xref to #11594 | ||||
|         # see below | ||||
|  | ||||
|         # 10571 | ||||
|         df1 = DataFrame(data=[[1, None], [2, None]], columns=["a", "b"]) | ||||
|         df2 = DataFrame(data=[[3, None], [4, None]], columns=["a", "b"]) | ||||
|         result = concat([df1, df2]) | ||||
|         expected = df1.dtypes | ||||
|         tm.assert_series_equal(result.dtypes, expected) | ||||
|  | ||||
|     def test_concat_empty_dataframe(self): | ||||
|         # 39037 | ||||
|         df1 = DataFrame(columns=["a", "b"]) | ||||
|         df2 = DataFrame(columns=["b", "c"]) | ||||
|         result = concat([df1, df2, df1]) | ||||
|         expected = DataFrame(columns=["a", "b", "c"]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         df3 = DataFrame(columns=["a", "b"]) | ||||
|         df4 = DataFrame(columns=["b"]) | ||||
|         result = concat([df3, df4]) | ||||
|         expected = DataFrame(columns=["a", "b"]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_empty_dataframe_different_dtypes(self, using_infer_string): | ||||
|         # 39037 | ||||
|         df1 = DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}) | ||||
|         df2 = DataFrame({"a": [1, 2, 3]}) | ||||
|  | ||||
|         result = concat([df1[:0], df2[:0]]) | ||||
|         assert result["a"].dtype == np.int64 | ||||
|         assert result["b"].dtype == np.object_ if not using_infer_string else "str" | ||||
|  | ||||
|     def test_concat_to_empty_ea(self): | ||||
|         """48510 `concat` to an empty EA should maintain type EA dtype.""" | ||||
|         df_empty = DataFrame({"a": pd.array([], dtype=pd.Int64Dtype())}) | ||||
|         df_new = DataFrame({"a": pd.array([1, 2, 3], dtype=pd.Int64Dtype())}) | ||||
|         expected = df_new.copy() | ||||
|         result = concat([df_empty, df_new]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
| @ -0,0 +1,470 @@ | ||||
| from copy import deepcopy | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.errors import PerformanceWarning | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
|     concat, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestIndexConcat: | ||||
|     def test_concat_ignore_index(self, sort): | ||||
|         frame1 = DataFrame( | ||||
|             {"test1": ["a", "b", "c"], "test2": [1, 2, 3], "test3": [4.5, 3.2, 1.2]} | ||||
|         ) | ||||
|         frame2 = DataFrame({"test3": [5.2, 2.2, 4.3]}) | ||||
|         frame1.index = Index(["x", "y", "z"]) | ||||
|         frame2.index = Index(["x", "y", "q"]) | ||||
|  | ||||
|         v1 = concat([frame1, frame2], axis=1, ignore_index=True, sort=sort) | ||||
|  | ||||
|         nan = np.nan | ||||
|         expected = DataFrame( | ||||
|             [ | ||||
|                 [nan, nan, nan, 4.3], | ||||
|                 ["a", 1, 4.5, 5.2], | ||||
|                 ["b", 2, 3.2, 2.2], | ||||
|                 ["c", 3, 1.2, nan], | ||||
|             ], | ||||
|             index=Index(["q", "x", "y", "z"]), | ||||
|         ) | ||||
|         if not sort: | ||||
|             expected = expected.loc[["x", "y", "z", "q"]] | ||||
|  | ||||
|         tm.assert_frame_equal(v1, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "name_in1,name_in2,name_in3,name_out", | ||||
|         [ | ||||
|             ("idx", "idx", "idx", "idx"), | ||||
|             ("idx", "idx", None, None), | ||||
|             ("idx", None, None, None), | ||||
|             ("idx1", "idx2", None, None), | ||||
|             ("idx1", "idx1", "idx2", None), | ||||
|             ("idx1", "idx2", "idx3", None), | ||||
|             (None, None, None, None), | ||||
|         ], | ||||
|     ) | ||||
|     def test_concat_same_index_names(self, name_in1, name_in2, name_in3, name_out): | ||||
|         # GH13475 | ||||
|         indices = [ | ||||
|             Index(["a", "b", "c"], name=name_in1), | ||||
|             Index(["b", "c", "d"], name=name_in2), | ||||
|             Index(["c", "d", "e"], name=name_in3), | ||||
|         ] | ||||
|         frames = [ | ||||
|             DataFrame({c: [0, 1, 2]}, index=i) for i, c in zip(indices, ["x", "y", "z"]) | ||||
|         ] | ||||
|         result = concat(frames, axis=1) | ||||
|  | ||||
|         exp_ind = Index(["a", "b", "c", "d", "e"], name=name_out) | ||||
|         expected = DataFrame( | ||||
|             { | ||||
|                 "x": [0, 1, 2, np.nan, np.nan], | ||||
|                 "y": [np.nan, 0, 1, 2, np.nan], | ||||
|                 "z": [np.nan, np.nan, 0, 1, 2], | ||||
|             }, | ||||
|             index=exp_ind, | ||||
|         ) | ||||
|  | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_rename_index(self): | ||||
|         a = DataFrame( | ||||
|             np.random.default_rng(2).random((3, 3)), | ||||
|             columns=list("ABC"), | ||||
|             index=Index(list("abc"), name="index_a"), | ||||
|         ) | ||||
|         b = DataFrame( | ||||
|             np.random.default_rng(2).random((3, 3)), | ||||
|             columns=list("ABC"), | ||||
|             index=Index(list("abc"), name="index_b"), | ||||
|         ) | ||||
|  | ||||
|         result = concat([a, b], keys=["key0", "key1"], names=["lvl0", "lvl1"]) | ||||
|  | ||||
|         exp = concat([a, b], keys=["key0", "key1"], names=["lvl0"]) | ||||
|         names = list(exp.index.names) | ||||
|         names[1] = "lvl1" | ||||
|         exp.index.set_names(names, inplace=True) | ||||
|  | ||||
|         tm.assert_frame_equal(result, exp) | ||||
|         assert result.index.names == exp.index.names | ||||
|  | ||||
|     def test_concat_copy_index_series(self, axis, using_copy_on_write): | ||||
|         # GH 29879 | ||||
|         ser = Series([1, 2]) | ||||
|         comb = concat([ser, ser], axis=axis, copy=True) | ||||
|         if not using_copy_on_write or axis in [0, "index"]: | ||||
|             assert comb.index is not ser.index | ||||
|         else: | ||||
|             assert comb.index is ser.index | ||||
|  | ||||
|     def test_concat_copy_index_frame(self, axis, using_copy_on_write): | ||||
|         # GH 29879 | ||||
|         df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) | ||||
|         comb = concat([df, df], axis=axis, copy=True) | ||||
|         if not using_copy_on_write: | ||||
|             assert not comb.index.is_(df.index) | ||||
|             assert not comb.columns.is_(df.columns) | ||||
|         elif axis in [0, "index"]: | ||||
|             assert not comb.index.is_(df.index) | ||||
|             assert comb.columns.is_(df.columns) | ||||
|         elif axis in [1, "columns"]: | ||||
|             assert comb.index.is_(df.index) | ||||
|             assert not comb.columns.is_(df.columns) | ||||
|  | ||||
|     def test_default_index(self): | ||||
|         # is_series and ignore_index | ||||
|         s1 = Series([1, 2, 3], name="x") | ||||
|         s2 = Series([4, 5, 6], name="y") | ||||
|         res = concat([s1, s2], axis=1, ignore_index=True) | ||||
|         assert isinstance(res.columns, pd.RangeIndex) | ||||
|         exp = DataFrame([[1, 4], [2, 5], [3, 6]]) | ||||
|         # use check_index_type=True to check the result have | ||||
|         # RangeIndex (default index) | ||||
|         tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True) | ||||
|  | ||||
|         # is_series and all inputs have no names | ||||
|         s1 = Series([1, 2, 3]) | ||||
|         s2 = Series([4, 5, 6]) | ||||
|         res = concat([s1, s2], axis=1, ignore_index=False) | ||||
|         assert isinstance(res.columns, pd.RangeIndex) | ||||
|         exp = DataFrame([[1, 4], [2, 5], [3, 6]]) | ||||
|         exp.columns = pd.RangeIndex(2) | ||||
|         tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True) | ||||
|  | ||||
|         # is_dataframe and ignore_index | ||||
|         df1 = DataFrame({"A": [1, 2], "B": [5, 6]}) | ||||
|         df2 = DataFrame({"A": [3, 4], "B": [7, 8]}) | ||||
|  | ||||
|         res = concat([df1, df2], axis=0, ignore_index=True) | ||||
|         exp = DataFrame([[1, 5], [2, 6], [3, 7], [4, 8]], columns=["A", "B"]) | ||||
|         tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True) | ||||
|  | ||||
|         res = concat([df1, df2], axis=1, ignore_index=True) | ||||
|         exp = DataFrame([[1, 5, 3, 7], [2, 6, 4, 8]]) | ||||
|         tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True) | ||||
|  | ||||
|     def test_dups_index(self): | ||||
|         # GH 4771 | ||||
|  | ||||
|         # single dtypes | ||||
|         df = DataFrame( | ||||
|             np.random.default_rng(2).integers(0, 10, size=40).reshape(10, 4), | ||||
|             columns=["A", "A", "C", "C"], | ||||
|         ) | ||||
|  | ||||
|         result = concat([df, df], axis=1) | ||||
|         tm.assert_frame_equal(result.iloc[:, :4], df) | ||||
|         tm.assert_frame_equal(result.iloc[:, 4:], df) | ||||
|  | ||||
|         result = concat([df, df], axis=0) | ||||
|         tm.assert_frame_equal(result.iloc[:10], df) | ||||
|         tm.assert_frame_equal(result.iloc[10:], df) | ||||
|  | ||||
|         # multi dtypes | ||||
|         df = concat( | ||||
|             [ | ||||
|                 DataFrame( | ||||
|                     np.random.default_rng(2).standard_normal((10, 4)), | ||||
|                     columns=["A", "A", "B", "B"], | ||||
|                 ), | ||||
|                 DataFrame( | ||||
|                     np.random.default_rng(2).integers(0, 10, size=20).reshape(10, 2), | ||||
|                     columns=["A", "C"], | ||||
|                 ), | ||||
|             ], | ||||
|             axis=1, | ||||
|         ) | ||||
|  | ||||
|         result = concat([df, df], axis=1) | ||||
|         tm.assert_frame_equal(result.iloc[:, :6], df) | ||||
|         tm.assert_frame_equal(result.iloc[:, 6:], df) | ||||
|  | ||||
|         result = concat([df, df], axis=0) | ||||
|         tm.assert_frame_equal(result.iloc[:10], df) | ||||
|         tm.assert_frame_equal(result.iloc[10:], df) | ||||
|  | ||||
|         # append | ||||
|         result = df.iloc[0:8, :]._append(df.iloc[8:]) | ||||
|         tm.assert_frame_equal(result, df) | ||||
|  | ||||
|         result = df.iloc[0:8, :]._append(df.iloc[8:9])._append(df.iloc[9:10]) | ||||
|         tm.assert_frame_equal(result, df) | ||||
|  | ||||
|         expected = concat([df, df], axis=0) | ||||
|         result = df._append(df) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| class TestMultiIndexConcat: | ||||
|     def test_concat_multiindex_with_keys(self, multiindex_dataframe_random_data): | ||||
|         frame = multiindex_dataframe_random_data | ||||
|         index = frame.index | ||||
|         result = concat([frame, frame], keys=[0, 1], names=["iteration"]) | ||||
|  | ||||
|         assert result.index.names == ("iteration",) + index.names | ||||
|         tm.assert_frame_equal(result.loc[0], frame) | ||||
|         tm.assert_frame_equal(result.loc[1], frame) | ||||
|         assert result.index.nlevels == 3 | ||||
|  | ||||
|     def test_concat_multiindex_with_none_in_index_names(self): | ||||
|         # GH 15787 | ||||
|         index = MultiIndex.from_product([[1], range(5)], names=["level1", None]) | ||||
|         df = DataFrame({"col": range(5)}, index=index, dtype=np.int32) | ||||
|  | ||||
|         result = concat([df, df], keys=[1, 2], names=["level2"]) | ||||
|         index = MultiIndex.from_product( | ||||
|             [[1, 2], [1], range(5)], names=["level2", "level1", None] | ||||
|         ) | ||||
|         expected = DataFrame({"col": list(range(5)) * 2}, index=index, dtype=np.int32) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = concat([df, df[:2]], keys=[1, 2], names=["level2"]) | ||||
|         level2 = [1] * 5 + [2] * 2 | ||||
|         level1 = [1] * 7 | ||||
|         no_name = list(range(5)) + list(range(2)) | ||||
|         tuples = list(zip(level2, level1, no_name)) | ||||
|         index = MultiIndex.from_tuples(tuples, names=["level2", "level1", None]) | ||||
|         expected = DataFrame({"col": no_name}, index=index, dtype=np.int32) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_multiindex_rangeindex(self): | ||||
|         # GH13542 | ||||
|         # when multi-index levels are RangeIndex objects | ||||
|         # there is a bug in concat with objects of len 1 | ||||
|  | ||||
|         df = DataFrame(np.random.default_rng(2).standard_normal((9, 2))) | ||||
|         df.index = MultiIndex( | ||||
|             levels=[pd.RangeIndex(3), pd.RangeIndex(3)], | ||||
|             codes=[np.repeat(np.arange(3), 3), np.tile(np.arange(3), 3)], | ||||
|         ) | ||||
|  | ||||
|         res = concat([df.iloc[[2, 3, 4], :], df.iloc[[5], :]]) | ||||
|         exp = df.iloc[[2, 3, 4, 5], :] | ||||
|         tm.assert_frame_equal(res, exp) | ||||
|  | ||||
|     def test_concat_multiindex_dfs_with_deepcopy(self): | ||||
|         # GH 9967 | ||||
|         example_multiindex1 = MultiIndex.from_product([["a"], ["b"]]) | ||||
|         example_dataframe1 = DataFrame([0], index=example_multiindex1) | ||||
|  | ||||
|         example_multiindex2 = MultiIndex.from_product([["a"], ["c"]]) | ||||
|         example_dataframe2 = DataFrame([1], index=example_multiindex2) | ||||
|  | ||||
|         example_dict = {"s1": example_dataframe1, "s2": example_dataframe2} | ||||
|         expected_index = MultiIndex( | ||||
|             levels=[["s1", "s2"], ["a"], ["b", "c"]], | ||||
|             codes=[[0, 1], [0, 0], [0, 1]], | ||||
|             names=["testname", None, None], | ||||
|         ) | ||||
|         expected = DataFrame([[0], [1]], index=expected_index) | ||||
|         result_copy = concat(deepcopy(example_dict), names=["testname"]) | ||||
|         tm.assert_frame_equal(result_copy, expected) | ||||
|         result_no_copy = concat(example_dict, names=["testname"]) | ||||
|         tm.assert_frame_equal(result_no_copy, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "mi1_list", | ||||
|         [ | ||||
|             [["a"], range(2)], | ||||
|             [["b"], np.arange(2.0, 4.0)], | ||||
|             [["c"], ["A", "B"]], | ||||
|             [["d"], pd.date_range(start="2017", end="2018", periods=2)], | ||||
|         ], | ||||
|     ) | ||||
|     @pytest.mark.parametrize( | ||||
|         "mi2_list", | ||||
|         [ | ||||
|             [["a"], range(2)], | ||||
|             [["b"], np.arange(2.0, 4.0)], | ||||
|             [["c"], ["A", "B"]], | ||||
|             [["d"], pd.date_range(start="2017", end="2018", periods=2)], | ||||
|         ], | ||||
|     ) | ||||
|     def test_concat_with_various_multiindex_dtypes( | ||||
|         self, mi1_list: list, mi2_list: list | ||||
|     ): | ||||
|         # GitHub #23478 | ||||
|         mi1 = MultiIndex.from_product(mi1_list) | ||||
|         mi2 = MultiIndex.from_product(mi2_list) | ||||
|  | ||||
|         df1 = DataFrame(np.zeros((1, len(mi1))), columns=mi1) | ||||
|         df2 = DataFrame(np.zeros((1, len(mi2))), columns=mi2) | ||||
|  | ||||
|         if mi1_list[0] == mi2_list[0]: | ||||
|             expected_mi = MultiIndex( | ||||
|                 levels=[mi1_list[0], list(mi1_list[1])], | ||||
|                 codes=[[0, 0, 0, 0], [0, 1, 0, 1]], | ||||
|             ) | ||||
|         else: | ||||
|             expected_mi = MultiIndex( | ||||
|                 levels=[ | ||||
|                     mi1_list[0] + mi2_list[0], | ||||
|                     list(mi1_list[1]) + list(mi2_list[1]), | ||||
|                 ], | ||||
|                 codes=[[0, 0, 1, 1], [0, 1, 2, 3]], | ||||
|             ) | ||||
|  | ||||
|         expected_df = DataFrame(np.zeros((1, len(expected_mi))), columns=expected_mi) | ||||
|  | ||||
|         with tm.assert_produces_warning(None): | ||||
|             result_df = concat((df1, df2), axis=1) | ||||
|  | ||||
|         tm.assert_frame_equal(expected_df, result_df) | ||||
|  | ||||
|     def test_concat_multiindex_(self): | ||||
|         # GitHub #44786 | ||||
|         df = DataFrame({"col": ["a", "b", "c"]}, index=["1", "2", "2"]) | ||||
|         df = concat([df], keys=["X"]) | ||||
|  | ||||
|         iterables = [["X"], ["1", "2", "2"]] | ||||
|         result_index = df.index | ||||
|         expected_index = MultiIndex.from_product(iterables) | ||||
|  | ||||
|         tm.assert_index_equal(result_index, expected_index) | ||||
|  | ||||
|         result_df = df | ||||
|         expected_df = DataFrame( | ||||
|             {"col": ["a", "b", "c"]}, index=MultiIndex.from_product(iterables) | ||||
|         ) | ||||
|         tm.assert_frame_equal(result_df, expected_df) | ||||
|  | ||||
|     def test_concat_with_key_not_unique(self): | ||||
|         """Row lookup on a concat result built with a repeated key. | ||||
|  | ||||
|         ``keys=["x", "y", "x"]`` repeats the outer label; ``.loc`` on the result | ||||
|         is compared with the same lookup on a frame whose index is written out | ||||
|         by hand. | ||||
|         """ | ||||
|         # GitHub #46519 | ||||
|         df1 = DataFrame({"name": [1]}) | ||||
|         df2 = DataFrame({"name": [2]}) | ||||
|         df3 = DataFrame({"name": [3]}) | ||||
|         df_a = concat([df1, df2, df3], keys=["x", "y", "x"]) | ||||
|         # the warning is caused by indexing unsorted multi-index | ||||
|         with tm.assert_produces_warning( | ||||
|             PerformanceWarning, match="indexing past lexsort depth" | ||||
|         ): | ||||
|             out_a = df_a.loc[("x", 0), :] | ||||
|  | ||||
|         # Reference frame: the same three rows with the tuples spelled out. | ||||
|         df_b = DataFrame( | ||||
|             {"name": [1, 2, 3]}, index=Index([("x", 0), ("y", 0), ("x", 0)]) | ||||
|         ) | ||||
|         with tm.assert_produces_warning( | ||||
|             PerformanceWarning, match="indexing past lexsort depth" | ||||
|         ): | ||||
|             out_b = df_b.loc[("x", 0)] | ||||
|  | ||||
|         tm.assert_frame_equal(out_a, out_b) | ||||
|  | ||||
|         # Same check with several rows per input frame. | ||||
|         df1 = DataFrame({"name": ["a", "a", "b"]}) | ||||
|         df2 = DataFrame({"name": ["a", "b"]}) | ||||
|         df3 = DataFrame({"name": ["c", "d"]}) | ||||
|         df_a = concat([df1, df2, df3], keys=["x", "y", "x"]) | ||||
|         with tm.assert_produces_warning( | ||||
|             PerformanceWarning, match="indexing past lexsort depth" | ||||
|         ): | ||||
|             out_a = df_a.loc[("x", 0), :] | ||||
|  | ||||
|         # Reference frame built from explicit key / position columns. | ||||
|         df_b = DataFrame( | ||||
|             { | ||||
|                 "a": ["x", "x", "x", "y", "y", "x", "x"], | ||||
|                 "b": [0, 1, 2, 0, 1, 0, 1], | ||||
|                 "name": list("aababcd"), | ||||
|             } | ||||
|         ).set_index(["a", "b"]) | ||||
|         # Clear the level names set by set_index before comparing. | ||||
|         df_b.index.names = [None, None] | ||||
|         with tm.assert_produces_warning( | ||||
|             PerformanceWarning, match="indexing past lexsort depth" | ||||
|         ): | ||||
|             out_b = df_b.loc[("x", 0), :] | ||||
|  | ||||
|         tm.assert_frame_equal(out_a, out_b) | ||||
|  | ||||
|     def test_concat_with_duplicated_levels(self): | ||||
|         # keyword levels should be unique | ||||
|         df1 = DataFrame({"A": [1]}, index=["x"]) | ||||
|         df2 = DataFrame({"A": [1]}, index=["y"]) | ||||
|         msg = r"Level values not unique: \['x', 'y', 'y'\]" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             concat([df1, df2], keys=["x", "y"], levels=[["x", "y", "y"]]) | ||||
|  | ||||
|     @pytest.mark.parametrize("levels", [[["x", "y"]], [["x", "y", "y"]]]) | ||||
|     def test_concat_with_levels_with_none_keys(self, levels): | ||||
|         df1 = DataFrame({"A": [1]}, index=["x"]) | ||||
|         df2 = DataFrame({"A": [1]}, index=["y"]) | ||||
|         msg = "levels supported only when keys is not None" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             concat([df1, df2], levels=levels) | ||||
|  | ||||
|     def test_concat_range_index_result(self): | ||||
|         # GH#47501 | ||||
|         df1 = DataFrame({"a": [1, 2]}) | ||||
|         df2 = DataFrame({"b": [1, 2]}) | ||||
|  | ||||
|         result = concat([df1, df2], sort=True, axis=1) | ||||
|         expected = DataFrame({"a": [1, 2], "b": [1, 2]}) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|         expected_index = pd.RangeIndex(0, 2) | ||||
|         tm.assert_index_equal(result.index, expected_index, exact=True) | ||||
|  | ||||
|     def test_concat_index_keep_dtype(self): | ||||
|         # GH#47329 | ||||
|         df1 = DataFrame([[0, 1, 1]], columns=Index([1, 2, 3], dtype="object")) | ||||
|         df2 = DataFrame([[0, 1]], columns=Index([1, 2], dtype="object")) | ||||
|         result = concat([df1, df2], ignore_index=True, join="outer", sort=True) | ||||
|         expected = DataFrame( | ||||
|             [[0, 1, 1.0], [0, 1, np.nan]], columns=Index([1, 2, 3], dtype="object") | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_index_keep_dtype_ea_numeric(self, any_numeric_ea_dtype): | ||||
|         # GH#47329 | ||||
|         df1 = DataFrame( | ||||
|             [[0, 1, 1]], columns=Index([1, 2, 3], dtype=any_numeric_ea_dtype) | ||||
|         ) | ||||
|         df2 = DataFrame([[0, 1]], columns=Index([1, 2], dtype=any_numeric_ea_dtype)) | ||||
|         result = concat([df1, df2], ignore_index=True, join="outer", sort=True) | ||||
|         expected = DataFrame( | ||||
|             [[0, 1, 1.0], [0, 1, np.nan]], | ||||
|             columns=Index([1, 2, 3], dtype=any_numeric_ea_dtype), | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("dtype", ["Int8", "Int16", "Int32"]) | ||||
|     def test_concat_index_find_common(self, dtype): | ||||
|         # GH#47329 | ||||
|         df1 = DataFrame([[0, 1, 1]], columns=Index([1, 2, 3], dtype=dtype)) | ||||
|         df2 = DataFrame([[0, 1]], columns=Index([1, 2], dtype="Int32")) | ||||
|         result = concat([df1, df2], ignore_index=True, join="outer", sort=True) | ||||
|         expected = DataFrame( | ||||
|             [[0, 1, 1.0], [0, 1, np.nan]], columns=Index([1, 2, 3], dtype="Int32") | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_axis_1_sort_false_rangeindex(self, using_infer_string): | ||||
|         # GH 46675 | ||||
|         s1 = Series(["a", "b", "c"]) | ||||
|         s2 = Series(["a", "b"]) | ||||
|         s3 = Series(["a", "b", "c", "d"]) | ||||
|         s4 = Series([], dtype=object if not using_infer_string else "str") | ||||
|         result = concat( | ||||
|             [s1, s2, s3, s4], sort=False, join="outer", ignore_index=False, axis=1 | ||||
|         ) | ||||
|         expected = DataFrame( | ||||
|             [ | ||||
|                 ["a"] * 3 + [np.nan], | ||||
|                 ["b"] * 3 + [np.nan], | ||||
|                 ["c", np.nan] * 2, | ||||
|                 [np.nan] * 2 + ["d"] + [np.nan], | ||||
|             ], | ||||
|             dtype=object if not using_infer_string else "str", | ||||
|         ) | ||||
|         tm.assert_frame_equal( | ||||
|             result, expected, check_index_type=True, check_column_type=True | ||||
|         ) | ||||
| @ -0,0 +1,54 @@ | ||||
| from io import StringIO | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     concat, | ||||
|     read_csv, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestInvalidConcat: | ||||
|     @pytest.mark.parametrize("obj", [1, {}, [1, 2], (1, 2)]) | ||||
|     def test_concat_invalid(self, obj): | ||||
|         # trying to concat a ndframe with a non-ndframe | ||||
|         df1 = DataFrame(range(2)) | ||||
|         msg = ( | ||||
|             f"cannot concatenate object of type '{type(obj)}'; " | ||||
|             "only Series and DataFrame objs are valid" | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             concat([df1, obj]) | ||||
|  | ||||
|     def test_concat_invalid_first_argument(self): | ||||
|         df1 = DataFrame(range(2)) | ||||
|         msg = ( | ||||
|             "first argument must be an iterable of pandas " | ||||
|             'objects, you passed an object of type "DataFrame"' | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             concat(df1) | ||||
|  | ||||
|     def test_concat_generator_obj(self): | ||||
|         # generator ok though | ||||
|         concat(DataFrame(np.random.default_rng(2).random((5, 5))) for _ in range(3)) | ||||
|  | ||||
|     def test_concat_textreader_obj(self): | ||||
|         """concat accepts the chunked reader from ``read_csv(..., chunksize=1)``.""" | ||||
|         # text reader ok | ||||
|         # GH6583 | ||||
|         data = """index,A,B,C,D | ||||
|                   foo,2,3,4,5 | ||||
|                   bar,7,8,9,10 | ||||
|                   baz,12,13,14,15 | ||||
|                   qux,12,13,14,15 | ||||
|                   foo2,12,13,14,15 | ||||
|                   bar2,12,13,14,15 | ||||
|                """ | ||||
|  | ||||
|         # Concatenate the one-row chunks yielded by the reader. | ||||
|         with read_csv(StringIO(data), chunksize=1) as reader: | ||||
|             result = concat(reader, ignore_index=True) | ||||
|         # Reading the same text in one go must give the same frame. | ||||
|         expected = read_csv(StringIO(data)) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
| @ -0,0 +1,175 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
|     concat, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestSeriesConcat: | ||||
|     def test_concat_series(self): | ||||
|         ts = Series( | ||||
|             np.arange(20, dtype=np.float64), | ||||
|             index=date_range("2020-01-01", periods=20), | ||||
|             name="foo", | ||||
|         ) | ||||
|         ts.name = "foo" | ||||
|  | ||||
|         pieces = [ts[:5], ts[5:15], ts[15:]] | ||||
|  | ||||
|         result = concat(pieces) | ||||
|         tm.assert_series_equal(result, ts) | ||||
|         assert result.name == ts.name | ||||
|  | ||||
|         result = concat(pieces, keys=[0, 1, 2]) | ||||
|         expected = ts.copy() | ||||
|  | ||||
|         ts.index = DatetimeIndex(np.array(ts.index.values, dtype="M8[ns]")) | ||||
|  | ||||
|         exp_codes = [np.repeat([0, 1, 2], [len(x) for x in pieces]), np.arange(len(ts))] | ||||
|         exp_index = MultiIndex(levels=[[0, 1, 2], ts.index], codes=exp_codes) | ||||
|         expected.index = exp_index | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_concat_empty_and_non_empty_series_regression(self): | ||||
|         # GH 18187 regression test | ||||
|         s1 = Series([1]) | ||||
|         s2 = Series([], dtype=object) | ||||
|  | ||||
|         expected = s1 | ||||
|         msg = "The behavior of array concatenation with empty entries is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = concat([s1, s2]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_concat_series_axis1(self): | ||||
|         ts = Series( | ||||
|             np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10) | ||||
|         ) | ||||
|  | ||||
|         pieces = [ts[:-2], ts[2:], ts[2:-2]] | ||||
|  | ||||
|         result = concat(pieces, axis=1) | ||||
|         expected = DataFrame(pieces).T | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = concat(pieces, keys=["A", "B", "C"], axis=1) | ||||
|         expected = DataFrame(pieces, index=["A", "B", "C"]).T | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_series_axis1_preserves_series_names(self): | ||||
|         # preserve series names, #2489 | ||||
|         s = Series(np.random.default_rng(2).standard_normal(5), name="A") | ||||
|         s2 = Series(np.random.default_rng(2).standard_normal(5), name="B") | ||||
|  | ||||
|         result = concat([s, s2], axis=1) | ||||
|         expected = DataFrame({"A": s, "B": s2}) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         s2.name = None | ||||
|         result = concat([s, s2], axis=1) | ||||
|         tm.assert_index_equal(result.columns, Index(["A", 0], dtype="object")) | ||||
|  | ||||
|     def test_concat_series_axis1_with_reindex(self, sort): | ||||
|         # must reindex, #2603 | ||||
|         s = Series( | ||||
|             np.random.default_rng(2).standard_normal(3), index=["c", "a", "b"], name="A" | ||||
|         ) | ||||
|         s2 = Series( | ||||
|             np.random.default_rng(2).standard_normal(4), | ||||
|             index=["d", "a", "b", "c"], | ||||
|             name="B", | ||||
|         ) | ||||
|         result = concat([s, s2], axis=1, sort=sort) | ||||
|         expected = DataFrame({"A": s, "B": s2}, index=["c", "a", "b", "d"]) | ||||
|         if sort: | ||||
|             expected = expected.sort_index() | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_series_axis1_names_applied(self): | ||||
|         # ensure names argument is not ignored on axis=1, #23490 | ||||
|         s = Series([1, 2, 3]) | ||||
|         s2 = Series([4, 5, 6]) | ||||
|         result = concat([s, s2], axis=1, keys=["a", "b"], names=["A"]) | ||||
|         expected = DataFrame( | ||||
|             [[1, 4], [2, 5], [3, 6]], columns=Index(["a", "b"], name="A") | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = concat([s, s2], axis=1, keys=[("a", 1), ("b", 2)], names=["A", "B"]) | ||||
|         expected = DataFrame( | ||||
|             [[1, 4], [2, 5], [3, 6]], | ||||
|             columns=MultiIndex.from_tuples([("a", 1), ("b", 2)], names=["A", "B"]), | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_series_axis1_same_names_ignore_index(self): | ||||
|         dates = date_range("01-Jan-2013", "01-Jan-2014", freq="MS")[0:-1] | ||||
|         s1 = Series( | ||||
|             np.random.default_rng(2).standard_normal(len(dates)), | ||||
|             index=dates, | ||||
|             name="value", | ||||
|         ) | ||||
|         s2 = Series( | ||||
|             np.random.default_rng(2).standard_normal(len(dates)), | ||||
|             index=dates, | ||||
|             name="value", | ||||
|         ) | ||||
|  | ||||
|         result = concat([s1, s2], axis=1, ignore_index=True) | ||||
|         expected = Index(range(2)) | ||||
|  | ||||
|         tm.assert_index_equal(result.columns, expected, exact=True) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "s1name,s2name", [(np.int64(190), (43, 0)), (190, (43, 0))] | ||||
|     ) | ||||
|     def test_concat_series_name_npscalar_tuple(self, s1name, s2name): | ||||
|         # GH21015 | ||||
|         s1 = Series({"a": 1, "b": 2}, name=s1name) | ||||
|         s2 = Series({"c": 5, "d": 6}, name=s2name) | ||||
|         result = concat([s1, s2]) | ||||
|         expected = Series({"a": 1, "b": 2, "c": 5, "d": 6}) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_concat_series_partial_columns_names(self): | ||||
|         # GH10698 | ||||
|         named_series = Series([1, 2], name="foo") | ||||
|         unnamed_series1 = Series([1, 2]) | ||||
|         unnamed_series2 = Series([4, 5]) | ||||
|  | ||||
|         result = concat([named_series, unnamed_series1, unnamed_series2], axis=1) | ||||
|         expected = DataFrame( | ||||
|             {"foo": [1, 2], 0: [1, 2], 1: [4, 5]}, columns=["foo", 0, 1] | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = concat( | ||||
|             [named_series, unnamed_series1, unnamed_series2], | ||||
|             axis=1, | ||||
|             keys=["red", "blue", "yellow"], | ||||
|         ) | ||||
|         expected = DataFrame( | ||||
|             {"red": [1, 2], "blue": [1, 2], "yellow": [4, 5]}, | ||||
|             columns=["red", "blue", "yellow"], | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = concat( | ||||
|             [named_series, unnamed_series1, unnamed_series2], axis=1, ignore_index=True | ||||
|         ) | ||||
|         expected = DataFrame({0: [1, 2], 1: [1, 2], 2: [4, 5]}) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_series_length_one_reversed(self, frame_or_series): | ||||
|         # GH39401 | ||||
|         obj = frame_or_series([100]) | ||||
|         result = concat([obj.iloc[::-1]]) | ||||
|         tm.assert_equal(result, obj) | ||||
| @ -0,0 +1,118 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import DataFrame | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestConcatSort: | ||||
|     def test_concat_sorts_columns(self, sort): | ||||
|         # GH-4588 | ||||
|         df1 = DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"]) | ||||
|         df2 = DataFrame({"a": [3, 4], "c": [5, 6]}) | ||||
|  | ||||
|         # for sort=True/None | ||||
|         expected = DataFrame( | ||||
|             {"a": [1, 2, 3, 4], "b": [1, 2, None, None], "c": [None, None, 5, 6]}, | ||||
|             columns=["a", "b", "c"], | ||||
|         ) | ||||
|  | ||||
|         if sort is False: | ||||
|             expected = expected[["b", "a", "c"]] | ||||
|  | ||||
|         # default | ||||
|         with tm.assert_produces_warning(None): | ||||
|             result = pd.concat([df1, df2], ignore_index=True, sort=sort) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_sorts_index(self, sort): | ||||
|         df1 = DataFrame({"a": [1, 2, 3]}, index=["c", "a", "b"]) | ||||
|         df2 = DataFrame({"b": [1, 2]}, index=["a", "b"]) | ||||
|  | ||||
|         # For True/None | ||||
|         expected = DataFrame( | ||||
|             {"a": [2, 3, 1], "b": [1, 2, None]}, | ||||
|             index=["a", "b", "c"], | ||||
|             columns=["a", "b"], | ||||
|         ) | ||||
|         if sort is False: | ||||
|             expected = expected.loc[["c", "a", "b"]] | ||||
|  | ||||
|         # Warn and sort by default | ||||
|         with tm.assert_produces_warning(None): | ||||
|             result = pd.concat([df1, df2], axis=1, sort=sort) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_inner_sort(self, sort): | ||||
|         # https://github.com/pandas-dev/pandas/pull/20613 | ||||
|         df1 = DataFrame( | ||||
|             {"a": [1, 2], "b": [1, 2], "c": [1, 2]}, columns=["b", "a", "c"] | ||||
|         ) | ||||
|         df2 = DataFrame({"a": [1, 2], "b": [3, 4]}, index=[3, 4]) | ||||
|  | ||||
|         with tm.assert_produces_warning(None): | ||||
|             # unset sort should *not* warn for inner join | ||||
|             # since that never sorted | ||||
|             result = pd.concat([df1, df2], sort=sort, join="inner", ignore_index=True) | ||||
|  | ||||
|         expected = DataFrame({"b": [1, 2, 3, 4], "a": [1, 2, 1, 2]}, columns=["b", "a"]) | ||||
|         if sort is True: | ||||
|             expected = expected[["a", "b"]] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_aligned_sort(self): | ||||
|         # GH-4588 | ||||
|         df = DataFrame({"c": [1, 2], "b": [3, 4], "a": [5, 6]}, columns=["c", "b", "a"]) | ||||
|         result = pd.concat([df, df], sort=True, ignore_index=True) | ||||
|         expected = DataFrame( | ||||
|             {"a": [5, 6, 5, 6], "b": [3, 4, 3, 4], "c": [1, 2, 1, 2]}, | ||||
|             columns=["a", "b", "c"], | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = pd.concat( | ||||
|             [df, df[["c", "b"]]], join="inner", sort=True, ignore_index=True | ||||
|         ) | ||||
|         expected = expected[["b", "c"]] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_aligned_sort_does_not_raise(self): | ||||
|         # GH-4588 | ||||
|         # We catch TypeErrors from sorting internally and do not re-raise. | ||||
|         df = DataFrame({1: [1, 2], "a": [3, 4]}, columns=[1, "a"]) | ||||
|         expected = DataFrame({1: [1, 2, 1, 2], "a": [3, 4, 3, 4]}, columns=[1, "a"]) | ||||
|         result = pd.concat([df, df], ignore_index=True, sort=True) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_frame_with_sort_false(self): | ||||
|         # GH 43375 | ||||
|         result = pd.concat( | ||||
|             [DataFrame({i: i}, index=[i]) for i in range(2, 0, -1)], sort=False | ||||
|         ) | ||||
|         expected = DataFrame([[2, np.nan], [np.nan, 1]], index=[2, 1], columns=[2, 1]) | ||||
|  | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # GH 37937 | ||||
|         df1 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=[1, 2, 3]) | ||||
|         df2 = DataFrame({"c": [7, 8, 9], "d": [10, 11, 12]}, index=[3, 1, 6]) | ||||
|         result = pd.concat([df2, df1], axis=1, sort=False) | ||||
|         expected = DataFrame( | ||||
|             [ | ||||
|                 [7.0, 10.0, 3.0, 6.0], | ||||
|                 [8.0, 11.0, 1.0, 4.0], | ||||
|                 [9.0, 12.0, np.nan, np.nan], | ||||
|                 [np.nan, np.nan, 2.0, 5.0], | ||||
|             ], | ||||
|             index=[3, 1, 6, 2], | ||||
|             columns=["c", "d", "a", "b"], | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_sort_none_raises(self): | ||||
|         # GH#41518 | ||||
|         df = DataFrame({1: [1, 2], "a": [3, 4]}) | ||||
|         msg = "The 'sort' keyword only accepts boolean values; None was passed." | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             pd.concat([df, df], sort=None) | ||||
		Reference in New Issue
	
	Block a user