done

2025-09-07 22:09:54 +02:00
parent e1b817252c
commit 2fc0d000b6
7796 changed files with 2159515 additions and 933 deletions
--- a/lib/python3.11/site-packages/pandas/tests/frame/constructors/__init__.py
+++ b/lib/python3.11/site-packages/pandas/tests/frame/constructors/__init__.py
--- a/lib/python3.11/site-packages/pandas/tests/frame/constructors/test_from_dict.py
+++ b/lib/python3.11/site-packages/pandas/tests/frame/constructors/test_from_dict.py
@@ -0,0 +1,223 @@
+from collections import OrderedDict
+
+import numpy as np
+import pytest
+
+from pandas import (
+    DataFrame,
+    Index,
+    MultiIndex,
+    RangeIndex,
+    Series,
+)
+import pandas._testing as tm
+
+
+class TestFromDict:
+    # Note: these tests are specific to the from_dict method, not for
+    #  passing dictionaries to DataFrame.__init__
+
+    def test_constructor_list_of_odicts(self):
+        data = [
+            OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]]),
+            OrderedDict([["a", 1.5], ["b", 3], ["d", 6]]),
+            OrderedDict([["a", 1.5], ["d", 6]]),
+            OrderedDict(),
+            OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]),
+            OrderedDict([["b", 3], ["c", 4], ["d", 6]]),
+        ]
+
+        result = DataFrame(data)
+        expected = DataFrame.from_dict(
+            dict(zip(range(len(data)), data)), orient="index"
+        )
+        tm.assert_frame_equal(result, expected.reindex(result.index))
+
+    def test_constructor_single_row(self):
+        data = [OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]])]
+
+        result = DataFrame(data)
+        expected = DataFrame.from_dict(dict(zip([0], data)), orient="index").reindex(
+            result.index
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_constructor_list_of_series(self):
+        data = [
+            OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]),
+            OrderedDict([["a", 1.5], ["b", 3.0], ["c", 6.0]]),
+        ]
+        sdict = OrderedDict(zip(["x", "y"], data))
+        idx = Index(["a", "b", "c"])
+
+        # all named
+        data2 = [
+            Series([1.5, 3, 4], idx, dtype="O", name="x"),
+            Series([1.5, 3, 6], idx, name="y"),
+        ]
+        result = DataFrame(data2)
+        expected = DataFrame.from_dict(sdict, orient="index")
+        tm.assert_frame_equal(result, expected)
+
+        # some unnamed
+        data2 = [
+            Series([1.5, 3, 4], idx, dtype="O", name="x"),
+            Series([1.5, 3, 6], idx),
+        ]
+        result = DataFrame(data2)
+
+        sdict = OrderedDict(zip(["x", "Unnamed 0"], data))
+        expected = DataFrame.from_dict(sdict, orient="index")
+        tm.assert_frame_equal(result, expected)
+
+        # none named
+        data = [
+            OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]]),
+            OrderedDict([["a", 1.5], ["b", 3], ["d", 6]]),
+            OrderedDict([["a", 1.5], ["d", 6]]),
+            OrderedDict(),
+            OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]),
+            OrderedDict([["b", 3], ["c", 4], ["d", 6]]),
+        ]
+        data = [Series(d) for d in data]
+
+        result = DataFrame(data)
+        sdict = OrderedDict(zip(range(len(data)), data))
+        expected = DataFrame.from_dict(sdict, orient="index")
+        tm.assert_frame_equal(result, expected.reindex(result.index))
+
+        result2 = DataFrame(data, index=np.arange(6, dtype=np.int64))
+        tm.assert_frame_equal(result, result2)
+
+        result = DataFrame([Series(dtype=object)])
+        expected = DataFrame(index=[0])
+        tm.assert_frame_equal(result, expected)
+
+        data = [
+            OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]),
+            OrderedDict([["a", 1.5], ["b", 3.0], ["c", 6.0]]),
+        ]
+        sdict = OrderedDict(zip(range(len(data)), data))
+
+        idx = Index(["a", "b", "c"])
+        data2 = [Series([1.5, 3, 4], idx, dtype="O"), Series([1.5, 3, 6], idx)]
+        result = DataFrame(data2)
+        expected = DataFrame.from_dict(sdict, orient="index")
+        tm.assert_frame_equal(result, expected)
+
+    def test_constructor_orient(self, float_string_frame):
+        data_dict = float_string_frame.T._series
+        recons = DataFrame.from_dict(data_dict, orient="index")
+        expected = float_string_frame.reindex(index=recons.index)
+        tm.assert_frame_equal(recons, expected)
+
+        # dict of sequence
+        a = {"hi": [32, 3, 3], "there": [3, 5, 3]}
+        rs = DataFrame.from_dict(a, orient="index")
+        xp = DataFrame.from_dict(a).T.reindex(list(a.keys()))
+        tm.assert_frame_equal(rs, xp)
+
+    def test_constructor_from_ordered_dict(self):
+        # GH#8425
+        a = OrderedDict(
+            [
+                ("one", OrderedDict([("col_a", "foo1"), ("col_b", "bar1")])),
+                ("two", OrderedDict([("col_a", "foo2"), ("col_b", "bar2")])),
+                ("three", OrderedDict([("col_a", "foo3"), ("col_b", "bar3")])),
+            ]
+        )
+        expected = DataFrame.from_dict(a, orient="columns").T
+        result = DataFrame.from_dict(a, orient="index")
+        tm.assert_frame_equal(result, expected)
+
+    def test_from_dict_columns_parameter(self):
+        # GH#18529
+        # Test new columns parameter for from_dict that was added to make
+        # from_items(..., orient='index', columns=[...]) easier to replicate
+        result = DataFrame.from_dict(
+            OrderedDict([("A", [1, 2]), ("B", [4, 5])]),
+            orient="index",
+            columns=["one", "two"],
+        )
+        expected = DataFrame([[1, 2], [4, 5]], index=["A", "B"], columns=["one", "two"])
+        tm.assert_frame_equal(result, expected)
+
+        msg = "cannot use columns parameter with orient='columns'"
+        with pytest.raises(ValueError, match=msg):
+            DataFrame.from_dict(
+                {"A": [1, 2], "B": [4, 5]},
+                orient="columns",
+                columns=["one", "two"],
+            )
+        with pytest.raises(ValueError, match=msg):
+            DataFrame.from_dict({"A": [1, 2], "B": [4, 5]}, columns=["one", "two"])
+
+    @pytest.mark.parametrize(
+        "data_dict, orient, expected",
+        [
+            ({}, "index", RangeIndex(0)),
+            (
+                [{("a",): 1}, {("a",): 2}],
+                "columns",
+                Index([("a",)], tupleize_cols=False),
+            ),
+            (
+                [OrderedDict([(("a",), 1), (("b",), 2)])],
+                "columns",
+                Index([("a",), ("b",)], tupleize_cols=False),
+            ),
+            ([{("a", "b"): 1}], "columns", Index([("a", "b")], tupleize_cols=False)),
+        ],
+    )
+    def test_constructor_from_dict_tuples(self, data_dict, orient, expected):
+        # GH#16769
+        df = DataFrame.from_dict(data_dict, orient)
+        result = df.columns
+        tm.assert_index_equal(result, expected)
+
+    def test_frame_dict_constructor_empty_series(self):
+        s1 = Series(
+            [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)])
+        )
+        s2 = Series(
+            [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)])
+        )
+        s3 = Series(dtype=object)
+
+        # it works!
+        DataFrame({"foo": s1, "bar": s2, "baz": s3})
+        DataFrame.from_dict({"foo": s1, "baz": s3, "bar": s2})
+
+    def test_from_dict_scalars_requires_index(self):
+        msg = "If using all scalar values, you must pass an index"
+        with pytest.raises(ValueError, match=msg):
+            DataFrame.from_dict(OrderedDict([("b", 8), ("a", 5), ("a", 6)]))
+
+    def test_from_dict_orient_invalid(self):
+        msg = (
+            "Expected 'index', 'columns' or 'tight' for orient parameter. "
+            "Got 'abc' instead"
+        )
+        with pytest.raises(ValueError, match=msg):
+            DataFrame.from_dict({"foo": 1, "baz": 3, "bar": 2}, orient="abc")
+
+    def test_from_dict_order_with_single_column(self):
+        data = {
+            "alpha": {
+                "value2": 123,
+                "value1": 532,
+                "animal": 222,
+                "plant": False,
+                "name": "test",
+            }
+        }
+        result = DataFrame.from_dict(
+            data,
+            orient="columns",
+        )
+        expected = DataFrame(
+            [[123], [532], [222], [False], ["test"]],
+            index=["value2", "value1", "animal", "plant", "name"],
+            columns=["alpha"],
+        )
+        tm.assert_frame_equal(result, expected)
--- a/lib/python3.11/site-packages/pandas/tests/frame/constructors/test_from_records.py
+++ b/lib/python3.11/site-packages/pandas/tests/frame/constructors/test_from_records.py
@@ -0,0 +1,503 @@
+from collections.abc import Iterator
+from datetime import datetime
+from decimal import Decimal
+
+import numpy as np
+import pytest
+import pytz
+
+from pandas._config import using_string_dtype
+
+from pandas.compat import is_platform_little_endian
+
+from pandas import (
+    CategoricalIndex,
+    DataFrame,
+    Index,
+    Interval,
+    RangeIndex,
+    Series,
+    date_range,
+)
+import pandas._testing as tm
+
+
+class TestFromRecords:
+    def test_from_records_dt64tz_frame(self):
+        # GH#51162 don't lose tz when calling from_records with DataFrame input
+        dti = date_range("2016-01-01", periods=10, tz="US/Pacific")
+        df = DataFrame({i: dti for i in range(4)})
+        with tm.assert_produces_warning(FutureWarning):
+            res = DataFrame.from_records(df)
+        tm.assert_frame_equal(res, df)
+
+    def test_from_records_with_datetimes(self):
+        # this may fail on certain platforms because of a numpy issue
+        # related GH#6140
+        if not is_platform_little_endian():
+            pytest.skip("known failure of test on non-little endian")
+
+        # construction with a null in a recarray
+        # GH#6140
+        expected = DataFrame({"EXPIRY": [datetime(2005, 3, 1, 0, 0), None]})
+
+        arrdata = [np.array([datetime(2005, 3, 1, 0, 0), None])]
+        dtypes = [("EXPIRY", "<M8[ns]")]
+
+        recarray = np.rec.fromarrays(arrdata, dtype=dtypes)
+
+        result = DataFrame.from_records(recarray)
+        tm.assert_frame_equal(result, expected)
+
+        # coercion should work too
+        arrdata = [np.array([datetime(2005, 3, 1, 0, 0), None])]
+        dtypes = [("EXPIRY", "<M8[m]")]
+        recarray = np.rec.fromarrays(arrdata, dtype=dtypes)
+        result = DataFrame.from_records(recarray)
+        # we get the closest supported unit, "s"
+        expected["EXPIRY"] = expected["EXPIRY"].astype("M8[s]")
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.xfail(using_string_dtype(), reason="dtype checking logic doesn't work")
+    def test_from_records_sequencelike(self):
+        """
+        Check from_records on sequence-like inputs derived from a mixed-dtype frame.
+
+        The same rows are fed in as a list of tuples, a list of lists, a
+        hand-built record array and the output of ``to_records``; each result
+        is compared against the source frame.
+        """
+        df = DataFrame(
+            {
+                "A": np.array(
+                    np.random.default_rng(2).standard_normal(6), dtype=np.float64
+                ),
+                "A1": np.array(
+                    np.random.default_rng(2).standard_normal(6), dtype=np.float64
+                ),
+                "B": np.array(np.arange(6), dtype=np.int64),
+                "C": ["foo"] * 6,
+                "D": np.array([True, False] * 3, dtype=bool),
+                "E": np.array(
+                    np.random.default_rng(2).standard_normal(6), dtype=np.float32
+                ),
+                "E1": np.array(
+                    np.random.default_rng(2).standard_normal(6), dtype=np.float32
+                ),
+                "F": np.array(np.arange(6), dtype=np.int32),
+            }
+        )
+
+        # this is actually tricky to create the recordlike arrays and
+        # have the dtypes be intact
+        blocks = df._to_dict_of_blocks()
+        tuples = []
+        columns = []
+        dtypes = []
+        # Collect column names and (name, dtype string) pairs block by block, so
+        # ``columns`` follows block order rather than ``df.columns`` order.
+        for dtype, b in blocks.items():
+            columns.extend(b.columns)
+            dtypes.extend([(c, np.dtype(dtype).descr[0][1]) for c in b.columns])
+        # Build one tuple per row, concatenating the row's values from each block
+        # in the same block order used for ``columns`` above.
+        for i in range(len(df.index)):
+            tup = []
+            for _, b in blocks.items():
+                tup.extend(b.iloc[i].values)
+            tuples.append(tuple(tup))
+
+        recarray = np.array(tuples, dtype=dtypes).view(np.rec.recarray)
+        recarray2 = df.to_records()
+        lists = [list(x) for x in tuples]
+
+        # tuples (lose the dtype info)
+        result = DataFrame.from_records(tuples, columns=columns).reindex(
+            columns=df.columns
+        )
+
+        # created recarray and with to_records recarray (have dtype info)
+        result2 = DataFrame.from_records(recarray, columns=columns).reindex(
+            columns=df.columns
+        )
+        result3 = DataFrame.from_records(recarray2, columns=columns).reindex(
+            columns=df.columns
+        )
+
+        # list of tuples (no dtype info)
+        result4 = DataFrame.from_records(lists, columns=columns).reindex(
+            columns=df.columns
+        )
+
+        tm.assert_frame_equal(result, df, check_dtype=False)
+        tm.assert_frame_equal(result2, df)
+        tm.assert_frame_equal(result3, df)
+        tm.assert_frame_equal(result4, df, check_dtype=False)
+
+        # tuples is in the order of the columns
+        result = DataFrame.from_records(tuples)
+        tm.assert_index_equal(result.columns, RangeIndex(8))
+
+        # test exclude parameter & we are casting the results here (as we don't
+        # have dtype info to recover)
+        columns_to_test = [columns.index("C"), columns.index("E1")]
+
+        # Exclude every positional column except the two under test.
+        exclude = list(set(range(8)) - set(columns_to_test))
+        result = DataFrame.from_records(tuples, exclude=exclude)
+        # Relabel the surviving positions with their original column names.
+        result.columns = [columns[i] for i in sorted(columns_to_test)]
+        tm.assert_series_equal(result["C"], df["C"])
+        tm.assert_series_equal(result["E1"], df["E1"])
+
+    def test_from_records_sequencelike_empty(self):
+        # empty case
+        result = DataFrame.from_records([], columns=["foo", "bar", "baz"])
+        assert len(result) == 0
+        tm.assert_index_equal(result.columns, Index(["foo", "bar", "baz"]))
+
+        result = DataFrame.from_records([])
+        assert len(result) == 0
+        assert len(result.columns) == 0
+
+    def test_from_records_dictlike(self):
+        # test the dict methods
+        df = DataFrame(
+            {
+                "A": np.array(
+                    np.random.default_rng(2).standard_normal(6), dtype=np.float64
+                ),
+                "A1": np.array(
+                    np.random.default_rng(2).standard_normal(6), dtype=np.float64
+                ),
+                "B": np.array(np.arange(6), dtype=np.int64),
+                "C": ["foo"] * 6,
+                "D": np.array([True, False] * 3, dtype=bool),
+                "E": np.array(
+                    np.random.default_rng(2).standard_normal(6), dtype=np.float32
+                ),
+                "E1": np.array(
+                    np.random.default_rng(2).standard_normal(6), dtype=np.float32
+                ),
+                "F": np.array(np.arange(6), dtype=np.int32),
+            }
+        )
+
+        # columns is in a different order here than the actual items iterated
+        # from the dict
+        blocks = df._to_dict_of_blocks()
+        columns = []
+        for b in blocks.values():
+            columns.extend(b.columns)
+
+        asdict = dict(df.items())
+        asdict2 = {x: y.values for x, y in df.items()}
+
+        # dict of series & dict of ndarrays (have dtype info)
+        results = []
+        results.append(DataFrame.from_records(asdict).reindex(columns=df.columns))
+        results.append(
+            DataFrame.from_records(asdict, columns=columns).reindex(columns=df.columns)
+        )
+        results.append(
+            DataFrame.from_records(asdict2, columns=columns).reindex(columns=df.columns)
+        )
+
+        for r in results:
+            tm.assert_frame_equal(r, df)
+
+    def test_from_records_with_index_data(self):
+        df = DataFrame(
+            np.random.default_rng(2).standard_normal((10, 3)), columns=["A", "B", "C"]
+        )
+
+        data = np.random.default_rng(2).standard_normal(10)
+        with tm.assert_produces_warning(FutureWarning):
+            df1 = DataFrame.from_records(df, index=data)
+        tm.assert_index_equal(df1.index, Index(data))
+
+    def test_from_records_bad_index_column(self):
+        df = DataFrame(
+            np.random.default_rng(2).standard_normal((10, 3)), columns=["A", "B", "C"]
+        )
+
+        # should pass
+        with tm.assert_produces_warning(FutureWarning):
+            df1 = DataFrame.from_records(df, index=["C"])
+        tm.assert_index_equal(df1.index, Index(df.C))
+
+        with tm.assert_produces_warning(FutureWarning):
+            df1 = DataFrame.from_records(df, index="C")
+        tm.assert_index_equal(df1.index, Index(df.C))
+
+        # should fail
+        msg = "|".join(
+            [
+                r"'None of \[2\] are in the columns'",
+            ]
+        )
+        with pytest.raises(KeyError, match=msg):
+            with tm.assert_produces_warning(FutureWarning):
+                DataFrame.from_records(df, index=[2])
+        with pytest.raises(KeyError, match=msg):
+            with tm.assert_produces_warning(FutureWarning):
+                DataFrame.from_records(df, index=2)
+
+    def test_from_records_non_tuple(self):
+        class Record:
+            def __init__(self, *args) -> None:
+                self.args = args
+
+            def __getitem__(self, i):
+                return self.args[i]
+
+            def __iter__(self) -> Iterator:
+                return iter(self.args)
+
+        recs = [Record(1, 2, 3), Record(4, 5, 6), Record(7, 8, 9)]
+        tups = [tuple(rec) for rec in recs]
+
+        result = DataFrame.from_records(recs)
+        expected = DataFrame.from_records(tups)
+        tm.assert_frame_equal(result, expected)
+
+    def test_from_records_len0_with_columns(self):
+        # GH#2633
+        result = DataFrame.from_records([], index="foo", columns=["foo", "bar"])
+        expected = Index(["bar"])
+
+        assert len(result) == 0
+        assert result.index.name == "foo"
+        tm.assert_index_equal(result.columns, expected)
+
+    def test_from_records_series_list_dict(self):
+        # GH#27358
+        expected = DataFrame([[{"a": 1, "b": 2}, {"a": 3, "b": 4}]]).T
+        data = Series([[{"a": 1, "b": 2}], [{"a": 3, "b": 4}]])
+        result = DataFrame.from_records(data)
+        tm.assert_frame_equal(result, expected)
+
+    def test_from_records_series_categorical_index(self):
+        # GH#32805
+        index = CategoricalIndex(
+            [Interval(-20, -10), Interval(-10, 0), Interval(0, 10)]
+        )
+        series_of_dicts = Series([{"a": 1}, {"a": 2}, {"b": 3}], index=index)
+        frame = DataFrame.from_records(series_of_dicts, index=index)
+        expected = DataFrame(
+            {"a": [1, 2, np.nan], "b": [np.nan, np.nan, 3]}, index=index
+        )
+        tm.assert_frame_equal(frame, expected)
+
+    def test_frame_from_records_utc(self):
+        rec = {"datum": 1.5, "begin_time": datetime(2006, 4, 27, tzinfo=pytz.utc)}
+
+        # it works
+        DataFrame.from_records([rec], index="begin_time")
+
+    def test_from_records_to_records(self):
+        # from numpy documentation
+        arr = np.zeros((2,), dtype=("i4,f4,S10"))
+        arr[:] = [(1, 2.0, "Hello"), (2, 3.0, "World")]
+
+        DataFrame.from_records(arr)
+
+        index = Index(np.arange(len(arr))[::-1])
+        indexed_frame = DataFrame.from_records(arr, index=index)
+        tm.assert_index_equal(indexed_frame.index, index)
+
+        # without names, it should go to last ditch
+        arr2 = np.zeros((2, 3))
+        tm.assert_frame_equal(DataFrame.from_records(arr2), DataFrame(arr2))
+
+        # wrong length
+        msg = "|".join(
+            [
+                r"Length of values \(2\) does not match length of index \(1\)",
+            ]
+        )
+        with pytest.raises(ValueError, match=msg):
+            DataFrame.from_records(arr, index=index[:-1])
+
+        indexed_frame = DataFrame.from_records(arr, index="f1")
+
+        # what to do?
+        records = indexed_frame.to_records()
+        assert len(records.dtype.names) == 3
+
+        records = indexed_frame.to_records(index=False)
+        assert len(records.dtype.names) == 2
+        assert "index" not in records.dtype.names
+
+    def test_from_records_nones(self):
+        tuples = [(1, 2, None, 3), (1, 2, None, 3), (None, 2, 5, 3)]
+
+        df = DataFrame.from_records(tuples, columns=["a", "b", "c", "d"])
+        assert np.isnan(df["c"][0])
+
+    def test_from_records_iterator(self):
+        arr = np.array(
+            [(1.0, 1.0, 2, 2), (3.0, 3.0, 4, 4), (5.0, 5.0, 6, 6), (7.0, 7.0, 8, 8)],
+            dtype=[
+                ("x", np.float64),
+                ("u", np.float32),
+                ("y", np.int64),
+                ("z", np.int32),
+            ],
+        )
+        df = DataFrame.from_records(iter(arr), nrows=2)
+        xp = DataFrame(
+            {
+                "x": np.array([1.0, 3.0], dtype=np.float64),
+                "u": np.array([1.0, 3.0], dtype=np.float32),
+                "y": np.array([2, 4], dtype=np.int64),
+                "z": np.array([2, 4], dtype=np.int32),
+            }
+        )
+        tm.assert_frame_equal(df.reindex_like(xp), xp)
+
+        # no dtypes specified here, so just compare with the default
+        arr = [(1.0, 2), (3.0, 4), (5.0, 6), (7.0, 8)]
+        df = DataFrame.from_records(iter(arr), columns=["x", "y"], nrows=2)
+        tm.assert_frame_equal(df, xp.reindex(columns=["x", "y"]), check_dtype=False)
+
+    def test_from_records_tuples_generator(self):
+        def tuple_generator(length):
+            for i in range(length):
+                letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                yield (i, letters[i % len(letters)], i / length)
+
+        columns_names = ["Integer", "String", "Float"]
+        columns = [
+            [i[j] for i in tuple_generator(10)] for j in range(len(columns_names))
+        ]
+        data = {"Integer": columns[0], "String": columns[1], "Float": columns[2]}
+        expected = DataFrame(data, columns=columns_names)
+
+        generator = tuple_generator(10)
+        result = DataFrame.from_records(generator, columns=columns_names)
+        tm.assert_frame_equal(result, expected)
+
+    def test_from_records_lists_generator(self):
+        def list_generator(length):
+            for i in range(length):
+                letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                yield [i, letters[i % len(letters)], i / length]
+
+        columns_names = ["Integer", "String", "Float"]
+        columns = [
+            [i[j] for i in list_generator(10)] for j in range(len(columns_names))
+        ]
+        data = {"Integer": columns[0], "String": columns[1], "Float": columns[2]}
+        expected = DataFrame(data, columns=columns_names)
+
+        generator = list_generator(10)
+        result = DataFrame.from_records(generator, columns=columns_names)
+        tm.assert_frame_equal(result, expected)
+
+    def test_from_records_columns_not_modified(self):
+        tuples = [(1, 2, 3), (1, 2, 3), (2, 5, 3)]
+
+        columns = ["a", "b", "c"]
+        original_columns = list(columns)
+
+        DataFrame.from_records(tuples, columns=columns, index="a")
+
+        assert columns == original_columns
+
+    def test_from_records_decimal(self):
+        tuples = [(Decimal("1.5"),), (Decimal("2.5"),), (None,)]
+
+        df = DataFrame.from_records(tuples, columns=["a"])
+        assert df["a"].dtype == object
+
+        df = DataFrame.from_records(tuples, columns=["a"], coerce_float=True)
+        assert df["a"].dtype == np.float64
+        assert np.isnan(df["a"].values[-1])
+
+    def test_from_records_duplicates(self):
+        result = DataFrame.from_records([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "a"])
+
+        expected = DataFrame([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "a"])
+
+        tm.assert_frame_equal(result, expected)
+
+    def test_from_records_set_index_name(self):
+        def create_dict(order_id):
+            return {
+                "order_id": order_id,
+                "quantity": np.random.default_rng(2).integers(1, 10),
+                "price": np.random.default_rng(2).integers(1, 10),
+            }
+
+        documents = [create_dict(i) for i in range(10)]
+        # demo missing data
+        documents.append({"order_id": 10, "quantity": 5})
+
+        result = DataFrame.from_records(documents, index="order_id")
+        assert result.index.name == "order_id"
+
+        # MultiIndex
+        result = DataFrame.from_records(documents, index=["order_id", "quantity"])
+        assert result.index.names == ("order_id", "quantity")
+
+    def test_from_records_misc_brokenness(self):
+        # GH#2179
+
+        data = {1: ["foo"], 2: ["bar"]}
+
+        result = DataFrame.from_records(data, columns=["a", "b"])
+        exp = DataFrame(data, columns=["a", "b"])
+        tm.assert_frame_equal(result, exp)
+
+        # overlap in index/index_names
+
+        data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+
+        result = DataFrame.from_records(data, index=["a", "b", "c"])
+        exp = DataFrame(data, index=["a", "b", "c"])
+        tm.assert_frame_equal(result, exp)
+
+    def test_from_records_misc_brokenness2(self):
+        # GH#2623
+        rows = []
+        rows.append([datetime(2010, 1, 1), 1])
+        rows.append([datetime(2010, 1, 2), "hi"])  # test col upconverts to obj
+        result = DataFrame.from_records(rows, columns=["date", "test"])
+        expected = DataFrame(
+            {"date": [row[0] for row in rows], "test": [row[1] for row in rows]}
+        )
+        tm.assert_frame_equal(result, expected)
+        assert result.dtypes["test"] == np.dtype(object)
+
+    def test_from_records_misc_brokenness3(self):
+        rows = []
+        rows.append([datetime(2010, 1, 1), 1])
+        rows.append([datetime(2010, 1, 2), 1])
+        result = DataFrame.from_records(rows, columns=["date", "test"])
+        expected = DataFrame(
+            {"date": [row[0] for row in rows], "test": [row[1] for row in rows]}
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_from_records_empty(self):
+        # GH#3562
+        result = DataFrame.from_records([], columns=["a", "b", "c"])
+        expected = DataFrame(columns=["a", "b", "c"])
+        tm.assert_frame_equal(result, expected)
+
+        result = DataFrame.from_records([], columns=["a", "b", "b"])
+        expected = DataFrame(columns=["a", "b", "b"])
+        tm.assert_frame_equal(result, expected)
+
+    def test_from_records_empty_with_nonempty_fields_gh3682(self):
+        a = np.array([(1, 2)], dtype=[("id", np.int64), ("value", np.int64)])
+        df = DataFrame.from_records(a, index="id")
+
+        ex_index = Index([1], name="id")
+        expected = DataFrame({"value": [2]}, index=ex_index, columns=["value"])
+        tm.assert_frame_equal(df, expected)
+
+        b = a[:0]
+        df2 = DataFrame.from_records(b, index="id")
+        tm.assert_frame_equal(df2, df.iloc[:0])
+
+    def test_from_records_empty2(self):
+        # GH#42456
+        dtype = [("prop", int)]
+        shape = (0, len(dtype))
+        arr = np.empty(shape, dtype=dtype)
+
+        result = DataFrame.from_records(arr)
+        expected = DataFrame({"prop": np.array([], dtype=int)})
+        tm.assert_frame_equal(result, expected)
+
+        alt = DataFrame(arr)
+        tm.assert_frame_equal(alt, expected)