done
This commit is contained in:

3 lib/python3.11/site-packages/pandas/io/sas/__init__.py (Normal file)
| @@ -0,0 +1,3 @@ | ||||
| from pandas.io.sas.sasreader import read_sas | ||||
|  | ||||
| __all__ = ["read_sas"] | ||||
							
								
								
									
762 lib/python3.11/site-packages/pandas/io/sas/sas7bdat.py (Normal file)
| @@ -0,0 +1,762 @@ | ||||
| """ | ||||
| Read SAS7BDAT files | ||||
|  | ||||
| Based on code written by Jared Hobbs: | ||||
|   https://bitbucket.org/jaredhobbs/sas7bdat | ||||
|  | ||||
| See also: | ||||
|   https://github.com/BioStatMatt/sas7bdat | ||||
|  | ||||
| Partial documentation of the file format: | ||||
|   https://cran.r-project.org/package=sas7bdat/vignettes/sas7bdat.pdf | ||||
|  | ||||
| Reference for binary data compression: | ||||
|   http://collaboration.cmc.ec.gc.ca/science/rpn/biblio/ddj/Website/articles/CUJ/1992/9210/ross/ross.htm | ||||
| """ | ||||
| from __future__ import annotations | ||||
|  | ||||
| from collections import abc | ||||
| from datetime import ( | ||||
|     datetime, | ||||
|     timedelta, | ||||
| ) | ||||
| import sys | ||||
| from typing import TYPE_CHECKING | ||||
|  | ||||
| import numpy as np | ||||
|  | ||||
| from pandas._config import get_option | ||||
|  | ||||
| from pandas._libs.byteswap import ( | ||||
|     read_double_with_byteswap, | ||||
|     read_float_with_byteswap, | ||||
|     read_uint16_with_byteswap, | ||||
|     read_uint32_with_byteswap, | ||||
|     read_uint64_with_byteswap, | ||||
| ) | ||||
| from pandas._libs.sas import ( | ||||
|     Parser, | ||||
|     get_subheader_index, | ||||
| ) | ||||
| from pandas._libs.tslibs.conversion import cast_from_unit_vectorized | ||||
| from pandas.errors import EmptyDataError | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Timestamp, | ||||
|     isna, | ||||
| ) | ||||
|  | ||||
| from pandas.io.common import get_handle | ||||
| import pandas.io.sas.sas_constants as const | ||||
| from pandas.io.sas.sasreader import ReaderBase | ||||
|  | ||||
| if TYPE_CHECKING: | ||||
|     from pandas._typing import ( | ||||
|         CompressionOptions, | ||||
|         FilePath, | ||||
|         ReadBuffer, | ||||
|     ) | ||||
|  | ||||
|  | ||||
| _unix_origin = Timestamp("1970-01-01") | ||||
| _sas_origin = Timestamp("1960-01-01") | ||||
|  | ||||
|  | ||||
| def _parse_datetime(sas_datetime: float, unit: str): | ||||
|     if isna(sas_datetime): | ||||
|         return pd.NaT | ||||
|  | ||||
|     if unit == "s": | ||||
|         return datetime(1960, 1, 1) + timedelta(seconds=sas_datetime) | ||||
|  | ||||
|     elif unit == "d": | ||||
|         return datetime(1960, 1, 1) + timedelta(days=sas_datetime) | ||||
|  | ||||
|     else: | ||||
|         raise ValueError("unit must be 'd' or 's'") | ||||
|  | ||||
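| # Illustrative sketch (not part of the upstream module): SAS counts days | ||||
| # or seconds from the 1960-01-01 epoch, so _parse_datetime(0.0, "d") | ||||
| # returns datetime(1960, 1, 1) and _parse_datetime(86400.0, "s") returns | ||||
| # datetime(1960, 1, 2, 0, 0). | ||||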
|  | ||||
| def _convert_datetimes(sas_datetimes: pd.Series, unit: str) -> pd.Series: | ||||
|     """ | ||||
|     Convert to Timestamp if possible, otherwise to datetime.datetime. | ||||
|     SAS float64 lacks the precision for sub-millisecond resolution, so | ||||
|     representing values as datetime.datetime loses nothing. | ||||
|  | ||||
|     Parameters | ||||
|     ---------- | ||||
|     sas_datetimes : {Series, Sequence[float]} | ||||
|        Dates or datetimes in SAS | ||||
|     unit : {'d', 's'} | ||||
|        "d" if the floats represent dates, "s" for datetimes | ||||
|  | ||||
|     Returns | ||||
|     ------- | ||||
|     Series | ||||
|        Series of datetime64 dtype or datetime.datetime. | ||||
|     """ | ||||
|     td = (_sas_origin - _unix_origin).as_unit("s") | ||||
|     if unit == "s": | ||||
|         millis = cast_from_unit_vectorized( | ||||
|             sas_datetimes._values, unit="s", out_unit="ms" | ||||
|         ) | ||||
|         dt64ms = millis.view("M8[ms]") + td | ||||
|         return pd.Series(dt64ms, index=sas_datetimes.index, copy=False) | ||||
|     else: | ||||
|         vals = np.array(sas_datetimes, dtype="M8[D]") + td | ||||
|         return pd.Series(vals, dtype="M8[s]", index=sas_datetimes.index, copy=False) | ||||
|  | ||||
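| # Hedged example of _convert_datetimes (values chosen for illustration): | ||||
| # _convert_datetimes(pd.Series([0.0, 86400.0]), "s") yields the | ||||
| # Timestamps 1960-01-01 and 1960-01-02, while unit "d" would treat the | ||||
| # floats as day counts from the same 1960 epoch. | ||||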
|  | ||||
| class _Column: | ||||
|     col_id: int | ||||
|     name: str | bytes | ||||
|     label: str | bytes | ||||
|     format: str | bytes | ||||
|     ctype: bytes | ||||
|     length: int | ||||
|  | ||||
|     def __init__( | ||||
|         self, | ||||
|         col_id: int, | ||||
|         # These can be bytes when convert_header_text is False | ||||
|         name: str | bytes, | ||||
|         label: str | bytes, | ||||
|         format: str | bytes, | ||||
|         ctype: bytes, | ||||
|         length: int, | ||||
|     ) -> None: | ||||
|         self.col_id = col_id | ||||
|         self.name = name | ||||
|         self.label = label | ||||
|         self.format = format | ||||
|         self.ctype = ctype | ||||
|         self.length = length | ||||
|  | ||||
|  | ||||
| # SAS7BDATReader reads a SAS data file in SAS7BDAT format. | ||||
| class SAS7BDATReader(ReaderBase, abc.Iterator): | ||||
|     """ | ||||
|     Read SAS files in SAS7BDAT format. | ||||
|  | ||||
|     Parameters | ||||
|     ---------- | ||||
|     path_or_buf : path name or buffer | ||||
|         Name of SAS file or file-like object pointing to SAS file | ||||
|         contents. | ||||
|     index : column identifier, defaults to None | ||||
|         Column to use as index. | ||||
|     convert_dates : bool, defaults to True | ||||
|         Attempt to convert dates to Pandas datetime values.  Note that | ||||
|         some rarely used SAS date formats may be unsupported. | ||||
|     blank_missing : bool, defaults to True | ||||
|         Convert empty strings to missing values (SAS uses blanks to | ||||
|         indicate missing character variables). | ||||
|     chunksize : int, defaults to None | ||||
|         Return SAS7BDATReader object for iterations, returns chunks | ||||
|         with given number of lines. | ||||
|     encoding : str, 'infer', defaults to None | ||||
|         String encoding according to the Python standard encodings, | ||||
|         encoding='infer' tries to detect the encoding from the file header, | ||||
|         encoding=None will leave the data in binary format. | ||||
|     convert_text : bool, defaults to True | ||||
|         If False, text variables are left as raw bytes. | ||||
|     convert_header_text : bool, defaults to True | ||||
|         If False, header text, including column names, are left as raw | ||||
|         bytes. | ||||
|     """ | ||||
|  | ||||
|     _int_length: int | ||||
|     _cached_page: bytes | None | ||||
|  | ||||
|     def __init__( | ||||
|         self, | ||||
|         path_or_buf: FilePath | ReadBuffer[bytes], | ||||
|         index=None, | ||||
|         convert_dates: bool = True, | ||||
|         blank_missing: bool = True, | ||||
|         chunksize: int | None = None, | ||||
|         encoding: str | None = None, | ||||
|         convert_text: bool = True, | ||||
|         convert_header_text: bool = True, | ||||
|         compression: CompressionOptions = "infer", | ||||
|     ) -> None: | ||||
|         self.index = index | ||||
|         self.convert_dates = convert_dates | ||||
|         self.blank_missing = blank_missing | ||||
|         self.chunksize = chunksize | ||||
|         self.encoding = encoding | ||||
|         self.convert_text = convert_text | ||||
|         self.convert_header_text = convert_header_text | ||||
|  | ||||
|         self.default_encoding = "latin-1" | ||||
|         self.compression = b"" | ||||
|         self.column_names_raw: list[bytes] = [] | ||||
|         self.column_names: list[str | bytes] = [] | ||||
|         self.column_formats: list[str | bytes] = [] | ||||
|         self.columns: list[_Column] = [] | ||||
|  | ||||
|         self._current_page_data_subheader_pointers: list[tuple[int, int]] = [] | ||||
|         self._cached_page = None | ||||
|         self._column_data_lengths: list[int] = [] | ||||
|         self._column_data_offsets: list[int] = [] | ||||
|         self._column_types: list[bytes] = [] | ||||
|  | ||||
|         self._current_row_in_file_index = 0 | ||||
|         self._current_row_on_page_index = 0 | ||||
|  | ||||
|         self.handles = get_handle( | ||||
|             path_or_buf, "rb", is_text=False, compression=compression | ||||
|         ) | ||||
|  | ||||
|         self._path_or_buf = self.handles.handle | ||||
|  | ||||
|         # Same order as const.SASIndex | ||||
|         self._subheader_processors = [ | ||||
|             self._process_rowsize_subheader, | ||||
|             self._process_columnsize_subheader, | ||||
|             self._process_subheader_counts, | ||||
|             self._process_columntext_subheader, | ||||
|             self._process_columnname_subheader, | ||||
|             self._process_columnattributes_subheader, | ||||
|             self._process_format_subheader, | ||||
|             self._process_columnlist_subheader, | ||||
|             None,  # Data | ||||
|         ] | ||||
|  | ||||
|         try: | ||||
|             self._get_properties() | ||||
|             self._parse_metadata() | ||||
|         except Exception: | ||||
|             self.close() | ||||
|             raise | ||||
|  | ||||
|     def column_data_lengths(self) -> np.ndarray: | ||||
|         """Return a numpy int64 array of the column data lengths""" | ||||
|         return np.asarray(self._column_data_lengths, dtype=np.int64) | ||||
|  | ||||
|     def column_data_offsets(self) -> np.ndarray: | ||||
|         """Return a numpy int64 array of the column offsets""" | ||||
|         return np.asarray(self._column_data_offsets, dtype=np.int64) | ||||
|  | ||||
|     def column_types(self) -> np.ndarray: | ||||
|         """ | ||||
|         Returns a numpy character array of the column types: | ||||
|            s (string) or d (double) | ||||
|         """ | ||||
|         return np.asarray(self._column_types, dtype=np.dtype("S1")) | ||||
|  | ||||
|     def close(self) -> None: | ||||
|         self.handles.close() | ||||
|  | ||||
|     def _get_properties(self) -> None: | ||||
|         # Check magic number | ||||
|         self._path_or_buf.seek(0) | ||||
|         self._cached_page = self._path_or_buf.read(288) | ||||
|         if self._cached_page[0 : len(const.magic)] != const.magic: | ||||
|             raise ValueError("magic number mismatch (not a SAS file?)") | ||||
|  | ||||
|         # Get alignment information | ||||
|         buf = self._read_bytes(const.align_1_offset, const.align_1_length) | ||||
|         if buf == const.u64_byte_checker_value: | ||||
|             self.U64 = True | ||||
|             self._int_length = 8 | ||||
|             self._page_bit_offset = const.page_bit_offset_x64 | ||||
|             self._subheader_pointer_length = const.subheader_pointer_length_x64 | ||||
|         else: | ||||
|             self.U64 = False | ||||
|             self._page_bit_offset = const.page_bit_offset_x86 | ||||
|             self._subheader_pointer_length = const.subheader_pointer_length_x86 | ||||
|             self._int_length = 4 | ||||
|         buf = self._read_bytes(const.align_2_offset, const.align_2_length) | ||||
|         if buf == const.align_1_checker_value: | ||||
|             align1 = const.align_2_value | ||||
|         else: | ||||
|             align1 = 0 | ||||
|  | ||||
|         # Get endianness information | ||||
|         buf = self._read_bytes(const.endianness_offset, const.endianness_length) | ||||
|         if buf == b"\x01": | ||||
|             self.byte_order = "<" | ||||
|             self.need_byteswap = sys.byteorder == "big" | ||||
|         else: | ||||
|             self.byte_order = ">" | ||||
|             self.need_byteswap = sys.byteorder == "little" | ||||
|  | ||||
|         # Get encoding information | ||||
|         buf = self._read_bytes(const.encoding_offset, const.encoding_length)[0] | ||||
|         if buf in const.encoding_names: | ||||
|             self.inferred_encoding = const.encoding_names[buf] | ||||
|             if self.encoding == "infer": | ||||
|                 self.encoding = self.inferred_encoding | ||||
|         else: | ||||
|             self.inferred_encoding = f"unknown (code={buf})" | ||||
|  | ||||
|         # Timestamp is epoch 01/01/1960 | ||||
|         epoch = datetime(1960, 1, 1) | ||||
|         x = self._read_float( | ||||
|             const.date_created_offset + align1, const.date_created_length | ||||
|         ) | ||||
|         self.date_created = epoch + pd.to_timedelta(x, unit="s") | ||||
|         x = self._read_float( | ||||
|             const.date_modified_offset + align1, const.date_modified_length | ||||
|         ) | ||||
|         self.date_modified = epoch + pd.to_timedelta(x, unit="s") | ||||
|  | ||||
|         self.header_length = self._read_uint( | ||||
|             const.header_size_offset + align1, const.header_size_length | ||||
|         ) | ||||
|  | ||||
|         # Read the rest of the header into cached_page. | ||||
|         buf = self._path_or_buf.read(self.header_length - 288) | ||||
|         self._cached_page += buf | ||||
|         # error: Argument 1 to "len" has incompatible type "Optional[bytes]"; | ||||
|         #  expected "Sized" | ||||
|         if len(self._cached_page) != self.header_length:  # type: ignore[arg-type] | ||||
|             raise ValueError("The SAS7BDAT file appears to be truncated.") | ||||
|  | ||||
|         self._page_length = self._read_uint( | ||||
|             const.page_size_offset + align1, const.page_size_length | ||||
|         ) | ||||
|  | ||||
|     def __next__(self) -> DataFrame: | ||||
|         da = self.read(nrows=self.chunksize or 1) | ||||
|         if da.empty: | ||||
|             self.close() | ||||
|             raise StopIteration | ||||
|         return da | ||||
|  | ||||
|     # Read a single float of the given width (4 or 8). | ||||
|     def _read_float(self, offset: int, width: int): | ||||
|         assert self._cached_page is not None | ||||
|         if width == 4: | ||||
|             return read_float_with_byteswap( | ||||
|                 self._cached_page, offset, self.need_byteswap | ||||
|             ) | ||||
|         elif width == 8: | ||||
|             return read_double_with_byteswap( | ||||
|                 self._cached_page, offset, self.need_byteswap | ||||
|             ) | ||||
|         else: | ||||
|             self.close() | ||||
|             raise ValueError("invalid float width") | ||||
|  | ||||
|     # Read a single unsigned integer of the given width (1, 2, 4 or 8). | ||||
|     def _read_uint(self, offset: int, width: int) -> int: | ||||
|         assert self._cached_page is not None | ||||
|         if width == 1: | ||||
|             return self._read_bytes(offset, 1)[0] | ||||
|         elif width == 2: | ||||
|             return read_uint16_with_byteswap( | ||||
|                 self._cached_page, offset, self.need_byteswap | ||||
|             ) | ||||
|         elif width == 4: | ||||
|             return read_uint32_with_byteswap( | ||||
|                 self._cached_page, offset, self.need_byteswap | ||||
|             ) | ||||
|         elif width == 8: | ||||
|             return read_uint64_with_byteswap( | ||||
|                 self._cached_page, offset, self.need_byteswap | ||||
|             ) | ||||
|         else: | ||||
|             self.close() | ||||
|             raise ValueError("invalid int width") | ||||
|  | ||||
|     def _read_bytes(self, offset: int, length: int): | ||||
|         assert self._cached_page is not None | ||||
|         if offset + length > len(self._cached_page): | ||||
|             self.close() | ||||
|             raise ValueError("The cached page is too small.") | ||||
|         return self._cached_page[offset : offset + length] | ||||
|  | ||||
|     def _read_and_convert_header_text(self, offset: int, length: int) -> str | bytes: | ||||
|         return self._convert_header_text( | ||||
|             self._read_bytes(offset, length).rstrip(b"\x00 ") | ||||
|         ) | ||||
|  | ||||
|     def _parse_metadata(self) -> None: | ||||
|         done = False | ||||
|         while not done: | ||||
|             self._cached_page = self._path_or_buf.read(self._page_length) | ||||
|             if len(self._cached_page) <= 0: | ||||
|                 break | ||||
|             if len(self._cached_page) != self._page_length: | ||||
|                 raise ValueError("Failed to read a meta data page from the SAS file.") | ||||
|             done = self._process_page_meta() | ||||
|  | ||||
|     def _process_page_meta(self) -> bool: | ||||
|         self._read_page_header() | ||||
|         pt = const.page_meta_types + [const.page_amd_type, const.page_mix_type] | ||||
|         if self._current_page_type in pt: | ||||
|             self._process_page_metadata() | ||||
|         is_data_page = self._current_page_type == const.page_data_type | ||||
|         is_mix_page = self._current_page_type == const.page_mix_type | ||||
|         return bool( | ||||
|             is_data_page | ||||
|             or is_mix_page | ||||
|             or self._current_page_data_subheader_pointers != [] | ||||
|         ) | ||||
|  | ||||
|     def _read_page_header(self) -> None: | ||||
|         bit_offset = self._page_bit_offset | ||||
|         tx = const.page_type_offset + bit_offset | ||||
|         self._current_page_type = ( | ||||
|             self._read_uint(tx, const.page_type_length) & const.page_type_mask2 | ||||
|         ) | ||||
|         tx = const.block_count_offset + bit_offset | ||||
|         self._current_page_block_count = self._read_uint(tx, const.block_count_length) | ||||
|         tx = const.subheader_count_offset + bit_offset | ||||
|         self._current_page_subheaders_count = self._read_uint( | ||||
|             tx, const.subheader_count_length | ||||
|         ) | ||||
|  | ||||
|     def _process_page_metadata(self) -> None: | ||||
|         bit_offset = self._page_bit_offset | ||||
|  | ||||
|         for i in range(self._current_page_subheaders_count): | ||||
|             offset = const.subheader_pointers_offset + bit_offset | ||||
|             total_offset = offset + self._subheader_pointer_length * i | ||||
|  | ||||
|             subheader_offset = self._read_uint(total_offset, self._int_length) | ||||
|             total_offset += self._int_length | ||||
|  | ||||
|             subheader_length = self._read_uint(total_offset, self._int_length) | ||||
|             total_offset += self._int_length | ||||
|  | ||||
|             subheader_compression = self._read_uint(total_offset, 1) | ||||
|             total_offset += 1 | ||||
|  | ||||
|             subheader_type = self._read_uint(total_offset, 1) | ||||
|  | ||||
|             if ( | ||||
|                 subheader_length == 0 | ||||
|                 or subheader_compression == const.truncated_subheader_id | ||||
|             ): | ||||
|                 continue | ||||
|  | ||||
|             subheader_signature = self._read_bytes(subheader_offset, self._int_length) | ||||
|             subheader_index = get_subheader_index(subheader_signature) | ||||
|             subheader_processor = self._subheader_processors[subheader_index] | ||||
|  | ||||
|             if subheader_processor is None: | ||||
|                 f1 = subheader_compression in (const.compressed_subheader_id, 0) | ||||
|                 f2 = subheader_type == const.compressed_subheader_type | ||||
|                 if self.compression and f1 and f2: | ||||
|                     self._current_page_data_subheader_pointers.append( | ||||
|                         (subheader_offset, subheader_length) | ||||
|                     ) | ||||
|                 else: | ||||
|                     self.close() | ||||
|                     raise ValueError( | ||||
|                         f"Unknown subheader signature {subheader_signature}" | ||||
|                     ) | ||||
|             else: | ||||
|                 subheader_processor(subheader_offset, subheader_length) | ||||
|  | ||||
|     def _process_rowsize_subheader(self, offset: int, length: int) -> None: | ||||
|         int_len = self._int_length | ||||
|         lcs_offset = offset | ||||
|         lcp_offset = offset | ||||
|         if self.U64: | ||||
|             lcs_offset += 682 | ||||
|             lcp_offset += 706 | ||||
|         else: | ||||
|             lcs_offset += 354 | ||||
|             lcp_offset += 378 | ||||
|  | ||||
|         self.row_length = self._read_uint( | ||||
|             offset + const.row_length_offset_multiplier * int_len, | ||||
|             int_len, | ||||
|         ) | ||||
|         self.row_count = self._read_uint( | ||||
|             offset + const.row_count_offset_multiplier * int_len, | ||||
|             int_len, | ||||
|         ) | ||||
|         self.col_count_p1 = self._read_uint( | ||||
|             offset + const.col_count_p1_multiplier * int_len, int_len | ||||
|         ) | ||||
|         self.col_count_p2 = self._read_uint( | ||||
|             offset + const.col_count_p2_multiplier * int_len, int_len | ||||
|         ) | ||||
|         mx = const.row_count_on_mix_page_offset_multiplier * int_len | ||||
|         self._mix_page_row_count = self._read_uint(offset + mx, int_len) | ||||
|         self._lcs = self._read_uint(lcs_offset, 2) | ||||
|         self._lcp = self._read_uint(lcp_offset, 2) | ||||
|  | ||||
|     def _process_columnsize_subheader(self, offset: int, length: int) -> None: | ||||
|         int_len = self._int_length | ||||
|         offset += int_len | ||||
|         self.column_count = self._read_uint(offset, int_len) | ||||
|         if self.col_count_p1 + self.col_count_p2 != self.column_count: | ||||
|             print( | ||||
|                 f"Warning: column count mismatch ({self.col_count_p1} + " | ||||
|                 f"{self.col_count_p2} != {self.column_count})\n" | ||||
|             ) | ||||
|  | ||||
|     # Unknown purpose | ||||
|     def _process_subheader_counts(self, offset: int, length: int) -> None: | ||||
|         pass | ||||
|  | ||||
|     def _process_columntext_subheader(self, offset: int, length: int) -> None: | ||||
|         offset += self._int_length | ||||
|         text_block_size = self._read_uint(offset, const.text_block_size_length) | ||||
|  | ||||
|         buf = self._read_bytes(offset, text_block_size) | ||||
|         cname_raw = buf[0:text_block_size].rstrip(b"\x00 ") | ||||
|         self.column_names_raw.append(cname_raw) | ||||
|  | ||||
|         if len(self.column_names_raw) == 1: | ||||
|             compression_literal = b"" | ||||
|             for cl in const.compression_literals: | ||||
|                 if cl in cname_raw: | ||||
|                     compression_literal = cl | ||||
|             self.compression = compression_literal | ||||
|             offset -= self._int_length | ||||
|  | ||||
|             offset1 = offset + 16 | ||||
|             if self.U64: | ||||
|                 offset1 += 4 | ||||
|  | ||||
|             buf = self._read_bytes(offset1, self._lcp) | ||||
|             compression_literal = buf.rstrip(b"\x00") | ||||
|             if compression_literal == b"": | ||||
|                 self._lcs = 0 | ||||
|                 offset1 = offset + 32 | ||||
|                 if self.U64: | ||||
|                     offset1 += 4 | ||||
|                 buf = self._read_bytes(offset1, self._lcp) | ||||
|                 self.creator_proc = buf[0 : self._lcp] | ||||
|             elif compression_literal == const.rle_compression: | ||||
|                 offset1 = offset + 40 | ||||
|                 if self.U64: | ||||
|                     offset1 += 4 | ||||
|                 buf = self._read_bytes(offset1, self._lcp) | ||||
|                 self.creator_proc = buf[0 : self._lcp] | ||||
|             elif self._lcs > 0: | ||||
|                 self._lcp = 0 | ||||
|                 offset1 = offset + 16 | ||||
|                 if self.U64: | ||||
|                     offset1 += 4 | ||||
|                 buf = self._read_bytes(offset1, self._lcs) | ||||
|                 self.creator_proc = buf[0 : self._lcs] | ||||
|             if hasattr(self, "creator_proc"): | ||||
|                 self.creator_proc = self._convert_header_text(self.creator_proc) | ||||
|  | ||||
|     def _process_columnname_subheader(self, offset: int, length: int) -> None: | ||||
|         int_len = self._int_length | ||||
|         offset += int_len | ||||
|         column_name_pointers_count = (length - 2 * int_len - 12) // 8 | ||||
|         for i in range(column_name_pointers_count): | ||||
|             text_subheader = ( | ||||
|                 offset | ||||
|                 + const.column_name_pointer_length * (i + 1) | ||||
|                 + const.column_name_text_subheader_offset | ||||
|             ) | ||||
|             col_name_offset = ( | ||||
|                 offset | ||||
|                 + const.column_name_pointer_length * (i + 1) | ||||
|                 + const.column_name_offset_offset | ||||
|             ) | ||||
|             col_name_length = ( | ||||
|                 offset | ||||
|                 + const.column_name_pointer_length * (i + 1) | ||||
|                 + const.column_name_length_offset | ||||
|             ) | ||||
|  | ||||
|             idx = self._read_uint( | ||||
|                 text_subheader, const.column_name_text_subheader_length | ||||
|             ) | ||||
|             col_offset = self._read_uint( | ||||
|                 col_name_offset, const.column_name_offset_length | ||||
|             ) | ||||
|             col_len = self._read_uint(col_name_length, const.column_name_length_length) | ||||
|  | ||||
|             name_raw = self.column_names_raw[idx] | ||||
|             cname = name_raw[col_offset : col_offset + col_len] | ||||
|             self.column_names.append(self._convert_header_text(cname)) | ||||
|  | ||||
|     def _process_columnattributes_subheader(self, offset: int, length: int) -> None: | ||||
|         int_len = self._int_length | ||||
|         column_attributes_vectors_count = (length - 2 * int_len - 12) // (int_len + 8) | ||||
|         for i in range(column_attributes_vectors_count): | ||||
|             col_data_offset = ( | ||||
|                 offset + int_len + const.column_data_offset_offset + i * (int_len + 8) | ||||
|             ) | ||||
|             col_data_len = ( | ||||
|                 offset | ||||
|                 + 2 * int_len | ||||
|                 + const.column_data_length_offset | ||||
|                 + i * (int_len + 8) | ||||
|             ) | ||||
|             col_types = ( | ||||
|                 offset + 2 * int_len + const.column_type_offset + i * (int_len + 8) | ||||
|             ) | ||||
|  | ||||
|             x = self._read_uint(col_data_offset, int_len) | ||||
|             self._column_data_offsets.append(x) | ||||
|  | ||||
|             x = self._read_uint(col_data_len, const.column_data_length_length) | ||||
|             self._column_data_lengths.append(x) | ||||
|  | ||||
|             x = self._read_uint(col_types, const.column_type_length) | ||||
|             self._column_types.append(b"d" if x == 1 else b"s") | ||||
|  | ||||
|     def _process_columnlist_subheader(self, offset: int, length: int) -> None: | ||||
|         # unknown purpose | ||||
|         pass | ||||
|  | ||||
|     def _process_format_subheader(self, offset: int, length: int) -> None: | ||||
|         int_len = self._int_length | ||||
|         text_subheader_format = ( | ||||
|             offset + const.column_format_text_subheader_index_offset + 3 * int_len | ||||
|         ) | ||||
|         col_format_offset = offset + const.column_format_offset_offset + 3 * int_len | ||||
|         col_format_len = offset + const.column_format_length_offset + 3 * int_len | ||||
|         text_subheader_label = ( | ||||
|             offset + const.column_label_text_subheader_index_offset + 3 * int_len | ||||
|         ) | ||||
|         col_label_offset = offset + const.column_label_offset_offset + 3 * int_len | ||||
|         col_label_len = offset + const.column_label_length_offset + 3 * int_len | ||||
|  | ||||
|         x = self._read_uint( | ||||
|             text_subheader_format, const.column_format_text_subheader_index_length | ||||
|         ) | ||||
|         format_idx = min(x, len(self.column_names_raw) - 1) | ||||
|  | ||||
|         format_start = self._read_uint( | ||||
|             col_format_offset, const.column_format_offset_length | ||||
|         ) | ||||
|         format_len = self._read_uint(col_format_len, const.column_format_length_length) | ||||
|  | ||||
|         label_idx = self._read_uint( | ||||
|             text_subheader_label, const.column_label_text_subheader_index_length | ||||
|         ) | ||||
|         label_idx = min(label_idx, len(self.column_names_raw) - 1) | ||||
|  | ||||
|         label_start = self._read_uint( | ||||
|             col_label_offset, const.column_label_offset_length | ||||
|         ) | ||||
|         label_len = self._read_uint(col_label_len, const.column_label_length_length) | ||||
|  | ||||
|         label_names = self.column_names_raw[label_idx] | ||||
|         column_label = self._convert_header_text( | ||||
|             label_names[label_start : label_start + label_len] | ||||
|         ) | ||||
|         format_names = self.column_names_raw[format_idx] | ||||
|         column_format = self._convert_header_text( | ||||
|             format_names[format_start : format_start + format_len] | ||||
|         ) | ||||
|         current_column_number = len(self.columns) | ||||
|  | ||||
|         col = _Column( | ||||
|             current_column_number, | ||||
|             self.column_names[current_column_number], | ||||
|             column_label, | ||||
|             column_format, | ||||
|             self._column_types[current_column_number], | ||||
|             self._column_data_lengths[current_column_number], | ||||
|         ) | ||||
|  | ||||
|         self.column_formats.append(column_format) | ||||
|         self.columns.append(col) | ||||
|  | ||||
|     def read(self, nrows: int | None = None) -> DataFrame: | ||||
|         if (nrows is None) and (self.chunksize is not None): | ||||
|             nrows = self.chunksize | ||||
|         elif nrows is None: | ||||
|             nrows = self.row_count | ||||
|  | ||||
|         if len(self._column_types) == 0: | ||||
|             self.close() | ||||
|             raise EmptyDataError("No columns to parse from file") | ||||
|  | ||||
|         if nrows > 0 and self._current_row_in_file_index >= self.row_count: | ||||
|             return DataFrame() | ||||
|  | ||||
|         nrows = min(nrows, self.row_count - self._current_row_in_file_index) | ||||
|  | ||||
|         nd = self._column_types.count(b"d") | ||||
|         ns = self._column_types.count(b"s") | ||||
|  | ||||
|         self._string_chunk = np.empty((ns, nrows), dtype=object) | ||||
|         self._byte_chunk = np.zeros((nd, 8 * nrows), dtype=np.uint8) | ||||
|  | ||||
|         self._current_row_in_chunk_index = 0 | ||||
|         p = Parser(self) | ||||
|         p.read(nrows) | ||||
|  | ||||
|         rslt = self._chunk_to_dataframe() | ||||
|         if self.index is not None: | ||||
|             rslt = rslt.set_index(self.index) | ||||
|  | ||||
|         return rslt | ||||
|  | ||||
|     def _read_next_page(self): | ||||
|         self._current_page_data_subheader_pointers = [] | ||||
|         self._cached_page = self._path_or_buf.read(self._page_length) | ||||
|         if len(self._cached_page) <= 0: | ||||
|             return True | ||||
|         elif len(self._cached_page) != self._page_length: | ||||
|             self.close() | ||||
|             msg = ( | ||||
|                 "failed to read complete page from file (read " | ||||
|                 f"{len(self._cached_page):d} of {self._page_length:d} bytes)" | ||||
|             ) | ||||
|             raise ValueError(msg) | ||||
|  | ||||
|         self._read_page_header() | ||||
|         if self._current_page_type in const.page_meta_types: | ||||
|             self._process_page_metadata() | ||||
|  | ||||
|         if self._current_page_type not in const.page_meta_types + [ | ||||
|             const.page_data_type, | ||||
|             const.page_mix_type, | ||||
|         ]: | ||||
|             return self._read_next_page() | ||||
|  | ||||
|         return False | ||||
|  | ||||
|     def _chunk_to_dataframe(self) -> DataFrame: | ||||
|         n = self._current_row_in_chunk_index | ||||
|         m = self._current_row_in_file_index | ||||
|         ix = range(m - n, m) | ||||
|         rslt = {} | ||||
|  | ||||
|         js, jb = 0, 0 | ||||
|         infer_string = get_option("future.infer_string") | ||||
|         for j in range(self.column_count): | ||||
|             name = self.column_names[j] | ||||
|  | ||||
|             if self._column_types[j] == b"d": | ||||
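|                 # Numeric data was accumulated as raw bytes (8 per value) | ||||
|                 # in _byte_chunk; reinterpreting with the file's byte | ||||
|                 # order recovers the float64 column without copying. | ||||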
|                 col_arr = self._byte_chunk[jb, :].view(dtype=self.byte_order + "d") | ||||
|                 rslt[name] = pd.Series(col_arr, dtype=np.float64, index=ix, copy=False) | ||||
|                 if self.convert_dates: | ||||
|                     if self.column_formats[j] in const.sas_date_formats: | ||||
|                         rslt[name] = _convert_datetimes(rslt[name], "d") | ||||
|                     elif self.column_formats[j] in const.sas_datetime_formats: | ||||
|                         rslt[name] = _convert_datetimes(rslt[name], "s") | ||||
|                 jb += 1 | ||||
|             elif self._column_types[j] == b"s": | ||||
|                 rslt[name] = pd.Series(self._string_chunk[js, :], index=ix, copy=False) | ||||
|                 if self.convert_text and (self.encoding is not None): | ||||
|                     rslt[name] = self._decode_string(rslt[name].str) | ||||
|                     if infer_string: | ||||
|                         rslt[name] = rslt[name].astype("str") | ||||
|  | ||||
|                 js += 1 | ||||
|             else: | ||||
|                 self.close() | ||||
|                 raise ValueError(f"unknown column type {repr(self._column_types[j])}") | ||||
|  | ||||
|         df = DataFrame(rslt, columns=self.column_names, index=ix, copy=False) | ||||
|         return df | ||||
|  | ||||
|     def _decode_string(self, b): | ||||
|         return b.decode(self.encoding or self.default_encoding) | ||||
|  | ||||
|     def _convert_header_text(self, b: bytes) -> str | bytes: | ||||
|         if self.convert_header_text: | ||||
|             return self._decode_string(b) | ||||
|         else: | ||||
|             return b | ||||
							
								
								
									
310 lib/python3.11/site-packages/pandas/io/sas/sas_constants.py (Normal file)
| @@ -0,0 +1,310 @@ | ||||
| from __future__ import annotations | ||||
|  | ||||
| from typing import Final | ||||
|  | ||||
| magic: Final = ( | ||||
|     b"\x00\x00\x00\x00\x00\x00\x00\x00" | ||||
|     b"\x00\x00\x00\x00\xc2\xea\x81\x60" | ||||
|     b"\xb3\x14\x11\xcf\xbd\x92\x08\x00" | ||||
|     b"\x09\xc7\x31\x8c\x18\x1f\x10\x11" | ||||
| ) | ||||
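|  | ||||
| # The 32-byte magic number above sits at offset 0 of the file header; | ||||
| # SAS7BDATReader._get_properties rejects files whose first bytes differ. | ||||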
|  | ||||
| align_1_checker_value: Final = b"3" | ||||
| align_1_offset: Final = 32 | ||||
| align_1_length: Final = 1 | ||||
| align_1_value: Final = 4 | ||||
| u64_byte_checker_value: Final = b"3" | ||||
| align_2_offset: Final = 35 | ||||
| align_2_length: Final = 1 | ||||
| align_2_value: Final = 4 | ||||
| endianness_offset: Final = 37 | ||||
| endianness_length: Final = 1 | ||||
| platform_offset: Final = 39 | ||||
| platform_length: Final = 1 | ||||
| encoding_offset: Final = 70 | ||||
| encoding_length: Final = 1 | ||||
| dataset_offset: Final = 92 | ||||
| dataset_length: Final = 64 | ||||
| file_type_offset: Final = 156 | ||||
| file_type_length: Final = 8 | ||||
| date_created_offset: Final = 164 | ||||
| date_created_length: Final = 8 | ||||
| date_modified_offset: Final = 172 | ||||
| date_modified_length: Final = 8 | ||||
| header_size_offset: Final = 196 | ||||
| header_size_length: Final = 4 | ||||
| page_size_offset: Final = 200 | ||||
| page_size_length: Final = 4 | ||||
| page_count_offset: Final = 204 | ||||
| page_count_length: Final = 4 | ||||
| sas_release_offset: Final = 216 | ||||
| sas_release_length: Final = 8 | ||||
| sas_server_type_offset: Final = 224 | ||||
| sas_server_type_length: Final = 16 | ||||
| os_version_number_offset: Final = 240 | ||||
| os_version_number_length: Final = 16 | ||||
| os_maker_offset: Final = 256 | ||||
| os_maker_length: Final = 16 | ||||
| os_name_offset: Final = 272 | ||||
| os_name_length: Final = 16 | ||||
| page_bit_offset_x86: Final = 16 | ||||
| page_bit_offset_x64: Final = 32 | ||||
| subheader_pointer_length_x86: Final = 12 | ||||
| subheader_pointer_length_x64: Final = 24 | ||||
| page_type_offset: Final = 0 | ||||
| page_type_length: Final = 2 | ||||
| block_count_offset: Final = 2 | ||||
| block_count_length: Final = 2 | ||||
| subheader_count_offset: Final = 4 | ||||
| subheader_count_length: Final = 2 | ||||
| page_type_mask: Final = 0x0F00 | ||||
| # Keep "page_comp_type" bits | ||||
| page_type_mask2: Final = 0xF000 | page_type_mask | ||||
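| # Illustrative arithmetic: page_type_mask2 == 0xFF00, so a compressed | ||||
| # data page of type 0x9100 (page_comp_type | page_data_type) keeps both | ||||
| # nibbles when masked in _read_page_header. | ||||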
| page_meta_type: Final = 0x0000 | ||||
| page_data_type: Final = 0x0100 | ||||
| page_mix_type: Final = 0x0200 | ||||
| page_amd_type: Final = 0x0400 | ||||
| page_meta2_type: Final = 0x4000 | ||||
| page_comp_type: Final = 0x9000 | ||||
| page_meta_types: Final = [page_meta_type, page_meta2_type] | ||||
| subheader_pointers_offset: Final = 8 | ||||
| truncated_subheader_id: Final = 1 | ||||
| compressed_subheader_id: Final = 4 | ||||
| compressed_subheader_type: Final = 1 | ||||
| text_block_size_length: Final = 2 | ||||
| row_length_offset_multiplier: Final = 5 | ||||
| row_count_offset_multiplier: Final = 6 | ||||
| col_count_p1_multiplier: Final = 9 | ||||
| col_count_p2_multiplier: Final = 10 | ||||
| row_count_on_mix_page_offset_multiplier: Final = 15 | ||||
| column_name_pointer_length: Final = 8 | ||||
| column_name_text_subheader_offset: Final = 0 | ||||
| column_name_text_subheader_length: Final = 2 | ||||
| column_name_offset_offset: Final = 2 | ||||
| column_name_offset_length: Final = 2 | ||||
| column_name_length_offset: Final = 4 | ||||
| column_name_length_length: Final = 2 | ||||
| column_data_offset_offset: Final = 8 | ||||
| column_data_length_offset: Final = 8 | ||||
| column_data_length_length: Final = 4 | ||||
| column_type_offset: Final = 14 | ||||
| column_type_length: Final = 1 | ||||
| column_format_text_subheader_index_offset: Final = 22 | ||||
| column_format_text_subheader_index_length: Final = 2 | ||||
| column_format_offset_offset: Final = 24 | ||||
| column_format_offset_length: Final = 2 | ||||
| column_format_length_offset: Final = 26 | ||||
| column_format_length_length: Final = 2 | ||||
| column_label_text_subheader_index_offset: Final = 28 | ||||
| column_label_text_subheader_index_length: Final = 2 | ||||
| column_label_offset_offset: Final = 30 | ||||
| column_label_offset_length: Final = 2 | ||||
| column_label_length_offset: Final = 32 | ||||
| column_label_length_length: Final = 2 | ||||
| rle_compression: Final = b"SASYZCRL" | ||||
| rdc_compression: Final = b"SASYZCR2" | ||||
|  | ||||
| compression_literals: Final = [rle_compression, rdc_compression] | ||||
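| # _process_columntext_subheader scans the first column-text block for | ||||
| # these literals to set SAS7BDATReader.compression. | ||||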
|  | ||||
| # Incomplete list of encodings, using SAS nomenclature: | ||||
| # https://support.sas.com/documentation/onlinedoc/dfdmstudio/2.6/dmpdmsug/Content/dfU_Encodings_SAS.html | ||||
| # corresponding to the Python documentation of standard encodings | ||||
| # https://docs.python.org/3/library/codecs.html#standard-encodings | ||||
| encoding_names: Final = { | ||||
|     20: "utf-8", | ||||
|     29: "latin1", | ||||
|     30: "latin2", | ||||
|     31: "latin3", | ||||
|     32: "latin4", | ||||
|     33: "cyrillic", | ||||
|     34: "arabic", | ||||
|     35: "greek", | ||||
|     36: "hebrew", | ||||
|     37: "latin5", | ||||
|     38: "latin6", | ||||
|     39: "cp874", | ||||
|     40: "latin9", | ||||
|     41: "cp437", | ||||
|     42: "cp850", | ||||
|     43: "cp852", | ||||
|     44: "cp857", | ||||
|     45: "cp858", | ||||
|     46: "cp862", | ||||
|     47: "cp864", | ||||
|     48: "cp865", | ||||
|     49: "cp866", | ||||
|     50: "cp869", | ||||
|     51: "cp874", | ||||
|     # 52: "",  # not found | ||||
|     # 53: "",  # not found | ||||
|     # 54: "",  # not found | ||||
|     55: "cp720", | ||||
|     56: "cp737", | ||||
|     57: "cp775", | ||||
|     58: "cp860", | ||||
|     59: "cp863", | ||||
|     60: "cp1250", | ||||
|     61: "cp1251", | ||||
|     62: "cp1252", | ||||
|     63: "cp1253", | ||||
|     64: "cp1254", | ||||
|     65: "cp1255", | ||||
|     66: "cp1256", | ||||
|     67: "cp1257", | ||||
|     68: "cp1258", | ||||
|     118: "cp950", | ||||
|     # 119: "",  # not found | ||||
|     123: "big5", | ||||
|     125: "gb2312", | ||||
|     126: "cp936", | ||||
|     134: "euc_jp", | ||||
|     136: "cp932", | ||||
|     138: "shift_jis", | ||||
|     140: "euc-kr", | ||||
|     141: "cp949", | ||||
|     227: "latin8", | ||||
|     # 228: "", # not found | ||||
|     # 229: ""  # not found | ||||
| } | ||||
|  | ||||
|  | ||||
| class SASIndex: | ||||
|     row_size_index: Final = 0 | ||||
|     column_size_index: Final = 1 | ||||
|     subheader_counts_index: Final = 2 | ||||
|     column_text_index: Final = 3 | ||||
|     column_name_index: Final = 4 | ||||
|     column_attributes_index: Final = 5 | ||||
|     format_and_label_index: Final = 6 | ||||
|     column_list_index: Final = 7 | ||||
|     data_subheader_index: Final = 8 | ||||
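|  | ||||
|     # SAS7BDATReader._subheader_processors is built in this same order, | ||||
|     # so these constants double as indices into that list. | ||||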
|  | ||||
|  | ||||
| subheader_signature_to_index: Final = { | ||||
|     b"\xF7\xF7\xF7\xF7": SASIndex.row_size_index, | ||||
|     b"\x00\x00\x00\x00\xF7\xF7\xF7\xF7": SASIndex.row_size_index, | ||||
|     b"\xF7\xF7\xF7\xF7\x00\x00\x00\x00": SASIndex.row_size_index, | ||||
|     b"\xF7\xF7\xF7\xF7\xFF\xFF\xFB\xFE": SASIndex.row_size_index, | ||||
|     b"\xF6\xF6\xF6\xF6": SASIndex.column_size_index, | ||||
|     b"\x00\x00\x00\x00\xF6\xF6\xF6\xF6": SASIndex.column_size_index, | ||||
|     b"\xF6\xF6\xF6\xF6\x00\x00\x00\x00": SASIndex.column_size_index, | ||||
|     b"\xF6\xF6\xF6\xF6\xFF\xFF\xFB\xFE": SASIndex.column_size_index, | ||||
|     b"\x00\xFC\xFF\xFF": SASIndex.subheader_counts_index, | ||||
|     b"\xFF\xFF\xFC\x00": SASIndex.subheader_counts_index, | ||||
|     b"\x00\xFC\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.subheader_counts_index, | ||||
|     b"\xFF\xFF\xFF\xFF\xFF\xFF\xFC\x00": SASIndex.subheader_counts_index, | ||||
|     b"\xFD\xFF\xFF\xFF": SASIndex.column_text_index, | ||||
|     b"\xFF\xFF\xFF\xFD": SASIndex.column_text_index, | ||||
|     b"\xFD\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_text_index, | ||||
|     b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD": SASIndex.column_text_index, | ||||
|     b"\xFF\xFF\xFF\xFF": SASIndex.column_name_index, | ||||
|     b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_name_index, | ||||
|     b"\xFC\xFF\xFF\xFF": SASIndex.column_attributes_index, | ||||
|     b"\xFF\xFF\xFF\xFC": SASIndex.column_attributes_index, | ||||
|     b"\xFC\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_attributes_index, | ||||
|     b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC": SASIndex.column_attributes_index, | ||||
|     b"\xFE\xFB\xFF\xFF": SASIndex.format_and_label_index, | ||||
|     b"\xFF\xFF\xFB\xFE": SASIndex.format_and_label_index, | ||||
|     b"\xFE\xFB\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.format_and_label_index, | ||||
|     b"\xFF\xFF\xFF\xFF\xFF\xFF\xFB\xFE": SASIndex.format_and_label_index, | ||||
|     b"\xFE\xFF\xFF\xFF": SASIndex.column_list_index, | ||||
|     b"\xFF\xFF\xFF\xFE": SASIndex.column_list_index, | ||||
|     b"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_list_index, | ||||
|     b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFE": SASIndex.column_list_index, | ||||
| } | ||||
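|  | ||||
| # Subheader signatures are read with the file's integer width, so the | ||||
| # 4-byte variants occur in 32-bit files and the 8-byte variants in u64 | ||||
| # files. | ||||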
|  | ||||
|  | ||||
| # List of frequently used SAS date and datetime formats | ||||
| # http://support.sas.com/documentation/cdl/en/etsug/60372/HTML/default/viewer.htm#etsug_intervals_sect009.htm | ||||
| # https://github.com/epam/parso/blob/master/src/main/java/com/epam/parso/impl/SasFileConstants.java | ||||
| sas_date_formats: Final = ( | ||||
|     "DATE", | ||||
|     "DAY", | ||||
|     "DDMMYY", | ||||
|     "DOWNAME", | ||||
|     "JULDAY", | ||||
|     "JULIAN", | ||||
|     "MMDDYY", | ||||
|     "MMYY", | ||||
|     "MMYYC", | ||||
|     "MMYYD", | ||||
|     "MMYYP", | ||||
|     "MMYYS", | ||||
|     "MMYYN", | ||||
|     "MONNAME", | ||||
|     "MONTH", | ||||
|     "MONYY", | ||||
|     "QTR", | ||||
|     "QTRR", | ||||
|     "NENGO", | ||||
|     "WEEKDATE", | ||||
|     "WEEKDATX", | ||||
|     "WEEKDAY", | ||||
|     "WEEKV", | ||||
|     "WORDDATE", | ||||
|     "WORDDATX", | ||||
|     "YEAR", | ||||
|     "YYMM", | ||||
|     "YYMMC", | ||||
|     "YYMMD", | ||||
|     "YYMMP", | ||||
|     "YYMMS", | ||||
|     "YYMMN", | ||||
|     "YYMON", | ||||
|     "YYMMDD", | ||||
|     "YYQ", | ||||
|     "YYQC", | ||||
|     "YYQD", | ||||
|     "YYQP", | ||||
|     "YYQS", | ||||
|     "YYQN", | ||||
|     "YYQR", | ||||
|     "YYQRC", | ||||
|     "YYQRD", | ||||
|     "YYQRP", | ||||
|     "YYQRS", | ||||
|     "YYQRN", | ||||
|     "YYMMDDP", | ||||
|     "YYMMDDC", | ||||
|     "E8601DA", | ||||
|     "YYMMDDN", | ||||
|     "MMDDYYC", | ||||
|     "MMDDYYS", | ||||
|     "MMDDYYD", | ||||
|     "YYMMDDS", | ||||
|     "B8601DA", | ||||
|     "DDMMYYN", | ||||
|     "YYMMDDD", | ||||
|     "DDMMYYB", | ||||
|     "DDMMYYP", | ||||
|     "MMDDYYP", | ||||
|     "YYMMDDB", | ||||
|     "MMDDYYN", | ||||
|     "DDMMYYC", | ||||
|     "DDMMYYD", | ||||
|     "DDMMYYS", | ||||
|     "MINGUO", | ||||
| ) | ||||
|  | ||||
| sas_datetime_formats: Final = ( | ||||
|     "DATETIME", | ||||
|     "DTWKDATX", | ||||
|     "B8601DN", | ||||
|     "B8601DT", | ||||
|     "B8601DX", | ||||
|     "B8601DZ", | ||||
|     "B8601LX", | ||||
|     "E8601DN", | ||||
|     "E8601DT", | ||||
|     "E8601DX", | ||||
|     "E8601DZ", | ||||
|     "E8601LX", | ||||
|     "DATEAMPM", | ||||
|     "DTDATE", | ||||
|     "DTMONYY", | ||||
|     "DTMONYY", | ||||
|     "DTWKDATX", | ||||
|     "DTYEAR", | ||||
|     "TOD", | ||||
|     "MDYAMPM", | ||||
| ) | ||||
							
								
								
									
508 lib/python3.11/site-packages/pandas/io/sas/sas_xport.py (Normal file)
| @@ -0,0 +1,508 @@ | ||||
| """ | ||||
| Read a SAS XPort format file into a Pandas DataFrame. | ||||
|  | ||||
| Based on code from Jack Cushman (github.com/jcushman/xport). | ||||
|  | ||||
| The file format is defined here: | ||||
|  | ||||
| https://support.sas.com/content/dam/SAS/support/en/technical-papers/record-layout-of-a-sas-version-5-or-6-data-set-in-sas-transport-xport-format.pdf | ||||
| """ | ||||
| from __future__ import annotations | ||||
|  | ||||
| from collections import abc | ||||
| from datetime import datetime | ||||
| import struct | ||||
| from typing import TYPE_CHECKING | ||||
| import warnings | ||||
|  | ||||
| import numpy as np | ||||
|  | ||||
| from pandas.util._decorators import Appender | ||||
| from pandas.util._exceptions import find_stack_level | ||||
|  | ||||
| import pandas as pd | ||||
|  | ||||
| from pandas.io.common import get_handle | ||||
| from pandas.io.sas.sasreader import ReaderBase | ||||
|  | ||||
| if TYPE_CHECKING: | ||||
|     from pandas._typing import ( | ||||
|         CompressionOptions, | ||||
|         DatetimeNaTType, | ||||
|         FilePath, | ||||
|         ReadBuffer, | ||||
|     ) | ||||
|  | ||||
|  | ||||
| _correct_line1 = ( | ||||
|     "HEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!" | ||||
|     "000000000000000000000000000000  " | ||||
| ) | ||||
| _correct_header1 = ( | ||||
|     "HEADER RECORD*******MEMBER  HEADER RECORD!!!!!!!000000000000000001600000000" | ||||
| ) | ||||
| _correct_header2 = ( | ||||
|     "HEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!" | ||||
|     "000000000000000000000000000000  " | ||||
| ) | ||||
| _correct_obs_header = ( | ||||
|     "HEADER RECORD*******OBS     HEADER RECORD!!!!!!!" | ||||
|     "000000000000000000000000000000  " | ||||
| ) | ||||
| _fieldkeys = [ | ||||
|     "ntype", | ||||
|     "nhfun", | ||||
|     "field_length", | ||||
|     "nvar0", | ||||
|     "name", | ||||
|     "label", | ||||
|     "nform", | ||||
|     "nfl", | ||||
|     "num_decimals", | ||||
|     "nfj", | ||||
|     "nfill", | ||||
|     "niform", | ||||
|     "nifl", | ||||
|     "nifd", | ||||
|     "npos", | ||||
|     "_", | ||||
| ] | ||||
|  | ||||
|  | ||||
| _base_params_doc = """\ | ||||
| Parameters | ||||
| ---------- | ||||
| filepath_or_buffer : str or file-like object | ||||
|     Path to SAS file or object implementing binary read method.""" | ||||
|  | ||||
| _params2_doc = """\ | ||||
| index : identifier of index column | ||||
|     Identifier of column that should be used as index of the DataFrame. | ||||
| encoding : str | ||||
|     Encoding for text data. | ||||
| chunksize : int | ||||
|     Read file `chunksize` lines at a time, returns iterator.""" | ||||
|  | ||||
| _format_params_doc = """\ | ||||
| format : str | ||||
|     File format, only `xport` is currently supported.""" | ||||
|  | ||||
| _iterator_doc = """\ | ||||
| iterator : bool, default False | ||||
|     Return XportReader object for reading file incrementally.""" | ||||
|  | ||||
|  | ||||
| _read_sas_doc = f"""Read a SAS file into a DataFrame. | ||||
|  | ||||
| {_base_params_doc} | ||||
| {_format_params_doc} | ||||
| {_params2_doc} | ||||
| {_iterator_doc} | ||||
|  | ||||
| Returns | ||||
| ------- | ||||
| DataFrame or XportReader | ||||
|  | ||||
| Examples | ||||
| -------- | ||||
| Read a SAS Xport file: | ||||
|  | ||||
| >>> df = pd.read_sas('filename.XPT') | ||||
|  | ||||
| Read a Xport file in 10,000 line chunks: | ||||
|  | ||||
| >>> itr = pd.read_sas('filename.XPT', chunksize=10000) | ||||
| >>> for chunk in itr: | ||||
| ...     do_something(chunk) | ||||
|  | ||||
| """ | ||||
|  | ||||
| _xport_reader_doc = f"""\ | ||||
| Class for reading SAS Xport files. | ||||
|  | ||||
| {_base_params_doc} | ||||
| {_params2_doc} | ||||
|  | ||||
| Attributes | ||||
| ---------- | ||||
| member_info : list | ||||
|     Contains information about the file | ||||
| fields : list | ||||
|     Contains information about the variables in the file | ||||
| """ | ||||
|  | ||||
| _read_method_doc = """\ | ||||
| Read observations from SAS Xport file, returning as data frame. | ||||
|  | ||||
| Parameters | ||||
| ---------- | ||||
| nrows : int | ||||
|     Number of rows to read from data file; if None, read whole | ||||
|     file. | ||||
|  | ||||
| Returns | ||||
| ------- | ||||
| A DataFrame. | ||||
| """ | ||||
|  | ||||
|  | ||||
| def _parse_date(datestr: str) -> DatetimeNaTType: | ||||
|     """Given a date in xport format, return Python date.""" | ||||
|     try: | ||||
|         # e.g. "16FEB11:10:07:55" | ||||
|         return datetime.strptime(datestr, "%d%b%y:%H:%M:%S") | ||||
|     except ValueError: | ||||
|         return pd.NaT | ||||
|  | ||||
|  | ||||
| def _split_line(s: str, parts): | ||||
|     """ | ||||
|     Parameters | ||||
|     ---------- | ||||
|     s: str | ||||
|         Fixed-length string to split | ||||
|     parts: list of (name, length) pairs | ||||
|         Used to break up string, name '_' will be filtered from output. | ||||
|  | ||||
|     Returns | ||||
|     ------- | ||||
|     Dict of name:contents of string at given location. | ||||
|     """ | ||||
|     out = {} | ||||
|     start = 0 | ||||
|     for name, length in parts: | ||||
|         out[name] = s[start : start + length].strip() | ||||
|         start += length | ||||
|     del out["_"] | ||||
|     return out | ||||
|  | ||||
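| # Illustrative example: _split_line("SAS     SASLIB  ", | ||||
| # [["prefix", 8], ["_", 8]]) returns {"prefix": "SAS"} after stripping | ||||
| # padding and dropping the "_" filler field. | ||||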
|  | ||||
| def _handle_truncated_float_vec(vec, nbytes): | ||||
|     # This feature is not well documented, but some SAS XPORT files | ||||
|     # have 2-7 byte "truncated" floats.  To read these truncated | ||||
|     # floats, pad them with zeros on the right to make 8 byte floats. | ||||
|     # | ||||
|     # References: | ||||
|     # https://github.com/jcushman/xport/pull/3 | ||||
|     # The R "foreign" library | ||||
|  | ||||
|     if nbytes != 8: | ||||
|         vec1 = np.zeros(len(vec), np.dtype("S8")) | ||||
|         dtype = np.dtype(f"S{nbytes},S{8 - nbytes}") | ||||
|         vec2 = vec1.view(dtype=dtype) | ||||
|         vec2["f0"] = vec | ||||
|         return vec2 | ||||
|  | ||||
|     return vec | ||||
|  | ||||
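| # Sketch of the padding above (illustrative bytes): a 5-byte truncated | ||||
| # float b"\x41\x10\x00\x00\x00" becomes | ||||
| # b"\x41\x10\x00\x00\x00\x00\x00\x00", i.e. zero-extended on the right | ||||
| # to a full 8-byte IBM double. | ||||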
|  | ||||
| def _parse_float_vec(vec): | ||||
|     """ | ||||
|     Parse a vector of float values representing IBM 8 byte floats into | ||||
|     native 8 byte floats. | ||||
|     """ | ||||
|     dtype = np.dtype(">u4,>u4") | ||||
|     vec1 = vec.view(dtype=dtype) | ||||
|     xport1 = vec1["f0"] | ||||
|     xport2 = vec1["f1"] | ||||
|  | ||||
|     # Start by setting first half of ieee number to first half of IBM | ||||
|     # number sans exponent | ||||
|     ieee1 = xport1 & 0x00FFFFFF | ||||
|  | ||||
|     # The fraction bit to the left of the binary point in the ieee | ||||
|     # format was set and the number was shifted 0, 1, 2, or 3 | ||||
|     # places. This will tell us how to adjust the ibm exponent to be a | ||||
|     # power of 2 ieee exponent and how to shift the fraction bits to | ||||
|     # restore the correct magnitude. | ||||
|     shift = np.zeros(len(vec), dtype=np.uint8) | ||||
|     shift[np.where(xport1 & 0x00200000)] = 1 | ||||
|     shift[np.where(xport1 & 0x00400000)] = 2 | ||||
|     shift[np.where(xport1 & 0x00800000)] = 3 | ||||
|  | ||||
|     # shift the ieee number down the correct number of places then | ||||
|     # set the second half of the ieee number to be the second half | ||||
|     # of the ibm number shifted appropriately, ored with the bits | ||||
|     # from the first half that would have been shifted in if we | ||||
|     # could shift a double. All we are worried about are the low | ||||
|     # order 3 bits of the first half since we're only shifting by | ||||
|     # 1, 2, or 3. | ||||
|     ieee1 >>= shift | ||||
|     ieee2 = (xport2 >> shift) | ((xport1 & 0x00000007) << (29 + (3 - shift))) | ||||
|  | ||||
|     # clear the 1 bit to the left of the binary point | ||||
|     ieee1 &= 0xFFEFFFFF | ||||
|  | ||||
|     # set the exponent of the ieee number to be the actual exponent | ||||
|     # plus the shift count + 1023. Or this into the first half of the | ||||
|     # ieee number. The ibm exponent is excess 64 but is adjusted by 65 | ||||
|     # since during conversion to ibm format the exponent is | ||||
|     # incremented by 1 and the fraction bits left 4 positions to the | ||||
|     # right of the radix point.  (had to add >> 24 because C treats & | ||||
|     # 0x7f as 0x7f000000 and Python doesn't) | ||||
|     ieee1 |= ((((((xport1 >> 24) & 0x7F) - 65) << 2) + shift + 1023) << 20) | ( | ||||
|         xport1 & 0x80000000 | ||||
|     ) | ||||
|  | ||||
|     ieee = np.empty((len(ieee1),), dtype=">u4,>u4") | ||||
|     ieee["f0"] = ieee1 | ||||
|     ieee["f1"] = ieee2 | ||||
|     ieee = ieee.view(dtype=">f8") | ||||
|     ieee = ieee.astype("f8") | ||||
|  | ||||
|     return ieee | ||||
|  | ||||
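| # Illustrative sketch (not part of the original module): in IBM | ||||
| # format, 0x4110000000000000 encodes 16**1 * (1/16) = 1.0, which | ||||
| # parses to the native IEEE value: | ||||
| # | ||||
| #   >>> _parse_float_vec(np.array([b"\x41\x10\x00\x00\x00\x00\x00\x00"], dtype="S8")) | ||||
| #   array([1.]) | ||||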
|  | ||||
| class XportReader(ReaderBase, abc.Iterator): | ||||
|     __doc__ = _xport_reader_doc | ||||
|  | ||||
|     def __init__( | ||||
|         self, | ||||
|         filepath_or_buffer: FilePath | ReadBuffer[bytes], | ||||
|         index=None, | ||||
|         encoding: str | None = "ISO-8859-1", | ||||
|         chunksize: int | None = None, | ||||
|         compression: CompressionOptions = "infer", | ||||
|     ) -> None: | ||||
|         self._encoding = encoding | ||||
|         self._lines_read = 0 | ||||
|         self._index = index | ||||
|         self._chunksize = chunksize | ||||
|  | ||||
|         self.handles = get_handle( | ||||
|             filepath_or_buffer, | ||||
|             "rb", | ||||
|             encoding=encoding, | ||||
|             is_text=False, | ||||
|             compression=compression, | ||||
|         ) | ||||
|         self.filepath_or_buffer = self.handles.handle | ||||
|  | ||||
|         try: | ||||
|             self._read_header() | ||||
|         except Exception: | ||||
|             self.close() | ||||
|             raise | ||||
|  | ||||
|     def close(self) -> None: | ||||
|         self.handles.close() | ||||
|  | ||||
|     def _get_row(self): | ||||
|         return self.filepath_or_buffer.read(80).decode() | ||||
|  | ||||
|     def _read_header(self) -> None: | ||||
|         self.filepath_or_buffer.seek(0) | ||||
|  | ||||
|         # read file header | ||||
|         line1 = self._get_row() | ||||
|         if line1 != _correct_line1: | ||||
|             if "**COMPRESSED**" in line1: | ||||
|                 # this was created with the PROC CPORT method and can't be read | ||||
|                 # https://documentation.sas.com/doc/en/pgmsascdc/9.4_3.5/movefile/p1bm6aqp3fw4uin1hucwh718f6kp.htm | ||||
|                 raise ValueError( | ||||
|                     "Header record indicates a CPORT file, which is not readable." | ||||
|                 ) | ||||
|             raise ValueError("Header record is not an XPORT file.") | ||||
|  | ||||
|         line2 = self._get_row() | ||||
|         fif = [["prefix", 24], ["version", 8], ["OS", 8], ["_", 24], ["created", 16]] | ||||
|         file_info = _split_line(line2, fif) | ||||
|         if file_info["prefix"] != "SAS     SAS     SASLIB": | ||||
|             raise ValueError("Header record has invalid prefix.") | ||||
|         file_info["created"] = _parse_date(file_info["created"]) | ||||
|         self.file_info = file_info | ||||
|  | ||||
|         line3 = self._get_row() | ||||
|         file_info["modified"] = _parse_date(line3[:16]) | ||||
|  | ||||
|         # read member header | ||||
|         header1 = self._get_row() | ||||
|         header2 = self._get_row() | ||||
|         headflag1 = header1.startswith(_correct_header1) | ||||
|         headflag2 = header2 == _correct_header2 | ||||
|         if not (headflag1 and headflag2): | ||||
|             raise ValueError("Member header not found") | ||||
|         # usually 140, could be 135 | ||||
|         fieldnamelength = int(header1[-5:-2]) | ||||
|  | ||||
|         # member info | ||||
|         mem = [ | ||||
|             ["prefix", 8], | ||||
|             ["set_name", 8], | ||||
|             ["sasdata", 8], | ||||
|             ["version", 8], | ||||
|             ["OS", 8], | ||||
|             ["_", 24], | ||||
|             ["created", 16], | ||||
|         ] | ||||
|         member_info = _split_line(self._get_row(), mem) | ||||
|         mem = [["modified", 16], ["_", 16], ["label", 40], ["type", 8]] | ||||
|         member_info.update(_split_line(self._get_row(), mem)) | ||||
|         member_info["modified"] = _parse_date(member_info["modified"]) | ||||
|         member_info["created"] = _parse_date(member_info["created"]) | ||||
|         self.member_info = member_info | ||||
|  | ||||
|         # read field names | ||||
|         types = {1: "numeric", 2: "char"} | ||||
|         fieldcount = int(self._get_row()[54:58]) | ||||
|         datalength = fieldnamelength * fieldcount | ||||
|         # round up to nearest 80 | ||||
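|         # (e.g. 7 fields x 140 bytes = 980, padded up to 1040) | ||||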
|         if datalength % 80: | ||||
|             datalength += 80 - datalength % 80 | ||||
|         fielddata = self.filepath_or_buffer.read(datalength) | ||||
|         fields = [] | ||||
|         obs_length = 0 | ||||
|         while len(fielddata) >= fieldnamelength: | ||||
|             # pull data for one field | ||||
|             fieldbytes, fielddata = ( | ||||
|                 fielddata[:fieldnamelength], | ||||
|                 fielddata[fieldnamelength:], | ||||
|             ) | ||||
|  | ||||
|             # rest at end gets ignored, so if field is short, pad out | ||||
|             # to match struct pattern below | ||||
|             fieldbytes = fieldbytes.ljust(140) | ||||
|  | ||||
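|             # each descriptor is 140 bytes: four int16 counters, an | ||||
|             # 8-byte name, a 40-byte label, format/informat fields, | ||||
|             # and filler (see the struct pattern below) | ||||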
|             fieldstruct = struct.unpack(">hhhh8s40s8shhh2s8shhl52s", fieldbytes) | ||||
|             field = dict(zip(_fieldkeys, fieldstruct)) | ||||
|             del field["_"] | ||||
|             field["ntype"] = types[field["ntype"]] | ||||
|             fl = field["field_length"] | ||||
|             if field["ntype"] == "numeric" and ((fl < 2) or (fl > 8)): | ||||
|                 msg = f"Floating field width {fl} is not between 2 and 8." | ||||
|                 raise TypeError(msg) | ||||
|  | ||||
|             for k, v in field.items(): | ||||
|                 try: | ||||
|                     field[k] = v.strip() | ||||
|                 except AttributeError: | ||||
|                     pass | ||||
|  | ||||
|             obs_length += field["field_length"] | ||||
|             fields += [field] | ||||
|  | ||||
|         header = self._get_row() | ||||
|         if header != _correct_obs_header: | ||||
|             raise ValueError("Observation header not found.") | ||||
|  | ||||
|         self.fields = fields | ||||
|         self.record_length = obs_length | ||||
|         self.record_start = self.filepath_or_buffer.tell() | ||||
|  | ||||
|         self.nobs = self._record_count() | ||||
|         self.columns = [x["name"].decode() for x in self.fields] | ||||
|  | ||||
|         # Setup the dtype. | ||||
|         dtypel = [ | ||||
|             ("s" + str(i), "S" + str(field["field_length"])) | ||||
|             for i, field in enumerate(self.fields) | ||||
|         ] | ||||
|         dtype = np.dtype(dtypel) | ||||
|         self._dtype = dtype | ||||
|  | ||||
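|     # iterate in chunks of ``chunksize`` rows, one row at a time if unset | ||||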
|     def __next__(self) -> pd.DataFrame: | ||||
|         return self.read(nrows=self._chunksize or 1) | ||||
|  | ||||
|     def _record_count(self) -> int: | ||||
|         """ | ||||
|         Get number of records in file. | ||||
|  | ||||
|         This is maybe suboptimal because we have to seek to the end of | ||||
|         the file. | ||||
|  | ||||
|         Side effect: restores the file position to record_start. | ||||
|         """ | ||||
|         self.filepath_or_buffer.seek(0, 2) | ||||
|         total_records_length = self.filepath_or_buffer.tell() - self.record_start | ||||
|  | ||||
|         if total_records_length % 80 != 0: | ||||
|             warnings.warn( | ||||
|                 "xport file may be corrupted.", | ||||
|                 stacklevel=find_stack_level(), | ||||
|             ) | ||||
|  | ||||
|         if self.record_length > 80: | ||||
|             self.filepath_or_buffer.seek(self.record_start) | ||||
|             return total_records_length // self.record_length | ||||
|  | ||||
|         self.filepath_or_buffer.seek(-80, 2) | ||||
|         last_card_bytes = self.filepath_or_buffer.read(80) | ||||
|         last_card = np.frombuffer(last_card_bytes, dtype=np.uint64) | ||||
|  | ||||
|         # 2314885530818453536 == 0x2020202020202020, i.e. eight ASCII | ||||
|         # blanks: a fully blank 8-byte word in the last 80-byte card | ||||
|         ix = np.flatnonzero(last_card == 2314885530818453536) | ||||
|  | ||||
|         # every blank word found is trailing padding, not data | ||||
|         tail_pad = 8 * len(ix) | ||||
|  | ||||
|         self.filepath_or_buffer.seek(self.record_start) | ||||
|  | ||||
|         return (total_records_length - tail_pad) // self.record_length | ||||
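|  | ||||
|     # Illustrative sketch (hypothetical numbers): with record_start at | ||||
|     # byte 1360, a file size of 1760 and record_length 40, there are | ||||
|     # 400 data bytes; if the last 80-byte card ends in two blank 8-byte | ||||
|     # words, the count is (400 - 16) // 40 == 9 records. | ||||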
|  | ||||
|     def get_chunk(self, size: int | None = None) -> pd.DataFrame: | ||||
|         """ | ||||
|         Read lines from Xport file and return as DataFrame. | ||||
|  | ||||
|         Parameters | ||||
|         ---------- | ||||
|         size : int, defaults to None | ||||
|             Number of lines to read.  If None, falls back to the | ||||
|             ``chunksize`` given at construction; if that is also None, | ||||
|             reads the whole file. | ||||
|  | ||||
|         Returns | ||||
|         ------- | ||||
|         DataFrame | ||||
|         """ | ||||
|         if size is None: | ||||
|             size = self._chunksize | ||||
|         return self.read(nrows=size) | ||||
|  | ||||
|     def _missing_double(self, vec): | ||||
|         v = vec.view(dtype="u1,u1,u2,u4") | ||||
|         miss = (v["f1"] == 0) & (v["f2"] == 0) & (v["f3"] == 0) | ||||
|         miss1 = ( | ||||
|             ((v["f0"] >= 0x41) & (v["f0"] <= 0x5A)) | ||||
|             | (v["f0"] == 0x5F) | ||||
|             | (v["f0"] == 0x2E) | ||||
|         ) | ||||
|         miss &= miss1 | ||||
|         return miss | ||||
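|  | ||||
|     # Illustrative sketch (not part of the original module): a SAS | ||||
|     # missing numeric is '.', '_' or 'A'-'Z' in the first byte with | ||||
|     # the remaining seven bytes zero, e.g.: | ||||
|     # | ||||
|     #   >>> vec = np.frombuffer(b".\x00\x00\x00\x00\x00\x00\x00", dtype="S8") | ||||
|     #   >>> XportReader._missing_double(None, vec) | ||||
|     #   array([ True]) | ||||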
|  | ||||
|     @Appender(_read_method_doc) | ||||
|     def read(self, nrows: int | None = None) -> pd.DataFrame: | ||||
|         if nrows is None: | ||||
|             nrows = self.nobs | ||||
|  | ||||
|         read_lines = min(nrows, self.nobs - self._lines_read) | ||||
|         read_len = read_lines * self.record_length | ||||
|         if read_len <= 0: | ||||
|             self.close() | ||||
|             raise StopIteration | ||||
|         raw = self.filepath_or_buffer.read(read_len) | ||||
|         data = np.frombuffer(raw, dtype=self._dtype, count=read_lines) | ||||
|  | ||||
|         df_data = {} | ||||
|         for j, x in enumerate(self.columns): | ||||
|             vec = data["s" + str(j)] | ||||
|             ntype = self.fields[j]["ntype"] | ||||
|             if ntype == "numeric": | ||||
|                 vec = _handle_truncated_float_vec(vec, self.fields[j]["field_length"]) | ||||
|                 miss = self._missing_double(vec) | ||||
|                 v = _parse_float_vec(vec) | ||||
|                 v[miss] = np.nan | ||||
|             elif ntype == "char": | ||||
|                 v = [y.rstrip() for y in vec] | ||||
|  | ||||
|                 if self._encoding is not None: | ||||
|                     v = [y.decode(self._encoding) for y in v] | ||||
|  | ||||
|             df_data.update({x: v}) | ||||
|         df = pd.DataFrame(df_data) | ||||
|  | ||||
|         if self._index is None: | ||||
|             df.index = pd.Index(range(self._lines_read, self._lines_read + read_lines)) | ||||
|         else: | ||||
|             df = df.set_index(self._index) | ||||
|  | ||||
|         self._lines_read += read_lines | ||||
|  | ||||
|         return df | ||||
							
								
								
									
178  lib/python3.11/site-packages/pandas/io/sas/sasreader.py  Normal file
							| @ -0,0 +1,178 @@ | ||||
| """ | ||||
| Read SAS sas7bdat or xport files. | ||||
| """ | ||||
| from __future__ import annotations | ||||
|  | ||||
| from abc import ( | ||||
|     ABC, | ||||
|     abstractmethod, | ||||
| ) | ||||
| from typing import ( | ||||
|     TYPE_CHECKING, | ||||
|     overload, | ||||
| ) | ||||
|  | ||||
| from pandas.util._decorators import doc | ||||
|  | ||||
| from pandas.core.shared_docs import _shared_docs | ||||
|  | ||||
| from pandas.io.common import stringify_path | ||||
|  | ||||
| if TYPE_CHECKING: | ||||
|     from collections.abc import Hashable | ||||
|     from types import TracebackType | ||||
|  | ||||
|     from pandas._typing import ( | ||||
|         CompressionOptions, | ||||
|         FilePath, | ||||
|         ReadBuffer, | ||||
|         Self, | ||||
|     ) | ||||
|  | ||||
|     from pandas import DataFrame | ||||
|  | ||||
|  | ||||
| class ReaderBase(ABC): | ||||
|     """ | ||||
|     Protocol for XportReader and SAS7BDATReader classes. | ||||
|     """ | ||||
|  | ||||
|     @abstractmethod | ||||
|     def read(self, nrows: int | None = None) -> DataFrame: | ||||
|         ... | ||||
|  | ||||
|     @abstractmethod | ||||
|     def close(self) -> None: | ||||
|         ... | ||||
|  | ||||
|     def __enter__(self) -> Self: | ||||
|         return self | ||||
|  | ||||
|     def __exit__( | ||||
|         self, | ||||
|         exc_type: type[BaseException] | None, | ||||
|         exc_value: BaseException | None, | ||||
|         traceback: TracebackType | None, | ||||
|     ) -> None: | ||||
|         self.close() | ||||
|  | ||||
|  | ||||
| @overload | ||||
| def read_sas( | ||||
|     filepath_or_buffer: FilePath | ReadBuffer[bytes], | ||||
|     *, | ||||
|     format: str | None = ..., | ||||
|     index: Hashable | None = ..., | ||||
|     encoding: str | None = ..., | ||||
|     chunksize: int = ..., | ||||
|     iterator: bool = ..., | ||||
|     compression: CompressionOptions = ..., | ||||
| ) -> ReaderBase: | ||||
|     ... | ||||
|  | ||||
|  | ||||
| @overload | ||||
| def read_sas( | ||||
|     filepath_or_buffer: FilePath | ReadBuffer[bytes], | ||||
|     *, | ||||
|     format: str | None = ..., | ||||
|     index: Hashable | None = ..., | ||||
|     encoding: str | None = ..., | ||||
|     chunksize: None = ..., | ||||
|     iterator: bool = ..., | ||||
|     compression: CompressionOptions = ..., | ||||
| ) -> DataFrame | ReaderBase: | ||||
|     ... | ||||
|  | ||||
|  | ||||
| @doc(decompression_options=_shared_docs["decompression_options"] % "filepath_or_buffer") | ||||
| def read_sas( | ||||
|     filepath_or_buffer: FilePath | ReadBuffer[bytes], | ||||
|     *, | ||||
|     format: str | None = None, | ||||
|     index: Hashable | None = None, | ||||
|     encoding: str | None = None, | ||||
|     chunksize: int | None = None, | ||||
|     iterator: bool = False, | ||||
|     compression: CompressionOptions = "infer", | ||||
| ) -> DataFrame | ReaderBase: | ||||
|     """ | ||||
|     Read SAS files stored as either XPORT or SAS7BDAT format files. | ||||
|  | ||||
|     Parameters | ||||
|     ---------- | ||||
|     filepath_or_buffer : str, path object, or file-like object | ||||
|         String, path object (implementing ``os.PathLike[str]``), or file-like | ||||
|         object implementing a binary ``read()`` function. The string could be a URL. | ||||
|         Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is | ||||
|         expected. A local file could be: | ||||
|         ``file://localhost/path/to/table.sas7bdat``. | ||||
|     format : str {{'xport', 'sas7bdat'}} or None | ||||
|         If None, file format is inferred from file extension. If 'xport' or | ||||
|         'sas7bdat', uses the corresponding format. | ||||
|     index : identifier of index column, defaults to None | ||||
|         Identifier of column that should be used as index of the DataFrame. | ||||
|     encoding : str, default is None | ||||
|         Encoding for text data.  If None, text data are stored as raw bytes. | ||||
|     chunksize : int | ||||
|         Read file `chunksize` lines at a time, returning an iterator. | ||||
|     iterator : bool, defaults to False | ||||
|         If True, returns an iterator for reading the file incrementally. | ||||
|     {decompression_options} | ||||
|  | ||||
|     Returns | ||||
|     ------- | ||||
|     DataFrame if iterator=False and chunksize=None, else SAS7BDATReader | ||||
|     or XportReader | ||||
|  | ||||
|     Examples | ||||
|     -------- | ||||
|     >>> df = pd.read_sas("sas_data.sas7bdat")  # doctest: +SKIP | ||||
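|  | ||||
|     For large files, pass ``chunksize`` to get a reader and pull rows | ||||
|     incrementally: | ||||
|  | ||||
|     >>> reader = pd.read_sas("sas_data.sas7bdat", chunksize=100)  # doctest: +SKIP | ||||
|     >>> chunk = reader.read(100)  # doctest: +SKIP | ||||
|     >>> reader.close()  # doctest: +SKIP | ||||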
|     """ | ||||
|     if format is None: | ||||
|         buffer_error_msg = ( | ||||
|             "If this is a buffer object rather " | ||||
|             "than a string name, you must specify a format string" | ||||
|         ) | ||||
|         filepath_or_buffer = stringify_path(filepath_or_buffer) | ||||
|         if not isinstance(filepath_or_buffer, str): | ||||
|             raise ValueError(buffer_error_msg) | ||||
|         fname = filepath_or_buffer.lower() | ||||
|         if ".xpt" in fname: | ||||
|             format = "xport" | ||||
|         elif ".sas7bdat" in fname: | ||||
|             format = "sas7bdat" | ||||
|         else: | ||||
|             raise ValueError( | ||||
|                 f"unable to infer format of SAS file from filename: {repr(fname)}" | ||||
|             ) | ||||
|  | ||||
|     reader: ReaderBase | ||||
|     if format.lower() == "xport": | ||||
|         from pandas.io.sas.sas_xport import XportReader | ||||
|  | ||||
|         reader = XportReader( | ||||
|             filepath_or_buffer, | ||||
|             index=index, | ||||
|             encoding=encoding, | ||||
|             chunksize=chunksize, | ||||
|             compression=compression, | ||||
|         ) | ||||
|     elif format.lower() == "sas7bdat": | ||||
|         from pandas.io.sas.sas7bdat import SAS7BDATReader | ||||
|  | ||||
|         reader = SAS7BDATReader( | ||||
|             filepath_or_buffer, | ||||
|             index=index, | ||||
|             encoding=encoding, | ||||
|             chunksize=chunksize, | ||||
|             compression=compression, | ||||
|         ) | ||||
|     else: | ||||
|         raise ValueError("unknown SAS format") | ||||
|  | ||||
|     if iterator or chunksize: | ||||
|         return reader | ||||
|  | ||||
|     with reader: | ||||
|         return reader.read() | ||||