"""Provides the `CCompilerOpt` class, used for handling the CPU/hardware
 | 
						|
optimization, starting from parsing the command arguments, to managing the
 | 
						|
relation between the CPU baseline and dispatch-able features,
 | 
						|
also generating the required C headers and ending with compiling
 | 
						|
the sources with proper compiler's flags.
 | 
						|
 | 
						|
`CCompilerOpt` doesn't provide runtime detection for the CPU features,
 | 
						|
instead only focuses on the compiler side, but it creates abstract C headers
 | 
						|
that can be used later for the final runtime dispatching process."""
 | 
						|
 | 
						|
import atexit
 | 
						|
import inspect
 | 
						|
import os
 | 
						|
import pprint
 | 
						|
import re
 | 
						|
import subprocess
 | 
						|
import textwrap
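
# Illustrative usage sketch (kept as a comment so nothing runs at import
# time). `cc_instance` is a stand-in for a `CCompiler` object, e.g. one
# returned by `distutils.ccompiler.new_compiler()`; the method names follow
# the public interface of `CCompilerOpt` defined later in this module:
#
#   opt = CCompilerOpt(cc_instance, cpu_baseline="min", cpu_dispatch="max")
#   dispatch_objects = opt.try_dispatch(["source.dispatch.c"])
#   baseline_flags = opt.cpu_baseline_flags()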

class _Config:
    """An abstract class that holds all configurable attributes of
    `CCompilerOpt`. These class attributes can be used to change the default
    behavior of `CCompilerOpt` in order to fit other requirements.

    Attributes
    ----------
    conf_nocache : bool
        Set True to disable memory and file cache.
        Default is False.

    conf_noopt : bool
        Set True to force the optimization to be disabled;
        in this case `CCompilerOpt` still generates all
        expected headers in order to 'not' break the build.
        Default is False.

    conf_cache_factors : list
        Add extra factors to the primary caching factors. The caching factors
        are utilized to determine whether changes have happened that
        require discarding and rebuilding the cache. The primary factors
        are the arguments of `CCompilerOpt` and `CCompiler`'s properties
        (type, flags, etc).
        Default is a list of two items: the time of last modification
        of `ccompiler_opt` and the value of attribute "conf_noopt".

    conf_tmp_path : str
        The path of the temporary directory. Default is an auto-created
        temporary directory via ``tempfile.mkdtemp()``.

    conf_check_path : str
        The path of the testing files. Each added CPU feature must have a
        **C** source file that contains at least one intrinsic or instruction
        related to this feature, so it can be tested against the compiler.
        Default is ``./distutils/checks``.

    conf_target_groups : dict
        Extra tokens that can be reached from dispatch-able sources through
        the special mark ``@targets``. Default is an empty dictionary.

        **Notes**:
            - case-insensitive for tokens and group names
            - the sign '#' must be at the beginning of the group name and is
              only valid within ``@targets``

        **Example**:
            .. code-block:: console

                $ "@targets #avx_group other_tokens" > group_inside.c

            >>> CCompilerOpt.conf_target_groups["avx_group"] = \\
            "$werror $maxopt avx2 avx512f avx512_skx"
            >>> cco = CCompilerOpt(cc_instance)
            >>> cco.try_dispatch(["group_inside.c"])

    conf_c_prefix : str
        The prefix of public C definitions. Default is ``"NPY_"``.

    conf_c_prefix_ : str
        The prefix of internal C definitions. Default is ``"NPY__"``.

    conf_cc_flags : dict
        Nested dictionaries defining several compiler flags
        that are linked to some major functions; the main key
        represents the compiler name and the sub-keys represent
        flag names. The default already covers all supported
        **C** compilers.

        Sub-keys explained as follows:

        "native": str or None
            used by argument option `native`, to detect the current
            machine support via the compiler.
        "werror": str or None
            utilized to treat warnings as errors when testing CPU features
            against the compiler and also for the target policy `$werror`
            via dispatch-able sources.
        "maxopt": str or None
            utilized for the target policy '$maxopt'; the value should
            contain the maximum acceptable optimization level for the
            compiler, e.g. ``'-O3'`` in gcc.

        **Notes**:
            * case-sensitive for compiler names and flags
            * use a space to separate multiple flags
            * any flag will be tested against the compiler and it will be
              skipped if it's not applicable.
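
        **Example** (an illustrative sketch only; ``"mycc"`` is a
        hypothetical compiler name used for demonstration)::

            conf_cc_flags["mycc"] = dict(
                native=None,        # no equivalent of '-march=native'
                opt="-O2",          # used for the '$maxopt' policy
                werror="-Werror",   # used while testing CPU features
            )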

    conf_min_features : dict
        A dictionary that defines the used CPU features for
        argument option ``'min'``; the key represents the CPU architecture
        name, e.g. ``'x86'``. The default values provide the best effort
        on a wide range of user platforms.

        **Note**: case-sensitive for architecture names.

    conf_features : dict
        Nested dictionaries used for identifying the CPU features.
        The primary key is represented as a feature name or a group name
        that gathers several features. The default values cover all
        supported features but without the major options like "flags";
        these undefined options are handled by the method
        `conf_features_partial()`.
        The default value covers almost all CPU features for *X86*,
        *IBM/Power64* and *ARM 7/8*.

        Sub-keys explained as follows:

        "implies" : str or list, optional
            List of CPU feature names to be implied by it;
            the feature name must be defined within `conf_features`.
            Default is None.

        "flags": str or list, optional
            List of compiler flags. Default is None.

        "detect": str or list, optional
            List of CPU feature names that are required to be detected
            at runtime. By default, it's the feature name or the features
            in "group" if it's specified.

        "implies_detect": bool, optional
            If True, all "detect" of implied features will be combined.
            Default is True. see `feature_detect()`.

        "group": str or list, optional
            Same as "implies" but doesn't require the feature name to be
            defined within `conf_features`.

        "interest": int, required
            a key for sorting CPU features

        "headers": str or list, optional
            intrinsics C header file

        "disable": str, optional
            force disable feature, the string value should contain the
            reason for disabling.

        "autovec": bool or None, optional
            True or False to declare that the CPU feature can be
            auto-vectorized by the compiler.
            By default (None), it is treated as True if the feature contains
            at least one applicable flag. see `feature_can_autovec()`

        "extra_checks": str or list, optional
            Extra test case names for the CPU feature that need to be tested
            against the compiler.

            Each test case must have a C file named ``extra_xxxx.c``, where
            ``xxxx`` is the case name in lower case, under 'conf_check_path'.
            It should contain at least one intrinsic or function related to
            the test case.

            If the compiler is able to successfully compile the C file then
            `CCompilerOpt` will add a C ``#define`` for it into the main
            dispatch header, e.g. ``#define {conf_c_prefix}_XXXX`` where
            ``XXXX`` is the case name in upper case.

        **NOTES**:
            * a space can be used as a separator with options that support
              "str or list"
            * case-sensitive for all values and the feature name must be in
              upper-case.
            * if flags aren't applicable, they will be skipped rather than
              disabling the CPU feature
            * the CPU feature will be disabled if the compiler fails to
              compile the test file
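
        **Example** (an illustrative sketch only; ``MYFEAT`` is a
        hypothetical feature name and its flag/header are made up for
        demonstration)::

            conf_features["MYFEAT"] = dict(
                interest=100, implies="SSE2", group="MYFEAT_SUB",
                flags="-mmyfeat", headers="myfeatintrin.h",
                extra_checks="MYFEAT_REDUCE", implies_detect=False
            )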
    """
    conf_nocache = False
    conf_noopt = False
    conf_cache_factors = None
    conf_tmp_path = None
    conf_check_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "checks"
    )
    conf_target_groups = {}
    conf_c_prefix = 'NPY_'
    conf_c_prefix_ = 'NPY__'
    conf_cc_flags = dict(
        gcc = dict(
            # native should always fail on arm and ppc64,
            # native usually works only with x86
            native = '-march=native',
            opt = '-O3',
            werror = '-Werror',
        ),
        clang = dict(
            native = '-march=native',
            opt = "-O3",
            # One of the following flags needs to be applicable for Clang to
            # guarantee the sanity of the testing process, however in certain
            # cases `-Werror` gets skipped during the availability test due to
            # "unused arguments" warnings.
            # see https://github.com/numpy/numpy/issues/19624
            werror = '-Werror=switch -Werror',
        ),
        icc = dict(
            native = '-xHost',
            opt = '-O3',
            werror = '-Werror',
        ),
        iccw = dict(
            native = '/QxHost',
            opt = '/O3',
            werror = '/Werror',
        ),
        msvc = dict(
            native = None,
            opt = '/O2',
            werror = '/WX',
        ),
        fcc = dict(
            native = '-mcpu=a64fx',
            opt = None,
            werror = None,
        )
    )
    conf_min_features = dict(
        x86 = "SSE SSE2",
        x64 = "SSE SSE2 SSE3",
        ppc64 = '', # play it safe
        ppc64le = "VSX VSX2",
        s390x = '',
        armhf = '', # play it safe
        aarch64 = "NEON NEON_FP16 NEON_VFPV4 ASIMD"
    )
    conf_features = dict(
        # X86
        SSE = dict(
            interest=1, headers="xmmintrin.h",
            # enabling SSE without SSE2 is useless also
            # it's non-optional for x86_64
            implies="SSE2"
        ),
        SSE2   = dict(interest=2, implies="SSE", headers="emmintrin.h"),
        SSE3   = dict(interest=3, implies="SSE2", headers="pmmintrin.h"),
        SSSE3  = dict(interest=4, implies="SSE3", headers="tmmintrin.h"),
        SSE41  = dict(interest=5, implies="SSSE3", headers="smmintrin.h"),
        POPCNT = dict(interest=6, implies="SSE41", headers="popcntintrin.h"),
        SSE42  = dict(interest=7, implies="POPCNT"),
        AVX    = dict(
            interest=8, implies="SSE42", headers="immintrin.h",
            implies_detect=False
        ),
        XOP    = dict(interest=9, implies="AVX", headers="x86intrin.h"),
        FMA4   = dict(interest=10, implies="AVX", headers="x86intrin.h"),
        F16C   = dict(interest=11, implies="AVX"),
        FMA3   = dict(interest=12, implies="F16C"),
        AVX2   = dict(interest=13, implies="F16C"),
        AVX512F = dict(
            interest=20, implies="FMA3 AVX2", implies_detect=False,
            extra_checks="AVX512F_REDUCE"
        ),
        AVX512CD = dict(interest=21, implies="AVX512F"),
        AVX512_KNL = dict(
            interest=40, implies="AVX512CD", group="AVX512ER AVX512PF",
            detect="AVX512_KNL", implies_detect=False
        ),
        AVX512_KNM = dict(
            interest=41, implies="AVX512_KNL",
            group="AVX5124FMAPS AVX5124VNNIW AVX512VPOPCNTDQ",
            detect="AVX512_KNM", implies_detect=False
        ),
        AVX512_SKX = dict(
            interest=42, implies="AVX512CD", group="AVX512VL AVX512BW AVX512DQ",
            detect="AVX512_SKX", implies_detect=False,
            extra_checks="AVX512BW_MASK AVX512DQ_MASK"
        ),
        AVX512_CLX = dict(
            interest=43, implies="AVX512_SKX", group="AVX512VNNI",
            detect="AVX512_CLX"
        ),
        AVX512_CNL = dict(
            interest=44, implies="AVX512_SKX", group="AVX512IFMA AVX512VBMI",
            detect="AVX512_CNL", implies_detect=False
        ),
        AVX512_ICL = dict(
            interest=45, implies="AVX512_CLX AVX512_CNL",
            group="AVX512VBMI2 AVX512BITALG AVX512VPOPCNTDQ",
            detect="AVX512_ICL", implies_detect=False
        ),
        AVX512_SPR = dict(
            interest=46, implies="AVX512_ICL", group="AVX512FP16",
            detect="AVX512_SPR", implies_detect=False
        ),
        # IBM/Power
        ## Power7/ISA 2.06
        VSX = dict(interest=1, headers="altivec.h", extra_checks="VSX_ASM"),
        ## Power8/ISA 2.07
        VSX2 = dict(interest=2, implies="VSX", implies_detect=False),
        ## Power9/ISA 3.00
        VSX3 = dict(interest=3, implies="VSX2", implies_detect=False,
                    extra_checks="VSX3_HALF_DOUBLE"),
        ## Power10/ISA 3.1
        VSX4 = dict(interest=4, implies="VSX3", implies_detect=False,
                    extra_checks="VSX4_MMA"),
        # IBM/Z
        ## VX(z13) support
        VX = dict(interest=1, headers="vecintrin.h"),
        ## Vector-Enhancements Facility
        VXE = dict(interest=2, implies="VX", implies_detect=False),
        ## Vector-Enhancements Facility 2
        VXE2 = dict(interest=3, implies="VXE", implies_detect=False),
        # ARM
        NEON  = dict(interest=1, headers="arm_neon.h"),
        NEON_FP16 = dict(interest=2, implies="NEON"),
        ## FMA
        NEON_VFPV4 = dict(interest=3, implies="NEON_FP16"),
        ## Advanced SIMD
        ASIMD = dict(interest=4, implies="NEON_FP16 NEON_VFPV4", implies_detect=False),
        ## ARMv8.2 half-precision & vector arithm
        ASIMDHP = dict(interest=5, implies="ASIMD"),
        ## ARMv8.2 dot product
        ASIMDDP = dict(interest=6, implies="ASIMD"),
        ## ARMv8.2 Single & half-precision Multiply
        ASIMDFHM = dict(interest=7, implies="ASIMDHP")
    )
    def conf_features_partial(self):
        """Return a dictionary of the CPU features supported by the platform,
        and accumulate the rest of the undefined options in `conf_features`.
        The returned dict follows the same rules and notes as the
        class attribute `conf_features`, and it also overrides
        any options that have been set in 'conf_features'.
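
        **Example** (illustrative only; the exact content depends on the
        detected platform and compiler, see the branches below)::

            >>> self.conf_features_partial()
            {'SSE': {'flags': '-msse'}, 'SSE2': {'flags': '-msse2'}, ...}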
        """
        if self.cc_noopt:
            # optimization is disabled
            return {}

        on_x86 = self.cc_on_x86 or self.cc_on_x64
        is_unix = self.cc_is_gcc or self.cc_is_clang or self.cc_is_fcc

        if on_x86 and is_unix: return dict(
            SSE    = dict(flags="-msse"),
            SSE2   = dict(flags="-msse2"),
            SSE3   = dict(flags="-msse3"),
            SSSE3  = dict(flags="-mssse3"),
            SSE41  = dict(flags="-msse4.1"),
            POPCNT = dict(flags="-mpopcnt"),
            SSE42  = dict(flags="-msse4.2"),
            AVX    = dict(flags="-mavx"),
            F16C   = dict(flags="-mf16c"),
            XOP    = dict(flags="-mxop"),
            FMA4   = dict(flags="-mfma4"),
            FMA3   = dict(flags="-mfma"),
            AVX2   = dict(flags="-mavx2"),
            AVX512F = dict(flags="-mavx512f -mno-mmx"),
            AVX512CD = dict(flags="-mavx512cd"),
            AVX512_KNL = dict(flags="-mavx512er -mavx512pf"),
            AVX512_KNM = dict(
                flags="-mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq"
            ),
            AVX512_SKX = dict(flags="-mavx512vl -mavx512bw -mavx512dq"),
            AVX512_CLX = dict(flags="-mavx512vnni"),
            AVX512_CNL = dict(flags="-mavx512ifma -mavx512vbmi"),
            AVX512_ICL = dict(
                flags="-mavx512vbmi2 -mavx512bitalg -mavx512vpopcntdq"
            ),
            AVX512_SPR = dict(flags="-mavx512fp16"),
        )
        if on_x86 and self.cc_is_icc: return dict(
            SSE    = dict(flags="-msse"),
            SSE2   = dict(flags="-msse2"),
            SSE3   = dict(flags="-msse3"),
            SSSE3  = dict(flags="-mssse3"),
            SSE41  = dict(flags="-msse4.1"),
            POPCNT = {},
            SSE42  = dict(flags="-msse4.2"),
            AVX    = dict(flags="-mavx"),
            F16C   = {},
            XOP    = dict(disable="Intel Compiler doesn't support it"),
            FMA4   = dict(disable="Intel Compiler doesn't support it"),
            # Intel Compiler doesn't support AVX2 or FMA3 independently
            FMA3 = dict(
                implies="F16C AVX2", flags="-march=core-avx2"
            ),
            AVX2 = dict(implies="FMA3", flags="-march=core-avx2"),
            # Intel Compiler doesn't support AVX512F or AVX512CD independently
            AVX512F = dict(
                implies="AVX2 AVX512CD", flags="-march=common-avx512"
            ),
            AVX512CD = dict(
                implies="AVX2 AVX512F", flags="-march=common-avx512"
            ),
            AVX512_KNL = dict(flags="-xKNL"),
            AVX512_KNM = dict(flags="-xKNM"),
            AVX512_SKX = dict(flags="-xSKYLAKE-AVX512"),
            AVX512_CLX = dict(flags="-xCASCADELAKE"),
            AVX512_CNL = dict(flags="-xCANNONLAKE"),
            AVX512_ICL = dict(flags="-xICELAKE-CLIENT"),
            AVX512_SPR = dict(disable="Not supported yet")
        )
        if on_x86 and self.cc_is_iccw: return dict(
            SSE    = dict(flags="/arch:SSE"),
            SSE2   = dict(flags="/arch:SSE2"),
            SSE3   = dict(flags="/arch:SSE3"),
            SSSE3  = dict(flags="/arch:SSSE3"),
            SSE41  = dict(flags="/arch:SSE4.1"),
            POPCNT = {},
            SSE42  = dict(flags="/arch:SSE4.2"),
            AVX    = dict(flags="/arch:AVX"),
            F16C   = {},
            XOP    = dict(disable="Intel Compiler doesn't support it"),
            FMA4   = dict(disable="Intel Compiler doesn't support it"),
            # Intel Compiler doesn't support FMA3 or AVX2 independently
            FMA3 = dict(
                implies="F16C AVX2", flags="/arch:CORE-AVX2"
            ),
            AVX2 = dict(
                implies="FMA3", flags="/arch:CORE-AVX2"
            ),
            # Intel Compiler doesn't support AVX512F or AVX512CD independently
            AVX512F = dict(
                implies="AVX2 AVX512CD", flags="/Qx:COMMON-AVX512"
            ),
            AVX512CD = dict(
                implies="AVX2 AVX512F", flags="/Qx:COMMON-AVX512"
            ),
            AVX512_KNL = dict(flags="/Qx:KNL"),
            AVX512_KNM = dict(flags="/Qx:KNM"),
            AVX512_SKX = dict(flags="/Qx:SKYLAKE-AVX512"),
            AVX512_CLX = dict(flags="/Qx:CASCADELAKE"),
            AVX512_CNL = dict(flags="/Qx:CANNONLAKE"),
            AVX512_ICL = dict(flags="/Qx:ICELAKE-CLIENT"),
            AVX512_SPR = dict(disable="Not supported yet")
        )
        if on_x86 and self.cc_is_msvc: return dict(
            SSE = dict(flags="/arch:SSE") if self.cc_on_x86 else {},
            SSE2 = dict(flags="/arch:SSE2") if self.cc_on_x86 else {},
            SSE3   = {},
            SSSE3  = {},
            SSE41  = {},
            POPCNT = dict(headers="nmmintrin.h"),
            SSE42  = {},
            AVX    = dict(flags="/arch:AVX"),
            F16C   = {},
            XOP    = dict(headers="ammintrin.h"),
            FMA4   = dict(headers="ammintrin.h"),
            # MSVC doesn't support FMA3 or AVX2 independently
            FMA3 = dict(
                implies="F16C AVX2", flags="/arch:AVX2"
            ),
            AVX2 = dict(
                implies="F16C FMA3", flags="/arch:AVX2"
            ),
            # MSVC doesn't support AVX512F or AVX512CD independently,
            # always generate instructions belonging to (VL/VW/DQ)
            AVX512F = dict(
                implies="AVX2 AVX512CD AVX512_SKX", flags="/arch:AVX512"
            ),
            AVX512CD = dict(
                implies="AVX512F AVX512_SKX", flags="/arch:AVX512"
            ),
            AVX512_KNL = dict(
                disable="MSVC compiler doesn't support it"
            ),
            AVX512_KNM = dict(
                disable="MSVC compiler doesn't support it"
            ),
            AVX512_SKX = dict(flags="/arch:AVX512"),
            AVX512_CLX = {},
            AVX512_CNL = {},
            AVX512_ICL = {},
            AVX512_SPR = dict(
                disable="MSVC compiler doesn't support it"
            )
        )

        on_power = self.cc_on_ppc64le or self.cc_on_ppc64
        if on_power:
            partial = dict(
                VSX = dict(
                    implies=("VSX2" if self.cc_on_ppc64le else ""),
                    flags="-mvsx"
                ),
                VSX2 = dict(
                    flags="-mcpu=power8", implies_detect=False
                ),
                VSX3 = dict(
                    flags="-mcpu=power9 -mtune=power9", implies_detect=False
                ),
                VSX4 = dict(
                    flags="-mcpu=power10 -mtune=power10", implies_detect=False
                )
            )
            if self.cc_is_clang:
                partial["VSX"]["flags"]  = "-maltivec -mvsx"
                partial["VSX2"]["flags"] = "-mcpu=power8"
                partial["VSX3"]["flags"] = "-mcpu=power9"
                partial["VSX4"]["flags"] = "-mcpu=power10"

            return partial

        on_zarch = self.cc_on_s390x
        if on_zarch:
            partial = dict(
                VX = dict(
                    flags="-march=arch11 -mzvector"
                ),
                VXE = dict(
                    flags="-march=arch12", implies_detect=False
                ),
                VXE2 = dict(
                    flags="-march=arch13", implies_detect=False
                )
            )

            return partial

        if self.cc_on_aarch64 and is_unix: return dict(
            NEON = dict(
                implies="NEON_FP16 NEON_VFPV4 ASIMD", autovec=True
            ),
            NEON_FP16 = dict(
                implies="NEON NEON_VFPV4 ASIMD", autovec=True
            ),
            NEON_VFPV4 = dict(
                implies="NEON NEON_FP16 ASIMD", autovec=True
            ),
            ASIMD = dict(
                implies="NEON NEON_FP16 NEON_VFPV4", autovec=True
            ),
            ASIMDHP = dict(
                flags="-march=armv8.2-a+fp16"
            ),
            ASIMDDP = dict(
                flags="-march=armv8.2-a+dotprod"
            ),
            ASIMDFHM = dict(
                flags="-march=armv8.2-a+fp16fml"
            ),
        )
        if self.cc_on_armhf and is_unix: return dict(
            NEON = dict(
                flags="-mfpu=neon"
            ),
            NEON_FP16 = dict(
                flags="-mfpu=neon-fp16 -mfp16-format=ieee"
            ),
            NEON_VFPV4 = dict(
                flags="-mfpu=neon-vfpv4",
            ),
            ASIMD = dict(
                flags="-mfpu=neon-fp-armv8 -march=armv8-a+simd",
            ),
            ASIMDHP = dict(
                flags="-march=armv8.2-a+fp16"
            ),
            ASIMDDP = dict(
                flags="-march=armv8.2-a+dotprod",
            ),
            ASIMDFHM = dict(
                flags="-march=armv8.2-a+fp16fml"
            )
        )
        # TODO: ARM MSVC
        return {}

    def __init__(self):
        if self.conf_tmp_path is None:
            import shutil
            import tempfile
            tmp = tempfile.mkdtemp()
            def rm_temp():
                try:
                    shutil.rmtree(tmp)
                except OSError:
                    pass
            atexit.register(rm_temp)
            self.conf_tmp_path = tmp

        if self.conf_cache_factors is None:
            self.conf_cache_factors = [
                os.path.getmtime(__file__),
                self.conf_nocache
            ]

class _Distutils:
    """A helper class that provides a collection of fundamental methods
    implemented on top of Python and NumPy Distutils.

    The idea behind this class is to gather all the methods that may
    need to be overridden in case of reusing 'CCompilerOpt' in an
    environment different from what NumPy has.

    Parameters
    ----------
    ccompiler : `CCompiler`
        The compiler instance returned from `distutils.ccompiler.new_compiler()`.
    """
    def __init__(self, ccompiler):
        self._ccompiler = ccompiler

    def dist_compile(self, sources, flags, ccompiler=None, **kwargs):
        """Wrap CCompiler.compile()"""
        assert(isinstance(sources, list))
        assert(isinstance(flags, list))
        flags = kwargs.pop("extra_postargs", []) + flags
        if not ccompiler:
            ccompiler = self._ccompiler

        return ccompiler.compile(sources, extra_postargs=flags, **kwargs)

    def dist_test(self, source, flags, macros=[]):
        """Return True if 'CCompiler.compile()' is able to compile
        a source file with certain flags.
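
        **Example** (an illustrative sketch; the check file name below is
        assumed to exist under `conf_check_path`)::

            ok = self.dist_test(
                os.path.join(self.conf_check_path, "cpu_avx2.c"), ["-mavx2"]
            )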
        """
        assert(isinstance(source, str))
        from distutils.errors import CompileError
        cc = self._ccompiler
        bk_spawn = getattr(cc, 'spawn', None)
        if bk_spawn:
            cc_type = getattr(self._ccompiler, "compiler_type", "")
            if cc_type in ("msvc",):
                setattr(cc, 'spawn', self._dist_test_spawn_paths)
            else:
                setattr(cc, 'spawn', self._dist_test_spawn)
        test = False
        try:
            self.dist_compile(
                [source], flags, macros=macros, output_dir=self.conf_tmp_path
            )
            test = True
        except CompileError as e:
            self.dist_log(str(e), stderr=True)
        if bk_spawn:
            setattr(cc, 'spawn', bk_spawn)
        return test

    def dist_info(self):
        """
        Return a tuple containing info about (platform, compiler, extra_args),
        required by the abstract class '_CCompiler' for discovering the
        platform environment. This is also used as a cache factor in order
        to detect any changes happening from outside.
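
        **Example** (illustrative only; the actual values depend on the
        local platform and toolchain)::

            >>> self.dist_info()
            ('linux-x86_64', 'gcc', '')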
        """
        if hasattr(self, "_dist_info"):
            return self._dist_info

        cc_type = getattr(self._ccompiler, "compiler_type", '')
        if cc_type in ("intelem", "intelemw"):
            platform = "x86_64"
        elif cc_type in ("intel", "intelw", "intele"):
            platform = "x86"
        else:
            from distutils.util import get_platform
            platform = get_platform()

        cc_info = getattr(self._ccompiler, "compiler", getattr(self._ccompiler, "compiler_so", ''))
        if not cc_type or cc_type == "unix":
            if hasattr(cc_info, "__iter__"):
                compiler = cc_info[0]
            else:
                compiler = str(cc_info)
        else:
            compiler = cc_type

        if hasattr(cc_info, "__iter__") and len(cc_info) > 1:
            extra_args = ' '.join(cc_info[1:])
        else:
            extra_args  = os.environ.get("CFLAGS", "")
            extra_args += os.environ.get("CPPFLAGS", "")

        self._dist_info = (platform, compiler, extra_args)
        return self._dist_info

    @staticmethod
    def dist_error(*args):
        """Raise a compiler error"""
        from distutils.errors import CompileError
        raise CompileError(_Distutils._dist_str(*args))

    @staticmethod
    def dist_fatal(*args):
        """Raise a distutils error"""
        from distutils.errors import DistutilsError
        raise DistutilsError(_Distutils._dist_str(*args))

    @staticmethod
    def dist_log(*args, stderr=False):
        """Print a console message"""
        from numpy.distutils import log
        out = _Distutils._dist_str(*args)
        if stderr:
            log.warn(out)
        else:
            log.info(out)

    @staticmethod
    def dist_load_module(name, path):
        """Load a module from file, required by the abstract class '_Cache'."""
        from .misc_util import exec_mod_from_location
        try:
            return exec_mod_from_location(name, path)
        except Exception as e:
            _Distutils.dist_log(e, stderr=True)
        return None

    @staticmethod
    def _dist_str(*args):
        """Return a string to print by log and errors."""
        def to_str(arg):
            if not isinstance(arg, str) and hasattr(arg, '__iter__'):
                ret = []
                for a in arg:
                    ret.append(to_str(a))
                return '(' + ' '.join(ret) + ')'
            return str(arg)

        stack = inspect.stack()[2]
        start = "CCompilerOpt.%s[%d] : " % (stack.function, stack.lineno)
        out = ' '.join([
            to_str(a)
            for a in (*args,)
        ])
        return start + out

    def _dist_test_spawn_paths(self, cmd, display=None):
        """
        Fix the msvc SDK ENV path, the same way distutils does;
        without it we get: c1: fatal error C1356: unable to find mspdbcore.dll
        """
        if not hasattr(self._ccompiler, "_paths"):
            self._dist_test_spawn(cmd)
            return
        old_path = os.getenv("path")
        try:
            os.environ["path"] = self._ccompiler._paths
            self._dist_test_spawn(cmd)
        finally:
            os.environ["path"] = old_path

    _dist_warn_regex = re.compile(
        # intel and msvc compilers don't raise
        # fatal errors when flags are wrong or unsupported
        ".*("
        "warning D9002|"  # msvc, it should work with any language.
        "invalid argument for option" # intel
        ").*"
    )

    @staticmethod
    def _dist_test_spawn(cmd, display=None):
        try:
            o = subprocess.check_output(cmd, stderr=subprocess.STDOUT,
                                        text=True)
            if o and re.match(_Distutils._dist_warn_regex, o):
                _Distutils.dist_error(
                    "Flags in command", cmd, "aren't supported by the compiler"
                    ", output -> \n%s" % o
                )
        except subprocess.CalledProcessError as exc:
            o = exc.output
            s = exc.returncode
        except OSError as e:
            o = e
            s = 127
        else:
            return None
        _Distutils.dist_error(
            "Command", cmd, "failed with exit status %d output -> \n%s" % (
            s, o
        ))

_share_cache = {}
class _Cache:
    """An abstract class that handles the caching functionality. It provides
    two levels of caching: in-memory, by sharing instance attributes among
    instances, and on disk, by storing the attributes into files.

    **Note**:
        any attributes that start with ``_`` or ``conf_`` will be ignored.

    Parameters
    ----------
    cache_path : str or None
        The path of the cache file; if None then the file cache will be
        disabled.

    *factors :
        The caching factors that need to be utilized next to
        `conf_cache_factors`.

    Attributes
    ----------
    cache_private : set
        Holds the attributes that need to be skipped from the
        "in-memory cache".

    cache_infile : bool
        Utilized while initializing this class, to determine if the cache
        was able to be loaded from the path specified in 'cache_path'.
    """

    # skip attributes from cache
    _cache_ignore = re.compile("^(_|conf_)")

    def __init__(self, cache_path=None, *factors):
        self.cache_me = {}
        self.cache_private = set()
        self.cache_infile = False
        self._cache_path = None

        if self.conf_nocache:
            self.dist_log("cache is disabled by `Config`")
            return

        self._cache_hash = self.cache_hash(*factors, *self.conf_cache_factors)
        self._cache_path = cache_path
        if cache_path:
            if os.path.exists(cache_path):
                self.dist_log("load cache from file ->", cache_path)
                cache_mod = self.dist_load_module("cache", cache_path)
                if not cache_mod:
                    self.dist_log(
                        "unable to load the cache file as a module",
                        stderr=True
                    )
                elif not hasattr(cache_mod, "hash") or \
                     not hasattr(cache_mod, "data"):
                    self.dist_log("invalid cache file", stderr=True)
                elif self._cache_hash == cache_mod.hash:
                    self.dist_log("hit the file cache")
                    for attr, val in cache_mod.data.items():
                        setattr(self, attr, val)
                    self.cache_infile = True
                else:
                    self.dist_log("miss the file cache")

        if not self.cache_infile:
            other_cache = _share_cache.get(self._cache_hash)
            if other_cache:
                self.dist_log("hit the memory cache")
                for attr, val in other_cache.__dict__.items():
                    if attr in other_cache.cache_private or \
                               re.match(self._cache_ignore, attr):
                        continue
                    setattr(self, attr, val)

        _share_cache[self._cache_hash] = self
        atexit.register(self.cache_flush)

    def __del__(self):
        for h, o in _share_cache.items():
            if o == self:
                _share_cache.pop(h)
                break

    def cache_flush(self):
        """
        Force update the cache.
        """
        if not self._cache_path:
            return
        # TODO: don't write if the cache doesn't change
        self.dist_log("write cache to path ->", self._cache_path)
        cdict = self.__dict__.copy()
        for attr in self.__dict__.keys():
            if re.match(self._cache_ignore, attr):
                cdict.pop(attr)

        d = os.path.dirname(self._cache_path)
        if not os.path.exists(d):
            os.makedirs(d)

        repr_dict = pprint.pformat(cdict, compact=True)
        with open(self._cache_path, "w") as f:
            f.write(textwrap.dedent("""\
            # AUTOGENERATED DON'T EDIT
            # Please make changes to the code generator \
            (distutils/ccompiler_opt.py)
            hash = {}
            data = \\
            """).format(self._cache_hash))
            f.write(repr_dict)

    def cache_hash(self, *factors):
        # is there a built-in non-crypto hash?
        # sdbm
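        # (each step computes chash = ord(char) + chash * 65599, since
        #  (chash << 6) + (chash << 16) - chash == chash * 65599,
        #  truncated to 32 bits below)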
        chash = 0
        for f in factors:
            for char in str(f):
                chash  = ord(char) + (chash << 6) + (chash << 16) - chash
                chash &= 0xFFFFFFFF
        return chash

    @staticmethod
    def me(cb):
        """
        A static method that can be treated as a decorator to
        dynamically cache certain methods.
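
        **Example** (a sketch of how it is used within this module)::

            class _CCompiler:
                @_Cache.me
                def cc_test_flags(self, flags):
                    ...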
 | 
						|
        """
 | 
						|
        def cache_wrap_me(self, *args, **kwargs):
 | 
						|
            # good for normal args
 | 
						|
            cache_key = str((
 | 
						|
                cb.__name__, *args, *kwargs.keys(), *kwargs.values()
 | 
						|
            ))
 | 
						|
            if cache_key in self.cache_me:
 | 
						|
                return self.cache_me[cache_key]
 | 
						|
            ccb = cb(self, *args, **kwargs)
 | 
						|
            self.cache_me[cache_key] = ccb
 | 
						|
            return ccb
 | 
						|
        return cache_wrap_me
 | 
						|
 | 
						|
class _CCompiler:
 | 
						|
    """A helper class for `CCompilerOpt` containing all utilities that
 | 
						|
    related to the fundamental compiler's functions.
 | 
						|
 | 
						|
    Attributes
 | 
						|
    ----------
 | 
						|
    cc_on_x86 : bool
 | 
						|
        True when the target architecture is 32-bit x86
 | 
						|
    cc_on_x64 : bool
 | 
						|
        True when the target architecture is 64-bit x86
 | 
						|
    cc_on_ppc64 : bool
 | 
						|
        True when the target architecture is 64-bit big-endian powerpc
 | 
						|
    cc_on_ppc64le : bool
 | 
						|
        True when the target architecture is 64-bit litle-endian powerpc
 | 
						|
    cc_on_s390x : bool
 | 
						|
        True when the target architecture is IBM/ZARCH on linux
 | 
						|
    cc_on_armhf : bool
 | 
						|
        True when the target architecture is 32-bit ARMv7+
 | 
						|
    cc_on_aarch64 : bool
 | 
						|
        True when the target architecture is 64-bit Armv8-a+
 | 
						|
    cc_on_noarch : bool
 | 
						|
        True when the target architecture is unknown or not supported
 | 
						|
    cc_is_gcc : bool
 | 
						|
        True if the compiler is GNU or
 | 
						|
        if the compiler is unknown
 | 
						|
    cc_is_clang : bool
 | 
						|
        True if the compiler is Clang
 | 
						|
    cc_is_icc : bool
 | 
						|
        True if the compiler is Intel compiler (unix like)
 | 
						|
    cc_is_iccw : bool
 | 
						|
        True if the compiler is Intel compiler (msvc like)
 | 
						|
    cc_is_nocc : bool
 | 
						|
        True if the compiler isn't supported directly,
 | 
						|
        Note: that cause a fail-back to gcc
 | 
						|
    cc_has_debug : bool
 | 
						|
        True if the compiler has debug flags
 | 
						|
    cc_has_native : bool
 | 
						|
        True if the compiler has native flags
 | 
						|
    cc_noopt : bool
 | 
						|
        True if the compiler has definition 'DISABLE_OPT*',
 | 
						|
        or 'cc_on_noarch' is True
 | 
						|
    cc_march : str
 | 
						|
        The target architecture name, or "unknown" if
 | 
						|
        the architecture isn't supported
 | 
						|
    cc_name : str
 | 
						|
        The compiler name, or "unknown" if the compiler isn't supported
 | 
						|
    cc_flags : dict
 | 
						|
        Dictionary containing the initialized flags of `_Config.conf_cc_flags`
 | 
						|
    """
 | 
						|
    def __init__(self):
 | 
						|
        if hasattr(self, "cc_is_cached"):
 | 
						|
            return
 | 
						|
        #      attr            regex        compiler-expression
 | 
						|
        detect_arch = (
 | 
						|
            ("cc_on_x64",      ".*(x|x86_|amd)64.*", ""),
 | 
						|
            ("cc_on_x86",      ".*(win32|x86|i386|i686).*", ""),
 | 
						|
            ("cc_on_ppc64le",  ".*(powerpc|ppc)64(el|le).*|.*powerpc.*",
 | 
						|
                                          "defined(__powerpc64__) && "
 | 
						|
                                          "defined(__LITTLE_ENDIAN__)"),
 | 
						|
            ("cc_on_ppc64",    ".*(powerpc|ppc).*|.*powerpc.*",
 | 
						|
                                          "defined(__powerpc64__) && "
 | 
						|
                                          "defined(__BIG_ENDIAN__)"),
 | 
						|
            ("cc_on_aarch64",  ".*(aarch64|arm64).*", ""),
 | 
						|
            ("cc_on_armhf",    ".*arm.*", "defined(__ARM_ARCH_7__) || "
 | 
						|
                                          "defined(__ARM_ARCH_7A__)"),
 | 
						|
            ("cc_on_s390x",    ".*s390x.*", ""),
 | 
						|
            # undefined platform
 | 
						|
            ("cc_on_noarch",   "", ""),
 | 
						|
        )
 | 
						|
        detect_compiler = (
 | 
						|
            ("cc_is_gcc",     r".*(gcc|gnu\-g).*", ""),
 | 
						|
            ("cc_is_clang",    ".*clang.*", ""),
 | 
						|
            # intel msvc like
 | 
						|
            ("cc_is_iccw",     ".*(intelw|intelemw|iccw).*", ""),
 | 
						|
            ("cc_is_icc",      ".*(intel|icc).*", ""),  # intel unix like
 | 
						|
            ("cc_is_msvc",     ".*msvc.*", ""),
 | 
						|
            ("cc_is_fcc",     ".*fcc.*", ""),
 | 
						|
            # undefined compiler will be treat it as gcc
 | 
						|
            ("cc_is_nocc",     "", ""),
 | 
						|
        )
 | 
						|
        detect_args = (
 | 
						|
           ("cc_has_debug",  ".*(O0|Od|ggdb|coverage|debug:full).*", ""),
 | 
						|
           ("cc_has_native",
 | 
						|
                ".*(-march=native|-xHost|/QxHost|-mcpu=a64fx).*", ""),
 | 
						|
           # in case if the class run with -DNPY_DISABLE_OPTIMIZATION
 | 
						|
           ("cc_noopt", ".*DISABLE_OPT.*", ""),
 | 
						|
        )
 | 
						|
 | 
						|
        dist_info = self.dist_info()
 | 
						|
        platform, compiler_info, extra_args = dist_info
 | 
						|
        # set False to all attrs
 | 
						|
        for section in (detect_arch, detect_compiler, detect_args):
 | 
						|
            for attr, rgex, cexpr in section:
 | 
						|
                setattr(self, attr, False)
 | 
						|
 | 
						|
        for detect, searchin in ((detect_arch, platform), (detect_compiler, compiler_info)):
 | 
						|
            for attr, rgex, cexpr in detect:
 | 
						|
                if rgex and not re.match(rgex, searchin, re.IGNORECASE):
 | 
						|
                    continue
 | 
						|
                if cexpr and not self.cc_test_cexpr(cexpr):
 | 
						|
                    continue
 | 
						|
                setattr(self, attr, True)
 | 
						|
                break
 | 
						|
 | 
						|
        for attr, rgex, cexpr in detect_args:
 | 
						|
            if rgex and not re.match(rgex, extra_args, re.IGNORECASE):
 | 
						|
                continue
 | 
						|
            if cexpr and not self.cc_test_cexpr(cexpr):
 | 
						|
                continue
 | 
						|
            setattr(self, attr, True)
 | 
						|
 | 
						|
        if self.cc_on_noarch:
 | 
						|
            self.dist_log(
 | 
						|
                "unable to detect CPU architecture which lead to disable the optimization. "
 | 
						|
                f"check dist_info:<<\n{dist_info}\n>>",
 | 
						|
                stderr=True
 | 
						|
            )
 | 
						|
            self.cc_noopt = True
 | 
						|
 | 
						|
        if self.conf_noopt:
 | 
						|
            self.dist_log("Optimization is disabled by the Config", stderr=True)
 | 
						|
            self.cc_noopt = True
 | 
						|
 | 
						|
        if self.cc_is_nocc:
 | 
						|
            """
 | 
						|
            mingw can be treated as a gcc, and also xlc even if it based on clang,
 | 
						|
            but still has the same gcc optimization flags.
 | 
						|
            """
 | 
						|
            self.dist_log(
 | 
						|
                "unable to detect compiler type which leads to treating it as GCC. "
 | 
						|
                "this is a normal behavior if you're using gcc-like compiler such as MinGW or IBM/XLC."
 | 
						|
                f"check dist_info:<<\n{dist_info}\n>>",
 | 
						|
                stderr=True
 | 
						|
            )
 | 
						|
            self.cc_is_gcc = True
 | 
						|
 | 
						|
        self.cc_march = "unknown"
 | 
						|
        for arch in ("x86", "x64", "ppc64", "ppc64le",
 | 
						|
                     "armhf", "aarch64", "s390x"):
 | 
						|
            if getattr(self, "cc_on_" + arch):
 | 
						|
                self.cc_march = arch
 | 
						|
                break
 | 
						|
 | 
						|
        self.cc_name = "unknown"
 | 
						|
        for name in ("gcc", "clang", "iccw", "icc", "msvc", "fcc"):
 | 
						|
            if getattr(self, "cc_is_" + name):
 | 
						|
                self.cc_name = name
 | 
						|
                break
 | 
						|
 | 
						|
        self.cc_flags = {}
 | 
						|
        compiler_flags = self.conf_cc_flags.get(self.cc_name)
 | 
						|
        if compiler_flags is None:
 | 
						|
            self.dist_fatal(
 | 
						|
                "undefined flag for compiler '%s', "
 | 
						|
                "leave an empty dict instead" % self.cc_name
 | 
						|
            )
 | 
						|
        for name, flags in compiler_flags.items():
 | 
						|
            self.cc_flags[name] = nflags = []
 | 
						|
            if flags:
 | 
						|
                assert(isinstance(flags, str))
 | 
						|
                flags = flags.split()
 | 
						|
                for f in flags:
 | 
						|
                    if self.cc_test_flags([f]):
 | 
						|
                        nflags.append(f)
 | 
						|
 | 
						|
        self.cc_is_cached = True
 | 
						|
 | 
						|
    @_Cache.me
 | 
						|
    def cc_test_flags(self, flags):
 | 
						|
        """
 | 
						|
        Returns True if the compiler supports 'flags'.
 | 
						|
        """
 | 
						|
        assert(isinstance(flags, list))
 | 
						|
        self.dist_log("testing flags", flags)
 | 
						|
        test_path = os.path.join(self.conf_check_path, "test_flags.c")
 | 
						|
        test = self.dist_test(test_path, flags)
 | 
						|
        if not test:
 | 
						|
            self.dist_log("testing failed", stderr=True)
 | 
						|
        return test
 | 
						|
 | 
						|
    @_Cache.me
 | 
						|
    def cc_test_cexpr(self, cexpr, flags=[]):
 | 
						|
        """
 | 
						|
        Same as the above but supports compile-time expressions.
 | 
						|
        """
 | 
						|
        self.dist_log("testing compiler expression", cexpr)
 | 
						|
        test_path = os.path.join(self.conf_tmp_path, "npy_dist_test_cexpr.c")
 | 
						|
        with open(test_path, "w") as fd:
 | 
						|
            fd.write(textwrap.dedent(f"""\
 | 
						|
               #if !({cexpr})
 | 
						|
                   #error "unsupported expression"
 | 
						|
               #endif
 | 
						|
               int dummy;
 | 
						|
            """))
 | 
						|
        test = self.dist_test(test_path, flags)
 | 
						|
        if not test:
 | 
						|
            self.dist_log("testing failed", stderr=True)
 | 
						|
        return test
 | 
						|
 | 
						|
    def cc_normalize_flags(self, flags):
 | 
						|
        """
 | 
						|
        Remove the conflicts that caused due gathering implied features flags.
 | 
						|
 | 
						|
        Parameters
 | 
						|
        ----------
 | 
						|
        'flags' list, compiler flags
 | 
						|
            flags should be sorted from the lowest to the highest interest.
 | 
						|
 | 
						|
        Returns
 | 
						|
        -------
 | 
						|
        list, filtered from any conflicts.
 | 
						|
 | 
						|
        Examples
 | 
						|
        --------
 | 
						|
        >>> self.cc_normalize_flags(['-march=armv8.2-a+fp16', '-march=armv8.2-a+dotprod'])
 | 
						|
        ['armv8.2-a+fp16+dotprod']
 | 
						|
 | 
						|
        >>> self.cc_normalize_flags(
 | 
						|
            ['-msse', '-msse2', '-msse3', '-mssse3', '-msse4.1', '-msse4.2', '-mavx', '-march=core-avx2']
 | 
						|
        )
 | 
						|
        ['-march=core-avx2']
 | 
						|
        """
 | 
						|
        assert(isinstance(flags, list))
 | 
						|
        if self.cc_is_gcc or self.cc_is_clang or self.cc_is_icc:
 | 
						|
            return self._cc_normalize_unix(flags)
 | 
						|
 | 
						|
        if self.cc_is_msvc or self.cc_is_iccw:
 | 
						|
            return self._cc_normalize_win(flags)
 | 
						|
        return flags
 | 
						|
 | 
						|
    _cc_normalize_unix_mrgx = re.compile(
 | 
						|
        # 1- to check the highest of
 | 
						|
        r"^(-mcpu=|-march=|-x[A-Z0-9\-])"
 | 
						|
    )
 | 
						|
    _cc_normalize_unix_frgx = re.compile(
 | 
						|
        # 2- to remove any flags that start with
 | 
						|
        # -march, -mcpu, -x(INTEL) and '-m' without '='
 | 
						|
        r"^(?!(-mcpu=|-march=|-x[A-Z0-9\-]|-m[a-z0-9\-\.]*.$))|"
 | 
						|
        # exclude:
 | 
						|
        r"(?:-mzvector)"
 | 
						|
    )
 | 
						|
    _cc_normalize_unix_krgx = re.compile(
 | 
						|
        # 3- keep only the highest of
 | 
						|
        r"^(-mfpu|-mtune)"
 | 
						|
    )
 | 
						|
    _cc_normalize_arch_ver = re.compile(
 | 
						|
        r"[0-9.]"
 | 
						|
    )
 | 
						|
    def _cc_normalize_unix(self, flags):
 | 
						|
        def ver_flags(f):
 | 
						|
            #        arch ver  subflag
 | 
						|
            # -march=armv8.2-a+fp16fml
 | 
						|
            tokens = f.split('+')
 | 
						|
            ver = float('0' + ''.join(
 | 
						|
                re.findall(self._cc_normalize_arch_ver, tokens[0])
 | 
						|
            ))
 | 
						|
            return ver, tokens[0], tokens[1:]
 | 
						|
 | 
						|
        if len(flags) <= 1:
 | 
						|
            return flags
 | 
						|
        # get the highest matched flag
 | 
						|
        for i, cur_flag in enumerate(reversed(flags)):
 | 
						|
            if not re.match(self._cc_normalize_unix_mrgx, cur_flag):
 | 
						|
                continue
 | 
						|
            lower_flags = flags[:-(i+1)]
 | 
						|
            upper_flags = flags[-i:]
 | 
						|
            filtered = list(filter(
 | 
						|
                self._cc_normalize_unix_frgx.search, lower_flags
 | 
						|
            ))
 | 
						|
            # gather subflags
 | 
						|
            ver, arch, subflags = ver_flags(cur_flag)
 | 
						|
            if ver > 0 and len(subflags) > 0:
 | 
						|
                for xflag in lower_flags:
 | 
						|
                    xver, _, xsubflags = ver_flags(xflag)
 | 
						|
                    if ver == xver:
 | 
						|
                        subflags = xsubflags + subflags
 | 
						|
                cur_flag = arch + '+' + '+'.join(subflags)
 | 
						|
 | 
						|
            flags = filtered + [cur_flag]
 | 
						|
            if i > 0:
 | 
						|
                flags += upper_flags
 | 
						|
            break
 | 
						|
 | 
						|
        # to remove overridable flags
 | 
						|
        final_flags = []
 | 
						|
        matched = set()
 | 
						|
        for f in reversed(flags):
 | 
						|
            match = re.match(self._cc_normalize_unix_krgx, f)
 | 
						|
            if not match:
 | 
						|
                pass
 | 
						|
            elif match[0] in matched:
 | 
						|
                continue
 | 
						|
            else:
 | 
						|
                matched.add(match[0])
 | 
						|
            final_flags.insert(0, f)
 | 
						|
        return final_flags
 | 
						|
 | 
						|
    _cc_normalize_win_frgx = re.compile(
 | 
						|
        r"^(?!(/arch\:|/Qx\:))"
 | 
						|
    )
 | 
						|
    _cc_normalize_win_mrgx = re.compile(
 | 
						|
        r"^(/arch|/Qx:)"
 | 
						|
    )
 | 
						|
    def _cc_normalize_win(self, flags):
 | 
						|
        for i, f in enumerate(reversed(flags)):
 | 
						|
            if not re.match(self._cc_normalize_win_mrgx, f):
 | 
						|
                continue
 | 
						|
            i += 1
 | 
						|
            return list(filter(
 | 
						|
                self._cc_normalize_win_frgx.search, flags[:-i]
 | 
						|
            )) + flags[-i:]
 | 
						|
        return flags
 | 
						|
 | 
						|
class _Feature:
    """A helper class for `CCompilerOpt` that manages CPU features.

    Attributes
    ----------
    feature_supported : dict
        Dictionary containing all CPU features supported
        by the platform, according to the specified values in attribute
        `_Config.conf_features` and `_Config.conf_features_partial()`.

    feature_min : set
        The minimum support of CPU features, according to
        the specified values in attribute `_Config.conf_min_features`.
    """
 | 
						|
    def __init__(self):
 | 
						|
        if hasattr(self, "feature_is_cached"):
 | 
						|
            return
 | 
						|
        self.feature_supported = pfeatures = self.conf_features_partial()
 | 
						|
        for feature_name in list(pfeatures.keys()):
 | 
						|
            feature  = pfeatures[feature_name]
 | 
						|
            cfeature = self.conf_features[feature_name]
 | 
						|
            feature.update({
 | 
						|
                k:v for k,v in cfeature.items() if k not in feature
 | 
						|
            })
 | 
						|
            disabled = feature.get("disable")
 | 
						|
            if disabled is not None:
 | 
						|
                pfeatures.pop(feature_name)
 | 
						|
                self.dist_log(
 | 
						|
                    "feature '%s' is disabled," % feature_name,
 | 
						|
                    disabled, stderr=True
 | 
						|
                )
 | 
						|
                continue
 | 
						|
            # list is used internally for these options
 | 
						|
            for option in (
 | 
						|
                "implies", "group", "detect", "headers", "flags", "extra_checks"
 | 
						|
            ) :
 | 
						|
                oval = feature.get(option)
 | 
						|
                if isinstance(oval, str):
 | 
						|
                    feature[option] = oval.split()
 | 
						|
 | 
						|
        self.feature_min = set()
 | 
						|
        min_f = self.conf_min_features.get(self.cc_march, "")
 | 
						|
        for F in min_f.upper().split():
 | 
						|
            if F in self.feature_supported:
 | 
						|
                self.feature_min.add(F)
 | 
						|
 | 
						|
        self.feature_is_cached = True
 | 
						|
 | 
						|
    def feature_names(self, names=None, force_flags=None, macros=[]):
        """
        Returns a set of CPU feature names supported by the platform and the **C** compiler.

        Parameters
        ----------
        names : sequence or None, optional
            Specify certain CPU features to test against the **C** compiler.
            If None (default), all currently supported features will be tested.
            **Note**: feature names must be in upper-case.

        force_flags : list or None, optional
            If None (default), the default compiler flags of every CPU feature
            will be used during the test.

        macros : list of tuples, optional
            A list of C macro definitions.
        """
 | 
						|
        assert(
 | 
						|
            names is None or (
 | 
						|
                not isinstance(names, str) and
 | 
						|
                hasattr(names, "__iter__")
 | 
						|
            )
 | 
						|
        )
 | 
						|
        assert(force_flags is None or isinstance(force_flags, list))
 | 
						|
        if names is None:
 | 
						|
            names = self.feature_supported.keys()
 | 
						|
        supported_names = set()
 | 
						|
        for f in names:
 | 
						|
            if self.feature_is_supported(
 | 
						|
                f, force_flags=force_flags, macros=macros
 | 
						|
            ):
 | 
						|
                supported_names.add(f)
 | 
						|
        return supported_names
 | 
						|
 | 
						|
    def feature_is_exist(self, name):
        """
        Returns True if a certain feature exists and is covered within
        ``_Config.conf_features``.

        Parameters
        ----------
        'name': str
            feature name in uppercase.
        """
 | 
						|
        assert(name.isupper())
 | 
						|
        return name in self.conf_features
 | 
						|
 | 
						|
    def feature_sorted(self, names, reverse=False):
        """
        Sort a list of CPU features, ordered from the lowest to the highest interest.

        Parameters
        ----------
        'names': sequence
            sequence of supported feature names in uppercase.
        'reverse': bool, optional
            If True, the sort order is reversed (highest interest first).

        Returns
        -------
        list, sorted CPU features
        """
 | 
						|
        def sort_cb(k):
 | 
						|
            if isinstance(k, str):
 | 
						|
                return self.feature_supported[k]["interest"]
 | 
						|
            # multiple features
 | 
						|
            rank = max([self.feature_supported[f]["interest"] for f in k])
 | 
						|
            # FIXME: that's not a safe way to increase the rank for
 | 
						|
            # multi targets
 | 
						|
            rank += len(k) -1
 | 
						|
            return rank
 | 
						|
        return sorted(names, reverse=reverse, key=sort_cb)
 | 
						|
 | 
						|
    def feature_implies(self, names, keep_origins=False):
        """
        Return a set of CPU features implied by 'names'.

        Parameters
        ----------
        names : str or sequence of str
            CPU feature name(s) in uppercase.

        keep_origins : bool
            If False (default), the returned set will not contain any
            features from 'names'. This matters only when two features
            imply each other.
 | 
						|
 | 
						|
        Examples
 | 
						|
        --------
 | 
						|
        >>> self.feature_implies("SSE3")
 | 
						|
        {'SSE', 'SSE2'}
 | 
						|
        >>> self.feature_implies("SSE2")
 | 
						|
        {'SSE'}
 | 
						|
        >>> self.feature_implies("SSE2", keep_origins=True)
 | 
						|
        # 'SSE2' found here since 'SSE' and 'SSE2' imply each other
 | 
						|
        {'SSE', 'SSE2'}
 | 
						|
        """
 | 
						|
        def get_implies(name, _caller=set()):
 | 
						|
            implies = set()
 | 
						|
            d = self.feature_supported[name]
 | 
						|
            for i in d.get("implies", []):
 | 
						|
                implies.add(i)
 | 
						|
                if i in _caller:
 | 
						|
                    # infinite recursion guard since
 | 
						|
                    # features can imply each other
 | 
						|
                    continue
 | 
						|
                _caller.add(name)
 | 
						|
                implies = implies.union(get_implies(i, _caller))
 | 
						|
            return implies
 | 
						|
 | 
						|
        if isinstance(names, str):
 | 
						|
            implies = get_implies(names)
 | 
						|
            names = [names]
 | 
						|
        else:
 | 
						|
            assert(hasattr(names, "__iter__"))
 | 
						|
            implies = set()
 | 
						|
            for n in names:
 | 
						|
                implies = implies.union(get_implies(n))
 | 
						|
        if not keep_origins:
 | 
						|
            implies.difference_update(names)
 | 
						|
        return implies
 | 
						|
 | 
						|
    def feature_implies_c(self, names):
 | 
						|
        """same as feature_implies() but combining 'names'"""
 | 
						|
        if isinstance(names, str):
 | 
						|
            names = set((names,))
 | 
						|
        else:
 | 
						|
            names = set(names)
 | 
						|
        return names.union(self.feature_implies(names))
 | 
						|
 | 
						|
    def feature_ahead(self, names):
        """
        Return the list of features in 'names' after removing any
        implied features while keeping the origins.

        Parameters
        ----------
        'names': sequence
            sequence of CPU feature names in uppercase.

        Returns
        -------
        list of CPU features, in the same order as 'names'

        Examples
        --------
        >>> self.feature_ahead(["SSE2", "SSE3", "SSE41"])
        ["SSE41"]
        # assume AVX2 and FMA3 imply each other and AVX2
        # is the highest interest
        >>> self.feature_ahead(["SSE2", "SSE3", "SSE41", "AVX2", "FMA3"])
        ["AVX2"]
        # assume AVX2 and FMA3 don't imply each other
        >>> self.feature_ahead(["SSE2", "SSE3", "SSE41", "AVX2", "FMA3"])
        ["AVX2", "FMA3"]
        """
 | 
						|
        assert(
 | 
						|
            not isinstance(names, str)
 | 
						|
            and hasattr(names, '__iter__')
 | 
						|
        )
 | 
						|
        implies = self.feature_implies(names, keep_origins=True)
 | 
						|
        ahead = [n for n in names if n not in implies]
 | 
						|
        if len(ahead) == 0:
 | 
						|
            # return the highest interested feature
 | 
						|
            # if all features imply each other
 | 
						|
            ahead = self.feature_sorted(names, reverse=True)[:1]
 | 
						|
        return ahead
 | 
						|
 | 
						|
    def feature_untied(self, names):
        """
        Same as 'feature_ahead()' but when two features imply each other,
        only the one with the highest interest is kept.

        Parameters
        ----------
        'names': sequence
            sequence of CPU feature names in uppercase.

        Returns
        -------
        list of CPU features, in the same order as 'names'

        Examples
        --------
        >>> self.feature_untied(["SSE2", "SSE3", "SSE41"])
        ["SSE2", "SSE3", "SSE41"]
        # assume AVX2 and FMA3 imply each other
        >>> self.feature_untied(["SSE2", "SSE3", "SSE41", "FMA3", "AVX2"])
        ["SSE2", "SSE3", "SSE41", "AVX2"]
        """
 | 
						|
        assert(
 | 
						|
            not isinstance(names, str)
 | 
						|
            and hasattr(names, '__iter__')
 | 
						|
        )
 | 
						|
        final = []
 | 
						|
        for n in names:
 | 
						|
            implies = self.feature_implies(n)
 | 
						|
            tied = [
 | 
						|
                nn for nn in final
 | 
						|
                if nn in implies and n in self.feature_implies(nn)
 | 
						|
            ]
 | 
						|
            if tied:
 | 
						|
                tied = self.feature_sorted(tied + [n])
 | 
						|
                if n not in tied[1:]:
 | 
						|
                    continue
 | 
						|
                final.remove(tied[:1][0])
 | 
						|
            final.append(n)
 | 
						|
        return final
 | 
						|
 | 
						|
    def feature_get_til(self, names, keyisfalse):
        """
        Same as `feature_implies_c()` but stop collecting implied
        features when the feature option provided through
        parameter 'keyisfalse' is False; the returned features
        are also sorted.
        """
 | 
						|
        def til(tnames):
 | 
						|
            # sort from highest to lowest interest then cut if "key" is False
 | 
						|
            tnames = self.feature_implies_c(tnames)
 | 
						|
            tnames = self.feature_sorted(tnames, reverse=True)
 | 
						|
            for i, n in enumerate(tnames):
 | 
						|
                if not self.feature_supported[n].get(keyisfalse, True):
 | 
						|
                    tnames = tnames[:i+1]
 | 
						|
                    break
 | 
						|
            return tnames
 | 
						|
 | 
						|
        if isinstance(names, str) or len(names) <= 1:
 | 
						|
            names = til(names)
 | 
						|
            # normalize the sort
 | 
						|
            names.reverse()
 | 
						|
            return names
 | 
						|
 | 
						|
        names = self.feature_ahead(names)
 | 
						|
        names = {t for n in names for t in til(n)}
 | 
						|
        return self.feature_sorted(names)
 | 
						|
 | 
						|
    def feature_detect(self, names):
        """
        Return a list of CPU features that are required to be detected,
        sorted from the lowest to the highest interest.
        """
 | 
						|
        names = self.feature_get_til(names, "implies_detect")
 | 
						|
        detect = []
 | 
						|
        for n in names:
 | 
						|
            d = self.feature_supported[n]
 | 
						|
            detect += d.get("detect", d.get("group", [n]))
 | 
						|
        return detect
 | 
						|
 | 
						|
    @_Cache.me
 | 
						|
    def feature_flags(self, names):
 | 
						|
        """
 | 
						|
        Return a list of CPU features flags sorted from the lowest
 | 
						|
        to highest interest.
 | 
						|
        """
 | 
						|
        names = self.feature_sorted(self.feature_implies_c(names))
 | 
						|
        flags = []
 | 
						|
        for n in names:
 | 
						|
            d = self.feature_supported[n]
 | 
						|
            f = d.get("flags", [])
 | 
						|
            if not f or not self.cc_test_flags(f):
 | 
						|
                continue
 | 
						|
            flags += f
 | 
						|
        return self.cc_normalize_flags(flags)
 | 
						|
 | 
						|
    @_Cache.me
 | 
						|
    def feature_test(self, name, force_flags=None, macros=[]):
 | 
						|
        """
 | 
						|
        Test a certain CPU feature against the compiler through its own
 | 
						|
        check file.
 | 
						|
 | 
						|
        Parameters
 | 
						|
        ----------
 | 
						|
        name : str
 | 
						|
            Supported CPU feature name.
 | 
						|
 | 
						|
        force_flags : list or None, optional
 | 
						|
            If None(default), the returned flags from `feature_flags()`
 | 
						|
            will be used.
 | 
						|
 | 
						|
        macros : list of tuples, optional
 | 
						|
            A list of C macro definitions.
 | 
						|
        """
 | 
						|
        if force_flags is None:
 | 
						|
            force_flags = self.feature_flags(name)
 | 
						|
 | 
						|
        self.dist_log(
 | 
						|
            "testing feature '%s' with flags (%s)" % (
 | 
						|
            name, ' '.join(force_flags)
 | 
						|
        ))
 | 
						|
        # Each CPU feature must have C source code contains at
 | 
						|
        # least one intrinsic or instruction related to this feature.
 | 
						|
        test_path = os.path.join(
 | 
						|
            self.conf_check_path, "cpu_%s.c" % name.lower()
 | 
						|
        )
 | 
						|
        if not os.path.exists(test_path):
            self.dist_fatal("feature test file does not exist", test_path)
 | 
						|
 | 
						|
        test = self.dist_test(
 | 
						|
            test_path, force_flags + self.cc_flags["werror"], macros=macros
 | 
						|
        )
 | 
						|
        if not test:
 | 
						|
            self.dist_log("testing failed", stderr=True)
 | 
						|
        return test
 | 
						|
 | 
						|
    @_Cache.me
 | 
						|
    def feature_is_supported(self, name, force_flags=None, macros=[]):
 | 
						|
        """
 | 
						|
        Check if a certain CPU feature is supported by the platform and compiler.
 | 
						|
 | 
						|
        Parameters
 | 
						|
        ----------
 | 
						|
        name : str
 | 
						|
            CPU feature name in uppercase.
 | 
						|
 | 
						|
        force_flags : list or None, optional
 | 
						|
            If None(default), default compiler flags for every CPU feature will
 | 
						|
            be used during test.
 | 
						|
 | 
						|
        macros : list of tuples, optional
 | 
						|
            A list of C macro definitions.
 | 
						|
        """
 | 
						|
        assert(name.isupper())
 | 
						|
        assert(force_flags is None or isinstance(force_flags, list))
 | 
						|
 | 
						|
        supported = name in self.feature_supported
 | 
						|
        if supported:
 | 
						|
            for impl in self.feature_implies(name):
 | 
						|
                if not self.feature_test(impl, force_flags, macros=macros):
 | 
						|
                    return False
 | 
						|
            if not self.feature_test(name, force_flags, macros=macros):
 | 
						|
                return False
 | 
						|
        return supported
 | 
						|
 | 
						|
    @_Cache.me
 | 
						|
    def feature_can_autovec(self, name):
 | 
						|
        """
 | 
						|
        check if the feature can be auto-vectorized by the compiler
 | 
						|
        """
 | 
						|
        assert(isinstance(name, str))
 | 
						|
        d = self.feature_supported[name]
 | 
						|
        can = d.get("autovec", None)
 | 
						|
        if can is None:
 | 
						|
            valid_flags = [
 | 
						|
                self.cc_test_flags([f]) for f in d.get("flags", [])
 | 
						|
            ]
 | 
						|
            can = valid_flags and any(valid_flags)
 | 
						|
        return can
 | 
						|
 | 
						|
    @_Cache.me
 | 
						|
    def feature_extra_checks(self, name):
 | 
						|
        """
 | 
						|
        Return a list of supported extra checks after testing them against
 | 
						|
        the compiler.
 | 
						|
 | 
						|
        Parameters
 | 
						|
        ----------
 | 
						|
        name : str
 | 
						|
            CPU feature name in uppercase.
 | 
						|
        """
 | 
						|
        assert isinstance(name, str)
 | 
						|
        d = self.feature_supported[name]
 | 
						|
        extra_checks = d.get("extra_checks", [])
 | 
						|
        if not extra_checks:
 | 
						|
            return []
 | 
						|
 | 
						|
        self.dist_log("Testing extra checks for feature '%s'" % name, extra_checks)
 | 
						|
        flags = self.feature_flags(name)
 | 
						|
        available = []
 | 
						|
        not_available = []
 | 
						|
        for chk in extra_checks:
 | 
						|
            test_path = os.path.join(
 | 
						|
                self.conf_check_path, "extra_%s.c" % chk.lower()
 | 
						|
            )
 | 
						|
            if not os.path.exists(test_path):
 | 
						|
                self.dist_fatal("extra check file does not exist", test_path)
 | 
						|
 | 
						|
            is_supported = self.dist_test(test_path, flags + self.cc_flags["werror"])
 | 
						|
            if is_supported:
 | 
						|
                available.append(chk)
 | 
						|
            else:
 | 
						|
                not_available.append(chk)
 | 
						|
 | 
						|
        if not_available:
 | 
						|
            self.dist_log("testing failed for checks", not_available, stderr=True)
 | 
						|
        return available
 | 
						|
 | 
						|
 | 
						|
    def feature_c_preprocessor(self, feature_name, tabs=0):
 | 
						|
        """
 | 
						|
        Generate C preprocessor definitions and include headers of a CPU feature.
 | 
						|
 | 
						|
        Parameters
 | 
						|
        ----------
 | 
						|
        'feature_name': str
 | 
						|
            CPU feature name in uppercase.
 | 
						|
        'tabs': int
            if > 0, align the generated strings to the right depending on the number of tabs.
 | 
						|
 | 
						|
        Returns
 | 
						|
        -------
 | 
						|
        str, generated C preprocessor
 | 
						|
 | 
						|
        Examples
 | 
						|
        --------
 | 
						|
        >>> self.feature_c_preprocessor("SSE3")
 | 
						|
        /** SSE3 **/
 | 
						|
        #define NPY_HAVE_SSE3 1
 | 
						|
        #include <pmmintrin.h>
 | 
						|
        """
 | 
						|
        assert(feature_name.isupper())
 | 
						|
        feature = self.feature_supported.get(feature_name)
 | 
						|
        assert(feature is not None)
 | 
						|
 | 
						|
        prepr = [
 | 
						|
            "/** %s **/" % feature_name,
 | 
						|
            "#define %sHAVE_%s 1" % (self.conf_c_prefix, feature_name)
 | 
						|
        ]
 | 
						|
        prepr += [
 | 
						|
            "#include <%s>" % h for h in feature.get("headers", [])
 | 
						|
        ]
 | 
						|
 | 
						|
        extra_defs = feature.get("group", [])
 | 
						|
        extra_defs += self.feature_extra_checks(feature_name)
 | 
						|
        for edef in extra_defs:
 | 
						|
            # Guard extra definitions in case of duplicate with
 | 
						|
            # another feature
 | 
						|
            prepr += [
 | 
						|
                "#ifndef %sHAVE_%s" % (self.conf_c_prefix, edef),
 | 
						|
                "\t#define %sHAVE_%s 1" % (self.conf_c_prefix, edef),
 | 
						|
                "#endif",
 | 
						|
            ]
 | 
						|
 | 
						|
        if tabs > 0:
 | 
						|
            prepr = [('\t'*tabs) + l for l in prepr]
 | 
						|
        return '\n'.join(prepr)
 | 
						|
 | 
						|
class _Parse:
    """A helper class that parses the main arguments of `CCompilerOpt`,
    as well as the configuration statements in dispatch-able sources.

    Parameters
    ----------
    cpu_baseline : str or None
        minimal set of required CPU features or special options.

    cpu_dispatch : str or None
        dispatched set of additional CPU features or special options.

    Special options can be:
        - **MIN**: Enables the minimum CPU features utilized via `_Config.conf_min_features`
        - **MAX**: Enables all CPU features supported by the compiler and platform.
        - **NATIVE**: Enables all CPU features supported by the current machine.
        - **NONE**: Enables nothing.
        - **Operand +/-**: remove or add features, useful with options **MAX**, **MIN** and **NATIVE**.
            NOTE: operand '+' is only accepted for nominal reasons.

    NOTES:
        - Case-insensitive among all CPU features and special options.
        - Comma or space can be used as a separator.
        - If a CPU feature is not supported by the user platform or compiler,
          it will be skipped rather than raising a fatal error.
        - Any CPU features specified in 'cpu_dispatch' will be skipped if they
          are part of the CPU baseline features.
        - 'cpu_baseline' force enables implied features.

    Attributes
    ----------
    parse_baseline_names : list
        Final CPU baseline feature names (sorted from low to high)
    parse_baseline_flags : list
        Compiler flags of baseline features
    parse_dispatch_names : list
        Final CPU dispatch-able feature names (sorted from low to high)
    parse_target_groups : dict
        Dictionary containing the initialized target groups that are configured
        through class attribute `conf_target_groups`.

        The key represents the group name and the value is a tuple
        containing three items:
            - bool, True if the group has the 'baseline' option.
            - list, list of CPU features.
            - list, list of extra compiler flags.

    """
 | 
						|
    def __init__(self, cpu_baseline, cpu_dispatch):
 | 
						|
        self._parse_policies = dict(
 | 
						|
            # POLICY NAME, (HAVE, NOT HAVE, [DEB])
 | 
						|
            KEEP_BASELINE = (
 | 
						|
                None, self._parse_policy_not_keepbase,
 | 
						|
                []
 | 
						|
            ),
 | 
						|
            KEEP_SORT = (
 | 
						|
                self._parse_policy_keepsort,
 | 
						|
                self._parse_policy_not_keepsort,
 | 
						|
                []
 | 
						|
            ),
 | 
						|
            MAXOPT = (
 | 
						|
                self._parse_policy_maxopt, None,
 | 
						|
                []
 | 
						|
            ),
 | 
						|
            WERROR = (
 | 
						|
                self._parse_policy_werror, None,
 | 
						|
                []
 | 
						|
            ),
 | 
						|
            AUTOVEC = (
 | 
						|
                self._parse_policy_autovec, None,
 | 
						|
                ["MAXOPT"]
 | 
						|
            )
 | 
						|
        )
 | 
						|
        if hasattr(self, "parse_is_cached"):
 | 
						|
            return
 | 
						|
 | 
						|
        self.parse_baseline_names = []
 | 
						|
        self.parse_baseline_flags = []
 | 
						|
        self.parse_dispatch_names = []
 | 
						|
        self.parse_target_groups = {}
 | 
						|
 | 
						|
        if self.cc_noopt:
 | 
						|
            # skip parsing baseline and dispatch args and keep parsing target groups
 | 
						|
            cpu_baseline = cpu_dispatch = None
 | 
						|
 | 
						|
        self.dist_log("check requested baseline")
 | 
						|
        if cpu_baseline is not None:
 | 
						|
            cpu_baseline = self._parse_arg_features("cpu_baseline", cpu_baseline)
 | 
						|
            baseline_names = self.feature_names(cpu_baseline)
 | 
						|
            self.parse_baseline_flags = self.feature_flags(baseline_names)
 | 
						|
            self.parse_baseline_names = self.feature_sorted(
 | 
						|
                self.feature_implies_c(baseline_names)
 | 
						|
            )
 | 
						|
 | 
						|
        self.dist_log("check requested dispatch-able features")
 | 
						|
        if cpu_dispatch is not None:
 | 
						|
            cpu_dispatch_ = self._parse_arg_features("cpu_dispatch", cpu_dispatch)
 | 
						|
            cpu_dispatch = {
 | 
						|
                f for f in cpu_dispatch_
 | 
						|
                if f not in self.parse_baseline_names
 | 
						|
            }
 | 
						|
            conflict_baseline = cpu_dispatch_.difference(cpu_dispatch)
 | 
						|
            self.parse_dispatch_names = self.feature_sorted(
 | 
						|
                self.feature_names(cpu_dispatch)
 | 
						|
            )
 | 
						|
            if len(conflict_baseline) > 0:
 | 
						|
                self.dist_log(
                    "skip features", conflict_baseline, "since they are part of the baseline"
                )
 | 
						|
 | 
						|
        self.dist_log("initialize targets groups")
 | 
						|
        for group_name, tokens in self.conf_target_groups.items():
 | 
						|
            self.dist_log("parse target group", group_name)
 | 
						|
            GROUP_NAME = group_name.upper()
 | 
						|
            if not tokens or not tokens.strip():
 | 
						|
                # allow empty groups, useful in case if there's a need
 | 
						|
                # to disable certain group since '_parse_target_tokens()'
 | 
						|
                # requires at least one valid target
 | 
						|
                self.parse_target_groups[GROUP_NAME] = (
 | 
						|
                    False, [], []
 | 
						|
                )
 | 
						|
                continue
 | 
						|
            has_baseline, features, extra_flags = \
 | 
						|
                self._parse_target_tokens(tokens)
 | 
						|
            self.parse_target_groups[GROUP_NAME] = (
 | 
						|
                has_baseline, features, extra_flags
 | 
						|
            )
 | 
						|
 | 
						|
        self.parse_is_cached = True
 | 
						|
 | 
						|
    def parse_targets(self, source):
        """
        Fetch and parse the configuration statements required for
        defining the targeted CPU features. The statements must be declared
        at the top of the source within a **C** comment and start
        with the special mark **@targets**.

        Configuration statements are a sort of keywords representing
        CPU feature names, groups of statements and policies, combined
        together to determine the required optimization.

        Parameters
        ----------
        source : str
            the path of the **C** source file.

        Returns
        -------
        - bool, True if group has the 'baseline' option
        - list, list of CPU features
        - list, list of extra compiler flags
        """
 | 
						|
        self.dist_log("looking for '@targets' inside -> ", source)
 | 
						|
        # get lines between /*@targets and */
 | 
						|
        with open(source) as fd:
 | 
						|
            tokens = ""
 | 
						|
            max_to_reach = 1000 # good enough, isn't it?
 | 
						|
            start_with = "@targets"
 | 
						|
            start_pos = -1
 | 
						|
            end_with = "*/"
 | 
						|
            end_pos = -1
 | 
						|
            for current_line, line in enumerate(fd):
 | 
						|
                if current_line == max_to_reach:
 | 
						|
                    self.dist_fatal("reached the max of lines")
 | 
						|
                    break
 | 
						|
                if start_pos == -1:
 | 
						|
                    start_pos = line.find(start_with)
 | 
						|
                    if start_pos == -1:
 | 
						|
                        continue
 | 
						|
                    start_pos += len(start_with)
 | 
						|
                tokens += line
 | 
						|
                end_pos = line.find(end_with)
 | 
						|
                if end_pos != -1:
 | 
						|
                    end_pos += len(tokens) - len(line)
 | 
						|
                    break
 | 
						|
 | 
						|
        if start_pos == -1:
 | 
						|
            self.dist_fatal("expected to find '%s' within a C comment" % start_with)
 | 
						|
        if end_pos == -1:
 | 
						|
            self.dist_fatal("expected to end with '%s'" % end_with)
 | 
						|
 | 
						|
        tokens = tokens[start_pos:end_pos]
 | 
						|
        return self._parse_target_tokens(tokens)
 | 
						|
 | 
						|
    _parse_regex_arg = re.compile(r'\s|,|([+-])')
 | 
						|
    def _parse_arg_features(self, arg_name, req_features):
 | 
						|
        if not isinstance(req_features, str):
 | 
						|
            self.dist_fatal("expected a string in '%s'" % arg_name)
 | 
						|
 | 
						|
        final_features = set()
 | 
						|
        # space and comma can be used as a separator
 | 
						|
        tokens = list(filter(None, re.split(self._parse_regex_arg, req_features)))
 | 
						|
        append = True # append is the default
 | 
						|
        for tok in tokens:
 | 
						|
            if tok[0] in ("#", "$"):
 | 
						|
                self.dist_fatal(
 | 
						|
                    arg_name, "target groups and policies "
 | 
						|
                    "aren't allowed from arguments, "
 | 
						|
                    "only from dispatch-able sources"
 | 
						|
                )
 | 
						|
            if tok == '+':
 | 
						|
                append = True
 | 
						|
                continue
 | 
						|
            if tok == '-':
 | 
						|
                append = False
 | 
						|
                continue
 | 
						|
 | 
						|
            TOK = tok.upper() # we use upper-case internally
 | 
						|
            features_to = set()
 | 
						|
            if TOK == "NONE":
 | 
						|
                pass
 | 
						|
            elif TOK == "NATIVE":
 | 
						|
                native = self.cc_flags["native"]
 | 
						|
                if not native:
 | 
						|
                    self.dist_fatal(arg_name,
 | 
						|
                        "native option isn't supported by the compiler"
 | 
						|
                    )
 | 
						|
                features_to = self.feature_names(
 | 
						|
                    force_flags=native, macros=[("DETECT_FEATURES", 1)]
 | 
						|
                )
 | 
						|
            elif TOK == "MAX":
 | 
						|
                features_to = self.feature_supported.keys()
 | 
						|
            elif TOK == "MIN":
 | 
						|
                features_to = self.feature_min
 | 
						|
            else:
 | 
						|
                if TOK in self.feature_supported:
 | 
						|
                    features_to.add(TOK)
 | 
						|
                else:
 | 
						|
                    if not self.feature_is_exist(TOK):
 | 
						|
                        self.dist_fatal(arg_name,
 | 
						|
                            ", '%s' isn't a known feature or option" % tok
 | 
						|
                        )
 | 
						|
            if append:
 | 
						|
                final_features = final_features.union(features_to)
 | 
						|
            else:
 | 
						|
                final_features = final_features.difference(features_to)
 | 
						|
 | 
						|
            append = True # back to default
 | 
						|
 | 
						|
        return final_features
 | 
						|
 | 
						|
    _parse_regex_target = re.compile(r'\s|[*,/]|([()])')
 | 
						|
    def _parse_target_tokens(self, tokens):
 | 
						|
        assert(isinstance(tokens, str))
 | 
						|
        final_targets = [] # to keep it sorted as specified
 | 
						|
        extra_flags = []
 | 
						|
        has_baseline = False
 | 
						|
 | 
						|
        skipped  = set()
 | 
						|
        policies = set()
 | 
						|
        multi_target = None
 | 
						|
 | 
						|
        tokens = list(filter(None, re.split(self._parse_regex_target, tokens)))
 | 
						|
        if not tokens:
 | 
						|
            self.dist_fatal("expected one token at least")
 | 
						|
 | 
						|
        for tok in tokens:
 | 
						|
            TOK = tok.upper()
 | 
						|
            ch = tok[0]
 | 
						|
            if ch in ('+', '-'):
 | 
						|
                self.dist_fatal(
 | 
						|
                    "+/- are 'not' allowed from target's groups or @targets, "
 | 
						|
                    "only from cpu_baseline and cpu_dispatch parms"
 | 
						|
                )
 | 
						|
            elif ch == '$':
 | 
						|
                if multi_target is not None:
 | 
						|
                    self.dist_fatal(
 | 
						|
                        "policies aren't allowed inside multi-target '()'"
 | 
						|
                        ", only CPU features"
 | 
						|
                    )
 | 
						|
                policies.add(self._parse_token_policy(TOK))
 | 
						|
            elif ch == '#':
 | 
						|
                if multi_target is not None:
 | 
						|
                    self.dist_fatal(
 | 
						|
                        "target groups aren't allowed inside multi-target '()'"
 | 
						|
                        ", only CPU features"
 | 
						|
                    )
 | 
						|
                has_baseline, final_targets, extra_flags = \
 | 
						|
                self._parse_token_group(TOK, has_baseline, final_targets, extra_flags)
 | 
						|
            elif ch == '(':
 | 
						|
                if multi_target is not None:
 | 
						|
                    self.dist_fatal("unclosed multi-target, missing ')'")
 | 
						|
                multi_target = set()
 | 
						|
            elif ch == ')':
 | 
						|
                if multi_target is None:
 | 
						|
                    self.dist_fatal("multi-target opener '(' wasn't found")
 | 
						|
                targets = self._parse_multi_target(multi_target)
 | 
						|
                if targets is None:
 | 
						|
                    skipped.add(tuple(multi_target))
 | 
						|
                else:
 | 
						|
                    if len(targets) == 1:
 | 
						|
                        targets = targets[0]
 | 
						|
                    if targets and targets not in final_targets:
 | 
						|
                        final_targets.append(targets)
 | 
						|
                multi_target = None # back to default
 | 
						|
            else:
 | 
						|
                if TOK == "BASELINE":
 | 
						|
                    if multi_target is not None:
 | 
						|
                        self.dist_fatal("baseline isn't allowed inside multi-target '()'")
 | 
						|
                    has_baseline = True
 | 
						|
                    continue
 | 
						|
 | 
						|
                if multi_target is not None:
 | 
						|
                    multi_target.add(TOK)
 | 
						|
                    continue
 | 
						|
 | 
						|
                if not self.feature_is_exist(TOK):
 | 
						|
                    self.dist_fatal("invalid target name '%s'" % TOK)
 | 
						|
 | 
						|
                is_enabled = (
 | 
						|
                    TOK in self.parse_baseline_names or
 | 
						|
                    TOK in self.parse_dispatch_names
 | 
						|
                )
 | 
						|
                if  is_enabled:
 | 
						|
                    if TOK not in final_targets:
 | 
						|
                        final_targets.append(TOK)
 | 
						|
                    continue
 | 
						|
 | 
						|
                skipped.add(TOK)
 | 
						|
 | 
						|
        if multi_target is not None:
 | 
						|
            self.dist_fatal("unclosed multi-target, missing ')'")
 | 
						|
        if skipped:
 | 
						|
            self.dist_log(
 | 
						|
                "skip targets", skipped,
 | 
						|
                "not part of baseline or dispatch-able features"
 | 
						|
            )
 | 
						|
 | 
						|
        final_targets = self.feature_untied(final_targets)
 | 
						|
 | 
						|
        # add polices dependencies
 | 
						|
        for p in list(policies):
 | 
						|
            _, _, deps = self._parse_policies[p]
 | 
						|
            for d in deps:
 | 
						|
                if d in policies:
 | 
						|
                    continue
 | 
						|
                self.dist_log(
 | 
						|
                    "policy '%s' force enables '%s'" % (
 | 
						|
                    p, d
 | 
						|
                ))
 | 
						|
                policies.add(d)
 | 
						|
 | 
						|
        # release policies filtrations
 | 
						|
        for p, (have, nhave, _) in self._parse_policies.items():
 | 
						|
            func = None
 | 
						|
            if p in policies:
 | 
						|
                func = have
 | 
						|
                self.dist_log("policy '%s' is ON" % p)
 | 
						|
            else:
 | 
						|
                func = nhave
 | 
						|
            if not func:
 | 
						|
                continue
 | 
						|
            has_baseline, final_targets, extra_flags = func(
 | 
						|
                has_baseline, final_targets, extra_flags
 | 
						|
            )
 | 
						|
 | 
						|
        return has_baseline, final_targets, extra_flags
 | 
						|
 | 
						|
    def _parse_token_policy(self, token):
 | 
						|
        """validate policy token"""
 | 
						|
        if len(token) <= 1 or token[-1:] == token[0]:
            self.dist_fatal("'$' must be at the beginning of the policy name")
 | 
						|
        token = token[1:]
 | 
						|
        if token not in self._parse_policies:
 | 
						|
            self.dist_fatal(
 | 
						|
                "'%s' is an invalid policy name, available policies are" % token,
 | 
						|
                self._parse_policies.keys()
 | 
						|
            )
 | 
						|
        return token
 | 
						|
 | 
						|
    def _parse_token_group(self, token, has_baseline, final_targets, extra_flags):
 | 
						|
        """validate group token"""
 | 
						|
        if len(token) <= 1 or token[-1:] == token[0]:
            self.dist_fatal("'#' must be at the beginning of the group name")
 | 
						|
 | 
						|
        token = token[1:]
 | 
						|
        ghas_baseline, gtargets, gextra_flags = self.parse_target_groups.get(
 | 
						|
            token, (False, None, [])
 | 
						|
        )
 | 
						|
        if gtargets is None:
 | 
						|
            self.dist_fatal(
 | 
						|
                "'%s' is an invalid target group name, " % token + \
 | 
						|
                "available target groups are",
 | 
						|
                self.parse_target_groups.keys()
 | 
						|
            )
 | 
						|
        if ghas_baseline:
 | 
						|
            has_baseline = True
 | 
						|
        # always keep sorting as specified
 | 
						|
        final_targets += [f for f in gtargets if f not in final_targets]
 | 
						|
        extra_flags += [f for f in gextra_flags if f not in extra_flags]
 | 
						|
        return has_baseline, final_targets, extra_flags
 | 
						|
 | 
						|
    def _parse_multi_target(self, targets):
 | 
						|
        """validate multi targets that defined between parentheses()"""
 | 
						|
        # remove any implied features and keep the origins
 | 
						|
        if not targets:
 | 
						|
            self.dist_fatal("empty multi-target '()'")
 | 
						|
        if not all([
 | 
						|
            self.feature_is_exist(tar) for tar in targets
 | 
						|
        ]) :
 | 
						|
            self.dist_fatal("invalid target name in multi-target", targets)
 | 
						|
        if not all([
 | 
						|
            (
 | 
						|
                tar in self.parse_baseline_names or
 | 
						|
                tar in self.parse_dispatch_names
 | 
						|
            )
 | 
						|
            for tar in targets
 | 
						|
        ]) :
 | 
						|
            return None
 | 
						|
        targets = self.feature_ahead(targets)
 | 
						|
        if not targets:
 | 
						|
            return None
 | 
						|
        # force sort multi targets, so it can be comparable
 | 
						|
        targets = self.feature_sorted(targets)
 | 
						|
        targets = tuple(targets) # hashable
 | 
						|
        return targets
 | 
						|
 | 
						|
    def _parse_policy_not_keepbase(self, has_baseline, final_targets, extra_flags):
 | 
						|
        """skip all baseline features"""
 | 
						|
        skipped = []
 | 
						|
        for tar in final_targets[:]:
 | 
						|
            is_base = False
 | 
						|
            if isinstance(tar, str):
 | 
						|
                is_base = tar in self.parse_baseline_names
 | 
						|
            else:
 | 
						|
                # multi targets
 | 
						|
                is_base = all([
 | 
						|
                    f in self.parse_baseline_names
 | 
						|
                    for f in tar
 | 
						|
                ])
 | 
						|
            if is_base:
 | 
						|
                skipped.append(tar)
 | 
						|
                final_targets.remove(tar)
 | 
						|
 | 
						|
        if skipped:
 | 
						|
            self.dist_log("skip baseline features", skipped)
 | 
						|
 | 
						|
        return has_baseline, final_targets, extra_flags
 | 
						|
 | 
						|
    def _parse_policy_keepsort(self, has_baseline, final_targets, extra_flags):
 | 
						|
        """leave a notice that $keep_sort is on"""
 | 
						|
        self.dist_log(
            "policy 'keep_sort' is on, dispatch-able targets", final_targets, "\n"
            "are 'not' sorted depending on the highest interest but "
            "as specified in the dispatch-able source or the extra group"
        )
 | 
						|
        return has_baseline, final_targets, extra_flags
 | 
						|
 | 
						|
    def _parse_policy_not_keepsort(self, has_baseline, final_targets, extra_flags):
 | 
						|
        """sorted depend on the highest interest"""
 | 
						|
        final_targets = self.feature_sorted(final_targets, reverse=True)
 | 
						|
        return has_baseline, final_targets, extra_flags
 | 
						|
 | 
						|
    def _parse_policy_maxopt(self, has_baseline, final_targets, extra_flags):
 | 
						|
        """append the compiler optimization flags"""
 | 
						|
        if self.cc_has_debug:
 | 
						|
            self.dist_log("debug mode is detected, policy 'maxopt' is skipped.")
 | 
						|
        elif self.cc_noopt:
 | 
						|
            self.dist_log("optimization is disabled, policy 'maxopt' is skipped.")
 | 
						|
        else:
 | 
						|
            flags = self.cc_flags["opt"]
 | 
						|
            if not flags:
 | 
						|
                self.dist_log(
 | 
						|
                    "current compiler doesn't support optimization flags, "
 | 
						|
                    "policy 'maxopt' is skipped", stderr=True
 | 
						|
                )
 | 
						|
            else:
 | 
						|
                extra_flags += flags
 | 
						|
        return has_baseline, final_targets, extra_flags
 | 
						|
 | 
						|
    def _parse_policy_werror(self, has_baseline, final_targets, extra_flags):
        """force warnings to be treated as errors"""
        flags = self.cc_flags["werror"]
        if not flags:
            self.dist_log(
                "current compiler doesn't support werror flags, "
                "warnings will 'not' be treated as errors", stderr=True
            )
 | 
						|
        else:
 | 
						|
            self.dist_log("compiler warnings are treated as errors")
 | 
						|
            extra_flags += flags
 | 
						|
        return has_baseline, final_targets, extra_flags
 | 
						|
 | 
						|
    def _parse_policy_autovec(self, has_baseline, final_targets, extra_flags):
        """skip features that have no auto-vectorization support from the compiler"""
 | 
						|
        skipped = []
 | 
						|
        for tar in final_targets[:]:
 | 
						|
            if isinstance(tar, str):
 | 
						|
                can = self.feature_can_autovec(tar)
 | 
						|
            else: # multiple target
 | 
						|
                can = all([
 | 
						|
                    self.feature_can_autovec(t)
 | 
						|
                    for t in tar
 | 
						|
                ])
 | 
						|
            if not can:
 | 
						|
                final_targets.remove(tar)
 | 
						|
                skipped.append(tar)
 | 
						|
 | 
						|
        if skipped:
 | 
						|
            self.dist_log("skip non auto-vectorized features", skipped)
 | 
						|
 | 
						|
        return has_baseline, final_targets, extra_flags
 | 
						|
 | 
						|
class CCompilerOpt(_Config, _Distutils, _Cache, _CCompiler, _Feature, _Parse):
    """
    A helper class for `CCompiler` that aims to provide extra build options
    to effectively control compiler optimizations that are directly
    related to CPU features.
    """
 | 
						|
    def __init__(self, ccompiler, cpu_baseline="min", cpu_dispatch="max", cache_path=None):
 | 
						|
        _Config.__init__(self)
 | 
						|
        _Distutils.__init__(self, ccompiler)
 | 
						|
        _Cache.__init__(self, cache_path, self.dist_info(), cpu_baseline, cpu_dispatch)
 | 
						|
        _CCompiler.__init__(self)
 | 
						|
        _Feature.__init__(self)
 | 
						|
        if not self.cc_noopt and self.cc_has_native:
 | 
						|
            self.dist_log(
 | 
						|
                "native flag is specified through environment variables. "
 | 
						|
                "force cpu-baseline='native'"
 | 
						|
            )
 | 
						|
            cpu_baseline = "native"
 | 
						|
        _Parse.__init__(self, cpu_baseline, cpu_dispatch)
 | 
						|
        # keep the requested features untouched, need it later for report
 | 
						|
        # and trace purposes
 | 
						|
        self._requested_baseline = cpu_baseline
 | 
						|
        self._requested_dispatch = cpu_dispatch
 | 
						|
        # key is the dispatch-able source and value is a tuple
 | 
						|
        # contains two items (has_baseline[boolean], dispatched-features[list])
 | 
						|
        self.sources_status = getattr(self, "sources_status", {})
 | 
						|
        # every instance should has a separate one
 | 
						|
        self.cache_private.add("sources_status")
 | 
						|
        # set it at the end to make sure the cache writing was done after init
 | 
						|
        # this class
 | 
						|
        self.hit_cache = hasattr(self, "hit_cache")
 | 
						|
 | 
						|
    def is_cached(self):
 | 
						|
        """
 | 
						|
        Returns True if the class was loaded from the cache file
 | 
						|
        """
 | 
						|
        return self.cache_infile and self.hit_cache
 | 
						|
 | 
						|
    def cpu_baseline_flags(self):
 | 
						|
        """
 | 
						|
        Returns a list of final CPU baseline compiler flags
 | 
						|
        """
 | 
						|
        return self.parse_baseline_flags
 | 
						|
 | 
						|
    def cpu_baseline_names(self):
 | 
						|
        """
 | 
						|
        return a list of final CPU baseline feature names
 | 
						|
        """
 | 
						|
        return self.parse_baseline_names
 | 
						|
 | 
						|
    def cpu_dispatch_names(self):
 | 
						|
        """
 | 
						|
        return a list of final CPU dispatch feature names
 | 
						|
        """
 | 
						|
        return self.parse_dispatch_names
 | 
						|
 | 
						|
    def try_dispatch(self, sources, src_dir=None, ccompiler=None, **kwargs):
        """
        Compile one or more dispatch-able sources and generate object files,
        also generate the abstract C config headers and macros that are
        used later for the final runtime dispatching process.

        The mechanism behind it is to take each source file specified
        in 'sources' and branch it into several files depending on the
        special configuration statements that must be declared at the
        top of each source and contain the targeted CPU features,
        then compile every branched source with the proper compiler flags.
 | 
						|
 | 
						|
        Parameters
 | 
						|
        ----------
 | 
						|
        sources : list
 | 
						|
            Must be a list of dispatch-able sources file paths,
 | 
						|
            and configuration statements must be declared inside
 | 
						|
            each file.
 | 
						|
 | 
						|
        src_dir : str
 | 
						|
            Path of parent directory for the generated headers and wrapped sources.
 | 
						|
            If None (default), the files will be generated in-place.
 | 
						|
 | 
						|
        ccompiler : CCompiler
 | 
						|
            Distutils `CCompiler` instance to be used for compilation.
 | 
						|
            If None (default), the provided instance during the initialization
 | 
						|
            will be used instead.
 | 
						|
 | 
						|
        **kwargs : any
 | 
						|
            Arguments to pass on to the `CCompiler.compile()`
 | 
						|
 | 
						|
        Returns
 | 
						|
        -------
 | 
						|
        list : generated object files
 | 
						|
 | 
						|
        Raises
 | 
						|
        ------
 | 
						|
        CompileError
 | 
						|
            Raised by `CCompiler.compile()` on compilation failure.
 | 
						|
        DistutilsError
 | 
						|
            Raised when errors are detected during the sanity checks of the configuration statements.
 | 
						|
 | 
						|
        See Also
 | 
						|
        --------
 | 
						|
        parse_targets :
 | 
						|
            Parsing the configuration statements of dispatch-able sources.
        """
        to_compile = {}
        baseline_flags = self.cpu_baseline_flags()
        include_dirs = kwargs.setdefault("include_dirs", [])

        for src in sources:
            output_dir = os.path.dirname(src)
            if src_dir:
                if not output_dir.startswith(src_dir):
                    output_dir = os.path.join(src_dir, output_dir)
                if output_dir not in include_dirs:
                    # To allow including the generated config header (*.dispatch.h)
                    # by the dispatch-able sources
                    include_dirs.append(output_dir)

            has_baseline, targets, extra_flags = self.parse_targets(src)
            nochange = self._generate_config(output_dir, src, targets, has_baseline)
            for tar in targets:
                tar_src = self._wrap_target(output_dir, src, tar, nochange=nochange)
                flags = tuple(extra_flags + self.feature_flags(tar))
                to_compile.setdefault(flags, []).append(tar_src)

            if has_baseline:
                flags = tuple(extra_flags + baseline_flags)
                to_compile.setdefault(flags, []).append(src)

            self.sources_status[src] = (has_baseline, targets)

        # The sources are compiled in a separate loop for these reasons:
        # - Gathering all sources with the same flags to benefit from
        #   parallel compiling as much as possible.
        # - To generate all config headers of the dispatch-able sources
        #   before the compilation, in case there are dependency
        #   relationships among them.
        objects = []
        for flags, srcs in to_compile.items():
            objects += self.dist_compile(
                srcs, list(flags), ccompiler=ccompiler, **kwargs
            )
        return objects

    def generate_dispatch_header(self, header_path):
        """
        Generate the dispatch header, which contains the #definitions and
        headers of the platform-specific instruction sets for the enabled
        CPU baseline and dispatch-able features.

        It's highly recommended to take a look at the generated header and
        at the source files generated via `try_dispatch()` in order to get
        the full picture.
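
        Examples
        --------
        A usage sketch, assuming ``compiler`` is an already-initialized
        distutils `CCompiler` and the output path is only illustrative::

            opt = CCompilerOpt(compiler)
            opt.generate_dispatch_header("build/src/cpu_dispatch.h")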
        """
        self.dist_log("generate CPU dispatch header: (%s)" % header_path)

        baseline_names = self.cpu_baseline_names()
        dispatch_names = self.cpu_dispatch_names()
        baseline_len = len(baseline_names)
        dispatch_len = len(dispatch_names)

        header_dir = os.path.dirname(header_path)
        if not os.path.exists(header_dir):
            self.dist_log(
                f"dispatch header dir {header_dir} does not exist, creating it",
                stderr=True
            )
            os.makedirs(header_dir)

        with open(header_path, 'w') as f:
            baseline_calls = ' \\\n'.join([
                (
                    "\t%sWITH_CPU_EXPAND_(MACRO_TO_CALL(%s, __VA_ARGS__))"
                ) % (self.conf_c_prefix, f)
                for f in baseline_names
            ])
            dispatch_calls = ' \\\n'.join([
                (
                    "\t%sWITH_CPU_EXPAND_(MACRO_TO_CALL(%s, __VA_ARGS__))"
                ) % (self.conf_c_prefix, f)
                for f in dispatch_names
            ])
            f.write(textwrap.dedent("""\
                /*
                 * AUTOGENERATED DON'T EDIT
                 * Please make changes to the code generator (distutils/ccompiler_opt.py)
                */
                #define {pfx}WITH_CPU_BASELINE  "{baseline_str}"
                #define {pfx}WITH_CPU_DISPATCH  "{dispatch_str}"
                #define {pfx}WITH_CPU_BASELINE_N {baseline_len}
                #define {pfx}WITH_CPU_DISPATCH_N {dispatch_len}
                #define {pfx}WITH_CPU_EXPAND_(X) X
                #define {pfx}WITH_CPU_BASELINE_CALL(MACRO_TO_CALL, ...) \\
                {baseline_calls}
                #define {pfx}WITH_CPU_DISPATCH_CALL(MACRO_TO_CALL, ...) \\
                {dispatch_calls}
            """).format(
                pfx=self.conf_c_prefix, baseline_str=" ".join(baseline_names),
                dispatch_str=" ".join(dispatch_names), baseline_len=baseline_len,
                dispatch_len=dispatch_len, baseline_calls=baseline_calls,
                dispatch_calls=dispatch_calls
            ))
            baseline_pre = ''
            for name in baseline_names:
                baseline_pre += self.feature_c_preprocessor(name, tabs=1) + '\n'

            dispatch_pre = ''
            for name in dispatch_names:
                dispatch_pre += textwrap.dedent("""\
                #ifdef {pfx}CPU_TARGET_{name}
                {pre}
                #endif /*{pfx}CPU_TARGET_{name}*/
                """).format(
                    pfx=self.conf_c_prefix_, name=name, pre=self.feature_c_preprocessor(
                    name, tabs=1
                ))

            f.write(textwrap.dedent("""\
            /******* baseline features *******/
            {baseline_pre}
            /******* dispatch features *******/
            {dispatch_pre}
            """).format(
                pfx=self.conf_c_prefix_, baseline_pre=baseline_pre,
                dispatch_pre=dispatch_pre
            ))

    def report(self, full=False):
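        """
        Return a compact plain-text report of the platform, the enabled CPU
        baseline features and flags, and the dispatch-able features.

        If `full` is True, each generated target is also listed with its
        implied features, flags, extra checks, detection names and the
        dispatch-able sources that use it.
        """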
        report = []
        platform_rows = []
        baseline_rows = []
        dispatch_rows = []
        report.append(("Platform", platform_rows))
        report.append(("", ""))
        report.append(("CPU baseline", baseline_rows))
        report.append(("", ""))
        report.append(("CPU dispatch", dispatch_rows))

        ########## platform ##########
        platform_rows.append(("Architecture", (
            "unsupported" if self.cc_on_noarch else self.cc_march)
        ))
        platform_rows.append(("Compiler", (
            "unix-like"   if self.cc_is_nocc   else self.cc_name)
        ))
        ########## baseline ##########
        if self.cc_noopt:
            baseline_rows.append(("Requested", "optimization disabled"))
        else:
            baseline_rows.append(("Requested", repr(self._requested_baseline)))

        baseline_names = self.cpu_baseline_names()
        baseline_rows.append((
            "Enabled", (' '.join(baseline_names) if baseline_names else "none")
        ))
        baseline_flags = self.cpu_baseline_flags()
        baseline_rows.append((
            "Flags", (' '.join(baseline_flags) if baseline_flags else "none")
        ))
        extra_checks = []
        for name in baseline_names:
            extra_checks += self.feature_extra_checks(name)
        baseline_rows.append((
            "Extra checks", (' '.join(extra_checks) if extra_checks else "none")
        ))

        ########## dispatch ##########
        if self.cc_noopt:
            dispatch_rows.append(("Requested", "optimization disabled"))
        else:
            dispatch_rows.append(("Requested", repr(self._requested_dispatch)))

        dispatch_names = self.cpu_dispatch_names()
        dispatch_rows.append((
            "Enabled", (' '.join(dispatch_names) if dispatch_names else "none")
        ))
        ########## Generated ##########
        # TODO:
        # - collect object names from 'try_dispatch()',
        #   then get the size of each object and print it
        # - give more details about the features that were not
        #   generated due to lack of compiler support
        # - find a better output design
        #
        target_sources = {}
        for source, (_, targets) in self.sources_status.items():
            for tar in targets:
                target_sources.setdefault(tar, []).append(source)

        if not full or not target_sources:
            generated = ""
            for tar in self.feature_sorted(target_sources):
                sources = target_sources[tar]
                name = tar if isinstance(tar, str) else '(%s)' % ' '.join(tar)
                generated += name + "[%d] " % len(sources)
            dispatch_rows.append(("Generated", generated[:-1] if generated else "none"))
        else:
            dispatch_rows.append(("Generated", ''))
            for tar in self.feature_sorted(target_sources):
                sources = target_sources[tar]
                pretty_name = tar if isinstance(tar, str) else '(%s)' % ' '.join(tar)
                flags = ' '.join(self.feature_flags(tar))
                implies = ' '.join(self.feature_sorted(self.feature_implies(tar)))
                detect = ' '.join(self.feature_detect(tar))
                extra_checks = []
                for name in ((tar,) if isinstance(tar, str) else tar):
                    extra_checks += self.feature_extra_checks(name)
                extra_checks = (' '.join(extra_checks) if extra_checks else "none")

                dispatch_rows.append(('', ''))
                dispatch_rows.append((pretty_name, implies))
                dispatch_rows.append(("Flags", flags))
                dispatch_rows.append(("Extra checks", extra_checks))
                dispatch_rows.append(("Detect", detect))
                for src in sources:
                    dispatch_rows.append(("", src))

        ###############################
        # TODO: add support for 'markdown' format
        text = []
        secs_len = [len(secs) for secs, _ in report]
        cols_len = [len(col) for _, rows in report for col, _ in rows]
        tab = ' ' * 2
        pad = max(max(secs_len), max(cols_len))
        for sec, rows in report:
            if not sec:
                text.append("")  # empty line
                continue
            sec += ' ' * (pad - len(sec))
            text.append(sec + tab + ': ')
            for col, val in rows:
                col += ' ' * (pad - len(col))
                text.append(tab + col + ': ' + val)

        return '\n'.join(text)

    def _wrap_target(self, output_dir, dispatch_src, target, nochange=False):
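        """
        Write a wrapper source for a single target (or a multi-target tuple):
        the wrapper defines the target macros and #includes the original
        dispatch-able source, so it can be compiled on its own with the
        target's flags. For example, wrapping ``foo.dispatch.c`` for target
        AVX2 produces ``foo.dispatch.avx2.c`` inside `output_dir` (the file
        name here is only illustrative).

        If `nochange` is True and the wrapper already exists, it is reused.
        Returns the path of the wrapped source.
        """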
        assert isinstance(target, (str, tuple))
        if isinstance(target, str):
            ext_name = target_name = target
        else:
            # multi-target
            ext_name = '.'.join(target)
            target_name = '__'.join(target)

        wrap_path = os.path.join(output_dir, os.path.basename(dispatch_src))
        wrap_path = "{0}.{2}{1}".format(*os.path.splitext(wrap_path), ext_name.lower())
        if nochange and os.path.exists(wrap_path):
            return wrap_path

        self.dist_log("wrap dispatch-able target -> ", wrap_path)
        # sorting for readability
        features = self.feature_sorted(self.feature_implies_c(target))
        target_join = "#define %sCPU_TARGET_" % self.conf_c_prefix_
        target_defs = [target_join + f for f in features]
        target_defs = '\n'.join(target_defs)

        with open(wrap_path, "w") as fd:
            fd.write(textwrap.dedent("""\
            /**
             * AUTOGENERATED DON'T EDIT
             * Please make changes to the code generator \
             (distutils/ccompiler_opt.py)
             */
            #define {pfx}CPU_TARGET_MODE
            #define {pfx}CPU_TARGET_CURRENT {target_name}
            {target_defs}
            #include "{path}"
            """).format(
                pfx=self.conf_c_prefix_, target_name=target_name,
                path=os.path.abspath(dispatch_src), target_defs=target_defs
            ))
        return wrap_path

    def _generate_config(self, output_dir, dispatch_src, targets, has_baseline=False):
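        """
        Write the dispatch configuration header of `dispatch_src` into
        `output_dir`, defining the ``CPU_DISPATCH_CALL`` and
        ``CPU_DISPATCH_BASELINE_CALL`` macros (with the configured C prefix)
        for the parsed `targets`.

        The first line of the header carries a hash of the targets. If the
        existing header holds the same hash, nothing is rewritten and True
        is returned so recompilation can be skipped; otherwise the header is
        regenerated and False is returned.
        """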
        config_path = os.path.basename(dispatch_src)
        config_path = os.path.splitext(config_path)[0] + '.h'
        config_path = os.path.join(output_dir, config_path)
        # check if targets didn't change to avoid recompiling
        cache_hash = self.cache_hash(targets, has_baseline)
        try:
            with open(config_path) as f:
                last_hash = f.readline().split("cache_hash:")
                if len(last_hash) == 2 and int(last_hash[1]) == cache_hash:
                    return True
        except OSError:
            pass

        os.makedirs(os.path.dirname(config_path), exist_ok=True)

        self.dist_log("generate dispatched config -> ", config_path)
        dispatch_calls = []
        for tar in targets:
            if isinstance(tar, str):
                target_name = tar
            else:  # multi-target
                target_name = '__'.join(tar)
            req_detect = self.feature_detect(tar)
            req_detect = '&&'.join([
                "CHK(%s)" % f for f in req_detect
            ])
            dispatch_calls.append(
                "\t%sCPU_DISPATCH_EXPAND_(CB((%s), %s, __VA_ARGS__))" % (
                self.conf_c_prefix_, req_detect, target_name
            ))
        dispatch_calls = ' \\\n'.join(dispatch_calls)

        if has_baseline:
            baseline_calls = (
                "\t%sCPU_DISPATCH_EXPAND_(CB(__VA_ARGS__))"
            ) % self.conf_c_prefix_
        else:
            baseline_calls = ''

        with open(config_path, "w") as fd:
            fd.write(textwrap.dedent("""\
            // cache_hash:{cache_hash}
            /**
             * AUTOGENERATED DON'T EDIT
             * Please make changes to the code generator (distutils/ccompiler_opt.py)
             */
            #ifndef {pfx}CPU_DISPATCH_EXPAND_
                #define {pfx}CPU_DISPATCH_EXPAND_(X) X
            #endif
            #undef {pfx}CPU_DISPATCH_BASELINE_CALL
            #undef {pfx}CPU_DISPATCH_CALL
            #define {pfx}CPU_DISPATCH_BASELINE_CALL(CB, ...) \\
            {baseline_calls}
            #define {pfx}CPU_DISPATCH_CALL(CHK, CB, ...) \\
            {dispatch_calls}
            """).format(
                pfx=self.conf_c_prefix_, baseline_calls=baseline_calls,
                dispatch_calls=dispatch_calls, cache_hash=cache_hash
            ))
        return False

def new_ccompiler_opt(compiler, dispatch_hpath, **kwargs):
    """
    Create a new instance of `CCompilerOpt` and generate the dispatch header,
    which contains the #definitions and headers of the platform-specific
    instruction sets for the enabled CPU baseline and dispatch-able features.

    Parameters
    ----------
    compiler : CCompiler instance
    dispatch_hpath : str
        Path of the dispatch header.

    **kwargs : any
        Passed as-is to `CCompilerOpt(...)`.

    Returns
    -------
    new instance of CCompilerOpt
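
    Examples
    --------
    A minimal sketch, assuming ``compiler`` is an already-initialized
    distutils `CCompiler` and the header path is only illustrative::

        opt = new_ccompiler_opt(compiler, "build/src/cpu_dispatch.h")
        print(opt.report())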
    """
    opt = CCompilerOpt(compiler, **kwargs)
    if not os.path.exists(dispatch_hpath) or not opt.is_cached():
        opt.generate_dispatch_header(dispatch_hpath)
    return opt