This commit is contained in:
2025-09-07 22:09:54 +02:00
parent e1b817252c
commit 2fc0d000b6
7796 changed files with 2159515 additions and 933 deletions

View File

@ -0,0 +1,27 @@
#ifdef _MSC_VER
#include <Intrin.h>
#endif
#include <arm_neon.h>
/* Compile probe for ARMv8 Advanced SIMD (ASIMD): IEEE max/min (vmaxnm/vminnm)
 * and round-to-integral (vrnd), plus the float64 variants on AArch64 only. */
int main(int argc, char **argv)
{
/* load through an untraced pointer so the compiler can't constant-fold the lanes away */
float *src = (float*)argv[argc-1];
float32x4_t v1 = vdupq_n_f32(src[0]), v2 = vdupq_n_f32(src[1]);
/* MAXMIN */
int ret = (int)vgetq_lane_f32(vmaxnmq_f32(v1, v2), 0);
ret += (int)vgetq_lane_f32(vminnmq_f32(v1, v2), 0);
/* ROUNDING */
ret += (int)vgetq_lane_f32(vrndq_f32(v1), 0);
#ifdef __aarch64__
{
double *src2 = (double*)argv[argc-1];
float64x2_t vd1 = vdupq_n_f64(src2[0]), vd2 = vdupq_n_f64(src2[1]);
/* MAXMIN */
ret += (int)vgetq_lane_f64(vmaxnmq_f64(vd1, vd2), 0);
ret += (int)vgetq_lane_f64(vminnmq_f64(vd1, vd2), 0);
/* ROUNDING */
ret += (int)vgetq_lane_f64(vrndq_f64(vd1), 0);
}
#endif
return ret;
}

View File

@ -0,0 +1,16 @@
#ifdef _MSC_VER
#include <Intrin.h>
#endif
#include <arm_neon.h>
/* Compile probe for ARMv8.2 dot-product (ASIMDDP): vdotq_u32 and, on AArch64,
 * the indexed variant vdotq_laneq_u32. */
int main(int argc, char **argv)
{
unsigned char *src = (unsigned char*)argv[argc-1];
uint8x16_t v1 = vdupq_n_u8(src[0]), v2 = vdupq_n_u8(src[1]);
uint32x4_t va = vdupq_n_u32(3);
int ret = (int)vgetq_lane_u32(vdotq_u32(va, v1, v2), 0);
#ifdef __aarch64__
ret += (int)vgetq_lane_u32(vdotq_laneq_u32(va, v1, v2, 0), 0);
#endif
return ret;
}

View File

@ -0,0 +1,19 @@
#ifdef _MSC_VER
#include <Intrin.h>
#endif
#include <arm_neon.h>
/* Compile probe for ARMv8.2 FP16 FML extension (ASIMDFHM):
 * widening fused multiply-accumulate vfmlal/vfmlsl on half-precision inputs. */
int main(int argc, char **argv)
{
float16_t *src = (float16_t*)argv[argc-1];
float *src2 = (float*)argv[argc-2];
float16x8_t vhp = vdupq_n_f16(src[0]);
float16x4_t vlhp = vdup_n_f16(src[1]);
float32x4_t vf = vdupq_n_f32(src2[0]);
float32x2_t vlf = vdup_n_f32(src2[1]);
int ret = (int)vget_lane_f32(vfmlal_low_f16(vlf, vlhp, vlhp), 0);
ret += (int)vgetq_lane_f32(vfmlslq_high_f16(vf, vhp, vhp), 0);
return ret;
}

View File

@ -0,0 +1,15 @@
#ifdef _MSC_VER
#include <Intrin.h>
#endif
#include <arm_neon.h>
/* Compile probe for ARMv8.2 half-precision arithmetic (ASIMDHP): vabd on
 * float16 vectors in both 128-bit and 64-bit forms. */
int main(int argc, char **argv)
{
float16_t *src = (float16_t*)argv[argc-1];
float16x8_t vhp = vdupq_n_f16(src[0]);
float16x4_t vlhp = vdup_n_f16(src[1]);
int ret = (int)vgetq_lane_f16(vabdq_f16(vhp, vhp), 0);
ret += (int)vget_lane_f16(vabd_f16(vlhp, vlhp), 0);
return ret;
}

View File

@ -0,0 +1,20 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
 * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
 * whether or not the build options for those features are specified.
 * Therefore, we must test #definitions of CPU features when option native/host
 * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
 * the test will be broken and leads to enable all possible features.
 */
#ifndef __AVX__
#error "HOST/ARCH doesn't support AVX"
#endif
#endif
#include <immintrin.h>
/* Compile probe for AVX: 256-bit float load/add. */
int main(int argc, char **argv)
{
__m256 a = _mm256_add_ps(_mm256_loadu_ps((const float*)argv[argc-1]), _mm256_loadu_ps((const float*)argv[1]));
return (int)_mm_cvtss_f32(_mm256_castps256_ps128(a));
}

View File

@ -0,0 +1,20 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
 * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
 * whether or not the build options for those features are specified.
 * Therefore, we must test #definitions of CPU features when option native/host
 * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
 * the test will be broken and leads to enable all possible features.
 */
#ifndef __AVX2__
#error "HOST/ARCH doesn't support AVX2"
#endif
#endif
#include <immintrin.h>
/* Compile probe for AVX2: 256-bit integer operation (vpabsw). */
int main(int argc, char **argv)
{
__m256i a = _mm256_abs_epi16(_mm256_loadu_si256((const __m256i*)argv[argc-1]));
return _mm_cvtsi128_si32(_mm256_castsi256_si128(a));
}

View File

@ -0,0 +1,22 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
 * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
 * whether or not the build options for those features are specified.
 * Therefore, we must test #definitions of CPU features when option native/host
 * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
 * the test will be broken and leads to enable all possible features.
 */
#ifndef __AVX512VNNI__
#error "HOST/ARCH doesn't support CascadeLake AVX512 features"
#endif
#endif
#include <immintrin.h>
/* Compile probe for Cascade Lake AVX-512 (VNNI). */
int main(int argc, char **argv)
{
/* VNNI */
__m512i a = _mm512_loadu_si512((const __m512i*)argv[argc-1]);
a = _mm512_dpbusd_epi32(a, _mm512_setzero_si512(), a);
return _mm_cvtsi128_si32(_mm512_castsi512_si128(a));
}

View File

@ -0,0 +1,24 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
 * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
 * whether or not the build options for those features are specified.
 * Therefore, we must test #definitions of CPU features when option native/host
 * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
 * the test will be broken and leads to enable all possible features.
 */
#if !defined(__AVX512VBMI__) || !defined(__AVX512IFMA__)
#error "HOST/ARCH doesn't support CannonLake AVX512 features"
#endif
#endif
#include <immintrin.h>
/* Compile probe for Cannon Lake AVX-512 (IFMA + VBMI). */
int main(int argc, char **argv)
{
__m512i a = _mm512_loadu_si512((const __m512i*)argv[argc-1]);
/* IFMA */
a = _mm512_madd52hi_epu64(a, a, _mm512_setzero_si512());
/* VBMI */
a = _mm512_permutex2var_epi8(a, _mm512_setzero_si512(), a);
return _mm_cvtsi128_si32(_mm512_castsi512_si128(a));
}

View File

@ -0,0 +1,26 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
* Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
* whether or not the build options for those features are specified.
* Therefore, we must test #definitions of CPU features when option native/host
* is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
* the test will be broken and leads to enable all possible features.
*/
#if !defined(__AVX512VPOPCNTDQ__) || !defined(__AVX512BITALG__) || !defined(__AVX512VPOPCNTDQ__)
#error "HOST/ARCH doesn't support IceLake AVX512 features"
#endif
#endif
#include <immintrin.h>
int main(int argc, char **argv)
{
__m512i a = _mm512_loadu_si512((const __m512i*)argv[argc-1]);
/* VBMI2 */
a = _mm512_shrdv_epi64(a, a, _mm512_setzero_si512());
/* BITLAG */
a = _mm512_popcnt_epi8(a);
/* VPOPCNTDQ */
a = _mm512_popcnt_epi64(a);
return _mm_cvtsi128_si32(_mm512_castsi512_si128(a));
}

View File

@ -0,0 +1,25 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
 * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
 * whether or not the build options for those features are specified.
 * Therefore, we must test #definitions of CPU features when option native/host
 * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
 * the test will be broken and leads to enable all possible features.
 */
#if !defined(__AVX512ER__) || !defined(__AVX512PF__)
#error "HOST/ARCH doesn't support Knights Landing AVX512 features"
#endif
#endif
#include <immintrin.h>
/* Compile probe for Knights Landing AVX-512 (ER + PF). */
int main(int argc, char **argv)
{
int base[128]={};
__m512d ad = _mm512_loadu_pd((const __m512d*)argv[argc-1]);
/* ER */
__m512i a = _mm512_castpd_si512(_mm512_exp2a23_pd(ad));
/* PF */
_mm512_mask_prefetch_i64scatter_pd(base, _mm512_cmpeq_epi64_mask(a, a), a, 1, _MM_HINT_T1);
return base[0];
}

View File

@ -0,0 +1,30 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
 * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
 * whether or not the build options for those features are specified.
 * Therefore, we must test #definitions of CPU features when option native/host
 * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
 * the test will be broken and leads to enable all possible features.
 */
#if !defined(__AVX5124FMAPS__) || !defined(__AVX5124VNNIW__) || !defined(__AVX512VPOPCNTDQ__)
#error "HOST/ARCH doesn't support Knights Mill AVX512 features"
#endif
#endif
#include <immintrin.h>
/* Compile probe for Knights Mill AVX-512 (4FMAPS + 4VNNIW + VPOPCNTDQ).
 * The trailing NULL is the optional memory operand of the 4-iteration forms. */
int main(int argc, char **argv)
{
__m512i a = _mm512_loadu_si512((const __m512i*)argv[argc-1]);
__m512 b = _mm512_loadu_ps((const __m512*)argv[argc-2]);
/* 4FMAPS */
b = _mm512_4fmadd_ps(b, b, b, b, b, NULL);
/* 4VNNIW */
a = _mm512_4dpwssd_epi32(a, a, a, a, a, NULL);
/* VPOPCNTDQ */
a = _mm512_popcnt_epi64(a);
a = _mm512_add_epi32(a, _mm512_castps_si512(b));
return _mm_cvtsi128_si32(_mm512_castsi512_si128(a));
}

View File

@ -0,0 +1,26 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
 * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
 * whether or not the build options for those features are specified.
 * Therefore, we must test #definitions of CPU features when option native/host
 * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
 * the test will be broken and leads to enable all possible features.
 */
#if !defined(__AVX512VL__) || !defined(__AVX512BW__) || !defined(__AVX512DQ__)
#error "HOST/ARCH doesn't support SkyLake AVX512 features"
#endif
#endif
#include <immintrin.h>
/* Compile probe for Skylake-X AVX-512 (VL + DQ + BW). */
int main(int argc, char **argv)
{
__m512i aa = _mm512_abs_epi32(_mm512_loadu_si512((const __m512i*)argv[argc-1]));
/* VL */
__m256i a = _mm256_abs_epi64(_mm512_extracti64x4_epi64(aa, 1));
/* DQ */
__m512i b = _mm512_broadcast_i32x8(a);
/* BW */
b = _mm512_abs_epi16(b);
return _mm_cvtsi128_si32(_mm512_castsi512_si128(b));
}

View File

@ -0,0 +1,26 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
 * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
 * whether or not the build options for those features are specified.
 * Therefore, we must test #definitions of CPU features when option native/host
 * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
 * the test will be broken and leads to enable all possible features.
 */
#if !defined(__AVX512FP16__)
#error "HOST/ARCH doesn't support Sapphire Rapids AVX512FP16 features"
#endif
#endif
#include <immintrin.h>
/* Compile probe for Sapphire Rapids AVX512-FP16 (vfmadd on __m512h). */
int main(int argc, char **argv)
{
/* clang has a bug regarding our spr code, see gh-23730. */
#if __clang__
#error
#endif
__m512h a = _mm512_loadu_ph((void*)argv[argc-1]);
__m512h temp = _mm512_fmadd_ph(a, a, a);
_mm512_storeu_ph((void*)(argv[argc-1]), temp);
return 0;
}

View File

@ -0,0 +1,20 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
 * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
 * whether or not the build options for those features are specified.
 * Therefore, we must test #definitions of CPU features when option native/host
 * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
 * the test will be broken and leads to enable all possible features.
 */
#ifndef __AVX512CD__
#error "HOST/ARCH doesn't support AVX512CD"
#endif
#endif
#include <immintrin.h>
/* Compile probe for AVX512CD (vplzcntd). */
int main(int argc, char **argv)
{
__m512i a = _mm512_lzcnt_epi32(_mm512_loadu_si512((const __m512i*)argv[argc-1]));
return _mm_cvtsi128_si32(_mm512_castsi512_si128(a));
}

View File

@ -0,0 +1,20 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
 * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
 * whether or not the build options for those features are specified.
 * Therefore, we must test #definitions of CPU features when option native/host
 * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
 * the test will be broken and leads to enable all possible features.
 */
#ifndef __AVX512F__
#error "HOST/ARCH doesn't support AVX512F"
#endif
#endif
#include <immintrin.h>
/* Compile probe for the AVX-512 foundation (vpabsd on a 512-bit register). */
int main(int argc, char **argv)
{
__m512i a = _mm512_abs_epi32(_mm512_loadu_si512((const __m512i*)argv[argc-1]));
return _mm_cvtsi128_si32(_mm512_castsi512_si128(a));
}

View File

@ -0,0 +1,22 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
* Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
* whether or not the build options for those features are specified.
* Therefore, we must test #definitions of CPU features when option native/host
* is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
* the test will be broken and leads to enable all possible features.
*/
#ifndef __F16C__
#error "HOST/ARCH doesn't support F16C"
#endif
#endif
#include <emmintrin.h>
#include <immintrin.h>
int main(int argc, char **argv)
{
__m128 a = _mm_cvtph_ps(_mm_loadu_si128((const __m128i*)argv[argc-1]));
__m256 a8 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*)argv[argc-2]));
return (int)(_mm_cvtss_f32(a) + _mm_cvtss_f32(_mm256_castps256_ps128(a8)));
}

View File

@ -0,0 +1,22 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
 * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
 * whether or not the build options for those features are specified.
 * Therefore, we must test #definitions of CPU features when option native/host
 * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
 * the test will be broken and leads to enable all possible features.
 */
#if !defined(__FMA__) && !defined(__AVX2__)
#error "HOST/ARCH doesn't support FMA3"
#endif
#endif
#include <xmmintrin.h>
#include <immintrin.h>
/* Compile probe for FMA3 (vfmadd231ps on 256-bit registers). */
int main(int argc, char **argv)
{
__m256 a = _mm256_loadu_ps((const float*)argv[argc-1]);
a = _mm256_fmadd_ps(a, a, a);
return (int)_mm_cvtss_f32(_mm256_castps256_ps128(a));
}

View File

@ -0,0 +1,13 @@
#include <immintrin.h>
#ifdef _MSC_VER
#include <ammintrin.h>
#else
#include <x86intrin.h>
#endif
/* Compile probe for AMD FMA4 (_mm256_macc_ps). */
int main(int argc, char **argv)
{
__m256 a = _mm256_loadu_ps((const float*)argv[argc-1]);
a = _mm256_macc_ps(a, a, a);
return (int)_mm_cvtss_f32(_mm256_castps256_ps128(a));
}

View File

@ -0,0 +1,11 @@
#ifndef __loongarch_sx
#error "HOST/ARCH doesn't support LSX"
#endif
#include <lsxintrin.h>
/* Compile probe for LoongArch 128-bit SIMD (LSX). */
int main(void)
{
__m128i a = __lsx_vadd_d(__lsx_vldi(0), __lsx_vldi(0));
return __lsx_vpickve2gr_w(a, 0);
}

View File

@ -0,0 +1,19 @@
#ifdef _MSC_VER
#include <Intrin.h>
#endif
#include <arm_neon.h>
/* Compile probe for baseline NEON (single-precision multiply; float64 on AArch64). */
int main(int argc, char **argv)
{
// passing from untraced pointers to avoid optimizing out any constants
// so we can test against the linker.
float *src = (float*)argv[argc-1];
float32x4_t v1 = vdupq_n_f32(src[0]), v2 = vdupq_n_f32(src[1]);
int ret = (int)vgetq_lane_f32(vmulq_f32(v1, v2), 0);
#ifdef __aarch64__
double *src2 = (double*)argv[argc-2];
float64x2_t vd1 = vdupq_n_f64(src2[0]), vd2 = vdupq_n_f64(src2[1]);
ret += (int)vgetq_lane_f64(vmulq_f64(vd1, vd2), 0);
#endif
return ret;
}

View File

@ -0,0 +1,11 @@
#ifdef _MSC_VER
#include <Intrin.h>
#endif
#include <arm_neon.h>
/* Compile probe for NEON FP16 storage/conversion (vcvt_f32_f16). */
int main(int argc, char **argv)
{
short *src = (short*)argv[argc-1];
float32x4_t v_z4 = vcvt_f32_f16((float16x4_t)vld1_s16(src));
return (int)vgetq_lane_f32(v_z4, 0);
}

View File

@ -0,0 +1,21 @@
#ifdef _MSC_VER
#include <Intrin.h>
#endif
#include <arm_neon.h>
int main(int argc, char **argv)
{
float *src = (float*)argv[argc-1];
float32x4_t v1 = vdupq_n_f32(src[0]);
float32x4_t v2 = vdupq_n_f32(src[1]);
float32x4_t v3 = vdupq_n_f32(src[2]);
int ret = (int)vgetq_lane_f32(vfmaq_f32(v1, v2, v3), 0);
#ifdef __aarch64__
double *src2 = (double*)argv[argc-2];
float64x2_t vd1 = vdupq_n_f64(src2[0]);
float64x2_t vd2 = vdupq_n_f64(src2[1]);
float64x2_t vd3 = vdupq_n_f64(src2[2]);
ret += (int)vgetq_lane_f64(vfmaq_f64(vd1, vd2, vd3), 0);
#endif
return ret;
}

View File

@ -0,0 +1,32 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
 * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
 * whether or not the build options for those features are specified.
 * Therefore, we must test #definitions of CPU features when option native/host
 * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
 * the test will be broken and leads to enable all possible features.
 */
#if !defined(__SSE4_2__) && !defined(__POPCNT__)
#error "HOST/ARCH doesn't support POPCNT"
#endif
#endif
#ifdef _MSC_VER
#include <nmmintrin.h>
#else
#include <popcntintrin.h>
#endif
/* Compile probe for the POPCNT instruction (64-bit form only on x86-64). */
int main(int argc, char **argv)
{
// To make sure popcnt instructions are generated
// and been tested against the assembler
unsigned long long a = *((unsigned long long*)argv[argc-1]);
unsigned int b = *((unsigned int*)argv[argc-2]);
#if defined(_M_X64) || defined(__x86_64__)
a = _mm_popcnt_u64(a);
#endif
b = _mm_popcnt_u32(b);
return (int)a + b;
}

View File

@ -0,0 +1,13 @@
#ifndef __riscv_vector
#error RVV not supported
#endif
#include <riscv_vector.h>
/* Compile probe for the RISC-V Vector extension (RVV intrinsics v1.0 naming). */
int main(void)
{
size_t vlmax = __riscv_vsetvlmax_e32m1();
vuint32m1_t a = __riscv_vmv_v_x_u32m1(0, vlmax);
vuint32m1_t b = __riscv_vadd_vv_u32m1(a, a, vlmax);
return __riscv_vmv_x_s_u32m1_u32(b);
}

View File

@ -0,0 +1,20 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
 * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
 * whether or not the build options for those features are specified.
 * Therefore, we must test #definitions of CPU features when option native/host
 * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
 * the test will be broken and leads to enable all possible features.
 */
#ifndef __SSE__
#error "HOST/ARCH doesn't support SSE"
#endif
#endif
#include <xmmintrin.h>
/* Compile probe for SSE (addps). */
int main(void)
{
__m128 a = _mm_add_ps(_mm_setzero_ps(), _mm_setzero_ps());
return (int)_mm_cvtss_f32(a);
}

View File

@ -0,0 +1,20 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
 * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
 * whether or not the build options for those features are specified.
 * Therefore, we must test #definitions of CPU features when option native/host
 * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
 * the test will be broken and leads to enable all possible features.
 */
#ifndef __SSE2__
#error "HOST/ARCH doesn't support SSE2"
#endif
#endif
#include <emmintrin.h>
/* Compile probe for SSE2 (paddw). */
int main(void)
{
__m128i a = _mm_add_epi16(_mm_setzero_si128(), _mm_setzero_si128());
return _mm_cvtsi128_si32(a);
}

View File

@ -0,0 +1,20 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
 * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
 * whether or not the build options for those features are specified.
 * Therefore, we must test #definitions of CPU features when option native/host
 * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
 * the test will be broken and leads to enable all possible features.
 */
#ifndef __SSE3__
#error "HOST/ARCH doesn't support SSE3"
#endif
#endif
#include <pmmintrin.h>
/* Compile probe for SSE3 (haddps). */
int main(void)
{
__m128 a = _mm_hadd_ps(_mm_setzero_ps(), _mm_setzero_ps());
return (int)_mm_cvtss_f32(a);
}

View File

@ -0,0 +1,20 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
 * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
 * whether or not the build options for those features are specified.
 * Therefore, we must test #definitions of CPU features when option native/host
 * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
 * the test will be broken and leads to enable all possible features.
 */
#ifndef __SSE4_1__
#error "HOST/ARCH doesn't support SSE41"
#endif
#endif
#include <smmintrin.h>
/* Compile probe for SSE4.1 (roundps via _mm_floor_ps). */
int main(void)
{
__m128 a = _mm_floor_ps(_mm_setzero_ps());
return (int)_mm_cvtss_f32(a);
}

View File

@ -0,0 +1,20 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
 * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
 * whether or not the build options for those features are specified.
 * Therefore, we must test #definitions of CPU features when option native/host
 * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
 * the test will be broken and leads to enable all possible features.
 */
#ifndef __SSE4_2__
#error "HOST/ARCH doesn't support SSE42"
#endif
#endif
#include <smmintrin.h>
/* Compile probe for SSE4.2.
 * NOTE(review): _mm_hadd_ps is an SSE3 intrinsic; detection here relies on the
 * compiler accepting the SSE4.2 build flag (and the ICC #error guard above),
 * not on an SSE4.2-only instruction — an intrinsic such as _mm_crc32_u32
 * would pin the feature more tightly. Confirm against the build driver. */
int main(void)
{
__m128 a = _mm_hadd_ps(_mm_setzero_ps(), _mm_setzero_ps());
return (int)_mm_cvtss_f32(a);
}

View File

@ -0,0 +1,20 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
 * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
 * whether or not the build options for those features are specified.
 * Therefore, we must test #definitions of CPU features when option native/host
 * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
 * the test will be broken and leads to enable all possible features.
 */
#ifndef __SSSE3__
#error "HOST/ARCH doesn't support SSSE3"
#endif
#endif
#include <tmmintrin.h>
/* Compile probe for SSSE3 (phaddw). */
int main(void)
{
__m128i a = _mm_hadd_epi16(_mm_setzero_si128(), _mm_setzero_si128());
return (int)_mm_cvtsi128_si32(a);
}

View File

@ -0,0 +1,14 @@
#include <arm_sve.h>
/* Compile probe for ARM SVE: predicated multiply-add plus horizontal add. */
int accumulate(svint64_t a, svint64_t b) {
svbool_t p = svptrue_b64();
return svaddv(p, svmla_z(p, a, a, b));
}
int main(void)
{
svbool_t p = svptrue_b64();
svint64_t a = svdup_s64(1);
svint64_t b = svdup_s64(2);
return accumulate(a, b);
}

View File

@ -0,0 +1,21 @@
#ifndef __VSX__
#error "VSX is not supported"
#endif
#include <altivec.h>
/* Compile probe for POWER VSX: unaligned vector load/store.
 * Older GCC/Clang lack vec_xl/vec_xst, so fall back to vec_vsx_ld/st there. */
#if (defined(__GNUC__) && !defined(vec_xl)) || (defined(__clang__) && !defined(__IBMC__))
#define vsx_ld vec_vsx_ld
#define vsx_st vec_vsx_st
#else
#define vsx_ld vec_xl
#define vsx_st vec_xst
#endif
int main(void)
{
unsigned int zout[4];
unsigned int z4[] = {0, 0, 0, 0};
__vector unsigned int v_z4 = vsx_ld(0, z4);
vsx_st(v_z4, 0, zout);
return zout[0];
}

View File

@ -0,0 +1,13 @@
#ifndef __VSX__
#error "VSX is not supported"
#endif
#include <altivec.h>
typedef __vector unsigned long long v_uint64x2;
/* Compile probe for VSX2 (ISA 2.07): 64-bit integer vector compare. */
int main(void)
{
v_uint64x2 z2 = (v_uint64x2){0, 0};
z2 = (v_uint64x2)vec_cmpeq(z2, z2);
return (int)vec_extract(z2, 0);
}

View File

@ -0,0 +1,13 @@
#ifndef __VSX__
#error "VSX is not supported"
#endif
#include <altivec.h>
typedef __vector unsigned int v_uint32x4;
/* Compile probe for VSX3 (ISA 3.0): absolute difference vec_absd. */
int main(void)
{
v_uint32x4 z4 = (v_uint32x4){0, 0, 0, 0};
z4 = vec_absd(z4, z4);
return (int)vec_extract(z4, 0);
}

View File

@ -0,0 +1,14 @@
#ifndef __VSX__
#error "VSX is not supported"
#endif
#include <altivec.h>
typedef __vector unsigned int v_uint32x4;
int main(void)
{
v_uint32x4 v1 = (v_uint32x4){2, 4, 8, 16};
v_uint32x4 v2 = (v_uint32x4){2, 2, 2, 2};
v_uint32x4 v3 = vec_mod(v1, v2);
return (int)vec_extractm(v3);
}

View File

@ -0,0 +1,16 @@
#if (__VEC__ < 10301) || (__ARCH__ < 11)
#error VX not supported
#endif
#include <vecintrin.h>
int main(int argc, char **argv)
{
__vector double x = vec_abs(vec_xl(argc, (double*)argv));
__vector double y = vec_load_len((double*)argv, (unsigned int)argc);
x = vec_round(vec_ceil(x) + vec_floor(y));
__vector bool long long m = vec_cmpge(x, y);
__vector long long i = vec_signed(vec_sel(x, y, m));
return (int)vec_extract(i, 0);
}

View File

@ -0,0 +1,25 @@
#if (__VEC__ < 10302) || (__ARCH__ < 12)
#error VXE not supported
#endif
#include <vecintrin.h>
int main(int argc, char **argv)
{
__vector float x = vec_nabs(vec_xl(argc, (float*)argv));
__vector float y = vec_load_len((float*)argv, (unsigned int)argc);
x = vec_round(vec_ceil(x) + vec_floor(y));
__vector bool int m = vec_cmpge(x, y);
x = vec_sel(x, y, m);
// need to test the existence of intrin "vflls" since vec_doublee
// is vec_doublee maps to wrong intrin "vfll".
// see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100871
#if defined(__GNUC__) && !defined(__clang__)
__vector long long i = vec_signed(__builtin_s390_vflls(x));
#else
__vector long long i = vec_signed(vec_doublee(x));
#endif
return (int)vec_extract(i, 0);
}

View File

@ -0,0 +1,21 @@
#if (__VEC__ < 10303) || (__ARCH__ < 13)
#error VXE2 not supported
#endif
#include <vecintrin.h>
int main(int argc, char **argv)
{
int val;
__vector signed short large = { 'a', 'b', 'c', 'a', 'g', 'h', 'g', 'o' };
__vector signed short search = { 'g', 'h', 'g', 'o' };
__vector unsigned char len = { 0 };
__vector unsigned char res = vec_search_string_cc(large, search, len, &val);
__vector float x = vec_xl(argc, (float*)argv);
__vector int i = vec_signed(x);
i = vec_srdb(vec_sldb(i, i, 2), i, 3);
val += (int)vec_extract(res, 1);
val += vec_extract(i, 0);
return val;
}

View File

@ -0,0 +1,12 @@
#include <immintrin.h>
#ifdef _MSC_VER
#include <ammintrin.h>
#else
#include <x86intrin.h>
#endif
int main(void)
{
__m128i a = _mm_comge_epu32(_mm_setzero_si128(), _mm_setzero_si128());
return _mm_cvtsi128_si32(a);
}

View File

@ -0,0 +1,18 @@
#include <immintrin.h>
/**
* Test BW mask operations due to:
* - MSVC has supported it since vs2019 see,
* https://developercommunity.visualstudio.com/content/problem/518298/missing-avx512bw-mask-intrinsics.html
* - Clang >= v8.0
* - GCC >= v7.1
*/
int main(void)
{
__mmask64 m64 = _mm512_cmpeq_epi8_mask(_mm512_set1_epi8((char)1), _mm512_set1_epi8((char)1));
m64 = _kor_mask64(m64, m64);
m64 = _kxor_mask64(m64, m64);
m64 = _cvtu64_mask64(_cvtmask64_u64(m64));
m64 = _mm512_kunpackd(m64, m64);
m64 = (__mmask64)_mm512_kunpackw((__mmask32)m64, (__mmask32)m64);
return (int)_cvtmask64_u64(m64);
}

View File

@ -0,0 +1,16 @@
#include <immintrin.h>
/**
* Test DQ mask operations due to:
* - MSVC has supported it since vs2019 see,
* https://developercommunity.visualstudio.com/content/problem/518298/missing-avx512bw-mask-intrinsics.html
* - Clang >= v8.0
* - GCC >= v7.1
*/
int main(void)
{
__mmask8 m8 = _mm512_cmpeq_epi64_mask(_mm512_set1_epi64(1), _mm512_set1_epi64(1));
m8 = _kor_mask8(m8, m8);
m8 = _kxor_mask8(m8, m8);
m8 = _cvtu32_mask8(_cvtmask8_u32(m8));
return (int)_cvtmask8_u32(m8);
}

View File

@ -0,0 +1,41 @@
#include <immintrin.h>
/**
* The following intrinsics don't have direct native support but compilers
* tend to emulate them.
* They're usually supported by gcc >= 7.1, clang >= 4 and icc >= 19
*/
int main(void)
{
__m512 one_ps = _mm512_set1_ps(1.0f);
__m512d one_pd = _mm512_set1_pd(1.0);
__m512i one_i64 = _mm512_set1_epi64(1);
// add
float sum_ps = _mm512_reduce_add_ps(one_ps);
double sum_pd = _mm512_reduce_add_pd(one_pd);
int sum_int = (int)_mm512_reduce_add_epi64(one_i64);
sum_int += (int)_mm512_reduce_add_epi32(one_i64);
// mul
sum_ps += _mm512_reduce_mul_ps(one_ps);
sum_pd += _mm512_reduce_mul_pd(one_pd);
sum_int += (int)_mm512_reduce_mul_epi64(one_i64);
sum_int += (int)_mm512_reduce_mul_epi32(one_i64);
// min
sum_ps += _mm512_reduce_min_ps(one_ps);
sum_pd += _mm512_reduce_min_pd(one_pd);
sum_int += (int)_mm512_reduce_min_epi32(one_i64);
sum_int += (int)_mm512_reduce_min_epu32(one_i64);
sum_int += (int)_mm512_reduce_min_epi64(one_i64);
// max
sum_ps += _mm512_reduce_max_ps(one_ps);
sum_pd += _mm512_reduce_max_pd(one_pd);
sum_int += (int)_mm512_reduce_max_epi32(one_i64);
sum_int += (int)_mm512_reduce_max_epu32(one_i64);
sum_int += (int)_mm512_reduce_max_epi64(one_i64);
// and
sum_int += (int)_mm512_reduce_and_epi32(one_i64);
sum_int += (int)_mm512_reduce_and_epi64(one_i64);
// or
sum_int += (int)_mm512_reduce_or_epi32(one_i64);
sum_int += (int)_mm512_reduce_or_epi64(one_i64);
return (int)sum_ps + (int)sum_pd + sum_int;
}

View File

@ -0,0 +1,12 @@
/**
* Assembler may not fully support the following VSX3 scalar
* instructions, even though compilers report VSX3 support.
*/
int main(void)
{
unsigned short bits = 0xFF;
double f;
__asm__ __volatile__("xscvhpdp %x0,%x1" : "=wa"(f) : "wa"(bits));
__asm__ __volatile__ ("xscvdphp %x0,%x1" : "=wa" (bits) : "wa" (f));
return bits;
}

View File

@ -0,0 +1,21 @@
#ifndef __VSX__
#error "VSX is not supported"
#endif
#include <altivec.h>
typedef __vector float fv4sf_t;
typedef __vector unsigned char vec_t;
int main(void)
{
__vector_quad acc0;
float a[4] = {0,1,2,3};
float b[4] = {0,1,2,3};
vec_t *va = (vec_t *) a;
vec_t *vb = (vec_t *) b;
__builtin_mma_xvf32ger(&acc0, va[0], vb[0]);
fv4sf_t result[4];
__builtin_mma_disassemble_acc((void *)result, &acc0);
fv4sf_t c0 = result[0];
return (int)((float*)&c0)[0];
}

View File

@ -0,0 +1,36 @@
/**
* Testing ASM VSX register number fixer '%x<n>'
*
* old versions of CLANG doesn't support %x<n> in the inline asm template
* which fixes register number when using any of the register constraints wa, wd, wf.
*
* xref:
* - https://bugs.llvm.org/show_bug.cgi?id=31837
* - https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html
*/
#ifndef __VSX__
#error "VSX is not supported"
#endif
#include <altivec.h>
#if (defined(__GNUC__) && !defined(vec_xl)) || (defined(__clang__) && !defined(__IBMC__))
#define vsx_ld vec_vsx_ld
#define vsx_st vec_vsx_st
#else
#define vsx_ld vec_xl
#define vsx_st vec_xst
#endif
int main(void)
{
float z4[] = {0, 0, 0, 0};
signed int zout[] = {0, 0, 0, 0};
__vector float vz4 = vsx_ld(0, z4);
__vector signed int asm_ret = vsx_ld(0, zout);
__asm__ ("xvcvspsxws %x0,%x1" : "=wa" (vz4) : "wa" (asm_ret));
vsx_st(asm_ret, 0, zout);
return zout[0];
}

View File

@ -0,0 +1 @@
/* Dummy translation unit: used only to check that the compiler accepts the tested build flags. */
int test_flags;