summary | refs | log | tree | commit | diff | stats
path: root/clang/lib
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@intel.com> 2019-01-07 21:00:41 +0000
committer: Craig Topper <craig.topper@intel.com> 2019-01-07 21:00:41 +0000
commit: cd9e232a4d35f5983e48e626cf97e0ac5151f722 (patch)
tree: 48367973935ccb0f57a8b67864b6ba432a53f9a2 /clang/lib
parent: 486313b5f7361d4f61c8f9ef6b79f9aa9678c5d2 (diff)
download: bcm5719-llvm-cd9e232a4d35f5983e48e626cf97e0ac5151f722.tar.gz
          bcm5719-llvm-cd9e232a4d35f5983e48e626cf97e0ac5151f722.zip
Recommit r350555 "[X86] Use funnel shift intrinsics for the VBMI2 vshld/vshrd builtins."

The MSVC limit hit in AutoUpgrade.cpp has been worked around for now.

llvm-svn: 350568
Diffstat (limited to 'clang/lib')
-rw-r--r-- clang/lib/CodeGen/CGBuiltin.cpp 46
-rw-r--r-- clang/lib/Headers/avx512vbmi2intrin.h 158
-rw-r--r-- clang/lib/Headers/avx512vlvbmi2intrin.h 312
3 files changed, 244 insertions, 272 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 93484f82c30..ca7b4691ff5 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -10999,6 +10999,52 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_pternlogq256_maskz:
return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
+ case X86::BI__builtin_ia32_vpshldd128:
+ case X86::BI__builtin_ia32_vpshldd256:
+ case X86::BI__builtin_ia32_vpshldd512:
+ case X86::BI__builtin_ia32_vpshldq128:
+ case X86::BI__builtin_ia32_vpshldq256:
+ case X86::BI__builtin_ia32_vpshldq512:
+ case X86::BI__builtin_ia32_vpshldw128:
+ case X86::BI__builtin_ia32_vpshldw256:
+ case X86::BI__builtin_ia32_vpshldw512:
+ return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
+
+ case X86::BI__builtin_ia32_vpshrdd128:
+ case X86::BI__builtin_ia32_vpshrdd256:
+ case X86::BI__builtin_ia32_vpshrdd512:
+ case X86::BI__builtin_ia32_vpshrdq128:
+ case X86::BI__builtin_ia32_vpshrdq256:
+ case X86::BI__builtin_ia32_vpshrdq512:
+ case X86::BI__builtin_ia32_vpshrdw128:
+ case X86::BI__builtin_ia32_vpshrdw256:
+ case X86::BI__builtin_ia32_vpshrdw512:
+ // Ops 0 and 1 are swapped.
+ return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
+
+ case X86::BI__builtin_ia32_vpshldvd128:
+ case X86::BI__builtin_ia32_vpshldvd256:
+ case X86::BI__builtin_ia32_vpshldvd512:
+ case X86::BI__builtin_ia32_vpshldvq128:
+ case X86::BI__builtin_ia32_vpshldvq256:
+ case X86::BI__builtin_ia32_vpshldvq512:
+ case X86::BI__builtin_ia32_vpshldvw128:
+ case X86::BI__builtin_ia32_vpshldvw256:
+ case X86::BI__builtin_ia32_vpshldvw512:
+ return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
+
+ case X86::BI__builtin_ia32_vpshrdvd128:
+ case X86::BI__builtin_ia32_vpshrdvd256:
+ case X86::BI__builtin_ia32_vpshrdvd512:
+ case X86::BI__builtin_ia32_vpshrdvq128:
+ case X86::BI__builtin_ia32_vpshrdvq256:
+ case X86::BI__builtin_ia32_vpshrdvq512:
+ case X86::BI__builtin_ia32_vpshrdvw128:
+ case X86::BI__builtin_ia32_vpshrdvw256:
+ case X86::BI__builtin_ia32_vpshrdvw512:
+ // Ops 0 and 1 are swapped.
+ return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
+
// 3DNow!
case X86::BI__builtin_ia32_pswapdsf:
case X86::BI__builtin_ia32_pswapdsi: {
diff --git a/clang/lib/Headers/avx512vbmi2intrin.h b/clang/lib/Headers/avx512vbmi2intrin.h
index d2a58094fd0..53242524293 100644
--- a/clang/lib/Headers/avx512vbmi2intrin.h
+++ b/clang/lib/Headers/avx512vbmi2intrin.h
@@ -227,167 +227,141 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
(__v32hi)_mm512_setzero_si512())
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
+_mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C)
{
- return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S,
- (__v8di) __A,
- (__v8di) __B,
- __U);
+ return (__m512i)__builtin_ia32_vpshldvq512((__v8di)__A, (__v8di)__B,
+ (__v8di)__C);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
+_mm512_mask_shldv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C)
{
- return (__m512i) __builtin_ia32_vpshldvq512_maskz ((__v8di) __S,
- (__v8di) __A,
- (__v8di) __B,
- __U);
+ return (__m512i)__builtin_ia32_selectq_512(__U,
+ (__v8di)_mm512_shldv_epi64(__A, __B, __C),
+ (__v8di)__A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B)
+_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
{
- return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S,
- (__v8di) __A,
- (__v8di) __B,
- (__mmask8) -1);
+ return (__m512i)__builtin_ia32_selectq_512(__U,
+ (__v8di)_mm512_shldv_epi64(__A, __B, __C),
+ (__v8di)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
+_mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C)
{
- return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- __U);
+ return (__m512i)__builtin_ia32_vpshldvd512((__v16si)__A, (__v16si)__B,
+ (__v16si)__C);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
+_mm512_mask_shldv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C)
{
- return (__m512i) __builtin_ia32_vpshldvd512_maskz ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- __U);
+ return (__m512i)__builtin_ia32_selectd_512(__U,
+ (__v16si)_mm512_shldv_epi32(__A, __B, __C),
+ (__v16si)__A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_shldv_epi32(__m512i __S, __m512i __A, __m512i __B)
+_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
{
- return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) -1);
+ return (__m512i)__builtin_ia32_selectd_512(__U,
+ (__v16si)_mm512_shldv_epi32(__A, __B, __C),
+ (__v16si)_mm512_setzero_si512());
}
-
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
+_mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C)
{
- return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S,
- (__v32hi) __A,
- (__v32hi) __B,
- __U);
+ return (__m512i)__builtin_ia32_vpshldvw512((__v32hi)__A, (__v32hi)__B,
+ (__v32hi)__C);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
+_mm512_mask_shldv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C)
{
- return (__m512i) __builtin_ia32_vpshldvw512_maskz ((__v32hi) __S,
- (__v32hi) __A,
- (__v32hi) __B,
- __U);
+ return (__m512i)__builtin_ia32_selectw_512(__U,
+ (__v32hi)_mm512_shldv_epi16(__A, __B, __C),
+ (__v32hi)__A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B)
+_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C)
{
- return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S,
- (__v32hi) __A,
- (__v32hi) __B,
- (__mmask32) -1);
+ return (__m512i)__builtin_ia32_selectw_512(__U,
+ (__v32hi)_mm512_shldv_epi16(__A, __B, __C),
+ (__v32hi)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
+_mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C)
{
- return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S,
- (__v8di) __A,
- (__v8di) __B,
- __U);
+ return (__m512i)__builtin_ia32_vpshrdvq512((__v8di)__A, (__v8di)__B,
+ (__v8di)__C);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
+_mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C)
{
- return (__m512i) __builtin_ia32_vpshrdvq512_maskz ((__v8di) __S,
- (__v8di) __A,
- (__v8di) __B,
- __U);
+ return (__m512i)__builtin_ia32_selectq_512(__U,
+ (__v8di)_mm512_shrdv_epi64(__A, __B, __C),
+ (__v8di)__A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B)
+_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
{
- return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S,
- (__v8di) __A,
- (__v8di) __B,
- (__mmask8) -1);
+ return (__m512i)__builtin_ia32_selectq_512(__U,
+ (__v8di)_mm512_shrdv_epi64(__A, __B, __C),
+ (__v8di)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
+_mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C)
{
- return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- __U);
+ return (__m512i)__builtin_ia32_vpshrdvd512((__v16si)__A, (__v16si)__B,
+ (__v16si)__C);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
+_mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C)
{
- return (__m512i) __builtin_ia32_vpshrdvd512_maskz ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- __U);
+ return (__m512i) __builtin_ia32_selectd_512(__U,
+ (__v16si)_mm512_shrdv_epi32(__A, __B, __C),
+ (__v16si)__A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B)
+_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
{
- return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) -1);
+ return (__m512i) __builtin_ia32_selectd_512(__U,
+ (__v16si)_mm512_shrdv_epi32(__A, __B, __C),
+ (__v16si)_mm512_setzero_si512());
}
-
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
+_mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C)
{
- return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S,
- (__v32hi) __A,
- (__v32hi) __B,
- __U);
+ return (__m512i)__builtin_ia32_vpshrdvw512((__v32hi)__A, (__v32hi)__B,
+ (__v32hi)__C);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
+_mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C)
{
- return (__m512i) __builtin_ia32_vpshrdvw512_maskz ((__v32hi) __S,
- (__v32hi) __A,
- (__v32hi) __B,
- __U);
+ return (__m512i)__builtin_ia32_selectw_512(__U,
+ (__v32hi)_mm512_shrdv_epi16(__A, __B, __C),
+ (__v32hi)__A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B)
+_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C)
{
- return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S,
- (__v32hi) __A,
- (__v32hi) __B,
- (__mmask32) -1);
+ return (__m512i)__builtin_ia32_selectw_512(__U,
+ (__v32hi)_mm512_shrdv_epi16(__A, __B, __C),
+ (__v32hi)_mm512_setzero_si512());
}
diff --git a/clang/lib/Headers/avx512vlvbmi2intrin.h b/clang/lib/Headers/avx512vlvbmi2intrin.h
index baaf5654631..632d14fb55a 100644
--- a/clang/lib/Headers/avx512vlvbmi2intrin.h
+++ b/clang/lib/Headers/avx512vlvbmi2intrin.h
@@ -421,327 +421,279 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
(__v8hi)_mm_setzero_si128())
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
+_mm256_shldv_epi64(__m256i __A, __m256i __B, __m256i __C)
{
- return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S,
- (__v4di) __A,
- (__v4di) __B,
- __U);
+ return (__m256i)__builtin_ia32_vpshldvq256((__v4di)__A, (__v4di)__B,
+ (__v4di)__C);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
+_mm256_mask_shldv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
{
- return (__m256i) __builtin_ia32_vpshldvq256_maskz ((__v4di) __S,
- (__v4di) __A,
- (__v4di) __B,
- __U);
+ return (__m256i)__builtin_ia32_selectq_256(__U,
+ (__v4di)_mm256_shldv_epi64(__A, __B, __C),
+ (__v4di)__A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B)
+_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
{
- return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S,
- (__v4di) __A,
- (__v4di) __B,
- (__mmask8) -1);
+ return (__m256i)__builtin_ia32_selectq_256(__U,
+ (__v4di)_mm256_shldv_epi64(__A, __B, __C),
+ (__v4di)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
+_mm_shldv_epi64(__m128i __A, __m128i __B, __m128i __C)
{
- return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S,
- (__v2di) __A,
- (__v2di) __B,
- __U);
+ return (__m128i)__builtin_ia32_vpshldvq128((__v2di)__A, (__v2di)__B,
+ (__v2di)__C);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
+_mm_mask_shldv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
{
- return (__m128i) __builtin_ia32_vpshldvq128_maskz ((__v2di) __S,
- (__v2di) __A,
- (__v2di) __B,
- __U);
+ return (__m128i)__builtin_ia32_selectq_128(__U,
+ (__v2di)_mm_shldv_epi64(__A, __B, __C),
+ (__v2di)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_shldv_epi64(__m128i __S, __m128i __A, __m128i __B)
+_mm_maskz_shldv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
{
- return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S,
- (__v2di) __A,
- (__v2di) __B,
- (__mmask8) -1);
+ return (__m128i)__builtin_ia32_selectq_128(__U,
+ (__v2di)_mm_shldv_epi64(__A, __B, __C),
+ (__v2di)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
+_mm256_shldv_epi32(__m256i __A, __m256i __B, __m256i __C)
{
- return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- __U);
+ return (__m256i)__builtin_ia32_vpshldvd256((__v8si)__A, (__v8si)__B,
+ (__v8si)__C);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
+_mm256_mask_shldv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
{
- return (__m256i) __builtin_ia32_vpshldvd256_maskz ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- __U);
+ return (__m256i)__builtin_ia32_selectd_256(__U,
+ (__v8si)_mm256_shldv_epi32(__A, __B, __C),
+ (__v8si)__A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B)
+_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
{
- return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) -1);
+ return (__m256i)__builtin_ia32_selectd_256(__U,
+ (__v8si)_mm256_shldv_epi32(__A, __B, __C),
+ (__v8si)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
+_mm_shldv_epi32(__m128i __A, __m128i __B, __m128i __C)
{
- return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- __U);
+ return (__m128i)__builtin_ia32_vpshldvd128((__v4si)__A, (__v4si)__B,
+ (__v4si)__C);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
+_mm_mask_shldv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
{
- return (__m128i) __builtin_ia32_vpshldvd128_maskz ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- __U);
+ return (__m128i)__builtin_ia32_selectd_128(__U,
+ (__v4si)_mm_shldv_epi32(__A, __B, __C),
+ (__v4si)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_shldv_epi32(__m128i __S, __m128i __A, __m128i __B)
+_mm_maskz_shldv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
{
- return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) -1);
+ return (__m128i)__builtin_ia32_selectd_128(__U,
+ (__v4si)_mm_shldv_epi32(__A, __B, __C),
+ (__v4si)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B)
+_mm256_shldv_epi16(__m256i __A, __m256i __B, __m256i __C)
{
- return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S,
- (__v16hi) __A,
- (__v16hi) __B,
- __U);
+ return (__m256i)__builtin_ia32_vpshldvw256((__v16hi)__A, (__v16hi)__B,
+ (__v16hi)__C);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B)
+_mm256_mask_shldv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C)
{
- return (__m256i) __builtin_ia32_vpshldvw256_maskz ((__v16hi) __S,
- (__v16hi) __A,
- (__v16hi) __B,
- __U);
+ return (__m256i)__builtin_ia32_selectw_256(__U,
+ (__v16hi)_mm256_shldv_epi16(__A, __B, __C),
+ (__v16hi)__A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B)
+_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C)
{
- return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S,
- (__v16hi) __A,
- (__v16hi) __B,
- (__mmask16) -1);
+ return (__m256i)__builtin_ia32_selectw_256(__U,
+ (__v16hi)_mm256_shldv_epi16(__A, __B, __C),
+ (__v16hi)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
+_mm_shldv_epi16(__m128i __A, __m128i __B, __m128i __C)
{
- return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S,
- (__v8hi) __A,
- (__v8hi) __B,
- __U);
+ return (__m128i)__builtin_ia32_vpshldvw128((__v8hi)__A, (__v8hi)__B,
+ (__v8hi)__C);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
+_mm_mask_shldv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
{
- return (__m128i) __builtin_ia32_vpshldvw128_maskz ((__v8hi) __S,
- (__v8hi) __A,
- (__v8hi) __B,
- __U);
+ return (__m128i)__builtin_ia32_selectw_128(__U,
+ (__v8hi)_mm_shldv_epi16(__A, __B, __C),
+ (__v8hi)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_shldv_epi16(__m128i __S, __m128i __A, __m128i __B)
+_mm_maskz_shldv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
{
- return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S,
- (__v8hi) __A,
- (__v8hi) __B,
- (__mmask8) -1);
+ return (__m128i)__builtin_ia32_selectw_128(__U,
+ (__v8hi)_mm_shldv_epi16(__A, __B, __C),
+ (__v8hi)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
+_mm256_shrdv_epi64(__m256i __A, __m256i __B, __m256i __C)
{
- return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S,
- (__v4di) __A,
- (__v4di) __B,
- __U);
+ return (__m256i)__builtin_ia32_vpshrdvq256((__v4di)__A, (__v4di)__B,
+ (__v4di)__C);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
+_mm256_mask_shrdv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
{
- return (__m256i) __builtin_ia32_vpshrdvq256_maskz ((__v4di) __S,
- (__v4di) __A,
- (__v4di) __B,
- __U);
+ return (__m256i)__builtin_ia32_selectq_256(__U,
+ (__v4di)_mm256_shrdv_epi64(__A, __B, __C),
+ (__v4di)__A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B)
+_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
{
- return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S,
- (__v4di) __A,
- (__v4di) __B,
- (__mmask8) -1);
+ return (__m256i)__builtin_ia32_selectq_256(__U,
+ (__v4di)_mm256_shrdv_epi64(__A, __B, __C),
+ (__v4di)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
+_mm_shrdv_epi64(__m128i __A, __m128i __B, __m128i __C)
{
- return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S,
- (__v2di) __A,
- (__v2di) __B,
- __U);
+ return (__m128i)__builtin_ia32_vpshrdvq128((__v2di)__A, (__v2di)__B,
+ (__v2di)__C);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
+_mm_mask_shrdv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
{
- return (__m128i) __builtin_ia32_vpshrdvq128_maskz ((__v2di) __S,
- (__v2di) __A,
- (__v2di) __B,
- __U);
+ return (__m128i)__builtin_ia32_selectq_128(__U,
+ (__v2di)_mm_shrdv_epi64(__A, __B, __C),
+ (__v2di)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B)
+_mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
{
- return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S,
- (__v2di) __A,
- (__v2di) __B,
- (__mmask8) -1);
+ return (__m128i)__builtin_ia32_selectq_128(__U,
+ (__v2di)_mm_shrdv_epi64(__A, __B, __C),
+ (__v2di)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
+_mm256_shrdv_epi32(__m256i __A, __m256i __B, __m256i __C)
{
- return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- __U);
+ return (__m256i)__builtin_ia32_vpshrdvd256((__v8si)__A, (__v8si)__B,
+ (__v8si)__C);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
+_mm256_mask_shrdv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
{
- return (__m256i) __builtin_ia32_vpshrdvd256_maskz ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- __U);
+ return (__m256i)__builtin_ia32_selectd_256(__U,
+ (__v8si)_mm256_shrdv_epi32(__A, __B, __C),
+ (__v8si)__A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B)
+_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
{
- return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) -1);
+ return (__m256i)__builtin_ia32_selectd_256(__U,
+ (__v8si)_mm256_shrdv_epi32(__A, __B, __C),
+ (__v8si)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
+_mm_shrdv_epi32(__m128i __A, __m128i __B, __m128i __C)
{
- return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- __U);
+ return (__m128i)__builtin_ia32_vpshrdvd128((__v4si)__A, (__v4si)__B,
+ (__v4si)__C);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
+_mm_mask_shrdv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
{
- return (__m128i) __builtin_ia32_vpshrdvd128_maskz ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- __U);
+ return (__m128i)__builtin_ia32_selectd_128(__U,
+ (__v4si)_mm_shrdv_epi32(__A, __B, __C),
+ (__v4si)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B)
+_mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
{
- return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) -1);
+ return (__m128i)__builtin_ia32_selectd_128(__U,
+ (__v4si)_mm_shrdv_epi32(__A, __B, __C),
+ (__v4si)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B)
+_mm256_shrdv_epi16(__m256i __A, __m256i __B, __m256i __C)
{
- return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S,
- (__v16hi) __A,
- (__v16hi) __B,
- __U);
+ return (__m256i)__builtin_ia32_vpshrdvw256((__v16hi)__A, (__v16hi)__B,
+ (__v16hi)__C);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B)
+_mm256_mask_shrdv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C)
{
- return (__m256i) __builtin_ia32_vpshrdvw256_maskz ((__v16hi) __S,
- (__v16hi) __A,
- (__v16hi) __B,
- __U);
+ return (__m256i)__builtin_ia32_selectw_256(__U,
+ (__v16hi)_mm256_shrdv_epi16(__A, __B, __C),
+ (__v16hi)__A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B)
+_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C)
{
- return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S,
- (__v16hi) __A,
- (__v16hi) __B,
- (__mmask16) -1);
+ return (__m256i)__builtin_ia32_selectw_256(__U,
+ (__v16hi)_mm256_shrdv_epi16(__A, __B, __C),
+ (__v16hi)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
+_mm_shrdv_epi16(__m128i __A, __m128i __B, __m128i __C)
{
- return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S,
- (__v8hi) __A,
- (__v8hi) __B,
- __U);
+ return (__m128i)__builtin_ia32_vpshrdvw128((__v8hi)__A, (__v8hi)__B,
+ (__v8hi)__C);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
+_mm_mask_shrdv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
{
- return (__m128i) __builtin_ia32_vpshrdvw128_maskz ((__v8hi) __S,
- (__v8hi) __A,
- (__v8hi) __B,
- __U);
+ return (__m128i)__builtin_ia32_selectw_128(__U,
+ (__v8hi)_mm_shrdv_epi16(__A, __B, __C),
+ (__v8hi)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B)
+_mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
{
- return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S,
- (__v8hi) __A,
- (__v8hi) __B,
- (__mmask8) -1);
+ return (__m128i)__builtin_ia32_selectw_128(__U,
+ (__v8hi)_mm_shrdv_epi16(__A, __B, __C),
+ (__v8hi)_mm_setzero_si128());
}
OpenPOWER on IntegriCloud