author    Craig Topper <craig.topper@intel.com>  2018-05-13 18:03:59 +0000
committer Craig Topper <craig.topper@intel.com>  2018-05-13 18:03:59 +0000
commit    85906cf0415ad8c740d7bc93d73d89533cfdb308 (patch)
tree      15fc6f0b45fef44d593a0289072d59d9dbec054c /llvm
parent    a39c409619f3c06e4febd8d1bb0d5efc7d0a940d (diff)
[X86] Remove and autoupgrade masked vpermd/vpermps intrinsics.
llvm-svn: 332198
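
For context, the autoupgrade rewrites calls to the removed 256-bit masked intrinsics into the unmasked AVX2 intrinsics (llvm.x86.avx2.permps / llvm.x86.avx2.permd) followed by a select on the mask, via the upgradeAVX512MaskToSelect path extended below. A rough IR sketch of the rewrite (value names are illustrative, not taken from the patch):

; Before this commit: masked AVX-512 form (now removed and auto-upgraded).
  %old  = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %src, <8 x i32> %idx, <8 x float> %passthru, i8 %mask)
; After autoupgrade: unmasked AVX2 permute plus an explicit select on the mask bits.
  %perm = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %src, <8 x i32> %idx)
  %mbit = bitcast i8 %mask to <8 x i1>
  %new  = select <8 x i1> %mbit, <8 x float> %perm, <8 x float> %passthru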
Diffstat (limited to 'llvm')
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsX86.td                 |  6
-rw-r--r--  llvm/lib/IR/AutoUpgrade.cpp                           | 20
-rw-r--r--  llvm/lib/Target/X86/X86IntrinsicsInfo.h               |  4
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp  |  2
-rw-r--r--  llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll  | 40
-rw-r--r--  llvm/test/CodeGen/X86/avx512vl-intrinsics.ll          | 40
6 files changed, 53 insertions, 59 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index d3b3d1a5e4c..aa8df70e7a2 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -3289,15 +3289,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_mask_permvar_qi_512 : GCCBuiltin<"__builtin_ia32_permvarqi512_mask">,
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty,
llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_permvar_sf_256 : // TODO: Remove this intrinsic
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
- llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_permvar_sf_512 : GCCBuiltin<"__builtin_ia32_permvarsf512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty,
llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_permvar_si_256 : // TODO: Remove this intrinsic
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_permvar_si_512 : GCCBuiltin<"__builtin_ia32_permvarsi512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 9d603146b00..0a3ecaed6d7 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -181,6 +181,8 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
+ Name == "avx512.mask.permvar.sf.256" || // Added in 7.0
+ Name == "avx512.mask.permvar.si.256" || // Added in 7.0
Name == "sse2.pmulu.dq" || // Added in 7.0
Name == "sse41.pmuldq" || // Added in 7.0
Name == "avx2.pmulu.dq" || // Added in 7.0
@@ -1190,19 +1192,23 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name == "cvtdq2ps.128") {
- IID = Intrinsic::x86_sse2_cvtdq2ps;
+ IID = Intrinsic::x86_sse2_cvtdq2ps;
} else if (Name == "cvtdq2ps.256") {
- IID = Intrinsic::x86_avx_cvtdq2_ps_256;
+ IID = Intrinsic::x86_avx_cvtdq2_ps_256;
} else if (Name == "cvtpd2dq.256") {
- IID = Intrinsic::x86_avx_cvt_pd2dq_256;
+ IID = Intrinsic::x86_avx_cvt_pd2dq_256;
} else if (Name == "cvtpd2ps.256") {
- IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
+ IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
} else if (Name == "cvttpd2dq.256") {
- IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
+ IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
} else if (Name == "cvttps2dq.128") {
- IID = Intrinsic::x86_sse2_cvttps2dq;
+ IID = Intrinsic::x86_sse2_cvttps2dq;
} else if (Name == "cvttps2dq.256") {
- IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
+ IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
+ } else if (Name == "permvar.sf.256") {
+ IID = Intrinsic::x86_avx2_permps;
+ } else if (Name == "permvar.si.256") {
+ IID = Intrinsic::x86_avx2_permd;
} else
return false;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 483aafedc82..2078e1f9baa 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -817,12 +817,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_mask_permvar_qi_512, VPERM_2OP_MASK,
X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_sf_256, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_mask_permvar_sf_512, VPERM_2OP_MASK,
X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_si_256, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_mask_permvar_si_512, VPERM_2OP_MASK,
X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, INTR_TYPE_1OP_MASK,
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index c323bfc3519..ff3fb9387d2 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2796,9 +2796,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx512_mask_permvar_qi_128:
case Intrinsic::x86_avx512_mask_permvar_qi_256:
case Intrinsic::x86_avx512_mask_permvar_qi_512:
- case Intrinsic::x86_avx512_mask_permvar_sf_256:
case Intrinsic::x86_avx512_mask_permvar_sf_512:
- case Intrinsic::x86_avx512_mask_permvar_si_256:
case Intrinsic::x86_avx512_mask_permvar_si_512:
if (Value *V = simplifyX86vpermv(*II, Builder)) {
// We simplified the permuting, now create a select for the masking.
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
index 219e93c27ba..f3288a01831 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
@@ -6737,3 +6737,43 @@ define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_256(<8 x float> %x0, <8 x i
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}
+
+declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float>, <8 x i32>, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask_permvar_sf_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_permvar_sf_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xd8]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x16,0xd0]
+; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x16,0xc0]
+; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
+; CHECK-NEXT: vaddps %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3)
+ %res2 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
+ %res3 = fadd <8 x float> %res, %res1
+ %res4 = fadd <8 x float> %res3, %res2
+ ret <8 x float> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_permvar_si_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_permvar_si_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x36,0xd8]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x36,0xd0]
+; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x36,0xc0]
+; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
+; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
+ %res2 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
+ %res3 = add <8 x i32> %res, %res1
+ %res4 = add <8 x i32> %res3, %res2
+ ret <8 x i32> %res4
+}
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
index ee06fbb2ba3..69854d2485b 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -3075,46 +3075,6 @@ define <4 x i64>@test_int_x86_avx512_mask_permvar_di_256(<4 x i64> %x0, <4 x i64
ret <4 x i64> %res4
}
-declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float>, <8 x i32>, <8 x float>, i8)
-
-define <8 x float>@test_int_x86_avx512_mask_permvar_sf_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
-; CHECK-LABEL: test_int_x86_avx512_mask_permvar_sf_256:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xd8]
-; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x16,0xd0]
-; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x16,0xc0]
-; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
-; CHECK-NEXT: vaddps %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
- %res1 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3)
- %res2 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
- %res3 = fadd <8 x float> %res, %res1
- %res4 = fadd <8 x float> %res3, %res2
- ret <8 x float> %res4
-}
-
-declare <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
-
-define <8 x i32>@test_int_x86_avx512_mask_permvar_si_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
-; CHECK-LABEL: test_int_x86_avx512_mask_permvar_si_256:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x36,0xd8]
-; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x36,0xd0]
-; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x36,0xc0]
-; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
-; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
- %res1 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
- %res2 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
- %res3 = add <8 x i32> %res, %res1
- %res4 = add <8 x i32> %res3, %res2
- ret <8 x i32> %res4
-}
-
declare <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double>, <2 x double>, <2 x i64>, i32, i8)
define <2 x double>@test_int_x86_avx512_mask_fixupimm_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) {