diff options
| author | Craig Topper <craig.topper@gmail.com> | 2016-07-09 04:38:27 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@gmail.com> | 2016-07-09 04:38:27 +0000 |
| commit | 70610cf7b630a3379e97718193798296408b7a74 (patch) | |
| tree | 435a80c7f61336c74e20f882f2f031dfc68b3c1f /llvm | |
| parent | 83c65d7889f3692bdc859767d15f19980dc0d0ba (diff) | |
| download | bcm5719-llvm-70610cf7b630a3379e97718193798296408b7a74.tar.gz bcm5719-llvm-70610cf7b630a3379e97718193798296408b7a74.zip | |
[X86] Remove and autoupgrade 512-bit non-temporal store intrinsics.
llvm-svn: 274966
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/include/llvm/IR/IntrinsicsX86.td | 12 | ||||
| -rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 21 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 5 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll | 33 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics.ll | 33 |
6 files changed, 40 insertions, 72 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index c7c17b4ae88..8ed7ba4ce33 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -1773,18 +1773,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". [IntrArgMemOnly]>; } -// Store ops using non-temporal hint -let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx512_storent_q_512 : - GCCBuiltin<"__builtin_ia32_movntdq512">, - Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty], [IntrArgMemOnly]>; - def int_x86_avx512_storent_pd_512 : - GCCBuiltin<"__builtin_ia32_movntpd512">, - Intrinsic<[], [llvm_ptr_ty, llvm_v8f64_ty], [IntrArgMemOnly]>; - def int_x86_avx512_storent_ps_512 : - GCCBuiltin<"__builtin_ia32_movntps512">, - Intrinsic<[], [llvm_ptr_ty, llvm_v16f32_ty], [IntrArgMemOnly]>; -} //===----------------------------------------------------------------------===// // AVX2 diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index cacc7a398ca..ad73897aff8 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -255,6 +255,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name == "avx2.vextracti128" || Name.startswith("sse4a.movnt.") || Name.startswith("avx.movnt.") || + Name.startswith("avx512.storent.") || Name == "sse2.storel.dq" || Name.startswith("sse.storeu.") || Name.startswith("sse2.storeu.") || @@ -738,7 +739,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { // Remove intrinsic. CI->eraseFromParent(); return; - } else if (IsX86 && Name.startswith("avx.movnt.")) { + } else if (IsX86 && (Name.startswith("avx.movnt.") || + Name.startswith("avx512.storent."))) { Module *M = F->getParent(); SmallVector<Metadata *, 1> Elts; Elts.push_back( @@ -752,7 +754,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Value *BC = Builder.CreateBitCast(Arg0, PointerType::getUnqual(Arg1->getType()), "cast"); - StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC, 32); + VectorType *VTy = cast<VectorType>(Arg1->getType()); + StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC, + VTy->getBitWidth() / 8); SI->setMetadata(M->getMDKindID("nontemporal"), Node); // Remove intrinsic. diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 65d246d7d24..b8250c266ed 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -4105,13 +4105,6 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; break; } - case STOREANT: { - Info.ptrVal = I.getArgOperand(0); - Info.memVT = MVT::getVT(I.getArgOperand(1)->getType()); - Info.align = Info.memVT.getSizeInBits()/8; - Info.writeMem = true; - break; - } default: return false; } @@ -18523,20 +18516,6 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, Mask, PassThru, Subtarget, DAG), Chain}; return DAG.getMergeValues(Results, dl); } - case STOREANT: { - // Store (MOVNTPD, MOVNTPS, MOVNTDQ) using non-temporal hint intrinsic implementation. - SDValue Data = Op.getOperand(3); - SDValue Addr = Op.getOperand(2); - SDValue Chain = Op.getOperand(0); - - MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op); - assert(MemIntr && "Expected MemIntrinsicSDNode!"); - MachineMemOperand *MMO = MemIntr->getMemOperand(); - - MMO->setFlags(MachineMemOperand::MONonTemporal); - - return DAG.getStore(Chain, dl, Data, Addr, MMO); - } } } diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index bfd20896924..57cc5b53cbf 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -33,7 +33,7 @@ enum IntrinsicType : uint16_t { INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC, BRCST32x2_TO_VEC, TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, - EXPAND_FROM_MEM, STOREANT, INSERT_SUBVEC, + EXPAND_FROM_MEM, INSERT_SUBVEC, TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS, FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK }; @@ -216,9 +216,6 @@ static const IntrinsicData IntrinsicsWithChain[] = { X86_INTRINSIC_DATA(avx512_scattersiv4_si, SCATTER, X86::VPSCATTERDDZ128mr, 0), X86_INTRINSIC_DATA(avx512_scattersiv8_sf, SCATTER, X86::VSCATTERDPSZ256mr, 0), X86_INTRINSIC_DATA(avx512_scattersiv8_si, SCATTER, X86::VPSCATTERDDZ256mr, 0), - X86_INTRINSIC_DATA(avx512_storent_pd_512, STOREANT, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_storent_ps_512, STOREANT, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_storent_q_512, STOREANT, ISD::DELETED_NODE, 0), X86_INTRINSIC_DATA(rdpmc, RDPMC, X86ISD::RDPMC_DAG, 0), X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0), X86_INTRINSIC_DATA(rdrand_32, RDRAND, X86ISD::RDRAND, 0), diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll index ee529bda782..46916ea2423 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -923,3 +923,36 @@ define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) { declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone +declare void @llvm.x86.avx512.storent.q.512(i8*, <8 x i64>) + +define void@test_storent_q_512(<8 x i64> %data, i8* %ptr) { +; CHECK-LABEL: test_storent_q_512: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovntdq %zmm0, (%rdi) +; CHECK-NEXT: retq + call void @llvm.x86.avx512.storent.q.512(i8* %ptr, <8 x i64> %data) + ret void +} + +declare void @llvm.x86.avx512.storent.pd.512(i8*, <8 x double>) + +define void @test_storent_pd_512(<8 x double> %data, i8* %ptr) { +; CHECK-LABEL: test_storent_pd_512: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovntpd %zmm0, (%rdi) +; CHECK-NEXT: retq + call void @llvm.x86.avx512.storent.pd.512(i8* %ptr, <8 x double> %data) + ret void +} + +declare void @llvm.x86.avx512.storent.ps.512(i8*, <16 x float>) + +define void @test_storent_ps_512(<16 x float> %data, i8* %ptr) { +; CHECK-LABEL: test_storent_ps_512: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovntps %zmm0, (%rdi) +; CHECK-NEXT: retq + call void @llvm.x86.avx512.storent.ps.512(i8* %ptr, <16 x float> %data) + ret void +} + diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll index cce3ae62fdb..5fa3944e71d 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll @@ -6309,39 +6309,6 @@ define <2 x double>@test_int_x86_avx512_maskz_fixupimm_sd(<2 x double> %x0, <2 x ret <2 x double> %res4 } -declare void @llvm.x86.avx512.storent.q.512(i8*, <8 x i64>) - -define void@test_storent_q_512(<8 x i64> %data, i8* %ptr) { -; CHECK-LABEL: test_storent_q_512: -; CHECK: ## BB#0: -; CHECK-NEXT: vmovntdq %zmm0, (%rdi) -; CHECK-NEXT: retq - call void @llvm.x86.avx512.storent.q.512(i8* %ptr, <8 x i64> %data) - ret void -} - -declare void @llvm.x86.avx512.storent.pd.512(i8*, <8 x double>) - -define void @test_storent_pd_512(<8 x double> %data, i8* %ptr) { -; CHECK-LABEL: test_storent_pd_512: -; CHECK: ## BB#0: -; CHECK-NEXT: vmovntpd %zmm0, (%rdi) -; CHECK-NEXT: retq - call void @llvm.x86.avx512.storent.pd.512(i8* %ptr, <8 x double> %data) - ret void -} - -declare void @llvm.x86.avx512.storent.ps.512(i8*, <16 x float>) - -define void @test_storent_ps_512(<16 x float> %data, i8* %ptr) { -; CHECK-LABEL: test_storent_ps_512: -; CHECK: ## BB#0: -; CHECK-NEXT: vmovntps %zmm0, (%rdi) -; CHECK-NEXT: retq - call void @llvm.x86.avx512.storent.ps.512(i8* %ptr, <16 x float> %data) - ret void -} - declare i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32>, <16 x i32>, i16 %x2) define i16@test_int_x86_avx512_ptestnm_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) { |

