diff options
author | Craig Topper <craig.topper@gmail.com> | 2016-05-25 06:56:32 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2016-05-25 06:56:32 +0000 |
commit | 12e322a8cfb1e043ea8d22a226de2f953b5c3f2a (patch) | |
tree | 999c2d8f205379052e107d203dbbff2676c48253 | |
parent | 613e7041902f7469efeeac309b075b66d0a8b776 (diff) | |
download | bcm5719-llvm-12e322a8cfb1e043ea8d22a226de2f953b5c3f2a.tar.gz bcm5719-llvm-12e322a8cfb1e043ea8d22a226de2f953b5c3f2a.zip |
[X86] Remove the llvm.x86.sse2.storel.dq intrinsic. It hasn't been used in a long time.
llvm-svn: 270677
-rw-r--r-- | llvm/include/llvm/IR/IntrinsicsX86.td | 3 | ||||
-rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 20 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 7 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx-intrinsics-x86.ll | 32 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll | 15 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll | 24 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/2008-07-16-sse2_storel_dq.ll | 13 |
8 files changed, 45 insertions, 71 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index b0673056320..e118beb87a1 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -537,9 +537,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse2_storeu_dq : GCCBuiltin<"__builtin_ia32_storedqu">, Intrinsic<[], [llvm_ptr_ty, llvm_v16i8_ty], [IntrArgMemOnly]>; - def int_x86_sse2_storel_dq : GCCBuiltin<"__builtin_ia32_storelv4si">, - Intrinsic<[], [llvm_ptr_ty, - llvm_v4i32_ty], [IntrArgMemOnly]>; } // Misc. diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 41d59b372ca..ff7588e7fcf 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -189,6 +189,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name == "x86.avx.movnt.dq.256" || Name == "x86.avx.movnt.pd.256" || Name == "x86.avx.movnt.ps.256" || + Name == "x86.sse2.storel.dq" || Name == "x86.sse42.crc32.64.8" || Name == "x86.avx.vbroadcast.ss" || Name == "x86.avx.vbroadcast.ss.256" || @@ -422,6 +423,25 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { // Remove intrinsic. CI->eraseFromParent(); return; + } else if (Name == "llvm.x86.sse2.storel.dq") { + IRBuilder<> Builder(C); + Builder.SetInsertPoint(CI->getParent(), CI->getIterator()); + + Value *Arg0 = CI->getArgOperand(0); + Value *Arg1 = CI->getArgOperand(1); + + Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2); + Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast"); + Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0); + Value *BC = Builder.CreateBitCast(Arg0, + PointerType::getUnqual(Elt->getType()), + "cast"); + StoreInst *SI = Builder.CreateStore(Elt, BC); + SI->setAlignment(1); + + // Remove intrinsic. + CI->eraseFromParent(); + return; } else if (Name.startswith("llvm.x86.xop.vpcom")) { Intrinsic::ID intID; if (Name.endswith("ub")) diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index ae488c4bf4b..71565df351c 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -5022,13 +5022,6 @@ def : InstAlias<"vmovq\t{$src, $dst|$dst, $src}", //===---------------------------------------------------------------------===// // Store / copy lower 64-bits of a XMM register. // -let Predicates = [HasAVX] in -def : Pat<(int_x86_sse2_storel_dq addr:$dst, VR128:$src), - (VMOVPQI2QImr addr:$dst, VR128:$src)>; -let Predicates = [UseSSE2] in -def : Pat<(int_x86_sse2_storel_dq addr:$dst, VR128:$src), - (MOVPQI2QImr addr:$dst, VR128:$src)>; - let ExeDomain = SSEPackedInt, isCodeGenOnly = 1, AddedComplexity = 20 in { def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index b4b6fd1fe78..b5afe0a62ce 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -687,7 +687,6 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) { case Intrinsic::x86_sse_storeu_ps: case Intrinsic::x86_sse2_storeu_pd: case Intrinsic::x86_sse2_storeu_dq: - case Intrinsic::x86_sse2_storel_dq: if (II->getArgOperand(0) == OperandVal) isAddress = true; break; @@ -712,7 +711,6 @@ static MemAccessTy getAccessType(const Instruction *Inst) { case Intrinsic::x86_sse_storeu_ps: case Intrinsic::x86_sse2_storeu_pd: case Intrinsic::x86_sse2_storeu_dq: - case Intrinsic::x86_sse2_storel_dq: AccessTy.MemTy = II->getArgOperand(0)->getType(); break; } diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll index d847a111d99..a70414f48ed 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx,aes,pclmul | FileCheck %s --check-prefix=AVX ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx512vl,aes,pclmul | FileCheck %s --check-prefix=AVX512VL @@ -1253,37 +1253,19 @@ define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) { declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone -define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) { -; AVX-LABEL: test_x86_sse2_storel_dq: -; AVX: ## BB#0: -; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX-NEXT: vmovlps %xmm0, (%eax) -; AVX-NEXT: retl -; -; AVX512VL-LABEL: test_x86_sse2_storel_dq: -; AVX512VL: ## BB#0: -; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512VL-NEXT: vmovlps %xmm0, (%eax) -; AVX512VL-NEXT: retl - call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1) - ret void -} -declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind - - define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) { ; add operation forces the execution domain. ; AVX-LABEL: test_x86_sse2_storeu_dq: ; AVX: ## BB#0: ; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX-NEXT: vpaddb LCPI77_0, %xmm0, %xmm0 +; AVX-NEXT: vpaddb LCPI76_0, %xmm0, %xmm0 ; AVX-NEXT: vmovdqu %xmm0, (%eax) ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse2_storeu_dq: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512VL-NEXT: vpaddb LCPI77_0, %xmm0, %xmm0 +; AVX512VL-NEXT: vpaddb LCPI76_0, %xmm0, %xmm0 ; AVX512VL-NEXT: vmovdqu %xmm0, (%eax) ; AVX512VL-NEXT: retl %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> @@ -4208,7 +4190,7 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) { ; AVX512VL-LABEL: test_x86_avx_storeu_dq_256: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512VL-NEXT: vpaddb LCPI236_0, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddb LCPI235_0, %ymm0, %ymm0 ; AVX512VL-NEXT: vmovdqu %ymm0, (%eax) ; AVX512VL-NEXT: retl %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> @@ -4449,7 +4431,7 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) { ; ; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256_2: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpermilpd LCPI250_0, %ymm0, %ymm0 +; AVX512VL-NEXT: vpermilpd LCPI249_0, %ymm0, %ymm0 ; AVX512VL-NEXT: retl %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1] ret <4 x double> %res @@ -4941,7 +4923,7 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind { ; AVX-LABEL: movnt_dq: ; AVX: ## BB#0: ; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX-NEXT: vpaddq LCPI277_0, %xmm0, %xmm0 +; AVX-NEXT: vpaddq LCPI276_0, %xmm0, %xmm0 ; AVX-NEXT: vmovntdq %ymm0, (%eax) ; AVX-NEXT: vzeroupper ; AVX-NEXT: retl @@ -4949,7 +4931,7 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind { ; AVX512VL-LABEL: movnt_dq: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512VL-NEXT: vpaddq LCPI277_0, %xmm0, %xmm0 +; AVX512VL-NEXT: vpaddq LCPI276_0, %xmm0, %xmm0 ; AVX512VL-NEXT: vmovntdq %ymm0, (%eax) ; AVX512VL-NEXT: retl %a2 = add <2 x i64> %a1, <i64 1, i64 1> diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll index 300e4f88183..32825a67c37 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll @@ -42,3 +42,18 @@ define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) { ret <2 x i64> %res } declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone + + +define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) { +; CHECK-LABEL: test_x86_sse2_storel_dq: +; CHECK: ## BB#0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movlps %xmm0, (%eax) +; CHECK-NEXT: retl + call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1) + ret void +} +declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind + + + diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll index b7da2fdcde1..ca43cdf8d8c 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by update_llc_test_checks.py ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=SSE ; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL @@ -1157,37 +1157,19 @@ define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) { declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone -define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) { -; SSE-LABEL: test_x86_sse2_storel_dq: -; SSE: ## BB#0: -; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; SSE-NEXT: movlps %xmm0, (%eax) -; SSE-NEXT: retl -; -; KNL-LABEL: test_x86_sse2_storel_dq: -; KNL: ## BB#0: -; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL-NEXT: vmovlps %xmm0, (%eax) -; KNL-NEXT: retl - call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1) - ret void -} -declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind - - define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) { ; add operation forces the execution domain. ; SSE-LABEL: test_x86_sse2_storeu_dq: ; SSE: ## BB#0: ; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; SSE-NEXT: paddb LCPI71_0, %xmm0 +; SSE-NEXT: paddb LCPI70_0, %xmm0 ; SSE-NEXT: movdqu %xmm0, (%eax) ; SSE-NEXT: retl ; ; KNL-LABEL: test_x86_sse2_storeu_dq: ; KNL: ## BB#0: ; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL-NEXT: vpaddb LCPI71_0, %xmm0, %xmm0 +; KNL-NEXT: vpaddb LCPI70_0, %xmm0, %xmm0 ; KNL-NEXT: vmovdqu %xmm0, (%eax) ; KNL-NEXT: retl %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> diff --git a/llvm/test/Transforms/InstCombine/2008-07-16-sse2_storel_dq.ll b/llvm/test/Transforms/InstCombine/2008-07-16-sse2_storel_dq.ll deleted file mode 100644 index b469887ba25..00000000000 --- a/llvm/test/Transforms/InstCombine/2008-07-16-sse2_storel_dq.ll +++ /dev/null @@ -1,13 +0,0 @@ -; RUN: opt < %s -instcombine -S | not grep "store " -; PR2296 - -@G = common global double 0.000000e+00, align 16 - -define void @x(<2 x i64> %y) nounwind { -entry: - bitcast <2 x i64> %y to <4 x i32> - call void @llvm.x86.sse2.storel.dq( i8* bitcast (double* @G to i8*), <4 x i32> %0 ) nounwind - ret void -} - -declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind |