diff options
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 4 ++++
-rw-r--r-- | llvm/test/CodeGen/X86/vec_ss_load_fold.ll | 22 ++++++++++++++++++++++
2 files changed, 26 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index a85a28cf361..fdd811491df 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -1700,6 +1700,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VDIVSSZrr_Int, X86::VDIVSSZrm_Int, 0 }, { X86::VDIVSDZrr, X86::VDIVSDZrm, 0 }, { X86::VDIVSDZrr_Int, X86::VDIVSDZrm_Int, 0 }, + { X86::VCMPSDZrr_Int, X86::VCMPSDZrm_Int, 0 }, + { X86::VCMPSSZrr_Int, X86::VCMPSSZrm_Int, 0 }, { X86::VANDPDZrr, X86::VANDPDZrm, 0 }, { X86::VANDPSZrr, X86::VANDPSZrm, 0 }, { X86::VANDNPDZrr, X86::VANDNPDZrm, 0 }, @@ -6189,6 +6191,7 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, // instruction isn't scalar (SS). switch (UserOpc) { case X86::ADDSSrr_Int: case X86::VADDSSrr_Int: case X86::VADDSSZrr_Int: + case X86::Int_CMPSSrr: case X86::Int_VCMPSSrr: case X86::VCMPSSZrr_Int: case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int: case X86::VDIVSSZrr_Int: case X86::MAXSSrr_Int: case X86::VMAXSSrr_Int: case X86::VMAXSSZrr_Int: case X86::MINSSrr_Int: case X86::VMINSSrr_Int: case X86::VMINSSZrr_Int: @@ -6213,6 +6216,7 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, // instruction isn't scalar (SD). 
switch (UserOpc) { case X86::ADDSDrr_Int: case X86::VADDSDrr_Int: case X86::VADDSDZrr_Int: + case X86::Int_CMPSDrr: case X86::Int_VCMPSDrr: case X86::VCMPSDZrr_Int: case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int: case X86::VDIVSDZrr_Int: case X86::MAXSDrr_Int: case X86::VMAXSDrr_Int: case X86::VMAXSDZrr_Int: case X86::MINSDrr_Int: case X86::VMINSDrr_Int: case X86::VMINSDZrr_Int: diff --git a/llvm/test/CodeGen/X86/vec_ss_load_fold.ll b/llvm/test/CodeGen/X86/vec_ss_load_fold.ll index 8c6f654d3ae..3d69f065542 100644 --- a/llvm/test/CodeGen/X86/vec_ss_load_fold.ll +++ b/llvm/test/CodeGen/X86/vec_ss_load_fold.ll @@ -196,3 +196,25 @@ entry: %1 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %y, <4 x float> %vecinit4.i) ret <4 x float> %1 } + +define <4 x float> @cmpss_fold(float* %x, <4 x float> %y) { +; X32-LABEL: cmpss_fold: +; X32: ## BB#0: ## %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: cmpeqss (%eax), %xmm0 +; X32-NEXT: retl +; +; X64-LABEL: cmpss_fold: +; X64: ## BB#0: ## %entry +; X64-NEXT: cmpeqss (%rdi), %xmm0 +; X64-NEXT: retq +entry: + %0 = load float, float* %x, align 1 + %vecinit.i = insertelement <4 x float> undef, float %0, i32 0 + %vecinit2.i = insertelement <4 x float> %vecinit.i, float 0.000000e+00, i32 1 + %vecinit3.i = insertelement <4 x float> %vecinit2.i, float 0.000000e+00, i32 2 + %vecinit4.i = insertelement <4 x float> %vecinit3.i, float 0.000000e+00, i32 3 + %1 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %y, <4 x float> %vecinit4.i, i8 0) + ret <4 x float> %1 +} +declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone |