| author | Craig Topper <craig.topper@gmail.com> | 2016-12-12 07:57:21 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@gmail.com> | 2016-12-12 07:57:21 +0000 |
| commit | f2c6f7abf32a5082af0ad42e5c094f28275aeb9a (patch) | |
| tree | 253165ff6a082f68d907b45edfab8aac748fde9e | /llvm/lib/Target/X86 |
| parent | eb4cc23fcd5b71b77ad3cac2dd0ecfbf84f0b404 (diff) | |
| download | bcm5719-llvm-f2c6f7abf32a5082af0ad42e5c094f28275aeb9a.tar.gz bcm5719-llvm-f2c6f7abf32a5082af0ad42e5c094f28275aeb9a.zip | |
[X86] Change CMPSS/CMPSD intrinsic instructions to use sse_load_f32/f64 as their memory pattern instead of a full vector load.
These intrinsics only load a single element. We should use sse_load_f32/f64 to give more options for which loads they can match.
Currently these instructions often get their load folded only by the load-folding logic in the peephole pass. I plan to add more load types to sse_load_f32/f64 so we can match them without relying on the peephole pass.
llvm-svn: 289423
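For context, the kind of code this pattern change targets is a scalar compare intrinsic whose memory operand comes from a single-element load. The C sketch below is illustrative only and not part of the commit; the function name and the compile invocation (e.g. clang -O2) are assumptions.

```c
// Illustrative sketch (not from the commit): a CMPSS whose second operand is a
// single float loaded from memory. With the intrinsic's memory pattern built on
// sse_load_f32, instruction selection can fold the scalar load directly into the
// compare's memory operand (e.g. "cmpeqss (%rdi), %xmm0"), instead of relying on
// the later peephole pass to fold it.
#include <xmmintrin.h>

__m128 cmp_scalar_from_memory(__m128 a, const float *p) {
  __m128 b = _mm_load_ss(p);   // loads only the low 32-bit element
  return _mm_cmpeq_ss(a, b);   // scalar compare-equal, lowered to CMPSS with imm 0
}
```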
Diffstat (limited to 'llvm/lib/Target/X86')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 24 |
1 file changed, 12 insertions, 12 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index c9a6b4e523c..9a54e98f771 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -2317,9 +2317,9 @@ let Constraints = "$src1 = $dst" in {
                   SSE_ALU_F64S, i8immZExt3>, XD;
 }
 
-multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
+multiclass sse12_cmp_scalar_int<Operand memop, Operand CC,
                          Intrinsic Int, string asm, OpndItins itins,
-                         ImmLeaf immLeaf> {
+                         ImmLeaf immLeaf, ComplexPattern mem_cpat> {
   def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
                       (ins VR128:$src1, VR128:$src, CC:$cc), asm,
                         [(set VR128:$dst, (Int VR128:$src1,
@@ -2327,30 +2327,30 @@ multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
                         itins.rr>,
            Sched<[itins.Sched]>;
   def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
-                      (ins VR128:$src1, x86memop:$src, CC:$cc), asm,
+                      (ins VR128:$src1, memop:$src, CC:$cc), asm,
                         [(set VR128:$dst, (Int VR128:$src1,
-                                               (load addr:$src), immLeaf:$cc))],
+                                               mem_cpat:$src, immLeaf:$cc))],
                         itins.rm>,
            Sched<[itins.Sched.Folded, ReadAfterLd]>;
 }
 
 let isCodeGenOnly = 1 in {
   // Aliases to match intrinsics which expect XMM operand(s).
-  defm Int_VCMPSS  : sse12_cmp_scalar_int<f32mem, AVXCC, int_x86_sse_cmp_ss,
+  defm Int_VCMPSS  : sse12_cmp_scalar_int<ssmem, AVXCC, int_x86_sse_cmp_ss,
                        "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
-                       SSE_ALU_F32S, i8immZExt5>,
+                       SSE_ALU_F32S, i8immZExt5, sse_load_f32>,
                        XS, VEX_4V;
-  defm Int_VCMPSD  : sse12_cmp_scalar_int<f64mem, AVXCC, int_x86_sse2_cmp_sd,
+  defm Int_VCMPSD  : sse12_cmp_scalar_int<sdmem, AVXCC, int_x86_sse2_cmp_sd,
                        "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
-                       SSE_ALU_F32S, i8immZExt5>, // same latency as f32
+                       SSE_ALU_F32S, i8immZExt5, sse_load_f64>, // same latency as f32
                        XD, VEX_4V;
   let Constraints = "$src1 = $dst" in {
-    defm Int_CMPSS  : sse12_cmp_scalar_int<f32mem, SSECC, int_x86_sse_cmp_ss,
+    defm Int_CMPSS  : sse12_cmp_scalar_int<ssmem, SSECC, int_x86_sse_cmp_ss,
                          "cmp${cc}ss\t{$src, $dst|$dst, $src}",
-                         SSE_ALU_F32S, i8immZExt3>, XS;
-    defm Int_CMPSD  : sse12_cmp_scalar_int<f64mem, SSECC, int_x86_sse2_cmp_sd,
+                         SSE_ALU_F32S, i8immZExt3, sse_load_f32>, XS;
+    defm Int_CMPSD  : sse12_cmp_scalar_int<sdmem, SSECC, int_x86_sse2_cmp_sd,
                          "cmp${cc}sd\t{$src, $dst|$dst, $src}",
-                         SSE_ALU_F64S, i8immZExt3>,
+                         SSE_ALU_F64S, i8immZExt3, sse_load_f64>,
                          XD;
   }
 }

