author    Craig Topper <craig.topper@gmail.com>  2016-12-12 07:57:21 +0000
committer Craig Topper <craig.topper@gmail.com>  2016-12-12 07:57:21 +0000
commit    f2c6f7abf32a5082af0ad42e5c094f28275aeb9a (patch)
tree      253165ff6a082f68d907b45edfab8aac748fde9e /llvm/lib/Target/X86
parent    eb4cc23fcd5b71b77ad3cac2dd0ecfbf84f0b404 (diff)
[X86] Change CMPSS/CMPSD intrinsic instructions to use sse_load_f32/f64 as their memory pattern instead of a full vector load.
These intrinsics only load a single element. We should use sse_load_f32/f64 to give more options for which loads they can match. Currently these instructions often only get their loads folded thanks to the load folding in the peephole pass. I plan to add more types of loads to sse_load_f32/f64 so we can match them without the peephole pass.

llvm-svn: 289423
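For illustration, a minimal C sketch (not part of the commit) of the kind of code this affects, assuming Clang lowers _mm_cmpeq_ss to the llvm.x86.sse.cmp.ss intrinsic; the function name is hypothetical:

    #include <xmmintrin.h>

    /* With the sse_load_f32 pattern, the scalar load below can fold
       directly into CMPSS's memory operand at instruction selection,
       e.g. "cmpeqss (%rdi), %xmm0", instead of relying on the load
       folding done later by the peephole pass. */
    __m128 cmp_with_scalar(__m128 a, const float *p) {
        __m128 b = _mm_load_ss(p);  /* load one float, zero upper lanes */
        return _mm_cmpeq_ss(a, b);  /* scalar compare-equal intrinsic */
    }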
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r--  llvm/lib/Target/X86/X86InstrSSE.td | 24
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index c9a6b4e523c..9a54e98f771 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -2317,9 +2317,9 @@ let Constraints = "$src1 = $dst" in {
SSE_ALU_F64S, i8immZExt3>, XD;
}
-multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
+multiclass sse12_cmp_scalar_int<Operand memop, Operand CC,
Intrinsic Int, string asm, OpndItins itins,
- ImmLeaf immLeaf> {
+ ImmLeaf immLeaf, ComplexPattern mem_cpat> {
def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
@@ -2327,30 +2327,30 @@ multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
itins.rr>,
Sched<[itins.Sched]>;
def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, x86memop:$src, CC:$cc), asm,
+ (ins VR128:$src1, memop:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
- (load addr:$src), immLeaf:$cc))],
+ mem_cpat:$src, immLeaf:$cc))],
itins.rm>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
let isCodeGenOnly = 1 in {
// Aliases to match intrinsics which expect XMM operand(s).
- defm Int_VCMPSS : sse12_cmp_scalar_int<f32mem, AVXCC, int_x86_sse_cmp_ss,
+ defm Int_VCMPSS : sse12_cmp_scalar_int<ssmem, AVXCC, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
- SSE_ALU_F32S, i8immZExt5>,
+ SSE_ALU_F32S, i8immZExt5, sse_load_f32>,
XS, VEX_4V;
- defm Int_VCMPSD : sse12_cmp_scalar_int<f64mem, AVXCC, int_x86_sse2_cmp_sd,
+ defm Int_VCMPSD : sse12_cmp_scalar_int<sdmem, AVXCC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
- SSE_ALU_F32S, i8immZExt5>, // same latency as f32
+ SSE_ALU_F32S, i8immZExt5, sse_load_f64>, // same latency as f32
XD, VEX_4V;
let Constraints = "$src1 = $dst" in {
- defm Int_CMPSS : sse12_cmp_scalar_int<f32mem, SSECC, int_x86_sse_cmp_ss,
+ defm Int_CMPSS : sse12_cmp_scalar_int<ssmem, SSECC, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
- SSE_ALU_F32S, i8immZExt3>, XS;
- defm Int_CMPSD : sse12_cmp_scalar_int<f64mem, SSECC, int_x86_sse2_cmp_sd,
+ SSE_ALU_F32S, i8immZExt3, sse_load_f32>, XS;
+ defm Int_CMPSD : sse12_cmp_scalar_int<sdmem, SSECC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
- SSE_ALU_F64S, i8immZExt3>,
+ SSE_ALU_F64S, i8immZExt3, sse_load_f64>,
XD;
}
}