diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-03-29 22:03:05 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-03-29 22:03:05 +0000 |
| commit | ee3c19fd7fb0d2fa8e2f7e49bd0bb7c1394fcda9 (patch) | |
| tree | 5c984e508b3ac250ccb1cc6e8d4900294311c875 /llvm/lib/Target/X86/X86InstrFMA.td | |
| parent | dd4baff48d3318b5bcd71e720050cb5696908083 (diff) | |
| download | bcm5719-llvm-ee3c19fd7fb0d2fa8e2f7e49bd0bb7c1394fcda9.tar.gz bcm5719-llvm-ee3c19fd7fb0d2fa8e2f7e49bd0bb7c1394fcda9.zip | |
[X86] Add ReadAfterLds to some 3 src instructions
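Sometimes the register operand comes after the memory operand, so we need 5 ReadDefaults first (one for each of the memory operand's five sub-operands) before that register's ReadAfterLd.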
I suspect we also need to do something for the mask operand of masked AVX-512 instructions. I'm not sure whether the mask should be ReadAfterLd, since it can mask faults. If it shouldn't be ReadAfterLd, then we're probably already wrong for zero-masking instructions.
Differential Revision: https://reviews.llvm.org/D44726
llvm-svn: 328834
Diffstat (limited to 'llvm/lib/Target/X86/X86InstrFMA.td')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFMA.td | 52 |
1 files changed, 37 insertions, 15 deletions
diff --git a/llvm/lib/Target/X86/X86InstrFMA.td b/llvm/lib/Target/X86/X86InstrFMA.td index 35fa45590fc..3928f0f0193 100644 --- a/llvm/lib/Target/X86/X86InstrFMA.td +++ b/llvm/lib/Target/X86/X86InstrFMA.td @@ -51,7 +51,7 @@ multiclass fma3p_rm_213<bits<8> opc, string OpcodeStr, RegisterClass RC, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (VT (Op RC:$src2, RC:$src1, (MemFrag addr:$src3))))]>, - Sched<[WriteFMALd, ReadAfterLd]>; + Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>; } multiclass fma3p_rm_231<bits<8> opc, string OpcodeStr, RegisterClass RC, @@ -70,7 +70,8 @@ multiclass fma3p_rm_231<bits<8> opc, string OpcodeStr, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (VT (Op RC:$src2, (MemFrag addr:$src3), - RC:$src1)))]>, Sched<[WriteFMALd, ReadAfterLd]>; + RC:$src1)))]>, + Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>; } multiclass fma3p_rm_132<bits<8> opc, string OpcodeStr, RegisterClass RC, @@ -91,7 +92,8 @@ multiclass fma3p_rm_132<bits<8> opc, string OpcodeStr, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (VT (Op (MemFrag addr:$src3), RC:$src1, - RC:$src2)))]>, Sched<[WriteFMALd, ReadAfterLd]>; + RC:$src2)))]>, + Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>; } let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in @@ -184,7 +186,7 @@ multiclass fma3s_rm_213<bits<8> opc, string OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (OpNode RC:$src2, RC:$src1, (load addr:$src3)))]>, - Sched<[WriteFMALd, ReadAfterLd]>; + Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>; } multiclass fma3s_rm_231<bits<8> opc, string OpcodeStr, @@ -204,7 +206,7 @@ multiclass fma3s_rm_231<bits<8> opc, string OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (OpNode RC:$src2, (load addr:$src3), RC:$src1))]>, - Sched<[WriteFMALd, ReadAfterLd]>; + Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>; } multiclass 
fma3s_rm_132<bits<8> opc, string OpcodeStr, @@ -226,7 +228,7 @@ multiclass fma3s_rm_132<bits<8> opc, string OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (OpNode (load addr:$src3), RC:$src1, RC:$src2))]>, - Sched<[WriteFMALd, ReadAfterLd]>; + Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>; } let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in @@ -270,7 +272,7 @@ multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr, (ins RC:$src1, RC:$src2, memopr:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>, Sched<[WriteFMALd, ReadAfterLd]>; + []>, Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>; } // The FMA 213 form is created for lowering of scalar FMA intrinscis @@ -374,14 +376,19 @@ multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, (OpNode RC:$src1, RC:$src2, (mem_frag addr:$src3)))]>, VEX_W, VEX_LIG, - Sched<[WriteFMALd, ReadAfterLd]>; + Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>; def mr : FMA4S<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>, VEX_LIG, - Sched<[WriteFMALd, ReadAfterLd]>; + Sched<[WriteFMALd, ReadAfterLd, + // x86memop:$src2 + ReadDefault, ReadDefault, ReadDefault, ReadDefault, + ReadDefault, + // RC:$src3 + ReadAfterLd]>; // For disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in def rr_REV : FMA4S<opc, MRMSrcReg, (outs RC:$dst), @@ -407,14 +414,19 @@ let isCodeGenOnly = 1 in { "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (VT (OpNode VR128:$src1, VR128:$src2, mem_cpat:$src3)))]>, VEX_W, VEX_LIG, - Sched<[WriteFMALd, ReadAfterLd]>; + Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>; def mr_Int : FMA4S_Int<opc, MRMSrcMem, (outs VR128:$dst), (ins 
VR128:$src1, memop:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (VT (OpNode VR128:$src1, mem_cpat:$src2, VR128:$src3)))]>, - VEX_LIG, Sched<[WriteFMALd, ReadAfterLd]>; + VEX_LIG, Sched<[WriteFMALd, ReadAfterLd, + // memop:$src2 + ReadDefault, ReadDefault, ReadDefault, + ReadDefault, ReadDefault, + // VR128::$src3 + ReadAfterLd]>; let hasSideEffects = 0 in def rr_Int_REV : FMA4S_Int<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, VR128:$src3), @@ -441,14 +453,19 @@ multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (OpNode VR128:$src1, VR128:$src2, (ld_frag128 addr:$src3)))]>, VEX_W, - Sched<[WriteFMALd, ReadAfterLd]>; + Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>; def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (OpNode VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>, - Sched<[WriteFMALd, ReadAfterLd]>; + Sched<[WriteFMALd, ReadAfterLd, + // f128mem:$src2 + ReadDefault, ReadDefault, ReadDefault, ReadDefault, + ReadDefault, + // VR128::$src3 + ReadAfterLd]>; let isCommutable = 1 in def Yrr : FMA4<opc, MRMSrcRegOp4, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, VR256:$src3), @@ -463,14 +480,19 @@ multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR256:$dst, (OpNode VR256:$src1, VR256:$src2, (ld_frag256 addr:$src3)))]>, VEX_W, VEX_L, - Sched<[WriteFMALd, ReadAfterLd]>; + Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>; def Ymr : FMA4<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, VR256:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR256:$dst, (OpNode VR256:$src1, (ld_frag256 
addr:$src2), VR256:$src3))]>, VEX_L, - Sched<[WriteFMALd, ReadAfterLd]>; + Sched<[WriteFMALd, ReadAfterLd, + // f256mem:$src2 + ReadDefault, ReadDefault, ReadDefault, ReadDefault, + ReadDefault, + // VR256::$src3 + ReadAfterLd]>; // For disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst), |

