diff options
| author | Vyacheslav Klochkov <vyacheslav.n.klochkov@gmail.com> | 2015-11-26 07:45:30 +0000 |
|---|---|---|
| committer | Vyacheslav Klochkov <vyacheslav.n.klochkov@gmail.com> | 2015-11-26 07:45:30 +0000 |
| commit | ed865dfcc50d5248dbbb055c8859ebf6a15f3bc8 (patch) | |
| tree | 3ff2d8a82dcff448c87986f9dfcca8b22a030fdb /llvm/lib | |
| parent | 4c175cdc8ef4be5fc77ecfdb9810005f5286ac0f (diff) | |
| download | bcm5719-llvm-ed865dfcc50d5248dbbb055c8859ebf6a15f3bc8.tar.gz bcm5719-llvm-ed865dfcc50d5248dbbb055c8859ebf6a15f3bc8.zip | |
X86-FMA3: Improved/enabled the memory folding optimization for scalar loads
generated for _mm_load_s{s,d}() intrinsics and used in scalar FMAs generated
for FMA intrinsics _mm_f{madd,msub,nmadd,nmsub}_s{s,d}().
Reviewer: David Kreitzer
Differential Revision: http://reviews.llvm.org/D14762
llvm-svn: 254140
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFMA.td | 18 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 12 |
2 files changed, 17 insertions, 13 deletions
diff --git a/llvm/lib/Target/X86/X86InstrFMA.td b/llvm/lib/Target/X86/X86InstrFMA.td index 49f8ff2263d..0467a64d7e5 100644 --- a/llvm/lib/Target/X86/X86InstrFMA.td +++ b/llvm/lib/Target/X86/X86InstrFMA.td @@ -170,7 +170,7 @@ multiclass fma3s_rm<bits<8> opc, string OpcodeStr, // Commuting the 1st operand of FMA*_Int requires some additional analysis, // the commute optimization is legal only if all users of FMA*_Int use only // the lowest element of the FMA*_Int instruction. Even though such analysis -// may be not implemened yet we allow the routines doing the actual commute +// may be not implemented yet we allow the routines doing the actual commute // transformation to decide if one or another instruction is commutable or not. let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1, hasSideEffects = 0 in @@ -237,20 +237,12 @@ multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231, // require the pass-through values to come from the first source // operand, not the second. 
def : Pat<(IntF32 VR128:$src1, VR128:$src2, VR128:$src3), - (COPY_TO_REGCLASS - (!cast<Instruction>(NAME#"SSr213r_Int") - (COPY_TO_REGCLASS $src1, FR32), - (COPY_TO_REGCLASS $src2, FR32), - (COPY_TO_REGCLASS $src3, FR32)), - VR128)>; + (COPY_TO_REGCLASS(!cast<Instruction>(NAME#"SSr213r_Int") + $src1, $src2, $src3), VR128)>; def : Pat<(IntF64 VR128:$src1, VR128:$src2, VR128:$src3), - (COPY_TO_REGCLASS - (!cast<Instruction>(NAME#"SDr213r_Int") - (COPY_TO_REGCLASS $src1, FR64), - (COPY_TO_REGCLASS $src2, FR64), - (COPY_TO_REGCLASS $src3, FR64)), - VR128)>; + (COPY_TO_REGCLASS(!cast<Instruction>(NAME#"SDr213r_Int") + $src1, $src2, $src3), VR128)>; } defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss, diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index aa635fc634a..aaeef465bf5 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -5867,6 +5867,12 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int: case X86::MULSSrr_Int: case X86::VMULSSrr_Int: case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int: + case X86::VFMADDSSr132r_Int: case X86::VFNMADDSSr132r_Int: + case X86::VFMADDSSr213r_Int: case X86::VFNMADDSSr213r_Int: + case X86::VFMADDSSr231r_Int: case X86::VFNMADDSSr231r_Int: + case X86::VFMSUBSSr132r_Int: case X86::VFNMSUBSSr132r_Int: + case X86::VFMSUBSSr213r_Int: case X86::VFNMSUBSSr213r_Int: + case X86::VFMSUBSSr231r_Int: case X86::VFNMSUBSSr231r_Int: return false; default: return true; @@ -5882,6 +5888,12 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int: case X86::MULSDrr_Int: case X86::VMULSDrr_Int: case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int: + case X86::VFMADDSDr132r_Int: case X86::VFNMADDSDr132r_Int: + case X86::VFMADDSDr213r_Int: case X86::VFNMADDSDr213r_Int: + case X86::VFMADDSDr231r_Int: case X86::VFNMADDSDr231r_Int: + 
case X86::VFMSUBSDr132r_Int: case X86::VFNMSUBSDr132r_Int: + case X86::VFMSUBSDr213r_Int: case X86::VFNMSUBSDr213r_Int: + case X86::VFMSUBSDr231r_Int: case X86::VFNMSUBSDr231r_Int: return false; default: return true; |

