Diffstat (limited to 'llvm/lib')
-rw-r--r--   llvm/lib/Target/X86/X86InstrAVX512.td | 12
-rw-r--r--   llvm/lib/Target/X86/X86InstrInfo.cpp  | 30
-rw-r--r--   llvm/lib/Target/X86/X86InstrSSE.td    | 20
3 files changed, 37 insertions, 25 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 02f5af438b6..78ce2e339ec 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -6362,14 +6362,11 @@ let Predicates = [HasAVX512] in {
 }
 
 let SchedRW = [WriteFStore] in {
+let mayStore = 1, hasSideEffects = 0 in
 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
                               (ins f64mem:$dst, VR128X:$src),
                               "vmovhps\t{$src, $dst|$dst, $src}",
-                              [(store (f64 (extractelt
-                                            (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
-                                                       (bc_v2f64 (v4f32 VR128X:$src))),
-                                            (iPTR 0))), addr:$dst)]>,
-                              EVEX, EVEX_CD8<32, CD8VT2>;
+                              []>, EVEX, EVEX_CD8<32, CD8VT2>;
 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
                               (ins f64mem:$dst, VR128X:$src),
                               "vmovhpd\t{$src, $dst|$dst, $src}",
@@ -6377,12 +6374,11 @@ def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
                                (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
                                (iPTR 0))), addr:$dst)]>,
                               EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
+let mayStore = 1, hasSideEffects = 0 in
 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
                               (ins f64mem:$dst, VR128X:$src),
                               "vmovlps\t{$src, $dst|$dst, $src}",
-                              [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)),
-                                                       (iPTR 0))), addr:$dst)]>,
-                              EVEX, EVEX_CD8<32, CD8VT2>;
+                              []>, EVEX, EVEX_CD8<32, CD8VT2>;
 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
                               (ins f64mem:$dst, VR128X:$src),
                               "vmovlpd\t{$src, $dst|$dst, $src}",
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 2e868a60ce4..acb9128db79 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -5962,6 +5962,19 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = {
   { X86::VUNPCKHPSYrr,  X86::VUNPCKHPSYrr,  X86::VPUNPCKHDQYrr },
 };
 
+static const uint16_t ReplaceableInstrsFP[][3] = {
+  //PackedSingle         PackedDouble
+  { X86::MOVLPSrm,       X86::MOVLPDrm,      X86::INSTRUCTION_LIST_END },
+  { X86::MOVHPSrm,       X86::MOVHPDrm,      X86::INSTRUCTION_LIST_END },
+  { X86::MOVHPSmr,       X86::MOVHPDmr,      X86::INSTRUCTION_LIST_END },
+  { X86::VMOVLPSrm,      X86::VMOVLPDrm,     X86::INSTRUCTION_LIST_END },
+  { X86::VMOVHPSrm,      X86::VMOVHPDrm,     X86::INSTRUCTION_LIST_END },
+  { X86::VMOVHPSmr,      X86::VMOVHPDmr,     X86::INSTRUCTION_LIST_END },
+  { X86::VMOVLPSZ128rm,  X86::VMOVLPDZ128rm, X86::INSTRUCTION_LIST_END },
+  { X86::VMOVHPSZ128rm,  X86::VMOVHPDZ128rm, X86::INSTRUCTION_LIST_END },
+  { X86::VMOVHPSZ128mr,  X86::VMOVHPDZ128mr, X86::INSTRUCTION_LIST_END },
+};
+
 static const uint16_t ReplaceableInstrsAVX2InsertExtract[][3] = {
   //PackedSingle       PackedDouble       PackedInt
   { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
@@ -6202,7 +6215,7 @@ static const uint16_t ReplaceableInstrsAVX512DQMasked[][4] = {
 };
 
 // NOTE: These should only be used by the custom domain methods.
-static const uint16_t ReplaceableCustomInstrs[][3] = {
+static const uint16_t ReplaceableBlendInstrs[][3] = {
   //PackedSingle             PackedDouble             PackedInt
   { X86::BLENDPSrmi,         X86::BLENDPDrmi,         X86::PBLENDWrmi   },
   { X86::BLENDPSrri,         X86::BLENDPDrri,         X86::PBLENDWrri   },
@@ -6211,7 +6224,7 @@ static const uint16_t ReplaceableCustomInstrs[][3] = {
   { X86::VBLENDPSYrmi,       X86::VBLENDPDYrmi,       X86::VPBLENDWYrmi },
   { X86::VBLENDPSYrri,       X86::VBLENDPDYrri,       X86::VPBLENDWYrri },
 };
-static const uint16_t ReplaceableCustomAVX2Instrs[][3] = {
+static const uint16_t ReplaceableBlendAVX2Instrs[][3] = {
   //PackedSingle             PackedDouble             PackedInt
   { X86::VBLENDPSrmi,        X86::VBLENDPDrmi,        X86::VPBLENDDrmi  },
   { X86::VBLENDPSrri,        X86::VBLENDPDrri,        X86::VPBLENDDrri  },
@@ -6405,9 +6418,9 @@ bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI,
   Imm = (ImmWidth == 16 ? ((Imm << 8) | Imm) : Imm);
   unsigned NewImm = Imm;
 
-  const uint16_t *table = lookup(Opcode, dom, ReplaceableCustomInstrs);
+  const uint16_t *table = lookup(Opcode, dom, ReplaceableBlendInstrs);
   if (!table)
-    table = lookup(Opcode, dom, ReplaceableCustomAVX2Instrs);
+    table = lookup(Opcode, dom, ReplaceableBlendAVX2Instrs);
 
   if (Domain == 1) { // PackedSingle
     AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm);
@@ -6417,7 +6430,7 @@ bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI,
     if (Subtarget.hasAVX2()) {
       // If we are already VPBLENDW use that, else use VPBLENDD.
       if ((ImmWidth / (Is256 ? 2 : 1)) != 8) {
-        table = lookup(Opcode, dom, ReplaceableCustomAVX2Instrs);
+        table = lookup(Opcode, dom, ReplaceableBlendAVX2Instrs);
         AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm);
       }
     } else {
@@ -6525,6 +6538,8 @@ X86InstrInfo::getExecutionDomain(const MachineInstr &MI) const {
       validDomains = 0xe;
     } else if (lookup(opcode, domain, ReplaceableInstrsAVX2)) {
       validDomains = Subtarget.hasAVX2() ? 0xe : 0x6;
+    } else if (lookup(opcode, domain, ReplaceableInstrsFP)) {
+      validDomains = 0x6;
     } else if (lookup(opcode, domain, ReplaceableInstrsAVX2InsertExtract)) {
       // Insert/extract instructions should only effect domain if AVX2
       // is enabled.
@@ -6564,6 +6579,11 @@ void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const {
            "256-bit vector operations only available in AVX2");
     table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2);
   }
+  if (!table) { // try the FP table
+    table = lookup(MI.getOpcode(), dom, ReplaceableInstrsFP);
+    assert((!table || Domain < 3) &&
+           "Can only select PackedSingle or PackedDouble");
+  }
   if (!table) { // try the other table
     assert(Subtarget.hasAVX2() &&
            "256-bit insert/extract only available in AVX2");
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 76530adc152..ea14fb0600a 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -635,10 +635,10 @@ defm MOVL : sse12_mov_hilo_packed<0x12, X86Movsd, "movlp">;
 
 let SchedRW = [WriteFStore] in {
 let Predicates = [UseAVX] in {
+let mayStore = 1, hasSideEffects = 0 in
 def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                      "movlps\t{$src, $dst|$dst, $src}",
-                     [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128:$src)),
-                                   (iPTR 0))), addr:$dst)]>,
+                     []>,
                      VEX, VEX_WIG;
 def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                      "movlpd\t{$src, $dst|$dst, $src}",
@@ -646,10 +646,10 @@ def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                      (iPTR 0))), addr:$dst)]>,
                      VEX, VEX_WIG;
 }// UseAVX
+let mayStore = 1, hasSideEffects = 0 in
 def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movlps\t{$src, $dst|$dst, $src}",
-                   [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128:$src)),
-                                 (iPTR 0))), addr:$dst)]>;
+                   []>;
 def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movlpd\t{$src, $dst|$dst, $src}",
                    [(store (f64 (extractelt (v2f64 VR128:$src),
@@ -675,24 +675,20 @@ let SchedRW = [WriteFStore] in {
 // v2f64 extract element 1 is always custom lowered to unpack high to low
 // and extract element 0 so the non-store version isn't too horrible.
 let Predicates = [UseAVX] in {
+let mayStore = 1, hasSideEffects = 0 in
 def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                      "movhps\t{$src, $dst|$dst, $src}",
-                     [(store (f64 (extractelt
-                                   (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
-                                              (bc_v2f64 (v4f32 VR128:$src))),
-                                   (iPTR 0))), addr:$dst)]>, VEX, VEX_WIG;
+                     []>, VEX, VEX_WIG;
 def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                      "movhpd\t{$src, $dst|$dst, $src}",
                      [(store (f64 (extractelt
                                    (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                    (iPTR 0))), addr:$dst)]>, VEX, VEX_WIG;
 } // UseAVX
+let mayStore = 1, hasSideEffects = 0 in
 def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movhps\t{$src, $dst|$dst, $src}",
-                   [(store (f64 (extractelt
-                                 (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
-                                            (bc_v2f64 (v4f32 VR128:$src))),
-                                 (iPTR 0))), addr:$dst)]>;
+                   []>;
 def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movhpd\t{$src, $dst|$dst, $src}",
                    [(store (f64 (extractelt
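Note: the new ReplaceableInstrsFP table above reuses the three-column layout of the other Replaceable* tables: each row lists the PackedSingle, PackedDouble, and PackedInt encodings of one operation, and X86::INSTRUCTION_LIST_END marks the missing integer form, which is why getExecutionDomain reports validDomains = 0x6 (only domains 1 and 2). As a rough illustration only, here is a minimal standalone C++ sketch of that row-lookup/column-swap idea; the opcode constants and the lookupRow helper are hypothetical stand-ins, not LLVM's generated enums or its lookup() helper.

#include <cstdint>
#include <cstdio>

// Hypothetical opcode values standing in for the generated X86::* enumerators.
enum : uint16_t {
  OP_LIST_END = 0,  // plays the role of X86::INSTRUCTION_LIST_END
  OP_MOVLPSrm,
  OP_MOVLPDrm,
  OP_MOVHPSrm,
  OP_MOVHPDrm,
};

// Rows mirror ReplaceableInstrsFP: column 0 = PackedSingle, column 1 =
// PackedDouble, column 2 = PackedInt. The sentinel in column 2 means these
// loads/stores have no integer-domain equivalent.
static const uint16_t ReplaceableFP[][3] = {
    {OP_MOVLPSrm, OP_MOVLPDrm, OP_LIST_END},
    {OP_MOVHPSrm, OP_MOVHPDrm, OP_LIST_END},
};

// Return the row whose column for the current domain (1-based, as in
// X86InstrInfo) matches `opcode`, or nullptr if the opcode isn't in the table.
static const uint16_t *lookupRow(uint16_t opcode, unsigned domain) {
  for (const auto &row : ReplaceableFP)
    if (row[domain - 1] == opcode)
      return row;
  return nullptr;
}

int main() {
  unsigned curDomain = 1;         // PackedSingle (e.g. a MOVLPS load)
  uint16_t opcode = OP_MOVLPSrm;

  if (const uint16_t *row = lookupRow(opcode, curDomain)) {
    unsigned newDomain = 2;       // switch the instruction to PackedDouble
    uint16_t newOpcode = row[newDomain - 1];
    std::printf("opcode %u (domain %u) -> opcode %u (domain %u)\n",
                opcode, curDomain, newOpcode, newDomain);
  }
  return 0;
}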

