summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/X86/X86InstrAVX512.td | 12
-rw-r--r-- llvm/lib/Target/X86/X86InstrInfo.cpp | 30
-rw-r--r-- llvm/lib/Target/X86/X86InstrSSE.td | 20
3 files changed, 37 insertions, 25 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 02f5af438b6..78ce2e339ec 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -6362,14 +6362,11 @@ let Predicates = [HasAVX512] in {
}
let SchedRW = [WriteFStore] in {
+let mayStore = 1, hasSideEffects = 0 in
def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
(ins f64mem:$dst, VR128X:$src),
"vmovhps\t{$src, $dst|$dst, $src}",
- [(store (f64 (extractelt
- (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
- (bc_v2f64 (v4f32 VR128X:$src))),
- (iPTR 0))), addr:$dst)]>,
- EVEX, EVEX_CD8<32, CD8VT2>;
+ []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
(ins f64mem:$dst, VR128X:$src),
"vmovhpd\t{$src, $dst|$dst, $src}",
@@ -6377,12 +6374,11 @@ def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
(v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
(iPTR 0))), addr:$dst)]>,
EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
+let mayStore = 1, hasSideEffects = 0 in
def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
(ins f64mem:$dst, VR128X:$src),
"vmovlps\t{$src, $dst|$dst, $src}",
- [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)),
- (iPTR 0))), addr:$dst)]>,
- EVEX, EVEX_CD8<32, CD8VT2>;
+ []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
(ins f64mem:$dst, VR128X:$src),
"vmovlpd\t{$src, $dst|$dst, $src}",
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 2e868a60ce4..acb9128db79 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -5962,6 +5962,19 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = {
{ X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrr, X86::VPUNPCKHDQYrr },
};
+static const uint16_t ReplaceableInstrsFP[][3] = { // Rows pair a PackedSingle opcode with its PackedDouble counterpart.
+ //PackedSingle PackedDouble (third column holds INSTRUCTION_LIST_END in every row; no PackedInt opcode is listed)
+ { X86::MOVLPSrm, X86::MOVLPDrm, X86::INSTRUCTION_LIST_END },
+ { X86::MOVHPSrm, X86::MOVHPDrm, X86::INSTRUCTION_LIST_END },
+ { X86::MOVHPSmr, X86::MOVHPDmr, X86::INSTRUCTION_LIST_END },
+ { X86::VMOVLPSrm, X86::VMOVLPDrm, X86::INSTRUCTION_LIST_END },
+ { X86::VMOVHPSrm, X86::VMOVHPDrm, X86::INSTRUCTION_LIST_END },
+ { X86::VMOVHPSmr, X86::VMOVHPDmr, X86::INSTRUCTION_LIST_END },
+ { X86::VMOVLPSZ128rm, X86::VMOVLPDZ128rm, X86::INSTRUCTION_LIST_END },
+ { X86::VMOVHPSZ128rm, X86::VMOVHPDZ128rm, X86::INSTRUCTION_LIST_END },
+ { X86::VMOVHPSZ128mr, X86::VMOVHPDZ128mr, X86::INSTRUCTION_LIST_END },
+}; // Used by getExecutionDomain (validDomains = 0x6) and setExecutionDomain (asserts Domain < 3). NOTE(review): no MOVLPSmr/MOVLPDmr store rows here; presumably covered by another table, confirm.
+
static const uint16_t ReplaceableInstrsAVX2InsertExtract[][3] = {
//PackedSingle PackedDouble PackedInt
{ X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
@@ -6202,7 +6215,7 @@ static const uint16_t ReplaceableInstrsAVX512DQMasked[][4] = {
};
// NOTE: These should only be used by the custom domain methods.
-static const uint16_t ReplaceableCustomInstrs[][3] = {
+static const uint16_t ReplaceableBlendInstrs[][3] = {
//PackedSingle PackedDouble PackedInt
{ X86::BLENDPSrmi, X86::BLENDPDrmi, X86::PBLENDWrmi },
{ X86::BLENDPSrri, X86::BLENDPDrri, X86::PBLENDWrri },
@@ -6211,7 +6224,7 @@ static const uint16_t ReplaceableCustomInstrs[][3] = {
{ X86::VBLENDPSYrmi, X86::VBLENDPDYrmi, X86::VPBLENDWYrmi },
{ X86::VBLENDPSYrri, X86::VBLENDPDYrri, X86::VPBLENDWYrri },
};
-static const uint16_t ReplaceableCustomAVX2Instrs[][3] = {
+static const uint16_t ReplaceableBlendAVX2Instrs[][3] = {
//PackedSingle PackedDouble PackedInt
{ X86::VBLENDPSrmi, X86::VBLENDPDrmi, X86::VPBLENDDrmi },
{ X86::VBLENDPSrri, X86::VBLENDPDrri, X86::VPBLENDDrri },
@@ -6405,9 +6418,9 @@ bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI,
Imm = (ImmWidth == 16 ? ((Imm << 8) | Imm) : Imm);
unsigned NewImm = Imm;
- const uint16_t *table = lookup(Opcode, dom, ReplaceableCustomInstrs);
+ const uint16_t *table = lookup(Opcode, dom, ReplaceableBlendInstrs);
if (!table)
- table = lookup(Opcode, dom, ReplaceableCustomAVX2Instrs);
+ table = lookup(Opcode, dom, ReplaceableBlendAVX2Instrs);
if (Domain == 1) { // PackedSingle
AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm);
@@ -6417,7 +6430,7 @@ bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI,
if (Subtarget.hasAVX2()) {
// If we are already VPBLENDW use that, else use VPBLENDD.
if ((ImmWidth / (Is256 ? 2 : 1)) != 8) {
- table = lookup(Opcode, dom, ReplaceableCustomAVX2Instrs);
+ table = lookup(Opcode, dom, ReplaceableBlendAVX2Instrs);
AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm);
}
} else {
@@ -6525,6 +6538,8 @@ X86InstrInfo::getExecutionDomain(const MachineInstr &MI) const {
validDomains = 0xe;
} else if (lookup(opcode, domain, ReplaceableInstrsAVX2)) {
validDomains = Subtarget.hasAVX2() ? 0xe : 0x6;
+ } else if (lookup(opcode, domain, ReplaceableInstrsFP)) {
+ validDomains = 0x6;
} else if (lookup(opcode, domain, ReplaceableInstrsAVX2InsertExtract)) {
// Insert/extract instructions should only effect domain if AVX2
// is enabled.
@@ -6564,6 +6579,11 @@ void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const {
"256-bit vector operations only available in AVX2");
table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2);
}
+ if (!table) { // try the FP table
+ table = lookup(MI.getOpcode(), dom, ReplaceableInstrsFP);
+ assert((!table || Domain < 3) &&
+ "Can only select PackedSingle or PackedDouble");
+ }
if (!table) { // try the other table
assert(Subtarget.hasAVX2() &&
"256-bit insert/extract only available in AVX2");
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 76530adc152..ea14fb0600a 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -635,10 +635,10 @@ defm MOVL : sse12_mov_hilo_packed<0x12, X86Movsd, "movlp">;
let SchedRW = [WriteFStore] in {
let Predicates = [UseAVX] in {
+let mayStore = 1, hasSideEffects = 0 in
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
- [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128:$src)),
- (iPTR 0))), addr:$dst)]>,
+ []>,
VEX, VEX_WIG;
def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlpd\t{$src, $dst|$dst, $src}",
@@ -646,10 +646,10 @@ def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
(iPTR 0))), addr:$dst)]>,
VEX, VEX_WIG;
}// UseAVX
+let mayStore = 1, hasSideEffects = 0 in
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
- [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128:$src)),
- (iPTR 0))), addr:$dst)]>;
+ []>;
def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlpd\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt (v2f64 VR128:$src),
@@ -675,24 +675,20 @@ let SchedRW = [WriteFStore] in {
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
let Predicates = [UseAVX] in {
+let mayStore = 1, hasSideEffects = 0 in
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
- [(store (f64 (extractelt
- (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
- (bc_v2f64 (v4f32 VR128:$src))),
- (iPTR 0))), addr:$dst)]>, VEX, VEX_WIG;
+ []>, VEX, VEX_WIG;
def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhpd\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt
(v2f64 (X86Unpckh VR128:$src, VR128:$src)),
(iPTR 0))), addr:$dst)]>, VEX, VEX_WIG;
} // UseAVX
+let mayStore = 1, hasSideEffects = 0 in
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
- [(store (f64 (extractelt
- (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
- (bc_v2f64 (v4f32 VR128:$src))),
- (iPTR 0))), addr:$dst)]>;
+ []>;
def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhpd\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt
OpenPOWER on IntegriCloud