diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 64 |
2 files changed, 38 insertions, 30 deletions
diff --git a/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp index a6a0f68e1c1..12bfd2fdd4c 100644 --- a/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -625,11 +625,15 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::VPERMQYri: + case X86::VPERMQZ256ri: case X86::VPERMPDYri: + case X86::VPERMPDZ256ri: Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. case X86::VPERMQYmi: + case X86::VPERMQZ256mi: case X86::VPERMPDYmi: + case X86::VPERMPDZ256mi: if (MI->getOperand(NumOperands - 1).isImm()) DecodeVPERMMask(MI->getOperand(NumOperands - 1).getImm(), ShuffleMask); diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 90930aa6453..9d3e933ba0d 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -8463,21 +8463,23 @@ let Predicates = [HasAVX, NoVLX], AddedComplexity = 20 in { multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, ValueType OpVT, X86FoldableSchedWrite Sched> { - def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, - (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>, - Sched<[Sched]>, VEX_4V, VEX_L; - def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), - (ins VR256:$src1, i256mem:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, - (OpVT (X86VPermv VR256:$src1, - (bitconvert (mem_frag addr:$src2)))))]>, - Sched<[Sched.Folded, ReadAfterLd]>, VEX_4V, VEX_L; + let Predicates = [HasAVX2, NoVLX] in { + def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR256:$dst, + (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>, + Sched<[Sched]>, VEX_4V, VEX_L; + def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, i256mem:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR256:$dst, + (OpVT (X86VPermv VR256:$src1, + (bitconvert (mem_frag addr:$src2)))))]>, + Sched<[Sched.Folded, ReadAfterLd]>, VEX_4V, VEX_L; + } } defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32, WriteShuffle256>; @@ -8486,21 +8488,23 @@ defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFShuffle256>; multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, ValueType OpVT, X86FoldableSchedWrite Sched> { - def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst), - (ins VR256:$src1, u8imm:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, - (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>, - Sched<[Sched]>, VEX, VEX_L; - def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst), - (ins i256mem:$src1, u8imm:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, - (OpVT (X86VPermi (mem_frag addr:$src1), - (i8 imm:$src2))))]>, - Sched<[Sched.Folded, ReadAfterLd]>, VEX, VEX_L; + let Predicates = [HasAVX2, NoVLX] in { + def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, u8imm:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR256:$dst, + (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>, + Sched<[Sched]>, VEX, VEX_L; + def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst), + (ins i256mem:$src1, u8imm:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR256:$dst, + (OpVT (X86VPermi (mem_frag addr:$src1), + (i8 imm:$src2))))]>, + Sched<[Sched.Folded, ReadAfterLd]>, VEX, VEX_L; + } } defm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64, |