diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Support/Host.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86.td | 3 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 23 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrFormats.td | 8 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.td | 3 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 132 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86Subtarget.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86Subtarget.h | 2 |
9 files changed, 118 insertions, 63 deletions
diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index 046aed82e46..9cc21a284a8 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -1262,6 +1262,9 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) { Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); Features["vaes"] = HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave; + // VPCLMULQDQ (carry-less multiplication quadword) + Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave; + bool HasLeafD = MaxLevel >= 0xd && !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 5c7bb0045c8..a5ffc72ce0e 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -160,6 +160,9 @@ def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true", def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true", "Enable packed carry-less multiplication instructions", [FeatureSSE2]>; +def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true", + "Enable vpclmulqdq instructions", + [FeatureAVX, FeaturePCLMUL]>; def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", "Enable four-operand fused multiple-add", [FeatureAVX, FeatureSSE4A]>; diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 964e495a4d3..984d1433616 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -10017,6 +10017,7 @@ defm : AVX512_scalar_math_f64_patterns<fdiv, "DIV">; //===----------------------------------------------------------------------===// // AES instructions //===----------------------------------------------------------------------===// + multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> { let Predicates = [HasVLX, HasVAES] in { defm Z128 : AESI_binop_rm_int<Op, OpStr, @@ -10040,3 +10041,25 @@ defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast"> defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">; defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">; +//===----------------------------------------------------------------------===// +// PCLMUL instructions - Carry less multiplication +//===----------------------------------------------------------------------===// + +let Predicates = [HasAVX512, HasVPCLMULQDQ] in +defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>, + EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG; + +let Predicates = [HasVLX, HasVPCLMULQDQ] in { +defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>, + EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG; + +defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64, + int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256, + EVEX_CD8<64, CD8VF>, VEX_WIG; +} + +// Aliases +defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>; +defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>; +defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>; + diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td index 5604aef725a..f9572dab05c 100644 --- a/llvm/lib/Target/X86/X86InstrFormats.td +++ b/llvm/lib/Target/X86/X86InstrFormats.td @@ -854,13 +854,7 @@ class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm, // PCLMUL Instruction Templates class PCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag>pattern, InstrItinClass itin = NoItinerary> - : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TAPD, - Requires<[NoAVX, HasPCLMUL]>; - -class AVXPCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag>pattern, InstrItinClass itin = NoItinerary> - : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TAPD, - VEX_4V, Requires<[HasAVX, HasPCLMUL]>; + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TAPD; // FMA3 Instruction Templates class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm, diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index e15619239d8..68f964cc866 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -5255,7 +5255,11 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, OpIdx1, OpIdx2); } case X86::PCLMULQDQrr: - case X86::VPCLMULQDQrr:{ + case X86::VPCLMULQDQrr: + case X86::VPCLMULQDQYrr: + case X86::VPCLMULQDQZrr: + case X86::VPCLMULQDQZ128rr: + case X86::VPCLMULQDQZ256rr: { // SRC1 64bits = Imm[0] ? SRC1[127:64] : SRC1[63:0] // SRC2 64bits = Imm[4] ? SRC2[127:64] : SRC2[63:0] unsigned Imm = MI.getOperand(3).getImm(); diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index c8698255c9a..91e55f84217 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -843,6 +843,9 @@ def HasXSAVEOPT : Predicate<"Subtarget->hasXSAVEOPT()">; def HasXSAVEC : Predicate<"Subtarget->hasXSAVEC()">; def HasXSAVES : Predicate<"Subtarget->hasXSAVES()">; def HasPCLMUL : Predicate<"Subtarget->hasPCLMUL()">; +def NoVLX_Or_NoVPCLMULQDQ : + Predicate<"!Subtarget->hasVLX() || !Subtarget->hasVPCLMULQDQ()">; +def HasVPCLMULQDQ : Predicate<"Subtarget->hasVPCLMULQDQ()">; def HasFMA : Predicate<"Subtarget->hasFMA()">; def HasFMA4 : Predicate<"Subtarget->hasFMA4()">; def HasXOP : Predicate<"Subtarget->hasXOP()">; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index a0b370ee546..ead8cb4ba00 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -7242,40 +7242,84 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), // PCLMUL Instructions //===----------------------------------------------------------------------===// +// SSE carry-less Multiplication instructions +let Constraints = "$src1 = $dst", Predicates = [NoAVX, HasPCLMUL] in { + let isCommutable = 1 in + def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, u8imm:$src3), + "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set VR128:$dst, + (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))], + IIC_SSE_PCLMULQDQ_RR>, Sched<[WriteCLMul]>; + + def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, u8imm:$src3), + "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set VR128:$dst, + (int_x86_pclmulqdq VR128:$src1, (memopv2i64 addr:$src2), + imm:$src3))], + IIC_SSE_PCLMULQDQ_RR>, Sched<[WriteCLMulLd, ReadAfterLd]>; +} + +// SSE aliases +foreach HI = ["hq","lq"] in +foreach LO = ["hq","lq"] in { + def : InstAlias<"pclmul" # HI # LO # "dq\t{$src, $dst|$dst, $src}", + (PCLMULQDQrr VR128:$dst, VR128:$src, + !add(!shl(!eq(LO,"hq"),4),!eq(HI,"hq"))), 0>; + def : InstAlias<"pclmul" # HI # LO # "dq\t{$src, $dst|$dst, $src}", + (PCLMULQDQrm VR128:$dst, i128mem:$src, + !add(!shl(!eq(LO,"hq"),4),!eq(HI,"hq"))), 0>; +} + // AVX carry-less Multiplication instructions -let isCommutable = 1 in -def VPCLMULQDQrr : AVXPCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, u8imm:$src3), - "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - [(set VR128:$dst, - (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>, - Sched<[WriteCLMul]>, VEX_WIG; - -def VPCLMULQDQrm : AVXPCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2, u8imm:$src3), - "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1, - (loadv2i64 addr:$src2), imm:$src3))]>, - Sched<[WriteCLMulLd, ReadAfterLd]>, VEX_WIG; - -// Carry-less Multiplication instructions -let Constraints = "$src1 = $dst" in { -let isCommutable = 1 in -def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, u8imm:$src3), - "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, - (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))], - IIC_SSE_PCLMULQDQ_RR>, Sched<[WriteCLMul]>; - -def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2, u8imm:$src3), - "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1, - (memopv2i64 addr:$src2), imm:$src3))], - IIC_SSE_PCLMULQDQ_RM>, - Sched<[WriteCLMulLd, ReadAfterLd]>; -} // Constraints = "$src1 = $dst" +multiclass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp, + PatFrag LdFrag, Intrinsic IntId> { + let isCommutable = 1 in + def rr : PCLMULIi8<0x44, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, u8imm:$src3), + "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + [(set RC:$dst, + (IntId RC:$src1, RC:$src2, imm:$src3))]>, + Sched<[WriteCLMul]>; + + def rm : PCLMULIi8<0x44, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, MemOp:$src2, u8imm:$src3), + "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + [(set RC:$dst, + (IntId RC:$src1, (LdFrag addr:$src2), imm:$src3))]>, + Sched<[WriteCLMulLd, ReadAfterLd]>; +} + +let Predicates = [HasAVX, NoVLX_Or_NoVPCLMULQDQ, HasPCLMUL] in +defm VPCLMULQDQ : vpclmulqdq<VR128, i128mem, loadv2i64, + int_x86_pclmulqdq>, VEX_4V, VEX_WIG; + +let Predicates = [NoVLX, HasVPCLMULQDQ] in +defm VPCLMULQDQY : vpclmulqdq<VR256, i256mem, loadv4i64, + int_x86_pclmulqdq_256>, VEX_4V, VEX_L, VEX_WIG; + +multiclass vpclmulqdq_aliases_impl<string InstStr, RegisterClass RC, + X86MemOperand MemOp, string Hi, string Lo> { + def : InstAlias<"vpclmul"##Hi##Lo##"dq\t{$src2, $src1, $dst|$dst, $src1, $src2}", + (!cast<Instruction>(InstStr # "rr") RC:$dst, RC:$src1, RC:$src2, + !add(!shl(!eq(Lo,"hq"),4),!eq(Hi,"hq"))), 0>; + def : InstAlias<"vpclmul"##Hi##Lo##"dq\t{$src2, $src1, $dst|$dst, $src1, $src2}", + (!cast<Instruction>(InstStr # "rm") RC:$dst, RC:$src1, MemOp:$src2, + !add(!shl(!eq(Lo,"hq"),4),!eq(Hi,"hq"))), 0>; +} + +multiclass vpclmulqdq_aliases<string InstStr, RegisterClass RC, + X86MemOperand MemOp> { + defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "hq", "hq">; + defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "hq", "lq">; + defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "lq", "hq">; + defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "lq", "lq">; +} + +// AVX aliases +defm : vpclmulqdq_aliases<"VPCLMULQDQ", VR128, i128mem>; +defm : vpclmulqdq_aliases<"VPCLMULQDQY", VR256, i256mem>; // Immediate transform to help with commuting. def PCLMULCommuteImm : SDNodeXForm<imm, [{ @@ -7299,28 +7343,6 @@ let Predicates = [NoAVX, HasPCLMUL] in { (PCLMULCommuteImm imm:$src3))>; } -multiclass pclmul_alias<string asm, int immop> { - def : InstAlias<!strconcat("pclmul", asm, "dq {$src, $dst|$dst, $src}"), - (PCLMULQDQrr VR128:$dst, VR128:$src, immop), 0>; - - def : InstAlias<!strconcat("pclmul", asm, "dq {$src, $dst|$dst, $src}"), - (PCLMULQDQrm VR128:$dst, i128mem:$src, immop), 0>; - - def : InstAlias<!strconcat("vpclmul", asm, - "dq {$src2, $src1, $dst|$dst, $src1, $src2}"), - (VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop), - 0>; - - def : InstAlias<!strconcat("vpclmul", asm, - "dq {$src2, $src1, $dst|$dst, $src1, $src2}"), - (VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2, immop), - 0>; -} -defm : pclmul_alias<"hqhq", 0x11>; -defm : pclmul_alias<"hqlq", 0x01>; -defm : pclmul_alias<"lqhq", 0x10>; -defm : pclmul_alias<"lqlq", 0x00>; - //===----------------------------------------------------------------------===// // SSE4A Instructions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp index a4581e1a1a2..ff29b1e569c 100644 --- a/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/llvm/lib/Target/X86/X86Subtarget.cpp @@ -299,6 +299,7 @@ void X86Subtarget::initializeEnvironment() { HasXSAVEC = false; HasXSAVES = false; HasPCLMUL = false; + HasVPCLMULQDQ = false; HasFMA = false; HasFMA4 = false; HasXOP = false; diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index 3f0657be7a4..482605384af 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -126,6 +126,7 @@ protected: /// Target has carry-less multiplication bool HasPCLMUL; + bool HasVPCLMULQDQ; /// Target has 3-operand fused multiply-add bool HasFMA; @@ -465,6 +466,7 @@ public: bool hasXSAVEC() const { return HasXSAVEC; } bool hasXSAVES() const { return HasXSAVES; } bool hasPCLMUL() const { return HasPCLMUL; } + bool hasVPCLMULQDQ() const { return HasVPCLMULQDQ; } // Prefer FMA4 to FMA - its better for commutation/memory folding and // has equal or better performance on all supported targets. bool hasFMA() const { return HasFMA && !HasFMA4; } |