diff options
author | Craig Topper <craig.topper@intel.com> | 2017-11-21 21:05:21 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2017-11-21 21:05:21 +0000 |
commit | ba150ef60a9ebadcb20b9557fb86d4b61391db48 (patch) | |
tree | 4e10f67e3fddb18cc7a919dd25971ad3266e0709 /llvm/lib | |
parent | a890570b1586cf35426e4b5bf340c4bf5f3a14b5 (diff) | |
download | bcm5719-llvm-ba150ef60a9ebadcb20b9557fb86d4b61391db48.tar.gz bcm5719-llvm-ba150ef60a9ebadcb20b9557fb86d4b61391db48.zip |
[X86] Allow vpclmulqdq instructions to be commuted during isel to allow load folding.
The commuting patterns for the AVX version actually still had priority over the new patterns.
llvm-svn: 318800
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 75 |
1 file changed, 36 insertions, 39 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index ead8cb4ba00..9d7b17c0453 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -7242,24 +7242,37 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), // PCLMUL Instructions //===----------------------------------------------------------------------===// +// Immediate transform to help with commuting. +def PCLMULCommuteImm : SDNodeXForm<imm, [{ + uint8_t Imm = N->getZExtValue(); + return getI8Imm((uint8_t)((Imm >> 4) | (Imm << 4)), SDLoc(N)); +}]>; + // SSE carry-less Multiplication instructions -let Constraints = "$src1 = $dst", Predicates = [NoAVX, HasPCLMUL] in { - let isCommutable = 1 in - def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, u8imm:$src3), - "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, - (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))], - IIC_SSE_PCLMULQDQ_RR>, Sched<[WriteCLMul]>; - - def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2, u8imm:$src3), - "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, - (int_x86_pclmulqdq VR128:$src1, (memopv2i64 addr:$src2), - imm:$src3))], - IIC_SSE_PCLMULQDQ_RR>, Sched<[WriteCLMulLd, ReadAfterLd]>; -} +let Predicates = [NoAVX, HasPCLMUL] in { + let Constraints = "$src1 = $dst" in { + let isCommutable = 1 in + def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, u8imm:$src3), + "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set VR128:$dst, + (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))], + IIC_SSE_PCLMULQDQ_RR>, Sched<[WriteCLMul]>; + + def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, u8imm:$src3), + "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set VR128:$dst, + (int_x86_pclmulqdq 
VR128:$src1, (memopv2i64 addr:$src2), + imm:$src3))], + IIC_SSE_PCLMULQDQ_RR>, Sched<[WriteCLMulLd, ReadAfterLd]>; + } // Constraints = "$src1 = $dst" + + def : Pat<(int_x86_pclmulqdq (memopv2i64 addr:$src2), VR128:$src1, + (i8 imm:$src3)), + (PCLMULQDQrm VR128:$src1, addr:$src2, + (PCLMULCommuteImm imm:$src3))>; +} // Predicates = [NoAVX, HasPCLMUL] // SSE aliases foreach HI = ["hq","lq"] in @@ -7289,6 +7302,12 @@ multiclass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp, [(set RC:$dst, (IntId RC:$src1, (LdFrag addr:$src2), imm:$src3))]>, Sched<[WriteCLMulLd, ReadAfterLd]>; + + // We can commute a load in the first operand by swapping the sources and + // rotating the immediate. + def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 imm:$src3)), + (!cast<Instruction>(NAME#"rm") RC:$src1, addr:$src2, + (PCLMULCommuteImm imm:$src3))>; } let Predicates = [HasAVX, NoVLX_Or_NoVPCLMULQDQ, HasPCLMUL] in @@ -7321,28 +7340,6 @@ multiclass vpclmulqdq_aliases<string InstStr, RegisterClass RC, defm : vpclmulqdq_aliases<"VPCLMULQDQ", VR128, i128mem>; defm : vpclmulqdq_aliases<"VPCLMULQDQY", VR256, i256mem>; -// Immediate transform to help with commuting. -def PCLMULCommuteImm : SDNodeXForm<imm, [{ - uint8_t Imm = N->getZExtValue(); - return getI8Imm((uint8_t)((Imm >> 4) | (Imm << 4)), SDLoc(N)); -}]>; - -// We can commute a load in the first operand by swapping the sources and -// rotating the immediate. 
-let Predicates = [HasAVX, HasPCLMUL] in { - def : Pat<(int_x86_pclmulqdq (loadv2i64 addr:$src2), VR128:$src1, - (i8 imm:$src3)), - (VPCLMULQDQrm VR128:$src1, addr:$src2, - (PCLMULCommuteImm imm:$src3))>; -} - -let Predicates = [NoAVX, HasPCLMUL] in { - def : Pat<(int_x86_pclmulqdq (loadv2i64 addr:$src2), VR128:$src1, - (i8 imm:$src3)), - (PCLMULQDQrm VR128:$src1, addr:$src2, - (PCLMULCommuteImm imm:$src3))>; -} - //===----------------------------------------------------------------------===// // SSE4A Instructions //===----------------------------------------------------------------------===// |