diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 197 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics-canonical.ll | 28 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/fma-fneg-combine.ll | 16 |
4 files changed, 133 insertions, 116 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2f4c25414ef..61176b01fb3 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -37692,9 +37692,80 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) { + if (NegMul) { + switch (Opcode) { + default: llvm_unreachable("Unexpected opcode"); + case ISD::FMA: Opcode = X86ISD::FNMADD; break; + case X86ISD::FMADD_RND: Opcode = X86ISD::FNMADD_RND; break; + case X86ISD::FMADDS1: Opcode = X86ISD::FNMADDS1; break; + case X86ISD::FMADDS3: Opcode = X86ISD::FNMADDS3; break; + case X86ISD::FMADDS1_RND: Opcode = X86ISD::FNMADDS1_RND; break; + case X86ISD::FMADDS3_RND: Opcode = X86ISD::FNMADDS3_RND; break; + case X86ISD::FMADD4S: Opcode = X86ISD::FNMADD4S; break; + case X86ISD::FMSUB: Opcode = X86ISD::FNMSUB; break; + case X86ISD::FMSUB_RND: Opcode = X86ISD::FNMSUB_RND; break; + case X86ISD::FMSUBS1: Opcode = X86ISD::FNMSUBS1; break; + case X86ISD::FMSUBS3: Opcode = X86ISD::FNMSUBS3; break; + case X86ISD::FMSUBS1_RND: Opcode = X86ISD::FNMSUBS1_RND; break; + case X86ISD::FMSUBS3_RND: Opcode = X86ISD::FNMSUBS3_RND; break; + case X86ISD::FMSUB4S: Opcode = X86ISD::FNMSUB4S; break; + case X86ISD::FNMADD: Opcode = ISD::FMA; break; + case X86ISD::FNMADD_RND: Opcode = X86ISD::FMADD_RND; break; + case X86ISD::FNMADDS1: Opcode = X86ISD::FMADDS1; break; + case X86ISD::FNMADDS3: Opcode = X86ISD::FMADDS3; break; + case X86ISD::FNMADDS1_RND: Opcode = X86ISD::FMADDS1_RND; break; + case X86ISD::FNMADDS3_RND: Opcode = X86ISD::FMADDS3_RND; break; + case X86ISD::FNMADD4S: Opcode = X86ISD::FMADD4S; break; + case X86ISD::FNMSUB: Opcode = X86ISD::FMSUB; break; + case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMSUB_RND; break; + case X86ISD::FNMSUBS1: Opcode = X86ISD::FMSUBS1; break; + case X86ISD::FNMSUBS3: Opcode = X86ISD::FMSUBS3; break; + case X86ISD::FNMSUBS1_RND: Opcode = X86ISD::FMSUBS1_RND; break; + case X86ISD::FNMSUBS3_RND: Opcode = X86ISD::FMSUBS3_RND; break; + case X86ISD::FNMSUB4S: Opcode = X86ISD::FMSUB4S; break; + } + } + + if (NegAcc) { + switch (Opcode) { + default: llvm_unreachable("Unexpected opcode"); + case ISD::FMA: Opcode = X86ISD::FMSUB; break; + case X86ISD::FMADD_RND: Opcode = X86ISD::FMSUB_RND; break; + case X86ISD::FMADDS1: Opcode = X86ISD::FMSUBS1; break; + case X86ISD::FMADDS3: Opcode = X86ISD::FMSUBS3; break; + case X86ISD::FMADDS1_RND: Opcode = X86ISD::FMSUBS1_RND; break; + case X86ISD::FMADDS3_RND: Opcode = X86ISD::FMSUBS3_RND; break; + case X86ISD::FMADD4S: Opcode = X86ISD::FMSUB4S; break; + case X86ISD::FMSUB: Opcode = ISD::FMA; break; + case X86ISD::FMSUB_RND: Opcode = X86ISD::FMADD_RND; break; + case X86ISD::FMSUBS1: Opcode = X86ISD::FMADDS1; break; + case X86ISD::FMSUBS3: Opcode = X86ISD::FMADDS3; break; + case X86ISD::FMSUBS1_RND: Opcode = X86ISD::FMADDS1_RND; break; + case X86ISD::FMSUBS3_RND: Opcode = X86ISD::FMADDS3_RND; break; + case X86ISD::FMSUB4S: Opcode = X86ISD::FMADD4S; break; + case X86ISD::FNMADD: Opcode = X86ISD::FNMSUB; break; + case X86ISD::FNMADD_RND: Opcode = X86ISD::FNMSUB_RND; break; + case X86ISD::FNMADDS1: Opcode = X86ISD::FNMSUBS1; break; + case X86ISD::FNMADDS3: Opcode = X86ISD::FNMSUBS3; break; + case X86ISD::FNMADDS1_RND: Opcode = X86ISD::FNMSUBS1_RND; break; + case X86ISD::FNMADDS3_RND: Opcode = X86ISD::FNMSUBS3_RND; break; + case X86ISD::FNMADD4S: Opcode = X86ISD::FNMSUB4S; break; + case X86ISD::FNMSUB: Opcode = X86ISD::FNMADD; break; + case X86ISD::FNMSUB_RND: Opcode = X86ISD::FNMADD_RND; break; + case X86ISD::FNMSUBS1: Opcode = X86ISD::FNMADDS1; break; + case X86ISD::FNMSUBS3: Opcode = X86ISD::FNMADDS3; break; + case X86ISD::FNMSUBS1_RND: Opcode = X86ISD::FNMADDS1_RND; break; + case X86ISD::FNMSUBS3_RND: Opcode = X86ISD::FNMADDS3_RND; break; + case X86ISD::FNMSUB4S: Opcode = X86ISD::FNMADD4S; break; + } + } + + return Opcode; +} + static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { - // TODO: Handle FMSUB/FNMADD/FNMSUB as the starting opcode. SDLoc dl(N); EVT VT = N->getValueType(0); @@ -37718,88 +37789,37 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, return false; }; + bool IsScalarS1 = N->getOpcode() == X86ISD::FMADDS1 || + N->getOpcode() == X86ISD::FMSUBS1 || + N->getOpcode() == X86ISD::FNMADDS1 || + N->getOpcode() == X86ISD::FNMSUBS1 || + N->getOpcode() == X86ISD::FMADDS1_RND || + N->getOpcode() == X86ISD::FMSUBS1_RND || + N->getOpcode() == X86ISD::FNMADDS1_RND || + N->getOpcode() == X86ISD::FNMSUBS1_RND; + bool IsScalarS3 = N->getOpcode() == X86ISD::FMADDS3 || + N->getOpcode() == X86ISD::FMSUBS3 || + N->getOpcode() == X86ISD::FNMADDS3 || + N->getOpcode() == X86ISD::FNMSUBS3 || + N->getOpcode() == X86ISD::FMADDS3_RND || + N->getOpcode() == X86ISD::FMSUBS3_RND || + N->getOpcode() == X86ISD::FNMADDS3_RND || + N->getOpcode() == X86ISD::FNMSUBS3_RND; + // Do not convert the passthru input of scalar intrinsics. // FIXME: We could allow negations of the lower element only. - bool NegA = N->getOpcode() != X86ISD::FMADDS1 && - N->getOpcode() != X86ISD::FMADDS1_RND && invertIfNegative(A); + bool NegA = !IsScalarS1 && invertIfNegative(A); bool NegB = invertIfNegative(B); - bool NegC = N->getOpcode() != X86ISD::FMADDS3 && - N->getOpcode() != X86ISD::FMADDS3_RND && invertIfNegative(C); - - // Negative multiplication when NegA xor NegB - bool NegMul = (NegA != NegB); - bool HasNeg = NegA || NegB || NegC; + bool NegC = !IsScalarS3 && invertIfNegative(C); - unsigned NewOpcode; - if (!NegMul) - NewOpcode = (!NegC) ? unsigned(ISD::FMA) : unsigned(X86ISD::FMSUB); - else - NewOpcode = (!NegC) ? X86ISD::FNMADD : X86ISD::FNMSUB; - - // For FMA, we risk reconstructing the node we started with. - // In order to avoid this, we check for negation or opcode change. If - // one of the two happened, then it is a new node and we return it. - if (N->getOpcode() == ISD::FMA) { - if (HasNeg || NewOpcode != N->getOpcode()) - return DAG.getNode(NewOpcode, dl, VT, A, B, C); - return SDValue(); - } - - if (N->getOpcode() == X86ISD::FMADD_RND) { - switch (NewOpcode) { - case ISD::FMA: NewOpcode = X86ISD::FMADD_RND; break; - case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUB_RND; break; - case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADD_RND; break; - case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUB_RND; break; - } - } else if (N->getOpcode() == X86ISD::FMADDS1) { - switch (NewOpcode) { - case ISD::FMA: NewOpcode = X86ISD::FMADDS1; break; - case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUBS1; break; - case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADDS1; break; - case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUBS1; break; - } - } else if (N->getOpcode() == X86ISD::FMADDS3) { - switch (NewOpcode) { - case ISD::FMA: NewOpcode = X86ISD::FMADDS3; break; - case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUBS3; break; - case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADDS3; break; - case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUBS3; break; - } - } else if (N->getOpcode() == X86ISD::FMADDS1_RND) { - switch (NewOpcode) { - case ISD::FMA: NewOpcode = X86ISD::FMADDS1_RND; break; - case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUBS1_RND; break; - case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADDS1_RND; break; - case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUBS1_RND; break; - } - } else if (N->getOpcode() == X86ISD::FMADDS3_RND) { - switch (NewOpcode) { - case ISD::FMA: NewOpcode = X86ISD::FMADDS3_RND; break; - case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUBS3_RND; break; - case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADDS3_RND; break; - case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUBS3_RND; break; - } - } else if (N->getOpcode() == X86ISD::FMADD4S) { - switch (NewOpcode) { - case ISD::FMA: NewOpcode = X86ISD::FMADD4S; break; - case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUB4S; break; - case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADD4S; break; - case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUB4S; break; - } - } else { - llvm_unreachable("Unexpected opcode!"); - } + if (!NegA && !NegB && !NegC) + return SDValue(); - // Only return the node is the opcode was changed or one of the - // operand was negated. If not, we'll just recreate the same node. - if (HasNeg || NewOpcode != N->getOpcode()) { - if (N->getNumOperands() == 4) - return DAG.getNode(NewOpcode, dl, VT, A, B, C, N->getOperand(3)); - return DAG.getNode(NewOpcode, dl, VT, A, B, C); - } + unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC); - return SDValue(); + if (N->getNumOperands() == 4) + return DAG.getNode(NewOpcode, dl, VT, A, B, C, N->getOperand(3)); + return DAG.getNode(NewOpcode, dl, VT, A, B, C); } // Combine FMADDSUB(A, B, FNEG(C)) -> FMSUBADD(A, B, C) @@ -39420,7 +39440,28 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::FMADDS1: case X86ISD::FMADDS3: case X86ISD::FMADD4S: - case ISD::FMA: return combineFMA(N, DAG, Subtarget); + case X86ISD::FMSUB: + case X86ISD::FMSUB_RND: + case X86ISD::FMSUBS1_RND: + case X86ISD::FMSUBS3_RND: + case X86ISD::FMSUBS1: + case X86ISD::FMSUBS3: + case X86ISD::FMSUB4S: + case X86ISD::FNMADD: + case X86ISD::FNMADD_RND: + case X86ISD::FNMADDS1_RND: + case X86ISD::FNMADDS3_RND: + case X86ISD::FNMADDS1: + case X86ISD::FNMADDS3: + case X86ISD::FNMADD4S: + case X86ISD::FNMSUB: + case X86ISD::FNMSUB_RND: + case X86ISD::FNMSUBS1_RND: + case X86ISD::FNMSUBS3_RND: + case X86ISD::FNMSUBS1: + case X86ISD::FNMSUBS3: + case X86ISD::FNMSUB4S: + case ISD::FMA: return combineFMA(N, DAG, Subtarget); case X86ISD::FMADDSUB_RND: case X86ISD::FMSUBADD_RND: case X86ISD::FMADDSUB: diff --git a/llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll b/llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll index f408f66eef6..9ebc3b8941b 100644 --- a/llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll +++ b/llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll @@ -65,16 +65,12 @@ declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 define <8 x float> @test4(<8 x float> %a, <8 x float> %b, <8 x float> %c) { ; X32-LABEL: test4: ; X32: # %bb.0: # %entry -; X32-NEXT: vbroadcastss {{.*#+}} ymm3 = [-0,-0,-0,-0,-0,-0,-0,-0] -; X32-NEXT: vxorps %ymm3, %ymm2, %ymm2 -; X32-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 +; X32-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 ; X32-NEXT: retl ; ; X64-LABEL: test4: ; X64: # %bb.0: # %entry -; X64-NEXT: vbroadcastss {{.*#+}} ymm3 = [-0,-0,-0,-0,-0,-0,-0,-0] -; X64-NEXT: vxorps %ymm3, %ymm2, %ymm2 -; X64-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 +; X64-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 ; X64-NEXT: retq entry: %0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2 diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-canonical.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-canonical.ll index c33ae5e6467..b020fdd01ea 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics-canonical.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics-canonical.ll @@ -2507,25 +2507,17 @@ define <4 x float> @test_mm_mask3_fnmsub_round_ss(<4 x float> %__W, <4 x float> ; X86-LABEL: test_mm_mask3_fnmsub_round_ss: ; X86: ## %bb.0: ## %entry ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] -; X86-NEXT: vbroadcastss {{.*#+}} xmm3 = [-0,-0,-0,-0] -; X86-NEXT: ## encoding: [0xc4,0xe2,0x79,0x18,0x1d,A,A,A,A] -; X86-NEXT: ## fixup A - offset: 5, value: LCPI119_0, kind: FK_Data_4 -; X86-NEXT: vxorps %xmm3, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xcb] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd1] -; X86-NEXT: ## xmm2 = (xmm0 * xmm1) - xmm2 +; X86-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd1] +; X86-NEXT: ## xmm2 = -(xmm0 * xmm1) - xmm2 ; X86-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2] ; X86-NEXT: retl ## encoding: [0xc3] ; ; X64-LABEL: test_mm_mask3_fnmsub_round_ss: ; X64: ## %bb.0: ## %entry -; X64-NEXT: vbroadcastss {{.*#+}} xmm3 = [-0,-0,-0,-0] -; X64-NEXT: ## encoding: [0xc4,0xe2,0x79,0x18,0x1d,A,A,A,A] -; X64-NEXT: ## fixup A - offset: 5, value: LCPI119_0-4, kind: reloc_riprel_4byte -; X64-NEXT: vxorps %xmm3, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xcb] ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd1] -; X64-NEXT: ## xmm2 = (xmm0 * xmm1) - xmm2 +; X64-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd1] +; X64-NEXT: ## xmm2 = -(xmm0 * xmm1) - xmm2 ; X64-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2] ; X64-NEXT: retq ## encoding: [0xc3] entry: @@ -3133,21 +3125,17 @@ define <2 x double> @test_mm_mask3_fnmsub_round_sd(<2 x double> %__W, <2 x doubl ; X86-LABEL: test_mm_mask3_fnmsub_round_sd: ; X86: ## %bb.0: ## %entry ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] -; X86-NEXT: vxorpd LCPI143_0, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0x0d,A,A,A,A] -; X86-NEXT: ## fixup A - offset: 4, value: LCPI143_0, kind: FK_Data_4 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd1] -; X86-NEXT: ## xmm2 = (xmm0 * xmm1) - xmm2 +; X86-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd1] +; X86-NEXT: ## xmm2 = -(xmm0 * xmm1) - xmm2 ; X86-NEXT: vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2] ; X86-NEXT: retl ## encoding: [0xc3] ; ; X64-LABEL: test_mm_mask3_fnmsub_round_sd: ; X64: ## %bb.0: ## %entry -; X64-NEXT: vxorpd {{.*}}(%rip), %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0x0d,A,A,A,A] -; X64-NEXT: ## fixup A - offset: 4, value: LCPI143_0-4, kind: reloc_riprel_4byte ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd1] -; X64-NEXT: ## xmm2 = (xmm0 * xmm1) - xmm2 +; X64-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd1] +; X64-NEXT: ## xmm2 = -(xmm0 * xmm1) - xmm2 ; X64-NEXT: vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2] ; X64-NEXT: retq ## encoding: [0xc3] entry: diff --git a/llvm/test/CodeGen/X86/fma-fneg-combine.ll b/llvm/test/CodeGen/X86/fma-fneg-combine.ll index 1d698c77438..6d02eaec36f 100644 --- a/llvm/test/CodeGen/X86/fma-fneg-combine.ll +++ b/llvm/test/CodeGen/X86/fma-fneg-combine.ll @@ -78,18 +78,10 @@ entry: define <8 x float> @test7(<8 x float> %a, <8 x float> %b, <8 x float> %c) { -; SKX-LABEL: test7: -; SKX: # %bb.0: # %entry -; SKX-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm2, %ymm2 -; SKX-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 -; SKX-NEXT: retq -; -; KNL-LABEL: test7: -; KNL: # %bb.0: # %entry -; KNL-NEXT: vbroadcastss {{.*#+}} ymm3 = [-0,-0,-0,-0,-0,-0,-0,-0] -; KNL-NEXT: vxorps %ymm3, %ymm2, %ymm2 -; KNL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 -; KNL-NEXT: retq +; CHECK-LABEL: test7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 +; CHECK-NEXT: retq entry: %0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2 %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0 |

