summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 197
-rw-r--r-- llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll 8
-rw-r--r-- llvm/test/CodeGen/X86/avx512-intrinsics-canonical.ll 28
-rw-r--r-- llvm/test/CodeGen/X86/fma-fneg-combine.ll 16
4 files changed, 133 insertions, 116 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2f4c25414ef..61176b01fb3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37692,9 +37692,80 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
+ if (NegMul) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unexpected opcode");
+ case ISD::FMA: Opcode = X86ISD::FNMADD; break;
+ case X86ISD::FMADD_RND: Opcode = X86ISD::FNMADD_RND; break;
+ case X86ISD::FMADDS1: Opcode = X86ISD::FNMADDS1; break;
+ case X86ISD::FMADDS3: Opcode = X86ISD::FNMADDS3; break;
+ case X86ISD::FMADDS1_RND: Opcode = X86ISD::FNMADDS1_RND; break;
+ case X86ISD::FMADDS3_RND: Opcode = X86ISD::FNMADDS3_RND; break;
+ case X86ISD::FMADD4S: Opcode = X86ISD::FNMADD4S; break;
+ case X86ISD::FMSUB: Opcode = X86ISD::FNMSUB; break;
+ case X86ISD::FMSUB_RND: Opcode = X86ISD::FNMSUB_RND; break;
+ case X86ISD::FMSUBS1: Opcode = X86ISD::FNMSUBS1; break;
+ case X86ISD::FMSUBS3: Opcode = X86ISD::FNMSUBS3; break;
+ case X86ISD::FMSUBS1_RND: Opcode = X86ISD::FNMSUBS1_RND; break;
+ case X86ISD::FMSUBS3_RND: Opcode = X86ISD::FNMSUBS3_RND; break;
+ case X86ISD::FMSUB4S: Opcode = X86ISD::FNMSUB4S; break;
+ case X86ISD::FNMADD: Opcode = ISD::FMA; break;
+ case X86ISD::FNMADD_RND: Opcode = X86ISD::FMADD_RND; break;
+ case X86ISD::FNMADDS1: Opcode = X86ISD::FMADDS1; break;
+ case X86ISD::FNMADDS3: Opcode = X86ISD::FMADDS3; break;
+ case X86ISD::FNMADDS1_RND: Opcode = X86ISD::FMADDS1_RND; break;
+ case X86ISD::FNMADDS3_RND: Opcode = X86ISD::FMADDS3_RND; break;
+ case X86ISD::FNMADD4S: Opcode = X86ISD::FMADD4S; break;
+ case X86ISD::FNMSUB: Opcode = X86ISD::FMSUB; break;
+ case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMSUB_RND; break;
+ case X86ISD::FNMSUBS1: Opcode = X86ISD::FMSUBS1; break;
+ case X86ISD::FNMSUBS3: Opcode = X86ISD::FMSUBS3; break;
+ case X86ISD::FNMSUBS1_RND: Opcode = X86ISD::FMSUBS1_RND; break;
+ case X86ISD::FNMSUBS3_RND: Opcode = X86ISD::FMSUBS3_RND; break;
+ case X86ISD::FNMSUB4S: Opcode = X86ISD::FMSUB4S; break;
+ }
+ }
+
+ if (NegAcc) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unexpected opcode");
+ case ISD::FMA: Opcode = X86ISD::FMSUB; break;
+ case X86ISD::FMADD_RND: Opcode = X86ISD::FMSUB_RND; break;
+ case X86ISD::FMADDS1: Opcode = X86ISD::FMSUBS1; break;
+ case X86ISD::FMADDS3: Opcode = X86ISD::FMSUBS3; break;
+ case X86ISD::FMADDS1_RND: Opcode = X86ISD::FMSUBS1_RND; break;
+ case X86ISD::FMADDS3_RND: Opcode = X86ISD::FMSUBS3_RND; break;
+ case X86ISD::FMADD4S: Opcode = X86ISD::FMSUB4S; break;
+ case X86ISD::FMSUB: Opcode = ISD::FMA; break;
+ case X86ISD::FMSUB_RND: Opcode = X86ISD::FMADD_RND; break;
+ case X86ISD::FMSUBS1: Opcode = X86ISD::FMADDS1; break;
+ case X86ISD::FMSUBS3: Opcode = X86ISD::FMADDS3; break;
+ case X86ISD::FMSUBS1_RND: Opcode = X86ISD::FMADDS1_RND; break;
+ case X86ISD::FMSUBS3_RND: Opcode = X86ISD::FMADDS3_RND; break;
+ case X86ISD::FMSUB4S: Opcode = X86ISD::FMADD4S; break;
+ case X86ISD::FNMADD: Opcode = X86ISD::FNMSUB; break;
+ case X86ISD::FNMADD_RND: Opcode = X86ISD::FNMSUB_RND; break;
+ case X86ISD::FNMADDS1: Opcode = X86ISD::FNMSUBS1; break;
+ case X86ISD::FNMADDS3: Opcode = X86ISD::FNMSUBS3; break;
+ case X86ISD::FNMADDS1_RND: Opcode = X86ISD::FNMSUBS1_RND; break;
+ case X86ISD::FNMADDS3_RND: Opcode = X86ISD::FNMSUBS3_RND; break;
+ case X86ISD::FNMADD4S: Opcode = X86ISD::FNMSUB4S; break;
+ case X86ISD::FNMSUB: Opcode = X86ISD::FNMADD; break;
+ case X86ISD::FNMSUB_RND: Opcode = X86ISD::FNMADD_RND; break;
+ case X86ISD::FNMSUBS1: Opcode = X86ISD::FNMADDS1; break;
+ case X86ISD::FNMSUBS3: Opcode = X86ISD::FNMADDS3; break;
+ case X86ISD::FNMSUBS1_RND: Opcode = X86ISD::FNMADDS1_RND; break;
+ case X86ISD::FNMSUBS3_RND: Opcode = X86ISD::FNMADDS3_RND; break;
+ case X86ISD::FNMSUB4S: Opcode = X86ISD::FNMADD4S; break;
+ }
+ }
+
+ return Opcode;
+}
+
static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- // TODO: Handle FMSUB/FNMADD/FNMSUB as the starting opcode.
SDLoc dl(N);
EVT VT = N->getValueType(0);
@@ -37718,88 +37789,37 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
return false;
};
+ bool IsScalarS1 = N->getOpcode() == X86ISD::FMADDS1 ||
+ N->getOpcode() == X86ISD::FMSUBS1 ||
+ N->getOpcode() == X86ISD::FNMADDS1 ||
+ N->getOpcode() == X86ISD::FNMSUBS1 ||
+ N->getOpcode() == X86ISD::FMADDS1_RND ||
+ N->getOpcode() == X86ISD::FMSUBS1_RND ||
+ N->getOpcode() == X86ISD::FNMADDS1_RND ||
+ N->getOpcode() == X86ISD::FNMSUBS1_RND;
+ bool IsScalarS3 = N->getOpcode() == X86ISD::FMADDS3 ||
+ N->getOpcode() == X86ISD::FMSUBS3 ||
+ N->getOpcode() == X86ISD::FNMADDS3 ||
+ N->getOpcode() == X86ISD::FNMSUBS3 ||
+ N->getOpcode() == X86ISD::FMADDS3_RND ||
+ N->getOpcode() == X86ISD::FMSUBS3_RND ||
+ N->getOpcode() == X86ISD::FNMADDS3_RND ||
+ N->getOpcode() == X86ISD::FNMSUBS3_RND;
+
// Do not convert the passthru input of scalar intrinsics.
// FIXME: We could allow negations of the lower element only.
- bool NegA = N->getOpcode() != X86ISD::FMADDS1 &&
- N->getOpcode() != X86ISD::FMADDS1_RND && invertIfNegative(A);
+ bool NegA = !IsScalarS1 && invertIfNegative(A);
bool NegB = invertIfNegative(B);
- bool NegC = N->getOpcode() != X86ISD::FMADDS3 &&
- N->getOpcode() != X86ISD::FMADDS3_RND && invertIfNegative(C);
-
- // Negative multiplication when NegA xor NegB
- bool NegMul = (NegA != NegB);
- bool HasNeg = NegA || NegB || NegC;
+ bool NegC = !IsScalarS3 && invertIfNegative(C);
- unsigned NewOpcode;
- if (!NegMul)
- NewOpcode = (!NegC) ? unsigned(ISD::FMA) : unsigned(X86ISD::FMSUB);
- else
- NewOpcode = (!NegC) ? X86ISD::FNMADD : X86ISD::FNMSUB;
-
- // For FMA, we risk reconstructing the node we started with.
- // In order to avoid this, we check for negation or opcode change. If
- // one of the two happened, then it is a new node and we return it.
- if (N->getOpcode() == ISD::FMA) {
- if (HasNeg || NewOpcode != N->getOpcode())
- return DAG.getNode(NewOpcode, dl, VT, A, B, C);
- return SDValue();
- }
-
- if (N->getOpcode() == X86ISD::FMADD_RND) {
- switch (NewOpcode) {
- case ISD::FMA: NewOpcode = X86ISD::FMADD_RND; break;
- case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUB_RND; break;
- case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADD_RND; break;
- case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUB_RND; break;
- }
- } else if (N->getOpcode() == X86ISD::FMADDS1) {
- switch (NewOpcode) {
- case ISD::FMA: NewOpcode = X86ISD::FMADDS1; break;
- case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUBS1; break;
- case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADDS1; break;
- case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUBS1; break;
- }
- } else if (N->getOpcode() == X86ISD::FMADDS3) {
- switch (NewOpcode) {
- case ISD::FMA: NewOpcode = X86ISD::FMADDS3; break;
- case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUBS3; break;
- case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADDS3; break;
- case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUBS3; break;
- }
- } else if (N->getOpcode() == X86ISD::FMADDS1_RND) {
- switch (NewOpcode) {
- case ISD::FMA: NewOpcode = X86ISD::FMADDS1_RND; break;
- case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUBS1_RND; break;
- case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADDS1_RND; break;
- case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUBS1_RND; break;
- }
- } else if (N->getOpcode() == X86ISD::FMADDS3_RND) {
- switch (NewOpcode) {
- case ISD::FMA: NewOpcode = X86ISD::FMADDS3_RND; break;
- case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUBS3_RND; break;
- case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADDS3_RND; break;
- case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUBS3_RND; break;
- }
- } else if (N->getOpcode() == X86ISD::FMADD4S) {
- switch (NewOpcode) {
- case ISD::FMA: NewOpcode = X86ISD::FMADD4S; break;
- case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUB4S; break;
- case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADD4S; break;
- case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUB4S; break;
- }
- } else {
- llvm_unreachable("Unexpected opcode!");
- }
+ if (!NegA && !NegB && !NegC)
+ return SDValue();
- // Only return the node is the opcode was changed or one of the
- // operand was negated. If not, we'll just recreate the same node.
- if (HasNeg || NewOpcode != N->getOpcode()) {
- if (N->getNumOperands() == 4)
- return DAG.getNode(NewOpcode, dl, VT, A, B, C, N->getOperand(3));
- return DAG.getNode(NewOpcode, dl, VT, A, B, C);
- }
+ unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
- return SDValue();
+ if (N->getNumOperands() == 4)
+ return DAG.getNode(NewOpcode, dl, VT, A, B, C, N->getOperand(3));
+ return DAG.getNode(NewOpcode, dl, VT, A, B, C);
}
// Combine FMADDSUB(A, B, FNEG(C)) -> FMSUBADD(A, B, C)
@@ -39420,7 +39440,28 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::FMADDS1:
case X86ISD::FMADDS3:
case X86ISD::FMADD4S:
- case ISD::FMA: return combineFMA(N, DAG, Subtarget);
+ case X86ISD::FMSUB:
+ case X86ISD::FMSUB_RND:
+ case X86ISD::FMSUBS1_RND:
+ case X86ISD::FMSUBS3_RND:
+ case X86ISD::FMSUBS1:
+ case X86ISD::FMSUBS3:
+ case X86ISD::FMSUB4S:
+ case X86ISD::FNMADD:
+ case X86ISD::FNMADD_RND:
+ case X86ISD::FNMADDS1_RND:
+ case X86ISD::FNMADDS3_RND:
+ case X86ISD::FNMADDS1:
+ case X86ISD::FNMADDS3:
+ case X86ISD::FNMADD4S:
+ case X86ISD::FNMSUB:
+ case X86ISD::FNMSUB_RND:
+ case X86ISD::FNMSUBS1_RND:
+ case X86ISD::FNMSUBS3_RND:
+ case X86ISD::FNMSUBS1:
+ case X86ISD::FNMSUBS3:
+ case X86ISD::FNMSUB4S:
+ case ISD::FMA: return combineFMA(N, DAG, Subtarget);
case X86ISD::FMADDSUB_RND:
case X86ISD::FMSUBADD_RND:
case X86ISD::FMADDSUB:
diff --git a/llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll b/llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll
index f408f66eef6..9ebc3b8941b 100644
--- a/llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll
+++ b/llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll
@@ -65,16 +65,12 @@ declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4
define <8 x float> @test4(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; X32-LABEL: test4:
; X32: # %bb.0: # %entry
-; X32-NEXT: vbroadcastss {{.*#+}} ymm3 = [-0,-0,-0,-0,-0,-0,-0,-0]
-; X32-NEXT: vxorps %ymm3, %ymm2, %ymm2
-; X32-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
+; X32-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
; X32-NEXT: retl
;
; X64-LABEL: test4:
; X64: # %bb.0: # %entry
-; X64-NEXT: vbroadcastss {{.*#+}} ymm3 = [-0,-0,-0,-0,-0,-0,-0,-0]
-; X64-NEXT: vxorps %ymm3, %ymm2, %ymm2
-; X64-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
+; X64-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
; X64-NEXT: retq
entry:
%0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-canonical.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-canonical.ll
index c33ae5e6467..b020fdd01ea 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-canonical.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-canonical.ll
@@ -2507,25 +2507,17 @@ define <4 x float> @test_mm_mask3_fnmsub_round_ss(<4 x float> %__W, <4 x float>
; X86-LABEL: test_mm_mask3_fnmsub_round_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
-; X86-NEXT: vbroadcastss {{.*#+}} xmm3 = [-0,-0,-0,-0]
-; X86-NEXT: ## encoding: [0xc4,0xe2,0x79,0x18,0x1d,A,A,A,A]
-; X86-NEXT: ## fixup A - offset: 5, value: LCPI119_0, kind: FK_Data_4
-; X86-NEXT: vxorps %xmm3, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xcb]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd1]
-; X86-NEXT: ## xmm2 = (xmm0 * xmm1) - xmm2
+; X86-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd1]
+; X86-NEXT: ## xmm2 = -(xmm0 * xmm1) - xmm2
; X86-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask3_fnmsub_round_ss:
; X64: ## %bb.0: ## %entry
-; X64-NEXT: vbroadcastss {{.*#+}} xmm3 = [-0,-0,-0,-0]
-; X64-NEXT: ## encoding: [0xc4,0xe2,0x79,0x18,0x1d,A,A,A,A]
-; X64-NEXT: ## fixup A - offset: 5, value: LCPI119_0-4, kind: reloc_riprel_4byte
-; X64-NEXT: vxorps %xmm3, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xcb]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd1]
-; X64-NEXT: ## xmm2 = (xmm0 * xmm1) - xmm2
+; X64-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd1]
+; X64-NEXT: ## xmm2 = -(xmm0 * xmm1) - xmm2
; X64-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
@@ -3133,21 +3125,17 @@ define <2 x double> @test_mm_mask3_fnmsub_round_sd(<2 x double> %__W, <2 x doubl
; X86-LABEL: test_mm_mask3_fnmsub_round_sd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
-; X86-NEXT: vxorpd LCPI143_0, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0x0d,A,A,A,A]
-; X86-NEXT: ## fixup A - offset: 4, value: LCPI143_0, kind: FK_Data_4
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd1]
-; X86-NEXT: ## xmm2 = (xmm0 * xmm1) - xmm2
+; X86-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd1]
+; X86-NEXT: ## xmm2 = -(xmm0 * xmm1) - xmm2
; X86-NEXT: vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask3_fnmsub_round_sd:
; X64: ## %bb.0: ## %entry
-; X64-NEXT: vxorpd {{.*}}(%rip), %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0x0d,A,A,A,A]
-; X64-NEXT: ## fixup A - offset: 4, value: LCPI143_0-4, kind: reloc_riprel_4byte
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd1]
-; X64-NEXT: ## xmm2 = (xmm0 * xmm1) - xmm2
+; X64-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd1]
+; X64-NEXT: ## xmm2 = -(xmm0 * xmm1) - xmm2
; X64-NEXT: vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
diff --git a/llvm/test/CodeGen/X86/fma-fneg-combine.ll b/llvm/test/CodeGen/X86/fma-fneg-combine.ll
index 1d698c77438..6d02eaec36f 100644
--- a/llvm/test/CodeGen/X86/fma-fneg-combine.ll
+++ b/llvm/test/CodeGen/X86/fma-fneg-combine.ll
@@ -78,18 +78,10 @@ entry:
define <8 x float> @test7(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
-; SKX-LABEL: test7:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm2, %ymm2
-; SKX-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
-; SKX-NEXT: retq
-;
-; KNL-LABEL: test7:
-; KNL: # %bb.0: # %entry
-; KNL-NEXT: vbroadcastss {{.*#+}} ymm3 = [-0,-0,-0,-0,-0,-0,-0,-0]
-; KNL-NEXT: vxorps %ymm3, %ymm2, %ymm2
-; KNL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
-; KNL-NEXT: retq
+; CHECK-LABEL: test7:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
+; CHECK-NEXT: retq
entry:
%0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
%sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
OpenPOWER on IntegriCloud