summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelLowering.cpp10
-rw-r--r--llvm/test/CodeGen/SystemZ/fp-mul-13.ll16
-rw-r--r--llvm/test/CodeGen/SystemZ/fp-strict-mul-12.ll86
-rw-r--r--llvm/test/CodeGen/SystemZ/fp-strict-mul-13.ll21
4 files changed, 127 insertions, 6 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 36f46061929..25690a2ef73 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -570,13 +570,11 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
}
}
- // We have fused multiply-addition for f32 and f64 but not f128.
- setOperationAction(ISD::FMA, MVT::f32, Legal);
- setOperationAction(ISD::FMA, MVT::f64, Legal);
- if (Subtarget.hasVectorEnhancements1())
- setOperationAction(ISD::FMA, MVT::f128, Legal);
- else
+ // We only have fused f128 multiply-addition on vector registers.
+ if (!Subtarget.hasVectorEnhancements1()) {
setOperationAction(ISD::FMA, MVT::f128, Expand);
+ setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand);
+ }
// We don't have a copysign instruction on vector registers.
if (Subtarget.hasVectorEnhancements1())
diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-13.ll b/llvm/test/CodeGen/SystemZ/fp-mul-13.ll
new file mode 100644
index 00000000000..4475195cc54
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-13.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare fp128 @llvm.fma.f128(fp128 %f1, fp128 %f2, fp128 %f3)
+
+define void @f1(fp128 *%ptr1, fp128 *%ptr2, fp128 *%ptr3, fp128 *%dst) {
+; CHECK-LABEL: f1:
+; CHECK: brasl %r14, fmal
+; CHECK: br %r14
+ %f1 = load fp128, fp128 *%ptr1
+ %f2 = load fp128, fp128 *%ptr2
+ %f3 = load fp128, fp128 *%ptr3
+ %res = call fp128 @llvm.fma.f128 (fp128 %f1, fp128 %f2, fp128 %f3)
+ store fp128 %res, fp128 *%dst
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-mul-12.ll b/llvm/test/CodeGen/SystemZ/fp-strict-mul-12.ll
new file mode 100644
index 00000000000..3af5efb6c9c
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-mul-12.ll
@@ -0,0 +1,86 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fma.f128(fp128 %f1, fp128 %f2, fp128 %f3, metadata, metadata)
+
+define void @f1(fp128 *%ptr1, fp128 *%ptr2, fp128 *%ptr3, fp128 *%dst) #0 {
+; CHECK-LABEL: f1:
+; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
+; CHECK-DAG: vl [[REG3:%v[0-9]+]], 0(%r4)
+; CHECK: wfmaxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], [[REG3]]
+; CHECK: vst [[RES]], 0(%r5)
+; CHECK: br %r14
+ %f1 = load fp128, fp128 *%ptr1
+ %f2 = load fp128, fp128 *%ptr2
+ %f3 = load fp128, fp128 *%ptr3
+ %res = call fp128 @llvm.experimental.constrained.fma.f128 (
+ fp128 %f1, fp128 %f2, fp128 %f3,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ store fp128 %res, fp128 *%dst
+ ret void
+}
+
+define void @f2(fp128 *%ptr1, fp128 *%ptr2, fp128 *%ptr3, fp128 *%dst) #0 {
+; CHECK-LABEL: f2:
+; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
+; CHECK-DAG: vl [[REG3:%v[0-9]+]], 0(%r4)
+; CHECK: wfmsxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], [[REG3]]
+; CHECK: vst [[RES]], 0(%r5)
+; CHECK: br %r14
+ %f1 = load fp128, fp128 *%ptr1
+ %f2 = load fp128, fp128 *%ptr2
+ %f3 = load fp128, fp128 *%ptr3
+ %neg = fsub fp128 0xL00000000000000008000000000000000, %f3
+ %res = call fp128 @llvm.experimental.constrained.fma.f128 (
+ fp128 %f1, fp128 %f2, fp128 %neg,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ store fp128 %res, fp128 *%dst
+ ret void
+}
+
+define void @f3(fp128 *%ptr1, fp128 *%ptr2, fp128 *%ptr3, fp128 *%dst) #0 {
+; CHECK-LABEL: f3:
+; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
+; CHECK-DAG: vl [[REG3:%v[0-9]+]], 0(%r4)
+; CHECK: wfnmaxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], [[REG3]]
+; CHECK: vst [[RES]], 0(%r5)
+; CHECK: br %r14
+ %f1 = load fp128, fp128 *%ptr1
+ %f2 = load fp128, fp128 *%ptr2
+ %f3 = load fp128, fp128 *%ptr3
+ %res = call fp128 @llvm.experimental.constrained.fma.f128 (
+ fp128 %f1, fp128 %f2, fp128 %f3,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ %negres = fsub fp128 0xL00000000000000008000000000000000, %res
+ store fp128 %negres, fp128 *%dst
+ ret void
+}
+
+define void @f4(fp128 *%ptr1, fp128 *%ptr2, fp128 *%ptr3, fp128 *%dst) #0 {
+; CHECK-LABEL: f4:
+; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
+; CHECK-DAG: vl [[REG3:%v[0-9]+]], 0(%r4)
+; CHECK: wfnmsxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], [[REG3]]
+; CHECK: vst [[RES]], 0(%r5)
+; CHECK: br %r14
+ %f1 = load fp128, fp128 *%ptr1
+ %f2 = load fp128, fp128 *%ptr2
+ %f3 = load fp128, fp128 *%ptr3
+ %neg = fsub fp128 0xL00000000000000008000000000000000, %f3
+ %res = call fp128 @llvm.experimental.constrained.fma.f128 (
+ fp128 %f1, fp128 %f2, fp128 %neg,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ %negres = fsub fp128 0xL00000000000000008000000000000000, %res
+ store fp128 %negres, fp128 *%dst
+ ret void
+}
+
+attributes #0 = { strictfp }
+
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-mul-13.ll b/llvm/test/CodeGen/SystemZ/fp-strict-mul-13.ll
new file mode 100644
index 00000000000..32f609bb26e
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-mul-13.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fma.f128(fp128 %f1, fp128 %f2, fp128 %f3, metadata, metadata)
+
+define void @f1(fp128 *%ptr1, fp128 *%ptr2, fp128 *%ptr3, fp128 *%dst) #0 {
+; CHECK-LABEL: f1:
+; CHECK: brasl %r14, fmal
+; CHECK: br %r14
+ %f1 = load fp128, fp128 *%ptr1
+ %f2 = load fp128, fp128 *%ptr2
+ %f3 = load fp128, fp128 *%ptr3
+ %res = call fp128 @llvm.experimental.constrained.fma.f128 (
+ fp128 %f1, fp128 %f2, fp128 %f3,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ store fp128 %res, fp128 *%dst
+ ret void
+}
+
+attributes #0 = { strictfp }
+
OpenPOWER on IntegriCloud