summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2018-07-05 17:05:36 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2018-07-05 17:05:36 +0000
commit24ce89b7178ef227564c5b512ccad210db3a4cc5 (patch)
tree77dd436d3c72f1a5ef0a5ed022489357d59a26f3
parent2d47310071e1e718f59553781aea9e8aa0bcfba8 (diff)
downloadbcm5719-llvm-24ce89b7178ef227564c5b512ccad210db3a4cc5.tar.gz
bcm5719-llvm-24ce89b7178ef227564c5b512ccad210db3a4cc5.zip
Fix asserts in AMDGCN fmed3 folding by handling more cases of NaN
Better NaN handling for AMDGCN fmed3. All operands are checked for NaN now. The checks were moved before the canonicalization to provide a better mapping from fclamp. Changed the behaviour of fmed3(x,y,NaN) to return max(x,y) instead of min(x,y) in light of this. Updated tests as a result and added some new cases to cover the fix. Patch by Alan Baker llvm-svn: 336375
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp25
-rw-r--r--llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll48
2 files changed, 63 insertions, 10 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 92533ffdb4b..78c2d312939 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3424,6 +3424,24 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Value *Src1 = II->getArgOperand(1);
Value *Src2 = II->getArgOperand(2);
+ // Checking for NaN before canonicalization provides better fidelity when
+ // mapping other operations onto fmed3 since the order of operands is
+ // unchanged.
+ CallInst *NewCall = nullptr;
+ if (match(Src0, m_NaN()) || isa<UndefValue>(Src0)) {
+ NewCall = Builder.CreateMinNum(Src1, Src2);
+ } else if (match(Src1, m_NaN()) || isa<UndefValue>(Src1)) {
+ NewCall = Builder.CreateMinNum(Src0, Src2);
+ } else if (match(Src2, m_NaN()) || isa<UndefValue>(Src2)) {
+ NewCall = Builder.CreateMaxNum(Src0, Src1);
+ }
+
+ if (NewCall) {
+ NewCall->copyFastMathFlags(II);
+ NewCall->takeName(II);
+ return replaceInstUsesWith(*II, NewCall);
+ }
+
bool Swap = false;
// Canonicalize constants to RHS operands.
//
@@ -3450,13 +3468,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return II;
}
- if (match(Src2, m_NaN()) || isa<UndefValue>(Src2)) {
- CallInst *NewCall = Builder.CreateMinNum(Src0, Src1);
- NewCall->copyFastMathFlags(II);
- NewCall->takeName(II);
- return replaceInstUsesWith(*II, NewCall);
- }
-
if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 5ef3f5db2f7..1fad1d8f22e 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -1229,7 +1229,7 @@ define float @fmed3_x_undef_y_f32(float %x, float %y) {
}
; CHECK-LABEL: @fmed3_x_y_undef_f32(
-; CHECK: call float @llvm.minnum.f32(float %x, float %y)
+; CHECK: call float @llvm.maxnum.f32(float %x, float %y)
define float @fmed3_x_y_undef_f32(float %x, float %y) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float undef)
ret float %med3
@@ -1250,7 +1250,7 @@ define float @fmed3_x_qnan0_y_f32(float %x, float %y) {
}
; CHECK-LABEL: @fmed3_x_y_qnan0_f32(
-; CHECK: call float @llvm.minnum.f32(float %x, float %y)
+; CHECK: call float @llvm.maxnum.f32(float %x, float %y)
define float @fmed3_x_y_qnan0_f32(float %x, float %y) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF8000000000000)
ret float %med3
@@ -1265,7 +1265,7 @@ define float @fmed3_qnan1_x_y_f32(float %x, float %y) {
; This can return any of the qnans.
; CHECK-LABEL: @fmed3_qnan0_qnan1_qnan2_f32(
-; CHECK: ret float 0x7FF8002000000000
+; CHECK: ret float 0x7FF8030000000000
define float @fmed3_qnan0_qnan1_qnan2_f32(float %x, float %y) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float 0x7FF8002000000000, float 0x7FF8030000000000)
ret float %med3
@@ -1334,6 +1334,48 @@ define float @fmed3_qnan0_qnan1_x_f32(float %x) {
ret float %med3
}
+; CHECK-LABEL: @fmed3_nan_0_1_f32(
+; CHECK: ret float 0.0
+define float @fmed3_nan_0_1_f32() {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0.0, float 1.0)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_0_nan_1_f32(
+; CHECK: ret float 0.0
+define float @fmed3_0_nan_1_f32() {
+ %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 0x7FF8001000000000, float 1.0)
+ ret float %med
+}
+
+; CHECK-LABEL: @fmed3_0_1_nan_f32(
+; CHECK: ret float 1.0
+define float @fmed3_0_1_nan_f32() {
+ %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8001000000000)
+ ret float %med
+}
+
+; CHECK-LABEL: @fmed3_undef_0_1_f32(
+; CHECK: ret float 0.0
+define float @fmed3_undef_0_1_f32() {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float undef, float 0.0, float 1.0)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_0_undef_1_f32(
+; CHECK: ret float 0.0
+define float @fmed3_0_undef_1_f32() {
+ %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float undef, float 1.0)
+ ret float %med
+}
+
+; CHECK-LABEL: @fmed3_0_1_undef_f32(
+; CHECK: ret float 1.0
+define float @fmed3_0_1_undef_f32() {
+ %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float undef)
+ ret float %med
+}
+
; --------------------------------------------------------------------
; llvm.amdgcn.icmp
; --------------------------------------------------------------------
OpenPOWER on IntegriCloud