summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Sjoerd Meijer <sjoerd.meijer@arm.com> 2018-02-01 13:48:40 +0000
committer Sjoerd Meijer <sjoerd.meijer@arm.com> 2018-02-01 13:48:40 +0000
commit 9d9a86535e9949a04352ac51e4cc5a933fb73a72 (patch)
tree 81e6b3cd55d3389bd8169f62ff7cbd0397387e34
parent a8d12bbc3fae40f20f87a94c400176110ac7d51b (diff)
download bcm5719-llvm-9d9a86535e9949a04352ac51e4cc5a933fb73a72.tar.gz
bcm5719-llvm-9d9a86535e9949a04352ac51e4cc5a933fb73a72.zip
[ARM] FullFP16 LowerReturn Fix
Commit r323512 introduced an optimisation in LowerReturn for half-precision return values. A missing check caused a crash when the return value is "undef" (i.e. a node that has no operands).

Differential Revision: https://reviews.llvm.org/D42743

llvm-svn: 323968
-rw-r--r-- llvm/lib/Target/ARM/ARMISelLowering.cpp 4
-rw-r--r-- llvm/test/CodeGen/ARM/fp16-instructions.ll 29
2 files changed, 23 insertions, 10 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index cddd957417e..8e58bd330ac 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -2491,12 +2491,12 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// t11 f16 = fadd ...
// t12: i16 = bitcast t11
// t13: i32 = zero_extend t12
- // t14: f32 = bitcast t13
+ // t14: f32 = bitcast t13 <~~~~~~~ Arg
//
// to avoid code generation for bitcasts, we simply set Arg to the node
// that produces the f16 value, t11 in this case.
//
- if (Arg.getValueType() == MVT::f32) {
+ if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
SDValue ZE = Arg.getOperand(0);
if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
SDValue BC = ZE.getOperand(0);
diff --git a/llvm/test/CodeGen/ARM/fp16-instructions.ll b/llvm/test/CodeGen/ARM/fp16-instructions.ll
index 93b6982f5d6..b8ba9a6e706 100644
--- a/llvm/test/CodeGen/ARM/fp16-instructions.ll
+++ b/llvm/test/CodeGen/ARM/fp16-instructions.ll
@@ -1,15 +1,27 @@
; SOFT:
-; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft | FileCheck %s --check-prefix=CHECK-SOFT
+; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
; SOFTFP:
-; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefix=CHECK-SOFTFP-VFP3
-; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefix=CHECK-SOFTFP-FP16
-; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefix=CHECK-SOFTFP-FULLFP16
+; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
+; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16
+; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16
; HARD:
-; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefix=CHECK-HARDFP-VFP3
-; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefix=CHECK-HARDFP-FP16
-; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefix=CHECK-HARDFP-FULLFP16
+; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
+; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
+; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16
+
+
+define float @RetValBug(float %A.coerce) local_unnamed_addr {
+entry:
+ ret float undef
+; This expression is optimised away due to the undef value. Check that
+; LowerReturn can handle undef nodes (i.e. nodes which do not have any
+; operands) when FullFP16 is enabled.
+;
+; CHECK-LABEL: RetValBug:
+; CHECK-HARDFP-FULLFP16: mov pc, lr
+}
define float @Add(float %a.coerce, float %b.coerce) local_unnamed_addr {
entry:
@@ -25,6 +37,8 @@ entry:
%5 = bitcast i32 %tmp4.0.insert.ext to float
ret float %5
+; CHECK-LABEL: Add:
+
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fadd
@@ -64,5 +78,4 @@ entry:
; CHECK-HARDFP-FULLFP16: vadd.f16 s0, s0, s1
; CHECK-HARDFP-FULLFP16-NEXT: mov pc, lr
-
}
OpenPOWER on IntegriCloud