summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
author Dimitry Andric <dimitry@andric.com> 2016-02-19 20:14:11 +0000
committer Dimitry Andric <dimitry@andric.com> 2016-02-19 20:14:11 +0000
commitdb417b6d4021c25acb00811d40a68a96753a8352 (patch)
tree8930eb2d2884041a5f59945f4c2563344abcb737 /llvm
parentffb7bd11f77a351e23f3a747873a8e3c7972d4c5 (diff)
downloadbcm5719-llvm-db417b6d4021c25acb00811d40a68a96753a8352.tar.gz
bcm5719-llvm-db417b6d4021c25acb00811d40a68a96753a8352.zip
Fix incorrect selection of AVX512 sqrt when OptForSize is on
Summary: When optimizing for size, sqrt calls can be incorrectly selected as AVX512 VSQRT instructions. This is because X86InstrAVX512.td has a `Requires<[OptForSize]>` in its `avx512_sqrt_scalar` multiclass definition. Even if the target does not support AVX512, the class can apparently still be chosen, leading to an incorrect selection of `vsqrtss`. In PR26625, this led to an assertion: Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!", because the `vsqrtss` instruction requires an XMM register, which is not available on i686 CPUs. Reviewers: grosbach, resistor, joker.eph Subscribers: spatel, emaste, llvm-commits Differential Revision: http://reviews.llvm.org/D17414 llvm-svn: 261360
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Target/X86/X86InstrAVX512.td 2
-rw-r--r-- llvm/test/CodeGen/X86/pr26625.ll 20
2 files changed, 21 insertions, 1 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index a7a5a129f81..4076c7e1ec1 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -6005,7 +6005,7 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
def : Pat<(_.EltVT (OpNode (load addr:$src))),
(!cast<Instruction>(NAME#SUFF#Zm)
- (_.EltVT (IMPLICIT_DEF)), addr:$src)>, Requires<[OptForSize]>;
+ (_.EltVT (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512, OptForSize]>;
}
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> {
diff --git a/llvm/test/CodeGen/X86/pr26625.ll b/llvm/test/CodeGen/X86/pr26625.ll
new file mode 100644
index 00000000000..1b2e227bb59
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr26625.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mcpu=i686 2>&1 | FileCheck %s
+; PR26625
+
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "i386"
+
+define float @x0(float %f) #0 {
+entry:
+ %call = tail call float @sqrtf(float %f) #1
+ ret float %call
+; CHECK-LABEL: x0:
+; CHECK: flds
+; CHECK-NEXT: fsqrt
+; CHECK-NOT: vsqrtss
+}
+
+declare float @sqrtf(float) #0
+
+attributes #0 = { nounwind optsize readnone }
+attributes #1 = { nounwind optsize readnone }
OpenPOWER on IntegriCloud