author     Mohammed Agabaria <mohammed.agabaria@intel.com>   2017-07-02 12:16:15 +0000
committer  Mohammed Agabaria <mohammed.agabaria@intel.com>   2017-07-02 12:16:15 +0000
commit     eb09a810e6a59c436bef6ac392273b8f764dcd29 (patch)
tree       9ef6b18944bf1f8bfb53579024f66491215f3037 /llvm/test
parent     dc25c2b08b834ca40ebabf90c132abd4c128f7f2 (diff)
download   bcm5719-llvm-eb09a810e6a59c436bef6ac392273b8f764dcd29.tar.gz
           bcm5719-llvm-eb09a810e6a59c436bef6ac392273b8f764dcd29.zip
[X86][CM] Update add/sub costs of vectors of 64-bit elements on the X86 SLM arch
This patch updates the cost of addq/subq (add/subtract of vectors of 64-bit elements)
based on measured performance numbers for the SLM (Silvermont) architecture.
Differential Revision: https://reviews.llvm.org/D33983
llvm-svn: 306974
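For readers outside LLVM, a word on how these numbers are used: the X86 target keeps per-subtarget cost tables mapping an arithmetic opcode and vector type to a relative cost, wider vectors are costed by splitting them into legal pieces, and the loop vectorizer compares the summed vector cost against the scalar cost before picking a vectorization factor. The sketch below is a minimal standalone model of that lookup-and-split idea, not the actual patch or LLVM's API: the struct, function names, and string keys are invented for illustration, and the costs are copied from the updated test expectations in this diff.

// Standalone illustration of a per-subtarget cost-table lookup; hypothetical
// types and names, with costs mirroring the SLM test expectations below.
#include <iostream>
#include <optional>
#include <string>
#include <vector>

struct CostEntry {
  std::string Op;  // arithmetic opcode, e.g. "add", "sub", "mul"
  std::string Ty;  // legal vector type, e.g. "v2i64"
  unsigned Cost;   // relative cost compared against the scalar form
};

// Costs implied by the revised slm-arith-costs.ll checks.
static const std::vector<CostEntry> SLMCosts = {
    {"add", "v2i64", 4},   // 64-bit vector add is expensive on Silvermont
    {"sub", "v2i64", 4},   // 64-bit vector subtract likewise
    {"mul", "v2i64", 17},  // 64-bit vector multiply is an emulated sequence
};

// Return the table cost for (Op, Ty); an empty result means "fall back to
// the generic cost model".
std::optional<unsigned> lookupSLMCost(const std::string &Op,
                                      const std::string &Ty) {
  for (const CostEntry &E : SLMCosts)
    if (E.Op == Op && E.Ty == Ty)
      return E.Cost;
  return std::nullopt;
}

int main() {
  // Wider vectors are costed by splitting into legal <2 x i64> pieces, which
  // reproduces the expectations in the test diff: v4i64 mul = 2*17 = 34,
  // v8i64 = 68, v16i64 = 136.
  for (unsigned Pieces : {1u, 2u, 4u, 8u})
    if (auto C = lookupSLMCost("mul", "v2i64"))
      std::cout << "v" << 2 * Pieces << "i64 mul cost = " << Pieces * *C
                << "\n";
  return 0;
}

(Compiles as ordinary C++17; in the real tree the equivalent data lives in the X86 TTI implementation rather than in a free-standing table like this.)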
Diffstat (limited to 'llvm/test')
-rw-r--r--   llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll        | 28
-rw-r--r--   llvm/test/Transforms/LoopVectorize/X86/slm-no-vectorize.ll | 48
2 files changed, 69 insertions, 7 deletions
diff --git a/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll b/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll
index 3673a5d9e06..a767aa30b8e 100644
--- a/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll
+++ b/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll
@@ -3,6 +3,20 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
+define <2 x i64> @slm-costs_64_vector_add(<2 x i64> %a, <2 x i64> %b) {
+entry:
+; SLM: cost of 4 {{.*}} add <2 x i64>
+  %res = add <2 x i64> %a, %b
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @slm-costs_64_vector_sub(<2 x i64> %a, <2 x i64> %b) {
+entry:
+; SLM: cost of 4 {{.*}} sub <2 x i64>
+  %res = sub <2 x i64> %a, %b
+  ret <2 x i64> %res
+}
+
 ; 8bit mul
 define i8 @slm-costs_8_scalar_mul(i8 %a, i8 %b) {
 entry:
@@ -13,7 +27,7 @@ entry:
 
 define <2 x i8> @slm-costs_8_v2_mul(<2 x i8> %a, <2 x i8> %b) {
 entry:
-; SLM: cost of 11 {{.*}} mul nsw <2 x i8>
+; SLM: cost of 17 {{.*}} mul nsw <2 x i8>
   %res = mul nsw <2 x i8> %a, %b
   ret <2 x i8> %res
 }
@@ -97,7 +111,7 @@ entry:
 
 define <2 x i16> @slm-costs_16_v2_mul(<2 x i16> %a, <2 x i16> %b) {
 entry:
-; SLM: cost of 11 {{.*}} mul nsw <2 x i16>
+; SLM: cost of 17 {{.*}} mul nsw <2 x i16>
   %res = mul nsw <2 x i16> %a, %b
   ret <2 x i16> %res
 }
@@ -181,7 +195,7 @@ entry:
 
 define <2 x i32> @slm-costs_32_v2_mul(<2 x i32> %a, <2 x i32> %b) {
 entry:
-; SLM: cost of 11 {{.*}} mul nsw <2 x i32>
+; SLM: cost of 17 {{.*}} mul nsw <2 x i32>
   %res = mul nsw <2 x i32> %a, %b
   ret <2 x i32> %res
 }
@@ -217,28 +231,28 @@ entry:
 
 define <2 x i64> @slm-costs_64_v2_mul(<2 x i64> %a, <2 x i64> %b) {
 entry:
-; SLM: cost of 11 {{.*}} mul nsw <2 x i64>
+; SLM: cost of 17 {{.*}} mul nsw <2 x i64>
   %res = mul nsw <2 x i64> %a, %b
   ret <2 x i64> %res
 }
 
 define <4 x i64> @slm-costs_64_v4_mul(<4 x i64> %a, <4 x i64> %b) {
 entry:
-; SLM: cost of 22 {{.*}} mul nsw <4 x i64>
+; SLM: cost of 34 {{.*}} mul nsw <4 x i64>
   %res = mul nsw <4 x i64> %a, %b
   ret <4 x i64> %res
 }
 
 define <8 x i64> @slm-costs_64_v8_mul(<8 x i64> %a, <8 x i64> %b) {
 entry:
-; SLM: cost of 44 {{.*}} mul nsw <8 x i64>
+; SLM: cost of 68 {{.*}} mul nsw <8 x i64>
   %res = mul nsw <8 x i64> %a, %b
   ret <8 x i64> %res
 }
 
 define <16 x i64> @slm-costs_64_v16_mul(<16 x i64> %a, <16 x i64> %b) {
 entry:
-; SLM: cost of 88 {{.*}} mul nsw <16 x i64>
+; SLM: cost of 136 {{.*}} mul nsw <16 x i64>
   %res = mul nsw <16 x i64> %a, %b
   ret <16 x i64> %res
 }
diff --git a/llvm/test/Transforms/LoopVectorize/X86/slm-no-vectorize.ll b/llvm/test/Transforms/LoopVectorize/X86/slm-no-vectorize.ll
new file mode 100644
index 00000000000..8be9f1d0799
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/slm-no-vectorize.ll
@@ -0,0 +1,48 @@
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux -S -mcpu=slm -debug 2>&1 | FileCheck -check-prefix=MSG %s
+; This test should not be vectorized in X86\SLM arch
+; Vectorizing the 64bit multiply in this case is wrong since
+; it can be done with a lower bit mode (notice that the sources is 16bit)
+; Also addq\subq (quad word) has a high cost on SLM arch.
+; this test has a bad performance (regression of -70%) if vectorized on SLM arch
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @no_vec(i32 %LastIndex, i16* nocapture readonly %InputData, i16 signext %lag, i16 signext %Scale) {
+entry:
+; MSG: LV: Selecting VF: 1.
+  %cmp17 = icmp sgt i32 %LastIndex, 0
+  br i1 %cmp17, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph:                                   ; preds = %entry
+  %conv5 = sext i16 %Scale to i64
+  %sh_prom = and i64 %conv5, 4294967295
+  %0 = sext i16 %lag to i64
+  %wide.trip.count = zext i32 %LastIndex to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  %conv8 = trunc i64 %add7 to i32
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  %Accumulator.0.lcssa = phi i32 [ 0, %entry ], [ %conv8, %for.cond.cleanup.loopexit ]
+  ret i32 %Accumulator.0.lcssa
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %Accumulator.018 = phi i64 [ 0, %for.body.lr.ph ], [ %add7, %for.body ]
+  %arrayidx = getelementptr inbounds i16, i16* %InputData, i64 %indvars.iv
+  %1 = load i16, i16* %arrayidx, align 2
+  %conv = sext i16 %1 to i64
+  %2 = add nsw i64 %indvars.iv, %0
+  %arrayidx3 = getelementptr inbounds i16, i16* %InputData, i64 %2
+  %3 = load i16, i16* %arrayidx3, align 2
+  %conv4 = sext i16 %3 to i64
+  %mul = mul nsw i64 %conv4, %conv
+  %shr = ashr i64 %mul, %sh_prom
+  %add7 = add i64 %shr, %Accumulator.018
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+