| author | Karthik Bhat <kv.bhat@samsung.com> | 2014-06-20 04:32:48 +0000 |
|---|---|---|
| committer | Karthik Bhat <kv.bhat@samsung.com> | 2014-06-20 04:32:48 +0000 |
| commit | e03a25da70c6fd476d9be766eba0e46c9f3c1796 (patch) | |
| tree | 17758b552280556426b0a83160112b11ef9c3829 /llvm/test/Transforms | |
| parent | e1552f664861d9f60c21125f3ca042847c3d1b4a (diff) | |
| download | bcm5719-llvm-e03a25da70c6fd476d9be766eba0e46c9f3c1796.tar.gz bcm5719-llvm-e03a25da70c6fd476d9be766eba0e46c9f3c1796.zip | |
Add Support to Recognize and Vectorize NON SIMD instructions in SLPVectorizer.
This patch adds support to recognize alternating patterns such as fadd, fsub, fadd, fsub, ... (or add, sub, add, sub, ...) and
vectorize them as vector shuffles when doing so is profitable.
These vector-shuffle patterns can later be matched to instructions such as addsubpd on X86.
Thanks to Arnold and Hal for the reviews. http://reviews.llvm.org/D4015
llvm-svn: 211339
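As a rough illustration (not part of this commit; the function and value names below are made up), the recognized pattern turns four scalar lanes that alternate between add and subtract into one vector add, one vector sub, and a shufflevector whose mask takes even lanes from the add and odd lanes from the sub. This is the shuffle form that the X86 backend can later match to addsub-style instructions, and it mirrors the CHECK lines in the test added by this commit.

```llvm
; Illustrative sketch only; @addsub_sketch and its operands are hypothetical.
; The <0, 5, 2, 7> mask picks lanes 0 and 2 from %vadd and lanes 1 and 3
; from %vsub, i.e. add on even lanes and sub on odd lanes.
define <4 x float> @addsub_sketch(<4 x float> %b, <4 x float> %c) {
entry:
  %vadd = fadd <4 x float> %b, %c
  %vsub = fsub <4 x float> %b, %c
  %res = shufflevector <4 x float> %vadd, <4 x float> %vsub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %res
}
```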
Diffstat (limited to 'llvm/test/Transforms')
| -rw-r--r-- | llvm/test/Transforms/SLPVectorizer/X86/addsub.ll | 181 |
1 file changed, 181 insertions, 0 deletions
```diff
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll b/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll
new file mode 100644
index 00000000000..8303bc8181e
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll
@@ -0,0 +1,181 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@b = common global [4 x i32] zeroinitializer, align 16
+@c = common global [4 x i32] zeroinitializer, align 16
+@d = common global [4 x i32] zeroinitializer, align 16
+@e = common global [4 x i32] zeroinitializer, align 16
+@a = common global [4 x i32] zeroinitializer, align 16
+@fb = common global [4 x float] zeroinitializer, align 16
+@fc = common global [4 x float] zeroinitializer, align 16
+@fa = common global [4 x float] zeroinitializer, align 16
+
+; CHECK-LABEL: @addsub
+; CHECK: %5 = add <4 x i32> %3, %4
+; CHECK: %6 = add <4 x i32> %2, %5
+; CHECK: %7 = sub <4 x i32> %2, %5
+; CHECK: %8 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+
+; Function Attrs: nounwind uwtable
+define void @addsub() #0 {
+entry:
+  %0 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 0), align 4
+  %1 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 0), align 4
+  %add = add nsw i32 %0, %1
+  %2 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 0), align 4
+  %3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 0), align 4
+  %add1 = add nsw i32 %2, %3
+  %add2 = add nsw i32 %add, %add1
+  store i32 %add2, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 0), align 4
+  %4 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 1), align 4
+  %5 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 1), align 4
+  %add3 = add nsw i32 %4, %5
+  %6 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 1), align 4
+  %7 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 1), align 4
+  %add4 = add nsw i32 %6, %7
+  %sub = sub nsw i32 %add3, %add4
+  store i32 %sub, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 1), align 4
+  %8 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 2), align 4
+  %9 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 2), align 4
+  %add5 = add nsw i32 %8, %9
+  %10 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 2), align 4
+  %11 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 2), align 4
+  %add6 = add nsw i32 %10, %11
+  %add7 = add nsw i32 %add5, %add6
+  store i32 %add7, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 2), align 4
+  %12 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 3), align 4
+  %13 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 3), align 4
+  %add8 = add nsw i32 %12, %13
+  %14 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 3), align 4
+  %15 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 3), align 4
+  %add9 = add nsw i32 %14, %15
+  %sub10 = sub nsw i32 %add8, %add9
+  store i32 %sub10, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 3), align 4
+  ret void
+}
+
+; CHECK-LABEL: @subadd
+; CHECK: %5 = add <4 x i32> %3, %4
+; CHECK: %6 = sub <4 x i32> %2, %5
+; CHECK: %7 = add <4 x i32> %2, %5
+; CHECK: %8 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+
+; Function Attrs: nounwind uwtable
+define void @subadd() #0 {
+entry:
+  %0 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 0), align 4
+  %1 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 0), align 4
+  %add = add nsw i32 %0, %1
+  %2 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 0), align 4
+  %3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 0), align 4
+  %add1 = add nsw i32 %2, %3
+  %sub = sub nsw i32 %add, %add1
+  store i32 %sub, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 0), align 4
+  %4 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 1), align 4
+  %5 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 1), align 4
+  %add2 = add nsw i32 %4, %5
+  %6 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 1), align 4
+  %7 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 1), align 4
+  %add3 = add nsw i32 %6, %7
+  %add4 = add nsw i32 %add2, %add3
+  store i32 %add4, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 1), align 4
+  %8 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 2), align 4
+  %9 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 2), align 4
+  %add5 = add nsw i32 %8, %9
+  %10 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 2), align 4
+  %11 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 2), align 4
+  %add6 = add nsw i32 %10, %11
+  %sub7 = sub nsw i32 %add5, %add6
+  store i32 %sub7, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 2), align 4
+  %12 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 3), align 4
+  %13 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 3), align 4
+  %add8 = add nsw i32 %12, %13
+  %14 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 3), align 4
+  %15 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 3), align 4
+  %add9 = add nsw i32 %14, %15
+  %add10 = add nsw i32 %add8, %add9
+  store i32 %add10, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 3), align 4
+  ret void
+}
+
+; CHECK-LABEL: @faddfsub
+; CHECK: %2 = fadd <4 x float> %0, %1
+; CHECK: %3 = fsub <4 x float> %0, %1
+; CHECK: %4 = shufflevector <4 x float> %2, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; Function Attrs: nounwind uwtable
+define void @faddfsub() #0 {
+entry:
+  %0 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4
+  %1 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4
+  %add = fadd float %0, %1
+  store float %add, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4
+  %2 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4
+  %3 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4
+  %sub = fsub float %2, %3
+  store float %sub, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4
+  %4 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4
+  %5 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4
+  %add1 = fadd float %4, %5
+  store float %add1, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4
+  %6 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4
+  %7 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4
+  %sub2 = fsub float %6, %7
+  store float %sub2, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4
+  ret void
+}
+
+; CHECK-LABEL: @fsubfadd
+; CHECK: %2 = fsub <4 x float> %0, %1
+; CHECK: %3 = fadd <4 x float> %0, %1
+; CHECK: %4 = shufflevector <4 x float> %2, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; Function Attrs: nounwind uwtable
+define void @fsubfadd() #0 {
+entry:
+  %0 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4
+  %1 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4
+  %sub = fsub float %0, %1
+  store float %sub, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4
+  %2 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4
+  %3 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4
+  %add = fadd float %2, %3
+  store float %add, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4
+  %4 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4
+  %5 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4
+  %sub1 = fsub float %4, %5
+  store float %sub1, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4
+  %6 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4
+  %7 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4
+  %add2 = fadd float %6, %7
+  store float %add2, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4
+  ret void
+}
+
+; CHECK-LABEL: @No_faddfsub
+; CHECK-NOT: fadd <4 x float>
+; CHECK-NOT: fsub <4 x float>
+; CHECK-NOT: shufflevector
+; Function Attrs: nounwind uwtable
+define void @No_faddfsub() #0 {
+entry:
+  %0 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4
+  %1 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4
+  %add = fadd float %0, %1
+  store float %add, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4
+  %2 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4
+  %3 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4
+  %add1 = fadd float %2, %3
+  store float %add1, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4
+  %4 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4
+  %5 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4
+  %add2 = fadd float %4, %5
+  store float %add2, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4
+  %6 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4
+  %7 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4
+  %sub = fsub float %6, %7
+  store float %sub, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4
+  ret void
+}
+
+attributes #0 = { nounwind }
+
```

