diff options
-rw-r--r-- | llvm/test/CodeGen/PowerPC/lsr-insns-cost.ll | 66 |
1 files changed, 66 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/PowerPC/lsr-insns-cost.ll b/llvm/test/CodeGen/PowerPC/lsr-insns-cost.ll new file mode 100644 index 00000000000..4e81811a56f --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/lsr-insns-cost.ll @@ -0,0 +1,66 @@ +; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs < %s \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s + +; void test(unsigned *a, unsigned *b, unsigned *c) +; { +; for (unsigned long i = 0; i < 1024; i++) +; c[i] = a[i] + b[i]; +; } +; +; compile with -fno-unroll-loops + +define void @lsr-insts-cost(i32* %0, i32* %1, i32* %2) { +; CHECK-LABEL: lsr-insts-cost +; CHECK: .LBB0_4: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lxvd2x vs34, 0, r3 +; CHECK-NEXT: lxvd2x vs35, 0, r4 +; CHECK-NEXT: addi r4, r4, 16 +; CHECK-NEXT: addi r3, r3, 16 +; CHECK-NEXT: vadduwm v2, v3, v2 +; CHECK-NEXT: stxvd2x vs34, 0, r5 +; CHECK-NEXT: addi r5, r5, 16 +; CHECK-NEXT: bdnz .LBB0_4 + %4 = getelementptr i32, i32* %2, i64 1024 + %5 = getelementptr i32, i32* %0, i64 1024 + %6 = getelementptr i32, i32* %1, i64 1024 + %7 = icmp ugt i32* %5, %2 + %8 = icmp ugt i32* %4, %0 + %9 = and i1 %7, %8 + %10 = icmp ugt i32* %6, %2 + %11 = icmp ugt i32* %4, %1 + %12 = and i1 %10, %11 + %13 = or i1 %9, %12 + br i1 %13, label %28, label %14 + +14: ; preds = %3, %14 + %15 = phi i64 [ %25, %14 ], [ 0, %3 ] + %16 = getelementptr inbounds i32, i32* %0, i64 %15 + %17 = bitcast i32* %16 to <4 x i32>* + %18 = load <4 x i32>, <4 x i32>* %17, align 4 + %19 = getelementptr inbounds i32, i32* %1, i64 %15 + %20 = bitcast i32* %19 to <4 x i32>* + %21 = load <4 x i32>, <4 x i32>* %20, align 4 + %22 = add <4 x i32> %21, %18 + %23 = getelementptr inbounds i32, i32* %2, i64 %15 + %24 = bitcast i32* %23 to <4 x i32>* + store <4 x i32> %22, <4 x i32>* %24, align 4 + %25 = add i64 %15, 4 + %26 = icmp eq i64 %25, 1024 + br i1 %26, label %27, label %14 + +27: ; preds = %14, %28 + ret void + +28: ; preds = %3, %28 + %29 = phi i64 [ %36, %28 ], [ 0, %3 ] + %30 = getelementptr inbounds i32, i32* %0, i64 %29 + %31 = load i32, i32* %30, align 4 + %32 = getelementptr inbounds i32, i32* %1, i64 %29 + %33 = load i32, i32* %32, align 4 + %34 = add i32 %33, %31 + %35 = getelementptr inbounds i32, i32* %2, i64 %29 + store i32 %34, i32* %35, align 4 + %36 = add nuw nsw i64 %29, 1 + %37 = icmp eq i64 %36, 1024 + br i1 %37, label %27, label %28 +} |