From e97c71b8fddcbf61fdbfb13b520761d05a646a2c Mon Sep 17 00:00:00 2001
From: Arnold Schwaighofer
Date: Thu, 11 Jul 2013 15:21:55 +0000
Subject: LoopVectorize: Vectorize all accesses in address space zero with unit stride

We can vectorize them because in the case where we wrap in the address space the
unvectorized code would have had to access a pointer value of zero which is
undefined behavior in address space zero according to the LLVM IR semantics.
(Thank you Duncan, for pointing this out to me).

Fixes PR16592.

llvm-svn: 186088
---
 llvm/test/Transforms/LoopVectorize/safegep.ll | 61 +++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/safegep.ll

(limited to 'llvm/test/Transforms/LoopVectorize/safegep.ll')

diff --git a/llvm/test/Transforms/LoopVectorize/safegep.ll b/llvm/test/Transforms/LoopVectorize/safegep.ll
new file mode 100644
index 00000000000..46ec28beeaa
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/safegep.ll
@@ -0,0 +1,61 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f16:16:16-f32:32:32-f64:32:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+
+
+; We can vectorize this code because if the address computation would wrap then
+; a load from 0 would take place which is undefined behaviour in address space 0
+; according to LLVM IR semantics.
+
+; PR16592
+
+; CHECK: safe
+; CHECK: <4 x float>
+; @safe: 64 unit-stride iterations of A[i] = A[i] * (B[i] + K); A and B are plain float* (default address space).
+define void @safe(float* %A, float* %B, float %K) {
+entry:
+  br label %"<bb 3>"
+
+"<bb 3>":
+  %i_15 = phi i32 [ 0, %entry ], [ %i_19, %"<bb 3>" ]
+  %pp3 = getelementptr float* %A, i32 %i_15
+  %D.1396_10 = load float* %pp3, align 4
+  %pp24 = getelementptr float* %B, i32 %i_15
+  %D.1398_15 = load float* %pp24, align 4
+  %D.1399_17 = fadd float %D.1398_15, %K
+  %D.1400_18 = fmul float %D.1396_10, %D.1399_17
+  store float %D.1400_18, float* %pp3, align 4 ; result is written back to A[i]
+  %i_19 = add nsw i32 %i_15, 1
+  %exitcond = icmp ne i32 %i_19, 64 ; keep looping until the incremented index reaches 64
+  br i1 %exitcond, label %"<bb 3>", label %return
+
+return:
+  ret void
+}
+
+; In a non-default address space we don't have this rule.
+
+; CHECK: notsafe
+; CHECK-NOT: <4 x float>
+; @notsafe: same loop body as @safe, but %A points into addrspace(5); %B is still a plain float*.
+define void @notsafe(float addrspace(5) * %A, float* %B, float %K) {
+entry:
+  br label %"<bb 3>"
+
+"<bb 3>":
+  %i_15 = phi i32 [ 0, %entry ], [ %i_19, %"<bb 3>" ]
+  %pp3 = getelementptr float addrspace(5) * %A, i32 %i_15
+  %D.1396_10 = load float addrspace(5) * %pp3, align 4
+  %pp24 = getelementptr float* %B, i32 %i_15
+  %D.1398_15 = load float* %pp24, align 4
+  %D.1399_17 = fadd float %D.1398_15, %K
+  %D.1400_18 = fmul float %D.1396_10, %D.1399_17
+  store float %D.1400_18, float addrspace(5) * %pp3, align 4 ; result is written back through the addrspace(5) pointer
+  %i_19 = add nsw i32 %i_15, 1
+  %exitcond = icmp ne i32 %i_19, 64 ; keep looping until the incremented index reaches 64
+  br i1 %exitcond, label %"<bb 3>", label %return
+
+return:
+  ret void
+}
+
+
-- cgit v1.2.3