diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-10-21 23:00:20 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-10-21 23:00:20 +0000 |
commit | d6511b49ace8e3d55ad43beb2cb142022a8032cc (patch) | |
tree | ec95b0252ce02c2c5f0445bdbdb53d0081bd9f4c /llvm/test/Transforms/LoopVectorize/intrinsic.ll | |
parent | 44e5b4e533a92ff44ae57cf0129398b162e82bf6 (diff) | |
download | bcm5719-llvm-d6511b49ace8e3d55ad43beb2cb142022a8032cc.tar.gz bcm5719-llvm-d6511b49ace8e3d55ad43beb2cb142022a8032cc.zip |
Add minnum / maxnum intrinsics
These are named following the IEEE-754 names for these
functions, rather than the libm fmin / fmax to avoid
possible ambiguities. Some languages may implement something
resembling fmin / fmax which returns NaN if either operand is
NaN, in order to propagate errors. These intrinsics implement the
IEEE-754 semantics of returning the other operand if either
operand is a NaN, i.e. a NaN is treated as missing data.
llvm-svn: 220341
Diffstat (limited to 'llvm/test/Transforms/LoopVectorize/intrinsic.ll')
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/intrinsic.ll | 56 |
1 files changed, 56 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/intrinsic.ll b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
index 80149d23a46..d48731a0743 100644
--- a/llvm/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
@@ -1192,3 +1192,59 @@ for.body: ; preds = %entry, %for.body
 for.end: ; preds = %for.body, %entry
   ret void
 }
+
+declare float @llvm.minnum.f32(float, float) nounwind readnone
+
+;CHECK-LABEL: @minnum_f32(
+;CHECK: llvm.minnum.v4f32
+;CHECK: ret void
+define void @minnum_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
+entry:
+  %cmp9 = icmp sgt i32 %n, 0
+  br i1 %cmp9, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float* %z, i64 %indvars.iv
+  %1 = load float* %arrayidx2, align 4
+  %call = tail call float @llvm.minnum.f32(float %0, float %1) nounwind readnone
+  %arrayidx4 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx4, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.maxnum.f32(float, float) nounwind readnone
+
+;CHECK-LABEL: @maxnum_f32(
+;CHECK: llvm.maxnum.v4f32
+;CHECK: ret void
+define void @maxnum_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
+entry:
+  %cmp9 = icmp sgt i32 %n, 0
+  br i1 %cmp9, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float* %z, i64 %indvars.iv
+  %1 = load float* %arrayidx2, align 4
+  %call = tail call float @llvm.maxnum.f32(float %0, float %1) nounwind readnone
+  %arrayidx4 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx4, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+  ret void
+}