diff options
author | Tobias Grosser <tobias@grosser.es> | 2015-06-24 04:13:29 +0000 |
---|---|---|
committer | Tobias Grosser <tobias@grosser.es> | 2015-06-24 04:13:29 +0000 |
commit | 50165ffdee8b2be80b1dfc2b439c0131fb943958 (patch) | |
tree | e7ddff5f7232ad9eda1f2afa194b7d36097d4c49 | |
parent | d157d470621134aa556e6aee940cfce5ce553a5b (diff) | |
download | bcm5719-llvm-50165ffdee8b2be80b1dfc2b439c0131fb943958.tar.gz bcm5719-llvm-50165ffdee8b2be80b1dfc2b439c0131fb943958.zip |
Add support for srem instruction
Remainder operations with a constant divisor can be modeled as quasi-affine
expressions. This patch adds support for detecting and modeling them. We also
add a test that ensures they are correctly code generated.
This patch was extracted from a larger patch contributed by Johannes Doerfert
in http://reviews.llvm.org/D5293
llvm-svn: 240518
-rw-r--r-- | polly/lib/Analysis/ScopInfo.cpp | 19 | ||||
-rw-r--r-- | polly/lib/Support/SCEVValidator.cpp | 16 | ||||
-rw-r--r-- | polly/test/Isl/CodeGen/srem-in-other-bb.ll | 38 | ||||
-rw-r--r-- | polly/test/ScopDetect/srem_with_parametric_divisor.ll | 35 | ||||
-rw-r--r-- | polly/test/ScopInfo/NonAffine/non_affine_but_srem.ll | 81 | ||||
-rw-r--r-- | polly/test/ScopInfo/reduction_alternating_base.ll | 9 |
6 files changed, 194 insertions, 4 deletions
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index 0e7527e7167..654a95c400d 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -105,6 +105,7 @@ private: __isl_give isl_pw_aff *visitUMaxExpr(const SCEVUMaxExpr *Expr); __isl_give isl_pw_aff *visitUnknown(const SCEVUnknown *Expr); __isl_give isl_pw_aff *visitSDivInstruction(Instruction *SDiv); + __isl_give isl_pw_aff *visitSRemInstruction(Instruction *SDiv); friend struct SCEVVisitor<SCEVAffinator, isl_pw_aff *>; }; @@ -283,11 +284,29 @@ __isl_give isl_pw_aff *SCEVAffinator::visitSDivInstruction(Instruction *SDiv) { return isl_pw_aff_tdiv_q(DividendPWA, DivisorPWA); } +__isl_give isl_pw_aff *SCEVAffinator::visitSRemInstruction(Instruction *SRem) { + assert(SRem->getOpcode() == Instruction::SRem && "Assumed SRem instruction!"); + auto *SE = S->getSE(); + + auto *Divisor = dyn_cast<ConstantInt>(SRem->getOperand(1)); + assert(Divisor && "SRem is no parameter but has a non-constant RHS."); + auto *DivisorVal = isl_valFromAPInt(Ctx, Divisor->getValue(), + /* isSigned */ true); + + auto *Dividend = SRem->getOperand(0); + auto *DividendSCEV = SE->getSCEV(Dividend); + auto *DividendPWA = visit(DividendSCEV); + + return isl_pw_aff_mod_val(DividendPWA, isl_val_abs(DivisorVal)); +} + __isl_give isl_pw_aff *SCEVAffinator::visitUnknown(const SCEVUnknown *Expr) { if (Instruction *I = dyn_cast<Instruction>(Expr->getValue())) { switch (I->getOpcode()) { case Instruction::SDiv: return visitSDivInstruction(I); + case Instruction::SRem: + return visitSRemInstruction(I); default: break; // Fall through. 
} diff --git a/polly/lib/Support/SCEVValidator.cpp b/polly/lib/Support/SCEVValidator.cpp index b06e8beb7f9..2092047e92e 100644 --- a/polly/lib/Support/SCEVValidator.cpp +++ b/polly/lib/Support/SCEVValidator.cpp @@ -349,6 +349,20 @@ public: return visit(DividendSCEV); } + ValidatorResult visitSRemInstruction(Instruction *SRem, const SCEV *S) { + assert(SRem->getOpcode() == Instruction::SRem && + "Assumed SRem instruction!"); + + auto *Divisor = SRem->getOperand(1); + auto *CI = dyn_cast<ConstantInt>(Divisor); + if (!CI) + return visitGenericInst(SRem, S); + + auto *Dividend = SRem->getOperand(0); + auto *DividendSCEV = SE.getSCEV(Dividend); + return visit(DividendSCEV); + } + ValidatorResult visitUnknown(const SCEVUnknown *Expr) { Value *V = Expr->getValue(); @@ -371,6 +385,8 @@ public: switch (I->getOpcode()) { case Instruction::SDiv: return visitSDivInstruction(I, Expr); + case Instruction::SRem: + return visitSRemInstruction(I, Expr); default: return visitGenericInst(I, Expr); } diff --git a/polly/test/Isl/CodeGen/srem-in-other-bb.ll b/polly/test/Isl/CodeGen/srem-in-other-bb.ll new file mode 100644 index 00000000000..08dc5451c88 --- /dev/null +++ b/polly/test/Isl/CodeGen/srem-in-other-bb.ll @@ -0,0 +1,38 @@ +; RUN: opt %loadPolly -polly-codegen -S -polly-no-early-exit < %s | FileCheck %s +; +; void pos(float *A, long n) { +; for (long i = 0; i < 100; i++) +; A[n % 42] += 1; +; } +; +; CHECK: polly.stmt.bb3: +; CHECK: %p_tmp.moved.to.bb3 = srem i64 %n, 42 +; CHECK: %p_tmp3 = getelementptr inbounds float, float* %A, i64 %p_tmp.moved.to.bb3 + +define void @pos(float* %A, i64 %n) { +bb: + br label %bb1 + +bb1: ; preds = %bb6, %bb + %i.0 = phi i64 [ 0, %bb ], [ %tmp7, %bb6 ] + %exitcond = icmp ne i64 %i.0, 100 + br i1 %exitcond, label %bb2, label %bb8 + +bb2: ; preds = %bb1 + %tmp = srem i64 %n, 42 + br label %bb3 + +bb3: + %tmp3 = getelementptr inbounds float, float* %A, i64 %tmp + %tmp4 = load float, float* %tmp3, align 4 + %tmp5 = fadd float %tmp4, 1.000000e+00 + 
store float %tmp5, float* %tmp3, align 4 + br label %bb6 + +bb6: ; preds = %bb2 + %tmp7 = add nsw i64 %i.0, 1 + br label %bb1 + +bb8: ; preds = %bb1 + ret void +} diff --git a/polly/test/ScopDetect/srem_with_parametric_divisor.ll b/polly/test/ScopDetect/srem_with_parametric_divisor.ll new file mode 100644 index 00000000000..757a4da2257 --- /dev/null +++ b/polly/test/ScopDetect/srem_with_parametric_divisor.ll @@ -0,0 +1,35 @@ +; RUN: opt %loadPolly -polly-detect -analyze < %s | FileCheck %s +; +; CHECK-NOT: Valid Region for Scop: +; +; void foo(float *A, long n, long p) { +; for (long i = 0; i < 100; i++) +; A[n % p] += 1; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @foo(float* %A, i64 %n, i64 %p) { +bb: + br label %bb1 + +bb1: ; preds = %bb6, %bb + %i.0 = phi i64 [ 0, %bb ], [ %tmp7, %bb6 ] + %exitcond = icmp ne i64 %i.0, 100 + br i1 %exitcond, label %bb2, label %bb8 + +bb2: ; preds = %bb1 + %tmp = srem i64 %n, %p + %tmp3 = getelementptr inbounds float, float* %A, i64 %tmp + %tmp4 = load float, float* %tmp3, align 4 + %tmp5 = fadd float %tmp4, 1.000000e+00 + store float %tmp5, float* %tmp3, align 4 + br label %bb6 + +bb6: ; preds = %bb2 + %tmp7 = add nsw i64 %i.0, 1 + br label %bb1 + +bb8: ; preds = %bb1 + ret void +} diff --git a/polly/test/ScopInfo/NonAffine/non_affine_but_srem.ll b/polly/test/ScopInfo/NonAffine/non_affine_but_srem.ll new file mode 100644 index 00000000000..acabf4efaae --- /dev/null +++ b/polly/test/ScopInfo/NonAffine/non_affine_but_srem.ll @@ -0,0 +1,81 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; void pos(float *A, long n) { +; for (long i = 0; i < 100; i++) +; A[n % 42] += 1; +; } +; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [n] -> { Stmt_bb2[i0] -> MemRef_A[o0] : +; CHECK: exists (e0 = floor((-n + o0)/42): +; CHECK: 42e0 = -n + o0 and o0 <= 41 and o0 >= 0) }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [n] -> { 
Stmt_bb2[i0] -> MemRef_A[o0] : +; CHECK: exists (e0 = floor((-n + o0)/42): +; CHECK: 42e0 = -n + o0 and o0 <= 41 and o0 >= 0) }; +; +; void neg(float *A, long n) { +; for (long i = 0; i < 100; i++) +; A[n % (-42)] += 1; +; } +; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [n] -> { Stmt_bb2[i0] -> MemRef_A[o0] : +; CHECK: exists (e0 = floor((-n + o0)/42): +; CHECK: 42e0 = -n + o0 and o0 <= 41 and o0 >= 0) }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [n] -> { Stmt_bb2[i0] -> MemRef_A[o0] : +; CHECK: exists (e0 = floor((-n + o0)/42): +; CHECK: 42e0 = -n + o0 and o0 <= 41 and o0 >= 0) }; +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @pos(float* %A, i64 %n) { +bb: + br label %bb1 + +bb1: ; preds = %bb6, %bb + %i.0 = phi i64 [ 0, %bb ], [ %tmp7, %bb6 ] + %exitcond = icmp ne i64 %i.0, 100 + br i1 %exitcond, label %bb2, label %bb8 + +bb2: ; preds = %bb1 + %tmp = srem i64 %n, 42 + %tmp3 = getelementptr inbounds float, float* %A, i64 %tmp + %tmp4 = load float, float* %tmp3, align 4 + %tmp5 = fadd float %tmp4, 1.000000e+00 + store float %tmp5, float* %tmp3, align 4 + br label %bb6 + +bb6: ; preds = %bb2 + %tmp7 = add nsw i64 %i.0, 1 + br label %bb1 + +bb8: ; preds = %bb1 + ret void +} + +define void @neg(float* %A, i64 %n) { +bb: + br label %bb1 + +bb1: ; preds = %bb6, %bb + %i.0 = phi i64 [ 0, %bb ], [ %tmp7, %bb6 ] + %exitcond = icmp ne i64 %i.0, 100 + br i1 %exitcond, label %bb2, label %bb8 + +bb2: ; preds = %bb1 + %tmp = srem i64 %n, -42 + %tmp3 = getelementptr inbounds float, float* %A, i64 %tmp + %tmp4 = load float, float* %tmp3, align 4 + %tmp5 = fadd float %tmp4, 1.000000e+00 + store float %tmp5, float* %tmp3, align 4 + br label %bb6 + +bb6: ; preds = %bb2 + %tmp7 = add nsw i64 %i.0, 1 + br label %bb1 + +bb8: ; preds = %bb1 + ret void +} diff --git a/polly/test/ScopInfo/reduction_alternating_base.ll b/polly/test/ScopInfo/reduction_alternating_base.ll index 28057b60cb2..8d490e5b44f 
100644 --- a/polly/test/ScopInfo/reduction_alternating_base.ll +++ b/polly/test/ScopInfo/reduction_alternating_base.ll @@ -1,15 +1,16 @@ ; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s ; -; FIXME: We cannot detect this SCoP yet but as soon as we can we should check -; that the reduction is detected! -; -; CHECK-NOT: Schedule ; ; void f(int *A) { ; for (int i = 0; i < 1024; i++) ; A[i % 2] += i; ; } ; +; Verify that we detect the reduction on A +; +; CHECK: ReadAccess := [Reduction Type: +] [Scalar: 0] +; CHECK: MustWriteAccess := [Reduction Type: +] [Scalar: 0] +; target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" define void @f(i32* %A) { |