diff options
-rw-r--r-- | llvm/lib/Transforms/Utils/BypassSlowDivision.cpp | 3 | ||||
-rw-r--r-- | llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-not-exact.ll | 16 |
2 files changed, 17 insertions, 2 deletions
diff --git a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
index 42287d3bb2e..41a854362c9 100644
--- a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -120,8 +120,7 @@ static bool insertFastDiv(Instruction *I, IntegerType *BypassType,
                                                  BypassType);
 
   // udiv/urem because optimization only handles positive numbers
-  Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
-                                                      ShortDivisorV);
+  Value *ShortQuotientV = FastBuilder.CreateUDiv(ShortDividendV, ShortDivisorV);
   Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
                                                   ShortDivisorV);
   Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
diff --git a/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-not-exact.ll b/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-not-exact.ll
new file mode 100644
index 00000000000..9db23d639a4
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-not-exact.ll
@@ -0,0 +1,16 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; Check that the smaller-width division that the BypassSlowDivision pass
+; creates is not marked as "exact" (that is, it doesn't claim that the
+; numerator is a multiple of the denominator).
+;
+; CHECK-LABEL: @test
+define void @test(i64 %a, i64 %b, i64* %retptr) {
+  ; CHECK: udiv i32
+  %d = sdiv i64 %a, %b
+  store i64 %d, i64* %retptr
+  ret void
+}