diff options
author | Sanjay Patel <spatel@rotateright.com> | 2016-02-01 17:00:10 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2016-02-01 17:00:10 +0000 |
commit | b695c5557cc9453a250cd36f137853b0e39cfd74 (patch) | |
tree | d2a71f569a862d8512265a7a01a543a8f613a099 | |
parent | 916895073ea48349e044fc00e76256cae9d88b41 (diff) | |
download | bcm5719-llvm-b695c5557cc9453a250cd36f137853b0e39cfd74.tar.gz bcm5719-llvm-b695c5557cc9453a250cd36f137853b0e39cfd74.zip |
[InstCombine] simplify masked load intrinsics with all ones or zeros masks
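A masked load with an all-zeros mask performs no load at all, so its result is simply the passthru operand.
A masked load with an all-ones mask loads every lane, so it is equivalent to a normal vector load from the pointer operand with the intrinsic's alignment.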
Differential Revision: http://reviews.llvm.org/D16691
llvm-svn: 259369
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 30 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/masked_intrinsics.ll | 23 |
2 files changed, 32 insertions, 21 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 2661e579876..ede5aeb1cd1 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -753,6 +753,26 @@ static Value *simplifyMinnumMaxnum(const IntrinsicInst &II) { return nullptr; } +static Value *simplifyMaskedLoad(const IntrinsicInst &II, + InstCombiner::BuilderTy &Builder) { + auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2)); + if (!ConstMask) + return nullptr; + + // If the mask is all zeros, the "passthru" argument is the result. + if (ConstMask->isNullValue()) + return II.getArgOperand(3); + + // If the mask is all ones, this is a plain vector load of the 1st argument. + if (ConstMask->isAllOnesValue()) { + Value *LoadPtr = II.getArgOperand(0); + unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue(); + return Builder.CreateAlignedLoad(LoadPtr, Alignment, "unmaskedload"); + } + + return nullptr; +} + /// CallInst simplification. This mostly only handles folding of intrinsic /// instructions. For normal calls, it allows visitCallSite to do the heavy /// lifting. @@ -877,6 +897,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::masked_load: + if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, *Builder)) + return ReplaceInstUsesWith(CI, SimplifiedMaskedOp); + break; + + // TODO: Handle the other masked ops. 
+ // case Intrinsic::masked_store: + // case Intrinsic::masked_gather: + // case Intrinsic::masked_scatter: + case Intrinsic::powi: if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) { // powi(x, 0) -> 1.0 diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll index a0330e84d1d..30e98b8f746 100644 --- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll @@ -2,15 +2,13 @@ declare <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptrs, i32, <2 x i1> %mask, <2 x double> %src0) -; FIXME: All of these could be simplified. define <2 x double> @load_zeromask(<2 x double>* %ptr, <2 x double> %passthru) { %res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptr, i32 1, <2 x i1> zeroinitializer, <2 x double> %passthru) ret <2 x double> %res ; CHECK-LABEL: @load_zeromask( -; CHECK-NEXT: %res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptr, i32 1, <2 x i1> zeroinitializer, <2 x double> %passthru) -; CHECK-NEXT ret <2 x double> %res +; CHECK-NEXT ret <2 x double> %passthru } define <2 x double> @load_onemask(<2 x double>* %ptr, <2 x double> %passthru) { @@ -18,24 +16,7 @@ define <2 x double> @load_onemask(<2 x double>* %ptr, <2 x double> %passthru) { ret <2 x double> %res ; CHECK-LABEL: @load_onemask( -; CHECK-NEXT: %res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 true, i1 true>, <2 x double> %passthru) -; CHECK-NEXT ret <2 x double> %res -} - -define <2 x double> @load_onesetbitmask1(<2 x double>* %ptr, <2 x double> %passthru) { - %res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptr, i32 3, <2 x i1> <i1 0, i1 1>, <2 x double> %passthru) - ret <2 x double> %res - -; CHECK-LABEL: @load_onesetbitmask1( -; CHECK-NEXT: %res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptr, i32 3, <2 x i1> <i1 false, i1 true>, <2 x double> %passthru) +; 
CHECK-NEXT: %unmaskedload = load <2 x double>, <2 x double>* %ptr, align 2 ; CHECK-NEXT ret <2 x double> %res } -define <2 x double> @load_onesetbitmask2(<2 x double>* %ptr, <2 x double> %passthru) { - %res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptr, i32 4, <2 x i1> <i1 1, i1 0>, <2 x double> %passthru) - ret <2 x double> %res - -; CHECK-LABEL: @load_onesetbitmask2( -; CHECK-NEXT: %res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptr, i32 4, <2 x i1> <i1 true, i1 false>, <2 x double> %passthru) -; CHECK-NEXT ret <2 x double> %res -} |