summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPhilip Reames <listmail@philipreames.com>2019-04-23 15:25:14 +0000
committerPhilip Reames <listmail@philipreames.com>2019-04-23 15:25:14 +0000
commit2ce017026af538867d592b64a75e182cddab1961 (patch)
tree5853d8978ea822d535b45281b76740605a41346f
parent12a561fa1b79c449d518100e1dd0dfce0a37b65d (diff)
downloadbcm5719-llvm-2ce017026af538867d592b64a75e182cddab1961.tar.gz
bcm5719-llvm-2ce017026af538867d592b64a75e182cddab1961.zip
[InstCombine] Convert a masked.load of a dereferenceable address to an unconditional load
If we have a masked.load from a location we know to be dereferenceable, we can simply issue a speculative unconditional load against that address. The key advantage is that it produces IR which is well understood by the optimizer. The select (cnd, load, passthrough) form produced should be pattern matchable back to hardware predication if profitable. Differential Revision: https://reviews.llvm.org/D59703 llvm-svn: 359000
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp18
-rw-r--r--llvm/test/Transforms/InstCombine/masked_intrinsics.ll5
2 files changed, 17 insertions, 6 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 0fe52b145fe..51c72eb1837 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -1183,17 +1184,26 @@ static APInt possiblyDemandedEltsInMask(Value *Mask) {
}
// TODO, Obvious Missing Transforms:
-// * Dereferenceable address -> speculative load/select
// * Narrow width by halfs excluding zero/undef lanes
static Value *simplifyMaskedLoad(const IntrinsicInst &II,
InstCombiner::BuilderTy &Builder) {
+ Value *LoadPtr = II.getArgOperand(0);
+ unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
+
// If the mask is all ones or undefs, this is a plain vector load of the 1st
// argument.
- if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
- Value *LoadPtr = II.getArgOperand(0);
- unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
+ if (maskIsAllOneOrUndef(II.getArgOperand(2)))
return Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
"unmaskedload");
+
+ // If we can unconditionally load from this address, replace with a
+ // load/select idiom. TODO: use DT for context sensitive query
+ if (isDereferenceableAndAlignedPointer(LoadPtr, Alignment,
+ II.getModule()->getDataLayout(),
+ &II, nullptr)) {
+ Value *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
+ "unmaskedload");
+ return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
}
return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
index d71402e1192..b451724866e 100644
--- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
@@ -87,8 +87,9 @@ define <2 x double> @load_speculative(<2 x double>* dereferenceable(16) %ptr,
; CHECK-LABEL: @load_speculative(
; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]])
-; CHECK-NEXT: ret <2 x double> [[RES]]
+; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[MASK:%.*]], <2 x double> [[UNMASKEDLOAD]], <2 x double> [[PTV2]]
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
double %pt, <2 x i1> %mask) {
%ptv1 = insertelement <2 x double> undef, double %pt, i64 0
OpenPOWER on IntegriCloud