summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMikael Holmen <mikael.holmen@ericsson.com>2018-02-01 06:38:34 +0000
committerMikael Holmen <mikael.holmen@ericsson.com>2018-02-01 06:38:34 +0000
commit6d06976e74dcc4ee9401d7194da49b16f57fee81 (patch)
tree0f695accdffefec477fd1b0ec20350e8db7014f7
parent1a6493b4e08c7789138b8087a44bcec53b9cd3fa (diff)
downloadbcm5719-llvm-6d06976e74dcc4ee9401d7194da49b16f57fee81.tar.gz
bcm5719-llvm-6d06976e74dcc4ee9401d7194da49b16f57fee81.zip
[LSR] Don't force bases of foldable formulae to the final type.
Summary: Before emitting code for scaled registers, we prevent SCEVExpander from hoisting any scaled addressing mode by emitting all the bases first. However, these bases are being forced to the final type, resulting in some odd code. For example, if the type of the base is an integer and the final type is a pointer, we will emit an inttoptr for the base, a ptrtoint for the scale, and then a 'reverse' GEP where the GEP pointer is actually the base integer and the index is the pointer. It's more intuitive to use the pointer as a pointer and the integer as index. Patch by: Bevin Hansson Reviewers: atrick, qcolombet, sanjoy Reviewed By: qcolombet Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D42103 llvm-svn: 323946
-rw-r--r--llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp2
-rw-r--r--llvm/test/Transforms/LoopStrengthReduce/X86/macro-fuse-cmp.ll56
-rw-r--r--llvm/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll20
3 files changed, 35 insertions, 43 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 332c074a1df..4b8e2286ed9 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -4993,7 +4993,7 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
// Unless the addressing mode will not be folded.
if (!Ops.empty() && LU.Kind == LSRUse::Address &&
isAMCompletelyFolded(TTI, LU, F)) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), nullptr);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/macro-fuse-cmp.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/macro-fuse-cmp.ll
index b1909ed3299..219aed4b836 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/macro-fuse-cmp.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/macro-fuse-cmp.ll
@@ -16,27 +16,25 @@ target triple = "x86_64-unknown-unknown"
define void @maxArray(double* noalias nocapture %x, double* noalias nocapture readonly %y) {
; JAG-LABEL: @maxArray(
; JAG-NEXT: entry:
-; JAG-NEXT: [[Y1:%.*]] = bitcast double* [[Y:%.*]] to <2 x double>*
-; JAG-NEXT: [[X4:%.*]] = bitcast double* [[X:%.*]] to <2 x double>*
-; JAG-NEXT: [[X45:%.*]] = bitcast <2 x double>* [[X4]] to i8*
-; JAG-NEXT: [[Y12:%.*]] = bitcast <2 x double>* [[Y1]] to i8*
+; JAG-NEXT: [[Y1:%.*]] = bitcast double* [[Y:%.*]] to i8*
+; JAG-NEXT: [[X3:%.*]] = bitcast double* [[X:%.*]] to i8*
; JAG-NEXT: br label [[VECTOR_BODY:%.*]]
; JAG: vector.body:
; JAG-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ -524288, [[ENTRY:%.*]] ]
-; JAG-NEXT: [[UGLYGEP9:%.*]] = getelementptr i8, i8* [[X45]], i64 [[LSR_IV]]
-; JAG-NEXT: [[UGLYGEP910:%.*]] = bitcast i8* [[UGLYGEP9]] to <2 x double>*
-; JAG-NEXT: [[SCEVGEP11:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP910]], i64 32768
-; JAG-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[Y12]], i64 [[LSR_IV]]
-; JAG-NEXT: [[UGLYGEP3:%.*]] = bitcast i8* [[UGLYGEP]] to <2 x double>*
-; JAG-NEXT: [[SCEVGEP:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP3]], i64 32768
-; JAG-NEXT: [[XVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP11]], align 8
+; JAG-NEXT: [[UGLYGEP7:%.*]] = getelementptr i8, i8* [[X3]], i64 [[LSR_IV]]
+; JAG-NEXT: [[UGLYGEP78:%.*]] = bitcast i8* [[UGLYGEP7]] to <2 x double>*
+; JAG-NEXT: [[SCEVGEP9:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP78]], i64 32768
+; JAG-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[Y1]], i64 [[LSR_IV]]
+; JAG-NEXT: [[UGLYGEP2:%.*]] = bitcast i8* [[UGLYGEP]] to <2 x double>*
+; JAG-NEXT: [[SCEVGEP:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP2]], i64 32768
+; JAG-NEXT: [[XVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP9]], align 8
; JAG-NEXT: [[YVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP]], align 8
; JAG-NEXT: [[CMP:%.*]] = fcmp ogt <2 x double> [[YVAL]], [[XVAL]]
; JAG-NEXT: [[MAX:%.*]] = select <2 x i1> [[CMP]], <2 x double> [[YVAL]], <2 x double> [[XVAL]]
-; JAG-NEXT: [[UGLYGEP6:%.*]] = getelementptr i8, i8* [[X45]], i64 [[LSR_IV]]
-; JAG-NEXT: [[UGLYGEP67:%.*]] = bitcast i8* [[UGLYGEP6]] to <2 x double>*
-; JAG-NEXT: [[SCEVGEP8:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP67]], i64 32768
-; JAG-NEXT: store <2 x double> [[MAX]], <2 x double>* [[SCEVGEP8]], align 8
+; JAG-NEXT: [[UGLYGEP4:%.*]] = getelementptr i8, i8* [[X3]], i64 [[LSR_IV]]
+; JAG-NEXT: [[UGLYGEP45:%.*]] = bitcast i8* [[UGLYGEP4]] to <2 x double>*
+; JAG-NEXT: [[SCEVGEP6:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP45]], i64 32768
+; JAG-NEXT: store <2 x double> [[MAX]], <2 x double>* [[SCEVGEP6]], align 8
; JAG-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 16
; JAG-NEXT: [[DONE:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
; JAG-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
@@ -45,27 +43,25 @@ define void @maxArray(double* noalias nocapture %x, double* noalias nocapture re
;
; HSW-LABEL: @maxArray(
; HSW-NEXT: entry:
-; HSW-NEXT: [[Y1:%.*]] = bitcast double* [[Y:%.*]] to <2 x double>*
-; HSW-NEXT: [[X4:%.*]] = bitcast double* [[X:%.*]] to <2 x double>*
-; HSW-NEXT: [[X45:%.*]] = bitcast <2 x double>* [[X4]] to i8*
-; HSW-NEXT: [[Y12:%.*]] = bitcast <2 x double>* [[Y1]] to i8*
+; HSW-NEXT: [[Y1:%.*]] = bitcast double* [[Y:%.*]] to i8*
+; HSW-NEXT: [[X3:%.*]] = bitcast double* [[X:%.*]] to i8*
; HSW-NEXT: br label [[VECTOR_BODY:%.*]]
; HSW: vector.body:
; HSW-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ -524288, [[ENTRY:%.*]] ]
-; HSW-NEXT: [[UGLYGEP9:%.*]] = getelementptr i8, i8* [[X45]], i64 [[LSR_IV]]
-; HSW-NEXT: [[UGLYGEP910:%.*]] = bitcast i8* [[UGLYGEP9]] to <2 x double>*
-; HSW-NEXT: [[SCEVGEP11:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP910]], i64 32768
-; HSW-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[Y12]], i64 [[LSR_IV]]
-; HSW-NEXT: [[UGLYGEP3:%.*]] = bitcast i8* [[UGLYGEP]] to <2 x double>*
-; HSW-NEXT: [[SCEVGEP:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP3]], i64 32768
-; HSW-NEXT: [[XVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP11]], align 8
+; HSW-NEXT: [[UGLYGEP7:%.*]] = getelementptr i8, i8* [[X3]], i64 [[LSR_IV]]
+; HSW-NEXT: [[UGLYGEP78:%.*]] = bitcast i8* [[UGLYGEP7]] to <2 x double>*
+; HSW-NEXT: [[SCEVGEP9:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP78]], i64 32768
+; HSW-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[Y1]], i64 [[LSR_IV]]
+; HSW-NEXT: [[UGLYGEP2:%.*]] = bitcast i8* [[UGLYGEP]] to <2 x double>*
+; HSW-NEXT: [[SCEVGEP:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP2]], i64 32768
+; HSW-NEXT: [[XVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP9]], align 8
; HSW-NEXT: [[YVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP]], align 8
; HSW-NEXT: [[CMP:%.*]] = fcmp ogt <2 x double> [[YVAL]], [[XVAL]]
; HSW-NEXT: [[MAX:%.*]] = select <2 x i1> [[CMP]], <2 x double> [[YVAL]], <2 x double> [[XVAL]]
-; HSW-NEXT: [[UGLYGEP6:%.*]] = getelementptr i8, i8* [[X45]], i64 [[LSR_IV]]
-; HSW-NEXT: [[UGLYGEP67:%.*]] = bitcast i8* [[UGLYGEP6]] to <2 x double>*
-; HSW-NEXT: [[SCEVGEP8:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP67]], i64 32768
-; HSW-NEXT: store <2 x double> [[MAX]], <2 x double>* [[SCEVGEP8]], align 8
+; HSW-NEXT: [[UGLYGEP4:%.*]] = getelementptr i8, i8* [[X3]], i64 [[LSR_IV]]
+; HSW-NEXT: [[UGLYGEP45:%.*]] = bitcast i8* [[UGLYGEP4]] to <2 x double>*
+; HSW-NEXT: [[SCEVGEP6:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP45]], i64 32768
+; HSW-NEXT: store <2 x double> [[MAX]], <2 x double>* [[SCEVGEP6]], align 8
; HSW-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 16
; HSW-NEXT: [[DONE:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
; HSW-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll
index 5a582b25bde..b9af5a0c68a 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll
@@ -15,28 +15,25 @@ define void @foo(i32 %size, i32 %nsteps, i32 %hsize, i32* %lined, i8* %maxarray)
; CHECK-NEXT: [[T0:%.*]] = zext i32 [[SIZE]] to i64
; CHECK-NEXT: [[T1:%.*]] = sext i32 [[NSTEPS:%.*]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[T0]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to i8*
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], [[FOR_INC:%.*]] ], [ 1, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ]
-; CHECK-NEXT: [[LSR_IV13:%.*]] = inttoptr i64 [[LSR_IV1]] to i8*
; CHECK-NEXT: br i1 [[CMP215]], label [[FOR_BODY2_PREHEADER:%.*]], label [[FOR_INC]]
; CHECK: for.body2.preheader:
; CHECK-NEXT: br label [[FOR_BODY2:%.*]]
; CHECK: for.body2:
-; CHECK-NEXT: [[LSR_IV4:%.*]] = phi i8* [ [[SCEVGEP:%.*]], [[FOR_BODY2]] ], [ [[MAXARRAY:%.*]], [[FOR_BODY2_PREHEADER]] ]
+; CHECK-NEXT: [[LSR_IV3:%.*]] = phi i8* [ [[SCEVGEP:%.*]], [[FOR_BODY2]] ], [ [[MAXARRAY:%.*]], [[FOR_BODY2_PREHEADER]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY2]] ], [ [[TMP0]], [[FOR_BODY2_PREHEADER]] ]
-; CHECK-NEXT: [[LSR_IV45:%.*]] = ptrtoint i8* [[LSR_IV4]] to i64
-; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i8, i8* [[LSR_IV4]], i64 1
-; CHECK-NEXT: [[V1:%.*]] = load i8, i8* [[SCEVGEP8]], align 1
-; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i8, i8* [[TMP1]], i64 [[LSR_IV45]]
-; CHECK-NEXT: [[V2:%.*]] = load i8, i8* [[SCEVGEP7]], align 1
+; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, i8* [[LSR_IV3]], i64 1
+; CHECK-NEXT: [[V1:%.*]] = load i8, i8* [[SCEVGEP6]], align 1
+; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, i8* [[LSR_IV3]], i64 [[TMP0]]
+; CHECK-NEXT: [[V2:%.*]] = load i8, i8* [[SCEVGEP5]], align 1
; CHECK-NEXT: [[TMPV:%.*]] = xor i8 [[V1]], [[V2]]
-; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, i8* [[LSR_IV13]], i64 [[LSR_IV45]]
-; CHECK-NEXT: store i8 [[TMPV]], i8* [[SCEVGEP6]], align 1
+; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, i8* [[LSR_IV3]], i64 [[LSR_IV1]]
+; CHECK-NEXT: store i8 [[TMPV]], i8* [[SCEVGEP4]], align 1
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
-; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, i8* [[LSR_IV4]], i64 1
+; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, i8* [[LSR_IV3]], i64 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[LSR_IV_NEXT]], 0
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY2]], label [[FOR_INC_LOOPEXIT:%.*]]
; CHECK: for.inc.loopexit:
@@ -94,4 +91,3 @@ for.inc: ; preds = %for.inc.loopexit, %
for.end.loopexit: ; preds = %for.inc
ret void
}
-
OpenPOWER on IntegriCloud