summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 3
-rw-r--r-- llvm/test/Transforms/InstCombine/pr38984.ll | 44
2 files changed, 32 insertions, 15 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index e019d43ba30..07bd98b30ab 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -909,7 +909,8 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
}
// If all indices are the same, just compare the base pointers.
- if (IndicesTheSame)
+ Type *BaseType = GEPLHS->getOperand(0)->getType();
+ if (IndicesTheSame && CmpInst::makeCmpResultType(BaseType) == I.getType())
return new ICmpInst(Cond, GEPLHS->getOperand(0), GEPRHS->getOperand(0));
// If we're comparing GEPs with two base pointers that only differ in type
diff --git a/llvm/test/Transforms/InstCombine/pr38984.ll b/llvm/test/Transforms/InstCombine/pr38984.ll
index 2da2f9fa42d..1334042d4a1 100644
--- a/llvm/test/Transforms/InstCombine/pr38984.ll
+++ b/llvm/test/Transforms/InstCombine/pr38984.ll
@@ -2,24 +2,40 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "p:16:16"
-@offsets = external dso_local global [4 x i16], align 1
+@a = external global [21 x i16], align 1
+@offsets = external global [4 x i16], align 1
-define void @PR38984() {
-; CHECK-LABEL: @PR38984(
+; The "same gep" optimization should work with vector icmp.
+define <4 x i1> @PR38984_1() {
+; CHECK-LABEL: @PR38984_1(
; CHECK-NEXT: entry:
-; CHECK-NEXT: ret void
+; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
;
entry:
%0 = load i16, i16* getelementptr ([4 x i16], [4 x i16]* @offsets, i16 0, i16 undef), align 1
%1 = insertelement <4 x i16> undef, i16 %0, i32 3
- %2 = sub <4 x i16> zeroinitializer, %1
- %3 = sext <4 x i16> %2 to <4 x i32>
- %4 = getelementptr inbounds i64, i64* null, <4 x i32> %3
- %5 = ptrtoint <4 x i64*> %4 to <4 x i32>
- %6 = getelementptr inbounds i64, i64* null, <4 x i16> %2
- %7 = ptrtoint <4 x i64*> %6 to <4 x i32>
- %8 = icmp eq <4 x i32> %5, %7
- %9 = select <4 x i1> %8, <4 x i16> zeroinitializer, <4 x i16> <i16 1, i16 1, i16 1, i16 1>
- %10 = sext <4 x i16> %9 to <4 x i32>
- ret void
+ %2 = getelementptr i32, i32* null, <4 x i16> %1
+ %3 = getelementptr i32, i32* null, <4 x i16> %1
+ %4 = icmp eq <4 x i32*> %2, %3
+ ret <4 x i1> %4
+}
+
+; The "compare base pointers" optimization should not kick in for vector icmp.
+define <4 x i1> @PR38984_2() {
+; CHECK-LABEL: @PR38984_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* getelementptr ([4 x i16], [4 x i16]* @offsets, i16 0, i16 undef), align 2
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, i16* getelementptr inbounds ([21 x i16], [21 x i16]* @a, i16 1, i16 0), <4 x i16> [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, i16* null, <4 x i16> [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i16*> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret <4 x i1> [[TMP4]]
+;
+entry:
+ %0 = load i16, i16* getelementptr ([4 x i16], [4 x i16]* @offsets, i16 0, i16 undef)
+ %1 = insertelement <4 x i16> undef, i16 %0, i32 3
+ %2 = getelementptr i16, i16* getelementptr ([21 x i16], [21 x i16]* @a, i64 1, i32 0), <4 x i16> %1
+ %3 = getelementptr i16, i16* null, <4 x i16> %1
+ %4 = icmp eq <4 x i16*> %2, %3
+ ret <4 x i1> %4
}
OpenPOWER on IntegriCloud