summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp22
-rw-r--r--llvm/test/CodeGen/X86/setcc-combine.ll63
2 files changed, 47 insertions, 38 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index a42e89da994..2be8ac20238 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -441,8 +441,26 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
SDLoc dl(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0),
- N->getOperand(1));
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // If the input also needs to be promoted, do that first so we can get a
+ // good idea for the output type.
+ if (TLI.getTypeAction(*DAG.getContext(), Op0.getValueType())
+ == TargetLowering::TypePromoteInteger) {
+ SDValue In = GetPromotedInteger(Op0);
+
+ // If the new type is at least as large as NVT, use it. We probably won't
+ // need to promote it again.
+ EVT SVT = In.getValueType().getScalarType();
+ if (SVT.bitsGE(NVT)) {
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, In, Op1);
+ return DAG.getAnyExtOrTrunc(Ext, dl, NVT);
+ }
+ }
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, Op0, Op1);
}
SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
diff --git a/llvm/test/CodeGen/X86/setcc-combine.ll b/llvm/test/CodeGen/X86/setcc-combine.ll
index da29c3b9a63..e8ebce9e56a 100644
--- a/llvm/test/CodeGen/X86/setcc-combine.ll
+++ b/llvm/test/CodeGen/X86/setcc-combine.ll
@@ -8,8 +8,8 @@ define i32 @test_eq_1(<4 x i32> %A, <4 x i32> %B) {
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
-; SSE2-NEXT: pextrw $2, %xmm0, %eax
-; SSE2-NEXT: movsbl %al, %eax
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_eq_1:
@@ -17,8 +17,7 @@ define i32 @test_eq_1(<4 x i32> %A, <4 x i32> %B) {
; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: pextrb $4, %xmm0, %eax
-; SSE41-NEXT: movsbl %al, %eax
+; SSE41-NEXT: pextrd $1, %xmm0, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %A, %B
%sext = sext <4 x i1> %cmp to <4 x i32>
@@ -32,15 +31,14 @@ define i32 @test_ne_1(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_ne_1:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
-; SSE2-NEXT: pextrw $2, %xmm1, %eax
-; SSE2-NEXT: movsbl %al, %eax
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_ne_1:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
-; SSE41-NEXT: pextrb $4, %xmm1, %eax
-; SSE41-NEXT: movsbl %al, %eax
+; SSE41-NEXT: pextrd $1, %xmm1, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %A, %B
%sext = sext <4 x i1> %cmp to <4 x i32>
@@ -69,8 +67,8 @@ define i32 @test_ge_1(<4 x i32> %A, <4 x i32> %B) {
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
-; SSE2-NEXT: pextrw $2, %xmm0, %eax
-; SSE2-NEXT: movsbl %al, %eax
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_ge_1:
@@ -78,8 +76,7 @@ define i32 @test_ge_1(<4 x i32> %A, <4 x i32> %B) {
; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: pextrb $4, %xmm0, %eax
-; SSE41-NEXT: movsbl %al, %eax
+; SSE41-NEXT: pextrd $1, %xmm0, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %A, %B
%sext = sext <4 x i1> %cmp to <4 x i32>
@@ -93,15 +90,14 @@ define i32 @test_lt_1(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_lt_1:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
-; SSE2-NEXT: pextrw $2, %xmm1, %eax
-; SSE2-NEXT: movsbl %al, %eax
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_lt_1:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
-; SSE41-NEXT: pextrb $4, %xmm1, %eax
-; SSE41-NEXT: movsbl %al, %eax
+; SSE41-NEXT: pextrd $1, %xmm1, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %A, %B
%sext = sext <4 x i1> %cmp to <4 x i32>
@@ -130,8 +126,8 @@ define i32 @test_eq_2(<4 x i32> %A, <4 x i32> %B) {
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm1
-; SSE2-NEXT: pextrw $2, %xmm1, %eax
-; SSE2-NEXT: movsbl %al, %eax
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_eq_2:
@@ -139,8 +135,7 @@ define i32 @test_eq_2(<4 x i32> %A, <4 x i32> %B) {
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm0, %xmm1
-; SSE41-NEXT: pextrb $4, %xmm1, %eax
-; SSE41-NEXT: movsbl %al, %eax
+; SSE41-NEXT: pextrd $1, %xmm1, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %B, %A
%sext = sext <4 x i1> %cmp to <4 x i32>
@@ -154,15 +149,14 @@ define i32 @test_ne_2(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_ne_2:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE2-NEXT: pextrw $2, %xmm0, %eax
-; SSE2-NEXT: movsbl %al, %eax
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_ne_2:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE41-NEXT: pextrb $4, %xmm0, %eax
-; SSE41-NEXT: movsbl %al, %eax
+; SSE41-NEXT: pextrd $1, %xmm0, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %B, %A
%sext = sext <4 x i1> %cmp to <4 x i32>
@@ -178,8 +172,8 @@ define i32 @test_le_2(<4 x i32> %A, <4 x i32> %B) {
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm1
-; SSE2-NEXT: pextrw $2, %xmm1, %eax
-; SSE2-NEXT: movsbl %al, %eax
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_le_2:
@@ -187,8 +181,7 @@ define i32 @test_le_2(<4 x i32> %A, <4 x i32> %B) {
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm0, %xmm1
-; SSE41-NEXT: pextrb $4, %xmm1, %eax
-; SSE41-NEXT: movsbl %al, %eax
+; SSE41-NEXT: pextrd $1, %xmm1, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %B, %A
%sext = sext <4 x i1> %cmp to <4 x i32>
@@ -215,15 +208,14 @@ define i32 @test_lt_2(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_lt_2:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE2-NEXT: pextrw $2, %xmm0, %eax
-; SSE2-NEXT: movsbl %al, %eax
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_lt_2:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE41-NEXT: pextrb $4, %xmm0, %eax
-; SSE41-NEXT: movsbl %al, %eax
+; SSE41-NEXT: pextrd $1, %xmm0, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %B, %A
%sext = sext <4 x i1> %cmp to <4 x i32>
@@ -237,15 +229,14 @@ define i32 @test_gt_2(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_gt_2:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE2-NEXT: pextrw $2, %xmm0, %eax
-; SSE2-NEXT: movsbl %al, %eax
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_gt_2:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE41-NEXT: pextrb $4, %xmm0, %eax
-; SSE41-NEXT: movsbl %al, %eax
+; SSE41-NEXT: pextrd $1, %xmm0, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %B, %A
%sext = sext <4 x i1> %cmp to <4 x i32>
OpenPOWER on IntegriCloud