summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@gmail.com>2017-02-04 23:26:39 +0000
committerCraig Topper <craig.topper@gmail.com>2017-02-04 23:26:39 +0000
commit42b83f8d6e44225562451cf428cea3d7af2a69db (patch)
treee18e887f4b6cc6c731269f4d95eea417c055b883
parent04dce84ead73732dc7a88eab9fbee5b60a9bcd1a (diff)
downloadbcm5719-llvm-42b83f8d6e44225562451cf428cea3d7af2a69db.tar.gz
bcm5719-llvm-42b83f8d6e44225562451cf428cea3d7af2a69db.zip
[DAGCombiner] Canonicalize the order of a chain of INSERT_SUBVECTORs.
Based on similar code for INSERT_VECTOR_ELT. llvm-svn: 294110
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp28
-rw-r--r--llvm/test/CodeGen/X86/avx512-mask-op.ll34
2 files changed, 40 insertions, 22 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d38a9fca019..aad07d8ae3c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14547,15 +14547,35 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
N1, N2);
+ if (!isa<ConstantSDNode>(N2))
+ return SDValue();
+
+ unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+
+ // Canonicalize insert_subvector dag nodes.
+ // Example:
+ // (insert_subvector (insert_subvector A, Idx0), Idx1)
+ // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
+ if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
+ N1.getValueType() == N0.getOperand(1).getValueType() &&
+ isa<ConstantSDNode>(N0.getOperand(2))) {
+ unsigned OtherIdx = cast<ConstantSDNode>(N0.getOperand(2))->getZExtValue();
+ if (InsIdx < OtherIdx) {
+ // Swap nodes.
+ SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
+ N0.getOperand(0), N1, N2);
+ AddToWorklist(NewOp.getNode());
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
+ VT, NewOp, N0.getOperand(1), N0.getOperand(2));
+ }
+ }
+
if (N0.getValueType() != N1.getValueType())
return SDValue();
// If the input vector is a concatenation, and the insert replaces
// one of the halves, we can optimize into a single concat_vectors.
- if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 &&
- isa<ConstantSDNode>(N2)) {
- unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
-
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2) {
// Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
// (concat_vectors Z, Y)
if (InsIdx == 0)
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 41fb19f38e0..6bd28b18b23 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -509,18 +509,13 @@ define <64 x i8> @test16(i64 %x) {
; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; KNL-NEXT: kmovw (%rsp), %k1
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
-; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm1, %xmm1
-; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
; KNL-NEXT: movl $1, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
-; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
+; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; KNL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
@@ -528,6 +523,10 @@ define <64 x i8> @test16(i64 %x) {
; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
; KNL-NEXT: vpmovdb %zmm2, %xmm2
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
+; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: retq
@@ -573,20 +572,15 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; KNL-NEXT: kmovw (%rsp), %k1
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
-; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm1, %xmm1
-; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
; KNL-NEXT: xorl %eax, %eax
; KNL-NEXT: cmpl %edx, %esi
; KNL-NEXT: setg %al
-; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
-; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
+; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; KNL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
@@ -594,6 +588,10 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
; KNL-NEXT: vpmovdb %zmm2, %xmm2
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
+; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: retq
OpenPOWER on IntegriCloud