summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Storsjo <martin@martin.st>2017-08-19 19:47:48 +0000
committerMartin Storsjo <martin@martin.st>2017-08-19 19:47:48 +0000
commit91522ffa12309176ce555f4c0bcbd2888135b85c (patch)
tree145e76ea3afd95961f7e1df884476409f8a4f690
parentb225ad05af66035274d0003237bb62c3493721d2 (diff)
downloadbcm5719-llvm-91522ffa12309176ce555f4c0bcbd2888135b85c.tar.gz
bcm5719-llvm-91522ffa12309176ce555f4c0bcbd2888135b85c.zip
[ARM] Check the right order for halves of VZIP/VUZP if both parts are used
This is the exact same fix as in SVN r247254. In that commit, the fix was applied only for isVTRNMask and isVTRN_v_undef_Mask, but the same issue is present for VZIP/VUZP as well. This fixes PR33921. Differential Revision: https://reviews.llvm.org/D36899 llvm-svn: 311258
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp20
-rw-r--r--llvm/test/CodeGen/ARM/vzip.ll19
2 files changed, 35 insertions, 4 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index ff8491d2e62..320d7ccfbc2 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -5901,7 +5901,10 @@ static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
- WhichResult = M[i] == 0 ? 0 : 1;
+ if (M.size() == NumElts * 2)
+ WhichResult = i / NumElts;
+ else
+ WhichResult = M[i] == 0 ? 0 : 1;
for (unsigned j = 0; j < NumElts; ++j) {
if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
return false;
@@ -5932,7 +5935,10 @@ static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
unsigned Half = NumElts / 2;
for (unsigned i = 0; i < M.size(); i += NumElts) {
- WhichResult = M[i] == 0 ? 0 : 1;
+ if (M.size() == NumElts * 2)
+ WhichResult = i / NumElts;
+ else
+ WhichResult = M[i] == 0 ? 0 : 1;
for (unsigned j = 0; j < NumElts; j += Half) {
unsigned Idx = WhichResult;
for (unsigned k = 0; k < Half; ++k) {
@@ -5972,7 +5978,10 @@ static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
- WhichResult = M[i] == 0 ? 0 : 1;
+ if (M.size() == NumElts * 2)
+ WhichResult = i / NumElts;
+ else
+ WhichResult = M[i] == 0 ? 0 : 1;
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned j = 0; j < NumElts; j += 2) {
if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
@@ -6005,7 +6014,10 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
- WhichResult = M[i] == 0 ? 0 : 1;
+ if (M.size() == NumElts * 2)
+ WhichResult = i / NumElts;
+ else
+ WhichResult = M[i] == 0 ? 0 : 1;
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned j = 0; j < NumElts; j += 2) {
if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
diff --git a/llvm/test/CodeGen/ARM/vzip.ll b/llvm/test/CodeGen/ARM/vzip.ll
index 771bf5f0521..06b49ab9405 100644
--- a/llvm/test/CodeGen/ARM/vzip.ll
+++ b/llvm/test/CodeGen/ARM/vzip.ll
@@ -282,6 +282,25 @@ entry:
ret <8 x i16> %0
}
+; NOTE: The mask here looks like something that could be done with a vzip,
+; but which the current handling of two-result vzip can't do - thus ending up
+; as a vtrn.
+define <8 x i16> @vzip_lower_shufflemask_undef_rev(<4 x i16>* %A, <4 x i16>* %B) {
+; CHECK-LABEL: vzip_lower_shufflemask_undef_rev:
+; CHECK: @ BB#0: @ %entry
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d19, [r0]
+; CHECK-NEXT: vtrn.16 d19, d16
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: vmov r2, r3, d19
+; CHECK-NEXT: mov pc, lr
+entry:
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 4, i32 undef, i32 undef>
+ ret <8 x i16> %0
+}
+
define <4 x i32> @vzip_lower_shufflemask_zeroed(<2 x i32>* %A) {
; CHECK-LABEL: vzip_lower_shufflemask_zeroed:
; CHECK: @ BB#0: @ %entry
OpenPOWER on IntegriCloud