author     QingShan Zhang <qshanz@cn.ibm.com>  2020-01-03 03:26:41 +0000
committer  QingShan Zhang <qshanz@cn.ibm.com>  2020-01-03 03:26:41 +0000
commit     2133d3c5586b1a782e4d8e2a34c9f501499705cf (patch)
tree       e1a37e6a0781cbe289b8ff7f4f75b7bcc75b16b2
parent     60333a531799c0d0db1c3995bc784d2b314920ff (diff)
[DAGCombine] Initialize the default operation action for SIGN_EXTEND_INREG for vector types as 'expand' instead of 'legal'
Until now, we did not set a default operation action for SIGN_EXTEND_INREG on vector types, so it defaulted to 0, which means Legal. However, most targets have no native instruction for this opcode on vectors, so it should default to Expand, as we already do for ANY_EXTEND_VECTOR_INREG.

Differential Revision: https://reviews.llvm.org/D70000
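The pattern DAGCombine folds into a SIGN_EXTEND_INREG node is a shift-left followed by an arithmetic shift-right by the same amount. As a minimal sketch of what the new default affects (a reduced copy of test1 from the Hexagon test added below; the function name is illustrative), the following IR produces a vector SIGN_EXTEND_INREG that now expands unless the target explicitly marks it Legal:

; Sign-extend the low 8 bits of each i32 lane in place: the shl/ashr pair
; below is combined into SIGN_EXTEND_INREG during selection.
; Run with, e.g.: llc < signext-inreg.ll -march=hexagon
define <2 x i32> @sext_inreg_v2i32(<2 x i32> %m) {
entry:
  %shl = shl <2 x i32> %m, <i32 24, i32 24>
  %shr = ashr exact <2 x i32> %shl, <i32 24, i32 24>
  ret <2 x i32> %shr
}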
-rw-r--r--  llvm/lib/CodeGen/TargetLoweringBase.cpp            |   1
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp    |   5
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp            |   7
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelLowering.cpp    |   4
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp |   7
-rw-r--r--  llvm/test/CodeGen/ARM/signext-inreg.ll             |  14
-rw-r--r--  llvm/test/CodeGen/Hexagon/signext-inreg.ll         | 272
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-sext.ll               | 102
8 files changed, 403 insertions, 9 deletions
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 5f89f49009b..9198ae07c00 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -696,6 +696,7 @@ void TargetLoweringBase::initActions() {
// These operations default to expand for vector types.
if (VT.isVector()) {
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index af09eb0ed04..c07ef82ea88 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -189,6 +189,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
}
+
+ for (auto VT :
+ { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
+ MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
}
// Compute derived properties from the register classes
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 3e27a1b8343..9c5f663a9cc 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -379,6 +379,13 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
+ // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Legal);
+
// Some truncating stores are legal too.
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 8e0848a59b5..fb78fb48ebf 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1536,6 +1536,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
+
// Types natively supported:
for (MVT NativeVT : {MVT::v8i1, MVT::v4i1, MVT::v2i1, MVT::v4i8,
MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index bc8a9959c91..204950f9010 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -194,6 +194,13 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::XOR, BoolV, Legal);
}
+ if (Use64b)
+ for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
+ setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
+ else
+ for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
+ setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
+
setTargetDAGCombine(ISD::VSELECT);
}
diff --git a/llvm/test/CodeGen/ARM/signext-inreg.ll b/llvm/test/CodeGen/ARM/signext-inreg.ll
index 06836286bfd..dd8b144bbe2 100644
--- a/llvm/test/CodeGen/ARM/signext-inreg.ll
+++ b/llvm/test/CodeGen/ARM/signext-inreg.ll
@@ -1,14 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=armv8 -mattr=+mve | FileCheck %s
+; RUN: llc < %s -mtriple=armv8 | FileCheck %s
define <4 x i32> @test(<4 x i32> %m) {
; CHECK-LABEL: test:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov d1, r2, r3
-; CHECK-NEXT: vmov d0, r0, r1
-; CHECK-NEXT: vshl.i32 q0, q0, #24
-; CHECK-NEXT: vshr.s32 q0, q0, #24
-; CHECK-NEXT: vmov r0, r1, d0
-; CHECK-NEXT: vmov r2, r3, d1
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vshl.i32 q8, q8, #24
+; CHECK-NEXT: vshr.s32 q8, q8, #24
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: bx lr
entry:
%shl = shl <4 x i32> %m, <i32 24, i32 24, i32 24, i32 24>
diff --git a/llvm/test/CodeGen/Hexagon/signext-inreg.ll b/llvm/test/CodeGen/Hexagon/signext-inreg.ll
new file mode 100644
index 00000000000..cd9d7835869
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/signext-inreg.ll
@@ -0,0 +1,272 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=hexagon | FileCheck %s
+; RUN: llc < %s -march=hexagon -mattr=+hvx,hvx-length64b | FileCheck %s --check-prefix=CHECK-64B
+; RUN: llc < %s -march=hexagon -mattr=+hvx,hvx-length128b | FileCheck %s --check-prefix=CHECK-128B
+define <2 x i32> @test1(<2 x i32> %m) {
+; CHECK-LABEL: test1:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = extract(r1,#8,#0)
+; CHECK-NEXT: r0 = sxtb(r0)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+;
+; CHECK-64B-LABEL: test1:
+; CHECK-64B: .cfi_startproc
+; CHECK-64B-NEXT: // %bb.0: // %entry
+; CHECK-64B-NEXT: {
+; CHECK-64B-NEXT: r1 = extract(r1,#8,#0)
+; CHECK-64B-NEXT: r0 = sxtb(r0)
+; CHECK-64B-NEXT: jumpr r31
+; CHECK-64B-NEXT: }
+;
+; CHECK-128B-LABEL: test1:
+; CHECK-128B: .cfi_startproc
+; CHECK-128B-NEXT: // %bb.0: // %entry
+; CHECK-128B-NEXT: {
+; CHECK-128B-NEXT: r1 = extract(r1,#8,#0)
+; CHECK-128B-NEXT: r0 = sxtb(r0)
+; CHECK-128B-NEXT: jumpr r31
+; CHECK-128B-NEXT: }
+entry:
+ %shl = shl <2 x i32> %m, <i32 24, i32 24>
+ %shr = ashr exact <2 x i32> %shl, <i32 24, i32 24>
+ ret <2 x i32> %shr
+}
+
+define <16 x i32> @test2(<16 x i32> %m) {
+; CHECK-LABEL: test2:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: {
+; CHECK-NEXT: r3 = extract(r3,#8,#0)
+; CHECK-NEXT: r29 = add(r29,#-8)
+; CHECK-NEXT: r2 = sxtb(r2)
+; CHECK-NEXT: r4 = sxtb(r4)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 = extract(r5,#8,#0)
+; CHECK-NEXT: r13:12 = memd(r29+#48)
+; CHECK-NEXT: memd(r29+#0) = r17:16
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r13 = extract(r13,#8,#0)
+; CHECK-NEXT: r12 = sxtb(r12)
+; CHECK-NEXT: r15:14 = memd(r29+#40)
+; CHECK-NEXT: r9:8 = memd(r29+#32)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r9 = extract(r9,#8,#0)
+; CHECK-NEXT: r8 = sxtb(r8)
+; CHECK-NEXT: r11:10 = memd(r29+#24)
+; CHECK-NEXT: r7:6 = memd(r29+#16)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r11 = extract(r11,#8,#0)
+; CHECK-NEXT: r10 = sxtb(r10)
+; CHECK-NEXT: r14 = sxtb(r14)
+; CHECK-NEXT: r17:16 = memd(r29+#8)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r15 = extract(r15,#8,#0)
+; CHECK-NEXT: r17 = extract(r17,#8,#0)
+; CHECK-NEXT: r16 = sxtb(r16)
+; CHECK-NEXT: r6 = sxtb(r6)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r7 = extract(r7,#8,#0)
+; CHECK-NEXT: memd(r0+#56) = r13:12
+; CHECK-NEXT: memd(r0+#48) = r15:14
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: memd(r0+#40) = r9:8
+; CHECK-NEXT: memd(r0+#32) = r11:10
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: memd(r0+#24) = r7:6
+; CHECK-NEXT: memd(r0+#16) = r17:16
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: memd(r0+#8) = r5:4
+; CHECK-NEXT: memd(r0+#0) = r3:2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r29 = add(r29,#8)
+; CHECK-NEXT: r17:16 = memd(r29+#0)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: } // 8-byte Folded Reload
+;
+; CHECK-64B-LABEL: test2:
+; CHECK-64B: .cfi_startproc
+; CHECK-64B-NEXT: // %bb.0: // %entry
+; CHECK-64B-NEXT: {
+; CHECK-64B-NEXT: r0 = #24
+; CHECK-64B-NEXT: }
+; CHECK-64B-NEXT: {
+; CHECK-64B-NEXT: v0.w = vasl(v0.w,r0)
+; CHECK-64B-NEXT: }
+; CHECK-64B-NEXT: {
+; CHECK-64B-NEXT: v0.w = vasr(v0.w,r0)
+; CHECK-64B-NEXT: jumpr r31
+; CHECK-64B-NEXT: }
+;
+; CHECK-128B-LABEL: test2:
+; CHECK-128B: .cfi_startproc
+; CHECK-128B-NEXT: // %bb.0: // %entry
+; CHECK-128B-NEXT: {
+; CHECK-128B-NEXT: r0 = #24
+; CHECK-128B-NEXT: }
+; CHECK-128B-NEXT: {
+; CHECK-128B-NEXT: v0.w = vasl(v0.w,r0)
+; CHECK-128B-NEXT: }
+; CHECK-128B-NEXT: {
+; CHECK-128B-NEXT: v0.w = vasr(v0.w,r0)
+; CHECK-128B-NEXT: jumpr r31
+; CHECK-128B-NEXT: }
+entry:
+ %shl = shl <16 x i32> %m, <i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
+ %shr = ashr exact <16 x i32> %shl, <i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
+ ret <16 x i32> %shr
+}
+
+define <64 x i16> @test3(<64 x i16> %m) {
+; CHECK-LABEL: test3:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: {
+; CHECK-NEXT: r3:2 = vaslh(r3:2,#8)
+; CHECK-NEXT: r5:4 = vaslh(r5:4,#8)
+; CHECK-NEXT: r9:8 = memd(r29+#96)
+; CHECK-NEXT: r11:10 = memd(r29+#88)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r13:12 = vaslh(r9:8,#8)
+; CHECK-NEXT: r11:10 = vaslh(r11:10,#8)
+; CHECK-NEXT: r9:8 = memd(r29+#80)
+; CHECK-NEXT: r7:6 = memd(r29+#104)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r15:14 = vaslh(r7:6,#8)
+; CHECK-NEXT: r9:8 = vaslh(r9:8,#8)
+; CHECK-NEXT: r7:6 = memd(r29+#72)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r15:14 = vasrh(r15:14,#8)
+; CHECK-NEXT: r13:12 = vasrh(r13:12,#8)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r11:10 = vasrh(r11:10,#8)
+; CHECK-NEXT: r9:8 = vasrh(r9:8,#8)
+; CHECK-NEXT: r15:14 = memd(r29+#64)
+; CHECK-NEXT: memd(r0+#120) = r15:14
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r7:6 = vaslh(r7:6,#8)
+; CHECK-NEXT: r15:14 = vaslh(r15:14,#8)
+; CHECK-NEXT: r13:12 = memd(r29+#56)
+; CHECK-NEXT: memd(r0+#112) = r13:12
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r13:12 = vaslh(r13:12,#8)
+; CHECK-NEXT: r7:6 = vasrh(r7:6,#8)
+; CHECK-NEXT: r11:10 = memd(r29+#48)
+; CHECK-NEXT: memd(r0+#104) = r11:10
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r11:10 = vaslh(r11:10,#8)
+; CHECK-NEXT: r15:14 = vasrh(r15:14,#8)
+; CHECK-NEXT: r9:8 = memd(r29+#40)
+; CHECK-NEXT: memd(r0+#96) = r9:8
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r9:8 = vaslh(r9:8,#8)
+; CHECK-NEXT: r13:12 = vasrh(r13:12,#8)
+; CHECK-NEXT: r7:6 = memd(r29+#32)
+; CHECK-NEXT: memd(r0+#88) = r7:6
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r11:10 = vasrh(r11:10,#8)
+; CHECK-NEXT: r9:8 = vasrh(r9:8,#8)
+; CHECK-NEXT: r15:14 = memd(r29+#0)
+; CHECK-NEXT: memd(r0+#80) = r15:14
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r7:6 = vaslh(r7:6,#8)
+; CHECK-NEXT: r15:14 = vaslh(r15:14,#8)
+; CHECK-NEXT: r13:12 = memd(r29+#16)
+; CHECK-NEXT: memd(r0+#72) = r13:12
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r13:12 = vaslh(r13:12,#8)
+; CHECK-NEXT: r7:6 = vasrh(r7:6,#8)
+; CHECK-NEXT: r11:10 = memd(r29+#24)
+; CHECK-NEXT: memd(r0+#64) = r11:10
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r11:10 = vaslh(r11:10,#8)
+; CHECK-NEXT: r3:2 = vasrh(r3:2,#8)
+; CHECK-NEXT: r9:8 = memd(r29+#8)
+; CHECK-NEXT: memd(r0+#56) = r9:8
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r9:8 = vaslh(r9:8,#8)
+; CHECK-NEXT: r13:12 = vasrh(r13:12,#8)
+; CHECK-NEXT: memd(r0+#48) = r7:6
+; CHECK-NEXT: memd(r0+#0) = r3:2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r11:10 = vasrh(r11:10,#8)
+; CHECK-NEXT: r7:6 = vasrh(r15:14,#8)
+; CHECK-NEXT: memd(r0+#32) = r13:12
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r9:8 = vasrh(r9:8,#8)
+; CHECK-NEXT: r5:4 = vasrh(r5:4,#8)
+; CHECK-NEXT: memd(r0+#40) = r11:10
+; CHECK-NEXT: memd(r0+#16) = r7:6
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: memd(r0+#24) = r9:8
+; CHECK-NEXT: memd(r0+#8) = r5:4
+; CHECK-NEXT: }
+;
+; CHECK-64B-LABEL: test3:
+; CHECK-64B: .cfi_startproc
+; CHECK-64B-NEXT: // %bb.0: // %entry
+; CHECK-64B-NEXT: {
+; CHECK-64B-NEXT: r0 = #8
+; CHECK-64B-NEXT: }
+; CHECK-64B-NEXT: {
+; CHECK-64B-NEXT: v0.h = vasl(v0.h,r0)
+; CHECK-64B-NEXT: }
+; CHECK-64B-NEXT: {
+; CHECK-64B-NEXT: v1.h = vasl(v1.h,r0)
+; CHECK-64B-NEXT: }
+; CHECK-64B-NEXT: {
+; CHECK-64B-NEXT: v0.h = vasr(v0.h,r0)
+; CHECK-64B-NEXT: }
+; CHECK-64B-NEXT: {
+; CHECK-64B-NEXT: v1.h = vasr(v1.h,r0)
+; CHECK-64B-NEXT: jumpr r31
+; CHECK-64B-NEXT: }
+;
+; CHECK-128B-LABEL: test3:
+; CHECK-128B: .cfi_startproc
+; CHECK-128B-NEXT: // %bb.0: // %entry
+; CHECK-128B-NEXT: {
+; CHECK-128B-NEXT: r0 = #8
+; CHECK-128B-NEXT: }
+; CHECK-128B-NEXT: {
+; CHECK-128B-NEXT: v0.h = vasl(v0.h,r0)
+; CHECK-128B-NEXT: }
+; CHECK-128B-NEXT: {
+; CHECK-128B-NEXT: v0.h = vasr(v0.h,r0)
+; CHECK-128B-NEXT: jumpr r31
+; CHECK-128B-NEXT: }
+entry:
+ %shl = shl <64 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %shr = ashr exact <64 x i16> %shl, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ ret <64 x i16> %shr
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-sext.ll b/llvm/test/CodeGen/Thumb2/mve-sext.ll
index be414882181..ffade88a94a 100644
--- a/llvm/test/CodeGen/Thumb2/mve-sext.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-sext.ll
@@ -1,6 +1,104 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+define arm_aapcs_vfpcc <4 x i32> @sext_v4i32_v4i32_v4i1(<4 x i32> %m) {
+; CHECK-LABEL: sext_v4i32_v4i32_v4i1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshl.i32 q0, q0, #31
+; CHECK-NEXT: vshr.s32 q0, q0, #31
+; CHECK-NEXT: bx lr
+entry:
+ %shl = shl <4 x i32> %m, <i32 31, i32 31, i32 31, i32 31>
+ %shr = ashr exact <4 x i32> %shl, <i32 31, i32 31, i32 31, i32 31>
+ ret <4 x i32> %shr
+}
+
+define arm_aapcs_vfpcc <4 x i32> @sext_v4i32_v4i32_v4i8(<4 x i32> %m) {
+; CHECK-LABEL: sext_v4i32_v4i32_v4i8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmovlb.s8 q0, q0
+; CHECK-NEXT: vmovlb.s16 q0, q0
+; CHECK-NEXT: bx lr
+entry:
+ %shl = shl <4 x i32> %m, <i32 24, i32 24, i32 24, i32 24>
+ %shr = ashr exact <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
+ ret <4 x i32> %shr
+}
+
+define arm_aapcs_vfpcc <4 x i32> @sext_v4i32_v4i32_v4i16(<4 x i32> %m) {
+; CHECK-LABEL: sext_v4i32_v4i32_v4i16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmovlb.s16 q0, q0
+; CHECK-NEXT: bx lr
+entry:
+ %shl = shl <4 x i32> %m, <i32 16, i32 16, i32 16, i32 16>
+ %shr = ashr exact <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
+ ret <4 x i32> %shr
+}
+
+define arm_aapcs_vfpcc <8 x i16> @sext_v8i16_v8i16_v8i8(<8 x i16> %m) {
+; CHECK-LABEL: sext_v8i16_v8i16_v8i8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmovlb.s8 q0, q0
+; CHECK-NEXT: bx lr
+entry:
+ %shl = shl <8 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %shr = ashr exact <8 x i16> %shl, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ ret <8 x i16> %shr
+}
+
+define arm_aapcs_vfpcc <8 x i16> @sext_v8i16_v8i16_v8i1(<8 x i16> %m) {
+; CHECK-LABEL: sext_v8i16_v8i16_v8i1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshl.i16 q0, q0, #15
+; CHECK-NEXT: vshr.s16 q0, q0, #15
+; CHECK-NEXT: bx lr
+entry:
+ %shl = shl <8 x i16> %m, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %shr = ashr exact <8 x i16> %shl, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ ret <8 x i16> %shr
+}
+
+define arm_aapcs_vfpcc <2 x i64> @sext_v2i64_v2i64_v2i32(<2 x i64> %m) {
+; CHECK-LABEL: sext_v2i64_v2i64_v2i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: vmov.32 q1[0], r0
+; CHECK-NEXT: asrs r0, r0, #31
+; CHECK-NEXT: vmov.32 q1[1], r0
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov.32 q1[2], r0
+; CHECK-NEXT: asrs r0, r0, #31
+; CHECK-NEXT: vmov.32 q1[3], r0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %shl = shl <2 x i64> %m, <i64 32, i64 32>
+ %shr = ashr exact <2 x i64> %shl, <i64 32, i64 32>
+ ret <2 x i64> %shr
+}
+
+define arm_aapcs_vfpcc <2 x i64> @sext_v2i64_v2i64_v2i35(<2 x i64> %m) {
+; CHECK-LABEL: sext_v2i64_v2i64_v2i35:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: vmov.32 q1[0], r0
+; CHECK-NEXT: vmov r0, s1
+; CHECK-NEXT: sbfx r0, r0, #0, #3
+; CHECK-NEXT: vmov.32 q1[1], r0
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov.32 q1[2], r0
+; CHECK-NEXT: vmov r0, s3
+; CHECK-NEXT: sbfx r0, r0, #0, #3
+; CHECK-NEXT: vmov.32 q1[3], r0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %shl = shl <2 x i64> %m, <i64 29, i64 29>
+ %shr = ashr exact <2 x i64> %shl, <i64 29, i64 29>
+ ret <2 x i64> %shr
+}
+
define arm_aapcs_vfpcc <8 x i16> @sext_v8i8_v8i16(<8 x i8> %src) {
; CHECK-LABEL: sext_v8i8_v8i16:
; CHECK: @ %bb.0: @ %entry
@@ -332,13 +430,13 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @zext_v2i32_v2i64(<2 x i32> %src) {
; CHECK-LABEL: zext_v2i32_v2i64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adr r0, .LCPI13_0
+; CHECK-NEXT: adr r0, .LCPI20_0
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .LCPI13_0:
+; CHECK-NEXT: .LCPI20_0:
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 4294967295 @ 0xffffffff