summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJiangning Liu <jiangning.liu@arm.com>2014-01-26 03:27:40 +0000
committerJiangning Liu <jiangning.liu@arm.com>2014-01-26 03:27:40 +0000
commit6398d839c6b36c07314ef6ae13ee332ef399f700 (patch)
tree4253f8acb6a3f7416ed7e5fcae39612c7d6e5d62
parentcdee0edf2ab80184cd1a0e3d52fe3291db29f77a (diff)
downloadbcm5719-llvm-6398d839c6b36c07314ef6ae13ee332ef399f700.tar.gz
bcm5719-llvm-6398d839c6b36c07314ef6ae13ee332ef399f700.zip
Implement pattern matching for sext/zext from v1xx to v1xx for AArch64 Neon.
llvm-svn: 200113
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrNEON.td85
-rw-r--r--llvm/test/CodeGen/AArch64/neon-scalar-ext.ll114
2 files changed, 199 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrNEON.td b/llvm/lib/Target/AArch64/AArch64InstrNEON.td
index badd9e0f402..1180485b727 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrNEON.td
@@ -6211,6 +6211,91 @@ defm : NeonI_SDUP<Neon_Low2D, Neon_High2D, v1i64, v2i64>;
defm : NeonI_SDUP<Neon_Low4float, Neon_High4float, v2f32, v4f32>;
defm : NeonI_SDUP<Neon_Low2double, Neon_High2double, v1f64, v2f64>;
+// The following is for sext/zext from v1xx to v1xx
+// Every pattern below follows the same recipe: wrap the scalar source
+// register in a vector-typed value with SUBREG_TO_REG, apply the instruction
+// named prefix # "_<arrangement>" with an immediate operand of 0, and take
+// the low sub-register of the wider vector result with EXTRACT_SUBREG.
+//   prefix - instruction name stem; "_2S", "_4H" or "_8B" is appended.
+//   ExtOp  - the extension node being matched (instantiated with zext/sext).
+multiclass NeonI_ext<string prefix, SDNode ExtOp> {
+ // v1i32 -> v1i64
+ def : Pat<(v1i64 (ExtOp (v1i32 FPR32:$Rn))),
+ (EXTRACT_SUBREG
+ (v2i64 (!cast<Instruction>(prefix # "_2S")
+ (v2i32 (SUBREG_TO_REG (i64 0), $Rn, sub_32)), 0)),
+ sub_64)>;
+
+ // v1i16 -> v1i32
+ def : Pat<(v1i32 (ExtOp (v1i16 FPR16:$Rn))),
+ (EXTRACT_SUBREG
+ (v4i32 (!cast<Instruction>(prefix # "_4H")
+ (v4i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)), 0)),
+ sub_32)>;
+
+ // v1i8 -> v1i16
+ def : Pat<(v1i16 (ExtOp (v1i8 FPR8:$Rn))),
+ (EXTRACT_SUBREG
+ (v8i16 (!cast<Instruction>(prefix # "_8B")
+ (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
+ sub_16)>;
+
+ // v1i8 -> v1i32
+ // Done in two chained steps: the "_8B" form produces an intermediate v1i16
+ // (low sub_16 of the v8i16 result), which is then fed to the "_4H" form.
+ def : Pat<(v1i32 (ExtOp (v1i8 FPR8:$Rn))),
+ (EXTRACT_SUBREG
+ (v4i32 (!cast<Instruction>(prefix # "_4H")
+ (v4i16 (SUBREG_TO_REG (i64 0),
+ (v1i16 (EXTRACT_SUBREG
+ (v8i16 (!cast<Instruction>(prefix # "_8B")
+ (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
+ sub_16)),
+ sub_16)), 0)),
+ sub_32)>;
+}
+
+// Instantiate the extension patterns twice: zext is matched with the
+// USHLLvvi_* instructions and sext with the SSHLLvvi_* instructions.
+defm NeonI_zext : NeonI_ext<"USHLLvvi", zext>;
+defm NeonI_sext : NeonI_ext<"SSHLLvvi", sext>;
+
+// zext v1i8 -> v1i64
+// Unlike the NeonI_ext patterns, no shift-long instruction is used here:
+// DUPbv_B with lane index 0 produces the v1i8 value, and the outer
+// SUBREG_TO_REG presents it as a v1i64.
+// NOTE(review): this presumably relies on DUPbv_B leaving the remaining bits
+// of the destination register zero -- confirm against the instruction spec.
+def : Pat<(v1i64 (zext (v1i8 FPR8:$Rn))),
+ (v1i64 (SUBREG_TO_REG (i64 0),
+ (v1i8 (DUPbv_B
+ (v16i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)),
+ 0)),
+ sub_8))>;
+
+// zext v1i16 -> v1i64
+// Same shape as the v1i8 case: DUPhv_H with lane index 0 produces the v1i16
+// value, and the outer SUBREG_TO_REG presents it as a v1i64.
+// NOTE(review): this presumably relies on DUPhv_H leaving the remaining bits
+// of the destination register zero -- confirm against the instruction spec.
+def : Pat<(v1i64 (zext (v1i16 FPR16:$Rn))),
+ (v1i64 (SUBREG_TO_REG (i64 0),
+ (v1i16 (DUPhv_H
+ (v8i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)),
+ 0)),
+ sub_16))>;
+
+// sext v1i8 -> v1i64
+// Three chained steps, innermost first: SSHLLvvi_8B (v1i8 -> v1i16),
+// SSHLLvvi_4H (v1i16 -> v1i32), SSHLLvvi_2S (v1i32 -> v1i64). Each step uses
+// an immediate of 0 and passes the low sub-register of its result onward.
+def : Pat<(v1i64 (sext (v1i8 FPR8:$Rn))),
+ (EXTRACT_SUBREG
+ (v2i64 (SSHLLvvi_2S
+ (v2i32 (SUBREG_TO_REG (i64 0),
+ (v1i32 (EXTRACT_SUBREG
+ (v4i32 (SSHLLvvi_4H
+ (v4i16 (SUBREG_TO_REG (i64 0),
+ (v1i16 (EXTRACT_SUBREG
+ (v8i16 (SSHLLvvi_8B
+ (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
+ sub_16)),
+ sub_16)), 0)),
+ sub_32)),
+ sub_32)), 0)),
+ sub_64)>;
+
+
+// sext v1i16 -> v1i64
+// Two chained steps, innermost first: SSHLLvvi_4H (v1i16 -> v1i32), then
+// SSHLLvvi_2S (v1i32 -> v1i64), each with an immediate of 0.
+def : Pat<(v1i64 (sext (v1i16 FPR16:$Rn))),
+ (EXTRACT_SUBREG
+ (v2i64 (SSHLLvvi_2S
+ (v2i32 (SUBREG_TO_REG (i64 0),
+ (v1i32 (EXTRACT_SUBREG
+ (v4i32 (SSHLLvvi_4H
+ (v4i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)), 0)),
+ sub_32)),
+ sub_32)), 0)),
+ sub_64)>;
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/neon-scalar-ext.ll b/llvm/test/CodeGen/AArch64/neon-scalar-ext.ll
new file mode 100644
index 00000000000..4e574237e8d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/neon-scalar-ext.ll
@@ -0,0 +1,114 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+; Lane 0 of %v is rebuilt as a <1 x i32> and zero-extended to <1 x i64>;
+; a single ushll with a #0 shift is expected.
+define <1 x i64> @test_zext_v1i32_v1i64(<2 x i32> %v) nounwind readnone {
+; CHECK-LABEL: test_zext_v1i32_v1i64:
+; CHECK: ushll v0.2d, v0.2s, #0
+ %1 = extractelement <2 x i32> %v, i32 0
+ %2 = insertelement <1 x i32> undef, i32 %1, i32 0
+ %3 = zext <1 x i32> %2 to <1 x i64>
+ ret <1 x i64> %3
+}
+
+; Lane 0 of %v is rebuilt as a <1 x i16> and zero-extended to <1 x i32>;
+; a single ushll with a #0 shift is expected.
+define <1 x i32> @test_zext_v1i16_v1i32(<4 x i16> %v) nounwind readnone {
+; CHECK-LABEL: test_zext_v1i16_v1i32:
+; CHECK: ushll v0.4s, v0.4h, #0
+ %1 = extractelement <4 x i16> %v, i32 0
+ %2 = insertelement <1 x i16> undef, i16 %1, i32 0
+ %3 = zext <1 x i16> %2 to <1 x i32>
+ ret <1 x i32> %3
+}
+
+; Lane 0 of %v is rebuilt as a <1 x i8> and zero-extended to <1 x i16>;
+; a single ushll with a #0 shift is expected.
+define <1 x i16> @test_zext_v1i8_v1i16(<8 x i8> %v) nounwind readnone {
+; CHECK-LABEL: test_zext_v1i8_v1i16:
+; CHECK: ushll v0.8h, v0.8b, #0
+ %1 = extractelement <8 x i8> %v, i32 0
+ %2 = insertelement <1 x i8> undef, i8 %1, i32 0
+ %3 = zext <1 x i8> %2 to <1 x i32>
+ ret <1 x i16> %3
+}
+
+; Lane 0 of %v is rebuilt as a <1 x i8> and zero-extended to <1 x i32>;
+; two ushll instructions (8b->8h, then 4h->4s) are expected, in that order.
+define <1 x i32> @test_zext_v1i8_v1i32(<8 x i8> %v) nounwind readnone {
+; CHECK-LABEL: test_zext_v1i8_v1i32:
+; CHECK: ushll v0.8h, v0.8b, #0
+; CHECK: ushll v0.4s, v0.4h, #0
+ %1 = extractelement <8 x i8> %v, i32 0
+ %2 = insertelement <1 x i8> undef, i8 %1, i32 0
+ %3 = zext <1 x i8> %2 to <1 x i32>
+ ret <1 x i32> %3
+}
+
+; Lane 0 of %v is rebuilt as a <1 x i16> and zero-extended to <1 x i64>;
+; a scalar dup of halfword lane 0 is expected rather than a ushll chain.
+define <1 x i64> @test_zext_v1i16_v1i64(<4 x i16> %v) nounwind readnone {
+; CHECK-LABEL: test_zext_v1i16_v1i64:
+; CHECK: dup h0, v0.h[0]
+ %1 = extractelement <4 x i16> %v, i32 0
+ %2 = insertelement <1 x i16> undef, i16 %1, i32 0
+ %3 = zext <1 x i16> %2 to <1 x i64>
+ ret <1 x i64> %3
+}
+
+; Lane 0 of %v is rebuilt as a <1 x i8> and zero-extended to <1 x i64>;
+; a scalar dup of byte lane 0 is expected rather than a ushll chain.
+define <1 x i64> @test_zext_v1i8_v1i64(<8 x i8> %v) nounwind readnone {
+; CHECK-LABEL: test_zext_v1i8_v1i64:
+; CHECK: dup b0, v0.b[0]
+ %1 = extractelement <8 x i8> %v, i32 0
+ %2 = insertelement <1 x i8> undef, i8 %1, i32 0
+ %3 = zext <1 x i8> %2 to <1 x i64>
+ ret <1 x i64> %3
+}
+
+; Lane 0 of %v is rebuilt as a <1 x i32> and sign-extended to <1 x i64>;
+; a single sshll with a #0 shift is expected.
+define <1 x i64> @test_sext_v1i32_v1i64(<2 x i32> %v) nounwind readnone {
+; CHECK-LABEL: test_sext_v1i32_v1i64:
+; CHECK: sshll v0.2d, v0.2s, #0
+ %1 = extractelement <2 x i32> %v, i32 0
+ %2 = insertelement <1 x i32> undef, i32 %1, i32 0
+ %3 = sext <1 x i32> %2 to <1 x i64>
+ ret <1 x i64> %3
+}
+
+; Lane 0 of %v is rebuilt as a <1 x i16> and sign-extended to <1 x i32>;
+; a single sshll with a #0 shift is expected.
+define <1 x i32> @test_sext_v1i16_v1i32(<4 x i16> %v) nounwind readnone {
+; CHECK-LABEL: test_sext_v1i16_v1i32:
+; CHECK: sshll v0.4s, v0.4h, #0
+ %1 = extractelement <4 x i16> %v, i32 0
+ %2 = insertelement <1 x i16> undef, i16 %1, i32 0
+ %3 = sext <1 x i16> %2 to <1 x i32>
+ ret <1 x i32> %3
+}
+
+; Lane 0 of %v is rebuilt as a <1 x i8> and sign-extended to <1 x i16>;
+; a single sshll with a #0 shift is expected.
+define <1 x i16> @test_sext_v1i8_v1i16(<8 x i8> %v) nounwind readnone {
+; CHECK-LABEL: test_sext_v1i8_v1i16:
+; CHECK: sshll v0.8h, v0.8b, #0
+ %1 = extractelement <8 x i8> %v, i32 0
+ %2 = insertelement <1 x i8> undef, i8 %1, i32 0
+ %3 = sext <1 x i8> %2 to <1 x i16>
+ ret <1 x i16> %3
+}
+
+; Lane 0 of %v is rebuilt as a <1 x i8> and sign-extended to <1 x i32>;
+; two sshll instructions (8b->8h, then 4h->4s) are expected, in that order.
+define <1 x i32> @test_sext_v1i8_v1i32(<8 x i8> %v) nounwind readnone {
+; CHECK-LABEL: test_sext_v1i8_v1i32:
+; CHECK: sshll v0.8h, v0.8b, #0
+; CHECK: sshll v0.4s, v0.4h, #0
+ %1 = extractelement <8 x i8> %v, i32 0
+ %2 = insertelement <1 x i8> undef, i8 %1, i32 0
+ %3 = sext <1 x i8> %2 to <1 x i32>
+ ret <1 x i32> %3
+}
+
+; Lane 0 of %v is rebuilt as a <1 x i16> and sign-extended to <1 x i64>;
+; two sshll instructions (4h->4s, then 2s->2d) are expected, in that order.
+define <1 x i64> @test_sext_v1i16_v1i64(<4 x i16> %v) nounwind readnone {
+; CHECK-LABEL: test_sext_v1i16_v1i64:
+; CHECK: sshll v0.4s, v0.4h, #0
+; CHECK: sshll v0.2d, v0.2s, #0
+ %1 = extractelement <4 x i16> %v, i32 0
+ %2 = insertelement <1 x i16> undef, i16 %1, i32 0
+ %3 = sext <1 x i16> %2 to <1 x i64>
+ ret <1 x i64> %3
+}
+
+; Lane 0 of %v is rebuilt as a <1 x i8> and sign-extended to <1 x i64>;
+; three sshll instructions (8b->8h, 4h->4s, 2s->2d) are expected, in order.
+define <1 x i64> @test_sext_v1i8_v1i64(<8 x i8> %v) nounwind readnone {
+; CHECK-LABEL: test_sext_v1i8_v1i64:
+; CHECK: sshll v0.8h, v0.8b, #0
+; CHECK: sshll v0.4s, v0.4h, #0
+; CHECK: sshll v0.2d, v0.2s, #0
+ %1 = extractelement <8 x i8> %v, i32 0
+ %2 = insertelement <1 x i8> undef, i8 %1, i32 0
+ %3 = sext <1 x i8> %2 to <1 x i64>
+ ret <1 x i64> %3
+}
OpenPOWER on IntegriCloud