summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKrzysztof Parzyszek <kparzysz@codeaurora.org>2018-01-23 19:56:16 +0000
committerKrzysztof Parzyszek <kparzysz@codeaurora.org>2018-01-23 19:56:16 +0000
commitd5e8a260bb537e6ee7303061e0b4a380cd1c920c (patch)
treede3ba488b354754b6a7923e0b841cdc3c61283ac
parent564f845b74833e28b82e14e075d8e93ef3562bac (diff)
downloadbcm5719-llvm-d5e8a260bb537e6ee7303061e0b4a380cd1c920c.tar.gz
bcm5719-llvm-d5e8a260bb537e6ee7303061e0b4a380cd1c920c.zip
[Hexagon] Add patterns for sext_inreg of HVX vector types
llvm-svn: 323250
-rw-r--r--llvm/lib/Target/Hexagon/HexagonPatterns.td19
-rw-r--r--llvm/test/CodeGen/Hexagon/autohvx/isel-sext-inreg.ll54
2 files changed, 73 insertions, 0 deletions
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index 6f1949aa149..f39dc7bbf57 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -3081,6 +3081,25 @@ let Predicates = [UseHVX] in {
def: Pat<(VecI32 (zext_invec HVI8:$Vs)),
(LoVec (VZxth (LoVec (VZxtb $Vs))))>;
+ // The "source" types are not legal, and there are no parameterized
+ // definitions for them, but they are length-specific.
+ let Predicates = [UseHVX,UseHVX64B] in { // 64-byte vectors: sext_inreg = shift left, then arithmetic shift right, by (element bits - source bits).
+ def: Pat<(VecI16 (sext_inreg HVI16:$Vs, v32i8)), // i8 -> i16 within halfword lanes: halfword shifts by 8.
+ (V6_vasrh (V6_vaslh HVI16:$Vs, (A2_tfrsi 8)), (A2_tfrsi 8))>;
+ def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v16i8)), // i8 -> i32 within word lanes: must use word shifts (by 24), not halfword shifts.
+ (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 24)), (A2_tfrsi 24))>;
+ def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v16i16)), // i16 -> i32 within word lanes: must use word shifts (by 16), not halfword shifts.
+ (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 16)), (A2_tfrsi 16))>;
+ }
+ let Predicates = [UseHVX,UseHVX128B] in { // 128-byte vectors: sext_inreg = shift left, then arithmetic shift right, by (element bits - source bits).
+ def: Pat<(VecI16 (sext_inreg HVI16:$Vs, v64i8)), // i8 -> i16 within halfword lanes: halfword shifts by 8.
+ (V6_vasrh (V6_vaslh HVI16:$Vs, (A2_tfrsi 8)), (A2_tfrsi 8))>;
+ def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v32i8)), // i8 -> i32 within word lanes: must use word shifts (by 24), not halfword shifts.
+ (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 24)), (A2_tfrsi 24))>;
+ def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v32i16)), // i16 -> i32 within word lanes: must use word shifts (by 16), not halfword shifts.
+ (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 16)), (A2_tfrsi 16))>;
+ }
+
def: Pat<(VecI8 (trunc HWI16:$Vss)),
(V6_vpackeb (HiVec $Vss), (LoVec $Vss))>;
def: Pat<(VecI16 (trunc HWI32:$Vss)),
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-sext-inreg.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-sext-inreg.ll
new file mode 100644
index 00000000000..aee2a59af1a
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-sext-inreg.ll
@@ -0,0 +1,54 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Check that both functions compile successfully.
+
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+; CHECK-LABEL: danny:
+; CHECK: vmem
+define void @danny() #0 { ; attribute group #0 enables +hvx-length64b (64-byte HVX vectors)
+b0:
+ %v1 = load i16, i16* undef, align 2
+ %v2 = insertelement <8 x i16> undef, i16 %v1, i32 6 ; loaded value goes into lane 6
+ %v3 = insertelement <8 x i16> %v2, i16 undef, i32 7
+ %v4 = sext <8 x i16> %v3 to <8 x i32> ; widen every lane from i16 to i32
+ %v5 = mul <8 x i32> %v4, <i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410>
+ %v6 = add <8 x i32> %v5, <i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768>
+ %v7 = add <8 x i32> %v6, zeroinitializer
+ %v8 = ashr <8 x i32> %v7, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+ %v9 = add nsw <8 x i32> zeroinitializer, %v8
+ %v10 = shl <8 x i32> %v9, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> ; shl 16 + ashr 16 (next line) sign-extends the low 16 bits of each i32 lane in place
+ %v11 = ashr exact <8 x i32> %v10, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> ; NOTE(review): presumably this pair reaches isel as sext_inreg on <8 x i32> -- confirm
+ %v12 = sub nsw <8 x i32> zeroinitializer, %v11
+ %v13 = trunc <8 x i32> %v12 to <8 x i16>
+ %v14 = extractelement <8 x i16> %v13, i32 7 ; only lane 7 of the result is stored
+ store i16 %v14, i16* undef, align 2
+ unreachable
+}
+
+; CHECK-LABEL: sammy:
+; CHECK: vmem
+define void @sammy() #1 { ; attribute group #1 enables +hvx-length128b (128-byte HVX vectors)
+b0:
+ %v1 = load i16, i16* undef, align 2
+ %v2 = insertelement <16 x i16> undef, i16 %v1, i32 14 ; loaded value goes into lane 14
+ %v3 = insertelement <16 x i16> %v2, i16 undef, i32 15
+ %v4 = sext <16 x i16> %v3 to <16 x i32> ; widen every lane from i16 to i32
+ %v5 = mul <16 x i32> %v4, <i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410>
+ %v6 = add <16 x i32> %v5, <i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768>
+ %v7 = add <16 x i32> %v6, zeroinitializer
+ %v8 = ashr <16 x i32> %v7, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+ %v9 = add nsw <16 x i32> zeroinitializer, %v8
+ %v10 = shl <16 x i32> %v9, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> ; shl 16 + ashr 16 (next line) sign-extends the low 16 bits of each i32 lane in place
+ %v11 = ashr exact <16 x i32> %v10, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> ; NOTE(review): presumably this pair reaches isel as sext_inreg on <16 x i32> -- confirm
+ %v12 = sub nsw <16 x i32> zeroinitializer, %v11
+ %v13 = trunc <16 x i32> %v12 to <16 x i16>
+ %v14 = extractelement <16 x i16> %v13, i32 15 ; only lane 15 of the result is stored
+ store i16 %v14, i16* undef, align 2
+ unreachable
+}
+
+attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length64b,+hvxv60" }
+attributes #1 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length128b,+hvxv60" }
OpenPOWER on IntegriCloud