diff options
author | Matthew Simpson <mssimpso@codeaurora.org> | 2015-12-21 18:31:25 +0000 |
---|---|---|
committer | Matthew Simpson <mssimpso@codeaurora.org> | 2015-12-21 18:31:25 +0000 |
commit | 11c4de6054501a4efa39583d0778ca7ad09fd6d5 (patch) | |
tree | db06e9abfd116297dfaae229396a07fe13b7ca14 | |
parent | eb65e912aaa0cb0bb7adf138495f3e25fb2ecdf6 (diff) | |
download | bcm5719-llvm-11c4de6054501a4efa39583d0778ca7ad09fd6d5.tar.gz bcm5719-llvm-11c4de6054501a4efa39583d0778ca7ad09fd6d5.zip |
[AArch64] Add additional extract-extend patterns for smov
This patch adds to the target description two additional patterns for matching
extract-extend operations to SMOV. The patterns catch the v16i8-to-i64 and
v8i16-to-i64 cases. The existing patterns miss these cases because the
extracted elements must first be legalized to i32, resulting in any_extend
nodes.
This was originally implemented as a DAG combine (r255895), but was reverted
due to failing out-of-tree tests.
llvm-svn: 256176
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.td | 7 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-neon-copy.ll | 17 |
2 files changed, 15 insertions, 9 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 70a1f849f1a..d02bc9ff394 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3806,6 +3806,13 @@ def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
 def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
           (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;
 
+def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
+            VectorIndexB:$idx)))), i8),
+          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
+def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
+            VectorIndexH:$idx)))), i16),
+          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
+
 // Extracting i8 or i16 elements will have the zero-extend transformed to
 // an 'and' mask by type legalization since neither i8 nor i16 are legal types
 // for AArch64. Match these patterns here since UMOV already zeroes out the high
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
index b74a40626ce..83b1cac70f5 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -320,21 +320,20 @@ define i32 @smovw8h(<8 x i16> %tmp1) {
   ret i32 %tmp5
 }
 
-define i32 @smovx16b(<16 x i8> %tmp1) {
+define i64 @smovx16b(<16 x i8> %tmp1) {
 ; CHECK-LABEL: smovx16b:
-; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[8]
+; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
   %tmp3 = extractelement <16 x i8> %tmp1, i32 8
-  %tmp4 = sext i8 %tmp3 to i32
-  %tmp5 = add i32 %tmp4, %tmp4
-  ret i32 %tmp5
+  %tmp4 = sext i8 %tmp3 to i64
+  ret i64 %tmp4
 }
 
-define i32 @smovx8h(<8 x i16> %tmp1) {
+define i64 @smovx8h(<8 x i16> %tmp1) {
 ; CHECK-LABEL: smovx8h:
-; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
+; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
   %tmp3 = extractelement <8 x i16> %tmp1, i32 2
-  %tmp4 = sext i16 %tmp3 to i32
-  ret i32 %tmp4
+  %tmp4 = sext i16 %tmp3 to i64
+  ret i64 %tmp4
 }
 
 define i64 @smovx4s(<4 x i32> %tmp1) {