diff options
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.td | 33 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/aarch64-insert-subvector-undef.ll | 21 |
2 files changed, 40 insertions, 14 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 0d4d698b229..c418e17c884 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -6183,20 +6183,25 @@ def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))), // A 64-bit subvector insert to the first 128-bit vector position // is a subregister copy that needs no instruction. -def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (i32 0)), - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; -def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (i32 0)), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; -def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (i32 0)), - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; -def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (i32 0)), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; -def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (i32 0)), - (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; -def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (i32 0)), - (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; -def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (i32 0)), - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>; +multiclass InsertSubvectorUndef<ValueType Ty> { + def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (Ty 0)), + (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; + def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (Ty 0)), + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; + def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (Ty 0)), + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; + def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (Ty 0)), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; + def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (Ty 0)), + (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; + def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)), + (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; + def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)), + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>; +} + +defm : InsertSubvectorUndef<i32>; +defm : InsertSubvectorUndef<i64>; // Use pair-wise add instructions when summing up the lanes for v2f64, v2i64 // or v2f32. diff --git a/llvm/test/CodeGen/AArch64/aarch64-insert-subvector-undef.ll b/llvm/test/CodeGen/AArch64/aarch64-insert-subvector-undef.ll new file mode 100644 index 00000000000..0337f04e579 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-insert-subvector-undef.ll @@ -0,0 +1,21 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s + +; Check that this does not ICE. + +@d = common dso_local local_unnamed_addr global <4 x i16> zeroinitializer, align 8 + +define <8 x i16> @c(i32 %e) { +entry: + %0 = load <4 x i16>, <4 x i16>* @d, align 8 + %vminv = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %0) + %1 = trunc i32 %vminv to i16 + %vecinit3 = insertelement <4 x i16> <i16 undef, i16 undef, i16 0, i16 0>, i16 %1, i32 1 + %call = tail call <8 x i16> @c(i32 0) #3 + %vgetq_lane = extractelement <8 x i16> %call, i32 0 + %vset_lane = insertelement <4 x i16> %vecinit3, i16 %vgetq_lane, i32 0 + %call4 = tail call i32 bitcast (i32 (...)* @k to i32 (<4 x i16>)*)(<4 x i16> %vset_lane) #3 + ret <8 x i16> undef +} + +declare i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16>) +declare i32 @k(...) |