author     David Green <david.green@arm.com>    2019-12-02 10:29:01 +0000
committer  David Green <david.green@arm.com>    2019-12-02 10:38:14 +0000
commit     e9e1daf2b9e800c00a3c08db53650c1569288a1b (patch)
tree       b6a960b9d14d7371249394cf63c3982c699ec2fe
parent     44b9942898c7167ed95cccef4c3da3d2113e11e8 (diff)
download   bcm5719-llvm-e9e1daf2b9e800c00a3c08db53650c1569288a1b.tar.gz
           bcm5719-llvm-e9e1daf2b9e800c00a3c08db53650c1569288a1b.zip
[ARM] Remove VHADD patterns
These instructions do not work quite like I expected them to. They
perform the addition and the shift in a higher-precision integer, so
they do not match the patterns that we added.
For example, with s8 elements, adding 100 and 100 should wrap, leaving the
shift to operate on a negative number. VHADD instead does the arithmetic in
higher precision, giving 100 overall. The vhadd result is arguably "better",
but it does not match what the original add-then-shift computes.
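
To make the mismatch concrete, here is a minimal standalone C sketch (my
illustration, not part of the patch), assuming the usual two's-complement
wrap on conversion to int8_t:

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      int8_t a = 100, b = 100;

      /* What the removed patterns assumed: an 8-bit add that wraps, then an
         arithmetic shift right by one. 200 wraps to -56, and -56 >> 1 is -28. */
      int8_t narrow_sum = (int8_t)(a + b);
      int8_t add_then_shift = (int8_t)(narrow_sum >> 1);

      /* What vhadd.s8 actually does: add in a wider type, then halve, giving 100. */
      int8_t halving_add = (int8_t)(((int16_t)a + (int16_t)b) >> 1);

      printf("add-then-shift = %d, vhadd-style = %d\n", add_then_shift, halving_add);
      return 0;
    }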
I am just removing the patterns here. We might be able to re-add them in
the future by checking for wrap flags or changing bitwidths, but for the
moment removing them gets rid of the problem cases.
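
For reference, the equivalence only breaks when the narrow sum wraps: if we
knew the add could not overflow (the add nsw/nuw cases in the IR), the
add-then-shift and the widened halving add agree, which is the kind of check
that could let the patterns come back. A small C sketch of that, again purely
illustrative:

    #include <assert.h>
    #include <stdint.h>

    /* Narrow add that can wrap in i8, then an arithmetic shift. */
    static int8_t narrow_halve(int8_t a, int8_t b) {
      return (int8_t)((int8_t)(a + b) >> 1);
    }

    /* Widened add, then halve: the vhadd.s8 behaviour. */
    static int8_t wide_halve(int8_t a, int8_t b) {
      return (int8_t)(((int16_t)a + (int16_t)b) >> 1);
    }

    int main(void) {
      /* 40 + 23 = 63 fits in i8, so no wrap occurs and both forms give 31. */
      assert(narrow_halve(40, 23) == wide_halve(40, 23));
      return 0;
    }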
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrMVE.td        54
-rw-r--r--  llvm/test/CodeGen/Thumb2/mve-vhaddsub.ll   72
2 files changed, 48 insertions, 78 deletions
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index df385034589..cc3a8ee77d7 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -1785,60 +1785,6 @@ def MVE_VHSUBu8 : MVE_VHSUB<"u8", 0b1, 0b00>;
 def MVE_VHSUBu16 : MVE_VHSUB<"u16", 0b1, 0b01>;
 def MVE_VHSUBu32 : MVE_VHSUB<"u32", 0b1, 0b10>;
 
-let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 (ARMvshrsImm
-                    (v16i8 (add (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)),
-            (v16i8 (MVE_VHADDs8
-                    (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>;
-  def : Pat<(v8i16 (ARMvshrsImm
-                    (v8i16 (add (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)),
-            (v8i16 (MVE_VHADDs16
-                    (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>;
-  def : Pat<(v4i32 (ARMvshrsImm
-                    (v4i32 (add (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)),
-            (v4i32 (MVE_VHADDs32
-                    (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>;
-
-  def : Pat<(v16i8 (ARMvshruImm
-                    (v16i8 (add (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)),
-            (v16i8 (MVE_VHADDu8
-                    (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>;
-  def : Pat<(v8i16 (ARMvshruImm
-                    (v8i16 (add (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)),
-            (v8i16 (MVE_VHADDu16
-                    (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>;
-  def : Pat<(v4i32 (ARMvshruImm
-                    (v4i32 (add (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)),
-            (v4i32 (MVE_VHADDu32
-                    (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>;
-
-  def : Pat<(v16i8 (ARMvshrsImm
-                    (v16i8 (sub (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)),
-            (v16i8 (MVE_VHSUBs8
-                    (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>;
-  def : Pat<(v8i16 (ARMvshrsImm
-                    (v8i16 (sub (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)),
-            (v8i16 (MVE_VHSUBs16
-                    (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>;
-  def : Pat<(v4i32 (ARMvshrsImm
-                    (v4i32 (sub (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)),
-            (v4i32 (MVE_VHSUBs32
-                    (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>;
-
-  def : Pat<(v16i8 (ARMvshruImm
-                    (v16i8 (sub (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)),
-            (v16i8 (MVE_VHSUBu8
-                    (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>;
-  def : Pat<(v8i16 (ARMvshruImm
-                    (v8i16 (sub (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)),
-            (v8i16 (MVE_VHSUBu16
-                    (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>;
-  def : Pat<(v4i32 (ARMvshruImm
-                    (v4i32 (sub (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)),
-            (v4i32 (MVE_VHSUBu32
-                    (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>;
-}
-
 class MVE_VDUP<string suffix, bit B, bit E, list<dag> pattern=[]>
   : MVE_p<(outs MQPR:$Qd), (ins rGPR:$Rt), NoItinerary,
           "vdup", suffix, "$Qd, $Rt", vpred_r, "", pattern> {
diff --git a/llvm/test/CodeGen/Thumb2/mve-vhaddsub.ll b/llvm/test/CodeGen/Thumb2/mve-vhaddsub.ll
index 19979f203f1..83534e2c3e8 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vhaddsub.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vhaddsub.ll
@@ -4,7 +4,8 @@
 define arm_aapcs_vfpcc <16 x i8> @add_ashr_v16i8(<16 x i8> %src1, <16 x i8> %src2) {
 ; CHECK-LABEL: add_ashr_v16i8:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vhadd.s8 q0, q0, q1
+; CHECK-NEXT:    vadd.i8 q0, q0, q1
+; CHECK-NEXT:    vshr.s8 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = add <16 x i8> %src1, %src2
@@ -15,7 +16,8 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @add_ashr_v8i16(<8 x i16> %src1, <8 x i16> %src2) {
 ; CHECK-LABEL: add_ashr_v8i16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vhadd.s16 q0, q0, q1
+; CHECK-NEXT:    vadd.i16 q0, q0, q1
+; CHECK-NEXT:    vshr.s16 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = add <8 x i16> %src1, %src2
@@ -26,7 +28,8 @@ entry:
 define arm_aapcs_vfpcc <4 x i32> @add_ashr_v4i32(<4 x i32> %src1, <4 x i32> %src2) {
 ; CHECK-LABEL: add_ashr_v4i32:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vhadd.s32 q0, q0, q1
+; CHECK-NEXT:    vadd.i32 q0, q0, q1
+; CHECK-NEXT:    vshr.s32 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = add nsw <4 x i32> %src1, %src2
@@ -37,7 +40,8 @@ entry:
 define arm_aapcs_vfpcc <16 x i8> @add_lshr_v16i8(<16 x i8> %src1, <16 x i8> %src2) {
 ; CHECK-LABEL: add_lshr_v16i8:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vhadd.u8 q0, q0, q1
+; CHECK-NEXT:    vadd.i8 q0, q0, q1
+; CHECK-NEXT:    vshr.u8 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = add <16 x i8> %src1, %src2
@@ -48,7 +52,8 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @add_lshr_v8i16(<8 x i16> %src1, <8 x i16> %src2) {
 ; CHECK-LABEL: add_lshr_v8i16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vhadd.u16 q0, q0, q1
+; CHECK-NEXT:    vadd.i16 q0, q0, q1
+; CHECK-NEXT:    vshr.u16 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = add <8 x i16> %src1, %src2
@@ -59,7 +64,8 @@ entry:
 define arm_aapcs_vfpcc <4 x i32> @add_lshr_v4i32(<4 x i32> %src1, <4 x i32> %src2) {
 ; CHECK-LABEL: add_lshr_v4i32:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vhadd.u32 q0, q0, q1
+; CHECK-NEXT:    vadd.i32 q0, q0, q1
+; CHECK-NEXT:    vshr.u32 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = add nsw <4 x i32> %src1, %src2
@@ -70,7 +76,8 @@ entry:
 define arm_aapcs_vfpcc <16 x i8> @sub_ashr_v16i8(<16 x i8> %src1, <16 x i8> %src2) {
 ; CHECK-LABEL: sub_ashr_v16i8:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vhsub.s8 q0, q0, q1
+; CHECK-NEXT:    vsub.i8 q0, q0, q1
+; CHECK-NEXT:    vshr.s8 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = sub <16 x i8> %src1, %src2
@@ -81,7 +88,8 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @sub_ashr_v8i16(<8 x i16> %src1, <8 x i16> %src2) {
 ; CHECK-LABEL: sub_ashr_v8i16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vhsub.s16 q0, q0, q1
+; CHECK-NEXT:    vsub.i16 q0, q0, q1
+; CHECK-NEXT:    vshr.s16 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = sub <8 x i16> %src1, %src2
@@ -92,7 +100,8 @@ entry:
 define arm_aapcs_vfpcc <4 x i32> @sub_ashr_v4i32(<4 x i32> %src1, <4 x i32> %src2) {
 ; CHECK-LABEL: sub_ashr_v4i32:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vhsub.s32 q0, q0, q1
+; CHECK-NEXT:    vsub.i32 q0, q0, q1
+; CHECK-NEXT:    vshr.s32 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = sub nsw <4 x i32> %src1, %src2
@@ -103,7 +112,8 @@ entry:
 define arm_aapcs_vfpcc <16 x i8> @sub_lshr_v16i8(<16 x i8> %src1, <16 x i8> %src2) {
 ; CHECK-LABEL: sub_lshr_v16i8:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vhsub.u8 q0, q0, q1
+; CHECK-NEXT:    vsub.i8 q0, q0, q1
+; CHECK-NEXT:    vshr.u8 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = sub <16 x i8> %src1, %src2
@@ -114,7 +124,8 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @sub_lshr_v8i16(<8 x i16> %src1, <8 x i16> %src2) {
 ; CHECK-LABEL: sub_lshr_v8i16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vhsub.u16 q0, q0, q1
+; CHECK-NEXT:    vsub.i16 q0, q0, q1
+; CHECK-NEXT:    vshr.u16 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = sub <8 x i16> %src1, %src2
@@ -125,7 +136,8 @@ entry:
 define arm_aapcs_vfpcc <4 x i32> @sub_lshr_v4i32(<4 x i32> %src1, <4 x i32> %src2) {
 ; CHECK-LABEL: sub_lshr_v4i32:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vhsub.u32 q0, q0, q1
+; CHECK-NEXT:    vsub.i32 q0, q0, q1
+; CHECK-NEXT:    vshr.u32 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = sub nsw <4 x i32> %src1, %src2
@@ -140,7 +152,8 @@ define arm_aapcs_vfpcc <16 x i8> @add_sdiv_v16i8(<16 x i8> %src1, <16 x i8> %src
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vadd.i8 q0, q0, q1
 ; CHECK-NEXT:    vshr.u8 q1, q0, #7
-; CHECK-NEXT:    vhadd.s8 q0, q0, q1
+; CHECK-NEXT:    vadd.i8 q0, q0, q1
+; CHECK-NEXT:    vshr.s8 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = add <16 x i8> %src1, %src2
@@ -153,7 +166,8 @@ define arm_aapcs_vfpcc <8 x i16> @add_sdiv_v8i16(<8 x i16> %src1, <8 x i16> %src
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vadd.i16 q0, q0, q1
 ; CHECK-NEXT:    vshr.u16 q1, q0, #15
-; CHECK-NEXT:    vhadd.s16 q0, q0, q1
+; CHECK-NEXT:    vadd.i16 q0, q0, q1
+; CHECK-NEXT:    vshr.s16 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = add <8 x i16> %src1, %src2
@@ -166,7 +180,8 @@ define arm_aapcs_vfpcc <4 x i32> @add_sdiv_v4i32(<4 x i32> %src1, <4 x i32> %src
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vadd.i32 q0, q0, q1
 ; CHECK-NEXT:    vshr.u32 q1, q0, #31
-; CHECK-NEXT:    vhadd.s32 q0, q0, q1
+; CHECK-NEXT:    vadd.i32 q0, q0, q1
+; CHECK-NEXT:    vshr.s32 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = add nsw <4 x i32> %src1, %src2
@@ -177,7 +192,8 @@ entry:
 define arm_aapcs_vfpcc <16 x i8> @add_udiv_v16i8(<16 x i8> %src1, <16 x i8> %src2) {
 ; CHECK-LABEL: add_udiv_v16i8:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vhadd.u8 q0, q0, q1
+; CHECK-NEXT:    vadd.i8 q0, q0, q1
+; CHECK-NEXT:    vshr.u8 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = add <16 x i8> %src1, %src2
@@ -188,7 +204,8 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @add_udiv_v8i16(<8 x i16> %src1, <8 x i16> %src2) {
 ; CHECK-LABEL: add_udiv_v8i16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vhadd.u16 q0, q0, q1
+; CHECK-NEXT:    vadd.i16 q0, q0, q1
+; CHECK-NEXT:    vshr.u16 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = add <8 x i16> %src1, %src2
@@ -199,7 +216,8 @@ entry:
 define arm_aapcs_vfpcc <4 x i32> @add_udiv_v4i32(<4 x i32> %src1, <4 x i32> %src2) {
 ; CHECK-LABEL: add_udiv_v4i32:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vhadd.u32 q0, q0, q1
+; CHECK-NEXT:    vadd.i32 q0, q0, q1
+; CHECK-NEXT:    vshr.u32 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = add nsw <4 x i32> %src1, %src2
@@ -212,7 +230,8 @@ define arm_aapcs_vfpcc <16 x i8> @sub_sdiv_v16i8(<16 x i8> %src1, <16 x i8> %src
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vsub.i8 q0, q0, q1
 ; CHECK-NEXT:    vshr.u8 q1, q0, #7
-; CHECK-NEXT:    vhadd.s8 q0, q0, q1
+; CHECK-NEXT:    vadd.i8 q0, q0, q1
+; CHECK-NEXT:    vshr.s8 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = sub <16 x i8> %src1, %src2
@@ -225,7 +244,8 @@ define arm_aapcs_vfpcc <8 x i16> @sub_sdiv_v8i16(<8 x i16> %src1, <8 x i16> %src
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vsub.i16 q0, q0, q1
 ; CHECK-NEXT:    vshr.u16 q1, q0, #15
-; CHECK-NEXT:    vhadd.s16 q0, q0, q1
+; CHECK-NEXT:    vadd.i16 q0, q0, q1
+; CHECK-NEXT:    vshr.s16 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = sub <8 x i16> %src1, %src2
@@ -238,7 +258,8 @@ define arm_aapcs_vfpcc <4 x i32> @sub_sdiv_v4i32(<4 x i32> %src1, <4 x i32> %src
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vsub.i32 q0, q0, q1
 ; CHECK-NEXT:    vshr.u32 q1, q0, #31
-; CHECK-NEXT:    vhadd.s32 q0, q0, q1
+; CHECK-NEXT:    vadd.i32 q0, q0, q1
+; CHECK-NEXT:    vshr.s32 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = sub nsw <4 x i32> %src1, %src2
@@ -249,7 +270,8 @@ entry:
 define arm_aapcs_vfpcc <16 x i8> @sub_udiv_v16i8(<16 x i8> %src1, <16 x i8> %src2) {
 ; CHECK-LABEL: sub_udiv_v16i8:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vhsub.u8 q0, q0, q1
+; CHECK-NEXT:    vsub.i8 q0, q0, q1
+; CHECK-NEXT:    vshr.u8 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = sub <16 x i8> %src1, %src2
@@ -260,7 +282,8 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @sub_udiv_v8i16(<8 x i16> %src1, <8 x i16> %src2) {
 ; CHECK-LABEL: sub_udiv_v8i16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vhsub.u16 q0, q0, q1
+; CHECK-NEXT:    vsub.i16 q0, q0, q1
+; CHECK-NEXT:    vshr.u16 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = sub <8 x i16> %src1, %src2
@@ -271,7 +294,8 @@ entry:
 define arm_aapcs_vfpcc <4 x i32> @sub_udiv_v4i32(<4 x i32> %src1, <4 x i32> %src2) {
 ; CHECK-LABEL: sub_udiv_v4i32:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vhsub.u32 q0, q0, q1
+; CHECK-NEXT:    vsub.i32 q0, q0, q1
+; CHECK-NEXT:    vshr.u32 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = sub nsw <4 x i32> %src1, %src2