diff options
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstrMVE.td | 30 | ||||
-rw-r--r-- | llvm/test/CodeGen/Thumb2/vqneg.ll | 44 |
2 files changed, 63 insertions, 11 deletions
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 79f1bcd186d..429d0a1cf1b 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -1961,12 +1961,13 @@ def MVE_VQNEGs32 : MVE_VQABSNEG<"vqneg", "s32", 0b10, 0b1>;
 
 // int_min/int_max: vector containing INT_MIN/INT_MAX VTI.Size times
 // zero_vec: v4i32-initialized zero vector, potentially wrapped in a bitconvert
-multiclass vqabs_pattern<MVEVectorVTInfo VTI, dag int_min, dag int_max,
-                         dag zero_vec, MVE_VQABSNEG vqabs_instruction> {
-  // The below tree can be replaced by a vqabs instruction, as it represents
-  // the following vectorized expression (r being the value in $reg):
-  // r > 0 ? r : (r == INT_MIN ? INT_MAX : -r)
+multiclass vqabsneg_pattern<MVEVectorVTInfo VTI, dag int_min, dag int_max,
+                         dag zero_vec, MVE_VQABSNEG vqabs_instruction,
+                         MVE_VQABSNEG vqneg_instruction> {
   let Predicates = [HasMVEInt] in {
+    // The below tree can be replaced by a vqabs instruction, as it represents
+    // the following vectorized expression (r being the value in $reg):
+    // r > 0 ? r : (r == INT_MIN ? INT_MAX : -r)
     def : Pat<(VTI.Vec (vselect
                       (VTI.Pred (ARMvcmpz (VTI.Vec MQPR:$reg), (i32 12))),
                       (VTI.Vec MQPR:$reg),
@@ -1975,24 +1976,31 @@ multiclass vqabs_pattern<MVEVectorVTInfo VTI, dag int_min, dag int_max,
                                 int_max,
                                 (sub (VTI.Vec zero_vec), (VTI.Vec MQPR:$reg)))))),
             (VTI.Vec (vqabs_instruction (VTI.Vec MQPR:$reg)))>;
+    // Similarly, this tree represents vqneg, i.e. the following vectorized expression:
+    // r == INT_MIN ? INT_MAX : -r
+    def : Pat<(VTI.Vec (vselect
+                        (VTI.Pred (ARMvcmp (VTI.Vec MQPR:$reg), int_min, (i32 0))),
+                        int_max,
+                        (sub (VTI.Vec zero_vec), (VTI.Vec MQPR:$reg)))),
+              (VTI.Vec (vqneg_instruction (VTI.Vec MQPR:$reg)))>;
   }
 }
 
-defm MVE_VQABS_Ps8 : vqabs_pattern<MVE_v16i8,
+defm MVE_VQABSNEG_Ps8 : vqabsneg_pattern<MVE_v16i8,
                                    (v16i8 (ARMvmovImm (i32 3712))),
                                    (v16i8 (ARMvmovImm (i32 3711))),
                                    (bitconvert (v4i32 (ARMvmovImm (i32 0)))),
-                                   MVE_VQABSs8>;
-defm MVE_VQABS_Ps16 : vqabs_pattern<MVE_v8i16,
+                                   MVE_VQABSs8, MVE_VQNEGs8>;
+defm MVE_VQABSNEG_Ps16 : vqabsneg_pattern<MVE_v8i16,
                                     (v8i16 (ARMvmovImm (i32 2688))),
                                     (v8i16 (ARMvmvnImm (i32 2688))),
                                     (bitconvert (v4i32 (ARMvmovImm (i32 0)))),
-                                    MVE_VQABSs16>;
-defm MVE_VQABS_Ps32 : vqabs_pattern<MVE_v4i32,
+                                    MVE_VQABSs16, MVE_VQNEGs16>;
+defm MVE_VQABSNEG_Ps32 : vqabsneg_pattern<MVE_v4i32,
                                     (v4i32 (ARMvmovImm (i32 1664))),
                                     (v4i32 (ARMvmvnImm (i32 1664))),
                                     (ARMvmovImm (i32 0)),
-                                    MVE_VQABSs32>;
+                                    MVE_VQABSs32, MVE_VQNEGs32>;
 
 class MVE_mod_imm<string iname, string suffix, bits<4> cmode, bit op,
                   dag iops, list<dag> pattern=[]>
diff --git a/llvm/test/CodeGen/Thumb2/vqneg.ll b/llvm/test/CodeGen/Thumb2/vqneg.ll
new file mode 100644
index 00000000000..b92a7ab270f
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/vqneg.ll
@@ -0,0 +1,44 @@
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve %s -o - | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> @vqneg_test16(<16 x i8> %A) nounwind {
+; CHECK-LABEL: vqneg_test16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vqneg.s8 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+
+  %0 = icmp eq <16 x i8> %A, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
+  %1 = sub nsw <16 x i8> zeroinitializer, %A
+  %2 = select <16 x i1> %0, <16 x i8> <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>, <16 x i8> %1
+
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vqneg_test8(<8 x i16> %A) nounwind {
+; CHECK-LABEL: vqneg_test8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vqneg.s16 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+
+  %0 = icmp eq <8 x i16> %A, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
+  %1 = sub nsw <8 x i16> zeroinitializer, %A
+  %2 = select <8 x i1> %0, <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>, <8 x i16> %1
+
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vqneg_test4(<4 x i32> %A) nounwind {
+; CHECK-LABEL: vqneg_test4:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vqneg.s32 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+
+  %0 = icmp eq <4 x i32> %A, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+  %1 = sub nsw <4 x i32> zeroinitializer, %A
+  %2 = select <4 x i1> %0, <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, <4 x i32> %1
+
+  ret <4 x i32> %2
+}
+