diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-05-06 19:11:59 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-05-06 19:11:59 +0000 |
| commit | fea153f3411c4f1d3426ab5ae50753a961cfa68b (patch) | |
| tree | f175a2ef3cee413cc0d8bcd9fed2cb785658d4c4 /llvm | |
| parent | 0f4e94673dd19c30393532808a0f78ed0f4c5d4e (diff) | |
| download | bcm5719-llvm-fea153f3411c4f1d3426ab5ae50753a961cfa68b.tar.gz bcm5719-llvm-fea153f3411c4f1d3426ab5ae50753a961cfa68b.zip | |
[X86][AVX512] Move v2i64/v4i64 VPABS lowering to tablegen
Extend NoVLX targets to use the 512-bit version of VPABSQ for 128-bit and 256-bit (v2i64/v4i64) vectors.
llvm-svn: 302359
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 6 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 14 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/combine-abs.ll | 38 |
3 files changed, 44 insertions, 14 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6c56083f5e3..260dd464cef 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1240,8 +1240,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } } if (Subtarget.hasVLX()) { - setOperationAction(ISD::ABS, MVT::v4i64, Legal); - setOperationAction(ISD::ABS, MVT::v2i64, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); @@ -1308,6 +1306,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MUL, MVT::v16i32, Legal); + // NonVLX sub-targets extend 128/256 vectors to use the 512 version. + setOperationAction(ISD::ABS, MVT::v4i64, Legal); + setOperationAction(ISD::ABS, MVT::v2i64, Legal); + for (auto VT : { MVT::v8i1, MVT::v16i1 }) { setOperationAction(ISD::ADD, VT, Custom); setOperationAction(ISD::SUB, VT, Custom); diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 91eff70ac6a..71d395244b4 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -8631,6 +8631,20 @@ multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w, defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs>; +// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX. 
+let Predicates = [HasAVX512, NoVLX] in { + def : Pat<(v4i64 (abs VR256X:$src)), + (EXTRACT_SUBREG + (VPABSQZrr + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)), + sub_ymm)>; + def : Pat<(v2i64 (abs VR128X:$src)), + (EXTRACT_SUBREG + (VPABSQZrr + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)), + sub_xmm)>; +} + multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, Predicate prd>{ defm NAME : avx512_unary_rm_vl_dq<opc, opc, OpcodeStr, ctlz, prd>; diff --git a/llvm/test/CodeGen/X86/combine-abs.ll b/llvm/test/CodeGen/X86/combine-abs.ll index 2181ad35348..887abe99f6e 100644 --- a/llvm/test/CodeGen/X86/combine-abs.ll +++ b/llvm/test/CodeGen/X86/combine-abs.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512VL ; fold (abs c1) -> c2 define <4 x i32> @combine_v4i32_abs_constant() { @@ -46,17 +48,29 @@ define <32 x i8> @combine_v32i8_abs_abs(<32 x i8> %a) { } define <4 x i64> @combine_v4i64_abs_abs(<4 x i64> %a) { -; CHECK-LABEL: combine_v4i64_abs_abs: -; CHECK: # BB#0: -; CHECK-NEXT: vpsrad $31, %ymm0, %ymm1 -; CHECK-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7] -; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0 -; CHECK-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; CHECK-NEXT: vpsrad $31, %ymm0, %ymm1 -; CHECK-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7] -; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0 -; CHECK-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; CHECK-NEXT: retq +; AVX2-LABEL: combine_v4i64_abs_abs: +; AVX2: # BB#0: +; 
AVX2-NEXT: vpsrad $31, %ymm0, %ymm1 +; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7] +; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpsrad $31, %ymm0, %ymm1 +; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7] +; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: combine_v4i64_abs_abs: +; AVX512F: # BB#0: +; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> +; AVX512F-NEXT: vpabsq %zmm0, %zmm0 +; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill> +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: combine_v4i64_abs_abs: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpabsq %ymm0, %ymm0 +; AVX512VL-NEXT: retq %n1 = sub <4 x i64> zeroinitializer, %a %b1 = icmp slt <4 x i64> %a, zeroinitializer %a1 = select <4 x i1> %b1, <4 x i64> %n1, <4 x i64> %a |

