diff options
author | Robert Khasanov <rob.khasanov@gmail.com> | 2014-10-28 12:28:51 +0000 |
---|---|---|
committer | Robert Khasanov <rob.khasanov@gmail.com> | 2014-10-28 12:28:51 +0000 |
commit | dd09a8f320d8dc0e5e57e68cc4e3d6dbb15ed4a3 (patch) | |
tree | 318dbe284a2b30d1926cc9270cf6ac761014ca84 /llvm/lib | |
parent | d0e13af22cd700c57349b2b039b5c16723cfac6b (diff) | |
download | bcm5719-llvm-dd09a8f320d8dc0e5e57e68cc4e3d6dbb15ed4a3.tar.gz bcm5719-llvm-dd09a8f320d8dc0e5e57e68cc4e3d6dbb15ed4a3.zip |
[AVX512] Bring back vector-shuffle lowering support through broadcasts
Ffter commit at rev219046 512-bit broadcasts lowering become non-optimal. Most of tests on broadcasting and embedded broadcasting were changed and they doesn’t produce efficient code.
Example below is from commit changes (it’s the first test from test/CodeGen/X86/avx512-vbroadcast.ll):
define <16 x i32> @_inreg16xi32(i32 %a) {
; CHECK-LABEL: _inreg16xi32:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpbroadcastd %edi, %zmm0
+; CHECK-NEXT: vmovd %edi, %xmm0
+; CHECK-NEXT: vpbroadcastd %xmm0, %ymm0
+; CHECK-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; CHECK-NEXT: retq
%b = insertelement <16 x i32> undef, i32 %a, i32 0
%c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
ret <16 x i32> %c
}
Here, 256-bit broadcast was generated instead of 512-bit one.
In this patch
1) I added vector-shuffle lowering through broadcasts
2) Removed asserts and branches likes because this is incorrect
- assert(Subtarget->hasDQI() && "We can only lower v8i64 with AVX-512-DQI");
3) Fixed lowering tests
llvm-svn: 220774
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 15 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 10 |
2 files changed, 17 insertions, 8 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 16a21a96616..656c1dea1f7 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -10231,7 +10231,6 @@ static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); ArrayRef<int> Mask = SVOp->getMask(); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); - assert(Subtarget->hasDQI() && "We can only lower v8i64 with AVX-512-DQI"); // FIXME: Implement direct support for this type! return splitAndLowerVectorShuffle(DL, MVT::v8i64, V1, V2, Mask, DAG); @@ -10247,7 +10246,6 @@ static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); ArrayRef<int> Mask = SVOp->getMask(); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); - assert(Subtarget->hasDQI() && "We can only lower v16i32 with AVX-512-DQI!"); // FIXME: Implement direct support for this type! return splitAndLowerVectorShuffle(DL, MVT::v16i32, V1, V2, Mask, DAG); @@ -10299,6 +10297,11 @@ static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, assert(Subtarget->hasAVX512() && "Cannot lower 512-bit vectors w/ basic ISA!"); + // Check for being able to broadcast a single element. + if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(VT.SimpleTy, DL, V1, + Mask, Subtarget, DAG)) + return Broadcast; + // Dispatch to each element type for lowering. If we don't have supprot for // specific element type shuffles at 512 bits, immediately split them and // lower them. Each lowering routine of a given type is allowed to assume that @@ -10309,13 +10312,9 @@ static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, case MVT::v16f32: return lowerV16F32VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v8i64: - if (Subtarget->hasDQI()) - return lowerV8I64VectorShuffle(Op, V1, V2, Subtarget, DAG); - break; + return lowerV8I64VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v16i32: - if (Subtarget->hasDQI()) - return lowerV16I32VectorShuffle(Op, V1, V2, Subtarget, DAG); - break; + return lowerV16I32VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v32i16: if (Subtarget->hasBWI()) return lowerV32I16VectorShuffle(Op, V1, V2, Subtarget, DAG); diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 1d1abcfb40f..3dbc3d2abd8 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -711,6 +711,16 @@ def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))), def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))), (VBROADCASTSDZrr VR128X:$src)>; +def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))), + (VBROADCASTSSZrr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>; +def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))), + (VBROADCASTSDZrr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>; + +def : Pat<(v16i32 (X86VBroadcast (v16i32 VR512:$src))), + (VPBROADCASTDZrr (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>; +def : Pat<(v8i64 (X86VBroadcast (v8i64 VR512:$src))), + (VPBROADCASTQZrr (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>; + def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))), (VBROADCASTSSZrr VR128X:$src)>; def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))), |