diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2014-11-04 23:25:08 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2014-11-04 23:25:08 +0000 |
| commit | c9a0779309ebf8240dcea044fafbf072c1163095 (patch) | |
| tree | b6f9a9917a3ca17f506944e6b0c57afd695a7e97 /llvm/lib | |
| parent | de4277a076a22e396bf80f0974844af558deb8aa (diff) | |
| download | bcm5719-llvm-c9a0779309ebf8240dcea044fafbf072c1163095.tar.gz bcm5719-llvm-c9a0779309ebf8240dcea044fafbf072c1163095.zip | |
[X86][SSE] Enable commutation for SSE immediate blend instructions
Patch to allow the (v)blendps, (v)blendpd, (v)pblendw and vpblendd instructions to be commuted: commuting swaps the two source registers and inverts the blend mask immediate.
This is primarily to improve memory folding (see new tests), but it also improves the quality of shuffles (see modified tests).
Differential Revision: http://reviews.llvm.org/D6015
llvm-svn: 221313
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 49 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 56 |
2 files changed, 77 insertions, 28 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 7e3b4fd3644..4e05c7c2d2e 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -2420,6 +2420,41 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { MI->getOperand(3).setImm(Size-Amt); return TargetInstrInfo::commuteInstruction(MI, NewMI); } + case X86::BLENDPDrri: + case X86::BLENDPSrri: + case X86::PBLENDWrri: + case X86::VBLENDPDrri: + case X86::VBLENDPSrri: + case X86::VBLENDPDYrri: + case X86::VBLENDPSYrri: + case X86::VPBLENDDrri: + case X86::VPBLENDWrri: + case X86::VPBLENDDYrri: + case X86::VPBLENDWYrri:{ + unsigned Mask; + switch (MI->getOpcode()) { + default: llvm_unreachable("Unreachable!"); + case X86::BLENDPDrri: Mask = 0x03; break; + case X86::BLENDPSrri: Mask = 0x0F; break; + case X86::PBLENDWrri: Mask = 0xFF; break; + case X86::VBLENDPDrri: Mask = 0x03; break; + case X86::VBLENDPSrri: Mask = 0x0F; break; + case X86::VBLENDPDYrri: Mask = 0x0F; break; + case X86::VBLENDPSYrri: Mask = 0xFF; break; + case X86::VPBLENDDrri: Mask = 0x0F; break; + case X86::VPBLENDWrri: Mask = 0xFF; break; + case X86::VPBLENDDYrri: Mask = 0xFF; break; + case X86::VPBLENDWYrri: Mask = 0xFF; break; + } + unsigned Imm = MI->getOperand(3).getImm(); + if (NewMI) { + MachineFunction &MF = *MI->getParent()->getParent(); + MI = MF.CloneMachineInstr(MI); + NewMI = false; + } + MI->getOperand(3).setImm(Mask ^ Imm); + return TargetInstrInfo::commuteInstruction(MI, NewMI); + } case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr: case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr: case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr: @@ -2504,6 +2539,20 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { bool X86InstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { switch (MI->getOpcode()) { + case X86::BLENDPDrri: + case 
X86::BLENDPSrri: + case X86::PBLENDWrri: + case X86::VBLENDPDrri: + case X86::VBLENDPSrri: + case X86::VBLENDPDYrri: + case X86::VBLENDPSYrri: + case X86::VPBLENDDrri: + case X86::VPBLENDDYrri: + case X86::VPBLENDWrri: + case X86::VPBLENDWYrri: + SrcOpIdx1 = 1; + SrcOpIdx2 = 2; + return true; case X86::VFMADDPDr231r: case X86::VFMADDPSr231r: case X86::VFMADDSDr231r: diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 3874c1968b5..cc896f08528 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -7537,31 +7537,33 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr, let Predicates = [HasAVX] in { let isCommutable = 0 in { - let ExeDomain = SSEPackedSingle in { - defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps, - VR128, loadv4f32, f128mem, 0, - DEFAULT_ITINS_FBLENDSCHED>, VEX_4V; - defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps", - int_x86_avx_blend_ps_256, VR256, loadv8f32, - f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>, - VEX_4V, VEX_L; - } - let ExeDomain = SSEPackedDouble in { - defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd, - VR128, loadv2f64, f128mem, 0, - DEFAULT_ITINS_FBLENDSCHED>, VEX_4V; - defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd", - int_x86_avx_blend_pd_256,VR256, loadv4f64, - f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>, - VEX_4V, VEX_L; - } + defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw, + VR128, loadv2i64, i128mem, 0, + DEFAULT_ITINS_MPSADSCHED>, VEX_4V; + } + + let ExeDomain = SSEPackedSingle in { + defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps, + VR128, loadv4f32, f128mem, 0, + DEFAULT_ITINS_FBLENDSCHED>, VEX_4V; + defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps", + int_x86_avx_blend_ps_256, VR256, loadv8f32, + f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>, + VEX_4V, VEX_L; + } + let ExeDomain = SSEPackedDouble in { + defm VBLENDPD : 
SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd, + VR128, loadv2f64, f128mem, 0, + DEFAULT_ITINS_FBLENDSCHED>, VEX_4V; + defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd", + int_x86_avx_blend_pd_256,VR256, loadv4f64, + f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>, + VEX_4V, VEX_L; + } defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw, VR128, loadv2i64, i128mem, 0, DEFAULT_ITINS_BLENDSCHED>, VEX_4V; - defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw, - VR128, loadv2i64, i128mem, 0, - DEFAULT_ITINS_MPSADSCHED>, VEX_4V; - } + let ExeDomain = SSEPackedSingle in defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps, VR128, loadv4f32, f128mem, 0, @@ -7589,6 +7591,10 @@ let Predicates = [HasAVX2] in { let Constraints = "$src1 = $dst" in { let isCommutable = 0 in { + defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw, + VR128, memopv2i64, i128mem, + 1, SSE_MPSADBW_ITINS>; + } let ExeDomain = SSEPackedSingle in defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps, VR128, memopv4f32, f128mem, @@ -7600,10 +7606,6 @@ let Constraints = "$src1 = $dst" in { defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw, VR128, memopv2i64, i128mem, 1, SSE_INTALU_ITINS_BLEND_P>; - defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw, - VR128, memopv2i64, i128mem, - 1, SSE_MPSADBW_ITINS>; - } let ExeDomain = SSEPackedSingle in defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps, VR128, memopv4f32, f128mem, 1, @@ -8827,12 +8829,10 @@ multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr, Sched<[WriteBlendLd, ReadAfterLd]>, VEX_4V; } -let isCommutable = 0 in { defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128, VR128, loadv2i64, i128mem>; defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256, VR256, loadv4i64, i256mem>, VEX_L; -} def : Pat<(v4i32 (X86Blendi (v4i32 
VR128:$src1), (v4i32 VR128:$src2), imm:$mask)), |

