diff options
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 27 |
1 files changed, 18 insertions, 9 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 7830b48280e..fa2ee401b93 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -2897,7 +2897,8 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, let Predicates = [HasAVX, NoVLX] in { defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle, !strconcat(OpcodeStr, "ps"), f256mem, - [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))], + [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)), + (bc_v4i64 (v8f32 VR256:$src2))))], [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)), (loadv4i64 addr:$src2)))], 0>, PS, VEX_4V, VEX_L; @@ -2909,12 +2910,10 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, (loadv4i64 addr:$src2)))], 0>, PD, VEX_4V, VEX_L; - // In AVX no need to add a pattern for 128-bit logical rr ps, because they - // are all promoted to v2i64, and the patterns are covered by the int - // version. This is needed in SSE only, because v2i64 isn't supported on - // SSE1, but only on SSE2. defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, - !strconcat(OpcodeStr, "ps"), f128mem, [], + !strconcat(OpcodeStr, "ps"), f128mem, + [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), + (bc_v2i64 (v4f32 VR128:$src2))))], [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), (loadv2i64 addr:$src2)))], 0>, PS, VEX_4V; @@ -2930,7 +2929,8 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, !strconcat(OpcodeStr, "ps"), f128mem, - [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))], + [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), + (bc_v2i64 (v4f32 VR128:$src2))))], [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), (memopv2i64 addr:$src2)))]>, PS; @@ -2949,9 +2949,18 @@ defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>; let isCommutable = 0 in defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>; -// AVX1 requires type coercions in order to fold loads directly into logical -// operations. +// If only AVX1 is supported, we need to handle integer operations with +// floating point instructions since the integer versions aren't available. let Predicates = [HasAVX1Only] in { + def : Pat<(v4i64 (and VR256:$src1, VR256:$src2)), + (VANDPSYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v4i64 (or VR256:$src1, VR256:$src2)), + (VORPSYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v4i64 (xor VR256:$src1, VR256:$src2)), + (VXORPSYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v4i64 (X86andnp VR256:$src1, VR256:$src2)), + (VANDNPSYrr VR256:$src1, VR256:$src2)>; + def : Pat<(and VR256:$src1, (loadv4i64 addr:$src2)), (VANDPSYrm VR256:$src1, addr:$src2)>; def : Pat<(or VR256:$src1, (loadv4i64 addr:$src2)), |