summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2017-08-17 23:20:57 +0000
committerCraig Topper <craig.topper@intel.com>2017-08-17 23:20:57 +0000
commit1fae3ae6f0e82a1c174ec5dc19ca9bf43418efa9 (patch)
tree857dde902ef38be033c9ed82df95f4e2213f462a
parent48fff995d62b227e742c53f472c04e8ac11a3ede (diff)
downloadbcm5719-llvm-1fae3ae6f0e82a1c174ec5dc19ca9bf43418efa9.tar.gz
bcm5719-llvm-1fae3ae6f0e82a1c174ec5dc19ca9bf43418efa9.zip
[X86] Remove SSE/AVX patterns for AND/XOR/OR/ANDN that checked for the inputs being bitcasted from floating point types.
There's really no reason to do this we should just let isel pick the integer version and let the execution dependency fixing pass take care of moving to FP if necessary. It's not very reliable to look for bitcasts at the edges of patterns. If for some reason one input was bitcasted and the other wasn't, or if one was a v4f32 bitcast and one was a v2f64 bitcast, we would have fallen back to the integer pattern anyway. llvm-svn: 311138
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td35
-rw-r--r--llvm/test/CodeGen/X86/cast-vsel.ll2
2 files changed, 10 insertions, 27 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 60cd226bd21..6ac7ae67220 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -310,6 +310,7 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
pat_rr, IIC_SSE_BIT_P_RR, d>,
Sched<[WriteVecLogic]>;
+ let hasSideEffects = 0, mayLoad = 1 in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
@@ -2799,54 +2800,36 @@ defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
///
+/// There are no patterns here because isel prefers integer versions for SSE2
+/// and later. There are SSE1 v4f32 patterns later.
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
let Predicates = [HasAVX, NoVLX] in {
defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f256mem,
- [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
- (bc_v4i64 (v8f32 VR256:$src2))))],
- [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
- (loadv4i64 addr:$src2)))], 0>, PS, VEX_4V, VEX_L, VEX_WIG;
+ [], [], 0>, PS, VEX_4V, VEX_L, VEX_WIG;
defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f256mem,
- [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
- (bc_v4i64 (v4f64 VR256:$src2))))],
- [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
- (loadv4i64 addr:$src2)))], 0>,
- PD, VEX_4V, VEX_L, VEX_WIG;
+ [], [], 0>, PD, VEX_4V, VEX_L, VEX_WIG;
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem,
- [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (bc_v2i64 (v4f32 VR128:$src2))))],
- [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (loadv2i64 addr:$src2)))], 0>, PS, VEX_4V, VEX_WIG;
+ [], [], 0>, PS, VEX_4V, VEX_WIG;
defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem,
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (bc_v2i64 (v2f64 VR128:$src2))))],
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (loadv2i64 addr:$src2)))], 0>,
- PD, VEX_4V, VEX_WIG;
+ [], [], 0>, PD, VEX_4V, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem,
- [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (bc_v2i64 (v4f32 VR128:$src2))))],
- [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))]>, PS;
+ [], []>, PS;
defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem,
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (bc_v2i64 (v2f64 VR128:$src2))))],
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (memopv2i64 addr:$src2)))]>, PD;
+ [], []>, PD;
}
}
diff --git a/llvm/test/CodeGen/X86/cast-vsel.ll b/llvm/test/CodeGen/X86/cast-vsel.ll
index 260535985e2..751320e7259 100644
--- a/llvm/test/CodeGen/X86/cast-vsel.ll
+++ b/llvm/test/CodeGen/X86/cast-vsel.ll
@@ -411,8 +411,8 @@ define void @example25() nounwind {
; AVX2-NEXT: vcmpltps db+4096(%rax), %ymm1, %ymm1
; AVX2-NEXT: vmovups dc+4096(%rax), %ymm2
; AVX2-NEXT: vcmpltps dd+4096(%rax), %ymm2, %ymm2
+; AVX2-NEXT: vandps %ymm0, %ymm2, %ymm2
; AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1
-; AVX2-NEXT: vandps %ymm0, %ymm1, %ymm1
; AVX2-NEXT: vmovups %ymm1, dj+4096(%rax)
; AVX2-NEXT: addq $32, %rax
; AVX2-NEXT: jne .LBB5_1
OpenPOWER on IntegriCloud