diff options
author | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2011-07-13 21:36:51 +0000 |
---|---|---|
committer | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2011-07-13 21:36:51 +0000 |
commit | 9613b64916f736461382f53e04b1d1b3eeb9b77c (patch) | |
tree | e68299e059033b968c6b958f5617d07fe29849df /llvm | |
parent | 7ba479d22fc687587adb0c2d4d5c4f70d234c117 (diff) | |
download | bcm5719-llvm-9613b64916f736461382f53e04b1d1b3eeb9b77c.tar.gz bcm5719-llvm-9613b64916f736461382f53e04b1d1b3eeb9b77c.zip |
Make X86ISD::ANDNP more general and Codegen 256-bit VANDNP. A more
general version of X86ISD::ANDNP also opened the room for a little bit
of refactoring.
llvm-svn: 135088
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 8 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 2 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 91 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx-256-logic.ll | 45 |
4 files changed, 82 insertions, 64 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f953bf2bc7f..e06d84c54a0 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -11821,10 +11821,12 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, if (R.getNode()) return R; - // Want to form ANDNP nodes, in the hopes of then easily combining them with - // OR and AND nodes to form PBLEND/PSIGN. + // Want to form ANDNP nodes: + // 1) In the hopes of then easily combining them with OR and AND nodes + // to form PBLEND/PSIGN. + // 2) To match ANDN packed intrinsics EVT VT = N->getValueType(0); - if (VT != MVT::v2i64) + if (VT != MVT::v2i64 && VT != MVT::v4i64) return SDValue(); SDValue N0 = N->getOperand(0); diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 67a5a345be8..e35a6751929 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -47,7 +47,7 @@ def X86pshufb : SDNode<"X86ISD::PSHUFB", SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; def X86andnp : SDNode<"X86ISD::ANDNP", - SDTypeProfile<1, 2, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>, + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; def X86psignb : SDNode<"X86ISD::PSIGNB", SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 72b383000af..e6167c73d30 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -1473,98 +1473,68 @@ let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in /// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops /// multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, - SDNode OpNode, int HasPat = 0, - list<list<dag>> Pattern = []> { + SDNode OpNode> { let Pattern = []<dag> in { defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, !strconcat(OpcodeStr, "ps"), f128mem, - !if(HasPat, Pattern[0], // rr - [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, - VR128:$src2)))]), - !if(HasPat, Pattern[2], // rm - [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), - (memopv2i64 addr:$src2)))]), 0>, - VEX_4V; + [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))], + [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), + (memopv2i64 addr:$src2)))], 0>, VEX_4V; defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, !strconcat(OpcodeStr, "pd"), f128mem, - !if(HasPat, Pattern[1], // rr - [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), - (bc_v2i64 (v2f64 - VR128:$src2))))]), - !if(HasPat, Pattern[3], // rm - [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), - (memopv2i64 addr:$src2)))]), 0>, - OpSize, VEX_4V; + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (bc_v2i64 (v2f64 VR128:$src2))))], + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (memopv2i64 addr:$src2)))], 0>, + OpSize, VEX_4V; } let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, !strconcat(OpcodeStr, "ps"), f128mem, - !if(HasPat, Pattern[0], // rr - [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, - VR128:$src2)))]), - !if(HasPat, Pattern[2], // rm - [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), - (memopv2i64 addr:$src2)))])>, TB; + [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))], + [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), + (memopv2i64 addr:$src2)))]>, TB; defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, !strconcat(OpcodeStr, "pd"), f128mem, - !if(HasPat, Pattern[1], // rr - [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), - (bc_v2i64 (v2f64 - VR128:$src2))))]), - !if(HasPat, Pattern[3], // rm - [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), - (memopv2i64 addr:$src2)))])>, - TB, OpSize; + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (bc_v2i64 (v2f64 VR128:$src2))))], + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (memopv2i64 addr:$src2)))]>, TB, OpSize; } } /// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms /// multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr, - SDNode OpNode, int HasNoPat = 0> { + SDNode OpNode> { defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle, !strconcat(OpcodeStr, "ps"), f256mem, - !if(HasNoPat, []<dag>, // rr - [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, - VR256:$src2)))]), - !if(HasNoPat, []<dag>, // rm - [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)), - (memopv4i64 addr:$src2)))]), 0>, VEX_4V; + [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))], + [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)), + (memopv4i64 addr:$src2)))], 0>, VEX_4V; defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble, !strconcat(OpcodeStr, "pd"), f256mem, - !if(HasNoPat, []<dag>, // rr - [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), - (bc_v4i64 (v4f64 VR256:$src2))))]), - !if(HasNoPat, []<dag>, // rm - [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), - (memopv4i64 addr:$src2)))]), 0>, - OpSize, VEX_4V; + [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), + (bc_v4i64 (v4f64 VR256:$src2))))], + [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), + (memopv4i64 addr:$src2)))], 0>, + OpSize, VEX_4V; } // AVX 256-bit packed logical ops forms -defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>; -defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>; -defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>; -let isCommutable = 0 in { - defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", undef /* dummy */, 1>; -} +defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>; +defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>; +defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>; +defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>; defm AND : sse12_fp_packed_logical<0x54, "and", and>; defm OR : sse12_fp_packed_logical<0x56, "or", or>; defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>; let isCommutable = 0 in - defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [ - // single r+r - [(set VR128:$dst, (X86andnp VR128:$src1, VR128:$src2))], - // double r+r - [], - // single r+m - [(set VR128:$dst, (X86andnp VR128:$src1, (memopv2i64 addr:$src2)))], - // double r+m - []]>; + defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>; //===----------------------------------------------------------------------===// // SSE 1 & 2 - Arithmetic Instructions @@ -3678,6 +3648,7 @@ let Predicates = [HasAVX] in { def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>; def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>; def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>; + def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>; } // Move scalar to XMM zero-extended diff --git a/llvm/test/CodeGen/X86/avx-256-logic.ll b/llvm/test/CodeGen/X86/avx-256-logic.ll index 05e82895fec..d9e5d081fb1 100644 --- a/llvm/test/CodeGen/X86/avx-256-logic.ll +++ b/llvm/test/CodeGen/X86/avx-256-logic.ll @@ -114,3 +114,48 @@ entry: ret <8 x float> %1 } +; CHECK: vandnpd +define <4 x double> @andnotpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x double> %x to <4 x i64> + %neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1> + %1 = bitcast <4 x double> %y to <4 x i64> + %and.i = and <4 x i64> %1, %neg.i + %2 = bitcast <4 x i64> %and.i to <4 x double> + ret <4 x double> %2 +} + +; CHECK: vandnpd (% +define <4 x double> @andnotpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp { +entry: + %tmp2 = load <4 x double>* %x, align 32 + %0 = bitcast <4 x double> %y to <4 x i64> + %neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1> + %1 = bitcast <4 x double> %tmp2 to <4 x i64> + %and.i = and <4 x i64> %1, %neg.i + %2 = bitcast <4 x i64> %and.i to <4 x double> + ret <4 x double> %2 +} + +; CHECK: vandnps +define <8 x float> @andnotps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <8 x float> %x to <8 x i32> + %neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> + %1 = bitcast <8 x float> %y to <8 x i32> + %and.i = and <8 x i32> %1, %neg.i + %2 = bitcast <8 x i32> %and.i to <8 x float> + ret <8 x float> %2 +} + +; CHECK: vandnps (% +define <8 x float> @andnotps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp { +entry: + %tmp2 = load <8 x float>* %x, align 32 + %0 = bitcast <8 x float> %y to <8 x i32> + %neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> + %1 = bitcast <8 x float> %tmp2 to <8 x i32> + %and.i = and <8 x i32> %1, %neg.i + %2 = bitcast <8 x i32> %and.i to <8 x float> + ret <8 x float> %2 +} |