diff options
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 68 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 4 |
2 files changed, 24 insertions, 48 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index a50d1585e6e..a0a11442cb2 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -5098,7 +5098,7 @@ def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), // SSE3 - Arithmetic //===---------------------------------------------------------------------===// -multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC, +multiclass sse3_addsub<string OpcodeStr, ValueType vt, RegisterClass RC, X86MemOperand x86memop, OpndItins itins, PatFrag ld_frag, bit Is2Addr = 1> { def rr : I<0xD0, MRMSrcReg, @@ -5106,70 +5106,42 @@ multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>, + [(set RC:$dst, (vt (X86Addsub RC:$src1, RC:$src2)))], itins.rr>, Sched<[itins.Sched]>; def rm : I<0xD0, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (Int RC:$src1, (ld_frag addr:$src2)))], itins.rr>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + [(set RC:$dst, (vt (X86Addsub RC:$src1, (ld_frag addr:$src2))))], + itins.rr>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Predicates = [HasAVX] in { let ExeDomain = SSEPackedSingle in { - defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128, - f128mem, SSE_ALU_F32P, loadv4f32, 0>, XD, VEX_4V, VEX_WIG; - defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256, - f256mem, SSE_ALU_F32P, loadv8f32, 0>, XD, VEX_4V, VEX_L, VEX_WIG; + defm VADDSUBPS : sse3_addsub<"vaddsubps", v4f32, VR128, f128mem, + SSE_ALU_F32P, loadv4f32, 0>, XD, VEX_4V, + VEX_WIG; + defm VADDSUBPSY : sse3_addsub<"vaddsubps", v8f32, VR256, f256mem, + SSE_ALU_F32P, loadv8f32, 0>, XD, VEX_4V, + VEX_L, VEX_WIG; } let ExeDomain = SSEPackedDouble in { - defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128, - f128mem, SSE_ALU_F64P, loadv2f64, 0>, PD, VEX_4V, VEX_WIG; - defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256, - f256mem, SSE_ALU_F64P, loadv4f64, 0>, PD, VEX_4V, VEX_L, VEX_WIG; + defm VADDSUBPD : sse3_addsub<"vaddsubpd", v2f64, VR128, f128mem, + SSE_ALU_F64P, loadv2f64, 0>, PD, VEX_4V, + VEX_WIG; + defm VADDSUBPDY : sse3_addsub<"vaddsubpd", v4f64, VR256, f256mem, + SSE_ALU_F64P, loadv4f64, 0>, PD, VEX_4V, + VEX_L, VEX_WIG; } } let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in { let ExeDomain = SSEPackedSingle in - defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128, - f128mem, SSE_ALU_F32P, memopv4f32>, XD; + defm ADDSUBPS : sse3_addsub<"addsubps", v4f32, VR128, f128mem, SSE_ALU_F32P, + memopv4f32>, XD; let ExeDomain = SSEPackedDouble in - defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128, - f128mem, SSE_ALU_F64P, memopv2f64>, PD; -} - -// Patterns used to select 'addsub' instructions. -let Predicates = [HasAVX] in { - def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 VR128:$rhs))), - (VADDSUBPSrr VR128:$lhs, VR128:$rhs)>; - def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (loadv4f32 addr:$rhs))), - (VADDSUBPSrm VR128:$lhs, f128mem:$rhs)>; - def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 VR128:$rhs))), - (VADDSUBPDrr VR128:$lhs, VR128:$rhs)>; - def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (loadv2f64 addr:$rhs))), - (VADDSUBPDrm VR128:$lhs, f128mem:$rhs)>; - - def : Pat<(v8f32 (X86Addsub (v8f32 VR256:$lhs), (v8f32 VR256:$rhs))), - (VADDSUBPSYrr VR256:$lhs, VR256:$rhs)>; - def : Pat<(v8f32 (X86Addsub (v8f32 VR256:$lhs), (loadv8f32 addr:$rhs))), - (VADDSUBPSYrm VR256:$lhs, f256mem:$rhs)>; - def : Pat<(v4f64 (X86Addsub (v4f64 VR256:$lhs), (v4f64 VR256:$rhs))), - (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>; - def : Pat<(v4f64 (X86Addsub (v4f64 VR256:$lhs), (loadv4f64 addr:$rhs))), - (VADDSUBPDYrm VR256:$lhs, f256mem:$rhs)>; -} - -let Predicates = [UseSSE3] in { - def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 VR128:$rhs))), - (ADDSUBPSrr VR128:$lhs, VR128:$rhs)>; - def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (memopv4f32 addr:$rhs))), - (ADDSUBPSrm VR128:$lhs, f128mem:$rhs)>; - def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 VR128:$rhs))), - (ADDSUBPDrr VR128:$lhs, VR128:$rhs)>; - def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (memopv2f64 addr:$rhs))), - (ADDSUBPDrm VR128:$lhs, f128mem:$rhs)>; + defm ADDSUBPD : sse3_addsub<"addsubpd", v2f64, VR128, f128mem, SSE_ALU_F64P, + memopv2f64>, PD; } //===---------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 6b1add8ff8e..7f7701893e7 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -342,6 +342,8 @@ static const IntrinsicData* getIntrinsicWithChain(uint16_t IntNo) { * the alphabetical order. */ static const IntrinsicData IntrinsicsWithoutChain[] = { + X86_INTRINSIC_DATA(avx_addsub_pd_256, INTR_TYPE_2OP, X86ISD::ADDSUB, 0), + X86_INTRINSIC_DATA(avx_addsub_ps_256, INTR_TYPE_2OP, X86ISD::ADDSUB, 0), X86_INTRINSIC_DATA(avx_cmp_pd_256, INTR_TYPE_3OP, X86ISD::CMPP, 0), X86_INTRINSIC_DATA(avx_cmp_ps_256, INTR_TYPE_3OP, X86ISD::CMPP, 0), X86_INTRINSIC_DATA(avx_cvt_pd2_ps_256,CVTPD2PS, ISD::FP_ROUND, 0), @@ -1650,6 +1652,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse2_ucomile_sd, COMI, X86ISD::UCOMI, ISD::SETLE), X86_INTRINSIC_DATA(sse2_ucomilt_sd, COMI, X86ISD::UCOMI, ISD::SETLT), X86_INTRINSIC_DATA(sse2_ucomineq_sd, COMI, X86ISD::UCOMI, ISD::SETNE), + X86_INTRINSIC_DATA(sse3_addsub_pd, INTR_TYPE_2OP, X86ISD::ADDSUB, 0), + X86_INTRINSIC_DATA(sse3_addsub_ps, INTR_TYPE_2OP, X86ISD::ADDSUB, 0), X86_INTRINSIC_DATA(sse3_hadd_pd, INTR_TYPE_2OP, X86ISD::FHADD, 0), X86_INTRINSIC_DATA(sse3_hadd_ps, INTR_TYPE_2OP, X86ISD::FHADD, 0), X86_INTRINSIC_DATA(sse3_hsub_pd, INTR_TYPE_2OP, X86ISD::FHSUB, 0), |