diff options
-rw-r--r-- | gcc/ChangeLog | 13 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 88 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 12 |
3 files changed, 59 insertions, 54 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c7ade0af844..64d203c6d2a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,16 @@ +2007-06-02 Uros Bizjak <ubizjak@gmail.com> + + * config/i386/sse.md ("sse4_1_blendvpd"): Require "z" class XMM + register for operand[3]. Adjust asm template. + ("sse4_1_blendvps"): Ditto. + ("sse4_1_pblendvb"): Ditto. + * config/i386/i386.c (ix86_expand_sse_4_operands_builtin): Call + safe_vector_operand() if input operand is VECTOR_MODE_P operand. Do not + force operands[3] into xmm0 register for variable blend instructions. + (ix86_expand_sse_pcmpestr): Do not check operands for + "register_operand", when insn operand predicate is "register_operand". + (ix86_expand_sse_pcmpistr): Ditto. + 2007-06-02 H.J. Lu <hongjiu.lu@intel.com> Uros Bizjak <ubizjak@gmail.com> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 593db77a4e0..64fb97ed89f 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -16929,8 +16929,7 @@ static const struct builtin_description bdesc_crc32[] = { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, 0, 0 }, }; -/* SSE builtins with 3 arguments and the last argument must be a 8 bit - constant or xmm0. */ +/* SSE builtins with 3 arguments and the last argument must be an immediate or xmm0. */ static const struct builtin_description bdesc_sse_3arg[] = { /* SSE4.1 */ @@ -18279,51 +18278,48 @@ ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp, rtx op1 = expand_normal (arg1); rtx op2 = expand_normal (arg2); enum machine_mode tmode = insn_data[icode].operand[0].mode; - enum machine_mode mode0 = insn_data[icode].operand[1].mode; - enum machine_mode mode1 = insn_data[icode].operand[2].mode; - enum machine_mode mode2; - rtx xmm0; - - if (! 
(*insn_data[icode].operand[1].predicate) (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - if ((optimize && !register_operand (op1, mode1)) - || !(*insn_data[icode].operand[2].predicate) (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); + enum machine_mode mode1 = insn_data[icode].operand[1].mode; + enum machine_mode mode2 = insn_data[icode].operand[2].mode; + enum machine_mode mode3 = insn_data[icode].operand[3].mode; - switch (icode) - { - case CODE_FOR_sse4_1_blendvpd: - case CODE_FOR_sse4_1_blendvps: - case CODE_FOR_sse4_1_pblendvb: - /* The third argument of variable blends must be xmm0. */ - xmm0 = gen_rtx_REG (tmode, FIRST_SSE_REG); - emit_move_insn (xmm0, op2); - op2 = xmm0; - break; - default: - mode2 = insn_data[icode].operand[2].mode; - if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) - { - switch (icode) - { - case CODE_FOR_sse4_1_roundsd: - case CODE_FOR_sse4_1_roundss: - error ("the third argument must be a 4-bit immediate"); - break; - default: - error ("the third argument must be a 8-bit immediate"); - break; - } - return const0_rtx; - } - break; - } + if (VECTOR_MODE_P (mode1)) + op0 = safe_vector_operand (op0, mode1); + if (VECTOR_MODE_P (mode2)) + op1 = safe_vector_operand (op1, mode2); + if (VECTOR_MODE_P (mode3)) + op2 = safe_vector_operand (op2, mode3); if (optimize || target == 0 || GET_MODE (target) != tmode || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) target = gen_reg_rtx (tmode); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) + op0 = copy_to_mode_reg (mode1, op0); + if ((optimize && !register_operand (op1, mode2)) + || !(*insn_data[icode].operand[2].predicate) (op1, mode2)) + op1 = copy_to_mode_reg (mode2, op1); + + if (! 
(*insn_data[icode].operand[3].predicate) (op2, mode3)) + switch (icode) + { + case CODE_FOR_sse4_1_blendvpd: + case CODE_FOR_sse4_1_blendvps: + case CODE_FOR_sse4_1_pblendvb: + op2 = copy_to_mode_reg (mode3, op2); + break; + + case CODE_FOR_sse4_1_roundsd: + case CODE_FOR_sse4_1_roundss: + error ("the third argument must be a 4-bit immediate"); + return const0_rtx; + + default: + error ("the third argument must be an 8-bit immediate"); + return const0_rtx; + } + pat = GEN_FCN (icode) (target, op0, op1, op2); if (! pat) return 0; @@ -18732,17 +18728,14 @@ ix86_expand_sse_pcmpestr (const struct builtin_description *d, if (VECTOR_MODE_P (modev4)) op2 = safe_vector_operand (op2, modev4); - if ((optimize && !register_operand (op0, modev2)) - || !(*insn_data[d->icode].operand[2].predicate) (op0, modev2)) + if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2)) op0 = copy_to_mode_reg (modev2, op0); - if ((optimize && !register_operand (op1, modei3)) - || !(*insn_data[d->icode].operand[3].predicate) (op1, modei3)) + if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3)) op1 = copy_to_mode_reg (modei3, op1); if ((optimize && !register_operand (op2, modev4)) || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4)) op2 = copy_to_mode_reg (modev4, op2); - if ((optimize && !register_operand (op3, modei5)) - || !(*insn_data[d->icode].operand[5].predicate) (op3, modei5)) + if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5)) op3 = copy_to_mode_reg (modei5, op3); if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm)) @@ -18833,8 +18826,7 @@ ix86_expand_sse_pcmpistr (const struct builtin_description *d, if (VECTOR_MODE_P (modev3)) op1 = safe_vector_operand (op1, modev3); - if ((optimize && !register_operand (op0, modev2)) - || !(*insn_data[d->icode].operand[2].predicate) (op0, modev2)) + if (! 
(*insn_data[d->icode].operand[2].predicate) (op0, modev2)) op0 = copy_to_mode_reg (modev2, op0); if ((optimize && !register_operand (op1, modev3)) || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3)) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index da7e582b9ab..1f8086f99c7 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -5844,10 +5844,10 @@ [(set (match_operand:V2DF 0 "register_operand" "=x") (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") (match_operand:V2DF 2 "nonimmediate_operand" "xm") - (reg:V2DF 21)] + (match_operand:V2DF 3 "register_operand" "z")] UNSPEC_BLENDV))] "TARGET_SSE4_1" - "blendvpd\t{%%xmm0, %2, %0|%0, %2, %%xmm0}" + "blendvpd\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "mode" "V2DF")]) @@ -5856,10 +5856,10 @@ [(set (match_operand:V4SF 0 "register_operand" "=x") (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") (match_operand:V4SF 2 "nonimmediate_operand" "xm") - (reg:V4SF 21)] + (match_operand:V4SF 3 "register_operand" "z")] UNSPEC_BLENDV))] "TARGET_SSE4_1" - "blendvps\t{%%xmm0, %2, %0|%0, %2, %%xmm0}" + "blendvps\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "mode" "V4SF")]) @@ -5927,10 +5927,10 @@ [(set (match_operand:V16QI 0 "register_operand" "=x") (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0") (match_operand:V16QI 2 "nonimmediate_operand" "xm") - (reg:V16QI 21)] + (match_operand:V16QI 3 "register_operand" "z")] UNSPEC_BLENDV))] "TARGET_SSE4_1" - "pblendvb\t{%%xmm0, %2, %0|%0, %2, %%xmm0}" + "pblendvb\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "mode" "TI")]) |