summaryrefslogtreecommitdiffstats
path: root/gcc/config/rs6000
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/rs6000')
-rw-r--r--gcc/config/rs6000/altivec.h59
-rw-r--r--gcc/config/rs6000/altivec.md67
-rw-r--r--gcc/config/rs6000/power7.md318
-rw-r--r--gcc/config/rs6000/ppc-asm.h139
-rw-r--r--gcc/config/rs6000/predicates.md10
-rw-r--r--gcc/config/rs6000/rs6000-c.c242
-rw-r--r--gcc/config/rs6000/rs6000.c676
-rw-r--r--gcc/config/rs6000/rs6000.h11
-rw-r--r--gcc/config/rs6000/rs6000.md233
-rw-r--r--gcc/config/rs6000/rs6000.opt4
-rw-r--r--gcc/config/rs6000/t-rs60002
-rw-r--r--gcc/config/rs6000/vector.md388
-rw-r--r--gcc/config/rs6000/vsx.md1339
13 files changed, 3263 insertions, 225 deletions
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 7b397997dd3..bc4f30f7cb2 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -306,6 +306,17 @@
#define vec_splats __builtin_vec_splats
#define vec_promote __builtin_vec_promote
+#ifdef __VSX__
+/* VSX additions */
+#define vec_div __builtin_vec_div
+#define vec_mul __builtin_vec_mul
+#define vec_msub __builtin_vec_msub
+#define vec_nmadd __builtin_vec_nmadd
+#define vec_nearbyint __builtin_vec_nearbyint
+#define vec_rint __builtin_vec_rint
+#define vec_sqrt __builtin_vec_sqrt
+#endif
+
/* Predicates.
For C++, we use templates in order to allow non-parenthesized arguments.
For C, instead, we use macros since non-parenthesized arguments were
@@ -356,14 +367,14 @@ __altivec_scalar_pred(vec_any_out,
__builtin_altivec_vcmpbfp_p (__CR6_EQ_REV, a1, a2))
__altivec_unary_pred(vec_all_nan,
- __builtin_altivec_vcmpeqfp_p (__CR6_EQ, a1, a1))
+ __builtin_altivec_vcmpeq_p (__CR6_EQ, a1, a1))
__altivec_unary_pred(vec_any_nan,
- __builtin_altivec_vcmpeqfp_p (__CR6_LT_REV, a1, a1))
+ __builtin_altivec_vcmpeq_p (__CR6_LT_REV, a1, a1))
__altivec_unary_pred(vec_all_numeric,
- __builtin_altivec_vcmpeqfp_p (__CR6_LT, a1, a1))
+ __builtin_altivec_vcmpeq_p (__CR6_LT, a1, a1))
__altivec_unary_pred(vec_any_numeric,
- __builtin_altivec_vcmpeqfp_p (__CR6_EQ_REV, a1, a1))
+ __builtin_altivec_vcmpeq_p (__CR6_EQ_REV, a1, a1))
__altivec_scalar_pred(vec_all_eq,
__builtin_vec_vcmpeq_p (__CR6_LT, a1, a2))
@@ -384,13 +395,13 @@ __altivec_scalar_pred(vec_any_lt,
__builtin_vec_vcmpgt_p (__CR6_EQ_REV, a2, a1))
__altivec_scalar_pred(vec_all_ngt,
- __builtin_altivec_vcmpgtfp_p (__CR6_EQ, a1, a2))
+ __builtin_altivec_vcmpgt_p (__CR6_EQ, a1, a2))
__altivec_scalar_pred(vec_all_nlt,
- __builtin_altivec_vcmpgtfp_p (__CR6_EQ, a2, a1))
+ __builtin_altivec_vcmpgt_p (__CR6_EQ, a2, a1))
__altivec_scalar_pred(vec_any_ngt,
- __builtin_altivec_vcmpgtfp_p (__CR6_LT_REV, a1, a2))
+ __builtin_altivec_vcmpgt_p (__CR6_LT_REV, a1, a2))
__altivec_scalar_pred(vec_any_nlt,
- __builtin_altivec_vcmpgtfp_p (__CR6_LT_REV, a2, a1))
+ __builtin_altivec_vcmpgt_p (__CR6_LT_REV, a2, a1))
/* __builtin_vec_vcmpge_p is vcmpgefp for floating-point vector types,
while for integer types it is converted to __builtin_vec_vcmpgt_p,
@@ -405,13 +416,13 @@ __altivec_scalar_pred(vec_any_ge,
__builtin_vec_vcmpge_p (__CR6_EQ_REV, a1, a2))
__altivec_scalar_pred(vec_all_nge,
- __builtin_altivec_vcmpgefp_p (__CR6_EQ, a1, a2))
+ __builtin_altivec_vcmpge_p (__CR6_EQ, a1, a2))
__altivec_scalar_pred(vec_all_nle,
- __builtin_altivec_vcmpgefp_p (__CR6_EQ, a2, a1))
+ __builtin_altivec_vcmpge_p (__CR6_EQ, a2, a1))
__altivec_scalar_pred(vec_any_nge,
- __builtin_altivec_vcmpgefp_p (__CR6_LT_REV, a1, a2))
+ __builtin_altivec_vcmpge_p (__CR6_LT_REV, a1, a2))
__altivec_scalar_pred(vec_any_nle,
- __builtin_altivec_vcmpgefp_p (__CR6_LT_REV, a2, a1))
+ __builtin_altivec_vcmpge_p (__CR6_LT_REV, a2, a1))
#undef __altivec_scalar_pred
#undef __altivec_unary_pred
@@ -423,11 +434,11 @@ __altivec_scalar_pred(vec_any_nle,
#define vec_all_in(a1, a2) __builtin_altivec_vcmpbfp_p (__CR6_EQ, (a1), (a2))
#define vec_any_out(a1, a2) __builtin_altivec_vcmpbfp_p (__CR6_EQ_REV, (a1), (a2))
-#define vec_all_nan(a1) __builtin_altivec_vcmpeqfp_p (__CR6_EQ, (a1), (a1))
-#define vec_any_nan(a1) __builtin_altivec_vcmpeqfp_p (__CR6_LT_REV, (a1), (a1))
+#define vec_all_nan(a1) __builtin_vec_vcmpeq_p (__CR6_EQ, (a1), (a1))
+#define vec_any_nan(a1) __builtin_vec_vcmpeq_p (__CR6_LT_REV, (a1), (a1))
-#define vec_all_numeric(a1) __builtin_altivec_vcmpeqfp_p (__CR6_LT, (a1), (a1))
-#define vec_any_numeric(a1) __builtin_altivec_vcmpeqfp_p (__CR6_EQ_REV, (a1), (a1))
+#define vec_all_numeric(a1) __builtin_vec_vcmpeq_p (__CR6_LT, (a1), (a1))
+#define vec_any_numeric(a1) __builtin_vec_vcmpeq_p (__CR6_EQ_REV, (a1), (a1))
#define vec_all_eq(a1, a2) __builtin_vec_vcmpeq_p (__CR6_LT, (a1), (a2))
#define vec_all_ne(a1, a2) __builtin_vec_vcmpeq_p (__CR6_EQ, (a1), (a2))
@@ -439,10 +450,10 @@ __altivec_scalar_pred(vec_any_nle,
#define vec_any_gt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_EQ_REV, (a1), (a2))
#define vec_any_lt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_EQ_REV, (a2), (a1))
-#define vec_all_ngt(a1, a2) __builtin_altivec_vcmpgtfp_p (__CR6_EQ, (a1), (a2))
-#define vec_all_nlt(a1, a2) __builtin_altivec_vcmpgtfp_p (__CR6_EQ, (a2), (a1))
-#define vec_any_ngt(a1, a2) __builtin_altivec_vcmpgtfp_p (__CR6_LT_REV, (a1), (a2))
-#define vec_any_nlt(a1, a2) __builtin_altivec_vcmpgtfp_p (__CR6_LT_REV, (a2), (a1))
+#define vec_all_ngt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_EQ, (a1), (a2))
+#define vec_all_nlt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_EQ, (a2), (a1))
+#define vec_any_ngt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_LT_REV, (a1), (a2))
+#define vec_any_nlt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_LT_REV, (a2), (a1))
/* __builtin_vec_vcmpge_p is vcmpgefp for floating-point vector types,
while for integer types it is converted to __builtin_vec_vcmpgt_p,
@@ -452,10 +463,10 @@ __altivec_scalar_pred(vec_any_nle,
#define vec_any_le(a1, a2) __builtin_vec_vcmpge_p (__CR6_EQ_REV, (a2), (a1))
#define vec_any_ge(a1, a2) __builtin_vec_vcmpge_p (__CR6_EQ_REV, (a1), (a2))
-#define vec_all_nge(a1, a2) __builtin_altivec_vcmpgefp_p (__CR6_EQ, (a1), (a2))
-#define vec_all_nle(a1, a2) __builtin_altivec_vcmpgefp_p (__CR6_EQ, (a2), (a1))
-#define vec_any_nge(a1, a2) __builtin_altivec_vcmpgefp_p (__CR6_LT_REV, (a1), (a2))
-#define vec_any_nle(a1, a2) __builtin_altivec_vcmpgefp_p (__CR6_LT_REV, (a2), (a1))
+#define vec_all_nge(a1, a2) __builtin_vec_vcmpge_p (__CR6_EQ, (a1), (a2))
+#define vec_all_nle(a1, a2) __builtin_vec_vcmpge_p (__CR6_EQ, (a2), (a1))
+#define vec_any_nge(a1, a2) __builtin_vec_vcmpge_p (__CR6_LT_REV, (a1), (a2))
+#define vec_any_nle(a1, a2) __builtin_vec_vcmpge_p (__CR6_LT_REV, (a2), (a1))
#endif
/* These do not accept vectors, so they do not have a __builtin_vec_*
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 58af47c15ce..53b1054d200 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -20,8 +20,8 @@
;; <http://www.gnu.org/licenses/>.
(define_constants
- [(UNSPEC_VCMPBFP 50)
;; 51-62 deleted
+ [(UNSPEC_VCMPBFP 64)
(UNSPEC_VMSUMU 65)
(UNSPEC_VMSUMM 66)
(UNSPEC_VMSUMSHM 68)
@@ -66,9 +66,9 @@
(UNSPEC_VSUMSWS 135)
(UNSPEC_VPERM 144)
(UNSPEC_VPERM_UNS 145)
- (UNSPEC_VRFIP 148)
+ ;; 148 deleted
(UNSPEC_VRFIN 149)
- (UNSPEC_VRFIM 150)
+ ;; 150 deleted
(UNSPEC_VCFUX 151)
(UNSPEC_VCFSX 152)
(UNSPEC_VCTUXS 153)
@@ -220,6 +220,35 @@
}
[(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,vecsimple,*")])
+;; Load up a vector with the most significant bit set by loading up -1 and
+;; doing a shift left
+(define_split
+ [(set (match_operand:VM 0 "altivec_register_operand" "")
+ (match_operand:VM 1 "easy_vector_constant_msb" ""))]
+ "VECTOR_UNIT_ALTIVEC_P (<MODE>mode) && reload_completed"
+ [(const_int 0)]
+{
+ rtx dest = operands[0];
+ enum machine_mode mode = GET_MODE (operands[0]);
+ rtvec v;
+ int i, num_elements;
+
+ if (mode == V4SFmode)
+ {
+ mode = V4SImode;
+ dest = gen_lowpart (V4SImode, dest);
+ }
+
+ num_elements = GET_MODE_NUNITS (mode);
+ v = rtvec_alloc (num_elements);
+ for (i = 0; i < num_elements; i++)
+ RTVEC_ELT (v, i) = constm1_rtx;
+
+ emit_insn (gen_vec_initv4si (dest, gen_rtx_PARALLEL (mode, v)));
+ emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_ASHIFT (mode, dest, dest)));
+ DONE;
+})
+
(define_split
[(set (match_operand:VM 0 "altivec_register_operand" "")
(match_operand:VM 1 "easy_vector_constant_add_self" ""))]
@@ -1310,7 +1339,7 @@
"vspltis<VI_char> %0,%1"
[(set_attr "type" "vecperm")])
-(define_insn "*altivec_ftruncv4sf2"
+(define_insn "*altivec_vrfiz"
[(set (match_operand:V4SF 0 "register_operand" "=v")
(fix:V4SF (match_operand:V4SF 1 "register_operand" "v")))]
"VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
@@ -1337,10 +1366,10 @@
"vperm %0,%1,%2,%3"
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vrfip"
+(define_insn "altivec_vrfip" ; ceil
[(set (match_operand:V4SF 0 "register_operand" "=v")
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")]
- UNSPEC_VRFIP))]
+ UNSPEC_FRIP))]
"TARGET_ALTIVEC"
"vrfip %0,%1"
[(set_attr "type" "vecfloat")])
@@ -1353,10 +1382,10 @@
"vrfin %0,%1"
[(set_attr "type" "vecfloat")])
-(define_insn "altivec_vrfim"
+(define_insn "*altivec_vrfim" ; floor
[(set (match_operand:V4SF 0 "register_operand" "=v")
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")]
- UNSPEC_VRFIM))]
+ UNSPEC_FRIM))]
"TARGET_ALTIVEC"
"vrfim %0,%1"
[(set_attr "type" "vecfloat")])
@@ -1431,6 +1460,28 @@
"vrefp %0,%1"
[(set_attr "type" "vecfloat")])
+(define_expand "altivec_copysign_v4sf3"
+ [(use (match_operand:V4SF 0 "register_operand" ""))
+ (use (match_operand:V4SF 1 "register_operand" ""))
+ (use (match_operand:V4SF 2 "register_operand" ""))]
+ "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+ "
+{
+ rtx mask = gen_reg_rtx (V4SImode);
+ rtvec v = rtvec_alloc (4);
+ unsigned HOST_WIDE_INT mask_val = ((unsigned HOST_WIDE_INT)1) << 31;
+
+ RTVEC_ELT (v, 0) = GEN_INT (mask_val);
+ RTVEC_ELT (v, 1) = GEN_INT (mask_val);
+ RTVEC_ELT (v, 2) = GEN_INT (mask_val);
+ RTVEC_ELT (v, 3) = GEN_INT (mask_val);
+
+ emit_insn (gen_vec_initv4si (mask, gen_rtx_PARALLEL (V4SImode, v)));
+ emit_insn (gen_vector_select_v4sf (operands[0], operands[1], operands[2],
+ gen_lowpart (V4SFmode, mask)));
+ DONE;
+}")
+
(define_insn "altivec_vsldoi_<mode>"
[(set (match_operand:VM 0 "register_operand" "=v")
(unspec:VM [(match_operand:VM 1 "register_operand" "v")
diff --git a/gcc/config/rs6000/power7.md b/gcc/config/rs6000/power7.md
new file mode 100644
index 00000000000..3b6a95e284e
--- /dev/null
+++ b/gcc/config/rs6000/power7.md
@@ -0,0 +1,318 @@
+;; Scheduling description for IBM POWER7 processor.
+;; Copyright (C) 2009 Free Software Foundation, Inc.
+;;
+;; Contributed by Pat Haugen (pthaugen@us.ibm.com).
+
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "power7iu,power7lsu,power7vsu,power7misc")
+
+(define_cpu_unit "iu1_power7,iu2_power7" "power7iu")
+(define_cpu_unit "lsu1_power7,lsu2_power7" "power7lsu")
+(define_cpu_unit "vsu1_power7,vsu2_power7" "power7vsu")
+(define_cpu_unit "bpu_power7,cru_power7" "power7misc")
+(define_cpu_unit "du1_power7,du2_power7,du3_power7,du4_power7,du5_power7"
+ "power7misc")
+
+
+(define_reservation "DU_power7"
+ "du1_power7|du2_power7|du3_power7|du4_power7")
+
+(define_reservation "DU2F_power7"
+ "du1_power7+du2_power7")
+
+(define_reservation "DU4_power7"
+ "du1_power7+du2_power7+du3_power7+du4_power7")
+
+(define_reservation "FXU_power7"
+ "iu1_power7|iu2_power7")
+
+(define_reservation "VSU_power7"
+ "vsu1_power7|vsu2_power7")
+
+(define_reservation "LSU_power7"
+ "lsu1_power7|lsu2_power7")
+
+
+; Dispatch slots are allocated in order conforming to program order.
+(absence_set "du1_power7" "du2_power7,du3_power7,du4_power7,du5_power7")
+(absence_set "du2_power7" "du3_power7,du4_power7,du5_power7")
+(absence_set "du3_power7" "du4_power7,du5_power7")
+(absence_set "du4_power7" "du5_power7")
+
+
+; LS Unit
+(define_insn_reservation "power7-load" 2
+ (and (eq_attr "type" "load")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,LSU_power7")
+
+(define_insn_reservation "power7-load-ext" 3
+ (and (eq_attr "type" "load_ext")
+ (eq_attr "cpu" "power7"))
+ "DU2F_power7,LSU_power7,FXU_power7")
+
+(define_insn_reservation "power7-load-update" 2
+ (and (eq_attr "type" "load_u")
+ (eq_attr "cpu" "power7"))
+ "DU2F_power7,LSU_power7+FXU_power7")
+
+(define_insn_reservation "power7-load-update-indexed" 3
+ (and (eq_attr "type" "load_ux")
+ (eq_attr "cpu" "power7"))
+ "DU4_power7,FXU_power7,LSU_power7+FXU_power7")
+
+(define_insn_reservation "power7-load-ext-update" 4
+ (and (eq_attr "type" "load_ext_u")
+ (eq_attr "cpu" "power7"))
+ "DU2F_power7,LSU_power7+FXU_power7,FXU_power7")
+
+(define_insn_reservation "power7-load-ext-update-indexed" 4
+ (and (eq_attr "type" "load_ext_ux")
+ (eq_attr "cpu" "power7"))
+ "DU4_power7,FXU_power7,LSU_power7+FXU_power7,FXU_power7")
+
+(define_insn_reservation "power7-fpload" 3
+ (and (eq_attr "type" "fpload")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,LSU_power7")
+
+(define_insn_reservation "power7-fpload-update" 3
+ (and (eq_attr "type" "fpload_u,fpload_ux")
+ (eq_attr "cpu" "power7"))
+ "DU2F_power7,LSU_power7+FXU_power7")
+
+(define_insn_reservation "power7-store" 6 ; store-forwarding latency
+ (and (eq_attr "type" "store")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,LSU_power7+FXU_power7")
+
+(define_insn_reservation "power7-store-update" 6
+ (and (eq_attr "type" "store_u")
+ (eq_attr "cpu" "power7"))
+ "DU2F_power7,LSU_power7+FXU_power7,FXU_power7")
+
+(define_insn_reservation "power7-store-update-indexed" 6
+ (and (eq_attr "type" "store_ux")
+ (eq_attr "cpu" "power7"))
+ "DU4_power7,LSU_power7+FXU_power7,FXU_power7")
+
+(define_insn_reservation "power7-fpstore" 6
+ (and (eq_attr "type" "fpstore")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,LSU_power7+VSU_power7")
+
+(define_insn_reservation "power7-fpstore-update" 6
+ (and (eq_attr "type" "fpstore_u,fpstore_ux")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,LSU_power7+VSU_power7+FXU_power7")
+
+(define_insn_reservation "power7-larx" 3
+ (and (eq_attr "type" "load_l")
+ (eq_attr "cpu" "power7"))
+ "DU4_power7,LSU_power7")
+
+(define_insn_reservation "power7-stcx" 10
+ (and (eq_attr "type" "store_c")
+ (eq_attr "cpu" "power7"))
+ "DU4_power7,LSU_power7")
+
+(define_insn_reservation "power7-vecload" 3
+ (and (eq_attr "type" "vecload")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,LSU_power7")
+
+(define_insn_reservation "power7-vecstore" 6
+ (and (eq_attr "type" "vecstore")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,LSU_power7+VSU_power7")
+
+(define_insn_reservation "power7-sync" 11
+ (and (eq_attr "type" "sync")
+ (eq_attr "cpu" "power7"))
+ "DU4_power7,LSU_power7")
+
+
+; FX Unit
+(define_insn_reservation "power7-integer" 1
+ (and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\
+ var_shift_rotate,exts")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,FXU_power7")
+
+(define_insn_reservation "power7-cntlz" 2
+ (and (eq_attr "type" "cntlz")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,FXU_power7")
+
+(define_insn_reservation "power7-two" 2
+ (and (eq_attr "type" "two")
+ (eq_attr "cpu" "power7"))
+ "DU_power7+DU_power7,FXU_power7,FXU_power7")
+
+(define_insn_reservation "power7-three" 3
+ (and (eq_attr "type" "three")
+ (eq_attr "cpu" "power7"))
+ "DU_power7+DU_power7+DU_power7,FXU_power7,FXU_power7,FXU_power7")
+
+(define_insn_reservation "power7-cmp" 1
+ (and (eq_attr "type" "cmp,fast_compare")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,FXU_power7")
+
+(define_insn_reservation "power7-compare" 2
+ (and (eq_attr "type" "compare,delayed_compare,var_delayed_compare")
+ (eq_attr "cpu" "power7"))
+ "DU2F_power7,FXU_power7,FXU_power7")
+
+(define_bypass 3 "power7-cmp,power7-compare" "power7-crlogical,power7-delayedcr")
+
+(define_insn_reservation "power7-mul" 4
+ (and (eq_attr "type" "imul,imul2,imul3,lmul")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,FXU_power7")
+
+(define_insn_reservation "power7-mul-compare" 5
+ (and (eq_attr "type" "imul_compare,lmul_compare")
+ (eq_attr "cpu" "power7"))
+ "DU2F_power7,FXU_power7,nothing*3,FXU_power7")
+
+(define_insn_reservation "power7-idiv" 36
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "power7"))
+ "DU2F_power7,iu1_power7*36|iu2_power7*36")
+
+(define_insn_reservation "power7-ldiv" 68
+ (and (eq_attr "type" "ldiv")
+ (eq_attr "cpu" "power7"))
+ "DU2F_power7,iu1_power7*68|iu2_power7*68")
+
+(define_insn_reservation "power7-isync" 1 ;
+ (and (eq_attr "type" "isync")
+ (eq_attr "cpu" "power7"))
+ "DU4_power7,FXU_power7")
+
+
+; CR Unit
+(define_insn_reservation "power7-mtjmpr" 4
+ (and (eq_attr "type" "mtjmpr")
+ (eq_attr "cpu" "power7"))
+ "du1_power7,FXU_power7")
+
+(define_insn_reservation "power7-mfjmpr" 5
+ (and (eq_attr "type" "mfjmpr")
+ (eq_attr "cpu" "power7"))
+ "du1_power7,cru_power7+FXU_power7")
+
+(define_insn_reservation "power7-crlogical" 3
+ (and (eq_attr "type" "cr_logical")
+ (eq_attr "cpu" "power7"))
+ "du1_power7,cru_power7")
+
+(define_insn_reservation "power7-delayedcr" 3
+ (and (eq_attr "type" "delayed_cr")
+ (eq_attr "cpu" "power7"))
+ "du1_power7,cru_power7")
+
+(define_insn_reservation "power7-mfcr" 6
+ (and (eq_attr "type" "mfcr")
+ (eq_attr "cpu" "power7"))
+ "du1_power7,cru_power7")
+
+(define_insn_reservation "power7-mfcrf" 3
+ (and (eq_attr "type" "mfcrf")
+ (eq_attr "cpu" "power7"))
+ "du1_power7,cru_power7")
+
+(define_insn_reservation "power7-mtcr" 3
+ (and (eq_attr "type" "mtcr")
+ (eq_attr "cpu" "power7"))
+ "DU4_power7,cru_power7+FXU_power7")
+
+
+; BR Unit
+; Branches take dispatch Slot 4 (du5_power7). The absence_sets above prevent
+; other insns from grabbing previous dispatch slots once this is assigned.
+(define_insn_reservation "power7-branch" 3
+ (and (eq_attr "type" "jmpreg,branch")
+ (eq_attr "cpu" "power7"))
+ "(du5_power7\
+ |du4_power7+du5_power7\
+ |du3_power7+du4_power7+du5_power7\
+ |du2_power7+du3_power7+du4_power7+du5_power7\
+ |du1_power7+du2_power7+du3_power7+du4_power7+du5_power7),bpu_power7")
+
+
+; VS Unit (includes FP/VSX/VMX/DFP)
+(define_insn_reservation "power7-fp" 6
+ (and (eq_attr "type" "fp,dmul")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,VSU_power7")
+
+(define_bypass 8 "power7-fp" "power7-branch")
+
+(define_insn_reservation "power7-fpcompare" 4
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,VSU_power7")
+
+(define_insn_reservation "power7-sdiv" 26
+ (and (eq_attr "type" "sdiv")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,VSU_power7")
+
+(define_insn_reservation "power7-ddiv" 32
+ (and (eq_attr "type" "ddiv")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,VSU_power7")
+
+(define_insn_reservation "power7-sqrt" 31
+ (and (eq_attr "type" "ssqrt")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,VSU_power7")
+
+(define_insn_reservation "power7-dsqrt" 43
+ (and (eq_attr "type" "dsqrt")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,VSU_power7")
+
+(define_insn_reservation "power7-vecsimple" 2
+ (and (eq_attr "type" "vecsimple")
+ (eq_attr "cpu" "power7"))
+ "du1_power7,VSU_power7")
+
+(define_insn_reservation "power7-veccmp" 7
+ (and (eq_attr "type" "veccmp")
+ (eq_attr "cpu" "power7"))
+ "du1_power7,VSU_power7")
+
+(define_insn_reservation "power7-vecfloat" 7
+ (and (eq_attr "type" "vecfloat")
+ (eq_attr "cpu" "power7"))
+ "du1_power7,VSU_power7")
+
+(define_bypass 6 "power7-vecfloat" "power7-vecfloat")
+
+(define_insn_reservation "power7-veccomplex" 7
+ (and (eq_attr "type" "veccomplex")
+ (eq_attr "cpu" "power7"))
+ "du1_power7,VSU_power7")
+
+(define_insn_reservation "power7-vecperm" 3
+ (and (eq_attr "type" "vecperm")
+ (eq_attr "cpu" "power7"))
+ "du2_power7,VSU_power7")
diff --git a/gcc/config/rs6000/ppc-asm.h b/gcc/config/rs6000/ppc-asm.h
index 147f1092753..c963eb98abb 100644
--- a/gcc/config/rs6000/ppc-asm.h
+++ b/gcc/config/rs6000/ppc-asm.h
@@ -87,7 +87,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define f16 16
#define f17 17
#define f18 18
-#define f19 19
+#define f19 19
#define f20 20
#define f21 21
#define f22 22
@@ -101,6 +101,143 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define f30 30
#define f31 31
+#ifdef __VSX__
+#define f32 32
+#define f33 33
+#define f34 34
+#define f35 35
+#define f36 36
+#define f37 37
+#define f38 38
+#define f39 39
+#define f40 40
+#define f41 41
+#define f42 42
+#define f43 43
+#define f44 44
+#define f45 45
+#define f46 46
+#define f47 47
+#define f48 48
+#define f49 49
+#define f50 50
+#define f51 51
+#define f52 52
+#define f53 53
+#define f54 54
+#define f55 55
+#define f56 56
+#define f57 57
+#define f58 58
+#define f59 59
+#define f60 60
+#define f61 61
+#define f62 62
+#define f63 63
+#endif
+
+#ifdef __ALTIVEC__
+#define v0 0
+#define v1 1
+#define v2 2
+#define v3 3
+#define v4 4
+#define v5 5
+#define v6 6
+#define v7 7
+#define v8 8
+#define v9 9
+#define v10 10
+#define v11 11
+#define v12 12
+#define v13 13
+#define v14 14
+#define v15 15
+#define v16 16
+#define v17 17
+#define v18 18
+#define v19 19
+#define v20 20
+#define v21 21
+#define v22 22
+#define v23 23
+#define v24 24
+#define v25 25
+#define v26 26
+#define v27 27
+#define v28 28
+#define v29 29
+#define v30 30
+#define v31 31
+#endif
+
+#ifdef __VSX__
+#define vs0 0
+#define vs1 1
+#define vs2 2
+#define vs3 3
+#define vs4 4
+#define vs5 5
+#define vs6 6
+#define vs7 7
+#define vs8 8
+#define vs9 9
+#define vs10 10
+#define vs11 11
+#define vs12 12
+#define vs13 13
+#define vs14 14
+#define vs15 15
+#define vs16 16
+#define vs17 17
+#define vs18 18
+#define vs19 19
+#define vs20 20
+#define vs21 21
+#define vs22 22
+#define vs23 23
+#define vs24 24
+#define vs25 25
+#define vs26 26
+#define vs27 27
+#define vs28 28
+#define vs29 29
+#define vs30 30
+#define vs31 31
+#define vs32 32
+#define vs33 33
+#define vs34 34
+#define vs35 35
+#define vs36 36
+#define vs37 37
+#define vs38 38
+#define vs39 39
+#define vs40 40
+#define vs41 41
+#define vs42 42
+#define vs43 43
+#define vs44 44
+#define vs45 45
+#define vs46 46
+#define vs47 47
+#define vs48 48
+#define vs49 49
+#define vs50 50
+#define vs51 51
+#define vs52 52
+#define vs53 53
+#define vs54 54
+#define vs55 55
+#define vs56 56
+#define vs57 57
+#define vs58 58
+#define vs59 59
+#define vs60 60
+#define vs61 61
+#define vs62 62
+#define vs63 63
+#endif
+
/*
* Macros to glue together two tokens.
*/
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 3e5c1a1a8df..cf25cb7bf0f 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -377,6 +377,16 @@
return EASY_VECTOR_15_ADD_SELF (val);
})
+;; Same as easy_vector_constant but only for EASY_VECTOR_MSB.
+(define_predicate "easy_vector_constant_msb"
+ (and (match_code "const_vector")
+ (and (match_test "TARGET_ALTIVEC")
+ (match_test "easy_altivec_constant (op, mode)")))
+{
+ HOST_WIDE_INT val = const_vector_elt_as_int (op, GET_MODE_NUNITS (mode) - 1);
+ return EASY_VECTOR_MSB (val, GET_MODE_INNER (mode));
+})
+
;; Return 1 if operand is constant zero (scalars and vectors).
(define_predicate "zero_constant"
(and (match_code "const_int,const_double,const_vector")
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index 3b3ba96b5cd..94354528ebf 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -214,7 +214,8 @@ rs6000_macro_to_expand (cpp_reader *pfile, const cpp_token *tok)
if (rid_code == RID_UNSIGNED || rid_code == RID_LONG
|| rid_code == RID_SHORT || rid_code == RID_SIGNED
|| rid_code == RID_INT || rid_code == RID_CHAR
- || rid_code == RID_FLOAT)
+ || rid_code == RID_FLOAT
+ || (rid_code == RID_DOUBLE && TARGET_VSX))
{
expand_this = C_CPP_HASHNODE (__vector_keyword);
/* If the next keyword is bool or pixel, it
@@ -329,7 +330,42 @@ rs6000_cpu_cpp_builtins (cpp_reader *pfile)
if (TARGET_NO_LWSYNC)
builtin_define ("__NO_LWSYNC__");
if (TARGET_VSX)
- builtin_define ("__VSX__");
+ {
+ builtin_define ("__VSX__");
+
+ /* For the VSX builtin functions identical to Altivec functions, just map
+ the vsx builtin name onto the altivec version (the altivec functions
+ generate VSX code if -mvsx). */
+ builtin_define ("__builtin_vsx_xxland=__builtin_vec_and");
+ builtin_define ("__builtin_vsx_xxlandc=__builtin_vec_andc");
+ builtin_define ("__builtin_vsx_xxlnor=__builtin_vec_nor");
+ builtin_define ("__builtin_vsx_xxlor=__builtin_vec_or");
+ builtin_define ("__builtin_vsx_xxlxor=__builtin_vec_xor");
+ builtin_define ("__builtin_vsx_xxsel=__builtin_vec_sel");
+ builtin_define ("__builtin_vsx_vperm=__builtin_vec_perm");
+
+ /* Also map the a and m versions of the multiply/add instructions to the
+ builtin for people blindly going off the instruction manual. */
+ builtin_define ("__builtin_vsx_xvmaddadp=__builtin_vsx_xvmadddp");
+ builtin_define ("__builtin_vsx_xvmaddmdp=__builtin_vsx_xvmadddp");
+ builtin_define ("__builtin_vsx_xvmaddasp=__builtin_vsx_xvmaddsp");
+ builtin_define ("__builtin_vsx_xvmaddmsp=__builtin_vsx_xvmaddsp");
+ builtin_define ("__builtin_vsx_xvmsubadp=__builtin_vsx_xvmsubdp");
+ builtin_define ("__builtin_vsx_xvmsubmdp=__builtin_vsx_xvmsubdp");
+ builtin_define ("__builtin_vsx_xvmsubasp=__builtin_vsx_xvmsubsp");
+ builtin_define ("__builtin_vsx_xvmsubmsp=__builtin_vsx_xvmsubsp");
+ builtin_define ("__builtin_vsx_xvnmaddadp=__builtin_vsx_xvnmadddp");
+ builtin_define ("__builtin_vsx_xvnmaddmdp=__builtin_vsx_xvnmadddp");
+ builtin_define ("__builtin_vsx_xvnmaddasp=__builtin_vsx_xvnmaddsp");
+ builtin_define ("__builtin_vsx_xvnmaddmsp=__builtin_vsx_xvnmaddsp");
+ builtin_define ("__builtin_vsx_xvnmsubadp=__builtin_vsx_xvnmsubdp");
+ builtin_define ("__builtin_vsx_xvnmsubmdp=__builtin_vsx_xvnmsubdp");
+ builtin_define ("__builtin_vsx_xvnmsubasp=__builtin_vsx_xvnmsubsp");
+ builtin_define ("__builtin_vsx_xvnmsubmsp=__builtin_vsx_xvnmsubsp");
+ }
+
+ /* Tell users they can use __builtin_bswap{16,64}. */
+ builtin_define ("__HAVE_BSWAP__");
/* May be overridden by target configuration. */
RS6000_CPU_CPP_ENDIAN_BUILTINS();
@@ -393,7 +429,7 @@ struct altivec_builtin_types
};
const struct altivec_builtin_types altivec_overloaded_builtins[] = {
- /* Unary AltiVec builtins. */
+ /* Unary AltiVec/VSX builtins. */
{ ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V16QI,
RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V8HI,
@@ -402,6 +438,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V4SF,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_ABS, VSX_BUILTIN_XVABSDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_ABSS, ALTIVEC_BUILTIN_ABSS_V16QI,
RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_ABSS, ALTIVEC_BUILTIN_ABSS_V8HI,
@@ -410,8 +448,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_CEIL, ALTIVEC_BUILTIN_VRFIP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_CEIL, VSX_BUILTIN_XVRDPIP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_EXPTE, ALTIVEC_BUILTIN_VEXPTEFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_FLOOR, VSX_BUILTIN_XVRDPIM,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_FLOOR, ALTIVEC_BUILTIN_VRFIM,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_LOGE, ALTIVEC_BUILTIN_VLOGEFP,
@@ -444,6 +486,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_TRUNC, ALTIVEC_BUILTIN_VRFIZ,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_TRUNC, VSX_BUILTIN_XVRDPIZ,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHSB,
RS6000_BTI_V8HI, RS6000_BTI_V16QI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHSB,
@@ -489,7 +533,7 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
{ ALTIVEC_BUILTIN_VEC_VUPKLSB, ALTIVEC_BUILTIN_VUPKLSB,
RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V16QI, 0, 0 },
- /* Binary AltiVec builtins. */
+ /* Binary AltiVec/VSX builtins. */
{ ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUBM,
RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
{ ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUBM,
@@ -528,6 +572,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, VSX_BUILTIN_XVADDDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_VADDFP, ALTIVEC_BUILTIN_VADDFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_VADDUWM, ALTIVEC_BUILTIN_VADDUWM,
@@ -673,9 +719,9 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
{ ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
- RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V4SI, 0 },
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
- RS6000_BTI_V2DF, RS6000_BTI_bool_V4SI, RS6000_BTI_V2DF, 0 },
+ RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
@@ -727,9 +773,9 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
{ ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
- RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V4SI, 0 },
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
- RS6000_BTI_V2DF, RS6000_BTI_bool_V4SI, RS6000_BTI_V2DF, 0 },
+ RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
@@ -812,6 +858,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQFP,
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPEQ, VSX_BUILTIN_XVCMPEQDP,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_VCMPEQFP, ALTIVEC_BUILTIN_VCMPEQFP,
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
@@ -832,6 +880,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
{ ALTIVEC_BUILTIN_VEC_CMPGE, ALTIVEC_BUILTIN_VCMPGEFP,
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_XVCMPGEDP,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTUB,
RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTSB,
@@ -846,6 +896,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTFP,
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPGT, VSX_BUILTIN_XVCMPGTDP,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_VCMPGTFP, ALTIVEC_BUILTIN_VCMPGTFP,
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_VCMPGTSW, ALTIVEC_BUILTIN_VCMPGTSW,
@@ -874,6 +926,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPLE, ALTIVEC_BUILTIN_VCMPGEFP,
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_XVCMPGEDP,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTUB,
RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTSB,
@@ -888,6 +942,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTFP,
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPLT, VSX_BUILTIN_XVCMPGTDP,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_COPYSIGN, VSX_BUILTIN_CPSGNDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_COPYSIGN, ALTIVEC_BUILTIN_COPYSIGN_V4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_CTF, ALTIVEC_BUILTIN_VCFUX,
RS6000_BTI_V4SF, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, 0 },
{ ALTIVEC_BUILTIN_VEC_CTF, ALTIVEC_BUILTIN_VCFSX,
@@ -900,6 +960,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SI, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 },
{ ALTIVEC_BUILTIN_VEC_CTU, ALTIVEC_BUILTIN_VCTUXS,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 },
+ { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_XVDIVSP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_XVDIVDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
@@ -1234,6 +1298,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, VSX_BUILTIN_XVMAXDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_VMAXFP, ALTIVEC_BUILTIN_VMAXFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_VMAXSW, ALTIVEC_BUILTIN_VMAXSW,
@@ -1410,6 +1476,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, VSX_BUILTIN_XVMINDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_VMINFP, ALTIVEC_BUILTIN_VMINFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_VMINSW, ALTIVEC_BUILTIN_VMINSW,
@@ -1460,6 +1528,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
{ ALTIVEC_BUILTIN_VEC_VMINUB, ALTIVEC_BUILTIN_VMINUB,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { VSX_BUILTIN_VEC_MUL, VSX_BUILTIN_XVMULSP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { VSX_BUILTIN_VEC_MUL, VSX_BUILTIN_XVMULDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULEUB,
RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
{ ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESB,
@@ -1492,6 +1564,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V8HI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
{ ALTIVEC_BUILTIN_VEC_VMULOUB, ALTIVEC_BUILTIN_VMULOUB,
RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_NEARBYINT, VSX_BUILTIN_XVRDPI,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_NEARBYINT, VSX_BUILTIN_XVRSPI,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR,
@@ -1523,9 +1599,9 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
{ ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
- RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V4SI, 0 },
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
- RS6000_BTI_V2DF, RS6000_BTI_bool_V4SI, RS6000_BTI_V2DF, 0 },
+ RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
@@ -1622,6 +1698,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_VPKSHUS, ALTIVEC_BUILTIN_VPKSHUS,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_RINT, VSX_BUILTIN_XVRDPIC,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_RINT, VSX_BUILTIN_XVRSPIC,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_RL, ALTIVEC_BUILTIN_VRLB,
RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
{ ALTIVEC_BUILTIN_VEC_RL, ALTIVEC_BUILTIN_VRLB,
@@ -1658,6 +1738,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLW,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTSP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_VSLW, ALTIVEC_BUILTIN_VSLW,
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_VSLW, ALTIVEC_BUILTIN_VSLW,
@@ -1984,6 +2068,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, VSX_BUILTIN_XVSUBDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_VSUBFP, ALTIVEC_BUILTIN_VSUBFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_VSUBUWM, ALTIVEC_BUILTIN_VSUBUWM,
@@ -2145,9 +2231,9 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
{ ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
- RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V4SI, 0 },
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
- RS6000_BTI_V2DF, RS6000_BTI_bool_V4SI, RS6000_BTI_V2DF, 0 },
+ RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
@@ -2191,7 +2277,7 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
{ ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
- /* Ternary AltiVec builtins. */
+ /* Ternary AltiVec/VSX builtins. */
{ ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST,
RS6000_BTI_void, ~RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
{ ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST,
@@ -2354,6 +2440,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_void, ~RS6000_BTI_float, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
{ ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMADDFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
+ { ALTIVEC_BUILTIN_VEC_MADD, VSX_BUILTIN_XVMADDDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF },
{ ALTIVEC_BUILTIN_VEC_MADDS, ALTIVEC_BUILTIN_VMHADDSHS,
RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI },
{ ALTIVEC_BUILTIN_VEC_MLADD, ALTIVEC_BUILTIN_VMLADDUHM,
@@ -2366,6 +2454,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI },
{ ALTIVEC_BUILTIN_VEC_MRADDS, ALTIVEC_BUILTIN_VMHRADDSHS,
RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI },
+ { VSX_BUILTIN_VEC_MSUB, VSX_BUILTIN_XVMSUBSP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
+ { VSX_BUILTIN_VEC_MSUB, VSX_BUILTIN_XVMSUBDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF },
{ ALTIVEC_BUILTIN_VEC_MSUM, ALTIVEC_BUILTIN_VMSUMUBM,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V4SI },
{ ALTIVEC_BUILTIN_VEC_MSUM, ALTIVEC_BUILTIN_VMSUMMBM,
@@ -2390,8 +2482,14 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V4SI },
{ ALTIVEC_BUILTIN_VEC_VMSUMUHS, ALTIVEC_BUILTIN_VMSUMUHS,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI },
+ { VSX_BUILTIN_VEC_NMADD, VSX_BUILTIN_XVNMADDSP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
+ { VSX_BUILTIN_VEC_NMADD, VSX_BUILTIN_XVNMADDDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF },
{ ALTIVEC_BUILTIN_VEC_NMSUB, ALTIVEC_BUILTIN_VNMSUBFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
+ { ALTIVEC_BUILTIN_VEC_NMSUB, VSX_BUILTIN_XVNMSUBDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF },
{ ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_2DF,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_unsigned_V16QI },
{ ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_2DI,
@@ -2812,6 +2910,54 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI },
{ ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL,
RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
+ { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_NOT_OPAQUE },
/* Predicates. */
{ ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTUB_P,
@@ -2852,6 +2998,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI },
{ ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTFP_P,
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
+ { ALTIVEC_BUILTIN_VCMPGT_P, VSX_BUILTIN_XVCMPGTDP_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF },
{ ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUB_P,
@@ -2900,6 +3048,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI },
{ ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQFP_P,
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
+ { ALTIVEC_BUILTIN_VCMPEQ_P, VSX_BUILTIN_XVCMPEQDP_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF },
/* cmpge is the same as cmpgt for all cases except floating point.
@@ -2943,6 +3093,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI },
{ ALTIVEC_BUILTIN_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGEFP_P,
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
+ { ALTIVEC_BUILTIN_VCMPGE_P, VSX_BUILTIN_XVCMPGEDP_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF },
{ (enum rs6000_builtins) 0, (enum rs6000_builtins) 0, 0, 0, 0, 0 }
};
@@ -3064,8 +3216,10 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
const struct altivec_builtin_types *desc;
unsigned int n;
- if (fcode < ALTIVEC_BUILTIN_OVERLOADED_FIRST
- || fcode > ALTIVEC_BUILTIN_OVERLOADED_LAST)
+ if ((fcode < ALTIVEC_BUILTIN_OVERLOADED_FIRST
+ || fcode > ALTIVEC_BUILTIN_OVERLOADED_LAST)
+ && (fcode < VSX_BUILTIN_OVERLOADED_FIRST
+ || fcode > VSX_BUILTIN_OVERLOADED_LAST))
return NULL_TREE;
/* For now treat vec_splats and vec_promote as the same. */
@@ -3105,11 +3259,12 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
&& !INTEGRAL_TYPE_P (type))
goto bad;
unsigned_p = TYPE_UNSIGNED (type);
- if (type == long_long_unsigned_type_node
- || type == long_long_integer_type_node)
- goto bad;
switch (TYPE_MODE (type))
{
+ case DImode:
+ type = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
+ size = 2;
+ break;
case SImode:
type = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
size = 4;
@@ -3123,6 +3278,7 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
size = 16;
break;
case SFmode: type = V4SF_type_node; size = 4; break;
+ case DFmode: type = V2DF_type_node; size = 2; break;
default:
goto bad;
}
@@ -3139,7 +3295,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
return build_constructor (type, vec);
}
- /* For now use pointer tricks to do the extaction. */
+ /* For now use pointer tricks to do the extraction, unless we are on VSX
+ extracting a double from a constant offset. */
if (fcode == ALTIVEC_BUILTIN_VEC_EXTRACT)
{
tree arg1;
@@ -3148,6 +3305,7 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
tree arg1_inner_type;
tree decl, stmt;
tree innerptrtype;
+ enum machine_mode mode;
/* No second argument. */
if (nargs != 2)
@@ -3164,6 +3322,25 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
goto bad;
if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2)))
goto bad;
+
+ /* If we can use the VSX xxpermdi instruction, use that for extract. */
+ mode = TYPE_MODE (arg1_type);
+ if ((mode == V2DFmode || mode == V2DImode) && VECTOR_MEM_VSX_P (mode)
+ && TREE_CODE (arg2) == INTEGER_CST
+ && TREE_INT_CST_HIGH (arg2) == 0
+ && (TREE_INT_CST_LOW (arg2) == 0 || TREE_INT_CST_LOW (arg2) == 1))
+ {
+ tree call = NULL_TREE;
+
+ if (mode == V2DFmode)
+ call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DF];
+ else if (mode == V2DImode)
+ call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DI];
+
+ if (call)
+ return build_call_expr (call, 2, arg1, arg2);
+ }
+
/* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2). */
arg1_inner_type = TREE_TYPE (arg1_type);
arg2 = build_binary_op (loc, BIT_AND_EXPR, arg2,
@@ -3193,7 +3370,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
return stmt;
}
- /* For now use pointer tricks to do the insertation. */
+ /* For now use pointer tricks to do the insertion, unless we are on VSX
+ inserting a double to a constant offset. */
if (fcode == ALTIVEC_BUILTIN_VEC_INSERT)
{
tree arg0;
@@ -3203,7 +3381,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
tree arg1_inner_type;
tree decl, stmt;
tree innerptrtype;
-
+ enum machine_mode mode;
+
/* No second or third arguments. */
if (nargs != 3)
{
@@ -3220,6 +3399,27 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
goto bad;
if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2)))
goto bad;
+
+ /* If we can use the VSX xxpermdi instruction, use that for insert. */
+ mode = TYPE_MODE (arg1_type);
+ if ((mode == V2DFmode || mode == V2DImode) && VECTOR_UNIT_VSX_P (mode)
+ && TREE_CODE (arg2) == INTEGER_CST
+ && TREE_INT_CST_HIGH (arg2) == 0
+ && (TREE_INT_CST_LOW (arg2) == 0 || TREE_INT_CST_LOW (arg2) == 1))
+ {
+ tree call = NULL_TREE;
+
+ if (mode == V2DFmode)
+ call = rs6000_builtin_decls[VSX_BUILTIN_VEC_SET_V2DF];
+ else if (mode == V2DImode)
+ call = rs6000_builtin_decls[VSX_BUILTIN_VEC_SET_V2DI];
+
+ /* Note, __builtin_vec_insert_<xxx> has vector and scalar types
+ reversed. */
+ if (call)
+ return build_call_expr (call, 3, arg1, arg0, arg2);
+ }
+
/* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2) = arg0. */
arg1_inner_type = TREE_TYPE (arg1_type);
arg2 = build_binary_op (loc, BIT_AND_EXPR, arg2,
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index b077c83c2db..25cacc46e91 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -837,6 +837,7 @@ static rtx rs6000_emit_stack_reset (rs6000_stack_t *, rtx, rtx, int, bool);
static rtx rs6000_make_savres_rtx (rs6000_stack_t *, rtx, int,
enum machine_mode, bool, bool, bool);
static bool rs6000_reg_live_or_pic_offset_p (int);
+static tree rs6000_builtin_vectorized_function (unsigned int, tree, tree);
static int rs6000_savres_strategy (rs6000_stack_t *, bool, int, int);
static void rs6000_restore_saved_cr (rtx, int);
static void rs6000_output_function_prologue (FILE *, HOST_WIDE_INT);
@@ -1395,6 +1396,10 @@ static const struct attribute_spec rs6000_attribute_table[] =
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION rs6000_handle_option
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
+ rs6000_builtin_vectorized_function
+
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS \
(TARGET_DEFAULT)
@@ -1871,20 +1876,14 @@ rs6000_init_hard_regno_mode_ok (void)
}
}
- /* V2DImode, prefer vsx over altivec, since the main use will be for
- vectorized floating point conversions. */
+ /* V2DImode, only allow under VSX, which can do V2DI insert/splat/extract.
+ Altivec doesn't have 64-bit support. */
if (TARGET_VSX)
{
rs6000_vector_mem[V2DImode] = VECTOR_VSX;
rs6000_vector_unit[V2DImode] = VECTOR_NONE;
rs6000_vector_align[V2DImode] = align64;
}
- else if (TARGET_ALTIVEC)
- {
- rs6000_vector_mem[V2DImode] = VECTOR_ALTIVEC;
- rs6000_vector_unit[V2DImode] = VECTOR_NONE;
- rs6000_vector_align[V2DImode] = align64;
- }
/* DFmode, see if we want to use the VSX unit. */
if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
@@ -2169,7 +2168,7 @@ rs6000_override_options (const char *default_cpu)
{"power7", PROCESSOR_POWER7,
POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF
| MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD
- /* | MASK_VSX */}, /* Don't add MASK_ISEL by default */
+ | MASK_VSX}, /* Don't add MASK_ISEL by default */
{"powerpc", PROCESSOR_POWERPC, POWERPC_BASE_MASK},
{"powerpc64", PROCESSOR_POWERPC64,
POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_POWERPC64},
@@ -2765,6 +2764,14 @@ rs6000_builtin_conversion (unsigned int tcode, tree type)
case FIX_TRUNC_EXPR:
switch (TYPE_MODE (type))
{
+ case V2DImode:
+ if (!VECTOR_UNIT_VSX_P (V2DFmode))
+ return NULL_TREE;
+
+ return TYPE_UNSIGNED (type)
+ ? rs6000_builtin_decls[VSX_BUILTIN_XVCVDPUXDS_UNS]
+ : rs6000_builtin_decls[VSX_BUILTIN_XVCVDPSXDS];
+
case V4SImode:
if (VECTOR_UNIT_NONE_P (V4SImode) || VECTOR_UNIT_NONE_P (V4SFmode))
return NULL_TREE;
@@ -2780,6 +2787,14 @@ rs6000_builtin_conversion (unsigned int tcode, tree type)
case FLOAT_EXPR:
switch (TYPE_MODE (type))
{
+ case V2DImode:
+ if (!VECTOR_UNIT_VSX_P (V2DFmode))
+ return NULL_TREE;
+
+ return TYPE_UNSIGNED (type)
+ ? rs6000_builtin_decls[VSX_BUILTIN_XVCVUXDDP]
+ : rs6000_builtin_decls[VSX_BUILTIN_XVCVSXDDP];
+
case V4SImode:
if (VECTOR_UNIT_NONE_P (V4SImode) || VECTOR_UNIT_NONE_P (V4SFmode))
return NULL_TREE;
@@ -2908,6 +2923,22 @@ rs6000_builtin_vec_perm (tree type, tree *mask_element_type)
d = rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_4SF];
break;
+ case V2DFmode:
+ if (!TARGET_ALLOW_DF_PERMUTE)
+ return NULL_TREE;
+
+ d = rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_2DF];
+ break;
+
+ case V2DImode:
+ if (!TARGET_ALLOW_DF_PERMUTE)
+ return NULL_TREE;
+
+ d = (uns_p
+ ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_2DI_UNS]
+ : rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_2DI]);
+ break;
+
default:
return NULL_TREE;
}
@@ -2981,6 +3012,136 @@ rs6000_parse_fpu_option (const char *option)
return FPU_NONE;
}
+/* Returns a function decl for a vectorized version of the builtin function
+ with builtin function code FN, result vector type TYPE_OUT and input vector
+ type TYPE_IN, or NULL_TREE if it is not available. */
+
+static tree
+rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
+ tree type_in)
+{
+ enum machine_mode in_mode, out_mode;
+ int in_n, out_n;
+
+ if (TREE_CODE (type_out) != VECTOR_TYPE
+ || TREE_CODE (type_in) != VECTOR_TYPE
+ || !TARGET_VECTORIZE_BUILTINS)
+ return NULL_TREE; /* Both types must be vectors and TARGET_VECTORIZE_BUILTINS must be set. */
+
+ out_mode = TYPE_MODE (TREE_TYPE (type_out)); /* Mode of one result element. */
+ out_n = TYPE_VECTOR_SUBPARTS (type_out); /* Number of result elements. */
+ in_mode = TYPE_MODE (TREE_TYPE (type_in)); /* Mode of one input element. */
+ in_n = TYPE_VECTOR_SUBPARTS (type_in); /* Number of input elements. */
+
+ switch (fn)
+ {
+ case BUILT_IN_COPYSIGN: /* Double variants need 2 x DFmode in and out, VSX only. */
+ if (VECTOR_UNIT_VSX_P (V2DFmode)
+ && out_mode == DFmode && out_n == 2
+ && in_mode == DFmode && in_n == 2)
+ return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
+ break;
+ case BUILT_IN_COPYSIGNF: /* Float variants need 4 x SFmode in and out. */
+ if (out_mode != SFmode || out_n != 4
+ || in_mode != SFmode || in_n != 4)
+ break;
+ if (VECTOR_UNIT_VSX_P (V4SFmode)) /* The VSX builtin is tried first... */
+ return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
+ if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)) /* ...then the Altivec one. */
+ return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
+ break;
+ case BUILT_IN_SQRT:
+ if (VECTOR_UNIT_VSX_P (V2DFmode)
+ && out_mode == DFmode && out_n == 2
+ && in_mode == DFmode && in_n == 2)
+ return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTDP];
+ break;
+ case BUILT_IN_SQRTF: /* No Altivec fallback is provided for sqrtf here. */
+ if (VECTOR_UNIT_VSX_P (V4SFmode)
+ && out_mode == SFmode && out_n == 4
+ && in_mode == SFmode && in_n == 4)
+ return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTSP];
+ break;
+ case BUILT_IN_CEIL:
+ if (VECTOR_UNIT_VSX_P (V2DFmode)
+ && out_mode == DFmode && out_n == 2
+ && in_mode == DFmode && in_n == 2)
+ return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
+ break;
+ case BUILT_IN_CEILF:
+ if (out_mode != SFmode || out_n != 4
+ || in_mode != SFmode || in_n != 4)
+ break;
+ if (VECTOR_UNIT_VSX_P (V4SFmode))
+ return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
+ if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
+ return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
+ break;
+ case BUILT_IN_FLOOR:
+ if (VECTOR_UNIT_VSX_P (V2DFmode)
+ && out_mode == DFmode && out_n == 2
+ && in_mode == DFmode && in_n == 2)
+ return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
+ break;
+ case BUILT_IN_FLOORF:
+ if (out_mode != SFmode || out_n != 4
+ || in_mode != SFmode || in_n != 4)
+ break;
+ if (VECTOR_UNIT_VSX_P (V4SFmode))
+ return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
+ if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
+ return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
+ break;
+ case BUILT_IN_TRUNC:
+ if (VECTOR_UNIT_VSX_P (V2DFmode)
+ && out_mode == DFmode && out_n == 2
+ && in_mode == DFmode && in_n == 2)
+ return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
+ break;
+ case BUILT_IN_TRUNCF:
+ if (out_mode != SFmode || out_n != 4
+ || in_mode != SFmode || in_n != 4)
+ break;
+ if (VECTOR_UNIT_VSX_P (V4SFmode))
+ return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
+ if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
+ return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
+ break;
+ case BUILT_IN_NEARBYINT: /* NOTE(review): presumably gated because the VSX round differs from nearbyint semantics -- confirm. */
+ if (VECTOR_UNIT_VSX_P (V2DFmode)
+ && flag_unsafe_math_optimizations
+ && out_mode == DFmode && out_n == 2
+ && in_mode == DFmode && in_n == 2)
+ return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
+ break;
+ case BUILT_IN_NEARBYINTF: /* Same unsafe-math gate as the double variant. */
+ if (VECTOR_UNIT_VSX_P (V4SFmode)
+ && flag_unsafe_math_optimizations
+ && out_mode == SFmode && out_n == 4
+ && in_mode == SFmode && in_n == 4)
+ return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
+ break;
+ case BUILT_IN_RINT: /* Only vectorized when trapping math is disabled. */
+ if (VECTOR_UNIT_VSX_P (V2DFmode)
+ && !flag_trapping_math
+ && out_mode == DFmode && out_n == 2
+ && in_mode == DFmode && in_n == 2)
+ return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
+ break;
+ case BUILT_IN_RINTF: /* Only vectorized when trapping math is disabled. */
+ if (VECTOR_UNIT_VSX_P (V4SFmode)
+ && !flag_trapping_math
+ && out_mode == SFmode && out_n == 4
+ && in_mode == SFmode && in_n == 4)
+ return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
+ break;
+ default:
+ break;
+ }
+ return NULL_TREE; /* Every case that did not return a decl ends up here. */
+}
+
+
/* Implement TARGET_HANDLE_OPTION. */
static bool
@@ -3621,6 +3782,11 @@ vspltis_constant (rtx op, unsigned step, unsigned copies)
&& (splat_val >= 0 || (step == 1 && copies == 1)))
;
+ /* Also check if we are loading up the most significant bit, which can be done
+ by loading up -1 and shifting the value left by -1. */
+ else if (EASY_VECTOR_MSB (splat_val, inner))
+ ;
+
else
return false;
@@ -3971,8 +4137,6 @@ rs6000_expand_vector_init (rtx target, rtx vals)
emit_insn (gen_rtx_SET (VOIDmode, target, const_vec));
return;
}
- else if (all_same && int_vector_p)
- ; /* Splat vector element. */
else
{
/* Load from constant pool. */
@@ -3981,8 +4145,66 @@ rs6000_expand_vector_init (rtx target, rtx vals)
}
}
- /* Store value to stack temp. Load vector element. Splat. */
- if (all_same)
+ /* Double word values on VSX can use xxpermdi or lxvdsx. */
+ if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
+ {
+ if (all_same)
+ {
+ rtx element = XVECEXP (vals, 0, 0);
+ if (mode == V2DFmode)
+ emit_insn (gen_vsx_splat_v2df (target, element));
+ else
+ emit_insn (gen_vsx_splat_v2di (target, element));
+ }
+ else
+ {
+ rtx op0 = copy_to_reg (XVECEXP (vals, 0, 0));
+ rtx op1 = copy_to_reg (XVECEXP (vals, 0, 1));
+ if (mode == V2DFmode)
+ emit_insn (gen_vsx_concat_v2df (target, op0, op1));
+ else
+ emit_insn (gen_vsx_concat_v2di (target, op0, op1));
+ }
+ return;
+ }
+
+ /* With single precision floating point on VSX, know that internally single
+ precision is actually represented as a double, and either make 2 V2DF
+ vectors, and convert these vectors to single precision, or do one
+ conversion, and splat the result to the other elements. */
+ if (mode == V4SFmode && VECTOR_MEM_VSX_P (mode))
+ {
+ if (all_same)
+ {
+ rtx freg = gen_reg_rtx (V4SFmode);
+ rtx sreg = copy_to_reg (XVECEXP (vals, 0, 0));
+
+ emit_insn (gen_vsx_xscvdpsp_scalar (freg, sreg));
+ emit_insn (gen_vsx_xxspltw_v4sf (target, freg, const0_rtx));
+ }
+ else
+ {
+ rtx dbl_even = gen_reg_rtx (V2DFmode);
+ rtx dbl_odd = gen_reg_rtx (V2DFmode);
+ rtx flt_even = gen_reg_rtx (V4SFmode);
+ rtx flt_odd = gen_reg_rtx (V4SFmode);
+
+ emit_insn (gen_vsx_concat_v2sf (dbl_even,
+ copy_to_reg (XVECEXP (vals, 0, 0)),
+ copy_to_reg (XVECEXP (vals, 0, 1))));
+ emit_insn (gen_vsx_concat_v2sf (dbl_odd,
+ copy_to_reg (XVECEXP (vals, 0, 2)),
+ copy_to_reg (XVECEXP (vals, 0, 3))));
+ emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
+ emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
+ emit_insn (gen_vec_extract_evenv4sf (target, flt_even, flt_odd));
+ }
+ return;
+ }
+
+ /* Store value to stack temp. Load vector element. Splat. However, splat
+ of 64-bit items is not supported on Altivec. */
+ if (all_same && GET_MODE_SIZE (mode) <= 4)
{
mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode), 0);
emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
@@ -4040,6 +4262,14 @@ rs6000_expand_vector_set (rtx target, rtx val, int elt)
int width = GET_MODE_SIZE (inner_mode);
int i;
+ if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
+ {
+ rtx (*set_func) (rtx, rtx, rtx, rtx)
+ = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
+ emit_insn (set_func (target, target, val, GEN_INT (elt)));
+ return;
+ }
+
/* Load single variable value. */
mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode), 0);
emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
@@ -4077,6 +4307,14 @@ rs6000_expand_vector_extract (rtx target, rtx vec, int elt)
enum machine_mode inner_mode = GET_MODE_INNER (mode);
rtx mem, x;
+ if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
+ {
+ rtx (*extract_func) (rtx, rtx, rtx)
+ = ((mode == V2DFmode) ? gen_vsx_extract_v2df : gen_vsx_extract_v2di);
+ emit_insn (extract_func (target, vec, GEN_INT (elt)));
+ return;
+ }
+
/* Allocate mode-sized buffer. */
mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
@@ -5447,6 +5685,10 @@ rs6000_mode_dependent_address (rtx addr)
case PRE_MODIFY:
return TARGET_UPDATE;
+ /* AND is only allowed in Altivec loads. */
+ case AND:
+ return true;
+
default:
break;
}
@@ -6048,6 +6290,8 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
case V2SFmode:
case V2SImode:
case V1DImode:
+ case V2DFmode:
+ case V2DImode:
if (CONSTANT_P (operands[1])
&& !easy_vector_constant (operands[1], mode))
operands[1] = force_const_mem (mode, operands[1]);
@@ -8192,6 +8436,59 @@ static const struct builtin_description bdesc_3arg[] =
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_perm", ALTIVEC_BUILTIN_VEC_PERM },
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_sel", ALTIVEC_BUILTIN_VEC_SEL },
+ { MASK_VSX, CODE_FOR_vsx_fmaddv2df4, "__builtin_vsx_xvmadddp", VSX_BUILTIN_XVMADDDP },
+ { MASK_VSX, CODE_FOR_vsx_fmsubv2df4, "__builtin_vsx_xvmsubdp", VSX_BUILTIN_XVMSUBDP },
+ { MASK_VSX, CODE_FOR_vsx_fnmaddv2df4, "__builtin_vsx_xvnmadddp", VSX_BUILTIN_XVNMADDDP },
+ { MASK_VSX, CODE_FOR_vsx_fnmsubv2df4, "__builtin_vsx_xvnmsubdp", VSX_BUILTIN_XVNMSUBDP },
+
+ { MASK_VSX, CODE_FOR_vsx_fmaddv4sf4, "__builtin_vsx_xvmaddsp", VSX_BUILTIN_XVMADDSP },
+ { MASK_VSX, CODE_FOR_vsx_fmsubv4sf4, "__builtin_vsx_xvmsubsp", VSX_BUILTIN_XVMSUBSP },
+ { MASK_VSX, CODE_FOR_vsx_fnmaddv4sf4, "__builtin_vsx_xvnmaddsp", VSX_BUILTIN_XVNMADDSP },
+ { MASK_VSX, CODE_FOR_vsx_fnmsubv4sf4, "__builtin_vsx_xvnmsubsp", VSX_BUILTIN_XVNMSUBSP },
+
+ { MASK_VSX, CODE_FOR_nothing, "__builtin_vec_msub", VSX_BUILTIN_VEC_MSUB },
+ { MASK_VSX, CODE_FOR_nothing, "__builtin_vec_nmadd", VSX_BUILTIN_VEC_NMADD },
+
+ { MASK_VSX, CODE_FOR_vector_select_v2di, "__builtin_vsx_xxsel_2di", VSX_BUILTIN_XXSEL_2DI },
+ { MASK_VSX, CODE_FOR_vector_select_v2df, "__builtin_vsx_xxsel_2df", VSX_BUILTIN_XXSEL_2DF },
+ { MASK_VSX, CODE_FOR_vector_select_v4sf, "__builtin_vsx_xxsel_4sf", VSX_BUILTIN_XXSEL_4SF },
+ { MASK_VSX, CODE_FOR_vector_select_v4si, "__builtin_vsx_xxsel_4si", VSX_BUILTIN_XXSEL_4SI },
+ { MASK_VSX, CODE_FOR_vector_select_v8hi, "__builtin_vsx_xxsel_8hi", VSX_BUILTIN_XXSEL_8HI },
+ { MASK_VSX, CODE_FOR_vector_select_v16qi, "__builtin_vsx_xxsel_16qi", VSX_BUILTIN_XXSEL_16QI },
+ { MASK_VSX, CODE_FOR_vector_select_v2di_uns, "__builtin_vsx_xxsel_2di_uns", VSX_BUILTIN_XXSEL_2DI_UNS },
+ { MASK_VSX, CODE_FOR_vector_select_v4si_uns, "__builtin_vsx_xxsel_4si_uns", VSX_BUILTIN_XXSEL_4SI_UNS },
+ { MASK_VSX, CODE_FOR_vector_select_v8hi_uns, "__builtin_vsx_xxsel_8hi_uns", VSX_BUILTIN_XXSEL_8HI_UNS },
+ { MASK_VSX, CODE_FOR_vector_select_v16qi_uns, "__builtin_vsx_xxsel_16qi_uns", VSX_BUILTIN_XXSEL_16QI_UNS },
+
+ { MASK_VSX, CODE_FOR_altivec_vperm_v2di, "__builtin_vsx_vperm_2di", VSX_BUILTIN_VPERM_2DI },
+ { MASK_VSX, CODE_FOR_altivec_vperm_v2df, "__builtin_vsx_vperm_2df", VSX_BUILTIN_VPERM_2DF },
+ { MASK_VSX, CODE_FOR_altivec_vperm_v4sf, "__builtin_vsx_vperm_4sf", VSX_BUILTIN_VPERM_4SF },
+ { MASK_VSX, CODE_FOR_altivec_vperm_v4si, "__builtin_vsx_vperm_4si", VSX_BUILTIN_VPERM_4SI },
+ { MASK_VSX, CODE_FOR_altivec_vperm_v8hi, "__builtin_vsx_vperm_8hi", VSX_BUILTIN_VPERM_8HI },
+ { MASK_VSX, CODE_FOR_altivec_vperm_v16qi, "__builtin_vsx_vperm_16qi", VSX_BUILTIN_VPERM_16QI },
+ { MASK_VSX, CODE_FOR_altivec_vperm_v2di_uns, "__builtin_vsx_vperm_2di_uns", VSX_BUILTIN_VPERM_2DI_UNS },
+ { MASK_VSX, CODE_FOR_altivec_vperm_v4si_uns, "__builtin_vsx_vperm_4si_uns", VSX_BUILTIN_VPERM_4SI_UNS },
+ { MASK_VSX, CODE_FOR_altivec_vperm_v8hi_uns, "__builtin_vsx_vperm_8hi_uns", VSX_BUILTIN_VPERM_8HI_UNS },
+ { MASK_VSX, CODE_FOR_altivec_vperm_v16qi_uns, "__builtin_vsx_vperm_16qi_uns", VSX_BUILTIN_VPERM_16QI_UNS },
+
+ { MASK_VSX, CODE_FOR_vsx_xxpermdi_v2df, "__builtin_vsx_xxpermdi_2df", VSX_BUILTIN_XXPERMDI_2DF },
+ { MASK_VSX, CODE_FOR_vsx_xxpermdi_v2di, "__builtin_vsx_xxpermdi_2di", VSX_BUILTIN_XXPERMDI_2DI },
+ { MASK_VSX, CODE_FOR_vsx_xxpermdi_v4sf, "__builtin_vsx_xxpermdi_4sf", VSX_BUILTIN_XXPERMDI_4SF },
+ { MASK_VSX, CODE_FOR_vsx_xxpermdi_v4si, "__builtin_vsx_xxpermdi_4si", VSX_BUILTIN_XXPERMDI_4SI },
+ { MASK_VSX, CODE_FOR_vsx_xxpermdi_v8hi, "__builtin_vsx_xxpermdi_8hi", VSX_BUILTIN_XXPERMDI_8HI },
+ { MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi, "__builtin_vsx_xxpermdi_16qi", VSX_BUILTIN_XXPERMDI_16QI },
+ { MASK_VSX, CODE_FOR_nothing, "__builtin_vsx_xxpermdi", VSX_BUILTIN_VEC_XXPERMDI },
+ { MASK_VSX, CODE_FOR_vsx_set_v2df, "__builtin_vsx_set_2df", VSX_BUILTIN_SET_2DF },
+ { MASK_VSX, CODE_FOR_vsx_set_v2di, "__builtin_vsx_set_2di", VSX_BUILTIN_SET_2DI },
+
+ { MASK_VSX, CODE_FOR_vsx_xxsldwi_v2di, "__builtin_vsx_xxsldwi_2di", VSX_BUILTIN_XXSLDWI_2DI },
+ { MASK_VSX, CODE_FOR_vsx_xxsldwi_v2df, "__builtin_vsx_xxsldwi_2df", VSX_BUILTIN_XXSLDWI_2DF },
+ { MASK_VSX, CODE_FOR_vsx_xxsldwi_v4sf, "__builtin_vsx_xxsldwi_4sf", VSX_BUILTIN_XXSLDWI_4SF },
+ { MASK_VSX, CODE_FOR_vsx_xxsldwi_v4si, "__builtin_vsx_xxsldwi_4si", VSX_BUILTIN_XXSLDWI_4SI },
+ { MASK_VSX, CODE_FOR_vsx_xxsldwi_v8hi, "__builtin_vsx_xxsldwi_8hi", VSX_BUILTIN_XXSLDWI_8HI },
+ { MASK_VSX, CODE_FOR_vsx_xxsldwi_v16qi, "__builtin_vsx_xxsldwi_16qi", VSX_BUILTIN_XXSLDWI_16QI },
+ { MASK_VSX, CODE_FOR_nothing, "__builtin_vsx_xxsldwi", VSX_BUILTIN_VEC_XXSLDWI },
+
{ 0, CODE_FOR_paired_msub, "__builtin_paired_msub", PAIRED_BUILTIN_MSUB },
{ 0, CODE_FOR_paired_madd, "__builtin_paired_madd", PAIRED_BUILTIN_MADD },
{ 0, CODE_FOR_paired_madds0, "__builtin_paired_madds0", PAIRED_BUILTIN_MADDS0 },
@@ -8337,9 +8634,50 @@ static struct builtin_description bdesc_2arg[] =
{ MASK_ALTIVEC, CODE_FOR_altivec_vsum2sws, "__builtin_altivec_vsum2sws", ALTIVEC_BUILTIN_VSUM2SWS },
{ MASK_ALTIVEC, CODE_FOR_altivec_vsumsws, "__builtin_altivec_vsumsws", ALTIVEC_BUILTIN_VSUMSWS },
{ MASK_ALTIVEC, CODE_FOR_xorv4si3, "__builtin_altivec_vxor", ALTIVEC_BUILTIN_VXOR },
-
- { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_add", ALTIVEC_BUILTIN_VEC_ADD },
- { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vaddfp", ALTIVEC_BUILTIN_VEC_VADDFP },
+ { MASK_ALTIVEC, CODE_FOR_vector_copysignv4sf3, "__builtin_altivec_copysignfp", ALTIVEC_BUILTIN_COPYSIGN_V4SF },
+
+ { MASK_VSX, CODE_FOR_addv2df3, "__builtin_vsx_xvadddp", VSX_BUILTIN_XVADDDP },
+ { MASK_VSX, CODE_FOR_subv2df3, "__builtin_vsx_xvsubdp", VSX_BUILTIN_XVSUBDP },
+ { MASK_VSX, CODE_FOR_mulv2df3, "__builtin_vsx_xvmuldp", VSX_BUILTIN_XVMULDP },
+ { MASK_VSX, CODE_FOR_divv2df3, "__builtin_vsx_xvdivdp", VSX_BUILTIN_XVDIVDP },
+ { MASK_VSX, CODE_FOR_sminv2df3, "__builtin_vsx_xvmindp", VSX_BUILTIN_XVMINDP },
+ { MASK_VSX, CODE_FOR_smaxv2df3, "__builtin_vsx_xvmaxdp", VSX_BUILTIN_XVMAXDP },
+ { MASK_VSX, CODE_FOR_vsx_tdivv2df3_fe, "__builtin_vsx_xvtdivdp_fe", VSX_BUILTIN_XVTDIVDP_FE },
+ { MASK_VSX, CODE_FOR_vsx_tdivv2df3_fg, "__builtin_vsx_xvtdivdp_fg", VSX_BUILTIN_XVTDIVDP_FG },
+ { MASK_VSX, CODE_FOR_vector_eqv2df, "__builtin_vsx_xvcmpeqdp", VSX_BUILTIN_XVCMPEQDP },
+ { MASK_VSX, CODE_FOR_vector_gtv2df, "__builtin_vsx_xvcmpgtdp", VSX_BUILTIN_XVCMPGTDP },
+ { MASK_VSX, CODE_FOR_vector_gev2df, "__builtin_vsx_xvcmpgedp", VSX_BUILTIN_XVCMPGEDP },
+
+ { MASK_VSX, CODE_FOR_addv4sf3, "__builtin_vsx_xvaddsp", VSX_BUILTIN_XVADDSP },
+ { MASK_VSX, CODE_FOR_subv4sf3, "__builtin_vsx_xvsubsp", VSX_BUILTIN_XVSUBSP },
+ { MASK_VSX, CODE_FOR_mulv4sf3, "__builtin_vsx_xvmulsp", VSX_BUILTIN_XVMULSP },
+ { MASK_VSX, CODE_FOR_divv4sf3, "__builtin_vsx_xvdivsp", VSX_BUILTIN_XVDIVSP },
+ { MASK_VSX, CODE_FOR_sminv4sf3, "__builtin_vsx_xvminsp", VSX_BUILTIN_XVMINSP },
+ { MASK_VSX, CODE_FOR_smaxv4sf3, "__builtin_vsx_xvmaxsp", VSX_BUILTIN_XVMAXSP },
+ { MASK_VSX, CODE_FOR_vsx_tdivv4sf3_fe, "__builtin_vsx_xvtdivsp_fe", VSX_BUILTIN_XVTDIVSP_FE },
+ { MASK_VSX, CODE_FOR_vsx_tdivv4sf3_fg, "__builtin_vsx_xvtdivsp_fg", VSX_BUILTIN_XVTDIVSP_FG },
+ { MASK_VSX, CODE_FOR_vector_eqv4sf, "__builtin_vsx_xvcmpeqsp", VSX_BUILTIN_XVCMPEQSP },
+ { MASK_VSX, CODE_FOR_vector_gtv4sf, "__builtin_vsx_xvcmpgtsp", VSX_BUILTIN_XVCMPGTSP },
+ { MASK_VSX, CODE_FOR_vector_gev4sf, "__builtin_vsx_xvcmpgesp", VSX_BUILTIN_XVCMPGESP },
+
+ { MASK_VSX, CODE_FOR_smindf3, "__builtin_vsx_xsmindp", VSX_BUILTIN_XSMINDP },
+ { MASK_VSX, CODE_FOR_smaxdf3, "__builtin_vsx_xsmaxdp", VSX_BUILTIN_XSMAXDP },
+ { MASK_VSX, CODE_FOR_vsx_tdivdf3_fe, "__builtin_vsx_xstdivdp_fe", VSX_BUILTIN_XSTDIVDP_FE },
+ { MASK_VSX, CODE_FOR_vsx_tdivdf3_fg, "__builtin_vsx_xstdivdp_fg", VSX_BUILTIN_XSTDIVDP_FG },
+ { MASK_VSX, CODE_FOR_vector_copysignv2df3, "__builtin_vsx_cpsgndp", VSX_BUILTIN_CPSGNDP },
+ { MASK_VSX, CODE_FOR_vector_copysignv4sf3, "__builtin_vsx_cpsgnsp", VSX_BUILTIN_CPSGNSP },
+
+ { MASK_VSX, CODE_FOR_vsx_concat_v2df, "__builtin_vsx_concat_2df", VSX_BUILTIN_CONCAT_2DF },
+ { MASK_VSX, CODE_FOR_vsx_concat_v2di, "__builtin_vsx_concat_2di", VSX_BUILTIN_CONCAT_2DI },
+ { MASK_VSX, CODE_FOR_vsx_splat_v2df, "__builtin_vsx_splat_2df", VSX_BUILTIN_SPLAT_2DF },
+ { MASK_VSX, CODE_FOR_vsx_splat_v2di, "__builtin_vsx_splat_2di", VSX_BUILTIN_SPLAT_2DI },
+ { MASK_VSX, CODE_FOR_vsx_xxmrghw_v4sf, "__builtin_vsx_xxmrghw", VSX_BUILTIN_XXMRGHW_4SF },
+ { MASK_VSX, CODE_FOR_vsx_xxmrghw_v4si, "__builtin_vsx_xxmrghw_4si", VSX_BUILTIN_XXMRGHW_4SI },
+ { MASK_VSX, CODE_FOR_vsx_xxmrglw_v4sf, "__builtin_vsx_xxmrglw", VSX_BUILTIN_XXMRGLW_4SF },
+ { MASK_VSX, CODE_FOR_vsx_xxmrglw_v4si, "__builtin_vsx_xxmrglw_4si", VSX_BUILTIN_XXMRGLW_4SI },
+
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_add", ALTIVEC_BUILTIN_VEC_ADD },
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_vaddfp", ALTIVEC_BUILTIN_VEC_VADDFP },
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vadduwm", ALTIVEC_BUILTIN_VEC_VADDUWM },
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vadduhm", ALTIVEC_BUILTIN_VEC_VADDUHM },
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vaddubm", ALTIVEC_BUILTIN_VEC_VADDUBM },
@@ -8377,6 +8715,7 @@ static struct builtin_description bdesc_2arg[] =
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vcmpgtub", ALTIVEC_BUILTIN_VEC_VCMPGTUB },
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_cmple", ALTIVEC_BUILTIN_VEC_CMPLE },
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_cmplt", ALTIVEC_BUILTIN_VEC_CMPLT },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_copysign", ALTIVEC_BUILTIN_VEC_COPYSIGN },
{ MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_max", ALTIVEC_BUILTIN_VEC_MAX },
{ MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_vmaxfp", ALTIVEC_BUILTIN_VEC_VMAXFP },
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmaxsw", ALTIVEC_BUILTIN_VEC_VMAXSW },
@@ -8466,6 +8805,9 @@ static struct builtin_description bdesc_2arg[] =
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_sums", ALTIVEC_BUILTIN_VEC_SUMS },
{ MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_xor", ALTIVEC_BUILTIN_VEC_XOR },
+ { MASK_VSX, CODE_FOR_nothing, "__builtin_vec_mul", VSX_BUILTIN_VEC_MUL },
+ { MASK_VSX, CODE_FOR_nothing, "__builtin_vec_div", VSX_BUILTIN_VEC_DIV },
+
{ 0, CODE_FOR_divv2sf3, "__builtin_paired_divv2sf3", PAIRED_BUILTIN_DIVV2SF3 },
{ 0, CODE_FOR_addv2sf3, "__builtin_paired_addv2sf3", PAIRED_BUILTIN_ADDV2SF3 },
{ 0, CODE_FOR_subv2sf3, "__builtin_paired_subv2sf3", PAIRED_BUILTIN_SUBV2SF3 },
@@ -8661,6 +9003,19 @@ static const struct builtin_description_predicates bdesc_altivec_preds[] =
{ MASK_ALTIVEC, CODE_FOR_vector_gtu_v16qi_p, "__builtin_altivec_vcmpgtub_p",
ALTIVEC_BUILTIN_VCMPGTUB_P },
+ { MASK_VSX, CODE_FOR_vector_eq_v4sf_p, "__builtin_vsx_xvcmpeqsp_p",
+ VSX_BUILTIN_XVCMPEQSP_P },
+ { MASK_VSX, CODE_FOR_vector_ge_v4sf_p, "__builtin_vsx_xvcmpgesp_p",
+ VSX_BUILTIN_XVCMPGESP_P },
+ { MASK_VSX, CODE_FOR_vector_gt_v4sf_p, "__builtin_vsx_xvcmpgtsp_p",
+ VSX_BUILTIN_XVCMPGTSP_P },
+ { MASK_VSX, CODE_FOR_vector_eq_v2df_p, "__builtin_vsx_xvcmpeqdp_p",
+ VSX_BUILTIN_XVCMPEQDP_P },
+ { MASK_VSX, CODE_FOR_vector_ge_v2df_p, "__builtin_vsx_xvcmpgedp_p",
+ VSX_BUILTIN_XVCMPGEDP_P },
+ { MASK_VSX, CODE_FOR_vector_gt_v2df_p, "__builtin_vsx_xvcmpgtdp_p",
+ VSX_BUILTIN_XVCMPGTDP_P },
+
{ MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_vcmpeq_p",
ALTIVEC_BUILTIN_VCMPEQ_P },
{ MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_vcmpgt_p",
@@ -8724,7 +9079,11 @@ static const struct builtin_description bdesc_abs[] =
{ MASK_ALTIVEC, CODE_FOR_absv16qi2, "__builtin_altivec_abs_v16qi", ALTIVEC_BUILTIN_ABS_V16QI },
{ MASK_ALTIVEC, CODE_FOR_altivec_abss_v4si, "__builtin_altivec_abss_v4si", ALTIVEC_BUILTIN_ABSS_V4SI },
{ MASK_ALTIVEC, CODE_FOR_altivec_abss_v8hi, "__builtin_altivec_abss_v8hi", ALTIVEC_BUILTIN_ABSS_V8HI },
- { MASK_ALTIVEC, CODE_FOR_altivec_abss_v16qi, "__builtin_altivec_abss_v16qi", ALTIVEC_BUILTIN_ABSS_V16QI }
+ { MASK_ALTIVEC, CODE_FOR_altivec_abss_v16qi, "__builtin_altivec_abss_v16qi", ALTIVEC_BUILTIN_ABSS_V16QI },
+ { MASK_VSX, CODE_FOR_absv2df2, "__builtin_vsx_xvabsdp", VSX_BUILTIN_XVABSDP },
+ { MASK_VSX, CODE_FOR_vsx_nabsv2df2, "__builtin_vsx_xvnabsdp", VSX_BUILTIN_XVNABSDP },
+ { MASK_VSX, CODE_FOR_absv4sf2, "__builtin_vsx_xvabssp", VSX_BUILTIN_XVABSSP },
+ { MASK_VSX, CODE_FOR_vsx_nabsv4sf2, "__builtin_vsx_xvnabssp", VSX_BUILTIN_XVNABSSP },
};
/* Simple unary operations: VECb = foo (unsigned literal) or VECb =
@@ -8735,10 +9094,10 @@ static struct builtin_description bdesc_1arg[] =
{ MASK_ALTIVEC, CODE_FOR_altivec_vexptefp, "__builtin_altivec_vexptefp", ALTIVEC_BUILTIN_VEXPTEFP },
{ MASK_ALTIVEC, CODE_FOR_altivec_vlogefp, "__builtin_altivec_vlogefp", ALTIVEC_BUILTIN_VLOGEFP },
{ MASK_ALTIVEC, CODE_FOR_altivec_vrefp, "__builtin_altivec_vrefp", ALTIVEC_BUILTIN_VREFP },
- { MASK_ALTIVEC, CODE_FOR_altivec_vrfim, "__builtin_altivec_vrfim", ALTIVEC_BUILTIN_VRFIM },
+ { MASK_ALTIVEC, CODE_FOR_vector_floorv4sf2, "__builtin_altivec_vrfim", ALTIVEC_BUILTIN_VRFIM },
{ MASK_ALTIVEC, CODE_FOR_altivec_vrfin, "__builtin_altivec_vrfin", ALTIVEC_BUILTIN_VRFIN },
- { MASK_ALTIVEC, CODE_FOR_altivec_vrfip, "__builtin_altivec_vrfip", ALTIVEC_BUILTIN_VRFIP },
- { MASK_ALTIVEC, CODE_FOR_ftruncv4sf2, "__builtin_altivec_vrfiz", ALTIVEC_BUILTIN_VRFIZ },
+ { MASK_ALTIVEC, CODE_FOR_vector_ceilv4sf2, "__builtin_altivec_vrfip", ALTIVEC_BUILTIN_VRFIP },
+ { MASK_ALTIVEC, CODE_FOR_vector_btruncv4sf2, "__builtin_altivec_vrfiz", ALTIVEC_BUILTIN_VRFIZ },
{ MASK_ALTIVEC, CODE_FOR_altivec_vrsqrtefp, "__builtin_altivec_vrsqrtefp", ALTIVEC_BUILTIN_VRSQRTEFP },
{ MASK_ALTIVEC, CODE_FOR_altivec_vspltisb, "__builtin_altivec_vspltisb", ALTIVEC_BUILTIN_VSPLTISB },
{ MASK_ALTIVEC, CODE_FOR_altivec_vspltish, "__builtin_altivec_vspltish", ALTIVEC_BUILTIN_VSPLTISH },
@@ -8750,6 +9109,65 @@ static struct builtin_description bdesc_1arg[] =
{ MASK_ALTIVEC, CODE_FOR_altivec_vupklpx, "__builtin_altivec_vupklpx", ALTIVEC_BUILTIN_VUPKLPX },
{ MASK_ALTIVEC, CODE_FOR_altivec_vupklsh, "__builtin_altivec_vupklsh", ALTIVEC_BUILTIN_VUPKLSH },
+ { MASK_VSX, CODE_FOR_negv2df2, "__builtin_vsx_xvnegdp", VSX_BUILTIN_XVNEGDP },
+ { MASK_VSX, CODE_FOR_sqrtv2df2, "__builtin_vsx_xvsqrtdp", VSX_BUILTIN_XVSQRTDP },
+ { MASK_VSX, CODE_FOR_vsx_rsqrtev2df2, "__builtin_vsx_xvrsqrtedp", VSX_BUILTIN_XVRSQRTEDP },
+ { MASK_VSX, CODE_FOR_vsx_tsqrtv2df2_fe, "__builtin_vsx_xvtsqrtdp_fe", VSX_BUILTIN_XVTSQRTDP_FE },
+ { MASK_VSX, CODE_FOR_vsx_tsqrtv2df2_fg, "__builtin_vsx_xvtsqrtdp_fg", VSX_BUILTIN_XVTSQRTDP_FG },
+ { MASK_VSX, CODE_FOR_vsx_frev2df2, "__builtin_vsx_xvredp", VSX_BUILTIN_XVREDP },
+
+ { MASK_VSX, CODE_FOR_negv4sf2, "__builtin_vsx_xvnegsp", VSX_BUILTIN_XVNEGSP },
+ { MASK_VSX, CODE_FOR_sqrtv4sf2, "__builtin_vsx_xvsqrtsp", VSX_BUILTIN_XVSQRTSP },
+ { MASK_VSX, CODE_FOR_vsx_rsqrtev4sf2, "__builtin_vsx_xvrsqrtesp", VSX_BUILTIN_XVRSQRTESP },
+ { MASK_VSX, CODE_FOR_vsx_tsqrtv4sf2_fe, "__builtin_vsx_xvtsqrtsp_fe", VSX_BUILTIN_XVTSQRTSP_FE },
+ { MASK_VSX, CODE_FOR_vsx_tsqrtv4sf2_fg, "__builtin_vsx_xvtsqrtsp_fg", VSX_BUILTIN_XVTSQRTSP_FG },
+ { MASK_VSX, CODE_FOR_vsx_frev4sf2, "__builtin_vsx_xvresp", VSX_BUILTIN_XVRESP },
+
+ { MASK_VSX, CODE_FOR_vsx_xscvdpsp, "__builtin_vsx_xscvdpsp", VSX_BUILTIN_XSCVDPSP },
+ { MASK_VSX, CODE_FOR_vsx_xscvspdp, "__builtin_vsx_xscvspdp", VSX_BUILTIN_XSCVSPDP },
+ { MASK_VSX, CODE_FOR_vsx_xvcvdpsp, "__builtin_vsx_xvcvdpsp", VSX_BUILTIN_XVCVDPSP },
+ { MASK_VSX, CODE_FOR_vsx_xvcvspdp, "__builtin_vsx_xvcvspdp", VSX_BUILTIN_XVCVSPDP },
+ { MASK_VSX, CODE_FOR_vsx_tsqrtdf2_fe, "__builtin_vsx_xstsqrtdp_fe", VSX_BUILTIN_XSTSQRTDP_FE },
+ { MASK_VSX, CODE_FOR_vsx_tsqrtdf2_fg, "__builtin_vsx_xstsqrtdp_fg", VSX_BUILTIN_XSTSQRTDP_FG },
+
+ { MASK_VSX, CODE_FOR_vsx_fix_truncv2dfv2di2, "__builtin_vsx_xvcvdpsxds", VSX_BUILTIN_XVCVDPSXDS },
+ { MASK_VSX, CODE_FOR_vsx_fixuns_truncv2dfv2di2, "__builtin_vsx_xvcvdpuxds", VSX_BUILTIN_XVCVDPUXDS },
+ { MASK_VSX, CODE_FOR_vsx_fixuns_truncv2dfv2di2, "__builtin_vsx_xvcvdpuxds_uns", VSX_BUILTIN_XVCVDPUXDS_UNS },
+ { MASK_VSX, CODE_FOR_vsx_floatv2div2df2, "__builtin_vsx_xvcvsxddp", VSX_BUILTIN_XVCVSXDDP },
+ { MASK_VSX, CODE_FOR_vsx_floatunsv2div2df2, "__builtin_vsx_xvcvuxddp", VSX_BUILTIN_XVCVUXDDP },
+ { MASK_VSX, CODE_FOR_vsx_floatunsv2div2df2, "__builtin_vsx_xvcvuxddp_uns", VSX_BUILTIN_XVCVUXDDP_UNS },
+
+ { MASK_VSX, CODE_FOR_vsx_fix_truncv4sfv4si2, "__builtin_vsx_xvcvspsxws", VSX_BUILTIN_XVCVSPSXWS },
+ { MASK_VSX, CODE_FOR_vsx_fixuns_truncv4sfv4si2, "__builtin_vsx_xvcvspuxws", VSX_BUILTIN_XVCVSPUXWS },
+ { MASK_VSX, CODE_FOR_vsx_floatv4siv4sf2, "__builtin_vsx_xvcvsxwsp", VSX_BUILTIN_XVCVSXWSP },
+ { MASK_VSX, CODE_FOR_vsx_floatunsv4siv4sf2, "__builtin_vsx_xvcvuxwsp", VSX_BUILTIN_XVCVUXWSP },
+
+ { MASK_VSX, CODE_FOR_vsx_xvcvdpsxws, "__builtin_vsx_xvcvdpsxws", VSX_BUILTIN_XVCVDPSXWS },
+ { MASK_VSX, CODE_FOR_vsx_xvcvdpuxws, "__builtin_vsx_xvcvdpuxws", VSX_BUILTIN_XVCVDPUXWS },
+ { MASK_VSX, CODE_FOR_vsx_xvcvsxwdp, "__builtin_vsx_xvcvsxwdp", VSX_BUILTIN_XVCVSXWDP },
+ { MASK_VSX, CODE_FOR_vsx_xvcvuxwdp, "__builtin_vsx_xvcvuxwdp", VSX_BUILTIN_XVCVUXWDP },
+ { MASK_VSX, CODE_FOR_vsx_xvrdpi, "__builtin_vsx_xvrdpi", VSX_BUILTIN_XVRDPI },
+ { MASK_VSX, CODE_FOR_vsx_xvrdpic, "__builtin_vsx_xvrdpic", VSX_BUILTIN_XVRDPIC },
+ { MASK_VSX, CODE_FOR_vsx_floorv2df2, "__builtin_vsx_xvrdpim", VSX_BUILTIN_XVRDPIM },
+ { MASK_VSX, CODE_FOR_vsx_ceilv2df2, "__builtin_vsx_xvrdpip", VSX_BUILTIN_XVRDPIP },
+ { MASK_VSX, CODE_FOR_vsx_btruncv2df2, "__builtin_vsx_xvrdpiz", VSX_BUILTIN_XVRDPIZ },
+
+ { MASK_VSX, CODE_FOR_vsx_xvcvspsxds, "__builtin_vsx_xvcvspsxds", VSX_BUILTIN_XVCVSPSXDS },
+ { MASK_VSX, CODE_FOR_vsx_xvcvspuxds, "__builtin_vsx_xvcvspuxds", VSX_BUILTIN_XVCVSPUXDS },
+ { MASK_VSX, CODE_FOR_vsx_xvcvsxdsp, "__builtin_vsx_xvcvsxdsp", VSX_BUILTIN_XVCVSXDSP },
+ { MASK_VSX, CODE_FOR_vsx_xvcvuxdsp, "__builtin_vsx_xvcvuxdsp", VSX_BUILTIN_XVCVUXDSP },
+ { MASK_VSX, CODE_FOR_vsx_xvrspi, "__builtin_vsx_xvrspi", VSX_BUILTIN_XVRSPI },
+ { MASK_VSX, CODE_FOR_vsx_xvrspic, "__builtin_vsx_xvrspic", VSX_BUILTIN_XVRSPIC },
+ { MASK_VSX, CODE_FOR_vsx_floorv4sf2, "__builtin_vsx_xvrspim", VSX_BUILTIN_XVRSPIM },
+ { MASK_VSX, CODE_FOR_vsx_ceilv4sf2, "__builtin_vsx_xvrspip", VSX_BUILTIN_XVRSPIP },
+ { MASK_VSX, CODE_FOR_vsx_btruncv4sf2, "__builtin_vsx_xvrspiz", VSX_BUILTIN_XVRSPIZ },
+
+ { MASK_VSX, CODE_FOR_vsx_xsrdpi, "__builtin_vsx_xsrdpi", VSX_BUILTIN_XSRDPI },
+ { MASK_VSX, CODE_FOR_vsx_xsrdpic, "__builtin_vsx_xsrdpic", VSX_BUILTIN_XSRDPIC },
+ { MASK_VSX, CODE_FOR_vsx_floordf2, "__builtin_vsx_xsrdpim", VSX_BUILTIN_XSRDPIM },
+ { MASK_VSX, CODE_FOR_vsx_ceildf2, "__builtin_vsx_xsrdpip", VSX_BUILTIN_XSRDPIP },
+ { MASK_VSX, CODE_FOR_vsx_btruncdf2, "__builtin_vsx_xsrdpiz", VSX_BUILTIN_XSRDPIZ },
+
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_abs", ALTIVEC_BUILTIN_VEC_ABS },
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_abss", ALTIVEC_BUILTIN_VEC_ABSS },
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_ceil", ALTIVEC_BUILTIN_VEC_CEIL },
@@ -8770,6 +9188,10 @@ static struct builtin_description bdesc_1arg[] =
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vupklsh", ALTIVEC_BUILTIN_VEC_VUPKLSH },
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vupklsb", ALTIVEC_BUILTIN_VEC_VUPKLSB },
+ { MASK_VSX, CODE_FOR_nothing, "__builtin_vec_nearbyint", ALTIVEC_BUILTIN_VEC_NEARBYINT },
+ { MASK_VSX, CODE_FOR_nothing, "__builtin_vec_rint", ALTIVEC_BUILTIN_VEC_RINT },
+ { MASK_VSX, CODE_FOR_nothing, "__builtin_vec_sqrt", ALTIVEC_BUILTIN_VEC_SQRT },
+
{ MASK_ALTIVEC|MASK_VSX, CODE_FOR_floatv4siv4sf2, "__builtin_vec_float_sisf", VECTOR_BUILTIN_FLOAT_V4SI_V4SF },
{ MASK_ALTIVEC|MASK_VSX, CODE_FOR_unsigned_floatv4siv4sf2, "__builtin_vec_uns_float_sisf", VECTOR_BUILTIN_UNSFLOAT_V4SI_V4SF },
{ MASK_ALTIVEC|MASK_VSX, CODE_FOR_fix_truncv4sfv4si2, "__builtin_vec_fix_sfsi", VECTOR_BUILTIN_FIX_V4SF_V4SI },
@@ -9293,6 +9715,36 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
}
break;
+ case CODE_FOR_vsx_xxpermdi_v2df:
+ case CODE_FOR_vsx_xxpermdi_v2di:
+ case CODE_FOR_vsx_xxsldwi_v16qi:
+ case CODE_FOR_vsx_xxsldwi_v8hi:
+ case CODE_FOR_vsx_xxsldwi_v4si:
+ case CODE_FOR_vsx_xxsldwi_v4sf:
+ case CODE_FOR_vsx_xxsldwi_v2di:
+ case CODE_FOR_vsx_xxsldwi_v2df:
+ /* Only allow 2-bit unsigned literals. */
+ STRIP_NOPS (arg2);
+ if (TREE_CODE (arg2) != INTEGER_CST
+ || TREE_INT_CST_LOW (arg2) & ~0x3)
+ {
+ error ("argument 3 must be a 2-bit unsigned literal");
+ return const0_rtx;
+ }
+ break;
+
+ case CODE_FOR_vsx_set_v2df:
+ case CODE_FOR_vsx_set_v2di:
+ /* Only allow 1-bit unsigned literals. */
+ STRIP_NOPS (arg2);
+ if (TREE_CODE (arg2) != INTEGER_CST
+ || TREE_INT_CST_LOW (arg2) & ~0x1)
+ {
+ error ("argument 3 must be a 1-bit unsigned literal");
+ return const0_rtx;
+ }
+ break;
+
default:
break;
}
@@ -9602,8 +10054,10 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
enum machine_mode tmode, mode0;
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
- if (fcode >= ALTIVEC_BUILTIN_OVERLOADED_FIRST
- && fcode <= ALTIVEC_BUILTIN_OVERLOADED_LAST)
+ if ((fcode >= ALTIVEC_BUILTIN_OVERLOADED_FIRST
+ && fcode <= ALTIVEC_BUILTIN_OVERLOADED_LAST)
+ || (fcode >= VSX_BUILTIN_OVERLOADED_FIRST
+ && fcode <= VSX_BUILTIN_OVERLOADED_LAST))
{
*expandedp = true;
error ("unresolved overload for Altivec builtin %qF", fndecl);
@@ -9711,18 +10165,24 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
+ case VSX_BUILTIN_VEC_INIT_V2DF:
+ case VSX_BUILTIN_VEC_INIT_V2DI:
return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
case ALTIVEC_BUILTIN_VEC_SET_V4SI:
case ALTIVEC_BUILTIN_VEC_SET_V8HI:
case ALTIVEC_BUILTIN_VEC_SET_V16QI:
case ALTIVEC_BUILTIN_VEC_SET_V4SF:
+ case VSX_BUILTIN_VEC_SET_V2DF:
+ case VSX_BUILTIN_VEC_SET_V2DI:
return altivec_expand_vec_set_builtin (exp);
case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
+ case VSX_BUILTIN_VEC_EXT_V2DF:
+ case VSX_BUILTIN_VEC_EXT_V2DI:
return altivec_expand_vec_ext_builtin (exp, target);
default:
@@ -10245,6 +10705,11 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
if (fcode == RS6000_BUILTIN_BSWAP_HI)
return rs6000_expand_unop_builtin (CODE_FOR_bswaphi2, exp, target);
+ if (fcode == POWER7_BUILTIN_BPERMD)
+ return rs6000_expand_binop_builtin (((TARGET_64BIT)
+ ? CODE_FOR_bpermd_di
+ : CODE_FOR_bpermd_si), exp, target);
+
if (fcode == ALTIVEC_BUILTIN_MASK_FOR_LOAD
|| fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
{
@@ -10500,6 +10965,33 @@ rs6000_init_builtins (void)
TYPE_NAME (pixel_V8HI_type_node) = tdecl;
(*lang_hooks.decls.pushdecl) (tdecl);
+ if (TARGET_VSX)
+ {
+ tdecl = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__vector double"),
+ V2DF_type_node);
+ TYPE_NAME (V2DF_type_node) = tdecl;
+ (*lang_hooks.decls.pushdecl) (tdecl);
+
+ tdecl = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__vector long"),
+ V2DI_type_node);
+ TYPE_NAME (V2DI_type_node) = tdecl;
+ (*lang_hooks.decls.pushdecl) (tdecl);
+
+ tdecl = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__vector unsigned long"),
+ unsigned_V2DI_type_node);
+ TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
+ (*lang_hooks.decls.pushdecl) (tdecl);
+
+ tdecl = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__vector __bool long"),
+ bool_V2DI_type_node);
+ TYPE_NAME (bool_V2DI_type_node) = tdecl;
+ (*lang_hooks.decls.pushdecl) (tdecl);
+ }
+
if (TARGET_PAIRED_FLOAT)
paired_init_builtins ();
if (TARGET_SPE)
@@ -10531,6 +11023,15 @@ rs6000_init_builtins (void)
RS6000_BUILTIN_RECIP);
}
+ if (TARGET_POPCNTD)
+ {
+ enum machine_mode mode = (TARGET_64BIT) ? DImode : SImode;
+ tree ftype = builtin_function_type (mode, mode, mode, VOIDmode,
+ POWER7_BUILTIN_BPERMD,
+ "__builtin_bpermd");
+ def_builtin (MASK_POPCNTD, "__builtin_bpermd", ftype,
+ POWER7_BUILTIN_BPERMD);
+ }
if (TARGET_POWERPC)
{
/* Don't use builtin_function_type here, as it maps HI/QI to SI. */
@@ -10969,6 +11470,10 @@ altivec_init_builtins (void)
= build_function_type_list (integer_type_node,
integer_type_node, V4SF_type_node,
V4SF_type_node, NULL_TREE);
+ tree int_ftype_int_v2df_v2df
+ = build_function_type_list (integer_type_node,
+ integer_type_node, V2DF_type_node,
+ V2DF_type_node, NULL_TREE);
tree v4si_ftype_v4si
= build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
tree v8hi_ftype_v8hi
@@ -10977,6 +11482,8 @@ altivec_init_builtins (void)
= build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
tree v4sf_ftype_v4sf
= build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
+ tree v2df_ftype_v2df
+ = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
tree void_ftype_pcvoid_int_int
= build_function_type_list (void_type_node,
pcvoid_type_node, integer_type_node,
@@ -11079,8 +11586,10 @@ altivec_init_builtins (void)
{
enum machine_mode mode1;
tree type;
- bool is_overloaded = dp->code >= ALTIVEC_BUILTIN_OVERLOADED_FIRST
- && dp->code <= ALTIVEC_BUILTIN_OVERLOADED_LAST;
+ bool is_overloaded = ((dp->code >= ALTIVEC_BUILTIN_OVERLOADED_FIRST
+ && dp->code <= ALTIVEC_BUILTIN_OVERLOADED_LAST)
+ || (dp->code >= VSX_BUILTIN_OVERLOADED_FIRST
+ && dp->code <= VSX_BUILTIN_OVERLOADED_LAST));
if (is_overloaded)
mode1 = VOIDmode;
@@ -11104,6 +11613,9 @@ altivec_init_builtins (void)
case V4SFmode:
type = int_ftype_int_v4sf_v4sf;
break;
+ case V2DFmode:
+ type = int_ftype_int_v2df_v2df;
+ break;
default:
gcc_unreachable ();
}
@@ -11134,6 +11646,9 @@ altivec_init_builtins (void)
case V4SFmode:
type = v4sf_ftype_v4sf;
break;
+ case V2DFmode:
+ type = v2df_ftype_v2df;
+ break;
default:
gcc_unreachable ();
}
@@ -11193,6 +11708,19 @@ altivec_init_builtins (void)
def_builtin (MASK_ALTIVEC, "__builtin_vec_init_v4sf", ftype,
ALTIVEC_BUILTIN_VEC_INIT_V4SF);
+ if (TARGET_VSX)
+ {
+ ftype = build_function_type_list (V2DF_type_node, double_type_node,
+ double_type_node, NULL_TREE);
+ def_builtin (MASK_VSX, "__builtin_vec_init_v2df", ftype,
+ VSX_BUILTIN_VEC_INIT_V2DF);
+
+ ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
+ intDI_type_node, NULL_TREE);
+ def_builtin (MASK_VSX, "__builtin_vec_init_v2di", ftype,
+ VSX_BUILTIN_VEC_INIT_V2DI);
+ }
+
/* Access to the vec_set patterns. */
ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
intSI_type_node,
@@ -11218,6 +11746,21 @@ altivec_init_builtins (void)
def_builtin (MASK_ALTIVEC|MASK_VSX, "__builtin_vec_set_v4sf", ftype,
ALTIVEC_BUILTIN_VEC_SET_V4SF);
+ if (TARGET_VSX)
+ {
+ ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
+ double_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_VSX, "__builtin_vec_set_v2df", ftype,
+ VSX_BUILTIN_VEC_SET_V2DF);
+
+ ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
+ intDI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_VSX, "__builtin_vec_set_v2di", ftype,
+ VSX_BUILTIN_VEC_SET_V2DI);
+ }
+
/* Access to the vec_extract patterns. */
ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
integer_type_node, NULL_TREE);
@@ -11238,6 +11781,19 @@ altivec_init_builtins (void)
integer_type_node, NULL_TREE);
def_builtin (MASK_ALTIVEC|MASK_VSX, "__builtin_vec_ext_v4sf", ftype,
ALTIVEC_BUILTIN_VEC_EXT_V4SF);
+
+ if (TARGET_VSX)
+ {
+ ftype = build_function_type_list (double_type_node, V2DF_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_VSX, "__builtin_vec_ext_v2df", ftype,
+ VSX_BUILTIN_VEC_EXT_V2DF);
+
+ ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_VSX, "__builtin_vec_ext_v2di", ftype,
+ VSX_BUILTIN_VEC_EXT_V2DI);
+ }
}
/* Hash function for builtin functions with up to 3 arguments and a return
@@ -11333,6 +11889,14 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0,
case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
+ case VSX_BUILTIN_VPERM_16QI_UNS:
+ case VSX_BUILTIN_VPERM_8HI_UNS:
+ case VSX_BUILTIN_VPERM_4SI_UNS:
+ case VSX_BUILTIN_VPERM_2DI_UNS:
+ case VSX_BUILTIN_XXSEL_16QI_UNS:
+ case VSX_BUILTIN_XXSEL_8HI_UNS:
+ case VSX_BUILTIN_XXSEL_4SI_UNS:
+ case VSX_BUILTIN_XXSEL_2DI_UNS:
h.uns_p[0] = 1;
h.uns_p[1] = 1;
h.uns_p[2] = 1;
@@ -11346,6 +11910,12 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0,
case ALTIVEC_BUILTIN_VPERM_4SF:
case ALTIVEC_BUILTIN_VPERM_2DI:
case ALTIVEC_BUILTIN_VPERM_2DF:
+ case VSX_BUILTIN_VPERM_16QI:
+ case VSX_BUILTIN_VPERM_8HI:
+ case VSX_BUILTIN_VPERM_4SI:
+ case VSX_BUILTIN_VPERM_4SF:
+ case VSX_BUILTIN_VPERM_2DI:
+ case VSX_BUILTIN_VPERM_2DF:
h.uns_p[3] = 1;
break;
@@ -11442,8 +12012,10 @@ rs6000_common_init_builtins (void)
|| (mask == 0 && !TARGET_PAIRED_FLOAT))
continue;
- if (d->code >= ALTIVEC_BUILTIN_OVERLOADED_FIRST
- && d->code <= ALTIVEC_BUILTIN_OVERLOADED_LAST)
+ if ((d->code >= ALTIVEC_BUILTIN_OVERLOADED_FIRST
+ && d->code <= ALTIVEC_BUILTIN_OVERLOADED_LAST)
+ || (d->code >= VSX_BUILTIN_OVERLOADED_FIRST
+ && d->code <= VSX_BUILTIN_OVERLOADED_LAST))
{
if (! (type = opaque_ftype_opaque_opaque_opaque))
type = opaque_ftype_opaque_opaque_opaque
@@ -11481,8 +12053,10 @@ rs6000_common_init_builtins (void)
|| (mask == 0 && !TARGET_PAIRED_FLOAT))
continue;
- if (d->code >= ALTIVEC_BUILTIN_OVERLOADED_FIRST
- && d->code <= ALTIVEC_BUILTIN_OVERLOADED_LAST)
+ if ((d->code >= ALTIVEC_BUILTIN_OVERLOADED_FIRST
+ && d->code <= ALTIVEC_BUILTIN_OVERLOADED_LAST)
+ || (d->code >= VSX_BUILTIN_OVERLOADED_FIRST
+ && d->code <= VSX_BUILTIN_OVERLOADED_LAST))
{
if (! (type = opaque_ftype_opaque_opaque))
type = opaque_ftype_opaque_opaque
@@ -11537,14 +12111,15 @@ rs6000_common_init_builtins (void)
enum machine_mode mode0, mode1;
tree type;
int mask = d->mask;
- bool is_overloaded = d->code >= ALTIVEC_BUILTIN_OVERLOADED_FIRST
- && d->code <= ALTIVEC_BUILTIN_OVERLOADED_LAST;
if ((mask != 0 && (mask & target_flags) == 0)
|| (mask == 0 && !TARGET_PAIRED_FLOAT))
continue;
- if (is_overloaded)
+ if ((d->code >= ALTIVEC_BUILTIN_OVERLOADED_FIRST
+ && d->code <= ALTIVEC_BUILTIN_OVERLOADED_LAST)
+ || (d->code >= VSX_BUILTIN_OVERLOADED_FIRST
+ && d->code <= VSX_BUILTIN_OVERLOADED_LAST))
{
if (! (type = opaque_ftype_opaque))
type = opaque_ftype_opaque
@@ -22228,18 +22803,24 @@ rs6000_handle_altivec_attribute (tree *node,
mode = TYPE_MODE (type);
/* Check for invalid AltiVec type qualifiers. */
- if (type == long_unsigned_type_node || type == long_integer_type_node)
- {
- if (TARGET_64BIT)
- error ("use of %<long%> in AltiVec types is invalid for 64-bit code");
- else if (rs6000_warn_altivec_long)
- warning (0, "use of %<long%> in AltiVec types is deprecated; use %<int%>");
- }
- else if (type == long_long_unsigned_type_node
- || type == long_long_integer_type_node)
- error ("use of %<long long%> in AltiVec types is invalid");
- else if (type == double_type_node)
- error ("use of %<double%> in AltiVec types is invalid");
+ if (!TARGET_VSX)
+ {
+ if (type == long_unsigned_type_node || type == long_integer_type_node)
+ {
+ if (TARGET_64BIT)
+ error ("use of %<long%> in AltiVec types is invalid for "
+ "64-bit code without -mvsx");
+ else if (rs6000_warn_altivec_long)
+ warning (0, "use of %<long%> in AltiVec types is deprecated; "
+ "use %<int%>");
+ }
+ else if (type == long_long_unsigned_type_node
+ || type == long_long_integer_type_node)
+ error ("use of %<long long%> in AltiVec types is invalid without "
+ "-mvsx");
+ else if (type == double_type_node)
+ error ("use of %<double%> in AltiVec types is invalid without -mvsx");
+ }
else if (type == long_double_type_node)
error ("use of %<long double%> in AltiVec types is invalid");
else if (type == boolean_type_node)
@@ -22255,6 +22836,9 @@ rs6000_handle_altivec_attribute (tree *node,
unsigned_p = TYPE_UNSIGNED (type);
switch (mode)
{
+ case DImode:
+ result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
+ break;
case SImode:
result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
break;
@@ -22265,10 +22849,12 @@ rs6000_handle_altivec_attribute (tree *node,
result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
break;
case SFmode: result = V4SF_type_node; break;
+ case DFmode: result = V2DF_type_node; break;
/* If the user says 'vector int bool', we may be handed the 'bool'
attribute _before_ the 'vector' attribute, and so select the
proper type in the 'b' case below. */
case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
+ case V2DImode: case V2DFmode:
result = type;
default: break;
}
@@ -22276,6 +22862,7 @@ rs6000_handle_altivec_attribute (tree *node,
case 'b':
switch (mode)
{
+ case DImode: case V2DImode: result = bool_V2DI_type_node; break;
case SImode: case V4SImode: result = bool_V4SI_type_node; break;
case HImode: case V8HImode: result = bool_V8HI_type_node; break;
case QImode: case V16QImode: result = bool_V16QI_type_node;
@@ -22320,6 +22907,7 @@ rs6000_mangle_type (const_tree type)
if (type == bool_short_type_node) return "U6__bools";
if (type == pixel_type_node) return "u7__pixel";
if (type == bool_int_type_node) return "U6__booli";
+ if (type == bool_long_type_node) return "U6__booll";
/* Mangle IBM extended float long double as `g' (__float128) on
powerpc*-linux where long-double-64 previously was the default. */
@@ -24557,7 +25145,7 @@ rs6000_vector_mode_supported_p (enum machine_mode mode)
if (TARGET_SPE && SPE_VECTOR_MODE (mode))
return true;
- else if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode))
+ else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
return true;
else
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 3153243b30d..0c5e59333ab 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1883,6 +1883,10 @@ typedef struct rs6000_args
&& EASY_VECTOR_15((n) >> 1) \
&& ((n) & 1) == 0)
+#define EASY_VECTOR_MSB(n,mode) \
+ (((unsigned HOST_WIDE_INT)n) == \
+ ((((unsigned HOST_WIDE_INT)GET_MODE_MASK (mode)) + 1) >> 1))
+
/* Try a machine-dependent way of reloading an illegitimate address
operand. If we find one, push the reload and jump to WIN. This
@@ -2678,6 +2682,7 @@ enum rs6000_builtins
ALTIVEC_BUILTIN_VEC_EXT_V8HI,
ALTIVEC_BUILTIN_VEC_EXT_V16QI,
ALTIVEC_BUILTIN_VEC_EXT_V4SF,
+ ALTIVEC_BUILTIN_COPYSIGN_V4SF,
/* Altivec overloaded builtins. */
ALTIVEC_BUILTIN_VCMPEQ_P,
@@ -2703,6 +2708,7 @@ enum rs6000_builtins
ALTIVEC_BUILTIN_VEC_CMPGT,
ALTIVEC_BUILTIN_VEC_CMPLE,
ALTIVEC_BUILTIN_VEC_CMPLT,
+ ALTIVEC_BUILTIN_VEC_COPYSIGN,
ALTIVEC_BUILTIN_VEC_CTF,
ALTIVEC_BUILTIN_VEC_CTS,
ALTIVEC_BUILTIN_VEC_CTU,
@@ -2745,6 +2751,7 @@ enum rs6000_builtins
ALTIVEC_BUILTIN_VEC_MTVSCR,
ALTIVEC_BUILTIN_VEC_MULE,
ALTIVEC_BUILTIN_VEC_MULO,
+ ALTIVEC_BUILTIN_VEC_NEARBYINT,
ALTIVEC_BUILTIN_VEC_NMSUB,
ALTIVEC_BUILTIN_VEC_NOR,
ALTIVEC_BUILTIN_VEC_OR,
@@ -2755,6 +2762,7 @@ enum rs6000_builtins
ALTIVEC_BUILTIN_VEC_PERM,
ALTIVEC_BUILTIN_VEC_RE,
ALTIVEC_BUILTIN_VEC_RL,
+ ALTIVEC_BUILTIN_VEC_RINT,
ALTIVEC_BUILTIN_VEC_ROUND,
ALTIVEC_BUILTIN_VEC_RSQRTE,
ALTIVEC_BUILTIN_VEC_SEL,
@@ -2772,6 +2780,7 @@ enum rs6000_builtins
ALTIVEC_BUILTIN_VEC_SPLTB,
ALTIVEC_BUILTIN_VEC_SPLTH,
ALTIVEC_BUILTIN_VEC_SPLTW,
+ ALTIVEC_BUILTIN_VEC_SQRT,
ALTIVEC_BUILTIN_VEC_SR,
ALTIVEC_BUILTIN_VEC_SRA,
ALTIVEC_BUILTIN_VEC_SRL,
@@ -3228,6 +3237,8 @@ enum rs6000_builtins
VSX_BUILTIN_XSRSQRTEDP,
VSX_BUILTIN_XSSQRTDP,
VSX_BUILTIN_XSSUBDP,
+ VSX_BUILTIN_CPSGNDP,
+ VSX_BUILTIN_CPSGNSP,
VSX_BUILTIN_XSTDIVDP_FE,
VSX_BUILTIN_XSTDIVDP_FG,
VSX_BUILTIN_XSTSQRTDP_FE,
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index ae1ea99d0a3..9524fe81f13 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -101,6 +101,7 @@
(UNSPEC_RSQRT 48)
(UNSPEC_TOCREL 49)
(UNSPEC_MACHOPIC_OFFSET 50)
+ (UNSPEC_BPERM 51)
])
;;
@@ -167,6 +168,7 @@
(include "power4.md")
(include "power5.md")
(include "power6.md")
+(include "power7.md")
(include "cell.md")
(include "xfpu.md")
@@ -5900,9 +5902,18 @@
(match_dup 5))
(match_dup 3)
(match_dup 4)))]
- "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
- && !HONOR_NANS (DFmode) && !HONOR_SIGNED_ZEROS (DFmode)"
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && ((TARGET_PPC_GFXOPT
+ && !HONOR_NANS (DFmode)
+ && !HONOR_SIGNED_ZEROS (DFmode))
+ || VECTOR_UNIT_VSX_P (DFmode))"
{
+ if (VECTOR_UNIT_VSX_P (DFmode))
+ {
+ emit_insn (gen_vsx_copysigndf3 (operands[0], operands[1],
+ operands[2], CONST0_RTX (DFmode)));
+ DONE;
+ }
operands[3] = gen_reg_rtx (DFmode);
operands[4] = gen_reg_rtx (DFmode);
operands[5] = CONST0_RTX (DFmode);
@@ -6037,7 +6048,8 @@
(define_insn "*negdf2_fpr"
[(set (match_operand:DF 0 "gpc_reg_operand" "=d")
(neg:DF (match_operand:DF 1 "gpc_reg_operand" "d")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && !VECTOR_UNIT_VSX_P (DFmode)"
"fneg %0,%1"
[(set_attr "type" "fp")])
@@ -6050,14 +6062,16 @@
(define_insn "*absdf2_fpr"
[(set (match_operand:DF 0 "gpc_reg_operand" "=d")
(abs:DF (match_operand:DF 1 "gpc_reg_operand" "d")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && !VECTOR_UNIT_VSX_P (DFmode)"
"fabs %0,%1"
[(set_attr "type" "fp")])
(define_insn "*nabsdf2_fpr"
[(set (match_operand:DF 0 "gpc_reg_operand" "=d")
(neg:DF (abs:DF (match_operand:DF 1 "gpc_reg_operand" "d"))))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && !VECTOR_UNIT_VSX_P (DFmode)"
"fnabs %0,%1"
[(set_attr "type" "fp")])
@@ -6072,7 +6086,8 @@
[(set (match_operand:DF 0 "gpc_reg_operand" "=d")
(plus:DF (match_operand:DF 1 "gpc_reg_operand" "%d")
(match_operand:DF 2 "gpc_reg_operand" "d")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && !VECTOR_UNIT_VSX_P (DFmode)"
"{fa|fadd} %0,%1,%2"
[(set_attr "type" "fp")
(set_attr "fp_type" "fp_addsub_d")])
@@ -6088,7 +6103,8 @@
[(set (match_operand:DF 0 "gpc_reg_operand" "=d")
(minus:DF (match_operand:DF 1 "gpc_reg_operand" "d")
(match_operand:DF 2 "gpc_reg_operand" "d")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && !VECTOR_UNIT_VSX_P (DFmode)"
"{fs|fsub} %0,%1,%2"
[(set_attr "type" "fp")
(set_attr "fp_type" "fp_addsub_d")])
@@ -6104,7 +6120,8 @@
[(set (match_operand:DF 0 "gpc_reg_operand" "=d")
(mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d")
(match_operand:DF 2 "gpc_reg_operand" "d")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && !VECTOR_UNIT_VSX_P (DFmode)"
"{fm|fmul} %0,%1,%2"
[(set_attr "type" "dmul")
(set_attr "fp_type" "fp_mul_d")])
@@ -6122,7 +6139,8 @@
[(set (match_operand:DF 0 "gpc_reg_operand" "=d")
(div:DF (match_operand:DF 1 "gpc_reg_operand" "d")
(match_operand:DF 2 "gpc_reg_operand" "d")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && !TARGET_SIMPLE_FPU"
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && !TARGET_SIMPLE_FPU
+ && !VECTOR_UNIT_VSX_P (DFmode)"
"{fd|fdiv} %0,%1,%2"
[(set_attr "type" "ddiv")])
@@ -6138,73 +6156,81 @@
DONE;
})
-(define_insn "fred"
+(define_expand "fred"
[(set (match_operand:DF 0 "gpc_reg_operand" "=d")
(unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d")] UNSPEC_FRES))]
- "TARGET_POPCNTB && flag_finite_math_only"
+ "(TARGET_POPCNTB || VECTOR_UNIT_VSX_P (DFmode)) && flag_finite_math_only"
+ "")
+
+(define_insn "*fred_fpr"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+ (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))]
+ "TARGET_POPCNTB && flag_finite_math_only && !VECTOR_UNIT_VSX_P (DFmode)"
"fre %0,%1"
[(set_attr "type" "fp")])
-(define_insn ""
+(define_insn "*fmadddf4_fpr"
[(set (match_operand:DF 0 "gpc_reg_operand" "=d")
(plus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d")
(match_operand:DF 2 "gpc_reg_operand" "d"))
(match_operand:DF 3 "gpc_reg_operand" "d")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT"
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT
+ && VECTOR_UNIT_NONE_P (DFmode)"
"{fma|fmadd} %0,%1,%2,%3"
[(set_attr "type" "dmul")
(set_attr "fp_type" "fp_maddsub_d")])
-(define_insn ""
+(define_insn "*fmsubdf4_fpr"
[(set (match_operand:DF 0 "gpc_reg_operand" "=d")
(minus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d")
(match_operand:DF 2 "gpc_reg_operand" "d"))
(match_operand:DF 3 "gpc_reg_operand" "d")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT"
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT
+ && VECTOR_UNIT_NONE_P (DFmode)"
"{fms|fmsub} %0,%1,%2,%3"
[(set_attr "type" "dmul")
(set_attr "fp_type" "fp_maddsub_d")])
-(define_insn ""
+(define_insn "*fnmadddf4_fpr_1"
[(set (match_operand:DF 0 "gpc_reg_operand" "=d")
(neg:DF (plus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d")
(match_operand:DF 2 "gpc_reg_operand" "d"))
(match_operand:DF 3 "gpc_reg_operand" "d"))))]
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT
- && HONOR_SIGNED_ZEROS (DFmode)"
+ && HONOR_SIGNED_ZEROS (DFmode) && VECTOR_UNIT_NONE_P (DFmode)"
"{fnma|fnmadd} %0,%1,%2,%3"
[(set_attr "type" "dmul")
(set_attr "fp_type" "fp_maddsub_d")])
-(define_insn ""
+(define_insn "*fnmadddf4_fpr_2"
[(set (match_operand:DF 0 "gpc_reg_operand" "=d")
(minus:DF (mult:DF (neg:DF (match_operand:DF 1 "gpc_reg_operand" "d"))
(match_operand:DF 2 "gpc_reg_operand" "d"))
(match_operand:DF 3 "gpc_reg_operand" "d")))]
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT
- && ! HONOR_SIGNED_ZEROS (DFmode)"
+ && ! HONOR_SIGNED_ZEROS (DFmode) && VECTOR_UNIT_NONE_P (DFmode)"
"{fnma|fnmadd} %0,%1,%2,%3"
[(set_attr "type" "dmul")
(set_attr "fp_type" "fp_maddsub_d")])
-(define_insn ""
+(define_insn "*fnmsubdf4_fpr_1"
[(set (match_operand:DF 0 "gpc_reg_operand" "=d")
(neg:DF (minus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d")
(match_operand:DF 2 "gpc_reg_operand" "d"))
(match_operand:DF 3 "gpc_reg_operand" "d"))))]
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT
- && HONOR_SIGNED_ZEROS (DFmode)"
+ && HONOR_SIGNED_ZEROS (DFmode) && VECTOR_UNIT_NONE_P (DFmode)"
"{fnms|fnmsub} %0,%1,%2,%3"
[(set_attr "type" "dmul")
(set_attr "fp_type" "fp_maddsub_d")])
-(define_insn ""
+(define_insn "*fnmsubdf4_fpr_2"
[(set (match_operand:DF 0 "gpc_reg_operand" "=d")
(minus:DF (match_operand:DF 3 "gpc_reg_operand" "d")
(mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d")
(match_operand:DF 2 "gpc_reg_operand" "d"))))]
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT
- && ! HONOR_SIGNED_ZEROS (DFmode)"
+ && ! HONOR_SIGNED_ZEROS (DFmode) && VECTOR_UNIT_NONE_P (DFmode)"
"{fnms|fnmsub} %0,%1,%2,%3"
[(set_attr "type" "dmul")
(set_attr "fp_type" "fp_maddsub_d")])
@@ -6213,7 +6239,8 @@
[(set (match_operand:DF 0 "gpc_reg_operand" "=d")
(sqrt:DF (match_operand:DF 1 "gpc_reg_operand" "d")))]
"(TARGET_PPC_GPOPT || TARGET_POWER2) && TARGET_HARD_FLOAT && TARGET_FPRS
- && TARGET_DOUBLE_FLOAT"
+ && TARGET_DOUBLE_FLOAT
+ && !VECTOR_UNIT_VSX_P (DFmode)"
"fsqrt %0,%1"
[(set_attr "type" "dsqrt")])
@@ -6308,6 +6335,12 @@
"TARGET_HARD_FLOAT && TARGET_E500_DOUBLE"
"")
+(define_expand "fixuns_truncdfdi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (unsigned_fix:DI (match_operand:DF 1 "register_operand" "")))]
+ "TARGET_HARD_FLOAT && TARGET_VSX"
+ "")
+
; For each of these conversions, there is a define_expand, a define_insn
; with a '#' template, and a define_split (with C code). The idea is
; to allow constant folding with the template of the define_insn,
@@ -6549,24 +6582,38 @@
"{fcirz|fctiwz} %0,%1"
[(set_attr "type" "fp")])
-(define_insn "btruncdf2"
+(define_expand "btruncdf2"
[(set (match_operand:DF 0 "gpc_reg_operand" "=d")
(unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d")] UNSPEC_FRIZ))]
"TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
+ "")
+
+(define_insn "*btruncdf2_fpr"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+ (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "f")] UNSPEC_FRIZ))]
+ "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && !VECTOR_UNIT_VSX_P (DFmode)"
"friz %0,%1"
[(set_attr "type" "fp")])
(define_insn "btruncsf2"
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
(unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")] UNSPEC_FRIZ))]
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT "
+ "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
"friz %0,%1"
[(set_attr "type" "fp")])
-(define_insn "ceildf2"
+(define_expand "ceildf2"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "")] UNSPEC_FRIP))]
+ "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
+ "")
+
+(define_insn "*ceildf2_fpr"
[(set (match_operand:DF 0 "gpc_reg_operand" "=d")
(unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d")] UNSPEC_FRIP))]
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
+ "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && !VECTOR_UNIT_VSX_P (DFmode)"
"frip %0,%1"
[(set_attr "type" "fp")])
@@ -6577,10 +6624,17 @@
"frip %0,%1"
[(set_attr "type" "fp")])
-(define_insn "floordf2"
+(define_expand "floordf2"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "")] UNSPEC_FRIM))]
+ "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
+ "")
+
+(define_insn "*floordf2_fpr"
[(set (match_operand:DF 0 "gpc_reg_operand" "=d")
(unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d")] UNSPEC_FRIM))]
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
+ "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && !VECTOR_UNIT_VSX_P (DFmode)"
"frim %0,%1"
[(set_attr "type" "fp")])
@@ -6591,6 +6645,7 @@
"frim %0,%1"
[(set_attr "type" "fp")])
+;; No VSX equivalent to frin
(define_insn "rounddf2"
[(set (match_operand:DF 0 "gpc_reg_operand" "=d")
(unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d")] UNSPEC_FRIN))]
@@ -6605,6 +6660,12 @@
"frin %0,%1"
[(set_attr "type" "fp")])
+(define_expand "ftruncdf2"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (fix:DF (match_operand:DF 1 "gpc_reg_operand" "")))]
+ "VECTOR_UNIT_VSX_P (DFmode)"
+ "")
+
; An UNSPEC is used so we don't have to support SImode in FP registers.
(define_insn "stfiwx"
[(set (match_operand:SI 0 "memory_operand" "=Z")
@@ -6620,17 +6681,40 @@
"TARGET_HARD_FLOAT && !TARGET_FPRS"
"")
-(define_insn "floatdidf2"
+(define_expand "floatdidf2"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (float:DF (match_operand:DI 1 "gpc_reg_operand" "")))]
+ "(TARGET_POWERPC64 || TARGET_XILINX_FPU || VECTOR_UNIT_VSX_P (DFmode))
+ && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS"
+ "")
+
+(define_insn "*floatdidf2_fpr"
[(set (match_operand:DF 0 "gpc_reg_operand" "=d")
(float:DF (match_operand:DI 1 "gpc_reg_operand" "!d#r")))]
- "(TARGET_POWERPC64 || TARGET_XILINX_FPU) && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS"
+ "(TARGET_POWERPC64 || TARGET_XILINX_FPU)
+ && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS
+ && !VECTOR_UNIT_VSX_P (DFmode)"
"fcfid %0,%1"
[(set_attr "type" "fp")])
-(define_insn "fix_truncdfdi2"
+(define_expand "floatunsdidf2"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (unsigned_float:DF (match_operand:DI 1 "gpc_reg_operand" "")))]
+ "TARGET_VSX"
+ "")
+
+(define_expand "fix_truncdfdi2"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (fix:DI (match_operand:DF 1 "gpc_reg_operand" "")))]
+ "(TARGET_POWERPC64 || TARGET_XILINX_FPU || VECTOR_UNIT_VSX_P (DFmode))
+ && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS"
+ "")
+
+(define_insn "*fix_truncdfdi2_fpr"
[(set (match_operand:DI 0 "gpc_reg_operand" "=!d#r")
(fix:DI (match_operand:DF 1 "gpc_reg_operand" "d")))]
- "(TARGET_POWERPC64 || TARGET_XILINX_FPU) && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS"
+ "(TARGET_POWERPC64 || TARGET_XILINX_FPU) && TARGET_HARD_FLOAT
+ && TARGET_DOUBLE_FLOAT && TARGET_FPRS && !VECTOR_UNIT_VSX_P (DFmode)"
"fctidz %0,%1"
[(set_attr "type" "fp")])
@@ -8956,8 +9040,8 @@
;; The "??" is a kludge until we can figure out a more reasonable way
;; of handling these non-offsettable values.
(define_insn "*movdf_hardfloat32"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=!r,??r,m,d,d,m,!r,!r,!r")
- (match_operand:DF 1 "input_operand" "r,m,r,d,m,d,G,H,F"))]
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=!r,??r,m,ws,?wa,ws,?wa,Z,?Z,d,d,m,wa,!r,!r,!r")
+ (match_operand:DF 1 "input_operand" "r,m,r,ws,wa,Z,Z,ws,wa,d,m,d,j,G,H,F"))]
"! TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
&& (gpc_reg_operand (operands[0], DFmode)
|| gpc_reg_operand (operands[1], DFmode))"
@@ -9036,19 +9120,30 @@
return \"\";
}
case 3:
- return \"fmr %0,%1\";
case 4:
- return \"lfd%U1%X1 %0,%1\";
+ return \"xxlor %x0,%x1,%x1\";
case 5:
- return \"stfd%U0%X0 %1,%0\";
case 6:
+ return \"lxsd%U1x %x0,%y1\";
case 7:
case 8:
+ return \"stxsd%U0x %x1,%y0\";
+ case 9:
+ return \"fmr %0,%1\";
+ case 10:
+ return \"lfd%U1%X1 %0,%1\";
+ case 11:
+ return \"stfd%U0%X0 %1,%0\";
+ case 12:
+ return \"xxlxor %x0,%x0,%x0\";
+ case 13:
+ case 14:
+ case 15:
return \"#\";
}
}"
- [(set_attr "type" "two,load,store,fp,fpload,fpstore,*,*,*")
- (set_attr "length" "8,16,16,4,4,4,8,12,16")])
+ [(set_attr "type" "two,load,store,fp,fp,fpload,fpload,fpstore,fpstore,fp,fpload,fpstore,vecsimple,*,*,*")
+ (set_attr "length" "8,16,16,4,4,4,4,4,4,4,4,4,4,8,12,16")])
(define_insn "*movdf_softfloat32"
[(set (match_operand:DF 0 "nonimmediate_operand" "=r,r,m,r,r,r")
@@ -9096,19 +9191,26 @@
; ld/std require word-aligned displacements -> 'Y' constraint.
; List Y->r and r->Y before r->r for reload.
(define_insn "*movdf_hardfloat64_mfpgpr"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,!r,d,d,m,*c*l,!r,*h,!r,!r,!r,r,d")
- (match_operand:DF 1 "input_operand" "r,Y,r,d,m,d,r,h,0,G,H,F,d,r"))]
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,!r,ws,?wa,ws,?wa,Z,?Z,d,d,m,wa,*c*l,!r,*h,!r,!r,!r,r,d")
+ (match_operand:DF 1 "input_operand" "r,Y,r,ws,?wa,Z,Z,ws,wa,d,m,d,j,r,h,0,G,H,F,d,r"))]
"TARGET_POWERPC64 && TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS
- && TARGET_DOUBLE_FLOAT
+ && TARGET_DOUBLE_FLOAT
&& (gpc_reg_operand (operands[0], DFmode)
|| gpc_reg_operand (operands[1], DFmode))"
"@
std%U0%X0 %1,%0
ld%U1%X1 %0,%1
mr %0,%1
+ xxlor %x0,%x1,%x1
+ xxlor %x0,%x1,%x1
+ lxsd%U1x %x0,%y1
+ lxsd%U1x %x0,%y1
+ stxsd%U0x %x1,%y0
+ stxsd%U0x %x1,%y0
fmr %0,%1
lfd%U1%X1 %0,%1
stfd%U0%X0 %1,%0
+ xxlxor %x0,%x0,%x0
mt%0 %1
mf%1 %0
{cror 0,0,0|nop}
@@ -9117,33 +9219,40 @@
#
mftgpr %0,%1
mffgpr %0,%1"
- [(set_attr "type" "store,load,*,fp,fpload,fpstore,mtjmpr,mfjmpr,*,*,*,*,mftgpr,mffgpr")
- (set_attr "length" "4,4,4,4,4,4,4,4,4,8,12,16,4,4")])
+ [(set_attr "type" "store,load,*,fp,fp,fpload,fpload,fpstore,fpstore,fp,fpload,fpstore,vecsimple,mtjmpr,mfjmpr,*,*,*,*,mftgpr,mffgpr")
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4")])
; ld/std require word-aligned displacements -> 'Y' constraint.
; List Y->r and r->Y before r->r for reload.
(define_insn "*movdf_hardfloat64"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,!r,d,d,m,*c*l,!r,*h,!r,!r,!r")
- (match_operand:DF 1 "input_operand" "r,Y,r,d,m,d,r,h,0,G,H,F"))]
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,!r,ws,?wa,ws,?wa,Z,?Z,d,d,m,wa,*c*l,!r,*h,!r,!r,!r")
+ (match_operand:DF 1 "input_operand" "r,Y,r,ws,wa,Z,Z,ws,wa,d,m,d,j,r,h,0,G,H,F"))]
"TARGET_POWERPC64 && !TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS
- && TARGET_DOUBLE_FLOAT
+ && TARGET_DOUBLE_FLOAT
&& (gpc_reg_operand (operands[0], DFmode)
|| gpc_reg_operand (operands[1], DFmode))"
"@
std%U0%X0 %1,%0
ld%U1%X1 %0,%1
mr %0,%1
+ xxlor %x0,%x1,%x1
+ xxlor %x0,%x1,%x1
+ lxsd%U1x %x0,%y1
+ lxsd%U1x %x0,%y1
+ stxsd%U0x %x1,%y0
+ stxsd%U0x %x1,%y0
fmr %0,%1
lfd%U1%X1 %0,%1
stfd%U0%X0 %1,%0
+ xxlxor %x0,%x0,%x0
mt%0 %1
mf%1 %0
{cror 0,0,0|nop}
#
#
#"
- [(set_attr "type" "store,load,*,fp,fpload,fpstore,mtjmpr,mfjmpr,*,*,*,*")
- (set_attr "length" "4,4,4,4,4,4,4,4,4,8,12,16")])
+ [(set_attr "type" "store,load,*,fp,fp,fpload,fpload,fpstore,fpstore,fp,fpload,fpstore,vecsimple,mtjmpr,mfjmpr,*,*,*,*")
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16")])
(define_insn "*movdf_softfloat64"
[(set (match_operand:DF 0 "nonimmediate_operand" "=r,Y,r,cl,r,r,r,r,*h")
@@ -9720,15 +9829,16 @@
(define_insn "*movti_ppc64"
[(set (match_operand:TI 0 "nonimmediate_operand" "=r,o<>,r")
(match_operand:TI 1 "input_operand" "r,r,m"))]
- "TARGET_POWERPC64 && (gpc_reg_operand (operands[0], TImode)
- || gpc_reg_operand (operands[1], TImode))"
+ "(TARGET_POWERPC64 && (gpc_reg_operand (operands[0], TImode)
+ || gpc_reg_operand (operands[1], TImode)))
+ && VECTOR_MEM_NONE_P (TImode)"
"#"
[(set_attr "type" "*,store,load")])
(define_split
[(set (match_operand:TI 0 "gpc_reg_operand" "")
(match_operand:TI 1 "const_double_operand" ""))]
- "TARGET_POWERPC64"
+ "TARGET_POWERPC64 && VECTOR_MEM_NONE_P (TImode)"
[(set (match_dup 2) (match_dup 4))
(set (match_dup 3) (match_dup 5))]
"
@@ -9754,7 +9864,7 @@
(define_split
[(set (match_operand:TI 0 "nonimmediate_operand" "")
(match_operand:TI 1 "input_operand" ""))]
- "reload_completed
+ "reload_completed && VECTOR_MEM_NONE_P (TImode)
&& gpr_or_gpr_p (operands[0], operands[1])"
[(pc)]
{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
@@ -12647,7 +12757,8 @@
[(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
(compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "d")
(match_operand:DF 2 "gpc_reg_operand" "d")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && !VECTOR_UNIT_VSX_P (DFmode)"
"fcmpu %0,%1,%2"
[(set_attr "type" "fpcompare")])
@@ -15320,9 +15431,19 @@
}"
[(set_attr "type" "load")])
+(define_insn "bpermd_<mode>"
+ [(set (match_operand:P 0 "gpc_reg_operand" "=r")
+ (unspec:P [(match_operand:P 1 "gpc_reg_operand" "r")
+ (match_operand:P 2 "gpc_reg_operand" "r")] UNSPEC_BPERM))]
+ "TARGET_POPCNTD"
+ "bpermd %0,%1,%2"
+ [(set_attr "type" "integer")])
+
+
(include "sync.md")
(include "vector.md")
+(include "vsx.md")
(include "altivec.md")
(include "spe.md")
(include "dfp.md")
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index ac61ffc582e..90af9dce47b 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -151,6 +151,10 @@ malign-branch-targets
Target Undocumented Report Var(TARGET_ALIGN_BRANCH_TARGETS) Init(-1)
; Explicitly set/unset whether rs6000_align_branch_targets is set
+mvectorize-builtins
+Target Undocumented Report Var(TARGET_VECTORIZE_BUILTINS) Init(-1)
+; Explicitly control whether we vectorize the builtins or not.
+
mupdate
Target Report Var(TARGET_UPDATE) Init(1)
Generate load/store with update instructions
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 0b8e311078d..66a367a7b62 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -53,6 +53,7 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rios1.md \
$(srcdir)/config/rs6000/power4.md \
$(srcdir)/config/rs6000/power5.md \
$(srcdir)/config/rs6000/power6.md \
+ $(srcdir)/config/rs6000/power7.md \
$(srcdir)/config/rs6000/cell.md \
$(srcdir)/config/rs6000/xfpu.md \
$(srcdir)/config/rs6000/predicates.md \
@@ -60,6 +61,7 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rios1.md \
$(srcdir)/config/rs6000/darwin.md \
$(srcdir)/config/rs6000/sync.md \
$(srcdir)/config/rs6000/vector.md \
+ $(srcdir)/config/rs6000/vsx.md \
$(srcdir)/config/rs6000/altivec.md \
$(srcdir)/config/rs6000/spe.md \
$(srcdir)/config/rs6000/dfp.md \
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index 1546db7a74f..6366e4fe0e7 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -1,6 +1,7 @@
-;; Expander definitions for vector support. No instructions are in this file,
-;; this file provides the generic vector expander, and the actual vector
-;; instructions will be in altivec.md.
+;; Expander definitions for vector support shared between AltiVec and VSX.
+;; No instructions are in this file; it provides the generic vector
+;; expanders, and the actual vector instructions are in altivec.md and
+;; vsx.md.
;; Copyright (C) 2009
;; Free Software Foundation, Inc.
@@ -27,10 +28,10 @@
(define_mode_iterator VEC_I [V16QI V8HI V4SI])
;; Vector float modes
-(define_mode_iterator VEC_F [V4SF])
+(define_mode_iterator VEC_F [V4SF V2DF])
;; Vector arithmetic modes
-(define_mode_iterator VEC_A [V16QI V8HI V4SI V4SF])
+(define_mode_iterator VEC_A [V16QI V8HI V4SI V4SF V2DF])
;; Vector modes that need alignment via permutes
(define_mode_iterator VEC_K [V16QI V8HI V4SI V4SF])
@@ -41,6 +42,9 @@
;; Vector modes for moves. Don't do TImode here.
(define_mode_iterator VEC_M [V16QI V8HI V4SI V2DI V4SF V2DF])
+;; Vector modes for types that don't need a realignment under VSX
+(define_mode_iterator VEC_N [V4SI V4SF V2DI V2DF])
+
;; Vector comparison modes
(define_mode_iterator VEC_C [V16QI V8HI V4SI V4SF V2DF])
@@ -75,7 +79,7 @@
(define_expand "mov<mode>"
[(set (match_operand:VEC_M 0 "nonimmediate_operand" "")
(match_operand:VEC_M 1 "any_operand" ""))]
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
{
if (can_create_pseudo_p ())
{
@@ -89,24 +93,25 @@
}
})
-;; Generic vector floating point load/store instructions.
+;; Generic vector floating point load/store instructions. These will match
+;; insns defined in vsx.md or altivec.md depending on the switches.
(define_expand "vector_load_<mode>"
[(set (match_operand:VEC_M 0 "vfloat_operand" "")
(match_operand:VEC_M 1 "memory_operand" ""))]
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
(define_expand "vector_store_<mode>"
[(set (match_operand:VEC_M 0 "memory_operand" "")
(match_operand:VEC_M 1 "vfloat_operand" ""))]
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
;; Splits if a GPR register was chosen for the move
(define_split
[(set (match_operand:VEC_L 0 "nonimmediate_operand" "")
(match_operand:VEC_L 1 "input_operand" ""))]
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)
&& reload_completed
&& gpr_or_gpr_p (operands[0], operands[1])"
[(pc)]
@@ -149,7 +154,7 @@
(and:P (plus:P (match_operand:P 1 "gpc_reg_operand" "r")
(match_operand:P 2 "reg_or_cint_operand" "rI"))
(const_int -16)))]
- "TARGET_ALTIVEC && (reload_in_progress || reload_completed)"
+ "(TARGET_ALTIVEC || TARGET_VSX) && (reload_in_progress || reload_completed)"
"#"
"&& reload_completed"
[(set (match_dup 0)
@@ -167,7 +172,7 @@
[(set (match_operand:P 0 "gpc_reg_operand" "=b")
(and:P (match_operand:P 1 "gpc_reg_operand" "r")
(const_int -16)))]
- "TARGET_ALTIVEC && (reload_in_progress || reload_completed)"
+ "(TARGET_ALTIVEC || TARGET_VSX) && (reload_in_progress || reload_completed)"
"#"
"&& reload_completed"
[(parallel [(set (match_dup 0)
@@ -180,68 +185,131 @@
[(set (match_operand:VEC_F 0 "vfloat_operand" "")
(plus:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")
(match_operand:VEC_F 2 "vfloat_operand" "")))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
(define_expand "sub<mode>3"
[(set (match_operand:VEC_F 0 "vfloat_operand" "")
(minus:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")
(match_operand:VEC_F 2 "vfloat_operand" "")))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
(define_expand "mul<mode>3"
[(set (match_operand:VEC_F 0 "vfloat_operand" "")
(mult:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")
(match_operand:VEC_F 2 "vfloat_operand" "")))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode) && TARGET_FUSED_MADD"
+ "(VECTOR_UNIT_VSX_P (<MODE>mode)
+ || (VECTOR_UNIT_ALTIVEC_P (<MODE>mode) && TARGET_FUSED_MADD))"
"
{
- emit_insn (gen_altivec_mulv4sf3 (operands[0], operands[1], operands[2]));
- DONE;
+ if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode))
+ {
+ emit_insn (gen_altivec_mulv4sf3 (operands[0], operands[1], operands[2]));
+ DONE;
+ }
}")
+(define_expand "div<mode>3"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (div:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")
+ (match_operand:VEC_F 2 "vfloat_operand" "")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "")
+
(define_expand "neg<mode>2"
[(set (match_operand:VEC_F 0 "vfloat_operand" "")
(neg:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"
{
- emit_insn (gen_altivec_negv4sf2 (operands[0], operands[1]));
- DONE;
+ if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode))
+ {
+ emit_insn (gen_altivec_negv4sf2 (operands[0], operands[1]));
+ DONE;
+ }
}")
(define_expand "abs<mode>2"
[(set (match_operand:VEC_F 0 "vfloat_operand" "")
(abs:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"
{
- emit_insn (gen_altivec_absv4sf2 (operands[0], operands[1]));
- DONE;
+ if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode))
+ {
+ emit_insn (gen_altivec_absv4sf2 (operands[0], operands[1]));
+ DONE;
+ }
}")
(define_expand "smin<mode>3"
[(set (match_operand:VEC_F 0 "register_operand" "")
(smin:VEC_F (match_operand:VEC_F 1 "register_operand" "")
(match_operand:VEC_F 2 "register_operand" "")))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
(define_expand "smax<mode>3"
[(set (match_operand:VEC_F 0 "register_operand" "")
(smax:VEC_F (match_operand:VEC_F 1 "register_operand" "")
(match_operand:VEC_F 2 "register_operand" "")))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
+(define_expand "sqrt<mode>2"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (sqrt:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "")
+
(define_expand "ftrunc<mode>2"
[(set (match_operand:VEC_F 0 "vfloat_operand" "")
(fix:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "vector_ceil<mode>2"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (unspec:VEC_F [(match_operand:VEC_F 1 "vfloat_operand" "")]
+ UNSPEC_FRIP))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "vector_floor<mode>2"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (unspec:VEC_F [(match_operand:VEC_F 1 "vfloat_operand" "")]
+ UNSPEC_FRIM))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
+(define_expand "vector_btrunc<mode>2"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (fix:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "vector_copysign<mode>3"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (if_then_else:VEC_F
+ (ge:VEC_F (match_operand:VEC_F 2 "vfloat_operand" "")
+ (match_dup 3))
+ (abs:VEC_F (match_operand:VEC_F 1 "vfloat_operand" ""))
+ (neg:VEC_F (abs:VEC_F (match_dup 1)))))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "
+{
+ if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode))
+ {
+ emit_insn (gen_altivec_copysign_v4sf3 (operands[0], operands[1],
+ operands[2]));
+ DONE;
+ }
+
+ operands[3] = CONST0_RTX (<MODE>mode);
+}")
+
;; Vector comparisons
(define_expand "vcond<mode>"
@@ -252,7 +320,7 @@
(match_operand:VEC_F 5 "vfloat_operand" "")])
(match_operand:VEC_F 1 "vfloat_operand" "")
(match_operand:VEC_F 2 "vfloat_operand" "")))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"
{
if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2],
@@ -302,21 +370,21 @@
[(set (match_operand:VEC_C 0 "vlogical_operand" "")
(eq:VEC_C (match_operand:VEC_C 1 "vlogical_operand" "")
(match_operand:VEC_C 2 "vlogical_operand" "")))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
(define_expand "vector_gt<mode>"
[(set (match_operand:VEC_C 0 "vlogical_operand" "")
(gt:VEC_C (match_operand:VEC_C 1 "vlogical_operand" "")
(match_operand:VEC_C 2 "vlogical_operand" "")))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
(define_expand "vector_ge<mode>"
[(set (match_operand:VEC_C 0 "vlogical_operand" "")
(ge:VEC_C (match_operand:VEC_C 1 "vlogical_operand" "")
(match_operand:VEC_C 2 "vlogical_operand" "")))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
(define_expand "vector_gtu<mode>"
@@ -342,7 +410,7 @@
(const_int 0))
(match_operand:VEC_L 2 "vlogical_operand" "")
(match_operand:VEC_L 1 "vlogical_operand" "")))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
(define_expand "vector_select_<mode>_uns"
@@ -352,7 +420,7 @@
(const_int 0))
(match_operand:VEC_L 2 "vlogical_operand" "")
(match_operand:VEC_L 1 "vlogical_operand" "")))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
;; Expansions that compare vectors producing a vector result and a predicate,
@@ -366,7 +434,7 @@
(set (match_operand:VEC_A 0 "vlogical_operand" "")
(eq:VEC_A (match_dup 1)
(match_dup 2)))])]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
(define_expand "vector_gt_<mode>_p"
@@ -378,7 +446,7 @@
(set (match_operand:VEC_A 0 "vlogical_operand" "")
(gt:VEC_A (match_dup 1)
(match_dup 2)))])]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
(define_expand "vector_ge_<mode>_p"
@@ -390,7 +458,7 @@
(set (match_operand:VEC_F 0 "vfloat_operand" "")
(ge:VEC_F (match_dup 1)
(match_dup 2)))])]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
(define_expand "vector_gtu_<mode>_p"
@@ -402,16 +470,16 @@
(set (match_operand:VEC_I 0 "vlogical_operand" "")
(gtu:VEC_I (match_dup 1)
(match_dup 2)))])]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
-;; AltiVec predicates.
+;; AltiVec/VSX predicates.
(define_expand "cr6_test_for_zero"
[(set (match_operand:SI 0 "register_operand" "=r")
(eq:SI (reg:CC 74)
(const_int 0)))]
- "TARGET_ALTIVEC"
+ "TARGET_ALTIVEC || TARGET_VSX"
"")
(define_expand "cr6_test_for_zero_reverse"
@@ -419,14 +487,14 @@
(eq:SI (reg:CC 74)
(const_int 0)))
(set (match_dup 0) (minus:SI (const_int 1) (match_dup 0)))]
- "TARGET_ALTIVEC"
+ "TARGET_ALTIVEC || TARGET_VSX"
"")
(define_expand "cr6_test_for_lt"
[(set (match_operand:SI 0 "register_operand" "=r")
(lt:SI (reg:CC 74)
(const_int 0)))]
- "TARGET_ALTIVEC"
+ "TARGET_ALTIVEC || TARGET_VSX"
"")
(define_expand "cr6_test_for_lt_reverse"
@@ -434,7 +502,7 @@
(lt:SI (reg:CC 74)
(const_int 0)))
(set (match_dup 0) (minus:SI (const_int 1) (match_dup 0)))]
- "TARGET_ALTIVEC"
+ "TARGET_ALTIVEC || TARGET_VSX"
"")
@@ -443,82 +511,94 @@
[(set (match_operand:VEC_L 0 "vlogical_operand" "")
(xor:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
(match_operand:VEC_L 2 "vlogical_operand" "")))]
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
(define_expand "ior<mode>3"
[(set (match_operand:VEC_L 0 "vlogical_operand" "")
(ior:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
(match_operand:VEC_L 2 "vlogical_operand" "")))]
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
(define_expand "and<mode>3"
[(set (match_operand:VEC_L 0 "vlogical_operand" "")
(and:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
(match_operand:VEC_L 2 "vlogical_operand" "")))]
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
(define_expand "one_cmpl<mode>2"
[(set (match_operand:VEC_L 0 "vlogical_operand" "")
(not:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")))]
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
(define_expand "nor<mode>3"
[(set (match_operand:VEC_L 0 "vlogical_operand" "")
(not:VEC_L (ior:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
(match_operand:VEC_L 2 "vlogical_operand" ""))))]
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
(define_expand "andc<mode>3"
[(set (match_operand:VEC_L 0 "vlogical_operand" "")
(and:VEC_L (not:VEC_L (match_operand:VEC_L 2 "vlogical_operand" ""))
(match_operand:VEC_L 1 "vlogical_operand" "")))]
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
;; Same size conversions
(define_expand "float<VEC_int><mode>2"
[(set (match_operand:VEC_F 0 "vfloat_operand" "")
(float:VEC_F (match_operand:<VEC_INT> 1 "vint_operand" "")))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"
{
- emit_insn (gen_altivec_vcfsx (operands[0], operands[1], const0_rtx));
- DONE;
+ if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode))
+ {
+ emit_insn (gen_altivec_vcfsx (operands[0], operands[1], const0_rtx));
+ DONE;
+ }
}")
(define_expand "unsigned_float<VEC_int><mode>2"
[(set (match_operand:VEC_F 0 "vfloat_operand" "")
(unsigned_float:VEC_F (match_operand:<VEC_INT> 1 "vint_operand" "")))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"
{
- emit_insn (gen_altivec_vcfux (operands[0], operands[1], const0_rtx));
- DONE;
+ if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode))
+ {
+ emit_insn (gen_altivec_vcfux (operands[0], operands[1], const0_rtx));
+ DONE;
+ }
}")
(define_expand "fix_trunc<mode><VEC_int>2"
[(set (match_operand:<VEC_INT> 0 "vint_operand" "")
(fix:<VEC_INT> (match_operand:VEC_F 1 "vfloat_operand" "")))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"
{
- emit_insn (gen_altivec_vctsxs (operands[0], operands[1], const0_rtx));
- DONE;
+ if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode))
+ {
+ emit_insn (gen_altivec_vctsxs (operands[0], operands[1], const0_rtx));
+ DONE;
+ }
}")
(define_expand "fixuns_trunc<mode><VEC_int>2"
[(set (match_operand:<VEC_INT> 0 "vint_operand" "")
(unsigned_fix:<VEC_INT> (match_operand:VEC_F 1 "vfloat_operand" "")))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"
{
- emit_insn (gen_altivec_vctuxs (operands[0], operands[1], const0_rtx));
- DONE;
+ if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode))
+ {
+ emit_insn (gen_altivec_vctuxs (operands[0], operands[1], const0_rtx));
+ DONE;
+ }
}")
@@ -526,7 +606,7 @@
(define_expand "vec_init<mode>"
[(match_operand:VEC_E 0 "vlogical_operand" "")
(match_operand:VEC_E 1 "" "")]
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
{
rs6000_expand_vector_init (operands[0], operands[1]);
DONE;
@@ -536,7 +616,7 @@
[(match_operand:VEC_E 0 "vlogical_operand" "")
(match_operand:<VEC_base> 1 "register_operand" "")
(match_operand 2 "const_int_operand" "")]
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
{
rs6000_expand_vector_set (operands[0], operands[1], INTVAL (operands[2]));
DONE;
@@ -546,7 +626,7 @@
[(match_operand:<VEC_base> 0 "register_operand" "")
(match_operand:VEC_E 1 "vlogical_operand" "")
(match_operand 2 "const_int_operand" "")]
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
{
rs6000_expand_vector_extract (operands[0], operands[1],
INTVAL (operands[2]));
@@ -568,7 +648,7 @@
(const_int 3)
(const_int 1)]))
(const_int 5)))]
- "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)"
"")
(define_expand "vec_interleave_lowv4sf"
@@ -585,23 +665,171 @@
(const_int 1)
(const_int 3)]))
(const_int 5)))]
- "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)"
+ "")
+
+(define_expand "vec_interleave_highv2df"
+ [(set (match_operand:V2DF 0 "vfloat_operand" "")
+ (vec_concat:V2DF
+ (vec_select:DF (match_operand:V2DF 1 "vfloat_operand" "")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_operand:V2DF 2 "vfloat_operand" "")
+ (parallel [(const_int 0)]))))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "")
+
+(define_expand "vec_interleave_lowv2df"
+ [(set (match_operand:V2DF 0 "vfloat_operand" "")
+ (vec_concat:V2DF
+ (vec_select:DF (match_operand:V2DF 1 "vfloat_operand" "")
+ (parallel [(const_int 1)]))
+ (vec_select:DF (match_operand:V2DF 2 "vfloat_operand" "")
+ (parallel [(const_int 1)]))))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
"")
+;; Convert double word types to single word types
+;; Pack two V2DF vectors (operands 1 and 2) into one V4SF vector (operand 0).
+(define_expand "vec_pack_trunc_v2df"
+ [(match_operand:V4SF 0 "vfloat_operand" "")
+ (match_operand:V2DF 1 "vfloat_operand" "")
+ (match_operand:V2DF 2 "vfloat_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && TARGET_ALTIVEC"
+{
+ rtx r1 = gen_reg_rtx (V4SFmode);
+ rtx r2 = gen_reg_rtx (V4SFmode);
+
+ /* Convert each input separately into a V4SF temporary with xvcvdpsp. */
+ emit_insn (gen_vsx_xvcvdpsp (r1, operands[1]));
+ emit_insn (gen_vsx_xvcvdpsp (r2, operands[2]));
+ /* Combine the two temporaries through the vec_extract_evenv4sf expander. */
+ emit_insn (gen_vec_extract_evenv4sf (operands[0], r1, r2));
+ DONE;
+})
+
+;; Convert two V2DF vectors (operands 1 and 2) with xvcvdpsxws and pack the
+;; results into one V4SI vector (operand 0).
+(define_expand "vec_pack_sfix_trunc_v2df"
+ [(match_operand:V4SI 0 "vint_operand" "")
+ (match_operand:V2DF 1 "vfloat_operand" "")
+ (match_operand:V2DF 2 "vfloat_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && TARGET_ALTIVEC"
+{
+ rtx r1 = gen_reg_rtx (V4SImode);
+ rtx r2 = gen_reg_rtx (V4SImode);
+
+ /* Convert each input separately into a V4SI temporary. */
+ emit_insn (gen_vsx_xvcvdpsxws (r1, operands[1]));
+ emit_insn (gen_vsx_xvcvdpsxws (r2, operands[2]));
+ /* Combine the two temporaries through the vec_extract_evenv4si expander. */
+ emit_insn (gen_vec_extract_evenv4si (operands[0], r1, r2));
+ DONE;
+})
+
+;; Convert two V2DF vectors (operands 1 and 2) with xvcvdpuxws and pack the
+;; results into one V4SI vector (operand 0).
+(define_expand "vec_pack_ufix_trunc_v2df"
+ [(match_operand:V4SI 0 "vint_operand" "")
+ (match_operand:V2DF 1 "vfloat_operand" "")
+ (match_operand:V2DF 2 "vfloat_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && TARGET_ALTIVEC"
+{
+ rtx r1 = gen_reg_rtx (V4SImode);
+ rtx r2 = gen_reg_rtx (V4SImode);
+
+ /* Convert each input separately into a V4SI temporary. */
+ emit_insn (gen_vsx_xvcvdpuxws (r1, operands[1]));
+ emit_insn (gen_vsx_xvcvdpuxws (r2, operands[2]));
+ /* Combine the two temporaries through the vec_extract_evenv4si expander. */
+ emit_insn (gen_vec_extract_evenv4si (operands[0], r1, r2));
+ DONE;
+})
+
+;; Convert single word types to double word
+;; Widen part of the V4SF operand 1 into the V2DF operand 0, using the
+;; vec_interleave_highv4sf expander to select the elements.
+(define_expand "vec_unpacks_hi_v4sf"
+ [(match_operand:V2DF 0 "vfloat_operand" "")
+ (match_operand:V4SF 1 "vfloat_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)"
+{
+ rtx reg = gen_reg_rtx (V4SFmode);
+
+ /* Interleave the input with itself, then convert the result with xvcvspdp. */
+ emit_insn (gen_vec_interleave_highv4sf (reg, operands[1], operands[1]));
+ emit_insn (gen_vsx_xvcvspdp (operands[0], reg));
+ DONE;
+})
+
+;; Widen part of the V4SF operand 1 into the V2DF operand 0, using the
+;; vec_interleave_lowv4sf expander to select the elements.
+(define_expand "vec_unpacks_lo_v4sf"
+ [(match_operand:V2DF 0 "vfloat_operand" "")
+ (match_operand:V4SF 1 "vfloat_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)"
+{
+ rtx reg = gen_reg_rtx (V4SFmode);
+
+ /* Interleave the input with itself, then convert the result with xvcvspdp. */
+ emit_insn (gen_vec_interleave_lowv4sf (reg, operands[1], operands[1]));
+ emit_insn (gen_vsx_xvcvspdp (operands[0], reg));
+ DONE;
+})
+
+;; Convert part of the V4SI operand 1 into the V2DF operand 0 with xvcvsxwdp,
+;; using the vec_interleave_highv4si expander to select the elements.
+(define_expand "vec_unpacks_float_hi_v4si"
+ [(match_operand:V2DF 0 "vfloat_operand" "")
+ (match_operand:V4SI 1 "vint_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SImode)"
+{
+ rtx reg = gen_reg_rtx (V4SImode);
+
+ /* Interleave the input with itself, then convert the result. */
+ emit_insn (gen_vec_interleave_highv4si (reg, operands[1], operands[1]));
+ emit_insn (gen_vsx_xvcvsxwdp (operands[0], reg));
+ DONE;
+})
+
+;; Convert part of the V4SI operand 1 into the V2DF operand 0 with xvcvsxwdp,
+;; using the vec_interleave_lowv4si expander to select the elements.
+(define_expand "vec_unpacks_float_lo_v4si"
+ [(match_operand:V2DF 0 "vfloat_operand" "")
+ (match_operand:V4SI 1 "vint_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SImode)"
+{
+ rtx reg = gen_reg_rtx (V4SImode);
+
+ /* Interleave the input with itself, then convert the result. */
+ emit_insn (gen_vec_interleave_lowv4si (reg, operands[1], operands[1]));
+ emit_insn (gen_vsx_xvcvsxwdp (operands[0], reg));
+ DONE;
+})
+
+;; Convert part of the V4SI operand 1 into the V2DF operand 0 with xvcvuxwdp,
+;; using the vec_interleave_highv4si expander to select the elements.
+(define_expand "vec_unpacku_float_hi_v4si"
+ [(match_operand:V2DF 0 "vfloat_operand" "")
+ (match_operand:V4SI 1 "vint_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SImode)"
+{
+ rtx reg = gen_reg_rtx (V4SImode);
+
+ /* Interleave the input with itself, then convert the result. */
+ emit_insn (gen_vec_interleave_highv4si (reg, operands[1], operands[1]));
+ emit_insn (gen_vsx_xvcvuxwdp (operands[0], reg));
+ DONE;
+})
+
+;; Convert part of the V4SI operand 1 into the V2DF operand 0 with xvcvuxwdp,
+;; using the vec_interleave_lowv4si expander to select the elements.
+(define_expand "vec_unpacku_float_lo_v4si"
+ [(match_operand:V2DF 0 "vfloat_operand" "")
+ (match_operand:V4SI 1 "vint_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SImode)"
+{
+ rtx reg = gen_reg_rtx (V4SImode);
+
+ /* Interleave the input with itself, then convert the result. */
+ emit_insn (gen_vec_interleave_lowv4si (reg, operands[1], operands[1]));
+ emit_insn (gen_vsx_xvcvuxwdp (operands[0], reg));
+ DONE;
+})
+
+
;; Align vector loads with a permute.
(define_expand "vec_realign_load_<mode>"
[(match_operand:VEC_K 0 "vlogical_operand" "")
(match_operand:VEC_K 1 "vlogical_operand" "")
(match_operand:VEC_K 2 "vlogical_operand" "")
(match_operand:V16QI 3 "vlogical_operand" "")]
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
{
emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1], operands[2],
operands[3]));
DONE;
})
+;; Under VSX, vectors of 4/8 byte alignments do not need to be aligned
+;; since the load already handles it.
+(define_expand "movmisalign<mode>"
+ [(set (match_operand:VEC_N 0 "vfloat_operand" "")
+ (match_operand:VEC_N 1 "vfloat_operand" ""))]
+ "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_ALLOW_MOVMISALIGN"
+ "")
+
;; Vector shift left in bits. Currently supported ony for shift
;; amounts that can be expressed as byte shifts (divisible by 8).
@@ -627,9 +855,18 @@
if (bitshift_val & 0x7)
FAIL;
byteshift_val = bitshift_val >> 3;
- shift = gen_rtx_CONST_INT (QImode, byteshift_val);
- insn = gen_altivec_vsldoi_<mode> (operands[0], operands[1], operands[1],
- shift);
+ if (TARGET_VSX && (byteshift_val & 0x3) == 0)
+ {
+ shift = gen_rtx_CONST_INT (QImode, byteshift_val >> 2);
+ insn = gen_vsx_xxsldwi_<mode> (operands[0], operands[1], operands[1],
+ shift);
+ }
+ else
+ {
+ shift = gen_rtx_CONST_INT (QImode, byteshift_val);
+ insn = gen_altivec_vsldoi_<mode> (operands[0], operands[1], operands[1],
+ shift);
+ }
emit_insn (insn);
DONE;
@@ -659,9 +896,18 @@
if (bitshift_val & 0x7)
FAIL;
byteshift_val = 16 - (bitshift_val >> 3);
- shift = gen_rtx_CONST_INT (QImode, byteshift_val);
- insn = gen_altivec_vsldoi_<mode> (operands[0], operands[1], operands[1],
- shift);
+ if (TARGET_VSX && (byteshift_val & 0x3) == 0)
+ {
+ shift = gen_rtx_CONST_INT (QImode, byteshift_val >> 2);
+ insn = gen_vsx_xxsldwi_<mode> (operands[0], operands[1], operands[1],
+ shift);
+ }
+ else
+ {
+ shift = gen_rtx_CONST_INT (QImode, byteshift_val);
+ insn = gen_altivec_vsldoi_<mode> (operands[0], operands[1], operands[1],
+ shift);
+ }
emit_insn (insn);
DONE;
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
new file mode 100644
index 00000000000..c6aafa6fac0
--- /dev/null
+++ b/gcc/config/rs6000/vsx.md
@@ -0,0 +1,1339 @@
+;; VSX patterns.
+;; Copyright (C) 2009
+;; Free Software Foundation, Inc.
+;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Iterator for both scalar and vector floating point types supported by VSX
+(define_mode_iterator VSX_B [DF V4SF V2DF])
+
+;; Iterator for the 2 64-bit vector types
+(define_mode_iterator VSX_D [V2DF V2DI])
+
+;; Iterator for the 2 32-bit vector types
+(define_mode_iterator VSX_W [V4SF V4SI])
+
+;; Iterator for vector floating point types supported by VSX
+(define_mode_iterator VSX_F [V4SF V2DF])
+
+;; Iterator for logical types supported by VSX
+(define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF TI])
+
+;; Iterator for memory move. Handle TImode specially to allow
+;; it to use gprs as well as vsx registers.
+(define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF])
+
+;; Iterator for types for load/store with update
+(define_mode_iterator VSX_U [V16QI V8HI V4SI V2DI V4SF V2DF TI DF])
+
+;; Map into the appropriate load/store name based on the type
+(define_mode_attr VSm [(V16QI "vw4")
+ (V8HI "vw4")
+ (V4SI "vw4")
+ (V4SF "vw4")
+ (V2DF "vd2")
+ (V2DI "vd2")
+ (DF "d")
+ (TI "vw4")])
+
+;; Map into the appropriate suffix based on the type
+(define_mode_attr VSs [(V16QI "sp")
+ (V8HI "sp")
+ (V4SI "sp")
+ (V4SF "sp")
+ (V2DF "dp")
+ (V2DI "dp")
+ (DF "dp")
+ (SF "sp")
+ (TI "sp")])
+
+;; Map the register class used
+(define_mode_attr VSr [(V16QI "v")
+ (V8HI "v")
+ (V4SI "v")
+ (V4SF "wf")
+ (V2DI "wd")
+ (V2DF "wd")
+ (DF "ws")
+ (SF "d")
+ (TI "wd")])
+
+;; Map the register class used for float<->int conversions
+(define_mode_attr VSr2 [(V2DF "wd")
+ (V4SF "wf")
+ (DF "!f#r")])
+
+(define_mode_attr VSr3 [(V2DF "wa")
+ (V4SF "wa")
+ (DF "!f#r")])
+
+;; Map the register class for sp<->dp float conversions, destination
+(define_mode_attr VSr4 [(SF "ws")
+ (DF "f")
+ (V2DF "wd")
+ (V4SF "v")])
+
+;; Map the register class for sp<->dp float conversions, source
+(define_mode_attr VSr5 [(SF "ws")
+ (DF "f")
+ (V2DF "v")
+ (V4SF "wd")])
+
+;; Same size integer type for floating point data
+(define_mode_attr VSi [(V4SF "v4si")
+ (V2DF "v2di")
+ (DF "di")])
+
+(define_mode_attr VSI [(V4SF "V4SI")
+ (V2DF "V2DI")
+ (DF "DI")])
+
+;; Word size for same size conversion
+(define_mode_attr VSc [(V4SF "w")
+ (V2DF "d")
+ (DF "d")])
+
+;; Bitsize for DF load with update
+(define_mode_attr VSbit [(SI "32")
+ (DI "64")])
+
+;; Map into either s or v, depending on whether this is a scalar or vector
+;; operation
+(define_mode_attr VSv [(V16QI "v")
+ (V8HI "v")
+ (V4SI "v")
+ (V4SF "v")
+ (V2DI "v")
+ (V2DF "v")
+ (TI "v")
+ (DF "s")])
+
+;; Appropriate type for add ops (and other simple FP ops)
+(define_mode_attr VStype_simple [(V2DF "vecfloat")
+ (V4SF "vecfloat")
+ (DF "fp")])
+
+(define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d")
+ (V4SF "fp_addsub_s")
+ (DF "fp_addsub_d")])
+
+;; Appropriate type for multiply ops
+(define_mode_attr VStype_mul [(V2DF "vecfloat")
+ (V4SF "vecfloat")
+ (DF "dmul")])
+
+(define_mode_attr VSfptype_mul [(V2DF "fp_mul_d")
+ (V4SF "fp_mul_s")
+ (DF "fp_mul_d")])
+
+;; Appropriate type for divide ops. For now, just lump the vector divide with
+;; the scalar divides
+(define_mode_attr VStype_div [(V2DF "ddiv")
+ (V4SF "sdiv")
+ (DF "ddiv")])
+
+(define_mode_attr VSfptype_div [(V2DF "fp_div_d")
+ (V4SF "fp_div_s")
+ (DF "fp_div_d")])
+
+;; Appropriate type for sqrt ops.  For now, just lump the vector sqrt with
+;; the scalar sqrt: double-precision modes use "dsqrt", single-precision "ssqrt".
+(define_mode_attr VStype_sqrt [(V2DF "dsqrt")
+                               (V4SF "ssqrt")
+                               (DF   "dsqrt")])
+
+(define_mode_attr VSfptype_sqrt [(V2DF "fp_sqrt_d")
+ (V4SF "fp_sqrt_s")
+ (DF "fp_sqrt_d")])
+
+;; Iterator and modes for sp<->dp conversions
+;; Because scalar SF values are represented internally as double, use the
+;; V4SF type rather than SF to represent them.
+(define_mode_iterator VSX_SPDP [DF V4SF V2DF])
+
+(define_mode_attr VS_spdp_res [(DF "V4SF")
+ (V4SF "V2DF")
+ (V2DF "V4SF")])
+
+(define_mode_attr VS_spdp_insn [(DF "xscvdpsp")
+ (V4SF "xvcvspdp")
+ (V2DF "xvcvdpsp")])
+
+(define_mode_attr VS_spdp_type [(DF "fp")
+ (V4SF "vecfloat")
+ (V2DF "vecfloat")])
+
+;; Map the scalar mode for a vector type
+(define_mode_attr VS_scalar [(V2DF "DF")
+ (V2DI "DI")
+ (V4SF "SF")
+ (V4SI "SI")
+ (V8HI "HI")
+ (V16QI "QI")])
+
+;; Appropriate type for load + update
+(define_mode_attr VStype_load_update [(V16QI "vecload")
+ (V8HI "vecload")
+ (V4SI "vecload")
+ (V4SF "vecload")
+ (V2DI "vecload")
+ (V2DF "vecload")
+ (TI "vecload")
+ (DF "fpload")])
+
+;; Appropriate type for store + update
+(define_mode_attr VStype_store_update [(V16QI "vecstore")
+ (V8HI "vecstore")
+ (V4SI "vecstore")
+ (V4SF "vecstore")
+ (V2DI "vecstore")
+ (V2DF "vecstore")
+ (TI "vecstore")
+ (DF "fpstore")])
+
+;; Constants for creating unspecs
+(define_constants
+ [(UNSPEC_VSX_CONCAT 500)
+ (UNSPEC_VSX_CVDPSXWS 501)
+ (UNSPEC_VSX_CVDPUXWS 502)
+ (UNSPEC_VSX_CVSPDP 503)
+ (UNSPEC_VSX_CVSXWDP 504)
+ (UNSPEC_VSX_CVUXWDP 505)
+ (UNSPEC_VSX_CVSXDSP 506)
+ (UNSPEC_VSX_CVUXDSP 507)
+ (UNSPEC_VSX_CVSPSXDS 508)
+ (UNSPEC_VSX_CVSPUXDS 509)
+ (UNSPEC_VSX_MADD 510)
+ (UNSPEC_VSX_MSUB 511)
+ (UNSPEC_VSX_NMADD 512)
+ (UNSPEC_VSX_NMSUB 513)
+ (UNSPEC_VSX_RSQRTE 514)
+ (UNSPEC_VSX_TDIV 515)
+ (UNSPEC_VSX_TSQRT 516)
+ (UNSPEC_VSX_XXPERMDI 517)
+ (UNSPEC_VSX_SET 518)
+ (UNSPEC_VSX_ROUND_I 519)
+ (UNSPEC_VSX_ROUND_IC 520)
+ (UNSPEC_VSX_SLDWI 521)])
+
+;; VSX moves.  Alternatives, in constraint order:
+;;   0/3: VSX store    1/4: VSX load    2/5: VSX register copy (xxlor)
+;;   6-8: moves involving GPRs, emitted as "#" so that they get split
+;;   9/10: clear the register by xor-ing it with itself (xxlxor)
+;;   11: vector constant   12: Altivec store (stvx)   13: Altivec load (lvx)
+(define_insn "*vsx_mov<mode>"
+  [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,*o,*r,*r,<VSr>,?wa,v,wZ,v")
+        (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,o,r,j,j,W,v,wZ"))]
+  "VECTOR_MEM_VSX_P (<MODE>mode)
+   && (register_operand (operands[0], <MODE>mode)
+       || register_operand (operands[1], <MODE>mode))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 3:
+      return "stx<VSm>%U0x %x1,%y0";
+
+    case 1:
+    case 4:
+      /* For a load the memory operand is operand 1, so the update modifier
+         has to inspect it rather than the destination register.  */
+      return "lx<VSm>%U1x %x0,%y1";
+
+    case 2:
+    case 5:
+      return "xxlor %x0,%x1,%x1";
+
+    case 6:
+    case 7:
+    case 8:
+      return "#";
+
+    case 9:
+    case 10:
+      return "xxlxor %x0,%x0,%x0";
+
+    case 11:
+      return output_vec_const_move (operands);
+
+    case 12:
+      return "stvx %1,%y0";
+
+    case 13:
+      return "lvx %0,%y1";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,*,*,*,vecsimple,vecsimple,*,vecstore,vecload")])
+
+;; Unlike other VSX moves, allow the GPRs, since a normal use of TImode is for
+;; unions.  However for plain data movement, slightly favor the vector loads.
+;; Alternatives, in constraint order:
+;;   0: VSX store   1: VSX load   2: VSX register copy (xxlor)
+;;   3-5: moves involving GPRs, emitted as "#" so that they get split
+;;   6: clear the register by xor-ing it with itself (xxlxor)
+;;   7: vector constant   8: Altivec load (lvx)   9: Altivec store (stvx)
+(define_insn "*vsx_movti"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,?o,?r,?r,wa,v,v,wZ")
+        (match_operand:TI 1 "input_operand" "wa,Z,wa,r,o,r,j,W,wZ,v"))]
+  "VECTOR_MEM_VSX_P (TImode)
+   && (register_operand (operands[0], TImode)
+       || register_operand (operands[1], TImode))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "stxvd2%U0x %x1,%y0";
+
+    case 1:
+      /* For a load the memory operand is operand 1, so the update modifier
+         has to inspect it rather than the destination register.  */
+      return "lxvd2%U1x %x0,%y1";
+
+    case 2:
+      return "xxlor %x0,%x1,%x1";
+
+    case 3:
+    case 4:
+    case 5:
+      return "#";
+
+    case 6:
+      return "xxlxor %x0,%x0,%x0";
+
+    case 7:
+      return output_vec_const_move (operands);
+
+    case 8:
+      /* Operand 0 is the v register and operand 1 the wZ memory: a load.  */
+      return "lvx %0,%y1";
+
+    case 9:
+      /* Operand 0 is the wZ memory and operand 1 the v register: a store.  */
+      return "stvx %1,%y0";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "vecstore,vecload,vecsimple,*,*,*,vecsimple,*,vecload,vecstore")])
+
+;; Load/store with update
+;; Define insns that do load or store with update. Because VSX only has
+;; reg+reg addressing, pre-decrement or pre-increment is unlikely to be
+;; generated.
+;;
+;; In all these cases, we use operands 0 and 1 for the register being
+;; incremented because those are the operands that local-alloc will
+;; tie and these are the pair most likely to be tieable (and the ones
+;; that will benefit the most).
+
+;; Load with update: operand 3 is loaded from operand 1 + operand 2, and the
+;; sum is written to operand 0, which is tied to operand 1.
+;; <MODE> is qualified with VSX_U because the pattern also iterates over P;
+;; an unqualified <MODE> could be substituted from either iterator.
+(define_insn "*vsx_load<VSX_U:mode>_update_<P:mptrsize>"
+  [(set (match_operand:VSX_U 3 "vsx_register_operand" "=<VSr>,?wa")
+        (mem:VSX_U (plus:P (match_operand:P 1 "gpc_reg_operand" "0,0")
+                           (match_operand:P 2 "gpc_reg_operand" "r,r"))))
+   (set (match_operand:P 0 "gpc_reg_operand" "=b,b")
+        (plus:P (match_dup 1)
+                (match_dup 2)))]
+  "<P:tptrsize> && TARGET_UPDATE && VECTOR_MEM_VSX_P (<VSX_U:MODE>mode)"
+  "lx<VSm>ux %x3,%0,%2"
+  [(set_attr "type" "<VSX_U:VStype_load_update>")])
+
+;; Store with update: operand 3 is stored at operand 1 + operand 2, and the
+;; sum is written to operand 0, which is tied to operand 1.
+;; <mode> and <MODE> are qualified with VSX_U because the pattern also
+;; iterates over P; unqualified they could be substituted from either iterator.
+(define_insn "*vsx_store<VSX_U:mode>_update_<P:mptrsize>"
+  [(set (mem:VSX_U (plus:P (match_operand:P 1 "gpc_reg_operand" "0,0")
+                           (match_operand:P 2 "gpc_reg_operand" "r,r")))
+        (match_operand:VSX_U 3 "gpc_reg_operand" "<VSr>,?wa"))
+   (set (match_operand:P 0 "gpc_reg_operand" "=b,b")
+        (plus:P (match_dup 1)
+                (match_dup 2)))]
+  "<P:tptrsize> && TARGET_UPDATE && VECTOR_MEM_VSX_P (<VSX_U:MODE>mode)"
+  "stx<VSm>ux %x3,%0,%2"
+  [(set_attr "type" "<VSX_U:VStype_store_update>")])
+
+;; We may need to have a variant on the pattern for use in the prologue
+;; that doesn't depend on TARGET_UPDATE.
+
+
+;; VSX scalar and vector floating point arithmetic instructions
+(define_insn "*vsx_add<mode>3"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (plus:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>add<VSs> %x0,%x1,%x2"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "*vsx_sub<mode>3"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (minus:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>sub<VSs> %x0,%x1,%x2"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "*vsx_mul<mode>3"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (mult:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>mul<VSs> %x0,%x1,%x2"
+ [(set_attr "type" "<VStype_mul>")
+ (set_attr "fp_type" "<VSfptype_mul>")])
+
+(define_insn "*vsx_div<mode>3"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (div:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>div<VSs> %x0,%x1,%x2"
+ [(set_attr "type" "<VStype_div>")
+ (set_attr "fp_type" "<VSfptype_div>")])
+
+;; *tdiv* instruction returning the FG flag
+(define_expand "vsx_tdiv<mode>3_fg"
+ [(set (match_dup 3)
+ (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
+ (match_operand:VSX_B 2 "vsx_register_operand" "")]
+ UNSPEC_VSX_TDIV))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (gt:SI (match_dup 3)
+ (const_int 0)))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+{
+ operands[3] = gen_reg_rtx (CCFPmode);
+})
+
+;; *tdiv* instruction returning the FE flag
+(define_expand "vsx_tdiv<mode>3_fe"
+ [(set (match_dup 3)
+ (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
+ (match_operand:VSX_B 2 "vsx_register_operand" "")]
+ UNSPEC_VSX_TDIV))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (eq:SI (match_dup 3)
+ (const_int 0)))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+{
+ operands[3] = gen_reg_rtx (CCFPmode);
+})
+
+(define_insn "*vsx_tdiv<mode>3_internal"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
+ (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")]
+ UNSPEC_VSX_TDIV))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>tdiv<VSs> %0,%x1,%x2"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "vsx_fre<mode>2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
+ UNSPEC_FRES))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>re<VSs> %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "*vsx_neg<mode>2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (neg:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>neg<VSs> %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "*vsx_abs<mode>2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (abs:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>abs<VSs> %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "vsx_nabs<mode>2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (neg:VSX_B
+ (abs:VSX_B
+ (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa"))))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>nabs<VSs> %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "vsx_smax<mode>3"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (smax:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>max<VSs> %x0,%x1,%x2"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "*vsx_smin<mode>3"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (smin:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>min<VSs> %x0,%x1,%x2"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "*vsx_sqrt<mode>2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (sqrt:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>sqrt<VSs> %x0,%x1"
+ [(set_attr "type" "<VStype_sqrt>")
+ (set_attr "fp_type" "<VSfptype_sqrt>")])
+
+(define_insn "vsx_rsqrte<mode>2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
+ UNSPEC_VSX_RSQRTE))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>rsqrte<VSs> %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+;; *tsqrt* returning the fg flag
+(define_expand "vsx_tsqrt<mode>2_fg"
+ [(set (match_dup 3)
+ (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
+ UNSPEC_VSX_TSQRT))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (gt:SI (match_dup 3)
+ (const_int 0)))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+{
+ operands[3] = gen_reg_rtx (CCFPmode);
+})
+
+;; *tsqrt* returning the fe flag
+(define_expand "vsx_tsqrt<mode>2_fe"
+ [(set (match_dup 3)
+ (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
+ UNSPEC_VSX_TSQRT))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (eq:SI (match_dup 3)
+ (const_int 0)))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+{
+ operands[3] = gen_reg_rtx (CCFPmode);
+})
+
+(define_insn "*vsx_tsqrt<mode>2_internal"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
+ (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
+ UNSPEC_VSX_TSQRT))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>tsqrt<VSs> %0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+;; Fused vector multiply/add instructions
+
+;; Note we have a pattern for the multiply/add operations that uses unspec and
+;; does not check -mfused-madd to allow users to use these ops when they know
+;; they want the fused multiply/add.
+
+;; Expander for operand 1 * operand 2 + operand 3.  With TARGET_FUSED_MADD the
+;; plus/mult RTL below is emitted as written; otherwise the expander emits the
+;; vsx_fmadd<mode>4_2 pattern instead.
+(define_expand "vsx_fmadd<mode>4"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "")
+ (plus:VSX_B
+ (mult:VSX_B
+ (match_operand:VSX_B 1 "vsx_register_operand" "")
+ (match_operand:VSX_B 2 "vsx_register_operand" ""))
+ (match_operand:VSX_B 3 "vsx_register_operand" "")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+{
+ /* Without -mfused-madd, fall back to the _2 pattern so that a caller
+ asking for the fused operation explicitly still gets it. */
+ if (!TARGET_FUSED_MADD)
+ {
+ emit_insn (gen_vsx_fmadd<mode>4_2 (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+ }
+})
+
+;; Multiply-add expressed as plus/mult RTL; requires TARGET_FUSED_MADD.
+;; Alternatives 1 and 3 tie the addend (operand 3) to the destination and emit
+;; the "madda" form; alternatives 2 and 4 tie operand 2 to the destination and
+;; emit the "maddm" form.
+(define_insn "*vsx_fmadd<mode>4_1"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
+ (plus:VSX_B
+ (mult:VSX_B
+ (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0"))
+ (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode) && TARGET_FUSED_MADD"
+ "@
+ x<VSv>madda<VSs> %x0,%x1,%x2
+ x<VSv>maddm<VSs> %x0,%x1,%x3
+ x<VSv>madda<VSs> %x0,%x1,%x2
+ x<VSv>maddm<VSs> %x0,%x1,%x3"
+ [(set_attr "type" "<VStype_mul>")
+ (set_attr "fp_type" "<VSfptype_mul>")])
+
+;; Multiply-add expressed as UNSPEC_VSX_MADD; its condition does not test
+;; TARGET_FUSED_MADD.  Same alternatives and output templates as the
+;; plus/mult variant.
+(define_insn "vsx_fmadd<mode>4_2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
+ (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
+ (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")]
+ UNSPEC_VSX_MADD))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "@
+ x<VSv>madda<VSs> %x0,%x1,%x2
+ x<VSv>maddm<VSs> %x0,%x1,%x3
+ x<VSv>madda<VSs> %x0,%x1,%x2
+ x<VSv>maddm<VSs> %x0,%x1,%x3"
+ [(set_attr "type" "<VStype_mul>")
+ (set_attr "fp_type" "<VSfptype_mul>")])
+
+;; Expander for op0 = op1 * op2 - op3.  When TARGET_FUSED_MADD is set the
+;; minus/mult RTL below is emitted unchanged; otherwise the unspec variant
+;; (vsx_fmsub<mode>4_2) is emitted instead.
+(define_expand "vsx_fmsub<mode>4"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "")
+ (minus:VSX_B
+ (mult:VSX_B
+ (match_operand:VSX_B 1 "vsx_register_operand" "")
+ (match_operand:VSX_B 2 "vsx_register_operand" ""))
+ (match_operand:VSX_B 3 "vsx_register_operand" "")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+{
+ /* Without -mfused-madd, use the unspec pattern rather than minus/mult.  */
+ if (!TARGET_FUSED_MADD)
+ {
+ emit_insn (gen_vsx_fmsub<mode>4_2 (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+ }
+})
+
+;; Multiply-subtract expressed as minus/mult RTL; requires TARGET_FUSED_MADD.
+;; Alternatives 1 and 3 tie operand 3 to the destination ("msuba" form);
+;; alternatives 2 and 4 tie operand 2 to the destination ("msubm" form).
+(define_insn "*vsx_fmsub<mode>4_1"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
+ (minus:VSX_B
+ (mult:VSX_B
+ (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0"))
+ (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode) && TARGET_FUSED_MADD"
+ "@
+ x<VSv>msuba<VSs> %x0,%x1,%x2
+ x<VSv>msubm<VSs> %x0,%x1,%x3
+ x<VSv>msuba<VSs> %x0,%x1,%x2
+ x<VSv>msubm<VSs> %x0,%x1,%x3"
+ [(set_attr "type" "<VStype_mul>")
+ (set_attr "fp_type" "<VSfptype_mul>")])
+
+;; Multiply-subtract expressed as UNSPEC_VSX_MSUB; its condition does not test
+;; TARGET_FUSED_MADD.  Same alternatives and output templates as the
+;; minus/mult variant.
+(define_insn "vsx_fmsub<mode>4_2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
+ (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
+ (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")]
+ UNSPEC_VSX_MSUB))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "@
+ x<VSv>msuba<VSs> %x0,%x1,%x2
+ x<VSv>msubm<VSs> %x0,%x1,%x3
+ x<VSv>msuba<VSs> %x0,%x1,%x2
+ x<VSv>msubm<VSs> %x0,%x1,%x3"
+ [(set_attr "type" "<VStype_mul>")
+ (set_attr "fp_type" "<VSfptype_mul>")])
+
+;; Expander for the negated multiply-add.  It always emits one of three insns
+;; and finishes with DONE:
+;;   _1 (neg of plus/mult)        fused madd on, signed zeros honored
+;;   _2 (minus of negated mult)   fused madd on, signed zeros not honored
+;;   _3 (unspec)                  fused madd off
+;; NOTE(review): HONOR_SIGNED_ZEROS is queried for DFmode rather than
+;; <MODE>mode, also for the vector modes -- confirm this is intended.
+(define_expand "vsx_fnmadd<mode>4"
+ [(match_operand:VSX_B 0 "vsx_register_operand" "")
+ (match_operand:VSX_B 1 "vsx_register_operand" "")
+ (match_operand:VSX_B 2 "vsx_register_operand" "")
+ (match_operand:VSX_B 3 "vsx_register_operand" "")]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+{
+ if (TARGET_FUSED_MADD && HONOR_SIGNED_ZEROS (DFmode))
+ {
+ emit_insn (gen_vsx_fnmadd<mode>4_1 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+ else if (TARGET_FUSED_MADD && !HONOR_SIGNED_ZEROS (DFmode))
+ {
+ emit_insn (gen_vsx_fnmadd<mode>4_2 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+ else
+ {
+ emit_insn (gen_vsx_fnmadd<mode>4_3 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+})
+
+;; Negated multiply-add as -(op1 * op2 + op3); used when fused madd is enabled
+;; and signed zeros are honored.
+;; NOTE(review): operand 1 has no '%' commutative marker here, unlike the
+;; corresponding fnmsub pattern -- confirm whether that is deliberate.
+(define_insn "vsx_fnmadd<mode>4_1"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
+ (neg:VSX_B
+ (plus:VSX_B
+ (mult:VSX_B
+ (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSr>,wa,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0"))
+ (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode) && TARGET_FUSED_MADD
+ && HONOR_SIGNED_ZEROS (DFmode)"
+ "@
+ x<VSv>nmadda<VSs> %x0,%x1,%x2
+ x<VSv>nmaddm<VSs> %x0,%x1,%x3
+ x<VSv>nmadda<VSs> %x0,%x1,%x2
+ x<VSv>nmaddm<VSs> %x0,%x1,%x3"
+ [(set_attr "type" "<VStype_mul>")
+ (set_attr "fp_type" "<VSfptype_mul>")])
+
+;; Negated multiply-add as (-op1) * op2 - op3; used when fused madd is enabled
+;; and signed zeros are not honored.
+;; NOTE(review): operands 1 and 2 use gpc_reg_operand while the sibling
+;; patterns use vsx_register_operand -- confirm which predicate is wanted.
+(define_insn "vsx_fnmadd<mode>4_2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
+ (minus:VSX_B
+ (mult:VSX_B
+ (neg:VSX_B
+ (match_operand:VSX_B 1 "gpc_reg_operand" "<VSr>,<VSr>,wa,wa"))
+ (match_operand:VSX_B 2 "gpc_reg_operand" "<VSr>,0,wa,0"))
+ (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode) && TARGET_FUSED_MADD
+ && !HONOR_SIGNED_ZEROS (DFmode)"
+ "@
+ x<VSv>nmadda<VSs> %x0,%x1,%x2
+ x<VSv>nmaddm<VSs> %x0,%x1,%x3
+ x<VSv>nmadda<VSs> %x0,%x1,%x2
+ x<VSv>nmaddm<VSs> %x0,%x1,%x3"
+ [(set_attr "type" "<VStype_mul>")
+ (set_attr "fp_type" "<VSfptype_mul>")])
+
+;; Negated multiply-add expressed as UNSPEC_VSX_NMADD; its condition tests
+;; neither TARGET_FUSED_MADD nor HONOR_SIGNED_ZEROS.
+(define_insn "vsx_fnmadd<mode>4_3"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
+ (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSr>,wa,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
+ (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")]
+ UNSPEC_VSX_NMADD))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "@
+ x<VSv>nmadda<VSs> %x0,%x1,%x2
+ x<VSv>nmaddm<VSs> %x0,%x1,%x3
+ x<VSv>nmadda<VSs> %x0,%x1,%x2
+ x<VSv>nmaddm<VSs> %x0,%x1,%x3"
+ [(set_attr "type" "<VStype_mul>")
+ (set_attr "fp_type" "<VSfptype_mul>")])
+
+;; Expander for the negated multiply-subtract.  It always emits one of three
+;; insns and finishes with DONE:
+;;   _1 (neg of minus/mult)       fused madd on, signed zeros honored
+;;   _2 (op3 - op1 * op2)         fused madd on, signed zeros not honored
+;;   _3 (unspec)                  fused madd off
+;; NOTE(review): HONOR_SIGNED_ZEROS is queried for DFmode rather than
+;; <MODE>mode, also for the vector modes -- confirm this is intended.
+(define_expand "vsx_fnmsub<mode>4"
+ [(match_operand:VSX_B 0 "vsx_register_operand" "")
+ (match_operand:VSX_B 1 "vsx_register_operand" "")
+ (match_operand:VSX_B 2 "vsx_register_operand" "")
+ (match_operand:VSX_B 3 "vsx_register_operand" "")]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+{
+ if (TARGET_FUSED_MADD && HONOR_SIGNED_ZEROS (DFmode))
+ {
+ emit_insn (gen_vsx_fnmsub<mode>4_1 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+ else if (TARGET_FUSED_MADD && !HONOR_SIGNED_ZEROS (DFmode))
+ {
+ emit_insn (gen_vsx_fnmsub<mode>4_2 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+ else
+ {
+ emit_insn (gen_vsx_fnmsub<mode>4_3 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+})
+
+;; Negated multiply-subtract as -(op1 * op2 - op3); used when fused madd is
+;; enabled and signed zeros are honored.
+(define_insn "vsx_fnmsub<mode>4_1"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
+ (neg:VSX_B
+ (minus:VSX_B
+ (mult:VSX_B
+ (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0"))
+ (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode) && TARGET_FUSED_MADD
+ && HONOR_SIGNED_ZEROS (DFmode)"
+ "@
+ x<VSv>nmsuba<VSs> %x0,%x1,%x2
+ x<VSv>nmsubm<VSs> %x0,%x1,%x3
+ x<VSv>nmsuba<VSs> %x0,%x1,%x2
+ x<VSv>nmsubm<VSs> %x0,%x1,%x3"
+ [(set_attr "type" "<VStype_mul>")
+ (set_attr "fp_type" "<VSfptype_mul>")])
+
+;; Negated multiply-subtract as op3 - op1 * op2; used when fused madd is
+;; enabled and signed zeros are not honored.  Operand 3 appears first in the
+;; RTL but keeps the same constraints as in the sibling patterns.
+(define_insn "vsx_fnmsub<mode>4_2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
+ (minus:VSX_B
+ (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")
+ (mult:VSX_B
+ (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0"))))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode) && TARGET_FUSED_MADD
+ && !HONOR_SIGNED_ZEROS (DFmode)"
+ "@
+ x<VSv>nmsuba<VSs> %x0,%x1,%x2
+ x<VSv>nmsubm<VSs> %x0,%x1,%x3
+ x<VSv>nmsuba<VSs> %x0,%x1,%x2
+ x<VSv>nmsubm<VSs> %x0,%x1,%x3"
+ [(set_attr "type" "<VStype_mul>")
+ (set_attr "fp_type" "<VSfptype_mul>")])
+
+;; Negated multiply-subtract expressed as UNSPEC_VSX_NMSUB; its condition
+;; tests neither TARGET_FUSED_MADD nor HONOR_SIGNED_ZEROS.
+(define_insn "vsx_fnmsub<mode>4_3"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
+ (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
+ (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")]
+ UNSPEC_VSX_NMSUB))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "@
+ x<VSv>nmsuba<VSs> %x0,%x1,%x2
+ x<VSv>nmsubm<VSs> %x0,%x1,%x3
+ x<VSv>nmsuba<VSs> %x0,%x1,%x2
+ x<VSv>nmsubm<VSs> %x0,%x1,%x3"
+ [(set_attr "type" "<VStype_mul>")
+ (set_attr "fp_type" "<VSfptype_mul>")])
+
+;; Vector conditional expressions (no scalar version for these instructions)
+;; EQ compare of operands 1 and 2 producing a vector result in operand 0;
+;; emits xvcmpeq<VSs>.
+(define_insn "vsx_eq<mode>"
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "xvcmpeq<VSs> %x0,%x1,%x2"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+;; GT compare of operands 1 and 2 producing a vector result in operand 0;
+;; emits xvcmpgt<VSs>.
+(define_insn "vsx_gt<mode>"
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "xvcmpgt<VSs> %x0,%x1,%x2"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+;; GE compare of operands 1 and 2 producing a vector result in operand 0;
+;; emits xvcmpge<VSs>.  The leading '*' means no gen_* function is created
+;; for this pattern.
+(define_insn "*vsx_ge<mode>"
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "xvcmpge<VSs> %x0,%x1,%x2"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+;; Floating point scalar compare
+;; Scalar DF compare of operands 1 and 2 into CCFP operand 0, emitted as
+;; xscmpudp.  Requires hard float, FPRs and double float support in addition
+;; to VECTOR_UNIT_VSX_P (DFmode).
+(define_insn "*vsx_cmpdf_internal1"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y,?y")
+ (compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "ws,wa")
+ (match_operand:DF 2 "gpc_reg_operand" "ws,wa")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && VECTOR_UNIT_VSX_P (DFmode)"
+ "xscmpudp %0,%x1,%x2"
+ [(set_attr "type" "fpcompare")])
+
+;; Compare vectors producing a vector result and a predicate, setting CR6 to
+;; indicate a combined status
+;; Record form of the EQ compare (mnemonic ends in '.'): one instruction sets
+;; both (reg:CC 74), via UNSPEC_PREDICATE, and the vector result in operand 0.
+(define_insn "*vsx_eq_<mode>_p"
+ [(set (reg:CC 74)
+ (unspec:CC
+ [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?wa"))]
+ UNSPEC_PREDICATE))
+ (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (eq:VSX_F (match_dup 1)
+ (match_dup 2)))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "xvcmpeq<VSs>. %x0,%x1,%x2"
+ [(set_attr "type" "veccmp")])
+
+;; Record form of the GT compare (mnemonic ends in '.'): one instruction sets
+;; both (reg:CC 74), via UNSPEC_PREDICATE, and the vector result in operand 0.
+(define_insn "*vsx_gt_<mode>_p"
+ [(set (reg:CC 74)
+ (unspec:CC
+ [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?wa"))]
+ UNSPEC_PREDICATE))
+ (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (gt:VSX_F (match_dup 1)
+ (match_dup 2)))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "xvcmpgt<VSs>. %x0,%x1,%x2"
+ [(set_attr "type" "veccmp")])
+
+;; Record form of the GE compare (mnemonic ends in '.'): one instruction sets
+;; both (reg:CC 74), via UNSPEC_PREDICATE, and the vector result in operand 0.
+(define_insn "*vsx_ge_<mode>_p"
+ [(set (reg:CC 74)
+ (unspec:CC
+ [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?wa"))]
+ UNSPEC_PREDICATE))
+ (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (ge:VSX_F (match_dup 1)
+ (match_dup 2)))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "xvcmpge<VSs>. %x0,%x1,%x2"
+ [(set_attr "type" "veccmp")])
+
+;; Vector select
+;; Select between operand 2 (mask in operand 1 non-zero) and operand 3, with
+;; the mask test in CC mode.  Note the assembler operand order: xxsel takes
+;; operand 3, then operand 2, then the mask.
+(define_insn "*vsx_xxsel<mode>"
+ [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
+ (if_then_else:VSX_L
+ (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
+ (const_int 0))
+ (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxsel %x0,%x3,%x2,%x1"
+ [(set_attr "type" "vecperm")])
+
+;; Same select as the CC-mode xxsel pattern, but with the mask test in CCUNS
+;; mode; the emitted instruction is identical.
+(define_insn "*vsx_xxsel<mode>_uns"
+ [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
+ (if_then_else:VSX_L
+ (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
+ (const_int 0))
+ (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxsel %x0,%x3,%x2,%x1"
+ [(set_attr "type" "vecperm")])
+
+;; Copy sign
+;; RTL: if operand 2 >= zero constant (operand 3) then abs (operand 1) else
+;; -abs (operand 1).  Emitted as x<VSv>cpsgn<VSs> with operand 2 as the first
+;; source and operand 1 as the second.
+(define_insn "vsx_copysign<mode>3"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (if_then_else:VSX_B
+ (ge:VSX_B (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_B 3 "zero_constant" "j,j"))
+ (abs:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa"))
+ (neg:VSX_B (abs:VSX_B (match_dup 1)))))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>cpsgn<VSs> %x0,%x2,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+;; For the conversions, limit the register class for the integer value to be
+;; the fprs because we don't want to add the altivec registers to movdi/movsi.
+;; For the unsigned tests, there isn't a generic double -> unsigned conversion
+;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
+;; Signed integer (<VSI>) to floating point conversion; emits
+;; x<VSv>cvsx<VSc><VSs>.
+(define_insn "vsx_float<VSi><mode>2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (float:VSX_B (match_operand:<VSI> 1 "vsx_register_operand" "<VSr2>,<VSr3>")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>cvsx<VSc><VSs> %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+;; Unsigned integer (<VSI>) to floating point conversion; emits
+;; x<VSv>cvux<VSc><VSs>.
+;; NOTE(review): the section comment says the unsigned conversions should not
+;; test VECTOR_UNIT_VSX_P, yet this condition does -- confirm which is right.
+(define_insn "vsx_floatuns<VSi><mode>2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unsigned_float:VSX_B (match_operand:<VSI> 1 "vsx_register_operand" "<VSr2>,<VSr3>")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>cvux<VSc><VSs> %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+;; Floating point to signed integer (<VSI>) conversion (RTL fix); emits
+;; x<VSv>cv<VSs>sx<VSc>s.
+(define_insn "vsx_fix_trunc<mode><VSi>2"
+ [(set (match_operand:<VSI> 0 "vsx_register_operand" "=<VSr2>,?<VSr3>")
+ (fix:<VSI> (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+;; Floating point to unsigned integer (<VSI>) conversion (RTL unsigned_fix);
+;; emits x<VSv>cv<VSs>ux<VSc>s.
+(define_insn "vsx_fixuns_trunc<mode><VSi>2"
+ [(set (match_operand:<VSI> 0 "vsx_register_operand" "=<VSr2>,?<VSr3>")
+ (unsigned_fix:<VSI> (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+;; Math rounding functions
+;; Rounding via UNSPEC_VSX_ROUND_I.  The pattern is named after the
+;; instruction it emits, x<VSv>r<VSs>i.
+(define_insn "vsx_x<VSv>r<VSs>i"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
+ UNSPEC_VSX_ROUND_I))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>r<VSs>i %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+;; Rounding via UNSPEC_VSX_ROUND_IC.  The pattern is named after the
+;; instruction it emits, x<VSv>r<VSs>ic.
+(define_insn "vsx_x<VSv>r<VSs>ic"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
+ UNSPEC_VSX_ROUND_IC))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>r<VSs>ic %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+;; Truncation expressed with the RTL fix operator in the same floating point
+;; mode; emits x<VSv>r<VSs>iz.
+(define_insn "vsx_btrunc<mode>2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (fix:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>r<VSs>iz %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+;; Same instruction as vsx_btrunc<mode>2 (x<VSv>r<VSs>iz), but matching the
+;; UNSPEC_FRIZ form of the operation instead of the fix operator.
+(define_insn "*vsx_b2trunc<mode>2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
+ UNSPEC_FRIZ))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>r<VSs>iz %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+;; Floor, represented as UNSPEC_FRIM; emits x<VSv>r<VSs>im.
+(define_insn "vsx_floor<mode>2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
+ UNSPEC_FRIM))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>r<VSs>im %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+;; Ceiling, represented as UNSPEC_FRIP; emits x<VSv>r<VSs>ip.
+(define_insn "vsx_ceil<mode>2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
+ UNSPEC_FRIP))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>r<VSs>ip %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+
+;; VSX convert to/from double vector
+
+;; Convert between single and double precision
+;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
+;; scalar single precision instructions internally use the double format.
+;; Prefer the altivec registers, since we likely will need to do a vperm
+;; Generic single <-> double conversion over the VSX_SPDP iterator.  The
+;; mnemonic, result mode and type attribute all come from the VS_spdp_*
+;; mode attributes.
+(define_insn "vsx_<VS_spdp_insn>"
+ [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?wa")
+ (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,wa")]
+ UNSPEC_VSX_CVSPDP))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "<VS_spdp_insn> %x0,%x1"
+ [(set_attr "type" "<VS_spdp_type>")])
+
+;; xscvspdp, represent the scalar SF type as V4SF
+;; Produces a DF scalar from a V4SF source with xscvspdp; the V4SF operand
+;; stands in for the scalar single precision input.
+(define_insn "vsx_xscvspdp"
+ [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?wa")
+ (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")]
+ UNSPEC_VSX_CVSPDP))]
+ "VECTOR_UNIT_VSX_P (DFmode)"
+ "xscvspdp %x0,%x1"
+ [(set_attr "type" "fp")])
+
+;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
+;; format of scalars is actually DF.
+;; Produces a V4SF value from an SF scalar held in a floating point register
+;; ("f" constraint) with xscvdpsp.
+;; NOTE(review): this reuses UNSPEC_VSX_CVSPDP although the instruction
+;; converts in the opposite direction -- confirm that sharing is intended.
+(define_insn "vsx_xscvdpsp_scalar"
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
+ (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")]
+ UNSPEC_VSX_CVSPDP))]
+ "VECTOR_UNIT_VSX_P (DFmode)"
+ "xscvdpsp %x0,%x1"
+ [(set_attr "type" "fp")])
+
+;; Convert from 64-bit to 32-bit types
+;; Note, favor the Altivec registers since the usual use of these instructions
+;; is in vector converts and we need to use the Altivec vperm instruction.
+
+;; V2DF -> V4SI conversion via UNSPEC_VSX_CVDPSXWS; emits xvcvdpsxws.  The
+;; first alternative places the result in an Altivec register ("v").
+(define_insn "vsx_xvcvdpsxws"
+ [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
+ (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
+ UNSPEC_VSX_CVDPSXWS))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvdpsxws %x0,%x1"
+ [(set_attr "type" "vecfloat")])
+
+;; V2DF -> V4SI conversion via UNSPEC_VSX_CVDPUXWS; emits xvcvdpuxws.  The
+;; first alternative places the result in an Altivec register ("v").
+(define_insn "vsx_xvcvdpuxws"
+ [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
+ (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
+ UNSPEC_VSX_CVDPUXWS))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvdpuxws %x0,%x1"
+ [(set_attr "type" "vecfloat")])
+
+;; Emits xvcvsxdsp via UNSPEC_VSX_CVSXDSP.
+;; NOTE(review): the mnemonic suggests a signed doubleword integer to single
+;; precision conversion, but the RTL modes here are V2DF -> V4SI -- confirm
+;; the operand modes against the users of this pattern.
+(define_insn "vsx_xvcvsxdsp"
+ [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa")
+ (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
+ UNSPEC_VSX_CVSXDSP))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvsxdsp %x0,%x1"
+ [(set_attr "type" "vecfloat")])
+
+;; Emits xvcvuxdsp via UNSPEC_VSX_CVUXDSP.  The output template must name the
+;; same instruction as the pattern and its unspec (it previously emitted
+;; xvcvuxwdp, which is a different conversion with its own pattern).
+;; NOTE(review): the mnemonic suggests an unsigned doubleword integer to
+;; single precision conversion, but the RTL modes here are V2DF -> V4SI --
+;; confirm the operand modes against the users of this pattern.
+(define_insn "vsx_xvcvuxdsp"
+ [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa")
+ (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
+ UNSPEC_VSX_CVUXDSP))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvuxdsp %x0,%x1"
+ [(set_attr "type" "vecfloat")])
+
+;; Convert from 32-bit to 64-bit types
+;; V4SI -> V2DF conversion via UNSPEC_VSX_CVSXWDP; emits xvcvsxwdp.
+(define_insn "vsx_xvcvsxwdp"
+ [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
+ (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
+ UNSPEC_VSX_CVSXWDP))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvsxwdp %x0,%x1"
+ [(set_attr "type" "vecfloat")])
+
+;; V4SI -> V2DF conversion via UNSPEC_VSX_CVUXWDP; emits xvcvuxwdp.
+(define_insn "vsx_xvcvuxwdp"
+ [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
+ (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
+ UNSPEC_VSX_CVUXWDP))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvuxwdp %x0,%x1"
+ [(set_attr "type" "vecfloat")])
+
+;; V4SF -> V2DI conversion via UNSPEC_VSX_CVSPSXDS; emits xvcvspsxds.  The
+;; first alternative places the result in an Altivec register ("v").
+(define_insn "vsx_xvcvspsxds"
+ [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
+ (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
+ UNSPEC_VSX_CVSPSXDS))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvspsxds %x0,%x1"
+ [(set_attr "type" "vecfloat")])
+
+;; V4SF -> V2DI conversion via UNSPEC_VSX_CVSPUXDS; emits xvcvspuxds.  The
+;; first alternative places the result in an Altivec register ("v").
+(define_insn "vsx_xvcvspuxds"
+ [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
+ (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
+ UNSPEC_VSX_CVSPUXDS))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvspuxds %x0,%x1"
+ [(set_attr "type" "vecfloat")])
+
+;; Logical and permute operations
+;; Bitwise AND of operands 1 and 2; emits xxland.  Gated on VECTOR_MEM_VSX_P
+;; rather than VECTOR_UNIT_VSX_P.
+(define_insn "*vsx_and<mode>3"
+ [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
+ (and:VSX_L
+ (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
+ (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa")))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxland %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")])
+
+;; Bitwise inclusive OR of operands 1 and 2; emits xxlor.
+(define_insn "*vsx_ior<mode>3"
+ [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
+ (ior:VSX_L (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
+ (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa")))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxlor %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")])
+
+;; Bitwise exclusive OR of operands 1 and 2; emits xxlxor.
+(define_insn "*vsx_xor<mode>3"
+ [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
+ (xor:VSX_L
+ (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
+ (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa")))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxlxor %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")])
+
+;; Bitwise NOT of operand 1, implemented as xxlnor with operand 1 used for
+;; both sources.
+(define_insn "*vsx_one_cmpl<mode>2"
+ [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
+ (not:VSX_L
+ (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxlnor %x0,%x1,%x1"
+ [(set_attr "type" "vecsimple")])
+
+;; Bitwise NOR, i.e. ~(operand 1 | operand 2); emits xxlnor.
+(define_insn "*vsx_nor<mode>3"
+ [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
+ (not:VSX_L
+ (ior:VSX_L
+ (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
+ (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa"))))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxlnor %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")])
+
+;; AND with complement: operand 1 & ~operand 2; emits xxlandc.  The
+;; complemented operand is listed first in the RTL and is numbered 2, so the
+;; assembler operands still appear as %x1,%x2.
+(define_insn "*vsx_andc<mode>3"
+ [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
+ (and:VSX_L
+ (not:VSX_L
+ (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa"))
+ (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxlandc %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")])
+
+
+;; Permute operations
+
+;; Build a V2DF/V2DI vector from two scalars
+;; Operands 1 and 2 are <VS_scalar> values; they are combined, in that order,
+;; with xxpermdi using selector 0.
+(define_insn "vsx_concat_<mode>"
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?wa")
+ (unspec:VSX_D
+ [(match_operand:<VS_scalar> 1 "vsx_register_operand" "ws,wa")
+ (match_operand:<VS_scalar> 2 "vsx_register_operand" "ws,wa")]
+ UNSPEC_VSX_CONCAT))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxpermdi %x0,%x1,%x2,0"
+ [(set_attr "type" "vecperm")])
+
+;; Special purpose concat using xxpermdi to glue two single precision values
+;; together, relying on the fact that internally scalar floats are represented
+;; as doubles. This is used to initialize a V4SF vector with 4 floats
+;; Both SF inputs must be in floating point registers ("f"); the result is
+;; typed V2DF and produced with the same xxpermdi selector 0 as
+;; vsx_concat_<mode>.
+(define_insn "vsx_concat_v2sf"
+ [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
+ (unspec:V2DF
+ [(match_operand:SF 1 "vsx_register_operand" "f,f")
+ (match_operand:SF 2 "vsx_register_operand" "f,f")]
+ UNSPEC_VSX_CONCAT))]
+ "VECTOR_MEM_VSX_P (V2DFmode)"
+ "xxpermdi %x0,%x1,%x2,0"
+ [(set_attr "type" "vecperm")])
+
+;; Set the element of a V2DI/V2DF mode
+;; Operand 1 is the incoming vector, operand 2 the new scalar (held in
+;; doubleword 0 of its register, as in vsx_concat_<mode>) and operand 3 the
+;; element number, which must be 0 or 1.
+;; xxpermdi XT,XA,XB,DM builds XT from one doubleword of XA (chosen by the
+;; high DM bit) followed by one doubleword of XB (chosen by the low DM bit):
+;;   element 0:  { op2[0], op1[1] }  ->  xxpermdi XT,op2,op1,1
+;;   element 1:  { op1[0], op2[0] }  ->  xxpermdi XT,op1,op2,0
+(define_insn "vsx_set_<mode>"
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?wa")
+ (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wd,wa")
+ (match_operand:<VS_scalar> 2 "vsx_register_operand" "ws,wa")
+ (match_operand:QI 3 "u5bit_cint_operand" "i,i")]
+ UNSPEC_VSX_SET))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+{
+ /* Replace element 0: scalar first, then the untouched upper element.  */
+ if (INTVAL (operands[3]) == 0)
+ return \"xxpermdi %x0,%x2,%x1,1\";
+ /* Replace element 1: keep element 0 of the vector, append the scalar.  */
+ else if (INTVAL (operands[3]) == 1)
+ return \"xxpermdi %x0,%x1,%x2,0\";
+ else
+ gcc_unreachable ();
+}
+ [(set_attr "type" "vecperm")])
+
+;; Extract a DF/DI element from V2DF/V2DI
+;; Operand 2 is the element number and must be 0 or 1 (asserted below).  The
+;; xxpermdi selector is built as element << 1 in operands[3], with operand 1
+;; supplied as both source registers.
+(define_insn "vsx_extract_<mode>"
+ [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=ws,d,?wa")
+ (vec_select:<VS_scalar> (match_operand:VSX_D 1 "vsx_register_operand" "wd,wd,wa")
+ (parallel
+ [(match_operand:QI 2 "u5bit_cint_operand" "i,i,i")])))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+{
+ gcc_assert (UINTVAL (operands[2]) <= 1);
+ /* operands[3] is a scratch slot used only to print the selector.  */
+ operands[3] = GEN_INT (INTVAL (operands[2]) << 1);
+ return \"xxpermdi %x0,%x1,%x1,%3\";
+}
+ [(set_attr "type" "vecperm")])
+
+;; Optimize extracting element 0 from memory
+;; Matches vec_select of element 0 from an indexed or indirect memory operand
+;; and emits a scalar load (lxsd%U1x) instead of a vector load plus permute.
+;; Only enabled when WORDS_BIG_ENDIAN.
+(define_insn "*vsx_extract_<mode>_zero"
+ [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=ws,d,?wa")
+ (vec_select:<VS_scalar>
+ (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z,Z,Z")
+ (parallel [(const_int 0)])))]
+ "VECTOR_MEM_VSX_P (<MODE>mode) && WORDS_BIG_ENDIAN"
+ "lxsd%U1x %x0,%y1"
+ [(set_attr "type" "fpload")
+ (set_attr "length" "4")])
+
+;; General double word oriented permute, allow the other vector types for
+;; optimizing the permute instruction.
+;; Operands 1 and 2 are the two source vectors; operand 3 is the immediate
+;; selector, printed unchanged as the last xxpermdi operand.
+(define_insn "vsx_xxpermdi_<mode>"
+ [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wd,?wa")
+ (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wd,wa")
+ (match_operand:VSX_L 2 "vsx_register_operand" "wd,wa")
+ (match_operand:QI 3 "u5bit_cint_operand" "i,i")]
+ UNSPEC_VSX_XXPERMDI))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxpermdi %x0,%x1,%x2,%3"
+ [(set_attr "type" "vecperm")])
+
+;; Variant of xxpermdi that is emitted by the vec_interleave functions
+;; Matches a vec_concat of one element selected from operand 1 (index in
+;; operand 2) and one element selected from operand 3 (index in operand 4).
+;; Both indices must be 0 or 1; they are packed into the xxpermdi selector as
+;; (operand 2 << 1) | operand 4.
+(define_insn "*vsx_xxpermdi2_<mode>"
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
+ (vec_concat:VSX_D
+ (vec_select:<VS_scalar>
+ (match_operand:VSX_D 1 "vsx_register_operand" "wd")
+ (parallel
+ [(match_operand:QI 2 "u5bit_cint_operand" "i")]))
+ (vec_select:<VS_scalar>
+ (match_operand:VSX_D 3 "vsx_register_operand" "wd")
+ (parallel
+ [(match_operand:QI 4 "u5bit_cint_operand" "i")]))))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+{
+ gcc_assert ((UINTVAL (operands[2]) <= 1) && (UINTVAL (operands[4]) <= 1));
+ /* operands[5] is a scratch slot used only to print the selector.  */
+ operands[5] = GEN_INT (((INTVAL (operands[2]) & 1) << 1)
+ | (INTVAL (operands[4]) & 1));
+ return \"xxpermdi %x0,%x1,%x3,%5\";
+}
+ [(set_attr "type" "vecperm")])
+
+;; V2DF/V2DI splat
+;; Duplicates a <VS_scalar> into both elements.  Register alternatives use
+;; xxpermdi with the scalar as both sources and selector 0; memory
+;; alternatives ("Z") use lxvdsx.
+(define_insn "vsx_splat_<mode>"
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,wd,wd,?wa,?wa,?wa")
+ (vec_duplicate:VSX_D
+ (match_operand:<VS_scalar> 1 "input_operand" "ws,f,Z,wa,wa,Z")))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxpermdi %x0,%x1,%x1,0
+ xxpermdi %x0,%x1,%x1,0
+ lxvdsx %x0,%y1
+ xxpermdi %x0,%x1,%x1,0
+ xxpermdi %x0,%x1,%x1,0
+ lxvdsx %x0,%y1"
+ [(set_attr "type" "vecperm,vecperm,vecload,vecperm,vecperm,vecload")])
+
+;; V4SF/V4SI splat
+;; Duplicates the element of operand 1 selected by immediate operand 2 across
+;; the whole vector; emits xxspltw with that immediate.
+(define_insn "vsx_xxspltw_<mode>"
+ [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa")
+ (vec_duplicate:VSX_W
+ (vec_select:<VS_scalar>
+ (match_operand:VSX_W 1 "vsx_register_operand" "wf,wa")
+ (parallel
+ [(match_operand:QI 2 "u5bit_cint_operand" "i,i")]))))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxspltw %x0,%x1,%2"
+ [(set_attr "type" "vecperm")])
+
+;; V4SF/V4SI interleave
+;; Merge-high word interleave, emitted as xxmrghw.  The RTL is a vec_merge
+;; (mask 5) of operand 1 permuted as {0,2,1,3} and operand 2 permuted as
+;; {2,0,3,1}.
+(define_insn "vsx_xxmrghw_<mode>"
+ [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa")
+ (vec_merge:VSX_W
+ (vec_select:VSX_W
+ (match_operand:VSX_W 1 "vsx_register_operand" "wf,wa")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 1)
+ (const_int 3)]))
+ (vec_select:VSX_W
+ (match_operand:VSX_W 2 "vsx_register_operand" "wf,wa")
+ (parallel [(const_int 2)
+ (const_int 0)
+ (const_int 3)
+ (const_int 1)]))
+ (const_int 5)))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxmrghw %x0,%x1,%x2"
+ [(set_attr "type" "vecperm")])
+
+;; Merge-low word interleave, emitted as xxmrglw.  The RTL is a vec_merge
+;; (mask 5) of operand 1 permuted as {2,0,3,1} and operand 2 permuted as
+;; {0,2,1,3}, i.e. the two permutations of the xxmrghw pattern swapped.
+;; NOTE(review): operand 2's second alternative is "?wa" while the xxmrghw
+;; pattern uses plain "wa" -- confirm whether the '?' is intentional.
+(define_insn "vsx_xxmrglw_<mode>"
+ [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa")
+ (vec_merge:VSX_W
+ (vec_select:VSX_W
+ (match_operand:VSX_W 1 "vsx_register_operand" "wf,wa")
+ (parallel [(const_int 2)
+ (const_int 0)
+ (const_int 3)
+ (const_int 1)]))
+ (vec_select:VSX_W
+ (match_operand:VSX_W 2 "vsx_register_operand" "wf,?wa")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 1)
+ (const_int 3)]))
+ (const_int 5)))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxmrglw %x0,%x1,%x2"
+ [(set_attr "type" "vecperm")])
+
+;; Shift left double by word immediate
+;; Operands 1 and 2 are the two source vectors and operand 3 the immediate
+;; shift count, printed unchanged.  There is a single alternative that allows
+;; any VSX register ("wa").
+(define_insn "vsx_xxsldwi_<mode>"
+ [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa")
+ (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa")
+ (match_operand:VSX_L 2 "vsx_register_operand" "wa")
+ (match_operand:QI 3 "u5bit_cint_operand" "i")]
+ UNSPEC_VSX_SLDWI))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxsldwi %x0,%x1,%x2,%3"
+ [(set_attr "type" "vecperm")])
OpenPOWER on IntegriCloud