Diffstat (limited to 'toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro')
94 files changed, 45251 insertions, 0 deletions
diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/fix_linaro_106872.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/fix_linaro_106872.patch new file mode 100644 index 0000000000..ef33afff70 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/fix_linaro_106872.patch @@ -0,0 +1,45 @@ +Index: gcc-4_6-branch/gcc/config/arm/arm.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/arm.c 2012-03-05 17:14:09.901129286 -0800 ++++ gcc-4_6-branch/gcc/config/arm/arm.c 2012-03-05 17:18:23.061141606 -0800 +@@ -17525,6 +17525,13 @@ + } + return; + ++ case 'v': ++ { ++ gcc_assert (GET_CODE (x) == CONST_DOUBLE); ++ fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x)); ++ return; ++ } ++ + /* Register specifier for vld1.16/vst1.16. Translate the S register + number into a D register number and element index. */ + case 'z': +@@ -24925,4 +24932,26 @@ + return 4; + } + ++int ++vfp3_const_double_for_fract_bits (rtx operand) ++{ ++ REAL_VALUE_TYPE r0; ++ ++ if (GET_CODE (operand) != CONST_DOUBLE) ++ return 0; ++ ++ REAL_VALUE_FROM_CONST_DOUBLE (r0, operand); ++ if (exact_real_inverse (DFmode, &r0)) ++ { ++ if (exact_real_truncate (DFmode, &r0)) ++ { ++ HOST_WIDE_INT value = real_to_integer (&r0); ++ value = value & 0xffffffff; ++ if ((value != 0) && ( (value & (value - 1)) == 0)) ++ return int_log2 (value); ++ } ++ } ++ return 0; ++} ++ + #include "gt-arm.h" diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106720.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106720.patch new file mode 100644 index 0000000000..4c573f401e --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106720.patch @@ -0,0 +1,51 @@ +2011-02-21 Andrew Stubbs <ams@codesourcery.com> + Julian Brown <julian@codesourcery.com> + Mark Shinwell <shinwell@codesourcery.com> + + Forward-ported from Linaro GCC 4.5 (bzr99324). + + gcc/ + * config/arm/arm.h (arm_class_likely_spilled_p): Check against + LO_REGS only for Thumb-1. + (MODE_BASE_REG_CLASS): Restrict base registers to those which can + be used in short instructions when optimising for size on Thumb-2. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-01-29 03:20:57 +0000 ++++ new/gcc/config/arm/arm.c 2011-02-21 14:04:51 +0000 +@@ -22304,14 +22304,16 @@ + + /* Implement TARGET_CLASS_LIKELY_SPILLED_P. + +- We need to define this for LO_REGS on thumb. Otherwise we can end up +- using r0-r4 for function arguments, r7 for the stack frame and don't +- have enough left over to do doubleword arithmetic. */ +- ++ We need to define this for LO_REGS on Thumb-1. Otherwise we can end up ++ using r0-r4 for function arguments, r7 for the stack frame and don't have ++ enough left over to do doubleword arithmetic. For Thumb-2 all the ++ potentially problematic instructions accept high registers so this is not ++ necessary. Care needs to be taken to avoid adding new Thumb-2 patterns ++ that require many low registers. 
*/ + static bool + arm_class_likely_spilled_p (reg_class_t rclass) + { +- if ((TARGET_THUMB && rclass == LO_REGS) ++ if ((TARGET_THUMB1 && rclass == LO_REGS) + || rclass == CC_REG) + return true; + + +=== modified file 'gcc/config/arm/arm.h' +--- old/gcc/config/arm/arm.h 2011-01-29 03:20:57 +0000 ++++ new/gcc/config/arm/arm.h 2011-02-21 14:04:51 +0000 +@@ -1185,7 +1185,7 @@ + when addressing quantities in QI or HI mode; if we don't know the + mode, then we must be conservative. */ + #define MODE_BASE_REG_CLASS(MODE) \ +- (TARGET_32BIT ? CORE_REGS : \ ++ (TARGET_ARM || (TARGET_THUMB2 && !optimize_size) ? CORE_REGS : \ + (((MODE) == SImode) ? BASE_REGS : LO_REGS)) + + /* For Thumb we can not support SP+reg addressing, so we return LO_REGS + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106733.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106733.patch new file mode 100644 index 0000000000..4b0079e1dc --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106733.patch @@ -0,0 +1,653 @@ +2011-03-27 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * doc/invoke.texi (max-stores-to-sink): Document. + * params.h (MAX_STORES_TO_SINK): Define. + * opts.c (finish_options): Set MAX_STORES_TO_SINK to 0 + if either vectorization or if-conversion is disabled. + * tree-data-ref.c (dr_equal_offsets_p1): Moved and renamed from + tree-vect-data-refs.c vect_equal_offsets. + (dr_equal_offsets_p): New function. + (find_data_references_in_bb): Remove static. + * tree-data-ref.h (find_data_references_in_bb): Declare. + (dr_equal_offsets_p): Likewise. + * tree-vect-data-refs.c (vect_equal_offsets): Move to tree-data-ref.c. + (vect_drs_dependent_in_basic_block): Update calls to + vect_equal_offsets. + (vect_check_interleaving): Likewise. + * tree-ssa-phiopt.c: Include cfgloop.h and tree-data-ref.h. + (cond_if_else_store_replacement): Rename to... + (cond_if_else_store_replacement_1): ... this. Change arguments and + documentation. + (cond_if_else_store_replacement): New function. + * Makefile.in (tree-ssa-phiopt.o): Adjust dependencies. + * params.def (PARAM_MAX_STORES_TO_SINK): Define. + + gcc/testsuite/ + * gcc.dg/vect/vect-cselim-1.c: New test. + * gcc.dg/vect/vect-cselim-2.c: New test. + +=== modified file 'gcc/Makefile.in' +--- old/gcc/Makefile.in 2011-03-26 09:20:34 +0000 ++++ new/gcc/Makefile.in 2011-04-18 11:31:29 +0000 +@@ -2422,7 +2422,8 @@ + tree-ssa-phiopt.o : tree-ssa-phiopt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ + $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ + $(TREE_FLOW_H) $(TREE_PASS_H) $(TREE_DUMP_H) langhooks.h $(FLAGS_H) \ +- $(DIAGNOSTIC_H) $(TIMEVAR_H) pointer-set.h domwalk.h ++ $(DIAGNOSTIC_H) $(TIMEVAR_H) pointer-set.h domwalk.h $(CFGLOOP_H) \ ++ $(TREE_DATA_REF_H) + tree-nrv.o : tree-nrv.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ + $(TM_H) $(TREE_H) $(FUNCTION_H) $(BASIC_BLOCK_H) $(FLAGS_H) \ + $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TIMEVAR_H) $(TREE_DUMP_H) $(TREE_PASS_H) \ + +=== modified file 'gcc/doc/invoke.texi' +--- old/gcc/doc/invoke.texi 2011-03-29 14:24:42 +0000 ++++ new/gcc/doc/invoke.texi 2011-04-18 11:31:29 +0000 +@@ -8909,6 +8909,11 @@ + The maximum number of namespaces to consult for suggestions when C++ + name lookup fails for an identifier. The default is 1000. + ++@item max-stores-to-sink ++The maximum number of conditional stores paires that can be sunk. 
Set to 0 ++if either vectorization (@option{-ftree-vectorize}) or if-conversion ++(@option{-ftree-loop-if-convert}) is disabled. The default is 2. ++ + @end table + @end table + + +=== modified file 'gcc/opts.c' +--- old/gcc/opts.c 2011-02-17 22:51:57 +0000 ++++ new/gcc/opts.c 2011-03-27 09:38:18 +0000 +@@ -823,6 +823,12 @@ + opts->x_flag_split_stack = 0; + } + } ++ ++ /* Set PARAM_MAX_STORES_TO_SINK to 0 if either vectorization or if-conversion ++ is disabled. */ ++ if (!opts->x_flag_tree_vectorize || !opts->x_flag_tree_loop_if_convert) ++ maybe_set_param_value (PARAM_MAX_STORES_TO_SINK, 0, ++ opts->x_param_values, opts_set->x_param_values); + } + + #define LEFT_COLUMN 27 + +=== modified file 'gcc/params.def' +--- old/gcc/params.def 2011-03-26 09:20:34 +0000 ++++ new/gcc/params.def 2011-04-18 11:31:29 +0000 +@@ -883,6 +883,13 @@ + "name lookup fails", + 1000, 0, 0) + ++/* Maximum number of conditional store pairs that can be sunk. */ ++DEFPARAM (PARAM_MAX_STORES_TO_SINK, ++ "max-stores-to-sink", ++ "Maximum number of conditional store pairs that can be sunk", ++ 2, 0, 0) ++ ++ + /* + Local variables: + mode:c + +=== modified file 'gcc/params.h' +--- old/gcc/params.h 2011-01-13 13:41:03 +0000 ++++ new/gcc/params.h 2011-03-27 09:38:18 +0000 +@@ -206,4 +206,6 @@ + PARAM_VALUE (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO) + #define MIN_NONDEBUG_INSN_UID \ + PARAM_VALUE (PARAM_MIN_NONDEBUG_INSN_UID) ++#define MAX_STORES_TO_SINK \ ++ PARAM_VALUE (PARAM_MAX_STORES_TO_SINK) + #endif /* ! GCC_PARAMS_H */ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-cselim-1.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-cselim-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-cselim-1.c 2011-03-27 09:38:18 +0000 +@@ -0,0 +1,86 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 50 ++ ++typedef struct { ++ short a; ++ short b; ++} data; ++ ++data in1[N], in2[N], out[N]; ++short result[N*2] = {7,-7,9,-6,11,-5,13,-4,15,-3,17,-2,19,-1,21,0,23,1,25,2,27,3,29,4,31,5,33,6,35,7,37,8,39,9,41,10,43,11,45,12,47,13,49,14,51,15,53,16,55,17,57,18,59,19,61,20,63,21,65,22,67,23,69,24,71,25,73,26,75,27,77,28,79,29,81,30,83,31,85,32,87,33,89,34,91,35,93,36,95,37,97,38,99,39,101,40,103,41,105,42}; ++short out1[N], out2[N]; ++ ++__attribute__ ((noinline)) void ++foo () ++{ ++ int i; ++ short c, d; ++ ++ /* Vectorizable with conditional store sinking. */ ++ for (i = 0; i < N; i++) ++ { ++ c = in1[i].b; ++ d = in2[i].b; ++ ++ if (c >= d) ++ { ++ out[i].b = c; ++ out[i].a = d + 5; ++ } ++ else ++ { ++ out[i].b = d - 12; ++ out[i].a = c + d; ++ } ++ } ++ ++ /* Not vectorizable. */ ++ for (i = 0; i < N; i++) ++ { ++ c = in1[i].b; ++ d = in2[i].b; ++ ++ if (c >= d) ++ { ++ out1[i] = c; ++ } ++ else ++ { ++ out2[i] = c + d; ++ } ++ } ++} ++ ++int ++main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in1[i].a = i; ++ in1[i].b = i + 2; ++ in2[i].a = 5; ++ in2[i].b = i + 5; ++ __asm__ volatile (""); ++ } ++ ++ foo (); ++ ++ for (i = 0; i < N; i++) ++ { ++ if (out[i].a != result[2*i] || out[i].b != result[2*i+1]) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align || {! 
vect_strided } } } } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-cselim-2.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-cselim-2.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-cselim-2.c 2011-03-27 09:38:18 +0000 +@@ -0,0 +1,65 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 50 ++ ++int a[N], b[N], in1[N], in2[N]; ++int result[2*N] = {5,-7,7,-6,9,-5,11,-4,13,-3,15,-2,17,-1,19,0,21,1,23,2,25,3,27,4,29,5,31,6,33,7,35,8,37,9,39,10,41,11,43,12,45,13,47,14,49,15,51,16,53,17,55,18,57,19,59,20,61,21,63,22,65,23,67,24,69,25,71,26,73,27,75,28,77,29,79,30,81,31,83,32,85,33,87,34,89,35,91,36,93,37,95,38,97,39,99,40,101,41,103,42}; ++ ++__attribute__ ((noinline)) void ++foo (int *pa, int *pb) ++{ ++ int i; ++ int c, d; ++ ++ /* Store sinking should not work here since the pointers may alias. */ ++ for (i = 0; i < N; i++) ++ { ++ c = in1[i]; ++ d = in2[i]; ++ ++ if (c >= d) ++ { ++ *pa = c; ++ *pb = d + 5; ++ } ++ else ++ { ++ *pb = d - 12; ++ *pa = c + d; ++ } ++ ++ pa++; ++ pb++; ++ } ++} ++ ++int ++main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in1[i] = i; ++ in2[i] = i + 5; ++ __asm__ volatile (""); ++ } ++ ++ foo (a, b); ++ ++ for (i = 0; i < N; i++) ++ { ++ if (a[i] != result[2*i] || b[i] != result[2*i+1]) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/tree-data-ref.c' +--- old/gcc/tree-data-ref.c 2011-02-05 01:39:20 +0000 ++++ new/gcc/tree-data-ref.c 2011-03-27 09:38:18 +0000 +@@ -991,6 +991,48 @@ + return dr; + } + ++/* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical ++ expressions. */ ++static bool ++dr_equal_offsets_p1 (tree offset1, tree offset2) ++{ ++ bool res; ++ ++ STRIP_NOPS (offset1); ++ STRIP_NOPS (offset2); ++ ++ if (offset1 == offset2) ++ return true; ++ ++ if (TREE_CODE (offset1) != TREE_CODE (offset2) ++ || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1))) ++ return false; ++ ++ res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0), ++ TREE_OPERAND (offset2, 0)); ++ ++ if (!res || !BINARY_CLASS_P (offset1)) ++ return res; ++ ++ res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1), ++ TREE_OPERAND (offset2, 1)); ++ ++ return res; ++} ++ ++/* Check if DRA and DRB have equal offsets. */ ++bool ++dr_equal_offsets_p (struct data_reference *dra, ++ struct data_reference *drb) ++{ ++ tree offset1, offset2; ++ ++ offset1 = DR_OFFSET (dra); ++ offset2 = DR_OFFSET (drb); ++ ++ return dr_equal_offsets_p1 (offset1, offset2); ++} ++ + /* Returns true if FNA == FNB. */ + + static bool +@@ -4294,7 +4336,7 @@ + DATAREFS. Returns chrec_dont_know when failing to analyze a + difficult case, returns NULL_TREE otherwise. 
*/ + +-static tree ++tree + find_data_references_in_bb (struct loop *loop, basic_block bb, + VEC (data_reference_p, heap) **datarefs) + { + +=== modified file 'gcc/tree-data-ref.h' +--- old/gcc/tree-data-ref.h 2011-01-25 21:24:23 +0000 ++++ new/gcc/tree-data-ref.h 2011-03-27 09:38:18 +0000 +@@ -426,10 +426,14 @@ + extern void compute_all_dependences (VEC (data_reference_p, heap) *, + VEC (ddr_p, heap) **, VEC (loop_p, heap) *, + bool); ++extern tree find_data_references_in_bb (struct loop *, basic_block, ++ VEC (data_reference_p, heap) **); + + extern void create_rdg_vertices (struct graph *, VEC (gimple, heap) *); + extern bool dr_may_alias_p (const struct data_reference *, + const struct data_reference *); ++extern bool dr_equal_offsets_p (struct data_reference *, ++ struct data_reference *); + + + /* Return true when the base objects of data references A and B are + +=== modified file 'gcc/tree-ssa-phiopt.c' +--- old/gcc/tree-ssa-phiopt.c 2010-11-03 15:18:50 +0000 ++++ new/gcc/tree-ssa-phiopt.c 2011-03-27 09:38:18 +0000 +@@ -34,6 +34,8 @@ + #include "langhooks.h" + #include "pointer-set.h" + #include "domwalk.h" ++#include "cfgloop.h" ++#include "tree-data-ref.h" + + static unsigned int tree_ssa_phiopt (void); + static unsigned int tree_ssa_phiopt_worker (bool); +@@ -1292,35 +1294,18 @@ + return true; + } + +-/* Do the main work of conditional store replacement. We already know +- that the recognized pattern looks like so: +- +- split: +- if (cond) goto THEN_BB; else goto ELSE_BB (edge E1) +- THEN_BB: +- X = Y; +- goto JOIN_BB; +- ELSE_BB: +- X = Z; +- fallthrough (edge E0) +- JOIN_BB: +- some more +- +- We check that THEN_BB and ELSE_BB contain only one store +- that the stores have a "simple" RHS. */ ++/* Do the main work of conditional store replacement. */ + + static bool +-cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb, +- basic_block join_bb) ++cond_if_else_store_replacement_1 (basic_block then_bb, basic_block else_bb, ++ basic_block join_bb, gimple then_assign, ++ gimple else_assign) + { +- gimple then_assign = last_and_only_stmt (then_bb); +- gimple else_assign = last_and_only_stmt (else_bb); + tree lhs_base, lhs, then_rhs, else_rhs; + source_location then_locus, else_locus; + gimple_stmt_iterator gsi; + gimple newphi, new_stmt; + +- /* Check if then_bb and else_bb contain only one store each. */ + if (then_assign == NULL + || !gimple_assign_single_p (then_assign) + || else_assign == NULL +@@ -1385,6 +1370,190 @@ + return true; + } + ++/* Conditional store replacement. We already know ++ that the recognized pattern looks like so: ++ ++ split: ++ if (cond) goto THEN_BB; else goto ELSE_BB (edge E1) ++ THEN_BB: ++ ... ++ X = Y; ++ ... ++ goto JOIN_BB; ++ ELSE_BB: ++ ... ++ X = Z; ++ ... ++ fallthrough (edge E0) ++ JOIN_BB: ++ some more ++ ++ We check that it is safe to sink the store to JOIN_BB by verifying that ++ there are no read-after-write or write-after-write dependencies in ++ THEN_BB and ELSE_BB. 
*/ ++ ++static bool ++cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb, ++ basic_block join_bb) ++{ ++ gimple then_assign = last_and_only_stmt (then_bb); ++ gimple else_assign = last_and_only_stmt (else_bb); ++ VEC (data_reference_p, heap) *then_datarefs, *else_datarefs; ++ VEC (ddr_p, heap) *then_ddrs, *else_ddrs; ++ gimple then_store, else_store; ++ bool found, ok = false, res; ++ struct data_dependence_relation *ddr; ++ data_reference_p then_dr, else_dr; ++ int i, j; ++ tree then_lhs, else_lhs; ++ VEC (gimple, heap) *then_stores, *else_stores; ++ basic_block blocks[3]; ++ ++ if (MAX_STORES_TO_SINK == 0) ++ return false; ++ ++ /* Handle the case with single statement in THEN_BB and ELSE_BB. */ ++ if (then_assign && else_assign) ++ return cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb, ++ then_assign, else_assign); ++ ++ /* Find data references. */ ++ then_datarefs = VEC_alloc (data_reference_p, heap, 1); ++ else_datarefs = VEC_alloc (data_reference_p, heap, 1); ++ if ((find_data_references_in_bb (NULL, then_bb, &then_datarefs) ++ == chrec_dont_know) ++ || !VEC_length (data_reference_p, then_datarefs) ++ || (find_data_references_in_bb (NULL, else_bb, &else_datarefs) ++ == chrec_dont_know) ++ || !VEC_length (data_reference_p, else_datarefs)) ++ { ++ free_data_refs (then_datarefs); ++ free_data_refs (else_datarefs); ++ return false; ++ } ++ ++ /* Find pairs of stores with equal LHS. */ ++ then_stores = VEC_alloc (gimple, heap, 1); ++ else_stores = VEC_alloc (gimple, heap, 1); ++ FOR_EACH_VEC_ELT (data_reference_p, then_datarefs, i, then_dr) ++ { ++ if (DR_IS_READ (then_dr)) ++ continue; ++ ++ then_store = DR_STMT (then_dr); ++ then_lhs = gimple_assign_lhs (then_store); ++ found = false; ++ ++ FOR_EACH_VEC_ELT (data_reference_p, else_datarefs, j, else_dr) ++ { ++ if (DR_IS_READ (else_dr)) ++ continue; ++ ++ else_store = DR_STMT (else_dr); ++ else_lhs = gimple_assign_lhs (else_store); ++ ++ if (operand_equal_p (then_lhs, else_lhs, 0)) ++ { ++ found = true; ++ break; ++ } ++ } ++ ++ if (!found) ++ continue; ++ ++ VEC_safe_push (gimple, heap, then_stores, then_store); ++ VEC_safe_push (gimple, heap, else_stores, else_store); ++ } ++ ++ /* No pairs of stores found. */ ++ if (!VEC_length (gimple, then_stores) ++ || VEC_length (gimple, then_stores) > (unsigned) MAX_STORES_TO_SINK) ++ { ++ free_data_refs (then_datarefs); ++ free_data_refs (else_datarefs); ++ VEC_free (gimple, heap, then_stores); ++ VEC_free (gimple, heap, else_stores); ++ return false; ++ } ++ ++ /* Compute and check data dependencies in both basic blocks. */ ++ then_ddrs = VEC_alloc (ddr_p, heap, 1); ++ else_ddrs = VEC_alloc (ddr_p, heap, 1); ++ compute_all_dependences (then_datarefs, &then_ddrs, NULL, false); ++ compute_all_dependences (else_datarefs, &else_ddrs, NULL, false); ++ blocks[0] = then_bb; ++ blocks[1] = else_bb; ++ blocks[2] = join_bb; ++ renumber_gimple_stmt_uids_in_blocks (blocks, 3); ++ ++ /* Check that there are no read-after-write or write-after-write dependencies ++ in THEN_BB. 
*/ ++ FOR_EACH_VEC_ELT (ddr_p, then_ddrs, i, ddr) ++ { ++ struct data_reference *dra = DDR_A (ddr); ++ struct data_reference *drb = DDR_B (ddr); ++ ++ if (DDR_ARE_DEPENDENT (ddr) != chrec_known ++ && ((DR_IS_READ (dra) && DR_IS_WRITE (drb) ++ && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb))) ++ || (DR_IS_READ (drb) && DR_IS_WRITE (dra) ++ && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra))) ++ || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)))) ++ { ++ free_dependence_relations (then_ddrs); ++ free_dependence_relations (else_ddrs); ++ free_data_refs (then_datarefs); ++ free_data_refs (else_datarefs); ++ VEC_free (gimple, heap, then_stores); ++ VEC_free (gimple, heap, else_stores); ++ return false; ++ } ++ } ++ ++ /* Check that there are no read-after-write or write-after-write dependencies ++ in ELSE_BB. */ ++ FOR_EACH_VEC_ELT (ddr_p, else_ddrs, i, ddr) ++ { ++ struct data_reference *dra = DDR_A (ddr); ++ struct data_reference *drb = DDR_B (ddr); ++ ++ if (DDR_ARE_DEPENDENT (ddr) != chrec_known ++ && ((DR_IS_READ (dra) && DR_IS_WRITE (drb) ++ && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb))) ++ || (DR_IS_READ (drb) && DR_IS_WRITE (dra) ++ && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra))) ++ || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)))) ++ { ++ free_dependence_relations (then_ddrs); ++ free_dependence_relations (else_ddrs); ++ free_data_refs (then_datarefs); ++ free_data_refs (else_datarefs); ++ VEC_free (gimple, heap, then_stores); ++ VEC_free (gimple, heap, else_stores); ++ return false; ++ } ++ } ++ ++ /* Sink stores with same LHS. */ ++ FOR_EACH_VEC_ELT (gimple, then_stores, i, then_store) ++ { ++ else_store = VEC_index (gimple, else_stores, i); ++ res = cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb, ++ then_store, else_store); ++ ok = ok || res; ++ } ++ ++ free_dependence_relations (then_ddrs); ++ free_dependence_relations (else_ddrs); ++ free_data_refs (then_datarefs); ++ free_data_refs (else_datarefs); ++ VEC_free (gimple, heap, then_stores); ++ VEC_free (gimple, heap, else_stores); ++ ++ return ok; ++} ++ + /* Always do these optimizations if we have SSA + trees to work on. */ + static bool + +=== modified file 'gcc/tree-vect-data-refs.c' +--- old/gcc/tree-vect-data-refs.c 2011-02-25 11:18:14 +0000 ++++ new/gcc/tree-vect-data-refs.c 2011-03-27 09:38:18 +0000 +@@ -289,39 +289,6 @@ + } + } + +- +-/* Function vect_equal_offsets. +- +- Check if OFFSET1 and OFFSET2 are identical expressions. */ +- +-static bool +-vect_equal_offsets (tree offset1, tree offset2) +-{ +- bool res; +- +- STRIP_NOPS (offset1); +- STRIP_NOPS (offset2); +- +- if (offset1 == offset2) +- return true; +- +- if (TREE_CODE (offset1) != TREE_CODE (offset2) +- || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1))) +- return false; +- +- res = vect_equal_offsets (TREE_OPERAND (offset1, 0), +- TREE_OPERAND (offset2, 0)); +- +- if (!res || !BINARY_CLASS_P (offset1)) +- return res; +- +- res = vect_equal_offsets (TREE_OPERAND (offset1, 1), +- TREE_OPERAND (offset2, 1)); +- +- return res; +-} +- +- + /* Check dependence between DRA and DRB for basic block vectorization. + If the accesses share same bases and offsets, we can compare their initial + constant offsets to decide whether they differ or not. 
In case of a read- +@@ -352,7 +319,7 @@ + || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR + || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0) + != TREE_OPERAND (DR_BASE_ADDRESS (drb),0))) +- || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))) ++ || !dr_equal_offsets_p (dra, drb)) + return true; + + /* Check the types. */ +@@ -402,7 +369,7 @@ + || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR + || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0) + != TREE_OPERAND (DR_BASE_ADDRESS (drb),0))) +- || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb)) ++ || !dr_equal_offsets_p (dra, drb) + || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) + || DR_IS_READ (dra) != DR_IS_READ (drb)) + return false; + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106737.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106737.patch new file mode 100644 index 0000000000..017b1df7e3 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106737.patch @@ -0,0 +1,126 @@ +2011-04-21 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF: + + 2008-12-03 Daniel Jacobowitz <dan@codesourcery.com> + + gcc/testsuite/ + * gcc.dg/vect/vect-shift-3.c, gcc.dg/vect/vect-shift-4.c: New. + * lib/target-supports.exp (check_effective_target_vect_shift_char): New + function. + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-shift-3.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-shift-3.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-shift-3.c 2011-04-21 13:51:06 +0000 +@@ -0,0 +1,37 @@ ++/* { dg-require-effective-target vect_shift } */ ++/* { dg-require-effective-target vect_int } */ ++ ++#include "tree-vect.h" ++ ++#define N 32 ++ ++unsigned short dst[N] __attribute__((aligned(N))); ++unsigned short src[N] __attribute__((aligned(N))); ++ ++__attribute__ ((noinline)) ++void array_shift(void) ++{ ++ int i; ++ for (i = 0; i < N; i++) ++ dst[i] = src[i] >> 3; ++} ++ ++int main() ++{ ++ volatile int i; ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ src[i] = i << 3; ++ ++ array_shift (); ++ ++ for (i = 0; i < N; i++) ++ if (dst[i] != i) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-shift-4.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-shift-4.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-shift-4.c 2011-04-21 13:51:06 +0000 +@@ -0,0 +1,37 @@ ++/* { dg-require-effective-target vect_shift_char } */ ++/* { dg-require-effective-target vect_int } */ ++ ++#include "tree-vect.h" ++ ++#define N 32 ++ ++unsigned char dst[N] __attribute__((aligned(N))); ++unsigned char src[N] __attribute__((aligned(N))); ++ ++__attribute__ ((noinline)) ++void array_shift(void) ++{ ++ int i; ++ for (i = 0; i < N; i++) ++ dst[i] = src[i] >> 3; ++} ++ ++int main() ++{ ++ volatile int i; ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ src[i] = i << 3; ++ ++ array_shift (); ++ ++ for (i = 0; i < N; i++) ++ if (dst[i] != i) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/lib/target-supports.exp' +--- old/gcc/testsuite/lib/target-supports.exp 2011-02-19 15:31:15 +0000 ++++ new/gcc/testsuite/lib/target-supports.exp 2011-04-21 13:51:06 +0000 +@@ -2308,6 +2308,26 @@ + } + + ++# Return 1 if the target supports hardware vector 
shift operation for char. ++ ++proc check_effective_target_vect_shift_char { } { ++ global et_vect_shift_char_saved ++ ++ if [info exists et_vect_shift_char_saved] { ++ verbose "check_effective_target_vect_shift_char: using cached result" 2 ++ } else { ++ set et_vect_shift_char_saved 0 ++ if { ([istarget powerpc*-*-*] ++ && ![istarget powerpc-*-linux*paired*]) ++ || [check_effective_target_arm32] } { ++ set et_vect_shift_char_saved 1 ++ } ++ } ++ ++ verbose "check_effective_target_vect_shift_char: returning $et_vect_shift_char_saved" 2 ++ return $et_vect_shift_char_saved ++} ++ + # Return 1 if the target supports hardware vectors of long, 0 otherwise. + # + # This can change for different subtargets so do not cache the result. + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106738.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106738.patch new file mode 100644 index 0000000000..3dde3b29a1 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106738.patch @@ -0,0 +1,177 @@ +2011-04-27 Ira Rosen <ira.rosen@linaro.org> + + Backport from FSF: + + 2011-04-03 Richard Guenther <rguenther@suse.de> + Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-if-conv.c (memrefs_read_or_written_unconditionally): Strip all + non-variable offsets and compare the remaining bases of the two + accesses instead of looking for exact same data-ref. + + gcc/testsuite/ + * gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c: New test. + * gcc.dg/vect/vect.exp: Run if-cvt-stores-vect* tests with + -ftree-loop-if-convert-stores. + +=== added file 'gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c' +--- old/gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c 2011-04-24 07:45:49 +0000 +@@ -0,0 +1,69 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 50 ++ ++typedef struct { ++ short a; ++ short b; ++} data; ++ ++data in1[N], in2[N], out[N]; ++short result[N*2] = {10,-7,11,-6,12,-5,13,-4,14,-3,15,-2,16,-1,17,0,18,1,19,2,20,3,21,4,22,5,23,6,24,7,25,8,26,9,27,10,28,11,29,12,30,13,31,14,32,15,33,16,34,17,35,18,36,19,37,20,38,21,39,22,40,23,41,24,42,25,43,26,44,27,45,28,46,29,47,30,48,31,49,32,50,33,51,34,52,35,53,36,54,37,55,38,56,39,57,40,58,41,59,42}; ++short out1[N], out2[N]; ++ ++__attribute__ ((noinline)) void ++foo () ++{ ++ int i; ++ short c, d; ++ ++ for (i = 0; i < N; i++) ++ { ++ c = in1[i].b; ++ d = in2[i].b; ++ ++ if (c >= d) ++ { ++ out[i].b = in1[i].a; ++ out[i].a = d + 5; ++ } ++ else ++ { ++ out[i].b = d - 12; ++ out[i].a = in2[i].a + d; ++ } ++ } ++} ++ ++int ++main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in1[i].a = i; ++ in1[i].b = i + 2; ++ in2[i].a = 5; ++ in2[i].b = i + 5; ++ __asm__ volatile (""); ++ } ++ ++ foo (); ++ ++ for (i = 0; i < N; i++) ++ { ++ if (out[i].a != result[2*i] || out[i].b != result[2*i+1]) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align || {! 
vect_strided } } } } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect.exp' +--- old/gcc/testsuite/gcc.dg/vect/vect.exp 2010-11-22 21:49:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect.exp 2011-04-24 07:45:49 +0000 +@@ -210,6 +210,12 @@ + dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/ggc-*.\[cS\]]] \ + "" $DEFAULT_VECTCFLAGS + ++# -ftree-loop-if-convert-stores ++set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS ++lappend DEFAULT_VECTCFLAGS "-ftree-loop-if-convert-stores" ++dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/if-cvt-stores-vect-*.\[cS\]]] \ ++ "" $DEFAULT_VECTCFLAGS ++ + # With -O3. + # Don't allow IPA cloning, because it throws our counts out of whack. + set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS + +=== modified file 'gcc/tree-if-conv.c' +--- old/gcc/tree-if-conv.c 2011-02-23 16:49:52 +0000 ++++ new/gcc/tree-if-conv.c 2011-04-24 07:45:49 +0000 +@@ -464,8 +464,8 @@ + /* Returns true when the memory references of STMT are read or written + unconditionally. In other words, this function returns true when + for every data reference A in STMT there exist other accesses to +- the same data reference with predicates that add up (OR-up) to the +- true predicate: this ensures that the data reference A is touched ++ a data reference with the same base with predicates that add up (OR-up) to ++ the true predicate: this ensures that the data reference A is touched + (read or written) on every iteration of the if-converted loop. */ + + static bool +@@ -489,21 +489,38 @@ + continue; + + for (j = 0; VEC_iterate (data_reference_p, drs, j, b); j++) +- if (DR_STMT (b) != stmt +- && same_data_refs (a, b)) +- { +- tree cb = bb_predicate (gimple_bb (DR_STMT (b))); +- +- if (DR_RW_UNCONDITIONALLY (b) == 1 +- || is_true_predicate (cb) +- || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb), +- ca, cb))) +- { +- DR_RW_UNCONDITIONALLY (a) = 1; +- DR_RW_UNCONDITIONALLY (b) = 1; +- found = true; +- break; +- } ++ { ++ tree ref_base_a = DR_REF (a); ++ tree ref_base_b = DR_REF (b); ++ ++ if (DR_STMT (b) == stmt) ++ continue; ++ ++ while (TREE_CODE (ref_base_a) == COMPONENT_REF ++ || TREE_CODE (ref_base_a) == IMAGPART_EXPR ++ || TREE_CODE (ref_base_a) == REALPART_EXPR) ++ ref_base_a = TREE_OPERAND (ref_base_a, 0); ++ ++ while (TREE_CODE (ref_base_b) == COMPONENT_REF ++ || TREE_CODE (ref_base_b) == IMAGPART_EXPR ++ || TREE_CODE (ref_base_b) == REALPART_EXPR) ++ ref_base_b = TREE_OPERAND (ref_base_b, 0); ++ ++ if (!operand_equal_p (ref_base_a, ref_base_b, 0)) ++ { ++ tree cb = bb_predicate (gimple_bb (DR_STMT (b))); ++ ++ if (DR_RW_UNCONDITIONALLY (b) == 1 ++ || is_true_predicate (cb) ++ || is_true_predicate (ca ++ = fold_or_predicates (EXPR_LOCATION (cb), ca, cb))) ++ { ++ DR_RW_UNCONDITIONALLY (a) = 1; ++ DR_RW_UNCONDITIONALLY (b) = 1; ++ found = true; ++ break; ++ } ++ } + } + + if (!found) + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106739.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106739.patch new file mode 100644 index 0000000000..2c14ceb8cb --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106739.patch @@ -0,0 +1,140 @@ +2011-05-02 Ira Rosen <ira.rosen@linaro.org> + + Backport from FSF: + + 2011-03-27 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * config/arm/arm.c (arm_autovectorize_vector_sizes): New function. + (TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES): Define. 
+ + gcc/testsuite/ + * gcc.dg/vect/vect-outer-5.c: Reduce the distance between data + accesses to preserve the meaning of the test for doubleword vectors. + * gcc.dg/vect/no-vfa-pr29145.c: Likewise. + * gcc.dg/vect/slp-3.c: Reduce the loop bound for the same reason. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-03-02 11:38:43 +0000 ++++ new/gcc/config/arm/arm.c 2011-04-28 11:46:58 +0000 +@@ -250,6 +250,7 @@ + bool is_packed); + static void arm_conditional_register_usage (void); + static reg_class_t arm_preferred_rename_class (reg_class_t rclass); ++static unsigned int arm_autovectorize_vector_sizes (void); + + + /* Table of machine attributes. */ +@@ -395,6 +396,9 @@ + #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p + #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE + #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode ++#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES ++#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ ++ arm_autovectorize_vector_sizes + + #undef TARGET_MACHINE_DEPENDENT_REORG + #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg +@@ -23511,6 +23515,12 @@ + } + } + ++static unsigned int ++arm_autovectorize_vector_sizes (void) ++{ ++ return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0; ++} ++ + static bool + arm_vector_alignment_reachable (const_tree type, bool is_packed) + { + +=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c' +--- old/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2010-10-04 14:59:30 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2011-04-28 11:46:58 +0000 +@@ -8,7 +8,7 @@ + void with_restrict(int * __restrict p) + { + int i; +- int *q = p - 2; ++ int *q = p - 1; + + for (i = 0; i < 1000; ++i) { + p[i] = q[i]; +@@ -19,7 +19,7 @@ + void without_restrict(int * p) + { + int i; +- int *q = p - 2; ++ int *q = p - 1; + + for (i = 0; i < 1000; ++i) { + p[i] = q[i]; +@@ -38,8 +38,8 @@ + a[i] = b[i] = i; + } + +- with_restrict(a + 2); +- without_restrict(b + 2); ++ with_restrict(a + 1); ++ without_restrict(b + 1); + + for (i = 0; i < 1002; ++i) { + if (a[i] != b[i]) + +=== modified file 'gcc/testsuite/gcc.dg/vect/slp-3.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-3.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-3.c 2011-04-28 11:46:58 +0000 +@@ -4,9 +4,9 @@ + #include <stdarg.h> + #include "tree-vect.h" + +-#define N 8 ++#define N 12 + +-unsigned short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; ++unsigned short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; + + int + main1 () +@@ -101,7 +101,7 @@ + } + + /* SLP with unrolling by 8. 
*/ +- for (i = 0; i < N/2; i++) ++ for (i = 0; i < N/4; i++) + { + out[i*9] = in[i*9]; + out[i*9 + 1] = in[i*9 + 1]; +@@ -115,7 +115,7 @@ + } + + /* check results: */ +- for (i = 0; i < N/2; i++) ++ for (i = 0; i < N/4; i++) + { + if (out[i*9] != in[i*9] + || out[i*9 + 1] != in[i*9 + 1] + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-5.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2011-04-28 11:46:58 +0000 +@@ -17,7 +17,7 @@ + float B[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); + float C[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); + float D[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +- float E[4] = {0,1,2,480}; ++ float E[4] = {0,480,960,1440}; + float s; + + int i, j; +@@ -55,7 +55,7 @@ + s = 0; + for (j=0; j<N; j+=4) + s += C[j]; +- B[i+3] = B[i] + s; ++ B[i+1] = B[i] + s; + } + + /* check results: */ + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106741.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106741.patch new file mode 100644 index 0000000000..6e76c21272 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106741.patch @@ -0,0 +1,255 @@ +2011-04-26 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF: + + 2011-04-15 Maxim Kuvyrkov <maxim@codesourcery.com> + + gcc/ + * combine.c (subst, combine_simlify_rtx): Add new argument, use it + to track processing of conditionals. Update all callers. + (try_combine, simplify_if_then_else): Update. + + 2011-04-25 Maxim Kuvyrkov <maxim@codesourcery.com> + Eric Botcazou <ebotcazou@adacore.com> + + gcc/ + * combine.c (combine_simplify_rtx): Avoid mis-simplifying conditionals + for STORE_FLAG_VALUE==-1 case. + +=== modified file 'gcc/combine.c' +Index: gcc-4_6-branch/gcc/combine.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/combine.c 2012-03-05 00:16:20.000000000 -0800 ++++ gcc-4_6-branch/gcc/combine.c 2012-03-05 16:05:01.212928507 -0800 +@@ -391,8 +391,8 @@ + static void undo_all (void); + static void undo_commit (void); + static rtx *find_split_point (rtx *, rtx, bool); +-static rtx subst (rtx, rtx, rtx, int, int); +-static rtx combine_simplify_rtx (rtx, enum machine_mode, int); ++static rtx subst (rtx, rtx, rtx, int, int, int); ++static rtx combine_simplify_rtx (rtx, enum machine_mode, int, int); + static rtx simplify_if_then_else (rtx); + static rtx simplify_set (rtx); + static rtx simplify_logical (rtx); +@@ -3119,12 +3119,12 @@ + if (i1) + { + subst_low_luid = DF_INSN_LUID (i1); +- i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0); ++ i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0); + } + else + { + subst_low_luid = DF_INSN_LUID (i2); +- i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0); ++ i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0); + } + } + +@@ -3136,7 +3136,7 @@ + self-referential RTL when we will be substituting I1SRC for I1DEST + later. Likewise if I0 feeds into I2, either directly or indirectly + through I1, and I0DEST is in I0SRC. */ +- newpat = subst (PATTERN (i3), i2dest, i2src, 0, ++ newpat = subst (PATTERN (i3), i2dest, i2src, 0, 0, + (i1_feeds_i2_n && i1dest_in_i1src) + || ((i0_feeds_i2_n || (i0_feeds_i1_n && i1_feeds_i2_n)) + && i0dest_in_i0src)); +@@ -3180,7 +3180,7 @@ + copy of I1SRC each time we substitute it, in order to avoid creating + self-referential RTL when we will be substituting I0SRC for I0DEST + later. 
*/ +- newpat = subst (newpat, i1dest, i1src, 0, ++ newpat = subst (newpat, i1dest, i1src, 0, 0, + i0_feeds_i1_n && i0dest_in_i0src); + substed_i1 = 1; + +@@ -3214,7 +3214,7 @@ + + n_occurrences = 0; + subst_low_luid = DF_INSN_LUID (i0); +- newpat = subst (newpat, i0dest, i0src, 0, 0); ++ newpat = subst (newpat, i0dest, i0src, 0, 0, 0); + substed_i0 = 1; + } + +@@ -3276,7 +3276,7 @@ + { + rtx t = i1pat; + if (i0_feeds_i1_n) +- t = subst (t, i0dest, i0src_copy ? i0src_copy : i0src, 0, 0); ++ t = subst (t, i0dest, i0src_copy ? i0src_copy : i0src, 0, 0, 0); + + XVECEXP (newpat, 0, --total_sets) = t; + } +@@ -3284,10 +3284,10 @@ + { + rtx t = i2pat; + if (i1_feeds_i2_n) +- t = subst (t, i1dest, i1src_copy ? i1src_copy : i1src, 0, ++ t = subst (t, i1dest, i1src_copy ? i1src_copy : i1src, 0, 0, + i0_feeds_i1_n && i0dest_in_i0src); + if ((i0_feeds_i1_n && i1_feeds_i2_n) || i0_feeds_i2_n) +- t = subst (t, i0dest, i0src_copy2 ? i0src_copy2 : i0src, 0, 0); ++ t = subst (t, i0dest, i0src_copy2 ? i0src_copy2 : i0src, 0, 0, 0); + + XVECEXP (newpat, 0, --total_sets) = t; + } +@@ -4959,11 +4959,13 @@ + + IN_DEST is nonzero if we are processing the SET_DEST of a SET. + ++ IN_COND is nonzero if we are on top level of the condition. ++ + UNIQUE_COPY is nonzero if each substitution must be unique. We do this + by copying if `n_occurrences' is nonzero. */ + + static rtx +-subst (rtx x, rtx from, rtx to, int in_dest, int unique_copy) ++subst (rtx x, rtx from, rtx to, int in_dest, int in_cond, int unique_copy) + { + enum rtx_code code = GET_CODE (x); + enum machine_mode op0_mode = VOIDmode; +@@ -5024,7 +5026,7 @@ + && GET_CODE (XVECEXP (x, 0, 0)) == SET + && GET_CODE (SET_SRC (XVECEXP (x, 0, 0))) == ASM_OPERANDS) + { +- new_rtx = subst (XVECEXP (x, 0, 0), from, to, 0, unique_copy); ++ new_rtx = subst (XVECEXP (x, 0, 0), from, to, 0, 0, unique_copy); + + /* If this substitution failed, this whole thing fails. */ + if (GET_CODE (new_rtx) == CLOBBER +@@ -5041,7 +5043,7 @@ + && GET_CODE (dest) != CC0 + && GET_CODE (dest) != PC) + { +- new_rtx = subst (dest, from, to, 0, unique_copy); ++ new_rtx = subst (dest, from, to, 0, 0, unique_copy); + + /* If this substitution failed, this whole thing fails. */ + if (GET_CODE (new_rtx) == CLOBBER +@@ -5087,8 +5089,8 @@ + } + else + { +- new_rtx = subst (XVECEXP (x, i, j), from, to, 0, +- unique_copy); ++ new_rtx = subst (XVECEXP (x, i, j), from, to, 0, 0, ++ unique_copy); + + /* If this substitution failed, this whole thing + fails. */ +@@ -5165,7 +5167,9 @@ + && (code == SUBREG || code == STRICT_LOW_PART + || code == ZERO_EXTRACT)) + || code == SET) +- && i == 0), unique_copy); ++ && i == 0), ++ code == IF_THEN_ELSE && i == 0, ++ unique_copy); + + /* If we found that we will have to reject this combination, + indicate that by returning the CLOBBER ourselves, rather than +@@ -5222,7 +5226,7 @@ + /* If X is sufficiently simple, don't bother trying to do anything + with it. */ + if (code != CONST_INT && code != REG && code != CLOBBER) +- x = combine_simplify_rtx (x, op0_mode, in_dest); ++ x = combine_simplify_rtx (x, op0_mode, in_dest, in_cond); + + if (GET_CODE (x) == code) + break; +@@ -5242,10 +5246,12 @@ + expression. + + OP0_MODE is the original mode of XEXP (x, 0). IN_DEST is nonzero +- if we are inside a SET_DEST. */ ++ if we are inside a SET_DEST. IN_COND is nonzero if we are on the top level ++ of a condition. 
*/ + + static rtx +-combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest) ++combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest, ++ int in_cond) + { + enum rtx_code code = GET_CODE (x); + enum machine_mode mode = GET_MODE (x); +@@ -5300,8 +5306,8 @@ + false arms to store-flag values. Be careful to use copy_rtx + here since true_rtx or false_rtx might share RTL with x as a + result of the if_then_else_cond call above. */ +- true_rtx = subst (copy_rtx (true_rtx), pc_rtx, pc_rtx, 0, 0); +- false_rtx = subst (copy_rtx (false_rtx), pc_rtx, pc_rtx, 0, 0); ++ true_rtx = subst (copy_rtx (true_rtx), pc_rtx, pc_rtx, 0, 0, 0); ++ false_rtx = subst (copy_rtx (false_rtx), pc_rtx, pc_rtx, 0, 0, 0); + + /* If true_rtx and false_rtx are not general_operands, an if_then_else + is unlikely to be simpler. */ +@@ -5645,7 +5651,7 @@ + { + /* Try to simplify the expression further. */ + rtx tor = simplify_gen_binary (IOR, mode, XEXP (x, 0), XEXP (x, 1)); +- temp = combine_simplify_rtx (tor, VOIDmode, in_dest); ++ temp = combine_simplify_rtx (tor, VOIDmode, in_dest, 0); + + /* If we could, great. If not, do not go ahead with the IOR + replacement, since PLUS appears in many special purpose +@@ -5738,7 +5744,16 @@ + ZERO_EXTRACT is indeed appropriate, it will be placed back by + the call to make_compound_operation in the SET case. */ + +- if (STORE_FLAG_VALUE == 1 ++ if (in_cond) ++ /* Don't apply below optimizations if the caller would ++ prefer a comparison rather than a value. ++ E.g., for the condition in an IF_THEN_ELSE most targets need ++ an explicit comparison. */ ++ { ++ ; ++ } ++ ++ else if (STORE_FLAG_VALUE == 1 + && new_code == NE && GET_MODE_CLASS (mode) == MODE_INT + && op1 == const0_rtx + && mode == GET_MODE (op0) +@@ -5784,7 +5799,10 @@ + + /* If STORE_FLAG_VALUE is -1, we have cases similar to + those above. */ +- if (STORE_FLAG_VALUE == -1 ++ if (in_cond) ++ ; ++ ++ else if (STORE_FLAG_VALUE == -1 + && new_code == NE && GET_MODE_CLASS (mode) == MODE_INT + && op1 == const0_rtx + && (num_sign_bit_copies (op0, mode) +@@ -5982,11 +6000,11 @@ + if (reg_mentioned_p (from, true_rtx)) + true_rtx = subst (known_cond (copy_rtx (true_rtx), true_code, + from, true_val), +- pc_rtx, pc_rtx, 0, 0); ++ pc_rtx, pc_rtx, 0, 0, 0); + if (reg_mentioned_p (from, false_rtx)) + false_rtx = subst (known_cond (copy_rtx (false_rtx), false_code, + from, false_val), +- pc_rtx, pc_rtx, 0, 0); ++ pc_rtx, pc_rtx, 0, 0, 0); + + SUBST (XEXP (x, 1), swapped ? false_rtx : true_rtx); + SUBST (XEXP (x, 2), swapped ? 
true_rtx : false_rtx); +@@ -6203,11 +6221,11 @@ + { + temp = subst (simplify_gen_relational (true_code, m, VOIDmode, + cond_op0, cond_op1), +- pc_rtx, pc_rtx, 0, 0); ++ pc_rtx, pc_rtx, 0, 0, 0); + temp = simplify_gen_binary (MULT, m, temp, + simplify_gen_binary (MULT, m, c1, + const_true_rtx)); +- temp = subst (temp, pc_rtx, pc_rtx, 0, 0); ++ temp = subst (temp, pc_rtx, pc_rtx, 0, 0, 0); + temp = simplify_gen_binary (op, m, gen_lowpart (m, z), temp); + + if (extend_op != UNKNOWN) diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106742.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106742.patch new file mode 100644 index 0000000000..395c08cab7 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106742.patch @@ -0,0 +1,6125 @@ +2011-05-03 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/testsuite/ + From Richard Earnshaw <rearnsha@arm.com> + + PR target/46329 + * gcc.target/arm/pr46329.c: New test. + + gcc/ + PR target/46329 + * config/arm/arm.c (arm_legitimate_constant_p_1): Return false + for all Neon struct constants. + +2011-05-03 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + * targhooks.h (default_legitimate_constant_p); Declare. + * targhooks.c (default_legitimate_constant_p): New function. + + Backport from mainline: + 2011-04-21 Richard Sandiford <richard.sandiford@linaro.org> + + * target.def (legitimate_constant_p): New hook. + * doc/tm.texi.in (LEGITIMATE_CONSTANT_P): Replace with... + (TARGET_LEGITIMATE_CONSTANT_P): ...this. + * doc/tm.texi: Regenerate. + * calls.c (precompute_register_parameters): Replace uses of + LEGITIMATE_CONSTANT_P with targetm.legitimate_constant_p. + (emit_library_call_value_1): Likewise. + * expr.c (move_block_to_reg, can_store_by_pieces, emit_move_insn) + (compress_float_constant, emit_push_insn, expand_expr_real_1): Likewise. + * recog.c (general_operand, immediate_operand): Likewise. + * reload.c (find_reloads_toplev, find_reloads_address_part): Likewise. + * reload1.c (init_eliminable_invariants): Likewise. + + * config/arm/arm-protos.h (arm_cannot_force_const_mem): Delete. + * config/arm/arm.h (ARM_LEGITIMATE_CONSTANT_P): Likewise. + (THUMB_LEGITIMATE_CONSTANT_P, LEGITIMATE_CONSTANT_P): Likewise. + * config/arm/arm.c (TARGET_LEGITIMATE_CONSTANT_P): Define. + (arm_legitimate_constant_p_1, thumb_legitimate_constant_p) + (arm_legitimate_constant_p): New functions. + (arm_cannot_force_const_mem): Make static. + +2011-05-03 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-05-03 Richard Sandiford <richard.sandiford@linaro.org> + + * hooks.h (hook_bool_mode_uhwi_false): Declare. + * hooks.c (hook_bool_mode_uhwi_false): New function. + * target.def (array_mode_supported_p): New hook. + * doc/tm.texi.in (TARGET_ARRAY_MODE_SUPPORTED_P): Add @hook. + * doc/tm.texi: Regenerate. + * stor-layout.c (mode_for_array): New function. + (layout_type): Use it. + * config/arm/arm.c (arm_array_mode_supported_p): New function. + (TARGET_ARRAY_MODE_SUPPORTED_P): Define. + +2011-05-03 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-04-12 Richard Sandiford <richard.sandiford@linaro.org> + + * config/arm/arm.c (arm_print_operand): Use MEM_SIZE to get the + size of a '%A' memory reference. + (T_DREG, T_QREG): New neon_builtin_type_bits. + (arm_init_neon_builtins): Assert that the load and store operands + are neon_struct_operands. 
+ (locate_neon_builtin_icode): Provide the neon_builtin_type_bits. + (NEON_ARG_MEMORY): New builtin_arg. + (neon_dereference_pointer): New function. + (arm_expand_neon_args): Add a neon_builtin_type_bits argument. + Handle NEON_ARG_MEMORY. + (arm_expand_neon_builtin): Update after above interface changes. + Use NEON_ARG_MEMORY for loads and stores. + * config/arm/predicates.md (neon_struct_operand): New predicate. + * config/arm/iterators.md (V_two_elem): Tweak formatting. + (V_three_elem): Use BLKmode for accesses that have no associated mode. + (V_four_elem): Tweak formatting. + * config/arm/neon.md (neon_vld1<mode>, neon_vld1_dup<mode>) + (neon_vst1_lane<mode>, neon_vst1<mode>, neon_vld2<mode>) + (neon_vld2_lane<mode>, neon_vld2_dup<mode>, neon_vst2<mode>) + (neon_vst2_lane<mode>, neon_vld3<mode>, neon_vld3_lane<mode>) + (neon_vld3_dup<mode>, neon_vst3<mode>, neon_vst3_lane<mode>) + (neon_vld4<mode>, neon_vld4_lane<mode>, neon_vld4_dup<mode>) + (neon_vst4<mode>): Replace pointer operand with a memory operand. + Use %A in the output template. + (neon_vld3qa<mode>, neon_vld3qb<mode>, neon_vst3qa<mode>) + (neon_vst3qb<mode>, neon_vld4qa<mode>, neon_vld4qb<mode>) + (neon_vst4qa<mode>, neon_vst4qb<mode>): Likewise, but halve + the width of the memory access. Remove post-increment. + * config/arm/neon-testgen.ml: Allow addresses to have an alignment. + + gcc/testsuite/ + Backport from mainline: + + 2011-04-12 Richard Sandiford <richard.sandiford@linaro.org> + + * gcc.target/arm/neon-vld3-1.c: New test. + * gcc.target/arm/neon-vst3-1.c: New test. + * gcc.target/arm/neon/v*.c: Regenerate. + +2011-05-03 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-03-30 Richard Sandiford <richard.sandiford@linaro.org> + Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + PR target/43590 + * config/arm/neon.md (neon_vld3qa<mode>, neon_vld4qa<mode>): Remove + operand 1 and reshuffle the operands to match. + (neon_vld3<mode>, neon_vld4<mode>): Update accordingly. + +=== modified file 'gcc/calls.c' +Index: gcc-4_6-branch/gcc/calls.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/calls.c 2011-06-24 08:33:49.000000000 -0700 ++++ gcc-4_6-branch/gcc/calls.c 2011-09-16 20:16:00.217564705 -0700 +@@ -686,7 +686,7 @@ + /* If the value is a non-legitimate constant, force it into a + pseudo now. TLS symbols sometimes need a call to resolve. */ + if (CONSTANT_P (args[i].value) +- && !LEGITIMATE_CONSTANT_P (args[i].value)) ++ && !targetm.legitimate_constant_p (args[i].mode, args[i].value)) + args[i].value = force_reg (args[i].mode, args[i].value); + + /* If we are to promote the function arg to a wider mode, +@@ -3449,7 +3449,8 @@ + + /* Make sure it is a reasonable operand for a move or push insn. */ + if (!REG_P (addr) && !MEM_P (addr) +- && ! (CONSTANT_P (addr) && LEGITIMATE_CONSTANT_P (addr))) ++ && !(CONSTANT_P (addr) ++ && targetm.legitimate_constant_p (Pmode, addr))) + addr = force_operand (addr, NULL_RTX); + + argvec[count].value = addr; +@@ -3490,7 +3491,7 @@ + + /* Make sure it is a reasonable operand for a move or push insn. */ + if (!REG_P (val) && !MEM_P (val) +- && ! 
(CONSTANT_P (val) && LEGITIMATE_CONSTANT_P (val))) ++ && !(CONSTANT_P (val) && targetm.legitimate_constant_p (mode, val))) + val = force_operand (val, NULL_RTX); + + if (pass_by_reference (&args_so_far, mode, NULL_TREE, 1)) +Index: gcc-4_6-branch/gcc/config/arm/arm-protos.h +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/arm-protos.h 2011-06-24 08:33:37.000000000 -0700 ++++ gcc-4_6-branch/gcc/config/arm/arm-protos.h 2011-09-16 20:16:00.217564705 -0700 +@@ -81,7 +81,6 @@ + extern enum reg_class coproc_secondary_reload_class (enum machine_mode, rtx, + bool); + extern bool arm_tls_referenced_p (rtx); +-extern bool arm_cannot_force_const_mem (rtx); + + extern int cirrus_memory_offset (rtx); + extern int arm_coproc_mem_operand (rtx, bool); +Index: gcc-4_6-branch/gcc/config/arm/arm.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/arm.c 2011-09-16 20:14:34.000000000 -0700 ++++ gcc-4_6-branch/gcc/config/arm/arm.c 2011-09-16 20:16:00.237564275 -0700 +@@ -143,6 +143,8 @@ + static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, + tree); + static bool arm_have_conditional_execution (void); ++static bool arm_cannot_force_const_mem (rtx); ++static bool arm_legitimate_constant_p (enum machine_mode, rtx); + static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool); + static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *); + static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); +@@ -241,6 +243,8 @@ + static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *); + static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *); + static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *); ++static bool arm_array_mode_supported_p (enum machine_mode, ++ unsigned HOST_WIDE_INT); + static enum machine_mode arm_preferred_simd_mode (enum machine_mode); + static bool arm_class_likely_spilled_p (reg_class_t); + static bool arm_vector_alignment_reachable (const_tree type, bool is_packed); +@@ -394,6 +398,8 @@ + #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask + #undef TARGET_VECTOR_MODE_SUPPORTED_P + #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p ++#undef TARGET_ARRAY_MODE_SUPPORTED_P ++#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p + #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE + #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode + #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES +@@ -523,6 +529,9 @@ + #undef TARGET_HAVE_CONDITIONAL_EXECUTION + #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution + ++#undef TARGET_LEGITIMATE_CONSTANT_P ++#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p ++ + #undef TARGET_CANNOT_FORCE_CONST_MEM + #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem + +@@ -6539,9 +6548,47 @@ + return for_each_rtx (&x, arm_tls_operand_p_1, NULL); + } + ++/* Implement TARGET_LEGITIMATE_CONSTANT_P. ++ ++ On the ARM, allow any integer (invalid ones are removed later by insn ++ patterns), nice doubles and symbol_refs which refer to the function's ++ constant pool XXX. ++ ++ When generating pic allow anything. */ ++ ++static bool ++arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x) ++{ ++ /* At present, we have no support for Neon structure constants, so forbid ++ them here. It might be possible to handle simple cases like 0 and -1 ++ in future. 
*/ ++ if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)) ++ return false; ++ ++ return flag_pic || !label_mentioned_p (x); ++} ++ ++static bool ++thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) ++{ ++ return (GET_CODE (x) == CONST_INT ++ || GET_CODE (x) == CONST_DOUBLE ++ || CONSTANT_ADDRESS_P (x) ++ || flag_pic); ++} ++ ++static bool ++arm_legitimate_constant_p (enum machine_mode mode, rtx x) ++{ ++ return (!arm_cannot_force_const_mem (x) ++ && (TARGET_32BIT ++ ? arm_legitimate_constant_p_1 (mode, x) ++ : thumb_legitimate_constant_p (mode, x))); ++} ++ + /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ + +-bool ++static bool + arm_cannot_force_const_mem (rtx x) + { + rtx base, offset; +@@ -16598,7 +16645,7 @@ + { + rtx addr; + bool postinc = FALSE; +- unsigned align, modesize, align_bits; ++ unsigned align, memsize, align_bits; + + gcc_assert (GET_CODE (x) == MEM); + addr = XEXP (x, 0); +@@ -16613,12 +16660,12 @@ + instruction (for some alignments) as an aid to the memory subsystem + of the target. */ + align = MEM_ALIGN (x) >> 3; +- modesize = GET_MODE_SIZE (GET_MODE (x)); ++ memsize = INTVAL (MEM_SIZE (x)); + + /* Only certain alignment specifiers are supported by the hardware. */ +- if (modesize == 16 && (align % 32) == 0) ++ if (memsize == 16 && (align % 32) == 0) + align_bits = 256; +- else if ((modesize == 8 || modesize == 16) && (align % 16) == 0) ++ else if ((memsize == 8 || memsize == 16) && (align % 16) == 0) + align_bits = 128; + else if ((align % 8) == 0) + align_bits = 64; +@@ -18278,12 +18325,14 @@ + T_V2SI = 0x0004, + T_V2SF = 0x0008, + T_DI = 0x0010, ++ T_DREG = 0x001F, + T_V16QI = 0x0020, + T_V8HI = 0x0040, + T_V4SI = 0x0080, + T_V4SF = 0x0100, + T_V2DI = 0x0200, + T_TI = 0x0400, ++ T_QREG = 0x07E0, + T_EI = 0x0800, + T_OI = 0x1000 + }; +@@ -18929,10 +18978,9 @@ + if (is_load && k == 1) + { + /* Neon load patterns always have the memory operand +- (a SImode pointer) in the operand 1 position. We +- want a const pointer to the element type in that +- position. */ +- gcc_assert (insn_data[icode].operand[k].mode == SImode); ++ in the operand 1 position. */ ++ gcc_assert (insn_data[icode].operand[k].predicate ++ == neon_struct_operand); + + switch (1 << j) + { +@@ -18967,10 +19015,9 @@ + else if (is_store && k == 0) + { + /* Similarly, Neon store patterns use operand 0 as +- the memory location to store to (a SImode pointer). +- Use a pointer to the element type of the store in +- that position. */ +- gcc_assert (insn_data[icode].operand[k].mode == SImode); ++ the memory location to store to. 
*/ ++ gcc_assert (insn_data[icode].operand[k].predicate ++ == neon_struct_operand); + + switch (1 << j) + { +@@ -19290,12 +19337,13 @@ + } + + static enum insn_code +-locate_neon_builtin_icode (int fcode, neon_itype *itype) ++locate_neon_builtin_icode (int fcode, neon_itype *itype, ++ enum neon_builtin_type_bits *type_bit) + { + neon_builtin_datum key + = { NULL, (neon_itype) 0, 0, { CODE_FOR_nothing }, 0, 0 }; + neon_builtin_datum *found; +- int idx; ++ int idx, type, ntypes; + + key.base_fcode = fcode; + found = (neon_builtin_datum *) +@@ -19308,20 +19356,84 @@ + if (itype) + *itype = found->itype; + ++ if (type_bit) ++ { ++ ntypes = 0; ++ for (type = 0; type < T_MAX; type++) ++ if (found->bits & (1 << type)) ++ { ++ if (ntypes == idx) ++ break; ++ ntypes++; ++ } ++ gcc_assert (type < T_MAX); ++ *type_bit = (enum neon_builtin_type_bits) (1 << type); ++ } + return found->codes[idx]; + } + + typedef enum { + NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, ++ NEON_ARG_MEMORY, + NEON_ARG_STOP + } builtin_arg; + + #define NEON_MAX_BUILTIN_ARGS 5 + ++/* EXP is a pointer argument to a Neon load or store intrinsic. Derive ++ and return an expression for the accessed memory. ++ ++ The intrinsic function operates on a block of registers that has ++ mode REG_MODE. This block contains vectors of type TYPE_BIT. ++ The function references the memory at EXP in mode MEM_MODE; ++ this mode may be BLKmode if no more suitable mode is available. */ ++ ++static tree ++neon_dereference_pointer (tree exp, enum machine_mode mem_mode, ++ enum machine_mode reg_mode, ++ enum neon_builtin_type_bits type_bit) ++{ ++ HOST_WIDE_INT reg_size, vector_size, nvectors, nelems; ++ tree elem_type, upper_bound, array_type; ++ ++ /* Work out the size of the register block in bytes. */ ++ reg_size = GET_MODE_SIZE (reg_mode); ++ ++ /* Work out the size of each vector in bytes. */ ++ gcc_assert (type_bit & (T_DREG | T_QREG)); ++ vector_size = (type_bit & T_QREG ? 16 : 8); ++ ++ /* Work out how many vectors there are. */ ++ gcc_assert (reg_size % vector_size == 0); ++ nvectors = reg_size / vector_size; ++ ++ /* Work out how many elements are being loaded or stored. ++ MEM_MODE == REG_MODE implies a one-to-one mapping between register ++ and memory elements; anything else implies a lane load or store. */ ++ if (mem_mode == reg_mode) ++ nelems = vector_size * nvectors; ++ else ++ nelems = nvectors; ++ ++ /* Work out the type of each element. */ ++ gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp))); ++ elem_type = TREE_TYPE (TREE_TYPE (exp)); ++ ++ /* Create a type that describes the full access. */ ++ upper_bound = build_int_cst (size_type_node, nelems - 1); ++ array_type = build_array_type (elem_type, build_index_type (upper_bound)); ++ ++ /* Dereference EXP using that type. */ ++ exp = convert (build_pointer_type (array_type), exp); ++ return fold_build2 (MEM_REF, array_type, exp, ++ build_int_cst (TREE_TYPE (exp), 0)); ++} ++ + /* Expand a Neon builtin. */ + static rtx + arm_expand_neon_args (rtx target, int icode, int have_retval, ++ enum neon_builtin_type_bits type_bit, + tree exp, ...) 
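Editorial aside, not part of the patch: a minimal user-level sketch of the two access kinds that neon_dereference_pointer above distinguishes, a full register-block load where every element behind the pointer is read, and a lane load where only one element per vector is touched. The functions below are illustrative only and need an ARM target with NEON enabled.

#include <arm_neon.h>

/* Full load: vld1q_u8 reads all 16 bytes at P, so the intrinsic's pointer
   argument can be described as a 16-element array access when the call is
   expanded (mem_mode == reg_mode in the helper above).  */
uint8x16_t
load_block (const uint8_t *p)
{
  return vld1q_u8 (p);
}

/* Lane load: vld1q_lane_u8 reads a single byte at Q, so the derived memory
   operand covers just one element per vector (mem_mode != reg_mode).  */
uint8x16_t
load_one_lane (const uint8_t *q, uint8x16_t v)
{
  return vld1q_lane_u8 (q, v, 0);
}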
+ { + va_list ap; +@@ -19330,7 +19442,9 @@ + rtx op[NEON_MAX_BUILTIN_ARGS]; + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode[NEON_MAX_BUILTIN_ARGS]; ++ enum machine_mode other_mode; + int argc = 0; ++ int opno; + + if (have_retval + && (!target +@@ -19348,26 +19462,46 @@ + break; + else + { ++ opno = argc + have_retval; ++ mode[argc] = insn_data[icode].operand[opno].mode; + arg[argc] = CALL_EXPR_ARG (exp, argc); ++ if (thisarg == NEON_ARG_MEMORY) ++ { ++ other_mode = insn_data[icode].operand[1 - opno].mode; ++ arg[argc] = neon_dereference_pointer (arg[argc], mode[argc], ++ other_mode, type_bit); ++ } + op[argc] = expand_normal (arg[argc]); +- mode[argc] = insn_data[icode].operand[argc + have_retval].mode; + + switch (thisarg) + { + case NEON_ARG_COPY_TO_REG: + /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/ +- if (!(*insn_data[icode].operand[argc + have_retval].predicate) ++ if (!(*insn_data[icode].operand[opno].predicate) + (op[argc], mode[argc])) + op[argc] = copy_to_mode_reg (mode[argc], op[argc]); + break; + + case NEON_ARG_CONSTANT: + /* FIXME: This error message is somewhat unhelpful. */ +- if (!(*insn_data[icode].operand[argc + have_retval].predicate) ++ if (!(*insn_data[icode].operand[opno].predicate) + (op[argc], mode[argc])) + error ("argument must be a constant"); + break; + ++ case NEON_ARG_MEMORY: ++ gcc_assert (MEM_P (op[argc])); ++ PUT_MODE (op[argc], mode[argc]); ++ /* ??? arm_neon.h uses the same built-in functions for signed ++ and unsigned accesses, casting where necessary. This isn't ++ alias safe. */ ++ set_mem_alias_set (op[argc], 0); ++ if (!(*insn_data[icode].operand[opno].predicate) ++ (op[argc], mode[argc])) ++ op[argc] = (replace_equiv_address ++ (op[argc], force_reg (Pmode, XEXP (op[argc], 0)))); ++ break; ++ + case NEON_ARG_STOP: + gcc_unreachable (); + } +@@ -19446,14 +19580,15 @@ + arm_expand_neon_builtin (int fcode, tree exp, rtx target) + { + neon_itype itype; +- enum insn_code icode = locate_neon_builtin_icode (fcode, &itype); ++ enum neon_builtin_type_bits type_bit; ++ enum insn_code icode = locate_neon_builtin_icode (fcode, &itype, &type_bit); + + switch (itype) + { + case NEON_UNOP: + case NEON_CONVERT: + case NEON_DUPLANE: +- return arm_expand_neon_args (target, icode, 1, exp, ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_BINOP: +@@ -19463,90 +19598,90 @@ + case NEON_SCALARMULH: + case NEON_SHIFTINSERT: + case NEON_LOGICBINOP: +- return arm_expand_neon_args (target, icode, 1, exp, ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + + case NEON_TERNOP: +- return arm_expand_neon_args (target, icode, 1, exp, ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_GETLANE: + case NEON_FIXCONV: + case NEON_SHIFTIMM: +- return arm_expand_neon_args (target, icode, 1, exp, ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + + case NEON_CREATE: +- return arm_expand_neon_args (target, icode, 1, exp, ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_DUP: + case NEON_SPLIT: + case NEON_REINTERP: +- return arm_expand_neon_args (target, icode, 1, exp, 
++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_COMBINE: + case NEON_VTBL: +- return arm_expand_neon_args (target, icode, 1, exp, ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_RESULTPAIR: +- return arm_expand_neon_args (target, icode, 0, exp, ++ return arm_expand_neon_args (target, icode, 0, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_STOP); + + case NEON_LANEMUL: + case NEON_LANEMULL: + case NEON_LANEMULH: +- return arm_expand_neon_args (target, icode, 1, exp, ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_LANEMAC: +- return arm_expand_neon_args (target, icode, 1, exp, ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_SHIFTACC: +- return arm_expand_neon_args (target, icode, 1, exp, ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_SCALARMAC: +- return arm_expand_neon_args (target, icode, 1, exp, ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_SELECT: + case NEON_VTBX: +- return arm_expand_neon_args (target, icode, 1, exp, ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_STOP); + + case NEON_LOAD1: + case NEON_LOADSTRUCT: +- return arm_expand_neon_args (target, icode, 1, exp, +- NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ NEON_ARG_MEMORY, NEON_ARG_STOP); + + case NEON_LOAD1LANE: + case NEON_LOADSTRUCTLANE: +- return arm_expand_neon_args (target, icode, 1, exp, +- NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + + case NEON_STORE1: + case NEON_STORESTRUCT: +- return arm_expand_neon_args (target, icode, 0, exp, +- NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); ++ return arm_expand_neon_args (target, icode, 0, type_bit, exp, ++ NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_STORE1LANE: + case NEON_STORESTRUCTLANE: +- return arm_expand_neon_args (target, icode, 0, exp, +- NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, ++ return arm_expand_neon_args (target, icode, 0, type_bit, exp, ++ NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + } + +@@ -22265,6 +22400,20 @@ + return true; + + return false; ++} ++ ++/* Implements target hook array_mode_supported_p. 
*/ ++ ++static bool ++arm_array_mode_supported_p (enum machine_mode mode, ++ unsigned HOST_WIDE_INT nelems) ++{ ++ if (TARGET_NEON ++ && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)) ++ && (nelems >= 2 && nelems <= 4)) ++ return true; ++ ++ return false; + } + + /* Use the option -mvectorize-with-neon-quad to override the use of doubleword +Index: gcc-4_6-branch/gcc/config/arm/arm.h +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/arm.h 2011-09-16 20:14:33.000000000 -0700 ++++ gcc-4_6-branch/gcc/config/arm/arm.h 2011-09-16 20:16:00.237564275 -0700 +@@ -1777,27 +1777,6 @@ + #define TARGET_DEFAULT_WORD_RELOCATIONS 0 + #endif + +-/* Nonzero if the constant value X is a legitimate general operand. +- It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. +- +- On the ARM, allow any integer (invalid ones are removed later by insn +- patterns), nice doubles and symbol_refs which refer to the function's +- constant pool XXX. +- +- When generating pic allow anything. */ +-#define ARM_LEGITIMATE_CONSTANT_P(X) (flag_pic || ! label_mentioned_p (X)) +- +-#define THUMB_LEGITIMATE_CONSTANT_P(X) \ +- ( GET_CODE (X) == CONST_INT \ +- || GET_CODE (X) == CONST_DOUBLE \ +- || CONSTANT_ADDRESS_P (X) \ +- || flag_pic) +- +-#define LEGITIMATE_CONSTANT_P(X) \ +- (!arm_cannot_force_const_mem (X) \ +- && (TARGET_32BIT ? ARM_LEGITIMATE_CONSTANT_P (X) \ +- : THUMB_LEGITIMATE_CONSTANT_P (X))) +- + #ifndef SUBTARGET_NAME_ENCODING_LENGTHS + #define SUBTARGET_NAME_ENCODING_LENGTHS + #endif +Index: gcc-4_6-branch/gcc/config/arm/iterators.md +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/iterators.md 2011-06-24 08:33:37.000000000 -0700 ++++ gcc-4_6-branch/gcc/config/arm/iterators.md 2011-09-16 20:16:00.237564275 -0700 +@@ -194,24 +194,22 @@ + + ;; Mode of pair of elements for each vector mode, to define transfer + ;; size for structure lane/dup loads and stores. +-(define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI") +- (V4HI "SI") (V8HI "SI") ++(define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI") ++ (V4HI "SI") (V8HI "SI") + (V2SI "V2SI") (V4SI "V2SI") + (V2SF "V2SF") (V4SF "V2SF") + (DI "V2DI") (V2DI "V2DI")]) + + ;; Similar, for three elements. +-;; ??? Should we define extra modes so that sizes of all three-element +-;; accesses can be accurately represented? +-(define_mode_attr V_three_elem [(V8QI "SI") (V16QI "SI") +- (V4HI "V4HI") (V8HI "V4HI") +- (V2SI "V4SI") (V4SI "V4SI") +- (V2SF "V4SF") (V4SF "V4SF") +- (DI "EI") (V2DI "EI")]) ++(define_mode_attr V_three_elem [(V8QI "BLK") (V16QI "BLK") ++ (V4HI "BLK") (V8HI "BLK") ++ (V2SI "BLK") (V4SI "BLK") ++ (V2SF "BLK") (V4SF "BLK") ++ (DI "EI") (V2DI "EI")]) + + ;; Similar, for four elements. 
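Editorial aside, not part of the patch (the four-element attribute continues directly below): a hedged user-level sketch of why the three-element attribute now maps to BLK. A structure lane access touches one element from each of three vectors, for example 3 bytes for int8x8x3_t, a size that no integer or vector mode describes exactly, so the port uses BLKmode and lets the recorded memory size carry the exact footprint. Illustrative only; requires an ARM target with NEON.

#include <arm_neon.h>

/* Loads lane 0 of each of the three vectors in ACC: exactly 3 bytes at P.  */
int8x8x3_t
load_rgb_lane0 (const int8_t *p, int8x8x3_t acc)
{
  return vld3_lane_s8 (p, acc, 0);
}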
+ (define_mode_attr V_four_elem [(V8QI "SI") (V16QI "SI") +- (V4HI "V4HI") (V8HI "V4HI") ++ (V4HI "V4HI") (V8HI "V4HI") + (V2SI "V4SI") (V4SI "V4SI") + (V2SF "V4SF") (V4SF "V4SF") + (DI "OI") (V2DI "OI")]) +Index: gcc-4_6-branch/gcc/config/arm/neon-testgen.ml +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/neon-testgen.ml 2011-06-24 08:33:37.000000000 -0700 ++++ gcc-4_6-branch/gcc/config/arm/neon-testgen.ml 2011-09-16 20:16:00.237564275 -0700 +@@ -177,7 +177,7 @@ + let alt2 = commas (fun x -> x) (n_things n elt_regexp) "" in + "\\\\\\{((" ^ alt1 ^ ")|(" ^ alt2 ^ "))\\\\\\}" + | (PtrTo elt | CstPtrTo elt) -> +- "\\\\\\[" ^ (analyze_shape_elt elt) ^ "\\\\\\]" ++ "\\\\\\[" ^ (analyze_shape_elt elt) ^ "\\(:\\[0-9\\]+\\)?\\\\\\]" + | Element_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]" + | Element_of_qreg -> (analyze_shape_elt Qreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]" + | All_elements_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\\\\\]" +Index: gcc-4_6-branch/gcc/config/arm/neon.md +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/neon.md 2011-07-19 21:50:44.000000000 -0700 ++++ gcc-4_6-branch/gcc/config/arm/neon.md 2011-09-16 20:16:00.247564269 -0700 +@@ -4250,16 +4250,16 @@ + + (define_insn "neon_vld1<mode>" + [(set (match_operand:VDQX 0 "s_register_operand" "=w") +- (unspec:VDQX [(mem:VDQX (match_operand:SI 1 "s_register_operand" "r"))] ++ (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")] + UNSPEC_VLD1))] + "TARGET_NEON" +- "vld1.<V_sz_elem>\t%h0, [%1]" ++ "vld1.<V_sz_elem>\t%h0, %A1" + [(set_attr "neon_type" "neon_vld1_1_2_regs")] + ) + + (define_insn "neon_vld1_lane<mode>" + [(set (match_operand:VDX 0 "s_register_operand" "=w") +- (unspec:VDX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um") + (match_operand:VDX 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VLD1_LANE))] +@@ -4270,9 +4270,9 @@ + if (lane < 0 || lane >= max) + error ("lane out of range"); + if (max == 1) +- return "vld1.<V_sz_elem>\t%P0, [%1]"; ++ return "vld1.<V_sz_elem>\t%P0, %A1"; + else +- return "vld1.<V_sz_elem>\t{%P0[%c3]}, [%1]"; ++ return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1"; + } + [(set (attr "neon_type") + (if_then_else (eq (const_string "<V_mode_nunits>") (const_int 2)) +@@ -4282,7 +4282,7 @@ + + (define_insn "neon_vld1_lane<mode>" + [(set (match_operand:VQX 0 "s_register_operand" "=w") +- (unspec:VQX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um") + (match_operand:VQX 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VLD1_LANE))] +@@ -4301,9 +4301,9 @@ + } + operands[0] = gen_rtx_REG (<V_HALF>mode, regno); + if (max == 2) +- return "vld1.<V_sz_elem>\t%P0, [%1]"; ++ return "vld1.<V_sz_elem>\t%P0, %A1"; + else +- return "vld1.<V_sz_elem>\t{%P0[%c3]}, [%1]"; ++ return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1"; + } + [(set (attr "neon_type") + (if_then_else (eq (const_string "<V_mode_nunits>") (const_int 2)) +@@ -4313,14 +4313,14 @@ + + (define_insn "neon_vld1_dup<mode>" + [(set (match_operand:VDX 0 "s_register_operand" "=w") +- (unspec:VDX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand" "r"))] ++ (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")] + UNSPEC_VLD1_DUP))] + "TARGET_NEON" + { + if (GET_MODE_NUNITS 
(<MODE>mode) > 1) +- return "vld1.<V_sz_elem>\t{%P0[]}, [%1]"; ++ return "vld1.<V_sz_elem>\t{%P0[]}, %A1"; + else +- return "vld1.<V_sz_elem>\t%h0, [%1]"; ++ return "vld1.<V_sz_elem>\t%h0, %A1"; + } + [(set (attr "neon_type") + (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) +@@ -4330,14 +4330,14 @@ + + (define_insn "neon_vld1_dup<mode>" + [(set (match_operand:VQX 0 "s_register_operand" "=w") +- (unspec:VQX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand" "r"))] ++ (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")] + UNSPEC_VLD1_DUP))] + "TARGET_NEON" + { + if (GET_MODE_NUNITS (<MODE>mode) > 2) +- return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, [%1]"; ++ return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1"; + else +- return "vld1.<V_sz_elem>\t%h0, [%1]"; ++ return "vld1.<V_sz_elem>\t%h0, %A1"; + } + [(set (attr "neon_type") + (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) +@@ -4346,15 +4346,15 @@ + ) + + (define_insn "neon_vst1<mode>" +- [(set (mem:VDQX (match_operand:SI 0 "s_register_operand" "r")) ++ [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um") + (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")] + UNSPEC_VST1))] + "TARGET_NEON" +- "vst1.<V_sz_elem>\t%h1, [%0]" ++ "vst1.<V_sz_elem>\t%h1, %A0" + [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")]) + + (define_insn "neon_vst1_lane<mode>" +- [(set (mem:<V_elem> (match_operand:SI 0 "s_register_operand" "r")) ++ [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um") + (vec_select:<V_elem> + (match_operand:VDX 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "neon_lane_number" "i")])))] +@@ -4365,9 +4365,9 @@ + if (lane < 0 || lane >= max) + error ("lane out of range"); + if (max == 1) +- return "vst1.<V_sz_elem>\t{%P1}, [%0]"; ++ return "vst1.<V_sz_elem>\t{%P1}, %A0"; + else +- return "vst1.<V_sz_elem>\t{%P1[%c2]}, [%0]"; ++ return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"; + } + [(set (attr "neon_type") + (if_then_else (eq (const_string "<V_mode_nunits>") (const_int 1)) +@@ -4375,7 +4375,7 @@ + (const_string "neon_vst1_vst2_lane")))]) + + (define_insn "neon_vst1_lane<mode>" +- [(set (mem:<V_elem> (match_operand:SI 0 "s_register_operand" "r")) ++ [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um") + (vec_select:<V_elem> + (match_operand:VQX 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "neon_lane_number" "i")])))] +@@ -4394,24 +4394,24 @@ + } + operands[1] = gen_rtx_REG (<V_HALF>mode, regno); + if (max == 2) +- return "vst1.<V_sz_elem>\t{%P1}, [%0]"; ++ return "vst1.<V_sz_elem>\t{%P1}, %A0"; + else +- return "vst1.<V_sz_elem>\t{%P1[%c2]}, [%0]"; ++ return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"; + } + [(set_attr "neon_type" "neon_vst1_vst2_lane")] + ) + + (define_insn "neon_vld2<mode>" + [(set (match_operand:TI 0 "s_register_operand" "=w") +- (unspec:TI [(mem:TI (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD2))] + "TARGET_NEON" + { + if (<V_sz_elem> == 64) +- return "vld1.64\t%h0, [%1]"; ++ return "vld1.64\t%h0, %A1"; + else +- return "vld2.<V_sz_elem>\t%h0, [%1]"; ++ return "vld2.<V_sz_elem>\t%h0, %A1"; + } + [(set (attr "neon_type") + (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) +@@ -4421,16 +4421,16 @@ + + (define_insn "neon_vld2<mode>" + [(set (match_operand:OI 0 "s_register_operand" "=w") +- (unspec:OI [(mem:OI (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:OI 
[(match_operand:OI 1 "neon_struct_operand" "Um") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD2))] + "TARGET_NEON" +- "vld2.<V_sz_elem>\t%h0, [%1]" ++ "vld2.<V_sz_elem>\t%h0, %A1" + [(set_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")]) + + (define_insn "neon_vld2_lane<mode>" + [(set (match_operand:TI 0 "s_register_operand" "=w") +- (unspec:TI [(mem:<V_two_elem> (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um") + (match_operand:TI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +@@ -4447,7 +4447,7 @@ + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = operands[1]; + ops[3] = operands[3]; +- output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, [%2]", ops); ++ output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops); + return ""; + } + [(set_attr "neon_type" "neon_vld1_vld2_lane")] +@@ -4455,7 +4455,7 @@ + + (define_insn "neon_vld2_lane<mode>" + [(set (match_operand:OI 0 "s_register_operand" "=w") +- (unspec:OI [(mem:<V_two_elem> (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um") + (match_operand:OI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +@@ -4477,7 +4477,7 @@ + ops[1] = gen_rtx_REG (DImode, regno + 4); + ops[2] = operands[1]; + ops[3] = GEN_INT (lane); +- output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, [%2]", ops); ++ output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops); + return ""; + } + [(set_attr "neon_type" "neon_vld1_vld2_lane")] +@@ -4485,15 +4485,15 @@ + + (define_insn "neon_vld2_dup<mode>" + [(set (match_operand:TI 0 "s_register_operand" "=w") +- (unspec:TI [(mem:<V_two_elem> (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD2_DUP))] + "TARGET_NEON" + { + if (GET_MODE_NUNITS (<MODE>mode) > 1) +- return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, [%1]"; ++ return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1"; + else +- return "vld1.<V_sz_elem>\t%h0, [%1]"; ++ return "vld1.<V_sz_elem>\t%h0, %A1"; + } + [(set (attr "neon_type") + (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) +@@ -4502,16 +4502,16 @@ + ) + + (define_insn "neon_vst2<mode>" +- [(set (mem:TI (match_operand:SI 0 "s_register_operand" "r")) ++ [(set (match_operand:TI 0 "neon_struct_operand" "=Um") + (unspec:TI [(match_operand:TI 1 "s_register_operand" "w") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST2))] + "TARGET_NEON" + { + if (<V_sz_elem> == 64) +- return "vst1.64\t%h1, [%0]"; ++ return "vst1.64\t%h1, %A0"; + else +- return "vst2.<V_sz_elem>\t%h1, [%0]"; ++ return "vst2.<V_sz_elem>\t%h1, %A0"; + } + [(set (attr "neon_type") + (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) +@@ -4520,17 +4520,17 @@ + ) + + (define_insn "neon_vst2<mode>" +- [(set (mem:OI (match_operand:SI 0 "s_register_operand" "r")) ++ [(set (match_operand:OI 0 "neon_struct_operand" "=Um") + (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST2))] + "TARGET_NEON" +- "vst2.<V_sz_elem>\t%h1, [%0]" ++ "vst2.<V_sz_elem>\t%h1, %A0" + [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")] + ) + + (define_insn "neon_vst2_lane<mode>" +- 
[(set (mem:<V_two_elem> (match_operand:SI 0 "s_register_operand" "r")) ++ [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um") + (unspec:<V_two_elem> + [(match_operand:TI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") +@@ -4548,14 +4548,14 @@ + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 2); + ops[3] = operands[2]; +- output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, [%0]", ops); ++ output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops); + return ""; + } + [(set_attr "neon_type" "neon_vst1_vst2_lane")] + ) + + (define_insn "neon_vst2_lane<mode>" +- [(set (mem:<V_two_elem> (match_operand:SI 0 "s_register_operand" "r")) ++ [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um") + (unspec:<V_two_elem> + [(match_operand:OI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") +@@ -4578,7 +4578,7 @@ + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = GEN_INT (lane); +- output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, [%0]", ops); ++ output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops); + return ""; + } + [(set_attr "neon_type" "neon_vst1_vst2_lane")] +@@ -4586,15 +4586,15 @@ + + (define_insn "neon_vld3<mode>" + [(set (match_operand:EI 0 "s_register_operand" "=w") +- (unspec:EI [(mem:EI (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD3))] + "TARGET_NEON" + { + if (<V_sz_elem> == 64) +- return "vld1.64\t%h0, [%1]"; ++ return "vld1.64\t%h0, %A1"; + else +- return "vld3.<V_sz_elem>\t%h0, [%1]"; ++ return "vld3.<V_sz_elem>\t%h0, %A1"; + } + [(set (attr "neon_type") + (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) +@@ -4603,27 +4603,25 @@ + ) + + (define_expand "neon_vld3<mode>" +- [(match_operand:CI 0 "s_register_operand" "=w") +- (match_operand:SI 1 "s_register_operand" "+r") ++ [(match_operand:CI 0 "s_register_operand") ++ (match_operand:CI 1 "neon_struct_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" + { +- emit_insn (gen_neon_vld3qa<mode> (operands[0], operands[0], +- operands[1], operands[1])); +- emit_insn (gen_neon_vld3qb<mode> (operands[0], operands[0], +- operands[1], operands[1])); ++ rtx mem; ++ ++ mem = adjust_address (operands[1], EImode, 0); ++ emit_insn (gen_neon_vld3qa<mode> (operands[0], mem)); ++ mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode)); ++ emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0])); + DONE; + }) + + (define_insn "neon_vld3qa<mode>" + [(set (match_operand:CI 0 "s_register_operand" "=w") +- (unspec:CI [(mem:CI (match_operand:SI 3 "s_register_operand" "2")) +- (match_operand:CI 1 "s_register_operand" "0") ++ (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +- UNSPEC_VLD3A)) +- (set (match_operand:SI 2 "s_register_operand" "=r") +- (plus:SI (match_dup 3) +- (const_int 24)))] ++ UNSPEC_VLD3A))] + "TARGET_NEON" + { + int regno = REGNO (operands[0]); +@@ -4631,8 +4629,8 @@ + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 4); + ops[2] = gen_rtx_REG (DImode, regno + 8); +- ops[3] = operands[2]; +- output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, [%3]!", ops); ++ ops[3] = operands[1]; ++ output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops); + return ""; + } + [(set_attr 
"neon_type" "neon_vld3_vld4")] +@@ -4640,13 +4638,10 @@ + + (define_insn "neon_vld3qb<mode>" + [(set (match_operand:CI 0 "s_register_operand" "=w") +- (unspec:CI [(mem:CI (match_operand:SI 3 "s_register_operand" "2")) +- (match_operand:CI 1 "s_register_operand" "0") ++ (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um") ++ (match_operand:CI 2 "s_register_operand" "0") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +- UNSPEC_VLD3B)) +- (set (match_operand:SI 2 "s_register_operand" "=r") +- (plus:SI (match_dup 3) +- (const_int 24)))] ++ UNSPEC_VLD3B))] + "TARGET_NEON" + { + int regno = REGNO (operands[0]); +@@ -4654,8 +4649,8 @@ + ops[0] = gen_rtx_REG (DImode, regno + 2); + ops[1] = gen_rtx_REG (DImode, regno + 6); + ops[2] = gen_rtx_REG (DImode, regno + 10); +- ops[3] = operands[2]; +- output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, [%3]!", ops); ++ ops[3] = operands[1]; ++ output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops); + return ""; + } + [(set_attr "neon_type" "neon_vld3_vld4")] +@@ -4663,7 +4658,7 @@ + + (define_insn "neon_vld3_lane<mode>" + [(set (match_operand:EI 0 "s_register_operand" "=w") +- (unspec:EI [(mem:<V_three_elem> (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um") + (match_operand:EI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +@@ -4681,7 +4676,7 @@ + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = operands[1]; + ops[4] = operands[3]; +- output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, [%3]", ++ output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %A3", + ops); + return ""; + } +@@ -4690,7 +4685,7 @@ + + (define_insn "neon_vld3_lane<mode>" + [(set (match_operand:CI 0 "s_register_operand" "=w") +- (unspec:CI [(mem:<V_three_elem> (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um") + (match_operand:CI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +@@ -4713,7 +4708,7 @@ + ops[2] = gen_rtx_REG (DImode, regno + 8); + ops[3] = operands[1]; + ops[4] = GEN_INT (lane); +- output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, [%3]", ++ output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %A3", + ops); + return ""; + } +@@ -4722,7 +4717,7 @@ + + (define_insn "neon_vld3_dup<mode>" + [(set (match_operand:EI 0 "s_register_operand" "=w") +- (unspec:EI [(mem:<V_three_elem> (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD3_DUP))] + "TARGET_NEON" +@@ -4735,11 +4730,11 @@ + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = operands[1]; +- output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, [%3]", ops); ++ output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %A3", ops); + return ""; + } + else +- return "vld1.<V_sz_elem>\t%h0, [%1]"; ++ return "vld1.<V_sz_elem>\t%h0, %A1"; + } + [(set (attr "neon_type") + (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) +@@ -4747,16 +4742,16 @@ + (const_string "neon_vld1_1_2_regs")))]) + + (define_insn "neon_vst3<mode>" +- [(set (mem:EI (match_operand:SI 0 "s_register_operand" "r")) ++ [(set (match_operand:EI 0 "neon_struct_operand" "=Um") + 
(unspec:EI [(match_operand:EI 1 "s_register_operand" "w") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST3))] + "TARGET_NEON" + { + if (<V_sz_elem> == 64) +- return "vst1.64\t%h1, [%0]"; ++ return "vst1.64\t%h1, %A0"; + else +- return "vst3.<V_sz_elem>\t%h1, [%0]"; ++ return "vst3.<V_sz_elem>\t%h1, %A0"; + } + [(set (attr "neon_type") + (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) +@@ -4764,62 +4759,60 @@ + (const_string "neon_vst2_4_regs_vst3_vst4")))]) + + (define_expand "neon_vst3<mode>" +- [(match_operand:SI 0 "s_register_operand" "+r") +- (match_operand:CI 1 "s_register_operand" "w") ++ [(match_operand:CI 0 "neon_struct_operand") ++ (match_operand:CI 1 "s_register_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" + { +- emit_insn (gen_neon_vst3qa<mode> (operands[0], operands[0], operands[1])); +- emit_insn (gen_neon_vst3qb<mode> (operands[0], operands[0], operands[1])); ++ rtx mem; ++ ++ mem = adjust_address (operands[0], EImode, 0); ++ emit_insn (gen_neon_vst3qa<mode> (mem, operands[1])); ++ mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode)); ++ emit_insn (gen_neon_vst3qb<mode> (mem, operands[1])); + DONE; + }) + + (define_insn "neon_vst3qa<mode>" +- [(set (mem:EI (match_operand:SI 1 "s_register_operand" "0")) +- (unspec:EI [(match_operand:CI 2 "s_register_operand" "w") ++ [(set (match_operand:EI 0 "neon_struct_operand" "=Um") ++ (unspec:EI [(match_operand:CI 1 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +- UNSPEC_VST3A)) +- (set (match_operand:SI 0 "s_register_operand" "=r") +- (plus:SI (match_dup 1) +- (const_int 24)))] ++ UNSPEC_VST3A))] + "TARGET_NEON" + { +- int regno = REGNO (operands[2]); ++ int regno = REGNO (operands[1]); + rtx ops[4]; + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 8); +- output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, [%0]!", ops); ++ output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops); + return ""; + } + [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] + ) + + (define_insn "neon_vst3qb<mode>" +- [(set (mem:EI (match_operand:SI 1 "s_register_operand" "0")) +- (unspec:EI [(match_operand:CI 2 "s_register_operand" "w") ++ [(set (match_operand:EI 0 "neon_struct_operand" "=Um") ++ (unspec:EI [(match_operand:CI 1 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +- UNSPEC_VST3B)) +- (set (match_operand:SI 0 "s_register_operand" "=r") +- (plus:SI (match_dup 1) +- (const_int 24)))] ++ UNSPEC_VST3B))] + "TARGET_NEON" + { +- int regno = REGNO (operands[2]); ++ int regno = REGNO (operands[1]); + rtx ops[4]; + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 6); + ops[3] = gen_rtx_REG (DImode, regno + 10); +- output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, [%0]!", ops); ++ output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops); + return ""; + } + [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] + ) + + (define_insn "neon_vst3_lane<mode>" +- [(set (mem:<V_three_elem> (match_operand:SI 0 "s_register_operand" "r")) ++ [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um") + (unspec:<V_three_elem> + [(match_operand:EI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") +@@ -4838,7 +4831,7 @@ + ops[2] = gen_rtx_REG (DImode, regno + 2); + ops[3] = gen_rtx_REG (DImode, regno + 4); + ops[4] = operands[2]; +- 
output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, [%0]", ++ output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %A0", + ops); + return ""; + } +@@ -4846,7 +4839,7 @@ + ) + + (define_insn "neon_vst3_lane<mode>" +- [(set (mem:<V_three_elem> (match_operand:SI 0 "s_register_operand" "r")) ++ [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um") + (unspec:<V_three_elem> + [(match_operand:CI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") +@@ -4870,7 +4863,7 @@ + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 8); + ops[4] = GEN_INT (lane); +- output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, [%0]", ++ output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %A0", + ops); + return ""; + } +@@ -4878,15 +4871,15 @@ + + (define_insn "neon_vld4<mode>" + [(set (match_operand:OI 0 "s_register_operand" "=w") +- (unspec:OI [(mem:OI (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD4))] + "TARGET_NEON" + { + if (<V_sz_elem> == 64) +- return "vld1.64\t%h0, [%1]"; ++ return "vld1.64\t%h0, %A1"; + else +- return "vld4.<V_sz_elem>\t%h0, [%1]"; ++ return "vld4.<V_sz_elem>\t%h0, %A1"; + } + [(set (attr "neon_type") + (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) +@@ -4895,27 +4888,25 @@ + ) + + (define_expand "neon_vld4<mode>" +- [(match_operand:XI 0 "s_register_operand" "=w") +- (match_operand:SI 1 "s_register_operand" "+r") ++ [(match_operand:XI 0 "s_register_operand") ++ (match_operand:XI 1 "neon_struct_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" + { +- emit_insn (gen_neon_vld4qa<mode> (operands[0], operands[0], +- operands[1], operands[1])); +- emit_insn (gen_neon_vld4qb<mode> (operands[0], operands[0], +- operands[1], operands[1])); ++ rtx mem; ++ ++ mem = adjust_address (operands[1], OImode, 0); ++ emit_insn (gen_neon_vld4qa<mode> (operands[0], mem)); ++ mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode)); ++ emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0])); + DONE; + }) + + (define_insn "neon_vld4qa<mode>" + [(set (match_operand:XI 0 "s_register_operand" "=w") +- (unspec:XI [(mem:XI (match_operand:SI 3 "s_register_operand" "2")) +- (match_operand:XI 1 "s_register_operand" "0") ++ (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +- UNSPEC_VLD4A)) +- (set (match_operand:SI 2 "s_register_operand" "=r") +- (plus:SI (match_dup 3) +- (const_int 32)))] ++ UNSPEC_VLD4A))] + "TARGET_NEON" + { + int regno = REGNO (operands[0]); +@@ -4924,8 +4915,8 @@ + ops[1] = gen_rtx_REG (DImode, regno + 4); + ops[2] = gen_rtx_REG (DImode, regno + 8); + ops[3] = gen_rtx_REG (DImode, regno + 12); +- ops[4] = operands[2]; +- output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, [%4]!", ops); ++ ops[4] = operands[1]; ++ output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops); + return ""; + } + [(set_attr "neon_type" "neon_vld3_vld4")] +@@ -4933,13 +4924,10 @@ + + (define_insn "neon_vld4qb<mode>" + [(set (match_operand:XI 0 "s_register_operand" "=w") +- (unspec:XI [(mem:XI (match_operand:SI 3 "s_register_operand" "2")) +- (match_operand:XI 1 "s_register_operand" "0") ++ (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um") ++ (match_operand:XI 2 "s_register_operand" "0") + (unspec:VQ [(const_int 0)] 
UNSPEC_VSTRUCTDUMMY)] +- UNSPEC_VLD4B)) +- (set (match_operand:SI 2 "s_register_operand" "=r") +- (plus:SI (match_dup 3) +- (const_int 32)))] ++ UNSPEC_VLD4B))] + "TARGET_NEON" + { + int regno = REGNO (operands[0]); +@@ -4948,8 +4936,8 @@ + ops[1] = gen_rtx_REG (DImode, regno + 6); + ops[2] = gen_rtx_REG (DImode, regno + 10); + ops[3] = gen_rtx_REG (DImode, regno + 14); +- ops[4] = operands[2]; +- output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, [%4]!", ops); ++ ops[4] = operands[1]; ++ output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops); + return ""; + } + [(set_attr "neon_type" "neon_vld3_vld4")] +@@ -4957,7 +4945,7 @@ + + (define_insn "neon_vld4_lane<mode>" + [(set (match_operand:OI 0 "s_register_operand" "=w") +- (unspec:OI [(mem:<V_four_elem> (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um") + (match_operand:OI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +@@ -4976,7 +4964,7 @@ + ops[3] = gen_rtx_REG (DImode, regno + 6); + ops[4] = operands[1]; + ops[5] = operands[3]; +- output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, [%4]", ++ output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4", + ops); + return ""; + } +@@ -4985,7 +4973,7 @@ + + (define_insn "neon_vld4_lane<mode>" + [(set (match_operand:XI 0 "s_register_operand" "=w") +- (unspec:XI [(mem:<V_four_elem> (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um") + (match_operand:XI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +@@ -5009,7 +4997,7 @@ + ops[3] = gen_rtx_REG (DImode, regno + 12); + ops[4] = operands[1]; + ops[5] = GEN_INT (lane); +- output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, [%4]", ++ output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4", + ops); + return ""; + } +@@ -5018,7 +5006,7 @@ + + (define_insn "neon_vld4_dup<mode>" + [(set (match_operand:OI 0 "s_register_operand" "=w") +- (unspec:OI [(mem:<V_four_elem> (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD4_DUP))] + "TARGET_NEON" +@@ -5032,12 +5020,12 @@ + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 6); + ops[4] = operands[1]; +- output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, [%4]", ++ output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4", + ops); + return ""; + } + else +- return "vld1.<V_sz_elem>\t%h0, [%1]"; ++ return "vld1.<V_sz_elem>\t%h0, %A1"; + } + [(set (attr "neon_type") + (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) +@@ -5046,16 +5034,16 @@ + ) + + (define_insn "neon_vst4<mode>" +- [(set (mem:OI (match_operand:SI 0 "s_register_operand" "r")) ++ [(set (match_operand:OI 0 "neon_struct_operand" "=Um") + (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST4))] + "TARGET_NEON" + { + if (<V_sz_elem> == 64) +- return "vst1.64\t%h1, [%0]"; ++ return "vst1.64\t%h1, %A0"; + else +- return "vst4.<V_sz_elem>\t%h1, [%0]"; ++ return "vst4.<V_sz_elem>\t%h1, %A0"; + } + [(set (attr "neon_type") + (if_then_else (eq (const_string 
"<V_sz_elem>") (const_string "64")) +@@ -5064,64 +5052,62 @@ + ) + + (define_expand "neon_vst4<mode>" +- [(match_operand:SI 0 "s_register_operand" "+r") +- (match_operand:XI 1 "s_register_operand" "w") ++ [(match_operand:XI 0 "neon_struct_operand") ++ (match_operand:XI 1 "s_register_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" + { +- emit_insn (gen_neon_vst4qa<mode> (operands[0], operands[0], operands[1])); +- emit_insn (gen_neon_vst4qb<mode> (operands[0], operands[0], operands[1])); ++ rtx mem; ++ ++ mem = adjust_address (operands[0], OImode, 0); ++ emit_insn (gen_neon_vst4qa<mode> (mem, operands[1])); ++ mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode)); ++ emit_insn (gen_neon_vst4qb<mode> (mem, operands[1])); + DONE; + }) + + (define_insn "neon_vst4qa<mode>" +- [(set (mem:OI (match_operand:SI 1 "s_register_operand" "0")) +- (unspec:OI [(match_operand:XI 2 "s_register_operand" "w") ++ [(set (match_operand:OI 0 "neon_struct_operand" "=Um") ++ (unspec:OI [(match_operand:XI 1 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +- UNSPEC_VST4A)) +- (set (match_operand:SI 0 "s_register_operand" "=r") +- (plus:SI (match_dup 1) +- (const_int 32)))] ++ UNSPEC_VST4A))] + "TARGET_NEON" + { +- int regno = REGNO (operands[2]); ++ int regno = REGNO (operands[1]); + rtx ops[5]; + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 8); + ops[4] = gen_rtx_REG (DImode, regno + 12); +- output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, [%0]!", ops); ++ output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops); + return ""; + } + [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] + ) + + (define_insn "neon_vst4qb<mode>" +- [(set (mem:OI (match_operand:SI 1 "s_register_operand" "0")) +- (unspec:OI [(match_operand:XI 2 "s_register_operand" "w") ++ [(set (match_operand:OI 0 "neon_struct_operand" "=Um") ++ (unspec:OI [(match_operand:XI 1 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +- UNSPEC_VST4B)) +- (set (match_operand:SI 0 "s_register_operand" "=r") +- (plus:SI (match_dup 1) +- (const_int 32)))] ++ UNSPEC_VST4B))] + "TARGET_NEON" + { +- int regno = REGNO (operands[2]); ++ int regno = REGNO (operands[1]); + rtx ops[5]; + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 6); + ops[3] = gen_rtx_REG (DImode, regno + 10); + ops[4] = gen_rtx_REG (DImode, regno + 14); +- output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, [%0]!", ops); ++ output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops); + return ""; + } + [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] + ) + + (define_insn "neon_vst4_lane<mode>" +- [(set (mem:<V_four_elem> (match_operand:SI 0 "s_register_operand" "r")) ++ [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um") + (unspec:<V_four_elem> + [(match_operand:OI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") +@@ -5141,7 +5127,7 @@ + ops[3] = gen_rtx_REG (DImode, regno + 4); + ops[4] = gen_rtx_REG (DImode, regno + 6); + ops[5] = operands[2]; +- output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, [%0]", ++ output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0", + ops); + return ""; + } +@@ -5149,7 +5135,7 @@ + ) + + (define_insn "neon_vst4_lane<mode>" +- [(set (mem:<V_four_elem> (match_operand:SI 0 "s_register_operand" 
"r")) ++ [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um") + (unspec:<V_four_elem> + [(match_operand:XI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") +@@ -5174,7 +5160,7 @@ + ops[3] = gen_rtx_REG (DImode, regno + 8); + ops[4] = gen_rtx_REG (DImode, regno + 12); + ops[5] = GEN_INT (lane); +- output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, [%0]", ++ output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0", + ops); + return ""; + } +Index: gcc-4_6-branch/gcc/config/arm/predicates.md +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/predicates.md 2011-09-16 19:58:21.000000000 -0700 ++++ gcc-4_6-branch/gcc/config/arm/predicates.md 2011-09-16 20:19:03.967834108 -0700 +@@ -686,3 +686,8 @@ + + (define_special_predicate "add_operator" + (match_code "plus")) ++ ++(define_special_predicate "neon_struct_operand" ++ (and (match_code "mem") ++ (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2)"))) ++ +Index: gcc-4_6-branch/gcc/doc/tm.texi +=================================================================== +--- gcc-4_6-branch.orig/gcc/doc/tm.texi 2011-06-24 08:13:00.000000000 -0700 ++++ gcc-4_6-branch/gcc/doc/tm.texi 2011-09-16 20:16:00.257564628 -0700 +@@ -2533,7 +2533,7 @@ + register, so @code{TARGET_PREFERRED_RELOAD_CLASS} returns @code{NO_REGS} when + @var{x} is a floating-point constant. If the constant can't be loaded + into any kind of register, code generation will be better if +-@code{LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead ++@code{TARGET_LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead + of using @code{TARGET_PREFERRED_RELOAD_CLASS}. + + If an insn has pseudos in it after register allocation, reload will go +@@ -2570,8 +2570,8 @@ + register, so @code{PREFERRED_RELOAD_CLASS} returns @code{NO_REGS} when + @var{x} is a floating-point constant. If the constant can't be loaded + into any kind of register, code generation will be better if +-@code{LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead +-of using @code{PREFERRED_RELOAD_CLASS}. ++@code{TARGET_LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead ++of using @code{TARGET_PREFERRED_RELOAD_CLASS}. + + If an insn has pseudos in it after register allocation, reload will go + through the alternatives and call repeatedly @code{PREFERRED_RELOAD_CLASS} +@@ -4319,6 +4319,34 @@ + must have move patterns for this mode. + @end deftypefn + ++@deftypefn {Target Hook} bool TARGET_ARRAY_MODE_SUPPORTED_P (enum machine_mode @var{mode}, unsigned HOST_WIDE_INT @var{nelems}) ++Return true if GCC should try to use a scalar mode to store an array ++of @var{nelems} elements, given that each element has mode @var{mode}. ++Returning true here overrides the usual @code{MAX_FIXED_MODE} limit ++and allows GCC to use any defined integer mode. ++ ++One use of this hook is to support vector load and store operations ++that operate on several homogeneous vectors. For example, ARM NEON ++has operations like: ++ ++@smallexample ++int8x8x3_t vld3_s8 (const int8_t *) ++@end smallexample ++ ++where the return type is defined as: ++ ++@smallexample ++typedef struct int8x8x3_t ++@{ ++ int8x8_t val[3]; ++@} int8x8x3_t; ++@end smallexample ++ ++If this hook allows @code{val} to have a scalar mode, then ++@code{int8x8x3_t} can have the same mode. GCC can then store ++@code{int8x8x3_t}s in registers rather than forcing them onto the stack. 
++@end deftypefn ++ + @deftypefn {Target Hook} bool TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P (enum machine_mode @var{mode}) + Define this to return nonzero for machine modes for which the port has + small register classes. If this target hook returns nonzero for a given +@@ -5577,13 +5605,13 @@ + @code{TARGET_MODE_DEPENDENT_ADDRESS_P} target hook. + @end defmac + +-@defmac LEGITIMATE_CONSTANT_P (@var{x}) +-A C expression that is nonzero if @var{x} is a legitimate constant for +-an immediate operand on the target machine. You can assume that +-@var{x} satisfies @code{CONSTANT_P}, so you need not check this. In fact, +-@samp{1} is a suitable definition for this macro on machines where +-anything @code{CONSTANT_P} is valid. +-@end defmac ++@deftypefn {Target Hook} bool TARGET_LEGITIMATE_CONSTANT_P (enum machine_mode @var{mode}, rtx @var{x}) ++This hook returns true if @var{x} is a legitimate constant for a ++@var{mode}-mode immediate operand on the target machine. You can assume that ++@var{x} satisfies @code{CONSTANT_P}, so you need not check this. ++ ++The default definition returns true. ++@end deftypefn + + @deftypefn {Target Hook} rtx TARGET_DELEGITIMIZE_ADDRESS (rtx @var{x}) + This hook is used to undo the possibly obfuscating effects of the +Index: gcc-4_6-branch/gcc/doc/tm.texi.in +=================================================================== +--- gcc-4_6-branch.orig/gcc/doc/tm.texi.in 2011-06-24 08:13:00.000000000 -0700 ++++ gcc-4_6-branch/gcc/doc/tm.texi.in 2011-09-16 20:16:00.257564628 -0700 +@@ -2521,7 +2521,7 @@ + register, so @code{TARGET_PREFERRED_RELOAD_CLASS} returns @code{NO_REGS} when + @var{x} is a floating-point constant. If the constant can't be loaded + into any kind of register, code generation will be better if +-@code{LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead ++@code{TARGET_LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead + of using @code{TARGET_PREFERRED_RELOAD_CLASS}. + + If an insn has pseudos in it after register allocation, reload will go +@@ -2558,8 +2558,8 @@ + register, so @code{PREFERRED_RELOAD_CLASS} returns @code{NO_REGS} when + @var{x} is a floating-point constant. If the constant can't be loaded + into any kind of register, code generation will be better if +-@code{LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead +-of using @code{PREFERRED_RELOAD_CLASS}. ++@code{TARGET_LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead ++of using @code{TARGET_PREFERRED_RELOAD_CLASS}. + + If an insn has pseudos in it after register allocation, reload will go + through the alternatives and call repeatedly @code{PREFERRED_RELOAD_CLASS} +@@ -4305,6 +4305,8 @@ + must have move patterns for this mode. + @end deftypefn + ++@hook TARGET_ARRAY_MODE_SUPPORTED_P ++ + @hook TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P + Define this to return nonzero for machine modes for which the port has + small register classes. If this target hook returns nonzero for a given +@@ -5555,13 +5557,13 @@ + @code{TARGET_MODE_DEPENDENT_ADDRESS_P} target hook. + @end defmac + +-@defmac LEGITIMATE_CONSTANT_P (@var{x}) +-A C expression that is nonzero if @var{x} is a legitimate constant for +-an immediate operand on the target machine. You can assume that +-@var{x} satisfies @code{CONSTANT_P}, so you need not check this. In fact, +-@samp{1} is a suitable definition for this macro on machines where +-anything @code{CONSTANT_P} is valid. 
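Editorial aside, not part of the patch: a small user-level example of what the TARGET_ARRAY_MODE_SUPPORTED_P documentation above describes. With the hook accepting blocks of two to four NEON vectors, the int8x8x3_t temporary below can be given a wide scalar mode and kept in a block of D registers between the load, the arithmetic and the store, rather than being forced onto the stack. Illustrative only; requires an ARM target with NEON, and the function name is made up for the example.

#include <arm_neon.h>

/* De-interleaved load of 24 bytes, add two of the channels, store back.  */
void
mix_channels (const int8_t *src, int8_t *dst)
{
  int8x8x3_t rgb = vld3_s8 (src);
  rgb.val[0] = vadd_s8 (rgb.val[0], rgb.val[1]);
  vst3_s8 (dst, rgb);
}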
+-@end defmac ++@hook TARGET_LEGITIMATE_CONSTANT_P ++This hook returns true if @var{x} is a legitimate constant for a ++@var{mode}-mode immediate operand on the target machine. You can assume that ++@var{x} satisfies @code{CONSTANT_P}, so you need not check this. ++ ++The default definition returns true. ++@end deftypefn + + @hook TARGET_DELEGITIMIZE_ADDRESS + This hook is used to undo the possibly obfuscating effects of the +Index: gcc-4_6-branch/gcc/expr.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/expr.c 2011-09-16 20:14:32.000000000 -0700 ++++ gcc-4_6-branch/gcc/expr.c 2011-09-16 20:16:00.267564792 -0700 +@@ -1497,7 +1497,7 @@ + if (nregs == 0) + return; + +- if (CONSTANT_P (x) && ! LEGITIMATE_CONSTANT_P (x)) ++ if (CONSTANT_P (x) && !targetm.legitimate_constant_p (mode, x)) + x = validize_mem (force_const_mem (mode, x)); + + /* See if the machine can do this with a load multiple insn. */ +@@ -2308,7 +2308,7 @@ + offset -= size; + + cst = (*constfun) (constfundata, offset, mode); +- if (!LEGITIMATE_CONSTANT_P (cst)) ++ if (!targetm.legitimate_constant_p (mode, cst)) + return 0; + + if (!reverse) +@@ -3363,7 +3363,7 @@ + + y_cst = y; + +- if (!LEGITIMATE_CONSTANT_P (y)) ++ if (!targetm.legitimate_constant_p (mode, y)) + { + y = force_const_mem (mode, y); + +@@ -3419,7 +3419,7 @@ + + REAL_VALUE_FROM_CONST_DOUBLE (r, y); + +- if (LEGITIMATE_CONSTANT_P (y)) ++ if (targetm.legitimate_constant_p (dstmode, y)) + oldcost = rtx_cost (y, SET, speed); + else + oldcost = rtx_cost (force_const_mem (dstmode, y), SET, speed); +@@ -3442,7 +3442,7 @@ + + trunc_y = CONST_DOUBLE_FROM_REAL_VALUE (r, srcmode); + +- if (LEGITIMATE_CONSTANT_P (trunc_y)) ++ if (targetm.legitimate_constant_p (srcmode, trunc_y)) + { + /* Skip if the target needs extra instructions to perform + the extension. */ +@@ -3855,7 +3855,7 @@ + by setting SKIP to 0. */ + skip = (reg_parm_stack_space == 0) ? 0 : not_stack; + +- if (CONSTANT_P (x) && ! LEGITIMATE_CONSTANT_P (x)) ++ if (CONSTANT_P (x) && !targetm.legitimate_constant_p (mode, x)) + x = validize_mem (force_const_mem (mode, x)); + + /* If X is a hard register in a non-integer mode, copy it into a pseudo; +@@ -9108,7 +9108,7 @@ + constant and we don't need a memory reference. */ + if (CONSTANT_P (op0) + && mode2 != BLKmode +- && LEGITIMATE_CONSTANT_P (op0) ++ && targetm.legitimate_constant_p (mode2, op0) + && !must_force_mem) + op0 = force_reg (mode2, op0); + +Index: gcc-4_6-branch/gcc/hooks.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/hooks.c 2011-06-24 08:33:48.000000000 -0700 ++++ gcc-4_6-branch/gcc/hooks.c 2011-09-16 20:16:00.267564792 -0700 +@@ -101,6 +101,15 @@ + return true; + } + ++/* Generic hook that takes (enum machine_mode, unsigned HOST_WIDE_INT) ++ and returns false. */ ++bool ++hook_bool_mode_uhwi_false (enum machine_mode mode ATTRIBUTE_UNUSED, ++ unsigned HOST_WIDE_INT value ATTRIBUTE_UNUSED) ++{ ++ return false; ++} ++ + /* Generic hook that takes (FILE *, const char *) and does nothing. 
*/ + void + hook_void_FILEptr_constcharptr (FILE *a ATTRIBUTE_UNUSED, const char *b ATTRIBUTE_UNUSED) +Index: gcc-4_6-branch/gcc/hooks.h +=================================================================== +--- gcc-4_6-branch.orig/gcc/hooks.h 2011-06-24 08:33:48.000000000 -0700 ++++ gcc-4_6-branch/gcc/hooks.h 2011-09-16 20:16:00.267564792 -0700 +@@ -34,6 +34,8 @@ + extern bool hook_bool_mode_true (enum machine_mode); + extern bool hook_bool_mode_const_rtx_false (enum machine_mode, const_rtx); + extern bool hook_bool_mode_const_rtx_true (enum machine_mode, const_rtx); ++extern bool hook_bool_mode_uhwi_false (enum machine_mode, ++ unsigned HOST_WIDE_INT); + extern bool hook_bool_tree_false (tree); + extern bool hook_bool_const_tree_false (const_tree); + extern bool hook_bool_tree_true (tree); +Index: gcc-4_6-branch/gcc/recog.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/recog.c 2011-06-24 08:33:49.000000000 -0700 ++++ gcc-4_6-branch/gcc/recog.c 2011-09-16 20:16:00.277564886 -0700 +@@ -930,7 +930,9 @@ + return ((GET_MODE (op) == VOIDmode || GET_MODE (op) == mode + || mode == VOIDmode) + && (! flag_pic || LEGITIMATE_PIC_OPERAND_P (op)) +- && LEGITIMATE_CONSTANT_P (op)); ++ && targetm.legitimate_constant_p (mode == VOIDmode ++ ? GET_MODE (op) ++ : mode, op)); + + /* Except for certain constants with VOIDmode, already checked for, + OP's mode must match MODE if MODE specifies a mode. */ +@@ -1107,7 +1109,9 @@ + && (GET_MODE (op) == mode || mode == VOIDmode + || GET_MODE (op) == VOIDmode) + && (! flag_pic || LEGITIMATE_PIC_OPERAND_P (op)) +- && LEGITIMATE_CONSTANT_P (op)); ++ && targetm.legitimate_constant_p (mode == VOIDmode ++ ? GET_MODE (op) ++ : mode, op)); + } + + /* Returns 1 if OP is an operand that is a CONST_INT. */ +Index: gcc-4_6-branch/gcc/reload.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/reload.c 2011-06-24 08:33:49.000000000 -0700 ++++ gcc-4_6-branch/gcc/reload.c 2011-09-16 20:16:00.277564886 -0700 +@@ -4721,7 +4721,8 @@ + simplify_gen_subreg (GET_MODE (x), reg_equiv_constant[regno], + GET_MODE (SUBREG_REG (x)), SUBREG_BYTE (x)); + gcc_assert (tem); +- if (CONSTANT_P (tem) && !LEGITIMATE_CONSTANT_P (tem)) ++ if (CONSTANT_P (tem) ++ && !targetm.legitimate_constant_p (GET_MODE (x), tem)) + { + tem = force_const_mem (GET_MODE (x), tem); + i = find_reloads_address (GET_MODE (tem), &tem, XEXP (tem, 0), +@@ -6049,7 +6050,7 @@ + enum reload_type type, int ind_levels) + { + if (CONSTANT_P (x) +- && (! LEGITIMATE_CONSTANT_P (x) ++ && (!targetm.legitimate_constant_p (mode, x) + || targetm.preferred_reload_class (x, rclass) == NO_REGS)) + { + x = force_const_mem (mode, x); +@@ -6059,7 +6060,7 @@ + + else if (GET_CODE (x) == PLUS + && CONSTANT_P (XEXP (x, 1)) +- && (! 
LEGITIMATE_CONSTANT_P (XEXP (x, 1)) ++ && (!targetm.legitimate_constant_p (GET_MODE (x), XEXP (x, 1)) + || targetm.preferred_reload_class (XEXP (x, 1), rclass) + == NO_REGS)) + { +Index: gcc-4_6-branch/gcc/reload1.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/reload1.c 2011-06-24 08:33:49.000000000 -0700 ++++ gcc-4_6-branch/gcc/reload1.c 2011-09-16 20:16:00.277564886 -0700 +@@ -4155,6 +4155,9 @@ + } + else if (function_invariant_p (x)) + { ++ enum machine_mode mode; ++ ++ mode = GET_MODE (SET_DEST (set)); + if (GET_CODE (x) == PLUS) + { + /* This is PLUS of frame pointer and a constant, +@@ -4167,12 +4170,11 @@ + reg_equiv_invariant[i] = x; + num_eliminable_invariants++; + } +- else if (LEGITIMATE_CONSTANT_P (x)) ++ else if (targetm.legitimate_constant_p (mode, x)) + reg_equiv_constant[i] = x; + else + { +- reg_equiv_memory_loc[i] +- = force_const_mem (GET_MODE (SET_DEST (set)), x); ++ reg_equiv_memory_loc[i] = force_const_mem (mode, x); + if (! reg_equiv_memory_loc[i]) + reg_equiv_init[i] = NULL_RTX; + } +Index: gcc-4_6-branch/gcc/stor-layout.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/stor-layout.c 2011-06-24 08:33:49.000000000 -0700 ++++ gcc-4_6-branch/gcc/stor-layout.c 2011-09-16 20:16:00.287564867 -0700 +@@ -546,6 +546,34 @@ + return MIN (BIGGEST_ALIGNMENT, MAX (1, mode_base_align[mode]*BITS_PER_UNIT)); + } + ++/* Return the natural mode of an array, given that it is SIZE bytes in ++ total and has elements of type ELEM_TYPE. */ ++ ++static enum machine_mode ++mode_for_array (tree elem_type, tree size) ++{ ++ tree elem_size; ++ unsigned HOST_WIDE_INT int_size, int_elem_size; ++ bool limit_p; ++ ++ /* One-element arrays get the component type's mode. */ ++ elem_size = TYPE_SIZE (elem_type); ++ if (simple_cst_equal (size, elem_size)) ++ return TYPE_MODE (elem_type); ++ ++ limit_p = true; ++ if (host_integerp (size, 1) && host_integerp (elem_size, 1)) ++ { ++ int_size = tree_low_cst (size, 1); ++ int_elem_size = tree_low_cst (elem_size, 1); ++ if (int_elem_size > 0 ++ && int_size % int_elem_size == 0 ++ && targetm.array_mode_supported_p (TYPE_MODE (elem_type), ++ int_size / int_elem_size)) ++ limit_p = false; ++ } ++ return mode_for_size_tree (size, MODE_INT, limit_p); ++} + + /* Subroutine of layout_decl: Force alignment required for the data type. + But if the decl itself wants greater alignment, don't override that. */ +@@ -2039,14 +2067,8 @@ + && (TYPE_MODE (TREE_TYPE (type)) != BLKmode + || TYPE_NO_FORCE_BLK (TREE_TYPE (type)))) + { +- /* One-element arrays get the component type's mode. */ +- if (simple_cst_equal (TYPE_SIZE (type), +- TYPE_SIZE (TREE_TYPE (type)))) +- SET_TYPE_MODE (type, TYPE_MODE (TREE_TYPE (type))); +- else +- SET_TYPE_MODE (type, mode_for_size_tree (TYPE_SIZE (type), +- MODE_INT, 1)); +- ++ SET_TYPE_MODE (type, mode_for_array (TREE_TYPE (type), ++ TYPE_SIZE (type))); + if (TYPE_MODE (type) != BLKmode + && STRICT_ALIGNMENT && TYPE_ALIGN (type) < BIGGEST_ALIGNMENT + && TYPE_ALIGN (type) < GET_MODE_ALIGNMENT (TYPE_MODE (type))) +Index: gcc-4_6-branch/gcc/target.def +=================================================================== +--- gcc-4_6-branch.orig/gcc/target.def 2011-06-24 08:33:48.000000000 -0700 ++++ gcc-4_6-branch/gcc/target.def 2011-09-16 20:16:00.287564867 -0700 +@@ -1344,6 +1344,13 @@ + unsigned, (unsigned nunroll, struct loop *loop), + NULL) + ++/* True if X is a legitimate MODE-mode immediate operand. 
*/ ++DEFHOOK ++(legitimate_constant_p, ++ "", ++ bool, (enum machine_mode mode, rtx x), ++ default_legitimate_constant_p) ++ + /* True if the constant X cannot be placed in the constant pool. */ + DEFHOOK + (cannot_force_const_mem, +@@ -1611,6 +1618,38 @@ + bool, (enum machine_mode mode), + hook_bool_mode_false) + ++/* True if we should try to use a scalar mode to represent an array, ++ overriding the usual MAX_FIXED_MODE limit. */ ++DEFHOOK ++(array_mode_supported_p, ++ "Return true if GCC should try to use a scalar mode to store an array\n\ ++of @var{nelems} elements, given that each element has mode @var{mode}.\n\ ++Returning true here overrides the usual @code{MAX_FIXED_MODE} limit\n\ ++and allows GCC to use any defined integer mode.\n\ ++\n\ ++One use of this hook is to support vector load and store operations\n\ ++that operate on several homogeneous vectors. For example, ARM NEON\n\ ++has operations like:\n\ ++\n\ ++@smallexample\n\ ++int8x8x3_t vld3_s8 (const int8_t *)\n\ ++@end smallexample\n\ ++\n\ ++where the return type is defined as:\n\ ++\n\ ++@smallexample\n\ ++typedef struct int8x8x3_t\n\ ++@{\n\ ++ int8x8_t val[3];\n\ ++@} int8x8x3_t;\n\ ++@end smallexample\n\ ++\n\ ++If this hook allows @code{val} to have a scalar mode, then\n\ ++@code{int8x8x3_t} can have the same mode. GCC can then store\n\ ++@code{int8x8x3_t}s in registers rather than forcing them onto the stack.", ++ bool, (enum machine_mode mode, unsigned HOST_WIDE_INT nelems), ++ hook_bool_mode_uhwi_false) ++ + /* Compute cost of moving data from a register of class FROM to one of + TO, using MODE. */ + DEFHOOK +Index: gcc-4_6-branch/gcc/targhooks.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/targhooks.c 2011-06-24 08:33:48.000000000 -0700 ++++ gcc-4_6-branch/gcc/targhooks.c 2011-09-16 20:16:00.287564867 -0700 +@@ -1519,4 +1519,15 @@ + { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + ++bool ++default_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, ++ rtx x ATTRIBUTE_UNUSED) ++{ ++#ifdef LEGITIMATE_CONSTANT_P ++ return LEGITIMATE_CONSTANT_P (x); ++#else ++ return true; ++#endif ++} ++ + #include "gt-targhooks.h" +Index: gcc-4_6-branch/gcc/targhooks.h +=================================================================== +--- gcc-4_6-branch.orig/gcc/targhooks.h 2011-06-24 08:33:48.000000000 -0700 ++++ gcc-4_6-branch/gcc/targhooks.h 2011-09-16 20:16:00.287564867 -0700 +@@ -183,3 +183,4 @@ + + extern void *default_get_pch_validity (size_t *); + extern const char *default_pch_valid_p (const void *, size_t); ++extern bool default_legitimate_constant_p (enum machine_mode, rtx); +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon-vld3-1.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon-vld3-1.c 2011-09-16 20:16:00.287564867 -0700 +@@ -0,0 +1,27 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_hw } */ ++/* { dg-options "-O2" } */ ++/* { dg-add-options arm_neon } */ ++ ++#include "arm_neon.h" ++ ++uint32_t buffer[12]; ++ ++void __attribute__((noinline)) ++foo (uint32_t *a) ++{ ++ uint32x4x3_t x; ++ ++ x = vld3q_u32 (a); ++ x.val[0] = vaddq_u32 (x.val[0], x.val[1]); ++ vst3q_u32 (a, x); ++} ++ ++int ++main (void) ++{ ++ buffer[0] = 1; ++ buffer[1] = 2; ++ foo (buffer); ++ return buffer[0] != 3; ++} +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon-vst3-1.c 
+=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon-vst3-1.c 2011-09-16 20:16:00.287564867 -0700 +@@ -0,0 +1,25 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_hw } */ ++/* { dg-options "-O2" } */ ++/* { dg-add-options arm_neon } */ ++ ++#include "arm_neon.h" ++ ++uint32_t buffer[64]; ++ ++void __attribute__((noinline)) ++foo (uint32_t *a) ++{ ++ uint32x4x3_t x; ++ ++ x = vld3q_u32 (a); ++ a[35] = 1; ++ vst3q_lane_u32 (a + 32, x, 1); ++} ++ ++int ++main (void) ++{ ++ foo (buffer); ++ return buffer[35] != 1; ++} +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupf32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupf32.c 2011-09-16 20:16:00.297564810 -0700 +@@ -15,5 +15,5 @@ + out_float32x4_t = vld1q_dup_f32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp16.c 2011-09-16 20:16:00.297564810 -0700 +@@ -15,5 +15,5 @@ + out_poly16x8_t = vld1q_dup_p16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp8.c 2011-09-16 20:16:00.297564810 -0700 +@@ -15,5 +15,5 @@ + out_poly8x16_t = vld1q_dup_p8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups16.c +=================================================================== +--- 
gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups16.c 2011-09-16 20:16:00.297564810 -0700 +@@ -15,5 +15,5 @@ + out_int16x8_t = vld1q_dup_s16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups32.c 2011-09-16 20:16:00.297564810 -0700 +@@ -15,5 +15,5 @@ + out_int32x4_t = vld1q_dup_s32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups64.c 2011-09-16 20:16:00.347564808 -0700 +@@ -15,5 +15,5 @@ + out_int64x2_t = vld1q_dup_s64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups8.c 2011-09-16 20:16:00.347564808 -0700 +@@ -15,5 +15,5 @@ + out_int8x16_t = vld1q_dup_s8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu16.c 
+=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu16.c 2011-09-16 20:16:00.347564808 -0700 +@@ -15,5 +15,5 @@ + out_uint16x8_t = vld1q_dup_u16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu32.c 2011-09-16 20:16:00.347564808 -0700 +@@ -15,5 +15,5 @@ + out_uint32x4_t = vld1q_dup_u32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu64.c 2011-09-16 20:16:00.347564808 -0700 +@@ -15,5 +15,5 @@ + out_uint64x2_t = vld1q_dup_u64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu8.c 2011-09-16 20:16:00.347564808 -0700 +@@ -15,5 +15,5 @@ + out_uint8x16_t = vld1q_dup_u8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: 
gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanef32.c 2011-09-16 20:16:00.347564808 -0700 +@@ -16,5 +16,5 @@ + out_float32x4_t = vld1q_lane_f32 (0, arg1_float32x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanep16.c 2011-09-16 20:16:00.347564808 -0700 +@@ -16,5 +16,5 @@ + out_poly16x8_t = vld1q_lane_p16 (0, arg1_poly16x8_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanep8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanep8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanep8.c 2011-09-16 20:16:00.347564808 -0700 +@@ -16,5 +16,5 @@ + out_poly8x16_t = vld1q_lane_p8 (0, arg1_poly8x16_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes16.c 2011-09-16 20:16:00.347564808 -0700 +@@ -16,5 +16,5 @@ + out_int16x8_t = vld1q_lane_s16 (0, arg1_int16x8_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: 
gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes32.c 2011-09-16 20:16:00.347564808 -0700 +@@ -16,5 +16,5 @@ + out_int32x4_t = vld1q_lane_s32 (0, arg1_int32x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes64.c 2011-09-16 20:16:00.347564808 -0700 +@@ -16,5 +16,5 @@ + out_int64x2_t = vld1q_lane_s64 (0, arg1_int64x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes8.c 2011-09-16 20:16:00.347564808 -0700 +@@ -16,5 +16,5 @@ + out_int8x16_t = vld1q_lane_s8 (0, arg1_int8x16_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu16.c 2011-09-16 20:16:00.347564808 -0700 +@@ -16,5 +16,5 @@ + out_uint16x8_t = vld1q_lane_u16 (0, arg1_uint16x8_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu32.c 
+=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu32.c 2011-09-16 20:16:00.347564808 -0700 +@@ -16,5 +16,5 @@ + out_uint32x4_t = vld1q_lane_u32 (0, arg1_uint32x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu64.c 2011-09-16 20:16:00.347564808 -0700 +@@ -16,5 +16,5 @@ + out_uint64x2_t = vld1q_lane_u64 (0, arg1_uint64x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu8.c 2011-09-16 20:16:00.347564808 -0700 +@@ -16,5 +16,5 @@ + out_uint8x16_t = vld1q_lane_u8 (0, arg1_uint8x16_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qf32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Qf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qf32.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_float32x4_t = vld1q_f32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Qp16.c 2011-06-24 
08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qp16.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_poly16x8_t = vld1q_p16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qp8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Qp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qp8.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_poly8x16_t = vld1q_p8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qs16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Qs16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qs16.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_int16x8_t = vld1q_s16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qs32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Qs32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qs32.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_int32x4_t = vld1q_s32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qs64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Qs64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qs64.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_int64x2_t = vld1q_s64 (0); + } + +-/* { dg-final { scan-assembler 
"vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qs8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Qs8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qs8.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_int8x16_t = vld1q_s8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Qu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qu16.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_uint16x8_t = vld1q_u16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Qu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qu32.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_uint32x4_t = vld1q_u32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qu64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Qu64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qu64.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_uint64x2_t = vld1q_u64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Qu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qu8.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_uint8x16_t = vld1q_u8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupf32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_dupf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupf32.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_float32x2_t = vld1_dup_f32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_dupp16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupp16.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_poly16x4_t = vld1_dup_p16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupp8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_dupp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupp8.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_poly8x8_t = vld1_dup_p8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dups16.c 
+=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_dups16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dups16.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_int16x4_t = vld1_dup_s16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dups32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_dups32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dups32.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_int32x2_t = vld1_dup_s32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dups64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_dups64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dups64.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_int64x1_t = vld1_dup_s64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dups8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_dups8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dups8.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_int8x8_t = vld1_dup_s8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_dupu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupu16.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_uint16x4_t = vld1_dup_u16 
(0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_dupu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupu32.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_uint32x2_t = vld1_dup_u32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupu64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_dupu64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupu64.c 2011-09-16 20:16:00.367564848 -0700 +@@ -15,5 +15,5 @@ + out_uint64x1_t = vld1_dup_u64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_dupu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupu8.c 2011-09-16 20:16:00.367564848 -0700 +@@ -15,5 +15,5 @@ + out_uint8x8_t = vld1_dup_u8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanef32.c 2011-09-16 20:16:00.367564848 -0700 +@@ -16,5 +16,5 @@ + out_float32x2_t = vld1_lane_f32 (0, arg1_float32x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ 
\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanep16.c 2011-09-16 20:16:00.367564848 -0700 +@@ -16,5 +16,5 @@ + out_poly16x4_t = vld1_lane_p16 (0, arg1_poly16x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanep8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_lanep8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanep8.c 2011-09-16 20:16:00.367564848 -0700 +@@ -16,5 +16,5 @@ + out_poly8x8_t = vld1_lane_p8 (0, arg1_poly8x8_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanes16.c 2011-09-16 20:16:00.367564848 -0700 +@@ -16,5 +16,5 @@ + out_int16x4_t = vld1_lane_s16 (0, arg1_int16x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanes32.c 2011-09-16 20:16:00.367564848 -0700 +@@ -16,5 +16,5 @@ + out_int32x2_t = vld1_lane_s32 (0, arg1_int32x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ 
\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanes64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_lanes64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanes64.c 2011-09-16 20:16:00.367564848 -0700 +@@ -16,5 +16,5 @@ + out_int64x1_t = vld1_lane_s64 (0, arg1_int64x1_t, 0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanes8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_lanes8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanes8.c 2011-09-16 20:16:00.367564848 -0700 +@@ -16,5 +16,5 @@ + out_int8x8_t = vld1_lane_s8 (0, arg1_int8x8_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_laneu16.c 2011-09-16 20:16:00.367564848 -0700 +@@ -16,5 +16,5 @@ + out_uint16x4_t = vld1_lane_u16 (0, arg1_uint16x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_laneu32.c 2011-09-16 20:16:00.367564848 -0700 +@@ -16,5 +16,5 @@ + out_uint32x2_t = vld1_lane_u32 (0, arg1_uint32x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 
\]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_laneu64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_laneu64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_laneu64.c 2011-09-16 20:16:00.367564848 -0700 +@@ -16,5 +16,5 @@ + out_uint64x1_t = vld1_lane_u64 (0, arg1_uint64x1_t, 0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_laneu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_laneu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_laneu8.c 2011-09-16 20:16:00.367564848 -0700 +@@ -16,5 +16,5 @@ + out_uint8x8_t = vld1_lane_u8 (0, arg1_uint8x8_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1f32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1f32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1f32.c 2011-09-16 20:16:00.367564848 -0700 +@@ -15,5 +15,5 @@ + out_float32x2_t = vld1_f32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1p16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1p16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1p16.c 2011-09-16 20:16:00.377564842 -0700 +@@ -15,5 +15,5 @@ + out_poly16x4_t = vld1_p16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1p8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1p8.c 2011-06-24 08:13:40.000000000 -0700 ++++ 
gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1p8.c 2011-09-16 20:16:00.377564842 -0700 +@@ -15,5 +15,5 @@ + out_poly8x8_t = vld1_p8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1s16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1s16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1s16.c 2011-09-16 20:16:00.377564842 -0700 +@@ -15,5 +15,5 @@ + out_int16x4_t = vld1_s16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1s32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1s32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1s32.c 2011-09-16 20:16:00.377564842 -0700 +@@ -15,5 +15,5 @@ + out_int32x2_t = vld1_s32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1s64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1s64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1s64.c 2011-09-16 20:16:00.377564842 -0700 +@@ -15,5 +15,5 @@ + out_int64x1_t = vld1_s64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1s8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1s8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1s8.c 2011-09-16 20:16:00.377564842 -0700 +@@ -15,5 +15,5 @@ + out_int8x8_t = vld1_s8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { 
cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1u16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1u16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1u16.c 2011-09-16 20:16:00.377564842 -0700 +@@ -15,5 +15,5 @@ + out_uint16x4_t = vld1_u16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1u32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1u32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1u32.c 2011-09-16 20:16:00.377564842 -0700 +@@ -15,5 +15,5 @@ + out_uint32x2_t = vld1_u32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1u64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1u64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1u64.c 2011-09-16 20:16:00.377564842 -0700 +@@ -15,5 +15,5 @@ + out_uint64x1_t = vld1_u64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1u8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1u8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1u8.c 2011-09-16 20:16:00.387564830 -0700 +@@ -15,5 +15,5 @@ + out_uint8x8_t = vld1_u8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanef32.c 2011-09-16 20:16:00.387564830 -0700 +@@ -16,5 +16,5 @@ + out_float32x4x2_t = vld2q_lane_f32 (0, arg1_float32x4x2_t, 1); + } + +-/* { 
dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanep16.c 2011-09-16 20:16:00.387564830 -0700 +@@ -16,5 +16,5 @@ + out_poly16x8x2_t = vld2q_lane_p16 (0, arg1_poly16x8x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanes16.c 2011-09-16 20:16:00.387564830 -0700 +@@ -16,5 +16,5 @@ + out_int16x8x2_t = vld2q_lane_s16 (0, arg1_int16x8x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanes32.c 2011-09-16 20:16:00.387564830 -0700 +@@ -16,5 +16,5 @@ + out_int32x4x2_t = vld2q_lane_s32 (0, arg1_int32x4x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { 
cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Q_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Q_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Q_laneu16.c 2011-09-16 20:16:00.397564843 -0700 +@@ -16,5 +16,5 @@ + out_uint16x8x2_t = vld2q_lane_u16 (0, arg1_uint16x8x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Q_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Q_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Q_laneu32.c 2011-09-16 20:16:00.397564843 -0700 +@@ -16,5 +16,5 @@ + out_uint32x4x2_t = vld2q_lane_u32 (0, arg1_uint32x4x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qf32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Qf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qf32.c 2011-09-16 20:16:00.397564843 -0700 +@@ -15,6 +15,6 @@ + out_float32x4x2_t = vld2q_f32 (0); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Qp16.c 2011-06-24 08:13:40.000000000 -0700 ++++ 
gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qp16.c 2011-09-16 20:16:00.397564843 -0700 +@@ -15,6 +15,6 @@ + out_poly16x8x2_t = vld2q_p16 (0); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qp8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Qp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qp8.c 2011-09-16 20:16:00.397564843 -0700 +@@ -15,6 +15,6 @@ + out_poly8x16x2_t = vld2q_p8 (0); + } + +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qs16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Qs16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qs16.c 2011-09-16 20:16:00.397564843 -0700 +@@ -15,6 +15,6 @@ + out_int16x8x2_t = vld2q_s16 (0); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qs32.c +=================================================================== +--- 
gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Qs32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qs32.c 2011-09-16 20:16:00.397564843 -0700 +@@ -15,6 +15,6 @@ + out_int32x4x2_t = vld2q_s32 (0); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qs8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Qs8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qs8.c 2011-09-16 20:16:00.397564843 -0700 +@@ -15,6 +15,6 @@ + out_int8x16x2_t = vld2q_s8 (0); + } + +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Qu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qu16.c 2011-09-16 20:16:00.397564843 -0700 +@@ -15,6 +15,6 @@ + out_uint16x8x2_t = vld2q_u16 (0); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: 
gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Qu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qu32.c 2011-09-16 20:16:00.407564879 -0700 +@@ -15,6 +15,6 @@ + out_uint32x4x2_t = vld2q_u32 (0); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Qu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qu8.c 2011-09-16 20:16:00.407564879 -0700 +@@ -15,6 +15,6 @@ + out_uint8x16x2_t = vld2q_u8 (0); + } + +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupf32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_dupf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupf32.c 2011-09-16 20:16:00.407564879 -0700 +@@ -15,5 +15,5 @@ + out_float32x2x2_t = vld2_dup_f32 (0); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_dupp16.c 
2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupp16.c 2011-09-16 20:16:00.407564879 -0700 +@@ -15,5 +15,5 @@ + out_poly16x4x2_t = vld2_dup_p16 (0); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupp8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_dupp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupp8.c 2011-09-16 20:16:00.407564879 -0700 +@@ -15,5 +15,5 @@ + out_poly8x8x2_t = vld2_dup_p8 (0); + } + +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dups16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_dups16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dups16.c 2011-09-16 20:16:00.407564879 -0700 +@@ -15,5 +15,5 @@ + out_int16x4x2_t = vld2_dup_s16 (0); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dups32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_dups32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dups32.c 2011-09-16 20:16:00.407564879 -0700 +@@ -15,5 +15,5 @@ + out_int32x2x2_t = vld2_dup_s32 (0); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dups64.c 
+=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_dups64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dups64.c 2011-09-16 20:16:00.407564879 -0700 +@@ -15,5 +15,5 @@ + out_int64x1x2_t = vld2_dup_s64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dups8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_dups8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dups8.c 2011-09-16 20:16:00.417564906 -0700 +@@ -15,5 +15,5 @@ + out_int8x8x2_t = vld2_dup_s8 (0); + } + +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_dupu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupu16.c 2011-09-16 20:16:00.417564906 -0700 +@@ -15,5 +15,5 @@ + out_uint16x4x2_t = vld2_dup_u16 (0); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_dupu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupu32.c 2011-09-16 20:16:00.417564906 -0700 +@@ -15,5 +15,5 @@ + out_uint32x2x2_t = vld2_dup_u32 (0); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: 
gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupu64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_dupu64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupu64.c 2011-09-16 20:16:00.417564906 -0700 +@@ -15,5 +15,5 @@ + out_uint64x1x2_t = vld2_dup_u64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_dupu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupu8.c 2011-09-16 20:16:00.417564906 -0700 +@@ -15,5 +15,5 @@ + out_uint8x8x2_t = vld2_dup_u8 (0); + } + +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_lanef32.c 2011-09-16 20:16:00.417564906 -0700 +@@ -16,5 +16,5 @@ + out_float32x2x2_t = vld2_lane_f32 (0, arg1_float32x2x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_lanep16.c 2011-09-16 20:16:00.417564906 -0700 +@@ -16,5 +16,5 @@ + out_poly16x4x2_t = vld2_lane_p16 (0, arg1_poly16x4x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_lanep8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_lanep8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_lanep8.c 2011-09-16 20:16:00.417564906 -0700 +@@ -16,5 +16,5 @@ + out_poly8x8x2_t = vld2_lane_p8 (0, arg1_poly8x8x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_lanes16.c 2011-09-16 20:16:00.417564906 -0700 +@@ -16,5 +16,5 @@ + out_int16x4x2_t = vld2_lane_s16 (0, arg1_int16x4x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_lanes32.c 2011-09-16 20:16:00.417564906 -0700 +@@ -16,5 +16,5 @@ + out_int32x2x2_t = vld2_lane_s32 (0, arg1_int32x2x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_lanes8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_lanes8.c 2011-06-24 08:13:40.000000000 -0700 ++++ 
gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_lanes8.c 2011-09-16 20:16:00.417564906 -0700 +@@ -16,5 +16,5 @@ + out_int8x8x2_t = vld2_lane_s8 (0, arg1_int8x8x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_laneu16.c 2011-09-16 20:16:00.417564906 -0700 +@@ -16,5 +16,5 @@ + out_uint16x4x2_t = vld2_lane_u16 (0, arg1_uint16x4x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_laneu32.c 2011-09-16 20:16:00.417564906 -0700 +@@ -16,5 +16,5 @@ + out_uint32x2x2_t = vld2_lane_u32 (0, arg1_uint32x2x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_laneu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_laneu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_laneu8.c 2011-09-16 20:16:00.417564906 -0700 +@@ -16,5 +16,5 @@ + out_uint8x8x2_t = vld2_lane_u8 (0, arg1_uint8x8x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2f32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2f32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2f32.c 2011-09-16 20:16:00.427564921 -0700 +@@ -15,5 +15,5 @@ + out_float32x2x2_t = vld2_f32 (0); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2p16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2p16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2p16.c 2011-09-16 20:16:00.427564921 -0700 +@@ -15,5 +15,5 @@ + out_poly16x4x2_t = vld2_p16 (0); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2p8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2p8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2p8.c 2011-09-16 20:16:00.427564921 -0700 +@@ -15,5 +15,5 @@ + out_poly8x8x2_t = vld2_p8 (0); + } + +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2s16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2s16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2s16.c 2011-09-16 20:16:00.427564921 -0700 +@@ -15,5 +15,5 @@ + out_int16x4x2_t = vld2_s16 (0); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { 
dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2s32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2s32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2s32.c 2011-09-16 20:16:00.427564921 -0700 +@@ -15,5 +15,5 @@ + out_int32x2x2_t = vld2_s32 (0); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2s64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2s64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2s64.c 2011-09-16 20:16:00.427564921 -0700 +@@ -15,5 +15,5 @@ + out_int64x1x2_t = vld2_s64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2s8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2s8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2s8.c 2011-09-16 20:16:00.437564924 -0700 +@@ -15,5 +15,5 @@ + out_int8x8x2_t = vld2_s8 (0); + } + +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2u16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2u16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2u16.c 2011-09-16 20:16:00.437564924 -0700 +@@ -15,5 +15,5 @@ + out_uint16x4x2_t = vld2_u16 (0); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2u32.c +=================================================================== +--- 
gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2u32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2u32.c 2011-09-16 20:16:00.437564924 -0700 +@@ -15,5 +15,5 @@ + out_uint32x2x2_t = vld2_u32 (0); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2u64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2u64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2u64.c 2011-09-16 20:16:00.437564924 -0700 +@@ -15,5 +15,5 @@ + out_uint64x1x2_t = vld2_u64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2u8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2u8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2u8.c 2011-09-16 20:16:00.437564924 -0700 +@@ -15,5 +15,5 @@ + out_uint8x8x2_t = vld2_u8 (0); + } + +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanef32.c 2011-09-16 20:16:00.437564924 -0700 +@@ -16,5 +16,5 @@ + out_float32x4x3_t = vld3q_lane_f32 (0, arg1_float32x4x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanep16.c 
+=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanep16.c 2011-09-16 20:16:00.437564924 -0700 +@@ -16,5 +16,5 @@ + out_poly16x8x3_t = vld3q_lane_p16 (0, arg1_poly16x8x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes16.c 2011-09-16 20:16:00.447564932 -0700 +@@ -16,5 +16,5 @@ + out_int16x8x3_t = vld3q_lane_s16 (0, arg1_int16x8x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes32.c 2011-09-16 20:16:00.447564932 -0700 +@@ -16,5 +16,5 @@ + out_int32x4x3_t = vld3q_lane_s32 (0, arg1_int32x4x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu16.c 2011-09-16 20:16:00.447564932 -0700 +@@ -16,5 +16,5 @@ + out_uint16x8x3_t = vld3q_lane_u16 (0, 
arg1_uint16x8x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu32.c 2011-09-16 20:16:00.447564932 -0700 +@@ -16,5 +16,5 @@ + out_uint32x4x3_t = vld3q_lane_u32 (0, arg1_uint32x4x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qf32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Qf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qf32.c 2011-09-16 20:16:00.447564932 -0700 +@@ -15,6 +15,6 @@ + out_float32x4x3_t = vld3q_f32 (0); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Qp16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qp16.c 2011-09-16 20:16:00.447564932 -0700 +@@ -15,6 +15,6 @@ + out_poly16x8x3_t = vld3q_p16 (0); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, 
\\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qp8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Qp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qp8.c 2011-09-16 20:16:00.447564932 -0700 +@@ -15,6 +15,6 @@ + out_poly8x16x3_t = vld3q_p8 (0); + } + +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qs16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Qs16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qs16.c 2011-09-16 20:16:00.447564932 -0700 +@@ -15,6 +15,6 @@ + out_int16x8x3_t = vld3q_s16 (0); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qs32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Qs32.c 2011-06-24 08:13:40.000000000 -0700 ++++ 
gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qs32.c 2011-09-16 20:16:00.447564932 -0700 +@@ -15,6 +15,6 @@ + out_int32x4x3_t = vld3q_s32 (0); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qs8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Qs8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qs8.c 2011-09-16 20:16:00.447564932 -0700 +@@ -15,6 +15,6 @@ + out_int8x16x3_t = vld3q_s8 (0); + } + +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Qu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qu16.c 2011-09-16 20:16:00.447564932 -0700 +@@ -15,6 +15,6 @@ + out_uint16x8x3_t = vld3q_u16 (0); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ 
+ /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Qu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qu32.c 2011-09-16 20:16:00.447564932 -0700 +@@ -15,6 +15,6 @@ + out_uint32x4x3_t = vld3q_u32 (0); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Qu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qu8.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,6 +15,6 @@ + out_uint8x16x3_t = vld3q_u8 (0); + } + +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupf32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_dupf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupf32.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_float32x2x3_t = vld3_dup_f32 (0); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { 
cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_dupp16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupp16.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_poly16x4x3_t = vld3_dup_p16 (0); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupp8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_dupp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupp8.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_poly8x8x3_t = vld3_dup_p8 (0); + } + +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dups16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_dups16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dups16.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_int16x4x3_t = vld3_dup_s16 (0); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dups32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_dups32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dups32.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_int32x2x3_t = vld3_dup_s32 (0); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } 
*/ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dups64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_dups64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dups64.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_int64x1x3_t = vld3_dup_s64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dups8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_dups8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dups8.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_int8x8x3_t = vld3_dup_s8 (0); + } + +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_dupu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupu16.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_uint16x4x3_t = vld3_dup_u16 (0); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_dupu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupu32.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_uint32x2x3_t = vld3_dup_u32 (0); + } + +-/* { dg-final { scan-assembler 
"vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupu64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_dupu64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupu64.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_uint64x1x3_t = vld3_dup_u64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_dupu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupu8.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_uint8x8x3_t = vld3_dup_u8 (0); + } + +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_lanef32.c 2011-09-16 20:16:00.457564944 -0700 +@@ -16,5 +16,5 @@ + out_float32x2x3_t = vld3_lane_f32 (0, arg1_float32x2x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_lanep16.c +=================================================================== 
+--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_lanep16.c 2011-09-16 20:16:00.457564944 -0700 +@@ -16,5 +16,5 @@ + out_poly16x4x3_t = vld3_lane_p16 (0, arg1_poly16x4x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_lanep8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_lanep8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_lanep8.c 2011-09-16 20:16:00.457564944 -0700 +@@ -16,5 +16,5 @@ + out_poly8x8x3_t = vld3_lane_p8 (0, arg1_poly8x8x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_lanes16.c 2011-09-16 20:16:00.457564944 -0700 +@@ -16,5 +16,5 @@ + out_int16x4x3_t = vld3_lane_s16 (0, arg1_int16x4x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_lanes32.c 2011-09-16 20:16:00.457564944 -0700 +@@ -16,5 +16,5 @@ + out_int32x2x3_t = vld3_lane_s32 (0, arg1_int32x2x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_lanes8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_lanes8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_lanes8.c 2011-09-16 20:16:00.457564944 -0700 +@@ -16,5 +16,5 @@ + out_int8x8x3_t = vld3_lane_s8 (0, arg1_int8x8x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_laneu16.c 2011-09-16 20:16:00.457564944 -0700 +@@ -16,5 +16,5 @@ + out_uint16x4x3_t = vld3_lane_u16 (0, arg1_uint16x4x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_laneu32.c 2011-09-16 20:16:00.457564944 -0700 +@@ -16,5 +16,5 @@ + out_uint32x2x3_t = vld3_lane_u32 (0, arg1_uint32x2x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_laneu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_laneu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_laneu8.c 2011-09-16 20:16:00.457564944 -0700 +@@ -16,5 +16,5 @@ + out_uint8x8x3_t = vld3_lane_u8 (0, arg1_uint8x8x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3f32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3f32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3f32.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_float32x2x3_t = vld3_f32 (0); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3p16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3p16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3p16.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_poly16x4x3_t = vld3_p16 (0); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3p8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3p8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3p8.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_poly8x8x3_t = vld3_p8 (0); + } + +-/* { dg-final { scan-assembler "vld3\.8\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3s16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3s16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3s16.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_int16x4x3_t = vld3_s16 (0); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3s32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3s32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3s32.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_int32x2x3_t = vld3_s32 (0); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3s64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3s64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3s64.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_int64x1x3_t = vld3_s64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3s8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3s8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3s8.c 2011-09-16 20:16:00.467564964 -0700 +@@ -15,5 +15,5 @@ + out_int8x8x3_t = vld3_s8 (0); + } + +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, 
\\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3u16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3u16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3u16.c 2011-09-16 20:16:00.467564964 -0700 +@@ -15,5 +15,5 @@ + out_uint16x4x3_t = vld3_u16 (0); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3u32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3u32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3u32.c 2011-09-16 20:16:00.467564964 -0700 +@@ -15,5 +15,5 @@ + out_uint32x2x3_t = vld3_u32 (0); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3u64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3u64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3u64.c 2011-09-16 20:16:00.467564964 -0700 +@@ -15,5 +15,5 @@ + out_uint64x1x3_t = vld3_u64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3u8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3u8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3u8.c 2011-09-16 20:16:00.467564964 -0700 +@@ -15,5 +15,5 @@ + out_uint8x8x3_t = vld3_u8 (0); + } + +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanef32.c 2011-09-16 20:16:00.467564964 -0700 +@@ -16,5 +16,5 @@ + out_float32x4x4_t = vld4q_lane_f32 (0, arg1_float32x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanep16.c 2011-09-16 20:16:00.467564964 -0700 +@@ -16,5 +16,5 @@ + out_poly16x8x4_t = vld4q_lane_p16 (0, arg1_poly16x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes16.c 2011-09-16 20:16:00.467564964 -0700 +@@ -16,5 +16,5 @@ + out_int16x8x4_t = vld4q_lane_s16 (0, arg1_int16x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes32.c 2011-09-16 20:16:00.467564964 -0700 +@@ -16,5 +16,5 @@ + out_int32x4x4_t = vld4q_lane_s32 (0, arg1_int32x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu16.c 2011-09-16 20:16:00.467564964 -0700 +@@ -16,5 +16,5 @@ + out_uint16x8x4_t = vld4q_lane_u16 (0, arg1_uint16x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu32.c 2011-09-16 20:16:00.467564964 -0700 +@@ -16,5 +16,5 @@ + out_uint32x4x4_t = vld4q_lane_u32 (0, arg1_uint32x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qf32.c 
+=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Qf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qf32.c 2011-09-16 20:16:00.467564964 -0700 +@@ -15,6 +15,6 @@ + out_float32x4x4_t = vld4q_f32 (0); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Qp16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qp16.c 2011-09-16 20:16:00.467564964 -0700 +@@ -15,6 +15,6 @@ + out_poly16x8x4_t = vld4q_p16 (0); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qp8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Qp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qp8.c 2011-09-16 20:16:00.467564964 -0700 +@@ -15,6 +15,6 @@ + out_poly8x16x4_t = vld4q_p8 (0); + } + +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qs16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Qs16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qs16.c 2011-09-16 20:16:00.477564991 -0700 +@@ -15,6 +15,6 @@ + out_int16x8x4_t = vld4q_s16 (0); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qs32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Qs32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qs32.c 2011-09-16 20:16:00.477564991 -0700 +@@ -15,6 +15,6 @@ + out_int32x4x4_t = vld4q_s32 (0); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qs8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Qs8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qs8.c 2011-09-16 20:16:00.477564991 -0700 +@@ -15,6 +15,6 @@ + out_int8x16x4_t = vld4q_s8 (0); 
+ } + +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Qu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qu16.c 2011-09-16 20:16:00.477564991 -0700 +@@ -15,6 +15,6 @@ + out_uint16x8x4_t = vld4q_u16 (0); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Qu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qu32.c 2011-09-16 20:16:00.477564991 -0700 +@@ -15,6 +15,6 @@ + out_uint32x4x4_t = vld4q_u32 (0); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, 
\\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Qu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qu8.c 2011-09-16 20:16:00.477564991 -0700 +@@ -15,6 +15,6 @@ + out_uint8x16x4_t = vld4q_u8 (0); + } + +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupf32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_dupf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupf32.c 2011-09-16 20:16:00.477564991 -0700 +@@ -15,5 +15,5 @@ + out_float32x2x4_t = vld4_dup_f32 (0); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_dupp16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupp16.c 2011-09-16 20:16:00.477564991 -0700 +@@ -15,5 +15,5 @@ + out_poly16x4x4_t = vld4_dup_p16 (0); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupp8.c 
+=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_dupp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupp8.c 2011-09-16 20:16:00.477564991 -0700 +@@ -15,5 +15,5 @@ + out_poly8x8x4_t = vld4_dup_p8 (0); + } + +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dups16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_dups16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dups16.c 2011-09-16 20:16:00.477564991 -0700 +@@ -15,5 +15,5 @@ + out_int16x4x4_t = vld4_dup_s16 (0); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dups32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_dups32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dups32.c 2011-09-16 20:16:00.477564991 -0700 +@@ -15,5 +15,5 @@ + out_int32x2x4_t = vld4_dup_s32 (0); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dups64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_dups64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dups64.c 2011-09-16 20:16:00.487565006 -0700 +@@ -15,5 +15,5 @@ + out_int64x1x4_t = vld4_dup_s64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dups8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_dups8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dups8.c 2011-09-16 20:16:00.487565006 -0700 +@@ -15,5 +15,5 @@ + out_int8x8x4_t = vld4_dup_s8 (0); + } + +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_dupu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupu16.c 2011-09-16 20:16:00.487565006 -0700 +@@ -15,5 +15,5 @@ + out_uint16x4x4_t = vld4_dup_u16 (0); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_dupu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupu32.c 2011-09-16 20:16:00.487565006 -0700 +@@ -15,5 +15,5 @@ + out_uint32x2x4_t = vld4_dup_u32 (0); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupu64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_dupu64.c 2011-06-24 
08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupu64.c 2011-09-16 20:16:00.487565006 -0700 +@@ -15,5 +15,5 @@ + out_uint64x1x4_t = vld4_dup_u64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_dupu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupu8.c 2011-09-16 20:16:00.487565006 -0700 +@@ -15,5 +15,5 @@ + out_uint8x8x4_t = vld4_dup_u8 (0); + } + +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_lanef32.c 2011-09-16 20:16:00.487565006 -0700 +@@ -16,5 +16,5 @@ + out_float32x2x4_t = vld4_lane_f32 (0, arg1_float32x2x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_lanep16.c 2011-09-16 20:16:00.487565006 -0700 +@@ -16,5 +16,5 @@ + out_poly16x4x4_t = vld4_lane_p16 (0, arg1_poly16x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_lanep8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_lanep8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_lanep8.c 2011-09-16 20:16:00.487565006 -0700 +@@ -16,5 +16,5 @@ + out_poly8x8x4_t = vld4_lane_p8 (0, arg1_poly8x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_lanes16.c 2011-09-16 20:16:00.497565009 -0700 +@@ -16,5 +16,5 @@ + out_int16x4x4_t = vld4_lane_s16 (0, arg1_int16x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_lanes32.c 2011-09-16 20:16:00.497565009 -0700 +@@ -16,5 +16,5 @@ + out_int32x2x4_t = vld4_lane_s32 (0, arg1_int32x2x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_lanes8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_lanes8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_lanes8.c 2011-09-16 20:16:00.497565009 -0700 +@@ -16,5 +16,5 @@ + out_int8x8x4_t = vld4_lane_s8 (0, arg1_int8x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_laneu16.c 2011-09-16 20:16:00.497565009 -0700 +@@ -16,5 +16,5 @@ + out_uint16x4x4_t = vld4_lane_u16 (0, arg1_uint16x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_laneu32.c 2011-09-16 20:16:00.497565009 -0700 +@@ -16,5 +16,5 @@ + out_uint32x2x4_t = vld4_lane_u32 (0, arg1_uint32x2x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], 
\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_laneu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_laneu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_laneu8.c 2011-09-16 20:16:00.497565009 -0700 +@@ -16,5 +16,5 @@ + out_uint8x8x4_t = vld4_lane_u8 (0, arg1_uint8x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4f32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4f32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4f32.c 2011-09-16 20:16:00.497565009 -0700 +@@ -15,5 +15,5 @@ + out_float32x2x4_t = vld4_f32 (0); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4p16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4p16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4p16.c 2011-09-16 20:16:00.497565009 -0700 +@@ -15,5 +15,5 @@ + out_poly16x4x4_t = vld4_p16 (0); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4p8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4p8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4p8.c 2011-09-16 20:16:00.497565009 -0700 +@@ -15,5 +15,5 @@ + out_poly8x8x4_t = vld4_p8 (0); + } + +-/* { dg-final { scan-assembler "vld4\.8\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4s16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4s16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4s16.c 2011-09-16 20:16:00.497565009 -0700 +@@ -15,5 +15,5 @@ + out_int16x4x4_t = vld4_s16 (0); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4s32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4s32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4s32.c 2011-09-16 20:16:00.497565009 -0700 +@@ -15,5 +15,5 @@ + out_int32x2x4_t = vld4_s32 (0); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4s64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4s64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4s64.c 2011-09-16 20:16:00.497565009 -0700 +@@ -15,5 +15,5 @@ + out_int64x1x4_t = vld4_s64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4s8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4s8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4s8.c 2011-09-16 20:16:00.497565009 -0700 +@@ -15,5 +15,5 @@ + out_int8x8x4_t = vld4_s8 (0); + } + +-/* { dg-final { 
scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4u16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4u16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4u16.c 2011-09-16 20:16:00.497565009 -0700 +@@ -15,5 +15,5 @@ + out_uint16x4x4_t = vld4_u16 (0); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4u32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4u32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4u32.c 2011-09-16 20:16:00.507565013 -0700 +@@ -15,5 +15,5 @@ + out_uint32x2x4_t = vld4_u32 (0); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4u64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4u64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4u64.c 2011-09-16 20:16:00.507565013 -0700 +@@ -15,5 +15,5 @@ + out_uint64x1x4_t = vld4_u64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4u8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4u8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4u8.c 2011-09-16 20:16:00.507565013 -0700 +@@ -15,5 +15,5 @@ + out_uint8x8x4_t = vld4_u8 (0); + 
} + +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanef32.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_lane_f32 (arg0_float32_t, arg1_float32x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep16.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_lane_p16 (arg0_poly16_t, arg1_poly16x8_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep8.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_lane_p8 (arg0_poly8_t, arg1_poly8x16_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes16.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_lane_s16 (arg0_int16_t, arg1_int16x8_t, 1); + } + +-/* { 
dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes32.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_lane_s32 (arg0_int32_t, arg1_int32x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes64.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_lane_s64 (arg0_int64_t, arg1_int64x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes8.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_lane_s8 (arg0_int8_t, arg1_int8x16_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu16.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_lane_u16 (arg0_uint16_t, arg1_uint16x8_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu32.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_lane_u32 (arg0_uint32_t, arg1_uint32x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu64.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_lane_u64 (arg0_uint64_t, arg1_uint64x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu8.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_lane_u8 (arg0_uint8_t, arg1_uint8x16_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qf32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Qf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qf32.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_f32 (arg0_float32_t, arg1_float32x4_t); + } + +-/* { dg-final { scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, 
\\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Qp16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qp16.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_p16 (arg0_poly16_t, arg1_poly16x8_t); + } + +-/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qp8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Qp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qp8.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_p8 (arg0_poly8_t, arg1_poly8x16_t); + } + +-/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qs16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Qs16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qs16.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_s16 (arg0_int16_t, arg1_int16x8_t); + } + +-/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qs32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Qs32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qs32.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1q_s32 (arg0_int32_t, arg1_int32x4_t); + } + +-/* { dg-final { scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: 
gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qs64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Qs64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qs64.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1q_s64 (arg0_int64_t, arg1_int64x2_t); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qs8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Qs8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qs8.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1q_s8 (arg0_int8_t, arg1_int8x16_t); + } + +-/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Qu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qu16.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1q_u16 (arg0_uint16_t, arg1_uint16x8_t); + } + +-/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Qu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qu32.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1q_u32 (arg0_uint32_t, arg1_uint32x4_t); + } + +-/* { dg-final { scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qu64.c +=================================================================== +--- 
gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Qu64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qu64.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1q_u64 (arg0_uint64_t, arg1_uint64x2_t); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Qu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qu8.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1q_u8 (arg0_uint8_t, arg1_uint8x16_t); + } + +-/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanef32.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_lane_f32 (arg0_float32_t, arg1_float32x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanep16.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_lane_p16 (arg0_poly16_t, arg1_poly16x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanep8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1_lanep8.c 2011-06-24 08:13:40.000000000 -0700 ++++ 
gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanep8.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_lane_p8 (arg0_poly8_t, arg1_poly8x8_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanes16.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_lane_s16 (arg0_int16_t, arg1_int16x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanes32.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_lane_s32 (arg0_int32_t, arg1_int32x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanes64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1_lanes64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanes64.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_lane_s64 (arg0_int64_t, arg1_int64x1_t, 0); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanes8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1_lanes8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanes8.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_lane_s8 (arg0_int8_t, arg1_int8x8_t, 
1); + } + +-/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_laneu16.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_lane_u16 (arg0_uint16_t, arg1_uint16x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_laneu32.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_lane_u32 (arg0_uint32_t, arg1_uint32x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_laneu64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1_laneu64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_laneu64.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_lane_u64 (arg0_uint64_t, arg1_uint64x1_t, 0); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_laneu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1_laneu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_laneu8.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_lane_u8 (arg0_uint8_t, arg1_uint8x8_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), 
\\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1f32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1f32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1f32.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_f32 (arg0_float32_t, arg1_float32x2_t); + } + +-/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1p16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1p16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1p16.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_p16 (arg0_poly16_t, arg1_poly16x4_t); + } + +-/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1p8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1p8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1p8.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_p8 (arg0_poly8_t, arg1_poly8x8_t); + } + +-/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1s16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1s16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1s16.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst1_s16 (arg0_int16_t, arg1_int16x4_t); + } + +-/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1s32.c +=================================================================== 
+--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1s32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1s32.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst1_s32 (arg0_int32_t, arg1_int32x2_t); + } + +-/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1s64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1s64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1s64.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst1_s64 (arg0_int64_t, arg1_int64x1_t); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1s8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1s8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1s8.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst1_s8 (arg0_int8_t, arg1_int8x8_t); + } + +-/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1u16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1u16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1u16.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst1_u16 (arg0_uint16_t, arg1_uint16x4_t); + } + +-/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1u32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1u32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1u32.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst1_u32 (arg0_uint32_t, arg1_uint32x2_t); + } + +-/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { 
scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1u64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1u64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1u64.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst1_u64 (arg0_uint64_t, arg1_uint64x1_t); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1u8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1u8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1u8.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst1_u8 (arg0_uint8_t, arg1_uint8x8_t); + } + +-/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanef32.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst2q_lane_f32 (arg0_float32_t, arg1_float32x4x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanep16.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst2q_lane_p16 (arg0_poly16_t, arg1_poly16x8x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes16.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst2q_lane_s16 (arg0_int16_t, arg1_int16x8x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes32.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst2q_lane_s32 (arg0_int32_t, arg1_int32x4x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu16.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst2q_lane_u16 (arg0_uint16_t, arg1_uint16x8x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ 
gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu32.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst2q_lane_u32 (arg0_uint32_t, arg1_uint32x4x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qf32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Qf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qf32.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,6 +16,6 @@ + vst2q_f32 (arg0_float32_t, arg1_float32x4x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Qp16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qp16.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,6 +16,6 @@ + vst2q_p16 (arg0_poly16_t, arg1_poly16x8x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qp8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Qp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qp8.c 2011-09-16 20:16:00.527565060 -0700 
+@@ -16,6 +16,6 @@ + vst2q_p8 (arg0_poly8_t, arg1_poly8x16x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qs16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Qs16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qs16.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,6 +16,6 @@ + vst2q_s16 (arg0_int16_t, arg1_int16x8x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qs32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Qs32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qs32.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,6 +16,6 @@ + vst2q_s32 (arg0_int32_t, arg1_int32x4x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qs8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Qs8.c 
2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qs8.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,6 +16,6 @@ + vst2q_s8 (arg0_int8_t, arg1_int8x16x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Qu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qu16.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,6 +16,6 @@ + vst2q_u16 (arg0_uint16_t, arg1_uint16x8x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Qu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qu32.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,6 +16,6 @@ + vst2q_u32 (arg0_uint32_t, arg1_uint32x4x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qu8.c 
+=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Qu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qu8.c 2011-09-16 20:16:00.537565077 -0700 +@@ -16,6 +16,6 @@ + vst2q_u8 (arg0_uint8_t, arg1_uint8x16x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_lanef32.c 2011-09-16 20:16:00.537565077 -0700 +@@ -16,5 +16,5 @@ + vst2_lane_f32 (arg0_float32_t, arg1_float32x2x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_lanep16.c 2011-09-16 20:16:00.537565077 -0700 +@@ -16,5 +16,5 @@ + vst2_lane_p16 (arg0_poly16_t, arg1_poly16x4x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_lanep8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2_lanep8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_lanep8.c 2011-09-16 20:16:00.537565077 -0700 +@@ -16,5 +16,5 @@ + vst2_lane_p8 (arg0_poly8_t, 
arg1_poly8x8x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_lanes16.c 2011-09-16 20:16:00.537565077 -0700 +@@ -16,5 +16,5 @@ + vst2_lane_s16 (arg0_int16_t, arg1_int16x4x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_lanes32.c 2011-09-16 20:16:00.537565077 -0700 +@@ -16,5 +16,5 @@ + vst2_lane_s32 (arg0_int32_t, arg1_int32x2x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_lanes8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2_lanes8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_lanes8.c 2011-09-16 20:16:00.537565077 -0700 +@@ -16,5 +16,5 @@ + vst2_lane_s8 (arg0_int8_t, arg1_int8x8x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } 
*/ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_laneu16.c 2011-09-16 20:16:00.537565077 -0700 +@@ -16,5 +16,5 @@ + vst2_lane_u16 (arg0_uint16_t, arg1_uint16x4x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_laneu32.c 2011-09-16 20:16:00.537565077 -0700 +@@ -16,5 +16,5 @@ + vst2_lane_u32 (arg0_uint32_t, arg1_uint32x2x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_laneu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2_laneu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_laneu8.c 2011-09-16 20:16:00.537565077 -0700 +@@ -16,5 +16,5 @@ + vst2_lane_u8 (arg0_uint8_t, arg1_uint8x8x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2f32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2f32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2f32.c 2011-09-16 20:16:00.537565077 -0700 +@@ -16,5 +16,5 @@ + vst2_f32 (arg0_float32_t, arg1_float32x2x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2p16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2p16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2p16.c 2011-09-16 20:16:00.537565077 -0700 +@@ -16,5 +16,5 @@ + vst2_p16 (arg0_poly16_t, arg1_poly16x4x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2p8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2p8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2p8.c 2011-09-16 20:16:00.547565082 -0700 +@@ -16,5 +16,5 @@ + vst2_p8 (arg0_poly8_t, arg1_poly8x8x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2s16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2s16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2s16.c 2011-09-16 20:16:00.547565082 -0700 +@@ -16,5 +16,5 @@ + vst2_s16 (arg0_int16_t, arg1_int16x4x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2s32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2s32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2s32.c 2011-09-16 20:16:00.547565082 -0700 +@@ -16,5 +16,5 @@ + vst2_s32 (arg0_int32_t, arg1_int32x2x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, 
\\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2s64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2s64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2s64.c 2011-09-16 20:16:00.547565082 -0700 +@@ -16,5 +16,5 @@ + vst2_s64 (arg0_int64_t, arg1_int64x1x2_t); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2s8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2s8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2s8.c 2011-09-16 20:16:00.547565082 -0700 +@@ -16,5 +16,5 @@ + vst2_s8 (arg0_int8_t, arg1_int8x8x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2u16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2u16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2u16.c 2011-09-16 20:16:00.547565082 -0700 +@@ -16,5 +16,5 @@ + vst2_u16 (arg0_uint16_t, arg1_uint16x4x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2u32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2u32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2u32.c 2011-09-16 20:16:00.547565082 -0700 +@@ -16,5 +16,5 @@ + vst2_u32 (arg0_uint32_t, arg1_uint32x2x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2u64.c 
+=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2u64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2u64.c 2011-09-16 20:16:00.557565092 -0700 +@@ -16,5 +16,5 @@ + vst2_u64 (arg0_uint64_t, arg1_uint64x1x2_t); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2u8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2u8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2u8.c 2011-09-16 20:16:00.557565092 -0700 +@@ -16,5 +16,5 @@ + vst2_u8 (arg0_uint8_t, arg1_uint8x8x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanef32.c 2011-09-16 20:16:00.557565092 -0700 +@@ -16,5 +16,5 @@ + vst3q_lane_f32 (arg0_float32_t, arg1_float32x4x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanep16.c 2011-09-16 20:16:00.557565092 -0700 +@@ -16,5 +16,5 @@ + vst3q_lane_p16 (arg0_poly16_t, arg1_poly16x8x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], 
\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes16.c 2011-09-16 20:16:00.557565092 -0700 +@@ -16,5 +16,5 @@ + vst3q_lane_s16 (arg0_int16_t, arg1_int16x8x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes32.c 2011-09-16 20:16:00.557565092 -0700 +@@ -16,5 +16,5 @@ + vst3q_lane_s32 (arg0_int32_t, arg1_int32x4x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu16.c 2011-09-16 20:16:00.557565092 -0700 +@@ -16,5 +16,5 @@ + vst3q_lane_u16 (arg0_uint16_t, arg1_uint16x8x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu32.c +=================================================================== +--- 
gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu32.c 2011-09-16 20:16:00.557565092 -0700 +@@ -16,5 +16,5 @@ + vst3q_lane_u32 (arg0_uint32_t, arg1_uint32x4x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qf32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Qf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qf32.c 2011-09-16 20:16:00.557565092 -0700 +@@ -16,6 +16,6 @@ + vst3q_f32 (arg0_float32_t, arg1_float32x4x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Qp16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qp16.c 2011-09-16 20:16:00.567565108 -0700 +@@ -16,6 +16,6 @@ + vst3q_p16 (arg0_poly16_t, arg1_poly16x8x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: 
gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qp8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Qp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qp8.c 2011-09-16 20:16:00.567565108 -0700 +@@ -16,6 +16,6 @@ + vst3q_p8 (arg0_poly8_t, arg1_poly8x16x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qs16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Qs16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qs16.c 2011-09-16 20:16:00.567565108 -0700 +@@ -16,6 +16,6 @@ + vst3q_s16 (arg0_int16_t, arg1_int16x8x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qs32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Qs32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qs32.c 2011-09-16 20:16:00.567565108 -0700 +@@ -16,6 +16,6 @@ + vst3q_s32 (arg0_int32_t, arg1_int32x4x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, 
\\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qs8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Qs8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qs8.c 2011-09-16 20:16:00.567565108 -0700 +@@ -16,6 +16,6 @@ + vst3q_s8 (arg0_int8_t, arg1_int8x16x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Qu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qu16.c 2011-09-16 20:16:00.567565108 -0700 +@@ -16,6 +16,6 @@ + vst3q_u16 (arg0_uint16_t, arg1_uint16x8x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Qu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qu32.c 2011-09-16 20:16:00.567565108 -0700 +@@ -16,6 +16,6 @@ + vst3q_u32 (arg0_uint32_t, arg1_uint32x4x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { 
dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Qu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qu8.c 2011-09-16 20:16:00.567565108 -0700 +@@ -16,6 +16,6 @@ + vst3q_u8 (arg0_uint8_t, arg1_uint8x16x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_lanef32.c 2011-09-16 20:16:00.567565108 -0700 +@@ -16,5 +16,5 @@ + vst3_lane_f32 (arg0_float32_t, arg1_float32x2x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_lanep16.c 2011-09-16 20:16:00.577565135 -0700 +@@ -16,5 +16,5 @@ + vst3_lane_p16 (arg0_poly16_t, arg1_poly16x4x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.16\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_lanep8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3_lanep8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_lanep8.c 2011-09-16 20:16:00.577565135 -0700 +@@ -16,5 +16,5 @@ + vst3_lane_p8 (arg0_poly8_t, arg1_poly8x8x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_lanes16.c 2011-09-16 20:16:00.577565135 -0700 +@@ -16,5 +16,5 @@ + vst3_lane_s16 (arg0_int16_t, arg1_int16x4x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_lanes32.c 2011-09-16 20:16:00.577565135 -0700 +@@ -16,5 +16,5 @@ + vst3_lane_s32 (arg0_int32_t, arg1_int32x2x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], 
\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_lanes8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3_lanes8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_lanes8.c 2011-09-16 20:16:00.577565135 -0700 +@@ -16,5 +16,5 @@ + vst3_lane_s8 (arg0_int8_t, arg1_int8x8x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_laneu16.c 2011-09-16 20:16:00.577565135 -0700 +@@ -16,5 +16,5 @@ + vst3_lane_u16 (arg0_uint16_t, arg1_uint16x4x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_laneu32.c 2011-09-16 20:16:00.577565135 -0700 +@@ -16,5 +16,5 @@ + vst3_lane_u32 (arg0_uint32_t, arg1_uint32x2x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_laneu8.c +=================================================================== +--- 
gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3_laneu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_laneu8.c 2011-09-16 20:16:00.587565144 -0700 +@@ -16,5 +16,5 @@ + vst3_lane_u8 (arg0_uint8_t, arg1_uint8x8x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3f32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3f32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3f32.c 2011-09-16 20:16:00.587565144 -0700 +@@ -16,5 +16,5 @@ + vst3_f32 (arg0_float32_t, arg1_float32x2x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3p16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3p16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3p16.c 2011-09-16 20:16:00.587565144 -0700 +@@ -16,5 +16,5 @@ + vst3_p16 (arg0_poly16_t, arg1_poly16x4x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3p8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3p8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3p8.c 2011-09-16 20:16:00.587565144 -0700 +@@ -16,5 +16,5 @@ + vst3_p8 (arg0_poly8_t, arg1_poly8x8x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3s16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3s16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3s16.c 2011-09-16 20:16:00.587565144 -0700 +@@ -16,5 +16,5 @@ + vst3_s16 (arg0_int16_t, arg1_int16x4x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3s32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3s32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3s32.c 2011-09-16 20:16:00.587565144 -0700 +@@ -16,5 +16,5 @@ + vst3_s32 (arg0_int32_t, arg1_int32x2x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3s64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3s64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3s64.c 2011-09-16 20:16:00.587565144 -0700 +@@ -16,5 +16,5 @@ + vst3_s64 (arg0_int64_t, arg1_int64x1x3_t); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3s8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3s8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3s8.c 2011-09-16 20:16:00.587565144 -0700 +@@ -16,5 +16,5 @@ + vst3_s8 (arg0_int8_t, arg1_int8x8x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3u16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3u16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3u16.c 2011-09-16 20:16:00.587565144 -0700 +@@ -16,5 +16,5 @@ + vst3_u16 (arg0_uint16_t, arg1_uint16x4x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3u32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3u32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3u32.c 2011-09-16 20:16:00.587565144 -0700 +@@ -16,5 +16,5 @@ + vst3_u32 (arg0_uint32_t, arg1_uint32x2x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3u64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3u64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3u64.c 2011-09-16 20:16:00.587565144 -0700 +@@ -16,5 +16,5 @@ + vst3_u64 (arg0_uint64_t, arg1_uint64x1x3_t); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3u8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3u8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3u8.c 2011-09-16 20:16:00.587565144 -0700 +@@ -16,5 +16,5 @@ + vst3_u8 (arg0_uint8_t, arg1_uint8x8x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanef32.c 2011-09-16 20:16:00.597565156 -0700 +@@ -16,5 +16,5 @@ + vst4q_lane_f32 (arg0_float32_t, arg1_float32x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanep16.c 2011-09-16 20:16:00.597565156 -0700 +@@ -16,5 +16,5 @@ + vst4q_lane_p16 (arg0_poly16_t, arg1_poly16x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes16.c 2011-09-16 20:16:00.597565156 -0700 +@@ -16,5 +16,5 @@ + vst4q_lane_s16 (arg0_int16_t, arg1_int16x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 
\]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes32.c 2011-09-16 20:16:00.597565156 -0700 +@@ -16,5 +16,5 @@ + vst4q_lane_s32 (arg0_int32_t, arg1_int32x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu16.c 2011-09-16 20:16:00.597565156 -0700 +@@ -16,5 +16,5 @@ + vst4q_lane_u16 (arg0_uint16_t, arg1_uint16x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu32.c 2011-09-16 20:16:00.597565156 -0700 +@@ -16,5 +16,5 @@ + vst4q_lane_u32 (arg0_uint32_t, arg1_uint32x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qf32.c 
+=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Qf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qf32.c 2011-09-16 20:16:00.597565156 -0700 +@@ -16,6 +16,6 @@ + vst4q_f32 (arg0_float32_t, arg1_float32x4x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Qp16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qp16.c 2011-09-16 20:16:00.597565156 -0700 +@@ -16,6 +16,6 @@ + vst4q_p16 (arg0_poly16_t, arg1_poly16x8x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qp8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Qp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qp8.c 2011-09-16 20:16:00.597565156 -0700 +@@ -16,6 +16,6 @@ + vst4q_p8 (arg0_poly8_t, arg1_poly8x16x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qs16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Qs16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qs16.c 2011-09-16 20:16:00.597565156 -0700 +@@ -16,6 +16,6 @@ + vst4q_s16 (arg0_int16_t, arg1_int16x8x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qs32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Qs32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qs32.c 2011-09-16 20:16:00.597565156 -0700 +@@ -16,6 +16,6 @@ + vst4q_s32 (arg0_int32_t, arg1_int32x4x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qs8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Qs8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qs8.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,6 +16,6 @@ + vst4q_s8 
(arg0_int8_t, arg1_int8x16x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Qu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qu16.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,6 +16,6 @@ + vst4q_u16 (arg0_uint16_t, arg1_uint16x8x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Qu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qu32.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,6 +16,6 @@ + vst4q_u32 (arg0_uint32_t, arg1_uint32x4x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Qu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qu8.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,6 +16,6 @@ + vst4q_u8 (arg0_uint8_t, arg1_uint8x16x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_lanef32.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_lane_f32 (arg0_float32_t, arg1_float32x2x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_lanep16.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_lane_p16 (arg0_poly16_t, arg1_poly16x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], 
\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_lanep8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4_lanep8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_lanep8.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_lane_p8 (arg0_poly8_t, arg1_poly8x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_lanes16.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_lane_s16 (arg0_int16_t, arg1_int16x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_lanes32.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_lane_s32 (arg0_int32_t, arg1_int32x2x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { 
cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_lanes8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4_lanes8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_lanes8.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_lane_s8 (arg0_int8_t, arg1_int8x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_laneu16.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_lane_u16 (arg0_uint16_t, arg1_uint16x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_laneu32.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_lane_u32 (arg0_uint32_t, arg1_uint32x2x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_laneu8.c +=================================================================== +--- 
gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4_laneu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_laneu8.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_lane_u8 (arg0_uint8_t, arg1_uint8x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4f32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4f32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4f32.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_f32 (arg0_float32_t, arg1_float32x2x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4p16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4p16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4p16.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_p16 (arg0_poly16_t, arg1_poly16x4x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4p8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4p8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4p8.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_p8 (arg0_poly8_t, arg1_poly8x8x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4s16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4s16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4s16.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_s16 (arg0_int16_t, arg1_int16x4x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4s32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4s32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4s32.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_s32 (arg0_int32_t, arg1_int32x2x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4s64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4s64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4s64.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_s64 (arg0_int64_t, arg1_int64x1x4_t); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4s8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4s8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4s8.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_s8 (arg0_int8_t, arg1_int8x8x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4u16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4u16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4u16.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_u16 (arg0_uint16_t, arg1_uint16x4x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4u32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4u32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4u32.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_u32 (arg0_uint32_t, arg1_uint32x2x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4u64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4u64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4u64.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_u64 (arg0_uint64_t, arg1_uint64x1x4_t); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4u8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4u8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4u8.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_u8 (arg0_uint8_t, arg1_uint8x8x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/pr46329.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/pr46329.c 2011-09-16 20:16:00.617565191 -0700 +@@ -0,0 +1,9 @@ ++/* { dg-options "-O2" } */ ++/* { dg-add-options arm_neon } */ ++ ++int __attribute__ ((vector_size (32))) x; ++void ++foo (void) ++{ ++ x <<= x; ++} diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106744.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106744.patch new file mode 100644 index 0000000000..004f0131cf --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106744.patch @@ -0,0 +1,21 @@ +2011-05-06 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + From Sergey Grechanik <mouseentity@ispras.ru>, approved for mainline + + * config/arm/arm.c (coproc_secondary_reload_class): Return NO_REGS + for constant vectors. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-05-03 15:18:07 +0000 ++++ new/gcc/config/arm/arm.c 2011-05-06 11:33:02 +0000 +@@ -9193,7 +9193,7 @@ + /* The neon move patterns handle all legitimate vector and struct + addresses. */ + if (TARGET_NEON +- && MEM_P (x) ++ && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR) + && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT + || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT + || VALID_NEON_STRUCT_MODE (mode))) + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106746.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106746.patch new file mode 100644 index 0000000000..ce0272431d --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106746.patch @@ -0,0 +1,24 @@ +2011-05-12 Michael Hope <michael.hope@linaro.org> + + gcc/ + Backport from mainline: + + 2011-05-05 Michael Hope <michael.hope@linaro.org> + + PR pch/45979 + * config/host-linux.c (TRY_EMPTY_VM_SPACE): Define for + __ARM_EABI__ hosts. + +=== modified file 'gcc/config/host-linux.c' +--- old/gcc/config/host-linux.c 2010-11-29 14:09:41 +0000 ++++ new/gcc/config/host-linux.c 2011-05-06 20:19:30 +0000 +@@ -84,6 +84,8 @@ + # define TRY_EMPTY_VM_SPACE 0x60000000 + #elif defined(__mc68000__) + # define TRY_EMPTY_VM_SPACE 0x40000000 ++#elif defined(__ARM_EABI__) ++# define TRY_EMPTY_VM_SPACE 0x60000000 + #else + # define TRY_EMPTY_VM_SPACE 0 + #endif + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106747.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106747.patch new file mode 100644 index 0000000000..7885b7af49 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106747.patch @@ -0,0 +1,640 @@ +2011-05-13 Revital Eres <revital.eres@linaro.org> + + gcc/ + * loop-doloop.c (doloop_condition_get): Support new form of + doloop pattern and use prev_nondebug_insn instead of PREV_INSN. + * config/arm/thumb2.md (*thumb2_addsi3_compare0): Remove "*". + (doloop_end): New. + * config/arm/arm.md (*addsi3_compare0): Remove "*". + * params.def (sms-min-sc): New param flag. 
+ * doc/invoke.texi (sms-min-sc): Document it. + * ddg.c (create_ddg_dep_from_intra_loop_link): If a true dep edge + enters the branch create an anti edge in the opposite direction + to prevent the creation of reg-moves. + * modulo-sched.c: Adjust comment to reflect the fact we are + scheduling closing branch. + (PS_STAGE_COUNT): Rename to CALC_STAGE_COUNT and redefine. + (stage_count): New field in struct partial_schedule. + (calculate_stage_count): New function. + (normalize_sched_times): Rename to reset_sched_times and handle + incrementing the sched time of the nodes by a constant value + passed as parameter. + (duplicate_insns_of_cycles): Skip closing branch. + (sms_schedule_by_order): Schedule closing branch. + (ps_insn_find_column): Handle closing branch. + (sms_schedule): Call reset_sched_times and adjust the code to + support scheduling of the closing branch. Use sms-min-sc. + Support new form of doloop pattern. + (ps_insert_empty_row): Update calls to normalize_sched_times + and rotate_partial_schedule functions. + +=== modified file 'gcc/config/arm/arm.md' +--- old/gcc/config/arm/arm.md 2011-05-06 11:28:27 +0000 ++++ new/gcc/config/arm/arm.md 2011-05-13 13:42:39 +0000 +@@ -791,7 +791,7 @@ + "" + ) + +-(define_insn "*addsi3_compare0" ++(define_insn "addsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (match_operand:SI 1 "s_register_operand" "r, r") + +=== modified file 'gcc/config/arm/thumb2.md' +--- old/gcc/config/arm/thumb2.md 2011-01-03 20:52:22 +0000 ++++ new/gcc/config/arm/thumb2.md 2011-05-11 07:15:47 +0000 +@@ -836,7 +836,7 @@ + "operands[4] = GEN_INT (- INTVAL (operands[2]));" + ) + +-(define_insn "*thumb2_addsi3_compare0" ++(define_insn "thumb2_addsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (match_operand:SI 1 "s_register_operand" "l, 0, r") +@@ -1118,3 +1118,54 @@ + " + operands[2] = GEN_INT (32 - INTVAL (operands[2])); + ") ++ ++;; Define the subtract-one-and-jump insns so loop.c ++;; knows what to generate. ++(define_expand "doloop_end" ++ [(use (match_operand 0 "" "")) ; loop pseudo ++ (use (match_operand 1 "" "")) ; iterations; zero if unknown ++ (use (match_operand 2 "" "")) ; max iterations ++ (use (match_operand 3 "" "")) ; loop level ++ (use (match_operand 4 "" ""))] ; label ++ "TARGET_32BIT" ++ " ++ { ++ /* Currently SMS relies on the do-loop pattern to recognize loops ++ where (1) the control part consists of all insns defining and/or ++ using a certain 'count' register and (2) the loop count can be ++ adjusted by modifying this register prior to the loop. ++ ??? The possible introduction of a new block to initialize the ++ new IV can potentially affect branch optimizations. */ ++ if (optimize > 0 && flag_modulo_sched) ++ { ++ rtx s0; ++ rtx bcomp; ++ rtx loc_ref; ++ rtx cc_reg; ++ rtx insn; ++ rtx cmp; ++ ++ /* Only use this on innermost loops. 
*/ ++ if (INTVAL (operands[3]) > 1) ++ FAIL; ++ if (GET_MODE (operands[0]) != SImode) ++ FAIL; ++ ++ s0 = operands [0]; ++ if (TARGET_THUMB2) ++ insn = emit_insn (gen_thumb2_addsi3_compare0 (s0, s0, GEN_INT (-1))); ++ else ++ insn = emit_insn (gen_addsi3_compare0 (s0, s0, GEN_INT (-1))); ++ ++ cmp = XVECEXP (PATTERN (insn), 0, 0); ++ cc_reg = SET_DEST (cmp); ++ bcomp = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx); ++ loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands [4]); ++ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, ++ gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp, ++ loc_ref, pc_rtx))); ++ DONE; ++ }else ++ FAIL; ++}") ++ + +=== modified file 'gcc/ddg.c' +--- old/gcc/ddg.c 2010-11-30 11:41:24 +0000 ++++ new/gcc/ddg.c 2011-05-11 07:15:47 +0000 +@@ -197,6 +197,11 @@ + } + } + ++ /* If a true dep edge enters the branch create an anti edge in the ++ opposite direction to prevent the creation of reg-moves. */ ++ if ((DEP_TYPE (link) == REG_DEP_TRUE) && JUMP_P (dest_node->insn)) ++ create_ddg_dep_no_link (g, dest_node, src_node, ANTI_DEP, REG_DEP, 1); ++ + latency = dep_cost (link); + e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance); + add_edge_to_ddg (g, e); + +=== modified file 'gcc/doc/invoke.texi' +--- old/gcc/doc/invoke.texi 2011-04-18 11:31:29 +0000 ++++ new/gcc/doc/invoke.texi 2011-05-11 07:15:47 +0000 +@@ -8730,6 +8730,10 @@ + The maximum number of best instructions in the ready list that are considered + for renaming in the selective scheduler. The default value is 2. + ++@item sms-min-sc ++The minimum value of stage count that swing modulo scheduler will ++generate. The default value is 2. ++ + @item max-last-value-rtl + The maximum size measured as number of RTLs that can be recorded in an expression + in combiner for a pseudo register as last known value of that register. The default + +=== modified file 'gcc/loop-doloop.c' +--- old/gcc/loop-doloop.c 2010-11-30 11:41:24 +0000 ++++ new/gcc/loop-doloop.c 2011-05-11 07:15:47 +0000 +@@ -78,6 +78,8 @@ + rtx inc_src; + rtx condition; + rtx pattern; ++ rtx cc_reg = NULL_RTX; ++ rtx reg_orig = NULL_RTX; + + /* The canonical doloop pattern we expect has one of the following + forms: +@@ -96,7 +98,16 @@ + 2) (set (reg) (plus (reg) (const_int -1)) + (set (pc) (if_then_else (reg != 0) + (label_ref (label)) +- (pc))). */ ++ (pc))). ++ ++ Some targets (ARM) do the comparison before the branch, as in the ++ following form: ++ ++ 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0))) ++ (set (reg) (plus (reg) (const_int -1)))]) ++ (set (pc) (if_then_else (cc == NE) ++ (label_ref (label)) ++ (pc))) */ + + pattern = PATTERN (doloop_pat); + +@@ -104,19 +115,47 @@ + { + rtx cond; + rtx prev_insn = prev_nondebug_insn (doloop_pat); ++ rtx cmp_arg1, cmp_arg2; ++ rtx cmp_orig; + +- /* We expect the decrement to immediately precede the branch. */ ++ /* In case the pattern is not PARALLEL we expect two forms ++ of doloop which are cases 2) and 3) above: in case 2) the ++ decrement immediately precedes the branch, while in case 3) ++ the compare and decrement instructions immediately precede ++ the branch. */ + + if (prev_insn == NULL_RTX || !INSN_P (prev_insn)) + return 0; + + cmp = pattern; +- inc = PATTERN (PREV_INSN (doloop_pat)); ++ if (GET_CODE (PATTERN (prev_insn)) == PARALLEL) ++ { ++ /* The third case: the compare and decrement instructions ++ immediately precede the branch. 
*/ ++ cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0); ++ if (GET_CODE (cmp_orig) != SET) ++ return 0; ++ if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE) ++ return 0; ++ cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0); ++ cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1); ++ if (cmp_arg2 != const0_rtx ++ || GET_CODE (cmp_arg1) != PLUS) ++ return 0; ++ reg_orig = XEXP (cmp_arg1, 0); ++ if (XEXP (cmp_arg1, 1) != GEN_INT (-1) ++ || !REG_P (reg_orig)) ++ return 0; ++ cc_reg = SET_DEST (cmp_orig); ++ ++ inc = XVECEXP (PATTERN (prev_insn), 0, 1); ++ } ++ else ++ inc = PATTERN (prev_insn); + /* We expect the condition to be of the form (reg != 0) */ + cond = XEXP (SET_SRC (cmp), 0); + if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx) + return 0; +- + } + else + { +@@ -162,11 +201,15 @@ + return 0; + + if ((XEXP (condition, 0) == reg) ++ /* For the third case: */ ++ || ((cc_reg != NULL_RTX) ++ && (XEXP (condition, 0) == cc_reg) ++ && (reg_orig == reg)) + || (GET_CODE (XEXP (condition, 0)) == PLUS +- && XEXP (XEXP (condition, 0), 0) == reg)) ++ && XEXP (XEXP (condition, 0), 0) == reg)) + { + if (GET_CODE (pattern) != PARALLEL) +- /* The second form we expect: ++ /* For the second form we expect: + + (set (reg) (plus (reg) (const_int -1)) + (set (pc) (if_then_else (reg != 0) +@@ -181,7 +224,24 @@ + (set (reg) (plus (reg) (const_int -1))) + (additional clobbers and uses)]) + +- So we return that form instead. ++ For the third form we expect: ++ ++ (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0)) ++ (set (reg) (plus (reg) (const_int -1)))]) ++ (set (pc) (if_then_else (cc == NE) ++ (label_ref (label)) ++ (pc))) ++ ++ which is equivalent to the following: ++ ++ (parallel [(set (cc) (compare (reg, 1)) ++ (set (reg) (plus (reg) (const_int -1))) ++ (set (pc) (if_then_else (NE == cc) ++ (label_ref (label)) ++ (pc))))]) ++ ++ So we return the second form instead for the two cases. ++ + */ + condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx); + + +=== modified file 'gcc/modulo-sched.c' +--- old/gcc/modulo-sched.c 2011-02-14 17:59:10 +0000 ++++ new/gcc/modulo-sched.c 2011-05-11 07:15:47 +0000 +@@ -84,14 +84,13 @@ + II cycles (i.e. use register copies to prevent a def from overwriting + itself before reaching the use). + +- SMS works with countable loops (1) whose control part can be easily +- decoupled from the rest of the loop and (2) whose loop count can +- be easily adjusted. This is because we peel a constant number of +- iterations into a prologue and epilogue for which we want to avoid +- emitting the control part, and a kernel which is to iterate that +- constant number of iterations less than the original loop. So the +- control part should be a set of insns clearly identified and having +- its own iv, not otherwise used in the loop (at-least for now), which ++ SMS works with countable loops whose loop count can be easily ++ adjusted. This is because we peel a constant number of iterations ++ into a prologue and epilogue for which we want to avoid emitting ++ the control part, and a kernel which is to iterate that constant ++ number of iterations less than the original loop. So the control ++ part should be a set of insns clearly identified and having its ++ own iv, not otherwise used in the loop (at-least for now), which + initializes a register before the loop to the number of iterations. 
+ Currently SMS relies on the do-loop pattern to recognize such loops, + where (1) the control part comprises of all insns defining and/or +@@ -116,8 +115,10 @@ + + /* The number of different iterations the nodes in ps span, assuming + the stage boundaries are placed efficiently. */ +-#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \ +- + 1 + (ps)->ii - 1) / (ps)->ii) ++#define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \ ++ + 1 + ii - 1) / ii) ++/* The stage count of ps. */ ++#define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count) + + /* A single instruction in the partial schedule. */ + struct ps_insn +@@ -155,6 +156,8 @@ + int max_cycle; + + ddg_ptr g; /* The DDG of the insns in the partial schedule. */ ++ ++ int stage_count; /* The stage count of the partial schedule. */ + }; + + /* We use this to record all the register replacements we do in +@@ -195,7 +198,7 @@ + rtx, rtx); + static void duplicate_insns_of_cycles (partial_schedule_ptr, + int, int, int, rtx); +- ++static int calculate_stage_count (partial_schedule_ptr ps); + #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap) + #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time) + #define SCHED_FIRST_REG_MOVE(x) \ +@@ -310,10 +313,10 @@ + either a single (parallel) branch-on-count or a (non-parallel) + branch immediately preceded by a single (decrement) insn. */ + first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail +- : PREV_INSN (tail)); ++ : prev_nondebug_insn (tail)); + + for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn)) +- if (reg_mentioned_p (reg, insn)) ++ if (reg_mentioned_p (reg, insn) && !DEBUG_INSN_P (insn)) + { + if (dump_file) + { +@@ -569,13 +572,12 @@ + } + } + +-/* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values +- of SCHED_ROW and SCHED_STAGE. */ ++/* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of ++ SCHED_ROW and SCHED_STAGE. */ + static void +-normalize_sched_times (partial_schedule_ptr ps) ++reset_sched_times (partial_schedule_ptr ps, int amount) + { + int row; +- int amount = PS_MIN_CYCLE (ps); + int ii = ps->ii; + ps_insn_ptr crr_insn; + +@@ -584,19 +586,43 @@ + { + ddg_node_ptr u = crr_insn->node; + int normalized_time = SCHED_TIME (u) - amount; ++ int new_min_cycle = PS_MIN_CYCLE (ps) - amount; ++ int sc_until_cycle_zero, stage; + +- if (dump_file) +- fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\ +- min_cycle=%d\n", crr_insn->node->cuid, SCHED_TIME +- (u), ps->min_cycle); ++ if (dump_file) ++ { ++ /* Print the scheduling times after the rotation. */ ++ fprintf (dump_file, "crr_insn->node=%d (insn id %d), " ++ "crr_insn->cycle=%d, min_cycle=%d", crr_insn->node->cuid, ++ INSN_UID (crr_insn->node->insn), SCHED_TIME (u), ++ normalized_time); ++ if (JUMP_P (crr_insn->node->insn)) ++ fprintf (dump_file, " (branch)"); ++ fprintf (dump_file, "\n"); ++ } ++ + gcc_assert (SCHED_TIME (u) >= ps->min_cycle); + gcc_assert (SCHED_TIME (u) <= ps->max_cycle); + SCHED_TIME (u) = normalized_time; +- SCHED_ROW (u) = normalized_time % ii; +- SCHED_STAGE (u) = normalized_time / ii; ++ SCHED_ROW (u) = SMODULO (normalized_time, ii); ++ ++ /* The calculation of stage count is done adding the number ++ of stages before cycle zero and after cycle zero. 
*/ ++ sc_until_cycle_zero = CALC_STAGE_COUNT (-1, new_min_cycle, ii); ++ ++ if (SCHED_TIME (u) < 0) ++ { ++ stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii); ++ SCHED_STAGE (u) = sc_until_cycle_zero - stage; ++ } ++ else ++ { ++ stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii); ++ SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1; ++ } + } + } +- ++ + /* Set SCHED_COLUMN of each node according to its position in PS. */ + static void + set_columns_for_ps (partial_schedule_ptr ps) +@@ -646,9 +672,12 @@ + + /* Do not duplicate any insn which refers to count_reg as it + belongs to the control part. ++ The closing branch is scheduled as well and thus should ++ be ignored. + TODO: This should be done by analyzing the control part of + the loop. */ +- if (reg_mentioned_p (count_reg, u_node->insn)) ++ if (reg_mentioned_p (count_reg, u_node->insn) ++ || JUMP_P (ps_ij->node->insn)) + continue; + + if (for_prolog) +@@ -1009,9 +1038,11 @@ + continue; + } + +- /* Don't handle BBs with calls or barriers, or !single_set insns, +- or auto-increment insns (to avoid creating invalid reg-moves +- for the auto-increment insns). ++ /* Don't handle BBs with calls or barriers or auto-increment insns ++ (to avoid creating invalid reg-moves for the auto-increment insns), ++ or !single_set with the exception of instructions that include ++ count_reg---these instructions are part of the control part ++ that do-loop recognizes. + ??? Should handle auto-increment insns. + ??? Should handle insns defining subregs. */ + for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn)) +@@ -1021,7 +1052,8 @@ + if (CALL_P (insn) + || BARRIER_P (insn) + || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn) +- && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE) ++ && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE ++ && !reg_mentioned_p (count_reg, insn)) + || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0) + || (INSN_P (insn) && (set = single_set (insn)) + && GET_CODE (SET_DEST (set)) == SUBREG)) +@@ -1049,7 +1081,11 @@ + continue; + } + +- if (! (g = create_ddg (bb, 0))) ++ /* Always schedule the closing branch with the rest of the ++ instructions. The branch is rotated to be in row ii-1 at the ++ end of the scheduling procedure to make sure it's the last ++ instruction in the iteration. */ ++ if (! (g = create_ddg (bb, 1))) + { + if (dump_file) + fprintf (dump_file, "SMS create_ddg failed\n"); +@@ -1157,14 +1193,17 @@ + + ps = sms_schedule_by_order (g, mii, maxii, node_order); + +- if (ps){ +- stage_count = PS_STAGE_COUNT (ps); +- gcc_assert(stage_count >= 1); +- } ++ if (ps) ++ { ++ stage_count = calculate_stage_count (ps); ++ gcc_assert(stage_count >= 1); ++ PS_STAGE_COUNT(ps) = stage_count; ++ } + +- /* Stage count of 1 means that there is no interleaving between +- iterations, let the scheduling passes do the job. */ +- if (stage_count <= 1 ++ /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of ++ 1 means that there is no interleaving between iterations thus ++ we let the scheduling passes do the job in this case. */ ++ if (stage_count < (unsigned) PARAM_VALUE (PARAM_SMS_MIN_SC) + || (count_init && (loop_count <= stage_count)) + || (flag_branch_probabilities && (trip_count <= stage_count))) + { +@@ -1182,32 +1221,24 @@ + else + { + struct undo_replace_buff_elem *reg_move_replaces; +- +- if (dump_file) +- { ++ int amount = SCHED_TIME (g->closing_branch) + 1; ++ ++ /* Set the stage boundaries. The closing_branch was scheduled ++ and should appear in the last (ii-1) row. 
*/ ++ reset_sched_times (ps, amount); ++ rotate_partial_schedule (ps, amount); ++ set_columns_for_ps (ps); ++ ++ canon_loop (loop); ++ ++ if (dump_file) ++ { + fprintf (dump_file, + "SMS succeeded %d %d (with ii, sc)\n", ps->ii, + stage_count); + print_partial_schedule (ps, dump_file); +- fprintf (dump_file, +- "SMS Branch (%d) will later be scheduled at cycle %d.\n", +- g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1); + } +- +- /* Set the stage boundaries. If the DDG is built with closing_branch_deps, +- the closing_branch was scheduled and should appear in the last (ii-1) +- row. Otherwise, we are free to schedule the branch, and we let nodes +- that were scheduled at the first PS_MIN_CYCLE cycle appear in the first +- row; this should reduce stage_count to minimum. +- TODO: Revisit the issue of scheduling the insns of the +- control part relative to the branch when the control part +- has more than one insn. */ +- normalize_sched_times (ps); +- rotate_partial_schedule (ps, PS_MIN_CYCLE (ps)); +- set_columns_for_ps (ps); +- +- canon_loop (loop); +- ++ + /* case the BCT count is not known , Do loop-versioning */ + if (count_reg && ! count_init) + { +@@ -1760,12 +1791,6 @@ + continue; + } + +- if (JUMP_P (insn)) /* Closing branch handled later. */ +- { +- RESET_BIT (tobe_scheduled, u); +- continue; +- } +- + if (TEST_BIT (sched_nodes, u)) + continue; + +@@ -1893,8 +1918,8 @@ + if (dump_file) + fprintf (dump_file, "split_row=%d\n", split_row); + +- normalize_sched_times (ps); +- rotate_partial_schedule (ps, ps->min_cycle); ++ reset_sched_times (ps, PS_MIN_CYCLE (ps)); ++ rotate_partial_schedule (ps, PS_MIN_CYCLE (ps)); + + rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr)); + for (row = 0; row < split_row; row++) +@@ -2571,6 +2596,7 @@ + ps_insn_ptr next_ps_i; + ps_insn_ptr first_must_follow = NULL; + ps_insn_ptr last_must_precede = NULL; ++ ps_insn_ptr last_in_row = NULL; + int row; + + if (! ps_i) +@@ -2597,8 +2623,37 @@ + else + last_must_precede = next_ps_i; + } ++ /* The closing branch must be the last in the row. */ ++ if (must_precede ++ && TEST_BIT (must_precede, next_ps_i->node->cuid) ++ && JUMP_P (next_ps_i->node->insn)) ++ return false; ++ ++ last_in_row = next_ps_i; + } + ++ /* The closing branch is scheduled as well. Make sure there is no ++ dependent instruction after it as the branch should be the last ++ instruction in the row. */ ++ if (JUMP_P (ps_i->node->insn)) ++ { ++ if (first_must_follow) ++ return false; ++ if (last_in_row) ++ { ++ /* Make the branch the last in the row. New instructions ++ will be inserted at the beginning of the row or after the ++ last must_precede instruction thus the branch is guaranteed ++ to remain the last instruction in the row. */ ++ last_in_row->next_in_row = ps_i; ++ ps_i->prev_in_row = last_in_row; ++ ps_i->next_in_row = NULL; ++ } ++ else ++ ps->rows[row] = ps_i; ++ return true; ++ } ++ + /* Now insert the node after INSERT_AFTER_PSI. */ + + if (! last_must_precede) +@@ -2820,6 +2875,24 @@ + return ps_i; + } + ++/* Calculate the stage count of the partial schedule PS. The calculation ++ takes into account the rotation to bring the closing branch to row ++ ii-1. 
*/ ++int ++calculate_stage_count (partial_schedule_ptr ps) ++{ ++ int rotation_amount = (SCHED_TIME (ps->g->closing_branch)) + 1; ++ int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount; ++ int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount; ++ int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii); ++ ++ /* The calculation of stage count is done adding the number of stages ++ before cycle zero and after cycle zero. */ ++ stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii); ++ ++ return stage_count; ++} ++ + /* Rotate the rows of PS such that insns scheduled at time + START_CYCLE will appear in row 0. Updates max/min_cycles. */ + void + +=== modified file 'gcc/params.def' +--- old/gcc/params.def 2011-04-18 11:31:29 +0000 ++++ new/gcc/params.def 2011-05-11 07:15:47 +0000 +@@ -344,6 +344,11 @@ + "sms-max-ii-factor", + "A factor for tuning the upper bound that swing modulo scheduler uses for scheduling a loop", + 100, 0, 0) ++/* The minimum value of stage count that swing modulo scheduler will generate. */ ++DEFPARAM(PARAM_SMS_MIN_SC, ++ "sms-min-sc", ++ "The minimum value of stage count that swing modulo scheduler will generate.", ++ 2, 1, 1) + DEFPARAM(PARAM_SMS_DFA_HISTORY, + "sms-dfa-history", + "The number of cycles the swing modulo scheduler considers when checking conflicts using DFA", + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106750.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106750.patch new file mode 100644 index 0000000000..9c62102db5 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106750.patch @@ -0,0 +1,30 @@ + 2011-05-13 Revital Eres <revital.eres@linaro.org> + + gcc/ + * ddg.c (free_ddg_all_sccs): Free sccs field in struct ddg_all_sccs. + * modulo-sched.c (sms_schedule): Avoid unfreed memory when SMS fails. + +=== modified file 'gcc/ddg.c' +--- old/gcc/ddg.c 2011-05-11 07:15:47 +0000 ++++ new/gcc/ddg.c 2011-05-13 16:03:40 +0000 +@@ -1016,6 +1016,7 @@ + for (i = 0; i < all_sccs->num_sccs; i++) + free_scc (all_sccs->sccs[i]); + ++ free (all_sccs->sccs); + free (all_sccs); + } + + +=== modified file 'gcc/modulo-sched.c' +--- old/gcc/modulo-sched.c 2011-05-11 07:15:47 +0000 ++++ new/gcc/modulo-sched.c 2011-05-13 16:03:40 +0000 +@@ -1216,7 +1216,6 @@ + fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count); + fprintf (dump_file, ")\n"); + } +- continue; + } + else + { + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106751.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106751.patch new file mode 100644 index 0000000000..c26ee5bde4 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106751.patch @@ -0,0 +1,134 @@ +2011-06-02 Chung-Lin Tang <cltang@codesourcery.com> + + Backport from mainline: + + 2011-03-21 Chung-Lin Tang <cltang@codesourcery.com> + + gcc/ + * simplify-rtx.c (simplify_binary_operation_1): Handle + (xor (and A B) C) case when B and C are both constants. + + gcc/testsuite/ + * gcc.target/arm/xor-and.c: New. + + 2011-03-18 Chung-Lin Tang <cltang@codesourcery.com> + + gcc/ + * combine.c (try_combine): Do simplification only call of + subst() on i2 even when i1 is present. Update comments. + + gcc/testsuite/ + * gcc.target/arm/unsigned-extend-1.c: New. 
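As an aside on the r106751 entry above: the simplify-rtx.c hunk that follows rewrites (A & B) ^ C when B and C are constants and every bit set in B is also set in C, i.e. (~C & B) == 0. The stand-alone program below is only an illustrative sketch, not code from the patch; the constants B and C are made-up example values chosen to satisfy that precondition, and it simply brute-forces the identity the transformation relies on, (A & B) ^ C == (~A & C) | (~B & C).

/* Illustrative check only -- not part of the patches above.  It verifies,
   by brute force over 16-bit A, the identity used by the backported
   simplify-rtx.c change when B and C are constants and (~C & B) == 0:

       (A & B) ^ C  ==  (~A & C) | (~B & C)

   The constants below are arbitrary example values (assumption), chosen so
   that every bit set in B is also set in C.  */
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  const uint32_t b = 0x00f0;   /* example B: all of its bits are inside C */
  const uint32_t c = 0x00ff;   /* example C */
  uint32_t a;

  if ((~c & b) != 0)
    {
      puts ("precondition (~C & B) == 0 not met");
      return 1;
    }

  for (a = 0; a <= 0xffff; a++)
    if (((a & b) ^ c) != ((~a & c) | (~b & c)))
      {
        printf ("mismatch for A = 0x%x\n", a);
        return 1;
      }

  puts ("identity holds for all 16-bit A");
  return 0;
}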
+ +=== modified file 'gcc/combine.c' +--- old/gcc/combine.c 2011-05-06 11:28:27 +0000 ++++ new/gcc/combine.c 2011-05-27 14:31:18 +0000 +@@ -3089,7 +3089,7 @@ + /* It is possible that the source of I2 or I1 may be performing + an unneeded operation, such as a ZERO_EXTEND of something + that is known to have the high part zero. Handle that case +- by letting subst look at the innermost one of them. ++ by letting subst look at the inner insns. + + Another way to do this would be to have a function that tries + to simplify a single insn instead of merging two or more +@@ -3114,11 +3114,9 @@ + subst_low_luid = DF_INSN_LUID (i1); + i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0); + } +- else +- { +- subst_low_luid = DF_INSN_LUID (i2); +- i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0); +- } ++ ++ subst_low_luid = DF_INSN_LUID (i2); ++ i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0); + } + + n_occurrences = 0; /* `subst' counts here */ + +=== modified file 'gcc/simplify-rtx.c' +--- old/gcc/simplify-rtx.c 2011-03-26 09:24:06 +0000 ++++ new/gcc/simplify-rtx.c 2011-05-27 14:31:18 +0000 +@@ -2484,6 +2484,46 @@ + XEXP (op0, 1), mode), + op1); + ++ /* Given (xor (and A B) C), using P^Q == (~P&Q) | (~Q&P), ++ we can transform like this: ++ (A&B)^C == ~(A&B)&C | ~C&(A&B) ++ == (~A|~B)&C | ~C&(A&B) * DeMorgan's Law ++ == ~A&C | ~B&C | A&(~C&B) * Distribute and re-order ++ Attempt a few simplifications when B and C are both constants. */ ++ if (GET_CODE (op0) == AND ++ && CONST_INT_P (op1) ++ && CONST_INT_P (XEXP (op0, 1))) ++ { ++ rtx a = XEXP (op0, 0); ++ rtx b = XEXP (op0, 1); ++ rtx c = op1; ++ HOST_WIDE_INT bval = INTVAL (b); ++ HOST_WIDE_INT cval = INTVAL (c); ++ ++ rtx na_c ++ = simplify_binary_operation (AND, mode, ++ simplify_gen_unary (NOT, mode, a, mode), ++ c); ++ if ((~cval & bval) == 0) ++ { ++ /* Try to simplify ~A&C | ~B&C. */ ++ if (na_c != NULL_RTX) ++ return simplify_gen_binary (IOR, mode, na_c, ++ GEN_INT (~bval & cval)); ++ } ++ else ++ { ++ /* If ~A&C is zero, simplify A&(~C&B) | ~B&C. */ ++ if (na_c == const0_rtx) ++ { ++ rtx a_nc_b = simplify_gen_binary (AND, mode, a, ++ GEN_INT (~cval & bval)); ++ return simplify_gen_binary (IOR, mode, a_nc_b, ++ GEN_INT (~bval & cval)); ++ } ++ } ++ } ++ + /* (xor (comparison foo bar) (const_int 1)) can become the reversed + comparison if STORE_FLAG_VALUE is 1. 
*/ + if (STORE_FLAG_VALUE == 1 + +=== added file 'gcc/testsuite/gcc.target/arm/unsigned-extend-1.c' +--- old/gcc/testsuite/gcc.target/arm/unsigned-extend-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/unsigned-extend-1.c 2011-05-27 14:31:18 +0000 +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv6" } */ ++ ++unsigned char foo (unsigned char c) ++{ ++ return (c >= '0') && (c <= '9'); ++} ++ ++/* { dg-final { scan-assembler-not "uxtb" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/xor-and.c' +--- old/gcc/testsuite/gcc.target/arm/xor-and.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/xor-and.c 2011-05-27 14:31:18 +0000 +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -march=armv6" } */ ++ ++unsigned short foo (unsigned short x) ++{ ++ x ^= 0x4002; ++ x >>= 1; ++ x |= 0x8000; ++ return x; ++} ++ ++/* { dg-final { scan-assembler "orr" } } */ ++/* { dg-final { scan-assembler-not "mvn" } } */ ++/* { dg-final { scan-assembler-not "uxth" } } */ + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106753.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106753.patch new file mode 100644 index 0000000000..bda39e8faa --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106753.patch @@ -0,0 +1,5027 @@ +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + * gimple.c (gimple_build_call_internal_1): Add missing call to + gimple_call_reset_alias_info. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/testsuite/ + Backport from mainline: + + 2011-05-03 Richard Sandiford <richard.sandiford@linaro.org> + + * gcc.dg/vect/vect-strided-u16-i3.c: New test. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/testsuite/ + Backport from mainline: + + 2011-05-03 Richard Sandiford <richard.sandiford@linaro.org> + + * lib/target-supports.exp (check_effective_target_vect_strided): + Replace with... + (check_effective_target_vect_strided2) + (check_effective_target_vect_strided3) + (check_effective_target_vect_strided4) + (check_effective_target_vect_strided8): ...these new functions. + + * gcc.dg/vect/O3-pr39675-2.c: Update accordingly. + * gcc.dg/vect/costmodel/ppc/costmodel-slp-12.c: Likewise. + * gcc.dg/vect/fast-math-slp-27.c: Likewise. + * gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c: Likewise. + * gcc.dg/vect/pr37539.c: Likewise. + * gcc.dg/vect/slp-11a.c: Likewise. + * gcc.dg/vect/slp-11b.c: Likewise. + * gcc.dg/vect/slp-11c.c: Likewise. + * gcc.dg/vect/slp-12a.c: Likewise. + * gcc.dg/vect/slp-12b.c: Likewise. + * gcc.dg/vect/slp-18.c: Likewise. + * gcc.dg/vect/slp-19a.c: Likewise. + * gcc.dg/vect/slp-19b.c: Likewise. + * gcc.dg/vect/slp-21.c: Likewise. + * gcc.dg/vect/slp-23.c: Likewise. + * gcc.dg/vect/vect-cselim-1.c: Likewise. + + * gcc.dg/vect/fast-math-vect-complex-3.c: Use vect_stridedN + instead of vect_interleave && vect_extract_even_odd. + * gcc.dg/vect/no-scevccp-outer-10a.c: Likewise. + * gcc.dg/vect/no-scevccp-outer-10b.c: Likewise. + * gcc.dg/vect/no-scevccp-outer-20.c: Likewise. + * gcc.dg/vect/vect-1.c: Likewise. + * gcc.dg/vect/vect-10.c: Likewise. + * gcc.dg/vect/vect-98.c: Likewise. + * gcc.dg/vect/vect-107.c: Likewise. + * gcc.dg/vect/vect-strided-a-mult.c: Likewise. + * gcc.dg/vect/vect-strided-a-u16-i2.c: Likewise. + * gcc.dg/vect/vect-strided-a-u16-i4.c: Likewise. + * gcc.dg/vect/vect-strided-a-u16-mult.c: Likewise. 
+ * gcc.dg/vect/vect-strided-a-u32-mult.c: Likewise. + * gcc.dg/vect/vect-strided-a-u8-i2-gap.c: Likewise. + * gcc.dg/vect/vect-strided-a-u8-i8-gap2.c: Likewise. + * gcc.dg/vect/vect-strided-a-u8-i8-gap7.c: Likewise. + * gcc.dg/vect/vect-strided-float.c: Likewise. + * gcc.dg/vect/vect-strided-mult-char-ls.c: Likewise. + * gcc.dg/vect/vect-strided-mult.c: Likewise. + * gcc.dg/vect/vect-strided-same-dr.c: Likewise. + * gcc.dg/vect/vect-strided-u16-i2.c: Likewise. + * gcc.dg/vect/vect-strided-u16-i4.c: Likewise. + * gcc.dg/vect/vect-strided-u32-i4.c: Likewise. + * gcc.dg/vect/vect-strided-u32-i8.c: Likewise. + * gcc.dg/vect/vect-strided-u32-mult.c: Likewise. + * gcc.dg/vect/vect-strided-u8-i2-gap.c: Likewise. + * gcc.dg/vect/vect-strided-u8-i2.c: Likewise. + * gcc.dg/vect/vect-strided-u8-i8-gap2.c: Likewise. + * gcc.dg/vect/vect-strided-u8-i8-gap4.c: Likewise. + * gcc.dg/vect/vect-strided-u8-i8-gap7.c: Likewise. + * gcc.dg/vect/vect-strided-u8-i8.c: Likewise. + * gcc.dg/vect/vect-vfa-03.c: Likewise. + + * gcc.dg/vect/no-scevccp-outer-18.c: Add vect_stridedN to the + target condition. + * gcc.dg/vect/pr30843.c: Likewise. + * gcc.dg/vect/pr33866.c: Likewise. + * gcc.dg/vect/slp-reduc-6.c: Likewise. + * gcc.dg/vect/vect-strided-store-a-u8-i2.c: Likewise. + * gcc.dg/vect/vect-strided-store-u16-i4.c: Likewise. + * gcc.dg/vect/vect-strided-store-u32-i2.c: Likewise. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/testsuite/ + Backport from mainline: + + 2011-05-03 Richard Sandiford <richard.sandiford@linaro.org> + + * gcc.dg/vect/slp-11.c: Split into... + * gcc.dg/vect/slp-11a.c, gcc.dg/vect/slp-11b.c, + gcc.dg/vect/slp-11c.c: ...these tests. + * gcc.dg/vect/slp-12a.c: Split 4-stride loop into... + * gcc.dg/vect/slp-12c.c: ...this new test. + * gcc.dg/vect/slp-19.c: Split into... + * gcc.dg/vect/slp-19a.c, gcc.dg/vect/slp-19b.c, + gcc.dg/vect/slp-19c.c: ...these new tests. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/testsuite/ + Backport from mainline: + + 2011-05-03 Richard Sandiford <richard.sandiford@linaro.org> + + * lib/target-supports.exp + (check_effective_target_vect_extract_even_odd_wide): Delete. + (check_effective_target_vect_strided_wide): Likewise. + * gcc.dg/vect/O3-pr39675-2.c: Use the non-wide versions instead. + * gcc.dg/vect/fast-math-pr35982.c: Likewise. + * gcc.dg/vect/fast-math-vect-complex-3.c: Likewise. + * gcc.dg/vect/pr37539.c: Likewise. + * gcc.dg/vect/slp-11.c: Likewise. + * gcc.dg/vect/slp-12a.c: Likewise. + * gcc.dg/vect/slp-12b.c: Likewise. + * gcc.dg/vect/slp-19.c: Likewise. + * gcc.dg/vect/slp-23.c: Likewise. + * gcc.dg/vect/vect-1.c: Likewise. + * gcc.dg/vect/vect-98.c: Likewise. + * gcc.dg/vect/vect-107.c: Likewise. + * gcc.dg/vect/vect-strided-float.c: Likewise. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/testsuite/ + Backport from mainline: + + 2011-04-21 Richard Sandiford <richard.sandiford@linaro.org> + + * gcc.dg/vect/vect.exp: Run the main tests twice, one with -flto + and once without. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainlie: + + 2011-05-03 Richard Sandiford <richard.sandiford@linaro.org> + + * config/arm/neon.md (vec_load_lanes<mode><mode>): New expanders, + (vec_store_lanes<mode><mode>): Likewise. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-05-03 Richard Sandiford <richard.sandiford@linaro.org> + + * doc/md.texi (vec_load_lanes, vec_store_lanes): Document. 
+ * optabs.h (COI_vec_load_lanes, COI_vec_store_lanes): New + convert_optab_index values. + (vec_load_lanes_optab, vec_store_lanes_optab): New convert optabs. + * genopinit.c (optabs): Initialize the new optabs. + * internal-fn.def (LOAD_LANES, STORE_LANES): New internal functions. + * internal-fn.c (get_multi_vector_move, expand_LOAD_LANES) + (expand_STORE_LANES): New functions. + * tree.h (build_array_type_nelts): Declare. + * tree.c (build_array_type_nelts): New function. + * tree-vectorizer.h (vect_model_store_cost): Add a bool argument. + (vect_model_load_cost): Likewise. + (vect_store_lanes_supported, vect_load_lanes_supported) + (vect_record_strided_load_vectors): Declare. + * tree-vect-data-refs.c (vect_lanes_optab_supported_p) + (vect_store_lanes_supported, vect_load_lanes_supported): New functions. + (vect_transform_strided_load): Split out statement recording into... + (vect_record_strided_load_vectors): ...this new function. + * tree-vect-stmts.c (create_vector_array, read_vector_array) + (write_vector_array, create_array_ref): New functions. + (vect_model_store_cost): Add store_lanes_p argument. + (vect_model_load_cost): Add load_lanes_p argument. + (vectorizable_store): Try to use store-lanes functions for + interleaved stores. + (vectorizable_load): Likewise load-lanes and loads. + * tree-vect-slp.c (vect_get_and_check_slp_defs): Update call + to vect_model_store_cost. + (vect_build_slp_tree): Likewise vect_model_load_cost. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-04-20 Richard Sandiford <richard.sandiford@linaro.org> + + * tree-vect-stmts.c (vectorizable_store): Only chain one related + statement per copy. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + * tree-inline.c (estimate_num_insns): Likewise. + + Backport from mainline: + + 2011-04-20 Richard Sandiford <richard.sandiford@linaro.org> + + * Makefile.in (INTERNAL_FN_DEF, INTERNAL_FN_H): Define. + (GIMPLE_H): Include $(INTERNAL_FN_H). + (OBJS-common): Add internal-fn.o. + (internal-fn.o): New rule. + * internal-fn.def: New file. + * internal-fn.h: Likewise. + * internal-fn.c: Likewise. + * gimple.h: Include internal-fn.h. + (GF_CALL_INTERNAL): New gf_mask. + (gimple_statement_call): Put fntype into a union with a new + internal_fn field. + (gimple_build_call_internal): Declare. + (gimple_build_call_internal_vec): Likewise. + (gimple_call_same_target_p): Likewise. + (gimple_call_internal_p): New function. + (gimple_call_internal_fn): Likewise. + (gimple_call_set_fn): Assert that the function is not internal. + (gimple_call_set_fndecl): Likewise. + (gimple_call_set_internal_fn): New function. + (gimple_call_addr_fndecl): Handle null functions. + (gimple_call_return_type): Likewise. + [---- Plus backport adjustments: + (GF_CALL_INTERNAL_FN_SHIFT): New macro. + (GF_CALL_INTERNAL_FN): New gf_mask. + ----] + * gimple.c (gimple_build_call_internal_1): New function. + (gimple_build_call_internal): Likewise. + (gimple_build_call_internal_vec): Likewise. + (gimple_call_same_target_p): Likewise. + (gimple_call_flags): Handle calls to internal functions. + (gimple_call_fnspec): New function. + (gimple_call_arg_flags, gimple_call_return_flags): Use it. + (gimple_has_side_effects): Handle null functions. + (gimple_rhs_has_side_effects): Likewise. + (gimple_call_copy_skip_args): Handle calls to internal functions. + * cfgexpand.c (expand_call_stmt): Likewise. + * expr.c (expand_expr_real_1): Assert that the call isn't internal. 
+ * gimple-low.c (gimple_check_call_args): Handle calls to internal + functions. + * gimple-pretty-print.c (dump_gimple_call): Likewise. + * ipa-prop.c (ipa_analyze_call_uses): Handle null functions. + * tree-cfg.c (verify_gimple_call): Handle calls to internal functions. + (do_warn_unused_result): Likewise. + [---- Plus backport adjustments: + (verify_stmt): Likewise. + ----] + * tree-eh.c (same_handler_p): Use gimple_call_same_target_p. + * tree-ssa-ccp.c (ccp_fold_stmt): Handle calls to internal functions. + [---- Plus backport adjustments: + (fold_gimple_call): Likewise. + ----] + * tree-ssa-dom.c (hashable_expr): Use the gimple statement to record + the target of a call. + (initialize_hash_element): Update accordingly. + (hashable_expr_equal_p): Use gimple_call_same_target_p. + (iterative_hash_hashable_expr): Handle calls to internal functions. + (print_expr_hash_elt): Likewise. + * tree-ssa-pre.c (can_value_number_call): Likewise. + (eliminate): Handle null functions. + * tree-ssa-sccvn.c (visit_use): Handle calls to internal functions. + * tree-ssa-structalias.c (find_func_aliases): Likewise. + * value-prof.c (gimple_ic_transform): Likewise. + (gimple_indirect_call_to_profile): Likewise. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-04-14 Richard Sandiford <richard.sandiford@linaro.org> + + * tree-vectorizer.h (vect_strided_store_supported): Add a + HOST_WIDE_INT argument. + (vect_strided_load_supported): Likewise. + (vect_permute_store_chain): Return void. + (vect_transform_strided_load): Likewise. + (vect_permute_load_chain): Delete. + * tree-vect-data-refs.c (vect_strided_store_supported): Take a + count argument. Check that the count is a power of two. + (vect_strided_load_supported): Likewise. + (vect_permute_store_chain): Return void. Update after above changes. + Assert that the access is supported. + (vect_permute_load_chain): Likewise. + (vect_transform_strided_load): Return void. + * tree-vect-stmts.c (vectorizable_store): Update calls after + above interface changes. + (vectorizable_load): Likewise. + (vect_analyze_stmt): Don't check for strided powers of two here. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-04-14 Richard Sandiford <richard.sandiford@linaro.org> + + * tree-vectorizer.h (vect_create_data_ref_ptr): Add an extra + type parameter. + * tree-vect-data-refs.c (vect_create_data_ref_ptr): Add an aggr_type + parameter. Generalise code to handle arrays as well as vectors. + (vect_setup_realignment): Update accordingly. + * tree-vect-stmts.c (vectorizable_store): Likewise. + (vectorizable_load): Likewise. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-04-14 Richard Sandiford <richard.sandiford@linaro.org> + + * tree-vect-stmts.c (vectorizable_load): Allocate and free dr_chain + within the per-copy loop. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-04-14 Richard Sandiford <richard.sandiford@linaro.org> + + * tree-vect-stmts.c (vectorizable_load): Print the number of copies + in the dump file. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-03-25 Richard Sandiford <richard.sandiford@linaro.org> + + * config/arm/arm.h (CANNOT_CHANGE_MODE_CLASS): Restrict FPA_REGS + case to VFPv1. 
+ +=== modified file 'gcc/Makefile.in' +--- old/gcc/Makefile.in 2011-05-26 14:27:33 +0000 ++++ new/gcc/Makefile.in 2011-06-02 12:12:00 +0000 +@@ -888,6 +888,8 @@ + READ_MD_H = $(OBSTACK_H) $(HASHTAB_H) read-md.h + PARAMS_H = params.h params.def + BUILTINS_DEF = builtins.def sync-builtins.def omp-builtins.def ++INTERNAL_FN_DEF = internal-fn.def ++INTERNAL_FN_H = internal-fn.h $(INTERNAL_FN_DEF) + TREE_H = tree.h all-tree.def tree.def c-family/c-common.def \ + $(lang_tree_files) $(MACHMODE_H) tree-check.h $(BUILTINS_DEF) \ + $(INPUT_H) statistics.h $(VEC_H) treestruct.def $(HASHTAB_H) \ +@@ -897,7 +899,7 @@ + BASIC_BLOCK_H = basic-block.h $(PREDICT_H) $(VEC_H) $(FUNCTION_H) cfghooks.h + GIMPLE_H = gimple.h gimple.def gsstruct.def pointer-set.h $(VEC_H) \ + $(GGC_H) $(BASIC_BLOCK_H) $(TARGET_H) tree-ssa-operands.h \ +- tree-ssa-alias.h vecir.h ++ tree-ssa-alias.h vecir.h $(INTERNAL_FN_H) + GCOV_IO_H = gcov-io.h gcov-iov.h auto-host.h + COVERAGE_H = coverage.h $(GCOV_IO_H) + DEMANGLE_H = $(srcdir)/../include/demangle.h +@@ -1269,6 +1271,7 @@ + init-regs.o \ + input.o \ + integrate.o \ ++ internal-fn.o \ + intl.o \ + ira.o \ + ira-build.o \ +@@ -2751,6 +2754,8 @@ + $(TM_H) $(TREE_H) $(DIAGNOSTIC_CORE_H) $(DIAGNOSTIC_H) $(TREE_FLOW_H) \ + $(TREE_PASS_H) tree-ssa-propagate.h tree-pretty-print.h \ + gimple-pretty-print.h ++internal-fn.o : internal-fn.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ ++ $(GIMPLE_H) $(TREE_H) $(EXPR_H) $(OPTABS_H) $(RECOG_H) + gimple.o : gimple.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TREE_H) \ + $(GGC_H) $(GIMPLE_H) $(DIAGNOSTIC_CORE_H) $(DIAGNOSTIC_H) gt-gimple.h \ + $(TREE_FLOW_H) value-prof.h $(FLAGS_H) $(DEMANGLE_H) \ + +=== modified file 'gcc/cfgexpand.c' +--- old/gcc/cfgexpand.c 2011-04-20 11:18:50 +0000 ++++ new/gcc/cfgexpand.c 2011-06-02 12:12:00 +0000 +@@ -1839,12 +1839,17 @@ + static void + expand_call_stmt (gimple stmt) + { +- tree exp; +- tree lhs = gimple_call_lhs (stmt); ++ tree exp, lhs; + size_t i; + bool builtin_p; + tree decl; + ++ if (gimple_call_internal_p (stmt)) ++ { ++ expand_internal_call (stmt); ++ return; ++ } ++ + exp = build_vl_exp (CALL_EXPR, gimple_call_num_args (stmt) + 3); + + CALL_EXPR_FN (exp) = gimple_call_fn (stmt); +@@ -1882,6 +1887,7 @@ + SET_EXPR_LOCATION (exp, gimple_location (stmt)); + TREE_BLOCK (exp) = gimple_block (stmt); + ++ lhs = gimple_call_lhs (stmt); + if (lhs) + expand_assignment (lhs, exp, false); + else + +=== modified file 'gcc/config/arm/arm.h' +--- old/gcc/config/arm/arm.h 2011-04-26 14:42:21 +0000 ++++ new/gcc/config/arm/arm.h 2011-06-02 12:12:00 +0000 +@@ -1169,12 +1169,12 @@ + } + + /* FPA registers can't do subreg as all values are reformatted to internal +- precision. VFP registers may only be accessed in the mode they +- were set. */ +-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ +- (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \ +- ? reg_classes_intersect_p (FPA_REGS, (CLASS)) \ +- || reg_classes_intersect_p (VFP_REGS, (CLASS)) \ ++ precision. In VFPv1, VFP registers could only be accessed in the mode ++ they were set, so subregs would be invalid there too. However, we don't ++ support VFPv1 at the moment, and the restriction was lifted in VFPv2. */ ++#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ ++ (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \ ++ ? reg_classes_intersect_p (FPA_REGS, (CLASS)) \ + : 0) + + /* The class value for index registers, and the one for base regs. 
*/ + +=== modified file 'gcc/config/arm/neon.md' +--- old/gcc/config/arm/neon.md 2011-05-06 11:28:27 +0000 ++++ new/gcc/config/arm/neon.md 2011-06-02 12:12:00 +0000 +@@ -4248,6 +4248,12 @@ + DONE; + }) + ++(define_expand "vec_load_lanes<mode><mode>" ++ [(set (match_operand:VDQX 0 "s_register_operand") ++ (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")] ++ UNSPEC_VLD1))] ++ "TARGET_NEON") ++ + (define_insn "neon_vld1<mode>" + [(set (match_operand:VDQX 0 "s_register_operand" "=w") + (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")] +@@ -4345,6 +4351,12 @@ + (const_string "neon_vld1_1_2_regs")))] + ) + ++(define_expand "vec_store_lanes<mode><mode>" ++ [(set (match_operand:VDQX 0 "neon_struct_operand") ++ (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")] ++ UNSPEC_VST1))] ++ "TARGET_NEON") ++ + (define_insn "neon_vst1<mode>" + [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um") + (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")] +@@ -4401,6 +4413,13 @@ + [(set_attr "neon_type" "neon_vst1_vst2_lane")] + ) + ++(define_expand "vec_load_lanesti<mode>" ++ [(set (match_operand:TI 0 "s_register_operand") ++ (unspec:TI [(match_operand:TI 1 "neon_struct_operand") ++ (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ UNSPEC_VLD2))] ++ "TARGET_NEON") ++ + (define_insn "neon_vld2<mode>" + [(set (match_operand:TI 0 "s_register_operand" "=w") + (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um") +@@ -4419,6 +4438,13 @@ + (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes")))] + ) + ++(define_expand "vec_load_lanesoi<mode>" ++ [(set (match_operand:OI 0 "s_register_operand") ++ (unspec:OI [(match_operand:OI 1 "neon_struct_operand") ++ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ UNSPEC_VLD2))] ++ "TARGET_NEON") ++ + (define_insn "neon_vld2<mode>" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") +@@ -4501,6 +4527,13 @@ + (const_string "neon_vld1_1_2_regs")))] + ) + ++(define_expand "vec_store_lanesti<mode>" ++ [(set (match_operand:TI 0 "neon_struct_operand") ++ (unspec:TI [(match_operand:TI 1 "s_register_operand") ++ (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ UNSPEC_VST2))] ++ "TARGET_NEON") ++ + (define_insn "neon_vst2<mode>" + [(set (match_operand:TI 0 "neon_struct_operand" "=Um") + (unspec:TI [(match_operand:TI 1 "s_register_operand" "w") +@@ -4519,6 +4552,13 @@ + (const_string "neon_vst1_1_2_regs_vst2_2_regs")))] + ) + ++(define_expand "vec_store_lanesoi<mode>" ++ [(set (match_operand:OI 0 "neon_struct_operand") ++ (unspec:OI [(match_operand:OI 1 "s_register_operand") ++ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ UNSPEC_VST2))] ++ "TARGET_NEON") ++ + (define_insn "neon_vst2<mode>" + [(set (match_operand:OI 0 "neon_struct_operand" "=Um") + (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") +@@ -4584,6 +4624,13 @@ + [(set_attr "neon_type" "neon_vst1_vst2_lane")] + ) + ++(define_expand "vec_load_lanesei<mode>" ++ [(set (match_operand:EI 0 "s_register_operand") ++ (unspec:EI [(match_operand:EI 1 "neon_struct_operand") ++ (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ UNSPEC_VLD3))] ++ "TARGET_NEON") ++ + (define_insn "neon_vld3<mode>" + [(set (match_operand:EI 0 "s_register_operand" "=w") + (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um") +@@ -4602,6 +4649,16 @@ + (const_string "neon_vld3_vld4")))] + ) + ++(define_expand "vec_load_lanesci<mode>" ++ [(match_operand:CI 0 "s_register_operand") ++ (match_operand:CI 1 
"neon_struct_operand") ++ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ "TARGET_NEON" ++{ ++ emit_insn (gen_neon_vld3<mode> (operands[0], operands[1])); ++ DONE; ++}) ++ + (define_expand "neon_vld3<mode>" + [(match_operand:CI 0 "s_register_operand") + (match_operand:CI 1 "neon_struct_operand") +@@ -4741,6 +4798,13 @@ + (const_string "neon_vld3_vld4_all_lanes") + (const_string "neon_vld1_1_2_regs")))]) + ++(define_expand "vec_store_lanesei<mode>" ++ [(set (match_operand:EI 0 "neon_struct_operand") ++ (unspec:EI [(match_operand:EI 1 "s_register_operand") ++ (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ UNSPEC_VST3))] ++ "TARGET_NEON") ++ + (define_insn "neon_vst3<mode>" + [(set (match_operand:EI 0 "neon_struct_operand" "=Um") + (unspec:EI [(match_operand:EI 1 "s_register_operand" "w") +@@ -4758,6 +4822,16 @@ + (const_string "neon_vst1_1_2_regs_vst2_2_regs") + (const_string "neon_vst2_4_regs_vst3_vst4")))]) + ++(define_expand "vec_store_lanesci<mode>" ++ [(match_operand:CI 0 "neon_struct_operand") ++ (match_operand:CI 1 "s_register_operand") ++ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ "TARGET_NEON" ++{ ++ emit_insn (gen_neon_vst3<mode> (operands[0], operands[1])); ++ DONE; ++}) ++ + (define_expand "neon_vst3<mode>" + [(match_operand:CI 0 "neon_struct_operand") + (match_operand:CI 1 "s_register_operand") +@@ -4869,6 +4943,13 @@ + } + [(set_attr "neon_type" "neon_vst3_vst4_lane")]) + ++(define_expand "vec_load_lanesoi<mode>" ++ [(set (match_operand:OI 0 "s_register_operand") ++ (unspec:OI [(match_operand:OI 1 "neon_struct_operand") ++ (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ UNSPEC_VLD4))] ++ "TARGET_NEON") ++ + (define_insn "neon_vld4<mode>" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") +@@ -4887,6 +4968,16 @@ + (const_string "neon_vld3_vld4")))] + ) + ++(define_expand "vec_load_lanesxi<mode>" ++ [(match_operand:XI 0 "s_register_operand") ++ (match_operand:XI 1 "neon_struct_operand") ++ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ "TARGET_NEON" ++{ ++ emit_insn (gen_neon_vld4<mode> (operands[0], operands[1])); ++ DONE; ++}) ++ + (define_expand "neon_vld4<mode>" + [(match_operand:XI 0 "s_register_operand") + (match_operand:XI 1 "neon_struct_operand") +@@ -5033,6 +5124,13 @@ + (const_string "neon_vld1_1_2_regs")))] + ) + ++(define_expand "vec_store_lanesoi<mode>" ++ [(set (match_operand:OI 0 "neon_struct_operand") ++ (unspec:OI [(match_operand:OI 1 "s_register_operand") ++ (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ UNSPEC_VST4))] ++ "TARGET_NEON") ++ + (define_insn "neon_vst4<mode>" + [(set (match_operand:OI 0 "neon_struct_operand" "=Um") + (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") +@@ -5051,6 +5149,16 @@ + (const_string "neon_vst2_4_regs_vst3_vst4")))] + ) + ++(define_expand "vec_store_lanesxi<mode>" ++ [(match_operand:XI 0 "neon_struct_operand") ++ (match_operand:XI 1 "s_register_operand") ++ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ "TARGET_NEON" ++{ ++ emit_insn (gen_neon_vst4<mode> (operands[0], operands[1])); ++ DONE; ++}) ++ + (define_expand "neon_vst4<mode>" + [(match_operand:XI 0 "neon_struct_operand") + (match_operand:XI 1 "s_register_operand") + +=== modified file 'gcc/doc/md.texi' +--- old/gcc/doc/md.texi 2011-01-03 20:52:22 +0000 ++++ new/gcc/doc/md.texi 2011-05-05 15:43:06 +0000 +@@ -3935,6 +3935,48 @@ + consecutive memory locations, operand 1 is the first register, and + operand 2 is a constant: the number of consecutive registers. 
+ ++@cindex @code{vec_load_lanes@var{m}@var{n}} instruction pattern ++@item @samp{vec_load_lanes@var{m}@var{n}} ++Perform an interleaved load of several vectors from memory operand 1 ++into register operand 0. Both operands have mode @var{m}. The register ++operand is viewed as holding consecutive vectors of mode @var{n}, ++while the memory operand is a flat array that contains the same number ++of elements. The operation is equivalent to: ++ ++@smallexample ++int c = GET_MODE_SIZE (@var{m}) / GET_MODE_SIZE (@var{n}); ++for (j = 0; j < GET_MODE_NUNITS (@var{n}); j++) ++ for (i = 0; i < c; i++) ++ operand0[i][j] = operand1[j * c + i]; ++@end smallexample ++ ++For example, @samp{vec_load_lanestiv4hi} loads 8 16-bit values ++from memory into a register of mode @samp{TI}@. The register ++contains two consecutive vectors of mode @samp{V4HI}@. ++ ++This pattern can only be used if: ++@smallexample ++TARGET_ARRAY_MODE_SUPPORTED_P (@var{n}, @var{c}) ++@end smallexample ++is true. GCC assumes that, if a target supports this kind of ++instruction for some mode @var{n}, it also supports unaligned ++loads for vectors of mode @var{n}. ++ ++@cindex @code{vec_store_lanes@var{m}@var{n}} instruction pattern ++@item @samp{vec_store_lanes@var{m}@var{n}} ++Equivalent to @samp{vec_load_lanes@var{m}@var{n}}, with the memory ++and register operands reversed. That is, the instruction is ++equivalent to: ++ ++@smallexample ++int c = GET_MODE_SIZE (@var{m}) / GET_MODE_SIZE (@var{n}); ++for (j = 0; j < GET_MODE_NUNITS (@var{n}); j++) ++ for (i = 0; i < c; i++) ++ operand0[j * c + i] = operand1[i][j]; ++@end smallexample ++ ++for a memory operand 0 and register operand 1. ++ + @cindex @code{vec_set@var{m}} instruction pattern + @item @samp{vec_set@var{m}} + Set given field in the vector value. 
Operand 0 is the vector to modify, + +=== modified file 'gcc/expr.c' +--- old/gcc/expr.c 2011-05-26 14:27:33 +0000 ++++ new/gcc/expr.c 2011-06-02 12:12:00 +0000 +@@ -8537,10 +8537,13 @@ + if (code == SSA_NAME + && (g = SSA_NAME_DEF_STMT (ssa_name)) + && gimple_code (g) == GIMPLE_CALL) +- pmode = promote_function_mode (type, mode, &unsignedp, +- TREE_TYPE +- (TREE_TYPE (gimple_call_fn (g))), +- 2); ++ { ++ gcc_assert (!gimple_call_internal_p (g)); ++ pmode = promote_function_mode (type, mode, &unsignedp, ++ TREE_TYPE ++ (TREE_TYPE (gimple_call_fn (g))), ++ 2); ++ } + else + pmode = promote_decl_mode (exp, &unsignedp); + gcc_assert (GET_MODE (decl_rtl) == pmode); + +=== modified file 'gcc/genopinit.c' +--- old/gcc/genopinit.c 2011-01-03 20:52:22 +0000 ++++ new/gcc/genopinit.c 2011-05-05 15:43:06 +0000 +@@ -74,6 +74,8 @@ + "set_convert_optab_handler (fractuns_optab, $B, $A, CODE_FOR_$(fractuns$Q$a$I$b2$))", + "set_convert_optab_handler (satfract_optab, $B, $A, CODE_FOR_$(satfract$a$Q$b2$))", + "set_convert_optab_handler (satfractuns_optab, $B, $A, CODE_FOR_$(satfractuns$I$a$Q$b2$))", ++ "set_convert_optab_handler (vec_load_lanes_optab, $A, $B, CODE_FOR_$(vec_load_lanes$a$b$))", ++ "set_convert_optab_handler (vec_store_lanes_optab, $A, $B, CODE_FOR_$(vec_store_lanes$a$b$))", + "set_optab_handler (add_optab, $A, CODE_FOR_$(add$P$a3$))", + "set_optab_handler (addv_optab, $A, CODE_FOR_$(add$F$a3$)),\n\ + set_optab_handler (add_optab, $A, CODE_FOR_$(add$F$a3$))", + +=== modified file 'gcc/gimple-low.c' +--- old/gcc/gimple-low.c 2011-02-08 11:15:53 +0000 ++++ new/gcc/gimple-low.c 2011-05-05 15:42:22 +0000 +@@ -218,6 +218,10 @@ + tree fndecl, parms, p; + unsigned int i, nargs; + ++ /* Calls to internal functions always match their signature. */ ++ if (gimple_call_internal_p (stmt)) ++ return true; ++ + nargs = gimple_call_num_args (stmt); + + /* Get argument types for verification. */ + +=== modified file 'gcc/gimple-pretty-print.c' +--- old/gcc/gimple-pretty-print.c 2011-02-15 18:36:16 +0000 ++++ new/gcc/gimple-pretty-print.c 2011-05-05 15:42:22 +0000 +@@ -596,8 +596,12 @@ + + if (flags & TDF_RAW) + { +- dump_gimple_fmt (buffer, spc, flags, "%G <%T, %T", +- gs, gimple_call_fn (gs), lhs); ++ if (gimple_call_internal_p (gs)) ++ dump_gimple_fmt (buffer, spc, flags, "%G <%s, %T", gs, ++ internal_fn_name (gimple_call_internal_fn (gs)), lhs); ++ else ++ dump_gimple_fmt (buffer, spc, flags, "%G <%T, %T", ++ gs, gimple_call_fn (gs), lhs); + if (gimple_call_num_args (gs) > 0) + { + pp_string (buffer, ", "); +@@ -617,7 +621,10 @@ + + pp_space (buffer); + } +- print_call_name (buffer, gimple_call_fn (gs), flags); ++ if (gimple_call_internal_p (gs)) ++ pp_string (buffer, internal_fn_name (gimple_call_internal_fn (gs))); ++ else ++ print_call_name (buffer, gimple_call_fn (gs), flags); + pp_string (buffer, " ("); + dump_gimple_call_args (buffer, gs, flags); + pp_character (buffer, ')'); + +=== modified file 'gcc/gimple.c' +--- old/gcc/gimple.c 2011-05-18 13:33:53 +0000 ++++ new/gcc/gimple.c 2011-06-02 12:12:00 +0000 +@@ -276,6 +276,59 @@ + } + + ++/* Helper for gimple_build_call_internal and gimple_build_call_internal_vec. ++ Build the basic components of a GIMPLE_CALL statement to internal ++ function FN with NARGS arguments. 
*/ ++ ++static inline gimple ++gimple_build_call_internal_1 (enum internal_fn fn, unsigned nargs) ++{ ++ gimple s = gimple_build_with_ops (GIMPLE_CALL, ERROR_MARK, nargs + 3); ++ s->gsbase.subcode |= GF_CALL_INTERNAL; ++ gimple_call_set_internal_fn (s, fn); ++ gimple_call_reset_alias_info (s); ++ return s; ++} ++ ++ ++/* Build a GIMPLE_CALL statement to internal function FN. NARGS is ++ the number of arguments. The ... are the arguments. */ ++ ++gimple ++gimple_build_call_internal (enum internal_fn fn, unsigned nargs, ...) ++{ ++ va_list ap; ++ gimple call; ++ unsigned i; ++ ++ call = gimple_build_call_internal_1 (fn, nargs); ++ va_start (ap, nargs); ++ for (i = 0; i < nargs; i++) ++ gimple_call_set_arg (call, i, va_arg (ap, tree)); ++ va_end (ap); ++ ++ return call; ++} ++ ++ ++/* Build a GIMPLE_CALL statement to internal function FN with the arguments ++ specified in vector ARGS. */ ++ ++gimple ++gimple_build_call_internal_vec (enum internal_fn fn, VEC(tree, heap) *args) ++{ ++ unsigned i, nargs; ++ gimple call; ++ ++ nargs = VEC_length (tree, args); ++ call = gimple_build_call_internal_1 (fn, nargs); ++ for (i = 0; i < nargs; i++) ++ gimple_call_set_arg (call, i, VEC_index (tree, args, i)); ++ ++ return call; ++} ++ ++ + /* Build a GIMPLE_CALL statement from CALL_EXPR T. Note that T is + assumed to be in GIMPLE form already. Minimal checking is done of + this fact. */ +@@ -1774,6 +1827,20 @@ + return (gimple_body (fndecl) || (fn && fn->cfg)); + } + ++/* Return true if calls C1 and C2 are known to go to the same function. */ ++ ++bool ++gimple_call_same_target_p (const_gimple c1, const_gimple c2) ++{ ++ if (gimple_call_internal_p (c1)) ++ return (gimple_call_internal_p (c2) ++ && gimple_call_internal_fn (c1) == gimple_call_internal_fn (c2)); ++ else ++ return (gimple_call_fn (c1) == gimple_call_fn (c2) ++ || (gimple_call_fndecl (c1) ++ && gimple_call_fndecl (c1) == gimple_call_fndecl (c2))); ++} ++ + /* Detect flags from a GIMPLE_CALL. This is just like + call_expr_flags, but for gimple tuples. */ + +@@ -1786,6 +1853,8 @@ + + if (decl) + flags = flags_from_decl_or_type (decl); ++ else if (gimple_call_internal_p (stmt)) ++ flags = internal_fn_flags (gimple_call_internal_fn (stmt)); + else + { + t = TREE_TYPE (gimple_call_fn (stmt)); +@@ -1801,18 +1870,35 @@ + return flags; + } + ++/* Return the "fn spec" string for call STMT. */ ++ ++static tree ++gimple_call_fnspec (const_gimple stmt) ++{ ++ tree fn, type, attr; ++ ++ fn = gimple_call_fn (stmt); ++ if (!fn) ++ return NULL_TREE; ++ ++ type = TREE_TYPE (TREE_TYPE (fn)); ++ if (!type) ++ return NULL_TREE; ++ ++ attr = lookup_attribute ("fn spec", TYPE_ATTRIBUTES (type)); ++ if (!attr) ++ return NULL_TREE; ++ ++ return TREE_VALUE (TREE_VALUE (attr)); ++} ++ + /* Detects argument flags for argument number ARG on call STMT. 
*/ + + int + gimple_call_arg_flags (const_gimple stmt, unsigned arg) + { +- tree type = TREE_TYPE (TREE_TYPE (gimple_call_fn (stmt))); +- tree attr = lookup_attribute ("fn spec", TYPE_ATTRIBUTES (type)); +- if (!attr) +- return 0; +- +- attr = TREE_VALUE (TREE_VALUE (attr)); +- if (1 + arg >= (unsigned) TREE_STRING_LENGTH (attr)) ++ tree attr = gimple_call_fnspec (stmt); ++ if (!attr || 1 + arg >= (unsigned) TREE_STRING_LENGTH (attr)) + return 0; + + switch (TREE_STRING_POINTER (attr)[1 + arg]) +@@ -1850,13 +1936,8 @@ + if (gimple_call_flags (stmt) & ECF_MALLOC) + return ERF_NOALIAS; + +- type = TREE_TYPE (TREE_TYPE (gimple_call_fn (stmt))); +- attr = lookup_attribute ("fn spec", TYPE_ATTRIBUTES (type)); +- if (!attr) +- return 0; +- +- attr = TREE_VALUE (TREE_VALUE (attr)); +- if (TREE_STRING_LENGTH (attr) < 1) ++ attr = gimple_call_fnspec (stmt); ++ if (!attr || TREE_STRING_LENGTH (attr) < 1) + return 0; + + switch (TREE_STRING_POINTER (attr)[0]) +@@ -2293,6 +2374,7 @@ + if (is_gimple_call (s)) + { + unsigned nargs = gimple_call_num_args (s); ++ tree fn; + + if (!(gimple_call_flags (s) & (ECF_CONST | ECF_PURE))) + return true; +@@ -2307,7 +2389,8 @@ + return true; + } + +- if (TREE_SIDE_EFFECTS (gimple_call_fn (s))) ++ fn = gimple_call_fn (s); ++ if (fn && TREE_SIDE_EFFECTS (fn)) + return true; + + for (i = 0; i < nargs; i++) +@@ -2349,14 +2432,15 @@ + if (is_gimple_call (s)) + { + unsigned nargs = gimple_call_num_args (s); ++ tree fn; + + if (!(gimple_call_flags (s) & (ECF_CONST | ECF_PURE))) + return true; + + /* We cannot use gimple_has_volatile_ops here, + because we must ignore a volatile LHS. */ +- if (TREE_SIDE_EFFECTS (gimple_call_fn (s)) +- || TREE_THIS_VOLATILE (gimple_call_fn (s))) ++ fn = gimple_call_fn (s); ++ if (fn && (TREE_SIDE_EFFECTS (fn) || TREE_THIS_VOLATILE (fn))) + { + gcc_assert (gimple_has_volatile_ops (s)); + return true; +@@ -3113,7 +3197,6 @@ + gimple_call_copy_skip_args (gimple stmt, bitmap args_to_skip) + { + int i; +- tree fn = gimple_call_fn (stmt); + int nargs = gimple_call_num_args (stmt); + VEC(tree, heap) *vargs = VEC_alloc (tree, heap, nargs); + gimple new_stmt; +@@ -3122,7 +3205,11 @@ + if (!bitmap_bit_p (args_to_skip, i)) + VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i)); + +- new_stmt = gimple_build_call_vec (fn, vargs); ++ if (gimple_call_internal_p (stmt)) ++ new_stmt = gimple_build_call_internal_vec (gimple_call_internal_fn (stmt), ++ vargs); ++ else ++ new_stmt = gimple_build_call_vec (gimple_call_fn (stmt), vargs); + VEC_free (tree, heap, vargs); + if (gimple_call_lhs (stmt)) + gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); + +=== modified file 'gcc/gimple.h' +--- old/gcc/gimple.h 2011-04-18 21:58:03 +0000 ++++ new/gcc/gimple.h 2011-06-02 12:12:00 +0000 +@@ -30,6 +30,7 @@ + #include "basic-block.h" + #include "tree-ssa-operands.h" + #include "tree-ssa-alias.h" ++#include "internal-fn.h" + + struct gimple_seq_node_d; + typedef struct gimple_seq_node_d *gimple_seq_node; +@@ -82,6 +83,8 @@ + name, a _DECL, a _REF, etc. */ + }; + ++#define GF_CALL_INTERNAL_FN_SHIFT 8 ++ + /* Specific flags for individual GIMPLE statements. These flags are + always stored in gimple_statement_base.subcode and they may only be + defined for statement codes that do not use sub-codes. 
+@@ -102,6 +105,8 @@ + GF_CALL_TAILCALL = 1 << 3, + GF_CALL_VA_ARG_PACK = 1 << 4, + GF_CALL_NOTHROW = 1 << 5, ++ GF_CALL_INTERNAL = 1 << 6, ++ GF_CALL_INTERNAL_FN = 0xff << GF_CALL_INTERNAL_FN_SHIFT, + GF_OMP_PARALLEL_COMBINED = 1 << 0, + + /* True on an GIMPLE_OMP_RETURN statement if the return does not require +@@ -817,6 +822,8 @@ + + gimple gimple_build_call_vec (tree, VEC(tree, heap) *); + gimple gimple_build_call (tree, unsigned, ...); ++gimple gimple_build_call_internal (enum internal_fn, unsigned, ...); ++gimple gimple_build_call_internal_vec (enum internal_fn, VEC(tree, heap) *); + gimple gimple_build_call_from_tree (tree); + gimple gimplify_assign (tree, tree, gimple_seq *); + gimple gimple_build_cond (enum tree_code, tree, tree, tree, tree); +@@ -861,6 +868,7 @@ + void gimple_seq_free (gimple_seq); + void gimple_seq_add_seq (gimple_seq *, gimple_seq); + gimple_seq gimple_seq_copy (gimple_seq); ++bool gimple_call_same_target_p (const_gimple, const_gimple); + int gimple_call_flags (const_gimple); + int gimple_call_return_flags (const_gimple); + int gimple_call_arg_flags (const_gimple, unsigned); +@@ -2012,6 +2020,27 @@ + } + + ++/* Return true if call GS calls an internal-only function, as enumerated ++ by internal_fn. */ ++ ++static inline bool ++gimple_call_internal_p (const_gimple gs) ++{ ++ GIMPLE_CHECK (gs, GIMPLE_CALL); ++ return (gs->gsbase.subcode & GF_CALL_INTERNAL) != 0; ++} ++ ++ ++/* Return the target of internal call GS. */ ++ ++static inline enum internal_fn ++gimple_call_internal_fn (const_gimple gs) ++{ ++ gcc_assert (gimple_call_internal_p (gs)); ++ return (enum internal_fn) (gs->gsbase.subcode >> GF_CALL_INTERNAL_FN_SHIFT); ++} ++ ++ + /* Return a pointer to the tree node representing the function called by call + statement GS. */ + +@@ -2029,6 +2058,7 @@ + gimple_call_set_fn (gimple gs, tree fn) + { + GIMPLE_CHECK (gs, GIMPLE_CALL); ++ gcc_assert (!gimple_call_internal_p (gs)); + gimple_set_op (gs, 1, fn); + } + +@@ -2039,10 +2069,23 @@ + gimple_call_set_fndecl (gimple gs, tree decl) + { + GIMPLE_CHECK (gs, GIMPLE_CALL); ++ gcc_assert (!gimple_call_internal_p (gs)); + gimple_set_op (gs, 1, build_fold_addr_expr_loc (gimple_location (gs), decl)); + } + + ++/* Set internal function FN to be the function called by call statement GS. */ ++ ++static inline void ++gimple_call_set_internal_fn (gimple gs, enum internal_fn fn) ++{ ++ GIMPLE_CHECK (gs, GIMPLE_CALL); ++ gcc_assert (gimple_call_internal_p (gs)); ++ gs->gsbase.subcode &= ~GF_CALL_INTERNAL_FN; ++ gs->gsbase.subcode |= (int) fn << GF_CALL_INTERNAL_FN_SHIFT; ++} ++ ++ + /* If a given GIMPLE_CALL's callee is a FUNCTION_DECL, return it. + Otherwise return NULL. This function is analogous to + get_callee_fndecl in tree land. */ +@@ -2051,7 +2094,7 @@ + gimple_call_fndecl (const_gimple gs) + { + tree addr = gimple_call_fn (gs); +- if (TREE_CODE (addr) == ADDR_EXPR) ++ if (addr && TREE_CODE (addr) == ADDR_EXPR) + { + tree fndecl = TREE_OPERAND (addr, 0); + if (TREE_CODE (fndecl) == MEM_REF) +@@ -2073,8 +2116,13 @@ + static inline tree + gimple_call_return_type (const_gimple gs) + { +- tree fn = gimple_call_fn (gs); +- tree type = TREE_TYPE (fn); ++ tree fn, type; ++ ++ fn = gimple_call_fn (gs); ++ if (fn == NULL_TREE) ++ return TREE_TYPE (gimple_call_lhs (gs)); ++ ++ type = TREE_TYPE (fn); + + /* See through the pointer. 
*/ + type = TREE_TYPE (type); + +=== added file 'gcc/internal-fn.c' +--- old/gcc/internal-fn.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/internal-fn.c 2011-05-05 15:43:06 +0000 +@@ -0,0 +1,147 @@ ++/* Internal functions. ++ Copyright (C) 2011 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "gimple.h" ++#include "tree.h" ++#include "expr.h" ++#include "optabs.h" ++#include "recog.h" ++ ++/* The names of each internal function, indexed by function number. */ ++const char *const internal_fn_name_array[] = { ++#define DEF_INTERNAL_FN(CODE, FLAGS) #CODE, ++#include "internal-fn.def" ++#undef DEF_INTERNAL_FN ++ "<invalid-fn>" ++}; ++ ++/* The ECF_* flags of each internal function, indexed by function number. */ ++const int internal_fn_flags_array[] = { ++#define DEF_INTERNAL_FN(CODE, FLAGS) FLAGS, ++#include "internal-fn.def" ++#undef DEF_INTERNAL_FN ++ 0 ++}; ++ ++/* ARRAY_TYPE is an array of vector modes. Return the associated insn ++ for load-lanes-style optab OPTAB. The insn must exist. */ ++ ++static enum insn_code ++get_multi_vector_move (tree array_type, convert_optab optab) ++{ ++ enum insn_code icode; ++ enum machine_mode imode; ++ enum machine_mode vmode; ++ ++ gcc_assert (TREE_CODE (array_type) == ARRAY_TYPE); ++ imode = TYPE_MODE (array_type); ++ vmode = TYPE_MODE (TREE_TYPE (array_type)); ++ ++ icode = convert_optab_handler (optab, imode, vmode); ++ gcc_assert (icode != CODE_FOR_nothing); ++ return icode; ++} ++ ++/* Expand LOAD_LANES call STMT. */ ++ ++static void ++expand_LOAD_LANES (gimple stmt) ++{ ++ tree type, lhs, rhs; ++ rtx target, mem; ++ enum insn_code icode; ++ const struct insn_operand_data *operand; ++ ++ lhs = gimple_call_lhs (stmt); ++ rhs = gimple_call_arg (stmt, 0); ++ type = TREE_TYPE (lhs); ++ ++ target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); ++ mem = expand_normal (rhs); ++ ++ gcc_assert (REG_P (target)); ++ gcc_assert (MEM_P (mem)); ++ PUT_MODE (mem, TYPE_MODE (type)); ++ ++ icode = get_multi_vector_move (type, vec_load_lanes_optab); ++ ++ operand = &insn_data[(int) icode].operand[1]; ++ if (operand->predicate && !operand->predicate (mem, operand->mode)) ++ mem = replace_equiv_address (mem, force_reg (Pmode, XEXP (mem, 0))); ++ ++ emit_insn (GEN_FCN (icode) (target, mem)); ++} ++ ++/* Expand STORE_LANES call STMT. 
*/ ++ ++static void ++expand_STORE_LANES (gimple stmt) ++{ ++ tree type, lhs, rhs; ++ rtx target, reg; ++ enum insn_code icode; ++ const struct insn_operand_data *operand; ++ ++ lhs = gimple_call_lhs (stmt); ++ rhs = gimple_call_arg (stmt, 0); ++ type = TREE_TYPE (rhs); ++ ++ target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); ++ reg = expand_normal (rhs); ++ ++ gcc_assert (MEM_P (target)); ++ PUT_MODE (target, TYPE_MODE (type)); ++ ++ icode = get_multi_vector_move (type, vec_store_lanes_optab); ++ ++ operand = &insn_data[(int) icode].operand[0]; ++ if (operand->predicate && !operand->predicate (target, operand->mode)) ++ target = replace_equiv_address (target, ++ force_reg (Pmode, XEXP (target, 0))); ++ ++ operand = &insn_data[(int) icode].operand[1]; ++ if (operand->predicate && !operand->predicate (reg, operand->mode)) ++ reg = force_reg (TYPE_MODE (type), reg); ++ ++ emit_insn (GEN_FCN (icode) (target, reg)); ++} ++ ++/* Routines to expand each internal function, indexed by function number. ++ Each routine has the prototype: ++ ++ expand_<NAME> (gimple stmt) ++ ++ where STMT is the statement that performs the call. */ ++static void (*const internal_fn_expanders[]) (gimple) = { ++#define DEF_INTERNAL_FN(CODE, FLAGS) expand_##CODE, ++#include "internal-fn.def" ++#undef DEF_INTERNAL_FN ++ 0 ++}; ++ ++/* Expand STMT, which is a call to internal function FN. */ ++ ++void ++expand_internal_call (gimple stmt) ++{ ++ internal_fn_expanders[(int) gimple_call_internal_fn (stmt)] (stmt); ++} + +=== added file 'gcc/internal-fn.def' +--- old/gcc/internal-fn.def 1970-01-01 00:00:00 +0000 ++++ new/gcc/internal-fn.def 2011-05-05 15:43:06 +0000 +@@ -0,0 +1,42 @@ ++/* Internal functions. ++ Copyright (C) 2011 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++/* This file specifies a list of internal "functions". These functions ++ differ from built-in functions in that they have no linkage and cannot ++ be called directly by the user. They represent operations that are only ++ synthesised by GCC itself. ++ ++ Internal functions are used instead of tree codes if the operation ++ and its operands are more naturally represented as a GIMPLE_CALL ++ than a GIMPLE_ASSIGN. ++ ++ Each entry in this file has the form: ++ ++ DEF_INTERNAL_FN (NAME, FLAGS) ++ ++ where NAME is the name of the function and FLAGS is a set of ++ ECF_* flags. Each entry must have a corresponding expander ++ of the form: ++ ++ void expand_NAME (gimple stmt) ++ ++ where STMT is the statement that performs the call. */ ++ ++DEF_INTERNAL_FN (LOAD_LANES, ECF_CONST | ECF_LEAF) ++DEF_INTERNAL_FN (STORE_LANES, ECF_CONST | ECF_LEAF) + +=== added file 'gcc/internal-fn.h' +--- old/gcc/internal-fn.h 1970-01-01 00:00:00 +0000 ++++ new/gcc/internal-fn.h 2011-05-05 15:42:22 +0000 +@@ -0,0 +1,52 @@ ++/* Internal functions. ++ Copyright (C) 2011 Free Software Foundation, Inc. ++ ++This file is part of GCC. 
++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++#ifndef GCC_INTERNAL_FN_H ++#define GCC_INTERNAL_FN_H ++ ++enum internal_fn { ++#define DEF_INTERNAL_FN(CODE, FLAGS) IFN_##CODE, ++#include "internal-fn.def" ++#undef DEF_INTERNAL_FN ++ IFN_LAST ++}; ++ ++extern const char *const internal_fn_name_array[]; ++extern const int internal_fn_flags_array[]; ++ ++/* Return the name of internal function FN. The name is only meaningful ++ for dumps; it has no linkage. */ ++ ++static inline const char * ++internal_fn_name (enum internal_fn fn) ++{ ++ return internal_fn_name_array[(int) fn]; ++} ++ ++/* Return the ECF_* flags for function FN. */ ++ ++static inline int ++internal_fn_flags (enum internal_fn fn) ++{ ++ return internal_fn_flags_array[(int) fn]; ++} ++ ++extern void expand_internal_call (gimple); ++ ++#endif + +=== modified file 'gcc/ipa-prop.c' +--- old/gcc/ipa-prop.c 2011-04-18 21:58:03 +0000 ++++ new/gcc/ipa-prop.c 2011-06-02 12:12:00 +0000 +@@ -1418,6 +1418,8 @@ + { + tree target = gimple_call_fn (call); + ++ if (!target) ++ return; + if (TREE_CODE (target) == SSA_NAME) + ipa_analyze_indirect_call_uses (node, info, parms_info, call, target); + else if (TREE_CODE (target) == OBJ_TYPE_REF) + +=== modified file 'gcc/optabs.h' +--- old/gcc/optabs.h 2011-01-03 20:52:22 +0000 ++++ new/gcc/optabs.h 2011-05-05 15:43:06 +0000 +@@ -578,6 +578,9 @@ + COI_satfract, + COI_satfractuns, + ++ COI_vec_load_lanes, ++ COI_vec_store_lanes, ++ + COI_MAX + }; + +@@ -598,6 +601,8 @@ + #define fractuns_optab (&convert_optab_table[COI_fractuns]) + #define satfract_optab (&convert_optab_table[COI_satfract]) + #define satfractuns_optab (&convert_optab_table[COI_satfractuns]) ++#define vec_load_lanes_optab (&convert_optab_table[COI_vec_load_lanes]) ++#define vec_store_lanes_optab (&convert_optab_table[COI_vec_store_lanes]) + + /* Contains the optab used for each rtx code. 
*/ + extern optab code_to_optab[NUM_RTX_CODE + 1]; + +=== modified file 'gcc/testsuite/gcc.dg/vect/O3-pr39675-2.c' +--- old/gcc/testsuite/gcc.dg/vect/O3-pr39675-2.c 2009-04-20 10:26:18 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/O3-pr39675-2.c 2011-05-05 15:46:10 +0000 +@@ -26,7 +26,7 @@ + } + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided_wide } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided_wide } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided4 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided4 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-12.c' +--- old/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-12.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-12.c 2011-05-05 15:46:10 +0000 +@@ -113,7 +113,7 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" {target { vect_strided && vect_int_mult } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target { vect_strided && vect_int_mult } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" {target { vect_strided8 && vect_int_mult } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target { vect_strided8 && vect_int_mult } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/fast-math-pr35982.c' +--- old/gcc/testsuite/gcc.dg/vect/fast-math-pr35982.c 2008-08-26 08:14:37 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/fast-math-pr35982.c 2011-05-05 15:44:00 +0000 +@@ -20,7 +20,7 @@ + return avg; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_extract_even_odd_wide } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail vect_extract_even_odd_wide } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_extract_even_odd } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail vect_extract_even_odd } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/fast-math-slp-27.c' +--- old/gcc/testsuite/gcc.dg/vect/fast-math-slp-27.c 2010-08-26 11:13:58 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/fast-math-slp-27.c 2011-05-05 15:46:10 +0000 +@@ -13,5 +13,5 @@ + } + } + +-/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target vect_strided } } } */ ++/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/fast-math-vect-complex-3.c' +--- old/gcc/testsuite/gcc.dg/vect/fast-math-vect-complex-3.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/fast-math-vect-complex-3.c 2011-05-05 15:46:10 +0000 +@@ -56,5 +56,5 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd_wide } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c' 
+--- old/gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c 2011-04-24 07:45:49 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c 2011-05-05 15:46:10 +0000 +@@ -65,5 +65,5 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align || {! vect_strided } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align || { ! vect_strided2 } } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/no-scevccp-outer-10a.c' +--- old/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-10a.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-10a.c 2011-05-05 15:46:10 +0000 +@@ -54,5 +54,5 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/no-scevccp-outer-10b.c' +--- old/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-10b.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-10b.c 2011-05-05 15:46:10 +0000 +@@ -53,5 +53,5 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/no-scevccp-outer-18.c' +--- old/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-18.c 2007-10-21 09:01:16 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-18.c 2011-05-05 15:46:10 +0000 +@@ -47,5 +47,5 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target vect_interleave } } } */ ++/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target { vect_interleave || vect_strided2 } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/no-scevccp-outer-20.c' +--- old/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-20.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-20.c 2011-05-05 15:46:10 +0000 +@@ -50,5 +50,5 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/pr30843.c' +--- old/gcc/testsuite/gcc.dg/vect/pr30843.c 2007-02-22 12:30:12 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/pr30843.c 2011-05-05 15:46:10 +0000 +@@ -20,6 +20,6 @@ + } + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave || vect_strided4 } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/pr33866.c' +--- old/gcc/testsuite/gcc.dg/vect/pr33866.c 2007-10-30 08:26:14 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/pr33866.c 2011-05-05 15:46:10 +0000 +@@ -27,6 +27,6 @@ + } + + /* Needs interleaving support. 
*/ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave || vect_strided2 } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/pr37539.c' +--- old/gcc/testsuite/gcc.dg/vect/pr37539.c 2009-11-26 02:03:50 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/pr37539.c 2011-05-05 15:46:10 +0000 +@@ -40,7 +40,7 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_strided_wide } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target { vect_strided4 && vect_strided2 } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + + +=== removed file 'gcc/testsuite/gcc.dg/vect/slp-11.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-11.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-11.c 1970-01-01 00:00:00 +0000 +@@ -1,113 +0,0 @@ +-/* { dg-require-effective-target vect_int } */ +- +-#include <stdarg.h> +-#include "tree-vect.h" +- +-#define N 8 +- +-int +-main1 () +-{ +- int i; +- unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7; +- unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; +- float out2[N*8]; +- +- /* Different operations - not SLPable. */ +- for (i = 0; i < N; i++) +- { +- a0 = in[i*8] + 5; +- a1 = in[i*8 + 1] * 6; +- a2 = in[i*8 + 2] + 7; +- a3 = in[i*8 + 3] + 8; +- a4 = in[i*8 + 4] + 9; +- a5 = in[i*8 + 5] + 10; +- a6 = in[i*8 + 6] + 11; +- a7 = in[i*8 + 7] + 12; +- +- b0 = a0 * 3; +- b1 = a1 * 2; +- b2 = a2 * 12; +- b3 = a3 * 5; +- b4 = a4 * 8; +- b5 = a5 * 4; +- b6 = a6 * 3; +- b7 = a7 * 2; +- +- out[i*8] = b0 - 2; +- out[i*8 + 1] = b1 - 3; +- out[i*8 + 2] = b2 - 2; +- out[i*8 + 3] = b3 - 1; +- out[i*8 + 4] = b4 - 8; +- out[i*8 + 5] = b5 - 7; +- out[i*8 + 6] = b6 - 3; +- out[i*8 + 7] = b7 - 7; +- } +- +- /* check results: */ +- for (i = 0; i < N; i++) +- { +- if (out[i*8] != (in[i*8] + 5) * 3 - 2 +- || out[i*8 + 1] != (in[i*8 + 1] * 6) * 2 - 3 +- || out[i*8 + 2] != (in[i*8 + 2] + 7) * 12 - 2 +- || out[i*8 + 3] != (in[i*8 + 3] + 8) * 5 - 1 +- || out[i*8 + 4] != (in[i*8 + 4] + 9) * 8 - 8 +- || out[i*8 + 5] != (in[i*8 + 5] + 10) * 4 - 7 +- || out[i*8 + 6] != (in[i*8 + 6] + 11) * 3 - 3 +- || out[i*8 + 7] != (in[i*8 + 7] + 12) * 2 - 7) +- abort (); +- } +- +- /* Requires permutation - not SLPable. */ +- for (i = 0; i < N*2; i++) +- { +- out[i*4] = (in[i*4] + 2) * 3; +- out[i*4 + 1] = (in[i*4 + 2] + 2) * 7; +- out[i*4 + 2] = (in[i*4 + 1] + 7) * 3; +- out[i*4 + 3] = (in[i*4 + 3] + 3) * 4; +- } +- +- /* check results: */ +- for (i = 0; i < N*2; i++) +- { +- if (out[i*4] != (in[i*4] + 2) * 3 +- || out[i*4 + 1] != (in[i*4 + 2] + 2) * 7 +- || out[i*4 + 2] != (in[i*4 + 1] + 7) * 3 +- || out[i*4 + 3] != (in[i*4 + 3] + 3) * 4) +- abort (); +- } +- +- /* Different operations - not SLPable. 
*/ +- for (i = 0; i < N*4; i++) +- { +- out2[i*2] = ((float) in[i*2] * 2 + 6) ; +- out2[i*2 + 1] = (float) (in[i*2 + 1] * 3 + 7); +- } +- +- /* check results: */ +- for (i = 0; i < N*4; i++) +- { +- if (out2[i*2] != ((float) in[i*2] * 2 + 6) +- || out2[i*2 + 1] != (float) (in[i*2 + 1] * 3 + 7)) +- abort (); +- } +- +- +- return 0; +-} +- +-int main (void) +-{ +- check_vect (); +- +- main1 (); +- +- return 0; +-} +- +-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target { { vect_uintfloat_cvt && vect_strided_wide } && vect_int_mult } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { { { ! vect_uintfloat_cvt } && vect_strided_wide } && vect_int_mult } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target { ! { vect_int_mult && vect_strided_wide } } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */ +-/* { dg-final { cleanup-tree-dump "vect" } } */ +- + +=== added file 'gcc/testsuite/gcc.dg/vect/slp-11a.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-11a.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-11a.c 2011-05-05 15:46:10 +0000 +@@ -0,0 +1,75 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 8 ++ ++int ++main1 () ++{ ++ int i; ++ unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7; ++ unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; ++ ++ /* Different operations - not SLPable. */ ++ for (i = 0; i < N; i++) ++ { ++ a0 = in[i*8] + 5; ++ a1 = in[i*8 + 1] * 6; ++ a2 = in[i*8 + 2] + 7; ++ a3 = in[i*8 + 3] + 8; ++ a4 = in[i*8 + 4] + 9; ++ a5 = in[i*8 + 5] + 10; ++ a6 = in[i*8 + 6] + 11; ++ a7 = in[i*8 + 7] + 12; ++ ++ b0 = a0 * 3; ++ b1 = a1 * 2; ++ b2 = a2 * 12; ++ b3 = a3 * 5; ++ b4 = a4 * 8; ++ b5 = a5 * 4; ++ b6 = a6 * 3; ++ b7 = a7 * 2; ++ ++ out[i*8] = b0 - 2; ++ out[i*8 + 1] = b1 - 3; ++ out[i*8 + 2] = b2 - 2; ++ out[i*8 + 3] = b3 - 1; ++ out[i*8 + 4] = b4 - 8; ++ out[i*8 + 5] = b5 - 7; ++ out[i*8 + 6] = b6 - 3; ++ out[i*8 + 7] = b7 - 7; ++ } ++ ++ /* check results: */ ++ for (i = 0; i < N; i++) ++ { ++ if (out[i*8] != (in[i*8] + 5) * 3 - 2 ++ || out[i*8 + 1] != (in[i*8 + 1] * 6) * 2 - 3 ++ || out[i*8 + 2] != (in[i*8 + 2] + 7) * 12 - 2 ++ || out[i*8 + 3] != (in[i*8 + 3] + 8) * 5 - 1 ++ || out[i*8 + 4] != (in[i*8 + 4] + 9) * 8 - 8 ++ || out[i*8 + 5] != (in[i*8 + 5] + 10) * 4 - 7 ++ || out[i*8 + 6] != (in[i*8 + 6] + 11) * 3 - 3 ++ || out[i*8 + 7] != (in[i*8 + 7] + 12) * 2 - 7) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++int main (void) ++{ ++ check_vect (); ++ ++ main1 (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided8 && vect_int_mult } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! 
{ vect_strided8 && vect_int_mult } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== added file 'gcc/testsuite/gcc.dg/vect/slp-11b.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-11b.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-11b.c 2011-05-05 15:46:10 +0000 +@@ -0,0 +1,49 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 8 ++ ++int ++main1 () ++{ ++ int i; ++ unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7; ++ unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; ++ ++ /* Requires permutation - not SLPable. */ ++ for (i = 0; i < N*2; i++) ++ { ++ out[i*4] = (in[i*4] + 2) * 3; ++ out[i*4 + 1] = (in[i*4 + 2] + 2) * 7; ++ out[i*4 + 2] = (in[i*4 + 1] + 7) * 3; ++ out[i*4 + 3] = (in[i*4 + 3] + 3) * 4; ++ } ++ ++ /* check results: */ ++ for (i = 0; i < N*2; i++) ++ { ++ if (out[i*4] != (in[i*4] + 2) * 3 ++ || out[i*4 + 1] != (in[i*4 + 2] + 2) * 7 ++ || out[i*4 + 2] != (in[i*4 + 1] + 7) * 3 ++ || out[i*4 + 3] != (in[i*4 + 3] + 3) * 4) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++int main (void) ++{ ++ check_vect (); ++ ++ main1 (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided4 && vect_int_mult } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided4 && vect_int_mult } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== added file 'gcc/testsuite/gcc.dg/vect/slp-11c.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-11c.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-11c.c 2011-05-05 15:46:10 +0000 +@@ -0,0 +1,46 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 8 ++ ++int ++main1 () ++{ ++ int i; ++ unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; ++ float out[N*8]; ++ ++ /* Different operations - not SLPable. */ ++ for (i = 0; i < N*4; i++) ++ { ++ out[i*2] = ((float) in[i*2] * 2 + 6) ; ++ out[i*2 + 1] = (float) (in[i*2 + 1] * 3 + 7); ++ } ++ ++ /* check results: */ ++ for (i = 0; i < N*4; i++) ++ { ++ if (out[i*2] != ((float) in[i*2] * 2 + 6) ++ || out[i*2 + 1] != (float) (in[i*2 + 1] * 3 + 7)) ++ abort (); ++ } ++ ++ ++ return 0; ++} ++ ++int main (void) ++{ ++ check_vect (); ++ ++ main1 (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { { vect_uintfloat_cvt && vect_strided2 } && vect_int_mult } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! 
{ { vect_uintfloat_cvt && vect_strided2 } && vect_int_mult } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/slp-12a.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-12a.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-12a.c 2011-05-05 15:46:10 +0000 +@@ -11,7 +11,7 @@ + int i; + unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7; + unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; +- unsigned int ia[N], ib[N*2]; ++ unsigned int ia[N]; + + for (i = 0; i < N; i++) + { +@@ -61,27 +61,6 @@ + abort (); + } + +- for (i = 0; i < N*2; i++) +- { +- out[i*4] = (in[i*4] + 2) * 3; +- out[i*4 + 1] = (in[i*4 + 1] + 2) * 7; +- out[i*4 + 2] = (in[i*4 + 2] + 7) * 3; +- out[i*4 + 3] = (in[i*4 + 3] + 7) * 7; +- +- ib[i] = 7; +- } +- +- /* check results: */ +- for (i = 0; i < N*2; i++) +- { +- if (out[i*4] != (in[i*4] + 2) * 3 +- || out[i*4 + 1] != (in[i*4 + 1] + 2) * 7 +- || out[i*4 + 2] != (in[i*4 + 2] + 7) * 3 +- || out[i*4 + 3] != (in[i*4 + 3] + 7) * 7 +- || ib[i] != 7) +- abort (); +- } +- + return 0; + } + +@@ -94,11 +73,8 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" {target { vect_strided_wide && vect_int_mult} } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {target { {! {vect_strided_wide}} && vect_int_mult } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target { ! vect_int_mult } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target { vect_strided_wide && vect_int_mult } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" {target { {! {vect_strided_wide}} && vect_int_mult } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target { ! vect_int_mult } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided8 && vect_int_mult } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided8 && vect_int_mult } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_strided8 && vect_int_mult } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided8 && vect_int_mult } } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ +- + +=== modified file 'gcc/testsuite/gcc.dg/vect/slp-12b.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-12b.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-12b.c 2011-05-05 15:46:10 +0000 +@@ -43,9 +43,9 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {target { vect_strided_wide && vect_int_mult } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target { { ! { vect_int_mult }} || { ! {vect_strided_wide}}} } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" {target { vect_strided_wide && vect_int_mult } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target { { ! { vect_int_mult }} || { ! 
{vect_strided_wide}}} } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided2 && vect_int_mult } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided2 && vect_int_mult } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_strided2 && vect_int_mult } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided2 && vect_int_mult } } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== added file 'gcc/testsuite/gcc.dg/vect/slp-12c.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-12c.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-12c.c 2011-05-05 15:44:41 +0000 +@@ -0,0 +1,53 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 8 ++ ++int ++main1 () ++{ ++ int i; ++ unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7; ++ unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; ++ unsigned int ia[N*2]; ++ ++ for (i = 0; i < N*2; i++) ++ { ++ out[i*4] = (in[i*4] + 2) * 3; ++ out[i*4 + 1] = (in[i*4 + 1] + 2) * 7; ++ out[i*4 + 2] = (in[i*4 + 2] + 7) * 3; ++ out[i*4 + 3] = (in[i*4 + 3] + 7) * 7; ++ ++ ia[i] = 7; ++ } ++ ++ /* check results: */ ++ for (i = 0; i < N*2; i++) ++ { ++ if (out[i*4] != (in[i*4] + 2) * 3 ++ || out[i*4 + 1] != (in[i*4 + 1] + 2) * 7 ++ || out[i*4 + 2] != (in[i*4 + 2] + 7) * 3 ++ || out[i*4 + 3] != (in[i*4 + 3] + 7) * 7 ++ || ia[i] != 7) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++int main (void) ++{ ++ check_vect (); ++ ++ main1 (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_int_mult } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_int_mult } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_int_mult } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! 
vect_int_mult } } } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/slp-18.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-18.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-18.c 2011-05-05 15:46:10 +0000 +@@ -91,7 +91,7 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_strided } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided8 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== removed file 'gcc/testsuite/gcc.dg/vect/slp-19.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-19.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-19.c 1970-01-01 00:00:00 +0000 +@@ -1,154 +0,0 @@ +-/* { dg-require-effective-target vect_int } */ +- +-#include <stdarg.h> +-#include "tree-vect.h" +- +-#define N 16 +- +-int +-main1 () +-{ +- unsigned int i; +- unsigned int out[N*8]; +- unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; +- unsigned int ia[N*2], a0, a1, a2, a3; +- +- for (i = 0; i < N; i++) +- { +- out[i*8] = in[i*8]; +- out[i*8 + 1] = in[i*8 + 1]; +- out[i*8 + 2] = in[i*8 + 2]; +- out[i*8 + 3] = in[i*8 + 3]; +- out[i*8 + 4] = in[i*8 + 4]; +- out[i*8 + 5] = in[i*8 + 5]; +- out[i*8 + 6] = in[i*8 + 6]; +- out[i*8 + 7] = in[i*8 + 7]; +- +- ia[i] = in[i*8 + 2]; +- } +- +- /* check results: */ +- for (i = 0; i < N; i++) +- { +- if (out[i*8] != in[i*8] +- || out[i*8 + 1] != in[i*8 + 1] +- || out[i*8 + 2] != in[i*8 + 2] +- || out[i*8 + 3] != in[i*8 + 3] +- || out[i*8 + 4] != in[i*8 + 4] +- || out[i*8 + 5] != in[i*8 + 5] +- || out[i*8 + 6] != in[i*8 + 6] +- || out[i*8 + 7] != in[i*8 + 7] +- || ia[i] != in[i*8 + 2]) +- abort (); +- } +- +- for (i = 0; i < N*2; i++) +- { +- a0 = in[i*4] + 1; +- a1 = in[i*4 + 1] + 2; +- a2 = in[i*4 + 2] + 3; +- a3 = in[i*4 + 3] + 4; +- +- out[i*4] = a0; +- out[i*4 + 1] = a1; +- out[i*4 + 2] = a2; +- out[i*4 + 3] = a3; +- +- ia[i] = a2; +- } +- +- /* check results: */ +- for (i = 0; i < N*2; i++) +- { +- if (out[i*4] != in[i*4] + 1 +- || out[i*4 + 1] != in[i*4 + 1] + 2 +- || out[i*4 + 2] != in[i*4 + 2] + 3 +- || out[i*4 + 3] != in[i*4 + 3] + 4 +- || ia[i] != in[i*4 + 2] + 3) +- abort (); +- } +- +- /* The last stmt requires interleaving of not power of 2 size - not +- vectorizable. 
*/ +- for (i = 0; i < N/2; i++) +- { +- out[i*12] = in[i*12]; +- out[i*12 + 1] = in[i*12 + 1]; +- out[i*12 + 2] = in[i*12 + 2]; +- out[i*12 + 3] = in[i*12 + 3]; +- out[i*12 + 4] = in[i*12 + 4]; +- out[i*12 + 5] = in[i*12 + 5]; +- out[i*12 + 6] = in[i*12 + 6]; +- out[i*12 + 7] = in[i*12 + 7]; +- out[i*12 + 8] = in[i*12 + 8]; +- out[i*12 + 9] = in[i*12 + 9]; +- out[i*12 + 10] = in[i*12 + 10]; +- out[i*12 + 11] = in[i*12 + 11]; +- +- ia[i] = in[i*12 + 7]; +- } +- +- /* check results: */ +- for (i = 0; i < N/2; i++) +- { +- if (out[i*12] != in[i*12] +- || out[i*12 + 1] != in[i*12 + 1] +- || out[i*12 + 2] != in[i*12 + 2] +- || out[i*12 + 3] != in[i*12 + 3] +- || out[i*12 + 4] != in[i*12 + 4] +- || out[i*12 + 5] != in[i*12 + 5] +- || out[i*12 + 6] != in[i*12 + 6] +- || out[i*12 + 7] != in[i*12 + 7] +- || out[i*12 + 8] != in[i*12 + 8] +- || out[i*12 + 9] != in[i*12 + 9] +- || out[i*12 + 10] != in[i*12 + 10] +- || out[i*12 + 11] != in[i*12 + 11] +- || ia[i] != in[i*12 + 7]) +- abort (); +- } +- +- /* Hybrid SLP with unrolling by 2. */ +- for (i = 0; i < N; i++) +- { +- out[i*6] = in[i*6]; +- out[i*6 + 1] = in[i*6 + 1]; +- out[i*6 + 2] = in[i*6 + 2]; +- out[i*6 + 3] = in[i*6 + 3]; +- out[i*6 + 4] = in[i*6 + 4]; +- out[i*6 + 5] = in[i*6 + 5]; +- +- ia[i] = i; +- } +- +- /* check results: */ +- for (i = 0; i < N/2; i++) +- { +- if (out[i*6] != in[i*6] +- || out[i*6 + 1] != in[i*6 + 1] +- || out[i*6 + 2] != in[i*6 + 2] +- || out[i*6 + 3] != in[i*6 + 3] +- || out[i*6 + 4] != in[i*6 + 4] +- || out[i*6 + 5] != in[i*6 + 5] +- || ia[i] != i) +- abort (); +- } +- +- +- return 0; +-} +- +-int main (void) +-{ +- check_vect (); +- +- main1 (); +- +- return 0; +-} +- +-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target vect_strided_wide } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided_wide } } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { target vect_strided_wide } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { ! 
{ vect_strided_wide } } } } } */ +-/* { dg-final { cleanup-tree-dump "vect" } } */ +- + +=== added file 'gcc/testsuite/gcc.dg/vect/slp-19a.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-19a.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-19a.c 2011-05-05 15:46:10 +0000 +@@ -0,0 +1,61 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 16 ++ ++int ++main1 () ++{ ++ unsigned int i; ++ unsigned int out[N*8]; ++ unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; ++ unsigned int ia[N*2]; ++ ++ for (i = 0; i < N; i++) ++ { ++ out[i*8] = in[i*8]; ++ out[i*8 + 1] = in[i*8 + 1]; ++ out[i*8 + 2] = in[i*8 + 2]; ++ out[i*8 + 3] = in[i*8 + 3]; ++ out[i*8 + 4] = in[i*8 + 4]; ++ out[i*8 + 5] = in[i*8 + 5]; ++ out[i*8 + 6] = in[i*8 + 6]; ++ out[i*8 + 7] = in[i*8 + 7]; ++ ++ ia[i] = in[i*8 + 2]; ++ } ++ ++ /* check results: */ ++ for (i = 0; i < N; i++) ++ { ++ if (out[i*8] != in[i*8] ++ || out[i*8 + 1] != in[i*8 + 1] ++ || out[i*8 + 2] != in[i*8 + 2] ++ || out[i*8 + 3] != in[i*8 + 3] ++ || out[i*8 + 4] != in[i*8 + 4] ++ || out[i*8 + 5] != in[i*8 + 5] ++ || out[i*8 + 6] != in[i*8 + 6] ++ || out[i*8 + 7] != in[i*8 + 7] ++ || ia[i] != in[i*8 + 2]) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++int main (void) ++{ ++ check_vect (); ++ ++ main1 (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_strided8 } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided8 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! 
vect_strided8} } } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== added file 'gcc/testsuite/gcc.dg/vect/slp-19b.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-19b.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-19b.c 2011-05-05 15:46:10 +0000 +@@ -0,0 +1,58 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 16 ++ ++int ++main1 () ++{ ++ unsigned int i; ++ unsigned int out[N*8]; ++ unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; ++ unsigned int ia[N*2], a0, a1, a2, a3; ++ ++ for (i = 0; i < N*2; i++) ++ { ++ a0 = in[i*4] + 1; ++ a1 = in[i*4 + 1] + 2; ++ a2 = in[i*4 + 2] + 3; ++ a3 = in[i*4 + 3] + 4; ++ ++ out[i*4] = a0; ++ out[i*4 + 1] = a1; ++ out[i*4 + 2] = a2; ++ out[i*4 + 3] = a3; ++ ++ ia[i] = a2; ++ } ++ ++ /* check results: */ ++ for (i = 0; i < N*2; i++) ++ { ++ if (out[i*4] != in[i*4] + 1 ++ || out[i*4 + 1] != in[i*4 + 1] + 2 ++ || out[i*4 + 2] != in[i*4 + 2] + 3 ++ || out[i*4 + 3] != in[i*4 + 3] + 4 ++ || ia[i] != in[i*4 + 2] + 3) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++int main (void) ++{ ++ check_vect (); ++ ++ main1 (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided4 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_strided4 } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided4 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_strided4 } } } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== added file 'gcc/testsuite/gcc.dg/vect/slp-19c.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-19c.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-19c.c 2011-05-05 15:44:41 +0000 +@@ -0,0 +1,95 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 16 ++ ++int ++main1 () ++{ ++ unsigned int i; ++ unsigned int out[N*8]; ++ unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; ++ unsigned int ia[N*2], a0, a1, a2, a3; ++ ++ /* The last stmt requires interleaving of not power of 2 size - not ++ vectorizable. 
*/ ++ for (i = 0; i < N/2; i++) ++ { ++ out[i*12] = in[i*12]; ++ out[i*12 + 1] = in[i*12 + 1]; ++ out[i*12 + 2] = in[i*12 + 2]; ++ out[i*12 + 3] = in[i*12 + 3]; ++ out[i*12 + 4] = in[i*12 + 4]; ++ out[i*12 + 5] = in[i*12 + 5]; ++ out[i*12 + 6] = in[i*12 + 6]; ++ out[i*12 + 7] = in[i*12 + 7]; ++ out[i*12 + 8] = in[i*12 + 8]; ++ out[i*12 + 9] = in[i*12 + 9]; ++ out[i*12 + 10] = in[i*12 + 10]; ++ out[i*12 + 11] = in[i*12 + 11]; ++ ++ ia[i] = in[i*12 + 7]; ++ } ++ ++ /* check results: */ ++ for (i = 0; i < N/2; i++) ++ { ++ if (out[i*12] != in[i*12] ++ || out[i*12 + 1] != in[i*12 + 1] ++ || out[i*12 + 2] != in[i*12 + 2] ++ || out[i*12 + 3] != in[i*12 + 3] ++ || out[i*12 + 4] != in[i*12 + 4] ++ || out[i*12 + 5] != in[i*12 + 5] ++ || out[i*12 + 6] != in[i*12 + 6] ++ || out[i*12 + 7] != in[i*12 + 7] ++ || out[i*12 + 8] != in[i*12 + 8] ++ || out[i*12 + 9] != in[i*12 + 9] ++ || out[i*12 + 10] != in[i*12 + 10] ++ || out[i*12 + 11] != in[i*12 + 11] ++ || ia[i] != in[i*12 + 7]) ++ abort (); ++ } ++ ++ /* Hybrid SLP with unrolling by 2. */ ++ for (i = 0; i < N; i++) ++ { ++ out[i*6] = in[i*6]; ++ out[i*6 + 1] = in[i*6 + 1]; ++ out[i*6 + 2] = in[i*6 + 2]; ++ out[i*6 + 3] = in[i*6 + 3]; ++ out[i*6 + 4] = in[i*6 + 4]; ++ out[i*6 + 5] = in[i*6 + 5]; ++ ++ ia[i] = i; ++ } ++ ++ /* check results: */ ++ for (i = 0; i < N/2; i++) ++ { ++ if (out[i*6] != in[i*6] ++ || out[i*6 + 1] != in[i*6 + 1] ++ || out[i*6 + 2] != in[i*6 + 2] ++ || out[i*6 + 3] != in[i*6 + 3] ++ || out[i*6 + 4] != in[i*6 + 4] ++ || out[i*6 + 5] != in[i*6 + 5] ++ || ia[i] != i) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++int main (void) ++{ ++ check_vect (); ++ ++ main1 (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/slp-21.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-21.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-21.c 2011-05-05 15:46:10 +0000 +@@ -199,9 +199,9 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" { target { vect_strided || vect_extract_even_odd } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided || vect_extract_even_odd } } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" { target { vect_strided4 || vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided4 || vect_extract_even_odd } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided4 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided4 } } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/slp-23.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-23.c 2011-01-10 12:51:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-23.c 2011-05-05 15:46:10 +0000 +@@ -106,8 +106,8 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { vect_strided_wide } && {! 
{ vect_no_align} } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided_wide || vect_no_align} } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { vect_strided8 && { ! { vect_no_align} } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided8 || vect_no_align } } } } } */ + /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/slp-reduc-6.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-reduc-6.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-reduc-6.c 2011-05-05 15:46:10 +0000 +@@ -42,7 +42,7 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail { vect_no_int_add || { ! vect_unpack } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail { vect_no_int_add || { ! { vect_unpack || vect_strided2 } } } } } } */ + /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */ + /* { dg-final { scan-tree-dump-times "different interleaving chains in one node" 1 "vect" { target { ! vect_no_int_add } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-1.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-1.c 2010-08-19 10:23:50 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-1.c 2011-05-05 15:46:10 +0000 +@@ -85,6 +85,6 @@ + fbar (a); + } + +-/* { dg-final { scan-tree-dump-times "vectorized 6 loops" 1 "vect" { target vect_extract_even_odd_wide } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 5 loops" 1 "vect" { xfail vect_extract_even_odd_wide } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 6 loops" 1 "vect" { target vect_strided2 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 5 loops" 1 "vect" { xfail vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-10.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-10.c 2010-05-27 12:23:45 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-10.c 2011-05-05 15:46:10 +0000 +@@ -22,5 +22,5 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { ! vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { ! vect_strided2 } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-107.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-107.c 2008-08-19 08:06:54 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-107.c 2011-05-05 15:46:10 +0000 +@@ -40,6 +40,6 @@ + return main1 (); + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_extract_even_odd_wide } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail vect_extract_even_odd_wide } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-98.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-98.c 2008-08-02 11:05:47 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-98.c 2011-05-05 15:46:10 +0000 +@@ -38,6 +38,6 @@ + } + + /* Needs interleaving support. 
*/ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd_wide } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { xfail { vect_interleave && vect_extract_even_odd_wide } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided4 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { xfail vect_strided4 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-cselim-1.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-cselim-1.c 2011-03-27 09:38:18 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-cselim-1.c 2011-05-05 15:46:10 +0000 +@@ -82,5 +82,5 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align || {! vect_strided } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align || { ! vect_strided2 } } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-a-mult.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-a-mult.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-a-mult.c 2011-05-05 15:46:10 +0000 +@@ -71,6 +71,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i2.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i2.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i2.c 2011-05-05 15:46:10 +0000 +@@ -55,6 +55,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c 2011-05-05 15:46:10 +0000 +@@ -68,6 +68,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided4 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-mult.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-mult.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-mult.c 2011-05-05 15:46:10 +0000 +@@ -62,6 +62,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-a-u32-mult.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-a-u32-mult.c 2010-05-27 12:23:45 +0000 ++++ 
new/gcc/testsuite/gcc.dg/vect/vect-strided-a-u32-mult.c 2011-05-05 15:46:10 +0000 +@@ -61,6 +61,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i2-gap.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i2-gap.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i2-gap.c 2011-05-05 15:46:10 +0000 +@@ -69,6 +69,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap2.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap2.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap2.c 2011-05-05 15:46:10 +0000 +@@ -76,6 +76,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap7.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap7.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap7.c 2011-05-05 15:46:10 +0000 +@@ -81,6 +81,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-float.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-float.c 2008-08-19 08:06:54 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-float.c 2011-05-05 15:46:10 +0000 +@@ -39,7 +39,7 @@ + } + + /* Needs interleaving support. 
*/ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd_wide } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail { vect_interleave && vect_extract_even_odd_wide } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-mult-char-ls.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-mult-char-ls.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-mult-char-ls.c 2011-05-05 15:46:10 +0000 +@@ -71,6 +71,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-mult.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-mult.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-mult.c 2011-05-05 15:46:10 +0000 +@@ -71,6 +71,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-same-dr.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-same-dr.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-same-dr.c 2011-05-05 15:46:10 +0000 +@@ -72,5 +72,5 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-store-a-u8-i2.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-store-a-u8-i2.c 2008-08-12 05:31:57 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-store-a-u8-i2.c 2011-05-05 15:46:10 +0000 +@@ -55,6 +55,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave || vect_strided2 } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-store-u16-i4.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-store-u16-i4.c 2007-10-21 09:01:16 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-store-u16-i4.c 2011-05-05 15:46:10 +0000 +@@ -65,8 +65,8 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target { vect_interleave && vect_pack_trunc } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { { ! 
{ vect_interleave } } && { vect_pack_trunc } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target { { vect_interleave || vect_strided4 } && vect_pack_trunc } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { { ! { vect_interleave || vect_strided4 } } && { vect_pack_trunc } } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-store-u32-i2.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-store-u32-i2.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-store-u32-i2.c 2011-05-05 15:46:10 +0000 +@@ -39,7 +39,7 @@ + } + + /* Needs interleaving support. */ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail { vect_interleave } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave || vect_strided2 } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail { vect_interleave || vect_strided2 } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-u16-i2.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i2.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i2.c 2011-05-05 15:46:10 +0000 +@@ -55,6 +55,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-strided-u16-i3.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i3.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i3.c 2011-05-05 15:46:25 +0000 +@@ -0,0 +1,112 @@ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 128 ++ ++typedef struct { ++ unsigned short a; ++ unsigned short b; ++ unsigned short c; ++} s; ++ ++#define A(I) (I) ++#define B(I) ((I) * 2) ++#define C(I) ((unsigned short) ~((I) ^ 0x18)) ++ ++void __attribute__ ((noinline)) ++check1 (s *res) ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ if (res[i].a != C (i) ++ || res[i].b != A (i) ++ || res[i].c != B (i)) ++ abort (); ++} ++ ++void __attribute__ ((noinline)) ++check2 (unsigned short *res) ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ if (res[i] != (unsigned short) (A (i) + B (i) + C (i))) ++ abort (); ++} ++ ++void __attribute__ ((noinline)) ++check3 (s *res) ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ if (res[i].a != i ++ || res[i].b != i ++ || res[i].c != i) ++ abort (); ++} ++ ++void __attribute__ ((noinline)) ++check4 (unsigned short *res) ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ if (res[i] != (unsigned short) (A (i) + B (i))) ++ abort (); ++} ++ ++void __attribute__ ((noinline)) ++main1 (s *arr) ++{ ++ int i; ++ s *ptr = arr; ++ s res1[N]; ++ unsigned short res2[N]; ++ ++ for (i = 0; i < N; i++) ++ { ++ res1[i].a = arr[i].c; ++ res1[i].b = arr[i].a; ++ res1[i].c = arr[i].b; ++ } ++ check1 (res1); ++ ++ for (i = 0; i < N; i++) ++ res2[i] = arr[i].a + arr[i].b + arr[i].c; ++ check2 (res2); ++ ++ for (i = 0; i < N; i++) ++ { ++ res1[i].a = i; ++ res1[i].b = i; ++ res1[i].c = i; ++ } ++ check3 (res1); ++ ++ for (i = 0; i < N; i++) ++ res2[i] = 
arr[i].a + arr[i].b; ++ check4 (res2); ++} ++ ++int main (void) ++{ ++ int i; ++ s arr[N]; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ arr[i].a = A (i); ++ arr[i].b = B (i); ++ arr[i].c = C (i); ++ } ++ main1 (arr); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" { target vect_strided3 } } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-u16-i4.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i4.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i4.c 2011-05-05 15:46:10 +0000 +@@ -68,6 +68,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided4 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-u32-i4.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-u32-i4.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-u32-i4.c 2011-05-05 15:46:10 +0000 +@@ -63,6 +63,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided4 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-u32-i8.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-u32-i8.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-u32-i8.c 2011-05-05 15:46:10 +0000 +@@ -77,6 +77,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-u32-mult.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-u32-mult.c 2010-05-27 12:23:45 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-u32-mult.c 2011-05-05 15:46:10 +0000 +@@ -60,6 +60,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2-gap.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2-gap.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2-gap.c 2011-05-05 15:46:10 +0000 +@@ -71,6 +71,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2.c 2011-05-05 15:46:10 +0000 +@@ -54,6 +54,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" 
{ target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap2.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap2.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap2.c 2011-05-05 15:46:10 +0000 +@@ -78,6 +78,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c 2011-05-05 15:46:10 +0000 +@@ -98,6 +98,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap7.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap7.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap7.c 2011-05-05 15:46:10 +0000 +@@ -83,6 +83,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8.c 2011-05-05 15:46:10 +0000 +@@ -85,6 +85,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-vfa-03.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-vfa-03.c 2007-09-09 07:46:12 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-vfa-03.c 2011-05-05 15:46:10 +0000 +@@ -53,6 +53,6 @@ + } + + /* Needs interleaving support. 
*/ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { xfail { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { xfail vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect.exp' +--- old/gcc/testsuite/gcc.dg/vect/vect.exp 2011-04-24 07:45:49 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect.exp 2011-05-05 15:43:31 +0000 +@@ -75,15 +75,20 @@ + lappend VECT_SLP_CFLAGS "-fdump-tree-slp-details" + + # Main loop. +-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/pr*.\[cS\]]] \ +- "" $DEFAULT_VECTCFLAGS +-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/vect-*.\[cS\]]] \ +- "" $DEFAULT_VECTCFLAGS +-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/slp-*.\[cS\]]] \ +- "" $DEFAULT_VECTCFLAGS +-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/bb-slp*.\[cS\]]] \ +- "" $VECT_SLP_CFLAGS +- ++set VECT_ADDITIONAL_FLAGS [list ""] ++if { [check_effective_target_lto] } { ++ lappend VECT_ADDITIONAL_FLAGS "-flto" ++} ++foreach flags $VECT_ADDITIONAL_FLAGS { ++ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/pr*.\[cS\]]] \ ++ $flags $DEFAULT_VECTCFLAGS ++ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/vect-*.\[cS\]]] \ ++ $flags $DEFAULT_VECTCFLAGS ++ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/slp-*.\[cS\]]] \ ++ $flags $DEFAULT_VECTCFLAGS ++ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/bb-slp*.\[cS\]]] \ ++ $flags $VECT_SLP_CFLAGS ++} + + #### Tests with special options + global SAVED_DEFAULT_VECTCFLAGS + +=== modified file 'gcc/testsuite/lib/target-supports.exp' +--- old/gcc/testsuite/lib/target-supports.exp 2011-05-06 11:28:27 +0000 ++++ new/gcc/testsuite/lib/target-supports.exp 2011-06-02 12:12:00 +0000 +@@ -3139,29 +3139,6 @@ + return $et_vect_extract_even_odd_saved + } + +-# Return 1 if the target supports vector even/odd elements extraction of +-# vectors with SImode elements or larger, 0 otherwise. +- +-proc check_effective_target_vect_extract_even_odd_wide { } { +- global et_vect_extract_even_odd_wide_saved +- +- if [info exists et_vect_extract_even_odd_wide_saved] { +- verbose "check_effective_target_vect_extract_even_odd_wide: using cached result" 2 +- } else { +- set et_vect_extract_even_odd_wide_saved 0 +- if { [istarget powerpc*-*-*] +- || [istarget i?86-*-*] +- || [istarget x86_64-*-*] +- || [istarget ia64-*-*] +- || [istarget spu-*-*] } { +- set et_vect_extract_even_odd_wide_saved 1 +- } +- } +- +- verbose "check_effective_target_vect_extract_even_wide_odd: returning $et_vect_extract_even_odd_wide_saved" 2 +- return $et_vect_extract_even_odd_wide_saved +-} +- + # Return 1 if the target supports vector interleaving, 0 otherwise. + + proc check_effective_target_vect_interleave { } { +@@ -3184,41 +3161,30 @@ + return $et_vect_interleave_saved + } + +-# Return 1 if the target supports vector interleaving and extract even/odd, 0 otherwise. 
+-proc check_effective_target_vect_strided { } { +- global et_vect_strided_saved +- +- if [info exists et_vect_strided_saved] { +- verbose "check_effective_target_vect_strided: using cached result" 2 +- } else { +- set et_vect_strided_saved 0 +- if { [check_effective_target_vect_interleave] +- && [check_effective_target_vect_extract_even_odd] } { +- set et_vect_strided_saved 1 +- } +- } +- +- verbose "check_effective_target_vect_strided: returning $et_vect_strided_saved" 2 +- return $et_vect_strided_saved +-} +- +-# Return 1 if the target supports vector interleaving and extract even/odd +-# for wide element types, 0 otherwise. +-proc check_effective_target_vect_strided_wide { } { +- global et_vect_strided_wide_saved +- +- if [info exists et_vect_strided_wide_saved] { +- verbose "check_effective_target_vect_strided_wide: using cached result" 2 +- } else { +- set et_vect_strided_wide_saved 0 +- if { [check_effective_target_vect_interleave] +- && [check_effective_target_vect_extract_even_odd_wide] } { +- set et_vect_strided_wide_saved 1 +- } +- } +- +- verbose "check_effective_target_vect_strided_wide: returning $et_vect_strided_wide_saved" 2 +- return $et_vect_strided_wide_saved ++foreach N {2 3 4 8} { ++ eval [string map [list N $N] { ++ # Return 1 if the target supports 2-vector interleaving ++ proc check_effective_target_vect_stridedN { } { ++ global et_vect_stridedN_saved ++ ++ if [info exists et_vect_stridedN_saved] { ++ verbose "check_effective_target_vect_stridedN: using cached result" 2 ++ } else { ++ set et_vect_stridedN_saved 0 ++ if { (N & -N) == N ++ && [check_effective_target_vect_interleave] ++ && [check_effective_target_vect_extract_even_odd] } { ++ set et_vect_stridedN_saved 1 ++ } ++ if { [istarget arm*-*-*] && N >= 2 && N <= 4 } { ++ set et_vect_stridedN_saved 1 ++ } ++ } ++ ++ verbose "check_effective_target_vect_stridedN: returning $et_vect_stridedN_saved" 2 ++ return $et_vect_stridedN_saved ++ } ++ }] + } + + # Return 1 if the target supports section-anchors + +=== modified file 'gcc/tree-cfg.c' +--- old/gcc/tree-cfg.c 2011-02-12 21:11:33 +0000 ++++ new/gcc/tree-cfg.c 2011-05-05 15:42:22 +0000 +@@ -3046,7 +3046,26 @@ + tree fntype; + unsigned i; + +- if (TREE_CODE (fn) != OBJ_TYPE_REF ++ if (gimple_call_internal_p (stmt)) ++ { ++ if (fn) ++ { ++ error ("gimple call has two targets"); ++ debug_generic_stmt (fn); ++ return true; ++ } ++ } ++ else ++ { ++ if (!fn) ++ { ++ error ("gimple call has no target"); ++ return true; ++ } ++ } ++ ++ if (fn ++ && TREE_CODE (fn) != OBJ_TYPE_REF + && !is_gimple_val (fn)) + { + error ("invalid function in gimple call"); +@@ -3054,9 +3073,10 @@ + return true; + } + +- if (!POINTER_TYPE_P (TREE_TYPE (fn)) +- || (TREE_CODE (TREE_TYPE (TREE_TYPE (fn))) != FUNCTION_TYPE +- && TREE_CODE (TREE_TYPE (TREE_TYPE (fn))) != METHOD_TYPE)) ++ if (fn ++ && (!POINTER_TYPE_P (TREE_TYPE (fn)) ++ || (TREE_CODE (TREE_TYPE (TREE_TYPE (fn))) != FUNCTION_TYPE ++ && TREE_CODE (TREE_TYPE (TREE_TYPE (fn))) != METHOD_TYPE))) + { + error ("non-function in gimple call"); + return true; +@@ -3076,8 +3096,12 @@ + return true; + } + +- fntype = TREE_TYPE (TREE_TYPE (fn)); +- if (gimple_call_lhs (stmt) ++ if (fn) ++ fntype = TREE_TYPE (TREE_TYPE (fn)); ++ else ++ fntype = NULL_TREE; ++ if (fntype ++ && gimple_call_lhs (stmt) + && !useless_type_conversion_p (TREE_TYPE (gimple_call_lhs (stmt)), + TREE_TYPE (fntype)) + /* ??? At least C++ misses conversions at assignments from +@@ -4130,9 +4154,10 @@ + didn't see a function declaration before the call. 
*/ + if (is_gimple_call (stmt)) + { +- tree decl; ++ tree fn, decl; + +- if (!is_gimple_call_addr (gimple_call_fn (stmt))) ++ fn = gimple_call_fn (stmt); ++ if (fn && !is_gimple_call_addr (fn)) + { + error ("invalid function in call statement"); + return true; +@@ -7484,6 +7509,8 @@ + case GIMPLE_CALL: + if (gimple_call_lhs (g)) + break; ++ if (gimple_call_internal_p (g)) ++ break; + + /* This is a naked call, as opposed to a GIMPLE_CALL with an + LHS. All calls whose value is ignored should be + +=== modified file 'gcc/tree-eh.c' +--- old/gcc/tree-eh.c 2011-05-10 06:31:59 +0000 ++++ new/gcc/tree-eh.c 2011-06-02 12:12:00 +0000 +@@ -2745,7 +2745,7 @@ + || gimple_call_lhs (twos) + || gimple_call_chain (ones) + || gimple_call_chain (twos) +- || !operand_equal_p (gimple_call_fn (ones), gimple_call_fn (twos), 0) ++ || !gimple_call_same_target_p (ones, twos) + || gimple_call_num_args (ones) != gimple_call_num_args (twos)) + return false; + + +=== modified file 'gcc/tree-inline.c' +--- old/gcc/tree-inline.c 2011-05-05 21:02:06 +0000 ++++ new/gcc/tree-inline.c 2011-06-02 12:12:00 +0000 +@@ -3471,10 +3471,13 @@ + { + tree decl = gimple_call_fndecl (stmt); + tree addr = gimple_call_fn (stmt); +- tree funtype = TREE_TYPE (addr); ++ tree funtype = NULL_TREE; + bool stdarg = false; + +- if (POINTER_TYPE_P (funtype)) ++ if (addr) ++ funtype = TREE_TYPE (addr); ++ ++ if (funtype && POINTER_TYPE_P (funtype)) + funtype = TREE_TYPE (funtype); + + /* Do not special case builtins where we see the body. +@@ -3514,7 +3517,7 @@ + if (decl) + funtype = TREE_TYPE (decl); + +- if (!VOID_TYPE_P (TREE_TYPE (funtype))) ++ if (funtype && !VOID_TYPE_P (TREE_TYPE (funtype))) + cost += estimate_move_cost (TREE_TYPE (funtype)); + + if (funtype) + +=== modified file 'gcc/tree-ssa-ccp.c' +--- old/gcc/tree-ssa-ccp.c 2011-01-31 16:52:22 +0000 ++++ new/gcc/tree-ssa-ccp.c 2011-05-05 15:42:22 +0000 +@@ -1279,7 +1279,10 @@ + + case GIMPLE_CALL: + { +- tree fn = valueize_op (gimple_call_fn (stmt)); ++ tree fn = gimple_call_fn (stmt); ++ if (!fn) ++ return NULL_TREE; ++ fn = valueize_op (fn); + if (TREE_CODE (fn) == ADDR_EXPR + && TREE_CODE (TREE_OPERAND (fn, 0)) == FUNCTION_DECL + && DECL_BUILT_IN (TREE_OPERAND (fn, 0))) +@@ -2310,6 +2313,11 @@ + return true; + } + ++ /* Internal calls provide no argument types, so the extra laxity ++ for normal calls does not apply. */ ++ if (gimple_call_internal_p (stmt)) ++ return false; ++ + /* Propagate into the call arguments. Compared to replace_uses_in + this can use the argument slot types for type verification + instead of the current argument type. We also can safely + +=== modified file 'gcc/tree-ssa-dom.c' +--- old/gcc/tree-ssa-dom.c 2011-02-14 17:59:10 +0000 ++++ new/gcc/tree-ssa-dom.c 2011-05-05 15:42:22 +0000 +@@ -64,7 +64,7 @@ + struct { enum tree_code op; tree opnd; } unary; + struct { enum tree_code op; tree opnd0, opnd1; } binary; + struct { enum tree_code op; tree opnd0, opnd1, opnd2; } ternary; +- struct { tree fn; bool pure; size_t nargs; tree *args; } call; ++ struct { gimple fn_from; bool pure; size_t nargs; tree *args; } call; + } ops; + }; + +@@ -258,7 +258,7 @@ + + expr->type = TREE_TYPE (gimple_call_lhs (stmt)); + expr->kind = EXPR_CALL; +- expr->ops.call.fn = gimple_call_fn (stmt); ++ expr->ops.call.fn_from = stmt; + + if (gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)) + expr->ops.call.pure = true; +@@ -422,8 +422,8 @@ + + /* If the calls are to different functions, then they + clearly cannot be equal. */ +- if (! 
operand_equal_p (expr0->ops.call.fn, +- expr1->ops.call.fn, 0)) ++ if (!gimple_call_same_target_p (expr0->ops.call.fn_from, ++ expr1->ops.call.fn_from)) + return false; + + if (! expr0->ops.call.pure) +@@ -503,9 +503,15 @@ + { + size_t i; + enum tree_code code = CALL_EXPR; ++ gimple fn_from; + + val = iterative_hash_object (code, val); +- val = iterative_hash_expr (expr->ops.call.fn, val); ++ fn_from = expr->ops.call.fn_from; ++ if (gimple_call_internal_p (fn_from)) ++ val = iterative_hash_hashval_t ++ ((hashval_t) gimple_call_internal_fn (fn_from), val); ++ else ++ val = iterative_hash_expr (gimple_call_fn (fn_from), val); + for (i = 0; i < expr->ops.call.nargs; i++) + val = iterative_hash_expr (expr->ops.call.args[i], val); + } +@@ -565,8 +571,14 @@ + { + size_t i; + size_t nargs = element->expr.ops.call.nargs; ++ gimple fn_from; + +- print_generic_expr (stream, element->expr.ops.call.fn, 0); ++ fn_from = element->expr.ops.call.fn_from; ++ if (gimple_call_internal_p (fn_from)) ++ fputs (internal_fn_name (gimple_call_internal_fn (fn_from)), ++ stream); ++ else ++ print_generic_expr (stream, gimple_call_fn (fn_from), 0); + fprintf (stream, " ("); + for (i = 0; i < nargs; i++) + { + +=== modified file 'gcc/tree-ssa-pre.c' +--- old/gcc/tree-ssa-pre.c 2011-02-15 13:04:47 +0000 ++++ new/gcc/tree-ssa-pre.c 2011-05-05 15:42:22 +0000 +@@ -2657,11 +2657,13 @@ + } + + /* Return true if we can value number the call in STMT. This is true +- if we have a pure or constant call. */ ++ if we have a pure or constant call to a real function. */ + + static bool + can_value_number_call (gimple stmt) + { ++ if (gimple_call_internal_p (stmt)) ++ return false; + if (gimple_call_flags (stmt) & (ECF_PURE | ECF_CONST)) + return true; + return false; +@@ -4187,6 +4189,7 @@ + gimple_stmt_iterator gsi; + gimple stmt; + unsigned i; ++ tree fn; + + FOR_EACH_BB (b) + { +@@ -4378,9 +4381,10 @@ + /* Visit indirect calls and turn them into direct calls if + possible. */ + if (is_gimple_call (stmt) +- && TREE_CODE (gimple_call_fn (stmt)) == SSA_NAME) ++ && (fn = gimple_call_fn (stmt)) ++ && TREE_CODE (fn) == SSA_NAME) + { +- tree fn = VN_INFO (gimple_call_fn (stmt))->valnum; ++ fn = VN_INFO (fn)->valnum; + if (TREE_CODE (fn) == ADDR_EXPR + && TREE_CODE (TREE_OPERAND (fn, 0)) == FUNCTION_DECL) + { + +=== modified file 'gcc/tree-ssa-sccvn.c' +--- old/gcc/tree-ssa-sccvn.c 2011-05-12 14:08:00 +0000 ++++ new/gcc/tree-ssa-sccvn.c 2011-06-02 12:12:00 +0000 +@@ -2982,7 +2982,8 @@ + /* ??? We should handle stores from calls. */ + else if (TREE_CODE (lhs) == SSA_NAME) + { +- if (gimple_call_flags (stmt) & (ECF_PURE | ECF_CONST)) ++ if (!gimple_call_internal_p (stmt) ++ && gimple_call_flags (stmt) & (ECF_PURE | ECF_CONST)) + changed = visit_reference_op_call (lhs, stmt); + else + changed = defs_to_varying (stmt); + +=== modified file 'gcc/tree-ssa-structalias.c' +--- old/gcc/tree-ssa-structalias.c 2011-02-10 15:29:52 +0000 ++++ new/gcc/tree-ssa-structalias.c 2011-05-05 15:42:22 +0000 +@@ -4319,6 +4319,7 @@ + /* Fallthru to general call handling. */; + } + if (!in_ipa_mode ++ || gimple_call_internal_p (t) + || (fndecl + && (!(fi = lookup_vi_for_tree (fndecl)) + || !fi->is_fn_info))) + +=== modified file 'gcc/tree-vect-data-refs.c' +--- old/gcc/tree-vect-data-refs.c 2011-05-06 11:28:27 +0000 ++++ new/gcc/tree-vect-data-refs.c 2011-06-02 12:12:00 +0000 +@@ -43,6 +43,45 @@ + #include "expr.h" + #include "optabs.h" + ++/* Return true if load- or store-lanes optab OPTAB is implemented for ++ COUNT vectors of type VECTYPE. 
NAME is the name of OPTAB. */ ++ ++static bool ++vect_lanes_optab_supported_p (const char *name, convert_optab optab, ++ tree vectype, unsigned HOST_WIDE_INT count) ++{ ++ enum machine_mode mode, array_mode; ++ bool limit_p; ++ ++ mode = TYPE_MODE (vectype); ++ limit_p = !targetm.array_mode_supported_p (mode, count); ++ array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode), ++ MODE_INT, limit_p); ++ ++ if (array_mode == BLKmode) ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]", ++ GET_MODE_NAME (mode), count); ++ return false; ++ } ++ ++ if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing) ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "cannot use %s<%s><%s>", ++ name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode)); ++ return false; ++ } ++ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "can use %s<%s><%s>", ++ name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode)); ++ ++ return true; ++} ++ ++ + /* Return the smallest scalar part of STMT. + This is used to determine the vectype of the stmt. We generally set the + vectype according to the type of the result (lhs). For stmts whose +@@ -2196,19 +2235,6 @@ + return false; + } + +- /* FORNOW: we handle only interleaving that is a power of 2. +- We don't fail here if it may be still possible to vectorize the +- group using SLP. If not, the size of the group will be checked in +- vect_analyze_operations, and the vectorization will fail. */ +- if (exact_log2 (stride) == -1) +- { +- if (vect_print_dump_info (REPORT_DETAILS)) +- fprintf (vect_dump, "interleaving is not a power of 2"); +- +- if (slp_impossible) +- return false; +- } +- + if (stride == 0) + stride = count; + +@@ -2911,31 +2937,33 @@ + + /* Function vect_create_data_ref_ptr. + +- Create a new pointer to vector type (vp), that points to the first location +- accessed in the loop by STMT, along with the def-use update chain to +- appropriately advance the pointer through the loop iterations. Also set +- aliasing information for the pointer. This vector pointer is used by the +- callers to this function to create a memory reference expression for vector +- load/store access. ++ Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first ++ location accessed in the loop by STMT, along with the def-use update ++ chain to appropriately advance the pointer through the loop iterations. ++ Also set aliasing information for the pointer. This pointer is used by ++ the callers to this function to create a memory reference expression for ++ vector load/store access. + + Input: + 1. STMT: a stmt that references memory. Expected to be of the form + GIMPLE_ASSIGN <name, data-ref> or + GIMPLE_ASSIGN <data-ref, name>. +- 2. AT_LOOP: the loop where the vector memref is to be created. +- 3. OFFSET (optional): an offset to be added to the initial address accessed ++ 2. AGGR_TYPE: the type of the reference, which should be either a vector ++ or an array. ++ 3. AT_LOOP: the loop where the vector memref is to be created. ++ 4. OFFSET (optional): an offset to be added to the initial address accessed + by the data-ref in STMT. +- 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain ++ 5. ONLY_INIT: indicate if vp is to be updated in the loop, or remain + pointing to the initial address. +- 5. TYPE: if not NULL indicates the required type of the data-ref. ++ 6. TYPE: if not NULL indicates the required type of the data-ref. 
+ + Output: + 1. Declare a new ptr to vector_type, and have it point to the base of the + data reference (initial addressed accessed by the data reference). + For example, for vector of type V8HI, the following code is generated: + +- v8hi *vp; +- vp = (v8hi *)initial_address; ++ v8hi *ap; ++ ap = (v8hi *)initial_address; + + if OFFSET is not supplied: + initial_address = &a[init]; +@@ -2955,7 +2983,7 @@ + 4. Return the pointer. */ + + tree +-vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop, ++vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop, + tree offset, tree *initial_address, gimple *ptr_incr, + bool only_init, bool *inv_p) + { +@@ -2965,17 +2993,16 @@ + struct loop *loop = NULL; + bool nested_in_vect_loop = false; + struct loop *containing_loop = NULL; +- tree vectype = STMT_VINFO_VECTYPE (stmt_info); +- tree vect_ptr_type; +- tree vect_ptr; ++ tree aggr_ptr_type; ++ tree aggr_ptr; + tree new_temp; + gimple vec_stmt; + gimple_seq new_stmt_list = NULL; + edge pe = NULL; + basic_block new_bb; +- tree vect_ptr_init; ++ tree aggr_ptr_init; + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); +- tree vptr; ++ tree aptr; + gimple_stmt_iterator incr_gsi; + bool insert_after; + bool negative; +@@ -2986,6 +3013,9 @@ + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + tree base; + ++ gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE ++ || TREE_CODE (aggr_type) == VECTOR_TYPE); ++ + if (loop_vinfo) + { + loop = LOOP_VINFO_LOOP (loop_vinfo); +@@ -3020,8 +3050,9 @@ + if (vect_print_dump_info (REPORT_DETAILS)) + { + tree data_ref_base = base_name; +- fprintf (vect_dump, "create vector-pointer variable to type: "); +- print_generic_expr (vect_dump, vectype, TDF_SLIM); ++ fprintf (vect_dump, "create %s-pointer variable to type: ", ++ tree_code_name[(int) TREE_CODE (aggr_type)]); ++ print_generic_expr (vect_dump, aggr_type, TDF_SLIM); + if (TREE_CODE (data_ref_base) == VAR_DECL + || TREE_CODE (data_ref_base) == ARRAY_REF) + fprintf (vect_dump, " vectorizing an array ref: "); +@@ -3032,27 +3063,28 @@ + print_generic_expr (vect_dump, base_name, TDF_SLIM); + } + +- /* (1) Create the new vector-pointer variable. */ +- vect_ptr_type = build_pointer_type (vectype); ++ /* (1) Create the new aggregate-pointer variable. */ ++ aggr_ptr_type = build_pointer_type (aggr_type); + base = get_base_address (DR_REF (dr)); + if (base + && TREE_CODE (base) == MEM_REF) +- vect_ptr_type +- = build_qualified_type (vect_ptr_type, ++ aggr_ptr_type ++ = build_qualified_type (aggr_ptr_type, + TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0)))); +- vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, ++ aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, + get_name (base_name)); + +- /* Vector types inherit the alias set of their component type by default so +- we need to use a ref-all pointer if the data reference does not conflict +- with the created vector data reference because it is not addressable. */ +- if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr), ++ /* Vector and array types inherit the alias set of their component ++ type by default so we need to use a ref-all pointer if the data ++ reference does not conflict with the created aggregated data ++ reference because it is not addressable. 
*/ ++ if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr), + get_alias_set (DR_REF (dr)))) + { +- vect_ptr_type +- = build_pointer_type_for_mode (vectype, +- TYPE_MODE (vect_ptr_type), true); +- vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, ++ aggr_ptr_type ++ = build_pointer_type_for_mode (aggr_type, ++ TYPE_MODE (aggr_ptr_type), true); ++ aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, + get_name (base_name)); + } + +@@ -3063,14 +3095,14 @@ + do + { + tree lhs = gimple_assign_lhs (orig_stmt); +- if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr), ++ if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr), + get_alias_set (lhs))) + { +- vect_ptr_type +- = build_pointer_type_for_mode (vectype, +- TYPE_MODE (vect_ptr_type), true); +- vect_ptr +- = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, ++ aggr_ptr_type ++ = build_pointer_type_for_mode (aggr_type, ++ TYPE_MODE (aggr_ptr_type), true); ++ aggr_ptr ++ = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, + get_name (base_name)); + break; + } +@@ -3080,7 +3112,7 @@ + while (orig_stmt); + } + +- add_referenced_var (vect_ptr); ++ add_referenced_var (aggr_ptr); + + /* Note: If the dataref is in an inner-loop nested in LOOP, and we are + vectorizing LOOP (i.e., outer-loop vectorization), we need to create two +@@ -3113,8 +3145,8 @@ + vp2 = vp1 + step + if () goto LOOP */ + +- /* (2) Calculate the initial address the vector-pointer, and set +- the vector-pointer to point to it before the loop. */ ++ /* (2) Calculate the initial address of the aggregate-pointer, and set ++ the aggregate-pointer to point to it before the loop. */ + + /* Create: (&(base[init_val+offset]) in the loop preheader. */ + +@@ -3133,17 +3165,17 @@ + + *initial_address = new_temp; + +- /* Create: p = (vectype *) initial_base */ ++ /* Create: p = (aggr_type *) initial_base */ + if (TREE_CODE (new_temp) != SSA_NAME +- || !useless_type_conversion_p (vect_ptr_type, TREE_TYPE (new_temp))) ++ || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp))) + { +- vec_stmt = gimple_build_assign (vect_ptr, +- fold_convert (vect_ptr_type, new_temp)); +- vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt); ++ vec_stmt = gimple_build_assign (aggr_ptr, ++ fold_convert (aggr_ptr_type, new_temp)); ++ aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt); + /* Copy the points-to information if it exists. */ + if (DR_PTR_INFO (dr)) +- duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr)); +- gimple_assign_set_lhs (vec_stmt, vect_ptr_init); ++ duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr)); ++ gimple_assign_set_lhs (vec_stmt, aggr_ptr_init); + if (pe) + { + new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt); +@@ -3153,19 +3185,19 @@ + gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT); + } + else +- vect_ptr_init = new_temp; ++ aggr_ptr_init = new_temp; + +- /* (3) Handle the updating of the vector-pointer inside the loop. ++ /* (3) Handle the updating of the aggregate-pointer inside the loop. + This is needed when ONLY_INIT is false, and also when AT_LOOP is the + inner-loop nested in LOOP (during outer-loop vectorization). */ + + /* No update in loop is required. */ + if (only_init && (!loop_vinfo || at_loop == loop)) +- vptr = vect_ptr_init; ++ aptr = aggr_ptr_init; + else + { +- /* The step of the vector pointer is the Vector Size. */ +- tree step = TYPE_SIZE_UNIT (vectype); ++ /* The step of the aggregate pointer is the type size. 
*/ ++ tree step = TYPE_SIZE_UNIT (aggr_type); + /* One exception to the above is when the scalar step of the load in + LOOP is zero. In this case the step here is also zero. */ + if (*inv_p) +@@ -3175,9 +3207,9 @@ + + standard_iv_increment_position (loop, &incr_gsi, &insert_after); + +- create_iv (vect_ptr_init, +- fold_convert (vect_ptr_type, step), +- vect_ptr, loop, &incr_gsi, insert_after, ++ create_iv (aggr_ptr_init, ++ fold_convert (aggr_ptr_type, step), ++ aggr_ptr, loop, &incr_gsi, insert_after, + &indx_before_incr, &indx_after_incr); + incr = gsi_stmt (incr_gsi); + set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL)); +@@ -3191,14 +3223,14 @@ + if (ptr_incr) + *ptr_incr = incr; + +- vptr = indx_before_incr; ++ aptr = indx_before_incr; + } + + if (!nested_in_vect_loop || only_init) +- return vptr; +- +- +- /* (4) Handle the updating of the vector-pointer inside the inner-loop ++ return aptr; ++ ++ ++ /* (4) Handle the updating of the aggregate-pointer inside the inner-loop + nested in LOOP, if exists. */ + + gcc_assert (nested_in_vect_loop); +@@ -3206,7 +3238,7 @@ + { + standard_iv_increment_position (containing_loop, &incr_gsi, + &insert_after); +- create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr, ++ create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr, + containing_loop, &incr_gsi, insert_after, &indx_before_incr, + &indx_after_incr); + incr = gsi_stmt (incr_gsi); +@@ -3343,13 +3375,22 @@ + and FALSE otherwise. */ + + bool +-vect_strided_store_supported (tree vectype) ++vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count) + { + optab interleave_high_optab, interleave_low_optab; + enum machine_mode mode; + + mode = TYPE_MODE (vectype); + ++ /* vect_permute_store_chain requires the group size to be a power of two. */ ++ if (exact_log2 (count) == -1) ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "the size of the group of strided accesses" ++ " is not a power of 2"); ++ return false; ++ } ++ + /* Check that the operation is supported. */ + interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR, + vectype, optab_default); +@@ -3374,6 +3415,18 @@ + } + + ++/* Return TRUE if vec_store_lanes is available for COUNT vectors of ++ type VECTYPE. */ ++ ++bool ++vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count) ++{ ++ return vect_lanes_optab_supported_p ("vec_store_lanes", ++ vec_store_lanes_optab, ++ vectype, count); ++} ++ ++ + /* Function vect_permute_store_chain. + + Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be +@@ -3435,7 +3488,7 @@ + I3: 4 12 20 28 5 13 21 30 + I4: 6 14 22 30 7 15 23 31. */ + +-bool ++void + vect_permute_store_chain (VEC(tree,heap) *dr_chain, + unsigned int length, + gimple stmt, +@@ -3449,9 +3502,7 @@ + unsigned int j; + enum tree_code high_code, low_code; + +- /* Check that the operation is supported. 
*/ +- if (!vect_strided_store_supported (vectype)) +- return false; ++ gcc_assert (vect_strided_store_supported (vectype, length)); + + *result_chain = VEC_copy (tree, heap, dr_chain); + +@@ -3504,7 +3555,6 @@ + } + dr_chain = VEC_copy (tree, heap, *result_chain); + } +- return true; + } + + /* Function vect_setup_realignment +@@ -3674,8 +3724,9 @@ + + gcc_assert (!compute_in_loop); + vec_dest = vect_create_destination_var (scalar_dest, vectype); +- ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE, +- &init_addr, &inc, true, &inv_p); ++ ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load, ++ NULL_TREE, &init_addr, &inc, ++ true, &inv_p); + new_stmt = gimple_build_assign_with_ops + (BIT_AND_EXPR, NULL_TREE, ptr, + build_int_cst (TREE_TYPE (ptr), +@@ -3780,13 +3831,22 @@ + and FALSE otherwise. */ + + bool +-vect_strided_load_supported (tree vectype) ++vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count) + { + optab perm_even_optab, perm_odd_optab; + enum machine_mode mode; + + mode = TYPE_MODE (vectype); + ++ /* vect_permute_load_chain requires the group size to be a power of two. */ ++ if (exact_log2 (count) == -1) ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "the size of the group of strided accesses" ++ " is not a power of 2"); ++ return false; ++ } ++ + perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype, + optab_default); + if (!perm_even_optab) +@@ -3821,6 +3881,16 @@ + return true; + } + ++/* Return TRUE if vec_load_lanes is available for COUNT vectors of ++ type VECTYPE. */ ++ ++bool ++vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count) ++{ ++ return vect_lanes_optab_supported_p ("vec_load_lanes", ++ vec_load_lanes_optab, ++ vectype, count); ++} + + /* Function vect_permute_load_chain. + +@@ -3898,7 +3968,7 @@ + 3rd vec (E2): 2 6 10 14 18 22 26 30 + 4th vec (E4): 3 7 11 15 19 23 27 31. */ + +-bool ++static void + vect_permute_load_chain (VEC(tree,heap) *dr_chain, + unsigned int length, + gimple stmt, +@@ -3911,9 +3981,7 @@ + int i; + unsigned int j; + +- /* Check that the operation is supported. */ +- if (!vect_strided_load_supported (vectype)) +- return false; ++ gcc_assert (vect_strided_load_supported (vectype, length)); + + *result_chain = VEC_copy (tree, heap, dr_chain); + for (i = 0; i < exact_log2 (length); i++) +@@ -3956,7 +4024,6 @@ + } + dr_chain = VEC_copy (tree, heap, *result_chain); + } +- return true; + } + + +@@ -3967,24 +4034,32 @@ + the scalar statements. + */ + +-bool ++void + vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size, + gimple_stmt_iterator *gsi) + { +- stmt_vec_info stmt_info = vinfo_for_stmt (stmt); +- gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info); +- gimple next_stmt, new_stmt; + VEC(tree,heap) *result_chain = NULL; +- unsigned int i, gap_count; +- tree tmp_data_ref; + + /* DR_CHAIN contains input data-refs that are a part of the interleaving. + RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted + vectors, that are ready for vector computation. */ + result_chain = VEC_alloc (tree, heap, size); +- /* Permute. 
*/ +- if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain)) +- return false; ++ vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain); ++ vect_record_strided_load_vectors (stmt, result_chain); ++ VEC_free (tree, heap, result_chain); ++} ++ ++/* RESULT_CHAIN contains the output of a group of strided loads that were ++ generated as part of the vectorization of STMT. Assign the statement ++ for each vector to the associated scalar statement. */ ++ ++void ++vect_record_strided_load_vectors (gimple stmt, VEC(tree,heap) *result_chain) ++{ ++ gimple first_stmt = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)); ++ gimple next_stmt, new_stmt; ++ unsigned int i, gap_count; ++ tree tmp_data_ref; + + /* Put a permuted data-ref in the VECTORIZED_STMT field. + Since we scan the chain starting from it's first node, their order +@@ -4046,9 +4121,6 @@ + break; + } + } +- +- VEC_free (tree, heap, result_chain); +- return true; + } + + /* Function vect_force_dr_alignment_p. + +=== modified file 'gcc/tree-vect-slp.c' +--- old/gcc/tree-vect-slp.c 2010-12-23 16:25:52 +0000 ++++ new/gcc/tree-vect-slp.c 2011-05-05 15:43:06 +0000 +@@ -215,7 +215,8 @@ + vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node); + else + /* Store. */ +- vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node); ++ vect_model_store_cost (stmt_info, ncopies_for_cost, false, ++ dt[0], slp_node); + } + + else +@@ -579,7 +580,7 @@ + + /* Analyze costs (for the first stmt in the group). */ + vect_model_load_cost (vinfo_for_stmt (stmt), +- ncopies_for_cost, *node); ++ ncopies_for_cost, false, *node); + } + + /* Store the place of this load in the interleaving chain. In + +=== modified file 'gcc/tree-vect-stmts.c' +--- old/gcc/tree-vect-stmts.c 2011-04-18 07:38:11 +0000 ++++ new/gcc/tree-vect-stmts.c 2011-06-02 12:12:00 +0000 +@@ -42,6 +42,82 @@ + #include "langhooks.h" + + ++/* Return a variable of type ELEM_TYPE[NELEMS]. */ ++ ++static tree ++create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems) ++{ ++ return create_tmp_var (build_array_type_nelts (elem_type, nelems), ++ "vect_array"); ++} ++ ++/* ARRAY is an array of vectors created by create_vector_array. ++ Return an SSA_NAME for the vector in index N. The reference ++ is part of the vectorization of STMT and the vector is associated ++ with scalar destination SCALAR_DEST. */ ++ ++static tree ++read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest, ++ tree array, unsigned HOST_WIDE_INT n) ++{ ++ tree vect_type, vect, vect_name, array_ref; ++ gimple new_stmt; ++ ++ gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE); ++ vect_type = TREE_TYPE (TREE_TYPE (array)); ++ vect = vect_create_destination_var (scalar_dest, vect_type); ++ array_ref = build4 (ARRAY_REF, vect_type, array, ++ build_int_cst (size_type_node, n), ++ NULL_TREE, NULL_TREE); ++ ++ new_stmt = gimple_build_assign (vect, array_ref); ++ vect_name = make_ssa_name (vect, new_stmt); ++ gimple_assign_set_lhs (new_stmt, vect_name); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ mark_symbols_for_renaming (new_stmt); ++ ++ return vect_name; ++} ++ ++/* ARRAY is an array of vectors created by create_vector_array. ++ Emit code to store SSA_NAME VECT in index N of the array. ++ The store is part of the vectorization of STMT. 
*/ ++ ++static void ++write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect, ++ tree array, unsigned HOST_WIDE_INT n) ++{ ++ tree array_ref; ++ gimple new_stmt; ++ ++ array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array, ++ build_int_cst (size_type_node, n), ++ NULL_TREE, NULL_TREE); ++ ++ new_stmt = gimple_build_assign (array_ref, vect); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ mark_symbols_for_renaming (new_stmt); ++} ++ ++/* PTR is a pointer to an array of type TYPE. Return a representation ++ of *PTR. The memory reference replaces those in FIRST_DR ++ (and its group). */ ++ ++static tree ++create_array_ref (tree type, tree ptr, struct data_reference *first_dr) ++{ ++ struct ptr_info_def *pi; ++ tree mem_ref, alias_ptr_type; ++ ++ alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr)); ++ mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0)); ++ /* Arrays have the same alignment as their type. */ ++ pi = get_ptr_info (ptr); ++ pi->align = TYPE_ALIGN_UNIT (type); ++ pi->misalign = 0; ++ return mem_ref; ++} ++ + /* Utility functions used by vect_mark_stmts_to_be_vectorized. */ + + /* Function vect_mark_relevant. +@@ -648,7 +724,8 @@ + + void + vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, +- enum vect_def_type dt, slp_tree slp_node) ++ bool store_lanes_p, enum vect_def_type dt, ++ slp_tree slp_node) + { + int group_size; + unsigned int inside_cost = 0, outside_cost = 0; +@@ -685,9 +762,11 @@ + first_dr = STMT_VINFO_DATA_REF (stmt_info); + } + +- /* Is this an access in a group of stores, which provide strided access? +- If so, add in the cost of the permutes. */ +- if (group_size > 1) ++ /* We assume that the cost of a single store-lanes instruction is ++ equivalent to the cost of GROUP_SIZE separate stores. If a strided ++ access is instead being provided by a permute-and-store operation, ++ include the cost of the permutes. */ ++ if (!store_lanes_p && group_size > 1) + { + /* Uses a high and low interleave operation for each needed permute. */ + inside_cost = ncopies * exact_log2(group_size) * group_size +@@ -763,8 +842,8 @@ + access scheme chosen. */ + + void +-vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node) +- ++vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p, ++ slp_tree slp_node) + { + int group_size; + gimple first_stmt; +@@ -789,9 +868,11 @@ + first_dr = dr; + } + +- /* Is this an access in a group of loads providing strided access? +- If so, add in the cost of the permutes. */ +- if (group_size > 1) ++ /* We assume that the cost of a single load-lanes instruction is ++ equivalent to the cost of GROUP_SIZE separate loads. If a strided ++ access is instead being provided by a load-and-permute operation, ++ include the cost of the permutes. */ ++ if (!load_lanes_p && group_size > 1) + { + /* Uses an even and odd extract operations for each needed permute. 
*/ + inside_cost = ncopies * exact_log2(group_size) * group_size +@@ -3329,6 +3410,7 @@ + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL; + tree vectype = STMT_VINFO_VECTYPE (stmt_info); ++ tree elem_type; + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = NULL; + enum machine_mode vec_mode; +@@ -3344,6 +3426,7 @@ + int j; + gimple next_stmt, first_stmt = NULL; + bool strided_store = false; ++ bool store_lanes_p = false; + unsigned int group_size, i; + VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL; + bool inv_p; +@@ -3351,6 +3434,7 @@ + bool slp = (slp_node != NULL); + unsigned int vec_num; + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); ++ tree aggr_type; + + if (loop_vinfo) + loop = LOOP_VINFO_LOOP (loop_vinfo); +@@ -3404,7 +3488,8 @@ + + /* The scalar rhs type needs to be trivially convertible to the vector + component type. This should always be the case. */ +- if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op))) ++ elem_type = TREE_TYPE (vectype); ++ if (!useless_type_conversion_p (elem_type, TREE_TYPE (op))) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "??? operands of different types"); +@@ -3431,9 +3516,14 @@ + { + strided_store = true; + first_stmt = DR_GROUP_FIRST_DR (stmt_info); +- if (!vect_strided_store_supported (vectype) +- && !PURE_SLP_STMT (stmt_info) && !slp) +- return false; ++ if (!slp && !PURE_SLP_STMT (stmt_info)) ++ { ++ group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)); ++ if (vect_store_lanes_supported (vectype, group_size)) ++ store_lanes_p = true; ++ else if (!vect_strided_store_supported (vectype, group_size)) ++ return false; ++ } + + if (first_stmt == stmt) + { +@@ -3459,7 +3549,7 @@ + if (!vec_stmt) /* transformation not required. */ + { + STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; +- vect_model_store_cost (stmt_info, ncopies, dt, NULL); ++ vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL); + return true; + } + +@@ -3514,6 +3604,16 @@ + + alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false); + gcc_assert (alignment_support_scheme); ++ /* Targets with store-lane instructions must not require explicit ++ realignment. */ ++ gcc_assert (!store_lanes_p ++ || alignment_support_scheme == dr_aligned ++ || alignment_support_scheme == dr_unaligned_supported); ++ ++ if (store_lanes_p) ++ aggr_type = build_array_type_nelts (elem_type, vec_num * nunits); ++ else ++ aggr_type = vectype; + + /* In case the vectorization factor (VF) is bigger than the number + of elements that we can fit in a vectype (nunits), we have to generate +@@ -3602,9 +3702,9 @@ + /* We should have catched mismatched types earlier. */ + gcc_assert (useless_type_conversion_p (vectype, + TREE_TYPE (vec_oprnd))); +- dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE, +- &dummy, &ptr_incr, false, +- &inv_p); ++ dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL, ++ NULL_TREE, &dummy, ++ &ptr_incr, false, &inv_p); + gcc_assert (bb_vinfo || !inv_p); + } + else +@@ -3625,76 +3725,101 @@ + VEC_replace(tree, dr_chain, i, vec_oprnd); + VEC_replace(tree, oprnds, i, vec_oprnd); + } +- dataref_ptr = +- bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE); +- } +- +- if (strided_store) +- { +- result_chain = VEC_alloc (tree, heap, group_size); +- /* Permute. 
*/ +- if (!vect_permute_store_chain (dr_chain, group_size, stmt, gsi, +- &result_chain)) +- return false; +- } +- +- next_stmt = first_stmt; +- for (i = 0; i < vec_num; i++) +- { +- struct ptr_info_def *pi; +- +- if (i > 0) +- /* Bump the vector pointer. */ +- dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, +- NULL_TREE); +- +- if (slp) +- vec_oprnd = VEC_index (tree, vec_oprnds, i); +- else if (strided_store) +- /* For strided stores vectorized defs are interleaved in +- vect_permute_store_chain(). */ +- vec_oprnd = VEC_index (tree, result_chain, i); +- +- data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr, +- build_int_cst (reference_alias_ptr_type +- (DR_REF (first_dr)), 0)); +- pi = get_ptr_info (dataref_ptr); +- pi->align = TYPE_ALIGN_UNIT (vectype); +- if (aligned_access_p (first_dr)) +- pi->misalign = 0; +- else if (DR_MISALIGNMENT (first_dr) == -1) +- { +- TREE_TYPE (data_ref) +- = build_aligned_type (TREE_TYPE (data_ref), +- TYPE_ALIGN (TREE_TYPE (vectype))); +- pi->align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype)); +- pi->misalign = 0; +- } +- else +- { +- TREE_TYPE (data_ref) +- = build_aligned_type (TREE_TYPE (data_ref), +- TYPE_ALIGN (TREE_TYPE (vectype))); +- pi->misalign = DR_MISALIGNMENT (first_dr); +- } +- +- /* Arguments are ready. Create the new vector stmt. */ +- new_stmt = gimple_build_assign (data_ref, vec_oprnd); ++ dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, ++ TYPE_SIZE_UNIT (aggr_type)); ++ } ++ ++ if (store_lanes_p) ++ { ++ tree vec_array; ++ ++ /* Combine all the vectors into an array. */ ++ vec_array = create_vector_array (vectype, vec_num); ++ for (i = 0; i < vec_num; i++) ++ { ++ vec_oprnd = VEC_index (tree, dr_chain, i); ++ write_vector_array (stmt, gsi, vec_oprnd, vec_array, i); ++ } ++ ++ /* Emit: ++ MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */ ++ data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr); ++ new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array); ++ gimple_call_set_lhs (new_stmt, data_ref); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + mark_symbols_for_renaming (new_stmt); +- +- if (slp) +- continue; +- +- if (j == 0) +- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; ++ } ++ else ++ { ++ new_stmt = NULL; ++ if (strided_store) ++ { ++ result_chain = VEC_alloc (tree, heap, group_size); ++ /* Permute. */ ++ vect_permute_store_chain (dr_chain, group_size, stmt, gsi, ++ &result_chain); ++ } ++ ++ next_stmt = first_stmt; ++ for (i = 0; i < vec_num; i++) ++ { ++ struct ptr_info_def *pi; ++ ++ if (i > 0) ++ /* Bump the vector pointer. */ ++ dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, ++ stmt, NULL_TREE); ++ ++ if (slp) ++ vec_oprnd = VEC_index (tree, vec_oprnds, i); ++ else if (strided_store) ++ /* For strided stores vectorized defs are interleaved in ++ vect_permute_store_chain(). 
*/ ++ vec_oprnd = VEC_index (tree, result_chain, i); ++ ++ data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr, ++ build_int_cst (reference_alias_ptr_type ++ (DR_REF (first_dr)), 0)); ++ pi = get_ptr_info (dataref_ptr); ++ pi->align = TYPE_ALIGN_UNIT (vectype); ++ if (aligned_access_p (first_dr)) ++ pi->misalign = 0; ++ else if (DR_MISALIGNMENT (first_dr) == -1) ++ { ++ TREE_TYPE (data_ref) ++ = build_aligned_type (TREE_TYPE (data_ref), ++ TYPE_ALIGN (elem_type)); ++ pi->align = TYPE_ALIGN_UNIT (elem_type); ++ pi->misalign = 0; ++ } ++ else ++ { ++ TREE_TYPE (data_ref) ++ = build_aligned_type (TREE_TYPE (data_ref), ++ TYPE_ALIGN (elem_type)); ++ pi->misalign = DR_MISALIGNMENT (first_dr); ++ } ++ ++ /* Arguments are ready. Create the new vector stmt. */ ++ new_stmt = gimple_build_assign (data_ref, vec_oprnd); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ mark_symbols_for_renaming (new_stmt); ++ ++ if (slp) ++ continue; ++ ++ next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt)); ++ if (!next_stmt) ++ break; ++ } ++ } ++ if (!slp) ++ { ++ if (j == 0) ++ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; + else + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; +- + prev_stmt_info = vinfo_for_stmt (new_stmt); +- next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt)); +- if (!next_stmt) +- break; + } + } + +@@ -3805,6 +3930,7 @@ + bool nested_in_vect_loop = false; + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr; + tree vectype = STMT_VINFO_VECTYPE (stmt_info); ++ tree elem_type; + tree new_temp; + enum machine_mode mode; + gimple new_stmt = NULL; +@@ -3821,6 +3947,7 @@ + gimple phi = NULL; + VEC(tree,heap) *dr_chain = NULL; + bool strided_load = false; ++ bool load_lanes_p = false; + gimple first_stmt; + tree scalar_type; + bool inv_p; +@@ -3833,6 +3960,7 @@ + enum tree_code code; + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); + int vf; ++ tree aggr_type; + + if (loop_vinfo) + { +@@ -3909,7 +4037,8 @@ + + /* The vector component type needs to be trivially convertible to the + scalar lhs. This should always be the case. */ +- if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), TREE_TYPE (vectype))) ++ elem_type = TREE_TYPE (vectype); ++ if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), elem_type)) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "??? operands of different types"); +@@ -3923,10 +4052,15 @@ + /* FORNOW */ + gcc_assert (! nested_in_vect_loop); + +- /* Check if interleaving is supported. */ +- if (!vect_strided_load_supported (vectype) +- && !PURE_SLP_STMT (stmt_info) && !slp) +- return false; ++ first_stmt = DR_GROUP_FIRST_DR (stmt_info); ++ if (!slp && !PURE_SLP_STMT (stmt_info)) ++ { ++ group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)); ++ if (vect_load_lanes_supported (vectype, group_size)) ++ load_lanes_p = true; ++ else if (!vect_strided_load_supported (vectype, group_size)) ++ return false; ++ } + } + + if (negative) +@@ -3951,12 +4085,12 @@ + if (!vec_stmt) /* transformation not required. */ + { + STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; +- vect_model_load_cost (stmt_info, ncopies, NULL); ++ vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL); + return true; + } + + if (vect_print_dump_info (REPORT_DETAILS)) +- fprintf (vect_dump, "transform load."); ++ fprintf (vect_dump, "transform load. ncopies = %d", ncopies); + + /** Transform. 
**/ + +@@ -3982,8 +4116,6 @@ + } + else + vec_num = group_size; +- +- dr_chain = VEC_alloc (tree, heap, vec_num); + } + else + { +@@ -3994,6 +4126,11 @@ + + alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false); + gcc_assert (alignment_support_scheme); ++ /* Targets with load-lane instructions must not require explicit ++ realignment. */ ++ gcc_assert (!load_lanes_p ++ || alignment_support_scheme == dr_aligned ++ || alignment_support_scheme == dr_unaligned_supported); + + /* In case the vectorization factor (VF) is bigger than the number + of elements that we can fit in a vectype (nunits), we have to generate +@@ -4125,208 +4262,252 @@ + if (negative) + offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1); + ++ if (load_lanes_p) ++ aggr_type = build_array_type_nelts (elem_type, vec_num * nunits); ++ else ++ aggr_type = vectype; ++ + prev_stmt_info = NULL; + for (j = 0; j < ncopies; j++) + { + /* 1. Create the vector pointer update chain. */ + if (j == 0) +- dataref_ptr = vect_create_data_ref_ptr (first_stmt, ++ dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, + at_loop, offset, + &dummy, &ptr_incr, false, + &inv_p); + else +- dataref_ptr = +- bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE); +- +- for (i = 0; i < vec_num; i++) ++ dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, ++ TYPE_SIZE_UNIT (aggr_type)); ++ ++ if (strided_load || slp_perm) ++ dr_chain = VEC_alloc (tree, heap, vec_num); ++ ++ if (load_lanes_p) + { +- if (i > 0) +- dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, +- NULL_TREE); +- +- /* 2. Create the vector-load in the loop. */ +- switch (alignment_support_scheme) +- { +- case dr_aligned: +- case dr_unaligned_supported: +- { +- struct ptr_info_def *pi; +- data_ref +- = build2 (MEM_REF, vectype, dataref_ptr, +- build_int_cst (reference_alias_ptr_type +- (DR_REF (first_dr)), 0)); +- pi = get_ptr_info (dataref_ptr); +- pi->align = TYPE_ALIGN_UNIT (vectype); +- if (alignment_support_scheme == dr_aligned) +- { +- gcc_assert (aligned_access_p (first_dr)); +- pi->misalign = 0; +- } +- else if (DR_MISALIGNMENT (first_dr) == -1) +- { +- TREE_TYPE (data_ref) +- = build_aligned_type (TREE_TYPE (data_ref), +- TYPE_ALIGN (TREE_TYPE (vectype))); +- pi->align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype)); +- pi->misalign = 0; +- } +- else +- { +- TREE_TYPE (data_ref) +- = build_aligned_type (TREE_TYPE (data_ref), +- TYPE_ALIGN (TREE_TYPE (vectype))); +- pi->misalign = DR_MISALIGNMENT (first_dr); +- } +- break; +- } +- case dr_explicit_realign: +- { +- tree ptr, bump; +- tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1); +- +- if (compute_in_loop) +- msq = vect_setup_realignment (first_stmt, gsi, +- &realignment_token, +- dr_explicit_realign, +- dataref_ptr, NULL); +- +- new_stmt = gimple_build_assign_with_ops +- (BIT_AND_EXPR, NULL_TREE, dataref_ptr, +- build_int_cst +- (TREE_TYPE (dataref_ptr), +- -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); +- ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt); +- gimple_assign_set_lhs (new_stmt, ptr); +- vect_finish_stmt_generation (stmt, new_stmt, gsi); +- data_ref +- = build2 (MEM_REF, vectype, ptr, +- build_int_cst (reference_alias_ptr_type +- (DR_REF (first_dr)), 0)); +- vec_dest = vect_create_destination_var (scalar_dest, vectype); +- new_stmt = gimple_build_assign (vec_dest, data_ref); +- new_temp = make_ssa_name (vec_dest, new_stmt); +- gimple_assign_set_lhs (new_stmt, new_temp); +- gimple_set_vdef (new_stmt, gimple_vdef (stmt)); +- 
gimple_set_vuse (new_stmt, gimple_vuse (stmt)); +- vect_finish_stmt_generation (stmt, new_stmt, gsi); +- msq = new_temp; +- +- bump = size_binop (MULT_EXPR, vs_minus_1, +- TYPE_SIZE_UNIT (scalar_type)); +- ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump); +- new_stmt = gimple_build_assign_with_ops +- (BIT_AND_EXPR, NULL_TREE, ptr, +- build_int_cst +- (TREE_TYPE (ptr), +- -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); +- ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt); +- gimple_assign_set_lhs (new_stmt, ptr); +- vect_finish_stmt_generation (stmt, new_stmt, gsi); +- data_ref +- = build2 (MEM_REF, vectype, ptr, +- build_int_cst (reference_alias_ptr_type +- (DR_REF (first_dr)), 0)); +- break; +- } +- case dr_explicit_realign_optimized: +- new_stmt = gimple_build_assign_with_ops +- (BIT_AND_EXPR, NULL_TREE, dataref_ptr, +- build_int_cst +- (TREE_TYPE (dataref_ptr), +- -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); +- new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt); +- gimple_assign_set_lhs (new_stmt, new_temp); +- vect_finish_stmt_generation (stmt, new_stmt, gsi); +- data_ref +- = build2 (MEM_REF, vectype, new_temp, +- build_int_cst (reference_alias_ptr_type +- (DR_REF (first_dr)), 0)); +- break; +- default: +- gcc_unreachable (); +- } +- vec_dest = vect_create_destination_var (scalar_dest, vectype); +- new_stmt = gimple_build_assign (vec_dest, data_ref); +- new_temp = make_ssa_name (vec_dest, new_stmt); +- gimple_assign_set_lhs (new_stmt, new_temp); ++ tree vec_array; ++ ++ vec_array = create_vector_array (vectype, vec_num); ++ ++ /* Emit: ++ VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */ ++ data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr); ++ new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref); ++ gimple_call_set_lhs (new_stmt, vec_array); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + mark_symbols_for_renaming (new_stmt); + +- /* 3. Handle explicit realignment if necessary/supported. Create in +- loop: vec_dest = realign_load (msq, lsq, realignment_token) */ +- if (alignment_support_scheme == dr_explicit_realign_optimized +- || alignment_support_scheme == dr_explicit_realign) +- { +- tree tmp; +- +- lsq = gimple_assign_lhs (new_stmt); +- if (!realignment_token) +- realignment_token = dataref_ptr; ++ /* Extract each vector into an SSA_NAME. */ ++ for (i = 0; i < vec_num; i++) ++ { ++ new_temp = read_vector_array (stmt, gsi, scalar_dest, ++ vec_array, i); ++ VEC_quick_push (tree, dr_chain, new_temp); ++ } ++ ++ /* Record the mapping between SSA_NAMEs and statements. */ ++ vect_record_strided_load_vectors (stmt, dr_chain); ++ } ++ else ++ { ++ for (i = 0; i < vec_num; i++) ++ { ++ if (i > 0) ++ dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, ++ stmt, NULL_TREE); ++ ++ /* 2. Create the vector-load in the loop. 
*/ ++ switch (alignment_support_scheme) ++ { ++ case dr_aligned: ++ case dr_unaligned_supported: ++ { ++ struct ptr_info_def *pi; ++ data_ref ++ = build2 (MEM_REF, vectype, dataref_ptr, ++ build_int_cst (reference_alias_ptr_type ++ (DR_REF (first_dr)), 0)); ++ pi = get_ptr_info (dataref_ptr); ++ pi->align = TYPE_ALIGN_UNIT (vectype); ++ if (alignment_support_scheme == dr_aligned) ++ { ++ gcc_assert (aligned_access_p (first_dr)); ++ pi->misalign = 0; ++ } ++ else if (DR_MISALIGNMENT (first_dr) == -1) ++ { ++ TREE_TYPE (data_ref) ++ = build_aligned_type (TREE_TYPE (data_ref), ++ TYPE_ALIGN (elem_type)); ++ pi->align = TYPE_ALIGN_UNIT (elem_type); ++ pi->misalign = 0; ++ } ++ else ++ { ++ TREE_TYPE (data_ref) ++ = build_aligned_type (TREE_TYPE (data_ref), ++ TYPE_ALIGN (elem_type)); ++ pi->misalign = DR_MISALIGNMENT (first_dr); ++ } ++ break; ++ } ++ case dr_explicit_realign: ++ { ++ tree ptr, bump; ++ tree vs_minus_1 ++ = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1); ++ ++ if (compute_in_loop) ++ msq = vect_setup_realignment (first_stmt, gsi, ++ &realignment_token, ++ dr_explicit_realign, ++ dataref_ptr, NULL); ++ ++ new_stmt = gimple_build_assign_with_ops ++ (BIT_AND_EXPR, NULL_TREE, dataref_ptr, ++ build_int_cst ++ (TREE_TYPE (dataref_ptr), ++ -(HOST_WIDE_INT) ++ TYPE_ALIGN_UNIT (vectype))); ++ ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt); ++ gimple_assign_set_lhs (new_stmt, ptr); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ data_ref ++ = build2 (MEM_REF, vectype, ptr, ++ build_int_cst (reference_alias_ptr_type ++ (DR_REF (first_dr)), 0)); ++ vec_dest = vect_create_destination_var (scalar_dest, ++ vectype); ++ new_stmt = gimple_build_assign (vec_dest, data_ref); ++ new_temp = make_ssa_name (vec_dest, new_stmt); ++ gimple_assign_set_lhs (new_stmt, new_temp); ++ gimple_set_vdef (new_stmt, gimple_vdef (stmt)); ++ gimple_set_vuse (new_stmt, gimple_vuse (stmt)); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ msq = new_temp; ++ ++ bump = size_binop (MULT_EXPR, vs_minus_1, ++ TYPE_SIZE_UNIT (scalar_type)); ++ ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump); ++ new_stmt = gimple_build_assign_with_ops ++ (BIT_AND_EXPR, NULL_TREE, ptr, ++ build_int_cst ++ (TREE_TYPE (ptr), ++ -(HOST_WIDE_INT) ++ TYPE_ALIGN_UNIT (vectype))); ++ ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt); ++ gimple_assign_set_lhs (new_stmt, ptr); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ data_ref ++ = build2 (MEM_REF, vectype, ptr, ++ build_int_cst (reference_alias_ptr_type ++ (DR_REF (first_dr)), 0)); ++ break; ++ } ++ case dr_explicit_realign_optimized: ++ new_stmt = gimple_build_assign_with_ops ++ (BIT_AND_EXPR, NULL_TREE, dataref_ptr, ++ build_int_cst ++ (TREE_TYPE (dataref_ptr), ++ -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); ++ new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr), ++ new_stmt); ++ gimple_assign_set_lhs (new_stmt, new_temp); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ data_ref ++ = build2 (MEM_REF, vectype, new_temp, ++ build_int_cst (reference_alias_ptr_type ++ (DR_REF (first_dr)), 0)); ++ break; ++ default: ++ gcc_unreachable (); ++ } + vec_dest = vect_create_destination_var (scalar_dest, vectype); +- tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, +- realignment_token); +- new_stmt = gimple_build_assign (vec_dest, tmp); ++ new_stmt = gimple_build_assign (vec_dest, data_ref); + new_temp = make_ssa_name (vec_dest, new_stmt); + gimple_assign_set_lhs (new_stmt, new_temp); + vect_finish_stmt_generation (stmt, new_stmt, 
gsi); +- +- if (alignment_support_scheme == dr_explicit_realign_optimized) +- { +- gcc_assert (phi); +- if (i == vec_num - 1 && j == ncopies - 1) +- add_phi_arg (phi, lsq, loop_latch_edge (containing_loop), +- UNKNOWN_LOCATION); +- msq = lsq; +- } +- } +- +- /* 4. Handle invariant-load. */ +- if (inv_p && !bb_vinfo) +- { +- gcc_assert (!strided_load); +- gcc_assert (nested_in_vect_loop_p (loop, stmt)); +- if (j == 0) +- { +- int k; +- tree t = NULL_TREE; +- tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type); +- +- /* CHECKME: bitpos depends on endianess? */ +- bitpos = bitsize_zero_node; +- vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp, +- bitsize, bitpos); +- vec_dest = +- vect_create_destination_var (scalar_dest, NULL_TREE); +- new_stmt = gimple_build_assign (vec_dest, vec_inv); +- new_temp = make_ssa_name (vec_dest, new_stmt); ++ mark_symbols_for_renaming (new_stmt); ++ ++ /* 3. Handle explicit realignment if necessary/supported. ++ Create in loop: ++ vec_dest = realign_load (msq, lsq, realignment_token) */ ++ if (alignment_support_scheme == dr_explicit_realign_optimized ++ || alignment_support_scheme == dr_explicit_realign) ++ { ++ tree tmp; ++ ++ lsq = gimple_assign_lhs (new_stmt); ++ if (!realignment_token) ++ realignment_token = dataref_ptr; ++ vec_dest = vect_create_destination_var (scalar_dest, vectype); ++ tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, ++ realignment_token); ++ new_stmt = gimple_build_assign (vec_dest, tmp); ++ new_temp = make_ssa_name (vec_dest, new_stmt); + gimple_assign_set_lhs (new_stmt, new_temp); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + +- for (k = nunits - 1; k >= 0; --k) +- t = tree_cons (NULL_TREE, new_temp, t); +- /* FIXME: use build_constructor directly. */ +- vec_inv = build_constructor_from_list (vectype, t); +- new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi); ++ if (alignment_support_scheme == dr_explicit_realign_optimized) ++ { ++ gcc_assert (phi); ++ if (i == vec_num - 1 && j == ncopies - 1) ++ add_phi_arg (phi, lsq, ++ loop_latch_edge (containing_loop), ++ UNKNOWN_LOCATION); ++ msq = lsq; ++ } ++ } ++ ++ /* 4. Handle invariant-load. */ ++ if (inv_p && !bb_vinfo) ++ { ++ gcc_assert (!strided_load); ++ gcc_assert (nested_in_vect_loop_p (loop, stmt)); ++ if (j == 0) ++ { ++ int k; ++ tree t = NULL_TREE; ++ tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type); ++ ++ /* CHECKME: bitpos depends on endianess? */ ++ bitpos = bitsize_zero_node; ++ vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp, ++ bitsize, bitpos); ++ vec_dest = vect_create_destination_var (scalar_dest, ++ NULL_TREE); ++ new_stmt = gimple_build_assign (vec_dest, vec_inv); ++ new_temp = make_ssa_name (vec_dest, new_stmt); ++ gimple_assign_set_lhs (new_stmt, new_temp); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ ++ for (k = nunits - 1; k >= 0; --k) ++ t = tree_cons (NULL_TREE, new_temp, t); ++ /* FIXME: use build_constructor directly. */ ++ vec_inv = build_constructor_from_list (vectype, t); ++ new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi); ++ new_stmt = SSA_NAME_DEF_STMT (new_temp); ++ } ++ else ++ gcc_unreachable (); /* FORNOW. */ ++ } ++ ++ if (negative) ++ { ++ new_temp = reverse_vec_elements (new_temp, stmt, gsi); + new_stmt = SSA_NAME_DEF_STMT (new_temp); + } +- else +- gcc_unreachable (); /* FORNOW. 
*/ +- } +- +- if (negative) +- { +- new_temp = reverse_vec_elements (new_temp, stmt, gsi); +- new_stmt = SSA_NAME_DEF_STMT (new_temp); +- } +- +- /* Collect vector loads and later create their permutation in +- vect_transform_strided_load (). */ +- if (strided_load || slp_perm) +- VEC_quick_push (tree, dr_chain, new_temp); +- +- /* Store vector loads in the corresponding SLP_NODE. */ +- if (slp && !slp_perm) +- VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt); ++ ++ /* Collect vector loads and later create their permutation in ++ vect_transform_strided_load (). */ ++ if (strided_load || slp_perm) ++ VEC_quick_push (tree, dr_chain, new_temp); ++ ++ /* Store vector loads in the corresponding SLP_NODE. */ ++ if (slp && !slp_perm) ++ VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), ++ new_stmt); ++ } + } + + if (slp && !slp_perm) +@@ -4345,12 +4526,9 @@ + { + if (strided_load) + { +- if (!vect_transform_strided_load (stmt, dr_chain, group_size, gsi)) +- return false; +- ++ if (!load_lanes_p) ++ vect_transform_strided_load (stmt, dr_chain, group_size, gsi); + *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); +- VEC_free (tree, heap, dr_chain); +- dr_chain = VEC_alloc (tree, heap, group_size); + } + else + { +@@ -4361,11 +4539,10 @@ + prev_stmt_info = vinfo_for_stmt (new_stmt); + } + } ++ if (dr_chain) ++ VEC_free (tree, heap, dr_chain); + } + +- if (dr_chain) +- VEC_free (tree, heap, dr_chain); +- + return true; + } + +@@ -4769,27 +4946,6 @@ + return false; + } + +- if (!PURE_SLP_STMT (stmt_info)) +- { +- /* Groups of strided accesses whose size is not a power of 2 are not +- vectorizable yet using loop-vectorization. Therefore, if this stmt +- feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and +- loop-based vectorized), the loop cannot be vectorized. 
*/ +- if (STMT_VINFO_STRIDED_ACCESS (stmt_info) +- && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt ( +- DR_GROUP_FIRST_DR (stmt_info)))) == -1) +- { +- if (vect_print_dump_info (REPORT_DETAILS)) +- { +- fprintf (vect_dump, "not vectorized: the size of group " +- "of strided accesses is not a power of 2"); +- print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); +- } +- +- return false; +- } +- } +- + return true; + } + + +=== modified file 'gcc/tree-vectorizer.h' +--- old/gcc/tree-vectorizer.h 2010-12-23 16:25:52 +0000 ++++ new/gcc/tree-vectorizer.h 2011-05-05 15:43:06 +0000 +@@ -788,9 +788,9 @@ + extern tree vectorizable_function (gimple, tree, tree); + extern void vect_model_simple_cost (stmt_vec_info, int, enum vect_def_type *, + slp_tree); +-extern void vect_model_store_cost (stmt_vec_info, int, enum vect_def_type, +- slp_tree); +-extern void vect_model_load_cost (stmt_vec_info, int, slp_tree); ++extern void vect_model_store_cost (stmt_vec_info, int, bool, ++ enum vect_def_type, slp_tree); ++extern void vect_model_load_cost (stmt_vec_info, int, bool, slp_tree); + extern void vect_finish_stmt_generation (gimple, gimple, + gimple_stmt_iterator *); + extern bool vect_mark_stmts_to_be_vectorized (loop_vec_info); +@@ -823,21 +823,22 @@ + extern bool vect_analyze_data_ref_accesses (loop_vec_info, bb_vec_info); + extern bool vect_prune_runtime_alias_test_list (loop_vec_info); + extern bool vect_analyze_data_refs (loop_vec_info, bb_vec_info, int *); +-extern tree vect_create_data_ref_ptr (gimple, struct loop *, tree, tree *, +- gimple *, bool, bool *); ++extern tree vect_create_data_ref_ptr (gimple, tree, struct loop *, tree, ++ tree *, gimple *, bool, bool *); + extern tree bump_vector_ptr (tree, gimple, gimple_stmt_iterator *, gimple, tree); + extern tree vect_create_destination_var (tree, tree); +-extern bool vect_strided_store_supported (tree); +-extern bool vect_strided_load_supported (tree); +-extern bool vect_permute_store_chain (VEC(tree,heap) *,unsigned int, gimple, ++extern bool vect_strided_store_supported (tree, unsigned HOST_WIDE_INT); ++extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT); ++extern bool vect_strided_load_supported (tree, unsigned HOST_WIDE_INT); ++extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT); ++extern void vect_permute_store_chain (VEC(tree,heap) *,unsigned int, gimple, + gimple_stmt_iterator *, VEC(tree,heap) **); + extern tree vect_setup_realignment (gimple, gimple_stmt_iterator *, tree *, + enum dr_alignment_support, tree, + struct loop **); +-extern bool vect_permute_load_chain (VEC(tree,heap) *,unsigned int, gimple, +- gimple_stmt_iterator *, VEC(tree,heap) **); +-extern bool vect_transform_strided_load (gimple, VEC(tree,heap) *, int, ++extern void vect_transform_strided_load (gimple, VEC(tree,heap) *, int, + gimple_stmt_iterator *); ++extern void vect_record_strided_load_vectors (gimple, VEC(tree,heap) *); + extern int vect_get_place_in_interleaving_chain (gimple, gimple); + extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); + extern tree vect_create_addr_base_for_vector_ref (gimple, gimple_seq *, + +=== modified file 'gcc/tree.c' +--- old/gcc/tree.c 2011-05-18 13:29:24 +0000 ++++ new/gcc/tree.c 2011-06-02 12:12:00 +0000 +@@ -7321,6 +7321,15 @@ + return build_array_type_1 (elt_type, index_type, false); + } + ++/* Return a representation of ELT_TYPE[NELTS], using indices of type ++ sizetype. 
*/ ++ ++tree ++build_array_type_nelts (tree elt_type, unsigned HOST_WIDE_INT nelts) ++{ ++ return build_array_type (elt_type, build_index_type (size_int (nelts - 1))); ++} ++ + /* Recursively examines the array elements of TYPE, until a non-array + element type is found. */ + + +=== modified file 'gcc/tree.h' +--- old/gcc/tree.h 2011-03-11 22:38:58 +0000 ++++ new/gcc/tree.h 2011-05-05 15:43:06 +0000 +@@ -4192,6 +4192,7 @@ + extern tree build_index_type (tree); + extern tree build_array_type (tree, tree); + extern tree build_nonshared_array_type (tree, tree); ++extern tree build_array_type_nelts (tree, unsigned HOST_WIDE_INT); + extern tree build_function_type (tree, tree); + extern tree build_function_type_list (tree, ...); + extern tree build_function_type_skip_args (tree, bitmap); + +=== modified file 'gcc/value-prof.c' +--- old/gcc/value-prof.c 2011-01-29 03:54:56 +0000 ++++ new/gcc/value-prof.c 2011-05-05 15:42:22 +0000 +@@ -1242,6 +1242,9 @@ + if (TREE_CODE (callee) == FUNCTION_DECL) + return false; + ++ if (gimple_call_internal_p (stmt)) ++ return false; ++ + histogram = gimple_histogram_value_of_type (cfun, stmt, HIST_TYPE_INDIR_CALL); + if (!histogram) + return false; +@@ -1630,6 +1633,7 @@ + tree callee; + + if (gimple_code (stmt) != GIMPLE_CALL ++ || gimple_call_internal_p (stmt) + || gimple_call_fndecl (stmt) != NULL_TREE) + return; + + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106754.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106754.patch new file mode 100644 index 0000000000..b64991836b --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106754.patch @@ -0,0 +1,329 @@ +2011-06-07 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF: + + 2011-06-07 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/arm.md (*maddhidi4tb, *maddhidi4tt): New define_insns. + (*maddhisi4tb, *maddhisi4tt): New define_insns. + + gcc/testsuite/ + * gcc.target/arm/smlatb-1.c: New file. + * gcc.target/arm/smlatt-1.c: New file. + * gcc.target/arm/smlaltb-1.c: New file. + * gcc.target/arm/smlaltt-1.c: New file. + +2011-06-07 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF: + + 2011-06-07 Bernd Schmidt <bernds@codesourcery.com> + Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * simplify-rtx.c (simplify_unary_operation_1): Canonicalize widening + multiplies. + * doc/md.texi (Canonicalization of Instructions): Document widening + multiply canonicalization. + + gcc/testsuite/ + * gcc.target/arm/mla-2.c: New test. 
+ +=== modified file 'gcc/config/arm/arm.md' +--- old/gcc/config/arm/arm.md 2011-05-13 13:42:39 +0000 ++++ new/gcc/config/arm/arm.md 2011-06-02 15:58:33 +0000 +@@ -1809,6 +1809,36 @@ + (set_attr "predicable" "yes")] + ) + ++;; Note: there is no maddhisi4ibt because this one is canonical form ++(define_insn "*maddhisi4tb" ++ [(set (match_operand:SI 0 "s_register_operand" "=r") ++ (plus:SI (mult:SI (ashiftrt:SI ++ (match_operand:SI 1 "s_register_operand" "r") ++ (const_int 16)) ++ (sign_extend:SI ++ (match_operand:HI 2 "s_register_operand" "r"))) ++ (match_operand:SI 3 "s_register_operand" "r")))] ++ "TARGET_DSP_MULTIPLY" ++ "smlatb%?\\t%0, %1, %2, %3" ++ [(set_attr "insn" "smlaxy") ++ (set_attr "predicable" "yes")] ++) ++ ++(define_insn "*maddhisi4tt" ++ [(set (match_operand:SI 0 "s_register_operand" "=r") ++ (plus:SI (mult:SI (ashiftrt:SI ++ (match_operand:SI 1 "s_register_operand" "r") ++ (const_int 16)) ++ (ashiftrt:SI ++ (match_operand:SI 2 "s_register_operand" "r") ++ (const_int 16))) ++ (match_operand:SI 3 "s_register_operand" "r")))] ++ "TARGET_DSP_MULTIPLY" ++ "smlatt%?\\t%0, %1, %2, %3" ++ [(set_attr "insn" "smlaxy") ++ (set_attr "predicable" "yes")] ++) ++ + (define_insn "*maddhidi4" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (plus:DI +@@ -1822,6 +1852,39 @@ + [(set_attr "insn" "smlalxy") + (set_attr "predicable" "yes")]) + ++;; Note: there is no maddhidi4ibt because this one is canonical form ++(define_insn "*maddhidi4tb" ++ [(set (match_operand:DI 0 "s_register_operand" "=r") ++ (plus:DI ++ (mult:DI (sign_extend:DI ++ (ashiftrt:SI ++ (match_operand:SI 1 "s_register_operand" "r") ++ (const_int 16))) ++ (sign_extend:DI ++ (match_operand:HI 2 "s_register_operand" "r"))) ++ (match_operand:DI 3 "s_register_operand" "0")))] ++ "TARGET_DSP_MULTIPLY" ++ "smlaltb%?\\t%Q0, %R0, %1, %2" ++ [(set_attr "insn" "smlalxy") ++ (set_attr "predicable" "yes")]) ++ ++(define_insn "*maddhidi4tt" ++ [(set (match_operand:DI 0 "s_register_operand" "=r") ++ (plus:DI ++ (mult:DI (sign_extend:DI ++ (ashiftrt:SI ++ (match_operand:SI 1 "s_register_operand" "r") ++ (const_int 16))) ++ (sign_extend:DI ++ (ashiftrt:SI ++ (match_operand:SI 2 "s_register_operand" "r") ++ (const_int 16)))) ++ (match_operand:DI 3 "s_register_operand" "0")))] ++ "TARGET_DSP_MULTIPLY" ++ "smlaltt%?\\t%Q0, %R0, %1, %2" ++ [(set_attr "insn" "smlalxy") ++ (set_attr "predicable" "yes")]) ++ + (define_expand "mulsf3" + [(set (match_operand:SF 0 "s_register_operand" "") + (mult:SF (match_operand:SF 1 "s_register_operand" "") + +=== modified file 'gcc/doc/md.texi' +--- old/gcc/doc/md.texi 2011-05-05 15:43:06 +0000 ++++ new/gcc/doc/md.texi 2011-06-07 11:18:20 +0000 +@@ -5929,6 +5929,23 @@ + will be written using @code{zero_extract} rather than the equivalent + @code{and} or @code{sign_extract} operations. + ++@cindex @code{mult}, canonicalization of ++@item ++@code{(sign_extend:@var{m1} (mult:@var{m2} (sign_extend:@var{m2} @var{x}) ++(sign_extend:@var{m2} @var{y})))} is converted to @code{(mult:@var{m1} ++(sign_extend:@var{m1} @var{x}) (sign_extend:@var{m1} @var{y}))}, and likewise ++for @code{zero_extend}. ++ ++@item ++@code{(sign_extend:@var{m1} (mult:@var{m2} (ashiftrt:@var{m2} ++@var{x} @var{s}) (sign_extend:@var{m2} @var{y})))} is converted ++to @code{(mult:@var{m1} (sign_extend:@var{m1} (ashiftrt:@var{m2} ++@var{x} @var{s})) (sign_extend:@var{m1} @var{y}))}, and likewise for ++patterns using @code{zero_extend} and @code{lshiftrt}. If the second ++operand of @code{mult} is also a shift, then that is extended also. 
++This transformation is only applied when it can be proven that the ++original operation had sufficient precision to prevent overflow. ++ + @end itemize + + Further canonicalization rules are defined in the function + +=== modified file 'gcc/simplify-rtx.c' +--- old/gcc/simplify-rtx.c 2011-05-27 14:31:18 +0000 ++++ new/gcc/simplify-rtx.c 2011-06-02 12:32:16 +0000 +@@ -1000,6 +1000,48 @@ + && GET_CODE (XEXP (XEXP (op, 0), 1)) == LABEL_REF) + return XEXP (op, 0); + ++ /* Extending a widening multiplication should be canonicalized to ++ a wider widening multiplication. */ ++ if (GET_CODE (op) == MULT) ++ { ++ rtx lhs = XEXP (op, 0); ++ rtx rhs = XEXP (op, 1); ++ enum rtx_code lcode = GET_CODE (lhs); ++ enum rtx_code rcode = GET_CODE (rhs); ++ ++ /* Widening multiplies usually extend both operands, but sometimes ++ they use a shift to extract a portion of a register. */ ++ if ((lcode == SIGN_EXTEND ++ || (lcode == ASHIFTRT && CONST_INT_P (XEXP (lhs, 1)))) ++ && (rcode == SIGN_EXTEND ++ || (rcode == ASHIFTRT && CONST_INT_P (XEXP (rhs, 1))))) ++ { ++ enum machine_mode lmode = GET_MODE (lhs); ++ enum machine_mode rmode = GET_MODE (rhs); ++ int bits; ++ ++ if (lcode == ASHIFTRT) ++ /* Number of bits not shifted off the end. */ ++ bits = GET_MODE_PRECISION (lmode) - INTVAL (XEXP (lhs, 1)); ++ else /* lcode == SIGN_EXTEND */ ++ /* Size of inner mode. */ ++ bits = GET_MODE_PRECISION (GET_MODE (XEXP (lhs, 0))); ++ ++ if (rcode == ASHIFTRT) ++ bits += GET_MODE_PRECISION (rmode) - INTVAL (XEXP (rhs, 1)); ++ else /* rcode == SIGN_EXTEND */ ++ bits += GET_MODE_PRECISION (GET_MODE (XEXP (rhs, 0))); ++ ++ /* We can only widen multiplies if the result is mathematiclly ++ equivalent. I.e. if overflow was impossible. */ ++ if (bits <= GET_MODE_PRECISION (GET_MODE (op))) ++ return simplify_gen_binary ++ (MULT, mode, ++ simplify_gen_unary (SIGN_EXTEND, mode, lhs, lmode), ++ simplify_gen_unary (SIGN_EXTEND, mode, rhs, rmode)); ++ } ++ } ++ + /* Check for a sign extension of a subreg of a promoted + variable, where the promotion is sign-extended, and the + target mode is the same as the variable's promotion. */ +@@ -1071,6 +1113,48 @@ + && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))) + return rtl_hooks.gen_lowpart_no_emit (mode, op); + ++ /* Extending a widening multiplication should be canonicalized to ++ a wider widening multiplication. */ ++ if (GET_CODE (op) == MULT) ++ { ++ rtx lhs = XEXP (op, 0); ++ rtx rhs = XEXP (op, 1); ++ enum rtx_code lcode = GET_CODE (lhs); ++ enum rtx_code rcode = GET_CODE (rhs); ++ ++ /* Widening multiplies usually extend both operands, but sometimes ++ they use a shift to extract a portion of a register. */ ++ if ((lcode == ZERO_EXTEND ++ || (lcode == LSHIFTRT && CONST_INT_P (XEXP (lhs, 1)))) ++ && (rcode == ZERO_EXTEND ++ || (rcode == LSHIFTRT && CONST_INT_P (XEXP (rhs, 1))))) ++ { ++ enum machine_mode lmode = GET_MODE (lhs); ++ enum machine_mode rmode = GET_MODE (rhs); ++ int bits; ++ ++ if (lcode == LSHIFTRT) ++ /* Number of bits not shifted off the end. */ ++ bits = GET_MODE_PRECISION (lmode) - INTVAL (XEXP (lhs, 1)); ++ else /* lcode == ZERO_EXTEND */ ++ /* Size of inner mode. */ ++ bits = GET_MODE_PRECISION (GET_MODE (XEXP (lhs, 0))); ++ ++ if (rcode == LSHIFTRT) ++ bits += GET_MODE_PRECISION (rmode) - INTVAL (XEXP (rhs, 1)); ++ else /* rcode == ZERO_EXTEND */ ++ bits += GET_MODE_PRECISION (GET_MODE (XEXP (rhs, 0))); ++ ++ /* We can only widen multiplies if the result is mathematiclly ++ equivalent. I.e. if overflow was impossible. 
*/ ++ if (bits <= GET_MODE_PRECISION (GET_MODE (op))) ++ return simplify_gen_binary ++ (MULT, mode, ++ simplify_gen_unary (ZERO_EXTEND, mode, lhs, lmode), ++ simplify_gen_unary (ZERO_EXTEND, mode, rhs, rmode)); ++ } ++ } ++ + /* (zero_extend:M (zero_extend:N <X>)) is (zero_extend:M <X>). */ + if (GET_CODE (op) == ZERO_EXTEND) + return simplify_gen_unary (ZERO_EXTEND, mode, XEXP (op, 0), + +=== added file 'gcc/testsuite/gcc.target/arm/mla-2.c' +--- old/gcc/testsuite/gcc.target/arm/mla-2.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/mla-2.c 2011-06-02 12:32:16 +0000 +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++long long foolong (long long x, short *a, short *b) ++{ ++ return x + *a * *b; ++} ++ ++/* { dg-final { scan-assembler "smlalbb" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/smlaltb-1.c' +--- old/gcc/testsuite/gcc.target/arm/smlaltb-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/smlaltb-1.c 2011-06-02 15:58:33 +0000 +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++long long int ++foo (long long x, int in) ++{ ++ short a = in & 0xffff; ++ short b = (in & 0xffff0000) >> 16; ++ ++ return x + b * a; ++} ++ ++/* { dg-final { scan-assembler "smlaltb" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/smlaltt-1.c' +--- old/gcc/testsuite/gcc.target/arm/smlaltt-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/smlaltt-1.c 2011-06-02 15:58:33 +0000 +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++long long int ++foo (long long x, int in1, int in2) ++{ ++ short a = (in1 & 0xffff0000) >> 16; ++ short b = (in2 & 0xffff0000) >> 16; ++ ++ return x + b * a; ++} ++ ++/* { dg-final { scan-assembler "smlaltt" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/smlatb-1.c' +--- old/gcc/testsuite/gcc.target/arm/smlatb-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/smlatb-1.c 2011-06-02 15:58:33 +0000 +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++int ++foo (int x, int in) ++{ ++ short a = in & 0xffff; ++ short b = (in & 0xffff0000) >> 16; ++ ++ return x + b * a; ++} ++ ++/* { dg-final { scan-assembler "smlatb" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/smlatt-1.c' +--- old/gcc/testsuite/gcc.target/arm/smlatt-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/smlatt-1.c 2011-06-02 15:58:33 +0000 +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++int ++foo (int x, int in1, int in2) ++{ ++ short a = (in1 & 0xffff0000) >> 16; ++ short b = (in2 & 0xffff0000) >> 16; ++ ++ return x + b * a; ++} ++ ++/* { dg-final { scan-assembler "smlatt" } } */ + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106755.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106755.patch new file mode 100644 index 0000000000..b8f587c9f4 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106755.patch @@ -0,0 +1,120 @@ +2011-06-10 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline: + gcc/ + 2011-06-02 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + * config/arm/neon.md (orndi3_neon): Actually split it. + + +2011-06-10 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline. 
+ gcc/ + 2011-05-26 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + * config/arm/neon.md ("orn<mode>3_neon"): Canonicalize not. + ("orndi3_neon"): Likewise. + ("bic<mode>3_neon"): Likewise. + + gcc/testsuite + 2011-05-26 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + * gcc.target/arm/neon-vorn-vbic.c: New test. + +=== modified file 'gcc/config/arm/neon.md' +--- old/gcc/config/arm/neon.md 2011-06-02 12:12:00 +0000 ++++ new/gcc/config/arm/neon.md 2011-06-04 00:04:47 +0000 +@@ -783,30 +783,57 @@ + + (define_insn "orn<mode>3_neon" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") +- (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w") +- (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))] ++ (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w")) ++ (match_operand:VDQ 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2" + [(set_attr "neon_type" "neon_int_1")] + ) + +-(define_insn "orndi3_neon" +- [(set (match_operand:DI 0 "s_register_operand" "=w,?=&r,?&r") +- (ior:DI (match_operand:DI 1 "s_register_operand" "w,r,0") +- (not:DI (match_operand:DI 2 "s_register_operand" "w,0,r"))))] ++;; TODO: investigate whether we should disable ++;; this and bicdi3_neon for the A8 in line with the other ++;; changes above. ++(define_insn_and_split "orndi3_neon" ++ [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r") ++ (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r")) ++ (match_operand:DI 1 "s_register_operand" "w,r,r,0")))] + "TARGET_NEON" + "@ + vorn\t%P0, %P1, %P2 + # ++ # + #" +- [(set_attr "neon_type" "neon_int_1,*,*") +- (set_attr "length" "*,8,8")] ++ "reload_completed && ++ (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))" ++ [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1))) ++ (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))] ++ " ++ { ++ if (TARGET_THUMB2) ++ { ++ operands[3] = gen_highpart (SImode, operands[0]); ++ operands[0] = gen_lowpart (SImode, operands[0]); ++ operands[4] = gen_highpart (SImode, operands[2]); ++ operands[2] = gen_lowpart (SImode, operands[2]); ++ operands[5] = gen_highpart (SImode, operands[1]); ++ operands[1] = gen_lowpart (SImode, operands[1]); ++ } ++ else ++ { ++ emit_insn (gen_one_cmpldi2 (operands[0], operands[2])); ++ emit_insn (gen_iordi3 (operands[0], operands[1], operands[0])); ++ DONE; ++ } ++ }" ++ [(set_attr "neon_type" "neon_int_1,*,*,*") ++ (set_attr "length" "*,16,8,8") ++ (set_attr "arch" "any,a,t2,t2")] + ) + + (define_insn "bic<mode>3_neon" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") +- (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w") +- (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))] ++ (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w")) ++ (match_operand:VDQ 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2" + [(set_attr "neon_type" "neon_int_1")] + +=== added file 'gcc/testsuite/gcc.target/arm/neon-vorn-vbic.c' +--- old/gcc/testsuite/gcc.target/arm/neon-vorn-vbic.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/neon-vorn-vbic.c 2011-06-03 23:50:02 +0000 +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++/* { dg-add-options arm_neon } */ ++ ++void bor (int *__restrict__ c, int *__restrict__ a, int *__restrict__ b) ++{ ++ int i; ++ for (i = 0; i < 9; i++) ++ c[i] = b[i] | (~a[i]); ++} ++void bic (int 
*__restrict__ c, int *__restrict__ a, int *__restrict__ b) ++{ ++ int i; ++ for (i = 0; i < 9; i++) ++ c[i] = b[i] & (~a[i]); ++} ++ ++/* { dg-final { scan-assembler "vorn\\t" } } */ ++/* { dg-final { scan-assembler "vbic\\t" } } */ + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106759.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106759.patch new file mode 100644 index 0000000000..c515767946 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106759.patch @@ -0,0 +1,545 @@ +2011-06-20 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline. + 2011-06-03 Julian Brown <julian@codesourcery.com> + + * config/arm/arm-cores.def (strongarm, strongarm110, strongarm1100) + (strongarm1110): Use strongarm tuning. + * config/arm/arm-protos.h (tune_params): Add max_insns_skipped + field. + * config/arm/arm.c (arm_strongarm_tune): New. + (arm_slowmul_tune, arm_fastmul_tune, arm_xscale_tune, arm_9e_tune) + (arm_v6t2_tune, arm_cortex_tune, arm_cortex_a5_tune) + (arm_cortex_a9_tune, arm_fa726te_tune): Add max_insns_skipped field + setting, using previous defaults or 1 for Cortex-A5. + (arm_option_override): Set max_insns_skipped from current tuning. + +2011-06-14 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline. + 2011-06-02 Julian Brown <julian@codesourcery.com> + + * config/arm/arm-cores.def (cortex-a5): Use cortex_a5 tuning. + * config/arm/arm.c (arm_cortex_a5_branch_cost): New. + (arm_cortex_a5_tune): New. + + 2011-06-02 Julian Brown <julian@codesourcery.com> + + * config/arm/arm-protos.h (tune_params): Add branch_cost hook. + * config/arm/arm.c (arm_default_branch_cost): New. + (arm_slowmul_tune, arm_fastmul_tune, arm_xscale_tune, arm_9e_tune) + (arm_v6t2_tune, arm_cortex_tune, arm_cortex_a9_tune) + (arm_fa726_tune): Set branch_cost field using + arm_default_branch_cost. + * config/arm/arm.h (BRANCH_COST): Use branch_cost hook from + current_tune structure. + * dojump.c (tm_p.h): Include file. + + 2011-06-02 Julian Brown <julian@codesourcery.com> + + * config/arm/arm-cores.def (arm1156t2-s, arm1156t2f-s): Use v6t2 + tuning. + (cortex-a5, cortex-a8, cortex-a15, cortex-r4, cortex-r4f, cortex-m4) + (cortex-m3, cortex-m1, cortex-m0): Use cortex tuning. + * config/arm/arm-protos.h (tune_params): Add prefer_constant_pool + field. + * config/arm/arm.c (arm_slowmul_tune, arm_fastmul_tune) + (arm_xscale_tune, arm_9e_tune, arm_cortex_a9_tune) + (arm_fa726te_tune): Add prefer_constant_pool setting. + (arm_v6t2_tune, arm_cortex_tune): New. + * config/arm/arm.h (TARGET_USE_MOVT): Make dependent on + prefer_constant_pool setting. + +2011-06-14 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline + 2011-06-01 Paul Brook <paul@cpodesourcery.com> + + * config/arm/arm-cores.def: Add cortex-r5. Add DIV flags to + Cortex-A15. + * config/arm/arm-tune.md: Regenerate. + * config/arm/arm.c (FL_DIV): Rename... + (FL_THUMB_DIV): ... to this. + (FL_ARM_DIV): Define. + (FL_FOR_ARCH7R, FL_FOR_ARCH7M): Use FL_THUMB_DIV. + (arm_arch_hwdiv): Remove. + (arm_arch_thumb_hwdiv, arm_arch_arm_hwdiv): New variables. + (arm_issue_rate): Add cortexr5. + * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Set + __ARM_ARCH_EXT_IDIV__. + (TARGET_IDIV): Define. + (arm_arch_hwdiv): Remove. + (arm_arch_arm_hwdiv, arm_arch_thumb_hwdiv): New prototypes. + * config/arm/arm.md (tune_cortexr4): Add cortexr5. + (divsi3, udivsi3): New patterns. 
+ * config/arm/thumb2.md (divsi3, udivsi3): Remove. + * doc/invoke.texi: Document ARM -mcpu=cortex-r5 + +=== modified file 'gcc/config/arm/arm-cores.def' +--- old/gcc/config/arm/arm-cores.def 2011-01-03 20:52:22 +0000 ++++ new/gcc/config/arm/arm-cores.def 2011-06-14 16:00:30 +0000 +@@ -70,10 +70,10 @@ + /* V4 Architecture Processors */ + ARM_CORE("arm8", arm8, 4, FL_MODE26 | FL_LDSCHED, fastmul) + ARM_CORE("arm810", arm810, 4, FL_MODE26 | FL_LDSCHED, fastmul) +-ARM_CORE("strongarm", strongarm, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) +-ARM_CORE("strongarm110", strongarm110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) +-ARM_CORE("strongarm1100", strongarm1100, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) +-ARM_CORE("strongarm1110", strongarm1110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) ++ARM_CORE("strongarm", strongarm, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) ++ARM_CORE("strongarm110", strongarm110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) ++ARM_CORE("strongarm1100", strongarm1100, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) ++ARM_CORE("strongarm1110", strongarm1110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) + ARM_CORE("fa526", fa526, 4, FL_LDSCHED, fastmul) + ARM_CORE("fa626", fa626, 4, FL_LDSCHED, fastmul) + +@@ -122,15 +122,16 @@ + ARM_CORE("arm1176jzf-s", arm1176jzfs, 6ZK, FL_LDSCHED | FL_VFPV2, 9e) + ARM_CORE("mpcorenovfp", mpcorenovfp, 6K, FL_LDSCHED, 9e) + ARM_CORE("mpcore", mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e) +-ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, 9e) +-ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, 9e) +-ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, 9e) +-ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, 9e) ++ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, v6t2) ++ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, v6t2) ++ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, cortex_a5) ++ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex) + ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9) +-ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED, 9e) +-ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, 9e) +-ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, 9e) +-ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, 9e) +-ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, 9e) +-ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, 9e) +-ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, 9e) ++ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex) ++ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, cortex) ++ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, cortex) ++ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) ++ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, cortex) ++ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex) ++ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, cortex) ++ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, cortex) + +=== modified file 'gcc/config/arm/arm-protos.h' +--- old/gcc/config/arm/arm-protos.h 2011-05-03 15:17:25 +0000 ++++ new/gcc/config/arm/arm-protos.h 2011-06-14 16:00:30 +0000 +@@ -219,9 +219,14 @@ + bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool); + bool (*sched_adjust_cost) (rtx, rtx, rtx, int *); + int constant_limit; ++ /* Maximum number of instructions to conditionalise in ++ arm_final_prescan_insn. 
*/ ++ int max_insns_skipped; + int num_prefetch_slots; + int l1_cache_size; + int l1_cache_line_size; ++ bool prefer_constant_pool; ++ int (*branch_cost) (bool, bool); + }; + + extern const struct tune_params *current_tune; + +=== modified file 'gcc/config/arm/arm-tune.md' +--- old/gcc/config/arm/arm-tune.md 2010-12-20 17:48:51 +0000 ++++ new/gcc/config/arm/arm-tune.md 2011-06-14 14:37:30 +0000 +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from arm-cores.def + (define_attr "tune" +- "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexm4,cortexm3,cortexm1,cortexm0" ++ "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0" + (const (symbol_ref "((enum attr_tune) arm_tune)"))) + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-05-11 14:49:48 +0000 ++++ new/gcc/config/arm/arm.c 2011-06-14 16:00:30 +0000 +@@ -255,6 +255,8 @@ + static void arm_conditional_register_usage (void); + static reg_class_t arm_preferred_rename_class (reg_class_t rclass); + static unsigned int arm_autovectorize_vector_sizes (void); ++static int arm_default_branch_cost (bool, bool); ++static int arm_cortex_a5_branch_cost (bool, bool); + + + /* Table of machine attributes. */ +@@ -672,12 +674,13 @@ + #define FL_THUMB2 (1 << 16) /* Thumb-2. */ + #define FL_NOTM (1 << 17) /* Instructions not present in the 'M' + profile. */ +-#define FL_DIV (1 << 18) /* Hardware divide. */ ++#define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */ + #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */ + #define FL_NEON (1 << 20) /* Neon instructions. */ + #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M + architecture. */ + #define FL_ARCH7 (1 << 22) /* Architecture 7. */ ++#define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */ + + #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". 
*/ + +@@ -704,8 +707,8 @@ + #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM) + #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7) + #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K) +-#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV) +-#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV) ++#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV) ++#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV) + #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM) + + /* The bits in this mask specify which +@@ -791,7 +794,8 @@ + int arm_arch_thumb2; + + /* Nonzero if chip supports integer division instruction. */ +-int arm_arch_hwdiv; ++int arm_arch_arm_hwdiv; ++int arm_arch_thumb_hwdiv; + + /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, + we must report the mode of the memory reference from +@@ -864,48 +868,117 @@ + { + arm_slowmul_rtx_costs, + NULL, +- 3, +- ARM_PREFETCH_NOT_BENEFICIAL ++ 3, /* Constant limit. */ ++ 5, /* Max cond insns. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, ++ true, /* Prefer constant pool. */ ++ arm_default_branch_cost + }; + + const struct tune_params arm_fastmul_tune = + { + arm_fastmul_rtx_costs, + NULL, +- 1, +- ARM_PREFETCH_NOT_BENEFICIAL ++ 1, /* Constant limit. */ ++ 5, /* Max cond insns. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, ++ true, /* Prefer constant pool. */ ++ arm_default_branch_cost ++}; ++ ++/* StrongARM has early execution of branches, so a sequence that is worth ++ skipping is shorter. Set max_insns_skipped to a lower value. */ ++ ++const struct tune_params arm_strongarm_tune = ++{ ++ arm_fastmul_rtx_costs, ++ NULL, ++ 1, /* Constant limit. */ ++ 3, /* Max cond insns. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, ++ true, /* Prefer constant pool. */ ++ arm_default_branch_cost + }; + + const struct tune_params arm_xscale_tune = + { + arm_xscale_rtx_costs, + xscale_sched_adjust_cost, +- 2, +- ARM_PREFETCH_NOT_BENEFICIAL ++ 2, /* Constant limit. */ ++ 3, /* Max cond insns. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, ++ true, /* Prefer constant pool. */ ++ arm_default_branch_cost + }; + + const struct tune_params arm_9e_tune = + { + arm_9e_rtx_costs, + NULL, +- 1, +- ARM_PREFETCH_NOT_BENEFICIAL ++ 1, /* Constant limit. */ ++ 5, /* Max cond insns. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, ++ true, /* Prefer constant pool. */ ++ arm_default_branch_cost ++}; ++ ++const struct tune_params arm_v6t2_tune = ++{ ++ arm_9e_rtx_costs, ++ NULL, ++ 1, /* Constant limit. */ ++ 5, /* Max cond insns. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, ++ false, /* Prefer constant pool. */ ++ arm_default_branch_cost ++}; ++ ++/* Generic Cortex tuning. Use more specific tunings if appropriate. */ ++const struct tune_params arm_cortex_tune = ++{ ++ arm_9e_rtx_costs, ++ NULL, ++ 1, /* Constant limit. */ ++ 5, /* Max cond insns. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, ++ false, /* Prefer constant pool. */ ++ arm_default_branch_cost ++}; ++ ++/* Branches can be dual-issued on Cortex-A5, so conditional execution is ++ less appealing. Set max_insns_skipped to a low value. */ ++ ++const struct tune_params arm_cortex_a5_tune = ++{ ++ arm_9e_rtx_costs, ++ NULL, ++ 1, /* Constant limit. */ ++ 1, /* Max cond insns. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, ++ false, /* Prefer constant pool. */ ++ arm_cortex_a5_branch_cost + }; + + const struct tune_params arm_cortex_a9_tune = + { + arm_9e_rtx_costs, + cortex_a9_sched_adjust_cost, +- 1, +- ARM_PREFETCH_BENEFICIAL(4,32,32) ++ 1, /* Constant limit. */ ++ 5, /* Max cond insns. */ ++ ARM_PREFETCH_BENEFICIAL(4,32,32), ++ false, /* Prefer constant pool. 
*/ ++ arm_default_branch_cost + }; + + const struct tune_params arm_fa726te_tune = + { + arm_9e_rtx_costs, + fa726te_sched_adjust_cost, +- 1, +- ARM_PREFETCH_NOT_BENEFICIAL ++ 1, /* Constant limit. */ ++ 5, /* Max cond insns. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, ++ true, /* Prefer constant pool. */ ++ arm_default_branch_cost + }; + + +@@ -1711,7 +1784,8 @@ + arm_tune_wbuf = (tune_flags & FL_WBUF) != 0; + arm_tune_xscale = (tune_flags & FL_XSCALE) != 0; + arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0; +- arm_arch_hwdiv = (insn_flags & FL_DIV) != 0; ++ arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0; ++ arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0; + arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; + + /* If we are not using the default (ARM mode) section anchor offset +@@ -1991,12 +2065,7 @@ + max_insns_skipped = 6; + } + else +- { +- /* StrongARM has early execution of branches, so a sequence +- that is worth skipping is shorter. */ +- if (arm_tune_strongarm) +- max_insns_skipped = 3; +- } ++ max_insns_skipped = current_tune->max_insns_skipped; + + /* Hot/Cold partitioning is not currently supported, since we can't + handle literal pool placement in that case. */ +@@ -8211,6 +8280,21 @@ + return cost; + } + ++static int ++arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED) ++{ ++ if (TARGET_32BIT) ++ return (TARGET_THUMB2 && !speed_p) ? 1 : 4; ++ else ++ return (optimize > 0) ? 2 : 0; ++} ++ ++static int ++arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p) ++{ ++ return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p); ++} ++ + static int fp_consts_inited = 0; + + /* Only zero is valid for VFP. Other values are also valid for FPA. */ +@@ -23123,6 +23207,7 @@ + { + case cortexr4: + case cortexr4f: ++ case cortexr5: + case cortexa5: + case cortexa8: + case cortexa9: + +=== modified file 'gcc/config/arm/arm.h' +--- old/gcc/config/arm/arm.h 2011-06-02 12:12:00 +0000 ++++ new/gcc/config/arm/arm.h 2011-06-14 14:53:07 +0000 +@@ -101,6 +101,8 @@ + builtin_define ("__ARM_PCS"); \ + builtin_define ("__ARM_EABI__"); \ + } \ ++ if (TARGET_IDIV) \ ++ builtin_define ("__ARM_ARCH_EXT_IDIV__"); \ + } while (0) + + /* The various ARM cores. */ +@@ -282,7 +284,8 @@ + (TARGET_32BIT && arm_arch6 && (arm_arch_notm || arm_arch7em)) + + /* Should MOVW/MOVT be used in preference to a constant pool. */ +-#define TARGET_USE_MOVT (arm_arch_thumb2 && !optimize_size) ++#define TARGET_USE_MOVT \ ++ (arm_arch_thumb2 && !optimize_size && !current_tune->prefer_constant_pool) + + /* We could use unified syntax for arm mode, but for now we just use it + for Thumb-2. */ +@@ -303,6 +306,10 @@ + /* Nonzero if this chip supports ldrex{bhd} and strex{bhd}. */ + #define TARGET_HAVE_LDREXBHD ((arm_arch6k && TARGET_ARM) || arm_arch7) + ++/* Nonzero if integer division instructions supported. */ ++#define TARGET_IDIV ((TARGET_ARM && arm_arch_arm_hwdiv) \ ++ || (TARGET_THUMB2 && arm_arch_thumb_hwdiv)) ++ + /* True iff the full BPABI is being used. If TARGET_BPABI is true, + then TARGET_AAPCS_BASED must be true -- but the converse does not + hold. TARGET_BPABI implies the use of the BPABI runtime library, +@@ -487,8 +494,11 @@ + /* Nonzero if chip supports Thumb 2. */ + extern int arm_arch_thumb2; + +-/* Nonzero if chip supports integer division instruction. */ +-extern int arm_arch_hwdiv; ++/* Nonzero if chip supports integer division instruction in ARM mode. */ ++extern int arm_arch_arm_hwdiv; ++ ++/* Nonzero if chip supports integer division instruction in Thumb mode. 
*/ ++extern int arm_arch_thumb_hwdiv; + + #ifndef TARGET_DEFAULT + #define TARGET_DEFAULT (MASK_APCS_FRAME) +@@ -2018,8 +2028,8 @@ + /* Try to generate sequences that don't involve branches, we can then use + conditional instructions */ + #define BRANCH_COST(speed_p, predictable_p) \ +- (TARGET_32BIT ? (TARGET_THUMB2 && !speed_p ? 1 : 4) \ +- : (optimize > 0 ? 2 : 0)) ++ (current_tune->branch_cost (speed_p, predictable_p)) ++ + + /* Position Independent Code. */ + /* We decide which register to use based on the compilation options and + +=== modified file 'gcc/config/arm/arm.md' +--- old/gcc/config/arm/arm.md 2011-06-02 15:58:33 +0000 ++++ new/gcc/config/arm/arm.md 2011-06-14 14:37:30 +0000 +@@ -490,7 +490,7 @@ + + (define_attr "tune_cortexr4" "yes,no" + (const (if_then_else +- (eq_attr "tune" "cortexr4,cortexr4f") ++ (eq_attr "tune" "cortexr4,cortexr4f,cortexr5") + (const_string "yes") + (const_string "no")))) + +@@ -3738,6 +3738,28 @@ + (set_attr "predicable" "yes")] + ) + ++ ++;; Division instructions ++(define_insn "divsi3" ++ [(set (match_operand:SI 0 "s_register_operand" "=r") ++ (div:SI (match_operand:SI 1 "s_register_operand" "r") ++ (match_operand:SI 2 "s_register_operand" "r")))] ++ "TARGET_IDIV" ++ "sdiv%?\t%0, %1, %2" ++ [(set_attr "predicable" "yes") ++ (set_attr "insn" "sdiv")] ++) ++ ++(define_insn "udivsi3" ++ [(set (match_operand:SI 0 "s_register_operand" "=r") ++ (udiv:SI (match_operand:SI 1 "s_register_operand" "r") ++ (match_operand:SI 2 "s_register_operand" "r")))] ++ "TARGET_IDIV" ++ "udiv%?\t%0, %1, %2" ++ [(set_attr "predicable" "yes") ++ (set_attr "insn" "udiv")] ++) ++ + + ;; Unary arithmetic insns + + +=== modified file 'gcc/config/arm/thumb2.md' +--- old/gcc/config/arm/thumb2.md 2011-05-11 07:15:47 +0000 ++++ new/gcc/config/arm/thumb2.md 2011-06-14 14:37:30 +0000 +@@ -779,26 +779,6 @@ + (set_attr "length" "2")] + ) + +-(define_insn "divsi3" +- [(set (match_operand:SI 0 "s_register_operand" "=r") +- (div:SI (match_operand:SI 1 "s_register_operand" "r") +- (match_operand:SI 2 "s_register_operand" "r")))] +- "TARGET_THUMB2 && arm_arch_hwdiv" +- "sdiv%?\t%0, %1, %2" +- [(set_attr "predicable" "yes") +- (set_attr "insn" "sdiv")] +-) +- +-(define_insn "udivsi3" +- [(set (match_operand:SI 0 "s_register_operand" "=r") +- (udiv:SI (match_operand:SI 1 "s_register_operand" "r") +- (match_operand:SI 2 "s_register_operand" "r")))] +- "TARGET_THUMB2 && arm_arch_hwdiv" +- "udiv%?\t%0, %1, %2" +- [(set_attr "predicable" "yes") +- (set_attr "insn" "udiv")] +-) +- + (define_insn "*thumb2_subsi_short" + [(set (match_operand:SI 0 "low_register_operand" "=l") + (minus:SI (match_operand:SI 1 "low_register_operand" "l") + +=== modified file 'gcc/doc/invoke.texi' +--- old/gcc/doc/invoke.texi 2011-05-11 07:15:47 +0000 ++++ new/gcc/doc/invoke.texi 2011-06-14 14:37:30 +0000 +@@ -10208,7 +10208,8 @@ + @samp{arm1136j-s}, @samp{arm1136jf-s}, @samp{mpcore}, @samp{mpcorenovfp}, + @samp{arm1156t2-s}, @samp{arm1156t2f-s}, @samp{arm1176jz-s}, @samp{arm1176jzf-s}, + @samp{cortex-a5}, @samp{cortex-a8}, @samp{cortex-a9}, @samp{cortex-a15}, +-@samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-m4}, @samp{cortex-m3}, ++@samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-r5}, ++@samp{cortex-m4}, @samp{cortex-m3}, + @samp{cortex-m1}, + @samp{cortex-m0}, + @samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}. 
+ +=== modified file 'gcc/dojump.c' +--- old/gcc/dojump.c 2010-05-19 19:09:57 +0000 ++++ new/gcc/dojump.c 2011-06-14 14:53:07 +0000 +@@ -36,6 +36,7 @@ + #include "ggc.h" + #include "basic-block.h" + #include "output.h" ++#include "tm_p.h" + + static bool prefer_and_bit_test (enum machine_mode, int); + static void do_jump_by_parts_greater (tree, tree, int, rtx, rtx, int); + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106762.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106762.patch new file mode 100644 index 0000000000..bbf9819ecd --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106762.patch @@ -0,0 +1,1355 @@ +2011-06-28 Ira Rosen <ira.rosen@linaro.org> + + Backport from FSF: + + 2011-06-07 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vectorizer.h (vect_recog_func_ptr): Make last argument to be + a pointer. + * tree-vect-patterns.c (vect_recog_widen_sum_pattern, + vect_recog_widen_mult_pattern, vect_recog_dot_prod_pattern, + vect_recog_pow_pattern): Likewise. + (vect_pattern_recog_1): Remove declaration. + (widened_name_p): Remove declaration. Add new argument to specify + whether to check that both types are either signed or unsigned. + (vect_recog_widen_mult_pattern): Update documentation. Handle + unsigned patterns and multiplication by constants. + (vect_pattern_recog_1): Update vect_recog_func references. Use + statement information from the statement returned from pattern + detection functions. + (vect_pattern_recog): Update vect_recog_func reference. + * tree-vect-stmts.c (vectorizable_type_promotion): For widening + multiplication by a constant use the type of the other operand. + + gcc/testsuite + * lib/target-supports.exp + (check_effective_target_vect_widen_mult_qi_to_hi): + Add NEON as supporting target. + (check_effective_target_vect_widen_mult_hi_to_si): Likewise. + (check_effective_target_vect_widen_mult_qi_to_hi_pattern): New. + (check_effective_target_vect_widen_mult_hi_to_si_pattern): New. + * gcc.dg/vect/vect-widen-mult-u8.c: Expect to be vectorized + using widening multiplication on targets that support it. + * gcc.dg/vect/vect-widen-mult-u16.c: Likewise. + * gcc.dg/vect/vect-widen-mult-const-s16.c: New test. + * gcc.dg/vect/vect-widen-mult-const-u16.c: New test. + + and + + 2011-06-15 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vect-loop-manip.c (remove_dead_stmts_from_loop): Remove. + (slpeel_tree_peel_loop_to_edge): Don't call + remove_dead_stmts_from_loop. + * tree-vect-loop.c (vect_determine_vectorization_factor): Don't + remove irrelevant pattern statements. For irrelevant statements + check if it is the last statement of a detected pattern, use + corresponding pattern statement instead. + (destroy_loop_vec_info): No need to remove pattern statements, + only free stmt_vec_info. + (vect_transform_loop): For irrelevant statements check if it is + the last statement of a detected pattern, use corresponding + pattern statement instead. + * tree-vect-patterns.c (vect_pattern_recog_1): Don't insert + pattern statements. Set basic block for the new statement. + (vect_pattern_recog): Update documentation. + * tree-vect-stmts.c (vect_mark_stmts_to_be_vectorized): Scan + operands of pattern statements. + (vectorizable_call): Fix printing. In case of a pattern statement + use the lhs of the original statement when creating a dummy + statement to replace the original call. 
+ (vect_analyze_stmt): For irrelevant statements check if it is + the last statement of a detected pattern, use corresponding + pattern statement instead. + * tree-vect-slp.c (vect_schedule_slp_instance): For pattern + statements use gsi of the original statement. + + and + 2011-06-21 Ira Rosen <ira.rosen@linaro.org> + + PR tree-optimization/49478 + gcc/ + + * tree-vect-loop.c (vectorizable_reduction): Handle DOT_PROD_EXPR + with constant operand. + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c 2011-06-19 10:59:13 +0000 +@@ -0,0 +1,60 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include "tree-vect.h" ++#include <stdlib.h> ++ ++#define N 32 ++ ++__attribute__ ((noinline)) void ++foo (int *__restrict a, ++ short *__restrict b, ++ int n) ++{ ++ int i; ++ ++ for (i = 0; i < n; i++) ++ a[i] = b[i] * 2333; ++ ++ for (i = 0; i < n; i++) ++ if (a[i] != b[i] * 2333) ++ abort (); ++} ++ ++__attribute__ ((noinline)) void ++bar (int *__restrict a, ++ short *__restrict b, ++ int n) ++{ ++ int i; ++ ++ for (i = 0; i < n; i++) ++ a[i] = b[i] * (short) 2333; ++ ++ for (i = 0; i < n; i++) ++ if (a[i] != b[i] * (short) 2333) ++ abort (); ++} ++ ++int main (void) ++{ ++ int i; ++ int a[N]; ++ short b[N]; ++ ++ for (i = 0; i < N; i++) ++ { ++ a[i] = 0; ++ b[i] = i; ++ __asm__ volatile (""); ++ } ++ ++ foo (a, b, N); ++ bar (a, b, N); ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_widen_mult_hi_to_si } } } */ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c 2011-06-19 10:59:13 +0000 +@@ -0,0 +1,77 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include "tree-vect.h" ++#include <stdlib.h> ++ ++#define N 32 ++ ++__attribute__ ((noinline)) void ++foo (unsigned int *__restrict a, ++ unsigned short *__restrict b, ++ int n) ++{ ++ int i; ++ ++ for (i = 0; i < n; i++) ++ a[i] = b[i] * 2333; ++ ++ for (i = 0; i < n; i++) ++ if (a[i] != b[i] * 2333) ++ abort (); ++} ++ ++__attribute__ ((noinline)) void ++bar (unsigned int *__restrict a, ++ unsigned short *__restrict b, ++ int n) ++{ ++ int i; ++ ++ for (i = 0; i < n; i++) ++ a[i] = (unsigned short) 2333 * b[i]; ++ ++ for (i = 0; i < n; i++) ++ if (a[i] != b[i] * (unsigned short) 2333) ++ abort (); ++} ++ ++__attribute__ ((noinline)) void ++baz (unsigned int *__restrict a, ++ unsigned short *__restrict b, ++ int n) ++{ ++ int i; ++ ++ for (i = 0; i < n; i++) ++ a[i] = b[i] * 233333333; ++ ++ for (i = 0; i < n; i++) ++ if (a[i] != b[i] * 233333333) ++ abort (); ++} ++ ++ ++int main (void) ++{ ++ int i; ++ unsigned int a[N]; ++ unsigned short b[N]; ++ ++ for (i = 0; i < N; i++) ++ { ++ a[i] = 0; ++ b[i] = i; ++ __asm__ volatile (""); ++ } ++ ++ foo (a, b, N); ++ bar (a, b, N); ++ baz (a, b, N); ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" { target vect_widen_mult_hi_to_si } } } */ ++/* { 
dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c 2010-05-27 12:23:45 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c 2011-06-19 10:59:13 +0000 +@@ -9,13 +9,11 @@ + unsigned short Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); + unsigned int result[N]; + +-/* short->int widening-mult */ ++/* unsigned short->unsigned int widening-mult. */ + __attribute__ ((noinline)) int + foo1(int len) { + int i; + +- /* Not vectorized because X[i] and Y[i] are casted to 'int' +- so the widening multiplication pattern is not recognized. */ + for (i=0; i<len; i++) { + result[i] = (unsigned int)(X[i] * Y[i]); + } +@@ -43,8 +41,8 @@ + return 0; + } + +-/*The induction loop is vectorized */ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail *-*-* } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c 2009-05-08 12:39:01 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c 2011-06-19 10:59:13 +0000 +@@ -9,7 +9,7 @@ + unsigned char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); + unsigned short result[N]; + +-/* char->short widening-mult */ ++/* unsigned char-> unsigned short widening-mult. 
*/ + __attribute__ ((noinline)) int + foo1(int len) { + int i; +@@ -28,8 +28,7 @@ + for (i=0; i<N; i++) { + X[i] = i; + Y[i] = 64-i; +- if (i%4 == 0) +- X[i] = 5; ++ __asm__ volatile (""); + } + + foo1 (N); +@@ -43,5 +42,7 @@ + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_qi_to_hi || vect_unpack } } } } */ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_qi_to_hi_pattern } } } */ ++/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_qi_to_hi_pattern } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/lib/target-supports.exp' +--- old/gcc/testsuite/lib/target-supports.exp 2011-06-02 12:12:00 +0000 ++++ new/gcc/testsuite/lib/target-supports.exp 2011-06-19 10:59:13 +0000 +@@ -2663,7 +2663,8 @@ + } else { + set et_vect_widen_mult_qi_to_hi_saved 0 + } +- if { [istarget powerpc*-*-*] } { ++ if { [istarget powerpc*-*-*] ++ || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { + set et_vect_widen_mult_qi_to_hi_saved 1 + } + } +@@ -2696,7 +2697,8 @@ + || [istarget spu-*-*] + || [istarget ia64-*-*] + || [istarget i?86-*-*] +- || [istarget x86_64-*-*] } { ++ || [istarget x86_64-*-*] ++ || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { + set et_vect_widen_mult_hi_to_si_saved 1 + } + } +@@ -2705,6 +2707,52 @@ + } + + # Return 1 if the target plus current options supports a vector ++# widening multiplication of *char* args into *short* result, 0 otherwise. ++# ++# This won't change for different subtargets so cache the result. ++ ++proc check_effective_target_vect_widen_mult_qi_to_hi_pattern { } { ++ global et_vect_widen_mult_qi_to_hi_pattern ++ ++ if [info exists et_vect_widen_mult_qi_to_hi_pattern_saved] { ++ verbose "check_effective_target_vect_widen_mult_qi_to_hi_pattern: using cached result" 2 ++ } else { ++ set et_vect_widen_mult_qi_to_hi_pattern_saved 0 ++ if { [istarget powerpc*-*-*] ++ || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { ++ set et_vect_widen_mult_qi_to_hi_pattern_saved 1 ++ } ++ } ++ verbose "check_effective_target_vect_widen_mult_qi_to_hi_pattern: returning $et_vect_widen_mult_qi_to_hi_pattern_saved" 2 ++ return $et_vect_widen_mult_qi_to_hi_pattern_saved ++} ++ ++# Return 1 if the target plus current options supports a vector ++# widening multiplication of *short* args into *int* result, 0 otherwise. ++# ++# This won't change for different subtargets so cache the result. ++ ++proc check_effective_target_vect_widen_mult_hi_to_si_pattern { } { ++ global et_vect_widen_mult_hi_to_si_pattern ++ ++ if [info exists et_vect_widen_mult_hi_to_si_pattern_saved] { ++ verbose "check_effective_target_vect_widen_mult_hi_to_si_pattern: using cached result" 2 ++ } else { ++ set et_vect_widen_mult_hi_to_si_pattern_saved 0 ++ if { [istarget powerpc*-*-*] ++ || [istarget spu-*-*] ++ || [istarget ia64-*-*] ++ || [istarget i?86-*-*] ++ || [istarget x86_64-*-*] ++ || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { ++ set et_vect_widen_mult_hi_to_si_pattern_saved 1 ++ } ++ } ++ verbose "check_effective_target_vect_widen_mult_hi_to_si_pattern: returning $et_vect_widen_mult_hi_to_si_pattern_saved" 2 ++ return $et_vect_widen_mult_hi_to_si_pattern_saved ++} ++ ++# Return 1 if the target plus current options supports a vector + # dot-product of signed chars, 0 otherwise. + # + # This won't change for different subtargets so cache the result. 
+ +=== modified file 'gcc/tree-vect-loop-manip.c' +--- old/gcc/tree-vect-loop-manip.c 2011-05-18 13:24:05 +0000 ++++ new/gcc/tree-vect-loop-manip.c 2011-06-19 10:59:13 +0000 +@@ -1105,35 +1105,6 @@ + first_niters = PHI_RESULT (newphi); + } + +- +-/* Remove dead assignments from loop NEW_LOOP. */ +- +-static void +-remove_dead_stmts_from_loop (struct loop *new_loop) +-{ +- basic_block *bbs = get_loop_body (new_loop); +- unsigned i; +- for (i = 0; i < new_loop->num_nodes; ++i) +- { +- gimple_stmt_iterator gsi; +- for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);) +- { +- gimple stmt = gsi_stmt (gsi); +- if (is_gimple_assign (stmt) +- && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME +- && has_zero_uses (gimple_assign_lhs (stmt))) +- { +- gsi_remove (&gsi, true); +- release_defs (stmt); +- } +- else +- gsi_next (&gsi); +- } +- } +- free (bbs); +-} +- +- + /* Function slpeel_tree_peel_loop_to_edge. + + Peel the first (last) iterations of LOOP into a new prolog (epilog) loop +@@ -1445,13 +1416,6 @@ + BITMAP_FREE (definitions); + delete_update_ssa (); + +- /* Remove all pattern statements from the loop copy. They will confuse +- the expander if DCE is disabled. +- ??? The pattern recognizer should be split into an analysis and +- a transformation phase that is then run only on the loop that is +- going to be transformed. */ +- remove_dead_stmts_from_loop (new_loop); +- + adjust_vec_debug_stmts (); + + return new_loop; + +=== modified file 'gcc/tree-vect-loop.c' +--- old/gcc/tree-vect-loop.c 2011-03-01 13:18:25 +0000 ++++ new/gcc/tree-vect-loop.c 2011-06-22 06:21:13 +0000 +@@ -244,7 +244,7 @@ + for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) + { + tree vf_vectype; +- gimple stmt = gsi_stmt (si); ++ gimple stmt = gsi_stmt (si), pattern_stmt; + stmt_info = vinfo_for_stmt (stmt); + + if (vect_print_dump_info (REPORT_DETAILS)) +@@ -259,9 +259,25 @@ + if (!STMT_VINFO_RELEVANT_P (stmt_info) + && !STMT_VINFO_LIVE_P (stmt_info)) + { +- if (vect_print_dump_info (REPORT_DETAILS)) +- fprintf (vect_dump, "skip."); +- continue; ++ if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) ++ { ++ stmt = pattern_stmt; ++ stmt_info = vinfo_for_stmt (pattern_stmt); ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "==> examining pattern statement: "); ++ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); ++ } ++ } ++ else ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "skip."); ++ continue; ++ } + } + + if (gimple_get_lhs (stmt) == NULL_TREE) +@@ -816,25 +832,17 @@ + + if (stmt_info) + { +- /* Check if this is a "pattern stmt" (introduced by the +- vectorizer during the pattern recognition pass). */ +- bool remove_stmt_p = false; +- gimple orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info); +- if (orig_stmt) +- { +- stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt); +- if (orig_stmt_info +- && STMT_VINFO_IN_PATTERN_P (orig_stmt_info)) +- remove_stmt_p = true; +- } ++ /* Check if this statement has a related "pattern stmt" ++ (introduced by the vectorizer during the pattern recognition ++ pass). Free pattern's stmt_vec_info. */ ++ if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info))) ++ free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info)); + + /* Free stmt_vec_info. 
*/ + free_stmt_vec_info (stmt); ++ } + +- /* Remove dead "pattern stmts". */ +- if (remove_stmt_p) +- gsi_remove (&si, true); +- } + gsi_next (&si); + } + } +@@ -4262,6 +4270,25 @@ + return false; + } + ++ /* In case of widenning multiplication by a constant, we update the type ++ of the constant to be the type of the other operand. We check that the ++ constant fits the type in the pattern recognition pass. */ ++ if (code == DOT_PROD_EXPR ++ && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1]))) ++ { ++ if (TREE_CODE (ops[0]) == INTEGER_CST) ++ ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]); ++ else if (TREE_CODE (ops[1]) == INTEGER_CST) ++ ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]); ++ else ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "invalid types in dot-prod"); ++ ++ return false; ++ } ++ } ++ + if (!vec_stmt) /* transformation not required. */ + { + STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; +@@ -4796,7 +4823,7 @@ + + for (si = gsi_start_bb (bb); !gsi_end_p (si);) + { +- gimple stmt = gsi_stmt (si); ++ gimple stmt = gsi_stmt (si), pattern_stmt; + bool is_store; + + if (vect_print_dump_info (REPORT_DETAILS)) +@@ -4821,14 +4848,25 @@ + + if (!STMT_VINFO_RELEVANT_P (stmt_info) + && !STMT_VINFO_LIVE_P (stmt_info)) +- { +- gsi_next (&si); +- continue; ++ { ++ if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) ++ { ++ stmt = pattern_stmt; ++ stmt_info = vinfo_for_stmt (stmt); ++ } ++ else ++ { ++ gsi_next (&si); ++ continue; ++ } + } + + gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); +- nunits = +- (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)); ++ nunits = (unsigned int) TYPE_VECTOR_SUBPARTS ( ++ STMT_VINFO_VECTYPE (stmt_info)); + if (!STMT_SLP_TYPE (stmt_info) + && nunits != (unsigned int) vectorization_factor + && vect_print_dump_info (REPORT_DETAILS)) + +=== modified file 'gcc/tree-vect-patterns.c' +--- old/gcc/tree-vect-patterns.c 2010-12-02 11:47:12 +0000 ++++ new/gcc/tree-vect-patterns.c 2011-06-22 12:10:44 +0000 +@@ -38,16 +38,11 @@ + #include "recog.h" + #include "diagnostic-core.h" + +-/* Function prototypes */ +-static void vect_pattern_recog_1 +- (gimple (* ) (gimple, tree *, tree *), gimple_stmt_iterator); +-static bool widened_name_p (tree, gimple, tree *, gimple *); +- + /* Pattern recognition functions */ +-static gimple vect_recog_widen_sum_pattern (gimple, tree *, tree *); +-static gimple vect_recog_widen_mult_pattern (gimple, tree *, tree *); +-static gimple vect_recog_dot_prod_pattern (gimple, tree *, tree *); +-static gimple vect_recog_pow_pattern (gimple, tree *, tree *); ++static gimple vect_recog_widen_sum_pattern (gimple *, tree *, tree *); ++static gimple vect_recog_widen_mult_pattern (gimple *, tree *, tree *); ++static gimple vect_recog_dot_prod_pattern (gimple *, tree *, tree *); ++static gimple vect_recog_pow_pattern (gimple *, tree *, tree *); + static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { + vect_recog_widen_mult_pattern, + vect_recog_widen_sum_pattern, +@@ -61,10 +56,12 @@ + is a result of a type-promotion, such that: + DEF_STMT: NAME = NOP (name0) + where the type of name0 (HALF_TYPE) is smaller than the type of NAME. +-*/ ++ If CHECK_SIGN is TRUE, check that either both types are signed or both are ++ unsigned. 
*/ + + static bool +-widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt) ++widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt, ++ bool check_sign) + { + tree dummy; + gimple dummy_gimple; +@@ -98,7 +95,7 @@ + + *half_type = TREE_TYPE (oprnd0); + if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type) +- || (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) ++ || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign) + || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2))) + return false; + +@@ -168,12 +165,12 @@ + inner-loop nested in an outer-loop that us being vectorized). */ + + static gimple +-vect_recog_dot_prod_pattern (gimple last_stmt, tree *type_in, tree *type_out) ++vect_recog_dot_prod_pattern (gimple *last_stmt, tree *type_in, tree *type_out) + { + gimple stmt; + tree oprnd0, oprnd1; + tree oprnd00, oprnd01; +- stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); ++ stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt); + tree type, half_type; + gimple pattern_stmt; + tree prod_type; +@@ -181,10 +178,10 @@ + struct loop *loop = LOOP_VINFO_LOOP (loop_info); + tree var, rhs; + +- if (!is_gimple_assign (last_stmt)) ++ if (!is_gimple_assign (*last_stmt)) + return NULL; + +- type = gimple_expr_type (last_stmt); ++ type = gimple_expr_type (*last_stmt); + + /* Look for the following pattern + DX = (TYPE1) X; +@@ -210,7 +207,7 @@ + /* Starting from LAST_STMT, follow the defs of its uses in search + of the above pattern. */ + +- if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) ++ if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR) + return NULL; + + if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) +@@ -231,14 +228,14 @@ + + if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) + return NULL; +- oprnd0 = gimple_assign_rhs1 (last_stmt); +- oprnd1 = gimple_assign_rhs2 (last_stmt); ++ oprnd0 = gimple_assign_rhs1 (*last_stmt); ++ oprnd1 = gimple_assign_rhs2 (*last_stmt); + if (!types_compatible_p (TREE_TYPE (oprnd0), type) + || !types_compatible_p (TREE_TYPE (oprnd1), type)) + return NULL; +- stmt = last_stmt; ++ stmt = *last_stmt; + +- if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt)) ++ if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true)) + { + stmt = def_stmt; + oprnd0 = gimple_assign_rhs1 (stmt); +@@ -293,10 +290,10 @@ + if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type) + || !types_compatible_p (TREE_TYPE (oprnd1), prod_type)) + return NULL; +- if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt)) ++ if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true)) + return NULL; + oprnd00 = gimple_assign_rhs1 (def_stmt); +- if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt)) ++ if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true)) + return NULL; + oprnd01 = gimple_assign_rhs1 (def_stmt); + if (!types_compatible_p (half_type0, half_type1)) +@@ -322,7 +319,7 @@ + + /* We don't allow changing the order of the computation in the inner-loop + when doing outer-loop vectorization. */ +- gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); ++ gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt)); + + return pattern_stmt; + } +@@ -342,24 +339,47 @@ + + where type 'TYPE' is at least double the size of type 'type'. + +- Input: +- +- * LAST_STMT: A stmt from which the pattern search begins. In the example, +- when this function is called with S5, the pattern {S3,S4,S5} is be detected. 
+- +- Output: +- +- * TYPE_IN: The type of the input arguments to the pattern. +- +- * TYPE_OUT: The type of the output of this pattern. +- +- * Return value: A new stmt that will be used to replace the sequence of +- stmts that constitute the pattern. In this case it will be: +- WIDEN_MULT <a_t, b_t> +-*/ ++ Also detect unsgigned cases: ++ ++ unsigned type a_t, b_t; ++ unsigned TYPE u_prod_T; ++ TYPE a_T, b_T, prod_T; ++ ++ S1 a_t = ; ++ S2 b_t = ; ++ S3 a_T = (TYPE) a_t; ++ S4 b_T = (TYPE) b_t; ++ S5 prod_T = a_T * b_T; ++ S6 u_prod_T = (unsigned TYPE) prod_T; ++ ++ and multiplication by constants: ++ ++ type a_t; ++ TYPE a_T, prod_T; ++ ++ S1 a_t = ; ++ S3 a_T = (TYPE) a_t; ++ S5 prod_T = a_T * CONST; ++ ++ Input: ++ ++ * LAST_STMT: A stmt from which the pattern search begins. In the example, ++ when this function is called with S5, the pattern {S3,S4,S5,(S6)} is ++ detected. ++ ++ Output: ++ ++ * TYPE_IN: The type of the input arguments to the pattern. ++ ++ * TYPE_OUT: The type of the output of this pattern. ++ ++ * Return value: A new stmt that will be used to replace the sequence of ++ stmts that constitute the pattern. In this case it will be: ++ WIDEN_MULT <a_t, b_t> ++ */ + + static gimple +-vect_recog_widen_mult_pattern (gimple last_stmt, ++vect_recog_widen_mult_pattern (gimple *last_stmt, + tree *type_in, + tree *type_out) + { +@@ -367,39 +387,112 @@ + tree oprnd0, oprnd1; + tree type, half_type0, half_type1; + gimple pattern_stmt; +- tree vectype, vectype_out; ++ tree vectype, vectype_out = NULL_TREE; + tree dummy; + tree var; + enum tree_code dummy_code; + int dummy_int; + VEC (tree, heap) *dummy_vec; ++ bool op0_ok, op1_ok; + +- if (!is_gimple_assign (last_stmt)) ++ if (!is_gimple_assign (*last_stmt)) + return NULL; + +- type = gimple_expr_type (last_stmt); ++ type = gimple_expr_type (*last_stmt); + + /* Starting from LAST_STMT, follow the defs of its uses in search + of the above pattern. */ + +- if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR) ++ if (gimple_assign_rhs_code (*last_stmt) != MULT_EXPR) + return NULL; + +- oprnd0 = gimple_assign_rhs1 (last_stmt); +- oprnd1 = gimple_assign_rhs2 (last_stmt); ++ oprnd0 = gimple_assign_rhs1 (*last_stmt); ++ oprnd1 = gimple_assign_rhs2 (*last_stmt); + if (!types_compatible_p (TREE_TYPE (oprnd0), type) + || !types_compatible_p (TREE_TYPE (oprnd1), type)) + return NULL; + +- /* Check argument 0 */ +- if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0)) +- return NULL; +- oprnd0 = gimple_assign_rhs1 (def_stmt0); +- +- /* Check argument 1 */ +- if (!widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1)) +- return NULL; +- oprnd1 = gimple_assign_rhs1 (def_stmt1); ++ /* Check argument 0. */ ++ op0_ok = widened_name_p (oprnd0, *last_stmt, &half_type0, &def_stmt0, false); ++ /* Check argument 1. */ ++ op1_ok = widened_name_p (oprnd1, *last_stmt, &half_type1, &def_stmt1, false); ++ ++ /* In case of multiplication by a constant one of the operands may not match ++ the pattern, but not both. */ ++ if (!op0_ok && !op1_ok) ++ return NULL; ++ ++ if (op0_ok && op1_ok) ++ { ++ oprnd0 = gimple_assign_rhs1 (def_stmt0); ++ oprnd1 = gimple_assign_rhs1 (def_stmt1); ++ } ++ else if (!op0_ok) ++ { ++ if (CONSTANT_CLASS_P (oprnd0) ++ && TREE_CODE (half_type1) == INTEGER_TYPE ++ && tree_int_cst_lt (oprnd0, TYPE_MAXVAL (half_type1)) ++ && tree_int_cst_lt (TYPE_MINVAL (half_type1), oprnd0)) ++ { ++ /* OPRND0 is a constant of HALF_TYPE1. 
*/ ++ half_type0 = half_type1; ++ oprnd1 = gimple_assign_rhs1 (def_stmt1); ++ } ++ else ++ return NULL; ++ } ++ else if (!op1_ok) ++ { ++ if (CONSTANT_CLASS_P (oprnd1) ++ && TREE_CODE (half_type0) == INTEGER_TYPE ++ && tree_int_cst_lt (oprnd1, TYPE_MAXVAL (half_type0)) ++ && tree_int_cst_lt (TYPE_MINVAL (half_type0), oprnd1)) ++ { ++ /* OPRND1 is a constant of HALF_TYPE0. */ ++ half_type1 = half_type0; ++ oprnd0 = gimple_assign_rhs1 (def_stmt0); ++ } ++ else ++ return NULL; ++ } ++ ++ /* Handle unsigned case. Look for ++ S6 u_prod_T = (unsigned TYPE) prod_T; ++ Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */ ++ if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0)) ++ { ++ tree lhs = gimple_assign_lhs (*last_stmt), use_lhs; ++ imm_use_iterator imm_iter; ++ use_operand_p use_p; ++ int nuses = 0; ++ gimple use_stmt = NULL; ++ tree use_type; ++ ++ if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1)) ++ return NULL; ++ ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) ++ { ++ if (is_gimple_debug (USE_STMT (use_p))) ++ continue; ++ use_stmt = USE_STMT (use_p); ++ nuses++; ++ } ++ ++ if (nuses != 1 || !is_gimple_assign (use_stmt) ++ || gimple_assign_rhs_code (use_stmt) != NOP_EXPR) ++ return NULL; ++ ++ use_lhs = gimple_assign_lhs (use_stmt); ++ use_type = TREE_TYPE (use_lhs); ++ if (!INTEGRAL_TYPE_P (use_type) ++ || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type)) ++ || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type))) ++ return NULL; ++ ++ type = use_type; ++ *last_stmt = use_stmt; ++ } + + if (!types_compatible_p (half_type0, half_type1)) + return NULL; +@@ -413,7 +506,7 @@ + vectype_out = get_vectype_for_scalar_type (type); + if (!vectype + || !vectype_out +- || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt, ++ || !supportable_widening_operation (WIDEN_MULT_EXPR, *last_stmt, + vectype_out, vectype, + &dummy, &dummy, &dummy_code, + &dummy_code, &dummy_int, &dummy_vec)) +@@ -462,16 +555,16 @@ + */ + + static gimple +-vect_recog_pow_pattern (gimple last_stmt, tree *type_in, tree *type_out) ++vect_recog_pow_pattern (gimple *last_stmt, tree *type_in, tree *type_out) + { + tree fn, base, exp = NULL; + gimple stmt; + tree var; + +- if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL) ++ if (!is_gimple_call (*last_stmt) || gimple_call_lhs (*last_stmt) == NULL) + return NULL; + +- fn = gimple_call_fndecl (last_stmt); ++ fn = gimple_call_fndecl (*last_stmt); + if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL) + return NULL; + +@@ -481,8 +574,8 @@ + case BUILT_IN_POWI: + case BUILT_IN_POWF: + case BUILT_IN_POW: +- base = gimple_call_arg (last_stmt, 0); +- exp = gimple_call_arg (last_stmt, 1); ++ base = gimple_call_arg (*last_stmt, 0); ++ exp = gimple_call_arg (*last_stmt, 1); + if (TREE_CODE (exp) != REAL_CST + && TREE_CODE (exp) != INTEGER_CST) + return NULL; +@@ -574,21 +667,21 @@ + inner-loop nested in an outer-loop that us being vectorized). 
*/ + + static gimple +-vect_recog_widen_sum_pattern (gimple last_stmt, tree *type_in, tree *type_out) ++vect_recog_widen_sum_pattern (gimple *last_stmt, tree *type_in, tree *type_out) + { + gimple stmt; + tree oprnd0, oprnd1; +- stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); ++ stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt); + tree type, half_type; + gimple pattern_stmt; + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); + struct loop *loop = LOOP_VINFO_LOOP (loop_info); + tree var; + +- if (!is_gimple_assign (last_stmt)) ++ if (!is_gimple_assign (*last_stmt)) + return NULL; + +- type = gimple_expr_type (last_stmt); ++ type = gimple_expr_type (*last_stmt); + + /* Look for the following pattern + DX = (TYPE) X; +@@ -600,25 +693,25 @@ + /* Starting from LAST_STMT, follow the defs of its uses in search + of the above pattern. */ + +- if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) ++ if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR) + return NULL; + + if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) + return NULL; + +- oprnd0 = gimple_assign_rhs1 (last_stmt); +- oprnd1 = gimple_assign_rhs2 (last_stmt); ++ oprnd0 = gimple_assign_rhs1 (*last_stmt); ++ oprnd1 = gimple_assign_rhs2 (*last_stmt); + if (!types_compatible_p (TREE_TYPE (oprnd0), type) + || !types_compatible_p (TREE_TYPE (oprnd1), type)) + return NULL; + +- /* So far so good. Since last_stmt was detected as a (summation) reduction, ++ /* So far so good. Since *last_stmt was detected as a (summation) reduction, + we know that oprnd1 is the reduction variable (defined by a loop-header + phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. + Left to check that oprnd0 is defined by a cast from type 'type' to type + 'TYPE'. */ + +- if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt)) ++ if (!widened_name_p (oprnd0, *last_stmt, &half_type, &stmt, true)) + return NULL; + + oprnd0 = gimple_assign_rhs1 (stmt); +@@ -639,7 +732,7 @@ + + /* We don't allow changing the order of the computation in the inner-loop + when doing outer-loop vectorization. */ +- gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); ++ gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt)); + + return pattern_stmt; + } +@@ -669,23 +762,27 @@ + + static void + vect_pattern_recog_1 ( +- gimple (* vect_recog_func) (gimple, tree *, tree *), ++ gimple (* vect_recog_func) (gimple *, tree *, tree *), + gimple_stmt_iterator si) + { + gimple stmt = gsi_stmt (si), pattern_stmt; +- stmt_vec_info stmt_info = vinfo_for_stmt (stmt); ++ stmt_vec_info stmt_info; + stmt_vec_info pattern_stmt_info; +- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); ++ loop_vec_info loop_vinfo; + tree pattern_vectype; + tree type_in, type_out; + enum tree_code code; + int i; + gimple next; + +- pattern_stmt = (* vect_recog_func) (stmt, &type_in, &type_out); ++ pattern_stmt = (* vect_recog_func) (&stmt, &type_in, &type_out); + if (!pattern_stmt) + return; + ++ si = gsi_for_stmt (stmt); ++ stmt_info = vinfo_for_stmt (stmt); ++ loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); ++ + if (VECTOR_MODE_P (TYPE_MODE (type_in))) + { + /* No need to check target support (already checked by the pattern +@@ -736,9 +833,9 @@ + } + + /* Mark the stmts that are involved in the pattern. 
*/ +- gsi_insert_before (&si, pattern_stmt, GSI_SAME_STMT); + set_vinfo_for_stmt (pattern_stmt, + new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL)); ++ gimple_set_bb (pattern_stmt, gimple_bb (stmt)); + pattern_stmt_info = vinfo_for_stmt (pattern_stmt); + + STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt; +@@ -761,8 +858,8 @@ + LOOP_VINFO - a struct_loop_info of a loop in which we want to look for + computation idioms. + +- Output - for each computation idiom that is detected we insert a new stmt +- that provides the same functionality and that can be vectorized. We ++ Output - for each computation idiom that is detected we create a new stmt ++ that provides the same functionality and that can be vectorized. We + also record some information in the struct_stmt_info of the relevant + stmts, as explained below: + +@@ -777,52 +874,48 @@ + S5: ... = ..use(a_0).. - - - + + Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be +- represented by a single stmt. We then: +- - create a new stmt S6 that will replace the pattern. +- - insert the new stmt S6 before the last stmt in the pattern ++ represented by a single stmt. We then: ++ - create a new stmt S6 equivalent to the pattern (the stmt is not ++ inserted into the code) + - fill in the STMT_VINFO fields as follows: + + in_pattern_p related_stmt vec_stmt + S1: a_i = .... - - - + S2: a_2 = ..use(a_i).. - - - + S3: a_1 = ..use(a_2).. - - - +- > S6: a_new = .... - S4 - + S4: a_0 = ..use(a_1).. true S6 - ++ '---> S6: a_new = .... - S4 - + S5: ... = ..use(a_0).. - - - + + (the last stmt in the pattern (S4) and the new pattern stmt (S6) point +- to each other through the RELATED_STMT field). ++ to each other through the RELATED_STMT field). + + S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead + of S4 because it will replace all its uses. Stmts {S1,S2,S3} will + remain irrelevant unless used by stmts other than S4. + + If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3} +- (because they are marked as irrelevant). It will vectorize S6, and record ++ (because they are marked as irrelevant). It will vectorize S6, and record + a pointer to the new vector stmt VS6 both from S6 (as usual), and also +- from S4. We do that so that when we get to vectorizing stmts that use the ++ from S4. We do that so that when we get to vectorizing stmts that use the + def of S4 (like S5 that uses a_0), we'll know where to take the relevant +- vector-def from. S4 will be skipped, and S5 will be vectorized as usual: ++ vector-def from. S4 will be skipped, and S5 will be vectorized as usual: + + in_pattern_p related_stmt vec_stmt + S1: a_i = .... - - - + S2: a_2 = ..use(a_i).. - - - + S3: a_1 = ..use(a_2).. - - - + > VS6: va_new = .... - - - +- S6: a_new = .... - S4 VS6 + S4: a_0 = ..use(a_1).. true S6 VS6 ++ '---> S6: a_new = .... - S4 VS6 + > VS5: ... = ..vuse(va_new).. - - - + S5: ... = ..use(a_0).. - - - + +- DCE could then get rid of {S1,S2,S3,S4,S5,S6} (if their defs are not used ++ DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used + elsewhere), and we'll end up with: + + VS6: va_new = .... +- VS5: ... = ..vuse(va_new).. +- +- If vectorization does not succeed, DCE will clean S6 away (its def is +- not used), and we'll end up with the original sequence. +-*/ ++ VS5: ... = ..vuse(va_new).. 
*/ + + void + vect_pattern_recog (loop_vec_info loop_vinfo) +@@ -832,7 +925,7 @@ + unsigned int nbbs = loop->num_nodes; + gimple_stmt_iterator si; + unsigned int i, j; +- gimple (* vect_recog_func_ptr) (gimple, tree *, tree *); ++ gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *); + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "=== vect_pattern_recog ==="); + +=== modified file 'gcc/tree-vect-slp.c' +--- old/gcc/tree-vect-slp.c 2011-05-05 15:43:06 +0000 ++++ new/gcc/tree-vect-slp.c 2011-06-19 10:59:13 +0000 +@@ -2510,6 +2510,8 @@ + && STMT_VINFO_STRIDED_ACCESS (stmt_info) + && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))) + si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance)); ++ else if (is_pattern_stmt_p (stmt_info)) ++ si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); + else + si = gsi_for_stmt (stmt); + + +=== modified file 'gcc/tree-vect-stmts.c' +--- old/gcc/tree-vect-stmts.c 2011-06-02 12:12:00 +0000 ++++ new/gcc/tree-vect-stmts.c 2011-06-22 06:21:13 +0000 +@@ -605,15 +605,76 @@ + break; + } + +- FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) +- { +- tree op = USE_FROM_PTR (use_p); +- if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist)) +- { +- VEC_free (gimple, heap, worklist); +- return false; +- } +- } ++ if (is_pattern_stmt_p (vinfo_for_stmt (stmt))) ++ { ++ /* Pattern statements are not inserted into the code, so ++ FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we ++ have to scan the RHS or function arguments instead. */ ++ if (is_gimple_assign (stmt)) ++ { ++ tree rhs = gimple_assign_rhs1 (stmt); ++ if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) ++ == GIMPLE_SINGLE_RHS) ++ { ++ unsigned int op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 ++ (stmt)); ++ for (i = 0; i < op_num; i++) ++ { ++ tree op = TREE_OPERAND (rhs, i); ++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ } ++ } ++ else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) ++ == GIMPLE_BINARY_RHS) ++ { ++ tree op = gimple_assign_rhs1 (stmt); ++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ op = gimple_assign_rhs2 (stmt); ++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ } ++ else ++ return false; ++ } ++ else if (is_gimple_call (stmt)) ++ { ++ for (i = 0; i < gimple_call_num_args (stmt); i++) ++ { ++ tree arg = gimple_call_arg (stmt, i); ++ if (!process_use (stmt, arg, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ } ++ } ++ } ++ else ++ FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) ++ { ++ tree op = USE_FROM_PTR (use_p); ++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ } + } /* while worklist */ + + VEC_free (gimple, heap, worklist); +@@ -1405,6 +1466,7 @@ + VEC(tree, heap) *vargs = NULL; + enum { NARROW, NONE, WIDEN } modifier; + size_t i, nargs; ++ tree lhs; + + /* FORNOW: unsupported in basic block SLP. */ + gcc_assert (loop_vinfo); +@@ -1542,7 +1604,7 @@ + /** Transform. **/ + + if (vect_print_dump_info (REPORT_DETAILS)) +- fprintf (vect_dump, "transform operation."); ++ fprintf (vect_dump, "transform call."); + + /* Handle def. 
*/ + scalar_dest = gimple_call_lhs (stmt); +@@ -1661,8 +1723,11 @@ + rhs of the statement with something harmless. */ + + type = TREE_TYPE (scalar_dest); +- new_stmt = gimple_build_assign (gimple_call_lhs (stmt), +- build_zero_cst (type)); ++ if (is_pattern_stmt_p (stmt_info)) ++ lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info)); ++ else ++ lhs = gimple_call_lhs (stmt); ++ new_stmt = gimple_build_assign (lhs, build_zero_cst (type)); + set_vinfo_for_stmt (new_stmt, stmt_info); + set_vinfo_for_stmt (stmt, NULL); + STMT_VINFO_STMT (stmt_info) = new_stmt; +@@ -3231,6 +3296,33 @@ + fprintf (vect_dump, "use not simple."); + return false; + } ++ ++ op_type = TREE_CODE_LENGTH (code); ++ if (op_type == binary_op) ++ { ++ bool ok; ++ ++ op1 = gimple_assign_rhs2 (stmt); ++ if (code == WIDEN_MULT_EXPR) ++ { ++ /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of ++ OP1. */ ++ if (CONSTANT_CLASS_P (op0)) ++ ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL, ++ &def_stmt, &def, &dt[1], &vectype_in); ++ else ++ ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, ++ &dt[1]); ++ ++ if (!ok) ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "use not simple."); ++ return false; ++ } ++ } ++ } ++ + /* If op0 is an external or constant def use a vector type with + the same size as the output vector type. */ + if (!vectype_in) +@@ -3263,18 +3355,6 @@ + + gcc_assert (ncopies >= 1); + +- op_type = TREE_CODE_LENGTH (code); +- if (op_type == binary_op) +- { +- op1 = gimple_assign_rhs2 (stmt); +- if (!vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, &dt[1])) +- { +- if (vect_print_dump_info (REPORT_DETAILS)) +- fprintf (vect_dump, "use not simple."); +- return false; +- } +- } +- + /* Supportable by target? */ + if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in, + &decl1, &decl2, &code1, &code2, +@@ -3300,6 +3380,14 @@ + fprintf (vect_dump, "transform type promotion operation. ncopies = %d.", + ncopies); + ++ if (code == WIDEN_MULT_EXPR) ++ { ++ if (CONSTANT_CLASS_P (op0)) ++ op0 = fold_convert (TREE_TYPE (op1), op0); ++ else if (CONSTANT_CLASS_P (op1)) ++ op1 = fold_convert (TREE_TYPE (op0), op1); ++ } ++ + /* Handle def. */ + /* In case of multi-step promotion, we first generate promotion operations + to the intermediate types, and then from that types to the final one. +@@ -4824,10 +4912,26 @@ + if (!STMT_VINFO_RELEVANT_P (stmt_info) + && !STMT_VINFO_LIVE_P (stmt_info)) + { +- if (vect_print_dump_info (REPORT_DETAILS)) +- fprintf (vect_dump, "irrelevant."); ++ gimple pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); ++ if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) ++ { ++ stmt = pattern_stmt; ++ stmt_info = vinfo_for_stmt (pattern_stmt); ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "==> examining pattern statement: "); ++ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); ++ } ++ } ++ else ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "irrelevant."); + +- return true; ++ return true; ++ } + } + + switch (STMT_VINFO_DEF_TYPE (stmt_info)) + +=== modified file 'gcc/tree-vectorizer.h' +--- old/gcc/tree-vectorizer.h 2011-05-05 15:43:06 +0000 ++++ new/gcc/tree-vectorizer.h 2011-06-19 10:59:13 +0000 +@@ -884,7 +884,7 @@ + /* Pattern recognition functions. + Additional pattern recognition functions can (and will) be added + in the future. 
*/ +-typedef gimple (* vect_recog_func_ptr) (gimple, tree *, tree *); ++typedef gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *); + #define NUM_PATTERNS 4 + void vect_pattern_recog (loop_vec_info); + + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106763.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106763.patch new file mode 100644 index 0000000000..4abfa02a77 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106763.patch @@ -0,0 +1,24 @@ +2011-06-28 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline. + gcc/ + 2011-06-24 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + PR target/49385 + * config/arm/thumb2.md (*thumb2_movhi_insn): Make sure atleast + one of the operands is a register. +Index: gcc-4_6-branch/gcc/config/arm/thumb2.md +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/thumb2.md 2011-09-16 20:22:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/config/arm/thumb2.md 2011-09-16 20:28:47.648690433 -0700 +@@ -207,7 +207,9 @@ + (define_insn "*thumb2_movhi_insn" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,m,r") + (match_operand:HI 1 "general_operand" "rI,n,r,m"))] +- "TARGET_THUMB2" ++ "TARGET_THUMB2 ++ && (register_operand (operands[0], HImode) ++ || register_operand (operands[1], HImode))" + "@ + mov%?\\t%0, %1\\t%@ movhi + movw%?\\t%0, %L1\\t%@ movhi diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106764.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106764.patch new file mode 100644 index 0000000000..a548b1b683 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106764.patch @@ -0,0 +1,25 @@ +2011-06-30 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline. + LP 744754 + 2011-04-17 Chung-Lin Tang <cltang@codesourcery.com> + + * config/arm/arm.c (neon_struct_mem_operand): + Support POST_INC/PRE_DEC memory operands. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-06-27 22:14:07 +0000 ++++ new/gcc/config/arm/arm.c 2011-06-29 09:13:17 +0000 +@@ -9357,6 +9357,11 @@ + if (GET_CODE (ind) == REG) + return arm_address_register_rtx_p (ind, 0); + ++ /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */ ++ if (GET_CODE (ind) == POST_INC ++ || GET_CODE (ind) == PRE_DEC) ++ return arm_address_register_rtx_p (XEXP (ind, 0), 0); ++ + return FALSE; + } + + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106766.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106766.patch new file mode 100644 index 0000000000..15046a766e --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106766.patch @@ -0,0 +1,25 @@ +2011-07-03 Ira Rosen <ira.rosen@linaro.org> + + Backport from FSF: + 2011-06-12 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vect-data-refs.c (vect_peeling_hash_get_most_frequent): + Take number of iterations to peel into account for equally frequent + misalignment values. 
+ +=== modified file 'gcc/tree-vect-data-refs.c' +--- old/gcc/tree-vect-data-refs.c 2011-06-02 12:12:00 +0000 ++++ new/gcc/tree-vect-data-refs.c 2011-06-29 11:20:24 +0000 +@@ -1256,7 +1256,9 @@ + vect_peel_info elem = (vect_peel_info) *slot; + vect_peel_extended_info max = (vect_peel_extended_info) data; + +- if (elem->count > max->peel_info.count) ++ if (elem->count > max->peel_info.count ++ || (elem->count == max->peel_info.count ++ && max->peel_info.npeel > elem->npeel)) + { + max->peel_info.npeel = elem->npeel; + max->peel_info.count = elem->count; + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106768.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106768.patch new file mode 100644 index 0000000000..f1f7718eb5 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106768.patch @@ -0,0 +1,182 @@ +2011-07-07 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + * builtins.c (get_object_alignment): Fix comment. + * fold-const.c (get_pointer_modulus_and_residue): Remove + allow_func_align. Use get_object_alignment. + (fold_binary_loc): Update caller. + +2011-07-07 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-06-29 Richard Sandiford <richard.sandiford@linaro.org> + + PR tree-optimization/49545 + * builtins.c (get_object_alignment_1): Update function comment. + Do not use DECL_ALIGN for functions, but test + TARGET_PTRMEMFUNC_VBIT_LOCATION instead. + * fold-const.c (get_pointer_modulus_and_residue): Don't check + for functions here. + * tree-ssa-ccp.c (get_value_from_alignment): Likewise. + + gcc/testsuite/ + Backport from mainline: + + 2011-06-29 Richard Sandiford <richard.sandiford@linaro.org> + + * gcc.dg/torture/pr49169.c: Restrict to ARM and MIPS targets. + +2011-07-07 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-07-27 Richard Guenther <rguenther@suse.de> + + PR tree-optimization/49169 + * fold-const.c (get_pointer_modulus_and_residue): Don't rely on + the alignment of function decls. + + gcc/testsuite/ + Backport from mainline: + + 2011-07-27 Michael Hope <michael.hope@linaro.org> + Richard Sandiford <richard.sandiford@linaro.org> + + PR tree-optimization/49169 + * gcc.dg/torture/pr49169.c: New test. + +=== modified file 'gcc/builtins.c' +--- old/gcc/builtins.c 2011-03-03 21:56:58 +0000 ++++ new/gcc/builtins.c 2011-07-04 09:52:27 +0000 +@@ -264,7 +264,14 @@ + } + + /* Return the alignment in bits of EXP, an object. +- Don't return more than MAX_ALIGN no matter what. */ ++ Don't return more than MAX_ALIGN no matter what. ++ ++ Note that the address (and thus the alignment) computed here is based ++ on the address to which a symbol resolves, whereas DECL_ALIGN is based ++ on the address at which an object is actually located. These two ++ addresses are not always the same. For example, on ARM targets, ++ the address &foo of a Thumb function foo() has the lowest bit set, ++ whereas foo() itself starts on an even address. */ + + unsigned int + get_object_alignment (tree exp, unsigned int max_align) +@@ -286,7 +293,21 @@ + exp = DECL_INITIAL (exp); + if (DECL_P (exp) + && TREE_CODE (exp) != LABEL_DECL) +- align = DECL_ALIGN (exp); ++ { ++ if (TREE_CODE (exp) == FUNCTION_DECL) ++ { ++ /* Function addresses can encode extra information besides their ++ alignment. 
However, if TARGET_PTRMEMFUNC_VBIT_LOCATION ++ allows the low bit to be used as a virtual bit, we know ++ that the address itself must be 2-byte aligned. */ ++ if (TARGET_PTRMEMFUNC_VBIT_LOCATION == ptrmemfunc_vbit_in_pfn) ++ align = 2 * BITS_PER_UNIT; ++ else ++ align = BITS_PER_UNIT; ++ } ++ else ++ align = DECL_ALIGN (exp); ++ } + else if (CONSTANT_CLASS_P (exp)) + { + align = TYPE_ALIGN (TREE_TYPE (exp)); + +=== modified file 'gcc/fold-const.c' +--- old/gcc/fold-const.c 2011-05-23 20:37:18 +0000 ++++ new/gcc/fold-const.c 2011-07-04 09:52:27 +0000 +@@ -9232,15 +9232,10 @@ + 0 <= N < M as is common. In general, the precise value of P is unknown. + M is chosen as large as possible such that constant N can be determined. + +- Returns M and sets *RESIDUE to N. +- +- If ALLOW_FUNC_ALIGN is true, do take functions' DECL_ALIGN_UNIT into +- account. This is not always possible due to PR 35705. +- */ ++ Returns M and sets *RESIDUE to N. */ + + static unsigned HOST_WIDE_INT +-get_pointer_modulus_and_residue (tree expr, unsigned HOST_WIDE_INT *residue, +- bool allow_func_align) ++get_pointer_modulus_and_residue (tree expr, unsigned HOST_WIDE_INT *residue) + { + enum tree_code code; + +@@ -9270,9 +9265,8 @@ + } + } + +- if (DECL_P (expr) +- && (allow_func_align || TREE_CODE (expr) != FUNCTION_DECL)) +- return DECL_ALIGN_UNIT (expr); ++ if (DECL_P (expr)) ++ return get_object_alignment (expr, ~0U) / BITS_PER_UNIT; + } + else if (code == POINTER_PLUS_EXPR) + { +@@ -9282,8 +9276,7 @@ + + op0 = TREE_OPERAND (expr, 0); + STRIP_NOPS (op0); +- modulus = get_pointer_modulus_and_residue (op0, residue, +- allow_func_align); ++ modulus = get_pointer_modulus_and_residue (op0, residue); + + op1 = TREE_OPERAND (expr, 1); + STRIP_NOPS (op1); +@@ -11163,8 +11156,7 @@ + unsigned HOST_WIDE_INT modulus, residue; + unsigned HOST_WIDE_INT low = TREE_INT_CST_LOW (arg1); + +- modulus = get_pointer_modulus_and_residue (arg0, &residue, +- integer_onep (arg1)); ++ modulus = get_pointer_modulus_and_residue (arg0, &residue); + + /* This works because modulus is a power of 2. If this weren't the + case, we'd have to replace it by its greatest power-of-2 + +=== added file 'gcc/testsuite/gcc.dg/torture/pr49169.c' +--- old/gcc/testsuite/gcc.dg/torture/pr49169.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/torture/pr49169.c 2011-06-29 09:46:06 +0000 +@@ -0,0 +1,15 @@ ++/* { dg-do compile { target { arm*-*-* || mips*-*-* } } } */ ++ ++#include <stdlib.h> ++#include <stdint.h> ++ ++int ++main (void) ++{ ++ void *p = main; ++ if ((intptr_t) p & 1) ++ abort (); ++ return 0; ++} ++ ++/* { dg-final { scan-assembler "abort" } } */ + +=== modified file 'gcc/tree-ssa-ccp.c' +--- old/gcc/tree-ssa-ccp.c 2011-05-05 15:42:22 +0000 ++++ new/gcc/tree-ssa-ccp.c 2011-06-29 09:46:06 +0000 +@@ -522,10 +522,6 @@ + val = bit_value_binop (PLUS_EXPR, TREE_TYPE (expr), + TREE_OPERAND (base, 0), TREE_OPERAND (base, 1)); + else if (base +- /* ??? While function decls have DECL_ALIGN their addresses +- may encode extra information in the lower bits on some +- targets (PR47239). Simply punt for function decls for now. 
*/ +- && TREE_CODE (base) != FUNCTION_DECL + && ((align = get_object_alignment (base, BIGGEST_ALIGNMENT)) + > BITS_PER_UNIT)) + { + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106769.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106769.patch new file mode 100644 index 0000000000..3a149231f8 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106769.patch @@ -0,0 +1,1281 @@ +2011-07-11 Ira Rosen <ira.rosen@linaro.org> + + Backport from FSF: + 2011-06-16 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vectorizer.h (vect_recog_func_ptr): Change the first + argument to be a VEC of statements. + * tree-vect-loop.c (vect_determine_vectorization_factor): Remove the + assert that pattern statements have to have their vector type set. + * tree-vect-patterns.c (vect_recog_widen_sum_pattern): + Change the first argument to be a VEC of statements. Update + documentation. + (vect_recog_dot_prod_pattern, vect_recog_pow_pattern): Likewise. + (vect_handle_widen_mult_by_const): New function. + (vect_recog_widen_mult_pattern): Change the first argument to be a + VEC of statements. Update documentation. Check that the constant is + INTEGER_CST. Support multiplication by a constant that fits an + intermediate type - call vect_handle_widen_mult_by_const. + (vect_pattern_recog_1): Update vect_recog_func_ptr and its + call. Handle additional pattern statements if necessary. + + gcc/testsuite/ + * gcc.dg/vect/vect-widen-mult-half-u8.c: New test. + + and + 2011-06-30 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vect-loop.c (vect_determine_vectorization_factor): Handle + both pattern and original statements if necessary. + (vect_transform_loop): Likewise. + * tree-vect-patterns.c (vect_pattern_recog): Update documentation. + * tree-vect-stmts.c (vect_mark_relevant): Add new argument. + Mark the pattern statement only if the original statement doesn't + have its own uses. + (process_use): Call vect_mark_relevant with additional parameter. + (vect_mark_stmts_to_be_vectorized): Likewise. + (vect_get_vec_def_for_operand): Use vectorized pattern statement. + (vect_analyze_stmt): Handle both pattern and original statements + if necessary. + (vect_transform_stmt): Don't store vectorized pattern statement + in the original statement. + (vect_is_simple_use_1): Use related pattern statement only if the + original statement is irrelevant. + * tree-vect-slp.c (vect_get_and_check_slp_defs): Likewise. + + gcc/testsuite/ + * gcc.dg/vect/slp-widen-mult-half.c: New test. + * gcc.dg/vect/vect-widen-mult-half.c: New test. 
+ +=== added file 'gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c' +Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c 2012-01-09 15:03:29.156918805 -0800 +@@ -0,0 +1,52 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include "tree-vect.h" ++#include <stdlib.h> ++ ++#define N 32 ++#define COEF 32470 ++#define COEF2 324700 ++ ++unsigned char in[N]; ++int out[N]; ++int out2[N]; ++ ++__attribute__ ((noinline)) void ++foo () ++{ ++ int i; ++ ++ for (i = 0; i < N/2; i++) ++ { ++ out[2*i] = in[2*i] * COEF; ++ out2[2*i] = in[2*i] + COEF2; ++ out[2*i+1] = in[2*i+1] * COEF; ++ out2[2*i+1] = in[2*i+1] + COEF2; ++ } ++} ++ ++int main (void) ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ __asm__ volatile (""); ++ } ++ ++ foo (); ++ ++ for (i = 0; i < N; i++) ++ if (out[i] != in[i] * COEF || out2[i] != in[i] + COEF2) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_widen_mult_hi_to_si } } } */ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ +Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c 2012-01-09 15:03:29.156918805 -0800 +@@ -0,0 +1,59 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include "tree-vect.h" ++#include <stdlib.h> ++ ++#define N 32 ++#define COEF 32470 ++ ++unsigned char in[N]; ++int out[N]; ++ ++__attribute__ ((noinline)) void ++foo () ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ out[i] = in[i] * COEF; ++} ++ ++__attribute__ ((noinline)) void ++bar () ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ out[i] = COEF * in[i]; ++} ++ ++int main (void) ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ __asm__ volatile (""); ++ } ++ ++ foo (); ++ ++ for (i = 0; i < N; i++) ++ if (out[i] != in[i] * COEF) ++ abort (); ++ ++ bar (); ++ ++ for (i = 0; i < N; i++) ++ if (out[i] != in[i] * COEF) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_widen_mult_hi_to_si } } } */ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ +Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c 2012-01-09 15:03:29.160918806 -0800 +@@ -0,0 +1,49 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include "tree-vect.h" ++#include <stdlib.h> ++ ++#define N 32 
++#define COEF 32470 ++#define COEF2 324700 ++ ++unsigned char in[N]; ++int out[N]; ++int out2[N]; ++ ++__attribute__ ((noinline)) void ++foo (int a) ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ { ++ out[i] = in[i] * COEF; ++ out2[i] = in[i] + a; ++ } ++} ++ ++int main (void) ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ __asm__ volatile (""); ++ } ++ ++ foo (COEF2); ++ ++ for (i = 0; i < N; i++) ++ if (out[i] != in[i] * COEF || out2[i] != in[i] + COEF2) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ +Index: gcc-4_6-branch/gcc/tree-vect-loop.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/tree-vect-loop.c 2012-01-09 15:02:15.000000000 -0800 ++++ gcc-4_6-branch/gcc/tree-vect-loop.c 2012-01-09 15:03:29.160918806 -0800 +@@ -181,6 +181,8 @@ + stmt_vec_info stmt_info; + int i; + HOST_WIDE_INT dummy; ++ gimple stmt, pattern_stmt = NULL; ++ bool analyze_pattern_stmt = false; + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "=== vect_determine_vectorization_factor ==="); +@@ -241,12 +243,20 @@ + } + } + +- for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) ++ for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;) + { +- tree vf_vectype; +- gimple stmt = gsi_stmt (si), pattern_stmt; +- stmt_info = vinfo_for_stmt (stmt); ++ tree vf_vectype; ++ ++ if (analyze_pattern_stmt) ++ { ++ stmt = pattern_stmt; ++ analyze_pattern_stmt = false; ++ } ++ else ++ stmt = gsi_stmt (si); + ++ stmt_info = vinfo_for_stmt (stmt); ++ + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "==> examining statement: "); +@@ -276,10 +286,17 @@ + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "skip."); ++ gsi_next (&si); + continue; + } + } + ++ else if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) ++ analyze_pattern_stmt = true; ++ + if (gimple_get_lhs (stmt) == NULL_TREE) + { + if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) +@@ -311,9 +328,7 @@ + } + else + { +- gcc_assert (!STMT_VINFO_DATA_REF (stmt_info) +- && !is_pattern_stmt_p (stmt_info)); +- ++ gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)); + scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); + if (vect_print_dump_info (REPORT_DETAILS)) + { +@@ -385,6 +400,9 @@ + if (!vectorization_factor + || (nunits > vectorization_factor)) + vectorization_factor = nunits; ++ ++ if (!analyze_pattern_stmt) ++ gsi_next (&si); + } + } + +@@ -4740,6 +4758,8 @@ + tree cond_expr = NULL_TREE; + gimple_seq cond_expr_stmt_list = NULL; + bool do_peeling_for_loop_bound; ++ gimple stmt, pattern_stmt; ++ bool transform_pattern_stmt = false; + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "=== vec_transform_loop ==="); +@@ -4827,11 +4847,19 @@ + } + } + +- for (si = gsi_start_bb (bb); !gsi_end_p (si);) ++ pattern_stmt = NULL; ++ for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;) + { +- gimple stmt = gsi_stmt (si), 
pattern_stmt; + bool is_store; + ++ if (transform_pattern_stmt) ++ { ++ stmt = pattern_stmt; ++ transform_pattern_stmt = false; ++ } ++ else ++ stmt = gsi_stmt (si); ++ + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "------>vectorizing statement: "); +@@ -4869,6 +4897,11 @@ + continue; + } + } ++ else if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) ++ transform_pattern_stmt = true; + + gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); + nunits = (unsigned int) TYPE_VECTOR_SUBPARTS ( +@@ -4897,8 +4930,9 @@ + /* Hybrid SLP stmts must be vectorized in addition to SLP. */ + if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info)) + { +- gsi_next (&si); +- continue; ++ if (!transform_pattern_stmt) ++ gsi_next (&si); ++ continue; + } + } + +@@ -4917,7 +4951,7 @@ + the chain. */ + vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info)); + gsi_remove (&si, true); +- continue; ++ continue; + } + else + { +@@ -4927,7 +4961,9 @@ + continue; + } + } +- gsi_next (&si); ++ ++ if (!transform_pattern_stmt) ++ gsi_next (&si); + } /* stmts in BB */ + } /* BBs in loop */ + +Index: gcc-4_6-branch/gcc/tree-vect-patterns.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/tree-vect-patterns.c 2012-01-09 15:02:15.000000000 -0800 ++++ gcc-4_6-branch/gcc/tree-vect-patterns.c 2012-01-09 15:03:29.160918806 -0800 +@@ -39,10 +39,13 @@ + #include "diagnostic-core.h" + + /* Pattern recognition functions */ +-static gimple vect_recog_widen_sum_pattern (gimple *, tree *, tree *); +-static gimple vect_recog_widen_mult_pattern (gimple *, tree *, tree *); +-static gimple vect_recog_dot_prod_pattern (gimple *, tree *, tree *); +-static gimple vect_recog_pow_pattern (gimple *, tree *, tree *); ++static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *, ++ tree *); ++static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *, ++ tree *); ++static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *, ++ tree *); ++static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *); + static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { + vect_recog_widen_mult_pattern, + vect_recog_widen_sum_pattern, +@@ -142,9 +145,9 @@ + + Input: + +- * LAST_STMT: A stmt from which the pattern search begins. In the example, +- when this function is called with S7, the pattern {S3,S4,S5,S6,S7} will be +- detected. ++ * STMTS: Contains a stmt from which the pattern search begins. In the ++ example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7} ++ will be detected. + + Output: + +@@ -165,12 +168,13 @@ + inner-loop nested in an outer-loop that us being vectorized). 
*/ + + static gimple +-vect_recog_dot_prod_pattern (gimple *last_stmt, tree *type_in, tree *type_out) ++vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in, ++ tree *type_out) + { +- gimple stmt; ++ gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0); + tree oprnd0, oprnd1; + tree oprnd00, oprnd01; +- stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt); ++ stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); + tree type, half_type; + gimple pattern_stmt; + tree prod_type; +@@ -178,10 +182,10 @@ + struct loop *loop = LOOP_VINFO_LOOP (loop_info); + tree var, rhs; + +- if (!is_gimple_assign (*last_stmt)) ++ if (!is_gimple_assign (last_stmt)) + return NULL; + +- type = gimple_expr_type (*last_stmt); ++ type = gimple_expr_type (last_stmt); + + /* Look for the following pattern + DX = (TYPE1) X; +@@ -207,7 +211,7 @@ + /* Starting from LAST_STMT, follow the defs of its uses in search + of the above pattern. */ + +- if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR) ++ if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) + return NULL; + + if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) +@@ -228,12 +232,12 @@ + + if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) + return NULL; +- oprnd0 = gimple_assign_rhs1 (*last_stmt); +- oprnd1 = gimple_assign_rhs2 (*last_stmt); ++ oprnd0 = gimple_assign_rhs1 (last_stmt); ++ oprnd1 = gimple_assign_rhs2 (last_stmt); + if (!types_compatible_p (TREE_TYPE (oprnd0), type) + || !types_compatible_p (TREE_TYPE (oprnd1), type)) + return NULL; +- stmt = *last_stmt; ++ stmt = last_stmt; + + if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true)) + { +@@ -319,11 +323,79 @@ + + /* We don't allow changing the order of the computation in the inner-loop + when doing outer-loop vectorization. */ +- gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt)); ++ gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); + + return pattern_stmt; + } + ++/* Handle two cases of multiplication by a constant. The first one is when ++ the constant, CONST_OPRND, fits the type (HALF_TYPE) of the second ++ operand (OPRND). In that case, we can peform widen-mult from HALF_TYPE to ++ TYPE. ++ ++ Otherwise, if the type of the result (TYPE) is at least 4 times bigger than ++ HALF_TYPE, and CONST_OPRND fits an intermediate type (2 times smaller than ++ TYPE), we can perform widen-mult from the intermediate type to TYPE and ++ replace a_T = (TYPE) a_t; with a_it - (interm_type) a_t; */ ++ ++static bool ++vect_handle_widen_mult_by_const (tree const_oprnd, tree *oprnd, ++ VEC (gimple, heap) **stmts, tree type, ++ tree *half_type, gimple def_stmt) ++{ ++ tree new_type, new_oprnd, tmp; ++ gimple new_stmt; ++ ++ if (int_fits_type_p (const_oprnd, *half_type)) ++ { ++ /* CONST_OPRND is a constant of HALF_TYPE. */ ++ *oprnd = gimple_assign_rhs1 (def_stmt); ++ return true; ++ } ++ ++ if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4) ++ || !vinfo_for_stmt (def_stmt)) ++ return false; ++ ++ /* TYPE is 4 times bigger than HALF_TYPE, try widen-mult for ++ a type 2 times bigger than HALF_TYPE. */ ++ new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2, ++ TYPE_UNSIGNED (type)); ++ if (!int_fits_type_p (const_oprnd, new_type)) ++ return false; ++ ++ /* Use NEW_TYPE for widen_mult. */ ++ if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt))) ++ { ++ new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); ++ /* Check if the already created pattern stmt is what we need. 
*/ ++ if (!is_gimple_assign (new_stmt) ++ || gimple_assign_rhs_code (new_stmt) != NOP_EXPR ++ || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type) ++ return false; ++ ++ *oprnd = gimple_assign_lhs (new_stmt); ++ } ++ else ++ { ++ /* Create a_T = (NEW_TYPE) a_t; */ ++ *oprnd = gimple_assign_rhs1 (def_stmt); ++ tmp = create_tmp_var (new_type, NULL); ++ add_referenced_var (tmp); ++ new_oprnd = make_ssa_name (tmp, NULL); ++ new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd, ++ NULL_TREE); ++ SSA_NAME_DEF_STMT (new_oprnd) = new_stmt; ++ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt; ++ VEC_safe_push (gimple, heap, *stmts, def_stmt); ++ *oprnd = new_oprnd; ++ } ++ ++ *half_type = new_type; ++ return true; ++} ++ ++ + /* Function vect_recog_widen_mult_pattern + + Try to find the following pattern: +@@ -361,28 +433,47 @@ + S3 a_T = (TYPE) a_t; + S5 prod_T = a_T * CONST; + +- Input: ++ A special case of multiplication by constants is when 'TYPE' is 4 times ++ bigger than 'type', but CONST fits an intermediate type 2 times smaller ++ than 'TYPE'. In that case we create an additional pattern stmt for S3 ++ to create a variable of the intermediate type, and perform widen-mult ++ on the intermediate type as well: ++ ++ type a_t; ++ interm_type a_it; ++ TYPE a_T, prod_T, prod_T'; ++ ++ S1 a_t = ; ++ S3 a_T = (TYPE) a_t; ++ '--> a_it = (interm_type) a_t; ++ S5 prod_T = a_T * CONST; ++ '--> prod_T' = a_it w* CONST; ++ ++ Input/Output: + +- * LAST_STMT: A stmt from which the pattern search begins. In the example, +- when this function is called with S5, the pattern {S3,S4,S5,(S6)} is +- detected. ++ * STMTS: Contains a stmt from which the pattern search begins. In the ++ example, when this function is called with S5, the pattern {S3,S4,S5,(S6)} ++ is detected. In case of unsigned widen-mult, the original stmt (S5) is ++ replaced with S6 in STMTS. In case of multiplication by a constant ++ of an intermediate type (the last case above), STMTS also contains S3 ++ (inserted before S5). + +- Output: ++ Output: + +- * TYPE_IN: The type of the input arguments to the pattern. ++ * TYPE_IN: The type of the input arguments to the pattern. + +- * TYPE_OUT: The type of the output of this pattern. ++ * TYPE_OUT: The type of the output of this pattern. + +- * Return value: A new stmt that will be used to replace the sequence of +- stmts that constitute the pattern. In this case it will be: +- WIDEN_MULT <a_t, b_t> +- */ ++ * Return value: A new stmt that will be used to replace the sequence of ++ stmts that constitute the pattern. In this case it will be: ++ WIDEN_MULT <a_t, b_t> ++*/ + + static gimple +-vect_recog_widen_mult_pattern (gimple *last_stmt, +- tree *type_in, +- tree *type_out) ++vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts, ++ tree *type_in, tree *type_out) + { ++ gimple last_stmt = VEC_pop (gimple, *stmts); + gimple def_stmt0, def_stmt1; + tree oprnd0, oprnd1; + tree type, half_type0, half_type1; +@@ -395,27 +486,27 @@ + VEC (tree, heap) *dummy_vec; + bool op0_ok, op1_ok; + +- if (!is_gimple_assign (*last_stmt)) ++ if (!is_gimple_assign (last_stmt)) + return NULL; + +- type = gimple_expr_type (*last_stmt); ++ type = gimple_expr_type (last_stmt); + + /* Starting from LAST_STMT, follow the defs of its uses in search + of the above pattern. 
*/ + +- if (gimple_assign_rhs_code (*last_stmt) != MULT_EXPR) ++ if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR) + return NULL; + +- oprnd0 = gimple_assign_rhs1 (*last_stmt); +- oprnd1 = gimple_assign_rhs2 (*last_stmt); ++ oprnd0 = gimple_assign_rhs1 (last_stmt); ++ oprnd1 = gimple_assign_rhs2 (last_stmt); + if (!types_compatible_p (TREE_TYPE (oprnd0), type) + || !types_compatible_p (TREE_TYPE (oprnd1), type)) + return NULL; + + /* Check argument 0. */ +- op0_ok = widened_name_p (oprnd0, *last_stmt, &half_type0, &def_stmt0, false); ++ op0_ok = widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false); + /* Check argument 1. */ +- op1_ok = widened_name_p (oprnd1, *last_stmt, &half_type1, &def_stmt1, false); ++ op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false); + + /* In case of multiplication by a constant one of the operands may not match + the pattern, but not both. */ +@@ -429,29 +520,21 @@ + } + else if (!op0_ok) + { +- if (CONSTANT_CLASS_P (oprnd0) +- && TREE_CODE (half_type1) == INTEGER_TYPE +- && tree_int_cst_lt (oprnd0, TYPE_MAXVAL (half_type1)) +- && tree_int_cst_lt (TYPE_MINVAL (half_type1), oprnd0)) +- { +- /* OPRND0 is a constant of HALF_TYPE1. */ +- half_type0 = half_type1; +- oprnd1 = gimple_assign_rhs1 (def_stmt1); +- } ++ if (TREE_CODE (oprnd0) == INTEGER_CST ++ && TREE_CODE (half_type1) == INTEGER_TYPE ++ && vect_handle_widen_mult_by_const (oprnd0, &oprnd1, stmts, type, ++ &half_type1, def_stmt1)) ++ half_type0 = half_type1; + else + return NULL; + } + else if (!op1_ok) + { +- if (CONSTANT_CLASS_P (oprnd1) ++ if (TREE_CODE (oprnd1) == INTEGER_CST + && TREE_CODE (half_type0) == INTEGER_TYPE +- && tree_int_cst_lt (oprnd1, TYPE_MAXVAL (half_type0)) +- && tree_int_cst_lt (TYPE_MINVAL (half_type0), oprnd1)) +- { +- /* OPRND1 is a constant of HALF_TYPE0. */ +- half_type1 = half_type0; +- oprnd0 = gimple_assign_rhs1 (def_stmt0); +- } ++ && vect_handle_widen_mult_by_const (oprnd1, &oprnd0, stmts, type, ++ &half_type0, def_stmt0)) ++ half_type1 = half_type0; + else + return NULL; + } +@@ -461,7 +544,7 @@ + Use unsigned TYPE as the type for WIDEN_MULT_EXPR. 
*/ + if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0)) + { +- tree lhs = gimple_assign_lhs (*last_stmt), use_lhs; ++ tree lhs = gimple_assign_lhs (last_stmt), use_lhs; + imm_use_iterator imm_iter; + use_operand_p use_p; + int nuses = 0; +@@ -491,7 +574,7 @@ + return NULL; + + type = use_type; +- *last_stmt = use_stmt; ++ last_stmt = use_stmt; + } + + if (!types_compatible_p (half_type0, half_type1)) +@@ -506,7 +589,7 @@ + vectype_out = get_vectype_for_scalar_type (type); + if (!vectype + || !vectype_out +- || !supportable_widening_operation (WIDEN_MULT_EXPR, *last_stmt, ++ || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt, + vectype_out, vectype, + &dummy, &dummy, &dummy_code, + &dummy_code, &dummy_int, &dummy_vec)) +@@ -524,6 +607,7 @@ + if (vect_print_dump_info (REPORT_DETAILS)) + print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); + ++ VEC_safe_push (gimple, heap, *stmts, last_stmt); + return pattern_stmt; + } + +@@ -555,16 +639,17 @@ + */ + + static gimple +-vect_recog_pow_pattern (gimple *last_stmt, tree *type_in, tree *type_out) ++vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in, tree *type_out) + { ++ gimple last_stmt = VEC_index (gimple, *stmts, 0); + tree fn, base, exp = NULL; + gimple stmt; + tree var; + +- if (!is_gimple_call (*last_stmt) || gimple_call_lhs (*last_stmt) == NULL) ++ if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL) + return NULL; + +- fn = gimple_call_fndecl (*last_stmt); ++ fn = gimple_call_fndecl (last_stmt); + if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL) + return NULL; + +@@ -574,8 +659,8 @@ + case BUILT_IN_POWI: + case BUILT_IN_POWF: + case BUILT_IN_POW: +- base = gimple_call_arg (*last_stmt, 0); +- exp = gimple_call_arg (*last_stmt, 1); ++ base = gimple_call_arg (last_stmt, 0); ++ exp = gimple_call_arg (last_stmt, 1); + if (TREE_CODE (exp) != REAL_CST + && TREE_CODE (exp) != INTEGER_CST) + return NULL; +@@ -667,21 +752,23 @@ + inner-loop nested in an outer-loop that us being vectorized). */ + + static gimple +-vect_recog_widen_sum_pattern (gimple *last_stmt, tree *type_in, tree *type_out) ++vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in, ++ tree *type_out) + { ++ gimple last_stmt = VEC_index (gimple, *stmts, 0); + gimple stmt; + tree oprnd0, oprnd1; +- stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt); ++ stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); + tree type, half_type; + gimple pattern_stmt; + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); + struct loop *loop = LOOP_VINFO_LOOP (loop_info); + tree var; + +- if (!is_gimple_assign (*last_stmt)) ++ if (!is_gimple_assign (last_stmt)) + return NULL; + +- type = gimple_expr_type (*last_stmt); ++ type = gimple_expr_type (last_stmt); + + /* Look for the following pattern + DX = (TYPE) X; +@@ -693,25 +780,25 @@ + /* Starting from LAST_STMT, follow the defs of its uses in search + of the above pattern. */ + +- if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR) ++ if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) + return NULL; + + if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) + return NULL; + +- oprnd0 = gimple_assign_rhs1 (*last_stmt); +- oprnd1 = gimple_assign_rhs2 (*last_stmt); ++ oprnd0 = gimple_assign_rhs1 (last_stmt); ++ oprnd1 = gimple_assign_rhs2 (last_stmt); + if (!types_compatible_p (TREE_TYPE (oprnd0), type) + || !types_compatible_p (TREE_TYPE (oprnd1), type)) + return NULL; + +- /* So far so good. 
Since *last_stmt was detected as a (summation) reduction, ++ /* So far so good. Since last_stmt was detected as a (summation) reduction, + we know that oprnd1 is the reduction variable (defined by a loop-header + phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. + Left to check that oprnd0 is defined by a cast from type 'type' to type + 'TYPE'. */ + +- if (!widened_name_p (oprnd0, *last_stmt, &half_type, &stmt, true)) ++ if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true)) + return NULL; + + oprnd0 = gimple_assign_rhs1 (stmt); +@@ -732,8 +819,9 @@ + + /* We don't allow changing the order of the computation in the inner-loop + when doing outer-loop vectorization. */ +- gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt)); ++ gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); + ++ VEC_safe_push (gimple, heap, *stmts, last_stmt); + return pattern_stmt; + } + +@@ -762,7 +850,7 @@ + + static void + vect_pattern_recog_1 ( +- gimple (* vect_recog_func) (gimple *, tree *, tree *), ++ gimple (* vect_recog_func) (VEC (gimple, heap) **, tree *, tree *), + gimple_stmt_iterator si) + { + gimple stmt = gsi_stmt (si), pattern_stmt; +@@ -774,12 +862,14 @@ + enum tree_code code; + int i; + gimple next; ++ VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1); + +- pattern_stmt = (* vect_recog_func) (&stmt, &type_in, &type_out); ++ VEC_quick_push (gimple, stmts_to_replace, stmt); ++ pattern_stmt = (* vect_recog_func) (&stmts_to_replace, &type_in, &type_out); + if (!pattern_stmt) + return; + +- si = gsi_for_stmt (stmt); ++ stmt = VEC_last (gimple, stmts_to_replace); + stmt_info = vinfo_for_stmt (stmt); + loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + +@@ -849,6 +939,35 @@ + FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next) + if (next == stmt) + VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i); ++ ++ /* In case of widen-mult by a constant, it is possible that an additional ++ pattern stmt is created and inserted in STMTS_TO_REPLACE. We create a ++ stmt_info for it, and mark the relevant statements. */ ++ for (i = 0; VEC_iterate (gimple, stmts_to_replace, i, stmt) ++ && (unsigned) i < (VEC_length (gimple, stmts_to_replace) - 1); ++ i++) ++ { ++ stmt_info = vinfo_for_stmt (stmt); ++ pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "additional pattern stmt: "); ++ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); ++ } ++ ++ set_vinfo_for_stmt (pattern_stmt, ++ new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL)); ++ gimple_set_bb (pattern_stmt, gimple_bb (stmt)); ++ pattern_stmt_info = vinfo_for_stmt (pattern_stmt); ++ ++ STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt; ++ STMT_VINFO_DEF_TYPE (pattern_stmt_info) ++ = STMT_VINFO_DEF_TYPE (stmt_info); ++ STMT_VINFO_VECTYPE (pattern_stmt_info) = STMT_VINFO_VECTYPE (stmt_info); ++ STMT_VINFO_IN_PATTERN_P (stmt_info) = true; ++ } ++ ++ VEC_free (gimple, heap, stmts_to_replace); + } + + +@@ -896,10 +1015,8 @@ + + If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3} + (because they are marked as irrelevant). It will vectorize S6, and record +- a pointer to the new vector stmt VS6 both from S6 (as usual), and also +- from S4. We do that so that when we get to vectorizing stmts that use the +- def of S4 (like S5 that uses a_0), we'll know where to take the relevant +- vector-def from. 
S4 will be skipped, and S5 will be vectorized as usual: ++ a pointer to the new vector stmt VS6 from S6 (as usual). ++ S4 will be skipped, and S5 will be vectorized as usual: + + in_pattern_p related_stmt vec_stmt + S1: a_i = .... - - - +@@ -915,7 +1032,21 @@ + elsewhere), and we'll end up with: + + VS6: va_new = .... +- VS5: ... = ..vuse(va_new).. */ ++ VS5: ... = ..vuse(va_new).. ++ ++ In case of more than one pattern statements, e.g., widen-mult with ++ intermediate type: ++ ++ S1 a_t = ; ++ S2 a_T = (TYPE) a_t; ++ '--> S3: a_it = (interm_type) a_t; ++ S4 prod_T = a_T * CONST; ++ '--> S5: prod_T' = a_it w* CONST; ++ ++ there may be other users of a_T outside the pattern. In that case S2 will ++ be marked as relevant (as well as S3), and both S2 and S3 will be analyzed ++ and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will ++ be recorded in S3. */ + + void + vect_pattern_recog (loop_vec_info loop_vinfo) +@@ -925,7 +1056,7 @@ + unsigned int nbbs = loop->num_nodes; + gimple_stmt_iterator si; + unsigned int i, j; +- gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *); ++ gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "=== vect_pattern_recog ==="); +Index: gcc-4_6-branch/gcc/tree-vect-slp.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/tree-vect-slp.c 2012-01-09 15:02:15.000000000 -0800 ++++ gcc-4_6-branch/gcc/tree-vect-slp.c 2012-01-09 15:03:29.160918806 -0800 +@@ -152,7 +152,9 @@ + if (loop && def_stmt && gimple_bb (def_stmt) + && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) + && vinfo_for_stmt (def_stmt) +- && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))) ++ && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)) ++ && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt)) ++ && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) + { + if (!*first_stmt_dt0) + *pattern0 = true; +Index: gcc-4_6-branch/gcc/tree-vect-stmts.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/tree-vect-stmts.c 2012-01-09 15:02:15.000000000 -0800 ++++ gcc-4_6-branch/gcc/tree-vect-stmts.c 2012-01-09 15:06:23.636927250 -0800 +@@ -126,33 +126,72 @@ + + static void + vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt, +- enum vect_relevant relevant, bool live_p) ++ enum vect_relevant relevant, bool live_p, ++ bool used_in_pattern) + { + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info); + bool save_live_p = STMT_VINFO_LIVE_P (stmt_info); ++ gimple pattern_stmt; + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p); + ++ /* If this stmt is an original stmt in a pattern, we might need to mark its ++ related pattern stmt instead of the original stmt. However, such stmts ++ may have their own uses that are not in any pattern, in such cases the ++ stmt itself should be marked. */ + if (STMT_VINFO_IN_PATTERN_P (stmt_info)) + { +- gimple pattern_stmt; ++ bool found = false; ++ if (!used_in_pattern) ++ { ++ imm_use_iterator imm_iter; ++ use_operand_p use_p; ++ gimple use_stmt; ++ tree lhs; ++ ++ if (is_gimple_assign (stmt)) ++ lhs = gimple_assign_lhs (stmt); ++ else ++ lhs = gimple_call_lhs (stmt); ++ ++ /* This use is out of pattern use, if LHS has other uses that are ++ pattern uses, we should mark the stmt itself, and not the pattern ++ stmt. 
*/ ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) ++ { ++ if (is_gimple_debug (USE_STMT (use_p))) ++ continue; ++ use_stmt = USE_STMT (use_p); ++ ++ if (vinfo_for_stmt (use_stmt) ++ && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt))) ++ { ++ found = true; ++ break; ++ } ++ } ++ } ++ ++ if (!found) ++ { ++ /* This is the last stmt in a sequence that was detected as a ++ pattern that can potentially be vectorized. Don't mark the stmt ++ as relevant/live because it's not going to be vectorized. ++ Instead mark the pattern-stmt that replaces it. */ ++ ++ pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); + +- /* This is the last stmt in a sequence that was detected as a +- pattern that can potentially be vectorized. Don't mark the stmt +- as relevant/live because it's not going to be vectorized. +- Instead mark the pattern-stmt that replaces it. */ +- +- pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); +- +- if (vect_print_dump_info (REPORT_DETAILS)) +- fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live."); +- stmt_info = vinfo_for_stmt (pattern_stmt); +- gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt); +- save_relevant = STMT_VINFO_RELEVANT (stmt_info); +- save_live_p = STMT_VINFO_LIVE_P (stmt_info); +- stmt = pattern_stmt; ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "last stmt in pattern. don't mark" ++ " relevant/live."); ++ stmt_info = vinfo_for_stmt (pattern_stmt); ++ gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt); ++ save_relevant = STMT_VINFO_RELEVANT (stmt_info); ++ save_live_p = STMT_VINFO_LIVE_P (stmt_info); ++ stmt = pattern_stmt; ++ } + } + + STMT_VINFO_LIVE_P (stmt_info) |= live_p; +@@ -437,7 +476,8 @@ + } + } + +- vect_mark_relevant (worklist, def_stmt, relevant, live_p); ++ vect_mark_relevant (worklist, def_stmt, relevant, live_p, ++ is_pattern_stmt_p (stmt_vinfo)); + return true; + } + +@@ -494,7 +534,7 @@ + } + + if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p)) +- vect_mark_relevant (&worklist, phi, relevant, live_p); ++ vect_mark_relevant (&worklist, phi, relevant, live_p, false); + } + for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) + { +@@ -506,7 +546,7 @@ + } + + if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p)) +- vect_mark_relevant (&worklist, stmt, relevant, live_p); ++ vect_mark_relevant (&worklist, stmt, relevant, live_p, false); + } + } + +@@ -613,42 +653,55 @@ + if (is_gimple_assign (stmt)) + { + tree rhs = gimple_assign_rhs1 (stmt); +- if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) +- == GIMPLE_SINGLE_RHS) ++ unsigned int op_num; ++ tree op; ++ switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))) + { +- unsigned int op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 +- (stmt)); +- for (i = 0; i < op_num; i++) +- { +- tree op = TREE_OPERAND (rhs, i); +- if (!process_use (stmt, op, loop_vinfo, live_p, relevant, +- &worklist)) ++ case GIMPLE_SINGLE_RHS: ++ op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)); ++ for (i = 0; i < op_num; i++) + { +- VEC_free (gimple, heap, worklist); +- return false; ++ op = TREE_OPERAND (rhs, i); ++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } + } +- } +- } +- else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) +- == GIMPLE_BINARY_RHS) +- { +- tree op = gimple_assign_rhs1 (stmt); +- if (!process_use (stmt, op, loop_vinfo, live_p, relevant, +- &worklist)) +- { +- VEC_free (gimple, heap, worklist); +- 
return false; +- } +- op = gimple_assign_rhs2 (stmt); +- if (!process_use (stmt, op, loop_vinfo, live_p, relevant, +- &worklist)) +- { +- VEC_free (gimple, heap, worklist); +- return false; +- } ++ break; ++ ++ case GIMPLE_BINARY_RHS: ++ op = gimple_assign_rhs1 (stmt); ++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ op = gimple_assign_rhs2 (stmt); ++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ break; ++ ++ case GIMPLE_UNARY_RHS: ++ op = gimple_assign_rhs1 (stmt); ++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ ++ break; ++ ++ default: ++ return false; + } +- else +- return false; + } + else if (is_gimple_call (stmt)) + { +@@ -1210,7 +1263,14 @@ + + /* Get the def from the vectorized stmt. */ + def_stmt_info = vinfo_for_stmt (def_stmt); ++ + vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); ++ /* Get vectorized pattern statement. */ ++ if (!vec_stmt ++ && STMT_VINFO_IN_PATTERN_P (def_stmt_info) ++ && !STMT_VINFO_RELEVANT (def_stmt_info)) ++ vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt ( ++ STMT_VINFO_RELATED_STMT (def_stmt_info))); + gcc_assert (vec_stmt); + if (gimple_code (vec_stmt) == GIMPLE_PHI) + vec_oprnd = PHI_RESULT (vec_stmt); +@@ -4894,6 +4954,7 @@ + enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info); + bool ok; + tree scalar_type, vectype; ++ gimple pattern_stmt; + + if (vect_print_dump_info (REPORT_DETAILS)) + { +@@ -4915,16 +4976,22 @@ + - any LABEL_EXPRs in the loop + - computations that are used only for array indexing or loop control. + In basic blocks we only analyze statements that are a part of some SLP +- instance, therefore, all the statements are relevant. */ ++ instance, therefore, all the statements are relevant. ++ ++ Pattern statement need to be analyzed instead of the original statement ++ if the original statement is not relevant. Otherwise, we analyze both ++ statements. */ + ++ pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); + if (!STMT_VINFO_RELEVANT_P (stmt_info) + && !STMT_VINFO_LIVE_P (stmt_info)) + { +- gimple pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); + if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && pattern_stmt + && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) + || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) + { ++ /* Analyze PATTERN_STMT instead of the original stmt. */ + stmt = pattern_stmt; + stmt_info = vinfo_for_stmt (pattern_stmt); + if (vect_print_dump_info (REPORT_DETAILS)) +@@ -4941,6 +5008,21 @@ + return true; + } + } ++ else if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && pattern_stmt ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) ++ { ++ /* Analyze PATTERN_STMT too. 
*/ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "==> examining pattern statement: "); ++ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); ++ } ++ ++ if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node)) ++ return false; ++ } + + switch (STMT_VINFO_DEF_TYPE (stmt_info)) + { +@@ -5074,7 +5156,6 @@ + bool is_store = false; + gimple vec_stmt = NULL; + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); +- gimple orig_stmt_in_pattern; + bool done; + + switch (STMT_VINFO_TYPE (stmt_info)) +@@ -5213,21 +5294,7 @@ + } + + if (vec_stmt) +- { + STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; +- orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info); +- if (orig_stmt_in_pattern) +- { +- stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern); +- /* STMT was inserted by the vectorizer to replace a computation idiom. +- ORIG_STMT_IN_PATTERN is a stmt in the original sequence that +- computed this idiom. We need to record a pointer to VEC_STMT in +- the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the +- documentation of vect_pattern_recog. */ +- if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) +- STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt; +- } +- } + + return is_store; + } +@@ -5605,8 +5672,12 @@ + || *dt == vect_nested_cycle) + { + stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt); +- if (STMT_VINFO_IN_PATTERN_P (stmt_info)) ++ ++ if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && !STMT_VINFO_RELEVANT (stmt_info) ++ && !STMT_VINFO_LIVE_P (stmt_info)) + stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); ++ + *vectype = STMT_VINFO_VECTYPE (stmt_info); + gcc_assert (*vectype != NULL_TREE); + } +Index: gcc-4_6-branch/gcc/tree-vectorizer.h +=================================================================== +--- gcc-4_6-branch.orig/gcc/tree-vectorizer.h 2012-01-09 15:02:15.000000000 -0800 ++++ gcc-4_6-branch/gcc/tree-vectorizer.h 2012-01-09 15:03:29.164918806 -0800 +@@ -890,7 +890,7 @@ + /* Pattern recognition functions. + Additional pattern recognition functions can (and will) be added + in the future. */ +-typedef gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *); ++typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); + #define NUM_PATTERNS 4 + void vect_pattern_recog (loop_vec_info); + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106770.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106770.patch new file mode 100644 index 0000000000..82ae3a1327 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106770.patch @@ -0,0 +1,138 @@ +2011-07-11 Revital Eres <revital.eres@linaro.org> + + Backport from mainline -r175090. + gcc/ + * ddg.c (add_intra_loop_mem_dep): New function. + (build_intra_loop_deps): Call it. + + gcc/testsuite + * gcc.dg/sms-9.c: New file. + +=== modified file 'gcc/ddg.c' +--- old/gcc/ddg.c 2011-05-13 16:03:40 +0000 ++++ new/gcc/ddg.c 2011-07-04 11:00:06 +0000 +@@ -390,6 +390,33 @@ + &PATTERN (insn2)); + } + ++/* Given two nodes, analyze their RTL insns and add intra-loop mem deps ++ to ddg G. */ ++static void ++add_intra_loop_mem_dep (ddg_ptr g, ddg_node_ptr from, ddg_node_ptr to) ++{ ++ ++ if ((from->cuid == to->cuid) ++ || !insns_may_alias_p (from->insn, to->insn)) ++ /* Do not create edge if memory references have disjoint alias sets ++ or 'to' and 'from' are the same instruction. 
*/ ++ return; ++ ++ if (mem_write_insn_p (from->insn)) ++ { ++ if (mem_read_insn_p (to->insn)) ++ create_ddg_dep_no_link (g, from, to, ++ DEBUG_INSN_P (to->insn) ++ ? ANTI_DEP : TRUE_DEP, MEM_DEP, 0); ++ else ++ create_ddg_dep_no_link (g, from, to, ++ DEBUG_INSN_P (to->insn) ++ ? ANTI_DEP : OUTPUT_DEP, MEM_DEP, 0); ++ } ++ else if (!mem_read_insn_p (to->insn)) ++ create_ddg_dep_no_link (g, from, to, ANTI_DEP, MEM_DEP, 0); ++} ++ + /* Given two nodes, analyze their RTL insns and add inter-loop mem deps + to ddg G. */ + static void +@@ -477,10 +504,22 @@ + if (DEBUG_INSN_P (j_node->insn)) + continue; + if (mem_access_insn_p (j_node->insn)) +- /* Don't bother calculating inter-loop dep if an intra-loop dep +- already exists. */ ++ { ++ /* Don't bother calculating inter-loop dep if an intra-loop dep ++ already exists. */ + if (! TEST_BIT (dest_node->successors, j)) + add_inter_loop_mem_dep (g, dest_node, j_node); ++ /* If -fmodulo-sched-allow-regmoves ++ is set certain anti-dep edges are not created. ++ It might be that these anti-dep edges are on the ++ path from one memory instruction to another such that ++ removing these edges could cause a violation of the ++ memory dependencies. Thus we add intra edges between ++ every two memory instructions in this case. */ ++ if (flag_modulo_sched_allow_regmoves ++ && !TEST_BIT (dest_node->predecessors, j)) ++ add_intra_loop_mem_dep (g, j_node, dest_node); ++ } + } + } + } + +=== added file 'gcc/testsuite/gcc.dg/sms-9.c' +--- old/gcc/testsuite/gcc.dg/sms-9.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/sms-9.c 2011-07-04 11:00:06 +0000 +@@ -0,0 +1,60 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -fmodulo-sched -fno-auto-inc-dec -O2 -fmodulo-sched-allow-regmoves" } */ ++ ++#include <stdlib.h> ++#include <stdarg.h> ++ ++struct df_ref_info ++{ ++ unsigned int *begin; ++ unsigned int *count; ++}; ++ ++extern void *memset (void *s, int c, __SIZE_TYPE__ n); ++ ++ ++__attribute__ ((noinline)) ++ int ++ df_reorganize_refs_by_reg_by_insn (struct df_ref_info *ref_info, ++ int num, unsigned int start) ++{ ++ unsigned int m = num; ++ unsigned int offset = 77; ++ unsigned int r; ++ ++ for (r = start; r < m; r++) ++ { ++ ref_info->begin[r] = offset; ++ offset += ref_info->count[r]; ++ ref_info->count[r] = 0; ++ } ++ ++ return offset; ++} ++ ++int ++main () ++{ ++ struct df_ref_info temp; ++ int num = 100; ++ unsigned int start = 5; ++ int i, offset; ++ ++ temp.begin = malloc (100 * sizeof (unsigned int)); ++ temp.count = malloc (100 * sizeof (unsigned int)); ++ ++ memset (temp.begin, 0, sizeof (unsigned int) * num); ++ memset (temp.count, 0, sizeof (unsigned int) * num); ++ ++ for (i = 0; i < num; i++) ++ temp.count[i] = i + 1; ++ ++ offset = df_reorganize_refs_by_reg_by_insn (&temp, num, start); ++ ++ if (offset != 5112) ++ abort (); ++ ++ free (temp.begin); ++ free (temp.count); ++ return 0; ++} + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106771.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106771.patch new file mode 100644 index 0000000000..70c8638cd2 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106771.patch @@ -0,0 +1,211 @@ +2011-07-11 Revital Eres <revital.eres@linaro.org> + + Backport from mainline -r175091 + gcc/ + * modulo-sched.c (struct ps_insn): Remove row_rest_count + field. + (struct partial_schedule): Add rows_length field. + (verify_partial_schedule): Check rows_length. + (ps_insert_empty_row): Handle rows_length. 
+ (create_partial_schedule): Likewise. + (free_partial_schedule): Likewise. + (reset_partial_schedule): Likewise. + (create_ps_insn): Remove rest_count argument. + (remove_node_from_ps): Update rows_length. + (add_node_to_ps): Update rows_length and call create_ps_insn without + passing row_rest_count. + (rotate_partial_schedule): Update rows_length. + +=== modified file 'gcc/modulo-sched.c' +--- old/gcc/modulo-sched.c 2011-05-13 16:03:40 +0000 ++++ new/gcc/modulo-sched.c 2011-07-04 12:01:34 +0000 +@@ -134,8 +134,6 @@ + ps_insn_ptr next_in_row, + prev_in_row; + +- /* The number of nodes in the same row that come after this node. */ +- int row_rest_count; + }; + + /* Holds the partial schedule as an array of II rows. Each entry of the +@@ -149,6 +147,12 @@ + /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */ + ps_insn_ptr *rows; + ++ /* rows_length[i] holds the number of instructions in the row. ++ It is used only (as an optimization) to back off quickly from ++ trying to schedule a node in a full row; that is, to avoid running ++ through futile DFA state transitions. */ ++ int *rows_length; ++ + /* The earliest absolute cycle of an insn in the partial schedule. */ + int min_cycle; + +@@ -1907,6 +1911,7 @@ + int ii = ps->ii; + int new_ii = ii + 1; + int row; ++ int *rows_length_new; + + verify_partial_schedule (ps, sched_nodes); + +@@ -1921,9 +1926,11 @@ + rotate_partial_schedule (ps, PS_MIN_CYCLE (ps)); + + rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr)); ++ rows_length_new = (int *) xcalloc (new_ii, sizeof (int)); + for (row = 0; row < split_row; row++) + { + rows_new[row] = ps->rows[row]; ++ rows_length_new[row] = ps->rows_length[row]; + ps->rows[row] = NULL; + for (crr_insn = rows_new[row]; + crr_insn; crr_insn = crr_insn->next_in_row) +@@ -1944,6 +1951,7 @@ + for (row = split_row; row < ii; row++) + { + rows_new[row + 1] = ps->rows[row]; ++ rows_length_new[row + 1] = ps->rows_length[row]; + ps->rows[row] = NULL; + for (crr_insn = rows_new[row + 1]; + crr_insn; crr_insn = crr_insn->next_in_row) +@@ -1965,6 +1973,8 @@ + + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0); + free (ps->rows); + ps->rows = rows_new; ++ free (ps->rows_length); ++ ps->rows_length = rows_length_new; + ps->ii = new_ii; + gcc_assert (ps->min_cycle >= 0); + +@@ -2040,16 +2050,23 @@ + ps_insn_ptr crr_insn; + + for (row = 0; row < ps->ii; row++) +- for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) +- { +- ddg_node_ptr u = crr_insn->node; +- +- gcc_assert (TEST_BIT (sched_nodes, u->cuid)); +- /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by +- popcount (sched_nodes) == number of insns in ps. */ +- gcc_assert (SCHED_TIME (u) >= ps->min_cycle); +- gcc_assert (SCHED_TIME (u) <= ps->max_cycle); +- } ++ { ++ int length = 0; ++ ++ for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) ++ { ++ ddg_node_ptr u = crr_insn->node; ++ ++ length++; ++ gcc_assert (TEST_BIT (sched_nodes, u->cuid)); ++ /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by ++ popcount (sched_nodes) == number of insns in ps. 
*/ ++ gcc_assert (SCHED_TIME (u) >= ps->min_cycle); ++ gcc_assert (SCHED_TIME (u) <= ps->max_cycle); ++ } ++ ++ gcc_assert (ps->rows_length[row] == length); ++ } + } + + +@@ -2455,6 +2472,7 @@ + { + partial_schedule_ptr ps = XNEW (struct partial_schedule); + ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr)); ++ ps->rows_length = (int *) xcalloc (ii, sizeof (int)); + ps->ii = ii; + ps->history = history; + ps->min_cycle = INT_MAX; +@@ -2493,6 +2511,7 @@ + return; + free_ps_insns (ps); + free (ps->rows); ++ free (ps->rows_length); + free (ps); + } + +@@ -2510,6 +2529,8 @@ + ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii + * sizeof (ps_insn_ptr)); + memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr)); ++ ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int)); ++ memset (ps->rows_length, 0, new_ii * sizeof (int)); + ps->ii = new_ii; + ps->min_cycle = INT_MAX; + ps->max_cycle = INT_MIN; +@@ -2538,14 +2559,13 @@ + + /* Creates an object of PS_INSN and initializes it to the given parameters. */ + static ps_insn_ptr +-create_ps_insn (ddg_node_ptr node, int rest_count, int cycle) ++create_ps_insn (ddg_node_ptr node, int cycle) + { + ps_insn_ptr ps_i = XNEW (struct ps_insn); + + ps_i->node = node; + ps_i->next_in_row = NULL; + ps_i->prev_in_row = NULL; +- ps_i->row_rest_count = rest_count; + ps_i->cycle = cycle; + + return ps_i; +@@ -2578,6 +2598,8 @@ + if (ps_i->next_in_row) + ps_i->next_in_row->prev_in_row = ps_i->prev_in_row; + } ++ ++ ps->rows_length[row] -= 1; + free (ps_i); + return true; + } +@@ -2734,17 +2756,12 @@ + sbitmap must_precede, sbitmap must_follow) + { + ps_insn_ptr ps_i; +- int rest_count = 1; + int row = SMODULO (cycle, ps->ii); + +- if (ps->rows[row] +- && ps->rows[row]->row_rest_count >= issue_rate) ++ if (ps->rows_length[row] >= issue_rate) + return NULL; + +- if (ps->rows[row]) +- rest_count += ps->rows[row]->row_rest_count; +- +- ps_i = create_ps_insn (node, rest_count, cycle); ++ ps_i = create_ps_insn (node, cycle); + + /* Finds and inserts PS_I according to MUST_FOLLOW and + MUST_PRECEDE. */ +@@ -2754,6 +2771,7 @@ + return NULL; + } + ++ ps->rows_length[row] += 1; + return ps_i; + } + +@@ -2909,11 +2927,16 @@ + for (i = 0; i < backward_rotates; i++) + { + ps_insn_ptr first_row = ps->rows[0]; ++ int first_row_length = ps->rows_length[0]; + + for (row = 0; row < last_row; row++) +- ps->rows[row] = ps->rows[row+1]; ++ { ++ ps->rows[row] = ps->rows[row + 1]; ++ ps->rows_length[row] = ps->rows_length[row + 1]; ++ } + + ps->rows[last_row] = first_row; ++ ps->rows_length[last_row] = first_row_length; + } + + ps->max_cycle -= start_cycle; + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106772.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106772.patch new file mode 100644 index 0000000000..d918f9c58c --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106772.patch @@ -0,0 +1,350 @@ +2011-07-11 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + gcc/ + 2011-06-22 Dmitry Plotnikov <dplotnikov@ispras.ru> + Dmitry Melnik <dm@ispras.ru> + + * config/arm/arm.c (neon_immediate_valid_for_shift): New function. + (neon_output_shift_immediate): Ditto. + * config/arm/arm-protos.h (neon_immediate_valid_for_shift): New + prototype. + (neon_output_shift_immediate): Ditto. + * config/arm/neon.md (vashl<mode>3): Modified constraint. + (vashr<mode>3_imm): New insn pattern. + (vlshr<mode>3_imm): Ditto. + (vashr<mode>3): Modified constraint. 
+ (vlshr<mode>3): Ditto. + * config/arm/predicates.md (imm_for_neon_lshift_operand): New + predicate. + (imm_for_neon_rshift_operand): Ditto. + (imm_lshift_or_reg_neon): Ditto. + (imm_rshift_or_reg_neon): Ditto. + + * optabs.c (init_optabs): Init optab codes for vashl, vashr, vlshr. + +=== modified file 'gcc/config/arm/arm-protos.h' +--- old/gcc/config/arm/arm-protos.h 2011-06-14 16:00:30 +0000 ++++ new/gcc/config/arm/arm-protos.h 2011-07-04 14:03:49 +0000 +@@ -64,8 +64,12 @@ + extern int neon_immediate_valid_for_move (rtx, enum machine_mode, rtx *, int *); + extern int neon_immediate_valid_for_logic (rtx, enum machine_mode, int, rtx *, + int *); ++extern int neon_immediate_valid_for_shift (rtx, enum machine_mode, rtx *, ++ int *, bool); + extern char *neon_output_logic_immediate (const char *, rtx *, + enum machine_mode, int, int); ++extern char *neon_output_shift_immediate (const char *, char, rtx *, ++ enum machine_mode, int, bool); + extern void neon_pairwise_reduce (rtx, rtx, enum machine_mode, + rtx (*) (rtx, rtx, rtx)); + extern rtx neon_make_constant (rtx); + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-06-29 09:13:17 +0000 ++++ new/gcc/config/arm/arm.c 2011-07-04 14:03:49 +0000 +@@ -8863,6 +8863,66 @@ + return 1; + } + ++/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If ++ the immediate is valid, write a constant suitable for using as an operand ++ to VSHR/VSHL to *MODCONST and the corresponding element width to ++ *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift, ++ because they have different limitations. */ ++ ++int ++neon_immediate_valid_for_shift (rtx op, enum machine_mode mode, ++ rtx *modconst, int *elementwidth, ++ bool isleftshift) ++{ ++ unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode)); ++ unsigned int n_elts = CONST_VECTOR_NUNITS (op), i; ++ unsigned HOST_WIDE_INT last_elt = 0; ++ unsigned HOST_WIDE_INT maxshift; ++ ++ /* Split vector constant out into a byte vector. */ ++ for (i = 0; i < n_elts; i++) ++ { ++ rtx el = CONST_VECTOR_ELT (op, i); ++ unsigned HOST_WIDE_INT elpart; ++ ++ if (GET_CODE (el) == CONST_INT) ++ elpart = INTVAL (el); ++ else if (GET_CODE (el) == CONST_DOUBLE) ++ return 0; ++ else ++ gcc_unreachable (); ++ ++ if (i != 0 && elpart != last_elt) ++ return 0; ++ ++ last_elt = elpart; ++ } ++ ++ /* Shift less than element size. */ ++ maxshift = innersize * 8; ++ ++ if (isleftshift) ++ { ++ /* Left shift immediate value can be from 0 to <size>-1. */ ++ if (last_elt >= maxshift) ++ return 0; ++ } ++ else ++ { ++ /* Right shift immediate value can be from 1 to <size>. */ ++ if (last_elt == 0 || last_elt > maxshift) ++ return 0; ++ } ++ ++ if (elementwidth) ++ *elementwidth = innersize * 8; ++ ++ if (modconst) ++ *modconst = CONST_VECTOR_ELT (op, 0); ++ ++ return 1; ++} ++ + /* Return a string suitable for output of Neon immediate logic operation + MNEM. */ + +@@ -8885,6 +8945,28 @@ + return templ; + } + ++/* Return a string suitable for output of Neon immediate shift operation ++ (VSHR or VSHL) MNEM. 
*/ ++ ++char * ++neon_output_shift_immediate (const char *mnem, char sign, rtx *op2, ++ enum machine_mode mode, int quad, ++ bool isleftshift) ++{ ++ int width, is_valid; ++ static char templ[40]; ++ ++ is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift); ++ gcc_assert (is_valid != 0); ++ ++ if (quad) ++ sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width); ++ else ++ sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width); ++ ++ return templ; ++} ++ + /* Output a sequence of pairwise operations to implement a reduction. + NOTE: We do "too much work" here, because pairwise operations work on two + registers-worth of operands in one go. Unfortunately we can't exploit those + +=== modified file 'gcc/config/arm/neon.md' +--- old/gcc/config/arm/neon.md 2011-07-01 09:19:21 +0000 ++++ new/gcc/config/arm/neon.md 2011-07-04 14:03:49 +0000 +@@ -956,15 +956,57 @@ + ; SImode elements. + + (define_insn "vashl<mode>3" +- [(set (match_operand:VDQIW 0 "s_register_operand" "=w") +- (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") +- (match_operand:VDQIW 2 "s_register_operand" "w")))] +- "TARGET_NEON" +- "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" +- [(set (attr "neon_type") +- (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) +- (const_string "neon_vshl_ddd") +- (const_string "neon_shift_3")))] ++ [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w") ++ (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w") ++ (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))] ++ "TARGET_NEON" ++ { ++ switch (which_alternative) ++ { ++ case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"; ++ case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2], ++ <MODE>mode, ++ VALID_NEON_QREG_MODE (<MODE>mode), ++ true); ++ default: gcc_unreachable (); ++ } ++ } ++ [(set (attr "neon_type") ++ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) ++ (const_string "neon_vshl_ddd") ++ (const_string "neon_shift_3")))] ++) ++ ++(define_insn "vashr<mode>3_imm" ++ [(set (match_operand:VDQIW 0 "s_register_operand" "=w") ++ (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") ++ (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))] ++ "TARGET_NEON" ++ { ++ return neon_output_shift_immediate ("vshr", 's', &operands[2], ++ <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode), ++ false); ++ } ++ [(set (attr "neon_type") ++ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) ++ (const_string "neon_vshl_ddd") ++ (const_string "neon_shift_3")))] ++) ++ ++(define_insn "vlshr<mode>3_imm" ++ [(set (match_operand:VDQIW 0 "s_register_operand" "=w") ++ (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") ++ (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))] ++ "TARGET_NEON" ++ { ++ return neon_output_shift_immediate ("vshr", 'u', &operands[2], ++ <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode), ++ false); ++ } ++ [(set (attr "neon_type") ++ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) ++ (const_string "neon_vshl_ddd") ++ (const_string "neon_shift_3")))] + ) + + ; Used for implementing logical shift-right, which is a left-shift by a negative +@@ -1004,28 +1046,34 @@ + (define_expand "vashr<mode>3" + [(set (match_operand:VDQIW 0 "s_register_operand" "") + (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "") +- (match_operand:VDQIW 2 "s_register_operand" "")))] ++ (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))] + "TARGET_NEON" + { + rtx neg = gen_reg_rtx 
(<MODE>mode); +- +- emit_insn (gen_neg<mode>2 (neg, operands[2])); +- emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg)); +- ++ if (REG_P (operands[2])) ++ { ++ emit_insn (gen_neg<mode>2 (neg, operands[2])); ++ emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg)); ++ } ++ else ++ emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2])); + DONE; + }) + + (define_expand "vlshr<mode>3" + [(set (match_operand:VDQIW 0 "s_register_operand" "") + (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "") +- (match_operand:VDQIW 2 "s_register_operand" "")))] ++ (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))] + "TARGET_NEON" + { + rtx neg = gen_reg_rtx (<MODE>mode); +- +- emit_insn (gen_neg<mode>2 (neg, operands[2])); +- emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg)); +- ++ if (REG_P (operands[2])) ++ { ++ emit_insn (gen_neg<mode>2 (neg, operands[2])); ++ emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg)); ++ } ++ else ++ emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2])); + DONE; + }) + + +=== modified file 'gcc/config/arm/predicates.md' +--- old/gcc/config/arm/predicates.md 2011-06-22 15:50:23 +0000 ++++ new/gcc/config/arm/predicates.md 2011-07-04 14:03:49 +0000 +@@ -585,6 +585,26 @@ + return neon_immediate_valid_for_move (op, mode, NULL, NULL); + }) + ++(define_predicate "imm_for_neon_lshift_operand" ++ (match_code "const_vector") ++{ ++ return neon_immediate_valid_for_shift (op, mode, NULL, NULL, true); ++}) ++ ++(define_predicate "imm_for_neon_rshift_operand" ++ (match_code "const_vector") ++{ ++ return neon_immediate_valid_for_shift (op, mode, NULL, NULL, false); ++}) ++ ++(define_predicate "imm_lshift_or_reg_neon" ++ (ior (match_operand 0 "s_register_operand") ++ (match_operand 0 "imm_for_neon_lshift_operand"))) ++ ++(define_predicate "imm_rshift_or_reg_neon" ++ (ior (match_operand 0 "s_register_operand") ++ (match_operand 0 "imm_for_neon_rshift_operand"))) ++ + (define_predicate "imm_for_neon_logic_operand" + (match_code "const_vector") + { + +=== modified file 'gcc/optabs.c' +--- old/gcc/optabs.c 2011-03-04 10:27:10 +0000 ++++ new/gcc/optabs.c 2011-07-04 14:03:49 +0000 +@@ -6171,6 +6171,9 @@ + init_optab (usashl_optab, US_ASHIFT); + init_optab (ashr_optab, ASHIFTRT); + init_optab (lshr_optab, LSHIFTRT); ++ init_optabv (vashl_optab, ASHIFT); ++ init_optabv (vashr_optab, ASHIFTRT); ++ init_optabv (vlshr_optab, LSHIFTRT); + init_optab (rotl_optab, ROTATE); + init_optab (rotr_optab, ROTATERT); + init_optab (smin_optab, SMIN); + +=== added file 'gcc/testsuite/gcc.target/arm/neon-vlshr-imm-1.c' +--- old/gcc/testsuite/gcc.target/arm/neon-vlshr-imm-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/neon-vlshr-imm-1.c 2011-07-04 14:03:49 +0000 +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */ ++/* { dg-final { scan-assembler "vshr\.u32.*#3" } } */ ++ ++/* Verify that VSHR immediate is used. 
*/ ++void f1(int n, unsigned int x[], unsigned int y[]) { ++ int i; ++ for (i = 0; i < n; ++i) ++ y[i] = x[i] >> 3; ++} + +=== added file 'gcc/testsuite/gcc.target/arm/neon-vshl-imm-1.c' +--- old/gcc/testsuite/gcc.target/arm/neon-vshl-imm-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/neon-vshl-imm-1.c 2011-07-04 14:03:49 +0000 +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */ ++/* { dg-final { scan-assembler "vshl\.i32.*#3" } } */ ++ ++/* Verify that VSHR immediate is used. */ ++void f1(int n, int x[], int y[]) { ++ int i; ++ for (i = 0; i < n; ++i) ++ y[i] = x[i] << 3; ++} + +=== added file 'gcc/testsuite/gcc.target/arm/neon-vshr-imm-1.c' +--- old/gcc/testsuite/gcc.target/arm/neon-vshr-imm-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/neon-vshr-imm-1.c 2011-07-04 14:03:49 +0000 +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */ ++/* { dg-final { scan-assembler "vshr\.s32.*#3" } } */ ++ ++/* Verify that VSHR immediate is used. */ ++void f1(int n, int x[], int y[]) { ++ int i; ++ for (i = 0; i < n; ++i) ++ y[i] = x[i] >> 3; ++} + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106773.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106773.patch new file mode 100644 index 0000000000..de3f29e193 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106773.patch @@ -0,0 +1,119 @@ +2011-07-13 Richard Sandiford <richard.sandiford@linaro.org> + + Backport from mainline: + gcc/ + 2011-07-07 Richard Sandiford <richard.sandiford@linaro.org> + + * reload1.c (choose_reload_regs): Use mode sizes to check whether + an old reload register completely defines the required value. + + gcc/testsuite/ + 2011-07-07 Richard Sandiford <richard.sandiford@linaro.org> + + * gcc.target/arm/neon-modes-3.c: New test. + +=== modified file 'gcc/reload1.c' +--- old/gcc/reload1.c 2011-07-01 09:19:21 +0000 ++++ new/gcc/reload1.c 2011-07-11 10:06:50 +0000 +@@ -6451,6 +6451,8 @@ + + if (regno >= 0 + && reg_last_reload_reg[regno] != 0 ++ && (GET_MODE_SIZE (GET_MODE (reg_last_reload_reg[regno])) ++ >= GET_MODE_SIZE (mode) + byte) + #ifdef CANNOT_CHANGE_MODE_CLASS + /* Verify that the register it's in can be used in + mode MODE. */ +@@ -6462,24 +6464,12 @@ + { + enum reg_class rclass = rld[r].rclass, last_class; + rtx last_reg = reg_last_reload_reg[regno]; +- enum machine_mode need_mode; + + i = REGNO (last_reg); + i += subreg_regno_offset (i, GET_MODE (last_reg), byte, mode); + last_class = REGNO_REG_CLASS (i); + +- if (byte == 0) +- need_mode = mode; +- else +- need_mode +- = smallest_mode_for_size +- (GET_MODE_BITSIZE (mode) + byte * BITS_PER_UNIT, +- GET_MODE_CLASS (mode) == MODE_PARTIAL_INT +- ? 
MODE_INT : GET_MODE_CLASS (mode)); +- +- if ((GET_MODE_SIZE (GET_MODE (last_reg)) +- >= GET_MODE_SIZE (need_mode)) +- && reg_reloaded_contents[i] == regno ++ if (reg_reloaded_contents[i] == regno + && TEST_HARD_REG_BIT (reg_reloaded_valid, i) + && HARD_REGNO_MODE_OK (i, rld[r].mode) + && (TEST_HARD_REG_BIT (reg_class_contents[(int) rclass], i) + +=== added file 'gcc/testsuite/gcc.target/arm/neon-modes-3.c' +--- old/gcc/testsuite/gcc.target/arm/neon-modes-3.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/neon-modes-3.c 2011-07-11 10:06:50 +0000 +@@ -0,0 +1,61 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-O" } */ ++/* { dg-add-options arm_neon } */ ++ ++#include <arm_neon.h> ++ ++void f1 (volatile float32x4_t *dest, volatile float32x4x4_t *src, int n) ++{ ++ float32x4x4_t a5, a6, a7, a8, a9; ++ int i; ++ ++ a5 = *src; ++ a6 = *src; ++ a7 = *src; ++ a8 = *src; ++ a9 = *src; ++ while (n--) ++ { ++ for (i = 0; i < 8; i++) ++ { ++ float32x4x4_t a0, a1, a2, a3, a4; ++ ++ a0 = *src; ++ a1 = *src; ++ a2 = *src; ++ a3 = *src; ++ a4 = *src; ++ *src = a0; ++ *dest = a0.val[0]; ++ *dest = a0.val[3]; ++ *src = a1; ++ *dest = a1.val[0]; ++ *dest = a1.val[3]; ++ *src = a2; ++ *dest = a2.val[0]; ++ *dest = a2.val[3]; ++ *src = a3; ++ *dest = a3.val[0]; ++ *dest = a3.val[3]; ++ *src = a4; ++ *dest = a4.val[0]; ++ *dest = a4.val[3]; ++ } ++ *src = a5; ++ *dest = a5.val[0]; ++ *dest = a5.val[3]; ++ *src = a6; ++ *dest = a6.val[0]; ++ *dest = a6.val[3]; ++ *src = a7; ++ *dest = a7.val[0]; ++ *dest = a7.val[3]; ++ *src = a8; ++ *dest = a8.val[0]; ++ *dest = a8.val[3]; ++ *src = a9; ++ *dest = a9.val[0]; ++ *dest = a9.val[3]; ++ } ++} + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106775.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106775.patch new file mode 100644 index 0000000000..0b05c38240 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106775.patch @@ -0,0 +1,67 @@ +2011-07-15 Michael Hope <michael.hope@linaro.org> + + gcc/ + Backport from mainline: + + 2011-04-05 Eric Botcazou <ebotcazou@adacore.com> + + * ifcvt.c (cond_exec_process_insns): Disallow converting a block + that contains the prologue. + + gcc/testsuite/ + Backport from mainline: + + 2011-04-01 Bernd Schmidt <bernds@codesourcery.com> + + * gcc.c-torture/compile/20110401-1.c: New test. + +=== modified file 'gcc/ifcvt.c' +--- old/gcc/ifcvt.c 2010-12-14 00:23:40 +0000 ++++ new/gcc/ifcvt.c 2011-07-11 04:02:28 +0000 +@@ -1,5 +1,6 @@ + /* If-conversion support. +- Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010 ++ Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010, ++ 2011 + Free Software Foundation, Inc. + + This file is part of GCC. +@@ -304,6 +305,10 @@ + + for (insn = start; ; insn = NEXT_INSN (insn)) + { ++ /* dwarf2out can't cope with conditional prologues. 
*/ ++ if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END) ++ return FALSE; ++ + if (NOTE_P (insn) || DEBUG_INSN_P (insn)) + goto insn_done; + + +=== added file 'gcc/testsuite/gcc.c-torture/compile/20110401-1.c' +--- old/gcc/testsuite/gcc.c-torture/compile/20110401-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.c-torture/compile/20110401-1.c 2011-07-11 04:02:28 +0000 +@@ -0,0 +1,22 @@ ++void asn1_length_der (unsigned long int len, unsigned char *ans, int *ans_len) ++{ ++ int k; ++ unsigned char temp[4]; ++ if (len < 128) { ++ if (ans != ((void *) 0)) ++ ans[0] = (unsigned char) len; ++ *ans_len = 1; ++ } else { ++ k = 0; ++ while (len) { ++ temp[k++] = len & 0xFF; ++ len = len >> 8; ++ } ++ *ans_len = k + 1; ++ if (ans != ((void *) 0)) { ++ ans[0] = ((unsigned char) k & 0x7F) + 128; ++ while (k--) ++ ans[*ans_len - 1 - k] = temp[k]; ++ } ++ } ++} + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106776.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106776.patch new file mode 100644 index 0000000000..3d4d5c5049 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106776.patch @@ -0,0 +1,46 @@ +2011-07-15 Michael Hope <michael.hope@linaro.org> + + gcc/ + Backport from mainline: + 2011-03-22 Eric Botcazou <ebotcazou@adacore.com> + + * combine.c (simplify_set): Try harder to find the best CC mode when + simplifying a nested COMPARE on the RHS. + +=== modified file 'gcc/combine.c' +--- old/gcc/combine.c 2011-05-27 14:31:18 +0000 ++++ new/gcc/combine.c 2011-07-11 03:52:31 +0000 +@@ -6287,10 +6287,18 @@ + enum rtx_code new_code; + rtx op0, op1, tmp; + int other_changed = 0; ++ rtx inner_compare = NULL_RTX; + enum machine_mode compare_mode = GET_MODE (dest); + + if (GET_CODE (src) == COMPARE) +- op0 = XEXP (src, 0), op1 = XEXP (src, 1); ++ { ++ op0 = XEXP (src, 0), op1 = XEXP (src, 1); ++ if (GET_CODE (op0) == COMPARE && op1 == const0_rtx) ++ { ++ inner_compare = op0; ++ op0 = XEXP (inner_compare, 0), op1 = XEXP (inner_compare, 1); ++ } ++ } + else + op0 = src, op1 = CONST0_RTX (GET_MODE (src)); + +@@ -6332,6 +6340,12 @@ + need to use a different CC mode here. */ + if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC) + compare_mode = GET_MODE (op0); ++ else if (inner_compare ++ && GET_MODE_CLASS (GET_MODE (inner_compare)) == MODE_CC ++ && new_code == old_code ++ && op0 == XEXP (inner_compare, 0) ++ && op1 == XEXP (inner_compare, 1)) ++ compare_mode = GET_MODE (inner_compare); + else + compare_mode = SELECT_CC_MODE (new_code, op0, op1); + + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106777.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106777.patch new file mode 100644 index 0000000000..68b682b3c6 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106777.patch @@ -0,0 +1,192 @@ +2011-07-15 Michael Hope <michael.hope@linaro.org> + + gcc/ + Backport from mainline: + 2011-06-29 Nathan Sidwell <nathan@codesourcery.com> + + * config/arm/unwind-arm.c (enum __cxa_type_match_result): New. + (cxa_type_match): Correct declaration. + (__gnu_unwind_pr_common): Reconstruct + additional indirection when __cxa_type_match returns + succeeded_with_ptr_to_base. + + libstdc++-v3/ + Backport from mainline: + + 2011-06-29 Nathan Sidwell <nathan@codesourcery.com> + + * libsupc++/eh_arm.c (__cxa_type_match): Construct address of + thrown object here. Return succeded_with_ptr_to_base for all + pointer cases. 
+ +=== modified file 'gcc/config/arm/unwind-arm.c' +--- old/gcc/config/arm/unwind-arm.c 2011-03-22 10:59:10 +0000 ++++ new/gcc/config/arm/unwind-arm.c 2011-07-11 03:35:44 +0000 +@@ -32,13 +32,18 @@ + typedef unsigned char bool; + + typedef struct _ZSt9type_info type_info; /* This names C++ type_info type */ ++enum __cxa_type_match_result ++ { ++ ctm_failed = 0, ++ ctm_succeeded = 1, ++ ctm_succeeded_with_ptr_to_base = 2 ++ }; + + void __attribute__((weak)) __cxa_call_unexpected(_Unwind_Control_Block *ucbp); + bool __attribute__((weak)) __cxa_begin_cleanup(_Unwind_Control_Block *ucbp); +-bool __attribute__((weak)) __cxa_type_match(_Unwind_Control_Block *ucbp, +- const type_info *rttip, +- bool is_reference, +- void **matched_object); ++enum __cxa_type_match_result __attribute__((weak)) __cxa_type_match ++ (_Unwind_Control_Block *ucbp, const type_info *rttip, ++ bool is_reference, void **matched_object); + + _Unwind_Ptr __attribute__((weak)) + __gnu_Unwind_Find_exidx (_Unwind_Ptr, int *); +@@ -1107,6 +1112,7 @@ + _uw rtti; + bool is_reference = (data[0] & uint32_highbit) != 0; + void *matched; ++ enum __cxa_type_match_result match_type; + + /* Check for no-throw areas. */ + if (data[1] == (_uw) -2) +@@ -1118,17 +1124,31 @@ + { + /* Match a catch specification. */ + rtti = _Unwind_decode_target2 ((_uw) &data[1]); +- if (!__cxa_type_match (ucbp, (type_info *) rtti, +- is_reference, +- &matched)) +- matched = (void *)0; ++ match_type = __cxa_type_match (ucbp, ++ (type_info *) rtti, ++ is_reference, ++ &matched); + } ++ else ++ match_type = ctm_succeeded; + +- if (matched) ++ if (match_type) + { + ucbp->barrier_cache.sp = + _Unwind_GetGR (context, R_SP); +- ucbp->barrier_cache.bitpattern[0] = (_uw) matched; ++ // ctm_succeeded_with_ptr_to_base really ++ // means _c_t_m indirected the pointer ++ // object. We have to reconstruct the ++ // additional pointer layer by using a temporary. ++ if (match_type == ctm_succeeded_with_ptr_to_base) ++ { ++ ucbp->barrier_cache.bitpattern[2] ++ = (_uw) matched; ++ ucbp->barrier_cache.bitpattern[0] ++ = (_uw) &ucbp->barrier_cache.bitpattern[2]; ++ } ++ else ++ ucbp->barrier_cache.bitpattern[0] = (_uw) matched; + ucbp->barrier_cache.bitpattern[1] = (_uw) data; + return _URC_HANDLER_FOUND; + } + +=== modified file 'libstdc++-v3/libsupc++/eh_arm.cc' +--- old/libstdc++-v3/libsupc++/eh_arm.cc 2011-01-03 20:52:22 +0000 ++++ new/libstdc++-v3/libsupc++/eh_arm.cc 2011-07-11 03:35:44 +0000 +@@ -30,10 +30,11 @@ + using namespace __cxxabiv1; + + +-// Given the thrown type THROW_TYPE, pointer to a variable containing a +-// pointer to the exception object THROWN_PTR_P and a type CATCH_TYPE to +-// compare against, return whether or not there is a match and if so, +-// update *THROWN_PTR_P. ++// Given the thrown type THROW_TYPE, exception object UE_HEADER and a ++// type CATCH_TYPE to compare against, return whether or not there is ++// a match and if so, update *THROWN_PTR_P to point to either the ++// type-matched object, or in the case of a pointer type, the object ++// pointed to by the pointer. 
+ + extern "C" __cxa_type_match_result + __cxa_type_match(_Unwind_Exception* ue_header, +@@ -41,51 +42,51 @@ + bool is_reference __attribute__((__unused__)), + void** thrown_ptr_p) + { +- bool forced_unwind = __is_gxx_forced_unwind_class(ue_header->exception_class); +- bool foreign_exception = !forced_unwind && !__is_gxx_exception_class(ue_header->exception_class); +- bool dependent_exception = +- __is_dependent_exception(ue_header->exception_class); ++ bool forced_unwind ++ = __is_gxx_forced_unwind_class(ue_header->exception_class); ++ bool foreign_exception ++ = !forced_unwind && !__is_gxx_exception_class(ue_header->exception_class); ++ bool dependent_exception ++ = __is_dependent_exception(ue_header->exception_class); + __cxa_exception* xh = __get_exception_header_from_ue(ue_header); + __cxa_dependent_exception *dx = __get_dependent_exception_from_ue(ue_header); + const std::type_info* throw_type; ++ void *thrown_ptr = 0; + + if (forced_unwind) + throw_type = &typeid(abi::__forced_unwind); + else if (foreign_exception) + throw_type = &typeid(abi::__foreign_exception); +- else if (dependent_exception) +- throw_type = __get_exception_header_from_obj +- (dx->primaryException)->exceptionType; + else +- throw_type = xh->exceptionType; +- +- void* thrown_ptr = *thrown_ptr_p; ++ { ++ if (dependent_exception) ++ xh = __get_exception_header_from_obj (dx->primaryException); ++ throw_type = xh->exceptionType; ++ // We used to require the caller set the target of thrown_ptr_p, ++ // but that's incorrect -- the EHABI makes no such requirement ++ // -- and not all callers will set it. Fortunately callers that ++ // do initialize will always pass us the value we calculate ++ // here, so there's no backwards compatibility problem. ++ thrown_ptr = __get_object_from_ue (ue_header); ++ } ++ ++ __cxa_type_match_result result = ctm_succeeded; + + // Pointer types need to adjust the actual pointer, not + // the pointer to pointer that is the exception object. + // This also has the effect of passing pointer types + // "by value" through the __cxa_begin_catch return value. + if (throw_type->__is_pointer_p()) +- thrown_ptr = *(void**) thrown_ptr; ++ { ++ thrown_ptr = *(void**) thrown_ptr; ++ // We need to indicate the indirection to our caller. 
++ result = ctm_succeeded_with_ptr_to_base; ++ } + + if (catch_type->__do_catch(throw_type, &thrown_ptr, 1)) + { + *thrown_ptr_p = thrown_ptr; +- +- if (typeid(*catch_type) == typeid (typeid(void*))) +- { +- const __pointer_type_info *catch_pointer_type = +- static_cast<const __pointer_type_info *> (catch_type); +- const __pointer_type_info *throw_pointer_type = +- static_cast<const __pointer_type_info *> (throw_type); +- +- if (typeid (*catch_pointer_type->__pointee) != typeid (void) +- && (*catch_pointer_type->__pointee != +- *throw_pointer_type->__pointee)) +- return ctm_succeeded_with_ptr_to_base; +- } +- +- return ctm_succeeded; ++ return result; + } + + return ctm_failed; + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106778.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106778.patch new file mode 100644 index 0000000000..b42b425dc1 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106778.patch @@ -0,0 +1,225 @@ +2011-07-15 Michael Hope <michael.hope@linaro.org> + + Backport from mainline r174540 + LP: #807573 + + gcc/ + 2011-06-01 Richard Sandiford <rdsandiford@googlemail.com> + + PR rtl-optimization/48830 + PR rtl-optimization/48808 + PR rtl-optimization/48792 + * reload.c (push_reload): Check contains_reg_of_mode. + * reload1.c (strip_paradoxical_subreg): New function. + (gen_reload_chain_without_interm_reg_p): Use it to handle + paradoxical subregs. + (emit_output_reload_insns, gen_reload): Likewise. + + gcc/testsuite/ + 2011-06-01 Eric Botcazou <ebotcazou@adacore.com> + Hans-Peter Nilsson <hp@axis.com> + + PR rtl-optimization/48830 + * gcc.target/sparc/ultrasp12.c: New test. + +=== modified file 'gcc/reload.c' +--- old/gcc/reload.c 2011-07-01 09:19:21 +0000 ++++ new/gcc/reload.c 2011-07-13 02:09:08 +0000 +@@ -1017,6 +1017,7 @@ + #ifdef CANNOT_CHANGE_MODE_CLASS + && !CANNOT_CHANGE_MODE_CLASS (GET_MODE (SUBREG_REG (in)), inmode, rclass) + #endif ++ && contains_reg_of_mode[(int) rclass][(int) GET_MODE (SUBREG_REG (in))] + && (CONSTANT_P (SUBREG_REG (in)) + || GET_CODE (SUBREG_REG (in)) == PLUS + || strict_low +@@ -1123,6 +1124,7 @@ + #ifdef CANNOT_CHANGE_MODE_CLASS + && !CANNOT_CHANGE_MODE_CLASS (GET_MODE (SUBREG_REG (out)), outmode, rclass) + #endif ++ && contains_reg_of_mode[(int) rclass][(int) GET_MODE (SUBREG_REG (out))] + && (CONSTANT_P (SUBREG_REG (out)) + || strict_low + || (((REG_P (SUBREG_REG (out)) + +=== modified file 'gcc/reload1.c' +--- old/gcc/reload1.c 2011-07-11 10:06:50 +0000 ++++ new/gcc/reload1.c 2011-07-14 22:14:45 +0000 +@@ -4476,6 +4476,43 @@ + } + } + } ++ ++/* *OP_PTR and *OTHER_PTR are two operands to a conceptual reload. ++ If *OP_PTR is a paradoxical subreg, try to remove that subreg ++ and apply the corresponding narrowing subreg to *OTHER_PTR. ++ Return true if the operands were changed, false otherwise. */ ++ ++static bool ++strip_paradoxical_subreg (rtx *op_ptr, rtx *other_ptr) ++{ ++ rtx op, inner, other, tem; ++ ++ op = *op_ptr; ++ if (GET_CODE (op) != SUBREG) ++ return false; ++ ++ inner = SUBREG_REG (op); ++ if (GET_MODE_SIZE (GET_MODE (op)) <= GET_MODE_SIZE (GET_MODE (inner))) ++ return false; ++ ++ other = *other_ptr; ++ tem = gen_lowpart_common (GET_MODE (inner), other); ++ if (!tem) ++ return false; ++ ++ /* If the lowpart operation turned a hard register into a subreg, ++ rather than simplifying it to another hard register, then the ++ mode change cannot be properly represented. 
For example, OTHER ++ might be valid in its current mode, but not in the new one. */ ++ if (GET_CODE (tem) == SUBREG ++ && REG_P (other) ++ && HARD_REGISTER_P (other)) ++ return false; ++ ++ *op_ptr = inner; ++ *other_ptr = tem; ++ return true; ++} + + /* A subroutine of reload_as_needed. If INSN has a REG_EH_REGION note, + examine all of the reload insns between PREV and NEXT exclusive, and +@@ -5556,7 +5593,7 @@ + chain reloads or do need an intermediate hard registers. */ + bool result = true; + int regno, n, code; +- rtx out, in, tem, insn; ++ rtx out, in, insn; + rtx last = get_last_insn (); + + /* Make r2 a component of r1. */ +@@ -5575,11 +5612,7 @@ + + /* If IN is a paradoxical SUBREG, remove it and try to put the + opposite SUBREG on OUT. Likewise for a paradoxical SUBREG on OUT. */ +- if (GET_CODE (in) == SUBREG +- && (GET_MODE_SIZE (GET_MODE (in)) +- > GET_MODE_SIZE (GET_MODE (SUBREG_REG (in)))) +- && (tem = gen_lowpart_common (GET_MODE (SUBREG_REG (in)), out)) != 0) +- in = SUBREG_REG (in), out = tem; ++ strip_paradoxical_subreg (&in, &out); + + if (GET_CODE (in) == PLUS + && (REG_P (XEXP (in, 0)) +@@ -7571,7 +7604,6 @@ + if (tertiary_icode != CODE_FOR_nothing) + { + rtx third_reloadreg = rld[tertiary_reload].reg_rtx; +- rtx tem; + + /* Copy primary reload reg to secondary reload reg. + (Note that these have been swapped above, then +@@ -7580,13 +7612,7 @@ + /* If REAL_OLD is a paradoxical SUBREG, remove it + and try to put the opposite SUBREG on + RELOADREG. */ +- if (GET_CODE (real_old) == SUBREG +- && (GET_MODE_SIZE (GET_MODE (real_old)) +- > GET_MODE_SIZE (GET_MODE (SUBREG_REG (real_old)))) +- && 0 != (tem = gen_lowpart_common +- (GET_MODE (SUBREG_REG (real_old)), +- reloadreg))) +- real_old = SUBREG_REG (real_old), reloadreg = tem; ++ strip_paradoxical_subreg (&real_old, &reloadreg); + + gen_reload (reloadreg, second_reloadreg, + rl->opnum, rl->when_needed); +@@ -8402,16 +8428,8 @@ + + /* If IN is a paradoxical SUBREG, remove it and try to put the + opposite SUBREG on OUT. Likewise for a paradoxical SUBREG on OUT. */ +- if (GET_CODE (in) == SUBREG +- && (GET_MODE_SIZE (GET_MODE (in)) +- > GET_MODE_SIZE (GET_MODE (SUBREG_REG (in)))) +- && (tem = gen_lowpart_common (GET_MODE (SUBREG_REG (in)), out)) != 0) +- in = SUBREG_REG (in), out = tem; +- else if (GET_CODE (out) == SUBREG +- && (GET_MODE_SIZE (GET_MODE (out)) +- > GET_MODE_SIZE (GET_MODE (SUBREG_REG (out)))) +- && (tem = gen_lowpart_common (GET_MODE (SUBREG_REG (out)), in)) != 0) +- out = SUBREG_REG (out), in = tem; ++ if (!strip_paradoxical_subreg (&in, &out)) ++ strip_paradoxical_subreg (&out, &in); + + /* How to do this reload can get quite tricky. 
Normally, we are being + asked to reload a simple operand, such as a MEM, a constant, or a pseudo + +=== added file 'gcc/testsuite/gcc.target/sparc/ultrasp12.c' +--- old/gcc/testsuite/gcc.target/sparc/ultrasp12.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/sparc/ultrasp12.c 2011-07-13 02:09:08 +0000 +@@ -0,0 +1,64 @@ ++/* PR rtl-optimization/48830 */ ++/* Testcase by Hans-Peter Nilsson <hp@gcc.gnu.org> */ ++ ++/* { dg-do compile } */ ++/* { dg-require-effective-target lp64 } */ ++/* { dg-options "-O2 -mcpu=ultrasparc -mvis" } */ ++ ++typedef unsigned char uint8_t; ++typedef unsigned int uint32_t; ++typedef unsigned long int uint64_t; ++typedef unsigned long int uintmax_t; ++typedef unsigned char rc_vec_t __attribute__((__vector_size__(8))); ++typedef short rc_svec_type_ __attribute__((__vector_size__(8))); ++typedef unsigned char rc_vec4_type_ __attribute__((__vector_size__(4))); ++ ++void ++rc_stat_xsum_acc(const uint8_t *__restrict src1, int src1_dim, ++ const uint8_t *__restrict src2, int src2_dim, ++ int len, int height, uintmax_t sum[5]) ++{ ++ uint32_t s1 = 0; ++ uint32_t s2 = 0; ++ uintmax_t s11 = 0; ++ uintmax_t s22 = 0; ++ uintmax_t s12 = 0; ++ int full = len / ((1024) < (1024) ? (1024) : (1024)); ++ int rem = len % ((1024) < (1024) ? (1024) : (1024)); ++ int rem1 = rem / 1; ++ int y; ++ unsigned int rc_gsr_scale_ __attribute__ ((__unused__)) = 7; unsigned int rc_gsr_align_ __attribute__ ((__unused__)) = 4; unsigned int rc_gsr_set_ __attribute__ ((__unused__)) = 0; register unsigned int rc_gsr_fakedep_ __attribute__ ((__unused__)) = 0; unsigned int rc_gsr_ldinit_ __attribute__ ((__unused__)) = 0; ++ for (y = 0; y < height; y++) { ++ rc_vec_t a1, a2, a11, a22, a12; ++ int i1 = (y)*(src1_dim); ++ int i2 = (y)*(src2_dim); ++ int x; ++ ((a1) = ((rc_vec_t) {0})); ++ ((a2) = ((rc_vec_t) {0})); ++ ((a11) = ((rc_vec_t) {0})); ++ ((a22) = ((rc_vec_t) {0})); ++ ((a12) = ((rc_vec_t) {0})); ++ for (x = 0; x < full; x++) { ++ int k; ++ for (k = 0; k < ((1024) < (1024) ? 
(1024) : (1024)) / ++ 1; k++) ++ { ++ do { rc_vec_t v1, v2; ((v1) = *(const rc_vec_t*)(&(src1)[i1])); ((v2) = *(const rc_vec_t*)(&(src2)[i2])); ((a1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v1, ((rc_vec_t) {0}), (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)))).v)); ((a2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v2, ((rc_vec_t) {0}), (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)))).v)); do { rc_vec_t s1_ = (v1); rc_vec_t s2_ = (v1); rc_vec_t accvin_ = (a11); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = 
__builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a11) = accvout_; } while (0); do { rc_vec_t s1_ = (v2); rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a22); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = 
__builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a22) = accvout_; } while (0); do { rc_vec_t s1_ = (v1); rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a12); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || 
(unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a12) = accvout_; } while (0); (i1) += 8; (i2) += 8; } while (0); ++ ++ } ++ do { uint32_t t1, t2, t11, t22, t12; ((t1) = (((union { rc_vec_t v; uint64_t i; 
})(uint64_t)(a1)).i)); ((t2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a11); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t11) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a22); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t22) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a12); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t12) = maclo_ + machi_ * 256; } while (0); ((a1) = ((rc_vec_t) {0})); ((a2) = ((rc_vec_t) {0})); ((a11) = ((rc_vec_t) {0})); ((a22) = ((rc_vec_t) {0})); ((a12) = ((rc_vec_t) {0})); (s1) += t1; (s2) += t2; (s11) += t11; (s22) += t22; (s12) += t12; } while (0); ++ } ++ for (x = 0; x < rem1; x++) { ++ do { rc_vec_t v1, v2; ((v1) = *(const rc_vec_t*)(&(src1)[i1])); ((v2) = *(const rc_vec_t*)(&(src2)[i2])); ((a1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v1, ((rc_vec_t) {0}), (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)))).v)); ((a2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v2, ((rc_vec_t) {0}), (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)))).v)); do { rc_vec_t s1_ = (v1); rc_vec_t s2_ = (v1); rc_vec_t accvin_ = (a11); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] 
"=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a11) = accvout_; } while (0); do { rc_vec_t s1_ = (v2); rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a22); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; 
rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = 
__builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a22) = accvout_; } while (0); do { rc_vec_t s1_ = (v1); rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a12); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); 
mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a12) = accvout_; } while (0); (i1) += 8; (i2) += 8; } while (0); ++ } ++ do { uint32_t t1, t2, t11, t22, t12; ((t1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)); ((t2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a11); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t11) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a22); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t22) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a12); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t12) = maclo_ + machi_ * 256; } while (0); ((a1) = ((rc_vec_t) {0})); ((a2) = ((rc_vec_t) {0})); ((a11) = ((rc_vec_t) {0})); ((a22) = ((rc_vec_t) {0})); ((a12) = ((rc_vec_t) {0})); (s1) += t1; (s2) += t2; (s11) += t11; (s22) += t22; (s12) += t12; } while (0); ++ ++ do { uint32_t t1, t2, t11, t22, t12; ((t1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)); ((t2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)); do { rc_vec4_type_ accvhi4_, accvlo4_; 
uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a11); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t11) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a22); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t22) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a12); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t12) = maclo_ + machi_ * 256; } while (0); ((a1) = ((rc_vec_t) {0})); ((a2) = ((rc_vec_t) {0})); ((a11) = ((rc_vec_t) {0})); ((a22) = ((rc_vec_t) {0})); ((a12) = ((rc_vec_t) {0})); (s1) += t1; (s2) += t2; (s11) += t11; (s22) += t22; (s12) += t12; } while (0); ++ } ++ sum[0] = s1; ++ sum[1] = s2; ++ sum[2] = s11; ++ sum[3] = s22; ++ sum[4] = s12; ++ ; ++} + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106781.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106781.patch new file mode 100644 index 0000000000..a86ddfdec0 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106781.patch @@ -0,0 +1,741 @@ +2011-07-21 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + PR middle-end/49736 + * expr.c (all_zeros_p): Undo bogus part of last change. + +2011-07-21 Richard Sandiford <richard.sandiford@linaro.org> + + Backport from mainline: + gcc/cp/ + 2011-07-13 Richard Sandiford <richard.sandiford@linaro.org> + + * typeck2.c (split_nonconstant_init_1): Pass the initializer directly, + rather than a pointer to it. Return true if the whole of the value + was initialized by the generated statements. Use + complete_ctor_at_level_p instead of count_type_elements. + + gcc/ + 2011-07-13 Richard Sandiford <richard.sandiford@linaro.org> + + * tree.h (categorize_ctor_elements): Remove comment. Fix long line. + (count_type_elements): Delete. + (complete_ctor_at_level_p): Declare. + * expr.c (flexible_array_member_p): New function, split out from... + (count_type_elements): ...here. Make static. Replace allow_flexarr + parameter with for_ctor_p. When for_ctor_p is true, return the + number of elements that should appear in the top-level constructor, + otherwise return an estimate of the number of scalars. + (categorize_ctor_elements): Replace p_must_clear with p_complete. + (categorize_ctor_elements_1): Likewise. Use complete_ctor_at_level_p. + (complete_ctor_at_level_p): New function, borrowing union logic + from old categorize_ctor_elements_1. + (mostly_zeros_p): Return true if the constructor is not complete. + (all_zeros_p): Update call to categorize_ctor_elements. 
+ * gimplify.c (gimplify_init_constructor): Update call to + categorize_ctor_elements. Don't call count_type_elements. + Unconditionally prevent clearing for variable-sized types, + otherwise rely on categorize_ctor_elements to detect + incomplete initializers. + + gcc/testsuite/ + 2011-07-13 Chung-Lin Tang <cltang@codesourcery.com> + + * gcc.target/arm/pr48183.c: New test. + +=== modified file 'gcc/cp/typeck2.c' +--- old/gcc/cp/typeck2.c 2011-05-20 21:29:14 +0000 ++++ new/gcc/cp/typeck2.c 2011-07-13 13:17:31 +0000 +@@ -473,18 +473,20 @@ + + + /* The recursive part of split_nonconstant_init. DEST is an lvalue +- expression to which INIT should be assigned. INIT is a CONSTRUCTOR. */ ++ expression to which INIT should be assigned. INIT is a CONSTRUCTOR. ++ Return true if the whole of the value was initialized by the ++ generated statements. */ + +-static void +-split_nonconstant_init_1 (tree dest, tree *initp) ++static bool ++split_nonconstant_init_1 (tree dest, tree init) + { + unsigned HOST_WIDE_INT idx; +- tree init = *initp; + tree field_index, value; + tree type = TREE_TYPE (dest); + tree inner_type = NULL; + bool array_type_p = false; +- HOST_WIDE_INT num_type_elements, num_initialized_elements; ++ bool complete_p = true; ++ HOST_WIDE_INT num_split_elts = 0; + + switch (TREE_CODE (type)) + { +@@ -496,7 +498,6 @@ + case RECORD_TYPE: + case UNION_TYPE: + case QUAL_UNION_TYPE: +- num_initialized_elements = 0; + FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (init), idx, + field_index, value) + { +@@ -519,13 +520,14 @@ + sub = build3 (COMPONENT_REF, inner_type, dest, field_index, + NULL_TREE); + +- split_nonconstant_init_1 (sub, &value); ++ if (!split_nonconstant_init_1 (sub, value)) ++ complete_p = false; ++ num_split_elts++; + } + else if (!initializer_constant_valid_p (value, inner_type)) + { + tree code; + tree sub; +- HOST_WIDE_INT inner_elements; + + /* FIXME: Ordered removal is O(1) so the whole function is + worst-case quadratic. This could be fixed using an aside +@@ -549,21 +551,9 @@ + code = build_stmt (input_location, EXPR_STMT, code); + add_stmt (code); + +- inner_elements = count_type_elements (inner_type, true); +- if (inner_elements < 0) +- num_initialized_elements = -1; +- else if (num_initialized_elements >= 0) +- num_initialized_elements += inner_elements; +- continue; ++ num_split_elts++; + } + } +- +- num_type_elements = count_type_elements (type, true); +- /* If all elements of the initializer are non-constant and +- have been split out, we don't need the empty CONSTRUCTOR. */ +- if (num_type_elements > 0 +- && num_type_elements == num_initialized_elements) +- *initp = NULL; + break; + + case VECTOR_TYPE: +@@ -575,6 +565,7 @@ + code = build2 (MODIFY_EXPR, type, dest, cons); + code = build_stmt (input_location, EXPR_STMT, code); + add_stmt (code); ++ num_split_elts += CONSTRUCTOR_NELTS (init); + } + break; + +@@ -584,6 +575,8 @@ + + /* The rest of the initializer is now a constant. */ + TREE_CONSTANT (init) = 1; ++ return complete_p && complete_ctor_at_level_p (TREE_TYPE (init), ++ num_split_elts, inner_type); + } + + /* A subroutine of store_init_value. 
Splits non-constant static +@@ -599,7 +592,8 @@ + if (TREE_CODE (init) == CONSTRUCTOR) + { + code = push_stmt_list (); +- split_nonconstant_init_1 (dest, &init); ++ if (split_nonconstant_init_1 (dest, init)) ++ init = NULL_TREE; + code = pop_stmt_list (code); + DECL_INITIAL (dest) = init; + TREE_READONLY (dest) = 0; + +=== modified file 'gcc/expr.c' +--- old/gcc/expr.c 2011-06-02 12:12:00 +0000 ++++ new/gcc/expr.c 2011-07-14 11:52:32 +0000 +@@ -4866,16 +4866,136 @@ + return NULL_RTX; + } + ++/* Return true if field F of structure TYPE is a flexible array. */ ++ ++static bool ++flexible_array_member_p (const_tree f, const_tree type) ++{ ++ const_tree tf; ++ ++ tf = TREE_TYPE (f); ++ return (DECL_CHAIN (f) == NULL ++ && TREE_CODE (tf) == ARRAY_TYPE ++ && TYPE_DOMAIN (tf) ++ && TYPE_MIN_VALUE (TYPE_DOMAIN (tf)) ++ && integer_zerop (TYPE_MIN_VALUE (TYPE_DOMAIN (tf))) ++ && !TYPE_MAX_VALUE (TYPE_DOMAIN (tf)) ++ && int_size_in_bytes (type) >= 0); ++} ++ ++/* If FOR_CTOR_P, return the number of top-level elements that a constructor ++ must have in order for it to completely initialize a value of type TYPE. ++ Return -1 if the number isn't known. ++ ++ If !FOR_CTOR_P, return an estimate of the number of scalars in TYPE. */ ++ ++static HOST_WIDE_INT ++count_type_elements (const_tree type, bool for_ctor_p) ++{ ++ switch (TREE_CODE (type)) ++ { ++ case ARRAY_TYPE: ++ { ++ tree nelts; ++ ++ nelts = array_type_nelts (type); ++ if (nelts && host_integerp (nelts, 1)) ++ { ++ unsigned HOST_WIDE_INT n; ++ ++ n = tree_low_cst (nelts, 1) + 1; ++ if (n == 0 || for_ctor_p) ++ return n; ++ else ++ return n * count_type_elements (TREE_TYPE (type), false); ++ } ++ return for_ctor_p ? -1 : 1; ++ } ++ ++ case RECORD_TYPE: ++ { ++ unsigned HOST_WIDE_INT n; ++ tree f; ++ ++ n = 0; ++ for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f)) ++ if (TREE_CODE (f) == FIELD_DECL) ++ { ++ if (!for_ctor_p) ++ n += count_type_elements (TREE_TYPE (f), false); ++ else if (!flexible_array_member_p (f, type)) ++ /* Don't count flexible arrays, which are not supposed ++ to be initialized. */ ++ n += 1; ++ } ++ ++ return n; ++ } ++ ++ case UNION_TYPE: ++ case QUAL_UNION_TYPE: ++ { ++ tree f; ++ HOST_WIDE_INT n, m; ++ ++ gcc_assert (!for_ctor_p); ++ /* Estimate the number of scalars in each field and pick the ++ maximum. Other estimates would do instead; the idea is simply ++ to make sure that the estimate is not sensitive to the ordering ++ of the fields. */ ++ n = 1; ++ for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f)) ++ if (TREE_CODE (f) == FIELD_DECL) ++ { ++ m = count_type_elements (TREE_TYPE (f), false); ++ /* If the field doesn't span the whole union, add an extra ++ scalar for the rest. */ ++ if (simple_cst_equal (TYPE_SIZE (TREE_TYPE (f)), ++ TYPE_SIZE (type)) != 1) ++ m++; ++ if (n < m) ++ n = m; ++ } ++ return n; ++ } ++ ++ case COMPLEX_TYPE: ++ return 2; ++ ++ case VECTOR_TYPE: ++ return TYPE_VECTOR_SUBPARTS (type); ++ ++ case INTEGER_TYPE: ++ case REAL_TYPE: ++ case FIXED_POINT_TYPE: ++ case ENUMERAL_TYPE: ++ case BOOLEAN_TYPE: ++ case POINTER_TYPE: ++ case OFFSET_TYPE: ++ case REFERENCE_TYPE: ++ return 1; ++ ++ case ERROR_MARK: ++ return 0; ++ ++ case VOID_TYPE: ++ case METHOD_TYPE: ++ case FUNCTION_TYPE: ++ case LANG_TYPE: ++ default: ++ gcc_unreachable (); ++ } ++} ++ + /* Helper for categorize_ctor_elements. Identical interface. 
*/ + + static bool + categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts, +- HOST_WIDE_INT *p_elt_count, +- bool *p_must_clear) ++ HOST_WIDE_INT *p_init_elts, bool *p_complete) + { + unsigned HOST_WIDE_INT idx; +- HOST_WIDE_INT nz_elts, elt_count; +- tree value, purpose; ++ HOST_WIDE_INT nz_elts, init_elts, num_fields; ++ tree value, purpose, elt_type; + + /* Whether CTOR is a valid constant initializer, in accordance with what + initializer_constant_valid_p does. If inferred from the constructor +@@ -4884,7 +5004,9 @@ + bool const_p = const_from_elts_p ? true : TREE_STATIC (ctor); + + nz_elts = 0; +- elt_count = 0; ++ init_elts = 0; ++ num_fields = 0; ++ elt_type = NULL_TREE; + + FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), idx, purpose, value) + { +@@ -4899,6 +5021,8 @@ + mult = (tree_low_cst (hi_index, 1) + - tree_low_cst (lo_index, 1) + 1); + } ++ num_fields += mult; ++ elt_type = TREE_TYPE (value); + + switch (TREE_CODE (value)) + { +@@ -4906,11 +5030,11 @@ + { + HOST_WIDE_INT nz = 0, ic = 0; + +- bool const_elt_p +- = categorize_ctor_elements_1 (value, &nz, &ic, p_must_clear); ++ bool const_elt_p = categorize_ctor_elements_1 (value, &nz, &ic, ++ p_complete); + + nz_elts += mult * nz; +- elt_count += mult * ic; ++ init_elts += mult * ic; + + if (const_from_elts_p && const_p) + const_p = const_elt_p; +@@ -4922,12 +5046,12 @@ + case FIXED_CST: + if (!initializer_zerop (value)) + nz_elts += mult; +- elt_count += mult; ++ init_elts += mult; + break; + + case STRING_CST: + nz_elts += mult * TREE_STRING_LENGTH (value); +- elt_count += mult * TREE_STRING_LENGTH (value); ++ init_elts += mult * TREE_STRING_LENGTH (value); + break; + + case COMPLEX_CST: +@@ -4935,7 +5059,7 @@ + nz_elts += mult; + if (!initializer_zerop (TREE_IMAGPART (value))) + nz_elts += mult; +- elt_count += mult; ++ init_elts += mult; + break; + + case VECTOR_CST: +@@ -4945,65 +5069,31 @@ + { + if (!initializer_zerop (TREE_VALUE (v))) + nz_elts += mult; +- elt_count += mult; ++ init_elts += mult; + } + } + break; + + default: + { +- HOST_WIDE_INT tc = count_type_elements (TREE_TYPE (value), true); +- if (tc < 1) +- tc = 1; ++ HOST_WIDE_INT tc = count_type_elements (elt_type, false); + nz_elts += mult * tc; +- elt_count += mult * tc; ++ init_elts += mult * tc; + + if (const_from_elts_p && const_p) +- const_p = initializer_constant_valid_p (value, TREE_TYPE (value)) ++ const_p = initializer_constant_valid_p (value, elt_type) + != NULL_TREE; + } + break; + } + } + +- if (!*p_must_clear +- && (TREE_CODE (TREE_TYPE (ctor)) == UNION_TYPE +- || TREE_CODE (TREE_TYPE (ctor)) == QUAL_UNION_TYPE)) +- { +- tree init_sub_type; +- bool clear_this = true; +- +- if (!VEC_empty (constructor_elt, CONSTRUCTOR_ELTS (ctor))) +- { +- /* We don't expect more than one element of the union to be +- initialized. Not sure what we should do otherwise... */ +- gcc_assert (VEC_length (constructor_elt, CONSTRUCTOR_ELTS (ctor)) +- == 1); +- +- init_sub_type = TREE_TYPE (VEC_index (constructor_elt, +- CONSTRUCTOR_ELTS (ctor), +- 0)->value); +- +- /* ??? We could look at each element of the union, and find the +- largest element. Which would avoid comparing the size of the +- initialized element against any tail padding in the union. +- Doesn't seem worth the effort... */ +- if (simple_cst_equal (TYPE_SIZE (TREE_TYPE (ctor)), +- TYPE_SIZE (init_sub_type)) == 1) +- { +- /* And now we have to find out if the element itself is fully +- constructed. E.g. for union { struct { int a, b; } s; } u +- = { .s = { .a = 1 } }. 
*/ +- if (elt_count == count_type_elements (init_sub_type, false)) +- clear_this = false; +- } +- } +- +- *p_must_clear = clear_this; +- } ++ if (*p_complete && !complete_ctor_at_level_p (TREE_TYPE (ctor), ++ num_fields, elt_type)) ++ *p_complete = false; + + *p_nz_elts += nz_elts; +- *p_elt_count += elt_count; ++ *p_init_elts += init_elts; + + return const_p; + } +@@ -5013,111 +5103,50 @@ + and place it in *P_NZ_ELTS; + * how many scalar fields in total are in CTOR, + and place it in *P_ELT_COUNT. +- * if a type is a union, and the initializer from the constructor +- is not the largest element in the union, then set *p_must_clear. ++ * whether the constructor is complete -- in the sense that every ++ meaningful byte is explicitly given a value -- ++ and place it in *P_COMPLETE. + + Return whether or not CTOR is a valid static constant initializer, the same + as "initializer_constant_valid_p (CTOR, TREE_TYPE (CTOR)) != 0". */ + + bool + categorize_ctor_elements (const_tree ctor, HOST_WIDE_INT *p_nz_elts, +- HOST_WIDE_INT *p_elt_count, +- bool *p_must_clear) ++ HOST_WIDE_INT *p_init_elts, bool *p_complete) + { + *p_nz_elts = 0; +- *p_elt_count = 0; +- *p_must_clear = false; ++ *p_init_elts = 0; ++ *p_complete = true; + +- return +- categorize_ctor_elements_1 (ctor, p_nz_elts, p_elt_count, p_must_clear); ++ return categorize_ctor_elements_1 (ctor, p_nz_elts, p_init_elts, p_complete); + } + +-/* Count the number of scalars in TYPE. Return -1 on overflow or +- variable-sized. If ALLOW_FLEXARR is true, don't count flexible +- array member at the end of the structure. */ ++/* TYPE is initialized by a constructor with NUM_ELTS elements, the last ++ of which had type LAST_TYPE. Each element was itself a complete ++ initializer, in the sense that every meaningful byte was explicitly ++ given a value. Return true if the same is true for the constructor ++ as a whole. */ + +-HOST_WIDE_INT +-count_type_elements (const_tree type, bool allow_flexarr) ++bool ++complete_ctor_at_level_p (const_tree type, HOST_WIDE_INT num_elts, ++ const_tree last_type) + { +- const HOST_WIDE_INT max = ~((HOST_WIDE_INT)1 << (HOST_BITS_PER_WIDE_INT-1)); +- switch (TREE_CODE (type)) ++ if (TREE_CODE (type) == UNION_TYPE ++ || TREE_CODE (type) == QUAL_UNION_TYPE) + { +- case ARRAY_TYPE: +- { +- tree telts = array_type_nelts (type); +- if (telts && host_integerp (telts, 1)) +- { +- HOST_WIDE_INT n = tree_low_cst (telts, 1) + 1; +- HOST_WIDE_INT m = count_type_elements (TREE_TYPE (type), false); +- if (n == 0) +- return 0; +- else if (max / n > m) +- return n * m; +- } +- return -1; +- } +- +- case RECORD_TYPE: +- { +- HOST_WIDE_INT n = 0, t; +- tree f; +- +- for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f)) +- if (TREE_CODE (f) == FIELD_DECL) +- { +- t = count_type_elements (TREE_TYPE (f), false); +- if (t < 0) +- { +- /* Check for structures with flexible array member. 
*/ +- tree tf = TREE_TYPE (f); +- if (allow_flexarr +- && DECL_CHAIN (f) == NULL +- && TREE_CODE (tf) == ARRAY_TYPE +- && TYPE_DOMAIN (tf) +- && TYPE_MIN_VALUE (TYPE_DOMAIN (tf)) +- && integer_zerop (TYPE_MIN_VALUE (TYPE_DOMAIN (tf))) +- && !TYPE_MAX_VALUE (TYPE_DOMAIN (tf)) +- && int_size_in_bytes (type) >= 0) +- break; +- +- return -1; +- } +- n += t; +- } +- +- return n; +- } +- +- case UNION_TYPE: +- case QUAL_UNION_TYPE: +- return -1; +- +- case COMPLEX_TYPE: +- return 2; +- +- case VECTOR_TYPE: +- return TYPE_VECTOR_SUBPARTS (type); +- +- case INTEGER_TYPE: +- case REAL_TYPE: +- case FIXED_POINT_TYPE: +- case ENUMERAL_TYPE: +- case BOOLEAN_TYPE: +- case POINTER_TYPE: +- case OFFSET_TYPE: +- case REFERENCE_TYPE: +- return 1; +- +- case ERROR_MARK: +- return 0; +- +- case VOID_TYPE: +- case METHOD_TYPE: +- case FUNCTION_TYPE: +- case LANG_TYPE: +- default: +- gcc_unreachable (); ++ if (num_elts == 0) ++ return false; ++ ++ gcc_assert (num_elts == 1 && last_type); ++ ++ /* ??? We could look at each element of the union, and find the ++ largest element. Which would avoid comparing the size of the ++ initialized element against any tail padding in the union. ++ Doesn't seem worth the effort... */ ++ return simple_cst_equal (TYPE_SIZE (type), TYPE_SIZE (last_type)) == 1; + } ++ ++ return count_type_elements (type, true) == num_elts; + } + + /* Return 1 if EXP contains mostly (3/4) zeros. */ +@@ -5126,18 +5155,12 @@ + mostly_zeros_p (const_tree exp) + { + if (TREE_CODE (exp) == CONSTRUCTOR) +- + { +- HOST_WIDE_INT nz_elts, count, elts; +- bool must_clear; +- +- categorize_ctor_elements (exp, &nz_elts, &count, &must_clear); +- if (must_clear) +- return 1; +- +- elts = count_type_elements (TREE_TYPE (exp), false); +- +- return nz_elts < elts / 4; ++ HOST_WIDE_INT nz_elts, init_elts; ++ bool complete_p; ++ ++ categorize_ctor_elements (exp, &nz_elts, &init_elts, &complete_p); ++ return !complete_p || nz_elts < init_elts / 4; + } + + return initializer_zerop (exp); +@@ -5149,12 +5172,11 @@ + all_zeros_p (const_tree exp) + { + if (TREE_CODE (exp) == CONSTRUCTOR) +- + { +- HOST_WIDE_INT nz_elts, count; +- bool must_clear; ++ HOST_WIDE_INT nz_elts, init_elts; ++ bool complete_p; + +- categorize_ctor_elements (exp, &nz_elts, &count, &must_clear); ++ categorize_ctor_elements (exp, &nz_elts, &init_elts, &complete_p); + return nz_elts == 0; + } + + +=== modified file 'gcc/gimplify.c' +--- old/gcc/gimplify.c 2011-05-26 10:27:57 +0000 ++++ new/gcc/gimplify.c 2011-07-13 13:17:31 +0000 +@@ -3693,9 +3693,8 @@ + case ARRAY_TYPE: + { + struct gimplify_init_ctor_preeval_data preeval_data; +- HOST_WIDE_INT num_type_elements, num_ctor_elements; +- HOST_WIDE_INT num_nonzero_elements; +- bool cleared, valid_const_initializer; ++ HOST_WIDE_INT num_ctor_elements, num_nonzero_elements; ++ bool cleared, complete_p, valid_const_initializer; + + /* Aggregate types must lower constructors to initialization of + individual elements. The exception is that a CONSTRUCTOR node +@@ -3712,7 +3711,7 @@ + can only do so if it known to be a valid constant initializer. */ + valid_const_initializer + = categorize_ctor_elements (ctor, &num_nonzero_elements, +- &num_ctor_elements, &cleared); ++ &num_ctor_elements, &complete_p); + + /* If a const aggregate variable is being initialized, then it + should never be a lose to promote the variable to be static. */ +@@ -3750,26 +3749,29 @@ + parts in, then generate code for the non-constant parts. */ + /* TODO. There's code in cp/typeck.c to do this. 
*/ + +- num_type_elements = count_type_elements (type, true); ++ if (int_size_in_bytes (TREE_TYPE (ctor)) < 0) ++ /* store_constructor will ignore the clearing of variable-sized ++ objects. Initializers for such objects must explicitly set ++ every field that needs to be set. */ ++ cleared = false; ++ else if (!complete_p) ++ /* If the constructor isn't complete, clear the whole object ++ beforehand. + +- /* If count_type_elements could not determine number of type elements +- for a constant-sized object, assume clearing is needed. +- Don't do this for variable-sized objects, as store_constructor +- will ignore the clearing of variable-sized objects. */ +- if (num_type_elements < 0 && int_size_in_bytes (type) >= 0) ++ ??? This ought not to be needed. For any element not present ++ in the initializer, we should simply set them to zero. Except ++ we'd need to *find* the elements that are not present, and that ++ requires trickery to avoid quadratic compile-time behavior in ++ large cases or excessive memory use in small cases. */ + cleared = true; +- /* If there are "lots" of zeros, then block clear the object first. */ +- else if (num_type_elements - num_nonzero_elements ++ else if (num_ctor_elements - num_nonzero_elements + > CLEAR_RATIO (optimize_function_for_speed_p (cfun)) +- && num_nonzero_elements < num_type_elements/4) +- cleared = true; +- /* ??? This bit ought not be needed. For any element not present +- in the initializer, we should simply set them to zero. Except +- we'd need to *find* the elements that are not present, and that +- requires trickery to avoid quadratic compile-time behavior in +- large cases or excessive memory use in small cases. */ +- else if (num_ctor_elements < num_type_elements) +- cleared = true; ++ && num_nonzero_elements < num_ctor_elements / 4) ++ /* If there are "lots" of zeros, it's more efficient to clear ++ the memory and then set the nonzero elements. */ ++ cleared = true; ++ else ++ cleared = false; + + /* If there are "lots" of initialized elements, and all of them + are valid address constants, then the entire initializer can + +=== added file 'gcc/testsuite/gcc.target/arm/pr48183.c' +--- old/gcc/testsuite/gcc.target/arm/pr48183.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/pr48183.c 2011-07-13 13:17:31 +0000 +@@ -0,0 +1,25 @@ ++/* testsuite/gcc.target/arm/pr48183.c */ ++ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-O -g" } */ ++/* { dg-add-options arm_neon } */ ++ ++#include <arm_neon.h> ++ ++void move_16bit_to_32bit (int32_t *dst, const short *src, unsigned n) ++{ ++ unsigned i; ++ int16x4x2_t input; ++ int32x4x2_t mid; ++ int32x4x2_t output; ++ ++ for (i = 0; i < n/2; i += 8) { ++ input = vld2_s16(src + i); ++ mid.val[0] = vmovl_s16(input.val[0]); ++ mid.val[1] = vmovl_s16(input.val[1]); ++ output.val[0] = vshlq_n_s32(mid.val[0], 8); ++ output.val[1] = vshlq_n_s32(mid.val[1], 8); ++ vst2q_s32((int32_t *)dst + i, output); ++ } ++} + +=== modified file 'gcc/tree.h' +--- old/gcc/tree.h 2011-07-01 09:19:21 +0000 ++++ new/gcc/tree.h 2011-07-13 13:17:31 +0000 +@@ -4627,21 +4627,10 @@ + + extern VEC(tree,gc) *ctor_to_vec (tree); + +-/* Examine CTOR to discover: +- * how many scalar fields are set to nonzero values, +- and place it in *P_NZ_ELTS; +- * how many scalar fields in total are in CTOR, +- and place it in *P_ELT_COUNT. +- * if a type is a union, and the initializer from the constructor +- is not the largest element in the union, then set *p_must_clear. 
+- +- Return whether or not CTOR is a valid static constant initializer, the same +- as "initializer_constant_valid_p (CTOR, TREE_TYPE (CTOR)) != 0". */ +- +-extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *, HOST_WIDE_INT *, +- bool *); +- +-extern HOST_WIDE_INT count_type_elements (const_tree, bool); ++extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *, ++ HOST_WIDE_INT *, bool *); ++ ++extern bool complete_ctor_at_level_p (const_tree, HOST_WIDE_INT, const_tree); + + /* integer_zerop (tree x) is nonzero if X is an integer constant of value 0. */ + + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106782.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106782.patch new file mode 100644 index 0000000000..5335a9e375 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106782.patch @@ -0,0 +1,27 @@ +2011-07-21 Richard Sandiford <rdsandiford@googlemail.com> + + gcc/ + Backport from mainline: + + 2011-07-21 Richard Sandiford <richard.sandiford@linaro.org> + + * regcprop.c (maybe_mode_change): Check HARD_REGNO_MODE_OK. + +=== modified file 'gcc/regcprop.c' +--- old/gcc/regcprop.c 2010-12-17 22:51:25 +0000 ++++ new/gcc/regcprop.c 2011-07-21 11:30:53 +0000 +@@ -418,10 +418,9 @@ + + offset = ((WORDS_BIG_ENDIAN ? wordoffset : 0) + + (BYTES_BIG_ENDIAN ? byteoffset : 0)); +- return gen_rtx_raw_REG (new_mode, +- regno + subreg_regno_offset (regno, orig_mode, +- offset, +- new_mode)); ++ regno += subreg_regno_offset (regno, orig_mode, offset, new_mode); ++ if (HARD_REGNO_MODE_OK (regno, new_mode)) ++ return gen_rtx_raw_REG (new_mode, regno); + } + return NULL_RTX; + } + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106783.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106783.patch new file mode 100644 index 0000000000..61e3916375 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106783.patch @@ -0,0 +1,62 @@ +2011-07-31 Revital Eres <revital.eres@linaro.org> + + gcc/ + Backport from trunk -r176970: + + * modulo-sched.c: Change comment. + (reset_sched_times): Fix print message. + (print_partial_schedule): Add print info. + +=== modified file 'gcc/modulo-sched.c' +--- old/gcc/modulo-sched.c 2011-07-04 12:01:34 +0000 ++++ new/gcc/modulo-sched.c 2011-07-31 10:58:46 +0000 +@@ -84,13 +84,14 @@ + II cycles (i.e. use register copies to prevent a def from overwriting + itself before reaching the use). + +- SMS works with countable loops whose loop count can be easily +- adjusted. This is because we peel a constant number of iterations +- into a prologue and epilogue for which we want to avoid emitting +- the control part, and a kernel which is to iterate that constant +- number of iterations less than the original loop. So the control +- part should be a set of insns clearly identified and having its +- own iv, not otherwise used in the loop (at-least for now), which ++ SMS works with countable loops (1) whose control part can be easily ++ decoupled from the rest of the loop and (2) whose loop count can ++ be easily adjusted. This is because we peel a constant number of ++ iterations into a prologue and epilogue for which we want to avoid ++ emitting the control part, and a kernel which is to iterate that ++ constant number of iterations less than the original loop. 
So the ++ control part should be a set of insns clearly identified and having ++ its own iv, not otherwise used in the loop (at-least for now), which + initializes a register before the loop to the number of iterations. + Currently SMS relies on the do-loop pattern to recognize such loops, + where (1) the control part comprises of all insns defining and/or +@@ -598,8 +599,8 @@ + /* Print the scheduling times after the rotation. */ + fprintf (dump_file, "crr_insn->node=%d (insn id %d), " + "crr_insn->cycle=%d, min_cycle=%d", crr_insn->node->cuid, +- INSN_UID (crr_insn->node->insn), SCHED_TIME (u), +- normalized_time); ++ INSN_UID (crr_insn->node->insn), normalized_time, ++ new_min_cycle); + if (JUMP_P (crr_insn->node->insn)) + fprintf (dump_file, " (branch)"); + fprintf (dump_file, "\n"); +@@ -2550,8 +2551,13 @@ + fprintf (dump, "\n[ROW %d ]: ", i); + while (ps_i) + { +- fprintf (dump, "%d, ", +- INSN_UID (ps_i->node->insn)); ++ if (JUMP_P (ps_i->node->insn)) ++ fprintf (dump, "%d (branch), ", ++ INSN_UID (ps_i->node->insn)); ++ else ++ fprintf (dump, "%d, ", ++ INSN_UID (ps_i->node->insn)); ++ + ps_i = ps_i->next_in_row; + } + } + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106784.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106784.patch new file mode 100644 index 0000000000..b82fe76d41 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106784.patch @@ -0,0 +1,458 @@ +2011-08-09 Revital Eres <revital.eres@linaro.org> + + gcc/ + Backport from trunk -r177235. + * modulo-sched.c (calculate_stage_count, + calculate_must_precede_follow, get_sched_window, + try_scheduling_node_in_cycle, remove_node_from_ps): + Add declaration. + (update_node_sched_params, set_must_precede_follow, optimize_sc): + New functions. + (reset_sched_times): Call update_node_sched_params. + (sms_schedule): Call optimize_sc. + (get_sched_window): Change function arguments. + (sms_schedule_by_order): Update call to get_sched_window. + Call set_must_precede_follow. + (calculate_stage_count): Add function argument. + +=== modified file 'gcc/modulo-sched.c' +--- old/gcc/modulo-sched.c 2011-07-31 10:58:46 +0000 ++++ new/gcc/modulo-sched.c 2011-08-09 04:51:48 +0000 +@@ -203,7 +203,16 @@ + rtx, rtx); + static void duplicate_insns_of_cycles (partial_schedule_ptr, + int, int, int, rtx); +-static int calculate_stage_count (partial_schedule_ptr ps); ++static int calculate_stage_count (partial_schedule_ptr, int); ++static void calculate_must_precede_follow (ddg_node_ptr, int, int, ++ int, int, sbitmap, sbitmap, sbitmap); ++static int get_sched_window (partial_schedule_ptr, ddg_node_ptr, ++ sbitmap, int, int *, int *, int *); ++static bool try_scheduling_node_in_cycle (partial_schedule_ptr, ddg_node_ptr, ++ int, int, sbitmap, int *, sbitmap, ++ sbitmap); ++static bool remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr); ++ + #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap) + #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time) + #define SCHED_FIRST_REG_MOVE(x) \ +@@ -577,6 +586,36 @@ + } + } + ++/* Update the sched_params (time, row and stage) for node U using the II, ++ the CYCLE of U and MIN_CYCLE. ++ We're not simply taking the following ++ SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii); ++ because the stages may not be aligned on cycle 0. 
*/ ++static void ++update_node_sched_params (ddg_node_ptr u, int ii, int cycle, int min_cycle) ++{ ++ int sc_until_cycle_zero; ++ int stage; ++ ++ SCHED_TIME (u) = cycle; ++ SCHED_ROW (u) = SMODULO (cycle, ii); ++ ++ /* The calculation of stage count is done adding the number ++ of stages before cycle zero and after cycle zero. */ ++ sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii); ++ ++ if (SCHED_TIME (u) < 0) ++ { ++ stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii); ++ SCHED_STAGE (u) = sc_until_cycle_zero - stage; ++ } ++ else ++ { ++ stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii); ++ SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1; ++ } ++} ++ + /* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of + SCHED_ROW and SCHED_STAGE. */ + static void +@@ -592,7 +631,6 @@ + ddg_node_ptr u = crr_insn->node; + int normalized_time = SCHED_TIME (u) - amount; + int new_min_cycle = PS_MIN_CYCLE (ps) - amount; +- int sc_until_cycle_zero, stage; + + if (dump_file) + { +@@ -608,23 +646,9 @@ + + gcc_assert (SCHED_TIME (u) >= ps->min_cycle); + gcc_assert (SCHED_TIME (u) <= ps->max_cycle); +- SCHED_TIME (u) = normalized_time; +- SCHED_ROW (u) = SMODULO (normalized_time, ii); +- +- /* The calculation of stage count is done adding the number +- of stages before cycle zero and after cycle zero. */ +- sc_until_cycle_zero = CALC_STAGE_COUNT (-1, new_min_cycle, ii); +- +- if (SCHED_TIME (u) < 0) +- { +- stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii); +- SCHED_STAGE (u) = sc_until_cycle_zero - stage; +- } +- else +- { +- stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii); +- SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1; +- } ++ ++ crr_insn->cycle = normalized_time; ++ update_node_sched_params (u, ii, normalized_time, new_min_cycle); + } + } + +@@ -661,6 +685,206 @@ + PREV_INSN (last)); + } + ++/* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE ++ respectively only if cycle C falls on the border of the scheduling ++ window boundaries marked by START and END cycles. STEP is the ++ direction of the window. */ ++static inline void ++set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow, ++ sbitmap *tmp_precede, sbitmap must_precede, int c, ++ int start, int end, int step) ++{ ++ *tmp_precede = NULL; ++ *tmp_follow = NULL; ++ ++ if (c == start) ++ { ++ if (step == 1) ++ *tmp_precede = must_precede; ++ else /* step == -1. */ ++ *tmp_follow = must_follow; ++ } ++ if (c == end - step) ++ { ++ if (step == 1) ++ *tmp_follow = must_follow; ++ else /* step == -1. */ ++ *tmp_precede = must_precede; ++ } ++ ++} ++ ++/* Return True if the branch can be moved to row ii-1 while ++ normalizing the partial schedule PS to start from cycle zero and thus ++ optimize the SC. Otherwise return False. */ ++static bool ++optimize_sc (partial_schedule_ptr ps, ddg_ptr g) ++{ ++ int amount = PS_MIN_CYCLE (ps); ++ sbitmap sched_nodes = sbitmap_alloc (g->num_nodes); ++ int start, end, step; ++ int ii = ps->ii; ++ bool ok = false; ++ int stage_count, stage_count_curr; ++ ++ /* Compare the SC after normalization and SC after bringing the branch ++ to row ii-1. If they are equal just bail out. 
*/ ++ stage_count = calculate_stage_count (ps, amount); ++ stage_count_curr = ++ calculate_stage_count (ps, SCHED_TIME (g->closing_branch) - (ii - 1)); ++ ++ if (stage_count == stage_count_curr) ++ { ++ if (dump_file) ++ fprintf (dump_file, "SMS SC already optimized.\n"); ++ ++ ok = false; ++ goto clear; ++ } ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "SMS Trying to optimize branch location\n"); ++ fprintf (dump_file, "SMS partial schedule before trial:\n"); ++ print_partial_schedule (ps, dump_file); ++ } ++ ++ /* First, normalize the partial scheduling. */ ++ reset_sched_times (ps, amount); ++ rotate_partial_schedule (ps, amount); ++ if (dump_file) ++ { ++ fprintf (dump_file, ++ "SMS partial schedule after normalization (ii, %d, SC %d):\n", ++ ii, stage_count); ++ print_partial_schedule (ps, dump_file); ++ } ++ ++ if (SMODULO (SCHED_TIME (g->closing_branch), ii) == ii - 1) ++ { ++ ok = true; ++ goto clear; ++ } ++ ++ sbitmap_ones (sched_nodes); ++ ++ /* Calculate the new placement of the branch. It should be in row ++ ii-1 and fall into it's scheduling window. */ ++ if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start, ++ &step, &end) == 0) ++ { ++ bool success; ++ ps_insn_ptr next_ps_i; ++ int branch_cycle = SCHED_TIME (g->closing_branch); ++ int row = SMODULO (branch_cycle, ps->ii); ++ int num_splits = 0; ++ sbitmap must_precede, must_follow, tmp_precede, tmp_follow; ++ int c; ++ ++ if (dump_file) ++ fprintf (dump_file, "\nTrying to schedule node %d " ++ "INSN = %d in (%d .. %d) step %d\n", ++ g->closing_branch->cuid, ++ (INSN_UID (g->closing_branch->insn)), start, end, step); ++ ++ gcc_assert ((step > 0 && start < end) || (step < 0 && start > end)); ++ if (step == 1) ++ { ++ c = start + ii - SMODULO (start, ii) - 1; ++ gcc_assert (c >= start); ++ if (c >= end) ++ { ++ ok = false; ++ if (dump_file) ++ fprintf (dump_file, ++ "SMS failed to schedule branch at cycle: %d\n", c); ++ goto clear; ++ } ++ } ++ else ++ { ++ c = start - SMODULO (start, ii) - 1; ++ gcc_assert (c <= start); ++ ++ if (c <= end) ++ { ++ if (dump_file) ++ fprintf (dump_file, ++ "SMS failed to schedule branch at cycle: %d\n", c); ++ ok = false; ++ goto clear; ++ } ++ } ++ ++ must_precede = sbitmap_alloc (g->num_nodes); ++ must_follow = sbitmap_alloc (g->num_nodes); ++ ++ /* Try to schedule the branch is it's new cycle. */ ++ calculate_must_precede_follow (g->closing_branch, start, end, ++ step, ii, sched_nodes, ++ must_precede, must_follow); ++ ++ set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede, ++ must_precede, c, start, end, step); ++ ++ /* Find the element in the partial schedule related to the closing ++ branch so we can remove it from it's current cycle. */ ++ for (next_ps_i = ps->rows[row]; ++ next_ps_i; next_ps_i = next_ps_i->next_in_row) ++ if (next_ps_i->node->cuid == g->closing_branch->cuid) ++ break; ++ ++ gcc_assert (next_ps_i); ++ gcc_assert (remove_node_from_ps (ps, next_ps_i)); ++ success = ++ try_scheduling_node_in_cycle (ps, g->closing_branch, ++ g->closing_branch->cuid, c, ++ sched_nodes, &num_splits, ++ tmp_precede, tmp_follow); ++ gcc_assert (num_splits == 0); ++ if (!success) ++ { ++ if (dump_file) ++ fprintf (dump_file, ++ "SMS failed to schedule branch at cycle: %d, " ++ "bringing it back to cycle %d\n", c, branch_cycle); ++ ++ /* The branch was failed to be placed in row ii - 1. ++ Put it back in it's original place in the partial ++ schedualing. 
*/ ++ set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede, ++ must_precede, branch_cycle, start, end, ++ step); ++ success = ++ try_scheduling_node_in_cycle (ps, g->closing_branch, ++ g->closing_branch->cuid, ++ branch_cycle, sched_nodes, ++ &num_splits, tmp_precede, ++ tmp_follow); ++ gcc_assert (success && (num_splits == 0)); ++ ok = false; ++ } ++ else ++ { ++ /* The branch is placed in row ii - 1. */ ++ if (dump_file) ++ fprintf (dump_file, ++ "SMS success in moving branch to cycle %d\n", c); ++ ++ update_node_sched_params (g->closing_branch, ii, c, ++ PS_MIN_CYCLE (ps)); ++ ok = true; ++ } ++ ++ free (must_precede); ++ free (must_follow); ++ } ++ ++clear: ++ free (sched_nodes); ++ return ok; ++} ++ + static void + duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage, + int to_stage, int for_prolog, rtx count_reg) +@@ -1116,6 +1340,7 @@ + int mii, rec_mii; + unsigned stage_count = 0; + HOST_WIDEST_INT loop_count = 0; ++ bool opt_sc_p = false; + + if (! (g = g_arr[loop->num])) + continue; +@@ -1197,14 +1422,32 @@ + set_node_sched_params (g); + + ps = sms_schedule_by_order (g, mii, maxii, node_order); +- +- if (ps) +- { +- stage_count = calculate_stage_count (ps); +- gcc_assert(stage_count >= 1); +- PS_STAGE_COUNT(ps) = stage_count; +- } +- ++ ++ if (ps) ++ { ++ /* Try to achieve optimized SC by normalizing the partial ++ schedule (having the cycles start from cycle zero). ++ The branch location must be placed in row ii-1 in the ++ final scheduling. If failed, shift all instructions to ++ position the branch in row ii-1. */ ++ opt_sc_p = optimize_sc (ps, g); ++ if (opt_sc_p) ++ stage_count = calculate_stage_count (ps, 0); ++ else ++ { ++ /* Bring the branch to cycle ii-1. */ ++ int amount = SCHED_TIME (g->closing_branch) - (ps->ii - 1); ++ ++ if (dump_file) ++ fprintf (dump_file, "SMS schedule branch at cycle ii-1\n"); ++ ++ stage_count = calculate_stage_count (ps, amount); ++ } ++ ++ gcc_assert (stage_count >= 1); ++ PS_STAGE_COUNT (ps) = stage_count; ++ } ++ + /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of + 1 means that there is no interleaving between iterations thus + we let the scheduling passes do the job in this case. */ +@@ -1225,12 +1468,16 @@ + else + { + struct undo_replace_buff_elem *reg_move_replaces; +- int amount = SCHED_TIME (g->closing_branch) + 1; ++ ++ if (!opt_sc_p) ++ { ++ /* Rotate the partial schedule to have the branch in row ii-1. */ ++ int amount = SCHED_TIME (g->closing_branch) - (ps->ii - 1); ++ ++ reset_sched_times (ps, amount); ++ rotate_partial_schedule (ps, amount); ++ } + +- /* Set the stage boundaries. The closing_branch was scheduled +- and should appear in the last (ii-1) row. */ +- reset_sched_times (ps, amount); +- rotate_partial_schedule (ps, amount); + set_columns_for_ps (ps); + + canon_loop (loop); +@@ -1382,13 +1629,11 @@ + scheduling window is empty and zero otherwise. */ + + static int +-get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i, ++get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node, + sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p) + { + int start, step, end; + ddg_edge_ptr e; +- int u = nodes_order [i]; +- ddg_node_ptr u_node = &ps->g->nodes[u]; + sbitmap psp = sbitmap_alloc (ps->g->num_nodes); + sbitmap pss = sbitmap_alloc (ps->g->num_nodes); + sbitmap u_node_preds = NODE_PREDECESSORS (u_node); +@@ -1800,7 +2045,7 @@ + + /* Try to get non-empty scheduling window. 
*/ + success = 0; +- if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start, ++ if (get_sched_window (ps, u_node, sched_nodes, ii, &start, + &step, &end) == 0) + { + if (dump_file) +@@ -1817,24 +2062,11 @@ + + for (c = start; c != end; c += step) + { +- sbitmap tmp_precede = NULL; +- sbitmap tmp_follow = NULL; +- +- if (c == start) +- { +- if (step == 1) +- tmp_precede = must_precede; +- else /* step == -1. */ +- tmp_follow = must_follow; +- } +- if (c == end - step) +- { +- if (step == 1) +- tmp_follow = must_follow; +- else /* step == -1. */ +- tmp_precede = must_precede; +- } +- ++ sbitmap tmp_precede, tmp_follow; ++ ++ set_must_precede_follow (&tmp_follow, must_follow, ++ &tmp_precede, must_precede, ++ c, start, end, step); + success = + try_scheduling_node_in_cycle (ps, u_node, u, c, + sched_nodes, +@@ -2899,12 +3131,10 @@ + } + + /* Calculate the stage count of the partial schedule PS. The calculation +- takes into account the rotation to bring the closing branch to row +- ii-1. */ ++ takes into account the rotation amount passed in ROTATION_AMOUNT. */ + int +-calculate_stage_count (partial_schedule_ptr ps) ++calculate_stage_count (partial_schedule_ptr ps, int rotation_amount) + { +- int rotation_amount = (SCHED_TIME (ps->g->closing_branch)) + 1; + int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount; + int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount; + int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii); + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106785.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106785.patch new file mode 100644 index 0000000000..608dd18336 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106785.patch @@ -0,0 +1,39 @@ + 2011-08-09 Revital Eres <revital.eres@linaro.org> + + gcc/ + Backport from trunk -r176972: + + * ddg.c (create_ddg_dep_from_intra_loop_link): Remove + the creation of anti-dep edge from a branch. + (add_cross_iteration_register_deps): + Create anti-dep edge from a branch. + +=== modified file 'gcc/ddg.c' +--- old/gcc/ddg.c 2011-07-04 11:00:06 +0000 ++++ new/gcc/ddg.c 2011-07-31 11:29:10 +0000 +@@ -197,11 +197,6 @@ + } + } + +- /* If a true dep edge enters the branch create an anti edge in the +- opposite direction to prevent the creation of reg-moves. */ +- if ((DEP_TYPE (link) == REG_DEP_TRUE) && JUMP_P (dest_node->insn)) +- create_ddg_dep_no_link (g, dest_node, src_node, ANTI_DEP, REG_DEP, 1); +- + latency = dep_cost (link); + e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance); + add_edge_to_ddg (g, e); +@@ -306,8 +301,11 @@ + + gcc_assert (first_def_node); + ++ /* Always create the edge if the use node is a branch in ++ order to prevent the creation of reg-moves. 
*/ + if (DF_REF_ID (last_def) != DF_REF_ID (first_def) +- || !flag_modulo_sched_allow_regmoves) ++ || !flag_modulo_sched_allow_regmoves ++ || JUMP_P (use_node->insn)) + create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP, + REG_DEP, 1); + + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106786.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106786.patch new file mode 100644 index 0000000000..55b7cc1078 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106786.patch @@ -0,0 +1,94 @@ +2011-08-11 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + gcc/ + Backport from mainline: + + 2011-07-28 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + * config/arm/vfp.md ("*movdf_vfp"): Handle the VFP constraints + before the core constraints. Adjust attributes. + (*thumb2_movdf_vfp"): Likewise. + +=== modified file 'gcc/config/arm/vfp.md' +--- old/gcc/config/arm/vfp.md 2011-01-20 22:03:29 +0000 ++++ new/gcc/config/arm/vfp.md 2011-07-27 12:59:19 +0000 +@@ -401,8 +401,8 @@ + ;; DFmode moves + + (define_insn "*movdf_vfp" +- [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,r, m,w ,Uv,w,r") +- (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,mF,r,UvF,w, w,r"))] ++ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w ,Uv,r, m,w,r") ++ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,UvF,w ,mF,r,w,r"))] + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP + && ( register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode))" +@@ -418,9 +418,9 @@ + gcc_assert (TARGET_VFP_DOUBLE); + return \"fconstd%?\\t%P0, #%G1\"; + case 3: case 4: ++ return output_move_vfp (operands); ++ case 5: case 6: + return output_move_double (operands); +- case 5: case 6: +- return output_move_vfp (operands); + case 7: + if (TARGET_VFP_SINGLE) + return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\"; +@@ -435,7 +435,7 @@ + " + [(set_attr "type" + "r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*") +- (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8) ++ (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8) + (eq_attr "alternative" "7") + (if_then_else + (eq (symbol_ref "TARGET_VFP_SINGLE") +@@ -449,8 +449,8 @@ + ) + + (define_insn "*thumb2_movdf_vfp" +- [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,r, m,w ,Uv,w,r") +- (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,mF,r,UvF,w, w,r"))] ++ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w ,Uv,r ,m,w,r") ++ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,UvF,w, mF,r, w,r"))] + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP" + "* + { +@@ -463,10 +463,10 @@ + case 2: + gcc_assert (TARGET_VFP_DOUBLE); + return \"fconstd%?\\t%P0, #%G1\"; +- case 3: case 4: case 8: ++ case 3: case 4: ++ return output_move_vfp (operands); ++ case 5: case 6: case 8: + return output_move_double (operands); +- case 5: case 6: +- return output_move_vfp (operands); + case 7: + if (TARGET_VFP_SINGLE) + return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\"; +@@ -478,8 +478,8 @@ + } + " + [(set_attr "type" +- "r_2_f,f_2_r,fconstd,load2,store2,f_loadd,f_stored,ffarithd,*") +- (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8) ++ "r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*") ++ (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8) + (eq_attr "alternative" "7") + (if_then_else + (eq (symbol_ref 
"TARGET_VFP_SINGLE") +@@ -487,8 +487,8 @@ + (const_int 8) + (const_int 4))] + (const_int 4))) +- (set_attr "pool_range" "*,*,*,4096,*,1020,*,*,*") +- (set_attr "neg_pool_range" "*,*,*,0,*,1008,*,*,*")] ++ (set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*") ++ (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")] + ) + + + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106787.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106787.patch new file mode 100644 index 0000000000..bdb48ad1e6 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106787.patch @@ -0,0 +1,30 @@ +2011-08-15 Michael Hope <michael.hope@linaro.org> + + Backport from mainline r177357 + + gcc/testsuite/ + 2011-08-04 Ian Bolton <ian.bolton@arm.com> + + * gcc.target/arm/vfp-1.c: no large negative offsets on Thumb2. + +=== modified file 'gcc/testsuite/gcc.target/arm/vfp-1.c' +--- old/gcc/testsuite/gcc.target/arm/vfp-1.c 2011-01-01 08:52:03 +0000 ++++ new/gcc/testsuite/gcc.target/arm/vfp-1.c 2011-08-09 23:22:51 +0000 +@@ -127,13 +127,13 @@ + + void test_ldst (float f[], double d[]) { + /* { dg-final { scan-assembler "flds.+ \\\[r0, #1020\\\]" } } */ +- /* { dg-final { scan-assembler "flds.+ \\\[r0, #-1020\\\]" } } */ ++ /* { dg-final { scan-assembler "flds.+ \\\[r\[0-9\], #-1020\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */ + /* { dg-final { scan-assembler "add.+ r0, #1024" } } */ +- /* { dg-final { scan-assembler "fsts.+ \\\[r0, #0\\\]\n" } } */ ++ /* { dg-final { scan-assembler "fsts.+ \\\[r\[0-9\], #0\\\]\n" } } */ + f[256] = f[255] + f[-255]; + + /* { dg-final { scan-assembler "fldd.+ \\\[r1, #1016\\\]" } } */ +- /* { dg-final { scan-assembler "fldd.+ \\\[r1, #-1016\\\]" } } */ ++ /* { dg-final { scan-assembler "fldd.+ \\\[r\[1-9\], #-1016\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */ + /* { dg-final { scan-assembler "fstd.+ \\\[r1, #256\\\]" } } */ + d[32] = d[127] + d[-127]; + } + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106789.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106789.patch new file mode 100644 index 0000000000..a384ce7a18 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106789.patch @@ -0,0 +1,33 @@ +2011-08-15 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + * config/rs6000/rs6000.c (paired_expand_vector_init): Don't create + CONST_VECTORs with symbolic elements. + (rs6000_expand_vector_init): Likewise. 
+ +=== modified file 'gcc/config/rs6000/rs6000.c' +--- old/gcc/config/rs6000/rs6000.c 2011-07-27 18:17:15 +0000 ++++ new/gcc/config/rs6000/rs6000.c 2011-08-16 08:59:36 +0000 +@@ -5134,7 +5134,9 @@ + for (i = 0; i < n_elts; ++i) + { + x = XVECEXP (vals, 0, i); +- if (!CONSTANT_P (x)) ++ if (!(CONST_INT_P (x) ++ || GET_CODE (x) == CONST_DOUBLE ++ || GET_CODE (x) == CONST_FIXED)) + ++n_var; + } + if (n_var == 0) +@@ -5286,7 +5288,9 @@ + for (i = 0; i < n_elts; ++i) + { + x = XVECEXP (vals, 0, i); +- if (!CONSTANT_P (x)) ++ if (!(CONST_INT_P (x) ++ || GET_CODE (x) == CONST_DOUBLE ++ || GET_CODE (x) == CONST_FIXED)) + ++n_var, one_var = i; + else if (x != CONST0_RTX (inner_mode)) + all_const_zero = false; + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106792.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106792.patch new file mode 100644 index 0000000000..12c578c2b4 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106792.patch @@ -0,0 +1,61 @@ +2011-08-18 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-08-12 Richard Sandiford <rdsandiford@googlemail.com> + + * config/arm/arm.c (get_label_padding): New function. + (create_fix_barrier, arm_reorg): Use it. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-07-04 14:03:49 +0000 ++++ new/gcc/config/arm/arm.c 2011-08-12 08:08:31 +0000 +@@ -11769,6 +11769,19 @@ + return 0; + } + ++/* Return the maximum amount of padding that will be inserted before ++ label LABEL. */ ++ ++static HOST_WIDE_INT ++get_label_padding (rtx label) ++{ ++ HOST_WIDE_INT align, min_insn_size; ++ ++ align = 1 << label_to_alignment (label); ++ min_insn_size = TARGET_THUMB ? 2 : 4; ++ return align > min_insn_size ? align - min_insn_size : 0; ++} ++ + /* Move a minipool fix MP from its current location to before MAX_MP. + If MAX_MP is NULL, then MP doesn't need moving, but the addressing + constraints may need updating. */ +@@ -12315,8 +12328,12 @@ + within range. */ + gcc_assert (GET_CODE (from) != BARRIER); + +- /* Count the length of this insn. */ +- count += get_attr_length (from); ++ /* Count the length of this insn. This must stay in sync with the ++ code that pushes minipool fixes. */ ++ if (LABEL_P (from)) ++ count += get_label_padding (from); ++ else ++ count += get_attr_length (from); + + /* If there is a jump table, add its length. */ + tmp = is_jump_table (from); +@@ -12736,6 +12753,11 @@ + insn = table; + } + } ++ else if (LABEL_P (insn)) ++ /* Add the worst-case padding due to alignment. We don't add ++ the _current_ padding because the minipool insertions ++ themselves might change it. */ ++ address += get_label_padding (insn); + } + + fix = minipool_fix_head; + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106794.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106794.patch new file mode 100644 index 0000000000..29663c64a0 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106794.patch @@ -0,0 +1,2648 @@ +2011-08-24 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + gcc/ + * config/arm/arm.c (arm_init_neon_builtins): Use + n_operands instead of n_generator_args. + +2011-08-24 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline + 2011-04-18 Jie Zhang <jie@codesourcery.com> + Richard Earnshaw <rearnsha@arm.com> + + * arm.c (neon_builtin_type_bits): Remove. 
+ (typedef enum neon_builtin_mode): New. + (T_MAX): Don't define. + (typedef enum neon_builtin_datum): Remove bits, codes[], + num_vars and base_fcode. Add mode, code and fcode. + (VAR1, VAR2, VAR3, VAR4, VAR5, VAR6, VAR7, VAR8, VAR9 + VAR10): Change accordingly. + (neon_builtin_data[]): Change accordingly + (arm_init_neon_builtins): Change accordingly. + (neon_builtin_compare): Remove. + (locate_neon_builtin_icode): Remove. + (arm_expand_neon_builtin): Change accordingly. + + * arm.h (enum arm_builtins): Move to ... + * arm.c (enum arm_builtins): ... here; and rearrange builtin code. + + * arm.c (arm_builtin_decl): Declare. + (TARGET_BUILTIN_DECL): Define. + (enum arm_builtins): Correct ARM_BUILTIN_MAX. + (arm_builtin_decls[]): New. + (arm_init_neon_builtins): Store builtin declarations in + arm_builtin_decls[]. + (arm_init_tls_builtins): Likewise. + (arm_init_iwmmxt_builtins): Likewise. Refactor initialization code. + (arm_builtin_decl): New. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-08-18 13:53:37 +0000 ++++ new/gcc/config/arm/arm.c 2011-08-24 17:35:16 +0000 +@@ -162,6 +162,7 @@ + static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx); + static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int); + static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int); ++static tree arm_builtin_decl (unsigned, bool); + static void emit_constant_insn (rtx cond, rtx pattern); + static rtx emit_set_insn (rtx, rtx); + static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode, +@@ -415,6 +416,8 @@ + #define TARGET_INIT_BUILTINS arm_init_builtins + #undef TARGET_EXPAND_BUILTIN + #define TARGET_EXPAND_BUILTIN arm_expand_builtin ++#undef TARGET_BUILTIN_DECL ++#define TARGET_BUILTIN_DECL arm_builtin_decl + + #undef TARGET_INIT_LIBFUNCS + #define TARGET_INIT_LIBFUNCS arm_init_libfuncs +@@ -18147,505 +18150,31 @@ + return value; + } + +-#define def_mbuiltin(MASK, NAME, TYPE, CODE) \ +- do \ +- { \ +- if ((MASK) & insn_flags) \ +- add_builtin_function ((NAME), (TYPE), (CODE), \ +- BUILT_IN_MD, NULL, NULL_TREE); \ +- } \ +- while (0) +- +-struct builtin_description +-{ +- const unsigned int mask; +- const enum insn_code icode; +- const char * const name; +- const enum arm_builtins code; +- const enum rtx_code comparison; +- const unsigned int flag; +-}; +- +-static const struct builtin_description bdesc_2arg[] = +-{ +-#define IWMMXT_BUILTIN(code, string, builtin) \ +- { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \ +- ARM_BUILTIN_##builtin, UNKNOWN, 0 }, +- +- IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB) +- IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH) +- IWMMXT_BUILTIN (addv2si3, "waddw", WADDW) +- IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB) +- IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH) +- IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW) +- IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB) +- IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH) +- IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW) +- IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB) +- IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH) +- IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW) +- IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB) +- IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH) +- IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW) +- IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB) +- IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH) +- IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW) +- IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL) +- IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM) 
+- IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM) +- IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB) +- IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH) +- IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW) +- IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB) +- IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH) +- IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW) +- IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB) +- IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH) +- IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW) +- IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB) +- IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB) +- IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH) +- IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH) +- IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW) +- IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW) +- IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB) +- IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB) +- IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH) +- IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH) +- IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW) +- IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW) +- IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND) +- IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN) +- IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR) +- IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR) +- IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B) +- IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H) +- IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR) +- IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR) +- IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB) +- IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH) +- IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW) +- IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB) +- IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH) +- IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW) +- IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS) +- IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU) +- +-#define IWMMXT_BUILTIN2(code, builtin) \ +- { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 }, +- +- IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS) +- IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS) +- IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS) +- IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS) +- IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS) +- IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS) +- IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH) +- IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI) +- IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW) +- IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI) +- IWMMXT_BUILTIN2 (ashldi3_di, WSLLD) +- IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI) +- IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH) +- IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI) +- IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW) +- IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI) +- IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD) +- IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI) +- IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH) +- IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI) +- IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW) +- IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI) +- IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD) +- IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI) +- IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH) +- IWMMXT_BUILTIN2 (rorv4hi3, WRORHI) +- IWMMXT_BUILTIN2 (rorv2si3_di, WRORW) +- IWMMXT_BUILTIN2 (rorv2si3, WRORWI) +- IWMMXT_BUILTIN2 (rordi3_di, WRORD) +- IWMMXT_BUILTIN2 (rordi3, WRORDI) +- IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ) +- IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ) +-}; +- +-static const struct builtin_description bdesc_1arg[] = +-{ +- IWMMXT_BUILTIN 
(iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB) +- IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH) +- IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW) +- IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB) +- IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH) +- IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW) +- IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB) +- IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH) +- IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW) +- IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB) +- IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH) +- IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW) +- IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB) +- IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH) +- IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW) +- IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB) +- IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH) +- IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW) +-}; +- +-/* Set up all the iWMMXt builtins. This is +- not called if TARGET_IWMMXT is zero. */ +- +-static void +-arm_init_iwmmxt_builtins (void) +-{ +- const struct builtin_description * d; +- size_t i; +- tree endlink = void_list_node; +- +- tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode); +- tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode); +- tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode); +- +- tree int_ftype_int +- = build_function_type (integer_type_node, +- tree_cons (NULL_TREE, integer_type_node, endlink)); +- tree v8qi_ftype_v8qi_v8qi_int +- = build_function_type (V8QI_type_node, +- tree_cons (NULL_TREE, V8QI_type_node, +- tree_cons (NULL_TREE, V8QI_type_node, +- tree_cons (NULL_TREE, +- integer_type_node, +- endlink)))); +- tree v4hi_ftype_v4hi_int +- = build_function_type (V4HI_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- tree_cons (NULL_TREE, integer_type_node, +- endlink))); +- tree v2si_ftype_v2si_int +- = build_function_type (V2SI_type_node, +- tree_cons (NULL_TREE, V2SI_type_node, +- tree_cons (NULL_TREE, integer_type_node, +- endlink))); +- tree v2si_ftype_di_di +- = build_function_type (V2SI_type_node, +- tree_cons (NULL_TREE, long_long_integer_type_node, +- tree_cons (NULL_TREE, long_long_integer_type_node, +- endlink))); +- tree di_ftype_di_int +- = build_function_type (long_long_integer_type_node, +- tree_cons (NULL_TREE, long_long_integer_type_node, +- tree_cons (NULL_TREE, integer_type_node, +- endlink))); +- tree di_ftype_di_int_int +- = build_function_type (long_long_integer_type_node, +- tree_cons (NULL_TREE, long_long_integer_type_node, +- tree_cons (NULL_TREE, integer_type_node, +- tree_cons (NULL_TREE, +- integer_type_node, +- endlink)))); +- tree int_ftype_v8qi +- = build_function_type (integer_type_node, +- tree_cons (NULL_TREE, V8QI_type_node, +- endlink)); +- tree int_ftype_v4hi +- = build_function_type (integer_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- endlink)); +- tree int_ftype_v2si +- = build_function_type (integer_type_node, +- tree_cons (NULL_TREE, V2SI_type_node, +- endlink)); +- tree int_ftype_v8qi_int +- = build_function_type (integer_type_node, +- tree_cons (NULL_TREE, V8QI_type_node, +- tree_cons (NULL_TREE, integer_type_node, +- endlink))); +- tree int_ftype_v4hi_int +- = build_function_type (integer_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- tree_cons (NULL_TREE, 
integer_type_node, +- endlink))); +- tree int_ftype_v2si_int +- = build_function_type (integer_type_node, +- tree_cons (NULL_TREE, V2SI_type_node, +- tree_cons (NULL_TREE, integer_type_node, +- endlink))); +- tree v8qi_ftype_v8qi_int_int +- = build_function_type (V8QI_type_node, +- tree_cons (NULL_TREE, V8QI_type_node, +- tree_cons (NULL_TREE, integer_type_node, +- tree_cons (NULL_TREE, +- integer_type_node, +- endlink)))); +- tree v4hi_ftype_v4hi_int_int +- = build_function_type (V4HI_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- tree_cons (NULL_TREE, integer_type_node, +- tree_cons (NULL_TREE, +- integer_type_node, +- endlink)))); +- tree v2si_ftype_v2si_int_int +- = build_function_type (V2SI_type_node, +- tree_cons (NULL_TREE, V2SI_type_node, +- tree_cons (NULL_TREE, integer_type_node, +- tree_cons (NULL_TREE, +- integer_type_node, +- endlink)))); +- /* Miscellaneous. */ +- tree v8qi_ftype_v4hi_v4hi +- = build_function_type (V8QI_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- endlink))); +- tree v4hi_ftype_v2si_v2si +- = build_function_type (V4HI_type_node, +- tree_cons (NULL_TREE, V2SI_type_node, +- tree_cons (NULL_TREE, V2SI_type_node, +- endlink))); +- tree v2si_ftype_v4hi_v4hi +- = build_function_type (V2SI_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- endlink))); +- tree v2si_ftype_v8qi_v8qi +- = build_function_type (V2SI_type_node, +- tree_cons (NULL_TREE, V8QI_type_node, +- tree_cons (NULL_TREE, V8QI_type_node, +- endlink))); +- tree v4hi_ftype_v4hi_di +- = build_function_type (V4HI_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- tree_cons (NULL_TREE, +- long_long_integer_type_node, +- endlink))); +- tree v2si_ftype_v2si_di +- = build_function_type (V2SI_type_node, +- tree_cons (NULL_TREE, V2SI_type_node, +- tree_cons (NULL_TREE, +- long_long_integer_type_node, +- endlink))); +- tree void_ftype_int_int +- = build_function_type (void_type_node, +- tree_cons (NULL_TREE, integer_type_node, +- tree_cons (NULL_TREE, integer_type_node, +- endlink))); +- tree di_ftype_void +- = build_function_type (long_long_unsigned_type_node, endlink); +- tree di_ftype_v8qi +- = build_function_type (long_long_integer_type_node, +- tree_cons (NULL_TREE, V8QI_type_node, +- endlink)); +- tree di_ftype_v4hi +- = build_function_type (long_long_integer_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- endlink)); +- tree di_ftype_v2si +- = build_function_type (long_long_integer_type_node, +- tree_cons (NULL_TREE, V2SI_type_node, +- endlink)); +- tree v2si_ftype_v4hi +- = build_function_type (V2SI_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- endlink)); +- tree v4hi_ftype_v8qi +- = build_function_type (V4HI_type_node, +- tree_cons (NULL_TREE, V8QI_type_node, +- endlink)); +- +- tree di_ftype_di_v4hi_v4hi +- = build_function_type (long_long_unsigned_type_node, +- tree_cons (NULL_TREE, +- long_long_unsigned_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- tree_cons (NULL_TREE, +- V4HI_type_node, +- endlink)))); +- +- tree di_ftype_v4hi_v4hi +- = build_function_type (long_long_unsigned_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- endlink))); +- +- /* Normal vector binops. 
*/ +- tree v8qi_ftype_v8qi_v8qi +- = build_function_type (V8QI_type_node, +- tree_cons (NULL_TREE, V8QI_type_node, +- tree_cons (NULL_TREE, V8QI_type_node, +- endlink))); +- tree v4hi_ftype_v4hi_v4hi +- = build_function_type (V4HI_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- endlink))); +- tree v2si_ftype_v2si_v2si +- = build_function_type (V2SI_type_node, +- tree_cons (NULL_TREE, V2SI_type_node, +- tree_cons (NULL_TREE, V2SI_type_node, +- endlink))); +- tree di_ftype_di_di +- = build_function_type (long_long_unsigned_type_node, +- tree_cons (NULL_TREE, long_long_unsigned_type_node, +- tree_cons (NULL_TREE, +- long_long_unsigned_type_node, +- endlink))); +- +- /* Add all builtins that are more or less simple operations on two +- operands. */ +- for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) +- { +- /* Use one of the operands; the target can have a different mode for +- mask-generating compares. */ +- enum machine_mode mode; +- tree type; +- +- if (d->name == 0) +- continue; +- +- mode = insn_data[d->icode].operand[1].mode; +- +- switch (mode) +- { +- case V8QImode: +- type = v8qi_ftype_v8qi_v8qi; +- break; +- case V4HImode: +- type = v4hi_ftype_v4hi_v4hi; +- break; +- case V2SImode: +- type = v2si_ftype_v2si_v2si; +- break; +- case DImode: +- type = di_ftype_di_di; +- break; +- +- default: +- gcc_unreachable (); +- } +- +- def_mbuiltin (d->mask, d->name, type, d->code); +- } +- +- /* Add the remaining MMX insns with somewhat more complicated types. */ +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, 
ARM_BUILTIN_WRORH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW); +- 
def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT); +-} +- +-static void +-arm_init_tls_builtins (void) +-{ +- tree ftype, decl; +- +- ftype = build_function_type (ptr_type_node, void_list_node); +- decl = add_builtin_function ("__builtin_thread_pointer", ftype, +- ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD, +- NULL, NULL_TREE); +- TREE_NOTHROW (decl) = 1; +- TREE_READONLY (decl) = 1; +-} +- +-enum neon_builtin_type_bits { +- T_V8QI = 0x0001, +- T_V4HI = 0x0002, +- T_V2SI = 0x0004, +- T_V2SF = 0x0008, +- T_DI = 0x0010, +- T_DREG = 0x001F, +- T_V16QI = 0x0020, +- T_V8HI = 0x0040, +- T_V4SI = 0x0080, +- T_V4SF = 0x0100, +- T_V2DI = 0x0200, +- T_TI = 0x0400, +- T_QREG = 0x07E0, +- T_EI = 0x0800, +- T_OI = 0x1000 +-}; ++typedef enum { ++ T_V8QI, ++ T_V4HI, ++ T_V2SI, ++ T_V2SF, ++ T_DI, ++ T_V16QI, ++ T_V8HI, ++ T_V4SI, ++ T_V4SF, ++ T_V2DI, ++ T_TI, ++ T_EI, ++ T_OI, ++ T_MAX /* Size of enum. Keep last. 
*/ ++} neon_builtin_type_mode; ++ ++#define TYPE_MODE_BIT(X) (1 << (X)) ++ ++#define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \ ++ | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \ ++ | TYPE_MODE_BIT (T_DI)) ++#define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \ ++ | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \ ++ | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI)) + + #define v8qi_UP T_V8QI + #define v4hi_UP T_V4HI +@@ -18663,8 +18192,6 @@ + + #define UP(X) X##_UP + +-#define T_MAX 13 +- + typedef enum { + NEON_BINOP, + NEON_TERNOP, +@@ -18708,49 +18235,42 @@ + typedef struct { + const char *name; + const neon_itype itype; +- const int bits; +- const enum insn_code codes[T_MAX]; +- const unsigned int num_vars; +- unsigned int base_fcode; ++ const neon_builtin_type_mode mode; ++ const enum insn_code code; ++ unsigned int fcode; + } neon_builtin_datum; + + #define CF(N,X) CODE_FOR_neon_##N##X + + #define VAR1(T, N, A) \ +- #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0 ++ {#N, NEON_##T, UP (A), CF (N, A), 0} + #define VAR2(T, N, A, B) \ +- #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0 ++ VAR1 (T, N, A), \ ++ {#N, NEON_##T, UP (B), CF (N, B), 0} + #define VAR3(T, N, A, B, C) \ +- #N, NEON_##T, UP (A) | UP (B) | UP (C), \ +- { CF (N, A), CF (N, B), CF (N, C) }, 3, 0 ++ VAR2 (T, N, A, B), \ ++ {#N, NEON_##T, UP (C), CF (N, C), 0} + #define VAR4(T, N, A, B, C, D) \ +- #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \ +- { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0 ++ VAR3 (T, N, A, B, C), \ ++ {#N, NEON_##T, UP (D), CF (N, D), 0} + #define VAR5(T, N, A, B, C, D, E) \ +- #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \ +- { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0 ++ VAR4 (T, N, A, B, C, D), \ ++ {#N, NEON_##T, UP (E), CF (N, E), 0} + #define VAR6(T, N, A, B, C, D, E, F) \ +- #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \ +- { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0 ++ VAR5 (T, N, A, B, C, D, E), \ ++ {#N, NEON_##T, UP (F), CF (N, F), 0} + #define VAR7(T, N, A, B, C, D, E, F, G) \ +- #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \ +- { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \ +- CF (N, G) }, 7, 0 ++ VAR6 (T, N, A, B, C, D, E, F), \ ++ {#N, NEON_##T, UP (G), CF (N, G), 0} + #define VAR8(T, N, A, B, C, D, E, F, G, H) \ +- #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \ +- | UP (H), \ +- { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \ +- CF (N, G), CF (N, H) }, 8, 0 ++ VAR7 (T, N, A, B, C, D, E, F, G), \ ++ {#N, NEON_##T, UP (H), CF (N, H), 0} + #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \ +- #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \ +- | UP (H) | UP (I), \ +- { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \ +- CF (N, G), CF (N, H), CF (N, I) }, 9, 0 ++ VAR8 (T, N, A, B, C, D, E, F, G, H), \ ++ {#N, NEON_##T, UP (I), CF (N, I), 0} + #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \ +- #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \ +- | UP (H) | UP (I) | UP (J), \ +- { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \ +- CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0 ++ VAR9 (T, N, A, B, C, D, E, F, G, H, I), \ ++ {#N, NEON_##T, UP (J), CF (N, J), 0} + + /* The mode entries in the following table correspond to the "key" type of the + 
instruction variant, i.e. equivalent to that which would be specified after +@@ -18758,192 +18278,190 @@ + (Signed/unsigned/polynomial types are not differentiated between though, and + are all mapped onto the same mode for a given element size.) The modes + listed per instruction should be the same as those defined for that +- instruction's pattern in neon.md. +- WARNING: Variants should be listed in the same increasing order as +- neon_builtin_type_bits. */ ++ instruction's pattern in neon.md. */ + + static neon_builtin_datum neon_builtin_data[] = + { +- { VAR10 (BINOP, vadd, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) }, +- { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) }, +- { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, +- { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, +- { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) }, +- { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) }, +- { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) }, +- { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) }, +- { VAR2 (TERNOP, vqdmlal, v4hi, v2si) }, +- { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) }, +- { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) }, +- { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) }, +- { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) }, +- { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) }, +- { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) }, +- { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) }, +- { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) }, +- { VAR2 (BINOP, vqdmull, v4hi, v2si) }, +- { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, +- { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, +- { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, +- { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) }, +- { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) }, +- { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) }, +- { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, +- { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, +- { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, +- { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) }, +- { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, +- { VAR10 (BINOP, vsub, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) }, +- { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) }, +- { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, +- { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, +- { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) }, +- { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR2 (BINOP, vcage, v2sf, v4sf) }, +- { VAR2 (BINOP, vcagt, v2sf, v4sf) }, +- { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, +- { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) }, +- { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, +- { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) }, +- { VAR8 (BINOP, vmax, 
v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) }, +- { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, +- { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, +- { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) }, +- { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) }, +- { VAR2 (BINOP, vrecps, v2sf, v4sf) }, +- { VAR2 (BINOP, vrsqrts, v2sf, v4sf) }, +- { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, +- { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, +- { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, +- { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, +- { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, +- { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, +- { VAR2 (UNOP, vcnt, v8qi, v16qi) }, +- { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) }, +- { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) }, +- { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, ++ VAR10 (BINOP, vadd, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR3 (BINOP, vaddl, v8qi, v4hi, v2si), ++ VAR3 (BINOP, vaddw, v8qi, v4hi, v2si), ++ VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si), ++ VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), ++ VAR3 (BINOP, vaddhn, v8hi, v4si, v2di), ++ VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si), ++ VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si), ++ VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si), ++ VAR2 (TERNOP, vqdmlal, v4hi, v2si), ++ VAR2 (TERNOP, vqdmlsl, v4hi, v2si), ++ VAR3 (BINOP, vmull, v8qi, v4hi, v2si), ++ VAR2 (SCALARMULL, vmull_n, v4hi, v2si), ++ VAR2 (LANEMULL, vmull_lane, v4hi, v2si), ++ VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si), ++ VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si), ++ VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si), ++ VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si), ++ VAR2 (BINOP, vqdmull, v4hi, v2si), ++ VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), ++ VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), ++ VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), ++ VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di), ++ VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di), ++ VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di), ++ VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), ++ VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), ++ VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), ++ VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si), ++ VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), ++ VAR10 (BINOP, vsub, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR3 (BINOP, vsubl, v8qi, v4hi, v2si), ++ VAR3 (BINOP, vsubw, v8qi, v4hi, v2si), ++ VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), ++ VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si), ++ VAR3 (BINOP, vsubhn, v8hi, v4si, v2di), ++ VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ 
VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR2 (BINOP, vcage, v2sf, v4sf), ++ VAR2 (BINOP, vcagt, v2sf, v4sf), ++ VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si), ++ VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR3 (BINOP, vabdl, v8qi, v4hi, v2si), ++ VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si), ++ VAR3 (TERNOP, vabal, v8qi, v4hi, v2si), ++ VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf), ++ VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si), ++ VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si), ++ VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf), ++ VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf), ++ VAR2 (BINOP, vrecps, v2sf, v4sf), ++ VAR2 (BINOP, vrsqrts, v2sf, v4sf), ++ VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), ++ VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), ++ VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si), ++ VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si), ++ VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si), ++ VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si), ++ VAR2 (UNOP, vcnt, v8qi, v16qi), ++ VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf), ++ VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf), ++ VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si), + /* FIXME: vget_lane supports more variants than this! */ +- { VAR10 (GETLANE, vget_lane, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR10 (SETLANE, vset_lane, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) }, +- { VAR10 (DUP, vdup_n, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR10 (DUPLANE, vdup_lane, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) }, +- { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) }, +- { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) }, +- { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) }, +- { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) }, +- { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, +- { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, +- { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) }, +- { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) }, +- { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, +- { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) }, +- { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) }, +- { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, +- { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, +- { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) }, +- { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) }, +- { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, +- { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) }, +- { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) }, +- { VAR10 (BINOP, vext, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR4 (UNOP, 
vrev32, v8qi, v4hi, v16qi, v8hi) }, +- { VAR2 (UNOP, vrev16, v8qi, v16qi) }, +- { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) }, +- { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) }, +- { VAR10 (SELECT, vbsl, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR1 (VTBL, vtbl1, v8qi) }, +- { VAR1 (VTBL, vtbl2, v8qi) }, +- { VAR1 (VTBL, vtbl3, v8qi) }, +- { VAR1 (VTBL, vtbl4, v8qi) }, +- { VAR1 (VTBX, vtbx1, v8qi) }, +- { VAR1 (VTBX, vtbx2, v8qi) }, +- { VAR1 (VTBX, vtbx3, v8qi) }, +- { VAR1 (VTBX, vtbx4, v8qi) }, +- { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) }, +- { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) }, +- { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) }, +- { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) }, +- { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) }, +- { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR10 (LOAD1, vld1, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR10 (LOAD1LANE, vld1_lane, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR10 (LOAD1, vld1_dup, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR10 (STORE1, vst1, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR10 (STORE1LANE, vst1_lane, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR9 (LOADSTRUCT, +- vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) }, +- { VAR7 (LOADSTRUCTLANE, vld2_lane, +- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, +- { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) }, +- { VAR9 (STORESTRUCT, vst2, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) }, +- { VAR7 (STORESTRUCTLANE, vst2_lane, +- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, +- { VAR9 (LOADSTRUCT, +- vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) }, +- { VAR7 (LOADSTRUCTLANE, vld3_lane, +- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, +- { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) }, +- { VAR9 (STORESTRUCT, vst3, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) }, +- { VAR7 (STORESTRUCTLANE, vst3_lane, +- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, +- { VAR9 (LOADSTRUCT, vld4, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) }, +- { VAR7 (LOADSTRUCTLANE, vld4_lane, +- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, +- { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) }, +- { VAR9 (STORESTRUCT, vst4, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) }, +- { VAR7 (STORESTRUCTLANE, vst4_lane, +- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, +- { VAR10 (LOGICBINOP, vand, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR10 (LOGICBINOP, vorr, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR10 (BINOP, veor, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR10 (LOGICBINOP, vbic, +- v8qi, v4hi, v2si, v2sf, di, 
v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR10 (LOGICBINOP, vorn, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) } ++ VAR10 (GETLANE, vget_lane, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR10 (SETLANE, vset_lane, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di), ++ VAR10 (DUP, vdup_n, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR10 (DUPLANE, vdup_lane, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di), ++ VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR3 (UNOP, vmovn, v8hi, v4si, v2di), ++ VAR3 (UNOP, vqmovn, v8hi, v4si, v2di), ++ VAR3 (UNOP, vqmovun, v8hi, v4si, v2di), ++ VAR3 (UNOP, vmovl, v8qi, v4hi, v2si), ++ VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf), ++ VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf), ++ VAR2 (LANEMAC, vmlal_lane, v4hi, v2si), ++ VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si), ++ VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf), ++ VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si), ++ VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si), ++ VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf), ++ VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf), ++ VAR2 (SCALARMAC, vmlal_n, v4hi, v2si), ++ VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si), ++ VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf), ++ VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si), ++ VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si), ++ VAR10 (BINOP, vext, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi), ++ VAR2 (UNOP, vrev16, v8qi, v16qi), ++ VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf), ++ VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf), ++ VAR10 (SELECT, vbsl, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR1 (VTBL, vtbl1, v8qi), ++ VAR1 (VTBL, vtbl2, v8qi), ++ VAR1 (VTBL, vtbl3, v8qi), ++ VAR1 (VTBL, vtbl4, v8qi), ++ VAR1 (VTBX, vtbx1, v8qi), ++ VAR1 (VTBX, vtbx2, v8qi), ++ VAR1 (VTBX, vtbx3, v8qi), ++ VAR1 (VTBX, vtbx4, v8qi), ++ VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di), ++ VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di), ++ VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di), ++ VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di), ++ VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di), ++ VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR10 (LOAD1, vld1, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR10 (LOAD1LANE, vld1_lane, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR10 (LOAD1, vld1_dup, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR10 (STORE1, vst1, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, 
v2di), ++ VAR10 (STORE1LANE, vst1_lane, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR9 (LOADSTRUCT, ++ vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), ++ VAR7 (LOADSTRUCTLANE, vld2_lane, ++ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), ++ VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di), ++ VAR9 (STORESTRUCT, vst2, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), ++ VAR7 (STORESTRUCTLANE, vst2_lane, ++ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), ++ VAR9 (LOADSTRUCT, ++ vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), ++ VAR7 (LOADSTRUCTLANE, vld3_lane, ++ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), ++ VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di), ++ VAR9 (STORESTRUCT, vst3, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), ++ VAR7 (STORESTRUCTLANE, vst3_lane, ++ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), ++ VAR9 (LOADSTRUCT, vld4, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), ++ VAR7 (LOADSTRUCTLANE, vld4_lane, ++ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), ++ VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di), ++ VAR9 (STORESTRUCT, vst4, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), ++ VAR7 (STORESTRUCTLANE, vst4_lane, ++ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), ++ VAR10 (LOGICBINOP, vand, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR10 (LOGICBINOP, vorr, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR10 (BINOP, veor, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR10 (LOGICBINOP, vbic, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR10 (LOGICBINOP, vorn, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) + }; + + #undef CF +@@ -18958,10 +18476,185 @@ + #undef VAR9 + #undef VAR10 + ++/* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have ++ symbolic names defined here (which would require too much duplication). ++ FIXME? 
*/ ++enum arm_builtins ++{ ++ ARM_BUILTIN_GETWCX, ++ ARM_BUILTIN_SETWCX, ++ ++ ARM_BUILTIN_WZERO, ++ ++ ARM_BUILTIN_WAVG2BR, ++ ARM_BUILTIN_WAVG2HR, ++ ARM_BUILTIN_WAVG2B, ++ ARM_BUILTIN_WAVG2H, ++ ++ ARM_BUILTIN_WACCB, ++ ARM_BUILTIN_WACCH, ++ ARM_BUILTIN_WACCW, ++ ++ ARM_BUILTIN_WMACS, ++ ARM_BUILTIN_WMACSZ, ++ ARM_BUILTIN_WMACU, ++ ARM_BUILTIN_WMACUZ, ++ ++ ARM_BUILTIN_WSADB, ++ ARM_BUILTIN_WSADBZ, ++ ARM_BUILTIN_WSADH, ++ ARM_BUILTIN_WSADHZ, ++ ++ ARM_BUILTIN_WALIGN, ++ ++ ARM_BUILTIN_TMIA, ++ ARM_BUILTIN_TMIAPH, ++ ARM_BUILTIN_TMIABB, ++ ARM_BUILTIN_TMIABT, ++ ARM_BUILTIN_TMIATB, ++ ARM_BUILTIN_TMIATT, ++ ++ ARM_BUILTIN_TMOVMSKB, ++ ARM_BUILTIN_TMOVMSKH, ++ ARM_BUILTIN_TMOVMSKW, ++ ++ ARM_BUILTIN_TBCSTB, ++ ARM_BUILTIN_TBCSTH, ++ ARM_BUILTIN_TBCSTW, ++ ++ ARM_BUILTIN_WMADDS, ++ ARM_BUILTIN_WMADDU, ++ ++ ARM_BUILTIN_WPACKHSS, ++ ARM_BUILTIN_WPACKWSS, ++ ARM_BUILTIN_WPACKDSS, ++ ARM_BUILTIN_WPACKHUS, ++ ARM_BUILTIN_WPACKWUS, ++ ARM_BUILTIN_WPACKDUS, ++ ++ ARM_BUILTIN_WADDB, ++ ARM_BUILTIN_WADDH, ++ ARM_BUILTIN_WADDW, ++ ARM_BUILTIN_WADDSSB, ++ ARM_BUILTIN_WADDSSH, ++ ARM_BUILTIN_WADDSSW, ++ ARM_BUILTIN_WADDUSB, ++ ARM_BUILTIN_WADDUSH, ++ ARM_BUILTIN_WADDUSW, ++ ARM_BUILTIN_WSUBB, ++ ARM_BUILTIN_WSUBH, ++ ARM_BUILTIN_WSUBW, ++ ARM_BUILTIN_WSUBSSB, ++ ARM_BUILTIN_WSUBSSH, ++ ARM_BUILTIN_WSUBSSW, ++ ARM_BUILTIN_WSUBUSB, ++ ARM_BUILTIN_WSUBUSH, ++ ARM_BUILTIN_WSUBUSW, ++ ++ ARM_BUILTIN_WAND, ++ ARM_BUILTIN_WANDN, ++ ARM_BUILTIN_WOR, ++ ARM_BUILTIN_WXOR, ++ ++ ARM_BUILTIN_WCMPEQB, ++ ARM_BUILTIN_WCMPEQH, ++ ARM_BUILTIN_WCMPEQW, ++ ARM_BUILTIN_WCMPGTUB, ++ ARM_BUILTIN_WCMPGTUH, ++ ARM_BUILTIN_WCMPGTUW, ++ ARM_BUILTIN_WCMPGTSB, ++ ARM_BUILTIN_WCMPGTSH, ++ ARM_BUILTIN_WCMPGTSW, ++ ++ ARM_BUILTIN_TEXTRMSB, ++ ARM_BUILTIN_TEXTRMSH, ++ ARM_BUILTIN_TEXTRMSW, ++ ARM_BUILTIN_TEXTRMUB, ++ ARM_BUILTIN_TEXTRMUH, ++ ARM_BUILTIN_TEXTRMUW, ++ ARM_BUILTIN_TINSRB, ++ ARM_BUILTIN_TINSRH, ++ ARM_BUILTIN_TINSRW, ++ ++ ARM_BUILTIN_WMAXSW, ++ ARM_BUILTIN_WMAXSH, ++ ARM_BUILTIN_WMAXSB, ++ ARM_BUILTIN_WMAXUW, ++ ARM_BUILTIN_WMAXUH, ++ ARM_BUILTIN_WMAXUB, ++ ARM_BUILTIN_WMINSW, ++ ARM_BUILTIN_WMINSH, ++ ARM_BUILTIN_WMINSB, ++ ARM_BUILTIN_WMINUW, ++ ARM_BUILTIN_WMINUH, ++ ARM_BUILTIN_WMINUB, ++ ++ ARM_BUILTIN_WMULUM, ++ ARM_BUILTIN_WMULSM, ++ ARM_BUILTIN_WMULUL, ++ ++ ARM_BUILTIN_PSADBH, ++ ARM_BUILTIN_WSHUFH, ++ ++ ARM_BUILTIN_WSLLH, ++ ARM_BUILTIN_WSLLW, ++ ARM_BUILTIN_WSLLD, ++ ARM_BUILTIN_WSRAH, ++ ARM_BUILTIN_WSRAW, ++ ARM_BUILTIN_WSRAD, ++ ARM_BUILTIN_WSRLH, ++ ARM_BUILTIN_WSRLW, ++ ARM_BUILTIN_WSRLD, ++ ARM_BUILTIN_WRORH, ++ ARM_BUILTIN_WRORW, ++ ARM_BUILTIN_WRORD, ++ ARM_BUILTIN_WSLLHI, ++ ARM_BUILTIN_WSLLWI, ++ ARM_BUILTIN_WSLLDI, ++ ARM_BUILTIN_WSRAHI, ++ ARM_BUILTIN_WSRAWI, ++ ARM_BUILTIN_WSRADI, ++ ARM_BUILTIN_WSRLHI, ++ ARM_BUILTIN_WSRLWI, ++ ARM_BUILTIN_WSRLDI, ++ ARM_BUILTIN_WRORHI, ++ ARM_BUILTIN_WRORWI, ++ ARM_BUILTIN_WRORDI, ++ ++ ARM_BUILTIN_WUNPCKIHB, ++ ARM_BUILTIN_WUNPCKIHH, ++ ARM_BUILTIN_WUNPCKIHW, ++ ARM_BUILTIN_WUNPCKILB, ++ ARM_BUILTIN_WUNPCKILH, ++ ARM_BUILTIN_WUNPCKILW, ++ ++ ARM_BUILTIN_WUNPCKEHSB, ++ ARM_BUILTIN_WUNPCKEHSH, ++ ARM_BUILTIN_WUNPCKEHSW, ++ ARM_BUILTIN_WUNPCKEHUB, ++ ARM_BUILTIN_WUNPCKEHUH, ++ ARM_BUILTIN_WUNPCKEHUW, ++ ARM_BUILTIN_WUNPCKELSB, ++ ARM_BUILTIN_WUNPCKELSH, ++ ARM_BUILTIN_WUNPCKELSW, ++ ARM_BUILTIN_WUNPCKELUB, ++ ARM_BUILTIN_WUNPCKELUH, ++ ARM_BUILTIN_WUNPCKELUW, ++ ++ ARM_BUILTIN_THREAD_POINTER, ++ ++ ARM_BUILTIN_NEON_BASE, ++ ++ ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data) ++}; ++ ++static GTY(()) tree 
arm_builtin_decls[ARM_BUILTIN_MAX]; ++ + static void + arm_init_neon_builtins (void) + { +- unsigned int i, fcode = ARM_BUILTIN_NEON_BASE; ++ unsigned int i, fcode; ++ tree decl; + + tree neon_intQI_type_node; + tree neon_intHI_type_node; +@@ -19209,250 +18902,740 @@ + } + } + +- for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++) ++ for (i = 0, fcode = ARM_BUILTIN_NEON_BASE; ++ i < ARRAY_SIZE (neon_builtin_data); ++ i++, fcode++) + { + neon_builtin_datum *d = &neon_builtin_data[i]; +- unsigned int j, codeidx = 0; +- +- d->base_fcode = fcode; +- +- for (j = 0; j < T_MAX; j++) +- { +- const char* const modenames[] = { +- "v8qi", "v4hi", "v2si", "v2sf", "di", +- "v16qi", "v8hi", "v4si", "v4sf", "v2di" +- }; +- char namebuf[60]; +- tree ftype = NULL; +- enum insn_code icode; +- int is_load = 0, is_store = 0; +- +- if ((d->bits & (1 << j)) == 0) +- continue; +- +- icode = d->codes[codeidx++]; +- +- switch (d->itype) +- { +- case NEON_LOAD1: +- case NEON_LOAD1LANE: +- case NEON_LOADSTRUCT: +- case NEON_LOADSTRUCTLANE: +- is_load = 1; +- /* Fall through. */ +- case NEON_STORE1: +- case NEON_STORE1LANE: +- case NEON_STORESTRUCT: +- case NEON_STORESTRUCTLANE: +- if (!is_load) +- is_store = 1; +- /* Fall through. */ +- case NEON_UNOP: +- case NEON_BINOP: +- case NEON_LOGICBINOP: +- case NEON_SHIFTINSERT: +- case NEON_TERNOP: +- case NEON_GETLANE: +- case NEON_SETLANE: +- case NEON_CREATE: +- case NEON_DUP: +- case NEON_DUPLANE: +- case NEON_SHIFTIMM: +- case NEON_SHIFTACC: +- case NEON_COMBINE: +- case NEON_SPLIT: +- case NEON_CONVERT: +- case NEON_FIXCONV: +- case NEON_LANEMUL: +- case NEON_LANEMULL: +- case NEON_LANEMULH: +- case NEON_LANEMAC: +- case NEON_SCALARMUL: +- case NEON_SCALARMULL: +- case NEON_SCALARMULH: +- case NEON_SCALARMAC: +- case NEON_SELECT: +- case NEON_VTBL: +- case NEON_VTBX: +- { +- int k; +- tree return_type = void_type_node, args = void_list_node; +- +- /* Build a function type directly from the insn_data for this +- builtin. The build_function_type() function takes care of +- removing duplicates for us. */ +- for (k = insn_data[icode].n_operands - 1; k >= 0; k--) +- { +- tree eltype; +- +- if (is_load && k == 1) +- { +- /* Neon load patterns always have the memory operand +- in the operand 1 position. */ +- gcc_assert (insn_data[icode].operand[k].predicate +- == neon_struct_operand); +- +- switch (1 << j) +- { +- case T_V8QI: +- case T_V16QI: +- eltype = const_intQI_pointer_node; +- break; +- +- case T_V4HI: +- case T_V8HI: +- eltype = const_intHI_pointer_node; +- break; +- +- case T_V2SI: +- case T_V4SI: +- eltype = const_intSI_pointer_node; +- break; +- +- case T_V2SF: +- case T_V4SF: +- eltype = const_float_pointer_node; +- break; +- +- case T_DI: +- case T_V2DI: +- eltype = const_intDI_pointer_node; +- break; +- +- default: gcc_unreachable (); +- } +- } +- else if (is_store && k == 0) +- { +- /* Similarly, Neon store patterns use operand 0 as +- the memory location to store to. 
*/ +- gcc_assert (insn_data[icode].operand[k].predicate +- == neon_struct_operand); +- +- switch (1 << j) +- { +- case T_V8QI: +- case T_V16QI: +- eltype = intQI_pointer_node; +- break; +- +- case T_V4HI: +- case T_V8HI: +- eltype = intHI_pointer_node; +- break; +- +- case T_V2SI: +- case T_V4SI: +- eltype = intSI_pointer_node; +- break; +- +- case T_V2SF: +- case T_V4SF: +- eltype = float_pointer_node; +- break; +- +- case T_DI: +- case T_V2DI: +- eltype = intDI_pointer_node; +- break; +- +- default: gcc_unreachable (); +- } +- } +- else +- { +- switch (insn_data[icode].operand[k].mode) +- { +- case VOIDmode: eltype = void_type_node; break; +- /* Scalars. */ +- case QImode: eltype = neon_intQI_type_node; break; +- case HImode: eltype = neon_intHI_type_node; break; +- case SImode: eltype = neon_intSI_type_node; break; +- case SFmode: eltype = neon_float_type_node; break; +- case DImode: eltype = neon_intDI_type_node; break; +- case TImode: eltype = intTI_type_node; break; +- case EImode: eltype = intEI_type_node; break; +- case OImode: eltype = intOI_type_node; break; +- case CImode: eltype = intCI_type_node; break; +- case XImode: eltype = intXI_type_node; break; +- /* 64-bit vectors. */ +- case V8QImode: eltype = V8QI_type_node; break; +- case V4HImode: eltype = V4HI_type_node; break; +- case V2SImode: eltype = V2SI_type_node; break; +- case V2SFmode: eltype = V2SF_type_node; break; +- /* 128-bit vectors. */ +- case V16QImode: eltype = V16QI_type_node; break; +- case V8HImode: eltype = V8HI_type_node; break; +- case V4SImode: eltype = V4SI_type_node; break; +- case V4SFmode: eltype = V4SF_type_node; break; +- case V2DImode: eltype = V2DI_type_node; break; +- default: gcc_unreachable (); +- } +- } +- +- if (k == 0 && !is_store) +- return_type = eltype; +- else +- args = tree_cons (NULL_TREE, eltype, args); +- } +- +- ftype = build_function_type (return_type, args); +- } +- break; +- +- case NEON_RESULTPAIR: +- { +- switch (insn_data[icode].operand[1].mode) +- { +- case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break; +- case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break; +- case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break; +- case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break; +- case DImode: ftype = void_ftype_pdi_di_di; break; +- case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break; +- case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break; +- case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break; +- case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break; +- case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break; +- default: gcc_unreachable (); +- } +- } +- break; +- +- case NEON_REINTERP: +- { +- /* We iterate over 5 doubleword types, then 5 quadword +- types. 
*/ +- int rhs = j % 5; +- switch (insn_data[icode].operand[0].mode) +- { +- case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break; +- case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break; +- case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break; +- case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break; +- case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break; +- case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break; +- case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break; +- case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break; +- case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break; +- case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break; +- default: gcc_unreachable (); +- } +- } +- break; +- +- default: +- gcc_unreachable (); +- } +- +- gcc_assert (ftype != NULL); +- +- sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]); +- +- add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL, +- NULL_TREE); +- } +- } ++ ++ const char* const modenames[] = { ++ "v8qi", "v4hi", "v2si", "v2sf", "di", ++ "v16qi", "v8hi", "v4si", "v4sf", "v2di", ++ "ti", "ei", "oi" ++ }; ++ char namebuf[60]; ++ tree ftype = NULL; ++ int is_load = 0, is_store = 0; ++ ++ gcc_assert (ARRAY_SIZE (modenames) == T_MAX); ++ ++ d->fcode = fcode; ++ ++ switch (d->itype) ++ { ++ case NEON_LOAD1: ++ case NEON_LOAD1LANE: ++ case NEON_LOADSTRUCT: ++ case NEON_LOADSTRUCTLANE: ++ is_load = 1; ++ /* Fall through. */ ++ case NEON_STORE1: ++ case NEON_STORE1LANE: ++ case NEON_STORESTRUCT: ++ case NEON_STORESTRUCTLANE: ++ if (!is_load) ++ is_store = 1; ++ /* Fall through. */ ++ case NEON_UNOP: ++ case NEON_BINOP: ++ case NEON_LOGICBINOP: ++ case NEON_SHIFTINSERT: ++ case NEON_TERNOP: ++ case NEON_GETLANE: ++ case NEON_SETLANE: ++ case NEON_CREATE: ++ case NEON_DUP: ++ case NEON_DUPLANE: ++ case NEON_SHIFTIMM: ++ case NEON_SHIFTACC: ++ case NEON_COMBINE: ++ case NEON_SPLIT: ++ case NEON_CONVERT: ++ case NEON_FIXCONV: ++ case NEON_LANEMUL: ++ case NEON_LANEMULL: ++ case NEON_LANEMULH: ++ case NEON_LANEMAC: ++ case NEON_SCALARMUL: ++ case NEON_SCALARMULL: ++ case NEON_SCALARMULH: ++ case NEON_SCALARMAC: ++ case NEON_SELECT: ++ case NEON_VTBL: ++ case NEON_VTBX: ++ { ++ int k; ++ tree return_type = void_type_node, args = void_list_node; ++ ++ /* Build a function type directly from the insn_data for ++ this builtin. The build_function_type() function takes ++ care of removing duplicates for us. */ ++ for (k = insn_data[d->code].n_operands - 1; k >= 0; k--) ++ { ++ tree eltype; ++ ++ if (is_load && k == 1) ++ { ++ /* Neon load patterns always have the memory ++ operand in the operand 1 position. */ ++ gcc_assert (insn_data[d->code].operand[k].predicate ++ == neon_struct_operand); ++ ++ switch (d->mode) ++ { ++ case T_V8QI: ++ case T_V16QI: ++ eltype = const_intQI_pointer_node; ++ break; ++ ++ case T_V4HI: ++ case T_V8HI: ++ eltype = const_intHI_pointer_node; ++ break; ++ ++ case T_V2SI: ++ case T_V4SI: ++ eltype = const_intSI_pointer_node; ++ break; ++ ++ case T_V2SF: ++ case T_V4SF: ++ eltype = const_float_pointer_node; ++ break; ++ ++ case T_DI: ++ case T_V2DI: ++ eltype = const_intDI_pointer_node; ++ break; ++ ++ default: gcc_unreachable (); ++ } ++ } ++ else if (is_store && k == 0) ++ { ++ /* Similarly, Neon store patterns use operand 0 as ++ the memory location to store to. 
*/ ++ gcc_assert (insn_data[d->code].operand[k].predicate ++ == neon_struct_operand); ++ ++ switch (d->mode) ++ { ++ case T_V8QI: ++ case T_V16QI: ++ eltype = intQI_pointer_node; ++ break; ++ ++ case T_V4HI: ++ case T_V8HI: ++ eltype = intHI_pointer_node; ++ break; ++ ++ case T_V2SI: ++ case T_V4SI: ++ eltype = intSI_pointer_node; ++ break; ++ ++ case T_V2SF: ++ case T_V4SF: ++ eltype = float_pointer_node; ++ break; ++ ++ case T_DI: ++ case T_V2DI: ++ eltype = intDI_pointer_node; ++ break; ++ ++ default: gcc_unreachable (); ++ } ++ } ++ else ++ { ++ switch (insn_data[d->code].operand[k].mode) ++ { ++ case VOIDmode: eltype = void_type_node; break; ++ /* Scalars. */ ++ case QImode: eltype = neon_intQI_type_node; break; ++ case HImode: eltype = neon_intHI_type_node; break; ++ case SImode: eltype = neon_intSI_type_node; break; ++ case SFmode: eltype = neon_float_type_node; break; ++ case DImode: eltype = neon_intDI_type_node; break; ++ case TImode: eltype = intTI_type_node; break; ++ case EImode: eltype = intEI_type_node; break; ++ case OImode: eltype = intOI_type_node; break; ++ case CImode: eltype = intCI_type_node; break; ++ case XImode: eltype = intXI_type_node; break; ++ /* 64-bit vectors. */ ++ case V8QImode: eltype = V8QI_type_node; break; ++ case V4HImode: eltype = V4HI_type_node; break; ++ case V2SImode: eltype = V2SI_type_node; break; ++ case V2SFmode: eltype = V2SF_type_node; break; ++ /* 128-bit vectors. */ ++ case V16QImode: eltype = V16QI_type_node; break; ++ case V8HImode: eltype = V8HI_type_node; break; ++ case V4SImode: eltype = V4SI_type_node; break; ++ case V4SFmode: eltype = V4SF_type_node; break; ++ case V2DImode: eltype = V2DI_type_node; break; ++ default: gcc_unreachable (); ++ } ++ } ++ ++ if (k == 0 && !is_store) ++ return_type = eltype; ++ else ++ args = tree_cons (NULL_TREE, eltype, args); ++ } ++ ++ ftype = build_function_type (return_type, args); ++ } ++ break; ++ ++ case NEON_RESULTPAIR: ++ { ++ switch (insn_data[d->code].operand[1].mode) ++ { ++ case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break; ++ case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break; ++ case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break; ++ case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break; ++ case DImode: ftype = void_ftype_pdi_di_di; break; ++ case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break; ++ case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break; ++ case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break; ++ case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break; ++ case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break; ++ default: gcc_unreachable (); ++ } ++ } ++ break; ++ ++ case NEON_REINTERP: ++ { ++ /* We iterate over 5 doubleword types, then 5 quadword ++ types. 
*/ ++ int rhs = d->mode % 5; ++ switch (insn_data[d->code].operand[0].mode) ++ { ++ case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break; ++ case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break; ++ case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break; ++ case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break; ++ case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break; ++ case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break; ++ case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break; ++ case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break; ++ case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break; ++ case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break; ++ default: gcc_unreachable (); ++ } ++ } ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ gcc_assert (ftype != NULL); ++ ++ sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]); ++ ++ decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL, ++ NULL_TREE); ++ arm_builtin_decls[fcode] = decl; ++ } ++} ++ ++#define def_mbuiltin(MASK, NAME, TYPE, CODE) \ ++ do \ ++ { \ ++ if ((MASK) & insn_flags) \ ++ { \ ++ tree bdecl; \ ++ bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \ ++ BUILT_IN_MD, NULL, NULL_TREE); \ ++ arm_builtin_decls[CODE] = bdecl; \ ++ } \ ++ } \ ++ while (0) ++ ++struct builtin_description ++{ ++ const unsigned int mask; ++ const enum insn_code icode; ++ const char * const name; ++ const enum arm_builtins code; ++ const enum rtx_code comparison; ++ const unsigned int flag; ++}; ++ ++static const struct builtin_description bdesc_2arg[] = ++{ ++#define IWMMXT_BUILTIN(code, string, builtin) \ ++ { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \ ++ ARM_BUILTIN_##builtin, UNKNOWN, 0 }, ++ ++ IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB) ++ IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH) ++ IWMMXT_BUILTIN (addv2si3, "waddw", WADDW) ++ IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB) ++ IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH) ++ IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW) ++ IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB) ++ IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH) ++ IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW) ++ IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB) ++ IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH) ++ IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW) ++ IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB) ++ IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH) ++ IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW) ++ IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB) ++ IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH) ++ IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW) ++ IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL) ++ IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM) ++ IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM) ++ IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB) ++ IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH) ++ IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW) ++ IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB) ++ IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH) ++ IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW) ++ IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB) ++ IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH) ++ IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW) ++ IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB) ++ IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB) ++ IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH) ++ IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH) ++ IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW) ++ IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW) ++ 
IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB) ++ IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB) ++ IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH) ++ IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH) ++ IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW) ++ IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW) ++ IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND) ++ IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN) ++ IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR) ++ IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR) ++ IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B) ++ IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H) ++ IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR) ++ IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR) ++ IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB) ++ IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH) ++ IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW) ++ IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB) ++ IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH) ++ IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW) ++ IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS) ++ IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU) ++ ++#define IWMMXT_BUILTIN2(code, builtin) \ ++ { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 }, ++ ++ IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS) ++ IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS) ++ IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS) ++ IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS) ++ IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS) ++ IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS) ++ IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH) ++ IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI) ++ IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW) ++ IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI) ++ IWMMXT_BUILTIN2 (ashldi3_di, WSLLD) ++ IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI) ++ IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH) ++ IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI) ++ IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW) ++ IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI) ++ IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD) ++ IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI) ++ IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH) ++ IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI) ++ IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW) ++ IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI) ++ IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD) ++ IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI) ++ IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH) ++ IWMMXT_BUILTIN2 (rorv4hi3, WRORHI) ++ IWMMXT_BUILTIN2 (rorv2si3_di, WRORW) ++ IWMMXT_BUILTIN2 (rorv2si3, WRORWI) ++ IWMMXT_BUILTIN2 (rordi3_di, WRORD) ++ IWMMXT_BUILTIN2 (rordi3, WRORDI) ++ IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ) ++ IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ) ++}; ++ ++static const struct builtin_description bdesc_1arg[] = ++{ ++ IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB) ++ IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH) ++ IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW) ++ IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB) ++ IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH) ++ IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW) ++ IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB) ++ IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH) ++ IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW) ++ IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB) ++ IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH) ++ IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW) ++ IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB) ++ IWMMXT_BUILTIN (iwmmxt_wunpckeluh, 
"wunpckeluh", WUNPCKELUH) ++ IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW) ++ IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB) ++ IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH) ++ IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW) ++}; ++ ++/* Set up all the iWMMXt builtins. This is not called if ++ TARGET_IWMMXT is zero. */ ++ ++static void ++arm_init_iwmmxt_builtins (void) ++{ ++ const struct builtin_description * d; ++ size_t i; ++ tree endlink = void_list_node; ++ ++ tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode); ++ tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode); ++ tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode); ++ ++ tree int_ftype_int ++ = build_function_type (integer_type_node, ++ tree_cons (NULL_TREE, integer_type_node, endlink)); ++ tree v8qi_ftype_v8qi_v8qi_int ++ = build_function_type (V8QI_type_node, ++ tree_cons (NULL_TREE, V8QI_type_node, ++ tree_cons (NULL_TREE, V8QI_type_node, ++ tree_cons (NULL_TREE, ++ integer_type_node, ++ endlink)))); ++ tree v4hi_ftype_v4hi_int ++ = build_function_type (V4HI_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ tree_cons (NULL_TREE, integer_type_node, ++ endlink))); ++ tree v2si_ftype_v2si_int ++ = build_function_type (V2SI_type_node, ++ tree_cons (NULL_TREE, V2SI_type_node, ++ tree_cons (NULL_TREE, integer_type_node, ++ endlink))); ++ tree v2si_ftype_di_di ++ = build_function_type (V2SI_type_node, ++ tree_cons (NULL_TREE, long_long_integer_type_node, ++ tree_cons (NULL_TREE, ++ long_long_integer_type_node, ++ endlink))); ++ tree di_ftype_di_int ++ = build_function_type (long_long_integer_type_node, ++ tree_cons (NULL_TREE, long_long_integer_type_node, ++ tree_cons (NULL_TREE, integer_type_node, ++ endlink))); ++ tree di_ftype_di_int_int ++ = build_function_type (long_long_integer_type_node, ++ tree_cons (NULL_TREE, long_long_integer_type_node, ++ tree_cons (NULL_TREE, integer_type_node, ++ tree_cons (NULL_TREE, ++ integer_type_node, ++ endlink)))); ++ tree int_ftype_v8qi ++ = build_function_type (integer_type_node, ++ tree_cons (NULL_TREE, V8QI_type_node, ++ endlink)); ++ tree int_ftype_v4hi ++ = build_function_type (integer_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ endlink)); ++ tree int_ftype_v2si ++ = build_function_type (integer_type_node, ++ tree_cons (NULL_TREE, V2SI_type_node, ++ endlink)); ++ tree int_ftype_v8qi_int ++ = build_function_type (integer_type_node, ++ tree_cons (NULL_TREE, V8QI_type_node, ++ tree_cons (NULL_TREE, integer_type_node, ++ endlink))); ++ tree int_ftype_v4hi_int ++ = build_function_type (integer_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ tree_cons (NULL_TREE, integer_type_node, ++ endlink))); ++ tree int_ftype_v2si_int ++ = build_function_type (integer_type_node, ++ tree_cons (NULL_TREE, V2SI_type_node, ++ tree_cons (NULL_TREE, integer_type_node, ++ endlink))); ++ tree v8qi_ftype_v8qi_int_int ++ = build_function_type (V8QI_type_node, ++ tree_cons (NULL_TREE, V8QI_type_node, ++ tree_cons (NULL_TREE, integer_type_node, ++ tree_cons (NULL_TREE, ++ integer_type_node, ++ endlink)))); ++ tree v4hi_ftype_v4hi_int_int ++ = build_function_type (V4HI_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ tree_cons (NULL_TREE, integer_type_node, ++ tree_cons (NULL_TREE, ++ integer_type_node, ++ endlink)))); ++ tree v2si_ftype_v2si_int_int ++ = build_function_type (V2SI_type_node, ++ tree_cons (NULL_TREE, V2SI_type_node, ++ tree_cons (NULL_TREE, 
integer_type_node, ++ tree_cons (NULL_TREE, ++ integer_type_node, ++ endlink)))); ++ /* Miscellaneous. */ ++ tree v8qi_ftype_v4hi_v4hi ++ = build_function_type (V8QI_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ endlink))); ++ tree v4hi_ftype_v2si_v2si ++ = build_function_type (V4HI_type_node, ++ tree_cons (NULL_TREE, V2SI_type_node, ++ tree_cons (NULL_TREE, V2SI_type_node, ++ endlink))); ++ tree v2si_ftype_v4hi_v4hi ++ = build_function_type (V2SI_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ endlink))); ++ tree v2si_ftype_v8qi_v8qi ++ = build_function_type (V2SI_type_node, ++ tree_cons (NULL_TREE, V8QI_type_node, ++ tree_cons (NULL_TREE, V8QI_type_node, ++ endlink))); ++ tree v4hi_ftype_v4hi_di ++ = build_function_type (V4HI_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ tree_cons (NULL_TREE, ++ long_long_integer_type_node, ++ endlink))); ++ tree v2si_ftype_v2si_di ++ = build_function_type (V2SI_type_node, ++ tree_cons (NULL_TREE, V2SI_type_node, ++ tree_cons (NULL_TREE, ++ long_long_integer_type_node, ++ endlink))); ++ tree void_ftype_int_int ++ = build_function_type (void_type_node, ++ tree_cons (NULL_TREE, integer_type_node, ++ tree_cons (NULL_TREE, integer_type_node, ++ endlink))); ++ tree di_ftype_void ++ = build_function_type (long_long_unsigned_type_node, endlink); ++ tree di_ftype_v8qi ++ = build_function_type (long_long_integer_type_node, ++ tree_cons (NULL_TREE, V8QI_type_node, ++ endlink)); ++ tree di_ftype_v4hi ++ = build_function_type (long_long_integer_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ endlink)); ++ tree di_ftype_v2si ++ = build_function_type (long_long_integer_type_node, ++ tree_cons (NULL_TREE, V2SI_type_node, ++ endlink)); ++ tree v2si_ftype_v4hi ++ = build_function_type (V2SI_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ endlink)); ++ tree v4hi_ftype_v8qi ++ = build_function_type (V4HI_type_node, ++ tree_cons (NULL_TREE, V8QI_type_node, ++ endlink)); ++ ++ tree di_ftype_di_v4hi_v4hi ++ = build_function_type (long_long_unsigned_type_node, ++ tree_cons (NULL_TREE, ++ long_long_unsigned_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ tree_cons (NULL_TREE, ++ V4HI_type_node, ++ endlink)))); ++ ++ tree di_ftype_v4hi_v4hi ++ = build_function_type (long_long_unsigned_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ endlink))); ++ ++ /* Normal vector binops. */ ++ tree v8qi_ftype_v8qi_v8qi ++ = build_function_type (V8QI_type_node, ++ tree_cons (NULL_TREE, V8QI_type_node, ++ tree_cons (NULL_TREE, V8QI_type_node, ++ endlink))); ++ tree v4hi_ftype_v4hi_v4hi ++ = build_function_type (V4HI_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ endlink))); ++ tree v2si_ftype_v2si_v2si ++ = build_function_type (V2SI_type_node, ++ tree_cons (NULL_TREE, V2SI_type_node, ++ tree_cons (NULL_TREE, V2SI_type_node, ++ endlink))); ++ tree di_ftype_di_di ++ = build_function_type (long_long_unsigned_type_node, ++ tree_cons (NULL_TREE, long_long_unsigned_type_node, ++ tree_cons (NULL_TREE, ++ long_long_unsigned_type_node, ++ endlink))); ++ ++ /* Add all builtins that are more or less simple operations on two ++ operands. */ ++ for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) ++ { ++ /* Use one of the operands; the target can have a different mode for ++ mask-generating compares. 
*/ ++ enum machine_mode mode; ++ tree type; ++ ++ if (d->name == 0) ++ continue; ++ ++ mode = insn_data[d->icode].operand[1].mode; ++ ++ switch (mode) ++ { ++ case V8QImode: ++ type = v8qi_ftype_v8qi_v8qi; ++ break; ++ case V4HImode: ++ type = v4hi_ftype_v4hi_v4hi; ++ break; ++ case V2SImode: ++ type = v2si_ftype_v2si_v2si; ++ break; ++ case DImode: ++ type = di_ftype_di_di; ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ def_mbuiltin (d->mask, d->name, type, d->code); ++ } ++ ++ /* Add the remaining MMX insns with somewhat more complicated types. */ ++#define iwmmx_mbuiltin(NAME, TYPE, CODE) \ ++ def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \ ++ ARM_BUILTIN_ ## CODE) ++ ++ iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO); ++ iwmmx_mbuiltin ("setwcx", void_ftype_int_int, SETWCX); ++ iwmmx_mbuiltin ("getwcx", int_ftype_int, GETWCX); ++ ++ iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH); ++ iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW); ++ iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD); ++ iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI); ++ iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI); ++ iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI); ++ ++ iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH); ++ iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW); ++ iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD); ++ iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI); ++ iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI); ++ iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI); ++ ++ iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH); ++ iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW); ++ iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD); ++ iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI); ++ iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI); ++ iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI); ++ ++ iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH); ++ iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW); ++ iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD); ++ iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI); ++ iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI); ++ iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI); ++ ++ iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH); ++ ++ iwmmx_mbuiltin ("wsadb", v2si_ftype_v8qi_v8qi, WSADB); ++ iwmmx_mbuiltin ("wsadh", v2si_ftype_v4hi_v4hi, WSADH); ++ iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ); ++ iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ); ++ ++ iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB); ++ iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH); ++ iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW); ++ iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB); ++ iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH); ++ iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW); ++ iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB); ++ iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH); ++ iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW); ++ ++ iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB); ++ iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH); ++ iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW); ++ ++ iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB); ++ iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH); ++ iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW); ++ ++ iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS); ++ iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS); 
++ iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS); ++ iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS); ++ iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS); ++ iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS); ++ ++ iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB); ++ iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH); ++ iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW); ++ iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB); ++ iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH); ++ iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW); ++ iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB); ++ iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH); ++ iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW); ++ iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB); ++ iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH); ++ iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW); ++ ++ iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS); ++ iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ); ++ iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU); ++ iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ); ++ ++ iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGN); ++ iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA); ++ iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH); ++ iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB); ++ iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT); ++ iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB); ++ iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT); ++ ++#undef iwmmx_mbuiltin ++} ++ ++static void ++arm_init_tls_builtins (void) ++{ ++ tree ftype, decl; ++ ++ ftype = build_function_type (ptr_type_node, void_list_node); ++ decl = add_builtin_function ("__builtin_thread_pointer", ftype, ++ ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD, ++ NULL, NULL_TREE); ++ TREE_NOTHROW (decl) = 1; ++ TREE_READONLY (decl) = 1; ++ arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl; + } + + static void +@@ -19479,6 +19662,17 @@ + arm_init_fp16_builtins (); + } + ++/* Return the ARM builtin for CODE. */ ++ ++static tree ++arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) ++{ ++ if (code >= ARM_BUILTIN_MAX) ++ return error_mark_node; ++ ++ return arm_builtin_decls[code]; ++} ++ + /* Implement TARGET_INVALID_PARAMETER_TYPE. 
*/ + + static const char * +@@ -19630,58 +19824,6 @@ + return target; + } + +-static int +-neon_builtin_compare (const void *a, const void *b) +-{ +- const neon_builtin_datum *const key = (const neon_builtin_datum *) a; +- const neon_builtin_datum *const memb = (const neon_builtin_datum *) b; +- unsigned int soughtcode = key->base_fcode; +- +- if (soughtcode >= memb->base_fcode +- && soughtcode < memb->base_fcode + memb->num_vars) +- return 0; +- else if (soughtcode < memb->base_fcode) +- return -1; +- else +- return 1; +-} +- +-static enum insn_code +-locate_neon_builtin_icode (int fcode, neon_itype *itype, +- enum neon_builtin_type_bits *type_bit) +-{ +- neon_builtin_datum key +- = { NULL, (neon_itype) 0, 0, { CODE_FOR_nothing }, 0, 0 }; +- neon_builtin_datum *found; +- int idx, type, ntypes; +- +- key.base_fcode = fcode; +- found = (neon_builtin_datum *) +- bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data), +- sizeof (neon_builtin_data[0]), neon_builtin_compare); +- gcc_assert (found); +- idx = fcode - (int) found->base_fcode; +- gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars); +- +- if (itype) +- *itype = found->itype; +- +- if (type_bit) +- { +- ntypes = 0; +- for (type = 0; type < T_MAX; type++) +- if (found->bits & (1 << type)) +- { +- if (ntypes == idx) +- break; +- ntypes++; +- } +- gcc_assert (type < T_MAX); +- *type_bit = (enum neon_builtin_type_bits) (1 << type); +- } +- return found->codes[idx]; +-} +- + typedef enum { + NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, +@@ -19695,14 +19837,14 @@ + and return an expression for the accessed memory. + + The intrinsic function operates on a block of registers that has +- mode REG_MODE. This block contains vectors of type TYPE_BIT. ++ mode REG_MODE. This block contains vectors of type TYPE_MODE. + The function references the memory at EXP in mode MEM_MODE; + this mode may be BLKmode if no more suitable mode is available. */ + + static tree + neon_dereference_pointer (tree exp, enum machine_mode mem_mode, + enum machine_mode reg_mode, +- enum neon_builtin_type_bits type_bit) ++ neon_builtin_type_mode type_mode) + { + HOST_WIDE_INT reg_size, vector_size, nvectors, nelems; + tree elem_type, upper_bound, array_type; +@@ -19711,8 +19853,8 @@ + reg_size = GET_MODE_SIZE (reg_mode); + + /* Work out the size of each vector in bytes. */ +- gcc_assert (type_bit & (T_DREG | T_QREG)); +- vector_size = (type_bit & T_QREG ? 16 : 8); ++ gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG)); ++ vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8); + + /* Work out how many vectors there are. */ + gcc_assert (reg_size % vector_size == 0); +@@ -19743,7 +19885,7 @@ + /* Expand a Neon builtin. */ + static rtx + arm_expand_neon_args (rtx target, int icode, int have_retval, +- enum neon_builtin_type_bits type_bit, ++ neon_builtin_type_mode type_mode, + tree exp, ...) 
+ { + va_list ap; +@@ -19779,7 +19921,7 @@ + { + other_mode = insn_data[icode].operand[1 - opno].mode; + arg[argc] = neon_dereference_pointer (arg[argc], mode[argc], +- other_mode, type_bit); ++ other_mode, type_mode); + } + op[argc] = expand_normal (arg[argc]); + +@@ -19889,16 +20031,17 @@ + static rtx + arm_expand_neon_builtin (int fcode, tree exp, rtx target) + { +- neon_itype itype; +- enum neon_builtin_type_bits type_bit; +- enum insn_code icode = locate_neon_builtin_icode (fcode, &itype, &type_bit); ++ neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE]; ++ neon_itype itype = d->itype; ++ enum insn_code icode = d->code; ++ neon_builtin_type_mode type_mode = d->mode; + + switch (itype) + { + case NEON_UNOP: + case NEON_CONVERT: + case NEON_DUPLANE: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_BINOP: +@@ -19908,89 +20051,89 @@ + case NEON_SCALARMULH: + case NEON_SHIFTINSERT: + case NEON_LOGICBINOP: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + + case NEON_TERNOP: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_GETLANE: + case NEON_FIXCONV: + case NEON_SHIFTIMM: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + + case NEON_CREATE: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_DUP: + case NEON_SPLIT: + case NEON_REINTERP: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_COMBINE: + case NEON_VTBL: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_RESULTPAIR: +- return arm_expand_neon_args (target, icode, 0, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 0, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_STOP); + + case NEON_LANEMUL: + case NEON_LANEMULL: + case NEON_LANEMULH: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_LANEMAC: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_SHIFTACC: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + 
NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_SCALARMAC: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_SELECT: + case NEON_VTBX: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_STOP); + + case NEON_LOAD1: + case NEON_LOADSTRUCT: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_MEMORY, NEON_ARG_STOP); + + case NEON_LOAD1LANE: + case NEON_LOADSTRUCTLANE: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + + case NEON_STORE1: + case NEON_STORESTRUCT: +- return arm_expand_neon_args (target, icode, 0, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 0, type_mode, exp, + NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_STORE1LANE: + case NEON_STORESTRUCTLANE: +- return arm_expand_neon_args (target, icode, 0, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 0, type_mode, exp, + NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + } + +=== modified file 'gcc/config/arm/arm.h' +--- old/gcc/config/arm/arm.h 2011-08-13 08:32:32 +0000 ++++ new/gcc/config/arm/arm.h 2011-08-24 17:35:16 +0000 +@@ -2269,178 +2269,6 @@ + : arm_gen_return_addr_mask ()) + + +-/* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have +- symbolic names defined here (which would require too much duplication). +- FIXME? 
*/ +-enum arm_builtins +-{ +- ARM_BUILTIN_GETWCX, +- ARM_BUILTIN_SETWCX, +- +- ARM_BUILTIN_WZERO, +- +- ARM_BUILTIN_WAVG2BR, +- ARM_BUILTIN_WAVG2HR, +- ARM_BUILTIN_WAVG2B, +- ARM_BUILTIN_WAVG2H, +- +- ARM_BUILTIN_WACCB, +- ARM_BUILTIN_WACCH, +- ARM_BUILTIN_WACCW, +- +- ARM_BUILTIN_WMACS, +- ARM_BUILTIN_WMACSZ, +- ARM_BUILTIN_WMACU, +- ARM_BUILTIN_WMACUZ, +- +- ARM_BUILTIN_WSADB, +- ARM_BUILTIN_WSADBZ, +- ARM_BUILTIN_WSADH, +- ARM_BUILTIN_WSADHZ, +- +- ARM_BUILTIN_WALIGN, +- +- ARM_BUILTIN_TMIA, +- ARM_BUILTIN_TMIAPH, +- ARM_BUILTIN_TMIABB, +- ARM_BUILTIN_TMIABT, +- ARM_BUILTIN_TMIATB, +- ARM_BUILTIN_TMIATT, +- +- ARM_BUILTIN_TMOVMSKB, +- ARM_BUILTIN_TMOVMSKH, +- ARM_BUILTIN_TMOVMSKW, +- +- ARM_BUILTIN_TBCSTB, +- ARM_BUILTIN_TBCSTH, +- ARM_BUILTIN_TBCSTW, +- +- ARM_BUILTIN_WMADDS, +- ARM_BUILTIN_WMADDU, +- +- ARM_BUILTIN_WPACKHSS, +- ARM_BUILTIN_WPACKWSS, +- ARM_BUILTIN_WPACKDSS, +- ARM_BUILTIN_WPACKHUS, +- ARM_BUILTIN_WPACKWUS, +- ARM_BUILTIN_WPACKDUS, +- +- ARM_BUILTIN_WADDB, +- ARM_BUILTIN_WADDH, +- ARM_BUILTIN_WADDW, +- ARM_BUILTIN_WADDSSB, +- ARM_BUILTIN_WADDSSH, +- ARM_BUILTIN_WADDSSW, +- ARM_BUILTIN_WADDUSB, +- ARM_BUILTIN_WADDUSH, +- ARM_BUILTIN_WADDUSW, +- ARM_BUILTIN_WSUBB, +- ARM_BUILTIN_WSUBH, +- ARM_BUILTIN_WSUBW, +- ARM_BUILTIN_WSUBSSB, +- ARM_BUILTIN_WSUBSSH, +- ARM_BUILTIN_WSUBSSW, +- ARM_BUILTIN_WSUBUSB, +- ARM_BUILTIN_WSUBUSH, +- ARM_BUILTIN_WSUBUSW, +- +- ARM_BUILTIN_WAND, +- ARM_BUILTIN_WANDN, +- ARM_BUILTIN_WOR, +- ARM_BUILTIN_WXOR, +- +- ARM_BUILTIN_WCMPEQB, +- ARM_BUILTIN_WCMPEQH, +- ARM_BUILTIN_WCMPEQW, +- ARM_BUILTIN_WCMPGTUB, +- ARM_BUILTIN_WCMPGTUH, +- ARM_BUILTIN_WCMPGTUW, +- ARM_BUILTIN_WCMPGTSB, +- ARM_BUILTIN_WCMPGTSH, +- ARM_BUILTIN_WCMPGTSW, +- +- ARM_BUILTIN_TEXTRMSB, +- ARM_BUILTIN_TEXTRMSH, +- ARM_BUILTIN_TEXTRMSW, +- ARM_BUILTIN_TEXTRMUB, +- ARM_BUILTIN_TEXTRMUH, +- ARM_BUILTIN_TEXTRMUW, +- ARM_BUILTIN_TINSRB, +- ARM_BUILTIN_TINSRH, +- ARM_BUILTIN_TINSRW, +- +- ARM_BUILTIN_WMAXSW, +- ARM_BUILTIN_WMAXSH, +- ARM_BUILTIN_WMAXSB, +- ARM_BUILTIN_WMAXUW, +- ARM_BUILTIN_WMAXUH, +- ARM_BUILTIN_WMAXUB, +- ARM_BUILTIN_WMINSW, +- ARM_BUILTIN_WMINSH, +- ARM_BUILTIN_WMINSB, +- ARM_BUILTIN_WMINUW, +- ARM_BUILTIN_WMINUH, +- ARM_BUILTIN_WMINUB, +- +- ARM_BUILTIN_WMULUM, +- ARM_BUILTIN_WMULSM, +- ARM_BUILTIN_WMULUL, +- +- ARM_BUILTIN_PSADBH, +- ARM_BUILTIN_WSHUFH, +- +- ARM_BUILTIN_WSLLH, +- ARM_BUILTIN_WSLLW, +- ARM_BUILTIN_WSLLD, +- ARM_BUILTIN_WSRAH, +- ARM_BUILTIN_WSRAW, +- ARM_BUILTIN_WSRAD, +- ARM_BUILTIN_WSRLH, +- ARM_BUILTIN_WSRLW, +- ARM_BUILTIN_WSRLD, +- ARM_BUILTIN_WRORH, +- ARM_BUILTIN_WRORW, +- ARM_BUILTIN_WRORD, +- ARM_BUILTIN_WSLLHI, +- ARM_BUILTIN_WSLLWI, +- ARM_BUILTIN_WSLLDI, +- ARM_BUILTIN_WSRAHI, +- ARM_BUILTIN_WSRAWI, +- ARM_BUILTIN_WSRADI, +- ARM_BUILTIN_WSRLHI, +- ARM_BUILTIN_WSRLWI, +- ARM_BUILTIN_WSRLDI, +- ARM_BUILTIN_WRORHI, +- ARM_BUILTIN_WRORWI, +- ARM_BUILTIN_WRORDI, +- +- ARM_BUILTIN_WUNPCKIHB, +- ARM_BUILTIN_WUNPCKIHH, +- ARM_BUILTIN_WUNPCKIHW, +- ARM_BUILTIN_WUNPCKILB, +- ARM_BUILTIN_WUNPCKILH, +- ARM_BUILTIN_WUNPCKILW, +- +- ARM_BUILTIN_WUNPCKEHSB, +- ARM_BUILTIN_WUNPCKEHSH, +- ARM_BUILTIN_WUNPCKEHSW, +- ARM_BUILTIN_WUNPCKEHUB, +- ARM_BUILTIN_WUNPCKEHUH, +- ARM_BUILTIN_WUNPCKEHUW, +- ARM_BUILTIN_WUNPCKELSB, +- ARM_BUILTIN_WUNPCKELSH, +- ARM_BUILTIN_WUNPCKELSW, +- ARM_BUILTIN_WUNPCKELUB, +- ARM_BUILTIN_WUNPCKELUH, +- ARM_BUILTIN_WUNPCKELUW, +- +- ARM_BUILTIN_THREAD_POINTER, +- +- ARM_BUILTIN_NEON_BASE, +- +- ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE /* FIXME: Wrong! */ +-}; +- + /* Do not emit .note.GNU-stack by default. 
*/ + #ifndef NEED_INDICATE_EXEC_STACK + #define NEED_INDICATE_EXEC_STACK 0 + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106796.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106796.patch new file mode 100644 index 0000000000..1a940975f3 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106796.patch @@ -0,0 +1,1255 @@ +2011-08-25 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF mainline: + + 2011-08-19 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * tree-ssa-math-opts.c (is_widening_mult_rhs_p): Handle constants + beyond conversions. + (convert_mult_to_widen): Convert constant inputs to the right type. + (convert_plusminus_to_widen): Don't automatically reject inputs that + are not an SSA_NAME. + Convert constant inputs to the right type. + + gcc/testsuite/ + * gcc.target/arm/wmul-11.c: New file. + * gcc.target/arm/wmul-12.c: New file. + * gcc.target/arm/wmul-13.c: New file. + + 2011-08-19 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * tree-ssa-math-opts.c (convert_plusminus_to_widen): Convert add_rhs + to the correct type. + + gcc/testsuite/ + * gcc.target/arm/wmul-10.c: New file. + + 2011-08-19 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * tree-ssa-math-opts.c (convert_mult_to_widen): Better handle + unsigned inputs of different modes. + (convert_plusminus_to_widen): Likewise. + + gcc/testsuite/ + * gcc.target/arm/wmul-9.c: New file. + * gcc.target/arm/wmul-bitfield-2.c: New file. + + 2011-08-19 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * tree-ssa-math-opts.c (is_widening_mult_rhs_p): Add new argument + 'type'. + Use 'type' from caller, not inferred from 'rhs'. + Don't reject non-conversion statements. Do return lhs in this case. + (is_widening_mult_p): Add new argument 'type'. + Use 'type' from caller, not inferred from 'stmt'. + Pass type to is_widening_mult_rhs_p. + (convert_mult_to_widen): Pass type to is_widening_mult_p. + (convert_plusminus_to_widen): Likewise. + + gcc/testsuite/ + * gcc.target/arm/wmul-8.c: New file. + + 2011-08-19 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * tree-ssa-math-opts.c (is_widening_mult_p): Remove FIXME. + Ensure the the larger type is the first operand. + + gcc/testsuite/ + * gcc.target/arm/wmul-7.c: New file. + + 2011-08-19 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * tree-ssa-math-opts.c (convert_mult_to_widen): Convert + unsupported unsigned multiplies to signed. + (convert_plusminus_to_widen): Likewise. + + gcc/testsuite/ + * gcc.target/arm/wmul-6.c: New file. + + 2011-08-19 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * tree-ssa-math-opts.c (convert_plusminus_to_widen): Permit a single + conversion statement separating multiply-and-accumulate. + + gcc/testsuite/ + * gcc.target/arm/wmul-5.c: New file. + * gcc.target/arm/no-wmla-1.c: New file. + + 2011-08-19 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/arm.md (maddhidi4): Remove '*' from name. + * expr.c (expand_expr_real_2): Use find_widening_optab_handler. + * optabs.c (find_widening_optab_handler_and_mode): New function. + (expand_widen_pattern_expr): Use find_widening_optab_handler. + (expand_binop_directly): Likewise. + (expand_binop): Likewise. + * optabs.h (find_widening_optab_handler): New macro define. + (find_widening_optab_handler_and_mode): New prototype. + * tree-cfg.c (verify_gimple_assign_binary): Adjust WIDEN_MULT_EXPR + type precision rules. + (verify_gimple_assign_ternary): Likewise for WIDEN_MULT_PLUS_EXPR. 
+ * tree-ssa-math-opts.c (build_and_insert_cast): New function. + (is_widening_mult_rhs_p): Allow widening by more than one mode. + Explicitly disallow mis-matched input types. + (convert_mult_to_widen): Use find_widening_optab_handler, and cast + input types to fit the new handler. + (convert_plusminus_to_widen): Likewise. + + gcc/testsuite/ + * gcc.target/arm/wmul-bitfield-1.c: New file. + + 2011-08-19 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * expr.c (expand_expr_real_2): Use widening_optab_handler. + * genopinit.c (optabs): Use set_widening_optab_handler for $N. + (gen_insn): $N now means $a must be wider than $b, not consecutive. + * optabs.c (widened_mode): New function. + (expand_widen_pattern_expr): Use widening_optab_handler. + (expand_binop_directly): Likewise. + (expand_binop): Likewise. + * optabs.h (widening_optab_handlers): New struct. + (optab_d): New member, 'widening'. + (widening_optab_handler): New function. + (set_widening_optab_handler): New function. + * tree-ssa-math-opts.c (convert_mult_to_widen): Use + widening_optab_handler. + (convert_plusminus_to_widen): Likewise. + +=== modified file 'gcc/config/arm/arm.md' +--- old/gcc/config/arm/arm.md 2011-08-13 08:32:32 +0000 ++++ new/gcc/config/arm/arm.md 2011-08-25 11:42:09 +0000 +@@ -1839,7 +1839,7 @@ + (set_attr "predicable" "yes")] + ) + +-(define_insn "*maddhidi4" ++(define_insn "maddhidi4" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (plus:DI + (mult:DI (sign_extend:DI + +=== modified file 'gcc/expr.c' +--- old/gcc/expr.c 2011-08-13 08:32:32 +0000 ++++ new/gcc/expr.c 2011-08-25 11:42:09 +0000 +@@ -7688,18 +7688,16 @@ + { + enum machine_mode innermode = TYPE_MODE (TREE_TYPE (treeop0)); + this_optab = usmul_widen_optab; +- if (mode == GET_MODE_2XWIDER_MODE (innermode)) ++ if (find_widening_optab_handler (this_optab, mode, innermode, 0) ++ != CODE_FOR_nothing) + { +- if (optab_handler (this_optab, mode) != CODE_FOR_nothing) +- { +- if (TYPE_UNSIGNED (TREE_TYPE (treeop0))) +- expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, +- EXPAND_NORMAL); +- else +- expand_operands (treeop0, treeop1, NULL_RTX, &op1, &op0, +- EXPAND_NORMAL); +- goto binop3; +- } ++ if (TYPE_UNSIGNED (TREE_TYPE (treeop0))) ++ expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, ++ EXPAND_NORMAL); ++ else ++ expand_operands (treeop0, treeop1, NULL_RTX, &op1, &op0, ++ EXPAND_NORMAL); ++ goto binop3; + } + } + /* Check for a multiplication with matching signedness. */ +@@ -7714,10 +7712,10 @@ + optab other_optab = zextend_p ? smul_widen_optab : umul_widen_optab; + this_optab = zextend_p ? umul_widen_optab : smul_widen_optab; + +- if (mode == GET_MODE_2XWIDER_MODE (innermode) +- && TREE_CODE (treeop0) != INTEGER_CST) ++ if (TREE_CODE (treeop0) != INTEGER_CST) + { +- if (optab_handler (this_optab, mode) != CODE_FOR_nothing) ++ if (find_widening_optab_handler (this_optab, mode, innermode, 0) ++ != CODE_FOR_nothing) + { + expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, + EXPAND_NORMAL); +@@ -7725,7 +7723,8 @@ + unsignedp, this_optab); + return REDUCE_BIT_FIELD (temp); + } +- if (optab_handler (other_optab, mode) != CODE_FOR_nothing ++ if (find_widening_optab_handler (other_optab, mode, innermode, 0) ++ != CODE_FOR_nothing + && innermode == word_mode) + { + rtx htem, hipart; + +=== modified file 'gcc/genopinit.c' +--- old/gcc/genopinit.c 2011-05-05 15:43:06 +0000 ++++ new/gcc/genopinit.c 2011-07-15 13:06:31 +0000 +@@ -46,10 +46,12 @@ + used. 
$A and $B are replaced with the full name of the mode; $a and $b + are replaced with the short form of the name, as above. + +- If $N is present in the pattern, it means the two modes must be consecutive +- widths in the same mode class (e.g, QImode and HImode). $I means that +- only full integer modes should be considered for the next mode, and $F +- means that only float modes should be considered. ++ If $N is present in the pattern, it means the two modes must be in ++ the same mode class, and $b must be greater than $a (e.g, QImode ++ and HImode). ++ ++ $I means that only full integer modes should be considered for the ++ next mode, and $F means that only float modes should be considered. + $P means that both full and partial integer modes should be considered. + $Q means that only fixed-point modes should be considered. + +@@ -99,17 +101,17 @@ + "set_optab_handler (smulv_optab, $A, CODE_FOR_$(mulv$I$a3$))", + "set_optab_handler (umul_highpart_optab, $A, CODE_FOR_$(umul$a3_highpart$))", + "set_optab_handler (smul_highpart_optab, $A, CODE_FOR_$(smul$a3_highpart$))", +- "set_optab_handler (smul_widen_optab, $B, CODE_FOR_$(mul$a$b3$)$N)", +- "set_optab_handler (umul_widen_optab, $B, CODE_FOR_$(umul$a$b3$)$N)", +- "set_optab_handler (usmul_widen_optab, $B, CODE_FOR_$(usmul$a$b3$)$N)", +- "set_optab_handler (smadd_widen_optab, $B, CODE_FOR_$(madd$a$b4$)$N)", +- "set_optab_handler (umadd_widen_optab, $B, CODE_FOR_$(umadd$a$b4$)$N)", +- "set_optab_handler (ssmadd_widen_optab, $B, CODE_FOR_$(ssmadd$a$b4$)$N)", +- "set_optab_handler (usmadd_widen_optab, $B, CODE_FOR_$(usmadd$a$b4$)$N)", +- "set_optab_handler (smsub_widen_optab, $B, CODE_FOR_$(msub$a$b4$)$N)", +- "set_optab_handler (umsub_widen_optab, $B, CODE_FOR_$(umsub$a$b4$)$N)", +- "set_optab_handler (ssmsub_widen_optab, $B, CODE_FOR_$(ssmsub$a$b4$)$N)", +- "set_optab_handler (usmsub_widen_optab, $B, CODE_FOR_$(usmsub$a$b4$)$N)", ++ "set_widening_optab_handler (smul_widen_optab, $B, $A, CODE_FOR_$(mul$a$b3$)$N)", ++ "set_widening_optab_handler (umul_widen_optab, $B, $A, CODE_FOR_$(umul$a$b3$)$N)", ++ "set_widening_optab_handler (usmul_widen_optab, $B, $A, CODE_FOR_$(usmul$a$b3$)$N)", ++ "set_widening_optab_handler (smadd_widen_optab, $B, $A, CODE_FOR_$(madd$a$b4$)$N)", ++ "set_widening_optab_handler (umadd_widen_optab, $B, $A, CODE_FOR_$(umadd$a$b4$)$N)", ++ "set_widening_optab_handler (ssmadd_widen_optab, $B, $A, CODE_FOR_$(ssmadd$a$b4$)$N)", ++ "set_widening_optab_handler (usmadd_widen_optab, $B, $A, CODE_FOR_$(usmadd$a$b4$)$N)", ++ "set_widening_optab_handler (smsub_widen_optab, $B, $A, CODE_FOR_$(msub$a$b4$)$N)", ++ "set_widening_optab_handler (umsub_widen_optab, $B, $A, CODE_FOR_$(umsub$a$b4$)$N)", ++ "set_widening_optab_handler (ssmsub_widen_optab, $B, $A, CODE_FOR_$(ssmsub$a$b4$)$N)", ++ "set_widening_optab_handler (usmsub_widen_optab, $B, $A, CODE_FOR_$(usmsub$a$b4$)$N)", + "set_optab_handler (sdiv_optab, $A, CODE_FOR_$(div$a3$))", + "set_optab_handler (ssdiv_optab, $A, CODE_FOR_$(ssdiv$Q$a3$))", + "set_optab_handler (sdivv_optab, $A, CODE_FOR_$(div$V$I$a3$))", +@@ -304,7 +306,7 @@ + { + int force_float = 0, force_int = 0, force_partial_int = 0; + int force_fixed = 0; +- int force_consec = 0; ++ int force_wider = 0; + int matches = 1; + + for (pp = optabs[pindex]; pp[0] != '$' || pp[1] != '('; pp++) +@@ -322,7 +324,7 @@ + switch (*++pp) + { + case 'N': +- force_consec = 1; ++ force_wider = 1; + break; + case 'I': + force_int = 1; +@@ -391,7 +393,10 @@ + || mode_class[i] == MODE_VECTOR_FRACT + || mode_class[i] == MODE_VECTOR_UFRACT + 
|| mode_class[i] == MODE_VECTOR_ACCUM +- || mode_class[i] == MODE_VECTOR_UACCUM)) ++ || mode_class[i] == MODE_VECTOR_UACCUM) ++ && (! force_wider ++ || *pp == 'a' ++ || m1 < i)) + break; + } + +@@ -411,8 +416,7 @@ + } + + if (matches && pp[0] == '$' && pp[1] == ')' +- && *np == 0 +- && (! force_consec || (int) GET_MODE_WIDER_MODE(m1) == m2)) ++ && *np == 0) + break; + } + + +=== modified file 'gcc/optabs.c' +--- old/gcc/optabs.c 2011-07-04 14:03:49 +0000 ++++ new/gcc/optabs.c 2011-08-11 15:46:01 +0000 +@@ -225,6 +225,61 @@ + return 1; + } + ++/* Given two input operands, OP0 and OP1, determine what the correct from_mode ++ for a widening operation would be. In most cases this would be OP0, but if ++ that's a constant it'll be VOIDmode, which isn't useful. */ ++ ++static enum machine_mode ++widened_mode (enum machine_mode to_mode, rtx op0, rtx op1) ++{ ++ enum machine_mode m0 = GET_MODE (op0); ++ enum machine_mode m1 = GET_MODE (op1); ++ enum machine_mode result; ++ ++ if (m0 == VOIDmode && m1 == VOIDmode) ++ return to_mode; ++ else if (m0 == VOIDmode || GET_MODE_SIZE (m0) < GET_MODE_SIZE (m1)) ++ result = m1; ++ else ++ result = m0; ++ ++ if (GET_MODE_SIZE (result) > GET_MODE_SIZE (to_mode)) ++ return to_mode; ++ ++ return result; ++} ++ ++/* Find a widening optab even if it doesn't widen as much as we want. ++ E.g. if from_mode is HImode, and to_mode is DImode, and there is no ++ direct HI->SI insn, then return SI->DI, if that exists. ++ If PERMIT_NON_WIDENING is non-zero then this can be used with ++ non-widening optabs also. */ ++ ++enum insn_code ++find_widening_optab_handler_and_mode (optab op, enum machine_mode to_mode, ++ enum machine_mode from_mode, ++ int permit_non_widening, ++ enum machine_mode *found_mode) ++{ ++ for (; (permit_non_widening || from_mode != to_mode) ++ && GET_MODE_SIZE (from_mode) <= GET_MODE_SIZE (to_mode) ++ && from_mode != VOIDmode; ++ from_mode = GET_MODE_WIDER_MODE (from_mode)) ++ { ++ enum insn_code handler = widening_optab_handler (op, to_mode, ++ from_mode); ++ ++ if (handler != CODE_FOR_nothing) ++ { ++ if (found_mode) ++ *found_mode = from_mode; ++ return handler; ++ } ++ } ++ ++ return CODE_FOR_nothing; ++} ++ + /* Widen OP to MODE and return the rtx for the widened operand. UNSIGNEDP + says whether OP is signed or unsigned. NO_EXTEND is nonzero if we need + not actually do a sign-extend or zero-extend, but can leave the +@@ -517,8 +572,9 @@ + optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), optab_default); + if (ops->code == WIDEN_MULT_PLUS_EXPR + || ops->code == WIDEN_MULT_MINUS_EXPR) +- icode = (int) optab_handler (widen_pattern_optab, +- TYPE_MODE (TREE_TYPE (ops->op2))); ++ icode = (int) find_widening_optab_handler (widen_pattern_optab, ++ TYPE_MODE (TREE_TYPE (ops->op2)), ++ tmode0, 0); + else + icode = (int) optab_handler (widen_pattern_optab, tmode0); + gcc_assert (icode != CODE_FOR_nothing); +@@ -1389,7 +1445,9 @@ + rtx target, int unsignedp, enum optab_methods methods, + rtx last) + { +- int icode = (int) optab_handler (binoptab, mode); ++ enum machine_mode from_mode = widened_mode (mode, op0, op1); ++ int icode = (int) find_widening_optab_handler (binoptab, mode, ++ from_mode, 1); + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + enum machine_mode mode1 = insn_data[icode].operand[2].mode; + enum machine_mode tmp_mode; +@@ -1546,7 +1604,9 @@ + /* If we can do it with a three-operand insn, do so. 
*/ + + if (methods != OPTAB_MUST_WIDEN +- && optab_handler (binoptab, mode) != CODE_FOR_nothing) ++ && find_widening_optab_handler (binoptab, mode, ++ widened_mode (mode, op0, op1), 1) ++ != CODE_FOR_nothing) + { + temp = expand_binop_directly (mode, binoptab, op0, op1, target, + unsignedp, methods, last); +@@ -1586,8 +1646,9 @@ + + if (binoptab == smul_optab + && GET_MODE_WIDER_MODE (mode) != VOIDmode +- && (optab_handler ((unsignedp ? umul_widen_optab : smul_widen_optab), +- GET_MODE_WIDER_MODE (mode)) ++ && (widening_optab_handler ((unsignedp ? umul_widen_optab ++ : smul_widen_optab), ++ GET_MODE_WIDER_MODE (mode), mode) + != CODE_FOR_nothing)) + { + temp = expand_binop (GET_MODE_WIDER_MODE (mode), +@@ -1618,9 +1679,11 @@ + if (optab_handler (binoptab, wider_mode) != CODE_FOR_nothing + || (binoptab == smul_optab + && GET_MODE_WIDER_MODE (wider_mode) != VOIDmode +- && (optab_handler ((unsignedp ? umul_widen_optab +- : smul_widen_optab), +- GET_MODE_WIDER_MODE (wider_mode)) ++ && (find_widening_optab_handler ((unsignedp ++ ? umul_widen_optab ++ : smul_widen_optab), ++ GET_MODE_WIDER_MODE (wider_mode), ++ mode, 0) + != CODE_FOR_nothing))) + { + rtx xop0 = op0, xop1 = op1; +@@ -2043,8 +2106,8 @@ + && optab_handler (add_optab, word_mode) != CODE_FOR_nothing) + { + rtx product = NULL_RTX; +- +- if (optab_handler (umul_widen_optab, mode) != CODE_FOR_nothing) ++ if (widening_optab_handler (umul_widen_optab, mode, word_mode) ++ != CODE_FOR_nothing) + { + product = expand_doubleword_mult (mode, op0, op1, target, + true, methods); +@@ -2053,7 +2116,8 @@ + } + + if (product == NULL_RTX +- && optab_handler (smul_widen_optab, mode) != CODE_FOR_nothing) ++ && widening_optab_handler (smul_widen_optab, mode, word_mode) ++ != CODE_FOR_nothing) + { + product = expand_doubleword_mult (mode, op0, op1, target, + false, methods); +@@ -2144,7 +2208,8 @@ + wider_mode != VOIDmode; + wider_mode = GET_MODE_WIDER_MODE (wider_mode)) + { +- if (optab_handler (binoptab, wider_mode) != CODE_FOR_nothing ++ if (find_widening_optab_handler (binoptab, wider_mode, mode, 1) ++ != CODE_FOR_nothing + || (methods == OPTAB_LIB + && optab_libfunc (binoptab, wider_mode))) + { + +=== modified file 'gcc/optabs.h' +--- old/gcc/optabs.h 2011-05-05 15:43:06 +0000 ++++ new/gcc/optabs.h 2011-07-27 14:12:45 +0000 +@@ -42,6 +42,11 @@ + int insn_code; + }; + ++struct widening_optab_handlers ++{ ++ struct optab_handlers handlers[NUM_MACHINE_MODES][NUM_MACHINE_MODES]; ++}; ++ + struct optab_d + { + enum rtx_code code; +@@ -50,6 +55,7 @@ + void (*libcall_gen)(struct optab_d *, const char *name, char suffix, + enum machine_mode); + struct optab_handlers handlers[NUM_MACHINE_MODES]; ++ struct widening_optab_handlers *widening; + }; + typedef struct optab_d * optab; + +@@ -799,6 +805,15 @@ + extern void emit_unop_insn (int, rtx, rtx, enum rtx_code); + extern bool maybe_emit_unop_insn (int, rtx, rtx, enum rtx_code); + ++/* Find a widening optab even if it doesn't widen as much as we want. */ ++#define find_widening_optab_handler(A,B,C,D) \ ++ find_widening_optab_handler_and_mode (A, B, C, D, NULL) ++extern enum insn_code find_widening_optab_handler_and_mode (optab, ++ enum machine_mode, ++ enum machine_mode, ++ int, ++ enum machine_mode *); ++ + /* An extra flag to control optab_for_tree_code's behavior. This is needed to + distinguish between machines with a vector shift that takes a scalar for the + shift amount vs. machines that take a vector for the shift amount. 
*/ +@@ -874,6 +889,23 @@ + + (int) CODE_FOR_nothing); + } + ++/* Like optab_handler, but for widening_operations that have a TO_MODE and ++ a FROM_MODE. */ ++ ++static inline enum insn_code ++widening_optab_handler (optab op, enum machine_mode to_mode, ++ enum machine_mode from_mode) ++{ ++ if (to_mode == from_mode || from_mode == VOIDmode) ++ return optab_handler (op, to_mode); ++ ++ if (op->widening) ++ return (enum insn_code) (op->widening->handlers[(int) to_mode][(int) from_mode].insn_code ++ + (int) CODE_FOR_nothing); ++ ++ return CODE_FOR_nothing; ++} ++ + /* Record that insn CODE should be used to implement mode MODE of OP. */ + + static inline void +@@ -882,6 +914,26 @@ + op->handlers[(int) mode].insn_code = (int) code - (int) CODE_FOR_nothing; + } + ++/* Like set_optab_handler, but for widening operations that have a TO_MODE ++ and a FROM_MODE. */ ++ ++static inline void ++set_widening_optab_handler (optab op, enum machine_mode to_mode, ++ enum machine_mode from_mode, enum insn_code code) ++{ ++ if (to_mode == from_mode) ++ set_optab_handler (op, to_mode, code); ++ else ++ { ++ if (op->widening == NULL) ++ op->widening = (struct widening_optab_handlers *) ++ xcalloc (1, sizeof (struct widening_optab_handlers)); ++ ++ op->widening->handlers[(int) to_mode][(int) from_mode].insn_code ++ = (int) code - (int) CODE_FOR_nothing; ++ } ++} ++ + /* Return the insn used to perform conversion OP from mode FROM_MODE + to mode TO_MODE; return CODE_FOR_nothing if the target does not have + such an insn. */ + +=== added file 'gcc/testsuite/gcc.target/arm/no-wmla-1.c' +--- old/gcc/testsuite/gcc.target/arm/no-wmla-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/no-wmla-1.c 2011-07-15 13:52:38 +0000 +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++int ++foo (int a, short b, short c) ++{ ++ int bc = b * c; ++ return a + (short)bc; ++} ++ ++/* { dg-final { scan-assembler "mul" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/wmul-10.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-10.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-10.c 2011-07-18 12:56:20 +0000 +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++unsigned long long ++foo (unsigned short a, unsigned short *b, unsigned short *c) ++{ ++ return (unsigned)a + (unsigned long long)*b * (unsigned long long)*c; ++} ++ ++/* { dg-final { scan-assembler "umlal" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/wmul-11.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-11.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-11.c 2011-07-22 15:46:42 +0000 +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++long long ++foo (int *b) ++{ ++ return 10 * (long long)*b; ++} ++ ++/* { dg-final { scan-assembler "smull" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/wmul-12.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-12.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-12.c 2011-07-22 15:46:42 +0000 +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++long long ++foo (int *b, int *c) ++{ ++ int tmp = *b * *c; ++ return 10 + (long long)tmp; ++} ++ ++/* { dg-final { scan-assembler "smlal" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/wmul-13.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-13.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-13.c 2011-07-22 15:46:42 +0000 +@@ 
-0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++long long ++foo (int *a, int *b) ++{ ++ return *a + (long long)*b * 10; ++} ++ ++/* { dg-final { scan-assembler "smlal" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/wmul-5.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-5.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-5.c 2011-07-15 13:52:38 +0000 +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++long long ++foo (long long a, char *b, char *c) ++{ ++ return a + *b * *c; ++} ++ ++/* { dg-final { scan-assembler "umlal" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/wmul-6.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-6.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-6.c 2011-07-15 13:59:11 +0000 +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++long long ++foo (long long a, unsigned char *b, signed char *c) ++{ ++ return a + (long long)*b * (long long)*c; ++} ++ ++/* { dg-final { scan-assembler "smlal" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/wmul-7.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-7.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-7.c 2011-07-15 14:11:23 +0000 +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++unsigned long long ++foo (unsigned long long a, unsigned char *b, unsigned short *c) ++{ ++ return a + *b * *c; ++} ++ ++/* { dg-final { scan-assembler "umlal" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/wmul-8.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-8.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-8.c 2011-07-15 14:16:54 +0000 +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++long long ++foo (long long a, int *b, int *c) ++{ ++ return a + *b * *c; ++} ++ ++/* { dg-final { scan-assembler "smlal" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/wmul-9.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-9.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-9.c 2011-07-15 14:22:39 +0000 +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++long long ++foo (long long a, short *b, char *c) ++{ ++ return a + *b * *c; ++} ++ ++/* { dg-final { scan-assembler "smlalbb" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/wmul-bitfield-1.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-bitfield-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-bitfield-1.c 2011-07-15 13:44:50 +0000 +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++struct bf ++{ ++ int a : 3; ++ int b : 15; ++ int c : 3; ++}; ++ ++long long ++foo (long long a, struct bf b, struct bf c) ++{ ++ return a + b.b * c.b; ++} ++ ++/* { dg-final { scan-assembler "smlalbb" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/wmul-bitfield-2.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-bitfield-2.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-bitfield-2.c 2011-07-15 14:22:39 +0000 +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++struct bf ++{ ++ int a : 3; ++ unsigned int b : 15; ++ int c : 3; ++}; ++ ++long long ++foo (long long a, struct bf b, struct bf c) ++{ ++ return a + b.b * c.c; ++} ++ ++/* { dg-final { scan-assembler "smlalbb" } } */ + +=== modified file 'gcc/tree-cfg.c' +--- 
old/gcc/tree-cfg.c 2011-07-01 09:19:21 +0000 ++++ new/gcc/tree-cfg.c 2011-07-15 13:44:50 +0000 +@@ -3574,7 +3574,7 @@ + case WIDEN_MULT_EXPR: + if (TREE_CODE (lhs_type) != INTEGER_TYPE) + return true; +- return ((2 * TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (lhs_type)) ++ return ((2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type)) + || (TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (rhs2_type))); + + case WIDEN_SUM_EXPR: +@@ -3667,7 +3667,7 @@ + && !FIXED_POINT_TYPE_P (rhs1_type)) + || !useless_type_conversion_p (rhs1_type, rhs2_type) + || !useless_type_conversion_p (lhs_type, rhs3_type) +- || 2 * TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (lhs_type) ++ || 2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type) + || TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (rhs2_type)) + { + error ("type mismatch in widening multiply-accumulate expression"); + +=== modified file 'gcc/tree-ssa-math-opts.c' +--- old/gcc/tree-ssa-math-opts.c 2011-03-11 16:36:16 +0000 ++++ new/gcc/tree-ssa-math-opts.c 2011-08-09 10:26:48 +0000 +@@ -1266,39 +1266,67 @@ + } + }; + +-/* Return true if RHS is a suitable operand for a widening multiplication. ++/* Build a gimple assignment to cast VAL to TARGET. Insert the statement ++ prior to GSI's current position, and return the fresh SSA name. */ ++ ++static tree ++build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc, ++ tree target, tree val) ++{ ++ tree result = make_ssa_name (target, NULL); ++ gimple stmt = gimple_build_assign_with_ops (CONVERT_EXPR, result, val, NULL); ++ gimple_set_location (stmt, loc); ++ gsi_insert_before (gsi, stmt, GSI_SAME_STMT); ++ return result; ++} ++ ++/* Return true if RHS is a suitable operand for a widening multiplication, ++ assuming a target type of TYPE. + There are two cases: + +- - RHS makes some value twice as wide. Store that value in *NEW_RHS_OUT +- if so, and store its type in *TYPE_OUT. ++ - RHS makes some value at least twice as wide. Store that value ++ in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT. + + - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so, + but leave *TYPE_OUT untouched. */ + + static bool +-is_widening_mult_rhs_p (tree rhs, tree *type_out, tree *new_rhs_out) ++is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out, ++ tree *new_rhs_out) + { + gimple stmt; +- tree type, type1, rhs1; ++ tree type1, rhs1; + enum tree_code rhs_code; + + if (TREE_CODE (rhs) == SSA_NAME) + { +- type = TREE_TYPE (rhs); + stmt = SSA_NAME_DEF_STMT (rhs); +- if (!is_gimple_assign (stmt)) +- return false; +- +- rhs_code = gimple_assign_rhs_code (stmt); +- if (TREE_CODE (type) == INTEGER_TYPE +- ? !CONVERT_EXPR_CODE_P (rhs_code) +- : rhs_code != FIXED_CONVERT_EXPR) +- return false; +- +- rhs1 = gimple_assign_rhs1 (stmt); ++ if (is_gimple_assign (stmt)) ++ { ++ rhs_code = gimple_assign_rhs_code (stmt); ++ if (TREE_CODE (type) == INTEGER_TYPE ++ ? !CONVERT_EXPR_CODE_P (rhs_code) ++ : rhs_code != FIXED_CONVERT_EXPR) ++ rhs1 = rhs; ++ else ++ { ++ rhs1 = gimple_assign_rhs1 (stmt); ++ ++ if (TREE_CODE (rhs1) == INTEGER_CST) ++ { ++ *new_rhs_out = rhs1; ++ *type_out = NULL; ++ return true; ++ } ++ } ++ } ++ else ++ rhs1 = rhs; ++ + type1 = TREE_TYPE (rhs1); ++ + if (TREE_CODE (type1) != TREE_CODE (type) +- || TYPE_PRECISION (type1) * 2 != TYPE_PRECISION (type)) ++ || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type)) + return false; + + *new_rhs_out = rhs1; +@@ -1316,28 +1344,27 @@ + return false; + } + +-/* Return true if STMT performs a widening multiplication. 
If so, +- store the unwidened types of the operands in *TYPE1_OUT and *TYPE2_OUT +- respectively. Also fill *RHS1_OUT and *RHS2_OUT such that converting +- those operands to types *TYPE1_OUT and *TYPE2_OUT would give the +- operands of the multiplication. */ ++/* Return true if STMT performs a widening multiplication, assuming the ++ output type is TYPE. If so, store the unwidened types of the operands ++ in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and ++ *RHS2_OUT such that converting those operands to types *TYPE1_OUT ++ and *TYPE2_OUT would give the operands of the multiplication. */ + + static bool +-is_widening_mult_p (gimple stmt, ++is_widening_mult_p (tree type, gimple stmt, + tree *type1_out, tree *rhs1_out, + tree *type2_out, tree *rhs2_out) + { +- tree type; +- +- type = TREE_TYPE (gimple_assign_lhs (stmt)); + if (TREE_CODE (type) != INTEGER_TYPE + && TREE_CODE (type) != FIXED_POINT_TYPE) + return false; + +- if (!is_widening_mult_rhs_p (gimple_assign_rhs1 (stmt), type1_out, rhs1_out)) ++ if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out, ++ rhs1_out)) + return false; + +- if (!is_widening_mult_rhs_p (gimple_assign_rhs2 (stmt), type2_out, rhs2_out)) ++ if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out, ++ rhs2_out)) + return false; + + if (*type1_out == NULL) +@@ -1354,6 +1381,18 @@ + *type2_out = *type1_out; + } + ++ /* Ensure that the larger of the two operands comes first. */ ++ if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out)) ++ { ++ tree tmp; ++ tmp = *type1_out; ++ *type1_out = *type2_out; ++ *type2_out = tmp; ++ tmp = *rhs1_out; ++ *rhs1_out = *rhs2_out; ++ *rhs2_out = tmp; ++ } ++ + return true; + } + +@@ -1362,31 +1401,100 @@ + value is true iff we converted the statement. 
*/ + + static bool +-convert_mult_to_widen (gimple stmt) ++convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi) + { +- tree lhs, rhs1, rhs2, type, type1, type2; ++ tree lhs, rhs1, rhs2, type, type1, type2, tmp = NULL; + enum insn_code handler; ++ enum machine_mode to_mode, from_mode, actual_mode; ++ optab op; ++ int actual_precision; ++ location_t loc = gimple_location (stmt); ++ bool from_unsigned1, from_unsigned2; + + lhs = gimple_assign_lhs (stmt); + type = TREE_TYPE (lhs); + if (TREE_CODE (type) != INTEGER_TYPE) + return false; + +- if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2)) ++ if (!is_widening_mult_p (type, stmt, &type1, &rhs1, &type2, &rhs2)) + return false; + +- if (TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2)) +- handler = optab_handler (umul_widen_optab, TYPE_MODE (type)); +- else if (!TYPE_UNSIGNED (type1) && !TYPE_UNSIGNED (type2)) +- handler = optab_handler (smul_widen_optab, TYPE_MODE (type)); ++ to_mode = TYPE_MODE (type); ++ from_mode = TYPE_MODE (type1); ++ from_unsigned1 = TYPE_UNSIGNED (type1); ++ from_unsigned2 = TYPE_UNSIGNED (type2); ++ ++ if (from_unsigned1 && from_unsigned2) ++ op = umul_widen_optab; ++ else if (!from_unsigned1 && !from_unsigned2) ++ op = smul_widen_optab; + else +- handler = optab_handler (usmul_widen_optab, TYPE_MODE (type)); ++ op = usmul_widen_optab; ++ ++ handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode, ++ 0, &actual_mode); + + if (handler == CODE_FOR_nothing) +- return false; +- +- gimple_assign_set_rhs1 (stmt, fold_convert (type1, rhs1)); +- gimple_assign_set_rhs2 (stmt, fold_convert (type2, rhs2)); ++ { ++ if (op != smul_widen_optab) ++ { ++ /* We can use a signed multiply with unsigned types as long as ++ there is a wider mode to use, or it is the smaller of the two ++ types that is unsigned. Note that type1 >= type2, always. */ ++ if ((TYPE_UNSIGNED (type1) ++ && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode)) ++ || (TYPE_UNSIGNED (type2) ++ && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode))) ++ { ++ from_mode = GET_MODE_WIDER_MODE (from_mode); ++ if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode)) ++ return false; ++ } ++ ++ op = smul_widen_optab; ++ handler = find_widening_optab_handler_and_mode (op, to_mode, ++ from_mode, 0, ++ &actual_mode); ++ ++ if (handler == CODE_FOR_nothing) ++ return false; ++ ++ from_unsigned1 = from_unsigned2 = false; ++ } ++ else ++ return false; ++ } ++ ++ /* Ensure that the inputs to the handler are in the correct precison ++ for the opcode. This will be the full mode size. */ ++ actual_precision = GET_MODE_PRECISION (actual_mode); ++ if (actual_precision != TYPE_PRECISION (type1) ++ || from_unsigned1 != TYPE_UNSIGNED (type1)) ++ { ++ tmp = create_tmp_var (build_nonstandard_integer_type ++ (actual_precision, from_unsigned1), ++ NULL); ++ rhs1 = build_and_insert_cast (gsi, loc, tmp, rhs1); ++ } ++ if (actual_precision != TYPE_PRECISION (type2) ++ || from_unsigned2 != TYPE_UNSIGNED (type2)) ++ { ++ /* Reuse the same type info, if possible. */ ++ if (!tmp || from_unsigned1 != from_unsigned2) ++ tmp = create_tmp_var (build_nonstandard_integer_type ++ (actual_precision, from_unsigned2), ++ NULL); ++ rhs2 = build_and_insert_cast (gsi, loc, tmp, rhs2); ++ } ++ ++ /* Handle constants. 
*/ ++ if (TREE_CODE (rhs1) == INTEGER_CST) ++ rhs1 = fold_convert (type1, rhs1); ++ if (TREE_CODE (rhs2) == INTEGER_CST) ++ rhs2 = fold_convert (type2, rhs2); ++ ++ gimple_assign_set_rhs1 (stmt, rhs1); ++ gimple_assign_set_rhs2 (stmt, rhs2); + gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR); + update_stmt (stmt); + return true; +@@ -1403,11 +1511,17 @@ + enum tree_code code) + { + gimple rhs1_stmt = NULL, rhs2_stmt = NULL; +- tree type, type1, type2; ++ gimple conv1_stmt = NULL, conv2_stmt = NULL, conv_stmt; ++ tree type, type1, type2, optype, tmp = NULL; + tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs; + enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK; + optab this_optab; + enum tree_code wmult_code; ++ enum insn_code handler; ++ enum machine_mode to_mode, from_mode, actual_mode; ++ location_t loc = gimple_location (stmt); ++ int actual_precision; ++ bool from_unsigned1, from_unsigned2; + + lhs = gimple_assign_lhs (stmt); + type = TREE_TYPE (lhs); +@@ -1429,8 +1543,6 @@ + if (is_gimple_assign (rhs1_stmt)) + rhs1_code = gimple_assign_rhs_code (rhs1_stmt); + } +- else +- return false; + + if (TREE_CODE (rhs2) == SSA_NAME) + { +@@ -1438,57 +1550,160 @@ + if (is_gimple_assign (rhs2_stmt)) + rhs2_code = gimple_assign_rhs_code (rhs2_stmt); + } +- else +- return false; +- +- if (code == PLUS_EXPR && rhs1_code == MULT_EXPR) +- { +- if (!is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1, +- &type2, &mult_rhs2)) +- return false; +- add_rhs = rhs2; +- } +- else if (rhs2_code == MULT_EXPR) +- { +- if (!is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1, +- &type2, &mult_rhs2)) +- return false; +- add_rhs = rhs1; +- } +- else if (code == PLUS_EXPR && rhs1_code == WIDEN_MULT_EXPR) +- { +- mult_rhs1 = gimple_assign_rhs1 (rhs1_stmt); +- mult_rhs2 = gimple_assign_rhs2 (rhs1_stmt); +- type1 = TREE_TYPE (mult_rhs1); +- type2 = TREE_TYPE (mult_rhs2); +- add_rhs = rhs2; +- } +- else if (rhs2_code == WIDEN_MULT_EXPR) +- { +- mult_rhs1 = gimple_assign_rhs1 (rhs2_stmt); +- mult_rhs2 = gimple_assign_rhs2 (rhs2_stmt); +- type1 = TREE_TYPE (mult_rhs1); +- type2 = TREE_TYPE (mult_rhs2); +- add_rhs = rhs1; +- } +- else +- return false; +- +- if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2)) +- return false; ++ ++ /* Allow for one conversion statement between the multiply ++ and addition/subtraction statement. If there are more than ++ one conversions then we assume they would invalidate this ++ transformation. If that's not the case then they should have ++ been folded before now. */ ++ if (CONVERT_EXPR_CODE_P (rhs1_code)) ++ { ++ conv1_stmt = rhs1_stmt; ++ rhs1 = gimple_assign_rhs1 (rhs1_stmt); ++ if (TREE_CODE (rhs1) == SSA_NAME) ++ { ++ rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); ++ if (is_gimple_assign (rhs1_stmt)) ++ rhs1_code = gimple_assign_rhs_code (rhs1_stmt); ++ } ++ else ++ return false; ++ } ++ if (CONVERT_EXPR_CODE_P (rhs2_code)) ++ { ++ conv2_stmt = rhs2_stmt; ++ rhs2 = gimple_assign_rhs1 (rhs2_stmt); ++ if (TREE_CODE (rhs2) == SSA_NAME) ++ { ++ rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); ++ if (is_gimple_assign (rhs2_stmt)) ++ rhs2_code = gimple_assign_rhs_code (rhs2_stmt); ++ } ++ else ++ return false; ++ } ++ ++ /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call ++ is_widening_mult_p, but we still need the rhs returns. ++ ++ It might also appear that it would be sufficient to use the existing ++ operands of the widening multiply, but that would limit the choice of ++ multiply-and-accumulate instructions. 
*/ ++ if (code == PLUS_EXPR ++ && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR)) ++ { ++ if (!is_widening_mult_p (type, rhs1_stmt, &type1, &mult_rhs1, ++ &type2, &mult_rhs2)) ++ return false; ++ add_rhs = rhs2; ++ conv_stmt = conv1_stmt; ++ } ++ else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR) ++ { ++ if (!is_widening_mult_p (type, rhs2_stmt, &type1, &mult_rhs1, ++ &type2, &mult_rhs2)) ++ return false; ++ add_rhs = rhs1; ++ conv_stmt = conv2_stmt; ++ } ++ else ++ return false; ++ ++ to_mode = TYPE_MODE (type); ++ from_mode = TYPE_MODE (type1); ++ from_unsigned1 = TYPE_UNSIGNED (type1); ++ from_unsigned2 = TYPE_UNSIGNED (type2); ++ ++ /* There's no such thing as a mixed sign madd yet, so use a wider mode. */ ++ if (from_unsigned1 != from_unsigned2) ++ { ++ /* We can use a signed multiply with unsigned types as long as ++ there is a wider mode to use, or it is the smaller of the two ++ types that is unsigned. Note that type1 >= type2, always. */ ++ if ((from_unsigned1 ++ && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode)) ++ || (from_unsigned2 ++ && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode))) ++ { ++ from_mode = GET_MODE_WIDER_MODE (from_mode); ++ if (GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode)) ++ return false; ++ } ++ ++ from_unsigned1 = from_unsigned2 = false; ++ } ++ ++ /* If there was a conversion between the multiply and addition ++ then we need to make sure it fits a multiply-and-accumulate. ++ The should be a single mode change which does not change the ++ value. */ ++ if (conv_stmt) ++ { ++ /* We use the original, unmodified data types for this. */ ++ tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt)); ++ tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt)); ++ int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2); ++ bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2); ++ ++ if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type)) ++ { ++ /* Conversion is a truncate. */ ++ if (TYPE_PRECISION (to_type) < data_size) ++ return false; ++ } ++ else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)) ++ { ++ /* Conversion is an extend. Check it's the right sort. */ ++ if (TYPE_UNSIGNED (from_type) != is_unsigned ++ && !(is_unsigned && TYPE_PRECISION (from_type) > data_size)) ++ return false; ++ } ++ /* else convert is a no-op for our purposes. */ ++ } + + /* Verify that the machine can perform a widening multiply + accumulate in this mode/signedness combination, otherwise + this transformation is likely to pessimize code. */ +- this_optab = optab_for_tree_code (wmult_code, type1, optab_default); +- if (optab_handler (this_optab, TYPE_MODE (type)) == CODE_FOR_nothing) ++ optype = build_nonstandard_integer_type (from_mode, from_unsigned1); ++ this_optab = optab_for_tree_code (wmult_code, optype, optab_default); ++ handler = find_widening_optab_handler_and_mode (this_optab, to_mode, ++ from_mode, 0, &actual_mode); ++ ++ if (handler == CODE_FOR_nothing) + return false; + +- /* ??? May need some type verification here? */ +- +- gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code, +- fold_convert (type1, mult_rhs1), +- fold_convert (type2, mult_rhs2), ++ /* Ensure that the inputs to the handler are in the correct precison ++ for the opcode. This will be the full mode size. 
*/ ++ actual_precision = GET_MODE_PRECISION (actual_mode); ++ if (actual_precision != TYPE_PRECISION (type1) ++ || from_unsigned1 != TYPE_UNSIGNED (type1)) ++ { ++ tmp = create_tmp_var (build_nonstandard_integer_type ++ (actual_precision, from_unsigned1), ++ NULL); ++ mult_rhs1 = build_and_insert_cast (gsi, loc, tmp, mult_rhs1); ++ } ++ if (actual_precision != TYPE_PRECISION (type2) ++ || from_unsigned2 != TYPE_UNSIGNED (type2)) ++ { ++ if (!tmp || from_unsigned1 != from_unsigned2) ++ tmp = create_tmp_var (build_nonstandard_integer_type ++ (actual_precision, from_unsigned2), ++ NULL); ++ mult_rhs2 = build_and_insert_cast (gsi, loc, tmp, mult_rhs2); ++ } ++ ++ if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs))) ++ add_rhs = build_and_insert_cast (gsi, loc, create_tmp_var (type, NULL), ++ add_rhs); ++ ++ /* Handle constants. */ ++ if (TREE_CODE (mult_rhs1) == INTEGER_CST) ++ rhs1 = fold_convert (type1, mult_rhs1); ++ if (TREE_CODE (mult_rhs2) == INTEGER_CST) ++ rhs2 = fold_convert (type2, mult_rhs2); ++ ++ gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code, mult_rhs1, mult_rhs2, + add_rhs); + update_stmt (gsi_stmt (*gsi)); + return true; +@@ -1696,7 +1911,7 @@ + switch (code) + { + case MULT_EXPR: +- if (!convert_mult_to_widen (stmt) ++ if (!convert_mult_to_widen (stmt, &gsi) + && convert_mult_to_fma (stmt, + gimple_assign_rhs1 (stmt), + gimple_assign_rhs2 (stmt))) + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106797.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106797.patch new file mode 100644 index 0000000000..8230beb91e --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106797.patch @@ -0,0 +1,23 @@ +2011-08-26 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-08-26 Richard Sandiford <richard.sandiford@linaro.org> + + * df-problems.c (df_note_bb_compute): Pass uses rather than defs + to df_set_dead_notes_for_mw. + +=== modified file 'gcc/df-problems.c' +--- old/gcc/df-problems.c 2011-07-07 19:10:01 +0000 ++++ new/gcc/df-problems.c 2011-08-26 14:32:47 +0000 +@@ -3375,7 +3375,7 @@ + while (*mws_rec) + { + struct df_mw_hardreg *mws = *mws_rec; +- if ((DF_MWS_REG_DEF_P (mws)) ++ if (DF_MWS_REG_USE_P (mws) + && !df_ignore_stack_reg (mws->start_regno)) + { + bool really_add_notes = debug_insn != 0; + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106798.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106798.patch new file mode 100644 index 0000000000..aa067b7113 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106798.patch @@ -0,0 +1,23 @@ +2011-09-05 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + gcc/ + + 2011-08-12 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + PR target/48328 + * config/arm/arm.h (CASE_VECTOR_SHORTEN_MODE): Fix distance + for tbh instructions. + +=== modified file 'gcc/config/arm/arm.h' +--- old/gcc/config/arm/arm.h 2011-08-24 17:35:16 +0000 ++++ new/gcc/config/arm/arm.h 2011-09-05 14:32:11 +0000 +@@ -1961,7 +1961,7 @@ + : min >= -4096 && max < 4096 \ + ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, HImode) \ + : SImode) \ +- : ((min < 0 || max >= 0x2000 || !TARGET_THUMB2) ? SImode \ ++ : ((min < 0 || max >= 0x20000 || !TARGET_THUMB2) ? SImode \ + : (max >= 0x200) ? 
HImode \ + : QImode)) + + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106799.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106799.patch new file mode 100644 index 0000000000..c440db91e1 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106799.patch @@ -0,0 +1,75 @@ + 2011-09-05 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline. + 2011-08-26 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> + + * config/arm/cortex-a9.md ("cortex_a9_mult_long"): New. + ("cortex_a9_multiply_long"): New and use above. Handle all + long multiply cases. + ("cortex_a9_multiply"): Handle smmul and smmulr. + ("cortex_a9_mac"): Handle smmla. + +=== modified file 'gcc/config/arm/cortex-a9.md' +--- old/gcc/config/arm/cortex-a9.md 2011-01-18 15:28:08 +0000 ++++ new/gcc/config/arm/cortex-a9.md 2011-08-26 08:52:15 +0000 +@@ -68,7 +68,8 @@ + "cortex_a9_mac_m1*2, cortex_a9_mac_m2, cortex_a9_p0_wb") + (define_reservation "cortex_a9_mac" + "cortex_a9_multcycle1*2 ,cortex_a9_mac_m2, cortex_a9_p0_wb") +- ++(define_reservation "cortex_a9_mult_long" ++ "cortex_a9_mac_m1*3, cortex_a9_mac_m2, cortex_a9_p0_wb") + + ;; Issue at the same time along the load store pipeline and + ;; the VFP / Neon pipeline is not possible. +@@ -139,29 +140,35 @@ + (eq_attr "insn" "smlaxy")) + "cortex_a9_mac16") + +- + (define_insn_reservation "cortex_a9_multiply" 4 + (and (eq_attr "tune" "cortexa9") +- (eq_attr "insn" "mul")) ++ (eq_attr "insn" "mul,smmul,smmulr")) + "cortex_a9_mult") + + (define_insn_reservation "cortex_a9_mac" 4 + (and (eq_attr "tune" "cortexa9") +- (eq_attr "insn" "mla")) ++ (eq_attr "insn" "mla,smmla")) + "cortex_a9_mac") + ++(define_insn_reservation "cortex_a9_multiply_long" 5 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "insn" "smull,umull,smulls,umulls,smlal,smlals,umlal,umlals")) ++ "cortex_a9_mult_long") ++ + ;; An instruction with a result in E2 can be forwarded + ;; to E2 or E1 or M1 or the load store unit in the next cycle. + + (define_bypass 1 "cortex_a9_dp" + "cortex_a9_dp_shift, cortex_a9_multiply, + cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2, +- cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4") ++ cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4, ++ cortex_a9_multiply_long") + + (define_bypass 2 "cortex_a9_dp_shift" + "cortex_a9_dp_shift, cortex_a9_multiply, + cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2, +- cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4") ++ cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4, ++ cortex_a9_multiply_long") + + ;; An instruction in the load store pipeline can provide + ;; read access to a DP instruction in the P0 default pipeline +@@ -212,7 +219,7 @@ + + (define_bypass 1 + "cortex_a9_fps" +- "cortex_a9_fadd, cortex_a9_fps, cortex_a9_fcmp, cortex_a9_dp, cortex_a9_dp_shift, cortex_a9_multiply") ++ "cortex_a9_fadd, cortex_a9_fps, cortex_a9_fcmp, cortex_a9_dp, cortex_a9_dp_shift, cortex_a9_multiply, cortex_a9_multiply_long") + + ;; Scheduling on the FP_ADD pipeline. 
+ (define_reservation "ca9fp_add" "ca9_issue_vfp_neon + ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4") + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106800.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106800.patch new file mode 100644 index 0000000000..dfdeec7245 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106800.patch @@ -0,0 +1,1270 @@ +2011-09-07 Ira Rosen <ira.rosen@linaro.org> + + Backport from mainline: + + 2011-08-04 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vectorizer.h (struct _stmt_vec_info): Add new field for + pattern def statement, and its access macro. + (NUM_PATTERNS): Set to 5. + * tree-vect-loop.c (vect_determine_vectorization_factor): Handle + pattern def statement. + (vect_transform_loop): Likewise. + * tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add new + function vect_recog_over_widening_pattern (). + (vect_operation_fits_smaller_type): New function. + (vect_recog_over_widening_pattern, vect_mark_pattern_stmts): + Likewise. + (vect_pattern_recog_1): Move the code that marks pattern + statements to vect_mark_pattern_stmts (), and call it. Update + documentation. + * tree-vect-stmts.c (vect_supportable_shift): New function. + (vect_analyze_stmt): Handle pattern def statement. + (new_stmt_vec_info): Initialize pattern def statement. + + gcc/testsuite/ + * gcc.dg/vect/vect-over-widen-1.c: New test. + * gcc.dg/vect/vect-over-widen-2.c: New test. + * gcc.dg/vect/vect-over-widen-3.c: New test. + * gcc.dg/vect/vect-over-widen-4.c: New test. + + + 2011-08-09 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + PR tree-optimization/50014 + * tree-vect-loop.c (vectorizable_reduction): Get def type before + calling vect_get_vec_def_for_stmt_copy (). + + gcc/testsuite/ + PR tree-optimization/50014 + * gcc.dg/vect/pr50014.c: New test. + + + 2011-08-11 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + PR tree-optimization/50039 + * tree-vect-patterns.c (vect_operation_fits_smaller_type): Check + that DEF_STMT has a stmt_vec_info. + + gcc/testsuite/ + PR tree-optimization/50039 + * gcc.dg/vect/vect.exp: Run no-tree-fre-* tests with -fno-tree-fre. + * gcc.dg/vect/no-tree-fre-pr50039.c: New test. + + + 2011-09-04 Jakub Jelinek <jakub@redhat.com> + Ira Rosen <ira.rosen@linaro.org> + + gcc/ + PR tree-optimization/50208 + * tree-vect-patterns.c (vect_handle_widen_mult_by_const): Add an + argument. Check that def_stmt is inside the loop. + (vect_recog_widen_mult_pattern): Update calls to + vect_handle_widen_mult_by_cons. + (vect_operation_fits_smaller_type): Check that def_stmt is + inside the loop. + + gcc/testsuite/ + PR tree-optimization/50208 + * gcc.dg/vect/no-fre-pre-pr50208.c: New test. + * gcc.dg/vect/vect.exp: Run no-fre-pre-*.c tests with + -fno-tree-fre -fno-tree-pre. 
+ +=== added file 'gcc/testsuite/gcc.dg/vect/no-fre-pre-pr50208.c' +--- old/gcc/testsuite/gcc.dg/vect/no-fre-pre-pr50208.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-fre-pre-pr50208.c 2011-09-05 06:23:37 +0000 +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++ ++char c; ++int a, b; ++ ++void foo (int j) ++{ ++ int i; ++ while (--j) ++ { ++ b = 3; ++ for (i = 0; i < 2; ++i) ++ a = b ^ c; ++ } ++} ++ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== added file 'gcc/testsuite/gcc.dg/vect/no-tree-fre-pr50039.c' +--- old/gcc/testsuite/gcc.dg/vect/no-tree-fre-pr50039.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-tree-fre-pr50039.c 2011-09-05 06:23:37 +0000 +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++ ++extern unsigned char g_5; ++extern int g_31, g_76; ++int main(void) { ++ int i, j; ++ for (j=0; j < 2; ++j) { ++ g_31 = -3; ++ for (i=0; i < 2; ++i) ++ g_76 = (g_31 ? g_31+1 : 0) ^ g_5; ++ } ++} ++ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/pr50014.c' +--- old/gcc/testsuite/gcc.dg/vect/pr50014.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/pr50014.c 2011-09-05 06:23:37 +0000 +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target vect_int } */ ++ ++int f(unsigned char *s, int n) ++{ ++ int sum = 0; ++ int i; ++ ++ for (i = 0; i < n; i++) ++ sum += 256 * s[i]; ++ ++ return sum; ++} ++ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-over-widen-1.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-over-widen-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-over-widen-1.c 2011-09-05 06:23:37 +0000 +@@ -0,0 +1,64 @@ ++/* { dg-require-effective-target vect_int } */ ++/* { dg-require-effective-target vect_shift } */ ++ ++#include <stdlib.h> ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 64 ++ ++/* Modified rgb to rgb conversion from FFmpeg. 
*/ ++__attribute__ ((noinline)) void ++foo (unsigned char *src, unsigned char *dst) ++{ ++ unsigned char *s = src; ++ unsigned short *d = (unsigned short *)dst; ++ int i; ++ ++ for (i = 0; i < N/4; i++) ++ { ++ const int b = *s++; ++ const int g = *s++; ++ const int r = *s++; ++ const int a = *s++; ++ *d = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)); ++ d++; ++ } ++ ++ s = src; ++ d = (unsigned short *)dst; ++ for (i = 0; i < N/4; i++) ++ { ++ const int b = *s++; ++ const int g = *s++; ++ const int r = *s++; ++ const int a = *s++; ++ if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5))) ++ abort (); ++ d++; ++ } ++} ++ ++int main (void) ++{ ++ int i; ++ unsigned char in[N], out[N]; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ out[i] = 255; ++ __asm__ volatile (""); ++ } ++ ++ foo (in, out); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 4 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-over-widen-2.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-over-widen-2.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-over-widen-2.c 2011-09-05 06:23:37 +0000 +@@ -0,0 +1,65 @@ ++/* { dg-require-effective-target vect_int } */ ++/* { dg-require-effective-target vect_shift } */ ++ ++#include <stdlib.h> ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 64 ++ ++/* Modified rgb to rgb conversion from FFmpeg. */ ++__attribute__ ((noinline)) void ++foo (unsigned char *src, unsigned char *dst) ++{ ++ unsigned char *s = src; ++ int *d = (int *)dst; ++ int i; ++ ++ for (i = 0; i < N/4; i++) ++ { ++ const int b = *s++; ++ const int g = *s++; ++ const int r = *s++; ++ const int a = *s++; ++ *d = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)); ++ d++; ++ } ++ ++ s = src; ++ d = (int *)dst; ++ for (i = 0; i < N/4; i++) ++ { ++ const int b = *s++; ++ const int g = *s++; ++ const int r = *s++; ++ const int a = *s++; ++ if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5))) ++ abort (); ++ d++; ++ } ++} ++ ++int main (void) ++{ ++ int i; ++ unsigned char in[N], out[N]; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ out[i] = 255; ++ __asm__ volatile (""); ++ } ++ ++ foo (in, out); ++ ++ return 0; ++} ++ ++/* Final value stays in int, so no over-widening is detected at the moment. */ ++/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 0 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-over-widen-3.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-over-widen-3.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-over-widen-3.c 2011-09-05 06:23:37 +0000 +@@ -0,0 +1,64 @@ ++/* { dg-require-effective-target vect_int } */ ++/* { dg-require-effective-target vect_shift } */ ++ ++#include <stdlib.h> ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 64 ++ ++/* Modified rgb to rgb conversion from FFmpeg. 
*/ ++__attribute__ ((noinline)) void ++foo (unsigned char *src, unsigned char *dst) ++{ ++ unsigned char *s = src; ++ unsigned short *d = (unsigned short *)dst; ++ int i; ++ ++ for (i = 0; i < N/4; i++) ++ { ++ const int b = *s++; ++ const int g = *s++; ++ const int r = *s++; ++ const int a = *s++; ++ *d = ((b>>3) | ((g&0xFFC)<<3) | ((r+0xF8)>>8) | (a<<9)); ++ d++; ++ } ++ ++ s = src; ++ d = (unsigned short *)dst; ++ for (i = 0; i < N/4; i++) ++ { ++ const int b = *s++; ++ const int g = *s++; ++ const int r = *s++; ++ const int a = *s++; ++ if (*d != ((b>>3) | ((g&0xFFC)<<3) | ((r+0xF8)>>8) | (a<<9))) ++ abort (); ++ d++; ++ } ++} ++ ++int main (void) ++{ ++ int i; ++ unsigned char in[N], out[N]; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ out[i] = 255; ++ __asm__ volatile (""); ++ } ++ ++ foo (in, out); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-over-widen-4.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-over-widen-4.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-over-widen-4.c 2011-09-05 06:23:37 +0000 +@@ -0,0 +1,68 @@ ++/* { dg-require-effective-target vect_int } */ ++/* { dg-require-effective-target vect_shift } */ ++ ++#include <stdlib.h> ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 64 ++ ++/* Modified rgb to rgb conversion from FFmpeg. */ ++__attribute__ ((noinline)) int ++foo (unsigned char *src, unsigned char *dst) ++{ ++ unsigned char *s = src; ++ unsigned short *d = (unsigned short *)dst, res; ++ int i, result = 0; ++ ++ for (i = 0; i < N/4; i++) ++ { ++ const int b = *s++; ++ const int g = *s++; ++ const int r = *s++; ++ const int a = *s++; ++ res = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)); ++ *d = res; ++ result += res; ++ d++; ++ } ++ ++ s = src; ++ d = (unsigned short *)dst; ++ for (i = 0; i < N/4; i++) ++ { ++ const int b = *s++; ++ const int g = *s++; ++ const int r = *s++; ++ const int a = *s++; ++ if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5))) ++ abort (); ++ d++; ++ } ++ ++ return result; ++} ++ ++int main (void) ++{ ++ int i; ++ unsigned char in[N], out[N]; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ out[i] = 255; ++ __asm__ volatile (""); ++ } ++ ++ foo (in, out); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 4 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect.exp' +--- old/gcc/testsuite/gcc.dg/vect/vect.exp 2011-05-05 15:43:31 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect.exp 2011-09-05 06:23:37 +0000 +@@ -245,6 +245,18 @@ + dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-tree-reassoc-bb-slp-*.\[cS\]]] \ + "" $VECT_SLP_CFLAGS + ++# -fno-tree-fre ++set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS ++lappend DEFAULT_VECTCFLAGS "-fno-tree-fre" ++dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-tree-fre-*.\[cS\]]] \ ++ "" $DEFAULT_VECTCFLAGS ++ ++# -fno-tree-fre -fno-tree-pre ++set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS ++lappend DEFAULT_VECTCFLAGS "-fno-tree-fre" "-fno-tree-pre" ++dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-fre-pre*.\[cS\]]] \ ++ "" $DEFAULT_VECTCFLAGS ++ 
+ # Clean up. + set dg-do-what-default ${save-dg-do-what-default} + + +=== modified file 'gcc/tree-vect-loop.c' +--- old/gcc/tree-vect-loop.c 2011-07-11 11:02:55 +0000 ++++ new/gcc/tree-vect-loop.c 2011-09-05 06:23:37 +0000 +@@ -181,8 +181,8 @@ + stmt_vec_info stmt_info; + int i; + HOST_WIDE_INT dummy; +- gimple stmt, pattern_stmt = NULL; +- bool analyze_pattern_stmt = false; ++ gimple stmt, pattern_stmt = NULL, pattern_def_stmt = NULL; ++ bool analyze_pattern_stmt = false, pattern_def = false; + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "=== vect_determine_vectorization_factor ==="); +@@ -297,6 +297,29 @@ + || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) + analyze_pattern_stmt = true; + ++ /* If a pattern statement has a def stmt, analyze it too. */ ++ if (is_pattern_stmt_p (stmt_info) ++ && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info)) ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))) ++ { ++ if (pattern_def) ++ pattern_def = false; ++ else ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "==> examining pattern def stmt: "); ++ print_gimple_stmt (vect_dump, pattern_def_stmt, 0, ++ TDF_SLIM); ++ } ++ ++ pattern_def = true; ++ stmt = pattern_def_stmt; ++ stmt_info = vinfo_for_stmt (stmt); ++ } ++ } ++ + if (gimple_get_lhs (stmt) == NULL_TREE) + { + if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) +@@ -401,7 +424,7 @@ + || (nunits > vectorization_factor)) + vectorization_factor = nunits; + +- if (!analyze_pattern_stmt) ++ if (!analyze_pattern_stmt && !pattern_def) + gsi_next (&si); + } + } +@@ -3985,7 +4008,7 @@ + VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vect_defs = NULL; + VEC (gimple, heap) *phis = NULL; + int vec_num; +- tree def0, def1, tem; ++ tree def0, def1, tem, op0, op1 = NULL_TREE; + + if (nested_in_vect_loop_p (loop, stmt)) + { +@@ -4418,8 +4441,6 @@ + /* Handle uses. */ + if (j == 0) + { +- tree op0, op1 = NULL_TREE; +- + op0 = ops[!reduc_index]; + if (op_type == ternary_op) + { +@@ -4449,11 +4470,19 @@ + { + if (!slp_node) + { +- enum vect_def_type dt = vect_unknown_def_type; /* Dummy */ +- loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0); ++ enum vect_def_type dt; ++ gimple dummy_stmt; ++ tree dummy; ++ ++ vect_is_simple_use (ops[!reduc_index], loop_vinfo, NULL, ++ &dummy_stmt, &dummy, &dt); ++ loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, ++ loop_vec_def0); + VEC_replace (tree, vec_oprnds0, 0, loop_vec_def0); + if (op_type == ternary_op) + { ++ vect_is_simple_use (op1, loop_vinfo, NULL, &dummy_stmt, ++ &dummy, &dt); + loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, + loop_vec_def1); + VEC_replace (tree, vec_oprnds1, 0, loop_vec_def1); +@@ -4758,8 +4787,8 @@ + tree cond_expr = NULL_TREE; + gimple_seq cond_expr_stmt_list = NULL; + bool do_peeling_for_loop_bound; +- gimple stmt, pattern_stmt; +- bool transform_pattern_stmt = false; ++ gimple stmt, pattern_stmt, pattern_def_stmt; ++ bool transform_pattern_stmt = false, pattern_def = false; + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "=== vec_transform_loop ==="); +@@ -4903,6 +4932,30 @@ + || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) + transform_pattern_stmt = true; + ++ /* If pattern statement has a def stmt, vectorize it too. 
*/ ++ if (is_pattern_stmt_p (stmt_info) ++ && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info)) ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))) ++ { ++ if (pattern_def) ++ pattern_def = false; ++ else ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "==> vectorizing pattern def" ++ " stmt: "); ++ print_gimple_stmt (vect_dump, pattern_def_stmt, 0, ++ TDF_SLIM); ++ } ++ ++ pattern_def = true; ++ stmt = pattern_def_stmt; ++ stmt_info = vinfo_for_stmt (stmt); ++ } ++ } ++ + gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); + nunits = (unsigned int) TYPE_VECTOR_SUBPARTS ( + STMT_VINFO_VECTYPE (stmt_info)); +@@ -4930,7 +4983,7 @@ + /* Hybrid SLP stmts must be vectorized in addition to SLP. */ + if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info)) + { +- if (!transform_pattern_stmt) ++ if (!transform_pattern_stmt && !pattern_def) + gsi_next (&si); + continue; + } +@@ -4962,7 +5015,7 @@ + } + } + +- if (!transform_pattern_stmt) ++ if (!transform_pattern_stmt && !pattern_def) + gsi_next (&si); + } /* stmts in BB */ + } /* BBs in loop */ + +=== modified file 'gcc/tree-vect-patterns.c' +--- old/gcc/tree-vect-patterns.c 2011-07-06 12:04:10 +0000 ++++ new/gcc/tree-vect-patterns.c 2011-09-05 06:23:37 +0000 +@@ -46,11 +46,14 @@ + static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *, + tree *); + static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *); ++static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *, ++ tree *); + static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { + vect_recog_widen_mult_pattern, + vect_recog_widen_sum_pattern, + vect_recog_dot_prod_pattern, +- vect_recog_pow_pattern}; ++ vect_recog_pow_pattern, ++ vect_recog_over_widening_pattern}; + + + /* Function widened_name_p +@@ -339,12 +342,14 @@ + replace a_T = (TYPE) a_t; with a_it - (interm_type) a_t; */ + + static bool +-vect_handle_widen_mult_by_const (tree const_oprnd, tree *oprnd, ++vect_handle_widen_mult_by_const (gimple stmt, tree const_oprnd, tree *oprnd, + VEC (gimple, heap) **stmts, tree type, + tree *half_type, gimple def_stmt) + { + tree new_type, new_oprnd, tmp; + gimple new_stmt; ++ loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); ++ struct loop *loop = LOOP_VINFO_LOOP (loop_info); + + if (int_fits_type_p (const_oprnd, *half_type)) + { +@@ -354,6 +359,8 @@ + } + + if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4) ++ || !gimple_bb (def_stmt) ++ || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) + || !vinfo_for_stmt (def_stmt)) + return false; + +@@ -522,7 +529,8 @@ + { + if (TREE_CODE (oprnd0) == INTEGER_CST + && TREE_CODE (half_type1) == INTEGER_TYPE +- && vect_handle_widen_mult_by_const (oprnd0, &oprnd1, stmts, type, ++ && vect_handle_widen_mult_by_const (last_stmt, oprnd0, &oprnd1, ++ stmts, type, + &half_type1, def_stmt1)) + half_type0 = half_type1; + else +@@ -532,7 +540,8 @@ + { + if (TREE_CODE (oprnd1) == INTEGER_CST + && TREE_CODE (half_type0) == INTEGER_TYPE +- && vect_handle_widen_mult_by_const (oprnd1, &oprnd0, stmts, type, ++ && vect_handle_widen_mult_by_const (last_stmt, oprnd1, &oprnd0, ++ stmts, type, + &half_type0, def_stmt0)) + half_type1 = half_type0; + else +@@ -826,6 +835,424 @@ + } + + ++/* Return TRUE if the operation in STMT can be performed on a smaller type. ++ ++ Input: ++ STMT - a statement to check. 
++ DEF - we support operations with two operands, one of which is constant. ++ The other operand can be defined by a demotion operation, or by a ++ previous statement in a sequence of over-promoted operations. In the ++ later case DEF is used to replace that operand. (It is defined by a ++ pattern statement we created for the previous statement in the ++ sequence). ++ ++ Input/output: ++ NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not ++ NULL, it's the type of DEF. ++ STMTS - additional pattern statements. If a pattern statement (type ++ conversion) is created in this function, its original statement is ++ added to STMTS. ++ ++ Output: ++ OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new ++ operands to use in the new pattern statement for STMT (will be created ++ in vect_recog_over_widening_pattern ()). ++ NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern ++ statements for STMT: the first one is a type promotion and the second ++ one is the operation itself. We return the type promotion statement ++ in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_STMT of ++ the second pattern statement. */ ++ ++static bool ++vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type, ++ tree *op0, tree *op1, gimple *new_def_stmt, ++ VEC (gimple, heap) **stmts) ++{ ++ enum tree_code code; ++ tree const_oprnd, oprnd; ++ tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type; ++ gimple def_stmt, new_stmt; ++ bool first = false; ++ loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); ++ struct loop *loop = LOOP_VINFO_LOOP (loop_info); ++ ++ *new_def_stmt = NULL; ++ ++ if (!is_gimple_assign (stmt)) ++ return false; ++ ++ code = gimple_assign_rhs_code (stmt); ++ if (code != LSHIFT_EXPR && code != RSHIFT_EXPR ++ && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR) ++ return false; ++ ++ oprnd = gimple_assign_rhs1 (stmt); ++ const_oprnd = gimple_assign_rhs2 (stmt); ++ type = gimple_expr_type (stmt); ++ ++ if (TREE_CODE (oprnd) != SSA_NAME ++ || TREE_CODE (const_oprnd) != INTEGER_CST) ++ return false; ++ ++ /* If we are in the middle of a sequence, we use DEF from a previous ++ statement. Otherwise, OPRND has to be a result of type promotion. */ ++ if (*new_type) ++ { ++ half_type = *new_type; ++ oprnd = def; ++ } ++ else ++ { ++ first = true; ++ if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false) ++ || !gimple_bb (def_stmt) ++ || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) ++ || !vinfo_for_stmt (def_stmt)) ++ return false; ++ } ++ ++ /* Can we perform the operation on a smaller type? */ ++ switch (code) ++ { ++ case BIT_IOR_EXPR: ++ case BIT_XOR_EXPR: ++ case BIT_AND_EXPR: ++ if (!int_fits_type_p (const_oprnd, half_type)) ++ { ++ /* HALF_TYPE is not enough. Try a bigger type if possible. */ ++ if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) ++ return false; ++ ++ interm_type = build_nonstandard_integer_type ( ++ TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); ++ if (!int_fits_type_p (const_oprnd, interm_type)) ++ return false; ++ } ++ ++ break; ++ ++ case LSHIFT_EXPR: ++ /* Try intermediate type - HALF_TYPE is not enough for sure. */ ++ if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) ++ return false; ++ ++ /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size. ++ (e.g., if the original value was char, the shift amount is at most 8 ++ if we want to use short). 
*/ ++ if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1) ++ return false; ++ ++ interm_type = build_nonstandard_integer_type ( ++ TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); ++ ++ if (!vect_supportable_shift (code, interm_type)) ++ return false; ++ ++ break; ++ ++ case RSHIFT_EXPR: ++ if (vect_supportable_shift (code, half_type)) ++ break; ++ ++ /* Try intermediate type - HALF_TYPE is not supported. */ ++ if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) ++ return false; ++ ++ interm_type = build_nonstandard_integer_type ( ++ TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); ++ ++ if (!vect_supportable_shift (code, interm_type)) ++ return false; ++ ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ /* There are four possible cases: ++ 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's ++ the first statement in the sequence) ++ a. The original, HALF_TYPE, is not enough - we replace the promotion ++ from HALF_TYPE to TYPE with a promotion to INTERM_TYPE. ++ b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original ++ promotion. ++ 2. OPRND is defined by a pattern statement we created. ++ a. Its type is not sufficient for the operation, we create a new stmt: ++ a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store ++ this statement in NEW_DEF_STMT, and it is later put in ++ STMT_VINFO_PATTERN_DEF_STMT of the pattern statement for STMT. ++ b. OPRND is good to use in the new statement. */ ++ if (first) ++ { ++ if (interm_type) ++ { ++ /* Replace the original type conversion HALF_TYPE->TYPE with ++ HALF_TYPE->INTERM_TYPE. */ ++ if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt))) ++ { ++ new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); ++ /* Check if the already created pattern stmt is what we need. */ ++ if (!is_gimple_assign (new_stmt) ++ || gimple_assign_rhs_code (new_stmt) != NOP_EXPR ++ || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type) ++ return false; ++ ++ oprnd = gimple_assign_lhs (new_stmt); ++ } ++ else ++ { ++ /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */ ++ oprnd = gimple_assign_rhs1 (def_stmt); ++ tmp = create_tmp_reg (interm_type, NULL); ++ add_referenced_var (tmp); ++ new_oprnd = make_ssa_name (tmp, NULL); ++ new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, ++ oprnd, NULL_TREE); ++ SSA_NAME_DEF_STMT (new_oprnd) = new_stmt; ++ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt; ++ VEC_safe_push (gimple, heap, *stmts, def_stmt); ++ oprnd = new_oprnd; ++ } ++ } ++ else ++ { ++ /* Retrieve the operand before the type promotion. */ ++ oprnd = gimple_assign_rhs1 (def_stmt); ++ } ++ } ++ else ++ { ++ if (interm_type) ++ { ++ /* Create a type conversion HALF_TYPE->INTERM_TYPE. */ ++ tmp = create_tmp_reg (interm_type, NULL); ++ add_referenced_var (tmp); ++ new_oprnd = make_ssa_name (tmp, NULL); ++ new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, ++ oprnd, NULL_TREE); ++ SSA_NAME_DEF_STMT (new_oprnd) = new_stmt; ++ oprnd = new_oprnd; ++ *new_def_stmt = new_stmt; ++ } ++ ++ /* Otherwise, OPRND is already set. 
*/ ++ } ++ ++ if (interm_type) ++ *new_type = interm_type; ++ else ++ *new_type = half_type; ++ ++ *op0 = oprnd; ++ *op1 = fold_convert (*new_type, const_oprnd); ++ ++ return true; ++} ++ ++ ++/* Try to find a statement or a sequence of statements that can be performed ++ on a smaller type: ++ ++ type x_t; ++ TYPE x_T, res0_T, res1_T; ++ loop: ++ S1 x_t = *p; ++ S2 x_T = (TYPE) x_t; ++ S3 res0_T = op (x_T, C0); ++ S4 res1_T = op (res0_T, C1); ++ S5 ... = () res1_T; - type demotion ++ ++ where type 'TYPE' is at least double the size of type 'type', C0 and C1 are ++ constants. ++ Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either ++ be 'type' or some intermediate type. For now, we expect S5 to be a type ++ demotion operation. We also check that S3 and S4 have only one use. ++. ++ ++*/ ++static gimple ++vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts, ++ tree *type_in, tree *type_out) ++{ ++ gimple stmt = VEC_pop (gimple, *stmts); ++ gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL; ++ tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type; ++ imm_use_iterator imm_iter; ++ use_operand_p use_p; ++ int nuses = 0; ++ tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd; ++ bool first; ++ struct loop *loop = (gimple_bb (stmt))->loop_father; ++ ++ first = true; ++ while (1) ++ { ++ if (!vinfo_for_stmt (stmt) ++ || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt))) ++ return NULL; ++ ++ new_def_stmt = NULL; ++ if (!vect_operation_fits_smaller_type (stmt, var, &new_type, ++ &op0, &op1, &new_def_stmt, ++ stmts)) ++ { ++ if (first) ++ return NULL; ++ else ++ break; ++ } ++ ++ /* STMT can be performed on a smaller type. Check its uses. */ ++ lhs = gimple_assign_lhs (stmt); ++ nuses = 0; ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) ++ { ++ if (is_gimple_debug (USE_STMT (use_p))) ++ continue; ++ use_stmt = USE_STMT (use_p); ++ nuses++; ++ } ++ ++ if (nuses != 1 || !is_gimple_assign (use_stmt) ++ || !gimple_bb (use_stmt) ++ || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) ++ return NULL; ++ ++ /* Create pattern statement for STMT. */ ++ vectype = get_vectype_for_scalar_type (new_type); ++ if (!vectype) ++ return NULL; ++ ++ /* We want to collect all the statements for which we create pattern ++ statetments, except for the case when the last statement in the ++ sequence doesn't have a corresponding pattern statement. In such ++ case we associate the last pattern statement with the last statement ++ in the sequence. Therefore, we only add an original statetement to ++ the list if we know that it is not the last. */ ++ if (prev_stmt) ++ VEC_safe_push (gimple, heap, *stmts, prev_stmt); ++ ++ var = vect_recog_temp_ssa_var (new_type, NULL); ++ pattern_stmt = gimple_build_assign_with_ops ( ++ gimple_assign_rhs_code (stmt), var, op0, op1); ++ SSA_NAME_DEF_STMT (var) = pattern_stmt; ++ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt; ++ STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (stmt)) = new_def_stmt; ++ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "created pattern stmt: "); ++ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); ++ } ++ ++ prev_stmt = stmt; ++ stmt = use_stmt; ++ ++ first = false; ++ } ++ ++ /* We got a sequence. We expect it to end with a type demotion operation. ++ Otherwise, we quit (for now). 
There are three possible cases: the ++ conversion is to NEW_TYPE (we don't do anything), the conversion is to ++ a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and ++ NEW_TYPE differs (we create a new conversion statement). */ ++ if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))) ++ { ++ use_lhs = gimple_assign_lhs (use_stmt); ++ use_type = TREE_TYPE (use_lhs); ++ /* Support only type promotion or signedess change. */ ++ if (!INTEGRAL_TYPE_P (use_type) ++ || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type)) ++ return NULL; ++ ++ if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type) ++ || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type)) ++ { ++ /* Create NEW_TYPE->USE_TYPE conversion. */ ++ tmp = create_tmp_reg (use_type, NULL); ++ add_referenced_var (tmp); ++ new_oprnd = make_ssa_name (tmp, NULL); ++ pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, ++ var, NULL_TREE); ++ SSA_NAME_DEF_STMT (new_oprnd) = pattern_stmt; ++ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt; ++ ++ *type_in = get_vectype_for_scalar_type (new_type); ++ *type_out = get_vectype_for_scalar_type (use_type); ++ ++ /* We created a pattern statement for the last statement in the ++ sequence, so we don't need to associate it with the pattern ++ statement created for PREV_STMT. Therefore, we add PREV_STMT ++ to the list in order to mark it later in vect_pattern_recog_1. */ ++ if (prev_stmt) ++ VEC_safe_push (gimple, heap, *stmts, prev_stmt); ++ } ++ else ++ { ++ if (prev_stmt) ++ STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (use_stmt)) ++ = STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (prev_stmt)); ++ ++ *type_in = vectype; ++ *type_out = NULL_TREE; ++ } ++ ++ VEC_safe_push (gimple, heap, *stmts, use_stmt); ++ } ++ else ++ /* TODO: support general case, create a conversion to the correct type. */ ++ return NULL; ++ ++ /* Pattern detected. */ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: "); ++ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); ++ } ++ ++ return pattern_stmt; ++} ++ ++ ++/* Mark statements that are involved in a pattern. 
*/ ++ ++static inline void ++vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt, ++ tree pattern_vectype) ++{ ++ stmt_vec_info pattern_stmt_info, def_stmt_info; ++ stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt); ++ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info); ++ gimple def_stmt; ++ ++ set_vinfo_for_stmt (pattern_stmt, ++ new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL)); ++ gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt)); ++ pattern_stmt_info = vinfo_for_stmt (pattern_stmt); ++ ++ STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt; ++ STMT_VINFO_DEF_TYPE (pattern_stmt_info) ++ = STMT_VINFO_DEF_TYPE (orig_stmt_info); ++ STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype; ++ STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true; ++ STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt; ++ STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info) ++ = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info); ++ if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)) ++ { ++ def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info); ++ set_vinfo_for_stmt (def_stmt, ++ new_stmt_vec_info (def_stmt, loop_vinfo, NULL)); ++ gimple_set_bb (def_stmt, gimple_bb (orig_stmt)); ++ def_stmt_info = vinfo_for_stmt (def_stmt); ++ STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt; ++ STMT_VINFO_DEF_TYPE (def_stmt_info) ++ = STMT_VINFO_DEF_TYPE (orig_stmt_info); ++ STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype; ++ } ++} ++ + /* Function vect_pattern_recog_1 + + Input: +@@ -855,7 +1282,6 @@ + { + gimple stmt = gsi_stmt (si), pattern_stmt; + stmt_vec_info stmt_info; +- stmt_vec_info pattern_stmt_info; + loop_vec_info loop_vinfo; + tree pattern_vectype; + tree type_in, type_out; +@@ -923,16 +1349,7 @@ + } + + /* Mark the stmts that are involved in the pattern. */ +- set_vinfo_for_stmt (pattern_stmt, +- new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL)); +- gimple_set_bb (pattern_stmt, gimple_bb (stmt)); +- pattern_stmt_info = vinfo_for_stmt (pattern_stmt); +- +- STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt; +- STMT_VINFO_DEF_TYPE (pattern_stmt_info) = STMT_VINFO_DEF_TYPE (stmt_info); +- STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype; +- STMT_VINFO_IN_PATTERN_P (stmt_info) = true; +- STMT_VINFO_RELATED_STMT (stmt_info) = pattern_stmt; ++ vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype); + + /* Patterns cannot be vectorized using SLP, because they change the order of + computation. */ +@@ -940,9 +1357,9 @@ + if (next == stmt) + VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i); + +- /* In case of widen-mult by a constant, it is possible that an additional +- pattern stmt is created and inserted in STMTS_TO_REPLACE. We create a +- stmt_info for it, and mark the relevant statements. */ ++ /* It is possible that additional pattern stmts are created and inserted in ++ STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the ++ relevant statements. 
*/ + for (i = 0; VEC_iterate (gimple, stmts_to_replace, i, stmt) + && (unsigned) i < (VEC_length (gimple, stmts_to_replace) - 1); + i++) +@@ -955,16 +1372,7 @@ + print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); + } + +- set_vinfo_for_stmt (pattern_stmt, +- new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL)); +- gimple_set_bb (pattern_stmt, gimple_bb (stmt)); +- pattern_stmt_info = vinfo_for_stmt (pattern_stmt); +- +- STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt; +- STMT_VINFO_DEF_TYPE (pattern_stmt_info) +- = STMT_VINFO_DEF_TYPE (stmt_info); +- STMT_VINFO_VECTYPE (pattern_stmt_info) = STMT_VINFO_VECTYPE (stmt_info); +- STMT_VINFO_IN_PATTERN_P (stmt_info) = true; ++ vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE); + } + + VEC_free (gimple, heap, stmts_to_replace); + +=== modified file 'gcc/tree-vect-stmts.c' +--- old/gcc/tree-vect-stmts.c 2011-07-06 12:04:10 +0000 ++++ new/gcc/tree-vect-stmts.c 2011-09-05 06:23:37 +0000 +@@ -2246,6 +2246,42 @@ + } + + ++/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE ++ either as shift by a scalar or by a vector. */ ++ ++bool ++vect_supportable_shift (enum tree_code code, tree scalar_type) ++{ ++ ++ enum machine_mode vec_mode; ++ optab optab; ++ int icode; ++ tree vectype; ++ ++ vectype = get_vectype_for_scalar_type (scalar_type); ++ if (!vectype) ++ return false; ++ ++ optab = optab_for_tree_code (code, vectype, optab_scalar); ++ if (!optab ++ || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing) ++ { ++ optab = optab_for_tree_code (code, vectype, optab_vector); ++ if (!optab ++ || (optab_handler (optab, TYPE_MODE (vectype)) ++ == CODE_FOR_nothing)) ++ return false; ++ } ++ ++ vec_mode = TYPE_MODE (vectype); ++ icode = (int) optab_handler (optab, vec_mode); ++ if (icode == CODE_FOR_nothing) ++ return false; ++ ++ return true; ++} ++ ++ + /* Function vectorizable_shift. + + Check if STMT performs a shift operation that can be vectorized. +@@ -4946,7 +4982,7 @@ + enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info); + bool ok; + tree scalar_type, vectype; +- gimple pattern_stmt; ++ gimple pattern_stmt, pattern_def_stmt; + + if (vect_print_dump_info (REPORT_DETAILS)) + { +@@ -5016,6 +5052,23 @@ + return false; + } + ++ if (is_pattern_stmt_p (stmt_info) ++ && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info)) ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))) ++ { ++ /* Analyze def stmt of STMT if it's a pattern stmt. */ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "==> examining pattern def statement: "); ++ print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM); ++ } ++ ++ if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node)) ++ return false; ++ } ++ ++ + switch (STMT_VINFO_DEF_TYPE (stmt_info)) + { + case vect_internal_def: +@@ -5336,6 +5389,7 @@ + STMT_VINFO_VECTORIZABLE (res) = true; + STMT_VINFO_IN_PATTERN_P (res) = false; + STMT_VINFO_RELATED_STMT (res) = NULL; ++ STMT_VINFO_PATTERN_DEF_STMT (res) = NULL; + STMT_VINFO_DATA_REF (res) = NULL; + + STMT_VINFO_DR_BASE_ADDRESS (res) = NULL; + +=== modified file 'gcc/tree-vectorizer.h' +--- old/gcc/tree-vectorizer.h 2011-07-11 11:02:55 +0000 ++++ new/gcc/tree-vectorizer.h 2011-09-05 06:23:37 +0000 +@@ -464,6 +464,9 @@ + pattern). */ + gimple related_stmt; + ++ /* Used to keep a def stmt of a pattern stmt if such exists. 
*/ ++ gimple pattern_def_stmt; ++ + /* List of datarefs that are known to have the same alignment as the dataref + of this stmt. */ + VEC(dr_p,heap) *same_align_refs; +@@ -531,6 +534,7 @@ + + #define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p + #define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt ++#define STMT_VINFO_PATTERN_DEF_STMT(S) (S)->pattern_def_stmt + #define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs + #define STMT_VINFO_DEF_TYPE(S) (S)->def_type + #define STMT_VINFO_DR_GROUP_FIRST_DR(S) (S)->first_dr +@@ -814,6 +818,7 @@ + extern void vect_get_load_cost (struct data_reference *, int, bool, + unsigned int *, unsigned int *); + extern void vect_get_store_cost (struct data_reference *, int, unsigned int *); ++extern bool vect_supportable_shift (enum tree_code, tree); + + /* In tree-vect-data-refs.c. */ + extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int); +@@ -891,7 +896,7 @@ + Additional pattern recognition functions can (and will) be added + in the future. */ + typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); +-#define NUM_PATTERNS 4 ++#define NUM_PATTERNS 5 + void vect_pattern_recog (loop_vec_info); + + /* In tree-vectorizer.c. */ + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106802.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106802.patch new file mode 100644 index 0000000000..ade96fdd11 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106802.patch @@ -0,0 +1,948 @@ +2011-09-12 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF mainline: + + 2011-08-30 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/arm.c (optimal_immediate_sequence_1): Make b1, b2, + b3 and b4 unsigned. + + 2011-08-30 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/arm.c (arm_gen_constant): Set can_negate correctly + when code is SET. + + 2011-08-26 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/arm.c (struct four_ints): New type. + (count_insns_for_constant): Delete function. + (find_best_start): Delete function. + (optimal_immediate_sequence): New function. + (optimal_immediate_sequence_1): New function. + (arm_gen_constant): Move constant splitting code to + optimal_immediate_sequence. + Rewrite constant negation/invertion code. + + gcc/testsuite/ + * gcc.target/arm/thumb2-replicated-constant1.c: New file. + * gcc.target/arm/thumb2-replicated-constant2.c: New file. + * gcc.target/arm/thumb2-replicated-constant3.c: New file. + * gcc.target/arm/thumb2-replicated-constant4.c: New file. + + 2011-08-26 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/arm-protos.h (const_ok_for_op): Add prototype. + * config/arm/arm.c (const_ok_for_op): Add support for addw/subw. + Remove prototype. Remove static function type. + * config/arm/arm.md (*arm_addsi3): Add addw/subw support. + Add arch attribute. + * config/arm/constraints.md (Pj, PJ): New constraints. + + 2011-04-20 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/arm.c (arm_gen_constant): Move mowv support .... + (const_ok_for_op): ... to here. + + 2011-04-20 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/arm.c (arm_gen_constant): Remove redundant can_invert. 
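To make the effect of this rework concrete, the kinds of constants it targets are sketched below. This is an illustrative fragment written for this note, not code from the patch (the function name is invented). 0xfff is accepted by the extended const_ok_for_op for PLUS because Thumb-2 addw takes a 12-bit immediate, and 0x44554455, which is not a valid modified immediate on its own, should now be synthesizable from two replicated immediates (0x44444444 and 0x00110011) rather than four 8-bit chunks.

/* Illustrative sketch only, not part of the patch.  Each constant below
   is one the reworked optimal_immediate_sequence / const_ok_for_op code
   is expected to materialize more cheaply when compiling for Thumb-2.  */
unsigned int
cheap_constants (unsigned int r)
{
  unsigned int a = r + 0xfff;        /* 12-bit immediate: single addw.  */
  unsigned int b = r | 0x44554455;   /* should split into two replicated immediates.  */
  return a ^ b;
}
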
+ + +=== modified file 'gcc/config/arm/arm-protos.h' +--- old/gcc/config/arm/arm-protos.h 2011-07-04 14:03:49 +0000 ++++ new/gcc/config/arm/arm-protos.h 2011-08-25 13:26:58 +0000 +@@ -46,6 +46,7 @@ + extern bool arm_small_register_classes_for_mode_p (enum machine_mode); + extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode); + extern int const_ok_for_arm (HOST_WIDE_INT); ++extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code); + extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx, + HOST_WIDE_INT, rtx, rtx, int); + extern RTX_CODE arm_canonicalize_comparison (RTX_CODE, rtx *, rtx *); + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-08-24 17:35:16 +0000 ++++ new/gcc/config/arm/arm.c 2011-09-06 12:57:56 +0000 +@@ -63,6 +63,11 @@ + + void (*arm_lang_output_object_attributes_hook)(void); + ++struct four_ints ++{ ++ int i[4]; ++}; ++ + /* Forward function declarations. */ + static bool arm_needs_doubleword_align (enum machine_mode, const_tree); + static int arm_compute_static_chain_stack_bytes (void); +@@ -81,7 +86,6 @@ + static bool arm_legitimate_address_p (enum machine_mode, rtx, bool); + static int thumb_far_jump_used_p (void); + static bool thumb_force_lr_save (void); +-static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code); + static rtx emit_sfm (int, int); + static unsigned arm_size_return_regs (void); + static bool arm_assemble_integer (rtx, unsigned int, int); +@@ -129,7 +133,13 @@ + static int arm_comp_type_attributes (const_tree, const_tree); + static void arm_set_default_type_attributes (tree); + static int arm_adjust_cost (rtx, rtx, rtx, int); +-static int count_insns_for_constant (HOST_WIDE_INT, int); ++static int optimal_immediate_sequence (enum rtx_code code, ++ unsigned HOST_WIDE_INT val, ++ struct four_ints *return_sequence); ++static int optimal_immediate_sequence_1 (enum rtx_code code, ++ unsigned HOST_WIDE_INT val, ++ struct four_ints *return_sequence, ++ int i); + static int arm_get_strip_length (int); + static bool arm_function_ok_for_sibcall (tree, tree); + static enum machine_mode arm_promote_function_mode (const_tree, +@@ -2525,7 +2535,7 @@ + } + + /* Return true if I is a valid constant for the operation CODE. */ +-static int ++int + const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code) + { + if (const_ok_for_arm (i)) +@@ -2533,7 +2543,21 @@ + + switch (code) + { ++ case SET: ++ /* See if we can use movw. */ ++ if (arm_arch_thumb2 && (i & 0xffff0000) == 0) ++ return 1; ++ else ++ return 0; ++ + case PLUS: ++ /* See if we can use addw or subw. */ ++ if (TARGET_THUMB2 ++ && ((i & 0xfffff000) == 0 ++ || ((-i) & 0xfffff000) == 0)) ++ return 1; ++ /* else fall through. */ ++ + case COMPARE: + case EQ: + case NE: +@@ -2649,68 +2673,41 @@ + 1); + } + +-/* Return the number of instructions required to synthesize the given +- constant, if we start emitting them from bit-position I. */ +-static int +-count_insns_for_constant (HOST_WIDE_INT remainder, int i) +-{ +- HOST_WIDE_INT temp1; +- int step_size = TARGET_ARM ? 2 : 1; +- int num_insns = 0; +- +- gcc_assert (TARGET_ARM || i == 0); +- +- do +- { +- int end; +- +- if (i <= 0) +- i += 32; +- if (remainder & (((1 << step_size) - 1) << (i - step_size))) +- { +- end = i - 8; +- if (end < 0) +- end += 32; +- temp1 = remainder & ((0x0ff << end) +- | ((i < end) ? 
(0xff >> (32 - end)) : 0)); +- remainder &= ~temp1; +- num_insns++; +- i -= 8 - step_size; +- } +- i -= step_size; +- } while (remainder); +- return num_insns; +-} +- +-static int +-find_best_start (unsigned HOST_WIDE_INT remainder) ++/* Return a sequence of integers, in RETURN_SEQUENCE that fit into ++ ARM/THUMB2 immediates, and add up to VAL. ++ Thr function return value gives the number of insns required. */ ++static int ++optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val, ++ struct four_ints *return_sequence) + { + int best_consecutive_zeros = 0; + int i; + int best_start = 0; ++ int insns1, insns2; ++ struct four_ints tmp_sequence; + + /* If we aren't targetting ARM, the best place to start is always at +- the bottom. */ +- if (! TARGET_ARM) +- return 0; +- +- for (i = 0; i < 32; i += 2) ++ the bottom, otherwise look more closely. */ ++ if (TARGET_ARM) + { +- int consecutive_zeros = 0; +- +- if (!(remainder & (3 << i))) ++ for (i = 0; i < 32; i += 2) + { +- while ((i < 32) && !(remainder & (3 << i))) +- { +- consecutive_zeros += 2; +- i += 2; +- } +- if (consecutive_zeros > best_consecutive_zeros) +- { +- best_consecutive_zeros = consecutive_zeros; +- best_start = i - consecutive_zeros; +- } +- i -= 2; ++ int consecutive_zeros = 0; ++ ++ if (!(val & (3 << i))) ++ { ++ while ((i < 32) && !(val & (3 << i))) ++ { ++ consecutive_zeros += 2; ++ i += 2; ++ } ++ if (consecutive_zeros > best_consecutive_zeros) ++ { ++ best_consecutive_zeros = consecutive_zeros; ++ best_start = i - consecutive_zeros; ++ } ++ i -= 2; ++ } + } + } + +@@ -2737,13 +2734,161 @@ + the constant starting from `best_start', and also starting from + zero (i.e. with bit 31 first to be output). If `best_start' doesn't + yield a shorter sequence, we may as well use zero. */ ++ insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start); + if (best_start != 0 +- && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder) +- && (count_insns_for_constant (remainder, 0) <= +- count_insns_for_constant (remainder, best_start))) +- best_start = 0; +- +- return best_start; ++ && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val)) ++ { ++ insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0); ++ if (insns2 <= insns1) ++ { ++ *return_sequence = tmp_sequence; ++ insns1 = insns2; ++ } ++ } ++ ++ return insns1; ++} ++ ++/* As for optimal_immediate_sequence, but starting at bit-position I. */ ++static int ++optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val, ++ struct four_ints *return_sequence, int i) ++{ ++ int remainder = val & 0xffffffff; ++ int insns = 0; ++ ++ /* Try and find a way of doing the job in either two or three ++ instructions. ++ ++ In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned ++ location. We start at position I. This may be the MSB, or ++ optimial_immediate_sequence may have positioned it at the largest block ++ of zeros that are aligned on a 2-bit boundary. We then fill up the temps, ++ wrapping around to the top of the word when we drop off the bottom. ++ In the worst case this code should produce no more than four insns. ++ ++ In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit ++ constants, shifted to any arbitrary location. We should always start ++ at the MSB. 
*/ ++ do ++ { ++ int end; ++ unsigned int b1, b2, b3, b4; ++ unsigned HOST_WIDE_INT result; ++ int loc; ++ ++ gcc_assert (insns < 4); ++ ++ if (i <= 0) ++ i += 32; ++ ++ /* First, find the next normal 12/8-bit shifted/rotated immediate. */ ++ if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1))))) ++ { ++ loc = i; ++ if (i <= 12 && TARGET_THUMB2 && code == PLUS) ++ /* We can use addw/subw for the last 12 bits. */ ++ result = remainder; ++ else ++ { ++ /* Use an 8-bit shifted/rotated immediate. */ ++ end = i - 8; ++ if (end < 0) ++ end += 32; ++ result = remainder & ((0x0ff << end) ++ | ((i < end) ? (0xff >> (32 - end)) ++ : 0)); ++ i -= 8; ++ } ++ } ++ else ++ { ++ /* Arm allows rotates by a multiple of two. Thumb-2 allows ++ arbitrary shifts. */ ++ i -= TARGET_ARM ? 2 : 1; ++ continue; ++ } ++ ++ /* Next, see if we can do a better job with a thumb2 replicated ++ constant. ++ ++ We do it this way around to catch the cases like 0x01F001E0 where ++ two 8-bit immediates would work, but a replicated constant would ++ make it worse. ++ ++ TODO: 16-bit constants that don't clear all the bits, but still win. ++ TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */ ++ if (TARGET_THUMB2) ++ { ++ b1 = (remainder & 0xff000000) >> 24; ++ b2 = (remainder & 0x00ff0000) >> 16; ++ b3 = (remainder & 0x0000ff00) >> 8; ++ b4 = remainder & 0xff; ++ ++ if (loc > 24) ++ { ++ /* The 8-bit immediate already found clears b1 (and maybe b2), ++ but must leave b3 and b4 alone. */ ++ ++ /* First try to find a 32-bit replicated constant that clears ++ almost everything. We can assume that we can't do it in one, ++ or else we wouldn't be here. */ ++ unsigned int tmp = b1 & b2 & b3 & b4; ++ unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16) ++ + (tmp << 24); ++ unsigned int matching_bytes = (tmp == b1) + (tmp == b2) ++ + (tmp == b3) + (tmp == b4); ++ if (tmp ++ && (matching_bytes >= 3 ++ || (matching_bytes == 2 ++ && const_ok_for_op (remainder & ~tmp2, code)))) ++ { ++ /* At least 3 of the bytes match, and the fourth has at ++ least as many bits set, or two of the bytes match ++ and it will only require one more insn to finish. */ ++ result = tmp2; ++ i = tmp != b1 ? 32 ++ : tmp != b2 ? 24 ++ : tmp != b3 ? 16 ++ : 8; ++ } ++ ++ /* Second, try to find a 16-bit replicated constant that can ++ leave three of the bytes clear. If b2 or b4 is already ++ zero, then we can. If the 8-bit from above would not ++ clear b2 anyway, then we still win. */ ++ else if (b1 == b3 && (!b2 || !b4 ++ || (remainder & 0x00ff0000 & ~result))) ++ { ++ result = remainder & 0xff00ff00; ++ i = 24; ++ } ++ } ++ else if (loc > 16) ++ { ++ /* The 8-bit immediate already found clears b2 (and maybe b3) ++ and we don't get here unless b1 is alredy clear, but it will ++ leave b4 unchanged. */ ++ ++ /* If we can clear b2 and b4 at once, then we win, since the ++ 8-bits couldn't possibly reach that far. */ ++ if (b2 == b4) ++ { ++ result = remainder & 0x00ff00ff; ++ i = 16; ++ } ++ } ++ } ++ ++ return_sequence->i[insns++] = result; ++ remainder &= ~result; ++ ++ if (code == SET || code == MINUS) ++ code = PLUS; ++ } ++ while (remainder); ++ ++ return insns; + } + + /* Emit an instruction with the indicated PATTERN. If COND is +@@ -2760,7 +2905,6 @@ + + /* As above, but extra parameter GENERATE which, if clear, suppresses + RTL generation. */ +-/* ??? This needs more work for thumb2. 
*/ + + static int + arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, +@@ -2772,15 +2916,15 @@ + int final_invert = 0; + int can_negate_initial = 0; + int i; +- int num_bits_set = 0; + int set_sign_bit_copies = 0; + int clear_sign_bit_copies = 0; + int clear_zero_bit_copies = 0; + int set_zero_bit_copies = 0; +- int insns = 0; ++ int insns = 0, neg_insns, inv_insns; + unsigned HOST_WIDE_INT temp1, temp2; + unsigned HOST_WIDE_INT remainder = val & 0xffffffff; +- int step_size = TARGET_ARM ? 2 : 1; ++ struct four_ints *immediates; ++ struct four_ints pos_immediates, neg_immediates, inv_immediates; + + /* Find out which operations are safe for a given CODE. Also do a quick + check for degenerate cases; these can occur when DImode operations +@@ -2789,7 +2933,6 @@ + { + case SET: + can_invert = 1; +- can_negate = 1; + break; + + case PLUS: +@@ -2817,9 +2960,6 @@ + gen_rtx_SET (VOIDmode, target, source)); + return 1; + } +- +- if (TARGET_THUMB2) +- can_invert = 1; + break; + + case AND: +@@ -2861,6 +3001,7 @@ + gen_rtx_NOT (mode, source))); + return 1; + } ++ final_invert = 1; + break; + + case MINUS: +@@ -2883,7 +3024,6 @@ + source))); + return 1; + } +- can_negate = 1; + + break; + +@@ -2892,9 +3032,7 @@ + } + + /* If we can do it in one insn get out quickly. */ +- if (const_ok_for_arm (val) +- || (can_negate_initial && const_ok_for_arm (-val)) +- || (can_invert && const_ok_for_arm (~val))) ++ if (const_ok_for_op (val, code)) + { + if (generate) + emit_constant_insn (cond, +@@ -2947,15 +3085,6 @@ + switch (code) + { + case SET: +- /* See if we can use movw. */ +- if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0) +- { +- if (generate) +- emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target, +- GEN_INT (val))); +- return 1; +- } +- + /* See if we can do this by sign_extending a constant that is known + to be negative. This is a good, way of doing it, since the shift + may well merge into a subsequent insn. */ +@@ -3306,121 +3435,97 @@ + break; + } + +- for (i = 0; i < 32; i++) +- if (remainder & (1 << i)) +- num_bits_set++; +- +- if ((code == AND) +- || (code != IOR && can_invert && num_bits_set > 16)) +- remainder ^= 0xffffffff; +- else if (code == PLUS && num_bits_set > 16) +- remainder = (-remainder) & 0xffffffff; +- +- /* For XOR, if more than half the bits are set and there's a sequence +- of more than 8 consecutive ones in the pattern then we can XOR by the +- inverted constant and then invert the final result; this may save an +- instruction and might also lead to the final mvn being merged with +- some other operation. */ +- else if (code == XOR && num_bits_set > 16 +- && (count_insns_for_constant (remainder ^ 0xffffffff, +- find_best_start +- (remainder ^ 0xffffffff)) +- < count_insns_for_constant (remainder, +- find_best_start (remainder)))) +- { +- remainder ^= 0xffffffff; +- final_invert = 1; ++ /* Calculate what the instruction sequences would be if we generated it ++ normally, negated, or inverted. */ ++ if (code == AND) ++ /* AND cannot be split into multiple insns, so invert and use BIC. 
*/ ++ insns = 99; ++ else ++ insns = optimal_immediate_sequence (code, remainder, &pos_immediates); ++ ++ if (can_negate) ++ neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff, ++ &neg_immediates); ++ else ++ neg_insns = 99; ++ ++ if (can_invert || final_invert) ++ inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff, ++ &inv_immediates); ++ else ++ inv_insns = 99; ++ ++ immediates = &pos_immediates; ++ ++ /* Is the negated immediate sequence more efficient? */ ++ if (neg_insns < insns && neg_insns <= inv_insns) ++ { ++ insns = neg_insns; ++ immediates = &neg_immediates; ++ } ++ else ++ can_negate = 0; ++ ++ /* Is the inverted immediate sequence more efficient? ++ We must allow for an extra NOT instruction for XOR operations, although ++ there is some chance that the final 'mvn' will get optimized later. */ ++ if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns)) ++ { ++ insns = inv_insns; ++ immediates = &inv_immediates; + } + else + { + can_invert = 0; +- can_negate = 0; ++ final_invert = 0; + } + +- /* Now try and find a way of doing the job in either two or three +- instructions. +- We start by looking for the largest block of zeros that are aligned on +- a 2-bit boundary, we then fill up the temps, wrapping around to the +- top of the word when we drop off the bottom. +- In the worst case this code should produce no more than four insns. +- Thumb-2 constants are shifted, not rotated, so the MSB is always the +- best place to start. */ +- +- /* ??? Use thumb2 replicated constants when the high and low halfwords are +- the same. */ +- { +- /* Now start emitting the insns. */ +- i = find_best_start (remainder); +- do +- { +- int end; +- +- if (i <= 0) +- i += 32; +- if (remainder & (3 << (i - 2))) +- { +- end = i - 8; +- if (end < 0) +- end += 32; +- temp1 = remainder & ((0x0ff << end) +- | ((i < end) ? (0xff >> (32 - end)) : 0)); +- remainder &= ~temp1; +- +- if (generate) +- { +- rtx new_src, temp1_rtx; +- +- if (code == SET || code == MINUS) +- { +- new_src = (subtargets ? gen_reg_rtx (mode) : target); +- if (can_invert && code != MINUS) +- temp1 = ~temp1; +- } +- else +- { +- if ((final_invert || remainder) && subtargets) +- new_src = gen_reg_rtx (mode); +- else +- new_src = target; +- if (can_invert) +- temp1 = ~temp1; +- else if (can_negate) +- temp1 = -temp1; +- } +- +- temp1 = trunc_int_for_mode (temp1, mode); +- temp1_rtx = GEN_INT (temp1); +- +- if (code == SET) +- ; +- else if (code == MINUS) +- temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source); +- else +- temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx); +- +- emit_constant_insn (cond, +- gen_rtx_SET (VOIDmode, new_src, +- temp1_rtx)); +- source = new_src; +- } +- +- if (code == SET) +- { +- can_invert = 0; +- code = PLUS; +- } +- else if (code == MINUS) ++ /* Now output the chosen sequence as instructions. */ ++ if (generate) ++ { ++ for (i = 0; i < insns; i++) ++ { ++ rtx new_src, temp1_rtx; ++ ++ temp1 = immediates->i[i]; ++ ++ if (code == SET || code == MINUS) ++ new_src = (subtargets ? 
gen_reg_rtx (mode) : target); ++ else if ((final_invert || i < (insns - 1)) && subtargets) ++ new_src = gen_reg_rtx (mode); ++ else ++ new_src = target; ++ ++ if (can_invert) ++ temp1 = ~temp1; ++ else if (can_negate) ++ temp1 = -temp1; ++ ++ temp1 = trunc_int_for_mode (temp1, mode); ++ temp1_rtx = GEN_INT (temp1); ++ ++ if (code == SET) ++ ; ++ else if (code == MINUS) ++ temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source); ++ else ++ temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx); ++ ++ emit_constant_insn (cond, ++ gen_rtx_SET (VOIDmode, new_src, ++ temp1_rtx)); ++ source = new_src; ++ ++ if (code == SET) ++ { ++ can_negate = can_invert; ++ can_invert = 0; + code = PLUS; +- +- insns++; +- i -= 8 - step_size; +- } +- /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary +- shifts. */ +- i -= step_size; +- } +- while (remainder); +- } ++ } ++ else if (code == MINUS) ++ code = PLUS; ++ } ++ } + + if (final_invert) + { + +=== modified file 'gcc/config/arm/arm.md' +--- old/gcc/config/arm/arm.md 2011-08-25 11:42:09 +0000 ++++ new/gcc/config/arm/arm.md 2011-08-25 13:26:58 +0000 +@@ -701,21 +701,24 @@ + ;; (plus (reg rN) (reg sp)) into (reg rN). In this case reload will + ;; put the duplicated register first, and not try the commutative version. + (define_insn_and_split "*arm_addsi3" +- [(set (match_operand:SI 0 "s_register_operand" "=r, k,r,r, k,r") +- (plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k,rk") +- (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,L, L,?n")))] ++ [(set (match_operand:SI 0 "s_register_operand" "=r, k,r,r, k, r, k,r, k, r") ++ (plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k, rk,k,rk,k, rk") ++ (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,Pj,Pj,L, L,PJ,PJ,?n")))] + "TARGET_32BIT" + "@ + add%?\\t%0, %1, %2 + add%?\\t%0, %1, %2 + add%?\\t%0, %2, %1 +- sub%?\\t%0, %1, #%n2 +- sub%?\\t%0, %1, #%n2 ++ addw%?\\t%0, %1, %2 ++ addw%?\\t%0, %1, %2 ++ sub%?\\t%0, %1, #%n2 ++ sub%?\\t%0, %1, #%n2 ++ subw%?\\t%0, %1, #%n2 ++ subw%?\\t%0, %1, #%n2 + #" + "TARGET_32BIT + && GET_CODE (operands[2]) == CONST_INT +- && !(const_ok_for_arm (INTVAL (operands[2])) +- || const_ok_for_arm (-INTVAL (operands[2]))) ++ && !const_ok_for_op (INTVAL (operands[2]), PLUS) + && (reload_completed || !arm_eliminable_register (operands[1]))" + [(clobber (const_int 0))] + " +@@ -724,8 +727,9 @@ + operands[1], 0); + DONE; + " +- [(set_attr "length" "4,4,4,4,4,16") +- (set_attr "predicable" "yes")] ++ [(set_attr "length" "4,4,4,4,4,4,4,4,4,16") ++ (set_attr "predicable" "yes") ++ (set_attr "arch" "*,*,*,t2,t2,*,*,t2,t2,*")] + ) + + (define_insn_and_split "*thumb1_addsi3" + +=== modified file 'gcc/config/arm/constraints.md' +--- old/gcc/config/arm/constraints.md 2011-01-03 20:52:22 +0000 ++++ new/gcc/config/arm/constraints.md 2011-08-25 13:26:58 +0000 +@@ -31,7 +31,7 @@ + ;; The following multi-letter normal constraints have been used: + ;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz + ;; in Thumb-1 state: Pa, Pb, Pc, Pd +-;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px ++;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px + + ;; The following memory constraints have been used: + ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us +@@ -74,6 +74,18 @@ + (and (match_code "const_int") + (match_test "(ival & 0xffff0000) == 0"))))) + ++(define_constraint "Pj" ++ "@internal A 12-bit constant suitable for an ADDW or SUBW instruction. 
(Thumb-2)" ++ (and (match_code "const_int") ++ (and (match_test "TARGET_THUMB2") ++ (match_test "(ival & 0xfffff000) == 0")))) ++ ++(define_constraint "PJ" ++ "@internal A constant that satisfies the Pj constrant if negated." ++ (and (match_code "const_int") ++ (and (match_test "TARGET_THUMB2") ++ (match_test "((-ival) & 0xfffff000) == 0")))) ++ + (define_register_constraint "k" "STACK_REG" + "@internal The stack register.") + + +=== added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c' +--- old/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c 2011-08-25 13:31:00 +0000 +@@ -0,0 +1,27 @@ ++/* Ensure simple replicated constant immediates work. */ ++/* { dg-options "-mthumb -O2" } */ ++/* { dg-require-effective-target arm_thumb2_ok } */ ++ ++int ++foo1 (int a) ++{ ++ return a + 0xfefefefe; ++} ++ ++/* { dg-final { scan-assembler "add.*#-16843010" } } */ ++ ++int ++foo2 (int a) ++{ ++ return a - 0xab00ab00; ++} ++ ++/* { dg-final { scan-assembler "sub.*#-1426019584" } } */ ++ ++int ++foo3 (int a) ++{ ++ return a & 0x00cd00cd; ++} ++ ++/* { dg-final { scan-assembler "and.*#13435085" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c' +--- old/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c 2011-08-25 13:31:00 +0000 +@@ -0,0 +1,75 @@ ++/* Ensure split constants can use replicated patterns. */ ++/* { dg-options "-mthumb -O2" } */ ++/* { dg-require-effective-target arm_thumb2_ok } */ ++ ++int ++foo1 (int a) ++{ ++ return a + 0xfe00fe01; ++} ++ ++/* { dg-final { scan-assembler "add.*#-33489408" } } */ ++/* { dg-final { scan-assembler "add.*#1" } } */ ++ ++int ++foo2 (int a) ++{ ++ return a + 0xdd01dd00; ++} ++ ++/* { dg-final { scan-assembler "add.*#-587145984" } } */ ++/* { dg-final { scan-assembler "add.*#65536" } } */ ++ ++int ++foo3 (int a) ++{ ++ return a + 0x00443344; ++} ++ ++/* { dg-final { scan-assembler "add.*#4456516" } } */ ++/* { dg-final { scan-assembler "add.*#13056" } } */ ++ ++int ++foo4 (int a) ++{ ++ return a + 0x77330033; ++} ++ ++/* { dg-final { scan-assembler "add.*#1996488704" } } */ ++/* { dg-final { scan-assembler "add.*#3342387" } } */ ++ ++int ++foo5 (int a) ++{ ++ return a + 0x11221122; ++} ++ ++/* { dg-final { scan-assembler "add.*#285217024" } } */ ++/* { dg-final { scan-assembler "add.*#2228258" } } */ ++ ++int ++foo6 (int a) ++{ ++ return a + 0x66666677; ++} ++ ++/* { dg-final { scan-assembler "add.*#1717986918" } } */ ++/* { dg-final { scan-assembler "add.*#17" } } */ ++ ++int ++foo7 (int a) ++{ ++ return a + 0x99888888; ++} ++ ++/* { dg-final { scan-assembler "add.*#-2004318072" } } */ ++/* { dg-final { scan-assembler "add.*#285212672" } } */ ++ ++int ++foo8 (int a) ++{ ++ return a + 0xdddddfff; ++} ++ ++/* { dg-final { scan-assembler "add.*#-572662307" } } */ ++/* { dg-final { scan-assembler "addw.*#546" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c' +--- old/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c 2011-08-25 13:31:00 +0000 +@@ -0,0 +1,28 @@ ++/* Ensure negated/inverted replicated constant immediates work. 
*/ ++/* { dg-options "-mthumb -O2" } */ ++/* { dg-require-effective-target arm_thumb2_ok } */ ++ ++int ++foo1 (int a) ++{ ++ return a | 0xffffff00; ++} ++ ++/* { dg-final { scan-assembler "orn.*#255" } } */ ++ ++int ++foo2 (int a) ++{ ++ return a & 0xffeeffee; ++} ++ ++/* { dg-final { scan-assembler "bic.*#1114129" } } */ ++ ++int ++foo3 (int a) ++{ ++ return a & 0xaaaaaa00; ++} ++ ++/* { dg-final { scan-assembler "and.*#-1431655766" } } */ ++/* { dg-final { scan-assembler "bic.*#170" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c' +--- old/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c 2011-08-25 13:31:00 +0000 +@@ -0,0 +1,22 @@ ++/* Ensure replicated constants don't make things worse. */ ++/* { dg-options "-mthumb -O2" } */ ++/* { dg-require-effective-target arm_thumb2_ok } */ ++ ++int ++foo1 (int a) ++{ ++ /* It might be tempting to use 0x01000100, but it wouldn't help. */ ++ return a + 0x01f001e0; ++} ++ ++/* { dg-final { scan-assembler "add.*#32505856" } } */ ++/* { dg-final { scan-assembler "add.*#480" } } */ ++ ++int ++foo2 (int a) ++{ ++ return a + 0x0f100e10; ++} ++ ++/* { dg-final { scan-assembler "add.*#252706816" } } */ ++/* { dg-final { scan-assembler "add.*#3600" } } */ + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106803.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106803.patch new file mode 100644 index 0000000000..5b36959b6b --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106803.patch @@ -0,0 +1,201 @@ +2011-09-12 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + PR target/49030 + * config/arm/arm-protos.h (maybe_get_arm_condition_code): Declare. + * config/arm/arm.c (maybe_get_arm_condition_code): New function, + reusing the old code from get_arm_condition_code. Return ARM_NV + for invalid comparison codes. + (get_arm_condition_code): Redefine in terms of + maybe_get_arm_condition_code. + * config/arm/predicates.md (arm_comparison_operator): Use + maybe_get_arm_condition_code. + + gcc/testsuite/ + PR target/49030 + * gcc.dg/torture/pr49030.c: New test. + +=== modified file 'gcc/config/arm/arm-protos.h' +--- old/gcc/config/arm/arm-protos.h 2011-08-25 13:26:58 +0000 ++++ new/gcc/config/arm/arm-protos.h 2011-09-12 11:03:11 +0000 +@@ -179,6 +179,7 @@ + #endif + extern int thumb_shiftable_const (unsigned HOST_WIDE_INT); + #ifdef RTX_CODE ++extern enum arm_cond_code maybe_get_arm_condition_code (rtx); + extern void thumb1_final_prescan_insn (rtx); + extern void thumb2_final_prescan_insn (rtx); + extern const char *thumb_load_double_from_address (rtx *); + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-09-06 12:57:56 +0000 ++++ new/gcc/config/arm/arm.c 2011-09-12 11:03:11 +0000 +@@ -17494,10 +17494,10 @@ + decremented/zeroed by arm_asm_output_opcode as the insns are output. */ + + /* Returns the index of the ARM condition code string in +- `arm_condition_codes'. COMPARISON should be an rtx like +- `(eq (...) (...))'. */ +-static enum arm_cond_code +-get_arm_condition_code (rtx comparison) ++ `arm_condition_codes', or ARM_NV if the comparison is invalid. ++ COMPARISON should be an rtx like `(eq (...) (...))'. 
*/ ++enum arm_cond_code ++maybe_get_arm_condition_code (rtx comparison) + { + enum machine_mode mode = GET_MODE (XEXP (comparison, 0)); + enum arm_cond_code code; +@@ -17521,11 +17521,11 @@ + case CC_DLTUmode: code = ARM_CC; + + dominance: +- gcc_assert (comp_code == EQ || comp_code == NE); +- + if (comp_code == EQ) + return ARM_INVERSE_CONDITION_CODE (code); +- return code; ++ if (comp_code == NE) ++ return code; ++ return ARM_NV; + + case CC_NOOVmode: + switch (comp_code) +@@ -17534,7 +17534,7 @@ + case EQ: return ARM_EQ; + case GE: return ARM_PL; + case LT: return ARM_MI; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_Zmode: +@@ -17542,7 +17542,7 @@ + { + case NE: return ARM_NE; + case EQ: return ARM_EQ; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_Nmode: +@@ -17550,7 +17550,7 @@ + { + case NE: return ARM_MI; + case EQ: return ARM_PL; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CCFPEmode: +@@ -17575,7 +17575,7 @@ + /* UNEQ and LTGT do not have a representation. */ + case UNEQ: /* Fall through. */ + case LTGT: /* Fall through. */ +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_SWPmode: +@@ -17591,7 +17591,7 @@ + case GTU: return ARM_CC; + case LEU: return ARM_CS; + case LTU: return ARM_HI; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_Cmode: +@@ -17599,7 +17599,7 @@ + { + case LTU: return ARM_CS; + case GEU: return ARM_CC; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_CZmode: +@@ -17611,7 +17611,7 @@ + case GTU: return ARM_HI; + case LEU: return ARM_LS; + case LTU: return ARM_CC; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_NCVmode: +@@ -17621,7 +17621,7 @@ + case LT: return ARM_LT; + case GEU: return ARM_CS; + case LTU: return ARM_CC; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CCmode: +@@ -17637,13 +17637,22 @@ + case GTU: return ARM_HI; + case LEU: return ARM_LS; + case LTU: return ARM_CC; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + default: gcc_unreachable (); + } + } + ++/* Like maybe_get_arm_condition_code, but never return ARM_NV. */ ++static enum arm_cond_code ++get_arm_condition_code (rtx comparison) ++{ ++ enum arm_cond_code code = maybe_get_arm_condition_code (comparison); ++ gcc_assert (code != ARM_NV); ++ return code; ++} ++ + /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed + instructions. */ + void + +=== modified file 'gcc/config/arm/predicates.md' +--- old/gcc/config/arm/predicates.md 2011-08-13 08:40:36 +0000 ++++ new/gcc/config/arm/predicates.md 2011-09-05 09:40:19 +0000 +@@ -242,10 +242,9 @@ + ;; True for integer comparisons and, if FP is active, for comparisons + ;; other than LTGT or UNEQ. 
+ (define_special_predicate "arm_comparison_operator" +- (ior (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu") +- (and (match_test "TARGET_32BIT && TARGET_HARD_FLOAT +- && (TARGET_FPA || TARGET_VFP)") +- (match_code "unordered,ordered,unlt,unle,unge,ungt")))) ++ (and (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu, ++ unordered,ordered,unlt,unle,unge,ungt") ++ (match_test "maybe_get_arm_condition_code (op) != ARM_NV"))) + + (define_special_predicate "lt_ge_comparison_operator" + (match_code "lt,ge")) + +=== added file 'gcc/testsuite/gcc.dg/torture/pr49030.c' +--- old/gcc/testsuite/gcc.dg/torture/pr49030.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/torture/pr49030.c 2011-09-05 09:40:19 +0000 +@@ -0,0 +1,19 @@ ++void ++sample_move_d32u24_sS (char *dst, float *src, unsigned long nsamples, ++ unsigned long dst_skip) ++{ ++ long long y; ++ while (nsamples--) ++ { ++ y = (long long) (*src * 8388608.0f) << 8; ++ if (y > 2147483647) { ++ *(int *) dst = 2147483647; ++ } else if (y < -2147483647 - 1) { ++ *(int *) dst = -2147483647 - 1; ++ } else { ++ *(int *) dst = (int) y; ++ } ++ dst += dst_skip; ++ src++; ++ } ++} + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106804.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106804.patch new file mode 100644 index 0000000000..09d0767914 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106804.patch @@ -0,0 +1,38 @@ +2011-09-01 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/predicates.md (shift_amount_operand): Ensure shift + amount is positive. + + gcc/testsuite/ + * gcc.dg/pr50193-1.c: New file. + +=== modified file 'gcc/config/arm/predicates.md' +--- old/gcc/config/arm/predicates.md 2011-09-05 09:40:19 +0000 ++++ new/gcc/config/arm/predicates.md 2011-09-12 11:24:34 +0000 +@@ -132,7 +132,8 @@ + (define_predicate "shift_amount_operand" + (ior (and (match_test "TARGET_ARM") + (match_operand 0 "s_register_operand")) +- (match_operand 0 "const_int_operand"))) ++ (and (match_operand 0 "const_int_operand") ++ (match_test "INTVAL (op) > 0")))) + + (define_predicate "arm_add_operand" + (ior (match_operand 0 "arm_rhs_operand") + +=== added file 'gcc/testsuite/gcc.dg/pr50193-1.c' +--- old/gcc/testsuite/gcc.dg/pr50193-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/pr50193-1.c 2011-09-01 12:22:14 +0000 +@@ -0,0 +1,10 @@ ++/* PR 50193: ARM: ICE on a | (b << negative-constant) */ ++/* Ensure that the compiler doesn't ICE. */ ++ ++/* { dg-options "-O2" } */ ++ ++int ++foo(int a, int b) ++{ ++ return a | (b << -3); /* { dg-warning "left shift count is negative" } */ ++} + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106805.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106805.patch new file mode 100644 index 0000000000..5dec32fa71 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106805.patch @@ -0,0 +1,47 @@ +2011-09-12 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF mainline: + + 2011-09-08 Andrew Stubbs <ams@codesourcery.com> + + PR tree-optimization/50318 + + gcc/ + * tree-ssa-math-opts.c (convert_plusminus_to_widen): Correct + typo in use of mult_rhs1 and mult_rhs2. + + gcc/testsuite/ + * gcc.target/arm/pr50318-1.c: New file. 
+ +=== added file 'gcc/testsuite/gcc.target/arm/pr50318-1.c' +--- old/gcc/testsuite/gcc.target/arm/pr50318-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/pr50318-1.c 2011-09-08 20:11:43 +0000 +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++/* { dg-require-effective-target arm_dsp } */ ++ ++long long test (unsigned int sec, unsigned long long nsecs) ++{ ++ return (long long)(long)sec * 1000000000L + (long long)(unsigned ++ long)nsecs; ++} ++ ++/* { dg-final { scan-assembler "umlal" } } */ + +=== modified file 'gcc/tree-ssa-math-opts.c' +--- old/gcc/tree-ssa-math-opts.c 2011-08-09 10:26:48 +0000 ++++ new/gcc/tree-ssa-math-opts.c 2011-09-08 20:11:43 +0000 +@@ -1699,9 +1699,9 @@ + + /* Handle constants. */ + if (TREE_CODE (mult_rhs1) == INTEGER_CST) +- rhs1 = fold_convert (type1, mult_rhs1); ++ mult_rhs1 = fold_convert (type1, mult_rhs1); + if (TREE_CODE (mult_rhs2) == INTEGER_CST) +- rhs2 = fold_convert (type2, mult_rhs2); ++ mult_rhs2 = fold_convert (type2, mult_rhs2); + + gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code, mult_rhs1, mult_rhs2, + add_rhs); + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106806.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106806.patch new file mode 100644 index 0000000000..2b96854c95 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106806.patch @@ -0,0 +1,92 @@ +2011-09-12 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + gcc/testsuite/ + * gcc.target/arm/pr50099.c: Fix testcase from previous commit. + +2011-09-12 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + LP:838994 + gcc/ + Backport from mainline. + + 2011-09-06 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + PR target/50099 + * config/arm/iterators.md (qhs_zextenddi_cstr): New. + (qhs_zextenddi_op): New. + * config/arm/arm.md ("zero_extend<mode>di2"): Use them. + * config/arm/predicates.md ("arm_extendqisi_mem_op"): + Distinguish between ARM and Thumb2 states. + + gcc/testsuite/ + * gcc.target/arm/pr50099.c: New test. 
+ +=== modified file 'gcc/config/arm/arm.md' +--- old/gcc/config/arm/arm.md 2011-08-25 13:26:58 +0000 ++++ new/gcc/config/arm/arm.md 2011-09-12 12:32:29 +0000 +@@ -4136,8 +4136,8 @@ + + (define_insn "zero_extend<mode>di2" + [(set (match_operand:DI 0 "s_register_operand" "=r") +- (zero_extend:DI (match_operand:QHSI 1 "<qhs_extenddi_op>" +- "<qhs_extenddi_cstr>")))] ++ (zero_extend:DI (match_operand:QHSI 1 "<qhs_zextenddi_op>" ++ "<qhs_zextenddi_cstr>")))] + "TARGET_32BIT <qhs_zextenddi_cond>" + "#" + [(set_attr "length" "8") + +=== modified file 'gcc/config/arm/iterators.md' +--- old/gcc/config/arm/iterators.md 2011-05-03 15:14:56 +0000 ++++ new/gcc/config/arm/iterators.md 2011-09-06 14:29:24 +0000 +@@ -379,10 +379,14 @@ + (define_mode_attr qhs_zextenddi_cond [(SI "") (HI "&& arm_arch6") (QI "")]) + (define_mode_attr qhs_sextenddi_cond [(SI "") (HI "&& arm_arch6") + (QI "&& arm_arch6")]) ++(define_mode_attr qhs_zextenddi_op [(SI "s_register_operand") ++ (HI "nonimmediate_operand") ++ (QI "nonimmediate_operand")]) + (define_mode_attr qhs_extenddi_op [(SI "s_register_operand") + (HI "nonimmediate_operand") +- (QI "nonimmediate_operand")]) +-(define_mode_attr qhs_extenddi_cstr [(SI "r") (HI "rm") (QI "rm")]) ++ (QI "arm_reg_or_extendqisi_mem_op")]) ++(define_mode_attr qhs_extenddi_cstr [(SI "r") (HI "rm") (QI "rUq")]) ++(define_mode_attr qhs_zextenddi_cstr [(SI "r") (HI "rm") (QI "rm")]) + + ;;---------------------------------------------------------------------------- + ;; Code attributes + +=== modified file 'gcc/config/arm/predicates.md' +--- old/gcc/config/arm/predicates.md 2011-09-12 11:24:34 +0000 ++++ new/gcc/config/arm/predicates.md 2011-09-12 12:32:29 +0000 +@@ -289,8 +289,11 @@ + + (define_special_predicate "arm_extendqisi_mem_op" + (and (match_operand 0 "memory_operand") +- (match_test "arm_legitimate_address_outer_p (mode, XEXP (op, 0), +- SIGN_EXTEND, 0)"))) ++ (match_test "TARGET_ARM ? arm_legitimate_address_outer_p (mode, ++ XEXP (op, 0), ++ SIGN_EXTEND, ++ 0) ++ : memory_address_p (QImode, XEXP (op, 0))"))) + + (define_special_predicate "arm_reg_or_extendqisi_mem_op" + (ior (match_operand 0 "arm_extendqisi_mem_op") + +=== added file 'gcc/testsuite/gcc.target/arm/pr50099.c' +--- old/gcc/testsuite/gcc.target/arm/pr50099.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/pr50099.c 2011-09-09 16:42:45 +0000 +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++long long foo (signed char * arg) ++{ ++ long long temp_1; ++ ++ temp_1 = arg[256]; ++ return temp_1; ++} + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106807.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106807.patch new file mode 100644 index 0000000000..9474030f57 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106807.patch @@ -0,0 +1,767 @@ +2011-08-26 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from FSF mainline: + 2011-04-06 Wei Guozhi <carrot@google.com> + + PR target/47855 + gcc/ + * config/arm/arm.md (arm_cmpsi_insn): Compute attr "length". + (arm_cond_branch): Likewise. + (arm_cond_branch_reversed): Likewise. + (arm_jump): Likewise. + (push_multi): Likewise. + * config/arm/constraints.md (Py): New constraint. + + 2011-04-08 Wei Guozhi <carrot@google.com> + + PR target/47855 + * config/arm/arm-protos.h (arm_attr_length_push_multi): New prototype. + * config/arm/arm.c (arm_attr_length_push_multi): New function. 
+ * config/arm/arm.md (*push_multi): Change the length computation to
+ call a C function.
+
+2011-08-26 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
+
+ Backport from FSF mainline:
+
+ 2011-08-18 Jiangning Liu <jiangning.liu@arm.com>
+
+ gcc/
+ * config/arm/arm.md (*ior_scc_scc): Enable for Thumb2 as well.
+ (*ior_scc_scc_cmp): Likewise.
+ (*and_scc_scc): Likewise.
+ (*and_scc_scc_cmp): Likewise.
+ (*and_scc_scc_nodom): Likewise.
+ (*cmp_ite0, *cmp_ite1, *cmp_and, *cmp_ior): Handle Thumb2.
+
+ gcc/testsuite/
+ * gcc.target/arm/thumb2-cond-cmp-1.c: New. Make sure conditional
+ compare can be generated.
+ * gcc.target/arm/thumb2-cond-cmp-2.c: Likewise.
+ * gcc.target/arm/thumb2-cond-cmp-3.c: Likewise.
+ * gcc.target/arm/thumb2-cond-cmp-4.c: Likewise.
+
+=== modified file 'gcc/config/arm/arm-protos.h'
+--- old/gcc/config/arm/arm-protos.h 2011-09-12 11:03:11 +0000
++++ new/gcc/config/arm/arm-protos.h 2011-09-12 14:14:00 +0000
+@@ -156,6 +156,7 @@
+ extern const char *arm_output_memory_barrier (rtx *);
+ extern const char *arm_output_sync_insn (rtx, rtx *);
+ extern unsigned int arm_sync_loop_insns (rtx , rtx *);
++extern int arm_attr_length_push_multi(rtx, rtx);
+
+ #if defined TREE_CODE
+ extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
+
+=== modified file 'gcc/config/arm/arm.c'
+--- old/gcc/config/arm/arm.c 2011-09-12 11:03:11 +0000
++++ new/gcc/config/arm/arm.c 2011-09-12 14:14:00 +0000
+@@ -24391,4 +24391,30 @@
+ return NO_REGS;
+ }
+
++/* Compute the attribute "length" of insn "*push_multi".
++ So this function MUST be kept in sync with that insn pattern. */
++int
++arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
++{
++ int i, regno, hi_reg;
++ int num_saves = XVECLEN (parallel_op, 0);
++
++ /* ARM mode. */
++ if (TARGET_ARM)
++ return 4;
++
++ /* Thumb2 mode. */
++ regno = REGNO (first_op);
++ hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
++ for (i = 1; i < num_saves && !hi_reg; i++)
++ {
++ regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
++ hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
++ }
++
++ if (!hi_reg)
++ return 2;
++ return 4;
++}
++
+ #include "gt-arm.h"
+
+=== modified file 'gcc/config/arm/arm.md'
+--- old/gcc/config/arm/arm.md 2011-09-12 12:32:29 +0000
++++ new/gcc/config/arm/arm.md 2011-09-12 14:14:00 +0000
+@@ -48,6 +48,15 @@
+ (DOM_CC_X_OR_Y 2)
+ ]
+ )
++;; conditional compare combination
++(define_constants
++ [(CMP_CMP 0)
++ (CMN_CMP 1)
++ (CMP_CMN 2)
++ (CMN_CMN 3)
++ (NUM_OF_COND_CMP 4)
++ ]
++)
+
+ ;; UNSPEC Usage:
+ ;; Note: sin and cos are no-longer used.
+@@ -7198,13 +7207,17 @@ + + (define_insn "*arm_cmpsi_insn" + [(set (reg:CC CC_REGNUM) +- (compare:CC (match_operand:SI 0 "s_register_operand" "r,r") +- (match_operand:SI 1 "arm_add_operand" "rI,L")))] ++ (compare:CC (match_operand:SI 0 "s_register_operand" "l,r,r,r") ++ (match_operand:SI 1 "arm_add_operand" "Py,r,rI,L")))] + "TARGET_32BIT" + "@ + cmp%?\\t%0, %1 ++ cmp%?\\t%0, %1 ++ cmp%?\\t%0, %1 + cmn%?\\t%0, #%n1" +- [(set_attr "conds" "set")] ++ [(set_attr "conds" "set") ++ (set_attr "arch" "t2,t2,any,any") ++ (set_attr "length" "2,2,4,4")] + ) + + (define_insn "*cmpsi_shiftsi" +@@ -7375,7 +7388,14 @@ + return \"b%d1\\t%l0\"; + " + [(set_attr "conds" "use") +- (set_attr "type" "branch")] ++ (set_attr "type" "branch") ++ (set (attr "length") ++ (if_then_else ++ (and (ne (symbol_ref "TARGET_THUMB2") (const_int 0)) ++ (and (ge (minus (match_dup 0) (pc)) (const_int -250)) ++ (le (minus (match_dup 0) (pc)) (const_int 256)))) ++ (const_int 2) ++ (const_int 4)))] + ) + + (define_insn "*arm_cond_branch_reversed" +@@ -7394,7 +7414,14 @@ + return \"b%D1\\t%l0\"; + " + [(set_attr "conds" "use") +- (set_attr "type" "branch")] ++ (set_attr "type" "branch") ++ (set (attr "length") ++ (if_then_else ++ (and (ne (symbol_ref "TARGET_THUMB2") (const_int 0)) ++ (and (ge (minus (match_dup 0) (pc)) (const_int -250)) ++ (le (minus (match_dup 0) (pc)) (const_int 256)))) ++ (const_int 2) ++ (const_int 4)))] + ) + + +@@ -7846,7 +7873,14 @@ + return \"b%?\\t%l0\"; + } + " +- [(set_attr "predicable" "yes")] ++ [(set_attr "predicable" "yes") ++ (set (attr "length") ++ (if_then_else ++ (and (ne (symbol_ref "TARGET_THUMB2") (const_int 0)) ++ (and (ge (minus (match_dup 0) (pc)) (const_int -2044)) ++ (le (minus (match_dup 0) (pc)) (const_int 2048)))) ++ (const_int 2) ++ (const_int 4)))] + ) + + (define_insn "*thumb_jump" +@@ -8931,40 +8965,85 @@ + (set_attr "length" "8,12")] + ) + +-;; ??? Is it worth using these conditional patterns in Thumb-2 mode? 
+ (define_insn "*cmp_ite0" + [(set (match_operand 6 "dominant_cc_register" "") + (compare + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" +- [(match_operand:SI 0 "s_register_operand" "r,r,r,r") +- (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")]) ++ [(match_operand:SI 0 "s_register_operand" ++ "l,l,l,r,r,r,r,r,r") ++ (match_operand:SI 1 "arm_add_operand" ++ "lPy,lPy,lPy,rI,L,rI,L,rI,L")]) + (match_operator:SI 5 "arm_comparison_operator" +- [(match_operand:SI 2 "s_register_operand" "r,r,r,r") +- (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")]) ++ [(match_operand:SI 2 "s_register_operand" ++ "l,r,r,l,l,r,r,r,r") ++ (match_operand:SI 3 "arm_add_operand" ++ "lPy,rI,L,lPy,lPy,rI,rI,L,L")]) + (const_int 0)) + (const_int 0)))] +- "TARGET_ARM" ++ "TARGET_32BIT" + "* + { +- static const char * const opcodes[4][2] = +- { +- {\"cmp\\t%2, %3\;cmp%d5\\t%0, %1\", +- \"cmp\\t%0, %1\;cmp%d4\\t%2, %3\"}, +- {\"cmp\\t%2, %3\;cmn%d5\\t%0, #%n1\", +- \"cmn\\t%0, #%n1\;cmp%d4\\t%2, %3\"}, +- {\"cmn\\t%2, #%n3\;cmp%d5\\t%0, %1\", +- \"cmp\\t%0, %1\;cmn%d4\\t%2, #%n3\"}, +- {\"cmn\\t%2, #%n3\;cmn%d5\\t%0, #%n1\", +- \"cmn\\t%0, #%n1\;cmn%d4\\t%2, #%n3\"} +- }; ++ static const char * const cmp1[NUM_OF_COND_CMP][2] = ++ { ++ {\"cmp%d5\\t%0, %1\", ++ \"cmp%d4\\t%2, %3\"}, ++ {\"cmn%d5\\t%0, #%n1\", ++ \"cmp%d4\\t%2, %3\"}, ++ {\"cmp%d5\\t%0, %1\", ++ \"cmn%d4\\t%2, #%n3\"}, ++ {\"cmn%d5\\t%0, #%n1\", ++ \"cmn%d4\\t%2, #%n3\"} ++ }; ++ static const char * const cmp2[NUM_OF_COND_CMP][2] = ++ { ++ {\"cmp\\t%2, %3\", ++ \"cmp\\t%0, %1\"}, ++ {\"cmp\\t%2, %3\", ++ \"cmn\\t%0, #%n1\"}, ++ {\"cmn\\t%2, #%n3\", ++ \"cmp\\t%0, %1\"}, ++ {\"cmn\\t%2, #%n3\", ++ \"cmn\\t%0, #%n1\"} ++ }; ++ static const char * const ite[2] = ++ { ++ \"it\\t%d5\", ++ \"it\\t%d4\" ++ }; ++ static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN, ++ CMP_CMP, CMN_CMP, CMP_CMP, ++ CMN_CMP, CMP_CMN, CMN_CMN}; + int swap = + comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4])); + +- return opcodes[which_alternative][swap]; ++ output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands); ++ if (TARGET_THUMB2) { ++ output_asm_insn (ite[swap], operands); ++ } ++ output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands); ++ return \"\"; + }" + [(set_attr "conds" "set") +- (set_attr "length" "8")] ++ (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any") ++ (set_attr_alternative "length" ++ [(const_int 6) ++ (const_int 8) ++ (const_int 8) ++ (const_int 8) ++ (const_int 8) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10)) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10)) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10)) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10))])] + ) + + (define_insn "*cmp_ite1" +@@ -8972,35 +9051,81 @@ + (compare + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" +- [(match_operand:SI 0 "s_register_operand" "r,r,r,r") +- (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")]) ++ [(match_operand:SI 0 "s_register_operand" ++ "l,l,l,r,r,r,r,r,r") ++ (match_operand:SI 1 "arm_add_operand" ++ "lPy,lPy,lPy,rI,L,rI,L,rI,L")]) + (match_operator:SI 5 "arm_comparison_operator" +- [(match_operand:SI 2 "s_register_operand" "r,r,r,r") +- (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")]) ++ [(match_operand:SI 2 "s_register_operand" ++ "l,r,r,l,l,r,r,r,r") ++ (match_operand:SI 3 "arm_add_operand" ++ "lPy,rI,L,lPy,lPy,rI,rI,L,L")]) + (const_int 1)) + (const_int 
0)))] +- "TARGET_ARM" ++ "TARGET_32BIT" + "* + { +- static const char * const opcodes[4][2] = +- { +- {\"cmp\\t%0, %1\;cmp%d4\\t%2, %3\", +- \"cmp\\t%2, %3\;cmp%D5\\t%0, %1\"}, +- {\"cmn\\t%0, #%n1\;cmp%d4\\t%2, %3\", +- \"cmp\\t%2, %3\;cmn%D5\\t%0, #%n1\"}, +- {\"cmp\\t%0, %1\;cmn%d4\\t%2, #%n3\", +- \"cmn\\t%2, #%n3\;cmp%D5\\t%0, %1\"}, +- {\"cmn\\t%0, #%n1\;cmn%d4\\t%2, #%n3\", +- \"cmn\\t%2, #%n3\;cmn%D5\\t%0, #%n1\"} +- }; ++ static const char * const cmp1[NUM_OF_COND_CMP][2] = ++ { ++ {\"cmp\\t%0, %1\", ++ \"cmp\\t%2, %3\"}, ++ {\"cmn\\t%0, #%n1\", ++ \"cmp\\t%2, %3\"}, ++ {\"cmp\\t%0, %1\", ++ \"cmn\\t%2, #%n3\"}, ++ {\"cmn\\t%0, #%n1\", ++ \"cmn\\t%2, #%n3\"} ++ }; ++ static const char * const cmp2[NUM_OF_COND_CMP][2] = ++ { ++ {\"cmp%d4\\t%2, %3\", ++ \"cmp%D5\\t%0, %1\"}, ++ {\"cmp%d4\\t%2, %3\", ++ \"cmn%D5\\t%0, #%n1\"}, ++ {\"cmn%d4\\t%2, #%n3\", ++ \"cmp%D5\\t%0, %1\"}, ++ {\"cmn%d4\\t%2, #%n3\", ++ \"cmn%D5\\t%0, #%n1\"} ++ }; ++ static const char * const ite[2] = ++ { ++ \"it\\t%d4\", ++ \"it\\t%D5\" ++ }; ++ static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN, ++ CMP_CMP, CMN_CMP, CMP_CMP, ++ CMN_CMP, CMP_CMN, CMN_CMN}; + int swap = + comparison_dominates_p (GET_CODE (operands[5]), + reverse_condition (GET_CODE (operands[4]))); + +- return opcodes[which_alternative][swap]; ++ output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands); ++ if (TARGET_THUMB2) { ++ output_asm_insn (ite[swap], operands); ++ } ++ output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands); ++ return \"\"; + }" + [(set_attr "conds" "set") +- (set_attr "length" "8")] ++ (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any") ++ (set_attr_alternative "length" ++ [(const_int 6) ++ (const_int 8) ++ (const_int 8) ++ (const_int 8) ++ (const_int 8) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10)) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10)) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10)) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10))])] + ) + + (define_insn "*cmp_and" +@@ -9008,34 +9133,80 @@ + (compare + (and:SI + (match_operator 4 "arm_comparison_operator" +- [(match_operand:SI 0 "s_register_operand" "r,r,r,r") +- (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")]) ++ [(match_operand:SI 0 "s_register_operand" ++ "l,l,l,r,r,r,r,r,r") ++ (match_operand:SI 1 "arm_add_operand" ++ "lPy,lPy,lPy,rI,L,rI,L,rI,L")]) + (match_operator:SI 5 "arm_comparison_operator" +- [(match_operand:SI 2 "s_register_operand" "r,r,r,r") +- (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")])) ++ [(match_operand:SI 2 "s_register_operand" ++ "l,r,r,l,l,r,r,r,r") ++ (match_operand:SI 3 "arm_add_operand" ++ "lPy,rI,L,lPy,lPy,rI,rI,L,L")])) + (const_int 0)))] +- "TARGET_ARM" ++ "TARGET_32BIT" + "* + { +- static const char *const opcodes[4][2] = +- { +- {\"cmp\\t%2, %3\;cmp%d5\\t%0, %1\", +- \"cmp\\t%0, %1\;cmp%d4\\t%2, %3\"}, +- {\"cmp\\t%2, %3\;cmn%d5\\t%0, #%n1\", +- \"cmn\\t%0, #%n1\;cmp%d4\\t%2, %3\"}, +- {\"cmn\\t%2, #%n3\;cmp%d5\\t%0, %1\", +- \"cmp\\t%0, %1\;cmn%d4\\t%2, #%n3\"}, +- {\"cmn\\t%2, #%n3\;cmn%d5\\t%0, #%n1\", +- \"cmn\\t%0, #%n1\;cmn%d4\\t%2, #%n3\"} +- }; ++ static const char *const cmp1[NUM_OF_COND_CMP][2] = ++ { ++ {\"cmp%d5\\t%0, %1\", ++ \"cmp%d4\\t%2, %3\"}, ++ {\"cmn%d5\\t%0, #%n1\", ++ \"cmp%d4\\t%2, %3\"}, ++ {\"cmp%d5\\t%0, %1\", ++ \"cmn%d4\\t%2, #%n3\"}, ++ {\"cmn%d5\\t%0, #%n1\", ++ \"cmn%d4\\t%2, #%n3\"} ++ }; ++ static const char *const 
cmp2[NUM_OF_COND_CMP][2] = ++ { ++ {\"cmp\\t%2, %3\", ++ \"cmp\\t%0, %1\"}, ++ {\"cmp\\t%2, %3\", ++ \"cmn\\t%0, #%n1\"}, ++ {\"cmn\\t%2, #%n3\", ++ \"cmp\\t%0, %1\"}, ++ {\"cmn\\t%2, #%n3\", ++ \"cmn\\t%0, #%n1\"} ++ }; ++ static const char *const ite[2] = ++ { ++ \"it\\t%d5\", ++ \"it\\t%d4\" ++ }; ++ static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN, ++ CMP_CMP, CMN_CMP, CMP_CMP, ++ CMN_CMP, CMP_CMN, CMN_CMN}; + int swap = + comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4])); + +- return opcodes[which_alternative][swap]; ++ output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands); ++ if (TARGET_THUMB2) { ++ output_asm_insn (ite[swap], operands); ++ } ++ output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands); ++ return \"\"; + }" + [(set_attr "conds" "set") + (set_attr "predicable" "no") +- (set_attr "length" "8")] ++ (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any") ++ (set_attr_alternative "length" ++ [(const_int 6) ++ (const_int 8) ++ (const_int 8) ++ (const_int 8) ++ (const_int 8) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10)) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10)) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10)) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10))])] + ) + + (define_insn "*cmp_ior" +@@ -9043,34 +9214,80 @@ + (compare + (ior:SI + (match_operator 4 "arm_comparison_operator" +- [(match_operand:SI 0 "s_register_operand" "r,r,r,r") +- (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")]) ++ [(match_operand:SI 0 "s_register_operand" ++ "l,l,l,r,r,r,r,r,r") ++ (match_operand:SI 1 "arm_add_operand" ++ "lPy,lPy,lPy,rI,L,rI,L,rI,L")]) + (match_operator:SI 5 "arm_comparison_operator" +- [(match_operand:SI 2 "s_register_operand" "r,r,r,r") +- (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")])) ++ [(match_operand:SI 2 "s_register_operand" ++ "l,r,r,l,l,r,r,r,r") ++ (match_operand:SI 3 "arm_add_operand" ++ "lPy,rI,L,lPy,lPy,rI,rI,L,L")])) + (const_int 0)))] +- "TARGET_ARM" ++ "TARGET_32BIT" + "* +-{ +- static const char *const opcodes[4][2] = + { +- {\"cmp\\t%0, %1\;cmp%D4\\t%2, %3\", +- \"cmp\\t%2, %3\;cmp%D5\\t%0, %1\"}, +- {\"cmn\\t%0, #%n1\;cmp%D4\\t%2, %3\", +- \"cmp\\t%2, %3\;cmn%D5\\t%0, #%n1\"}, +- {\"cmp\\t%0, %1\;cmn%D4\\t%2, #%n3\", +- \"cmn\\t%2, #%n3\;cmp%D5\\t%0, %1\"}, +- {\"cmn\\t%0, #%n1\;cmn%D4\\t%2, #%n3\", +- \"cmn\\t%2, #%n3\;cmn%D5\\t%0, #%n1\"} +- }; +- int swap = +- comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4])); ++ static const char *const cmp1[NUM_OF_COND_CMP][2] = ++ { ++ {\"cmp\\t%0, %1\", ++ \"cmp\\t%2, %3\"}, ++ {\"cmn\\t%0, #%n1\", ++ \"cmp\\t%2, %3\"}, ++ {\"cmp\\t%0, %1\", ++ \"cmn\\t%2, #%n3\"}, ++ {\"cmn\\t%0, #%n1\", ++ \"cmn\\t%2, #%n3\"} ++ }; ++ static const char *const cmp2[NUM_OF_COND_CMP][2] = ++ { ++ {\"cmp%D4\\t%2, %3\", ++ \"cmp%D5\\t%0, %1\"}, ++ {\"cmp%D4\\t%2, %3\", ++ \"cmn%D5\\t%0, #%n1\"}, ++ {\"cmn%D4\\t%2, #%n3\", ++ \"cmp%D5\\t%0, %1\"}, ++ {\"cmn%D4\\t%2, #%n3\", ++ \"cmn%D5\\t%0, #%n1\"} ++ }; ++ static const char *const ite[2] = ++ { ++ \"it\\t%D4\", ++ \"it\\t%D5\" ++ }; ++ static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN, ++ CMP_CMP, CMN_CMP, CMP_CMP, ++ CMN_CMP, CMP_CMN, CMN_CMN}; ++ int swap = ++ comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4])); + +- return opcodes[which_alternative][swap]; +-} +-" ++ output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands); ++ if 
(TARGET_THUMB2) { ++ output_asm_insn (ite[swap], operands); ++ } ++ output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands); ++ return \"\"; ++ } ++ " + [(set_attr "conds" "set") +- (set_attr "length" "8")] ++ (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any") ++ (set_attr_alternative "length" ++ [(const_int 6) ++ (const_int 8) ++ (const_int 8) ++ (const_int 8) ++ (const_int 8) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10)) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10)) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10)) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10))])] + ) + + (define_insn_and_split "*ior_scc_scc" +@@ -9082,11 +9299,11 @@ + [(match_operand:SI 4 "s_register_operand" "r") + (match_operand:SI 5 "arm_add_operand" "rIL")]))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM ++ "TARGET_32BIT + && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_OR_Y) + != CCmode)" + "#" +- "TARGET_ARM && reload_completed" ++ "TARGET_32BIT && reload_completed" + [(set (match_dup 7) + (compare + (ior:SI +@@ -9115,9 +9332,9 @@ + (set (match_operand:SI 7 "s_register_operand" "=r") + (ior:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)]) + (match_op_dup 6 [(match_dup 4) (match_dup 5)])))] +- "TARGET_ARM" ++ "TARGET_32BIT" + "#" +- "TARGET_ARM && reload_completed" ++ "TARGET_32BIT && reload_completed" + [(set (match_dup 0) + (compare + (ior:SI +@@ -9138,11 +9355,11 @@ + [(match_operand:SI 4 "s_register_operand" "r") + (match_operand:SI 5 "arm_add_operand" "rIL")]))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM ++ "TARGET_32BIT + && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y) + != CCmode)" + "#" +- "TARGET_ARM && reload_completed ++ "TARGET_32BIT && reload_completed + && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y) + != CCmode)" + [(set (match_dup 7) +@@ -9173,9 +9390,9 @@ + (set (match_operand:SI 7 "s_register_operand" "=r") + (and:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)]) + (match_op_dup 6 [(match_dup 4) (match_dup 5)])))] +- "TARGET_ARM" ++ "TARGET_32BIT" + "#" +- "TARGET_ARM && reload_completed" ++ "TARGET_32BIT && reload_completed" + [(set (match_dup 0) + (compare + (and:SI +@@ -9200,11 +9417,11 @@ + [(match_operand:SI 4 "s_register_operand" "r,r,r") + (match_operand:SI 5 "arm_add_operand" "rIL,rIL,rIL")]))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM ++ "TARGET_32BIT + && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y) + == CCmode)" + "#" +- "TARGET_ARM && reload_completed" ++ "TARGET_32BIT && reload_completed" + [(parallel [(set (match_dup 0) + (match_op_dup 3 [(match_dup 1) (match_dup 2)])) + (clobber (reg:CC CC_REGNUM))]) +@@ -10314,6 +10531,8 @@ + ;; Push multiple registers to the stack. Registers are in parallel (use ...) + ;; expressions. For simplicity, the first register is also in the unspec + ;; part. ++;; To avoid the usage of GNU extension, the length attribute is computed ++;; in a C function arm_attr_length_push_multi. 
+ (define_insn "*push_multi" + [(match_parallel 2 "multi_register_push" + [(set (match_operand:BLK 0 "memory_operand" "=m") +@@ -10353,7 +10572,9 @@ + + return \"\"; + }" +- [(set_attr "type" "store4")] ++ [(set_attr "type" "store4") ++ (set (attr "length") ++ (symbol_ref "arm_attr_length_push_multi (operands[2], operands[1])"))] + ) + + (define_insn "stack_tie" + +=== modified file 'gcc/config/arm/constraints.md' +--- old/gcc/config/arm/constraints.md 2011-08-25 13:26:58 +0000 ++++ new/gcc/config/arm/constraints.md 2011-09-12 14:14:00 +0000 +@@ -31,7 +31,7 @@ + ;; The following multi-letter normal constraints have been used: + ;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz + ;; in Thumb-1 state: Pa, Pb, Pc, Pd +-;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px ++;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py + + ;; The following memory constraints have been used: + ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us +@@ -201,6 +201,11 @@ + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1"))) + ++(define_constraint "Py" ++ "@internal In Thumb-2 state a constant in the range 0 to 255" ++ (and (match_code "const_int") ++ (match_test "TARGET_THUMB2 && ival >= 0 && ival <= 255"))) ++ + (define_constraint "G" + "In ARM/Thumb-2 state a valid FPA immediate constant." + (and (match_code "const_double") + +=== added file 'gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-1.c' +--- old/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-1.c 2011-09-12 14:14:00 +0000 +@@ -0,0 +1,13 @@ ++/* Use conditional compare */ ++/* { dg-options "-O2" } */ ++/* { dg-skip-if "" { arm_thumb1_ok } } */ ++/* { dg-final { scan-assembler "cmpne" } } */ ++ ++int f(int i, int j) ++{ ++ if ( (i == '+') || (j == '-') ) { ++ return 1; ++ } else { ++ return 0; ++ } ++} + +=== added file 'gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-2.c' +--- old/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-2.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-2.c 2011-09-12 14:14:00 +0000 +@@ -0,0 +1,13 @@ ++/* Use conditional compare */ ++/* { dg-options "-O2" } */ ++/* { dg-skip-if "" { arm_thumb1_ok } } */ ++/* { dg-final { scan-assembler "cmpeq" } } */ ++ ++int f(int i, int j) ++{ ++ if ( (i == '+') && (j == '-') ) { ++ return 1; ++ } else { ++ return 0; ++ } ++} + +=== added file 'gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-3.c' +--- old/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-3.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-3.c 2011-09-12 14:14:00 +0000 +@@ -0,0 +1,12 @@ ++/* Use conditional compare */ ++/* { dg-options "-O2" } */ ++/* { dg-skip-if "" { arm_thumb1_ok } } */ ++/* { dg-final { scan-assembler "cmpgt" } } */ ++ ++int f(int i, int j) ++{ ++ if ( (i >= '+') ? (j > '-') : 0) ++ return 1; ++ else ++ return 0; ++} + +=== added file 'gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-4.c' +--- old/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-4.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-4.c 2011-09-12 14:14:00 +0000 +@@ -0,0 +1,12 @@ ++/* Use conditional compare */ ++/* { dg-options "-O2" } */ ++/* { dg-skip-if "" { arm_thumb1_ok } } */ ++/* { dg-final { scan-assembler "cmpgt" } } */ ++ ++int f(int i, int j) ++{ ++ if ( (i >= '+') ? 
(j <= '-') : 1) ++ return 1; ++ else ++ return 0; ++} + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106811.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106811.patch new file mode 100644 index 0000000000..41b5c6dbf9 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106811.patch @@ -0,0 +1,203 @@ +2011-09-15 Richard Sandiford <richard.sandiford@linaro.org> + + Revert: + + gcc/ + PR target/49030 + * config/arm/arm-protos.h (maybe_get_arm_condition_code): Declare. + * config/arm/arm.c (maybe_get_arm_condition_code): New function, + reusing the old code from get_arm_condition_code. Return ARM_NV + for invalid comparison codes. + (get_arm_condition_code): Redefine in terms of + maybe_get_arm_condition_code. + * config/arm/predicates.md (arm_comparison_operator): Use + maybe_get_arm_condition_code. + + gcc/testsuite/ + PR target/49030 + * gcc.dg/torture/pr49030.c: New test. + +=== modified file 'gcc/config/arm/arm-protos.h' +--- old/gcc/config/arm/arm-protos.h 2011-09-12 14:14:00 +0000 ++++ new/gcc/config/arm/arm-protos.h 2011-09-15 09:45:31 +0000 +@@ -180,7 +180,6 @@ + #endif + extern int thumb_shiftable_const (unsigned HOST_WIDE_INT); + #ifdef RTX_CODE +-extern enum arm_cond_code maybe_get_arm_condition_code (rtx); + extern void thumb1_final_prescan_insn (rtx); + extern void thumb2_final_prescan_insn (rtx); + extern const char *thumb_load_double_from_address (rtx *); + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-09-12 14:14:00 +0000 ++++ new/gcc/config/arm/arm.c 2011-09-15 09:45:31 +0000 +@@ -17494,10 +17494,10 @@ + decremented/zeroed by arm_asm_output_opcode as the insns are output. */ + + /* Returns the index of the ARM condition code string in +- `arm_condition_codes', or ARM_NV if the comparison is invalid. +- COMPARISON should be an rtx like `(eq (...) (...))'. */ +-enum arm_cond_code +-maybe_get_arm_condition_code (rtx comparison) ++ `arm_condition_codes'. COMPARISON should be an rtx like ++ `(eq (...) (...))'. */ ++static enum arm_cond_code ++get_arm_condition_code (rtx comparison) + { + enum machine_mode mode = GET_MODE (XEXP (comparison, 0)); + enum arm_cond_code code; +@@ -17521,11 +17521,11 @@ + case CC_DLTUmode: code = ARM_CC; + + dominance: ++ gcc_assert (comp_code == EQ || comp_code == NE); ++ + if (comp_code == EQ) + return ARM_INVERSE_CONDITION_CODE (code); +- if (comp_code == NE) +- return code; +- return ARM_NV; ++ return code; + + case CC_NOOVmode: + switch (comp_code) +@@ -17534,7 +17534,7 @@ + case EQ: return ARM_EQ; + case GE: return ARM_PL; + case LT: return ARM_MI; +- default: return ARM_NV; ++ default: gcc_unreachable (); + } + + case CC_Zmode: +@@ -17542,7 +17542,7 @@ + { + case NE: return ARM_NE; + case EQ: return ARM_EQ; +- default: return ARM_NV; ++ default: gcc_unreachable (); + } + + case CC_Nmode: +@@ -17550,7 +17550,7 @@ + { + case NE: return ARM_MI; + case EQ: return ARM_PL; +- default: return ARM_NV; ++ default: gcc_unreachable (); + } + + case CCFPEmode: +@@ -17575,7 +17575,7 @@ + /* UNEQ and LTGT do not have a representation. */ + case UNEQ: /* Fall through. */ + case LTGT: /* Fall through. 
*/ +- default: return ARM_NV; ++ default: gcc_unreachable (); + } + + case CC_SWPmode: +@@ -17591,7 +17591,7 @@ + case GTU: return ARM_CC; + case LEU: return ARM_CS; + case LTU: return ARM_HI; +- default: return ARM_NV; ++ default: gcc_unreachable (); + } + + case CC_Cmode: +@@ -17599,7 +17599,7 @@ + { + case LTU: return ARM_CS; + case GEU: return ARM_CC; +- default: return ARM_NV; ++ default: gcc_unreachable (); + } + + case CC_CZmode: +@@ -17611,7 +17611,7 @@ + case GTU: return ARM_HI; + case LEU: return ARM_LS; + case LTU: return ARM_CC; +- default: return ARM_NV; ++ default: gcc_unreachable (); + } + + case CC_NCVmode: +@@ -17621,7 +17621,7 @@ + case LT: return ARM_LT; + case GEU: return ARM_CS; + case LTU: return ARM_CC; +- default: return ARM_NV; ++ default: gcc_unreachable (); + } + + case CCmode: +@@ -17637,22 +17637,13 @@ + case GTU: return ARM_HI; + case LEU: return ARM_LS; + case LTU: return ARM_CC; +- default: return ARM_NV; ++ default: gcc_unreachable (); + } + + default: gcc_unreachable (); + } + } + +-/* Like maybe_get_arm_condition_code, but never return ARM_NV. */ +-static enum arm_cond_code +-get_arm_condition_code (rtx comparison) +-{ +- enum arm_cond_code code = maybe_get_arm_condition_code (comparison); +- gcc_assert (code != ARM_NV); +- return code; +-} +- + /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed + instructions. */ + void + +=== modified file 'gcc/config/arm/predicates.md' +--- old/gcc/config/arm/predicates.md 2011-09-12 12:32:29 +0000 ++++ new/gcc/config/arm/predicates.md 2011-09-15 09:45:31 +0000 +@@ -243,9 +243,10 @@ + ;; True for integer comparisons and, if FP is active, for comparisons + ;; other than LTGT or UNEQ. + (define_special_predicate "arm_comparison_operator" +- (and (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu, +- unordered,ordered,unlt,unle,unge,ungt") +- (match_test "maybe_get_arm_condition_code (op) != ARM_NV"))) ++ (ior (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu") ++ (and (match_test "TARGET_32BIT && TARGET_HARD_FLOAT ++ && (TARGET_FPA || TARGET_VFP)") ++ (match_code "unordered,ordered,unlt,unle,unge,ungt")))) + + (define_special_predicate "lt_ge_comparison_operator" + (match_code "lt,ge")) + +=== removed file 'gcc/testsuite/gcc.dg/torture/pr49030.c' +--- old/gcc/testsuite/gcc.dg/torture/pr49030.c 2011-09-05 09:40:19 +0000 ++++ new/gcc/testsuite/gcc.dg/torture/pr49030.c 1970-01-01 00:00:00 +0000 +@@ -1,19 +0,0 @@ +-void +-sample_move_d32u24_sS (char *dst, float *src, unsigned long nsamples, +- unsigned long dst_skip) +-{ +- long long y; +- while (nsamples--) +- { +- y = (long long) (*src * 8388608.0f) << 8; +- if (y > 2147483647) { +- *(int *) dst = 2147483647; +- } else if (y < -2147483647 - 1) { +- *(int *) dst = -2147483647 - 1; +- } else { +- *(int *) dst = (int) y; +- } +- dst += dst_skip; +- src++; +- } +-} + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106814.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106814.patch new file mode 100644 index 0000000000..91b2191cb8 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106814.patch @@ -0,0 +1,80 @@ +2011-09-22 Revital Eres <revital.eres@linaro.org> + + gcc/ + Backport from trunk -r178804: + modulo-sched.c (remove_node_from_ps): Return void + instead of bool. + (optimize_sc): Adjust call to remove_node_from_ps. + (sms_schedule): Add print info. 
+ +=== modified file 'gcc/modulo-sched.c' +--- old/gcc/modulo-sched.c 2011-08-09 04:51:48 +0000 ++++ new/gcc/modulo-sched.c 2011-09-14 11:06:06 +0000 +@@ -211,7 +211,7 @@ + static bool try_scheduling_node_in_cycle (partial_schedule_ptr, ddg_node_ptr, + int, int, sbitmap, int *, sbitmap, + sbitmap); +-static bool remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr); ++static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr); + + #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap) + #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time) +@@ -834,8 +834,7 @@ + if (next_ps_i->node->cuid == g->closing_branch->cuid) + break; + +- gcc_assert (next_ps_i); +- gcc_assert (remove_node_from_ps (ps, next_ps_i)); ++ remove_node_from_ps (ps, next_ps_i); + success = + try_scheduling_node_in_cycle (ps, g->closing_branch, + g->closing_branch->cuid, c, +@@ -1485,8 +1484,8 @@ + if (dump_file) + { + fprintf (dump_file, +- "SMS succeeded %d %d (with ii, sc)\n", ps->ii, +- stage_count); ++ "%s:%d SMS succeeded %d %d (with ii, sc)\n", ++ insn_file (tail), insn_line (tail), ps->ii, stage_count); + print_partial_schedule (ps, dump_file); + } + +@@ -2810,22 +2809,18 @@ + } + + +-/* Removes the given PS_INSN from the partial schedule. Returns false if the +- node is not found in the partial schedule, else returns true. */ +-static bool ++/* Removes the given PS_INSN from the partial schedule. */ ++static void + remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i) + { + int row; + +- if (!ps || !ps_i) +- return false; +- ++ gcc_assert (ps && ps_i); ++ + row = SMODULO (ps_i->cycle, ps->ii); + if (! ps_i->prev_in_row) + { +- if (ps_i != ps->rows[row]) +- return false; +- ++ gcc_assert (ps_i == ps->rows[row]); + ps->rows[row] = ps_i->next_in_row; + if (ps->rows[row]) + ps->rows[row]->prev_in_row = NULL; +@@ -2839,7 +2834,7 @@ + + ps->rows_length[row] -= 1; + free (ps_i); +- return true; ++ return; + } + + /* Unlike what literature describes for modulo scheduling (which focuses + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106815.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106815.patch new file mode 100644 index 0000000000..16779bbf1e --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106815.patch @@ -0,0 +1,528 @@ +2011-09-25 Ira Rosen <ira.rosen@linaro.org> + + gcc/testsuite/ + * lib/target-supports.exp (check_effective_target_vect_multiple_sizes): + Replace check_effective_target_arm_neon with + check_effective_target_arm_neon_ok. + + Backport from mainline: + + 2011-09-06 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * config/arm/arm.c (arm_preferred_simd_mode): Check + TARGET_NEON_VECTORIZE_DOUBLE instead of + TARGET_NEON_VECTORIZE_QUAD. + (arm_autovectorize_vector_sizes): Likewise. + * config/arm/arm.opt (mvectorize-with-neon-quad): Make inverse + mask of mvectorize-with-neon-double. Add RejectNegative. + (mvectorize-with-neon-double): New. + + gcc/testsuite/ + * lib/target-supports.exp (check_effective_target_vect_multiple_sizes): + New procedure. + (add_options_for_quad_vectors): Replace with ... + (add_options_for_double_vectors): ... this. + * gfortran.dg/vect/pr19049.f90: Expect more printings on targets that + support multiple vector sizes since the vectorizer attempts to + vectorize with both vector sizes. 
+ * gcc.dg/vect/no-vfa-vect-79.c, + gcc.dg/vect/no-vfa-vect-102a.c, gcc.dg/vect/vect-outer-1a.c, + gcc.dg/vect/vect-outer-1b.c, gcc.dg/vect/vect-outer-2b.c, + gcc.dg/vect/vect-outer-3a.c, gcc.dg/vect/no-vfa-vect-37.c, + gcc.dg/vect/vect-outer-3b.c, gcc.dg/vect/no-vfa-vect-101.c, + gcc.dg/vect/no-vfa-vect-102.c, gcc.dg/vect/vect-reduc-dot-s8b.c, + gcc.dg/vect/vect-outer-1.c, gcc.dg/vect/vect-104.c: Likewise. + * gcc.dg/vect/vect-42.c: Run with 64 bit vectors if applicable. + * gcc.dg/vect/vect-multitypes-6.c, gcc.dg/vect/vect-52.c, + gcc.dg/vect/vect-54.c, gcc.dg/vect/vect-46.c, gcc.dg/vect/vect-48.c, + gcc.dg/vect/vect-96.c, gcc.dg/vect/vect-multitypes-3.c, + gcc.dg/vect/vect-40.c: Likewise. + * gcc.dg/vect/vect-outer-5.c: Remove quad-vectors option as + redundant. + * gcc.dg/vect/vect-109.c, gcc.dg/vect/vect-peel-1.c, + gcc.dg/vect/vect-peel-2.c, gcc.dg/vect/slp-25.c, + gcc.dg/vect/vect-multitypes-1.c, gcc.dg/vect/slp-3.c, + gcc.dg/vect/no-vfa-pr29145.c, gcc.dg/vect/vect-multitypes-4.c: + Likewise. + * gcc.dg/vect/vect-peel-4.c: Make ia global. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-09-15 09:45:31 +0000 ++++ new/gcc/config/arm/arm.c 2011-09-19 07:44:24 +0000 +@@ -22974,7 +22974,7 @@ + return false; + } + +-/* Use the option -mvectorize-with-neon-quad to override the use of doubleword ++/* Use the option -mvectorize-with-neon-double to override the use of quardword + registers when autovectorizing for Neon, at least until multiple vector + widths are supported properly by the middle-end. */ + +@@ -22985,15 +22985,15 @@ + switch (mode) + { + case SFmode: +- return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode; ++ return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode; + case SImode: +- return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode; ++ return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode; + case HImode: +- return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode; ++ return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode; + case QImode: +- return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode; ++ return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode; + case DImode: +- if (TARGET_NEON_VECTORIZE_QUAD) ++ if (!TARGET_NEON_VECTORIZE_DOUBLE) + return V2DImode; + break; + +@@ -24226,7 +24226,7 @@ + static unsigned int + arm_autovectorize_vector_sizes (void) + { +- return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0; ++ return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8); + } + + static bool + +=== modified file 'gcc/config/arm/arm.opt' +--- old/gcc/config/arm/arm.opt 2009-06-18 11:24:10 +0000 ++++ new/gcc/config/arm/arm.opt 2011-09-19 07:44:24 +0000 +@@ -158,9 +158,13 @@ + Assume big endian bytes, little endian words + + mvectorize-with-neon-quad +-Target Report Mask(NEON_VECTORIZE_QUAD) ++Target Report RejectNegative InverseMask(NEON_VECTORIZE_DOUBLE) + Use Neon quad-word (rather than double-word) registers for vectorization + ++mvectorize-with-neon-double ++Target Report RejectNegative Mask(NEON_VECTORIZE_DOUBLE) ++Use Neon double-word (rather than quad-word) registers for vectorization ++ + mword-relocations + Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS) + Only generate absolute relocations on word sized values. 
+ +=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c' +--- old/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2011-04-28 11:46:58 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c' +--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c 2011-09-19 07:44:24 +0000 +@@ -45,6 +45,7 @@ + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c' +--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c 2007-09-12 07:48:44 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c 2011-09-19 07:44:24 +0000 +@@ -53,6 +53,7 @@ + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c' +--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c 2007-09-12 07:48:44 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c 2011-09-19 07:44:24 +0000 +@@ -53,6 +53,7 @@ + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c' +--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c 2009-05-08 12:39:01 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c 2011-09-19 07:44:24 +0000 +@@ -58,5 +58,6 @@ + If/when the aliasing problems are resolved, unalignment may + prevent vectorization on some targets. 
*/ + /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { xfail *-*-* } } } */ +-/* { dg-final { scan-tree-dump-times "can't determine dependence between" 2 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "can't determine dependence" 4 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c' +--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c 2009-05-08 12:39:01 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c 2011-09-19 07:44:24 +0000 +@@ -46,5 +46,6 @@ + If/when the aliasing problems are resolved, unalignment may + prevent vectorization on some targets. */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ +-/* { dg-final { scan-tree-dump-times "can't determine dependence between" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/slp-25.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-25.c 2010-10-04 14:59:30 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-25.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/slp-3.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-3.c 2011-04-28 11:46:58 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-3.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-104.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-104.c 2007-09-12 07:48:44 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-104.c 2011-09-19 07:44:24 +0000 +@@ -64,6 +64,7 @@ + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-109.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-109.c 2010-10-04 14:59:30 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-109.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-40.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-40.c 2009-05-25 14:18:21 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-40.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_float } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-42.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-42.c 2010-10-04 14:59:30 +0000 ++++ 
new/gcc/testsuite/gcc.dg/vect/vect-42.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_float } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-46.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-46.c 2009-05-25 14:18:21 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-46.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_float } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-48.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-48.c 2009-11-04 10:22:22 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-48.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_float } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-52.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-52.c 2009-11-04 10:22:22 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-52.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_float } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-54.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-54.c 2009-10-27 11:46:07 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-54.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_float } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-96.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-96.c 2010-10-04 14:59:30 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-96.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_int } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c 2010-10-04 14:59:30 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c 2009-11-04 10:22:22 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_int } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c 2010-10-04 14:59:30 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c 2009-11-10 18:01:22 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_int } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + 
#include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-1.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-outer-1.c 2009-05-08 12:39:01 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-outer-1.c 2011-09-19 07:44:24 +0000 +@@ -22,5 +22,6 @@ + } + + /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ +-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-1a.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c 2009-06-16 06:21:12 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c 2011-09-19 07:44:24 +0000 +@@ -20,5 +20,6 @@ + } + + /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ +-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-1b.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-outer-1b.c 2007-08-19 11:02:48 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-outer-1b.c 2011-09-19 07:44:24 +0000 +@@ -22,5 +22,6 @@ + } + + /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ +-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-2b.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-outer-2b.c 2009-05-08 12:39:01 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-outer-2b.c 2011-09-19 07:44:24 +0000 +@@ -37,5 +37,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "strided access in outer loop." 
1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-3a.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-outer-3a.c 2009-05-08 12:39:01 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-outer-3a.c 2011-09-19 07:44:24 +0000 +@@ -49,5 +49,6 @@ + } + + /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */ +-/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 2 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 2 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 3 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-3b.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-outer-3b.c 2009-05-08 12:39:01 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-outer-3b.c 2011-09-19 07:44:24 +0000 +@@ -49,5 +49,6 @@ + } + + /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ +-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 4 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-5.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2011-04-28 11:46:58 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_float } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include <signal.h> + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-peel-1.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-peel-1.c 2011-01-10 12:41:40 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-peel-1.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-peel-2.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-peel-2.c 2011-01-10 12:41:40 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-peel-2.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-peel-4.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-peel-4.c 2011-01-10 12:41:40 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-peel-4.c 2011-09-19 07:44:24 +0000 +@@ -6,12 +6,12 @@ + #define N 128 + + int ib[N+7]; ++int ia[N+1]; + + __attribute__ ((noinline)) + int main1 () + { + int i; +- int ia[N+1]; + + /* Don't peel keeping one load and the store aligned. 
*/ + for (i = 0; i <= N; i++) + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c 2010-05-27 12:23:45 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c 2011-09-19 07:44:24 +0000 +@@ -58,7 +58,8 @@ + } + + /* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" { xfail *-*-* } } } */ +-/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_multiple_sizes } } } */ + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ + + +=== modified file 'gcc/testsuite/gfortran.dg/vect/pr19049.f90' +--- old/gcc/testsuite/gfortran.dg/vect/pr19049.f90 2005-07-25 11:05:07 +0000 ++++ new/gcc/testsuite/gfortran.dg/vect/pr19049.f90 2011-09-19 07:44:24 +0000 +@@ -19,6 +19,7 @@ + end + + ! { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } +-! { dg-final { scan-tree-dump-times "complicated access pattern" 1 "vect" } } ++! { dg-final { scan-tree-dump-times "complicated access pattern" 1 "vect" { xfail vect_multiple_sizes } } } ++! { dg-final { scan-tree-dump-times "complicated access pattern" 2 "vect" { target vect_multiple_sizes } } } + ! { dg-final { cleanup-tree-dump "vect" } } + + +=== modified file 'gcc/testsuite/lib/target-supports.exp' +--- old/gcc/testsuite/lib/target-supports.exp 2011-08-13 08:32:32 +0000 ++++ new/gcc/testsuite/lib/target-supports.exp 2011-09-20 07:54:28 +0000 +@@ -3265,6 +3265,24 @@ + }] + } + ++# Return 1 if the target supports multiple vector sizes ++ ++proc check_effective_target_vect_multiple_sizes { } { ++ global et_vect_multiple_sizes ++ ++ if [info exists et_vect_multiple_sizes_saved] { ++ verbose "check_effective_target_vect_multiple_sizes: using cached result" 2 ++ } else { ++ set et_vect_multiple_sizes_saved 0 ++ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } { ++ set et_vect_multiple_sizes_saved 1 ++ } ++ } ++ ++ verbose "check_effective_target_vect_multiple_sizes: returning $et_vect_multiple_sizes_saved" 2 ++ return $et_vect_multiple_sizes_saved ++} ++ + # Return 1 if the target supports section-anchors + + proc check_effective_target_section_anchors { } { +@@ -3648,11 +3666,11 @@ + return $flags + } + +-# Add to FLAGS the flags needed to enable 128-bit vectors. ++# Add to FLAGS the flags needed to enable 64-bit vectors. + +-proc add_options_for_quad_vectors { flags } { ++proc add_options_for_double_vectors { flags } { + if [is-effective-target arm_neon_ok] { +- return "$flags -mvectorize-with-neon-quad" ++ return "$flags -mvectorize-with-neon-double" + } + + return $flags + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106816.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106816.patch new file mode 100644 index 0000000000..2f70b1b9c2 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106816.patch @@ -0,0 +1,387 @@ +2011-09-28 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-09-28 Richard Sandiford <richard.sandiford@linaro.org> + + * config/arm/neon.md (neon_move_lo_quad_<mode>): Delete. + (neon_move_hi_quad_<mode>): Likewise. 
+ (move_hi_quad_<mode>, move_lo_quad_<mode>): Use subreg moves. + +2011-09-28 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-09-27 Richard Sandiford <richard.sandiford@linaro.org> + + * config/arm/neon.md (neon_vget_highv16qi, neon_vget_highv8hi) + (neon_vget_highv4si, neon_vget_highv4sf, neon_vget_highv2di) + (neon_vget_lowv16qi, neon_vget_lowv8hi, neon_vget_lowv4si) + (neon_vget_lowv4sf, neon_vget_lowv2di): Turn into define_expands + that produce subreg moves. Define using VQX iterators. + +2011-09-28 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-09-14 Richard Sandiford <richard.sandiford@linaro.org> + + * simplify-rtx.c (simplify_subreg): Check that the inner mode is + a scalar integer before applying integer-only optimisations to + inner arithmetic. + +=== modified file 'gcc/config/arm/neon.md' +--- old/gcc/config/arm/neon.md 2011-07-04 14:03:49 +0000 ++++ new/gcc/config/arm/neon.md 2011-09-28 15:14:59 +0000 +@@ -1235,66 +1235,14 @@ + (const_string "neon_int_1") (const_string "neon_int_5")))] + ) + +-; FIXME: We wouldn't need the following insns if we could write subregs of +-; vector registers. Make an attempt at removing unnecessary moves, though +-; we're really at the mercy of the register allocator. +- +-(define_insn "neon_move_lo_quad_<mode>" +- [(set (match_operand:ANY128 0 "s_register_operand" "+w") +- (vec_concat:ANY128 +- (match_operand:<V_HALF> 1 "s_register_operand" "w") +- (vec_select:<V_HALF> +- (match_dup 0) +- (match_operand:ANY128 2 "vect_par_constant_high" ""))))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src) +- return "vmov\t%e0, %P1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_move_hi_quad_<mode>" +- [(set (match_operand:ANY128 0 "s_register_operand" "+w") +- (vec_concat:ANY128 +- (vec_select:<V_HALF> +- (match_dup 0) +- (match_operand:ANY128 2 "vect_par_constant_low" "")) +- (match_operand:<V_HALF> 1 "s_register_operand" "w")))] +- +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src) +- return "vmov\t%f0, %P1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- + (define_expand "move_hi_quad_<mode>" + [(match_operand:ANY128 0 "s_register_operand" "") + (match_operand:<V_HALF> 1 "s_register_operand" "")] + "TARGET_NEON" + { +- rtvec v = rtvec_alloc (<V_mode_nunits>/2); +- rtx t1; +- int i; +- +- for (i=0; i < (<V_mode_nunits>/2); i++) +- RTVEC_ELT (v, i) = GEN_INT (i); +- +- t1 = gen_rtx_PARALLEL (<MODE>mode, v); +- emit_insn (gen_neon_move_hi_quad_<mode> (operands[0], operands[1], t1)); +- ++ emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode, ++ GET_MODE_SIZE (<V_HALF>mode)), ++ operands[1]); + DONE; + }) + +@@ -1303,16 +1251,9 @@ + (match_operand:<V_HALF> 1 "s_register_operand" "")] + "TARGET_NEON" + { +- rtvec v = rtvec_alloc (<V_mode_nunits>/2); +- rtx t1; +- int i; +- +- for (i=0; i < (<V_mode_nunits>/2); i++) +- RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i); +- +- t1 = gen_rtx_PARALLEL (<MODE>mode, v); +- emit_insn (gen_neon_move_lo_quad_<mode> (operands[0], operands[1], t1)); +- ++ emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], ++ <MODE>mode, 0), ++ operands[1]); + DONE; + }) + +@@ -2950,183 +2891,27 @@ + (set_attr "neon_type" "neon_bp_simple")] + ) + +-(define_insn "neon_vget_highv16qi" +- [(set 
(match_operand:V8QI 0 "s_register_operand" "=w") +- (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w") +- (parallel [(const_int 8) (const_int 9) +- (const_int 10) (const_int 11) +- (const_int 12) (const_int 13) +- (const_int 14) (const_int 15)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src + 2) +- return "vmov\t%P0, %f1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_highv8hi" +- [(set (match_operand:V4HI 0 "s_register_operand" "=w") +- (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w") +- (parallel [(const_int 4) (const_int 5) +- (const_int 6) (const_int 7)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src + 2) +- return "vmov\t%P0, %f1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_highv4si" +- [(set (match_operand:V2SI 0 "s_register_operand" "=w") +- (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w") +- (parallel [(const_int 2) (const_int 3)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src + 2) +- return "vmov\t%P0, %f1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_highv4sf" +- [(set (match_operand:V2SF 0 "s_register_operand" "=w") +- (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w") +- (parallel [(const_int 2) (const_int 3)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src + 2) +- return "vmov\t%P0, %f1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_highv2di" +- [(set (match_operand:DI 0 "s_register_operand" "=w") +- (vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w") +- (parallel [(const_int 1)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src + 2) +- return "vmov\t%P0, %f1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_lowv16qi" +- [(set (match_operand:V8QI 0 "s_register_operand" "=w") +- (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w") +- (parallel [(const_int 0) (const_int 1) +- (const_int 2) (const_int 3) +- (const_int 4) (const_int 5) +- (const_int 6) (const_int 7)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src) +- return "vmov\t%P0, %e1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_lowv8hi" +- [(set (match_operand:V4HI 0 "s_register_operand" "=w") +- (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w") +- (parallel [(const_int 0) (const_int 1) +- (const_int 2) (const_int 3)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src) +- return "vmov\t%P0, %e1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_lowv4si" +- [(set (match_operand:V2SI 0 "s_register_operand" "=w") +- (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w") +- (parallel [(const_int 0) (const_int 1)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src) +- return "vmov\t%P0, 
%e1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_lowv4sf" +- [(set (match_operand:V2SF 0 "s_register_operand" "=w") +- (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w") +- (parallel [(const_int 0) (const_int 1)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src) +- return "vmov\t%P0, %e1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_lowv2di" +- [(set (match_operand:DI 0 "s_register_operand" "=w") +- (vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w") +- (parallel [(const_int 0)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src) +- return "vmov\t%P0, %e1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) ++(define_expand "neon_vget_high<mode>" ++ [(match_operand:<V_HALF> 0 "s_register_operand") ++ (match_operand:VQX 1 "s_register_operand")] ++ "TARGET_NEON" ++{ ++ emit_move_insn (operands[0], ++ simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, ++ GET_MODE_SIZE (<V_HALF>mode))); ++ DONE; ++}) ++ ++(define_expand "neon_vget_low<mode>" ++ [(match_operand:<V_HALF> 0 "s_register_operand") ++ (match_operand:VQX 1 "s_register_operand")] ++ "TARGET_NEON" ++{ ++ emit_move_insn (operands[0], ++ simplify_gen_subreg (<V_HALF>mode, operands[1], ++ <MODE>mode, 0)); ++ DONE; ++}) + + (define_insn "neon_vcvt<mode>" + [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") + +=== modified file 'gcc/simplify-rtx.c' +--- old/gcc/simplify-rtx.c 2011-08-13 08:32:32 +0000 ++++ new/gcc/simplify-rtx.c 2011-09-28 15:11:59 +0000 +@@ -5567,6 +5567,7 @@ + /* Optimize SUBREG truncations of zero and sign extended values. */ + if ((GET_CODE (op) == ZERO_EXTEND + || GET_CODE (op) == SIGN_EXTEND) ++ && SCALAR_INT_MODE_P (innermode) + && GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode)) + { + unsigned int bitpos = subreg_lsb_1 (outermode, innermode, byte); +@@ -5605,6 +5606,7 @@ + if ((GET_CODE (op) == LSHIFTRT + || GET_CODE (op) == ASHIFTRT) + && SCALAR_INT_MODE_P (outermode) ++ && SCALAR_INT_MODE_P (innermode) + /* Ensure that OUTERMODE is at least twice as wide as the INNERMODE + to avoid the possibility that an outer LSHIFTRT shifts by more + than the sign extension's sign_bit_copies and introduces zeros +@@ -5624,6 +5626,7 @@ + if ((GET_CODE (op) == LSHIFTRT + || GET_CODE (op) == ASHIFTRT) + && SCALAR_INT_MODE_P (outermode) ++ && SCALAR_INT_MODE_P (innermode) + && GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode) + && CONST_INT_P (XEXP (op, 1)) + && GET_CODE (XEXP (op, 0)) == ZERO_EXTEND +@@ -5638,6 +5641,7 @@ + the outer subreg is effectively a truncation to the original mode. */ + if (GET_CODE (op) == ASHIFT + && SCALAR_INT_MODE_P (outermode) ++ && SCALAR_INT_MODE_P (innermode) + && GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode) + && CONST_INT_P (XEXP (op, 1)) + && (GET_CODE (XEXP (op, 0)) == ZERO_EXTEND +@@ -5651,7 +5655,7 @@ + /* Recognize a word extraction from a multi-word subreg. 
*/ + if ((GET_CODE (op) == LSHIFTRT + || GET_CODE (op) == ASHIFTRT) +- && SCALAR_INT_MODE_P (outermode) ++ && SCALAR_INT_MODE_P (innermode) + && GET_MODE_BITSIZE (outermode) >= BITS_PER_WORD + && GET_MODE_BITSIZE (innermode) >= (2 * GET_MODE_BITSIZE (outermode)) + && CONST_INT_P (XEXP (op, 1)) +@@ -5673,6 +5677,7 @@ + + if ((GET_CODE (op) == LSHIFTRT + || GET_CODE (op) == ASHIFTRT) ++ && SCALAR_INT_MODE_P (innermode) + && MEM_P (XEXP (op, 0)) + && CONST_INT_P (XEXP (op, 1)) + && GET_MODE_SIZE (outermode) < GET_MODE_SIZE (GET_MODE (op)) + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106817.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106817.patch new file mode 100644 index 0000000000..d44f8cf1a5 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106817.patch @@ -0,0 +1,290 @@ +2011-10-01 Revital Eres <revital.eres@linaro.org> + + gcc/ + Backport from mainline -r179380 and -r179381 + + * ddg.c (autoinc_var_is_used_p): New function. + (create_ddg_dep_from_intra_loop_link, + add_cross_iteration_register_deps): Call it. + * ddg.h (autoinc_var_is_used_p): Declare. + * modulo-sched.c (sms_schedule): Handle instructions with REG_INC. + (generate_reg_moves): Call autoinc_var_is_used_p. Skip + instructions that do not set a register and verify no regmoves + are created for !single_set instructions. + + gcc/testsuite/ + + * gcc.dg/sms-10.c: New file + +=== modified file 'gcc/ddg.c' +--- old/gcc/ddg.c 2011-07-31 11:29:10 +0000 ++++ new/gcc/ddg.c 2011-10-02 06:56:53 +0000 +@@ -145,6 +145,27 @@ + return rtx_mem_access_p (PATTERN (insn)); + } + ++/* Return true if DEF_INSN contains address being auto-inc or auto-dec ++ which is used in USE_INSN. Otherwise return false. The result is ++ being used to decide whether to remove the edge between def_insn and ++ use_insn when -fmodulo-sched-allow-regmoves is set. This function ++ doesn't need to consider the specific address register; no reg_moves ++ will be allowed for any life range defined by def_insn and used ++ by use_insn, if use_insn uses an address register auto-inc'ed by ++ def_insn. */ ++bool ++autoinc_var_is_used_p (rtx def_insn, rtx use_insn) ++{ ++ rtx note; ++ ++ for (note = REG_NOTES (def_insn); note; note = XEXP (note, 1)) ++ if (REG_NOTE_KIND (note) == REG_INC ++ && reg_referenced_p (XEXP (note, 0), PATTERN (use_insn))) ++ return true; ++ ++ return false; ++} ++ + /* Computes the dependence parameters (latency, distance etc.), creates + a ddg_edge and adds it to the given DDG. */ + static void +@@ -173,10 +194,15 @@ + compensate for that by generating reg-moves based on the life-range + analysis. The anti-deps that will be deleted are the ones which + have true-deps edges in the opposite direction (in other words +- the kernel has only one def of the relevant register). TODO: +- support the removal of all anti-deps edges, i.e. including those ++ the kernel has only one def of the relevant register). ++ If the address that is being auto-inc or auto-dec in DEST_NODE ++ is used in SRC_NODE then do not remove the edge to make sure ++ reg-moves will not be created for this address. ++ TODO: support the removal of all anti-deps edges, i.e. including those + whose register has multiple defs in the loop. 
*/ +- if (flag_modulo_sched_allow_regmoves && (t == ANTI_DEP && dt == REG_DEP)) ++ if (flag_modulo_sched_allow_regmoves ++ && (t == ANTI_DEP && dt == REG_DEP) ++ && !autoinc_var_is_used_p (dest_node->insn, src_node->insn)) + { + rtx set; + +@@ -302,10 +328,14 @@ + gcc_assert (first_def_node); + + /* Always create the edge if the use node is a branch in +- order to prevent the creation of reg-moves. */ ++ order to prevent the creation of reg-moves. ++ If the address that is being auto-inc or auto-dec in LAST_DEF ++ is used in USE_INSN then do not remove the edge to make sure ++ reg-moves will not be created for that address. */ + if (DF_REF_ID (last_def) != DF_REF_ID (first_def) + || !flag_modulo_sched_allow_regmoves +- || JUMP_P (use_node->insn)) ++ || JUMP_P (use_node->insn) ++ || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn)) + create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP, + REG_DEP, 1); + + +=== modified file 'gcc/ddg.h' +--- old/gcc/ddg.h 2009-11-25 10:55:54 +0000 ++++ new/gcc/ddg.h 2011-10-02 06:56:53 +0000 +@@ -186,4 +186,6 @@ + int find_nodes_on_paths (sbitmap result, ddg_ptr, sbitmap from, sbitmap to); + int longest_simple_path (ddg_ptr, int from, int to, sbitmap via); + ++bool autoinc_var_is_used_p (rtx, rtx); ++ + #endif /* GCC_DDG_H */ + +=== modified file 'gcc/modulo-sched.c' +--- old/gcc/modulo-sched.c 2011-09-14 11:06:06 +0000 ++++ new/gcc/modulo-sched.c 2011-10-02 06:56:53 +0000 +@@ -477,7 +477,12 @@ + sbitmap *uses_of_defs; + rtx last_reg_move; + rtx prev_reg, old_reg; +- ++ rtx set = single_set (u->insn); ++ ++ /* Skip instructions that do not set a register. */ ++ if ((set && !REG_P (SET_DEST (set)))) ++ continue; ++ + /* Compute the number of reg_moves needed for u, by looking at life + ranges started at u (excluding self-loops). */ + for (e = u->out; e; e = e->next_out) +@@ -494,6 +499,20 @@ + && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src)) + nreg_moves4e--; + ++ if (nreg_moves4e >= 1) ++ { ++ /* !single_set instructions are not supported yet and ++ thus we do not except to encounter them in the loop ++ except from the doloop part. For the latter case ++ we assume no regmoves are generated as the doloop ++ instructions are tied to the branch with an edge. */ ++ gcc_assert (set); ++ /* If the instruction contains auto-inc register then ++ validate that the regmov is being generated for the ++ target regsiter rather then the inc'ed register. */ ++ gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn)); ++ } ++ + nreg_moves = MAX (nreg_moves, nreg_moves4e); + } + +@@ -1266,12 +1285,10 @@ + continue; + } + +- /* Don't handle BBs with calls or barriers or auto-increment insns +- (to avoid creating invalid reg-moves for the auto-increment insns), ++ /* Don't handle BBs with calls or barriers + or !single_set with the exception of instructions that include + count_reg---these instructions are part of the control part + that do-loop recognizes. +- ??? Should handle auto-increment insns. + ??? Should handle insns defining subregs. 
*/ + for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn)) + { +@@ -1282,7 +1299,6 @@ + || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn) + && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE + && !reg_mentioned_p (count_reg, insn)) +- || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0) + || (INSN_P (insn) && (set = single_set (insn)) + && GET_CODE (SET_DEST (set)) == SUBREG)) + break; +@@ -1296,8 +1312,6 @@ + fprintf (dump_file, "SMS loop-with-call\n"); + else if (BARRIER_P (insn)) + fprintf (dump_file, "SMS loop-with-barrier\n"); +- else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0) +- fprintf (dump_file, "SMS reg inc\n"); + else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn) + && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)) + fprintf (dump_file, "SMS loop-with-not-single-set\n"); + +=== added file 'gcc/testsuite/gcc.dg/sms-10.c' +--- old/gcc/testsuite/gcc.dg/sms-10.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/sms-10.c 2011-10-02 06:56:53 +0000 +@@ -0,0 +1,118 @@ ++ /* { dg-do run } */ ++ /* { dg-options "-O2 -fmodulo-sched -fmodulo-sched-allow-regmoves -fdump-rtl-sms" } */ ++ ++ ++typedef __SIZE_TYPE__ size_t; ++extern void *malloc (size_t); ++extern void free (void *); ++extern void abort (void); ++ ++struct regstat_n_sets_and_refs_t ++{ ++ int sets; ++ int refs; ++}; ++ ++struct regstat_n_sets_and_refs_t *regstat_n_sets_and_refs; ++ ++struct df_reg_info ++{ ++ unsigned int n_refs; ++}; ++ ++struct df_d ++{ ++ struct df_reg_info **def_regs; ++ struct df_reg_info **use_regs; ++}; ++struct df_d *df; ++ ++static inline int ++REG_N_SETS (int regno) ++{ ++ return regstat_n_sets_and_refs[regno].sets; ++} ++ ++__attribute__ ((noinline)) ++ int max_reg_num (void) ++{ ++ return 100; ++} ++ ++__attribute__ ((noinline)) ++ void regstat_init_n_sets_and_refs (void) ++{ ++ unsigned int i; ++ unsigned int max_regno = max_reg_num (); ++ ++ for (i = 0; i < max_regno; i++) ++ { ++ (regstat_n_sets_and_refs[i].sets = (df->def_regs[(i)]->n_refs)); ++ (regstat_n_sets_and_refs[i].refs = ++ (df->use_regs[(i)]->n_refs) + REG_N_SETS (i)); ++ } ++} ++ ++int a_sets[100] = ++ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ++ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, ++ 40, 41, 42, ++ 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, ++ 62, 63, 64, ++ 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, ++ 84, 85, 86, ++ 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99 ++}; ++ ++int a_refs[100] = ++ { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, ++ 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, ++ 78, 80, 82, ++ 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, ++ 118, 120, ++ 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, ++ 152, 154, 156, ++ 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180, 182, 184, 186, ++ 188, 190, 192, ++ 194, 196, 198 ++}; ++ ++int ++main () ++{ ++ struct df_reg_info *b[100], *c[100]; ++ struct df_d df1; ++ size_t s = sizeof (struct df_reg_info); ++ struct regstat_n_sets_and_refs_t a[100]; ++ ++ df = &df1; ++ regstat_n_sets_and_refs = a; ++ int i; ++ ++ for (i = 0; i < 100; i++) ++ { ++ b[i] = (struct df_reg_info *) malloc (s); ++ b[i]->n_refs = i; ++ c[i] = (struct df_reg_info *) malloc (s); ++ c[i]->n_refs = i; ++ } ++ ++ df1.def_regs = b; ++ df1.use_regs = c; ++ regstat_init_n_sets_and_refs (); ++ ++ for (i = 0; i < 100; 
i++) ++ if ((a[i].sets != a_sets[i]) || (a[i].refs != a_refs[i])) ++ abort (); ++ ++ for (i = 0; i < 100; i++) ++ { ++ free (b[i]); ++ free (c[i]); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-rtl-dump-times "SMS succeeded" 1 "sms" { target powerpc*-*-* } } } */ ++/* { dg-final { cleanup-rtl-dump "sms" } } */ + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106818.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106818.patch new file mode 100644 index 0000000000..ef98142bc4 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106818.patch @@ -0,0 +1,105 @@ +2011-10-03 Michael Hope <michael.hope@linaro.org> + + Backport from mainline: + + 2011-09-13 Sevak Sargsyan <sevak.sargsyan@ispras.ru> + + gcc/ + * config/arm/neon.md (neon_vabd<mode>_2, neon_vabd<mode>_3): New + define_insn patterns for combine. + + gcc/testsuite/ + * gcc.target/arm/neon-combine-sub-abs-into-vabd.c: New test. + +=== modified file 'gcc/config/arm/neon.md' +--- old/gcc/config/arm/neon.md 2011-09-28 15:14:59 +0000 ++++ new/gcc/config/arm/neon.md 2011-10-03 01:32:17 +0000 +@@ -5428,3 +5428,32 @@ + emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg)); + DONE; + }) ++ ++(define_insn "neon_vabd<mode>_2" ++ [(set (match_operand:VDQ 0 "s_register_operand" "=w") ++ (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") ++ (match_operand:VDQ 2 "s_register_operand" "w"))))] ++ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" ++ "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" ++ [(set (attr "neon_type") ++ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0)) ++ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) ++ (const_string "neon_fp_vadd_ddd_vabs_dd") ++ (const_string "neon_fp_vadd_qqq_vabs_qq")) ++ (const_string "neon_int_5")))] ++) ++ ++(define_insn "neon_vabd<mode>_3" ++ [(set (match_operand:VDQ 0 "s_register_operand" "=w") ++ (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w") ++ (match_operand:VDQ 2 "s_register_operand" "w")] ++ UNSPEC_VSUB)))] ++ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" ++ "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" ++ [(set (attr "neon_type") ++ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0)) ++ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) ++ (const_string "neon_fp_vadd_ddd_vabs_dd") ++ (const_string "neon_fp_vadd_qqq_vabs_qq")) ++ (const_string "neon_int_5")))] ++) + +=== added file 'gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c' +--- old/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c 2011-10-03 01:32:17 +0000 +@@ -0,0 +1,50 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-O2 -funsafe-math-optimizations" } */ ++/* { dg-add-options arm_neon } */ ++ ++#include <arm_neon.h> ++float32x2_t f_sub_abs_to_vabd_32() ++{ ++ float32x2_t val1 = vdup_n_f32 (10); ++ float32x2_t val2 = vdup_n_f32 (30); ++ float32x2_t sres = vsub_f32(val1, val2); ++ float32x2_t res = vabs_f32 (sres); ++ ++ return res; ++} ++/* { dg-final { scan-assembler "vabd\.f32" } }*/ ++ ++#include <arm_neon.h> ++int8x8_t sub_abs_to_vabd_8() ++{ ++ int8x8_t val1 = vdup_n_s8 (10); ++ int8x8_t val2 = vdup_n_s8 (30); ++ int8x8_t sres = vsub_s8(val1, val2); ++ int8x8_t res = vabs_s8 (sres); ++ ++ return res; ++} ++/* { 
dg-final { scan-assembler "vabd\.s8" } }*/ ++ ++int16x4_t sub_abs_to_vabd_16() ++{ ++ int16x4_t val1 = vdup_n_s16 (10); ++ int16x4_t val2 = vdup_n_s16 (30); ++ int16x4_t sres = vsub_s16(val1, val2); ++ int16x4_t res = vabs_s16 (sres); ++ ++ return res; ++} ++/* { dg-final { scan-assembler "vabd\.s16" } }*/ ++ ++int32x2_t sub_abs_to_vabd_32() ++{ ++ int32x2_t val1 = vdup_n_s32 (10); ++ int32x2_t val2 = vdup_n_s32 (30); ++ int32x2_t sres = vsub_s32(val1, val2); ++ int32x2_t res = vabs_s32 (sres); ++ ++ return res; ++} ++/* { dg-final { scan-assembler "vabd\.s32" } }*/ + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106819.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106819.patch new file mode 100644 index 0000000000..e097ec27fa --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106819.patch @@ -0,0 +1,436 @@ +2011-10-03 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-09-22 Richard Sandiford <richard.sandiford@linaro.org> + + * config/arm/predicates.md (expandable_comparison_operator): New + predicate, extracted from... + (arm_comparison_operator): ...here. + * config/arm/arm.md (cbranchsi4, cbranchsf4, cbranchdf4, cbranchdi4) + (cstoresi4, cstoresf4, cstoredf4, cstoredi4, movsicc, movsfcc) + (movdfcc): Use expandable_comparison_operator. + + gcc/testsuite/ + Backport from mainline: + + 2011-09-22 Richard Sandiford <richard.sandiford@linaro.org> + + * gcc.target/arm/cmp-1.c: New test. + * gcc.target/arm/cmp-2.c: Likewise. + +2011-10-03 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-09-07 Richard Sandiford <richard.sandiford@linaro.org> + + PR target/49030 + * config/arm/arm-protos.h (maybe_get_arm_condition_code): Declare. + * config/arm/arm.c (maybe_get_arm_condition_code): New function, + reusing the old code from get_arm_condition_code. Return ARM_NV + for invalid comparison codes. + (get_arm_condition_code): Redefine in terms of + maybe_get_arm_condition_code. + * config/arm/predicates.md (arm_comparison_operator): Use + maybe_get_arm_condition_code. + + gcc/testsuite/ + Backport from mainline: + + 2011-09-07 Richard Sandiford <richard.sandiford@linaro.org> + + PR target/49030 + * gcc.dg/torture/pr49030.c: New test. + +=== modified file 'gcc/config/arm/arm-protos.h' +--- old/gcc/config/arm/arm-protos.h 2011-09-15 09:45:31 +0000 ++++ new/gcc/config/arm/arm-protos.h 2011-10-03 09:46:40 +0000 +@@ -180,6 +180,7 @@ + #endif + extern int thumb_shiftable_const (unsigned HOST_WIDE_INT); + #ifdef RTX_CODE ++extern enum arm_cond_code maybe_get_arm_condition_code (rtx); + extern void thumb1_final_prescan_insn (rtx); + extern void thumb2_final_prescan_insn (rtx); + extern const char *thumb_load_double_from_address (rtx *); + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-09-19 07:44:24 +0000 ++++ new/gcc/config/arm/arm.c 2011-10-03 09:46:40 +0000 +@@ -17494,10 +17494,10 @@ + decremented/zeroed by arm_asm_output_opcode as the insns are output. */ + + /* Returns the index of the ARM condition code string in +- `arm_condition_codes'. COMPARISON should be an rtx like +- `(eq (...) (...))'. */ +-static enum arm_cond_code +-get_arm_condition_code (rtx comparison) ++ `arm_condition_codes', or ARM_NV if the comparison is invalid. ++ COMPARISON should be an rtx like `(eq (...) (...))'. 
*/ ++enum arm_cond_code ++maybe_get_arm_condition_code (rtx comparison) + { + enum machine_mode mode = GET_MODE (XEXP (comparison, 0)); + enum arm_cond_code code; +@@ -17521,11 +17521,11 @@ + case CC_DLTUmode: code = ARM_CC; + + dominance: +- gcc_assert (comp_code == EQ || comp_code == NE); +- + if (comp_code == EQ) + return ARM_INVERSE_CONDITION_CODE (code); +- return code; ++ if (comp_code == NE) ++ return code; ++ return ARM_NV; + + case CC_NOOVmode: + switch (comp_code) +@@ -17534,7 +17534,7 @@ + case EQ: return ARM_EQ; + case GE: return ARM_PL; + case LT: return ARM_MI; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_Zmode: +@@ -17542,7 +17542,7 @@ + { + case NE: return ARM_NE; + case EQ: return ARM_EQ; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_Nmode: +@@ -17550,7 +17550,7 @@ + { + case NE: return ARM_MI; + case EQ: return ARM_PL; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CCFPEmode: +@@ -17575,7 +17575,7 @@ + /* UNEQ and LTGT do not have a representation. */ + case UNEQ: /* Fall through. */ + case LTGT: /* Fall through. */ +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_SWPmode: +@@ -17591,7 +17591,7 @@ + case GTU: return ARM_CC; + case LEU: return ARM_CS; + case LTU: return ARM_HI; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_Cmode: +@@ -17599,7 +17599,7 @@ + { + case LTU: return ARM_CS; + case GEU: return ARM_CC; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_CZmode: +@@ -17611,7 +17611,7 @@ + case GTU: return ARM_HI; + case LEU: return ARM_LS; + case LTU: return ARM_CC; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_NCVmode: +@@ -17621,7 +17621,7 @@ + case LT: return ARM_LT; + case GEU: return ARM_CS; + case LTU: return ARM_CC; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CCmode: +@@ -17637,13 +17637,22 @@ + case GTU: return ARM_HI; + case LEU: return ARM_LS; + case LTU: return ARM_CC; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + default: gcc_unreachable (); + } + } + ++/* Like maybe_get_arm_condition_code, but never return ARM_NV. */ ++static enum arm_cond_code ++get_arm_condition_code (rtx comparison) ++{ ++ enum arm_cond_code code = maybe_get_arm_condition_code (comparison); ++ gcc_assert (code != ARM_NV); ++ return code; ++} ++ + /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed + instructions. 
*/ + void + +=== modified file 'gcc/config/arm/arm.md' +--- old/gcc/config/arm/arm.md 2011-09-12 14:14:00 +0000 ++++ new/gcc/config/arm/arm.md 2011-10-03 09:47:33 +0000 +@@ -6543,7 +6543,7 @@ + + (define_expand "cbranchsi4" + [(set (pc) (if_then_else +- (match_operator 0 "arm_comparison_operator" ++ (match_operator 0 "expandable_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")]) + (label_ref (match_operand 3 "" "")) +@@ -6594,7 +6594,7 @@ + + (define_expand "cbranchsf4" + [(set (pc) (if_then_else +- (match_operator 0 "arm_comparison_operator" ++ (match_operator 0 "expandable_comparison_operator" + [(match_operand:SF 1 "s_register_operand" "") + (match_operand:SF 2 "arm_float_compare_operand" "")]) + (label_ref (match_operand 3 "" "")) +@@ -6606,7 +6606,7 @@ + + (define_expand "cbranchdf4" + [(set (pc) (if_then_else +- (match_operator 0 "arm_comparison_operator" ++ (match_operator 0 "expandable_comparison_operator" + [(match_operand:DF 1 "s_register_operand" "") + (match_operand:DF 2 "arm_float_compare_operand" "")]) + (label_ref (match_operand 3 "" "")) +@@ -6618,7 +6618,7 @@ + + (define_expand "cbranchdi4" + [(set (pc) (if_then_else +- (match_operator 0 "arm_comparison_operator" ++ (match_operator 0 "expandable_comparison_operator" + [(match_operand:DI 1 "cmpdi_operand" "") + (match_operand:DI 2 "cmpdi_operand" "")]) + (label_ref (match_operand 3 "" "")) +@@ -7473,7 +7473,7 @@ + + (define_expand "cstoresi4" + [(set (match_operand:SI 0 "s_register_operand" "") +- (match_operator:SI 1 "arm_comparison_operator" ++ (match_operator:SI 1 "expandable_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "reg_or_int_operand" "")]))] + "TARGET_32BIT || TARGET_THUMB1" +@@ -7609,7 +7609,7 @@ + + (define_expand "cstoresf4" + [(set (match_operand:SI 0 "s_register_operand" "") +- (match_operator:SI 1 "arm_comparison_operator" ++ (match_operator:SI 1 "expandable_comparison_operator" + [(match_operand:SF 2 "s_register_operand" "") + (match_operand:SF 3 "arm_float_compare_operand" "")]))] + "TARGET_32BIT && TARGET_HARD_FLOAT" +@@ -7619,7 +7619,7 @@ + + (define_expand "cstoredf4" + [(set (match_operand:SI 0 "s_register_operand" "") +- (match_operator:SI 1 "arm_comparison_operator" ++ (match_operator:SI 1 "expandable_comparison_operator" + [(match_operand:DF 2 "s_register_operand" "") + (match_operand:DF 3 "arm_float_compare_operand" "")]))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" +@@ -7629,7 +7629,7 @@ + + (define_expand "cstoredi4" + [(set (match_operand:SI 0 "s_register_operand" "") +- (match_operator:SI 1 "arm_comparison_operator" ++ (match_operator:SI 1 "expandable_comparison_operator" + [(match_operand:DI 2 "cmpdi_operand" "") + (match_operand:DI 3 "cmpdi_operand" "")]))] + "TARGET_32BIT" +@@ -7749,7 +7749,7 @@ + + (define_expand "movsicc" + [(set (match_operand:SI 0 "s_register_operand" "") +- (if_then_else:SI (match_operand 1 "arm_comparison_operator" "") ++ (if_then_else:SI (match_operand 1 "expandable_comparison_operator" "") + (match_operand:SI 2 "arm_not_operand" "") + (match_operand:SI 3 "arm_not_operand" "")))] + "TARGET_32BIT" +@@ -7769,7 +7769,7 @@ + + (define_expand "movsfcc" + [(set (match_operand:SF 0 "s_register_operand" "") +- (if_then_else:SF (match_operand 1 "arm_comparison_operator" "") ++ (if_then_else:SF (match_operand 1 "expandable_comparison_operator" "") + (match_operand:SF 2 "s_register_operand" "") + (match_operand:SF 3 "nonmemory_operand" "")))] + 
"TARGET_32BIT && TARGET_HARD_FLOAT" +@@ -7795,7 +7795,7 @@ + + (define_expand "movdfcc" + [(set (match_operand:DF 0 "s_register_operand" "") +- (if_then_else:DF (match_operand 1 "arm_comparison_operator" "") ++ (if_then_else:DF (match_operand 1 "expandable_comparison_operator" "") + (match_operand:DF 2 "s_register_operand" "") + (match_operand:DF 3 "arm_float_add_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)" + +=== modified file 'gcc/config/arm/predicates.md' +--- old/gcc/config/arm/predicates.md 2011-09-15 09:45:31 +0000 ++++ new/gcc/config/arm/predicates.md 2011-10-03 09:47:33 +0000 +@@ -242,11 +242,15 @@ + + ;; True for integer comparisons and, if FP is active, for comparisons + ;; other than LTGT or UNEQ. ++(define_special_predicate "expandable_comparison_operator" ++ (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu, ++ unordered,ordered,unlt,unle,unge,ungt")) ++ ++;; Likewise, but only accept comparisons that are directly supported ++;; by ARM condition codes. + (define_special_predicate "arm_comparison_operator" +- (ior (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu") +- (and (match_test "TARGET_32BIT && TARGET_HARD_FLOAT +- && (TARGET_FPA || TARGET_VFP)") +- (match_code "unordered,ordered,unlt,unle,unge,ungt")))) ++ (and (match_operand 0 "expandable_comparison_operator") ++ (match_test "maybe_get_arm_condition_code (op) != ARM_NV"))) + + (define_special_predicate "lt_ge_comparison_operator" + (match_code "lt,ge")) + +=== added file 'gcc/testsuite/gcc.dg/torture/pr49030.c' +--- old/gcc/testsuite/gcc.dg/torture/pr49030.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/torture/pr49030.c 2011-10-03 09:46:40 +0000 +@@ -0,0 +1,19 @@ ++void ++sample_move_d32u24_sS (char *dst, float *src, unsigned long nsamples, ++ unsigned long dst_skip) ++{ ++ long long y; ++ while (nsamples--) ++ { ++ y = (long long) (*src * 8388608.0f) << 8; ++ if (y > 2147483647) { ++ *(int *) dst = 2147483647; ++ } else if (y < -2147483647 - 1) { ++ *(int *) dst = -2147483647 - 1; ++ } else { ++ *(int *) dst = (int) y; ++ } ++ dst += dst_skip; ++ src++; ++ } ++} + +=== added file 'gcc/testsuite/gcc.target/arm/cmp-1.c' +--- old/gcc/testsuite/gcc.target/arm/cmp-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/cmp-1.c 2011-10-03 09:47:33 +0000 +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O" } */ ++/* { dg-final { scan-assembler-not "\tbl\t" } } */ ++/* { dg-final { scan-assembler-not "__aeabi" } } */ ++int x, y; ++ ++#define TEST_EXPR(NAME, ARGS, EXPR) \ ++ int NAME##1 ARGS { return (EXPR); } \ ++ int NAME##2 ARGS { return !(EXPR); } \ ++ int NAME##3 ARGS { return (EXPR) ? 
x : y; } \ ++ void NAME##4 ARGS { if (EXPR) x++; } \ ++ void NAME##5 ARGS { if (!(EXPR)) x++; } ++ ++#define TEST(NAME, TYPE, OPERATOR) \ ++ TEST_EXPR (NAME##_rr, (TYPE a1, TYPE a2), a1 OPERATOR a2) \ ++ TEST_EXPR (NAME##_rm, (TYPE a1, TYPE *a2), a1 OPERATOR *a2) \ ++ TEST_EXPR (NAME##_mr, (TYPE *a1, TYPE a2), *a1 OPERATOR a2) \ ++ TEST_EXPR (NAME##_mm, (TYPE *a1, TYPE *a2), *a1 OPERATOR *a2) \ ++ TEST_EXPR (NAME##_rc, (TYPE a1), a1 OPERATOR 100) \ ++ TEST_EXPR (NAME##_cr, (TYPE a1), 100 OPERATOR a1) ++ ++#define TEST_OP(NAME, OPERATOR) \ ++ TEST (sc_##NAME, signed char, OPERATOR) \ ++ TEST (uc_##NAME, unsigned char, OPERATOR) \ ++ TEST (ss_##NAME, short, OPERATOR) \ ++ TEST (us_##NAME, unsigned short, OPERATOR) \ ++ TEST (si_##NAME, int, OPERATOR) \ ++ TEST (ui_##NAME, unsigned int, OPERATOR) \ ++ TEST (sll_##NAME, long long, OPERATOR) \ ++ TEST (ull_##NAME, unsigned long long, OPERATOR) ++ ++TEST_OP (eq, ==) ++TEST_OP (ne, !=) ++TEST_OP (lt, <) ++TEST_OP (gt, >) ++TEST_OP (le, <=) ++TEST_OP (ge, >=) + +=== added file 'gcc/testsuite/gcc.target/arm/cmp-2.c' +--- old/gcc/testsuite/gcc.target/arm/cmp-2.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/cmp-2.c 2011-10-03 09:47:33 +0000 +@@ -0,0 +1,49 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_vfp_ok } */ ++/* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */ ++/* { dg-options "-O -mfpu=vfp -mfloat-abi=softfp" } */ ++/* { dg-final { scan-assembler-not "\tbl\t" } } */ ++/* { dg-final { scan-assembler-not "__aeabi" } } */ ++int x, y; ++ ++#define EQ(X, Y) ((X) == (Y)) ++#define NE(X, Y) ((X) != (Y)) ++#define LT(X, Y) ((X) < (Y)) ++#define GT(X, Y) ((X) > (Y)) ++#define LE(X, Y) ((X) <= (Y)) ++#define GE(X, Y) ((X) >= (Y)) ++ ++#define TEST_EXPR(NAME, ARGS, EXPR) \ ++ int NAME##1 ARGS { return (EXPR); } \ ++ int NAME##2 ARGS { return !(EXPR); } \ ++ int NAME##3 ARGS { return (EXPR) ? x : y; } \ ++ void NAME##4 ARGS { if (EXPR) x++; } \ ++ void NAME##5 ARGS { if (!(EXPR)) x++; } ++ ++#define TEST(NAME, TYPE, OPERATOR) \ ++ TEST_EXPR (NAME##_rr, (TYPE a1, TYPE a2), OPERATOR (a1, a2)) \ ++ TEST_EXPR (NAME##_rm, (TYPE a1, TYPE *a2), OPERATOR (a1, *a2)) \ ++ TEST_EXPR (NAME##_mr, (TYPE *a1, TYPE a2), OPERATOR (*a1, a2)) \ ++ TEST_EXPR (NAME##_mm, (TYPE *a1, TYPE *a2), OPERATOR (*a1, *a2)) \ ++ TEST_EXPR (NAME##_rc, (TYPE a1), OPERATOR (a1, 100)) \ ++ TEST_EXPR (NAME##_cr, (TYPE a1), OPERATOR (100, a1)) ++ ++#define TEST_OP(NAME, OPERATOR) \ ++ TEST (f_##NAME, float, OPERATOR) \ ++ TEST (d_##NAME, double, OPERATOR) \ ++ TEST (ld_##NAME, long double, OPERATOR) ++ ++TEST_OP (eq, EQ) ++TEST_OP (ne, NE) ++TEST_OP (lt, LT) ++TEST_OP (gt, GT) ++TEST_OP (le, LE) ++TEST_OP (ge, GE) ++TEST_OP (blt, __builtin_isless) ++TEST_OP (bgt, __builtin_isgreater) ++TEST_OP (ble, __builtin_islessequal) ++TEST_OP (bge, __builtin_isgreaterequal) ++/* This one should be expanded into separate ordered and equality ++ comparisons. 
*/ ++TEST_OP (blg, __builtin_islessgreater) ++TEST_OP (bun, __builtin_isunordered) + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106820.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106820.patch new file mode 100644 index 0000000000..4a886ce56d --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106820.patch @@ -0,0 +1,378 @@ +2011-10-06 Ira Rosen <ira.rosen@linaro.org> + + Backport from mainline: + + 2011-09-25 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-data-ref.c (dr_analyze_innermost): Add new argument. + Allow not simple iv if analyzing basic block. + (create_data_ref): Update call to dr_analyze_innermost. + (stmt_with_adjacent_zero_store_dr_p, ref_base_address): Likewise. + * tree-loop-distribution.c (generate_memset_zero): Likewise. + * tree-predcom.c (find_looparound_phi): Likewise. + * tree-data-ref.h (dr_analyze_innermost): Add new argument. + + gcc/testsuite/ + * gcc.dg/vect/bb-slp-24.c: New. + + + 2011-09-15 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vect-data-refs.c (vect_analyze_data_ref_dependence): Allow + read-after-read dependencies in basic block SLP. + + gcc/testsuite/ + * gcc.dg/vect/bb-slp-25.c: New. + + + 2011-04-21 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + * tree-vect-data-refs.c (vect_drs_dependent_in_basic_block): Use + operand_equal_p to compare DR_BASE_ADDRESSes. + (vect_check_interleaving): Likewise. + + gcc/testsuite/ + * gcc.dg/vect/vect-119.c: New test. + +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-24.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-24.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-24.c 2011-10-02 08:43:10 +0000 +@@ -0,0 +1,59 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define A 3 ++#define N 256 ++ ++short src[N], dst[N]; ++ ++void foo (short * __restrict__ dst, short * __restrict__ src, int h, ++ int stride, int dummy) ++{ ++ int i; ++ h /= 8; ++ for (i = 0; i < h; i++) ++ { ++ dst[0] += A*src[0]; ++ dst[1] += A*src[1]; ++ dst[2] += A*src[2]; ++ dst[3] += A*src[3]; ++ dst[4] += A*src[4]; ++ dst[5] += A*src[5]; ++ dst[6] += A*src[6]; ++ dst[7] += A*src[7]; ++ dst += stride; ++ src += stride; ++ if (dummy == 32) ++ abort (); ++ } ++} ++ ++ ++int main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ dst[i] = 0; ++ src[i] = i; ++ } ++ ++ foo (dst, src, N, 8, 0); ++ ++ for (i = 0; i < N; i++) ++ { ++ if (dst[i] != A * i) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */ ++/* { dg-final { cleanup-tree-dump "slp" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-25.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-25.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-25.c 2011-10-02 08:43:10 +0000 +@@ -0,0 +1,59 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define A 3 ++#define B 4 ++#define N 256 ++ ++short src[N], dst[N]; ++ ++void foo (short * __restrict__ dst, short * __restrict__ src, int h, int stride, int dummy) ++{ ++ int i; ++ h /= 16; ++ for (i = 0; i < h; i++) ++ { ++ dst[0] += A*src[0] + src[stride]; ++ dst[1] += A*src[1] + src[1+stride]; ++ dst[2] += A*src[2] + src[2+stride]; ++ dst[3] += A*src[3] + src[3+stride]; ++ dst[4] += A*src[4] + src[4+stride]; ++ dst[5] += A*src[5] + 
src[5+stride]; ++ dst[6] += A*src[6] + src[6+stride]; ++ dst[7] += A*src[7] + src[7+stride]; ++ dst += 8; ++ src += 8; ++ if (dummy == 32) ++ abort (); ++ } ++} ++ ++ ++int main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ dst[i] = 0; ++ src[i] = i; ++ } ++ ++ foo (dst, src, N, 8, 0); ++ ++ for (i = 0; i < N/2; i++) ++ { ++ if (dst[i] != A * i + i + 8) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */ ++/* { dg-final { cleanup-tree-dump "slp" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-119.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-119.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-119.c 2011-10-02 08:43:10 +0000 +@@ -0,0 +1,28 @@ ++/* { dg-do compile } */ ++ ++#define OUTER 32 ++#define INNER 40 ++ ++static unsigned int ++bar (const unsigned int x[INNER][2], unsigned int sum) ++{ ++ int i; ++ ++ for (i = 0; i < INNER; i++) ++ sum += x[i][0] * x[i][0] + x[i][1] * x[i][1]; ++ return sum; ++} ++ ++unsigned int foo (const unsigned int x[OUTER][INNER][2]) ++{ ++ int i; ++ unsigned int sum; ++ ++ sum = 0.0f; ++ for (i = 0; i < OUTER; i++) ++ sum = bar (x[i], sum); ++ return sum; ++} ++ ++/* { dg-final { scan-tree-dump-times "Detected interleaving of size 2" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/tree-data-ref.c' +--- old/gcc/tree-data-ref.c 2011-05-26 14:27:33 +0000 ++++ new/gcc/tree-data-ref.c 2011-10-02 08:43:10 +0000 +@@ -721,11 +721,11 @@ + } + + /* Analyzes the behavior of the memory reference DR in the innermost loop or +- basic block that contains it. Returns true if analysis succeed or false ++ basic block that contains it. Returns true if analysis succeed or false + otherwise. 
*/ + + bool +-dr_analyze_innermost (struct data_reference *dr) ++dr_analyze_innermost (struct data_reference *dr, struct loop *nest) + { + gimple stmt = DR_STMT (dr); + struct loop *loop = loop_containing_stmt (stmt); +@@ -768,14 +768,25 @@ + } + else + base = build_fold_addr_expr (base); ++ + if (in_loop) + { + if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv, + false)) + { +- if (dump_file && (dump_flags & TDF_DETAILS)) +- fprintf (dump_file, "failed: evolution of base is not affine.\n"); +- return false; ++ if (nest) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "failed: evolution of base is not" ++ " affine.\n"); ++ return false; ++ } ++ else ++ { ++ base_iv.base = base; ++ base_iv.step = ssize_int (0); ++ base_iv.no_overflow = true; ++ } + } + } + else +@@ -800,10 +811,18 @@ + else if (!simple_iv (loop, loop_containing_stmt (stmt), + poffset, &offset_iv, false)) + { +- if (dump_file && (dump_flags & TDF_DETAILS)) +- fprintf (dump_file, "failed: evolution of offset is not" +- " affine.\n"); +- return false; ++ if (nest) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "failed: evolution of offset is not" ++ " affine.\n"); ++ return false; ++ } ++ else ++ { ++ offset_iv.base = poffset; ++ offset_iv.step = ssize_int (0); ++ } + } + } + +@@ -967,7 +986,7 @@ + DR_REF (dr) = memref; + DR_IS_READ (dr) = is_read; + +- dr_analyze_innermost (dr); ++ dr_analyze_innermost (dr, nest); + dr_analyze_indices (dr, nest, loop); + dr_analyze_alias (dr); + +@@ -5185,7 +5204,7 @@ + DR_STMT (dr) = stmt; + DR_REF (dr) = op0; + +- res = dr_analyze_innermost (dr) ++ res = dr_analyze_innermost (dr, loop_containing_stmt (stmt)) + && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)); + + free_data_ref (dr); +@@ -5225,7 +5244,7 @@ + + DR_STMT (dr) = stmt; + DR_REF (dr) = *ref->pos; +- dr_analyze_innermost (dr); ++ dr_analyze_innermost (dr, loop_containing_stmt (stmt)); + base_address = DR_BASE_ADDRESS (dr); + + if (!base_address) + +=== modified file 'gcc/tree-data-ref.h' +--- old/gcc/tree-data-ref.h 2011-03-27 09:38:18 +0000 ++++ new/gcc/tree-data-ref.h 2011-10-02 08:43:10 +0000 +@@ -386,7 +386,7 @@ + DEF_VEC_ALLOC_O (data_ref_loc, heap); + + bool get_references_in_stmt (gimple, VEC (data_ref_loc, heap) **); +-bool dr_analyze_innermost (struct data_reference *); ++bool dr_analyze_innermost (struct data_reference *, struct loop *); + extern bool compute_data_dependences_for_loop (struct loop *, bool, + VEC (loop_p, heap) **, + VEC (data_reference_p, heap) **, + +=== modified file 'gcc/tree-loop-distribution.c' +--- old/gcc/tree-loop-distribution.c 2011-05-11 13:07:54 +0000 ++++ new/gcc/tree-loop-distribution.c 2011-10-02 08:43:10 +0000 +@@ -267,7 +267,7 @@ + + DR_STMT (dr) = stmt; + DR_REF (dr) = op0; +- res = dr_analyze_innermost (dr); ++ res = dr_analyze_innermost (dr, loop_containing_stmt (stmt)); + gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0))); + + nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); + +=== modified file 'gcc/tree-predcom.c' +--- old/gcc/tree-predcom.c 2011-02-11 14:19:44 +0000 ++++ new/gcc/tree-predcom.c 2011-10-02 08:43:10 +0000 +@@ -1114,7 +1114,7 @@ + memset (&init_dr, 0, sizeof (struct data_reference)); + DR_REF (&init_dr) = init_ref; + DR_STMT (&init_dr) = phi; +- if (!dr_analyze_innermost (&init_dr)) ++ if (!dr_analyze_innermost (&init_dr, loop)) + return NULL; + + if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref)) + +=== modified file 
'gcc/tree-vect-data-refs.c' +--- old/gcc/tree-vect-data-refs.c 2011-07-04 11:13:51 +0000 ++++ new/gcc/tree-vect-data-refs.c 2011-10-02 08:43:10 +0000 +@@ -353,11 +353,7 @@ + + /* Check that the data-refs have same bases and offsets. If not, we can't + determine if they are dependent. */ +- if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb) +- && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR +- || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR +- || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0) +- != TREE_OPERAND (DR_BASE_ADDRESS (drb),0))) ++ if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0) + || !dr_equal_offsets_p (dra, drb)) + return true; + +@@ -403,11 +399,7 @@ + + /* Check that the data-refs have same first location (except init) and they + are both either store or load (not load and store). */ +- if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb) +- && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR +- || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR +- || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0) +- != TREE_OPERAND (DR_BASE_ADDRESS (drb),0))) ++ if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0) + || !dr_equal_offsets_p (dra, drb) + || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) + || DR_IS_READ (dra) != DR_IS_READ (drb)) +@@ -615,6 +607,11 @@ + if (vect_check_interleaving (dra, drb)) + return false; + ++ /* Read-read is OK (we need this check here, after checking for ++ interleaving). */ ++ if (DR_IS_READ (dra) && DR_IS_READ (drb)) ++ return false; ++ + if (vect_print_dump_info (REPORT_DR_DETAILS)) + { + fprintf (vect_dump, "can't determine dependence between "); + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106821.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106821.patch new file mode 100644 index 0000000000..f25a37858d --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106821.patch @@ -0,0 +1,240 @@ +2011-10-06 Ira Rosen <ira.rosen@linaro.org> + + gcc/testsuite/ + * gcc.dg/vect/bb-slp-26.c: Simplify to make the basic block + vectorizable. + + Backport from mainline: + + 2011-09-25 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vect-slp.c (vect_slp_analyze_bb_1): Split out core part + of vect_analyze_bb here. + (vect_analyze_bb): Loop over vector sizes calling vect_analyze_bb_1. + + gcc/testsuite/ + * lib/target-supports.exp (check_effective_target_vect64): New. + * gcc.dg/vect/bb-slp-11.c: Expect the error message twice in case + of multiple vector sizes. + * gcc.dg/vect/bb-slp-26.c: New. 
+ +=== modified file 'gcc/testsuite/gcc.dg/vect/bb-slp-11.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2011-10-02 10:40:34 +0000 +@@ -49,6 +49,7 @@ + } + + /* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */ +-/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" } } */ ++/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2 "slp" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "slp" } } */ + + +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-26.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-26.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-26.c 2011-10-02 10:40:34 +0000 +@@ -0,0 +1,60 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define A 3 ++#define B 4 ++#define N 256 ++ ++char src[N], dst[N]; ++ ++void foo (char * __restrict__ dst, char * __restrict__ src, int h, ++ int stride, int dummy) ++{ ++ int i; ++ h /= 16; ++ for (i = 0; i < h; i++) ++ { ++ dst[0] += A*src[0]; ++ dst[1] += A*src[1]; ++ dst[2] += A*src[2]; ++ dst[3] += A*src[3]; ++ dst[4] += A*src[4]; ++ dst[5] += A*src[5]; ++ dst[6] += A*src[6]; ++ dst[7] += A*src[7]; ++ dst += 8; ++ src += 8; ++ if (dummy == 32) ++ abort (); ++ } ++} ++ ++ ++int main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ dst[i] = 0; ++ src[i] = i/8; ++ } ++ ++ foo (dst, src, N, 8, 0); ++ ++ for (i = 0; i < N/2; i++) ++ { ++ if (dst[i] != A * src[i]) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect64 } } } */ ++/* { dg-final { cleanup-tree-dump "slp" } } */ ++ + +=== modified file 'gcc/testsuite/lib/target-supports.exp' +--- old/gcc/testsuite/lib/target-supports.exp 2011-09-20 07:54:28 +0000 ++++ new/gcc/testsuite/lib/target-supports.exp 2011-10-02 10:40:34 +0000 +@@ -3283,6 +3283,24 @@ + return $et_vect_multiple_sizes_saved + } + ++# Return 1 if the target supports vectors of 64 bits. ++ ++proc check_effective_target_vect64 { } { ++ global et_vect64 ++ ++ if [info exists et_vect64_saved] { ++ verbose "check_effective_target_vect64: using cached result" 2 ++ } else { ++ set et_vect64_saved 0 ++ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } { ++ set et_vect64_saved 1 ++ } ++ } ++ ++ verbose "check_effective_target_vect64: returning $et_vect64_saved" 2 ++ return $et_vect64_saved ++} ++ + # Return 1 if the target supports section-anchors + + proc check_effective_target_section_anchors { } { + +=== modified file 'gcc/tree-vect-slp.c' +--- old/gcc/tree-vect-slp.c 2011-07-06 12:04:10 +0000 ++++ new/gcc/tree-vect-slp.c 2011-10-02 10:40:34 +0000 +@@ -1664,42 +1664,18 @@ + + /* Check if the basic block can be vectorized. 
*/ + +-bb_vec_info +-vect_slp_analyze_bb (basic_block bb) ++static bb_vec_info ++vect_slp_analyze_bb_1 (basic_block bb) + { + bb_vec_info bb_vinfo; + VEC (ddr_p, heap) *ddrs; + VEC (slp_instance, heap) *slp_instances; + slp_instance instance; +- int i, insns = 0; +- gimple_stmt_iterator gsi; ++ int i; + int min_vf = 2; + int max_vf = MAX_VECTORIZATION_FACTOR; + bool data_dependence_in_bb = false; + +- current_vector_size = 0; +- +- if (vect_print_dump_info (REPORT_DETAILS)) +- fprintf (vect_dump, "===vect_slp_analyze_bb===\n"); +- +- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) +- { +- gimple stmt = gsi_stmt (gsi); +- if (!is_gimple_debug (stmt) +- && !gimple_nop_p (stmt) +- && gimple_code (stmt) != GIMPLE_LABEL) +- insns++; +- } +- +- if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) +- { +- if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) +- fprintf (vect_dump, "not vectorized: too many instructions in basic " +- "block.\n"); +- +- return NULL; +- } +- + bb_vinfo = new_bb_vec_info (bb); + if (!bb_vinfo) + return NULL; +@@ -1819,6 +1795,61 @@ + } + + ++bb_vec_info ++vect_slp_analyze_bb (basic_block bb) ++{ ++ bb_vec_info bb_vinfo; ++ int insns = 0; ++ gimple_stmt_iterator gsi; ++ unsigned int vector_sizes; ++ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "===vect_slp_analyze_bb===\n"); ++ ++ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ gimple stmt = gsi_stmt (gsi); ++ if (!is_gimple_debug (stmt) ++ && !gimple_nop_p (stmt) ++ && gimple_code (stmt) != GIMPLE_LABEL) ++ insns++; ++ } ++ ++ if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) ++ { ++ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) ++ fprintf (vect_dump, "not vectorized: too many instructions in basic " ++ "block.\n"); ++ ++ return NULL; ++ } ++ ++ /* Autodetect first vector size we try. */ ++ current_vector_size = 0; ++ vector_sizes = targetm.vectorize.autovectorize_vector_sizes (); ++ ++ while (1) ++ { ++ bb_vinfo = vect_slp_analyze_bb_1 (bb); ++ if (bb_vinfo) ++ return bb_vinfo; ++ ++ destroy_bb_vec_info (bb_vinfo); ++ ++ vector_sizes &= ~current_vector_size; ++ if (vector_sizes == 0 ++ || current_vector_size == 0) ++ return NULL; ++ ++ /* Try the next biggest vector size. */ ++ current_vector_size = 1 << floor_log2 (vector_sizes); ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "***** Re-trying analysis with " ++ "vector size %d\n", current_vector_size); ++ } ++} ++ ++ + /* SLP costs are calculated according to SLP instance unrolling factor (i.e., + the number of created vector stmts depends on the unrolling factor). + However, the actual number of vector stmts for every SLP node depends on + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106825.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106825.patch new file mode 100644 index 0000000000..13e6fd26e5 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106825.patch @@ -0,0 +1,124 @@ +2011-10-13 Andrew Stubbs <ams@codesourcery.com> + + Backport from mainline: + + 2011-10-07 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/predicates.md (shift_amount_operand): Remove constant + range check. + (shift_operator): Check range of constants for all shift operators. + + gcc/testsuite/ + * gcc.dg/pr50193-1.c: New file. + * gcc.target/arm/shiftable.c: New file. 
+ +=== modified file 'gcc/config/arm/predicates.md' +--- old/gcc/config/arm/predicates.md 2011-10-03 09:47:33 +0000 ++++ new/gcc/config/arm/predicates.md 2011-10-10 11:43:28 +0000 +@@ -129,11 +129,12 @@ + (ior (match_operand 0 "arm_rhs_operand") + (match_operand 0 "memory_operand"))) + ++;; This doesn't have to do much because the constant is already checked ++;; in the shift_operator predicate. + (define_predicate "shift_amount_operand" + (ior (and (match_test "TARGET_ARM") + (match_operand 0 "s_register_operand")) +- (and (match_operand 0 "const_int_operand") +- (match_test "INTVAL (op) > 0")))) ++ (match_operand 0 "const_int_operand"))) + + (define_predicate "arm_add_operand" + (ior (match_operand 0 "arm_rhs_operand") +@@ -219,13 +220,20 @@ + (match_test "mode == GET_MODE (op)"))) + + ;; True for shift operators. ++;; Notes: ++;; * mult is only permitted with a constant shift amount ++;; * patterns that permit register shift amounts only in ARM mode use ++;; shift_amount_operand, patterns that always allow registers do not, ++;; so we don't have to worry about that sort of thing here. + (define_special_predicate "shift_operator" + (and (ior (ior (and (match_code "mult") + (match_test "power_of_two_operand (XEXP (op, 1), mode)")) + (and (match_code "rotate") + (match_test "GET_CODE (XEXP (op, 1)) == CONST_INT + && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))) +- (match_code "ashift,ashiftrt,lshiftrt,rotatert")) ++ (and (match_code "ashift,ashiftrt,lshiftrt,rotatert") ++ (match_test "GET_CODE (XEXP (op, 1)) != CONST_INT ++ || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))) + (match_test "mode == GET_MODE (op)"))) + + ;; True for MULT, to identify which variant of shift_operator is in use. + +=== added file 'gcc/testsuite/gcc.target/arm/shiftable.c' +--- old/gcc/testsuite/gcc.target/arm/shiftable.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/shiftable.c 2011-10-10 11:43:28 +0000 +@@ -0,0 +1,63 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++/* { dg-require-effective-target arm32 } */ ++ ++/* ARM has shift-and-alu insns. Depending on the ALU op GCC represents some ++ of these as a left shift, others as a multiply. Check that we match the ++ right one. 
*/ ++ ++int ++plus (int a, int b) ++{ ++ return (a * 64) + b; ++} ++ ++/* { dg-final { scan-assembler "add.*\[al]sl #6" } } */ ++ ++int ++minus (int a, int b) ++{ ++ return a - (b * 64); ++} ++ ++/* { dg-final { scan-assembler "sub.*\[al]sl #6" } } */ ++ ++int ++ior (int a, int b) ++{ ++ return (a * 64) | b; ++} ++ ++/* { dg-final { scan-assembler "orr.*\[al]sl #6" } } */ ++ ++int ++xor (int a, int b) ++{ ++ return (a * 64) ^ b; ++} ++ ++/* { dg-final { scan-assembler "eor.*\[al]sl #6" } } */ ++ ++int ++and (int a, int b) ++{ ++ return (a * 64) & b; ++} ++ ++/* { dg-final { scan-assembler "and.*\[al]sl #6" } } */ ++ ++int ++rsb (int a, int b) ++{ ++ return (a * 64) - b; ++} ++ ++/* { dg-final { scan-assembler "rsb.*\[al]sl #6" } } */ ++ ++int ++mvn (int a, int b) ++{ ++ return ~(a * 64); ++} ++ ++/* { dg-final { scan-assembler "mvn.*\[al]sl #6" } } */ + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106826.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106826.patch new file mode 100644 index 0000000000..6642f8a667 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106826.patch @@ -0,0 +1,362 @@ +2011-10-16 Ira Rosen <ira.rosen@linaro.org> + + Backport from mainline: + + 2011-09-27 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vect-stmts.c (vectorizable_type_demotion): Handle basic block + vectorization. + (vectorizable_type_promotion): Likewise. + (vect_analyze_stmt): Call vectorizable_type_demotion and + vectorizable_type_promotion for basic blocks. + (supportable_widening_operation): Don't assume loop vectorization. + * tree-vect-slp.c (vect_build_slp_tree): Allow multiple types for + basic blocks. Update vectorization factor for basic block + vectorization. + (vect_analyze_slp_instance): Allow multiple types for basic block + vectorization. Recheck unrolling factor after construction of SLP + instance. + + gcc/testsuite/ + * gcc.dg/vect/bb-slp-11.c: Expect to get vectorized with 64-bit + vectors. + * gcc.dg/vect/bb-slp-27.c: New. + * gcc.dg/vect/bb-slp-28.c: New. + + + 2011-10-04 Ira Rosen <ira.rosen@linaro.org> + + gcc/testsuite/ + * lib/target-supports.exp (check_effective_target_vect_multiple_sizes): + Make et_vect_multiple_sizes_saved global. + (check_effective_target_vect64): Make et_vect64_saved global. 
+ +=== modified file 'gcc/testsuite/gcc.dg/vect/bb-slp-11.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2011-10-02 10:40:34 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2011-10-06 11:08:08 +0000 +@@ -48,8 +48,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */ +-/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" { xfail vect_multiple_sizes } } } */ +-/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2 "slp" { target vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect64 } } } */ + /* { dg-final { cleanup-tree-dump "slp" } } */ + + +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-27.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-27.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-27.c 2011-10-06 11:08:08 +0000 +@@ -0,0 +1,49 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define A 3 ++#define N 16 ++ ++short src[N], dst[N]; ++ ++void foo (int a) ++{ ++ dst[0] += a*src[0]; ++ dst[1] += a*src[1]; ++ dst[2] += a*src[2]; ++ dst[3] += a*src[3]; ++ dst[4] += a*src[4]; ++ dst[5] += a*src[5]; ++ dst[6] += a*src[6]; ++ dst[7] += a*src[7]; ++} ++ ++ ++int main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ dst[i] = 0; ++ src[i] = i; ++ } ++ ++ foo (A); ++ ++ for (i = 0; i < 8; i++) ++ { ++ if (dst[i] != A * i) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult && { vect_unpack && vect_pack_trunc } } } } } */ ++/* { dg-final { cleanup-tree-dump "slp" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-28.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-28.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-28.c 2011-10-06 11:08:08 +0000 +@@ -0,0 +1,71 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define A 300 ++#define N 16 ++ ++char src[N]; ++short dst[N]; ++short src1[N], dst1[N]; ++ ++void foo (int a) ++{ ++ dst[0] = (short) (a * (int) src[0]); ++ dst[1] = (short) (a * (int) src[1]); ++ dst[2] = (short) (a * (int) src[2]); ++ dst[3] = (short) (a * (int) src[3]); ++ dst[4] = (short) (a * (int) src[4]); ++ dst[5] = (short) (a * (int) src[5]); ++ dst[6] = (short) (a * (int) src[6]); ++ dst[7] = (short) (a * (int) src[7]); ++ dst[8] = (short) (a * (int) src[8]); ++ dst[9] = (short) (a * (int) src[9]); ++ dst[10] = (short) (a * (int) src[10]); ++ dst[11] = (short) (a * (int) src[11]); ++ dst[12] = (short) (a * (int) src[12]); ++ dst[13] = (short) (a * (int) src[13]); ++ dst[14] = (short) (a * (int) src[14]); ++ dst[15] = (short) (a * (int) src[15]); ++ ++ dst1[0] += src1[0]; ++ dst1[1] += src1[1]; ++ dst1[2] += src1[2]; ++ dst1[3] += src1[3]; ++ dst1[4] += src1[4]; ++ dst1[5] += src1[5]; ++ dst1[6] += src1[6]; ++ dst1[7] += src1[7]; ++} ++ ++ ++int main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ dst[i] = 2; ++ dst1[i] = 0; ++ src[i] = i; ++ src1[i] = i+2; ++ } ++ ++ foo (A); ++ ++ for (i = 0; i < N; i++) ++ { ++ if (dst[i] != A * i ++ || (i < N/2 && dst1[i] != i + 2)) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult && { vect_pack_trunc && vect_unpack } } } } } */ ++/* { dg-final { cleanup-tree-dump "slp" 
} } */ ++ + +=== modified file 'gcc/testsuite/lib/target-supports.exp' +--- old/gcc/testsuite/lib/target-supports.exp 2011-10-02 10:40:34 +0000 ++++ new/gcc/testsuite/lib/target-supports.exp 2011-10-06 11:08:08 +0000 +@@ -3268,7 +3268,7 @@ + # Return 1 if the target supports multiple vector sizes + + proc check_effective_target_vect_multiple_sizes { } { +- global et_vect_multiple_sizes ++ global et_vect_multiple_sizes_saved + + if [info exists et_vect_multiple_sizes_saved] { + verbose "check_effective_target_vect_multiple_sizes: using cached result" 2 +@@ -3286,7 +3286,7 @@ + # Return 1 if the target supports vectors of 64 bits. + + proc check_effective_target_vect64 { } { +- global et_vect64 ++ global et_vect64_saved + + if [info exists et_vect64_saved] { + verbose "check_effective_target_vect64: using cached result" 2 + +=== modified file 'gcc/tree-vect-slp.c' +--- old/gcc/tree-vect-slp.c 2011-10-02 10:40:34 +0000 ++++ new/gcc/tree-vect-slp.c 2011-10-06 11:08:08 +0000 +@@ -386,20 +386,15 @@ + return false; + } + +- ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype); +- if (ncopies != 1) +- { +- if (vect_print_dump_info (REPORT_SLP)) +- fprintf (vect_dump, "SLP with multiple types "); +- +- /* FORNOW: multiple types are unsupported in BB SLP. */ +- if (bb_vinfo) +- return false; +- } +- + /* In case of multiple types we need to detect the smallest type. */ + if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype)) +- *max_nunits = TYPE_VECTOR_SUBPARTS (vectype); ++ { ++ *max_nunits = TYPE_VECTOR_SUBPARTS (vectype); ++ if (bb_vinfo) ++ vectorization_factor = *max_nunits; ++ } ++ ++ ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype); + + if (is_gimple_call (stmt)) + rhs_code = CALL_EXPR; +@@ -1183,7 +1178,6 @@ + if (loop_vinfo) + vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + else +- /* No multitypes in BB SLP. */ + vectorization_factor = nunits; + + /* Calculate the unrolling factor. */ +@@ -1246,16 +1240,23 @@ + &max_nunits, &load_permutation, &loads, + vectorization_factor)) + { ++ /* Calculate the unrolling factor based on the smallest type. */ ++ if (max_nunits > nunits) ++ unrolling_factor = least_common_multiple (max_nunits, group_size) ++ / group_size; ++ ++ if (unrolling_factor != 1 && !loop_vinfo) ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ fprintf (vect_dump, "Build SLP failed: unrolling required in basic" ++ " block SLP"); ++ return false; ++ } ++ + /* Create a new SLP instance. */ + new_instance = XNEW (struct _slp_instance); + SLP_INSTANCE_TREE (new_instance) = node; + SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size; +- /* Calculate the unrolling factor based on the smallest type in the +- loop. */ +- if (max_nunits > nunits) +- unrolling_factor = least_common_multiple (max_nunits, group_size) +- / group_size; +- + SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor; + SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost; + SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost; + +=== modified file 'gcc/tree-vect-stmts.c' +--- old/gcc/tree-vect-stmts.c 2011-10-04 08:57:25 +0000 ++++ new/gcc/tree-vect-stmts.c 2011-10-16 12:16:07 +0000 +@@ -3081,11 +3081,9 @@ + VEC (tree, heap) *vec_oprnds0 = NULL; + VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL; + tree last_oprnd, intermediate_type; +- +- /* FORNOW: not supported by basic block SLP vectorization. 
*/ +- gcc_assert (loop_vinfo); +- +- if (!STMT_VINFO_RELEVANT_P (stmt_info)) ++ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); ++ ++ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) + return false; + + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) +@@ -3113,7 +3111,7 @@ + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)) + && CONVERT_EXPR_CODE_P (code)))) + return false; +- if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL, ++ if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo, + &def_stmt, &def, &dt[0], &vectype_in)) + { + if (vect_print_dump_info (REPORT_DETAILS)) +@@ -3360,11 +3358,9 @@ + int multi_step_cvt = 0; + VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL; + VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL; +- +- /* FORNOW: not supported by basic block SLP vectorization. */ +- gcc_assert (loop_vinfo); +- +- if (!STMT_VINFO_RELEVANT_P (stmt_info)) ++ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); ++ ++ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) + return false; + + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) +@@ -3393,7 +3389,7 @@ + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)) + && CONVERT_EXPR_CODE_P (code)))) + return false; +- if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL, ++ if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo, + &def_stmt, &def, &dt[0], &vectype_in)) + { + if (vect_print_dump_info (REPORT_DETAILS)) +@@ -5153,7 +5149,9 @@ + else + { + if (bb_vinfo) +- ok = (vectorizable_shift (stmt, NULL, NULL, node) ++ ok = (vectorizable_type_promotion (stmt, NULL, NULL, node) ++ || vectorizable_type_demotion (stmt, NULL, NULL, node) ++ || vectorizable_shift (stmt, NULL, NULL, node) + || vectorizable_operation (stmt, NULL, NULL, node) + || vectorizable_assignment (stmt, NULL, NULL, node) + || vectorizable_load (stmt, NULL, NULL, node, NULL) +@@ -5780,7 +5778,7 @@ + { + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info); +- struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); ++ struct loop *vect_loop = NULL; + bool ordered_p; + enum machine_mode vec_mode; + enum insn_code icode1, icode2; +@@ -5789,6 +5787,9 @@ + tree wide_vectype = vectype_out; + enum tree_code c1, c2; + ++ if (loop_info) ++ vect_loop = LOOP_VINFO_LOOP (loop_info); ++ + /* The result of a vectorized widening operation usually requires two vectors + (because the widened results do not fit int one vector). The generated + vector results would normally be expected to be generated in the same +@@ -5809,7 +5810,8 @@ + iterations in parallel). We therefore don't allow to change the order + of the computation in the inner-loop during outer-loop vectorization. */ + +- if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction ++ if (vect_loop ++ && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction + && !nested_in_vect_loop_p (vect_loop, stmt)) + ordered_p = false; + else + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106827.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106827.patch new file mode 100644 index 0000000000..28caa40076 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106827.patch @@ -0,0 +1,622 @@ +2011-10-17 Michael Hope <michael.hope@linaro.org> + + Backport from mainline r178852: + + 2011-09-14 Julian Brown <julian@codesourcery.com> + + gcc/ + * config/arm/arm.c (arm_override_options): Add unaligned_access + support. 
+ (arm_file_start): Emit attribute for unaligned access as appropriate. + * config/arm/arm.md (UNSPEC_UNALIGNED_LOAD) + (UNSPEC_UNALIGNED_STORE): Add constants for unspecs. + (insv, extzv): Add unaligned-access support. + (extv): Change to expander. Likewise. + (extzv_t1, extv_regsi): Add helpers. + (unaligned_loadsi, unaligned_loadhis, unaligned_loadhiu) + (unaligned_storesi, unaligned_storehi): New. + (*extv_reg): New (previous extv implementation). + * config/arm/arm.opt (munaligned_access): Add option. + * config/arm/constraints.md (Uw): New constraint. + * expmed.c (store_bit_field_1): Adjust bitfield numbering according + to size of access, not size of unit, when BITS_BIG_ENDIAN != + BYTES_BIG_ENDIAN. Don't use bitfield accesses for + volatile accesses when -fstrict-volatile-bitfields is in effect. + (extract_bit_field_1): Likewise. + + Backport from mainline r172697: + + 2011-04-19 Wei Guozhi <carrot@google.com> + + PR target/47855 + gcc/ + * config/arm/arm-protos.h (thumb1_legitimate_address_p): New prototype. + * config/arm/arm.c (thumb1_legitimate_address_p): Remove the static + linkage. + * config/arm/constraints.md (Uu): New constraint. + * config/arm/arm.md (*arm_movqi_insn): Compute attr "length". + +=== modified file 'gcc/config/arm/arm-protos.h' +Index: gcc-4_6-branch/gcc/config/arm/arm-protos.h +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/arm-protos.h 2012-03-05 16:07:15.000000000 -0800 ++++ gcc-4_6-branch/gcc/config/arm/arm-protos.h 2012-03-05 16:07:50.392936694 -0800 +@@ -59,6 +59,7 @@ + int); + extern rtx thumb_legitimize_reload_address (rtx *, enum machine_mode, int, int, + int); ++extern int thumb1_legitimate_address_p (enum machine_mode, rtx, int); + extern int arm_const_double_rtx (rtx); + extern int neg_const_double_rtx_ok_for_fpa (rtx); + extern int vfp3_const_double_rtx (rtx); +Index: gcc-4_6-branch/gcc/config/arm/arm.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/arm.c 2012-03-05 16:07:15.000000000 -0800 ++++ gcc-4_6-branch/gcc/config/arm/arm.c 2012-03-05 16:07:50.400936694 -0800 +@@ -2065,6 +2065,28 @@ + fix_cm3_ldrd = 0; + } + ++ /* Enable -munaligned-access by default for ++ - all ARMv6 architecture-based processors ++ - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors. ++ ++ Disable -munaligned-access by default for ++ - all pre-ARMv6 architecture-based processors ++ - ARMv6-M architecture-based processors. */ ++ ++ if (unaligned_access == 2) ++ { ++ if (arm_arch6 && (arm_arch_notm || arm_arch7)) ++ unaligned_access = 1; ++ else ++ unaligned_access = 0; ++ } ++ else if (unaligned_access == 1 ++ && !(arm_arch6 && (arm_arch_notm || arm_arch7))) ++ { ++ warning (0, "target CPU does not support unaligned accesses"); ++ unaligned_access = 0; ++ } ++ + if (TARGET_THUMB1 && flag_schedule_insns) + { + /* Don't warn since it's on by default in -O2. */ +@@ -6123,7 +6145,7 @@ + addresses based on the frame pointer or arg pointer until the + reload pass starts. This is so that eliminating such addresses + into stack based ones won't produce impossible code. */ +-static int ++int + thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p) + { + /* ??? Not clear if this is right. Experiment. */ +@@ -22251,6 +22273,10 @@ + val = 6; + asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val); + ++ /* Tag_CPU_unaligned_access. 
*/ ++ asm_fprintf (asm_out_file, "\t.eabi_attribute 34, %d\n", ++ unaligned_access); ++ + /* Tag_ABI_FP_16bit_format. */ + if (arm_fp16_format) + asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n", +Index: gcc-4_6-branch/gcc/config/arm/arm.md +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/arm.md 2012-03-05 16:07:15.000000000 -0800 ++++ gcc-4_6-branch/gcc/config/arm/arm.md 2012-03-05 16:09:26.284941314 -0800 +@@ -114,6 +114,10 @@ + ; another symbolic address. + (UNSPEC_MEMORY_BARRIER 28) ; Represent a memory barrier. + (UNSPEC_PIC_UNIFIED 29) ; Create a common pic addressing form. ++ (UNSPEC_UNALIGNED_LOAD 30) ; Used to represent ldr/ldrh instructions that access ++ ; unaligned locations, on architectures which support ++ ; that. ++ (UNSPEC_UNALIGNED_STORE 31) ; Same for str/strh. + ] + ) + +@@ -2461,10 +2465,10 @@ + ;;; this insv pattern, so this pattern needs to be reevalutated. + + (define_expand "insv" +- [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "") +- (match_operand:SI 1 "general_operand" "") +- (match_operand:SI 2 "general_operand" "")) +- (match_operand:SI 3 "reg_or_int_operand" ""))] ++ [(set (zero_extract (match_operand 0 "nonimmediate_operand" "") ++ (match_operand 1 "general_operand" "") ++ (match_operand 2 "general_operand" "")) ++ (match_operand 3 "reg_or_int_operand" ""))] + "TARGET_ARM || arm_arch_thumb2" + " + { +@@ -2475,35 +2479,70 @@ + + if (arm_arch_thumb2) + { +- bool use_bfi = TRUE; +- +- if (GET_CODE (operands[3]) == CONST_INT) ++ if (unaligned_access && MEM_P (operands[0]) ++ && s_register_operand (operands[3], GET_MODE (operands[3])) ++ && (width == 16 || width == 32) && (start_bit % BITS_PER_UNIT) == 0) + { +- HOST_WIDE_INT val = INTVAL (operands[3]) & mask; ++ rtx base_addr; ++ ++ if (BYTES_BIG_ENDIAN) ++ start_bit = GET_MODE_BITSIZE (GET_MODE (operands[3])) - width ++ - start_bit; + +- if (val == 0) ++ if (width == 32) + { +- emit_insn (gen_insv_zero (operands[0], operands[1], +- operands[2])); +- DONE; ++ base_addr = adjust_address (operands[0], SImode, ++ start_bit / BITS_PER_UNIT); ++ emit_insn (gen_unaligned_storesi (base_addr, operands[3])); + } ++ else ++ { ++ rtx tmp = gen_reg_rtx (HImode); + +- /* See if the set can be done with a single orr instruction. */ +- if (val == mask && const_ok_for_arm (val << start_bit)) +- use_bfi = FALSE; ++ base_addr = adjust_address (operands[0], HImode, ++ start_bit / BITS_PER_UNIT); ++ emit_move_insn (tmp, gen_lowpart (HImode, operands[3])); ++ emit_insn (gen_unaligned_storehi (base_addr, tmp)); ++ } ++ DONE; + } +- +- if (use_bfi) ++ else if (s_register_operand (operands[0], GET_MODE (operands[0]))) + { +- if (GET_CODE (operands[3]) != REG) +- operands[3] = force_reg (SImode, operands[3]); ++ bool use_bfi = TRUE; + +- emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2], +- operands[3])); +- DONE; ++ if (GET_CODE (operands[3]) == CONST_INT) ++ { ++ HOST_WIDE_INT val = INTVAL (operands[3]) & mask; ++ ++ if (val == 0) ++ { ++ emit_insn (gen_insv_zero (operands[0], operands[1], ++ operands[2])); ++ DONE; ++ } ++ ++ /* See if the set can be done with a single orr instruction. 
*/ ++ if (val == mask && const_ok_for_arm (val << start_bit)) ++ use_bfi = FALSE; ++ } ++ ++ if (use_bfi) ++ { ++ if (GET_CODE (operands[3]) != REG) ++ operands[3] = force_reg (SImode, operands[3]); ++ ++ emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2], ++ operands[3])); ++ DONE; ++ } + } ++ else ++ FAIL; + } + ++ if (!s_register_operand (operands[0], GET_MODE (operands[0]))) ++ FAIL; ++ + target = copy_rtx (operands[0]); + /* Avoid using a subreg as a subtarget, and avoid writing a paradoxical + subreg as the final target. */ +@@ -3695,12 +3734,10 @@ + ;; to reduce register pressure later on. + + (define_expand "extzv" +- [(set (match_dup 4) +- (ashift:SI (match_operand:SI 1 "register_operand" "") +- (match_operand:SI 2 "const_int_operand" ""))) +- (set (match_operand:SI 0 "register_operand" "") +- (lshiftrt:SI (match_dup 4) +- (match_operand:SI 3 "const_int_operand" "")))] ++ [(set (match_operand 0 "s_register_operand" "") ++ (zero_extract (match_operand 1 "nonimmediate_operand" "") ++ (match_operand 2 "const_int_operand" "") ++ (match_operand 3 "const_int_operand" "")))] + "TARGET_THUMB1 || arm_arch_thumb2" + " + { +@@ -3709,10 +3746,57 @@ + + if (arm_arch_thumb2) + { +- emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2], +- operands[3])); +- DONE; ++ HOST_WIDE_INT width = INTVAL (operands[2]); ++ HOST_WIDE_INT bitpos = INTVAL (operands[3]); ++ ++ if (unaligned_access && MEM_P (operands[1]) ++ && (width == 16 || width == 32) && (bitpos % BITS_PER_UNIT) == 0) ++ { ++ rtx base_addr; ++ ++ if (BYTES_BIG_ENDIAN) ++ bitpos = GET_MODE_BITSIZE (GET_MODE (operands[0])) - width ++ - bitpos; ++ ++ if (width == 32) ++ { ++ base_addr = adjust_address (operands[1], SImode, ++ bitpos / BITS_PER_UNIT); ++ emit_insn (gen_unaligned_loadsi (operands[0], base_addr)); ++ } ++ else ++ { ++ rtx dest = operands[0]; ++ rtx tmp = gen_reg_rtx (SImode); ++ ++ /* We may get a paradoxical subreg here. Strip it off. */ ++ if (GET_CODE (dest) == SUBREG ++ && GET_MODE (dest) == SImode ++ && GET_MODE (SUBREG_REG (dest)) == HImode) ++ dest = SUBREG_REG (dest); ++ ++ if (GET_MODE_BITSIZE (GET_MODE (dest)) != width) ++ FAIL; ++ ++ base_addr = adjust_address (operands[1], HImode, ++ bitpos / BITS_PER_UNIT); ++ emit_insn (gen_unaligned_loadhiu (tmp, base_addr)); ++ emit_move_insn (gen_lowpart (SImode, dest), tmp); ++ } ++ DONE; ++ } ++ else if (s_register_operand (operands[1], GET_MODE (operands[1]))) ++ { ++ emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2], ++ operands[3])); ++ DONE; ++ } ++ else ++ FAIL; + } ++ ++ if (!s_register_operand (operands[1], GET_MODE (operands[1]))) ++ FAIL; + + operands[3] = GEN_INT (rshift); + +@@ -3722,12 +3806,154 @@ + DONE; + } + +- operands[2] = GEN_INT (lshift); +- operands[4] = gen_reg_rtx (SImode); ++ emit_insn (gen_extzv_t1 (operands[0], operands[1], GEN_INT (lshift), ++ operands[3], gen_reg_rtx (SImode))); ++ DONE; + }" + ) + +-(define_insn "extv" ++;; Helper for extzv, for the Thumb-1 register-shifts case. 
++ ++(define_expand "extzv_t1" ++ [(set (match_operand:SI 4 "s_register_operand" "") ++ (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "") ++ (match_operand:SI 2 "const_int_operand" ""))) ++ (set (match_operand:SI 0 "s_register_operand" "") ++ (lshiftrt:SI (match_dup 4) ++ (match_operand:SI 3 "const_int_operand" "")))] ++ "TARGET_THUMB1" ++ "") ++ ++(define_expand "extv" ++ [(set (match_operand 0 "s_register_operand" "") ++ (sign_extract (match_operand 1 "nonimmediate_operand" "") ++ (match_operand 2 "const_int_operand" "") ++ (match_operand 3 "const_int_operand" "")))] ++ "arm_arch_thumb2" ++{ ++ HOST_WIDE_INT width = INTVAL (operands[2]); ++ HOST_WIDE_INT bitpos = INTVAL (operands[3]); ++ ++ if (unaligned_access && MEM_P (operands[1]) && (width == 16 || width == 32) ++ && (bitpos % BITS_PER_UNIT) == 0) ++ { ++ rtx base_addr; ++ ++ if (BYTES_BIG_ENDIAN) ++ bitpos = GET_MODE_BITSIZE (GET_MODE (operands[0])) - width - bitpos; ++ ++ if (width == 32) ++ { ++ base_addr = adjust_address (operands[1], SImode, ++ bitpos / BITS_PER_UNIT); ++ emit_insn (gen_unaligned_loadsi (operands[0], base_addr)); ++ } ++ else ++ { ++ rtx dest = operands[0]; ++ rtx tmp = gen_reg_rtx (SImode); ++ ++ /* We may get a paradoxical subreg here. Strip it off. */ ++ if (GET_CODE (dest) == SUBREG ++ && GET_MODE (dest) == SImode ++ && GET_MODE (SUBREG_REG (dest)) == HImode) ++ dest = SUBREG_REG (dest); ++ ++ if (GET_MODE_BITSIZE (GET_MODE (dest)) != width) ++ FAIL; ++ ++ base_addr = adjust_address (operands[1], HImode, ++ bitpos / BITS_PER_UNIT); ++ emit_insn (gen_unaligned_loadhis (tmp, base_addr)); ++ emit_move_insn (gen_lowpart (SImode, dest), tmp); ++ } ++ ++ DONE; ++ } ++ else if (!s_register_operand (operands[1], GET_MODE (operands[1]))) ++ FAIL; ++ else if (GET_MODE (operands[0]) == SImode ++ && GET_MODE (operands[1]) == SImode) ++ { ++ emit_insn (gen_extv_regsi (operands[0], operands[1], operands[2], ++ operands[3])); ++ DONE; ++ } ++ ++ FAIL; ++}) ++ ++; Helper to expand register forms of extv with the proper modes. ++ ++(define_expand "extv_regsi" ++ [(set (match_operand:SI 0 "s_register_operand" "") ++ (sign_extract:SI (match_operand:SI 1 "s_register_operand" "") ++ (match_operand 2 "const_int_operand" "") ++ (match_operand 3 "const_int_operand" "")))] ++ "" ++{ ++}) ++ ++; ARMv6+ unaligned load/store instructions (used for packed structure accesses). 
++ ++(define_insn "unaligned_loadsi" ++ [(set (match_operand:SI 0 "s_register_operand" "=l,r") ++ (unspec:SI [(match_operand:SI 1 "memory_operand" "Uw,m")] ++ UNSPEC_UNALIGNED_LOAD))] ++ "unaligned_access && TARGET_32BIT" ++ "ldr%?\t%0, %1\t@ unaligned" ++ [(set_attr "arch" "t2,any") ++ (set_attr "length" "2,4") ++ (set_attr "predicable" "yes") ++ (set_attr "type" "load1")]) ++ ++(define_insn "unaligned_loadhis" ++ [(set (match_operand:SI 0 "s_register_operand" "=l,r") ++ (sign_extend:SI ++ (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")] ++ UNSPEC_UNALIGNED_LOAD)))] ++ "unaligned_access && TARGET_32BIT" ++ "ldr%(sh%)\t%0, %1\t@ unaligned" ++ [(set_attr "arch" "t2,any") ++ (set_attr "length" "2,4") ++ (set_attr "predicable" "yes") ++ (set_attr "type" "load_byte")]) ++ ++(define_insn "unaligned_loadhiu" ++ [(set (match_operand:SI 0 "s_register_operand" "=l,r") ++ (zero_extend:SI ++ (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")] ++ UNSPEC_UNALIGNED_LOAD)))] ++ "unaligned_access && TARGET_32BIT" ++ "ldr%(h%)\t%0, %1\t@ unaligned" ++ [(set_attr "arch" "t2,any") ++ (set_attr "length" "2,4") ++ (set_attr "predicable" "yes") ++ (set_attr "type" "load_byte")]) ++ ++(define_insn "unaligned_storesi" ++ [(set (match_operand:SI 0 "memory_operand" "=Uw,m") ++ (unspec:SI [(match_operand:SI 1 "s_register_operand" "l,r")] ++ UNSPEC_UNALIGNED_STORE))] ++ "unaligned_access && TARGET_32BIT" ++ "str%?\t%1, %0\t@ unaligned" ++ [(set_attr "arch" "t2,any") ++ (set_attr "length" "2,4") ++ (set_attr "predicable" "yes") ++ (set_attr "type" "store1")]) ++ ++(define_insn "unaligned_storehi" ++ [(set (match_operand:HI 0 "memory_operand" "=Uw,m") ++ (unspec:HI [(match_operand:HI 1 "s_register_operand" "l,r")] ++ UNSPEC_UNALIGNED_STORE))] ++ "unaligned_access && TARGET_32BIT" ++ "str%(h%)\t%1, %0\t@ unaligned" ++ [(set_attr "arch" "t2,any") ++ (set_attr "length" "2,4") ++ (set_attr "predicable" "yes") ++ (set_attr "type" "store1")]) ++ ++(define_insn "*extv_reg" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "const_int_operand" "M") +@@ -6069,8 +6295,8 @@ + + + (define_insn "*arm_movqi_insn" +- [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m") +- (match_operand:QI 1 "general_operand" "rI,K,m,r"))] ++ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,l,Uu,r,m") ++ (match_operand:QI 1 "general_operand" "rI,K,Uu,l,m,r"))] + "TARGET_32BIT + && ( register_operand (operands[0], QImode) + || register_operand (operands[1], QImode))" +@@ -6078,10 +6304,14 @@ + mov%?\\t%0, %1 + mvn%?\\t%0, #%B1 + ldr%(b%)\\t%0, %1 ++ str%(b%)\\t%1, %0 ++ ldr%(b%)\\t%0, %1 + str%(b%)\\t%1, %0" +- [(set_attr "type" "*,*,load1,store1") +- (set_attr "insn" "mov,mvn,*,*") +- (set_attr "predicable" "yes")] ++ [(set_attr "type" "*,*,load1,store1,load1,store1") ++ (set_attr "insn" "mov,mvn,*,*,*,*") ++ (set_attr "predicable" "yes") ++ (set_attr "arch" "any,any,t2,t2,any,any") ++ (set_attr "length" "4,4,2,2,4,4")] + ) + + (define_insn "*thumb1_movqi_insn" +Index: gcc-4_6-branch/gcc/config/arm/arm.opt +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/arm.opt 2012-03-05 16:07:14.000000000 -0800 ++++ gcc-4_6-branch/gcc/config/arm/arm.opt 2012-03-05 16:07:50.404936697 -0800 +@@ -173,3 +173,7 @@ + Target Report Var(fix_cm3_ldrd) Init(2) + Avoid overlapping destination and address registers on LDRD instructions + that may trigger Cortex-M3 errata. 
++ ++munaligned-access ++Target Report Var(unaligned_access) Init(2) ++Enable unaligned word and halfword accesses to packed data. +Index: gcc-4_6-branch/gcc/config/arm/constraints.md +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/constraints.md 2012-03-05 16:07:14.000000000 -0800 ++++ gcc-4_6-branch/gcc/config/arm/constraints.md 2012-03-05 16:07:50.404936697 -0800 +@@ -36,6 +36,7 @@ + ;; The following memory constraints have been used: + ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us + ;; in ARM state: Uq ++;; in Thumb state: Uu, Uw + + + (define_register_constraint "f" "TARGET_ARM ? FPA_REGS : NO_REGS" +@@ -344,6 +345,27 @@ + (and (match_code "mem") + (match_test "REG_P (XEXP (op, 0))"))) + ++(define_memory_constraint "Uu" ++ "@internal ++ In Thumb state an address that is valid in 16bit encoding." ++ (and (match_code "mem") ++ (match_test "TARGET_THUMB ++ && thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0), ++ 0)"))) ++ ++; The 16-bit post-increment LDR/STR accepted by thumb1_legitimate_address_p ++; are actually LDM/STM instructions, so cannot be used to access unaligned ++; data. ++(define_memory_constraint "Uw" ++ "@internal ++ In Thumb state an address that is valid in 16bit encoding, and that can be ++ used for unaligned accesses." ++ (and (match_code "mem") ++ (match_test "TARGET_THUMB ++ && thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0), ++ 0) ++ && GET_CODE (XEXP (op, 0)) != POST_INC"))) ++ + ;; We used to have constraint letters for S and R in ARM state, but + ;; all uses of these now appear to have been removed. + +Index: gcc-4_6-branch/gcc/expmed.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/expmed.c 2012-01-04 15:37:51.000000000 -0800 ++++ gcc-4_6-branch/gcc/expmed.c 2012-03-05 16:07:50.404936697 -0800 +@@ -657,6 +657,10 @@ + && GET_MODE (value) != BLKmode + && bitsize > 0 + && GET_MODE_BITSIZE (op_mode) >= bitsize ++ /* Do not use insv for volatile bitfields when ++ -fstrict-volatile-bitfields is in effect. */ ++ && !(MEM_P (op0) && MEM_VOLATILE_P (op0) ++ && flag_strict_volatile_bitfields > 0) + && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG) + && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))) + && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize), +@@ -700,19 +704,21 @@ + copy_back = true; + } + +- /* On big-endian machines, we count bits from the most significant. +- If the bit field insn does not, we must invert. */ +- +- if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) +- xbitpos = unit - bitsize - xbitpos; +- + /* We have been counting XBITPOS within UNIT. + Count instead within the size of the register. */ +- if (BITS_BIG_ENDIAN && !MEM_P (xop0)) ++ if (BYTES_BIG_ENDIAN && !MEM_P (xop0)) + xbitpos += GET_MODE_BITSIZE (op_mode) - unit; + + unit = GET_MODE_BITSIZE (op_mode); + ++ /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count ++ "backwards" from the size of the unit we are inserting into. ++ Otherwise, we count bits from the most significant on a ++ BYTES/BITS_BIG_ENDIAN machine. */ ++ ++ if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) ++ xbitpos = unit - bitsize - xbitpos; ++ + /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. 
*/ + value1 = value; + if (GET_MODE (value) != op_mode) +@@ -1528,6 +1534,10 @@ + if (ext_mode != MAX_MACHINE_MODE + && bitsize > 0 + && GET_MODE_BITSIZE (ext_mode) >= bitsize ++ /* Do not use extv/extzv for volatile bitfields when ++ -fstrict-volatile-bitfields is in effect. */ ++ && !(MEM_P (op0) && MEM_VOLATILE_P (op0) ++ && flag_strict_volatile_bitfields > 0) + /* If op0 is a register, we need it in EXT_MODE to make it + acceptable to the format of ext(z)v. */ + && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode) +@@ -1552,17 +1562,20 @@ + /* Get ref to first byte containing part of the field. */ + xop0 = adjust_address (xop0, byte_mode, xoffset); + +- /* On big-endian machines, we count bits from the most significant. +- If the bit field insn does not, we must invert. */ +- if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) +- xbitpos = unit - bitsize - xbitpos; +- + /* Now convert from counting within UNIT to counting in EXT_MODE. */ +- if (BITS_BIG_ENDIAN && !MEM_P (xop0)) ++ if (BYTES_BIG_ENDIAN && !MEM_P (xop0)) + xbitpos += GET_MODE_BITSIZE (ext_mode) - unit; + + unit = GET_MODE_BITSIZE (ext_mode); + ++ /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count ++ "backwards" from the size of the unit we are extracting from. ++ Otherwise, we count bits from the most significant on a ++ BYTES/BITS_BIG_ENDIAN machine. */ ++ ++ if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) ++ xbitpos = unit - bitsize - xbitpos; ++ + if (xtarget == 0) + xtarget = xspec_target = gen_reg_rtx (tmode); + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106828.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106828.patch new file mode 100644 index 0000000000..3c0ff00856 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106828.patch @@ -0,0 +1,1951 @@ +2011-10-17 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-10-10 Richard Sandiford <richard.sandiford@linaro.org> + + * modulo-sched.c (ps_reg_move_info): Add num_consecutive_stages. + (SCHED_FIRST_REG_MOVE, SCHED_NREG_MOVES): Delete. + (node_sched_params): Remove first_reg_move and nreg_moves. + (ps_num_consecutive_stages, extend_node_sched_params): New functions. + (update_node_sched_params): Move up file. + (print_node_sched_params): Print the stage. Don't dump info related + to first_reg_move and nreg_moves. + (set_columns_for_row): New function. + (set_columns_for_ps): Move up file and use set_columns_for_row. + (schedule_reg_move): New function. + (schedule_reg_moves): Call extend_node_sched_params and + schedule_reg_move. Extend size of uses bitmap. Initialize + num_consecutive_stages. Return false if a move could not be + scheduled. + (apply_reg_moves): Don't emit moves here. + (permute_partial_schedule): Handle register moves. + (duplicate_insns_of_cycles): Remove for_prolog. Emit moves according + to the same stage-count test as ddg nodes. + (generate_prolog_epilog): Update calls accordingly. + (sms_schedule): Allow move-scheduling to add a new first stage. + +2011-10-17 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-10-10 Richard Sandiford <richard.sandiford@linaro.org> + + * modulo-sched.c (ps_insn): Adjust comment. + (ps_reg_move_info): New structure. + (partial_schedule): Add reg_moves field. + (SCHED_PARAMS): Use node_sched_param_vec instead of node_sched_params. + (node_sched_params): Turn first_reg_move into an identifier. 
+ (ps_reg_move): New function. + (ps_rtl_insn): Cope with register moves. + (ps_first_note): Adjust comment and assert that the instruction + isn't a register move. + (node_sched_params): Replace with... + (node_sched_param_vec): ...this vector. + (set_node_sched_params): Adjust accordingly. + (print_node_sched_params): Take a partial schedule instead of a ddg. + Use ps_rtl_insn and ps_reg_move. + (generate_reg_moves): Rename to... + (schedule_reg_moves): ...this. Remove rescan parameter. Record each + move in the partial schedule, but don't emit it here. Don't perform + register substitutions here either. + (apply_reg_moves): New function. + (duplicate_insns_of_cycles): Use register indices directly, + rather than finding instructions using PREV_INSN. Use ps_reg_move. + (sms_schedule): Call schedule_reg_moves before committing to + a partial schedule. Try the next ii if the schedule fails. + Use apply_reg_moves instead of generate_reg_moves. Adjust + call to print_node_sched_params. Free node_sched_param_vec + instead of node_sched_params. + (create_partial_schedule): Initialize reg_moves. + (free_partial_schedule): Free reg_moves. + +2011-10-17 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-10-10 Richard Sandiford <richard.sandiford@linaro.org> + + * modulo-sched.c (ps_insn): Replace node field with an identifier. + (SCHED_ASAP): Replace with.. + (NODE_ASAP): ...this macro. + (SCHED_PARAMS): New macro. + (SCHED_TIME, SCHED_FIRST_REG_MOVE, SCHED_NREG_MOVES, SCHED_ROW) + (SCHED_STAGE, SCHED_COLUMN): Redefine using SCHED_PARAMS. + (node_sched_params): Remove asap. + (ps_rtl_insn, ps_first_note): New functions. + (set_node_sched_params): Use XCNEWVEC. Don't copy across the + asap values. + (print_node_sched_params): Use SCHED_PARAMS and NODE_ASAP. + (generate_reg_moves): Pass ids to the SCHED_* macros. + (update_node_sched_params): Take a ps insn identifier rather than + a node as parameter. Use ps_rtl_insn. + (set_columns_for_ps): Update for above field and SCHED_* macro changes. + (permute_partial_schedule): Use ps_rtl_insn and ps_first_note. + (optimize_sc): Update for above field and SCHED_* macro changes. + Update calls to try_scheduling_node_in_cycle and + update_node_sched_params. + (duplicate_insns_of_cycles): Adjust for above field and SCHED_* + macro changes. Use ps_rtl_insn and ps_first_note. + (sms_schedule): Pass ids to the SCHED_* macros. + (get_sched_window): Adjust for above field and SCHED_* macro changes. + Use NODE_ASAP instead of SCHED_ASAP. + (try_scheduling_node_in_cycle): Remove node parameter. Update + call to ps_add_node_check_conflicts. Pass ids to the SCHED_* + macros. + (sms_schedule_by_order): Update call to try_scheduling_node_in_cycle. + (ps_insert_empty_row): Adjust for above field changes. + (compute_split_row): Use ids rather than nodes. + (verify_partial_schedule): Adjust for above field changes. + (print_partial_schedule): Use ps_rtl_insn. + (create_ps_insn): Take an id rather than a node. + (ps_insn_find_column): Adjust for above field changes. + Use ps_rtl_insn. + (ps_insn_advance_column): Adjust for above field changes. + (add_node_to_ps): Remove node parameter. Update call to + create_ps_insn. + (ps_has_conflicts): Use ps_rtl_insn. + (ps_add_node_check_conflicts): Replace node parameter than an id. 
+ +2011-10-17 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-10-10 Richard Sandiford <richard.sandiford@linaro.org> + + * modulo-sched.c (undo_replace_buff_elem): Delete. + (generate_reg_moves): Don't build and return an undo list. + (free_undo_replace_buff): Delete. + (sms_schedule): Adjust call to generate_reg_moves. + Don't call free_undo_replace_buff. + +2011-10-17 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-08-08 Richard Sandiford <richard.sandiford@linaro.org> + + * modulo-sched.c (get_sched_window): Use a table for the debug output. + Print the current ii. + (sms_schedule_by_order): Reduce whitespace in dump line. + +2011-10-17 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-08-08 Richard Sandiford <richard.sandiford@linaro.org> + + * modulo-sched.c (get_sched_window): Use just one loop for predecessors + and one loop for successors. Fix upper bound of memory range. + +=== modified file 'gcc/modulo-sched.c' +--- old/gcc/modulo-sched.c 2011-10-02 06:56:53 +0000 ++++ new/gcc/modulo-sched.c 2011-10-10 14:35:32 +0000 +@@ -124,8 +124,10 @@ + /* A single instruction in the partial schedule. */ + struct ps_insn + { +- /* The corresponding DDG_NODE. */ +- ddg_node_ptr node; ++ /* Identifies the instruction to be scheduled. Values smaller than ++ the ddg's num_nodes refer directly to ddg nodes. A value of ++ X - num_nodes refers to register move X. */ ++ int id; + + /* The (absolute) cycle in which the PS instruction is scheduled. + Same as SCHED_TIME (node). */ +@@ -137,6 +139,33 @@ + + }; + ++/* Information about a register move that has been added to a partial ++ schedule. */ ++struct ps_reg_move_info ++{ ++ /* The source of the move is defined by the ps_insn with id DEF. ++ The destination is used by the ps_insns with the ids in USES. */ ++ int def; ++ sbitmap uses; ++ ++ /* The original form of USES' instructions used OLD_REG, but they ++ should now use NEW_REG. */ ++ rtx old_reg; ++ rtx new_reg; ++ ++ /* The number of consecutive stages that the move occupies. */ ++ int num_consecutive_stages; ++ ++ /* An instruction that sets NEW_REG to the correct value. The first ++ move associated with DEF will have an rhs of OLD_REG; later moves ++ use the result of the previous move. */ ++ rtx insn; ++}; ++ ++typedef struct ps_reg_move_info ps_reg_move_info; ++DEF_VEC_O (ps_reg_move_info); ++DEF_VEC_ALLOC_O (ps_reg_move_info, heap); ++ + /* Holds the partial schedule as an array of II rows. Each entry of the + array points to a linked list of PS_INSNs, which represents the + instructions that are scheduled for that row. */ +@@ -148,6 +177,10 @@ + /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */ + ps_insn_ptr *rows; + ++ /* All the moves added for this partial schedule. Index X has ++ a ps_insn id of X + g->num_nodes. */ ++ VEC (ps_reg_move_info, heap) *reg_moves; ++ + /* rows_length[i] holds the number of instructions in the row. + It is used only (as an optimization) to back off quickly from + trying to schedule a node in a full row; that is, to avoid running +@@ -165,17 +198,6 @@ + int stage_count; /* The stage count of the partial schedule. */ + }; + +-/* We use this to record all the register replacements we do in +- the kernel so we can undo SMS if it is not profitable. 
*/ +-struct undo_replace_buff_elem +-{ +- rtx insn; +- rtx orig_reg; +- rtx new_reg; +- struct undo_replace_buff_elem *next; +-}; +- +- + + static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history); + static void free_partial_schedule (partial_schedule_ptr); +@@ -183,9 +205,7 @@ + void print_partial_schedule (partial_schedule_ptr, FILE *); + static void verify_partial_schedule (partial_schedule_ptr, sbitmap); + static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr, +- ddg_node_ptr node, int cycle, +- sbitmap must_precede, +- sbitmap must_follow); ++ int, int, sbitmap, sbitmap); + static void rotate_partial_schedule (partial_schedule_ptr, int); + void set_row_column_for_ps (partial_schedule_ptr); + static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap); +@@ -201,43 +221,27 @@ + static void permute_partial_schedule (partial_schedule_ptr, rtx); + static void generate_prolog_epilog (partial_schedule_ptr, struct loop *, + rtx, rtx); +-static void duplicate_insns_of_cycles (partial_schedule_ptr, +- int, int, int, rtx); + static int calculate_stage_count (partial_schedule_ptr, int); + static void calculate_must_precede_follow (ddg_node_ptr, int, int, + int, int, sbitmap, sbitmap, sbitmap); + static int get_sched_window (partial_schedule_ptr, ddg_node_ptr, + sbitmap, int, int *, int *, int *); +-static bool try_scheduling_node_in_cycle (partial_schedule_ptr, ddg_node_ptr, +- int, int, sbitmap, int *, sbitmap, +- sbitmap); ++static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int, ++ sbitmap, int *, sbitmap, sbitmap); + static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr); + +-#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap) +-#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time) +-#define SCHED_FIRST_REG_MOVE(x) \ +- (((node_sched_params_ptr)(x)->aux.info)->first_reg_move) +-#define SCHED_NREG_MOVES(x) \ +- (((node_sched_params_ptr)(x)->aux.info)->nreg_moves) +-#define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row) +-#define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage) +-#define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column) ++#define NODE_ASAP(node) ((node)->aux.count) ++ ++#define SCHED_PARAMS(x) VEC_index (node_sched_params, node_sched_param_vec, x) ++#define SCHED_TIME(x) (SCHED_PARAMS (x)->time) ++#define SCHED_ROW(x) (SCHED_PARAMS (x)->row) ++#define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage) ++#define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column) + + /* The scheduling parameters held for each node. */ + typedef struct node_sched_params + { +- int asap; /* A lower-bound on the absolute scheduling cycle. */ +- int time; /* The absolute scheduling cycle (time >= asap). */ +- +- /* The following field (first_reg_move) is a pointer to the first +- register-move instruction added to handle the modulo-variable-expansion +- of the register defined by this node. This register-move copies the +- original register defined by the node. */ +- rtx first_reg_move; +- +- /* The number of register-move instructions added, immediately preceding +- first_reg_move. */ +- int nreg_moves; ++ int time; /* The absolute scheduling cycle. */ + + int row; /* Holds time % ii. */ + int stage; /* Holds time / ii. 
*/ +@@ -247,6 +251,9 @@ + int column; + } *node_sched_params_ptr; + ++typedef struct node_sched_params node_sched_params; ++DEF_VEC_O (node_sched_params); ++DEF_VEC_ALLOC_O (node_sched_params, heap); + + /* The following three functions are copied from the current scheduler + code in order to use sched_analyze() for computing the dependencies. +@@ -296,6 +303,49 @@ + 0 + }; + ++/* Partial schedule instruction ID in PS is a register move. Return ++ information about it. */ ++static struct ps_reg_move_info * ++ps_reg_move (partial_schedule_ptr ps, int id) ++{ ++ gcc_checking_assert (id >= ps->g->num_nodes); ++ return VEC_index (ps_reg_move_info, ps->reg_moves, id - ps->g->num_nodes); ++} ++ ++/* Return the rtl instruction that is being scheduled by partial schedule ++ instruction ID, which belongs to schedule PS. */ ++static rtx ++ps_rtl_insn (partial_schedule_ptr ps, int id) ++{ ++ if (id < ps->g->num_nodes) ++ return ps->g->nodes[id].insn; ++ else ++ return ps_reg_move (ps, id)->insn; ++} ++ ++/* Partial schedule instruction ID, which belongs to PS, occured in ++ the original (unscheduled) loop. Return the first instruction ++ in the loop that was associated with ps_rtl_insn (PS, ID). ++ If the instruction had some notes before it, this is the first ++ of those notes. */ ++static rtx ++ps_first_note (partial_schedule_ptr ps, int id) ++{ ++ gcc_assert (id < ps->g->num_nodes); ++ return ps->g->nodes[id].first_note; ++} ++ ++/* Return the number of consecutive stages that are occupied by ++ partial schedule instruction ID in PS. */ ++static int ++ps_num_consecutive_stages (partial_schedule_ptr ps, int id) ++{ ++ if (id < ps->g->num_nodes) ++ return 1; ++ else ++ return ps_reg_move (ps, id)->num_consecutive_stages; ++} ++ + /* Given HEAD and TAIL which are the first and last insns in a loop; + return the register which controls the loop. Return zero if it has + more than one occurrence in the loop besides the control part or the +@@ -396,35 +446,59 @@ + } + + +-/* Points to the array that contains the sched data for each node. */ +-static node_sched_params_ptr node_sched_params; ++/* A vector that contains the sched data for each ps_insn. */ ++static VEC (node_sched_params, heap) *node_sched_param_vec; + +-/* Allocate sched_params for each node and initialize it. Assumes that +- the aux field of each node contain the asap bound (computed earlier), +- and copies it into the sched_params field. */ ++/* Allocate sched_params for each node and initialize it. */ + static void + set_node_sched_params (ddg_ptr g) + { +- int i; +- +- /* Allocate for each node in the DDG a place to hold the "sched_data". */ +- /* Initialize ASAP/ALAP/HIGHT to zero. */ +- node_sched_params = (node_sched_params_ptr) +- xcalloc (g->num_nodes, +- sizeof (struct node_sched_params)); +- +- /* Set the pointer of the general data of the node to point to the +- appropriate sched_params structure. */ +- for (i = 0; i < g->num_nodes; i++) +- { +- /* Watch out for aliasing problems? */ +- node_sched_params[i].asap = g->nodes[i].aux.count; +- g->nodes[i].aux.info = &node_sched_params[i]; +- } +-} +- +-static void +-print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g) ++ VEC_truncate (node_sched_params, node_sched_param_vec, 0); ++ VEC_safe_grow_cleared (node_sched_params, heap, ++ node_sched_param_vec, g->num_nodes); ++} ++ ++/* Make sure that node_sched_param_vec has an entry for every move in PS. 
*/ ++static void ++extend_node_sched_params (partial_schedule_ptr ps) ++{ ++ VEC_safe_grow_cleared (node_sched_params, heap, node_sched_param_vec, ++ ps->g->num_nodes + VEC_length (ps_reg_move_info, ++ ps->reg_moves)); ++} ++ ++/* Update the sched_params (time, row and stage) for node U using the II, ++ the CYCLE of U and MIN_CYCLE. ++ We're not simply taking the following ++ SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii); ++ because the stages may not be aligned on cycle 0. */ ++static void ++update_node_sched_params (int u, int ii, int cycle, int min_cycle) ++{ ++ int sc_until_cycle_zero; ++ int stage; ++ ++ SCHED_TIME (u) = cycle; ++ SCHED_ROW (u) = SMODULO (cycle, ii); ++ ++ /* The calculation of stage count is done adding the number ++ of stages before cycle zero and after cycle zero. */ ++ sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii); ++ ++ if (SCHED_TIME (u) < 0) ++ { ++ stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii); ++ SCHED_STAGE (u) = sc_until_cycle_zero - stage; ++ } ++ else ++ { ++ stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii); ++ SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1; ++ } ++} ++ ++static void ++print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps) + { + int i; + +@@ -432,22 +506,170 @@ + return; + for (i = 0; i < num_nodes; i++) + { +- node_sched_params_ptr nsp = &node_sched_params[i]; +- rtx reg_move = nsp->first_reg_move; +- int j; ++ node_sched_params_ptr nsp = SCHED_PARAMS (i); + + fprintf (file, "Node = %d; INSN = %d\n", i, +- (INSN_UID (g->nodes[i].insn))); +- fprintf (file, " asap = %d:\n", nsp->asap); ++ INSN_UID (ps_rtl_insn (ps, i))); ++ fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i])); + fprintf (file, " time = %d:\n", nsp->time); +- fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves); +- for (j = 0; j < nsp->nreg_moves; j++) ++ fprintf (file, " stage = %d:\n", nsp->stage); ++ } ++} ++ ++/* Set SCHED_COLUMN for each instruction in row ROW of PS. */ ++static void ++set_columns_for_row (partial_schedule_ptr ps, int row) ++{ ++ ps_insn_ptr cur_insn; ++ int column; ++ ++ column = 0; ++ for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row) ++ SCHED_COLUMN (cur_insn->id) = column++; ++} ++ ++/* Set SCHED_COLUMN for each instruction in PS. */ ++static void ++set_columns_for_ps (partial_schedule_ptr ps) ++{ ++ int row; ++ ++ for (row = 0; row < ps->ii; row++) ++ set_columns_for_row (ps, row); ++} ++ ++/* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS. ++ Its single predecessor has already been scheduled, as has its ++ ddg node successors. (The move may have also another move as its ++ successor, in which case that successor will be scheduled later.) ++ ++ The move is part of a chain that satisfies register dependencies ++ between a producing ddg node and various consuming ddg nodes. ++ If some of these dependencies have a distance of 1 (meaning that ++ the use is upward-exposoed) then DISTANCE1_USES is nonnull and ++ contains the set of uses with distance-1 dependencies. ++ DISTANCE1_USES is null otherwise. ++ ++ MUST_FOLLOW is a scratch bitmap that is big enough to hold ++ all current ps_insn ids. ++ ++ Return true on success. 
*/ ++static bool ++schedule_reg_move (partial_schedule_ptr ps, int i_reg_move, ++ sbitmap distance1_uses, sbitmap must_follow) ++{ ++ unsigned int u; ++ int this_time, this_distance, this_start, this_end, this_latency; ++ int start, end, c, ii; ++ sbitmap_iterator sbi; ++ ps_reg_move_info *move; ++ rtx this_insn; ++ ps_insn_ptr psi; ++ ++ move = ps_reg_move (ps, i_reg_move); ++ ii = ps->ii; ++ if (dump_file) ++ { ++ fprintf (dump_file, "Scheduling register move INSN %d; ii = %d" ++ ", min cycle = %d\n\n", INSN_UID (move->insn), ii, ++ PS_MIN_CYCLE (ps)); ++ print_rtl_single (dump_file, move->insn); ++ fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time"); ++ fprintf (dump_file, "=========== =========== =====\n"); ++ } ++ ++ start = INT_MIN; ++ end = INT_MAX; ++ ++ /* For dependencies of distance 1 between a producer ddg node A ++ and consumer ddg node B, we have a chain of dependencies: ++ ++ A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B ++ ++ where Mi is the ith move. For dependencies of distance 0 between ++ a producer ddg node A and consumer ddg node C, we have a chain of ++ dependencies: ++ ++ A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C ++ ++ where Mi' occupies the same position as Mi but occurs a stage later. ++ We can only schedule each move once, so if we have both types of ++ chain, we model the second as: ++ ++ A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C ++ ++ First handle the dependencies between the previously-scheduled ++ predecessor and the move. */ ++ this_insn = ps_rtl_insn (ps, move->def); ++ this_latency = insn_latency (this_insn, move->insn); ++ this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0; ++ this_time = SCHED_TIME (move->def) - this_distance * ii; ++ this_start = this_time + this_latency; ++ this_end = this_time + ii; ++ if (dump_file) ++ fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n", ++ this_start, this_end, SCHED_TIME (move->def), ++ INSN_UID (this_insn), this_latency, this_distance, ++ INSN_UID (move->insn)); ++ ++ if (start < this_start) ++ start = this_start; ++ if (end > this_end) ++ end = this_end; ++ ++ /* Handle the dependencies between the move and previously-scheduled ++ successors. 
*/ ++ EXECUTE_IF_SET_IN_SBITMAP (move->uses, 0, u, sbi) ++ { ++ this_insn = ps_rtl_insn (ps, u); ++ this_latency = insn_latency (move->insn, this_insn); ++ if (distance1_uses && !TEST_BIT (distance1_uses, u)) ++ this_distance = -1; ++ else ++ this_distance = 0; ++ this_time = SCHED_TIME (u) + this_distance * ii; ++ this_start = this_time - ii; ++ this_end = this_time - this_latency; ++ if (dump_file) ++ fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n", ++ this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn), ++ this_latency, this_distance, INSN_UID (this_insn)); ++ ++ if (start < this_start) ++ start = this_start; ++ if (end > this_end) ++ end = this_end; ++ } ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "----------- ----------- -----\n"); ++ fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)"); ++ } ++ ++ sbitmap_zero (must_follow); ++ SET_BIT (must_follow, move->def); ++ ++ start = MAX (start, end - (ii - 1)); ++ for (c = end; c >= start; c--) ++ { ++ psi = ps_add_node_check_conflicts (ps, i_reg_move, c, ++ move->uses, must_follow); ++ if (psi) + { +- fprintf (file, " reg_move = "); +- print_rtl_single (file, reg_move); +- reg_move = PREV_INSN (reg_move); ++ update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps)); ++ if (dump_file) ++ fprintf (dump_file, "\nScheduled register move INSN %d at" ++ " time %d, row %d\n\n", INSN_UID (move->insn), c, ++ SCHED_ROW (i_reg_move)); ++ return true; + } + } ++ ++ if (dump_file) ++ fprintf (dump_file, "\nNo available slot\n\n"); ++ ++ return false; + } + + /* +@@ -461,22 +683,23 @@ + nreg_moves = ----------------------------------- + 1 - { dependence. + ii { 1 if not. + */ +-static struct undo_replace_buff_elem * +-generate_reg_moves (partial_schedule_ptr ps, bool rescan) ++static bool ++schedule_reg_moves (partial_schedule_ptr ps) + { + ddg_ptr g = ps->g; + int ii = ps->ii; + int i; +- struct undo_replace_buff_elem *reg_move_replaces = NULL; + + for (i = 0; i < g->num_nodes; i++) + { + ddg_node_ptr u = &g->nodes[i]; + ddg_edge_ptr e; + int nreg_moves = 0, i_reg_move; +- sbitmap *uses_of_defs; +- rtx last_reg_move; + rtx prev_reg, old_reg; ++ int first_move; ++ int distances[2]; ++ sbitmap must_follow; ++ sbitmap distance1_uses; + rtx set = single_set (u->insn); + + /* Skip instructions that do not set a register. */ +@@ -485,18 +708,21 @@ + + /* Compute the number of reg_moves needed for u, by looking at life + ranges started at u (excluding self-loops). */ ++ distances[0] = distances[1] = false; + for (e = u->out; e; e = e->next_out) + if (e->type == TRUE_DEP && e->dest != e->src) + { +- int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii; ++ int nreg_moves4e = (SCHED_TIME (e->dest->cuid) ++ - SCHED_TIME (e->src->cuid)) / ii; + + if (e->distance == 1) +- nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii; ++ nreg_moves4e = (SCHED_TIME (e->dest->cuid) ++ - SCHED_TIME (e->src->cuid) + ii) / ii; + + /* If dest precedes src in the schedule of the kernel, then dest + will read before src writes and we can save one reg_copy. 
*/ +- if (SCHED_ROW (e->dest) == SCHED_ROW (e->src) +- && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src)) ++ if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid) ++ && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid)) + nreg_moves4e--; + + if (nreg_moves4e >= 1) +@@ -513,125 +739,105 @@ + gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn)); + } + ++ if (nreg_moves4e) ++ { ++ gcc_assert (e->distance < 2); ++ distances[e->distance] = true; ++ } + nreg_moves = MAX (nreg_moves, nreg_moves4e); + } + + if (nreg_moves == 0) + continue; + ++ /* Create NREG_MOVES register moves. */ ++ first_move = VEC_length (ps_reg_move_info, ps->reg_moves); ++ VEC_safe_grow_cleared (ps_reg_move_info, heap, ps->reg_moves, ++ first_move + nreg_moves); ++ extend_node_sched_params (ps); ++ ++ /* Record the moves associated with this node. */ ++ first_move += ps->g->num_nodes; ++ ++ /* Generate each move. */ ++ old_reg = prev_reg = SET_DEST (single_set (u->insn)); ++ for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++) ++ { ++ ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move); ++ ++ move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i; ++ move->uses = sbitmap_alloc (first_move + nreg_moves); ++ move->old_reg = old_reg; ++ move->new_reg = gen_reg_rtx (GET_MODE (prev_reg)); ++ move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1; ++ move->insn = gen_move_insn (move->new_reg, copy_rtx (prev_reg)); ++ sbitmap_zero (move->uses); ++ ++ prev_reg = move->new_reg; ++ } ++ ++ distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL; ++ + /* Every use of the register defined by node may require a different + copy of this register, depending on the time the use is scheduled. +- Set a bitmap vector, telling which nodes use each copy of this +- register. */ +- uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes); +- sbitmap_vector_zero (uses_of_defs, nreg_moves); ++ Record which uses require which move results. */ + for (e = u->out; e; e = e->next_out) + if (e->type == TRUE_DEP && e->dest != e->src) + { +- int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii; ++ int dest_copy = (SCHED_TIME (e->dest->cuid) ++ - SCHED_TIME (e->src->cuid)) / ii; + + if (e->distance == 1) +- dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii; ++ dest_copy = (SCHED_TIME (e->dest->cuid) ++ - SCHED_TIME (e->src->cuid) + ii) / ii; + +- if (SCHED_ROW (e->dest) == SCHED_ROW (e->src) +- && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src)) ++ if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid) ++ && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid)) + dest_copy--; + + if (dest_copy) +- SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid); ++ { ++ ps_reg_move_info *move; ++ ++ move = ps_reg_move (ps, first_move + dest_copy - 1); ++ SET_BIT (move->uses, e->dest->cuid); ++ if (e->distance == 1) ++ SET_BIT (distance1_uses, e->dest->cuid); ++ } + } + +- /* Now generate the reg_moves, attaching relevant uses to them. */ +- SCHED_NREG_MOVES (u) = nreg_moves; +- old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn))); +- /* Insert the reg-moves right before the notes which precede +- the insn they relates to. 
*/ +- last_reg_move = u->first_note; +- ++ must_follow = sbitmap_alloc (first_move + nreg_moves); + for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++) ++ if (!schedule_reg_move (ps, first_move + i_reg_move, ++ distance1_uses, must_follow)) ++ break; ++ sbitmap_free (must_follow); ++ if (distance1_uses) ++ sbitmap_free (distance1_uses); ++ if (i_reg_move < nreg_moves) ++ return false; ++ } ++ return true; ++} ++ ++/* Emit the moves associatied with PS. Apply the substitutions ++ associated with them. */ ++static void ++apply_reg_moves (partial_schedule_ptr ps) ++{ ++ ps_reg_move_info *move; ++ int i; ++ ++ FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move) ++ { ++ unsigned int i_use; ++ sbitmap_iterator sbi; ++ ++ EXECUTE_IF_SET_IN_SBITMAP (move->uses, 0, i_use, sbi) + { +- unsigned int i_use = 0; +- rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg)); +- rtx reg_move = gen_move_insn (new_reg, prev_reg); +- sbitmap_iterator sbi; +- +- add_insn_before (reg_move, last_reg_move, NULL); +- last_reg_move = reg_move; +- +- if (!SCHED_FIRST_REG_MOVE (u)) +- SCHED_FIRST_REG_MOVE (u) = reg_move; +- +- EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi) +- { +- struct undo_replace_buff_elem *rep; +- +- rep = (struct undo_replace_buff_elem *) +- xcalloc (1, sizeof (struct undo_replace_buff_elem)); +- rep->insn = g->nodes[i_use].insn; +- rep->orig_reg = old_reg; +- rep->new_reg = new_reg; +- +- if (! reg_move_replaces) +- reg_move_replaces = rep; +- else +- { +- rep->next = reg_move_replaces; +- reg_move_replaces = rep; +- } +- +- replace_rtx (g->nodes[i_use].insn, old_reg, new_reg); +- if (rescan) +- df_insn_rescan (g->nodes[i_use].insn); +- } +- +- prev_reg = new_reg; ++ replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg); ++ df_insn_rescan (ps->g->nodes[i_use].insn); + } +- sbitmap_vector_free (uses_of_defs); +- } +- return reg_move_replaces; +-} +- +-/* Free memory allocated for the undo buffer. */ +-static void +-free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces) +-{ +- +- while (reg_move_replaces) +- { +- struct undo_replace_buff_elem *rep = reg_move_replaces; +- +- reg_move_replaces = reg_move_replaces->next; +- free (rep); +- } +-} +- +-/* Update the sched_params (time, row and stage) for node U using the II, +- the CYCLE of U and MIN_CYCLE. +- We're not simply taking the following +- SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii); +- because the stages may not be aligned on cycle 0. */ +-static void +-update_node_sched_params (ddg_node_ptr u, int ii, int cycle, int min_cycle) +-{ +- int sc_until_cycle_zero; +- int stage; +- +- SCHED_TIME (u) = cycle; +- SCHED_ROW (u) = SMODULO (cycle, ii); +- +- /* The calculation of stage count is done adding the number +- of stages before cycle zero and after cycle zero. */ +- sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii); +- +- if (SCHED_TIME (u) < 0) +- { +- stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii); +- SCHED_STAGE (u) = sc_until_cycle_zero - stage; +- } +- else +- { +- stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii); +- SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1; + } + } + +@@ -647,18 +853,19 @@ + for (row = 0; row < ii; row++) + for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) + { +- ddg_node_ptr u = crr_insn->node; ++ int u = crr_insn->id; + int normalized_time = SCHED_TIME (u) - amount; + int new_min_cycle = PS_MIN_CYCLE (ps) - amount; + + if (dump_file) + { + /* Print the scheduling times after the rotation. 
*/ ++ rtx insn = ps_rtl_insn (ps, u); ++ + fprintf (dump_file, "crr_insn->node=%d (insn id %d), " +- "crr_insn->cycle=%d, min_cycle=%d", crr_insn->node->cuid, +- INSN_UID (crr_insn->node->insn), normalized_time, +- new_min_cycle); +- if (JUMP_P (crr_insn->node->insn)) ++ "crr_insn->cycle=%d, min_cycle=%d", u, ++ INSN_UID (insn), normalized_time, new_min_cycle); ++ if (JUMP_P (insn)) + fprintf (dump_file, " (branch)"); + fprintf (dump_file, "\n"); + } +@@ -671,22 +878,6 @@ + } + } + +-/* Set SCHED_COLUMN of each node according to its position in PS. */ +-static void +-set_columns_for_ps (partial_schedule_ptr ps) +-{ +- int row; +- +- for (row = 0; row < ps->ii; row++) +- { +- ps_insn_ptr cur_insn = ps->rows[row]; +- int column = 0; +- +- for (; cur_insn; cur_insn = cur_insn->next_in_row) +- SCHED_COLUMN (cur_insn->node) = column++; +- } +-} +- + /* Permute the insns according to their order in PS, from row 0 to + row ii-1, and position them right before LAST. This schedules + the insns of the loop kernel. */ +@@ -699,9 +890,18 @@ + + for (row = 0; row < ii ; row++) + for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row) +- if (PREV_INSN (last) != ps_ij->node->insn) +- reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn, +- PREV_INSN (last)); ++ { ++ rtx insn = ps_rtl_insn (ps, ps_ij->id); ++ ++ if (PREV_INSN (last) != insn) ++ { ++ if (ps_ij->id < ps->g->num_nodes) ++ reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn, ++ PREV_INSN (last)); ++ else ++ add_insn_before (insn, last, NULL); ++ } ++ } + } + + /* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE +@@ -750,7 +950,7 @@ + to row ii-1. If they are equal just bail out. */ + stage_count = calculate_stage_count (ps, amount); + stage_count_curr = +- calculate_stage_count (ps, SCHED_TIME (g->closing_branch) - (ii - 1)); ++ calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1)); + + if (stage_count == stage_count_curr) + { +@@ -779,7 +979,7 @@ + print_partial_schedule (ps, dump_file); + } + +- if (SMODULO (SCHED_TIME (g->closing_branch), ii) == ii - 1) ++ if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1) + { + ok = true; + goto clear; +@@ -794,7 +994,7 @@ + { + bool success; + ps_insn_ptr next_ps_i; +- int branch_cycle = SCHED_TIME (g->closing_branch); ++ int branch_cycle = SCHED_TIME (g->closing_branch->cuid); + int row = SMODULO (branch_cycle, ps->ii); + int num_splits = 0; + sbitmap must_precede, must_follow, tmp_precede, tmp_follow; +@@ -850,13 +1050,12 @@ + branch so we can remove it from it's current cycle. 
*/ + for (next_ps_i = ps->rows[row]; + next_ps_i; next_ps_i = next_ps_i->next_in_row) +- if (next_ps_i->node->cuid == g->closing_branch->cuid) ++ if (next_ps_i->id == g->closing_branch->cuid) + break; + + remove_node_from_ps (ps, next_ps_i); + success = +- try_scheduling_node_in_cycle (ps, g->closing_branch, +- g->closing_branch->cuid, c, ++ try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c, + sched_nodes, &num_splits, + tmp_precede, tmp_follow); + gcc_assert (num_splits == 0); +@@ -874,8 +1073,7 @@ + must_precede, branch_cycle, start, end, + step); + success = +- try_scheduling_node_in_cycle (ps, g->closing_branch, +- g->closing_branch->cuid, ++ try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, + branch_cycle, sched_nodes, + &num_splits, tmp_precede, + tmp_follow); +@@ -889,7 +1087,7 @@ + fprintf (dump_file, + "SMS success in moving branch to cycle %d\n", c); + +- update_node_sched_params (g->closing_branch, ii, c, ++ update_node_sched_params (g->closing_branch->cuid, ii, c, + PS_MIN_CYCLE (ps)); + ok = true; + } +@@ -905,7 +1103,7 @@ + + static void + duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage, +- int to_stage, int for_prolog, rtx count_reg) ++ int to_stage, rtx count_reg) + { + int row; + ps_insn_ptr ps_ij; +@@ -913,9 +1111,9 @@ + for (row = 0; row < ps->ii; row++) + for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row) + { +- ddg_node_ptr u_node = ps_ij->node; +- int j, i_reg_moves; +- rtx reg_move = NULL_RTX; ++ int u = ps_ij->id; ++ int first_u, last_u; ++ rtx u_insn; + + /* Do not duplicate any insn which refers to count_reg as it + belongs to the control part. +@@ -923,52 +1121,20 @@ + be ignored. + TODO: This should be done by analyzing the control part of + the loop. */ +- if (reg_mentioned_p (count_reg, u_node->insn) +- || JUMP_P (ps_ij->node->insn)) ++ u_insn = ps_rtl_insn (ps, u); ++ if (reg_mentioned_p (count_reg, u_insn) ++ || JUMP_P (u_insn)) + continue; + +- if (for_prolog) +- { +- /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing +- number of reg_moves starting with the second occurrence of +- u_node, which is generated if its SCHED_STAGE <= to_stage. */ +- i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1; +- i_reg_moves = MAX (i_reg_moves, 0); +- i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node)); +- +- /* The reg_moves start from the *first* reg_move backwards. */ +- if (i_reg_moves) +- { +- reg_move = SCHED_FIRST_REG_MOVE (u_node); +- for (j = 1; j < i_reg_moves; j++) +- reg_move = PREV_INSN (reg_move); +- } +- } +- else /* It's for the epilog. */ +- { +- /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves, +- starting to decrease one stage after u_node no longer occurs; +- that is, generate all reg_moves until +- SCHED_STAGE (u_node) == from_stage - 1. */ +- i_reg_moves = SCHED_NREG_MOVES (u_node) +- - (from_stage - SCHED_STAGE (u_node) - 1); +- i_reg_moves = MAX (i_reg_moves, 0); +- i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node)); +- +- /* The reg_moves start from the *last* reg_move forwards. 
*/ +- if (i_reg_moves) +- { +- reg_move = SCHED_FIRST_REG_MOVE (u_node); +- for (j = 1; j < SCHED_NREG_MOVES (u_node); j++) +- reg_move = PREV_INSN (reg_move); +- } +- } +- +- for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move)) +- emit_insn (copy_rtx (PATTERN (reg_move))); +- if (SCHED_STAGE (u_node) >= from_stage +- && SCHED_STAGE (u_node) <= to_stage) +- duplicate_insn_chain (u_node->first_note, u_node->insn); ++ first_u = SCHED_STAGE (u); ++ last_u = first_u + ps_num_consecutive_stages (ps, u) - 1; ++ if (from_stage <= last_u && to_stage >= first_u) ++ { ++ if (u < ps->g->num_nodes) ++ duplicate_insn_chain (ps_first_note (ps, u), u_insn); ++ else ++ emit_insn (copy_rtx (PATTERN (u_insn))); ++ } + } + } + +@@ -1002,7 +1168,7 @@ + } + + for (i = 0; i < last_stage; i++) +- duplicate_insns_of_cycles (ps, 0, i, 1, count_reg); ++ duplicate_insns_of_cycles (ps, 0, i, count_reg); + + /* Put the prolog on the entry edge. */ + e = loop_preheader_edge (loop); +@@ -1014,7 +1180,7 @@ + start_sequence (); + + for (i = 0; i < last_stage; i++) +- duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg); ++ duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg); + + /* Put the epilogue on the exit edge. */ + gcc_assert (single_exit (loop)); +@@ -1350,10 +1516,9 @@ + { + rtx head, tail; + rtx count_reg, count_init; +- int mii, rec_mii; +- unsigned stage_count = 0; ++ int mii, rec_mii, stage_count, min_cycle; + HOST_WIDEST_INT loop_count = 0; +- bool opt_sc_p = false; ++ bool opt_sc_p; + + if (! (g = g_arr[loop->num])) + continue; +@@ -1430,62 +1595,63 @@ + fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n", + rec_mii, mii, maxii); + +- /* After sms_order_nodes and before sms_schedule_by_order, to copy over +- ASAP. */ +- set_node_sched_params (g); +- +- ps = sms_schedule_by_order (g, mii, maxii, node_order); +- +- if (ps) ++ for (;;) + { +- /* Try to achieve optimized SC by normalizing the partial +- schedule (having the cycles start from cycle zero). +- The branch location must be placed in row ii-1 in the +- final scheduling. If failed, shift all instructions to +- position the branch in row ii-1. */ +- opt_sc_p = optimize_sc (ps, g); +- if (opt_sc_p) +- stage_count = calculate_stage_count (ps, 0); +- else ++ set_node_sched_params (g); ++ ++ stage_count = 0; ++ opt_sc_p = false; ++ ps = sms_schedule_by_order (g, mii, maxii, node_order); ++ ++ if (ps) + { +- /* Bring the branch to cycle ii-1. */ +- int amount = SCHED_TIME (g->closing_branch) - (ps->ii - 1); ++ /* Try to achieve optimized SC by normalizing the partial ++ schedule (having the cycles start from cycle zero). ++ The branch location must be placed in row ii-1 in the ++ final scheduling. If failed, shift all instructions to ++ position the branch in row ii-1. */ ++ opt_sc_p = optimize_sc (ps, g); ++ if (opt_sc_p) ++ stage_count = calculate_stage_count (ps, 0); ++ else ++ { ++ /* Bring the branch to cycle ii-1. */ ++ int amount = (SCHED_TIME (g->closing_branch->cuid) ++ - (ps->ii - 1)); + ++ if (dump_file) ++ fprintf (dump_file, "SMS schedule branch at cycle ii-1\n"); ++ ++ stage_count = calculate_stage_count (ps, amount); ++ } ++ ++ gcc_assert (stage_count >= 1); ++ } ++ ++ /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of ++ 1 means that there is no interleaving between iterations thus ++ we let the scheduling passes do the job in this case. 
*/ ++ if (stage_count < PARAM_VALUE (PARAM_SMS_MIN_SC) ++ || (count_init && (loop_count <= stage_count)) ++ || (flag_branch_probabilities && (trip_count <= stage_count))) ++ { + if (dump_file) +- fprintf (dump_file, "SMS schedule branch at cycle ii-1\n"); +- +- stage_count = calculate_stage_count (ps, amount); +- } +- +- gcc_assert (stage_count >= 1); +- PS_STAGE_COUNT (ps) = stage_count; +- } +- +- /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of +- 1 means that there is no interleaving between iterations thus +- we let the scheduling passes do the job in this case. */ +- if (stage_count < (unsigned) PARAM_VALUE (PARAM_SMS_MIN_SC) +- || (count_init && (loop_count <= stage_count)) +- || (flag_branch_probabilities && (trip_count <= stage_count))) +- { +- if (dump_file) +- { +- fprintf (dump_file, "SMS failed... \n"); +- fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count); +- fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count); +- fprintf (dump_file, ", trip-count="); +- fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count); +- fprintf (dump_file, ")\n"); +- } +- } +- else +- { +- struct undo_replace_buff_elem *reg_move_replaces; ++ { ++ fprintf (dump_file, "SMS failed... \n"); ++ fprintf (dump_file, "SMS sched-failed (stage-count=%d," ++ " loop-count=", stage_count); ++ fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count); ++ fprintf (dump_file, ", trip-count="); ++ fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count); ++ fprintf (dump_file, ")\n"); ++ } ++ break; ++ } + + if (!opt_sc_p) + { + /* Rotate the partial schedule to have the branch in row ii-1. */ +- int amount = SCHED_TIME (g->closing_branch) - (ps->ii - 1); ++ int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1); + + reset_sched_times (ps, amount); + rotate_partial_schedule (ps, amount); +@@ -1493,6 +1659,29 @@ + + set_columns_for_ps (ps); + ++ min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii); ++ if (!schedule_reg_moves (ps)) ++ { ++ mii = ps->ii + 1; ++ free_partial_schedule (ps); ++ continue; ++ } ++ ++ /* Moves that handle incoming values might have been added ++ to a new first stage. Bump the stage count if so. ++ ++ ??? Perhaps we could consider rotating the schedule here ++ instead? */ ++ if (PS_MIN_CYCLE (ps) < min_cycle) ++ { ++ reset_sched_times (ps, 0); ++ stage_count++; ++ } ++ ++ /* The stage count should now be correct without rotation. */ ++ gcc_checking_assert (stage_count == calculate_stage_count (ps, 0)); ++ PS_STAGE_COUNT (ps) = stage_count; ++ + canon_loop (loop); + + if (dump_file) +@@ -1531,17 +1720,16 @@ + /* The life-info is not valid any more. */ + df_set_bb_dirty (g->bb); + +- reg_move_replaces = generate_reg_moves (ps, true); ++ apply_reg_moves (ps); + if (dump_file) +- print_node_sched_params (dump_file, g->num_nodes, g); ++ print_node_sched_params (dump_file, g->num_nodes, ps); + /* Generate prolog and epilog. 
*/ + generate_prolog_epilog (ps, loop, count_reg, count_init); +- +- free_undo_replace_buff (reg_move_replaces); ++ break; + } + + free_partial_schedule (ps); +- free (node_sched_params); ++ VEC_free (node_sched_params, heap, node_sched_param_vec); + free (node_order); + free_ddg (g); + } +@@ -1643,9 +1831,11 @@ + + static int + get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node, +- sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p) ++ sbitmap sched_nodes, int ii, int *start_p, int *step_p, ++ int *end_p) + { + int start, step, end; ++ int early_start, late_start; + ddg_edge_ptr e; + sbitmap psp = sbitmap_alloc (ps->g->num_nodes); + sbitmap pss = sbitmap_alloc (ps->g->num_nodes); +@@ -1653,6 +1843,8 @@ + sbitmap u_node_succs = NODE_SUCCESSORS (u_node); + int psp_not_empty; + int pss_not_empty; ++ int count_preds; ++ int count_succs; + + /* 1. compute sched window for u (start, end, step). */ + sbitmap_zero (psp); +@@ -1660,214 +1852,119 @@ + psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes); + pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes); + +- if (psp_not_empty && !pss_not_empty) +- { +- int early_start = INT_MIN; +- +- end = INT_MAX; +- for (e = u_node->in; e != 0; e = e->next_in) +- { +- ddg_node_ptr v_node = e->src; +- +- if (dump_file) +- { +- fprintf (dump_file, "\nProcessing edge: "); +- print_ddg_edge (dump_file, e); +- fprintf (dump_file, +- "\nScheduling %d (%d) in psp_not_empty," +- " checking p %d (%d): ", u_node->cuid, +- INSN_UID (u_node->insn), v_node->cuid, INSN_UID +- (v_node->insn)); +- } +- +- if (TEST_BIT (sched_nodes, v_node->cuid)) +- { +- int p_st = SCHED_TIME (v_node); +- +- early_start = +- MAX (early_start, p_st + e->latency - (e->distance * ii)); +- +- if (dump_file) +- fprintf (dump_file, +- "pred st = %d; early_start = %d; latency: %d", +- p_st, early_start, e->latency); +- +- if (e->data_type == MEM_DEP) +- end = MIN (end, SCHED_TIME (v_node) + ii - 1); +- } +- else if (dump_file) +- fprintf (dump_file, "the node is not scheduled\n"); +- } +- start = early_start; +- end = MIN (end, early_start + ii); +- /* Schedule the node close to it's predecessors. */ +- step = 1; +- +- if (dump_file) +- fprintf (dump_file, +- "\nScheduling %d (%d) in a window (%d..%d) with step %d\n", +- u_node->cuid, INSN_UID (u_node->insn), start, end, step); +- } +- +- else if (!psp_not_empty && pss_not_empty) +- { +- int late_start = INT_MAX; +- +- end = INT_MIN; +- for (e = u_node->out; e != 0; e = e->next_out) +- { +- ddg_node_ptr v_node = e->dest; +- +- if (dump_file) +- { +- fprintf (dump_file, "\nProcessing edge:"); +- print_ddg_edge (dump_file, e); +- fprintf (dump_file, +- "\nScheduling %d (%d) in pss_not_empty," +- " checking s %d (%d): ", u_node->cuid, +- INSN_UID (u_node->insn), v_node->cuid, INSN_UID +- (v_node->insn)); +- } +- +- if (TEST_BIT (sched_nodes, v_node->cuid)) +- { +- int s_st = SCHED_TIME (v_node); +- +- late_start = MIN (late_start, +- s_st - e->latency + (e->distance * ii)); +- +- if (dump_file) +- fprintf (dump_file, +- "succ st = %d; late_start = %d; latency = %d", +- s_st, late_start, e->latency); +- +- if (e->data_type == MEM_DEP) +- end = MAX (end, SCHED_TIME (v_node) - ii + 1); +- if (dump_file) +- fprintf (dump_file, "end = %d\n", end); +- +- } +- else if (dump_file) +- fprintf (dump_file, "the node is not scheduled\n"); +- +- } +- start = late_start; +- end = MAX (end, late_start - ii); +- /* Schedule the node close to it's successors. 
*/ ++ /* We first compute a forward range (start <= end), then decide whether ++ to reverse it. */ ++ early_start = INT_MIN; ++ late_start = INT_MAX; ++ start = INT_MIN; ++ end = INT_MAX; ++ step = 1; ++ ++ count_preds = 0; ++ count_succs = 0; ++ ++ if (dump_file && (psp_not_empty || pss_not_empty)) ++ { ++ fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)" ++ "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii); ++ fprintf (dump_file, "%11s %11s %11s %11s %5s\n", ++ "start", "early start", "late start", "end", "time"); ++ fprintf (dump_file, "=========== =========== =========== ===========" ++ " =====\n"); ++ } ++ /* Calculate early_start and limit end. Both bounds are inclusive. */ ++ if (psp_not_empty) ++ for (e = u_node->in; e != 0; e = e->next_in) ++ { ++ int v = e->src->cuid; ++ ++ if (TEST_BIT (sched_nodes, v)) ++ { ++ int p_st = SCHED_TIME (v); ++ int earliest = p_st + e->latency - (e->distance * ii); ++ int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX); ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "%11s %11d %11s %11d %5d", ++ "", earliest, "", latest, p_st); ++ print_ddg_edge (dump_file, e); ++ fprintf (dump_file, "\n"); ++ } ++ ++ early_start = MAX (early_start, earliest); ++ end = MIN (end, latest); ++ ++ if (e->type == TRUE_DEP && e->data_type == REG_DEP) ++ count_preds++; ++ } ++ } ++ ++ /* Calculate late_start and limit start. Both bounds are inclusive. */ ++ if (pss_not_empty) ++ for (e = u_node->out; e != 0; e = e->next_out) ++ { ++ int v = e->dest->cuid; ++ ++ if (TEST_BIT (sched_nodes, v)) ++ { ++ int s_st = SCHED_TIME (v); ++ int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN); ++ int latest = s_st - e->latency + (e->distance * ii); ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "%11d %11s %11d %11s %5d", ++ earliest, "", latest, "", s_st); ++ print_ddg_edge (dump_file, e); ++ fprintf (dump_file, "\n"); ++ } ++ ++ start = MAX (start, earliest); ++ late_start = MIN (late_start, latest); ++ ++ if (e->type == TRUE_DEP && e->data_type == REG_DEP) ++ count_succs++; ++ } ++ } ++ ++ if (dump_file && (psp_not_empty || pss_not_empty)) ++ { ++ fprintf (dump_file, "----------- ----------- ----------- -----------" ++ " -----\n"); ++ fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n", ++ start, early_start, late_start, end, "", ++ "(max, max, min, min)"); ++ } ++ ++ /* Get a target scheduling window no bigger than ii. */ ++ if (early_start == INT_MIN && late_start == INT_MAX) ++ early_start = NODE_ASAP (u_node); ++ else if (early_start == INT_MIN) ++ early_start = late_start - (ii - 1); ++ late_start = MIN (late_start, early_start + (ii - 1)); ++ ++ /* Apply memory dependence limits. */ ++ start = MAX (start, early_start); ++ end = MIN (end, late_start); ++ ++ if (dump_file && (psp_not_empty || pss_not_empty)) ++ fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n", ++ "", start, end, "", ""); ++ ++ /* If there are at least as many successors as predecessors, schedule the ++ node close to its successors. 
*/ ++ if (pss_not_empty && count_succs >= count_preds) ++ { ++ int tmp = end; ++ end = start; ++ start = tmp; + step = -1; +- +- if (dump_file) +- fprintf (dump_file, +- "\nScheduling %d (%d) in a window (%d..%d) with step %d\n", +- u_node->cuid, INSN_UID (u_node->insn), start, end, step); +- +- } +- +- else if (psp_not_empty && pss_not_empty) +- { +- int early_start = INT_MIN; +- int late_start = INT_MAX; +- int count_preds = 0; +- int count_succs = 0; +- +- start = INT_MIN; +- end = INT_MAX; +- for (e = u_node->in; e != 0; e = e->next_in) +- { +- ddg_node_ptr v_node = e->src; +- +- if (dump_file) +- { +- fprintf (dump_file, "\nProcessing edge:"); +- print_ddg_edge (dump_file, e); +- fprintf (dump_file, +- "\nScheduling %d (%d) in psp_pss_not_empty," +- " checking p %d (%d): ", u_node->cuid, INSN_UID +- (u_node->insn), v_node->cuid, INSN_UID +- (v_node->insn)); +- } +- +- if (TEST_BIT (sched_nodes, v_node->cuid)) +- { +- int p_st = SCHED_TIME (v_node); +- +- early_start = MAX (early_start, +- p_st + e->latency +- - (e->distance * ii)); +- +- if (dump_file) +- fprintf (dump_file, +- "pred st = %d; early_start = %d; latency = %d", +- p_st, early_start, e->latency); +- +- if (e->type == TRUE_DEP && e->data_type == REG_DEP) +- count_preds++; +- +- if (e->data_type == MEM_DEP) +- end = MIN (end, SCHED_TIME (v_node) + ii - 1); +- } +- else if (dump_file) +- fprintf (dump_file, "the node is not scheduled\n"); +- +- } +- for (e = u_node->out; e != 0; e = e->next_out) +- { +- ddg_node_ptr v_node = e->dest; +- +- if (dump_file) +- { +- fprintf (dump_file, "\nProcessing edge:"); +- print_ddg_edge (dump_file, e); +- fprintf (dump_file, +- "\nScheduling %d (%d) in psp_pss_not_empty," +- " checking s %d (%d): ", u_node->cuid, INSN_UID +- (u_node->insn), v_node->cuid, INSN_UID +- (v_node->insn)); +- } +- +- if (TEST_BIT (sched_nodes, v_node->cuid)) +- { +- int s_st = SCHED_TIME (v_node); +- +- late_start = MIN (late_start, +- s_st - e->latency +- + (e->distance * ii)); +- +- if (dump_file) +- fprintf (dump_file, +- "succ st = %d; late_start = %d; latency = %d", +- s_st, late_start, e->latency); +- +- if (e->type == TRUE_DEP && e->data_type == REG_DEP) +- count_succs++; +- +- if (e->data_type == MEM_DEP) +- start = MAX (start, SCHED_TIME (v_node) - ii + 1); +- } +- else if (dump_file) +- fprintf (dump_file, "the node is not scheduled\n"); +- +- } +- start = MAX (start, early_start); +- end = MIN (end, MIN (early_start + ii, late_start + 1)); +- step = 1; +- /* If there are more successors than predecessors schedule the +- node close to it's successors. */ +- if (count_succs >= count_preds) +- { +- int old_start = start; +- +- start = end - 1; +- end = old_start - 1; +- step = -1; +- } +- } +- else /* psp is empty && pss is empty. */ +- { +- start = SCHED_ASAP (u_node); +- end = start + ii; +- step = 1; +- } ++ } ++ ++ /* Now that we've finalized the window, make END an exclusive rather ++ than an inclusive bound. 
*/ ++ end += step; + + *start_p = start; + *step_p = step; +@@ -1880,10 +1977,10 @@ + if (dump_file) + fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n", + start, end, step); +- return -1; ++ return -1; + } + +- return 0; ++ return 0; + } + + /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the +@@ -1939,7 +2036,7 @@ + SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */ + for (e = u_node->in; e != 0; e = e->next_in) + if (TEST_BIT (sched_nodes, e->src->cuid) +- && ((SCHED_TIME (e->src) - (e->distance * ii)) == ++ && ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) == + first_cycle_in_window)) + { + if (dump_file) +@@ -1964,7 +2061,7 @@ + SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */ + for (e = u_node->out; e != 0; e = e->next_out) + if (TEST_BIT (sched_nodes, e->dest->cuid) +- && ((SCHED_TIME (e->dest) + (e->distance * ii)) == ++ && ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) == + last_cycle_in_window)) + { + if (dump_file) +@@ -1988,7 +2085,7 @@ + last row of the scheduling window) */ + + static bool +-try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node, ++try_scheduling_node_in_cycle (partial_schedule_ptr ps, + int u, int cycle, sbitmap sched_nodes, + int *num_splits, sbitmap must_precede, + sbitmap must_follow) +@@ -1997,11 +2094,10 @@ + bool success = 0; + + verify_partial_schedule (ps, sched_nodes); +- psi = ps_add_node_check_conflicts (ps, u_node, cycle, +- must_precede, must_follow); ++ psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow); + if (psi) + { +- SCHED_TIME (u_node) = cycle; ++ SCHED_TIME (u) = cycle; + SET_BIT (sched_nodes, u); + success = 1; + *num_splits = 0; +@@ -2062,8 +2158,8 @@ + &step, &end) == 0) + { + if (dump_file) +- fprintf (dump_file, "\nTrying to schedule node %d \ +- INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID ++ fprintf (dump_file, "\nTrying to schedule node %d " ++ "INSN = %d in (%d .. 
%d) step %d\n", u, (INSN_UID + (g->nodes[u].insn)), start, end, step); + + gcc_assert ((step > 0 && start < end) +@@ -2081,7 +2177,7 @@ + &tmp_precede, must_precede, + c, start, end, step); + success = +- try_scheduling_node_in_cycle (ps, u_node, u, c, ++ try_scheduling_node_in_cycle (ps, u, c, + sched_nodes, + &num_splits, tmp_precede, + tmp_follow); +@@ -2181,7 +2277,7 @@ + for (crr_insn = rows_new[row]; + crr_insn; crr_insn = crr_insn->next_in_row) + { +- ddg_node_ptr u = crr_insn->node; ++ int u = crr_insn->id; + int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii); + + SCHED_TIME (u) = new_time; +@@ -2202,7 +2298,7 @@ + for (crr_insn = rows_new[row + 1]; + crr_insn; crr_insn = crr_insn->next_in_row) + { +- ddg_node_ptr u = crr_insn->node; ++ int u = crr_insn->id; + int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1; + + SCHED_TIME (u) = new_time; +@@ -2242,24 +2338,24 @@ + { + ddg_edge_ptr e; + int lower = INT_MIN, upper = INT_MAX; +- ddg_node_ptr crit_pred = NULL; +- ddg_node_ptr crit_succ = NULL; ++ int crit_pred = -1; ++ int crit_succ = -1; + int crit_cycle; + + for (e = u_node->in; e != 0; e = e->next_in) + { +- ddg_node_ptr v_node = e->src; ++ int v = e->src->cuid; + +- if (TEST_BIT (sched_nodes, v_node->cuid) +- && (low == SCHED_TIME (v_node) + e->latency - (e->distance * ii))) +- if (SCHED_TIME (v_node) > lower) ++ if (TEST_BIT (sched_nodes, v) ++ && (low == SCHED_TIME (v) + e->latency - (e->distance * ii))) ++ if (SCHED_TIME (v) > lower) + { +- crit_pred = v_node; +- lower = SCHED_TIME (v_node); ++ crit_pred = v; ++ lower = SCHED_TIME (v); + } + } + +- if (crit_pred != NULL) ++ if (crit_pred >= 0) + { + crit_cycle = SCHED_TIME (crit_pred) + 1; + return SMODULO (crit_cycle, ii); +@@ -2267,17 +2363,18 @@ + + for (e = u_node->out; e != 0; e = e->next_out) + { +- ddg_node_ptr v_node = e->dest; +- if (TEST_BIT (sched_nodes, v_node->cuid) +- && (up == SCHED_TIME (v_node) - e->latency + (e->distance * ii))) +- if (SCHED_TIME (v_node) < upper) ++ int v = e->dest->cuid; ++ ++ if (TEST_BIT (sched_nodes, v) ++ && (up == SCHED_TIME (v) - e->latency + (e->distance * ii))) ++ if (SCHED_TIME (v) < upper) + { +- crit_succ = v_node; +- upper = SCHED_TIME (v_node); ++ crit_succ = v; ++ upper = SCHED_TIME (v); + } + } + +- if (crit_succ != NULL) ++ if (crit_succ >= 0) + { + crit_cycle = SCHED_TIME (crit_succ); + return SMODULO (crit_cycle, ii); +@@ -2301,10 +2398,10 @@ + + for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) + { +- ddg_node_ptr u = crr_insn->node; ++ int u = crr_insn->id; + + length++; +- gcc_assert (TEST_BIT (sched_nodes, u->cuid)); ++ gcc_assert (TEST_BIT (sched_nodes, u)); + /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by + popcount (sched_nodes) == number of insns in ps. 
*/ + gcc_assert (SCHED_TIME (u) >= ps->min_cycle); +@@ -2719,6 +2816,7 @@ + partial_schedule_ptr ps = XNEW (struct partial_schedule); + ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr)); + ps->rows_length = (int *) xcalloc (ii, sizeof (int)); ++ ps->reg_moves = NULL; + ps->ii = ii; + ps->history = history; + ps->min_cycle = INT_MAX; +@@ -2753,8 +2851,16 @@ + static void + free_partial_schedule (partial_schedule_ptr ps) + { ++ ps_reg_move_info *move; ++ unsigned int i; ++ + if (!ps) + return; ++ ++ FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move) ++ sbitmap_free (move->uses); ++ VEC_free (ps_reg_move_info, heap, ps->reg_moves); ++ + free_ps_insns (ps); + free (ps->rows); + free (ps->rows_length); +@@ -2796,12 +2902,12 @@ + fprintf (dump, "\n[ROW %d ]: ", i); + while (ps_i) + { +- if (JUMP_P (ps_i->node->insn)) +- fprintf (dump, "%d (branch), ", +- INSN_UID (ps_i->node->insn)); ++ rtx insn = ps_rtl_insn (ps, ps_i->id); ++ ++ if (JUMP_P (insn)) ++ fprintf (dump, "%d (branch), ", INSN_UID (insn)); + else +- fprintf (dump, "%d, ", +- INSN_UID (ps_i->node->insn)); ++ fprintf (dump, "%d, ", INSN_UID (insn)); + + ps_i = ps_i->next_in_row; + } +@@ -2810,11 +2916,11 @@ + + /* Creates an object of PS_INSN and initializes it to the given parameters. */ + static ps_insn_ptr +-create_ps_insn (ddg_node_ptr node, int cycle) ++create_ps_insn (int id, int cycle) + { + ps_insn_ptr ps_i = XNEW (struct ps_insn); + +- ps_i->node = node; ++ ps_i->id = id; + ps_i->next_in_row = NULL; + ps_i->prev_in_row = NULL; + ps_i->cycle = cycle; +@@ -2879,10 +2985,11 @@ + next_ps_i; + next_ps_i = next_ps_i->next_in_row) + { +- if (must_follow && TEST_BIT (must_follow, next_ps_i->node->cuid) ++ if (must_follow ++ && TEST_BIT (must_follow, next_ps_i->id) + && ! first_must_follow) + first_must_follow = next_ps_i; +- if (must_precede && TEST_BIT (must_precede, next_ps_i->node->cuid)) ++ if (must_precede && TEST_BIT (must_precede, next_ps_i->id)) + { + /* If we have already met a node that must follow, then + there is no possible column. */ +@@ -2893,8 +3000,8 @@ + } + /* The closing branch must be the last in the row. */ + if (must_precede +- && TEST_BIT (must_precede, next_ps_i->node->cuid) +- && JUMP_P (next_ps_i->node->insn)) ++ && TEST_BIT (must_precede, next_ps_i->id) ++ && JUMP_P (ps_rtl_insn (ps, next_ps_i->id))) + return false; + + last_in_row = next_ps_i; +@@ -2903,7 +3010,7 @@ + /* The closing branch is scheduled as well. Make sure there is no + dependent instruction after it as the branch should be the last + instruction in the row. */ +- if (JUMP_P (ps_i->node->insn)) ++ if (JUMP_P (ps_rtl_insn (ps, ps_i->id))) + { + if (first_must_follow) + return false; +@@ -2954,7 +3061,6 @@ + { + ps_insn_ptr prev, next; + int row; +- ddg_node_ptr next_node; + + if (!ps || !ps_i) + return false; +@@ -2964,11 +3070,9 @@ + if (! ps_i->next_in_row) + return false; + +- next_node = ps_i->next_in_row->node; +- + /* Check if next_in_row is dependent on ps_i, both having same sched + times (typically ANTI_DEP). If so, ps_i cannot skip over it. */ +- if (must_follow && TEST_BIT (must_follow, next_node->cuid)) ++ if (must_follow && TEST_BIT (must_follow, ps_i->next_in_row->id)) + return false; + + /* Advance PS_I over its next_in_row in the doubly linked list. */ +@@ -2999,7 +3103,7 @@ + before/after (respectively) the node pointed to by PS_I when scheduled + in the same cycle. 
*/ + static ps_insn_ptr +-add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle, ++add_node_to_ps (partial_schedule_ptr ps, int id, int cycle, + sbitmap must_precede, sbitmap must_follow) + { + ps_insn_ptr ps_i; +@@ -3008,7 +3112,7 @@ + if (ps->rows_length[row] >= issue_rate) + return NULL; + +- ps_i = create_ps_insn (node, cycle); ++ ps_i = create_ps_insn (id, cycle); + + /* Finds and inserts PS_I according to MUST_FOLLOW and + MUST_PRECEDE. */ +@@ -3060,7 +3164,7 @@ + crr_insn; + crr_insn = crr_insn->next_in_row) + { +- rtx insn = crr_insn->node->insn; ++ rtx insn = ps_rtl_insn (ps, crr_insn->id); + + if (!NONDEBUG_INSN_P (insn)) + continue; +@@ -3097,7 +3201,7 @@ + cuid N must be come before/after (respectively) the node pointed to by + PS_I when scheduled in the same cycle. */ + ps_insn_ptr +-ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n, ++ps_add_node_check_conflicts (partial_schedule_ptr ps, int n, + int c, sbitmap must_precede, + sbitmap must_follow) + { + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106829.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106829.patch new file mode 100644 index 0000000000..02f8e51779 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106829.patch @@ -0,0 +1,147 @@ +2011-10-19 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF: + + 2011-09-09 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/arm-cores.def (generic-armv7-a): New architecture. + * config/arm/arm-tables.opt: Regenerate. + * config/arm/arm-tune.md: Regenerate. + * config/arm/arm.c (arm_file_start): Output .arch directive when + user passes -mcpu=generic-*. + (arm_issue_rate): Add genericv7a support. + * config/arm/arm.h (EXTRA_SPECS): Add asm_cpu_spec. + (ASM_CPU_SPEC): New define. + * config/arm/elf.h (ASM_SPEC): Use %(asm_cpu_spec). + * config/arm/semi.h (ASM_SPEC): Likewise. + * doc/invoke.texi (ARM Options): Document -mcpu=generic-* + and -mtune=generic-*. 
+ +=== modified file 'gcc/config/arm/arm-cores.def' +--- old/gcc/config/arm/arm-cores.def 2011-06-14 16:00:30 +0000 ++++ new/gcc/config/arm/arm-cores.def 2011-10-19 16:46:51 +0000 +@@ -124,6 +124,7 @@ + ARM_CORE("mpcore", mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e) + ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, v6t2) + ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, v6t2) ++ARM_CORE("generic-armv7-a", genericv7a, 7A, FL_LDSCHED, cortex) + ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, cortex_a5) + ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex) + ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9) +@@ -135,3 +136,4 @@ + ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex) + ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, cortex) + ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, cortex) ++ + +=== modified file 'gcc/config/arm/arm-tune.md' +--- old/gcc/config/arm/arm-tune.md 2011-06-14 14:37:30 +0000 ++++ new/gcc/config/arm/arm-tune.md 2011-10-19 16:46:51 +0000 +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from arm-cores.def + (define_attr "tune" +- "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0" ++ "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0" + (const (symbol_ref "((enum attr_tune) arm_tune)"))) + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-10-11 02:31:01 +0000 ++++ new/gcc/config/arm/arm.c 2011-10-19 16:46:51 +0000 +@@ -22185,6 +22185,8 @@ + const char *fpu_name; + if (arm_selected_arch) + asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name); ++ else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0) ++ asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8); + else + asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name); + +@@ -23717,6 +23719,7 @@ + case cortexr4: + case cortexr4f: + case cortexr5: ++ case genericv7a: + case cortexa5: + case cortexa8: + case cortexa9: + +=== modified file 'gcc/config/arm/arm.h' +--- old/gcc/config/arm/arm.h 2011-09-05 14:32:11 +0000 ++++ new/gcc/config/arm/arm.h 2011-10-19 16:46:51 +0000 +@@ -198,6 +198,7 @@ + Do not define this macro if it does not need to do anything. 
*/ + #define EXTRA_SPECS \ + { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }, \ ++ { "asm_cpu_spec", ASM_CPU_SPEC }, \ + SUBTARGET_EXTRA_SPECS + + #ifndef SUBTARGET_EXTRA_SPECS +@@ -2278,4 +2279,8 @@ + instruction. */ + #define MAX_LDM_STM_OPS 4 + ++#define ASM_CPU_SPEC \ ++ " %{mcpu=generic-*:-march=%*;" \ ++ " :%{mcpu=*:-mcpu=%*} %{march=*:-march=%*}}" ++ + #endif /* ! GCC_ARM_H */ + +=== modified file 'gcc/config/arm/elf.h' +--- old/gcc/config/arm/elf.h 2009-06-21 19:48:15 +0000 ++++ new/gcc/config/arm/elf.h 2011-10-19 16:46:51 +0000 +@@ -56,8 +56,7 @@ + #define ASM_SPEC "\ + %{mbig-endian:-EB} \ + %{mlittle-endian:-EL} \ +-%{mcpu=*:-mcpu=%*} \ +-%{march=*:-march=%*} \ ++%(asm_cpu_spec) \ + %{mapcs-*:-mapcs-%*} \ + %(subtarget_asm_float_spec) \ + %{mthumb-interwork:-mthumb-interwork} \ + +=== modified file 'gcc/config/arm/semi.h' +--- old/gcc/config/arm/semi.h 2007-08-02 09:49:31 +0000 ++++ new/gcc/config/arm/semi.h 2011-10-19 16:46:51 +0000 +@@ -65,8 +65,7 @@ + #define ASM_SPEC "\ + %{fpic|fpie: -k} %{fPIC|fPIE: -k} \ + %{mbig-endian:-EB} \ +-%{mcpu=*:-mcpu=%*} \ +-%{march=*:-march=%*} \ ++%(arm_cpu_spec) \ + %{mapcs-float:-mfloat} \ + %{msoft-float:-mfloat-abi=soft} %{mhard-float:-mfloat-abi=hard} \ + %{mfloat-abi=*} %{mfpu=*} \ + +=== modified file 'gcc/doc/invoke.texi' +--- old/gcc/doc/invoke.texi 2011-08-13 08:32:32 +0000 ++++ new/gcc/doc/invoke.texi 2011-10-19 16:46:51 +0000 +@@ -10215,6 +10215,10 @@ + @samp{cortex-m0}, + @samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}. + ++@option{-mcpu=generic-@var{arch}} is also permissible, and is ++equivalent to @option{-march=@var{arch} -mtune=generic-@var{arch}}. ++See @option{-mtune} for more information. ++ + @item -mtune=@var{name} + @opindex mtune + This option is very similar to the @option{-mcpu=} option, except that +@@ -10226,6 +10230,13 @@ + For some ARM implementations better performance can be obtained by using + this option. + ++@option{-mtune=generic-@var{arch}} specifies that GCC should tune the ++performance for a blend of processors within architecture @var{arch}. ++The aim is to generate code that run well on the current most popular ++processors, balancing between optimizations that benefit some CPUs in the ++range, and avoiding performance pitfalls of other CPUs. The effects of ++this option may change in future GCC versions as CPU models come and go. ++ + @item -march=@var{name} + @opindex march + This specifies the name of the target ARM architecture. GCC uses this + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106830.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106830.patch new file mode 100644 index 0000000000..695aa85591 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106830.patch @@ -0,0 +1,304 @@ +2011-10-19 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF: + + 2011-10-18 Andrew Stubbs <ams@codesourcery.com> + + * config/arm/driver-arm.c (host_detect_local_cpu): Close the file + before exiting. + + 2011-10-18 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config.host (arm*-*-linux*): Add driver-arm.o and x-arm. + * config/arm/arm.opt: Add 'native' processor_type and + arm_arch enum values. + * config/arm/arm.h (host_detect_local_cpu): New prototype. + (EXTRA_SPEC_FUNCTIONS): New define. + (MCPU_MTUNE_NATIVE_SPECS): New define. + (DRIVER_SELF_SPECS): New define. + * config/arm/driver-arm.c: New file. + * config/arm/x-arm: New file. 
+ * doc/invoke.texi (ARM Options): Document -mcpu=native, + -mtune=native and -march=native. + +=== modified file 'gcc/config.host' +--- old/gcc/config.host 2011-02-15 09:49:14 +0000 ++++ new/gcc/config.host 2011-10-19 17:01:50 +0000 +@@ -100,6 +100,14 @@ + esac + + case ${host} in ++ arm*-*-linux*) ++ case ${target} in ++ arm*-*-*) ++ host_extra_gcc_objs="driver-arm.o" ++ host_xmake_file="${host_xmake_file} arm/x-arm" ++ ;; ++ esac ++ ;; + alpha*-*-linux*) + case ${target} in + alpha*-*-linux*) + +=== modified file 'gcc/config/arm/arm.h' +--- old/gcc/config/arm/arm.h 2011-10-19 16:46:51 +0000 ++++ new/gcc/config/arm/arm.h 2011-10-19 17:01:50 +0000 +@@ -2283,4 +2283,21 @@ + " %{mcpu=generic-*:-march=%*;" \ + " :%{mcpu=*:-mcpu=%*} %{march=*:-march=%*}}" + ++/* -mcpu=native handling only makes sense with compiler running on ++ an ARM chip. */ ++#if defined(__arm__) ++extern const char *host_detect_local_cpu (int argc, const char **argv); ++# define EXTRA_SPEC_FUNCTIONS \ ++ { "local_cpu_detect", host_detect_local_cpu }, ++ ++# define MCPU_MTUNE_NATIVE_SPECS \ ++ " %{march=native:%<march=native %:local_cpu_detect(arch)}" \ ++ " %{mcpu=native:%<mcpu=native %:local_cpu_detect(cpu)}" \ ++ " %{mtune=native:%<mtune=native %:local_cpu_detect(tune)}" ++#else ++# define MCPU_MTUNE_NATIVE_SPECS "" ++#endif ++ ++#define DRIVER_SELF_SPECS MCPU_MTUNE_NATIVE_SPECS ++ + #endif /* ! GCC_ARM_H */ + +=== modified file 'gcc/config/arm/arm.opt' +--- old/gcc/config/arm/arm.opt 2011-10-11 02:31:01 +0000 ++++ new/gcc/config/arm/arm.opt 2011-10-19 17:01:50 +0000 +@@ -48,6 +48,11 @@ + Target RejectNegative Joined + Specify the name of the target architecture + ++; Other arm_arch values are loaded from arm-tables.opt ++; but that is a generated file and this is an odd-one-out. ++EnumValue ++Enum(arm_arch) String(native) Value(-1) DriverOnly ++ + marm + Target RejectNegative InverseMask(THUMB) Undocumented + +@@ -153,6 +158,11 @@ + Target RejectNegative Joined + Tune code for the given processor + ++; Other processor_type values are loaded from arm-tables.opt ++; but that is a generated file and this is an odd-one-out. ++EnumValue ++Enum(processor_type) String(native) Value(-1) DriverOnly ++ + mwords-little-endian + Target Report RejectNegative Mask(LITTLE_WORDS) + Assume big endian bytes, little endian words + +=== added file 'gcc/config/arm/driver-arm.c' +--- old/gcc/config/arm/driver-arm.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/config/arm/driver-arm.c 2011-10-19 17:07:55 +0000 +@@ -0,0 +1,149 @@ ++/* Subroutines for the gcc driver. ++ Copyright (C) 2011 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. 
*/ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "configargs.h" ++ ++struct vendor_cpu { ++ const char *part_no; ++ const char *arch_name; ++ const char *cpu_name; ++}; ++ ++static struct vendor_cpu arm_cpu_table[] = { ++ {"0x926", "armv5te", "arm926ej-s"}, ++ {"0xa26", "armv5te", "arm1026ej-s"}, ++ {"0xb02", "armv6k", "mpcore"}, ++ {"0xb36", "armv6j", "arm1136j-s"}, ++ {"0xb56", "armv6t2", "arm1156t2-s"}, ++ {"0xb76", "armv6zk", "arm1176jz-s"}, ++ {"0xc05", "armv7-a", "cortex-a5"}, ++ {"0xc08", "armv7-a", "cortex-a8"}, ++ {"0xc09", "armv7-a", "cortex-a9"}, ++ {"0xc0f", "armv7-a", "cortex-a15"}, ++ {"0xc14", "armv7-r", "cortex-r4"}, ++ {"0xc15", "armv7-r", "cortex-r5"}, ++ {"0xc20", "armv6-m", "cortex-m0"}, ++ {"0xc21", "armv6-m", "cortex-m1"}, ++ {"0xc23", "armv7-m", "cortex-m3"}, ++ {"0xc24", "armv7e-m", "cortex-m4"}, ++ {NULL, NULL, NULL} ++}; ++ ++struct { ++ const char *vendor_no; ++ const struct vendor_cpu *vendor_parts; ++} vendors[] = { ++ {"0x41", arm_cpu_table}, ++ {NULL, NULL} ++}; ++ ++/* This will be called by the spec parser in gcc.c when it sees ++ a %:local_cpu_detect(args) construct. Currently it will be called ++ with either "arch", "cpu" or "tune" as argument depending on if ++ -march=native, -mcpu=native or -mtune=native is to be substituted. ++ ++ It returns a string containing new command line parameters to be ++ put at the place of the above two options, depending on what CPU ++ this is executed. E.g. "-march=armv7-a" on a Cortex-A8 for ++ -march=native. If the routine can't detect a known processor, ++ the -march or -mtune option is discarded. ++ ++ ARGC and ARGV are set depending on the actual arguments given ++ in the spec. */ ++const char * ++host_detect_local_cpu (int argc, const char **argv) ++{ ++ const char *val = NULL; ++ char buf[128]; ++ FILE *f = NULL; ++ bool arch; ++ const struct vendor_cpu *cpu_table = NULL; ++ ++ if (argc < 1) ++ goto not_found; ++ ++ arch = strcmp (argv[0], "arch") == 0; ++ if (!arch && strcmp (argv[0], "cpu") != 0 && strcmp (argv[0], "tune")) ++ goto not_found; ++ ++ f = fopen ("/proc/cpuinfo", "r"); ++ if (f == NULL) ++ goto not_found; ++ ++ while (fgets (buf, sizeof (buf), f) != NULL) ++ { ++ /* Ensure that CPU implementer is ARM (0x41). */ ++ if (strncmp (buf, "CPU implementer", sizeof ("CPU implementer") - 1) == 0) ++ { ++ int i; ++ for (i = 0; vendors[i].vendor_no != NULL; i++) ++ if (strstr (buf, vendors[i].vendor_no) != NULL) ++ { ++ cpu_table = vendors[i].vendor_parts; ++ break; ++ } ++ } ++ ++ /* Detect arch/cpu. */ ++ if (strncmp (buf, "CPU part", sizeof ("CPU part") - 1) == 0) ++ { ++ int i; ++ ++ if (cpu_table == NULL) ++ goto not_found; ++ ++ for (i = 0; cpu_table[i].part_no != NULL; i++) ++ if (strstr (buf, cpu_table[i].part_no) != NULL) ++ { ++ val = arch ? 
cpu_table[i].arch_name : cpu_table[i].cpu_name; ++ break; ++ } ++ break; ++ } ++ } ++ ++ fclose (f); ++ ++ if (val == NULL) ++ goto not_found; ++ ++ return concat ("-m", argv[0], "=", val, NULL); ++ ++not_found: ++ { ++ unsigned int i; ++ unsigned int opt; ++ const char *search[] = {NULL, "arch"}; ++ ++ if (f) ++ fclose (f); ++ ++ search[0] = argv[0]; ++ for (opt = 0; opt < ARRAY_SIZE (search); opt++) ++ for (i = 0; i < ARRAY_SIZE (configure_default_options); i++) ++ if (strcmp (configure_default_options[i].name, search[opt]) == 0) ++ return concat ("-m", search[opt], "=", ++ configure_default_options[i].value, NULL); ++ return NULL; ++ } ++} + +=== added file 'gcc/config/arm/x-arm' +--- old/gcc/config/arm/x-arm 1970-01-01 00:00:00 +0000 ++++ new/gcc/config/arm/x-arm 2011-10-19 17:01:50 +0000 +@@ -0,0 +1,3 @@ ++driver-arm.o: $(srcdir)/config/arm/driver-arm.c \ ++ $(CONFIG_H) $(SYSTEM_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + +=== modified file 'gcc/doc/invoke.texi' +--- old/gcc/doc/invoke.texi 2011-10-19 16:46:51 +0000 ++++ new/gcc/doc/invoke.texi 2011-10-19 17:01:50 +0000 +@@ -10215,10 +10215,16 @@ + @samp{cortex-m0}, + @samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}. + ++ + @option{-mcpu=generic-@var{arch}} is also permissible, and is + equivalent to @option{-march=@var{arch} -mtune=generic-@var{arch}}. + See @option{-mtune} for more information. + ++@option{-mcpu=native} causes the compiler to auto-detect the CPU ++of the build computer. At present, this feature is only supported on ++Linux, and not all architectures are recognised. If the auto-detect is ++unsuccessful the option has no effect. ++ + @item -mtune=@var{name} + @opindex mtune + This option is very similar to the @option{-mcpu=} option, except that +@@ -10237,6 +10243,11 @@ + range, and avoiding performance pitfalls of other CPUs. The effects of + this option may change in future GCC versions as CPU models come and go. + ++@option{-mtune=native} causes the compiler to auto-detect the CPU ++of the build computer. At present, this feature is only supported on ++Linux, and not all architectures are recognised. If the auto-detect is ++unsuccessful the option has no effect. ++ + @item -march=@var{name} + @opindex march + This specifies the name of the target ARM architecture. GCC uses this +@@ -10250,6 +10261,11 @@ + @samp{armv7}, @samp{armv7-a}, @samp{armv7-r}, @samp{armv7-m}, + @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}. + ++@option{-march=native} causes the compiler to auto-detect the architecture ++of the build computer. At present, this feature is only supported on ++Linux, and not all architectures are recognised. If the auto-detect is ++unsuccessful the option has no effect. ++ + @item -mfpu=@var{name} + @itemx -mfpe=@var{number} + @itemx -mfp=@var{number} + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106831.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106831.patch new file mode 100644 index 0000000000..ad91d77366 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106831.patch @@ -0,0 +1,123 @@ +2011-10-19 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF: + + 2011-10-18 Andrew Stubbs <ams@codesourcery.com> + + PR tree-optimization/50717 + + gcc/ + * tree-ssa-math-opts.c (is_widening_mult_p): Remove the 'type' + parameter. Calculate 'type' from stmt. + (convert_mult_to_widen): Update call the is_widening_mult_p. + (convert_plusminus_to_widen): Likewise. 
+ + gcc/testsuite/ + * gcc.dg/pr50717-1.c: New file. + * gcc.target/arm/wmul-12.c: Correct types. + * gcc.target/arm/wmul-8.c: Correct types. + +=== added file 'gcc/testsuite/gcc.dg/pr50717-1.c' +--- old/gcc/testsuite/gcc.dg/pr50717-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/pr50717-1.c 2011-10-19 14:42:50 +0000 +@@ -0,0 +1,26 @@ ++/* PR tree-optimization/50717 */ ++/* Ensure that widening multiply-and-accumulate is not used where integer ++ type promotion or users' casts should prevent it. */ ++ ++/* { dg-options "-O2 -fdump-tree-widening_mul" } */ ++ ++long long ++f (unsigned int a, char b, long long c) ++{ ++ return (a * b) + c; ++} ++ ++int ++g (short a, short b, int c) ++{ ++ return (short)(a * b) + c; ++} ++ ++int ++h (char a, char b, int c) ++{ ++ return (char)(a * b) + c; ++} ++ ++/* { dg-final { scan-tree-dump-times "WIDEN_MULT_PLUS_EXPR" 0 "widening_mul" } } */ ++/* { dg-final { cleanup-tree-dump "widening_mul" } } */ + +=== modified file 'gcc/testsuite/gcc.target/arm/wmul-12.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-12.c 2011-07-22 15:46:42 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-12.c 2011-10-19 14:42:50 +0000 +@@ -4,8 +4,8 @@ + long long + foo (int *b, int *c) + { +- int tmp = *b * *c; +- return 10 + (long long)tmp; ++ long long tmp = (long long)*b * *c; ++ return 10 + tmp; + } + + /* { dg-final { scan-assembler "smlal" } } */ + +=== modified file 'gcc/testsuite/gcc.target/arm/wmul-8.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-8.c 2011-07-15 14:16:54 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-8.c 2011-10-19 14:42:50 +0000 +@@ -4,7 +4,7 @@ + long long + foo (long long a, int *b, int *c) + { +- return a + *b * *c; ++ return a + (long long)*b * *c; + } + + /* { dg-final { scan-assembler "smlal" } } */ + +=== modified file 'gcc/tree-ssa-math-opts.c' +--- old/gcc/tree-ssa-math-opts.c 2011-09-08 20:11:43 +0000 ++++ new/gcc/tree-ssa-math-opts.c 2011-10-19 14:42:50 +0000 +@@ -1351,10 +1351,12 @@ + and *TYPE2_OUT would give the operands of the multiplication. 
*/ + + static bool +-is_widening_mult_p (tree type, gimple stmt, ++is_widening_mult_p (gimple stmt, + tree *type1_out, tree *rhs1_out, + tree *type2_out, tree *rhs2_out) + { ++ tree type = TREE_TYPE (gimple_assign_lhs (stmt)); ++ + if (TREE_CODE (type) != INTEGER_TYPE + && TREE_CODE (type) != FIXED_POINT_TYPE) + return false; +@@ -1416,7 +1418,7 @@ + if (TREE_CODE (type) != INTEGER_TYPE) + return false; + +- if (!is_widening_mult_p (type, stmt, &type1, &rhs1, &type2, &rhs2)) ++ if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2)) + return false; + + to_mode = TYPE_MODE (type); +@@ -1592,7 +1594,7 @@ + if (code == PLUS_EXPR + && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR)) + { +- if (!is_widening_mult_p (type, rhs1_stmt, &type1, &mult_rhs1, ++ if (!is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1, + &type2, &mult_rhs2)) + return false; + add_rhs = rhs2; +@@ -1600,7 +1602,7 @@ + } + else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR) + { +- if (!is_widening_mult_p (type, rhs2_stmt, &type1, &mult_rhs1, ++ if (!is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1, + &type2, &mult_rhs2)) + return false; + add_rhs = rhs1; + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106832.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106832.patch new file mode 100644 index 0000000000..843f1cff25 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106832.patch @@ -0,0 +1,24 @@ +2011-10-21 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF mainline: + + 2011-10-21 Andrew Stubbs <ams@codesourcery.com> + + PR target/50809 + + gcc/ + * config/arm/driver-arm.c (vendors): Make static. + +=== modified file 'gcc/config/arm/driver-arm.c' +--- old/gcc/config/arm/driver-arm.c 2011-10-19 17:07:55 +0000 ++++ new/gcc/config/arm/driver-arm.c 2011-10-21 19:27:47 +0000 +@@ -49,7 +49,7 @@ + {NULL, NULL, NULL} + }; + +-struct { ++static struct { + const char *vendor_no; + const struct vendor_cpu *vendor_parts; + } vendors[] = { + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106833.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106833.patch new file mode 100644 index 0000000000..1ad48e512e --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106833.patch @@ -0,0 +1,453 @@ +2011-10-27 Ira Rosen <ira.rosen@linaro.org> + + Backport from mainline: + + 2011-10-16 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vect-stmts.c (vectorizable_load): For SLP without permutation + treat the first load of the node as the first element in its + interleaving chain. + * tree-vect-slp.c (vect_get_and_check_slp_defs): Swap the operands if + necessary and possible. + (vect_build_slp_tree): Add new argument. Allow load groups of any size + in basic blocks. Keep all the loads for further permutation check. + Use the new argument to determine if there is a permutation. Update + the recursive calls. + (vect_supported_load_permutation_p): Allow subchains of interleaving + chains in basic block vectorization. + (vect_analyze_slp_instance): Update the call to vect_build_slp_tree. + Check load permutation based on the new parameter. + (vect_schedule_slp_instance): Don't start from the first element in + interleaving chain unless the loads are permuted. + + gcc/testsuite/ + * gcc.dg/vect/bb-slp-29.c: New test. 
+ +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-29.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-29.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-29.c 2011-10-23 11:29:25 +0000 +@@ -0,0 +1,59 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define A 3 ++#define B 4 ++#define N 256 ++ ++short src[N], dst[N]; ++ ++void foo (short * __restrict__ dst, short * __restrict__ src, int h, int stride, int dummy) ++{ ++ int i; ++ h /= 16; ++ for (i = 0; i < h; i++) ++ { ++ dst[0] = A*src[0] + B*src[1]; ++ dst[1] = A*src[1] + B*src[2]; ++ dst[2] = A*src[2] + B*src[3]; ++ dst[3] = A*src[3] + B*src[4]; ++ dst[4] = A*src[4] + B*src[5]; ++ dst[5] = A*src[5] + B*src[6]; ++ dst[6] = A*src[6] + B*src[7]; ++ dst[7] = A*src[7] + B*src[8]; ++ dst += stride; ++ src += stride; ++ if (dummy == 32) ++ abort (); ++ } ++} ++ ++ ++int main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ dst[i] = 0; ++ src[i] = i; ++ } ++ ++ foo (dst, src, N, 8, 0); ++ ++ for (i = 0; i < N/2; i++) ++ { ++ if (dst[i] != A * src[i] + B * src[i+1]) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult && vect_element_align } } } } */ ++/* { dg-final { cleanup-tree-dump "slp" } } */ ++ + +=== modified file 'gcc/tree-vect-slp.c' +--- old/gcc/tree-vect-slp.c 2011-10-06 11:08:08 +0000 ++++ new/gcc/tree-vect-slp.c 2011-10-23 11:29:25 +0000 +@@ -115,13 +115,15 @@ + { + tree oprnd; + unsigned int i, number_of_oprnds; +- tree def; ++ tree def[2]; + gimple def_stmt; + enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; + stmt_vec_info stmt_info = + vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0)); + enum gimple_rhs_class rhs_class; + struct loop *loop = NULL; ++ enum tree_code rhs_code; ++ bool different_types = false; + + if (loop_vinfo) + loop = LOOP_VINFO_LOOP (loop_vinfo); +@@ -133,7 +135,7 @@ + { + oprnd = gimple_op (stmt, i + 1); + +- if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def, ++ if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def[i], + &dt[i]) + || (!def_stmt && dt[i] != vect_constant_def)) + { +@@ -188,11 +190,11 @@ + switch (gimple_code (def_stmt)) + { + case GIMPLE_PHI: +- def = gimple_phi_result (def_stmt); ++ def[i] = gimple_phi_result (def_stmt); + break; + + case GIMPLE_ASSIGN: +- def = gimple_assign_lhs (def_stmt); ++ def[i] = gimple_assign_lhs (def_stmt); + break; + + default: +@@ -206,8 +208,8 @@ + { + /* op0 of the first stmt of the group - store its info. */ + *first_stmt_dt0 = dt[i]; +- if (def) +- *first_stmt_def0_type = TREE_TYPE (def); ++ if (def[i]) ++ *first_stmt_def0_type = TREE_TYPE (def[i]); + else + *first_stmt_const_oprnd = oprnd; + +@@ -227,8 +229,8 @@ + { + /* op1 of the first stmt of the group - store its info. */ + *first_stmt_dt1 = dt[i]; +- if (def) +- *first_stmt_def1_type = TREE_TYPE (def); ++ if (def[i]) ++ *first_stmt_def1_type = TREE_TYPE (def[i]); + else + { + /* We assume that the stmt contains only one constant +@@ -249,22 +251,53 @@ + the def-stmt/s of the first stmt. 
*/ + if ((i == 0 + && (*first_stmt_dt0 != dt[i] +- || (*first_stmt_def0_type && def ++ || (*first_stmt_def0_type && def[0] + && !types_compatible_p (*first_stmt_def0_type, +- TREE_TYPE (def))))) ++ TREE_TYPE (def[0]))))) + || (i == 1 + && (*first_stmt_dt1 != dt[i] +- || (*first_stmt_def1_type && def ++ || (*first_stmt_def1_type && def[1] + && !types_compatible_p (*first_stmt_def1_type, +- TREE_TYPE (def))))) +- || (!def ++ TREE_TYPE (def[1]))))) ++ || (!def[i] + && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd), +- TREE_TYPE (oprnd)))) ++ TREE_TYPE (oprnd))) ++ || different_types) + { +- if (vect_print_dump_info (REPORT_SLP)) +- fprintf (vect_dump, "Build SLP failed: different types "); ++ if (i != number_of_oprnds - 1) ++ different_types = true; ++ else ++ { ++ if (is_gimple_assign (stmt) ++ && (rhs_code = gimple_assign_rhs_code (stmt)) ++ && TREE_CODE_CLASS (rhs_code) == tcc_binary ++ && commutative_tree_code (rhs_code) ++ && *first_stmt_dt0 == dt[1] ++ && *first_stmt_dt1 == dt[0] ++ && def[0] && def[1] ++ && !(*first_stmt_def0_type ++ && !types_compatible_p (*first_stmt_def0_type, ++ TREE_TYPE (def[1]))) ++ && !(*first_stmt_def1_type ++ && !types_compatible_p (*first_stmt_def1_type, ++ TREE_TYPE (def[0])))) ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ { ++ fprintf (vect_dump, "Swapping operands of "); ++ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); ++ } ++ swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt), ++ gimple_assign_rhs2_ptr (stmt)); ++ } ++ else ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ fprintf (vect_dump, "Build SLP failed: different types "); + +- return false; ++ return false; ++ } ++ } + } + } + } +@@ -278,10 +311,10 @@ + + case vect_internal_def: + case vect_reduction_def: +- if (i == 0) ++ if ((i == 0 && !different_types) || (i == 1 && different_types)) + VEC_safe_push (gimple, heap, *def_stmts0, def_stmt); + else +- VEC_safe_push (gimple, heap, *def_stmts1, def_stmt); ++ VEC_safe_push (gimple, heap, *def_stmts1, def_stmt); + break; + + default: +@@ -289,7 +322,7 @@ + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "Build SLP failed: illegal type of def "); +- print_generic_expr (vect_dump, def, TDF_SLIM); ++ print_generic_expr (vect_dump, def[i], TDF_SLIM); + } + + return false; +@@ -312,7 +345,7 @@ + int ncopies_for_cost, unsigned int *max_nunits, + VEC (int, heap) **load_permutation, + VEC (slp_tree, heap) **loads, +- unsigned int vectorization_factor) ++ unsigned int vectorization_factor, bool *loads_permuted) + { + VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size); + VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size); +@@ -523,7 +556,9 @@ + + /* Check that the size of interleaved loads group is not + greater than the SLP group size. */ +- if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size) ++ if (loop_vinfo ++ && DR_GROUP_SIZE (vinfo_for_stmt (stmt)) ++ > ncopies * group_size) + { + if (vect_print_dump_info (REPORT_SLP)) + { +@@ -644,19 +679,22 @@ + /* Strided loads were reached - stop the recursion. */ + if (stop_recursion) + { ++ VEC_safe_push (slp_tree, heap, *loads, *node); + if (permutation) + { +- VEC_safe_push (slp_tree, heap, *loads, *node); ++ ++ *loads_permuted = true; + *inside_cost + += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0) + * group_size; + } + else +- { +- /* We don't check here complex numbers chains, so we keep them in +- LOADS for further check in vect_supported_load_permutation_p. 
*/ ++ { ++ /* We don't check here complex numbers chains, so we set ++ LOADS_PERMUTED for further check in ++ vect_supported_load_permutation_p. */ + if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR) +- VEC_safe_push (slp_tree, heap, *loads, *node); ++ *loads_permuted = true; + } + + return true; +@@ -675,7 +713,7 @@ + if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size, + inside_cost, outside_cost, ncopies_for_cost, + max_nunits, load_permutation, loads, +- vectorization_factor)) ++ vectorization_factor, loads_permuted)) + return false; + + SLP_TREE_LEFT (*node) = left_node; +@@ -693,7 +731,7 @@ + if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size, + inside_cost, outside_cost, ncopies_for_cost, + max_nunits, load_permutation, loads, +- vectorization_factor)) ++ vectorization_factor, loads_permuted)) + return false; + + SLP_TREE_RIGHT (*node) = right_node; +@@ -879,8 +917,10 @@ + bool supported, bad_permutation = false; + sbitmap load_index; + slp_tree node, other_complex_node; +- gimple stmt, first = NULL, other_node_first; ++ gimple stmt, first = NULL, other_node_first, load, next_load, first_load; + unsigned complex_numbers = 0; ++ struct data_reference *dr; ++ bb_vec_info bb_vinfo; + + /* FORNOW: permutations are only supported in SLP. */ + if (!slp_instn) +@@ -1040,6 +1080,76 @@ + } + } + ++ /* In basic block vectorization we allow any subchain of an interleaving ++ chain. ++ FORNOW: not supported in loop SLP because of realignment compications. */ ++ bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)); ++ bad_permutation = false; ++ /* Check that for every node in the instance teh loads form a subchain. */ ++ if (bb_vinfo) ++ { ++ FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node) ++ { ++ next_load = NULL; ++ first_load = NULL; ++ FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load) ++ { ++ if (!first_load) ++ first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (load)); ++ else if (first_load ++ != DR_GROUP_FIRST_DR (vinfo_for_stmt (load))) ++ { ++ bad_permutation = true; ++ break; ++ } ++ ++ if (j != 0 && next_load != load) ++ { ++ bad_permutation = true; ++ break; ++ } ++ ++ next_load = DR_GROUP_NEXT_DR (vinfo_for_stmt (load)); ++ } ++ ++ if (bad_permutation) ++ break; ++ } ++ ++ /* Check that the alignment of the first load in every subchain, i.e., ++ the first statement in every load node, is supported. */ ++ if (!bad_permutation) ++ { ++ FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node) ++ { ++ first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0); ++ if (first_load ++ != DR_GROUP_FIRST_DR (vinfo_for_stmt (first_load))) ++ { ++ dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load)); ++ if (vect_supportable_dr_alignment (dr, false) ++ == dr_unaligned_unsupported) ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ { ++ fprintf (vect_dump, "unsupported unaligned load "); ++ print_gimple_stmt (vect_dump, first_load, 0, ++ TDF_SLIM); ++ } ++ bad_permutation = true; ++ break; ++ } ++ } ++ } ++ ++ if (!bad_permutation) ++ { ++ VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn)); ++ return true; ++ } ++ } ++ } ++ + /* FORNOW: the only supported permutation is 0..01..1.. of length equal to + GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as + well (unless it's reduction). 
*/ +@@ -1149,6 +1259,7 @@ + VEC (int, heap) *load_permutation; + VEC (slp_tree, heap) *loads; + struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); ++ bool loads_permuted = false; + + if (dr) + { +@@ -1238,7 +1349,7 @@ + if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size, + &inside_cost, &outside_cost, ncopies_for_cost, + &max_nunits, &load_permutation, &loads, +- vectorization_factor)) ++ vectorization_factor, &loads_permuted)) + { + /* Calculate the unrolling factor based on the smallest type. */ + if (max_nunits > nunits) +@@ -1263,7 +1374,8 @@ + SLP_INSTANCE_LOADS (new_instance) = loads; + SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL; + SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation; +- if (VEC_length (slp_tree, loads)) ++ ++ if (loads_permuted) + { + if (!vect_supported_load_permutation_p (new_instance, group_size, + load_permutation)) +@@ -2542,10 +2654,11 @@ + /* Loads should be inserted before the first load. */ + if (SLP_INSTANCE_FIRST_LOAD_STMT (instance) + && STMT_VINFO_STRIDED_ACCESS (stmt_info) +- && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))) ++ && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)) ++ && SLP_INSTANCE_LOAD_PERMUTATION (instance)) + si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance)); + else if (is_pattern_stmt_p (stmt_info)) +- si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); ++ si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); + else + si = gsi_for_stmt (stmt); + + +=== modified file 'gcc/tree-vect-stmts.c' +--- old/gcc/tree-vect-stmts.c 2011-10-16 12:16:07 +0000 ++++ new/gcc/tree-vect-stmts.c 2011-10-23 11:29:25 +0000 +@@ -4285,6 +4285,11 @@ + if (strided_load) + { + first_stmt = DR_GROUP_FIRST_DR (stmt_info); ++ if (slp ++ && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance) ++ && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0)) ++ first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0); ++ + /* Check if the chain of loads is already vectorized. */ + if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))) + { + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106834.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106834.patch new file mode 100644 index 0000000000..421a8fe3a9 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106834.patch @@ -0,0 +1,1505 @@ + 2011-10-18 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * doc/md.texi (vec_widen_ushiftl_hi, vec_widen_ushiftl_lo, + vec_widen_sshiftl_hi, vec_widen_sshiftl_lo): Document. + * tree-pretty-print.c (dump_generic_node): Handle WIDEN_LSHIFT_EXPR, + VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR. + (op_code_prio): Likewise. + (op_symbol_code): Handle WIDEN_LSHIFT_EXPR. + * optabs.c (optab_for_tree_code): Handle + VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR. + (init-optabs): Initialize optab codes for vec_widen_u/sshiftl_hi/lo. + * optabs.h (enum optab_index): Add OTI_vec_widen_u/sshiftl_hi/lo. + * genopinit.c (optabs): Initialize the new optabs. + * expr.c (expand_expr_real_2): Handle + VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR. + * gimple-pretty-print.c (dump_binary_rhs): Likewise. + * tree-vectorizer.h (NUM_PATTERNS): Increase to 8. + * tree.def (WIDEN_LSHIFT_EXPR, VEC_WIDEN_LSHIFT_HI_EXPR, + VEC_WIDEN_LSHIFT_LO_EXPR): New. + * cfgexpand.c (expand_debug_expr): Handle new tree codes. + * tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add + vect_recog_widen_shift_pattern. 
+ (vect_handle_widen_mult_by_const): Rename... + (vect_handle_widen_op_by_const): ...to this. Handle shifts. + Add a new argument, update documentation. + (vect_recog_widen_mult_pattern): Assume that only second + operand can be constant. Update call to + vect_handle_widen_op_by_const. + (vect_recog_over_widening_pattern): Fix typo. + (vect_recog_widen_shift_pattern): New. + * tree-vect-stmts.c (vectorizable_type_promotion): Handle + widening shifts. + (supportable_widening_operation): Likewise. + * tree-inline.c (estimate_operator_cost): Handle new tree codes. + * tree-vect-generic.c (expand_vector_operations_1): Likewise. + * tree-cfg.c (verify_gimple_assign_binary): Likewise. + * config/arm/neon.md (neon_vec_<US>shiftl_<mode>): New. + (vec_widen_<US>shiftl_lo_<mode>, neon_vec_<US>shiftl_hi_<mode>, + vec_widen_<US>shiftl_hi_<mode>, neon_vec_<US>shift_left_<mode>): + Likewise. + * config/arm/predicates.md (const_neon_scalar_shift_amount_operand): + New. + * config/arm/iterators.md (V_innermode): New. + * tree-vect-slp.c (vect_build_slp_tree): Require same shift operand + for widening shift. + + gcc/testsuite + * testsuite/lib/target-supports.exp + (check_effective_target_vect_widen_shift): New. + * gcc.dg/vect/vect-widen-shift-s16.c: New. + * gcc.dg/vect/vect-widen-shift-s8.c: New. + * gcc.dg/vect/vect-widen-shift-u16.c: New. + * gcc.dg/vect/vect-widen-shift-u8.c: New. + + 2011-10-06 Jakub Jelinek <jakub@redhat.com> + + gcc/ + * tree-vect-patterns.c (vect_pattern_recog_1): Use + vect_recog_func_ptr typedef for the first argument. + (vect_pattern_recog): Rename vect_recog_func_ptr variable + to vect_recog_func, use vect_recog_func_ptr typedef for it. + + 2011-10-16 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + PR tree-optimization/50727 + * tree-vect-patterns.c (vect_operation_fits_smaller_type): Add + DEF_STMT to the list of statements to be replaced by the + pattern statements. + + 2011-10-09 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + PR tree-optimization/50635 + * tree-vect-patterns.c (vect_handle_widen_mult_by_const): Add + DEF_STMT to the list of statements to be replaced by the + pattern statements. + (vect_handle_widen_mult_by_const): Don't check TYPE_OUT. + +=== modified file 'gcc/cfgexpand.c' +--- old/gcc/cfgexpand.c 2011-07-01 09:19:21 +0000 ++++ new/gcc/cfgexpand.c 2011-10-23 13:33:07 +0000 +@@ -3215,6 +3215,8 @@ + case VEC_UNPACK_LO_EXPR: + case VEC_WIDEN_MULT_HI_EXPR: + case VEC_WIDEN_MULT_LO_EXPR: ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ case VEC_WIDEN_LSHIFT_LO_EXPR: + return NULL; + + /* Misc codes. */ + +=== modified file 'gcc/config/arm/iterators.md' +--- old/gcc/config/arm/iterators.md 2011-09-06 14:29:24 +0000 ++++ new/gcc/config/arm/iterators.md 2011-10-23 13:33:07 +0000 +@@ -388,6 +388,9 @@ + (define_mode_attr qhs_extenddi_cstr [(SI "r") (HI "rm") (QI "rUq")]) + (define_mode_attr qhs_zextenddi_cstr [(SI "r") (HI "rm") (QI "rm")]) + ++;; Mode attribute for vshll. 
++(define_mode_attr V_innermode [(V8QI "QI") (V4HI "HI") (V2SI "SI")]) ++ + ;;---------------------------------------------------------------------------- + ;; Code attributes + ;;---------------------------------------------------------------------------- + +=== modified file 'gcc/config/arm/neon.md' +--- old/gcc/config/arm/neon.md 2011-10-03 01:32:17 +0000 ++++ new/gcc/config/arm/neon.md 2011-10-23 13:33:07 +0000 +@@ -5316,6 +5316,44 @@ + } + ) + ++(define_insn "neon_vec_<US>shiftl_<mode>" ++ [(set (match_operand:<V_widen> 0 "register_operand" "=w") ++ (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w") ++ (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))] ++ "TARGET_NEON" ++{ ++ return "vshll.<US><V_sz_elem> %q0, %P1, %2"; ++} ++ [(set_attr "neon_type" "neon_shift_1")] ++) ++ ++(define_expand "vec_widen_<US>shiftl_lo_<mode>" ++ [(match_operand:<V_unpack> 0 "register_operand" "") ++ (SE:<V_unpack> (match_operand:VU 1 "register_operand" "")) ++ (match_operand:SI 2 "immediate_operand" "i")] ++ "TARGET_NEON && !BYTES_BIG_ENDIAN" ++ { ++ emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], ++ simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0), ++ operands[2])); ++ DONE; ++ } ++) ++ ++(define_expand "vec_widen_<US>shiftl_hi_<mode>" ++ [(match_operand:<V_unpack> 0 "register_operand" "") ++ (SE:<V_unpack> (match_operand:VU 1 "register_operand" "")) ++ (match_operand:SI 2 "immediate_operand" "i")] ++ "TARGET_NEON && !BYTES_BIG_ENDIAN" ++ { ++ emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], ++ simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, ++ GET_MODE_SIZE (<V_HALF>mode)), ++ operands[2])); ++ DONE; ++ } ++) ++ + ;; Vectorize for non-neon-quad case + (define_insn "neon_unpack<US>_<mode>" + [(set (match_operand:<V_widen> 0 "register_operand" "=w") +@@ -5392,6 +5430,34 @@ + } + ) + ++(define_expand "vec_widen_<US>shiftl_hi_<mode>" ++ [(match_operand:<V_double_width> 0 "register_operand" "") ++ (SE:<V_double_width> (match_operand:VDI 1 "register_operand" "")) ++ (match_operand:SI 2 "immediate_operand" "i")] ++ "TARGET_NEON" ++ { ++ rtx tmpreg = gen_reg_rtx (<V_widen>mode); ++ emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2])); ++ emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg)); ++ ++ DONE; ++ } ++) ++ ++(define_expand "vec_widen_<US>shiftl_lo_<mode>" ++ [(match_operand:<V_double_width> 0 "register_operand" "") ++ (SE:<V_double_width> (match_operand:VDI 1 "register_operand" "")) ++ (match_operand:SI 2 "immediate_operand" "i")] ++ "TARGET_NEON" ++ { ++ rtx tmpreg = gen_reg_rtx (<V_widen>mode); ++ emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2])); ++ emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg)); ++ ++ DONE; ++ } ++) ++ + ;; The case when using all quad registers. 
+ (define_insn "vec_pack_trunc_<mode>" + [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w") + +=== modified file 'gcc/config/arm/predicates.md' +--- old/gcc/config/arm/predicates.md 2011-10-10 11:43:28 +0000 ++++ new/gcc/config/arm/predicates.md 2011-10-23 13:33:07 +0000 +@@ -136,6 +136,11 @@ + (match_operand 0 "s_register_operand")) + (match_operand 0 "const_int_operand"))) + ++(define_predicate "const_neon_scalar_shift_amount_operand" ++ (and (match_code "const_int") ++ (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)) <= GET_MODE_BITSIZE (mode) ++ && ((unsigned HOST_WIDE_INT) INTVAL (op)) > 0"))) ++ + (define_predicate "arm_add_operand" + (ior (match_operand 0 "arm_rhs_operand") + (match_operand 0 "arm_neg_immediate_operand"))) + +=== modified file 'gcc/doc/md.texi' +--- old/gcc/doc/md.texi 2011-08-13 08:32:32 +0000 ++++ new/gcc/doc/md.texi 2011-10-23 13:33:07 +0000 +@@ -4230,6 +4230,17 @@ + elements of the two vectors, and put the N/2 products of size 2*S in the + output vector (operand 0). + ++@cindex @code{vec_widen_ushiftl_hi_@var{m}} instruction pattern ++@cindex @code{vec_widen_ushiftl_lo_@var{m}} instruction pattern ++@cindex @code{vec_widen_sshiftl_hi_@var{m}} instruction pattern ++@cindex @code{vec_widen_sshiftl_lo_@var{m}} instruction pattern ++@item @samp{vec_widen_ushiftl_hi_@var{m}}, @samp{vec_widen_ushiftl_lo_@var{m}} ++@itemx @samp{vec_widen_sshiftl_hi_@var{m}}, @samp{vec_widen_sshiftl_lo_@var{m}} ++Signed/Unsigned widening shift left. The first input (operand 1) is a vector ++with N signed/unsigned elements of size S@. Operand 2 is a constant. Shift ++the high/low elements of operand 1, and put the N/2 results of size 2*S in the ++output vector (operand 0). ++ + @cindex @code{mulhisi3} instruction pattern + @item @samp{mulhisi3} + Multiply operands 1 and 2, which have mode @code{HImode}, and store + +=== modified file 'gcc/expr.c' +--- old/gcc/expr.c 2011-08-25 11:42:09 +0000 ++++ new/gcc/expr.c 2011-10-23 13:33:07 +0000 +@@ -8290,6 +8290,19 @@ + return target; + } + ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ case VEC_WIDEN_LSHIFT_LO_EXPR: ++ { ++ tree oprnd0 = treeop0; ++ tree oprnd1 = treeop1; ++ ++ expand_operands (oprnd0, oprnd1, NULL_RTX, &op0, &op1, EXPAND_NORMAL); ++ target = expand_widen_pattern_expr (ops, op0, op1, NULL_RTX, ++ target, unsignedp); ++ gcc_assert (target); ++ return target; ++ } ++ + case VEC_PACK_TRUNC_EXPR: + case VEC_PACK_SAT_EXPR: + case VEC_PACK_FIX_TRUNC_EXPR: + +=== modified file 'gcc/genopinit.c' +--- old/gcc/genopinit.c 2011-07-15 13:06:31 +0000 ++++ new/gcc/genopinit.c 2011-10-23 13:33:07 +0000 +@@ -268,6 +268,10 @@ + "set_optab_handler (vec_widen_umult_lo_optab, $A, CODE_FOR_$(vec_widen_umult_lo_$a$))", + "set_optab_handler (vec_widen_smult_hi_optab, $A, CODE_FOR_$(vec_widen_smult_hi_$a$))", + "set_optab_handler (vec_widen_smult_lo_optab, $A, CODE_FOR_$(vec_widen_smult_lo_$a$))", ++ "set_optab_handler (vec_widen_ushiftl_hi_optab, $A, CODE_FOR_$(vec_widen_ushiftl_hi_$a$))", ++ "set_optab_handler (vec_widen_ushiftl_lo_optab, $A, CODE_FOR_$(vec_widen_ushiftl_lo_$a$))", ++ "set_optab_handler (vec_widen_sshiftl_hi_optab, $A, CODE_FOR_$(vec_widen_sshiftl_hi_$a$))", ++ "set_optab_handler (vec_widen_sshiftl_lo_optab, $A, CODE_FOR_$(vec_widen_sshiftl_lo_$a$))", + "set_optab_handler (vec_unpacks_hi_optab, $A, CODE_FOR_$(vec_unpacks_hi_$a$))", + "set_optab_handler (vec_unpacks_lo_optab, $A, CODE_FOR_$(vec_unpacks_lo_$a$))", + "set_optab_handler (vec_unpacku_hi_optab, $A, CODE_FOR_$(vec_unpacku_hi_$a$))", + +=== modified file 
'gcc/gimple-pretty-print.c' +--- old/gcc/gimple-pretty-print.c 2011-05-05 15:42:22 +0000 ++++ new/gcc/gimple-pretty-print.c 2011-10-23 13:33:07 +0000 +@@ -343,6 +343,8 @@ + case VEC_EXTRACT_ODD_EXPR: + case VEC_INTERLEAVE_HIGH_EXPR: + case VEC_INTERLEAVE_LOW_EXPR: ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ case VEC_WIDEN_LSHIFT_LO_EXPR: + for (p = tree_code_name [(int) code]; *p; p++) + pp_character (buffer, TOUPPER (*p)); + pp_string (buffer, " <"); + +=== modified file 'gcc/optabs.c' +--- old/gcc/optabs.c 2011-08-11 15:46:01 +0000 ++++ new/gcc/optabs.c 2011-10-23 13:33:07 +0000 +@@ -454,6 +454,14 @@ + return TYPE_UNSIGNED (type) ? + vec_widen_umult_lo_optab : vec_widen_smult_lo_optab; + ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ return TYPE_UNSIGNED (type) ? ++ vec_widen_ushiftl_hi_optab : vec_widen_sshiftl_hi_optab; ++ ++ case VEC_WIDEN_LSHIFT_LO_EXPR: ++ return TYPE_UNSIGNED (type) ? ++ vec_widen_ushiftl_lo_optab : vec_widen_sshiftl_lo_optab; ++ + case VEC_UNPACK_HI_EXPR: + return TYPE_UNSIGNED (type) ? + vec_unpacku_hi_optab : vec_unpacks_hi_optab; +@@ -6351,6 +6359,10 @@ + init_optab (vec_widen_umult_lo_optab, UNKNOWN); + init_optab (vec_widen_smult_hi_optab, UNKNOWN); + init_optab (vec_widen_smult_lo_optab, UNKNOWN); ++ init_optab (vec_widen_ushiftl_hi_optab, UNKNOWN); ++ init_optab (vec_widen_ushiftl_lo_optab, UNKNOWN); ++ init_optab (vec_widen_sshiftl_hi_optab, UNKNOWN); ++ init_optab (vec_widen_sshiftl_lo_optab, UNKNOWN); + init_optab (vec_unpacks_hi_optab, UNKNOWN); + init_optab (vec_unpacks_lo_optab, UNKNOWN); + init_optab (vec_unpacku_hi_optab, UNKNOWN); + +=== modified file 'gcc/optabs.h' +--- old/gcc/optabs.h 2011-07-27 14:12:45 +0000 ++++ new/gcc/optabs.h 2011-10-23 13:33:07 +0000 +@@ -350,6 +350,12 @@ + OTI_vec_widen_umult_lo, + OTI_vec_widen_smult_hi, + OTI_vec_widen_smult_lo, ++ /* Widening shift left. ++ The high/low part of the resulting vector is returned. */ ++ OTI_vec_widen_ushiftl_hi, ++ OTI_vec_widen_ushiftl_lo, ++ OTI_vec_widen_sshiftl_hi, ++ OTI_vec_widen_sshiftl_lo, + /* Extract and widen the high/low part of a vector of signed or + floating point elements. 
*/ + OTI_vec_unpacks_hi, +@@ -542,6 +548,10 @@ + #define vec_widen_umult_lo_optab (&optab_table[OTI_vec_widen_umult_lo]) + #define vec_widen_smult_hi_optab (&optab_table[OTI_vec_widen_smult_hi]) + #define vec_widen_smult_lo_optab (&optab_table[OTI_vec_widen_smult_lo]) ++#define vec_widen_ushiftl_hi_optab (&optab_table[OTI_vec_widen_ushiftl_hi]) ++#define vec_widen_ushiftl_lo_optab (&optab_table[OTI_vec_widen_ushiftl_lo]) ++#define vec_widen_sshiftl_hi_optab (&optab_table[OTI_vec_widen_sshiftl_hi]) ++#define vec_widen_sshiftl_lo_optab (&optab_table[OTI_vec_widen_sshiftl_lo]) + #define vec_unpacks_hi_optab (&optab_table[OTI_vec_unpacks_hi]) + #define vec_unpacks_lo_optab (&optab_table[OTI_vec_unpacks_lo]) + #define vec_unpacku_hi_optab (&optab_table[OTI_vec_unpacku_hi]) + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-s16.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s16.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s16.c 2011-10-23 13:33:07 +0000 +@@ -0,0 +1,107 @@ ++/* { dg-require-effective-target vect_int } */ ++/* { dg-require-effective-target vect_shift } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 64 ++#define C 16 ++ ++__attribute__ ((noinline)) void ++foo (short *src, int *dst) ++{ ++ int i; ++ short b, b0, b1, b2, b3, *s = src; ++ int *d = dst; ++ ++ for (i = 0; i < N/4; i++) ++ { ++ b0 = *s++; ++ b1 = *s++; ++ b2 = *s++; ++ b3 = *s++; ++ *d = b0 << C; ++ d++; ++ *d = b1 << C; ++ d++; ++ *d = b2 << C; ++ d++; ++ *d = b3 << C; ++ d++; ++ } ++ ++ s = src; ++ d = dst; ++ for (i = 0; i < N; i++) ++ { ++ b = *s++; ++ if (*d != b << C) ++ abort (); ++ d++; ++ } ++ ++ s = src; ++ d = dst; ++ for (i = 0; i < N/4; i++) ++ { ++ b0 = *s++; ++ b1 = *s++; ++ b2 = *s++; ++ b3 = *s++; ++ *d = b0 << C; ++ d++; ++ *d = b1 << C; ++ d++; ++ *d = b2 << C; ++ d++; ++ *d = b3 << 6; ++ d++; ++ } ++ ++ s = src; ++ d = dst; ++ for (i = 0; i < N/4; i++) ++ { ++ b = *s++; ++ if (*d != b << C) ++ abort (); ++ d++; ++ b = *s++; ++ if (*d != b << C) ++ abort (); ++ d++; ++ b = *s++; ++ if (*d != b << C) ++ abort (); ++ d++; ++ b = *s++; ++ if (*d != b << 6) ++ abort (); ++ d++; ++ } ++} ++ ++int main (void) ++{ ++ int i; ++ short in[N]; ++ int out[N]; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ out[i] = 255; ++ __asm__ volatile (""); ++ } ++ ++ foo (in, out); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 8 "vect" { target vect_widen_shift } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-s8.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s8.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s8.c 2011-10-23 13:33:07 +0000 +@@ -0,0 +1,58 @@ ++/* { dg-require-effective-target vect_int } */ ++/* { dg-require-effective-target vect_shift } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 64 ++#define C 12 ++ ++__attribute__ ((noinline)) void ++foo (char *src, int *dst) ++{ ++ int i; ++ char b, *s = src; ++ int *d = dst; ++ ++ for (i = 0; i < N; i++) ++ { ++ b = *s++; ++ *d = b << C; ++ d++; ++ } ++ ++ s = src; ++ d = dst; ++ for (i = 0; i < N; i++) ++ { ++ b = *s++; ++ if (*d != b << C) ++ abort (); ++ d++; ++ } ++} ++ ++int main (void) ++{ ++ int i; ++ char in[N]; ++ int out[N]; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ 
out[i] = 255; ++ __asm__ volatile (""); ++ } ++ ++ foo (in, out); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-u16.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u16.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u16.c 2011-10-23 13:33:07 +0000 +@@ -0,0 +1,58 @@ ++/* { dg-require-effective-target vect_int } */ ++/* { dg-require-effective-target vect_shift } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 64 ++#define C 7 ++ ++__attribute__ ((noinline)) void ++foo (unsigned short *src, unsigned int *dst) ++{ ++ int i; ++ unsigned short b, *s = src; ++ unsigned int *d = dst; ++ ++ for (i = 0; i < N; i++) ++ { ++ b = *s++; ++ *d = b << C; ++ d++; ++ } ++ ++ s = src; ++ d = dst; ++ for (i = 0; i < N; i++) ++ { ++ b = *s++; ++ if (*d != b << C) ++ abort (); ++ d++; ++ } ++} ++ ++int main (void) ++{ ++ int i; ++ unsigned short in[N]; ++ unsigned int out[N]; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ out[i] = 255; ++ __asm__ volatile (""); ++ } ++ ++ foo (in, out); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c 2011-10-23 13:33:07 +0000 +@@ -0,0 +1,65 @@ ++/* { dg-require-effective-target vect_int } */ ++/* { dg-require-effective-target vect_shift } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 64 ++#define C1 10 ++#define C2 5 ++ ++__attribute__ ((noinline)) void ++foo (unsigned char *src, unsigned int *dst1, unsigned int *dst2) ++{ ++ int i; ++ unsigned char b, *s = src; ++ unsigned int *d1 = dst1, *d2 = dst2; ++ ++ for (i = 0; i < N; i++) ++ { ++ b = *s++; ++ *d1 = b << C1; ++ d1++; ++ *d2 = b << C2; ++ d2++; ++ } ++ ++ s = src; ++ d1 = dst1; ++ d2 = dst2; ++ for (i = 0; i < N; i++) ++ { ++ b = *s++; ++ if (*d1 != b << C1 || *d2 != b << C2) ++ abort (); ++ d1++; ++ d2++; ++ } ++} ++ ++int main (void) ++{ ++ int i; ++ unsigned char in[N]; ++ unsigned int out1[N]; ++ unsigned int out2[N]; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ out1[i] = 255; ++ out2[i] = 255; ++ __asm__ volatile (""); ++ } ++ ++ foo (in, out1, out2); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== modified file 'gcc/testsuite/lib/target-supports.exp' +--- old/gcc/testsuite/lib/target-supports.exp 2011-10-06 11:08:08 +0000 ++++ new/gcc/testsuite/lib/target-supports.exp 2011-10-23 13:33:07 +0000 +@@ -2783,6 +2783,26 @@ + } + + # Return 1 if the target plus current options supports a vector ++# widening shift, 0 otherwise. ++# ++# This won't change for different subtargets so cache the result. 
++ ++proc check_effective_target_vect_widen_shift { } { ++ global et_vect_widen_shift_saved ++ ++ if [info exists et_vect_shift_saved] { ++ verbose "check_effective_target_vect_widen_shift: using cached result" 2 ++ } else { ++ set et_vect_widen_shift_saved 0 ++ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } { ++ set et_vect_widen_shift_saved 1 ++ } ++ } ++ verbose "check_effective_target_vect_widen_shift: returning $et_vect_widen_shift_saved" 2 ++ return $et_vect_widen_shift_saved ++} ++ ++# Return 1 if the target plus current options supports a vector + # dot-product of signed chars, 0 otherwise. + # + # This won't change for different subtargets so cache the result. + +=== modified file 'gcc/tree-cfg.c' +--- old/gcc/tree-cfg.c 2011-07-15 13:44:50 +0000 ++++ new/gcc/tree-cfg.c 2011-10-23 13:33:07 +0000 +@@ -3473,6 +3473,44 @@ + return false; + } + ++ case WIDEN_LSHIFT_EXPR: ++ { ++ if (!INTEGRAL_TYPE_P (lhs_type) ++ || !INTEGRAL_TYPE_P (rhs1_type) ++ || TREE_CODE (rhs2) != INTEGER_CST ++ || (2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type))) ++ { ++ error ("type mismatch in widening vector shift expression"); ++ debug_generic_expr (lhs_type); ++ debug_generic_expr (rhs1_type); ++ debug_generic_expr (rhs2_type); ++ return true; ++ } ++ ++ return false; ++ } ++ ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ case VEC_WIDEN_LSHIFT_LO_EXPR: ++ { ++ if (TREE_CODE (rhs1_type) != VECTOR_TYPE ++ || TREE_CODE (lhs_type) != VECTOR_TYPE ++ || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1_type)) ++ || !INTEGRAL_TYPE_P (TREE_TYPE (lhs_type)) ++ || TREE_CODE (rhs2) != INTEGER_CST ++ || (2 * TYPE_PRECISION (TREE_TYPE (rhs1_type)) ++ > TYPE_PRECISION (TREE_TYPE (lhs_type)))) ++ { ++ error ("type mismatch in widening vector shift expression"); ++ debug_generic_expr (lhs_type); ++ debug_generic_expr (rhs1_type); ++ debug_generic_expr (rhs2_type); ++ return true; ++ } ++ ++ return false; ++ } ++ + case PLUS_EXPR: + case MINUS_EXPR: + { + +=== modified file 'gcc/tree-inline.c' +--- old/gcc/tree-inline.c 2011-08-13 08:32:32 +0000 ++++ new/gcc/tree-inline.c 2011-10-23 13:33:07 +0000 +@@ -3343,6 +3343,7 @@ + case DOT_PROD_EXPR: + case WIDEN_MULT_PLUS_EXPR: + case WIDEN_MULT_MINUS_EXPR: ++ case WIDEN_LSHIFT_EXPR: + + case VEC_WIDEN_MULT_HI_EXPR: + case VEC_WIDEN_MULT_LO_EXPR: +@@ -3357,6 +3358,8 @@ + case VEC_EXTRACT_ODD_EXPR: + case VEC_INTERLEAVE_HIGH_EXPR: + case VEC_INTERLEAVE_LOW_EXPR: ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ case VEC_WIDEN_LSHIFT_LO_EXPR: + + return 1; + + +=== modified file 'gcc/tree-pretty-print.c' +--- old/gcc/tree-pretty-print.c 2010-11-05 09:00:50 +0000 ++++ new/gcc/tree-pretty-print.c 2011-10-23 13:33:07 +0000 +@@ -1539,6 +1539,7 @@ + case RROTATE_EXPR: + case VEC_LSHIFT_EXPR: + case VEC_RSHIFT_EXPR: ++ case WIDEN_LSHIFT_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + case BIT_AND_EXPR: +@@ -2209,6 +2210,22 @@ + pp_string (buffer, " > "); + break; + ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ pp_string (buffer, " VEC_WIDEN_LSHIFT_HI_EXPR < "); ++ dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); ++ pp_string (buffer, ", "); ++ dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false); ++ pp_string (buffer, " > "); ++ break; ++ ++ case VEC_WIDEN_LSHIFT_LO_EXPR: ++ pp_string (buffer, " VEC_WIDEN_LSHIFT_HI_EXPR < "); ++ dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); ++ pp_string (buffer, ", "); ++ dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false); ++ pp_string (buffer, " > "); ++ break; ++ + case 
VEC_UNPACK_HI_EXPR: + pp_string (buffer, " VEC_UNPACK_HI_EXPR < "); + dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); +@@ -2531,6 +2548,9 @@ + case RSHIFT_EXPR: + case LROTATE_EXPR: + case RROTATE_EXPR: ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ case VEC_WIDEN_LSHIFT_LO_EXPR: ++ case WIDEN_LSHIFT_EXPR: + return 11; + + case WIDEN_SUM_EXPR: +@@ -2706,6 +2726,9 @@ + case VEC_RSHIFT_EXPR: + return "v>>"; + ++ case WIDEN_LSHIFT_EXPR: ++ return "w<<"; ++ + case POINTER_PLUS_EXPR: + return "+"; + + +=== modified file 'gcc/tree-vect-generic.c' +--- old/gcc/tree-vect-generic.c 2011-02-08 14:16:50 +0000 ++++ new/gcc/tree-vect-generic.c 2011-10-23 13:33:07 +0000 +@@ -552,7 +552,9 @@ + || code == VEC_UNPACK_LO_EXPR + || code == VEC_PACK_TRUNC_EXPR + || code == VEC_PACK_SAT_EXPR +- || code == VEC_PACK_FIX_TRUNC_EXPR) ++ || code == VEC_PACK_FIX_TRUNC_EXPR ++ || code == VEC_WIDEN_LSHIFT_HI_EXPR ++ || code == VEC_WIDEN_LSHIFT_LO_EXPR) + type = TREE_TYPE (rhs1); + + /* Optabs will try converting a negation into a subtraction, so + +=== modified file 'gcc/tree-vect-patterns.c' +--- old/gcc/tree-vect-patterns.c 2011-09-05 06:23:37 +0000 ++++ new/gcc/tree-vect-patterns.c 2011-10-23 13:33:07 +0000 +@@ -48,12 +48,15 @@ + static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *); + static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *, + tree *); ++static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **, ++ tree *, tree *); + static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { + vect_recog_widen_mult_pattern, + vect_recog_widen_sum_pattern, + vect_recog_dot_prod_pattern, + vect_recog_pow_pattern, +- vect_recog_over_widening_pattern}; ++ vect_recog_over_widening_pattern, ++ vect_recog_widen_shift_pattern}; + + + /* Function widened_name_p +@@ -331,27 +334,38 @@ + return pattern_stmt; + } + +-/* Handle two cases of multiplication by a constant. The first one is when +- the constant, CONST_OPRND, fits the type (HALF_TYPE) of the second +- operand (OPRND). In that case, we can peform widen-mult from HALF_TYPE to +- TYPE. ++ ++/* Handle widening operation by a constant. At the moment we support MULT_EXPR ++ and LSHIFT_EXPR. ++ ++ For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR ++ we check that CONST_OPRND is less or equal to the size of HALF_TYPE. 
+ + Otherwise, if the type of the result (TYPE) is at least 4 times bigger than +- HALF_TYPE, and CONST_OPRND fits an intermediate type (2 times smaller than +- TYPE), we can perform widen-mult from the intermediate type to TYPE and +- replace a_T = (TYPE) a_t; with a_it - (interm_type) a_t; */ ++ HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE) ++ that satisfies the above restrictions, we can perform a widening opeartion ++ from the intermediate type to TYPE and replace a_T = (TYPE) a_t; ++ with a_it = (interm_type) a_t; */ + + static bool +-vect_handle_widen_mult_by_const (gimple stmt, tree const_oprnd, tree *oprnd, +- VEC (gimple, heap) **stmts, tree type, +- tree *half_type, gimple def_stmt) ++vect_handle_widen_op_by_const (gimple stmt, enum tree_code code, ++ tree const_oprnd, tree *oprnd, ++ VEC (gimple, heap) **stmts, tree type, ++ tree *half_type, gimple def_stmt) + { + tree new_type, new_oprnd, tmp; + gimple new_stmt; + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); + struct loop *loop = LOOP_VINFO_LOOP (loop_info); + +- if (int_fits_type_p (const_oprnd, *half_type)) ++ if (code != MULT_EXPR && code != LSHIFT_EXPR) ++ return false; ++ ++ if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type)) ++ || (code == LSHIFT_EXPR ++ && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type)) ++ != 1)) ++ && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2)) + { + /* CONST_OPRND is a constant of HALF_TYPE. */ + *oprnd = gimple_assign_rhs1 (def_stmt); +@@ -364,14 +378,16 @@ + || !vinfo_for_stmt (def_stmt)) + return false; + +- /* TYPE is 4 times bigger than HALF_TYPE, try widen-mult for ++ /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for + a type 2 times bigger than HALF_TYPE. */ + new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2, + TYPE_UNSIGNED (type)); +- if (!int_fits_type_p (const_oprnd, new_type)) ++ if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type)) ++ || (code == LSHIFT_EXPR ++ && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1)) + return false; + +- /* Use NEW_TYPE for widen_mult. */ ++ /* Use NEW_TYPE for widening operation. */ + if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt))) + { + new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); +@@ -381,6 +397,7 @@ + || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type) + return false; + ++ VEC_safe_push (gimple, heap, *stmts, def_stmt); + *oprnd = gimple_assign_lhs (new_stmt); + } + else +@@ -392,7 +409,6 @@ + new_oprnd = make_ssa_name (tmp, NULL); + new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd, + NULL_TREE); +- SSA_NAME_DEF_STMT (new_oprnd) = new_stmt; + STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt; + VEC_safe_push (gimple, heap, *stmts, def_stmt); + *oprnd = new_oprnd; +@@ -402,7 +418,6 @@ + return true; + } + +- + /* Function vect_recog_widen_mult_pattern + + Try to find the following pattern: +@@ -491,7 +506,7 @@ + enum tree_code dummy_code; + int dummy_int; + VEC (tree, heap) *dummy_vec; +- bool op0_ok, op1_ok; ++ bool op1_ok; + + if (!is_gimple_assign (last_stmt)) + return NULL; +@@ -511,38 +526,23 @@ + return NULL; + + /* Check argument 0. */ +- op0_ok = widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false); ++ if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false)) ++ return NULL; + /* Check argument 1. 
*/ + op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false); + +- /* In case of multiplication by a constant one of the operands may not match +- the pattern, but not both. */ +- if (!op0_ok && !op1_ok) +- return NULL; +- +- if (op0_ok && op1_ok) ++ if (op1_ok) + { + oprnd0 = gimple_assign_rhs1 (def_stmt0); + oprnd1 = gimple_assign_rhs1 (def_stmt1); + } +- else if (!op0_ok) +- { +- if (TREE_CODE (oprnd0) == INTEGER_CST +- && TREE_CODE (half_type1) == INTEGER_TYPE +- && vect_handle_widen_mult_by_const (last_stmt, oprnd0, &oprnd1, +- stmts, type, +- &half_type1, def_stmt1)) +- half_type0 = half_type1; +- else +- return NULL; +- } +- else if (!op1_ok) ++ else + { + if (TREE_CODE (oprnd1) == INTEGER_CST + && TREE_CODE (half_type0) == INTEGER_TYPE +- && vect_handle_widen_mult_by_const (last_stmt, oprnd1, &oprnd0, +- stmts, type, +- &half_type0, def_stmt0)) ++ && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1, ++ &oprnd0, stmts, type, ++ &half_type0, def_stmt0)) + half_type1 = half_type0; + else + return NULL; +@@ -998,6 +998,7 @@ + || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type) + return false; + ++ VEC_safe_push (gimple, heap, *stmts, def_stmt); + oprnd = gimple_assign_lhs (new_stmt); + } + else +@@ -1128,7 +1129,7 @@ + statetments, except for the case when the last statement in the + sequence doesn't have a corresponding pattern statement. In such + case we associate the last pattern statement with the last statement +- in the sequence. Therefore, we only add an original statetement to ++ in the sequence. Therefore, we only add the original statement to + the list if we know that it is not the last. */ + if (prev_stmt) + VEC_safe_push (gimple, heap, *stmts, prev_stmt); +@@ -1215,6 +1216,231 @@ + } + + ++/* Detect widening shift pattern: ++ ++ type a_t; ++ TYPE a_T, res_T; ++ ++ S1 a_t = ; ++ S2 a_T = (TYPE) a_t; ++ S3 res_T = a_T << CONST; ++ ++ where type 'TYPE' is at least double the size of type 'type'. ++ ++ Also detect unsigned cases: ++ ++ unsigned type a_t; ++ unsigned TYPE u_res_T; ++ TYPE a_T, res_T; ++ ++ S1 a_t = ; ++ S2 a_T = (TYPE) a_t; ++ S3 res_T = a_T << CONST; ++ S4 u_res_T = (unsigned TYPE) res_T; ++ ++ And a case when 'TYPE' is 4 times bigger than 'type'. In that case we ++ create an additional pattern stmt for S2 to create a variable of an ++ intermediate type, and perform widen-shift on the intermediate type: ++ ++ type a_t; ++ interm_type a_it; ++ TYPE a_T, res_T, res_T'; ++ ++ S1 a_t = ; ++ S2 a_T = (TYPE) a_t; ++ '--> a_it = (interm_type) a_t; ++ S3 res_T = a_T << CONST; ++ '--> res_T' = a_it <<* CONST; ++ ++ Input/Output: ++ ++ * STMTS: Contains a stmt from which the pattern search begins. ++ In case of unsigned widen-shift, the original stmt (S3) is replaced with S4 ++ in STMTS. When an intermediate type is used and a pattern statement is ++ created for S2, we also put S2 here (before S3). ++ ++ Output: ++ ++ * TYPE_IN: The type of the input arguments to the pattern. ++ ++ * TYPE_OUT: The type of the output of this pattern. ++ ++ * Return value: A new stmt that will be used to replace the sequence of ++ stmts that constitute the pattern. In this case it will be: ++ WIDEN_LSHIFT_EXPR <a_t, CONST>. 
*/ ++ ++static gimple ++vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts, ++ tree *type_in, tree *type_out) ++{ ++ gimple last_stmt = VEC_pop (gimple, *stmts); ++ gimple def_stmt0; ++ tree oprnd0, oprnd1; ++ tree type, half_type0; ++ gimple pattern_stmt, orig_stmt = NULL; ++ tree vectype, vectype_out = NULL_TREE; ++ tree dummy; ++ tree var; ++ enum tree_code dummy_code; ++ int dummy_int; ++ VEC (tree, heap) * dummy_vec; ++ gimple use_stmt = NULL; ++ bool over_widen = false; ++ ++ if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt)) ++ return NULL; ++ ++ orig_stmt = last_stmt; ++ if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt))) ++ { ++ /* This statement was also detected as over-widening operation (it can't ++ be any other pattern, because only over-widening detects shifts). ++ LAST_STMT is the final type demotion statement, but its related ++ statement is shift. We analyze the related statement to catch cases: ++ ++ orig code: ++ type a_t; ++ itype res; ++ TYPE a_T, res_T; ++ ++ S1 a_T = (TYPE) a_t; ++ S2 res_T = a_T << CONST; ++ S3 res = (itype)res_T; ++ ++ (size of type * 2 <= size of itype ++ and size of itype * 2 <= size of TYPE) ++ ++ code after over-widening pattern detection: ++ ++ S1 a_T = (TYPE) a_t; ++ --> a_it = (itype) a_t; ++ S2 res_T = a_T << CONST; ++ S3 res = (itype)res_T; <--- LAST_STMT ++ --> res = a_it << CONST; ++ ++ after widen_shift: ++ ++ S1 a_T = (TYPE) a_t; ++ --> a_it = (itype) a_t; - redundant ++ S2 res_T = a_T << CONST; ++ S3 res = (itype)res_T; ++ --> res = a_t w<< CONST; ++ ++ i.e., we replace the three statements with res = a_t w<< CONST. */ ++ last_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_stmt)); ++ over_widen = true; ++ } ++ ++ if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR) ++ return NULL; ++ ++ oprnd0 = gimple_assign_rhs1 (last_stmt); ++ oprnd1 = gimple_assign_rhs2 (last_stmt); ++ if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST) ++ return NULL; ++ ++ /* Check operand 0: it has to be defined by a type promotion. */ ++ if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false)) ++ return NULL; ++ ++ /* Check operand 1: has to be positive. We check that it fits the type ++ in vect_handle_widen_op_by_const (). */ ++ if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0) ++ return NULL; ++ ++ oprnd0 = gimple_assign_rhs1 (def_stmt0); ++ type = gimple_expr_type (last_stmt); ++ ++ /* Check if this a widening operation. */ ++ if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1, ++ &oprnd0, stmts, ++ type, &half_type0, def_stmt0)) ++ return NULL; ++ ++ /* Handle unsigned case. Look for ++ S4 u_res_T = (unsigned TYPE) res_T; ++ Use unsigned TYPE as the type for WIDEN_LSHIFT_EXPR. */ ++ if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0)) ++ { ++ tree lhs = gimple_assign_lhs (last_stmt), use_lhs; ++ imm_use_iterator imm_iter; ++ use_operand_p use_p; ++ int nuses = 0; ++ tree use_type; ++ ++ if (over_widen) ++ { ++ /* In case of over-widening pattern, S4 should be ORIG_STMT itself. ++ We check here that TYPE is the correct type for the operation, ++ i.e., it's the type of the original result. 
*/ ++ tree orig_type = gimple_expr_type (orig_stmt); ++ if ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (orig_type)) ++ || (TYPE_PRECISION (type) != TYPE_PRECISION (orig_type))) ++ return NULL; ++ } ++ else ++ { ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) ++ { ++ if (is_gimple_debug (USE_STMT (use_p))) ++ continue; ++ use_stmt = USE_STMT (use_p); ++ nuses++; ++ } ++ ++ if (nuses != 1 || !is_gimple_assign (use_stmt) ++ || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))) ++ return NULL; ++ ++ use_lhs = gimple_assign_lhs (use_stmt); ++ use_type = TREE_TYPE (use_lhs); ++ ++ if (!INTEGRAL_TYPE_P (use_type) ++ || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type)) ++ || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type))) ++ return NULL; ++ ++ type = use_type; ++ } ++ } ++ ++ /* Pattern detected. */ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: "); ++ ++ /* Check target support. */ ++ vectype = get_vectype_for_scalar_type (half_type0); ++ vectype_out = get_vectype_for_scalar_type (type); ++ ++ if (!vectype ++ || !vectype_out ++ || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt, ++ vectype_out, vectype, ++ &dummy, &dummy, &dummy_code, ++ &dummy_code, &dummy_int, ++ &dummy_vec)) ++ return NULL; ++ ++ *type_in = vectype; ++ *type_out = vectype_out; ++ ++ /* Pattern supported. Create a stmt to be used to replace the pattern. */ ++ var = vect_recog_temp_ssa_var (type, NULL); ++ pattern_stmt = ++ gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1); ++ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); ++ ++ if (use_stmt) ++ last_stmt = use_stmt; ++ else ++ last_stmt = orig_stmt; ++ ++ VEC_safe_push (gimple, heap, *stmts, last_stmt); ++ return pattern_stmt; ++} ++ + /* Mark statements that are involved in a pattern. */ + + static inline void +@@ -1278,7 +1504,8 @@ + static void + vect_pattern_recog_1 ( + gimple (* vect_recog_func) (VEC (gimple, heap) **, tree *, tree *), +- gimple_stmt_iterator si) ++ gimple_stmt_iterator si, ++ VEC (gimple, heap) **stmts_to_replace) + { + gimple stmt = gsi_stmt (si), pattern_stmt; + stmt_vec_info stmt_info; +@@ -1288,14 +1515,14 @@ + enum tree_code code; + int i; + gimple next; +- VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1); + +- VEC_quick_push (gimple, stmts_to_replace, stmt); +- pattern_stmt = (* vect_recog_func) (&stmts_to_replace, &type_in, &type_out); ++ VEC_truncate (gimple, *stmts_to_replace, 0); ++ VEC_quick_push (gimple, *stmts_to_replace, stmt); ++ pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out); + if (!pattern_stmt) + return; + +- stmt = VEC_last (gimple, stmts_to_replace); ++ stmt = VEC_last (gimple, *stmts_to_replace); + stmt_info = vinfo_for_stmt (stmt); + loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + +@@ -1303,8 +1530,6 @@ + { + /* No need to check target support (already checked by the pattern + recognition function). */ +- if (type_out) +- gcc_assert (VECTOR_MODE_P (TYPE_MODE (type_out))); + pattern_vectype = type_out ? type_out : type_in; + } + else +@@ -1360,8 +1585,8 @@ + /* It is possible that additional pattern stmts are created and inserted in + STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the + relevant statements. 
*/ +- for (i = 0; VEC_iterate (gimple, stmts_to_replace, i, stmt) +- && (unsigned) i < (VEC_length (gimple, stmts_to_replace) - 1); ++ for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt) ++ && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1); + i++) + { + stmt_info = vinfo_for_stmt (stmt); +@@ -1374,8 +1599,6 @@ + + vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE); + } +- +- VEC_free (gimple, heap, stmts_to_replace); + } + + +@@ -1465,6 +1688,7 @@ + gimple_stmt_iterator si; + unsigned int i, j; + gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); ++ VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1); + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "=== vect_pattern_recog ==="); +@@ -1480,8 +1704,11 @@ + for (j = 0; j < NUM_PATTERNS; j++) + { + vect_recog_func_ptr = vect_vect_recog_func_ptrs[j]; +- vect_pattern_recog_1 (vect_recog_func_ptr, si); ++ vect_pattern_recog_1 (vect_recog_func_ptr, si, ++ &stmts_to_replace); + } + } + } ++ ++ VEC_free (gimple, heap, stmts_to_replace); + } + +=== modified file 'gcc/tree-vect-slp.c' +--- old/gcc/tree-vect-slp.c 2011-10-23 11:29:25 +0000 ++++ new/gcc/tree-vect-slp.c 2011-10-27 11:27:59 +0000 +@@ -480,6 +480,11 @@ + } + } + } ++ else if (rhs_code == WIDEN_LSHIFT_EXPR) ++ { ++ need_same_oprnds = true; ++ first_op1 = gimple_assign_rhs2 (stmt); ++ } + } + else + { + +=== modified file 'gcc/tree-vect-stmts.c' +--- old/gcc/tree-vect-stmts.c 2011-10-23 11:29:25 +0000 ++++ new/gcc/tree-vect-stmts.c 2011-10-27 11:27:59 +0000 +@@ -3359,6 +3359,7 @@ + VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL; + VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL; + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); ++ unsigned int k; + + if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) + return false; +@@ -3375,7 +3376,8 @@ + + code = gimple_assign_rhs_code (stmt); + if (!CONVERT_EXPR_CODE_P (code) +- && code != WIDEN_MULT_EXPR) ++ && code != WIDEN_MULT_EXPR ++ && code != WIDEN_LSHIFT_EXPR) + return false; + + scalar_dest = gimple_assign_lhs (stmt); +@@ -3403,7 +3405,7 @@ + bool ok; + + op1 = gimple_assign_rhs2 (stmt); +- if (code == WIDEN_MULT_EXPR) ++ if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR) + { + /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of + OP1. */ +@@ -3480,7 +3482,7 @@ + fprintf (vect_dump, "transform type promotion operation. ncopies = %d.", + ncopies); + +- if (code == WIDEN_MULT_EXPR) ++ if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR) + { + if (CONSTANT_CLASS_P (op0)) + op0 = fold_convert (TREE_TYPE (op1), op0); +@@ -3521,6 +3523,8 @@ + if (op_type == binary_op) + vec_oprnds1 = VEC_alloc (tree, heap, 1); + } ++ else if (code == WIDEN_LSHIFT_EXPR) ++ vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size); + + /* In case the vectorization factor (VF) is bigger than the number + of elements that we can fit in a vectype (nunits), we have to generate +@@ -3534,15 +3538,33 @@ + if (j == 0) + { + if (slp_node) +- vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, +- &vec_oprnds1, -1); +- else ++ { ++ if (code == WIDEN_LSHIFT_EXPR) ++ { ++ vec_oprnd1 = op1; ++ /* Store vec_oprnd1 for every vector stmt to be created ++ for SLP_NODE. We check during the analysis that all ++ the shift arguments are the same. 
*/ ++ for (k = 0; k < slp_node->vec_stmts_size - 1; k++) ++ VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); ++ ++ vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, ++ -1); ++ } ++ else ++ vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, ++ &vec_oprnds1, -1); ++ } ++ else + { + vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); + VEC_quick_push (tree, vec_oprnds0, vec_oprnd0); + if (op_type == binary_op) + { +- vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL); ++ if (code == WIDEN_LSHIFT_EXPR) ++ vec_oprnd1 = op1; ++ else ++ vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL); + VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); + } + } +@@ -3553,7 +3575,10 @@ + VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0); + if (op_type == binary_op) + { +- vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1); ++ if (code == WIDEN_LSHIFT_EXPR) ++ vec_oprnd1 = op1; ++ else ++ vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1); + VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1); + } + } +@@ -5853,6 +5878,19 @@ + } + break; + ++ case WIDEN_LSHIFT_EXPR: ++ if (BYTES_BIG_ENDIAN) ++ { ++ c1 = VEC_WIDEN_LSHIFT_HI_EXPR; ++ c2 = VEC_WIDEN_LSHIFT_LO_EXPR; ++ } ++ else ++ { ++ c2 = VEC_WIDEN_LSHIFT_HI_EXPR; ++ c1 = VEC_WIDEN_LSHIFT_LO_EXPR; ++ } ++ break; ++ + CASE_CONVERT: + if (BYTES_BIG_ENDIAN) + { + +=== modified file 'gcc/tree-vectorizer.h' +--- old/gcc/tree-vectorizer.h 2011-09-05 06:23:37 +0000 ++++ new/gcc/tree-vectorizer.h 2011-10-23 13:33:07 +0000 +@@ -896,7 +896,7 @@ + Additional pattern recognition functions can (and will) be added + in the future. */ + typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); +-#define NUM_PATTERNS 5 ++#define NUM_PATTERNS 6 + void vect_pattern_recog (loop_vec_info); + + /* In tree-vectorizer.c. */ + +=== modified file 'gcc/tree.def' +--- old/gcc/tree.def 2011-01-21 14:14:12 +0000 ++++ new/gcc/tree.def 2011-10-23 13:33:07 +0000 +@@ -1092,6 +1092,19 @@ + is subtracted from t3. */ + DEFTREECODE (WIDEN_MULT_MINUS_EXPR, "widen_mult_minus_expr", tcc_expression, 3) + ++/* Widening shift left. ++ The first operand is of type t1. ++ The second operand is the number of bits to shift by; it need not be the ++ same type as the first operand and result. ++ Note that the result is undefined if the second operand is larger ++ than or equal to the first operand's type size. ++ The type of the entire expression is t2, such that t2 is at least twice ++ the size of t1. ++ WIDEN_LSHIFT_EXPR is equivalent to first widening (promoting) ++ the first argument from type t1 to type t2, and then shifting it ++ by the second argument. */ ++DEFTREECODE (WIDEN_LSHIFT_EXPR, "widen_lshift_expr", tcc_binary, 2) ++ + /* Fused multiply-add. + All operands and the result are of the same type. No intermediate + rounding is performed after multiplying operand one with operand two +@@ -1147,6 +1160,16 @@ + DEFTREECODE (VEC_INTERLEAVE_HIGH_EXPR, "vec_interleavehigh_expr", tcc_binary, 2) + DEFTREECODE (VEC_INTERLEAVE_LOW_EXPR, "vec_interleavelow_expr", tcc_binary, 2) + ++/* Widening vector shift left in bits. ++ Operand 0 is a vector to be shifted with N elements of size S. ++ Operand 1 is an integer shift amount in bits. ++ The result of the operation is N elements of size 2*S. ++ VEC_WIDEN_LSHIFT_HI_EXPR computes the N/2 high results. ++ VEC_WIDEN_LSHIFT_LO_EXPR computes the N/2 low results. 
++ */ ++DEFTREECODE (VEC_WIDEN_LSHIFT_HI_EXPR, "widen_lshift_hi_expr", tcc_binary, 2) ++DEFTREECODE (VEC_WIDEN_LSHIFT_LO_EXPR, "widen_lshift_lo_expr", tcc_binary, 2) ++ + /* PREDICT_EXPR. Specify hint for branch prediction. The + PREDICT_EXPR_PREDICTOR specify predictor and PREDICT_EXPR_OUTCOME the + outcome (0 for not taken and 1 for taken). Once the profile is guessed + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106836.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106836.patch new file mode 100644 index 0000000000..95b9ea9b83 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106836.patch @@ -0,0 +1,61 @@ +2011-11-04 Revital Eres <revital.eres@linaro.org> + + Backport from mainline -r180673: + + gcc/ + * modulo-sched.c (generate_prolog_epilog): Mark prolog + and epilog as BB_DISABLE_SCHEDULE. + (mark_loop_unsched): New function. + (sms_schedule): Call it. + +=== modified file 'gcc/modulo-sched.c' +--- old/gcc/modulo-sched.c 2011-10-10 14:35:32 +0000 ++++ new/gcc/modulo-sched.c 2011-10-30 05:31:00 +0000 +@@ -1173,6 +1173,8 @@ + /* Put the prolog on the entry edge. */ + e = loop_preheader_edge (loop); + split_edge_and_insert (e, get_insns ()); ++ if (!flag_resched_modulo_sched) ++ e->dest->flags |= BB_DISABLE_SCHEDULE; + + end_sequence (); + +@@ -1186,9 +1188,24 @@ + gcc_assert (single_exit (loop)); + e = single_exit (loop); + split_edge_and_insert (e, get_insns ()); ++ if (!flag_resched_modulo_sched) ++ e->dest->flags |= BB_DISABLE_SCHEDULE; ++ + end_sequence (); + } + ++/* Mark LOOP as software pipelined so the later ++ scheduling passes don't touch it. */ ++static void ++mark_loop_unsched (struct loop *loop) ++{ ++ unsigned i; ++ basic_block *bbs = get_loop_body (loop); ++ ++ for (i = 0; i < loop->num_nodes; i++) ++ bbs[i]->flags |= BB_DISABLE_SCHEDULE; ++} ++ + /* Return true if all the BBs of the loop are empty except the + loop header. */ + static bool +@@ -1714,9 +1731,10 @@ + permute_partial_schedule (ps, g->closing_branch->first_note); + + /* Mark this loop as software pipelined so the later +- scheduling passes doesn't touch it. */ ++ scheduling passes don't touch it. */ + if (! flag_resched_modulo_sched) +- g->bb->flags |= BB_DISABLE_SCHEDULE; ++ mark_loop_unsched (loop); ++ + /* The life-info is not valid any more. */ + df_set_bb_dirty (g->bb); + + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106839.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106839.patch new file mode 100644 index 0000000000..dcffe1945c --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106839.patch @@ -0,0 +1,23 @@ +2011-11-02 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF mainline: + + 2011-11-01 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/bpabi.h (BE8_LINK_SPEC): Recognize generic-armv7 tuning. + +=== modified file 'gcc/config/arm/bpabi.h' +--- old/gcc/config/arm/bpabi.h 2010-12-20 17:48:51 +0000 ++++ new/gcc/config/arm/bpabi.h 2011-11-02 21:02:53 +0000 +@@ -56,7 +56,8 @@ + "|march=armv4|mcpu=fa526|mcpu=fa626:--fix-v4bx}" + + #define BE8_LINK_SPEC " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5"\ +- "|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15:%{!r:--be8}}}" ++ "|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15|mcpu=generic-armv7-a"\ ++ ":%{!r:--be8}}}" + + /* Tell the assembler to build BPABI binaries. 
*/ + #undef SUBTARGET_EXTRA_ASM_SPEC + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106840.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106840.patch new file mode 100644 index 0000000000..70a7bdfa2b --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106840.patch @@ -0,0 +1,1400 @@ +2011-11-17 Ira Rosen <ira.rosen@linaro.org> + + Backport from mainline: + + 2011-11-03 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vectorizer.h (slp_void_p): New. + (struct _slp_tree): Replace left and right with children. Update + documentation. + (struct _slp_oprnd_info): New. + (vect_get_vec_defs): Declare. + (vect_get_slp_defs): Update arguments. + * tree-vect-loop.c (vect_create_epilog_for_reduction): Call + vect_get_vec_defs instead of vect_get_slp_defs. + (vectorizable_reduction): Likewise. + * tree-vect-stmts.c (vect_get_vec_defs): Remove static, add argument. + Update call to vect_get_slp_defs. + (vectorizable_conversion): Update call to vect_get_vec_defs. + (vectorizable_assignment, vectorizable_shift, + vectorizable_operation): Likewise. + (vectorizable_type_demotion): Call vect_get_vec_defs instead of + vect_get_slp_defs. + (vectorizable_type_promotion, vectorizable_store): Likewise. + (vect_analyze_stmt): Fix typo. + * tree-vect-slp.c (vect_free_slp_tree): Update SLP tree traversal. + (vect_print_slp_tree, vect_mark_slp_stmts, + vect_mark_slp_stmts_relevant, vect_slp_rearrange_stmts, + vect_detect_hybrid_slp_stmts, vect_slp_analyze_node_operations, + vect_schedule_slp_instance): Likewise. + (vect_create_new_slp_node): New. + (vect_create_oprnd_info, vect_free_oprnd_info): Likewise. + (vect_get_and_check_slp_defs): Pass information about defs using + oprnds_info, allow any number of operands. + (vect_build_slp_tree): Likewise. Update calls to + vect_get_and_check_slp_defs. Fix comments. + (vect_analyze_slp_instance): Move node creation to + vect_create_new_slp_node. + (vect_get_slp_defs): Allow any number of operands. + + 2011-11-11 Jakub Jelinek <jakub@redhat.com> + + gcc/ + * tree-vect-slp.c (vect_free_slp_tree): Also free SLP_TREE_CHILDREN + vector. + (vect_create_new_slp_node): Don't allocate node before checking stmt + type. + (vect_free_oprnd_info): Remove FREE_DEF_STMTS argument, always + free def_stmts vectors and additionally free oprnd_info. + (vect_build_slp_tree): Adjust callers. Call it even if + stop_recursion. If vect_create_new_slp_node or + vect_build_slp_tree fails, properly handle freeing memory. + If it succeeded, clear def_stmts in oprnd_info. + +=== modified file 'gcc/tree-vect-loop.c' +--- old/gcc/tree-vect-loop.c 2011-09-05 06:23:37 +0000 ++++ new/gcc/tree-vect-loop.c 2011-11-14 11:38:08 +0000 +@@ -3282,8 +3282,8 @@ + + /* Get the loop-entry arguments. 
*/ + if (slp_node) +- vect_get_slp_defs (reduction_op, NULL_TREE, slp_node, &vec_initial_defs, +- NULL, reduc_index); ++ vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs, ++ NULL, slp_node, reduc_index); + else + { + vec_initial_defs = VEC_alloc (tree, heap, 1); +@@ -4451,8 +4451,8 @@ + } + + if (slp_node) +- vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, &vec_oprnds1, +- -1); ++ vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, ++ slp_node, -1); + else + { + loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index], + +=== modified file 'gcc/tree-vect-slp.c' +--- old/gcc/tree-vect-slp.c 2011-10-27 11:27:59 +0000 ++++ new/gcc/tree-vect-slp.c 2011-11-14 11:38:08 +0000 +@@ -67,15 +67,16 @@ + static void + vect_free_slp_tree (slp_tree node) + { ++ int i; ++ slp_void_p child; ++ + if (!node) + return; + +- if (SLP_TREE_LEFT (node)) +- vect_free_slp_tree (SLP_TREE_LEFT (node)); +- +- if (SLP_TREE_RIGHT (node)) +- vect_free_slp_tree (SLP_TREE_RIGHT (node)); +- ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_free_slp_tree ((slp_tree) child); ++ ++ VEC_free (slp_void_p, heap, SLP_TREE_CHILDREN (node)); + VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node)); + + if (SLP_TREE_VEC_STMTS (node)) +@@ -96,48 +97,116 @@ + } + + +-/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that +- they are of a legal type and that they match the defs of the first stmt of +- the SLP group (stored in FIRST_STMT_...). */ ++/* Create an SLP node for SCALAR_STMTS. */ ++ ++static slp_tree ++vect_create_new_slp_node (VEC (gimple, heap) *scalar_stmts) ++{ ++ slp_tree node; ++ gimple stmt = VEC_index (gimple, scalar_stmts, 0); ++ unsigned int nops; ++ ++ if (is_gimple_call (stmt)) ++ nops = gimple_call_num_args (stmt); ++ else if (is_gimple_assign (stmt)) ++ nops = gimple_num_ops (stmt) - 1; ++ else ++ return NULL; ++ ++ node = XNEW (struct _slp_tree); ++ SLP_TREE_SCALAR_STMTS (node) = scalar_stmts; ++ SLP_TREE_VEC_STMTS (node) = NULL; ++ SLP_TREE_CHILDREN (node) = VEC_alloc (slp_void_p, heap, nops); ++ SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0; ++ SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0; ++ ++ return node; ++} ++ ++ ++/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each ++ operand. */ ++static VEC (slp_oprnd_info, heap) * ++vect_create_oprnd_info (int nops, int group_size) ++{ ++ int i; ++ slp_oprnd_info oprnd_info; ++ VEC (slp_oprnd_info, heap) *oprnds_info; ++ ++ oprnds_info = VEC_alloc (slp_oprnd_info, heap, nops); ++ for (i = 0; i < nops; i++) ++ { ++ oprnd_info = XNEW (struct _slp_oprnd_info); ++ oprnd_info->def_stmts = VEC_alloc (gimple, heap, group_size); ++ oprnd_info->first_dt = vect_uninitialized_def; ++ oprnd_info->first_def_type = NULL_TREE; ++ oprnd_info->first_const_oprnd = NULL_TREE; ++ oprnd_info->first_pattern = false; ++ VEC_quick_push (slp_oprnd_info, oprnds_info, oprnd_info); ++ } ++ ++ return oprnds_info; ++} ++ ++ ++/* Free operands info. */ ++ ++static void ++vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info) ++{ ++ int i; ++ slp_oprnd_info oprnd_info; ++ ++ FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info) ++ { ++ VEC_free (gimple, heap, oprnd_info->def_stmts); ++ XDELETE (oprnd_info); ++ } ++ ++ VEC_free (slp_oprnd_info, heap, *oprnds_info); ++} ++ ++ ++/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that ++ they are of a valid type and that they match the defs of the first stmt of ++ the SLP group (stored in OPRNDS_INFO). 
*/ + + static bool + vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, + slp_tree slp_node, gimple stmt, +- VEC (gimple, heap) **def_stmts0, +- VEC (gimple, heap) **def_stmts1, +- enum vect_def_type *first_stmt_dt0, +- enum vect_def_type *first_stmt_dt1, +- tree *first_stmt_def0_type, +- tree *first_stmt_def1_type, +- tree *first_stmt_const_oprnd, +- int ncopies_for_cost, +- bool *pattern0, bool *pattern1) ++ int ncopies_for_cost, bool first, ++ VEC (slp_oprnd_info, heap) **oprnds_info) + { + tree oprnd; + unsigned int i, number_of_oprnds; +- tree def[2]; ++ tree def, def_op0 = NULL_TREE; + gimple def_stmt; +- enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; +- stmt_vec_info stmt_info = +- vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0)); +- enum gimple_rhs_class rhs_class; ++ enum vect_def_type dt = vect_uninitialized_def; ++ enum vect_def_type dt_op0 = vect_uninitialized_def; ++ stmt_vec_info stmt_info = vinfo_for_stmt (stmt); ++ tree lhs = gimple_get_lhs (stmt); + struct loop *loop = NULL; + enum tree_code rhs_code; + bool different_types = false; ++ bool pattern = false; ++ slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info; + + if (loop_vinfo) + loop = LOOP_VINFO_LOOP (loop_vinfo); + +- rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt)); +- number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */ ++ if (is_gimple_call (stmt)) ++ number_of_oprnds = gimple_call_num_args (stmt); ++ else ++ number_of_oprnds = gimple_num_ops (stmt) - 1; + + for (i = 0; i < number_of_oprnds; i++) + { + oprnd = gimple_op (stmt, i + 1); ++ oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i); + +- if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def[i], +- &dt[i]) +- || (!def_stmt && dt[i] != vect_constant_def)) ++ if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def, ++ &dt) ++ || (!def_stmt && dt != vect_constant_def)) + { + if (vect_print_dump_info (REPORT_SLP)) + { +@@ -158,29 +227,24 @@ + && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt)) + && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) + { +- if (!*first_stmt_dt0) +- *pattern0 = true; +- else +- { +- if (i == 1 && !*first_stmt_dt1) +- *pattern1 = true; +- else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1)) +- { +- if (vect_print_dump_info (REPORT_DETAILS)) +- { +- fprintf (vect_dump, "Build SLP failed: some of the stmts" +- " are in a pattern, and others are not "); +- print_generic_expr (vect_dump, oprnd, TDF_SLIM); +- } ++ pattern = true; ++ if (!first && !oprnd_info->first_pattern) ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "Build SLP failed: some of the stmts" ++ " are in a pattern, and others are not "); ++ print_generic_expr (vect_dump, oprnd, TDF_SLIM); ++ } + +- return false; +- } ++ return false; + } + + def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); +- dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt)); ++ dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt)); + +- if (*dt == vect_unknown_def_type) ++ if (dt == vect_unknown_def_type ++ || STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (def_stmt))) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "Unsupported pattern."); +@@ -190,11 +254,11 @@ + switch (gimple_code (def_stmt)) + { + case GIMPLE_PHI: +- def[i] = gimple_phi_result (def_stmt); ++ def = gimple_phi_result (def_stmt); + break; + + case GIMPLE_ASSIGN: +- def[i] = gimple_assign_lhs (def_stmt); ++ def = 
gimple_assign_lhs (def_stmt); + break; + + default: +@@ -204,117 +268,125 @@ + } + } + +- if (!*first_stmt_dt0) ++ if (first) + { +- /* op0 of the first stmt of the group - store its info. */ +- *first_stmt_dt0 = dt[i]; +- if (def[i]) +- *first_stmt_def0_type = TREE_TYPE (def[i]); +- else +- *first_stmt_const_oprnd = oprnd; ++ oprnd_info->first_dt = dt; ++ oprnd_info->first_pattern = pattern; ++ if (def) ++ { ++ oprnd_info->first_def_type = TREE_TYPE (def); ++ oprnd_info->first_const_oprnd = NULL_TREE; ++ } ++ else ++ { ++ oprnd_info->first_def_type = NULL_TREE; ++ oprnd_info->first_const_oprnd = oprnd; ++ } + +- /* Analyze costs (for the first stmt of the group only). */ +- if (rhs_class != GIMPLE_SINGLE_RHS) +- /* Not memory operation (we don't call this functions for loads). */ +- vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node); +- else +- /* Store. */ +- vect_model_store_cost (stmt_info, ncopies_for_cost, false, +- dt[0], slp_node); ++ if (i == 0) ++ { ++ def_op0 = def; ++ dt_op0 = dt; ++ /* Analyze costs (for the first stmt of the group only). */ ++ if (REFERENCE_CLASS_P (lhs)) ++ /* Store. */ ++ vect_model_store_cost (stmt_info, ncopies_for_cost, false, ++ dt, slp_node); ++ else ++ /* Not memory operation (we don't call this function for ++ loads). */ ++ vect_model_simple_cost (stmt_info, ncopies_for_cost, &dt, ++ slp_node); ++ } + } + + else + { +- if (!*first_stmt_dt1 && i == 1) +- { +- /* op1 of the first stmt of the group - store its info. */ +- *first_stmt_dt1 = dt[i]; +- if (def[i]) +- *first_stmt_def1_type = TREE_TYPE (def[i]); +- else +- { +- /* We assume that the stmt contains only one constant +- operand. We fail otherwise, to be on the safe side. */ +- if (*first_stmt_const_oprnd) +- { +- if (vect_print_dump_info (REPORT_SLP)) +- fprintf (vect_dump, "Build SLP failed: two constant " +- "oprnds in stmt"); +- return false; +- } +- *first_stmt_const_oprnd = oprnd; +- } +- } +- else +- { +- /* Not first stmt of the group, check that the def-stmt/s match +- the def-stmt/s of the first stmt. */ +- if ((i == 0 +- && (*first_stmt_dt0 != dt[i] +- || (*first_stmt_def0_type && def[0] +- && !types_compatible_p (*first_stmt_def0_type, +- TREE_TYPE (def[0]))))) +- || (i == 1 +- && (*first_stmt_dt1 != dt[i] +- || (*first_stmt_def1_type && def[1] +- && !types_compatible_p (*first_stmt_def1_type, +- TREE_TYPE (def[1]))))) +- || (!def[i] +- && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd), +- TREE_TYPE (oprnd))) +- || different_types) +- { +- if (i != number_of_oprnds - 1) +- different_types = true; ++ /* Not first stmt of the group, check that the def-stmt/s match ++ the def-stmt/s of the first stmt. Allow different definition ++ types for reduction chains: the first stmt must be a ++ vect_reduction_def (a phi node), and the rest ++ vect_internal_def. */ ++ if (((oprnd_info->first_dt != dt ++ && !(oprnd_info->first_dt == vect_reduction_def ++ && dt == vect_internal_def)) ++ || (oprnd_info->first_def_type != NULL_TREE ++ && def ++ && !types_compatible_p (oprnd_info->first_def_type, ++ TREE_TYPE (def)))) ++ || (!def ++ && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd), ++ TREE_TYPE (oprnd))) ++ || different_types) ++ { ++ if (number_of_oprnds != 2) ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ fprintf (vect_dump, "Build SLP failed: different types "); ++ ++ return false; ++ } ++ ++ /* Try to swap operands in case of binary operation. 
*/ ++ if (i == 0) ++ different_types = true; ++ else ++ { ++ oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0); ++ if (is_gimple_assign (stmt) ++ && (rhs_code = gimple_assign_rhs_code (stmt)) ++ && TREE_CODE_CLASS (rhs_code) == tcc_binary ++ && commutative_tree_code (rhs_code) ++ && oprnd0_info->first_dt == dt ++ && oprnd_info->first_dt == dt_op0 ++ && def_op0 && def ++ && !(oprnd0_info->first_def_type ++ && !types_compatible_p (oprnd0_info->first_def_type, ++ TREE_TYPE (def))) ++ && !(oprnd_info->first_def_type ++ && !types_compatible_p (oprnd_info->first_def_type, ++ TREE_TYPE (def_op0)))) ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ { ++ fprintf (vect_dump, "Swapping operands of "); ++ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); ++ } ++ ++ swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt), ++ gimple_assign_rhs2_ptr (stmt)); ++ } + else +- { +- if (is_gimple_assign (stmt) +- && (rhs_code = gimple_assign_rhs_code (stmt)) +- && TREE_CODE_CLASS (rhs_code) == tcc_binary +- && commutative_tree_code (rhs_code) +- && *first_stmt_dt0 == dt[1] +- && *first_stmt_dt1 == dt[0] +- && def[0] && def[1] +- && !(*first_stmt_def0_type +- && !types_compatible_p (*first_stmt_def0_type, +- TREE_TYPE (def[1]))) +- && !(*first_stmt_def1_type +- && !types_compatible_p (*first_stmt_def1_type, +- TREE_TYPE (def[0])))) +- { +- if (vect_print_dump_info (REPORT_SLP)) +- { +- fprintf (vect_dump, "Swapping operands of "); +- print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); +- } +- swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt), +- gimple_assign_rhs2_ptr (stmt)); +- } +- else +- { +- if (vect_print_dump_info (REPORT_SLP)) +- fprintf (vect_dump, "Build SLP failed: different types "); +- +- return false; +- } +- } ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ fprintf (vect_dump, "Build SLP failed: different types "); ++ ++ return false; ++ } + } + } + } + + /* Check the types of the definitions. 
*/ +- switch (dt[i]) ++ switch (dt) + { + case vect_constant_def: + case vect_external_def: ++ case vect_reduction_def: + break; + + case vect_internal_def: +- case vect_reduction_def: +- if ((i == 0 && !different_types) || (i == 1 && different_types)) +- VEC_safe_push (gimple, heap, *def_stmts0, def_stmt); ++ if (different_types) ++ { ++ oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0); ++ oprnd1_info = VEC_index (slp_oprnd_info, *oprnds_info, 0); ++ if (i == 0) ++ VEC_quick_push (gimple, oprnd1_info->def_stmts, def_stmt); ++ else ++ VEC_quick_push (gimple, oprnd0_info->def_stmts, def_stmt); ++ } + else +- VEC_safe_push (gimple, heap, *def_stmts1, def_stmt); ++ VEC_quick_push (gimple, oprnd_info->def_stmts, def_stmt); + break; + + default: +@@ -322,7 +394,7 @@ + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "Build SLP failed: illegal type of def "); +- print_generic_expr (vect_dump, def[i], TDF_SLIM); ++ print_generic_expr (vect_dump, def, TDF_SLIM); + } + + return false; +@@ -347,15 +419,10 @@ + VEC (slp_tree, heap) **loads, + unsigned int vectorization_factor, bool *loads_permuted) + { +- VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size); +- VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size); + unsigned int i; + VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node); + gimple stmt = VEC_index (gimple, stmts, 0); +- enum vect_def_type first_stmt_dt0 = vect_uninitialized_def; +- enum vect_def_type first_stmt_dt1 = vect_uninitialized_def; + enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK; +- tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE; + tree lhs; + bool stop_recursion = false, need_same_oprnds = false; + tree vectype, scalar_type, first_op1 = NULL_TREE; +@@ -364,13 +431,21 @@ + int icode; + enum machine_mode optab_op2_mode; + enum machine_mode vec_mode; +- tree first_stmt_const_oprnd = NULL_TREE; + struct data_reference *first_dr; +- bool pattern0 = false, pattern1 = false; + HOST_WIDE_INT dummy; + bool permutation = false; + unsigned int load_place; + gimple first_load, prev_first_load = NULL; ++ VEC (slp_oprnd_info, heap) *oprnds_info; ++ unsigned int nops; ++ slp_oprnd_info oprnd_info; ++ ++ if (is_gimple_call (stmt)) ++ nops = gimple_call_num_args (stmt); ++ else ++ nops = gimple_num_ops (stmt) - 1; ++ ++ oprnds_info = vect_create_oprnd_info (nops, group_size); + + /* For every stmt in NODE find its def stmt/s. 
*/ + FOR_EACH_VEC_ELT (gimple, stmts, i, stmt) +@@ -391,6 +466,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + +@@ -400,10 +476,11 @@ + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, +- "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL"); ++ "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL "); + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + +@@ -416,6 +493,8 @@ + fprintf (vect_dump, "Build SLP failed: unsupported data-type "); + print_generic_expr (vect_dump, scalar_type, TDF_SLIM); + } ++ ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + +@@ -462,6 +541,7 @@ + { + if (vect_print_dump_info (REPORT_SLP)) + fprintf (vect_dump, "Build SLP failed: no optab."); ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + icode = (int) optab_handler (optab, vec_mode); +@@ -470,6 +550,7 @@ + if (vect_print_dump_info (REPORT_SLP)) + fprintf (vect_dump, "Build SLP failed: " + "op not supported by target."); ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + optab_op2_mode = insn_data[icode].operand[2].mode; +@@ -506,6 +587,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + +@@ -519,6 +601,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + } +@@ -530,15 +613,12 @@ + { + /* Store. */ + if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, +- stmt, &def_stmts0, &def_stmts1, +- &first_stmt_dt0, +- &first_stmt_dt1, +- &first_stmt_def0_type, +- &first_stmt_def1_type, +- &first_stmt_const_oprnd, +- ncopies_for_cost, +- &pattern0, &pattern1)) +- return false; ++ stmt, ncopies_for_cost, ++ (i == 0), &oprnds_info)) ++ { ++ vect_free_oprnd_info (&oprnds_info); ++ return false; ++ } + } + else + { +@@ -556,6 +636,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + +@@ -573,6 +654,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + +@@ -593,6 +675,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + } +@@ -612,6 +695,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + +@@ -639,7 +723,7 @@ + { + if (TREE_CODE_CLASS (rhs_code) == tcc_reference) + { +- /* Not strided load. */ ++ /* Not strided load. */ + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "Build SLP failed: not strided load "); +@@ -647,6 +731,7 @@ + } + + /* FORNOW: Not strided loads are not supported. */ ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + +@@ -661,19 +746,18 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + + /* Find the def-stmts. 
*/ + if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt, +- &def_stmts0, &def_stmts1, +- &first_stmt_dt0, &first_stmt_dt1, +- &first_stmt_def0_type, +- &first_stmt_def1_type, +- &first_stmt_const_oprnd, +- ncopies_for_cost, +- &pattern0, &pattern1)) +- return false; ++ ncopies_for_cost, (i == 0), ++ &oprnds_info)) ++ { ++ vect_free_oprnd_info (&oprnds_info); ++ return false; ++ } + } + } + +@@ -702,46 +786,37 @@ + *loads_permuted = true; + } + ++ vect_free_oprnd_info (&oprnds_info); + return true; + } + + /* Create SLP_TREE nodes for the definition node/s. */ +- if (first_stmt_dt0 == vect_internal_def) +- { +- slp_tree left_node = XNEW (struct _slp_tree); +- SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0; +- SLP_TREE_VEC_STMTS (left_node) = NULL; +- SLP_TREE_LEFT (left_node) = NULL; +- SLP_TREE_RIGHT (left_node) = NULL; +- SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0; +- SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0; +- if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size, +- inside_cost, outside_cost, ncopies_for_cost, +- max_nunits, load_permutation, loads, +- vectorization_factor, loads_permuted)) +- return false; +- +- SLP_TREE_LEFT (*node) = left_node; +- } +- +- if (first_stmt_dt1 == vect_internal_def) +- { +- slp_tree right_node = XNEW (struct _slp_tree); +- SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1; +- SLP_TREE_VEC_STMTS (right_node) = NULL; +- SLP_TREE_LEFT (right_node) = NULL; +- SLP_TREE_RIGHT (right_node) = NULL; +- SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0; +- SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0; +- if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size, +- inside_cost, outside_cost, ncopies_for_cost, +- max_nunits, load_permutation, loads, +- vectorization_factor, loads_permuted)) +- return false; +- +- SLP_TREE_RIGHT (*node) = right_node; +- } +- ++ FOR_EACH_VEC_ELT (slp_oprnd_info, oprnds_info, i, oprnd_info) ++ { ++ slp_tree child; ++ ++ if (oprnd_info->first_dt != vect_internal_def) ++ continue; ++ ++ child = vect_create_new_slp_node (oprnd_info->def_stmts); ++ if (!child ++ || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size, ++ inside_cost, outside_cost, ncopies_for_cost, ++ max_nunits, load_permutation, loads, ++ vectorization_factor, loads_permuted)) ++ { ++ if (child) ++ oprnd_info->def_stmts = NULL; ++ vect_free_slp_tree (child); ++ vect_free_oprnd_info (&oprnds_info); ++ return false; ++ } ++ ++ oprnd_info->def_stmts = NULL; ++ VEC_quick_push (slp_void_p, SLP_TREE_CHILDREN (*node), child); ++ } ++ ++ vect_free_oprnd_info (&oprnds_info); + return true; + } + +@@ -751,6 +826,7 @@ + { + int i; + gimple stmt; ++ slp_void_p child; + + if (!node) + return; +@@ -763,8 +839,8 @@ + } + fprintf (vect_dump, "\n"); + +- vect_print_slp_tree (SLP_TREE_LEFT (node)); +- vect_print_slp_tree (SLP_TREE_RIGHT (node)); ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_print_slp_tree ((slp_tree) child); + } + + +@@ -778,6 +854,7 @@ + { + int i; + gimple stmt; ++ slp_void_p child; + + if (!node) + return; +@@ -786,8 +863,8 @@ + if (j < 0 || i == j) + STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark; + +- vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j); +- vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j); ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_mark_slp_stmts ((slp_tree) child, mark, j); + } + + +@@ -799,6 +876,7 @@ + int i; + gimple stmt; + stmt_vec_info stmt_info; ++ slp_void_p child; + + if (!node) + return; +@@ -811,8 +889,8 @@ + 
STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope; + } + +- vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node)); +- vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node)); ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_mark_slp_stmts_relevant ((slp_tree) child); + } + + +@@ -885,12 +963,13 @@ + gimple stmt; + VEC (gimple, heap) *tmp_stmts; + unsigned int index, i; ++ slp_void_p child; + + if (!node) + return; + +- vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation); +- vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation); ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation); + + gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))); + tmp_stmts = VEC_alloc (gimple, heap, group_size); +@@ -1253,7 +1332,7 @@ + gimple stmt) + { + slp_instance new_instance; +- slp_tree node = XNEW (struct _slp_tree); ++ slp_tree node; + unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt)); + unsigned int unrolling_factor = 1, nunits; + tree vectype, scalar_type = NULL_TREE; +@@ -1265,6 +1344,7 @@ + VEC (slp_tree, heap) *loads; + struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); + bool loads_permuted = false; ++ VEC (gimple, heap) *scalar_stmts; + + if (dr) + { +@@ -1308,39 +1388,26 @@ + } + + /* Create a node (a root of the SLP tree) for the packed strided stores. */ +- SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size); ++ scalar_stmts = VEC_alloc (gimple, heap, group_size); + next = stmt; + if (dr) + { + /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */ + while (next) + { +- VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next); ++ VEC_safe_push (gimple, heap, scalar_stmts, next); + next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); + } + } + else + { + /* Collect reduction statements. 
*/ +- for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, +- next); +- i++) +- { +- VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next); +- if (vect_print_dump_info (REPORT_DETAILS)) +- { +- fprintf (vect_dump, "pushing reduction into node: "); +- print_gimple_stmt (vect_dump, next, 0, TDF_SLIM); +- } +- } ++ VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo); ++ for (i = 0; VEC_iterate (gimple, reductions, i, next); i++) ++ VEC_safe_push (gimple, heap, scalar_stmts, next); + } + +- SLP_TREE_VEC_STMTS (node) = NULL; +- SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0; +- SLP_TREE_LEFT (node) = NULL; +- SLP_TREE_RIGHT (node) = NULL; +- SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0; +- SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0; ++ node = vect_create_new_slp_node (scalar_stmts); + + /* Calculate the number of vector stmts to create based on the unrolling + factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is +@@ -1517,6 +1584,7 @@ + imm_use_iterator imm_iter; + gimple use_stmt; + stmt_vec_info stmt_vinfo; ++ slp_void_p child; + + if (!node) + return; +@@ -1534,8 +1602,8 @@ + == vect_reduction_def)) + vect_mark_slp_stmts (node, hybrid, i); + +- vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node)); +- vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node)); ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_detect_hybrid_slp_stmts ((slp_tree) child); + } + + +@@ -1625,13 +1693,14 @@ + bool dummy; + int i; + gimple stmt; ++ slp_void_p child; + + if (!node) + return true; + +- if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node)) +- || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node))) +- return false; ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child)) ++ return false; + + FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt) + { +@@ -2207,88 +2276,102 @@ + If the scalar definitions are loop invariants or constants, collect them and + call vect_get_constant_vectors() to create vector stmts. + Otherwise, the def-stmts must be already vectorized and the vectorized stmts +- must be stored in the LEFT/RIGHT node of SLP_NODE, and we call +- vect_get_slp_vect_defs() to retrieve them. +- If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from +- the right node. This is used when the second operand must remain scalar. */ ++ must be stored in the corresponding child of SLP_NODE, and we call ++ vect_get_slp_vect_defs () to retrieve them. */ + + void +-vect_get_slp_defs (tree op0, tree op1, slp_tree slp_node, +- VEC (tree,heap) **vec_oprnds0, +- VEC (tree,heap) **vec_oprnds1, int reduc_index) ++vect_get_slp_defs (VEC (tree, heap) *ops, slp_tree slp_node, ++ VEC (slp_void_p, heap) **vec_oprnds, int reduc_index) + { +- gimple first_stmt; +- enum tree_code code; +- int number_of_vects; ++ gimple first_stmt, first_def; ++ int number_of_vects = 0, i; ++ unsigned int child_index = 0; + HOST_WIDE_INT lhs_size_unit, rhs_size_unit; ++ slp_tree child = NULL; ++ VEC (tree, heap) *vec_defs; ++ tree oprnd, def_lhs; ++ bool vectorized_defs; + + first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0); +- /* The number of vector defs is determined by the number of vector statements +- in the node from which we get those statements. 
*/ +- if (SLP_TREE_LEFT (slp_node)) +- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node)); +- else +- { +- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); +- /* Number of vector stmts was calculated according to LHS in +- vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if +- necessary. See vect_get_smallest_scalar_type () for details. */ +- vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit, +- &rhs_size_unit); +- if (rhs_size_unit != lhs_size_unit) +- { +- number_of_vects *= rhs_size_unit; +- number_of_vects /= lhs_size_unit; +- } ++ FOR_EACH_VEC_ELT (tree, ops, i, oprnd) ++ { ++ /* For each operand we check if it has vectorized definitions in a child ++ node or we need to create them (for invariants and constants). We ++ check if the LHS of the first stmt of the next child matches OPRND. ++ If it does, we found the correct child. Otherwise, we call ++ vect_get_constant_vectors (), and not advance CHILD_INDEX in order ++ to check this child node for the next operand. */ ++ vectorized_defs = false; ++ if (VEC_length (slp_void_p, SLP_TREE_CHILDREN (slp_node)) > child_index) ++ { ++ child = (slp_tree) VEC_index (slp_void_p, ++ SLP_TREE_CHILDREN (slp_node), ++ child_index); ++ first_def = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (child), 0); ++ ++ /* In the end of a pattern sequence we have a use of the original stmt, ++ so we need to compare OPRND with the original def. */ ++ if (is_pattern_stmt_p (vinfo_for_stmt (first_def)) ++ && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt)) ++ && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt))) ++ first_def = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def)); ++ ++ if (is_gimple_call (first_def)) ++ def_lhs = gimple_call_lhs (first_def); ++ else ++ def_lhs = gimple_assign_lhs (first_def); ++ ++ if (operand_equal_p (oprnd, def_lhs, 0)) ++ { ++ /* The number of vector defs is determined by the number of ++ vector statements in the node from which we get those ++ statements. */ ++ number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child); ++ vectorized_defs = true; ++ child_index++; ++ } ++ } ++ ++ if (!vectorized_defs) ++ { ++ if (i == 0) ++ { ++ number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++ /* Number of vector stmts was calculated according to LHS in ++ vect_schedule_slp_instance (), fix it by replacing LHS with ++ RHS, if necessary. See vect_get_smallest_scalar_type () for ++ details. */ ++ vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit, ++ &rhs_size_unit); ++ if (rhs_size_unit != lhs_size_unit) ++ { ++ number_of_vects *= rhs_size_unit; ++ number_of_vects /= lhs_size_unit; ++ } ++ } ++ } ++ ++ /* Allocate memory for vectorized defs. */ ++ vec_defs = VEC_alloc (tree, heap, number_of_vects); ++ ++ /* For reduction defs we call vect_get_constant_vectors (), since we are ++ looking for initial loop invariant values. */ ++ if (vectorized_defs && reduc_index == -1) ++ /* The defs are already vectorized. */ ++ vect_get_slp_vect_defs (child, &vec_defs); ++ else ++ /* Build vectors from scalar defs. */ ++ vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i, ++ number_of_vects, reduc_index); ++ ++ VEC_quick_push (slp_void_p, *vec_oprnds, (slp_void_p) vec_defs); ++ ++ /* For reductions, we only need initial values. */ ++ if (reduc_index != -1) ++ return; + } +- +- /* Allocate memory for vectorized defs. */ +- *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects); +- +- /* SLP_NODE corresponds either to a group of stores or to a group of +- unary/binary operations. 
We don't call this function for loads. +- For reduction defs we call vect_get_constant_vectors(), since we are +- looking for initial loop invariant values. */ +- if (SLP_TREE_LEFT (slp_node) && reduc_index == -1) +- /* The defs are already vectorized. */ +- vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0); +- else +- /* Build vectors from scalar defs. */ +- vect_get_constant_vectors (op0, slp_node, vec_oprnds0, 0, number_of_vects, +- reduc_index); +- +- if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))) +- /* Since we don't call this function with loads, this is a group of +- stores. */ +- return; +- +- /* For reductions, we only need initial values. */ +- if (reduc_index != -1) +- return; +- +- code = gimple_assign_rhs_code (first_stmt); +- if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1) +- return; +- +- /* The number of vector defs is determined by the number of vector statements +- in the node from which we get those statements. */ +- if (SLP_TREE_RIGHT (slp_node)) +- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node)); +- else +- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); +- +- *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects); +- +- if (SLP_TREE_RIGHT (slp_node)) +- /* The defs are already vectorized. */ +- vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1); +- else +- /* Build vectors from scalar defs. */ +- vect_get_constant_vectors (op1, slp_node, vec_oprnds1, 1, number_of_vects, +- -1); + } + +- + /* Create NCOPIES permutation statements using the mask MASK_BYTES (by + building a vector of type MASK_TYPE from it) and two input vectors placed in + DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and +@@ -2605,14 +2688,14 @@ + tree vectype; + int i; + slp_tree loads_node; ++ slp_void_p child; + + if (!node) + return false; + +- vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance, +- vectorization_factor); +- vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance, +- vectorization_factor); ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_schedule_slp_instance ((slp_tree) child, instance, ++ vectorization_factor); + + stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0); + stmt_info = vinfo_for_stmt (stmt); + +=== modified file 'gcc/tree-vect-stmts.c' +--- old/gcc/tree-vect-stmts.c 2011-10-27 11:27:59 +0000 ++++ new/gcc/tree-vect-stmts.c 2011-11-14 11:38:08 +0000 +@@ -1419,16 +1419,35 @@ + } + + +-/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not +- NULL. */ ++/* Get vectorized definitions for OP0 and OP1. ++ REDUC_INDEX is the index of reduction operand in case of reduction, ++ and -1 otherwise. */ + +-static void ++void + vect_get_vec_defs (tree op0, tree op1, gimple stmt, +- VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1, +- slp_tree slp_node) ++ VEC (tree, heap) **vec_oprnds0, ++ VEC (tree, heap) **vec_oprnds1, ++ slp_tree slp_node, int reduc_index) + { + if (slp_node) +- vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1, -1); ++ { ++ int nops = (op1 == NULL_TREE) ? 
1 : 2; ++ VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops); ++ VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops); ++ ++ VEC_quick_push (tree, ops, op0); ++ if (op1) ++ VEC_quick_push (tree, ops, op1); ++ ++ vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index); ++ ++ *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0); ++ if (op1) ++ *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1); ++ ++ VEC_free (tree, heap, ops); ++ VEC_free (slp_void_p, heap, vec_defs); ++ } + else + { + tree vec_oprnd; +@@ -2016,7 +2035,8 @@ + for (j = 0; j < ncopies; j++) + { + if (j == 0) +- vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node); ++ vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node, ++ -1); + else + vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL); + +@@ -2221,7 +2241,7 @@ + { + /* Handle uses. */ + if (j == 0) +- vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node); ++ vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1); + else + vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL); + +@@ -2576,10 +2596,10 @@ + operand 1 should be of a vector type (the usual case). */ + if (vec_oprnd1) + vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, +- slp_node); ++ slp_node, -1); + else + vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, +- slp_node); ++ slp_node, -1); + } + else + vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1); +@@ -2887,10 +2907,10 @@ + { + if (op_type == binary_op || op_type == ternary_op) + vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, +- slp_node); ++ slp_node, -1); + else + vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, +- slp_node); ++ slp_node, -1); + if (op_type == ternary_op) + { + vec_oprnds2 = VEC_alloc (tree, heap, 1); +@@ -3202,7 +3222,8 @@ + { + /* Handle uses. */ + if (slp_node) +- vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, -1); ++ vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, ++ slp_node, -1); + else + { + VEC_free (tree, heap, vec_oprnds0); +@@ -3548,12 +3569,12 @@ + for (k = 0; k < slp_node->vec_stmts_size - 1; k++) + VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); + +- vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, +- -1); ++ vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, ++ slp_node, -1); + } + else +- vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, +- &vec_oprnds1, -1); ++ vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, ++ &vec_oprnds1, slp_node, -1); + } + else + { +@@ -3796,6 +3817,7 @@ + vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0); + first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); ++ op = gimple_assign_rhs1 (first_stmt); + } + else + /* VEC_NUM is the number of vect stmts to be created for this +@@ -3878,8 +3900,8 @@ + if (slp) + { + /* Get vectorized arguments for SLP_NODE. */ +- vect_get_slp_defs (NULL_TREE, NULL_TREE, slp_node, &vec_oprnds, +- NULL, -1); ++ vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, ++ NULL, slp_node, -1); + + vec_oprnd = VEC_index (tree, vec_oprnds, 0); + } +@@ -5040,7 +5062,7 @@ + In basic blocks we only analyze statements that are a part of some SLP + instance, therefore, all the statements are relevant. 
+ +- Pattern statement need to be analyzed instead of the original statement ++ Pattern statement needs to be analyzed instead of the original statement + if the original statement is not relevant. Otherwise, we analyze both + statements. */ + + +=== modified file 'gcc/tree-vectorizer.h' +--- old/gcc/tree-vectorizer.h 2011-10-23 13:33:07 +0000 ++++ new/gcc/tree-vectorizer.h 2011-11-14 11:38:08 +0000 +@@ -73,15 +73,15 @@ + /************************************************************************ + SLP + ************************************************************************/ ++typedef void *slp_void_p; ++DEF_VEC_P (slp_void_p); ++DEF_VEC_ALLOC_P (slp_void_p, heap); + +-/* A computation tree of an SLP instance. Each node corresponds to a group of ++/* A computation tree of an SLP instance. Each node corresponds to a group of + stmts to be packed in a SIMD stmt. */ + typedef struct _slp_tree { +- /* Only binary and unary operations are supported. LEFT child corresponds to +- the first operand and RIGHT child to the second if the operation is +- binary. */ +- struct _slp_tree *left; +- struct _slp_tree *right; ++ /* Nodes that contain def-stmts of this node statements operands. */ ++ VEC (slp_void_p, heap) *children; + /* A group of scalar stmts to be vectorized together. */ + VEC (gimple, heap) *stmts; + /* Vectorized stmt/s. */ +@@ -146,14 +146,32 @@ + #define SLP_INSTANCE_LOADS(S) (S)->loads + #define SLP_INSTANCE_FIRST_LOAD_STMT(S) (S)->first_load + +-#define SLP_TREE_LEFT(S) (S)->left +-#define SLP_TREE_RIGHT(S) (S)->right ++#define SLP_TREE_CHILDREN(S) (S)->children + #define SLP_TREE_SCALAR_STMTS(S) (S)->stmts + #define SLP_TREE_VEC_STMTS(S) (S)->vec_stmts + #define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size + #define SLP_TREE_OUTSIDE_OF_LOOP_COST(S) (S)->cost.outside_of_loop + #define SLP_TREE_INSIDE_OF_LOOP_COST(S) (S)->cost.inside_of_loop + ++/* This structure is used in creation of an SLP tree. Each instance ++ corresponds to the same operand in a group of scalar stmts in an SLP ++ node. */ ++typedef struct _slp_oprnd_info ++{ ++ /* Def-stmts for the operands. */ ++ VEC (gimple, heap) *def_stmts; ++ /* Information about the first statement, its vector def-type, type, the ++ operand itself in case it's constant, and an indication if it's a pattern ++ stmt. */ ++ enum vect_def_type first_dt; ++ tree first_def_type; ++ tree first_const_oprnd; ++ bool first_pattern; ++} *slp_oprnd_info; ++ ++DEF_VEC_P(slp_oprnd_info); ++DEF_VEC_ALLOC_P(slp_oprnd_info, heap); ++ + + typedef struct _vect_peel_info + { +@@ -819,6 +837,8 @@ + unsigned int *, unsigned int *); + extern void vect_get_store_cost (struct data_reference *, int, unsigned int *); + extern bool vect_supportable_shift (enum tree_code, tree); ++extern void vect_get_vec_defs (tree, tree, gimple, VEC (tree, heap) **, ++ VEC (tree, heap) **, slp_tree, int); + + /* In tree-vect-data-refs.c. 
*/ + extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int); +@@ -885,8 +905,9 @@ + extern bool vect_analyze_slp (loop_vec_info, bb_vec_info); + extern void vect_make_slp_decision (loop_vec_info); + extern void vect_detect_hybrid_slp (loop_vec_info); +-extern void vect_get_slp_defs (tree, tree, slp_tree, VEC (tree,heap) **, +- VEC (tree,heap) **, int); ++extern void vect_get_slp_defs (VEC (tree, heap) *, slp_tree, ++ VEC (slp_void_p, heap) **, int); ++ + extern LOC find_bb_location (basic_block); + extern bb_vec_info vect_slp_analyze_bb (basic_block); + extern void vect_slp_transform_bb (basic_block); + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106841.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106841.patch new file mode 100644 index 0000000000..d72446919f --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106841.patch @@ -0,0 +1,515 @@ +2011-11-21 Michael Hope <michael.hope@linaro.org> + + Backport from mainline r180131: + + 2011-10-18 Julian Brown <julian@codesourcery.com> + + gcc/ + * config/arm/arm.c (arm_block_move_unaligned_straight) + (arm_adjust_block_mem, arm_block_move_unaligned_loop) + (arm_movmemqi_unaligned): New. + (arm_gen_movmemqi): Support unaligned block copies. + + gcc/testsuite/ + * lib/target-supports.exp (check_effective_target_arm_unaligned): New. + * gcc.target/arm/unaligned-memcpy-1.c: New. + * gcc.target/arm/unaligned-memcpy-2.c: New. + * gcc.target/arm/unaligned-memcpy-3.c: New. + * gcc.target/arm/unaligned-memcpy-4.c: New. + + 2011-09-15 James Greenhalgh <james.greenhalgh@arm.com> + + gcc/ + * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): New builtin macro. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-10-26 11:38:30 +0000 ++++ new/gcc/config/arm/arm.c 2011-11-21 01:45:54 +0000 +@@ -10803,6 +10803,335 @@ + return true; + } + ++/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit ++ unaligned copies on processors which support unaligned semantics for those ++ instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency ++ (using more registers) by doing e.g. load/load/store/store for a factor of 2. ++ An interleave factor of 1 (the minimum) will perform no interleaving. ++ Load/store multiple are used for aligned addresses where possible. */ ++ ++static void ++arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, ++ HOST_WIDE_INT length, ++ unsigned int interleave_factor) ++{ ++ rtx *regs = XALLOCAVEC (rtx, interleave_factor); ++ int *regnos = XALLOCAVEC (int, interleave_factor); ++ HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD; ++ HOST_WIDE_INT i, j; ++ HOST_WIDE_INT remaining = length, words; ++ rtx halfword_tmp = NULL, byte_tmp = NULL; ++ rtx dst, src; ++ bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD; ++ bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD; ++ HOST_WIDE_INT srcoffset, dstoffset; ++ HOST_WIDE_INT src_autoinc, dst_autoinc; ++ rtx mem, addr; ++ ++ gcc_assert (1 <= interleave_factor && interleave_factor <= 4); ++ ++ /* Use hard registers if we have aligned source or destination so we can use ++ load/store multiple with contiguous registers. 
*/ ++ if (dst_aligned || src_aligned) ++ for (i = 0; i < interleave_factor; i++) ++ regs[i] = gen_rtx_REG (SImode, i); ++ else ++ for (i = 0; i < interleave_factor; i++) ++ regs[i] = gen_reg_rtx (SImode); ++ ++ dst = copy_addr_to_reg (XEXP (dstbase, 0)); ++ src = copy_addr_to_reg (XEXP (srcbase, 0)); ++ ++ srcoffset = dstoffset = 0; ++ ++ /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST. ++ For copying the last bytes we want to subtract this offset again. */ ++ src_autoinc = dst_autoinc = 0; ++ ++ for (i = 0; i < interleave_factor; i++) ++ regnos[i] = i; ++ ++ /* Copy BLOCK_SIZE_BYTES chunks. */ ++ ++ for (i = 0; i + block_size_bytes <= length; i += block_size_bytes) ++ { ++ /* Load words. */ ++ if (src_aligned && interleave_factor > 1) ++ { ++ emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src, ++ TRUE, srcbase, &srcoffset)); ++ src_autoinc += UNITS_PER_WORD * interleave_factor; ++ } ++ else ++ { ++ for (j = 0; j < interleave_factor; j++) ++ { ++ addr = plus_constant (src, srcoffset + j * UNITS_PER_WORD ++ - src_autoinc); ++ mem = adjust_automodify_address (srcbase, SImode, addr, ++ srcoffset + j * UNITS_PER_WORD); ++ emit_insn (gen_unaligned_loadsi (regs[j], mem)); ++ } ++ srcoffset += block_size_bytes; ++ } ++ ++ /* Store words. */ ++ if (dst_aligned && interleave_factor > 1) ++ { ++ emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst, ++ TRUE, dstbase, &dstoffset)); ++ dst_autoinc += UNITS_PER_WORD * interleave_factor; ++ } ++ else ++ { ++ for (j = 0; j < interleave_factor; j++) ++ { ++ addr = plus_constant (dst, dstoffset + j * UNITS_PER_WORD ++ - dst_autoinc); ++ mem = adjust_automodify_address (dstbase, SImode, addr, ++ dstoffset + j * UNITS_PER_WORD); ++ emit_insn (gen_unaligned_storesi (mem, regs[j])); ++ } ++ dstoffset += block_size_bytes; ++ } ++ ++ remaining -= block_size_bytes; ++ } ++ ++ /* Copy any whole words left (note these aren't interleaved with any ++ subsequent halfword/byte load/stores in the interests of simplicity). */ ++ ++ words = remaining / UNITS_PER_WORD; ++ ++ gcc_assert (words < interleave_factor); ++ ++ if (src_aligned && words > 1) ++ { ++ emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase, ++ &srcoffset)); ++ src_autoinc += UNITS_PER_WORD * words; ++ } ++ else ++ { ++ for (j = 0; j < words; j++) ++ { ++ addr = plus_constant (src, ++ srcoffset + j * UNITS_PER_WORD - src_autoinc); ++ mem = adjust_automodify_address (srcbase, SImode, addr, ++ srcoffset + j * UNITS_PER_WORD); ++ emit_insn (gen_unaligned_loadsi (regs[j], mem)); ++ } ++ srcoffset += words * UNITS_PER_WORD; ++ } ++ ++ if (dst_aligned && words > 1) ++ { ++ emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase, ++ &dstoffset)); ++ dst_autoinc += words * UNITS_PER_WORD; ++ } ++ else ++ { ++ for (j = 0; j < words; j++) ++ { ++ addr = plus_constant (dst, ++ dstoffset + j * UNITS_PER_WORD - dst_autoinc); ++ mem = adjust_automodify_address (dstbase, SImode, addr, ++ dstoffset + j * UNITS_PER_WORD); ++ emit_insn (gen_unaligned_storesi (mem, regs[j])); ++ } ++ dstoffset += words * UNITS_PER_WORD; ++ } ++ ++ remaining -= words * UNITS_PER_WORD; ++ ++ gcc_assert (remaining < 4); ++ ++ /* Copy a halfword if necessary. 
*/ ++ ++ if (remaining >= 2) ++ { ++ halfword_tmp = gen_reg_rtx (SImode); ++ ++ addr = plus_constant (src, srcoffset - src_autoinc); ++ mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset); ++ emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem)); ++ ++ /* Either write out immediately, or delay until we've loaded the last ++ byte, depending on interleave factor. */ ++ if (interleave_factor == 1) ++ { ++ addr = plus_constant (dst, dstoffset - dst_autoinc); ++ mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset); ++ emit_insn (gen_unaligned_storehi (mem, ++ gen_lowpart (HImode, halfword_tmp))); ++ halfword_tmp = NULL; ++ dstoffset += 2; ++ } ++ ++ remaining -= 2; ++ srcoffset += 2; ++ } ++ ++ gcc_assert (remaining < 2); ++ ++ /* Copy last byte. */ ++ ++ if ((remaining & 1) != 0) ++ { ++ byte_tmp = gen_reg_rtx (SImode); ++ ++ addr = plus_constant (src, srcoffset - src_autoinc); ++ mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset); ++ emit_move_insn (gen_lowpart (QImode, byte_tmp), mem); ++ ++ if (interleave_factor == 1) ++ { ++ addr = plus_constant (dst, dstoffset - dst_autoinc); ++ mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset); ++ emit_move_insn (mem, gen_lowpart (QImode, byte_tmp)); ++ byte_tmp = NULL; ++ dstoffset++; ++ } ++ ++ remaining--; ++ srcoffset++; ++ } ++ ++ /* Store last halfword if we haven't done so already. */ ++ ++ if (halfword_tmp) ++ { ++ addr = plus_constant (dst, dstoffset - dst_autoinc); ++ mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset); ++ emit_insn (gen_unaligned_storehi (mem, ++ gen_lowpart (HImode, halfword_tmp))); ++ dstoffset += 2; ++ } ++ ++ /* Likewise for last byte. */ ++ ++ if (byte_tmp) ++ { ++ addr = plus_constant (dst, dstoffset - dst_autoinc); ++ mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset); ++ emit_move_insn (mem, gen_lowpart (QImode, byte_tmp)); ++ dstoffset++; ++ } ++ ++ gcc_assert (remaining == 0 && srcoffset == dstoffset); ++} ++ ++/* From mips_adjust_block_mem: ++ ++ Helper function for doing a loop-based block operation on memory ++ reference MEM. Each iteration of the loop will operate on LENGTH ++ bytes of MEM. ++ ++ Create a new base register for use within the loop and point it to ++ the start of MEM. Create a new memory reference that uses this ++ register. Store them in *LOOP_REG and *LOOP_MEM respectively. */ ++ ++static void ++arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg, ++ rtx *loop_mem) ++{ ++ *loop_reg = copy_addr_to_reg (XEXP (mem, 0)); ++ ++ /* Although the new mem does not refer to a known location, ++ it does keep up to LENGTH bytes of alignment. */ ++ *loop_mem = change_address (mem, BLKmode, *loop_reg); ++ set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT)); ++} ++ ++/* From mips_block_move_loop: ++ ++ Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER ++ bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that ++ the memory regions do not overlap. */ ++ ++static void ++arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length, ++ unsigned int interleave_factor, ++ HOST_WIDE_INT bytes_per_iter) ++{ ++ rtx label, src_reg, dest_reg, final_src, test; ++ HOST_WIDE_INT leftover; ++ ++ leftover = length % bytes_per_iter; ++ length -= leftover; ++ ++ /* Create registers and memory references for use within the loop. 
*/ ++ arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src); ++ arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest); ++ ++ /* Calculate the value that SRC_REG should have after the last iteration of ++ the loop. */ ++ final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length), ++ 0, 0, OPTAB_WIDEN); ++ ++ /* Emit the start of the loop. */ ++ label = gen_label_rtx (); ++ emit_label (label); ++ ++ /* Emit the loop body. */ ++ arm_block_move_unaligned_straight (dest, src, bytes_per_iter, ++ interleave_factor); ++ ++ /* Move on to the next block. */ ++ emit_move_insn (src_reg, plus_constant (src_reg, bytes_per_iter)); ++ emit_move_insn (dest_reg, plus_constant (dest_reg, bytes_per_iter)); ++ ++ /* Emit the loop condition. */ ++ test = gen_rtx_NE (VOIDmode, src_reg, final_src); ++ emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label)); ++ ++ /* Mop up any left-over bytes. */ ++ if (leftover) ++ arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor); ++} ++ ++/* Emit a block move when either the source or destination is unaligned (not ++ aligned to a four-byte boundary). This may need further tuning depending on ++ core type, optimize_size setting, etc. */ ++ ++static int ++arm_movmemqi_unaligned (rtx *operands) ++{ ++ HOST_WIDE_INT length = INTVAL (operands[2]); ++ ++ if (optimize_size) ++ { ++ bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD; ++ bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD; ++ /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit ++ size of code if optimizing for size. We'll use ldm/stm if src_aligned ++ or dst_aligned though: allow more interleaving in those cases since the ++ resulting code can be smaller. */ ++ unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1; ++ HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4; ++ ++ if (length > 12) ++ arm_block_move_unaligned_loop (operands[0], operands[1], length, ++ interleave_factor, bytes_per_iter); ++ else ++ arm_block_move_unaligned_straight (operands[0], operands[1], length, ++ interleave_factor); ++ } ++ else ++ { ++ /* Note that the loop created by arm_block_move_unaligned_loop may be ++ subject to loop unrolling, which makes tuning this condition a little ++ redundant. */ ++ if (length > 32) ++ arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16); ++ else ++ arm_block_move_unaligned_straight (operands[0], operands[1], length, 4); ++ } ++ ++ return 1; ++} ++ + int + arm_gen_movmemqi (rtx *operands) + { +@@ -10815,8 +11144,13 @@ + + if (GET_CODE (operands[2]) != CONST_INT + || GET_CODE (operands[3]) != CONST_INT +- || INTVAL (operands[2]) > 64 +- || INTVAL (operands[3]) & 3) ++ || INTVAL (operands[2]) > 64) ++ return 0; ++ ++ if (unaligned_access && (INTVAL (operands[3]) & 3) != 0) ++ return arm_movmemqi_unaligned (operands); ++ ++ if (INTVAL (operands[3]) & 3) + return 0; + + dstbase = operands[0]; + +=== modified file 'gcc/config/arm/arm.h' +--- old/gcc/config/arm/arm.h 2011-10-19 17:01:50 +0000 ++++ new/gcc/config/arm/arm.h 2011-11-21 01:45:54 +0000 +@@ -47,6 +47,8 @@ + { \ + if (TARGET_DSP_MULTIPLY) \ + builtin_define ("__ARM_FEATURE_DSP"); \ ++ if (unaligned_access) \ ++ builtin_define ("__ARM_FEATURE_UNALIGNED"); \ + /* Define __arm__ even when in thumb mode, for \ + consistency with armcc. 
*/ \ + builtin_define ("__arm__"); \ + +=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c' +--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c 2011-10-19 22:56:19 +0000 +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_unaligned } */ ++/* { dg-options "-O2" } */ ++ ++#include <string.h> ++ ++void unknown_alignment (char *dest, char *src) ++{ ++ memcpy (dest, src, 15); ++} ++ ++/* We should see three unaligned word loads and store pairs, one unaligned ++ ldrh/strh pair, and an ldrb/strb pair. Sanity check that. */ ++ ++/* { dg-final { scan-assembler-times "@ unaligned" 8 } } */ ++/* { dg-final { scan-assembler-times "ldrh" 1 } } */ ++/* { dg-final { scan-assembler-times "strh" 1 } } */ ++/* { dg-final { scan-assembler-times "ldrb" 1 } } */ ++/* { dg-final { scan-assembler-times "strb" 1 } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c' +--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c 2011-10-19 22:56:19 +0000 +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_unaligned } */ ++/* { dg-options "-O2" } */ ++ ++#include <string.h> ++ ++char dest[16]; ++ ++void aligned_dest (char *src) ++{ ++ memcpy (dest, src, 15); ++} ++ ++/* Expect a multi-word store for the main part of the copy, but subword ++ loads/stores for the remainder. */ ++ ++/* { dg-final { scan-assembler-times "stmia" 1 } } */ ++/* { dg-final { scan-assembler-times "ldrh" 1 } } */ ++/* { dg-final { scan-assembler-times "strh" 1 } } */ ++/* { dg-final { scan-assembler-times "ldrb" 1 } } */ ++/* { dg-final { scan-assembler-times "strb" 1 } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c' +--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c 2011-10-19 22:56:19 +0000 +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_unaligned } */ ++/* { dg-options "-O2" } */ ++ ++#include <string.h> ++ ++char src[16]; ++ ++void aligned_src (char *dest) ++{ ++ memcpy (dest, src, 15); ++} ++ ++/* Expect a multi-word load for the main part of the copy, but subword ++ loads/stores for the remainder. */ ++ ++/* { dg-final { scan-assembler-times "ldmia" 1 } } */ ++/* { dg-final { scan-assembler-times "ldrh" 1 } } */ ++/* { dg-final { scan-assembler-times "strh" 1 } } */ ++/* { dg-final { scan-assembler-times "ldrb" 1 } } */ ++/* { dg-final { scan-assembler-times "strb" 1 } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c' +--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c 2011-10-19 22:56:19 +0000 +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_unaligned } */ ++/* { dg-options "-O2" } */ ++ ++#include <string.h> ++ ++char src[16]; ++char dest[16]; ++ ++void aligned_both (void) ++{ ++ memcpy (dest, src, 15); ++} ++ ++/* We know both src and dest to be aligned: expect multiword loads/stores. 
*/ ++ ++/* { dg-final { scan-assembler-times "ldmia" 1 } } */ ++/* { dg-final { scan-assembler-times "stmia" 1 } } */ + +=== modified file 'gcc/testsuite/lib/target-supports.exp' +--- old/gcc/testsuite/lib/target-supports.exp 2011-10-23 13:33:07 +0000 ++++ new/gcc/testsuite/lib/target-supports.exp 2011-11-21 01:45:54 +0000 +@@ -1894,6 +1894,18 @@ + }] + } + ++# Return 1 if this is an ARM target that supports unaligned word/halfword ++# load/store instructions. ++ ++proc check_effective_target_arm_unaligned { } { ++ return [check_no_compiler_messages arm_unaligned assembly { ++ #ifndef __ARM_FEATURE_UNALIGNED ++ #error no unaligned support ++ #endif ++ int i; ++ }] ++} ++ + # Add the options needed for NEON. We need either -mfloat-abi=softfp + # or -mfloat-abi=hard, but if one is already specified by the + # multilib, use it. Similarly, if a -mfpu option already enables + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106842.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106842.patch new file mode 100644 index 0000000000..2cf2741ba6 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106842.patch @@ -0,0 +1,375 @@ +2011-11-22 Ira Rosen <ira.rosen@linaro.org> + + Backport from mainline: + + 2011-10-06 Jakub Jelinek <jakub@redhat.com> + + gcc/ + PR tree-optimization/50596 + * tree-vectorizer.h (vect_is_simple_cond): New prototype. + (NUM_PATTERNS): Change to 6. + * tree-vect-patterns.c (vect_recog_mixed_size_cond_pattern): New + function. + (vect_vect_recog_func_ptrs): Add vect_recog_mixed_size_cond_pattern. + (vect_mark_pattern_stmts): Don't create stmt_vinfo for def_stmt + if it already has one, and don't set STMT_VINFO_VECTYPE in it + if it is already set. + * tree-vect-stmts.c (vect_mark_stmts_to_be_vectorized): Handle + COND_EXPR in pattern stmts. + (vect_is_simple_cond): No longer static. + + gcc/testsuite: + PR tree-optimization/50596 + * gcc.dg/vect/vect-cond-8.c: New test. + + 2011-10-07 Jakub Jelinek <jakub@redhat.com> + + gcc/ + PR tree-optimization/50650 + * tree-vect-patterns.c (vect_recog_mixed_size_cond_pattern): Don't + call vect_is_simple_cond here, instead fail if cond_expr isn't + COMPARISON_CLASS_P or if get_vectype_for_scalar_type returns NULL + for cond_expr's first operand. + * tree-vect-stmts.c (vect_is_simple_cond): Static again. + * tree-vectorizer.h (vect_is_simple_cond): Remove prototype. + + + gcc/ + * tree-vect-patterns.c (vect_recog_mixed_size_cond_pattern): Reduce + it to integral types only. + + gcc/testsuite/ + * gcc.dg/vect/pr30858.c: Expect the error message twice for targets + with multiple vector sizes. + * gcc.dg/vect/vect-cond-8.c: Rename to... + * gcc.dg/vect/vect-cond-8a.c: ... this and change the type from float + to int. + * lib/target-supports.exp (check_effective_target_vect_condition): + Return true for NEON. + +=== modified file 'gcc/testsuite/gcc.dg/vect/pr30858.c' +Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/pr30858.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.dg/vect/pr30858.c 2012-01-04 15:33:52.000000000 -0800 ++++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/pr30858.c 2012-03-05 16:23:47.748983031 -0800 +@@ -11,5 +11,6 @@ + } + + /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "Unknown def-use cycle pattern." 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "Unknown def-use cycle pattern." 
1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "Unknown def-use cycle pattern." 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-cond-8a.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-cond-8a.c 2012-03-05 16:23:47.748983031 -0800 +@@ -0,0 +1,75 @@ ++/* { dg-require-effective-target vect_condition } */ ++ ++#include "tree-vect.h" ++ ++#define N 1024 ++int a[N], b[N], c[N]; ++char d[N], e[N], f[N]; ++unsigned char k[N]; ++ ++__attribute__((noinline, noclone)) void ++f1 (void) ++{ ++ int i; ++ for (i = 0; i < N; ++i) ++ k[i] = a[i] < b[i] ? 17 : 0; ++} ++ ++__attribute__((noinline, noclone)) void ++f2 (void) ++{ ++ int i; ++ for (i = 0; i < N; ++i) ++ k[i] = a[i] < b[i] ? 0 : 24; ++} ++ ++__attribute__((noinline, noclone)) void ++f3 (void) ++{ ++ int i; ++ for (i = 0; i < N; ++i) ++ k[i] = a[i] < b[i] ? 51 : 12; ++} ++ ++int ++main () ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ switch (i % 9) ++ { ++ case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break; ++ case 1: a[i] = 0; b[i] = 0; break; ++ case 2: a[i] = i + 1; b[i] = - i - 1; break; ++ case 3: a[i] = i; b[i] = i + 7; break; ++ case 4: a[i] = i; b[i] = i; break; ++ case 5: a[i] = i + 16; b[i] = i + 3; break; ++ case 6: a[i] = - i - 5; b[i] = - i; break; ++ case 7: a[i] = - i; b[i] = - i; break; ++ case 8: a[i] = - i; b[i] = - i - 7; break; ++ } ++ d[i] = i; ++ e[i] = 2 * i; ++ } ++ f1 (); ++ for (i = 0; i < N; i++) ++ if (k[i] != ((i % 3) == 0 ? 17 : 0)) ++ abort (); ++ f2 (); ++ for (i = 0; i < N; i++) ++ if (k[i] != ((i % 3) == 0 ? 0 : 24)) ++ abort (); ++ f3 (); ++ for (i = 0; i < N; i++) ++ if (k[i] != ((i % 3) == 0 ? 51 : 12)) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 3 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ +Index: gcc-4_6-branch/gcc/tree-vect-patterns.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/tree-vect-patterns.c 2012-03-05 16:23:10.000000000 -0800 ++++ gcc-4_6-branch/gcc/tree-vect-patterns.c 2012-03-05 16:23:47.748983031 -0800 +@@ -50,13 +50,16 @@ + tree *); + static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **, + tree *, tree *); ++static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **, ++ tree *, tree *); + static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { + vect_recog_widen_mult_pattern, + vect_recog_widen_sum_pattern, + vect_recog_dot_prod_pattern, + vect_recog_pow_pattern, + vect_recog_over_widening_pattern, +- vect_recog_widen_shift_pattern}; ++ vect_recog_widen_shift_pattern, ++ vect_recog_mixed_size_cond_pattern}; + + + /* Function widened_name_p +@@ -1441,6 +1444,118 @@ + return pattern_stmt; + } + ++/* Function vect_recog_mixed_size_cond_pattern ++ ++ Try to find the following pattern: ++ ++ type x_t, y_t; ++ TYPE a_T, b_T, c_T; ++ loop: ++ S1 a_T = x_t CMP y_t ? b_T : c_T; ++ ++ where type 'TYPE' is an integral type which has different size ++ from 'type'. b_T and c_T are constants and if 'TYPE' is wider ++ than 'type', the constants need to fit into an integer type ++ with the same width as 'type'. ++ ++ Input: ++ ++ * LAST_STMT: A stmt from which the pattern search begins. 
++ ++ Output: ++ ++ * TYPE_IN: The type of the input arguments to the pattern. ++ ++ * TYPE_OUT: The type of the output of this pattern. ++ ++ * Return value: A new stmt that will be used to replace the pattern. ++ Additionally a def_stmt is added. ++ ++ a_it = x_t CMP y_t ? b_it : c_it; ++ a_T = (TYPE) a_it; */ ++ ++static gimple ++vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in, ++ tree *type_out) ++{ ++ gimple last_stmt = VEC_index (gimple, *stmts, 0); ++ tree cond_expr, then_clause, else_clause; ++ stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info; ++ tree type, vectype, comp_vectype, comp_type, op, tmp; ++ enum machine_mode cmpmode; ++ gimple pattern_stmt, def_stmt; ++ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); ++ ++ if (!is_gimple_assign (last_stmt) ++ || gimple_assign_rhs_code (last_stmt) != COND_EXPR ++ || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def) ++ return NULL; ++ ++ op = gimple_assign_rhs1 (last_stmt); ++ cond_expr = TREE_OPERAND (op, 0); ++ then_clause = TREE_OPERAND (op, 1); ++ else_clause = TREE_OPERAND (op, 2); ++ ++ if (TREE_CODE (then_clause) != INTEGER_CST ++ || TREE_CODE (else_clause) != INTEGER_CST) ++ return NULL; ++ ++ if (!COMPARISON_CLASS_P (cond_expr)) ++ return NULL; ++ ++ type = gimple_expr_type (last_stmt); ++ comp_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0)); ++ if (!INTEGRAL_TYPE_P (comp_type) ++ || !INTEGRAL_TYPE_P (type)) ++ return NULL; ++ ++ comp_vectype = get_vectype_for_scalar_type (comp_type); ++ if (comp_vectype == NULL_TREE) ++ return NULL; ++ ++ cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype)); ++ ++ if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode)) ++ return NULL; ++ ++ vectype = get_vectype_for_scalar_type (type); ++ if (vectype == NULL_TREE) ++ return NULL; ++ ++ if (types_compatible_p (vectype, comp_vectype)) ++ return NULL; ++ ++ if (!expand_vec_cond_expr_p (comp_vectype, TYPE_MODE (comp_vectype))) ++ return NULL; ++ ++ if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode)) ++ { ++ if (!int_fits_type_p (then_clause, comp_type) ++ || !int_fits_type_p (else_clause, comp_type)) ++ return NULL; ++ } ++ ++ tmp = build3 (COND_EXPR, comp_type, unshare_expr (cond_expr), ++ fold_convert (comp_type, then_clause), ++ fold_convert (comp_type, else_clause)); ++ def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (comp_type, NULL), tmp); ++ ++ pattern_stmt ++ = gimple_build_assign_with_ops (NOP_EXPR, ++ vect_recog_temp_ssa_var (type, NULL), ++ gimple_assign_lhs (def_stmt), NULL_TREE); ++ ++ STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt; ++ def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); ++ set_vinfo_for_stmt (def_stmt, def_stmt_info); ++ STMT_VINFO_VECTYPE (def_stmt_info) = comp_vectype; ++ *type_in = vectype; ++ *type_out = vectype; ++ ++ return pattern_stmt; ++} ++ ++ + /* Mark statements that are involved in a pattern. 
*/ + + static inline void +@@ -1468,14 +1583,18 @@ + if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)) + { + def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info); +- set_vinfo_for_stmt (def_stmt, +- new_stmt_vec_info (def_stmt, loop_vinfo, NULL)); +- gimple_set_bb (def_stmt, gimple_bb (orig_stmt)); + def_stmt_info = vinfo_for_stmt (def_stmt); ++ if (def_stmt_info == NULL) ++ { ++ def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); ++ set_vinfo_for_stmt (def_stmt, def_stmt_info); ++ } ++ gimple_set_bb (def_stmt, gimple_bb (orig_stmt)); + STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt; + STMT_VINFO_DEF_TYPE (def_stmt_info) + = STMT_VINFO_DEF_TYPE (orig_stmt_info); +- STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype; ++ if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE) ++ STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype; + } + } + +Index: gcc-4_6-branch/gcc/tree-vect-stmts.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/tree-vect-stmts.c 2012-03-05 16:23:11.000000000 -0800 ++++ gcc-4_6-branch/gcc/tree-vect-stmts.c 2012-03-05 16:23:47.748983031 -0800 +@@ -655,20 +655,40 @@ + tree rhs = gimple_assign_rhs1 (stmt); + unsigned int op_num; + tree op; ++ enum tree_code rhs_code; + switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))) + { + case GIMPLE_SINGLE_RHS: +- op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)); +- for (i = 0; i < op_num; i++) +- { +- op = TREE_OPERAND (rhs, i); +- if (!process_use (stmt, op, loop_vinfo, live_p, relevant, +- &worklist)) +- { +- VEC_free (gimple, heap, worklist); +- return false; +- } +- } ++ op = gimple_assign_rhs1 (stmt); ++ rhs_code = gimple_assign_rhs_code (stmt); ++ i = 0; ++ if (rhs_code == COND_EXPR ++ && COMPARISON_CLASS_P (TREE_OPERAND (op, 0))) ++ { ++ op = TREE_OPERAND (op, 0); ++ if (!process_use (stmt, TREE_OPERAND (op, 0), ++ loop_vinfo, ++ live_p, relevant, &worklist) ++ || !process_use (stmt, TREE_OPERAND (op, 1), ++ loop_vinfo, ++ live_p, relevant, &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ i = 1; ++ } ++ op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)); ++ for (i; i < op_num; i++) ++ { ++ op = TREE_OPERAND (rhs, i); ++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ } + break; + + case GIMPLE_BINARY_RHS: +Index: gcc-4_6-branch/gcc/tree-vectorizer.h +=================================================================== +--- gcc-4_6-branch.orig/gcc/tree-vectorizer.h 2012-03-05 16:23:11.000000000 -0800 ++++ gcc-4_6-branch/gcc/tree-vectorizer.h 2012-03-05 16:23:47.748983031 -0800 +@@ -917,7 +917,7 @@ + Additional pattern recognition functions can (and will) be added + in the future. */ + typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); +-#define NUM_PATTERNS 6 ++#define NUM_PATTERNS 7 + void vect_pattern_recog (loop_vec_info); + + /* In tree-vectorizer.c. */ diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106843.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106843.patch new file mode 100644 index 0000000000..ea42ad6eeb --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106843.patch @@ -0,0 +1,805 @@ +2011-11-22 Ira Rosen <ira.rosen@linaro.org> + Backport from mainline: + + 2011-11-06 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vectorizer.h (vectorizable_condition): Add argument. 
+ * tree-vect-loop.c (vectorizable_reduction): Fail for condition + in SLP. Update calls to vectorizable_condition. + * tree-vect-stmts.c (vect_is_simple_cond): Add basic block info to + the arguments. Pass it to vect_is_simple_use_1. + (vectorizable_condition): Add slp_node to the arguments. Support + vectorization of basic blocks. Fail for reduction in SLP. Update + calls to vect_is_simple_cond and vect_is_simple_use. Support SLP: + call vect_get_slp_defs to get vector operands. + (vect_analyze_stmt): Update calls to vectorizable_condition. + (vect_transform_stmt): Likewise. + * tree-vect-slp.c (vect_create_new_slp_node): Handle COND_EXPR. + (vect_get_and_check_slp_defs): Handle COND_EXPR. Allow pattern + def stmts. + (vect_build_slp_tree): Handle COND_EXPR. + (vect_analyze_slp_instance): Push pattern statements to root node. + (vect_get_constant_vectors): Fix comments. Handle COND_EXPR. + + gcc/testsuite/ + * gcc.dg/vect/bb-slp-cond-1.c: New test. + * gcc.dg/vect/slp-cond-1.c: New test. + +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c 2011-11-20 08:24:08 +0000 +@@ -0,0 +1,46 @@ ++/* { dg-require-effective-target vect_condition } */ ++ ++#include "tree-vect.h" ++ ++#define N 128 ++ ++__attribute__((noinline, noclone)) void ++foo (int *a, int stride) ++{ ++ int i; ++ ++ for (i = 0; i < N/stride; i++, a += stride) ++ { ++ a[0] = a[0] ? 1 : 5; ++ a[1] = a[1] ? 2 : 6; ++ a[2] = a[2] ? 3 : 7; ++ a[3] = a[3] ? 4 : 8; ++ } ++} ++ ++ ++int a[N]; ++int main () ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ a[i] = i; ++ ++ foo (a, 4); ++ ++ for (i = 1; i < N; i++) ++ if (a[i] != i%4 + 1) ++ abort (); ++ ++ if (a[0] != 5) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */ ++/* { dg-final { cleanup-tree-dump "slp" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/slp-cond-1.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-cond-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-cond-1.c 2011-11-20 08:24:08 +0000 +@@ -0,0 +1,126 @@ ++/* { dg-require-effective-target vect_condition } */ ++#include "tree-vect.h" ++ ++#define N 32 ++int a[N], b[N]; ++int d[N], e[N]; ++int k[N]; ++ ++__attribute__((noinline, noclone)) void ++f1 (void) ++{ ++ int i; ++ for (i = 0; i < N/4; i++) ++ { ++ k[4*i] = a[4*i] < b[4*i] ? 17 : 0; ++ k[4*i+1] = a[4*i+1] < b[4*i+1] ? 17 : 0; ++ k[4*i+2] = a[4*i+2] < b[4*i+2] ? 17 : 0; ++ k[4*i+3] = a[4*i+3] < b[4*i+3] ? 17 : 0; ++ } ++} ++ ++__attribute__((noinline, noclone)) void ++f2 (void) ++{ ++ int i; ++ for (i = 0; i < N/2; ++i) ++ { ++ k[2*i] = a[2*i] < b[2*i] ? 0 : 24; ++ k[2*i+1] = a[2*i+1] < b[2*i+1] ? 7 : 4; ++ } ++} ++ ++__attribute__((noinline, noclone)) void ++f3 (void) ++{ ++ int i; ++ for (i = 0; i < N/2; ++i) ++ { ++ k[2*i] = a[2*i] < b[2*i] ? 51 : 12; ++ k[2*i+1] = a[2*i+1] > b[2*i+1] ? 51 : 12; ++ } ++} ++ ++__attribute__((noinline, noclone)) void ++f4 (void) ++{ ++ int i; ++ for (i = 0; i < N/2; ++i) ++ { ++ int d0 = d[2*i], e0 = e[2*i]; ++ int d1 = d[2*i+1], e1 = e[2*i+1]; ++ k[2*i] = a[2*i] >= b[2*i] ? d0 : e0; ++ k[2*i+1] = a[2*i+1] >= b[2*i+1] ? 
d1 : e1; ++ } ++} ++ ++int ++main () ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ switch (i % 9) ++ { ++ case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break; ++ case 1: a[i] = 0; b[i] = 0; break; ++ case 2: a[i] = i + 1; b[i] = - i - 1; break; ++ case 3: a[i] = i; b[i] = i + 7; break; ++ case 4: a[i] = i; b[i] = i; break; ++ case 5: a[i] = i + 16; b[i] = i + 3; break; ++ case 6: a[i] = - i - 5; b[i] = - i; break; ++ case 7: a[i] = - i; b[i] = - i; break; ++ case 8: a[i] = - i; b[i] = - i - 7; break; ++ } ++ d[i] = i; ++ e[i] = 2 * i; ++ } ++ f1 (); ++ for (i = 0; i < N; i++) ++ if (k[i] != ((i % 3) == 0 ? 17 : 0)) ++ abort (); ++ ++ f2 (); ++ for (i = 0; i < N; i++) ++ { ++ switch (i % 9) ++ { ++ case 0: ++ case 6: ++ if (k[i] != ((i/9 % 2) == 0 ? 0 : 7)) ++ abort (); ++ break; ++ case 1: ++ case 5: ++ case 7: ++ if (k[i] != ((i/9 % 2) == 0 ? 4 : 24)) ++ abort (); ++ break; ++ case 2: ++ case 4: ++ case 8: ++ if (k[i] != ((i/9 % 2) == 0 ? 24 : 4)) ++ abort (); ++ break; ++ case 3: ++ if (k[i] != ((i/9 % 2) == 0 ? 7 : 0)) ++ abort (); ++ break; ++ } ++ } ++ ++ f3 (); ++ ++ f4 (); ++ for (i = 0; i < N; i++) ++ if (k[i] != ((i % 3) == 0 ? e[i] : d[i])) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/tree-vect-loop.c' +--- old/gcc/tree-vect-loop.c 2011-11-14 11:38:08 +0000 ++++ new/gcc/tree-vect-loop.c 2011-11-20 08:24:08 +0000 +@@ -4087,6 +4087,9 @@ + gcc_unreachable (); + } + ++ if (code == COND_EXPR && slp_node) ++ return false; ++ + scalar_dest = gimple_assign_lhs (stmt); + scalar_type = TREE_TYPE (scalar_dest); + if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type) +@@ -4161,7 +4164,7 @@ + + if (code == COND_EXPR) + { +- if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0)) ++ if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL)) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "unsupported condition in reduction"); +@@ -4433,7 +4436,7 @@ + gcc_assert (!slp_node); + vectorizable_condition (stmt, gsi, vec_stmt, + PHI_RESULT (VEC_index (gimple, phis, 0)), +- reduc_index); ++ reduc_index, NULL); + /* Multiple types are not supported for condition. 
*/ + break; + } + +=== modified file 'gcc/tree-vect-slp.c' +--- old/gcc/tree-vect-slp.c 2011-11-14 11:38:08 +0000 ++++ new/gcc/tree-vect-slp.c 2011-11-21 06:58:40 +0000 +@@ -109,7 +109,11 @@ + if (is_gimple_call (stmt)) + nops = gimple_call_num_args (stmt); + else if (is_gimple_assign (stmt)) +- nops = gimple_num_ops (stmt) - 1; ++ { ++ nops = gimple_num_ops (stmt) - 1; ++ if (gimple_assign_rhs_code (stmt) == COND_EXPR) ++ nops = 4; ++ } + else + return NULL; + +@@ -190,20 +194,51 @@ + bool different_types = false; + bool pattern = false; + slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info; ++ int op_idx = 1; ++ tree compare_rhs = NULL_TREE, rhs = NULL_TREE; ++ int cond_idx = -1; + + if (loop_vinfo) + loop = LOOP_VINFO_LOOP (loop_vinfo); + + if (is_gimple_call (stmt)) + number_of_oprnds = gimple_call_num_args (stmt); ++ else if (is_gimple_assign (stmt)) ++ { ++ number_of_oprnds = gimple_num_ops (stmt) - 1; ++ if (gimple_assign_rhs_code (stmt) == COND_EXPR) ++ { ++ number_of_oprnds = 4; ++ cond_idx = 0; ++ rhs = gimple_assign_rhs1 (stmt); ++ } ++ } + else +- number_of_oprnds = gimple_num_ops (stmt) - 1; ++ return false; + + for (i = 0; i < number_of_oprnds; i++) + { +- oprnd = gimple_op (stmt, i + 1); ++ if (compare_rhs) ++ oprnd = compare_rhs; ++ else ++ oprnd = gimple_op (stmt, op_idx++); ++ + oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i); + ++ if (-1 < cond_idx && cond_idx < 4) ++ { ++ if (compare_rhs) ++ compare_rhs = NULL_TREE; ++ else ++ oprnd = TREE_OPERAND (rhs, cond_idx++); ++ } ++ ++ if (COMPARISON_CLASS_P (oprnd)) ++ { ++ compare_rhs = TREE_OPERAND (oprnd, 1); ++ oprnd = TREE_OPERAND (oprnd, 0); ++ } ++ + if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def, + &dt) + || (!def_stmt && dt != vect_constant_def)) +@@ -243,8 +278,7 @@ + def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); + dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt)); + +- if (dt == vect_unknown_def_type +- || STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (def_stmt))) ++ if (dt == vect_unknown_def_type) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "Unsupported pattern."); +@@ -423,6 +457,7 @@ + VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node); + gimple stmt = VEC_index (gimple, stmts, 0); + enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK; ++ enum tree_code first_cond_code = ERROR_MARK; + tree lhs; + bool stop_recursion = false, need_same_oprnds = false; + tree vectype, scalar_type, first_op1 = NULL_TREE; +@@ -439,11 +474,18 @@ + VEC (slp_oprnd_info, heap) *oprnds_info; + unsigned int nops; + slp_oprnd_info oprnd_info; ++ tree cond; + + if (is_gimple_call (stmt)) + nops = gimple_call_num_args (stmt); ++ else if (is_gimple_assign (stmt)) ++ { ++ nops = gimple_num_ops (stmt) - 1; ++ if (gimple_assign_rhs_code (stmt) == COND_EXPR) ++ nops = 4; ++ } + else +- nops = gimple_num_ops (stmt) - 1; ++ return false; + + oprnds_info = vect_create_oprnd_info (nops, group_size); + +@@ -484,6 +526,22 @@ + return false; + } + ++ if (is_gimple_assign (stmt) ++ && gimple_assign_rhs_code (stmt) == COND_EXPR ++ && (cond = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0)) ++ && !COMPARISON_CLASS_P (cond)) ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ { ++ fprintf (vect_dump, ++ "Build SLP failed: condition is not comparison "); ++ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); ++ } ++ ++ vect_free_oprnd_info (&oprnds_info); ++ return false; ++ } ++ + scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy); + vectype = 
get_vectype_for_scalar_type (scalar_type); + if (!vectype) +@@ -737,7 +795,8 @@ + + /* Not memory operation. */ + if (TREE_CODE_CLASS (rhs_code) != tcc_binary +- && TREE_CODE_CLASS (rhs_code) != tcc_unary) ++ && TREE_CODE_CLASS (rhs_code) != tcc_unary ++ && rhs_code != COND_EXPR) + { + if (vect_print_dump_info (REPORT_SLP)) + { +@@ -750,6 +809,26 @@ + return false; + } + ++ if (rhs_code == COND_EXPR) ++ { ++ tree cond_expr = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0); ++ ++ if (i == 0) ++ first_cond_code = TREE_CODE (cond_expr); ++ else if (first_cond_code != TREE_CODE (cond_expr)) ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ { ++ fprintf (vect_dump, "Build SLP failed: different" ++ " operation"); ++ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); ++ } ++ ++ vect_free_oprnd_info (&oprnds_info); ++ return false; ++ } ++ } ++ + /* Find the def-stmts. */ + if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt, + ncopies_for_cost, (i == 0), +@@ -1395,7 +1474,12 @@ + /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */ + while (next) + { +- VEC_safe_push (gimple, heap, scalar_stmts, next); ++ if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next)) ++ && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next))) ++ VEC_safe_push (gimple, heap, scalar_stmts, ++ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next))); ++ else ++ VEC_safe_push (gimple, heap, scalar_stmts, next); + next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); + } + } +@@ -1404,7 +1488,7 @@ + /* Collect reduction statements. */ + VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo); + for (i = 0; VEC_iterate (gimple, reductions, i, next); i++) +- VEC_safe_push (gimple, heap, scalar_stmts, next); ++ VEC_safe_push (gimple, heap, scalar_stmts, next); + } + + node = vect_create_new_slp_node (scalar_stmts); +@@ -2160,15 +2244,15 @@ + + For example, we have two scalar operands, s1 and s2 (e.g., group of + strided accesses of size two), while NUNITS is four (i.e., four scalars +- of this type can be packed in a vector). The output vector will contain +- two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES ++ of this type can be packed in a vector). The output vector will contain ++ two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES + will be 2). + + If GROUP_SIZE > NUNITS, the scalars will be split into several vectors + containing the operands. + + For example, NUNITS is four as before, and the group size is 8 +- (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and ++ (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and + {s5, s6, s7, s8}. */ + + number_of_copies = least_common_multiple (nunits, group_size) / group_size; +@@ -2180,8 +2264,18 @@ + { + if (is_store) + op = gimple_assign_rhs1 (stmt); +- else ++ else if (gimple_assign_rhs_code (stmt) != COND_EXPR) + op = gimple_op (stmt, op_num + 1); ++ else ++ { ++ if (op_num == 0 || op_num == 1) ++ { ++ tree cond = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0); ++ op = TREE_OPERAND (cond, op_num); ++ } ++ else ++ op = TREE_OPERAND (gimple_assign_rhs1 (stmt), op_num - 1); ++ } + + if (reduc_index != -1) + { + +=== modified file 'gcc/tree-vect-stmts.c' +--- old/gcc/tree-vect-stmts.c 2011-11-22 16:52:23 +0000 ++++ new/gcc/tree-vect-stmts.c 2011-11-22 17:10:17 +0000 +@@ -4816,7 +4816,7 @@ + condition operands are supportable using vec_is_simple_use. 
*/ + + static bool +-vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo) ++vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) + { + tree lhs, rhs; + tree def; +@@ -4831,7 +4831,7 @@ + if (TREE_CODE (lhs) == SSA_NAME) + { + gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs); +- if (!vect_is_simple_use (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def, ++ if (!vect_is_simple_use (lhs, loop_vinfo, bb_vinfo, &lhs_def_stmt, &def, + &dt)) + return false; + } +@@ -4842,7 +4842,7 @@ + if (TREE_CODE (rhs) == SSA_NAME) + { + gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs); +- if (!vect_is_simple_use (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def, ++ if (!vect_is_simple_use (rhs, loop_vinfo, bb_vinfo, &rhs_def_stmt, &def, + &dt)) + return false; + } +@@ -4868,7 +4868,8 @@ + + bool + vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi, +- gimple *vec_stmt, tree reduc_def, int reduc_index) ++ gimple *vec_stmt, tree reduc_def, int reduc_index, ++ slp_tree slp_node) + { + tree scalar_dest = NULL_TREE; + tree vec_dest = NULL_TREE; +@@ -4885,19 +4886,24 @@ + tree def; + enum vect_def_type dt, dts[4]; + int nunits = TYPE_VECTOR_SUBPARTS (vectype); +- int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; ++ int ncopies; + enum tree_code code; + stmt_vec_info prev_stmt_info = NULL; +- int j; ++ int i, j; ++ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); ++ VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL; ++ VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL; + +- /* FORNOW: unsupported in basic block SLP. */ +- gcc_assert (loop_vinfo); ++ if (slp_node || PURE_SLP_STMT (stmt_info)) ++ ncopies = 1; ++ else ++ ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; + + gcc_assert (ncopies >= 1); +- if (reduc_index && ncopies > 1) ++ if (reduc_index && (ncopies > 1 || STMT_SLP_TYPE (stmt_info))) + return false; /* FORNOW */ + +- if (!STMT_VINFO_RELEVANT_P (stmt_info)) ++ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) + return false; + + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def +@@ -4905,10 +4911,6 @@ + && reduc_def)) + return false; + +- /* FORNOW: SLP not supported. */ +- if (STMT_SLP_TYPE (stmt_info)) +- return false; +- + /* FORNOW: not yet supported. */ + if (STMT_VINFO_LIVE_P (stmt_info)) + { +@@ -4932,7 +4934,7 @@ + then_clause = TREE_OPERAND (op, 1); + else_clause = TREE_OPERAND (op, 2); + +- if (!vect_is_simple_cond (cond_expr, loop_vinfo)) ++ if (!vect_is_simple_cond (cond_expr, loop_vinfo, bb_vinfo)) + return false; + + /* We do not handle two different vector types for the condition +@@ -4944,7 +4946,7 @@ + if (TREE_CODE (then_clause) == SSA_NAME) + { + gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause); +- if (!vect_is_simple_use (then_clause, loop_vinfo, NULL, ++ if (!vect_is_simple_use (then_clause, loop_vinfo, bb_vinfo, + &then_def_stmt, &def, &dt)) + return false; + } +@@ -4956,7 +4958,7 @@ + if (TREE_CODE (else_clause) == SSA_NAME) + { + gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause); +- if (!vect_is_simple_use (else_clause, loop_vinfo, NULL, ++ if (!vect_is_simple_use (else_clause, loop_vinfo, bb_vinfo, + &else_def_stmt, &def, &dt)) + return false; + } +@@ -4974,7 +4976,15 @@ + return expand_vec_cond_expr_p (TREE_TYPE (op), vec_mode); + } + +- /* Transform */ ++ /* Transform. */ ++ ++ if (!slp_node) ++ { ++ vec_oprnds0 = VEC_alloc (tree, heap, 1); ++ vec_oprnds1 = VEC_alloc (tree, heap, 1); ++ vec_oprnds2 = VEC_alloc (tree, heap, 1); ++ vec_oprnds3 = VEC_alloc (tree, heap, 1); ++ } + + /* Handle def. 
*/ + scalar_dest = gimple_assign_lhs (stmt); +@@ -4983,67 +4993,118 @@ + /* Handle cond expr. */ + for (j = 0; j < ncopies; j++) + { +- gimple new_stmt; ++ gimple new_stmt = NULL; + if (j == 0) + { +- gimple gtemp; +- vec_cond_lhs = ++ if (slp_node) ++ { ++ VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4); ++ VEC (slp_void_p, heap) *vec_defs; ++ ++ vec_defs = VEC_alloc (slp_void_p, heap, 4); ++ VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0)); ++ VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1)); ++ VEC_safe_push (tree, heap, ops, then_clause); ++ VEC_safe_push (tree, heap, ops, else_clause); ++ vect_get_slp_defs (ops, slp_node, &vec_defs, -1); ++ vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs); ++ vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs); ++ vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs); ++ vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs); ++ ++ VEC_free (tree, heap, ops); ++ VEC_free (slp_void_p, heap, vec_defs); ++ } ++ else ++ { ++ gimple gtemp; ++ vec_cond_lhs = + vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), + stmt, NULL); +- vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo, ++ vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo, + NULL, >emp, &def, &dts[0]); +- vec_cond_rhs = +- vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), +- stmt, NULL); +- vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo, +- NULL, >emp, &def, &dts[1]); +- if (reduc_index == 1) +- vec_then_clause = reduc_def; +- else +- { +- vec_then_clause = vect_get_vec_def_for_operand (then_clause, +- stmt, NULL); +- vect_is_simple_use (then_clause, loop_vinfo, +- NULL, >emp, &def, &dts[2]); +- } +- if (reduc_index == 2) +- vec_else_clause = reduc_def; +- else +- { +- vec_else_clause = vect_get_vec_def_for_operand (else_clause, +- stmt, NULL); +- vect_is_simple_use (else_clause, loop_vinfo, ++ ++ vec_cond_rhs = ++ vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), ++ stmt, NULL); ++ vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo, ++ NULL, >emp, &def, &dts[1]); ++ if (reduc_index == 1) ++ vec_then_clause = reduc_def; ++ else ++ { ++ vec_then_clause = vect_get_vec_def_for_operand (then_clause, ++ stmt, NULL); ++ vect_is_simple_use (then_clause, loop_vinfo, ++ NULL, >emp, &def, &dts[2]); ++ } ++ if (reduc_index == 2) ++ vec_else_clause = reduc_def; ++ else ++ { ++ vec_else_clause = vect_get_vec_def_for_operand (else_clause, ++ stmt, NULL); ++ vect_is_simple_use (else_clause, loop_vinfo, + NULL, >emp, &def, &dts[3]); ++ } + } + } + else + { +- vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], vec_cond_lhs); +- vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], vec_cond_rhs); ++ vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], ++ VEC_pop (tree, vec_oprnds0)); ++ vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], ++ VEC_pop (tree, vec_oprnds1)); + vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2], +- vec_then_clause); ++ VEC_pop (tree, vec_oprnds2)); + vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3], +- vec_else_clause); ++ VEC_pop (tree, vec_oprnds3)); ++ } ++ ++ if (!slp_node) ++ { ++ VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs); ++ VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs); ++ VEC_quick_push (tree, vec_oprnds2, vec_then_clause); ++ VEC_quick_push (tree, vec_oprnds3, vec_else_clause); + } + + /* Arguments are ready. Create the new vector stmt. 
*/ +- vec_compare = build2 (TREE_CODE (cond_expr), vectype, +- vec_cond_lhs, vec_cond_rhs); +- vec_cond_expr = build3 (VEC_COND_EXPR, vectype, +- vec_compare, vec_then_clause, vec_else_clause); +- +- new_stmt = gimple_build_assign (vec_dest, vec_cond_expr); +- new_temp = make_ssa_name (vec_dest, new_stmt); +- gimple_assign_set_lhs (new_stmt, new_temp); +- vect_finish_stmt_generation (stmt, new_stmt, gsi); +- if (j == 0) +- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; +- else +- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; +- +- prev_stmt_info = vinfo_for_stmt (new_stmt); ++ FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs) ++ { ++ vec_cond_rhs = VEC_index (tree, vec_oprnds1, i); ++ vec_then_clause = VEC_index (tree, vec_oprnds2, i); ++ vec_else_clause = VEC_index (tree, vec_oprnds3, i); ++ ++ vec_compare = build2 (TREE_CODE (cond_expr), vectype, ++ vec_cond_lhs, vec_cond_rhs); ++ vec_cond_expr = build3 (VEC_COND_EXPR, vectype, ++ vec_compare, vec_then_clause, vec_else_clause); ++ ++ new_stmt = gimple_build_assign (vec_dest, vec_cond_expr); ++ new_temp = make_ssa_name (vec_dest, new_stmt); ++ gimple_assign_set_lhs (new_stmt, new_temp); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ if (slp_node) ++ VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt); ++ } ++ ++ if (slp_node) ++ continue; ++ ++ if (j == 0) ++ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; ++ else ++ STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; ++ ++ prev_stmt_info = vinfo_for_stmt (new_stmt); + } + ++ VEC_free (tree, heap, vec_oprnds0); ++ VEC_free (tree, heap, vec_oprnds1); ++ VEC_free (tree, heap, vec_oprnds2); ++ VEC_free (tree, heap, vec_oprnds3); ++ + return true; + } + +@@ -5217,7 +5278,7 @@ + || vectorizable_call (stmt, NULL, NULL) + || vectorizable_store (stmt, NULL, NULL, NULL) + || vectorizable_reduction (stmt, NULL, NULL, NULL) +- || vectorizable_condition (stmt, NULL, NULL, NULL, 0)); ++ || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL)); + else + { + if (bb_vinfo) +@@ -5227,7 +5288,8 @@ + || vectorizable_operation (stmt, NULL, NULL, node) + || vectorizable_assignment (stmt, NULL, NULL, node) + || vectorizable_load (stmt, NULL, NULL, node, NULL) +- || vectorizable_store (stmt, NULL, NULL, node)); ++ || vectorizable_store (stmt, NULL, NULL, node) ++ || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)); + } + + if (!ok) +@@ -5343,8 +5405,7 @@ + break; + + case condition_vec_info_type: +- gcc_assert (!slp_node); +- done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0); ++ done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node); + gcc_assert (done); + break; + + +=== modified file 'gcc/tree-vectorizer.h' +--- old/gcc/tree-vectorizer.h 2011-11-22 16:52:23 +0000 ++++ new/gcc/tree-vectorizer.h 2011-11-22 17:10:17 +0000 +@@ -832,7 +832,7 @@ + extern void vect_remove_stores (gimple); + extern bool vect_analyze_stmt (gimple, bool *, slp_tree); + extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *, +- tree, int); ++ tree, int, slp_tree); + extern void vect_get_load_cost (struct data_reference *, int, bool, + unsigned int *, unsigned int *); + extern void vect_get_store_cost (struct data_reference *, int, unsigned int *); + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106844.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106844.patch new file mode 100644 index 0000000000..e501959c7a --- /dev/null +++ 
b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106844.patch @@ -0,0 +1,495 @@ +2011-11-27 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vectorizer.h (vect_pattern_recog): Add new argument. + * tree-vect-loop.c (vect_analyze_loop_2): Update call to + vect_pattern_recog. + * tree-vect-patterns.c (widened_name_p): Pass basic block + info to vect_is_simple_use. + (vect_recog_dot_prod_pattern): Fail for basic blocks. + (vect_recog_widen_sum_pattern): Likewise. + (vect_handle_widen_op_by_const): Support basic blocks. + (vect_operation_fits_smaller_type, + vect_recog_over_widening_pattern): Likewise. + (vect_recog_mixed_size_cond_pattern): Support basic blocks. + Add printing. + (vect_mark_pattern_stmts): Update calls to new_stmt_vec_info. + (vect_pattern_recog_1): Check for reduction only in loops. + (vect_pattern_recog): Add new argument. Support basic blocks. + * tree-vect-stmts.c (vectorizable_conversion): Pass basic block + info to vect_is_simple_use_1. + * tree-vect-slp.c (vect_get_and_check_slp_defs): Support basic + blocks. + (vect_slp_analyze_bb_1): Call vect_pattern_recog. + + gcc/testsuite/ + * gcc.dg/vect/bb-slp-pattern-1.c: New test. + * gcc.dg/vect/bb-slp-pattern-2.c: New test. + +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c 2011-11-23 06:37:10 +0000 +@@ -0,0 +1,55 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 8 ++ ++unsigned short X[N]; ++unsigned short Y[N]; ++unsigned int result[N]; ++ ++/* unsigned short->unsigned int widening-mult. */ ++__attribute__ ((noinline, noclone)) void ++foo (void) ++{ ++ result[0] = (unsigned int)(X[0] * Y[0]); ++ result[1] = (unsigned int)(X[1] * Y[1]); ++ result[2] = (unsigned int)(X[2] * Y[2]); ++ result[3] = (unsigned int)(X[3] * Y[3]); ++ result[4] = (unsigned int)(X[4] * Y[4]); ++ result[5] = (unsigned int)(X[5] * Y[5]); ++ result[6] = (unsigned int)(X[6] * Y[6]); ++ result[7] = (unsigned int)(X[7] * Y[7]); ++} ++ ++int main (void) ++{ ++ int i, tmp; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ X[i] = i; ++ Y[i] = 64-i; ++ } ++ ++ foo (); ++ ++ for (i = 0; i < N; i++) ++ { ++ __asm__ volatile (""); ++ tmp = X[i] * Y[i]; ++ if (result[i] != tmp) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "slp" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 8 "slp" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { scan-tree-dump-times "pattern recognized" 8 "slp" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { cleanup-tree-dump "slp" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c 2011-11-23 06:37:10 +0000 +@@ -0,0 +1,53 @@ ++/* { dg-require-effective-target vect_condition } */ ++ ++#include "tree-vect.h" ++ ++#define N 128 ++ ++__attribute__((noinline, noclone)) void ++foo (short * __restrict__ a, int * __restrict__ b, int stride) ++{ ++ int i; ++ ++ for (i = 0; i < N/stride; i++, a += stride, b += stride) ++ { ++ a[0] = b[0] ? 1 : 7; ++ a[1] = b[1] ? 2 : 0; ++ a[2] = b[2] ? 3 : 0; ++ a[3] = b[3] ? 4 : 0; ++ a[4] = b[4] ? 5 : 0; ++ a[5] = b[5] ? 
6 : 0; ++ a[6] = b[6] ? 7 : 0; ++ a[7] = b[7] ? 8 : 0; ++ } ++} ++ ++short a[N]; ++int b[N]; ++int main () ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ a[i] = i; ++ b[i] = -i; ++ } ++ ++ foo (a, b, 8); ++ ++ for (i = 1; i < N; i++) ++ if (a[i] != i%8 + 1) ++ abort (); ++ ++ if (a[0] != 7) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_element_align && vect_pack_trunc } } } } */ ++/* { dg-final { cleanup-tree-dump "slp" } } */ ++ + +=== modified file 'gcc/tree-vect-loop.c' +--- old/gcc/tree-vect-loop.c 2011-11-20 08:24:08 +0000 ++++ new/gcc/tree-vect-loop.c 2011-11-23 06:47:35 +0000 +@@ -1458,7 +1458,7 @@ + + vect_analyze_scalar_cycles (loop_vinfo); + +- vect_pattern_recog (loop_vinfo); ++ vect_pattern_recog (loop_vinfo, NULL); + + /* Data-flow analysis to detect stmts that do not need to be vectorized. */ + + +=== modified file 'gcc/tree-vect-patterns.c' +--- old/gcc/tree-vect-patterns.c 2011-11-20 09:11:09 +0000 ++++ new/gcc/tree-vect-patterns.c 2011-11-23 07:49:33 +0000 +@@ -83,11 +83,13 @@ + tree oprnd0; + enum vect_def_type dt; + tree def; ++ bb_vec_info bb_vinfo; + + stmt_vinfo = vinfo_for_stmt (use_stmt); + loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); ++ bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); + +- if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt)) ++ if (!vect_is_simple_use (name, loop_vinfo, bb_vinfo, def_stmt, &def, &dt)) + return false; + + if (dt != vect_internal_def +@@ -111,7 +113,7 @@ + || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2))) + return false; + +- if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy, ++ if (!vect_is_simple_use (oprnd0, loop_vinfo, bb_vinfo, &dummy_gimple, &dummy, + &dt)) + return false; + +@@ -188,9 +190,14 @@ + gimple pattern_stmt; + tree prod_type; + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); +- struct loop *loop = LOOP_VINFO_LOOP (loop_info); ++ struct loop *loop; + tree var, rhs; + ++ if (!loop_info) ++ return NULL; ++ ++ loop = LOOP_VINFO_LOOP (loop_info); ++ + if (!is_gimple_assign (last_stmt)) + return NULL; + +@@ -358,8 +365,16 @@ + { + tree new_type, new_oprnd, tmp; + gimple new_stmt; +- loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); +- struct loop *loop = LOOP_VINFO_LOOP (loop_info); ++ loop_vec_info loop_vinfo; ++ struct loop *loop = NULL; ++ bb_vec_info bb_vinfo; ++ stmt_vec_info stmt_vinfo; ++ ++ stmt_vinfo = vinfo_for_stmt (stmt); ++ loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); ++ bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); ++ if (loop_vinfo) ++ loop = LOOP_VINFO_LOOP (loop_vinfo); + + if (code != MULT_EXPR && code != LSHIFT_EXPR) + return false; +@@ -377,7 +392,9 @@ + + if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4) + || !gimple_bb (def_stmt) +- || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) ++ || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))) ++ || (!loop && gimple_bb (def_stmt) != BB_VINFO_BB (bb_vinfo) ++ && gimple_code (def_stmt) != GIMPLE_PHI) + || !vinfo_for_stmt (def_stmt)) + return false; + +@@ -774,9 +791,14 @@ + tree type, half_type; + gimple pattern_stmt; + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); +- struct loop *loop = LOOP_VINFO_LOOP (loop_info); ++ struct loop *loop; + tree var; + ++ if (!loop_info) ++ return NULL; ++ ++ loop = LOOP_VINFO_LOOP (loop_info); ++ + if (!is_gimple_assign (last_stmt)) + return NULL; + +@@ -877,7 +899,11 @@ + 
gimple def_stmt, new_stmt; + bool first = false; + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); +- struct loop *loop = LOOP_VINFO_LOOP (loop_info); ++ bb_vec_info bb_info = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)); ++ struct loop *loop = NULL; ++ ++ if (loop_info) ++ loop = LOOP_VINFO_LOOP (loop_info); + + *new_def_stmt = NULL; + +@@ -909,7 +935,9 @@ + first = true; + if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false) + || !gimple_bb (def_stmt) +- || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) ++ || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))) ++ || (!loop && gimple_bb (def_stmt) != BB_VINFO_BB (bb_info) ++ && gimple_code (def_stmt) != GIMPLE_PHI) + || !vinfo_for_stmt (def_stmt)) + return false; + } +@@ -1087,7 +1115,16 @@ + int nuses = 0; + tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd; + bool first; +- struct loop *loop = (gimple_bb (stmt))->loop_father; ++ loop_vec_info loop_vinfo; ++ struct loop *loop = NULL; ++ bb_vec_info bb_vinfo; ++ stmt_vec_info stmt_vinfo; ++ ++ stmt_vinfo = vinfo_for_stmt (stmt); ++ loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); ++ bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); ++ if (loop_vinfo) ++ loop = LOOP_VINFO_LOOP (loop_vinfo); + + first = true; + while (1) +@@ -1120,7 +1157,8 @@ + + if (nuses != 1 || !is_gimple_assign (use_stmt) + || !gimple_bb (use_stmt) +- || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) ++ || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) ++ || (!loop && gimple_bb (use_stmt) != BB_VINFO_BB (bb_vinfo))) + return NULL; + + /* Create pattern statement for STMT. */ +@@ -1485,6 +1523,7 @@ + enum machine_mode cmpmode; + gimple pattern_stmt, def_stmt; + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); ++ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); + + if (!is_gimple_assign (last_stmt) + || gimple_assign_rhs_code (last_stmt) != COND_EXPR +@@ -1538,7 +1577,8 @@ + tmp = build3 (COND_EXPR, comp_type, unshare_expr (cond_expr), + fold_convert (comp_type, then_clause), + fold_convert (comp_type, else_clause)); +- def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (comp_type, NULL), tmp); ++ def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (comp_type, NULL), ++ tmp); + + pattern_stmt + = gimple_build_assign_with_ops (NOP_EXPR, +@@ -1546,12 +1586,15 @@ + gimple_assign_lhs (def_stmt), NULL_TREE); + + STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt; +- def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); ++ def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); + set_vinfo_for_stmt (def_stmt, def_stmt_info); + STMT_VINFO_VECTYPE (def_stmt_info) = comp_vectype; + *type_in = vectype; + *type_out = vectype; + ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "vect_recog_mixed_size_cond_pattern: detected: "); ++ + return pattern_stmt; + } + +@@ -1565,10 +1608,11 @@ + stmt_vec_info pattern_stmt_info, def_stmt_info; + stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt); + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info); ++ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info); + gimple def_stmt; + + set_vinfo_for_stmt (pattern_stmt, +- new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL)); ++ new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo)); + gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt)); + pattern_stmt_info = vinfo_for_stmt (pattern_stmt); + +@@ -1586,7 +1630,7 @@ + def_stmt_info = vinfo_for_stmt (def_stmt); + if (def_stmt_info 
== NULL) + { +- def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); ++ def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); + set_vinfo_for_stmt (def_stmt, def_stmt_info); + } + gimple_set_bb (def_stmt, gimple_bb (orig_stmt)); +@@ -1697,9 +1741,10 @@ + + /* Patterns cannot be vectorized using SLP, because they change the order of + computation. */ +- FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next) +- if (next == stmt) +- VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i); ++ if (loop_vinfo) ++ FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next) ++ if (next == stmt) ++ VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i); + + /* It is possible that additional pattern stmts are created and inserted in + STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the +@@ -1799,26 +1844,46 @@ + be recorded in S3. */ + + void +-vect_pattern_recog (loop_vec_info loop_vinfo) ++vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) + { +- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); +- basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); +- unsigned int nbbs = loop->num_nodes; ++ struct loop *loop; ++ basic_block *bbs, bb; ++ unsigned int nbbs; + gimple_stmt_iterator si; + unsigned int i, j; + gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); + VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1); ++ gimple stmt; + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "=== vect_pattern_recog ==="); + +- /* Scan through the loop stmts, applying the pattern recognition ++ if (loop_vinfo) ++ { ++ loop = LOOP_VINFO_LOOP (loop_vinfo); ++ bbs = LOOP_VINFO_BBS (loop_vinfo); ++ nbbs = loop->num_nodes; ++ } ++ else ++ { ++ bb = BB_VINFO_BB (bb_vinfo); ++ nbbs = 1; ++ bbs = XNEW (basic_block); ++ bbs[0] = bb; ++ } ++ ++ /* Scan through the stmts, applying the pattern recognition + functions starting at each stmt visited: */ + for (i = 0; i < nbbs; i++) + { + basic_block bb = bbs[i]; + for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) + { ++ if (bb_vinfo && (stmt = gsi_stmt (si)) ++ && vinfo_for_stmt (stmt) ++ && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt))) ++ continue; ++ + /* Scan over all generic vect_recog_xxx_pattern functions. */ + for (j = 0; j < NUM_PATTERNS; j++) + { +@@ -1830,4 +1895,6 @@ + } + + VEC_free (gimple, heap, stmts_to_replace); ++ if (bb_vinfo) ++ free (bbs); + } + +=== modified file 'gcc/tree-vect-slp.c' +--- old/gcc/tree-vect-slp.c 2011-11-21 06:58:40 +0000 ++++ new/gcc/tree-vect-slp.c 2011-11-23 06:47:35 +0000 +@@ -255,12 +255,14 @@ + /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt + from the pattern. Check that all the stmts of the node are in the + pattern. 
*/ +- if (loop && def_stmt && gimple_bb (def_stmt) +- && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) ++ if (def_stmt && gimple_bb (def_stmt) ++ && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))) ++ || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo) ++ && gimple_code (def_stmt) != GIMPLE_PHI)) + && vinfo_for_stmt (def_stmt) + && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)) +- && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt)) +- && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) ++ && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt)) ++ && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) + { + pattern = true; + if (!first && !oprnd_info->first_pattern) +@@ -1972,6 +1974,8 @@ + return NULL; + } + ++ vect_pattern_recog (NULL, bb_vinfo); ++ + if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf, + &data_dependence_in_bb) + || min_vf > max_vf + +=== modified file 'gcc/tree-vect-stmts.c' +--- old/gcc/tree-vect-stmts.c 2011-11-22 17:10:17 +0000 ++++ new/gcc/tree-vect-stmts.c 2011-11-23 06:47:35 +0000 +@@ -3451,11 +3451,11 @@ + /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of + OP1. */ + if (CONSTANT_CLASS_P (op0)) +- ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL, ++ ok = vect_is_simple_use_1 (op1, loop_vinfo, bb_vinfo, + &def_stmt, &def, &dt[1], &vectype_in); + else +- ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, +- &dt[1]); ++ ok = vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, ++ &def, &dt[1]); + + if (!ok) + { + +=== modified file 'gcc/tree-vectorizer.h' +--- old/gcc/tree-vectorizer.h 2011-11-22 17:10:17 +0000 ++++ new/gcc/tree-vectorizer.h 2011-11-23 06:47:35 +0000 +@@ -918,7 +918,7 @@ + in the future. */ + typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); + #define NUM_PATTERNS 7 +-void vect_pattern_recog (loop_vec_info); ++void vect_pattern_recog (loop_vec_info, bb_vec_info); + + /* In tree-vectorizer.c. */ + unsigned vectorize_loops (void); + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106845.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106845.patch new file mode 100644 index 0000000000..17cfd10682 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106845.patch @@ -0,0 +1,1818 @@ +2011-11-28 David Alan Gilbert <david.gilbert@linaro.org> + + Backport from mainline (svn r19983): + + 2011-10-14 David Alan Gilbert <david.gilbert@linaro.org> + + gcc/testsuite/ + * gcc.dg/di-longlong64-sync-1.c: New test. + * gcc.dg/di-sync-multithread.c: New test. + * gcc.target/arm/di-longlong64-sync-withhelpers.c: New test. + * gcc.target/arm/di-longlong64-sync-withldrexd.c: New test. + * lib/target-supports.exp: (arm_arch_*_ok): Series of effective-target + tests for v5, v6, v6k, and v7-a, and add-options helpers. + (check_effective_target_arm_arm_ok): New helper. + (check_effective_target_sync_longlong): New helper. + +2011-11-28 David Alan Gilbert <david.gilbert@linaro.org> + + Backport from mainline (svn r19982): + + 2011-10-14 David Alan Gilbert <david.gilbert@linaro.org> + + gcc/ + * config/arm/linux-atomic-64bit.c: New (based on linux-atomic.c). + * config/arm/linux-atomic.c: Change comment to point to 64bit version. + (SYNC_LOCK_RELEASE): Instantiate 64bit version. + * config/arm/t-linux-eabi: Pull in linux-atomic-64bit.c. 
+ +2011-11-28 David Alan Gilbert <david.gilbert@linaro.org> + + Backport from mainline (svn r19981): + + 2011-10-14 David Alan Gilbert <david.gilbert@linaro.org> + + gcc/ + * config/arm/arm.c (arm_output_ldrex): Support ldrexd. + (arm_output_strex): Support strexd. + (arm_output_it): New helper to output it in Thumb2 mode only. + (arm_output_sync_loop): Support DI mode. Change comment to + not support const_int. + (arm_expand_sync): Support DI mode. + * config/arm/arm.h (TARGET_HAVE_LDREXBHD): Split into LDREXBH + and LDREXD. + * config/arm/iterators.md (NARROW): move from sync.md. + (QHSD): New iterator for all current ARM integer modes. + (SIDI): New iterator for SI and DI modes only. + * config/arm/sync.md (sync_predtab): New mode_attr. + (sync_compare_and_swapsi): Fold into sync_compare_and_swap<mode>. + (sync_lock_test_and_setsi): Fold into sync_lock_test_and_setsi<mode>. + (sync_<sync_optab>si): Fold into sync_<sync_optab><mode>. + (sync_nandsi): Fold into sync_nand<mode>. + (sync_new_<sync_optab>si): Fold into sync_new_<sync_optab><mode>. + (sync_new_nandsi): Fold into sync_new_nand<mode>. + (sync_old_<sync_optab>si): Fold into sync_old_<sync_optab><mode>. + (sync_old_nandsi): Fold into sync_old_nand<mode>. + (sync_compare_and_swap<mode>): Support SI & DI. + (sync_lock_test_and_set<mode>): Likewise. + (sync_<sync_optab><mode>): Likewise. + (sync_nand<mode>): Likewise. + (sync_new_<sync_optab><mode>): Likewise. + (sync_new_nand<mode>): Likewise. + (sync_old_<sync_optab><mode>): Likewise. + (sync_old_nand<mode>): Likewise. + (arm_sync_compare_and_swapsi): Turn into iterator on SI & DI. + (arm_sync_lock_test_and_setsi): Likewise. + (arm_sync_new_<sync_optab>si): Likewise. + (arm_sync_new_nandsi): Likewise. + (arm_sync_old_<sync_optab>si): Likewise. + (arm_sync_old_nandsi): Likewise. + (arm_sync_compare_and_swap<mode> NARROW): use sync_predtab, fix indent. + (arm_sync_lock_test_and_setsi<mode> NARROW): Likewise. + (arm_sync_new_<sync_optab><mode> NARROW): Likewise. + (arm_sync_new_nand<mode> NARROW): Likewise. + (arm_sync_old_<sync_optab><mode> NARROW): Likewise. + (arm_sync_old_nand<mode> NARROW): Likewise. + +2011-11-28 David Alan Gilbert <david.gilbert@linaro.org> + + Backport from mainline (svn r19980): + + 2011-10-14 David Alan Gilbert <david.gilbert@linaro.org> + + PR target/48126 + + gcc/ + * config/arm/arm.c (arm_output_sync_loop): Move label before barrier. + +2011-11-28 David Alan Gilbert <david.gilbert@linaro.org> + + Backport from mainline (svn r19979): + + 2011-10-14 David Alan Gilbert <david.gilbert@linaro.org> + + gcc/ + * config/arm/arm.h (TARGET_HAVE_DMB_MCR): MCR Not available in Thumb1. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-11-21 01:45:54 +0000 ++++ new/gcc/config/arm/arm.c 2011-11-28 15:07:01 +0000 +@@ -24307,12 +24307,26 @@ + rtx target, + rtx memory) + { +- const char *suffix = arm_ldrex_suffix (mode); +- rtx operands[2]; ++ rtx operands[3]; + + operands[0] = target; +- operands[1] = memory; +- arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix); ++ if (mode != DImode) ++ { ++ const char *suffix = arm_ldrex_suffix (mode); ++ operands[1] = memory; ++ arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix); ++ } ++ else ++ { ++ /* The restrictions on target registers in ARM mode are that the two ++ registers are consecutive and the first one is even; Thumb is ++ actually more flexible, but DI should give us this anyway. ++ Note that the 1st register always gets the lowest word in memory. 
*/ ++ gcc_assert ((REGNO (target) & 1) == 0); ++ operands[1] = gen_rtx_REG (SImode, REGNO (target) + 1); ++ operands[2] = memory; ++ arm_output_asm_insn (emit, 0, operands, "ldrexd\t%%0, %%1, %%C2"); ++ } + } + + /* Emit a strex{b,h,d, } instruction appropriate for the specified +@@ -24325,14 +24339,41 @@ + rtx value, + rtx memory) + { +- const char *suffix = arm_ldrex_suffix (mode); +- rtx operands[3]; ++ rtx operands[4]; + + operands[0] = result; + operands[1] = value; +- operands[2] = memory; +- arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix, +- cc); ++ if (mode != DImode) ++ { ++ const char *suffix = arm_ldrex_suffix (mode); ++ operands[2] = memory; ++ arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", ++ suffix, cc); ++ } ++ else ++ { ++ /* The restrictions on target registers in ARM mode are that the two ++ registers are consecutive and the first one is even; Thumb is ++ actually more flexible, but DI should give us this anyway. ++ Note that the 1st register always gets the lowest word in memory. */ ++ gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2); ++ operands[2] = gen_rtx_REG (SImode, REGNO (value) + 1); ++ operands[3] = memory; ++ arm_output_asm_insn (emit, 0, operands, "strexd%s\t%%0, %%1, %%2, %%C3", ++ cc); ++ } ++} ++ ++/* Helper to emit an it instruction in Thumb2 mode only; although the assembler ++ will ignore it in ARM mode, emitting it will mess up instruction counts we ++ sometimes keep 'flags' are the extra t's and e's if it's more than one ++ instruction that is conditional. */ ++static void ++arm_output_it (emit_f emit, const char *flags, const char *cond) ++{ ++ rtx operands[1]; /* Don't actually use the operand. */ ++ if (TARGET_THUMB2) ++ arm_output_asm_insn (emit, 0, operands, "it%s\t%s", flags, cond); + } + + /* Helper to emit a two operand instruction. */ +@@ -24374,7 +24415,7 @@ + + required_value: + +- RTX register or const_int representing the required old_value for ++ RTX register representing the required old_value for + the modify to continue, if NULL no comparsion is performed. */ + static void + arm_output_sync_loop (emit_f emit, +@@ -24388,7 +24429,13 @@ + enum attr_sync_op sync_op, + int early_barrier_required) + { +- rtx operands[1]; ++ rtx operands[2]; ++ /* We'll use the lo for the normal rtx in the none-DI case ++ as well as the least-sig word in the DI case. */ ++ rtx old_value_lo, required_value_lo, new_value_lo, t1_lo; ++ rtx old_value_hi, required_value_hi, new_value_hi, t1_hi; ++ ++ bool is_di = mode == DImode; + + gcc_assert (t1 != t2); + +@@ -24399,82 +24446,142 @@ + + arm_output_ldrex (emit, mode, old_value, memory); + ++ if (is_di) ++ { ++ old_value_lo = gen_lowpart (SImode, old_value); ++ old_value_hi = gen_highpart (SImode, old_value); ++ if (required_value) ++ { ++ required_value_lo = gen_lowpart (SImode, required_value); ++ required_value_hi = gen_highpart (SImode, required_value); ++ } ++ else ++ { ++ /* Silence false potentially unused warning. */ ++ required_value_lo = NULL_RTX; ++ required_value_hi = NULL_RTX; ++ } ++ new_value_lo = gen_lowpart (SImode, new_value); ++ new_value_hi = gen_highpart (SImode, new_value); ++ t1_lo = gen_lowpart (SImode, t1); ++ t1_hi = gen_highpart (SImode, t1); ++ } ++ else ++ { ++ old_value_lo = old_value; ++ new_value_lo = new_value; ++ required_value_lo = required_value; ++ t1_lo = t1; ++ ++ /* Silence false potentially unused warning. 
*/ ++ t1_hi = NULL_RTX; ++ new_value_hi = NULL_RTX; ++ required_value_hi = NULL_RTX; ++ old_value_hi = NULL_RTX; ++ } ++ + if (required_value) + { +- rtx operands[2]; ++ operands[0] = old_value_lo; ++ operands[1] = required_value_lo; + +- operands[0] = old_value; +- operands[1] = required_value; + arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1"); ++ if (is_di) ++ { ++ arm_output_it (emit, "", "eq"); ++ arm_output_op2 (emit, "cmpeq", old_value_hi, required_value_hi); ++ } + arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX); + } + + switch (sync_op) + { + case SYNC_OP_ADD: +- arm_output_op3 (emit, "add", t1, old_value, new_value); ++ arm_output_op3 (emit, is_di ? "adds" : "add", ++ t1_lo, old_value_lo, new_value_lo); ++ if (is_di) ++ arm_output_op3 (emit, "adc", t1_hi, old_value_hi, new_value_hi); + break; + + case SYNC_OP_SUB: +- arm_output_op3 (emit, "sub", t1, old_value, new_value); ++ arm_output_op3 (emit, is_di ? "subs" : "sub", ++ t1_lo, old_value_lo, new_value_lo); ++ if (is_di) ++ arm_output_op3 (emit, "sbc", t1_hi, old_value_hi, new_value_hi); + break; + + case SYNC_OP_IOR: +- arm_output_op3 (emit, "orr", t1, old_value, new_value); ++ arm_output_op3 (emit, "orr", t1_lo, old_value_lo, new_value_lo); ++ if (is_di) ++ arm_output_op3 (emit, "orr", t1_hi, old_value_hi, new_value_hi); + break; + + case SYNC_OP_XOR: +- arm_output_op3 (emit, "eor", t1, old_value, new_value); ++ arm_output_op3 (emit, "eor", t1_lo, old_value_lo, new_value_lo); ++ if (is_di) ++ arm_output_op3 (emit, "eor", t1_hi, old_value_hi, new_value_hi); + break; + + case SYNC_OP_AND: +- arm_output_op3 (emit,"and", t1, old_value, new_value); ++ arm_output_op3 (emit,"and", t1_lo, old_value_lo, new_value_lo); ++ if (is_di) ++ arm_output_op3 (emit, "and", t1_hi, old_value_hi, new_value_hi); + break; + + case SYNC_OP_NAND: +- arm_output_op3 (emit, "and", t1, old_value, new_value); +- arm_output_op2 (emit, "mvn", t1, t1); ++ arm_output_op3 (emit, "and", t1_lo, old_value_lo, new_value_lo); ++ if (is_di) ++ arm_output_op3 (emit, "and", t1_hi, old_value_hi, new_value_hi); ++ arm_output_op2 (emit, "mvn", t1_lo, t1_lo); ++ if (is_di) ++ arm_output_op2 (emit, "mvn", t1_hi, t1_hi); + break; + + case SYNC_OP_NONE: + t1 = new_value; ++ t1_lo = new_value_lo; ++ if (is_di) ++ t1_hi = new_value_hi; + break; + } + ++ /* Note that the result of strex is a 0/1 flag that's always 1 register. */ + if (t2) + { +- arm_output_strex (emit, mode, "", t2, t1, memory); +- operands[0] = t2; +- arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0"); +- arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", +- LOCAL_LABEL_PREFIX); ++ arm_output_strex (emit, mode, "", t2, t1, memory); ++ operands[0] = t2; ++ arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0"); ++ arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", ++ LOCAL_LABEL_PREFIX); + } + else + { + /* Use old_value for the return value because for some operations + the old_value can easily be restored. This saves one register. */ +- arm_output_strex (emit, mode, "", old_value, t1, memory); +- operands[0] = old_value; ++ arm_output_strex (emit, mode, "", old_value_lo, t1, memory); ++ operands[0] = old_value_lo; + arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0"); + arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", + LOCAL_LABEL_PREFIX); + ++ /* Note that we only used the _lo half of old_value as a temporary ++ so in DI we don't have to restore the _hi part. 
*/ + switch (sync_op) + { + case SYNC_OP_ADD: +- arm_output_op3 (emit, "sub", old_value, t1, new_value); ++ arm_output_op3 (emit, "sub", old_value_lo, t1_lo, new_value_lo); + break; + + case SYNC_OP_SUB: +- arm_output_op3 (emit, "add", old_value, t1, new_value); ++ arm_output_op3 (emit, "add", old_value_lo, t1_lo, new_value_lo); + break; + + case SYNC_OP_XOR: +- arm_output_op3 (emit, "eor", old_value, t1, new_value); ++ arm_output_op3 (emit, "eor", old_value_lo, t1_lo, new_value_lo); + break; + + case SYNC_OP_NONE: +- arm_output_op2 (emit, "mov", old_value, required_value); ++ arm_output_op2 (emit, "mov", old_value_lo, required_value_lo); + break; + + default: +@@ -24482,8 +24589,11 @@ + } + } + ++ /* Note: label is before barrier so that in cmp failure case we still get ++ a barrier to stop subsequent loads floating upwards past the ldrex ++ PR target/48126. */ ++ arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX); + arm_process_output_memory_barrier (emit, NULL); +- arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX); + } + + static rtx +@@ -24577,7 +24687,7 @@ + target = gen_reg_rtx (mode); + + memory = arm_legitimize_sync_memory (memory); +- if (mode != SImode) ++ if (mode != SImode && mode != DImode) + { + rtx load_temp = gen_reg_rtx (SImode); + + +=== modified file 'gcc/config/arm/arm.h' +--- old/gcc/config/arm/arm.h 2011-11-21 01:45:54 +0000 ++++ new/gcc/config/arm/arm.h 2011-11-28 15:07:01 +0000 +@@ -300,7 +300,8 @@ + #define TARGET_HAVE_DMB (arm_arch7) + + /* Nonzero if this chip implements a memory barrier via CP15. */ +-#define TARGET_HAVE_DMB_MCR (arm_arch6k && ! TARGET_HAVE_DMB) ++#define TARGET_HAVE_DMB_MCR (arm_arch6 && ! TARGET_HAVE_DMB \ ++ && ! TARGET_THUMB1) + + /* Nonzero if this chip implements a memory barrier instruction. */ + #define TARGET_HAVE_MEMORY_BARRIER (TARGET_HAVE_DMB || TARGET_HAVE_DMB_MCR) +@@ -308,8 +309,12 @@ + /* Nonzero if this chip supports ldrex and strex */ + #define TARGET_HAVE_LDREX ((arm_arch6 && TARGET_ARM) || arm_arch7) + +-/* Nonzero if this chip supports ldrex{bhd} and strex{bhd}. */ +-#define TARGET_HAVE_LDREXBHD ((arm_arch6k && TARGET_ARM) || arm_arch7) ++/* Nonzero if this chip supports ldrex{bh} and strex{bh}. */ ++#define TARGET_HAVE_LDREXBH ((arm_arch6k && TARGET_ARM) || arm_arch7) ++ ++/* Nonzero if this chip supports ldrexd and strexd. */ ++#define TARGET_HAVE_LDREXD (((arm_arch6k && TARGET_ARM) || arm_arch7) \ ++ && arm_arch_notm) + + /* Nonzero if integer division instructions supported. */ + #define TARGET_IDIV ((TARGET_ARM && arm_arch_arm_hwdiv) \ + +=== modified file 'gcc/config/arm/iterators.md' +--- old/gcc/config/arm/iterators.md 2011-10-23 13:33:07 +0000 ++++ new/gcc/config/arm/iterators.md 2011-11-28 15:07:01 +0000 +@@ -33,6 +33,15 @@ + ;; A list of integer modes that are up to one word long + (define_mode_iterator QHSI [QI HI SI]) + ++;; A list of integer modes that are less than a word ++(define_mode_iterator NARROW [QI HI]) ++ ++;; A list of all the integer modes upto 64bit ++(define_mode_iterator QHSD [QI HI SI DI]) ++ ++;; A list of the 32bit and 64bit integer modes ++(define_mode_iterator SIDI [SI DI]) ++ + ;; Integer element sizes implemented by IWMMXT. + (define_mode_iterator VMMX [V2SI V4HI V8QI]) + + +=== added file 'gcc/config/arm/linux-atomic-64bit.c' +--- old/gcc/config/arm/linux-atomic-64bit.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/config/arm/linux-atomic-64bit.c 2011-10-14 15:50:44 +0000 +@@ -0,0 +1,166 @@ ++/* 64bit Linux-specific atomic operations for ARM EABI. 
++ Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc. ++ Based on linux-atomic.c ++ ++ 64 bit additions david.gilbert@linaro.org ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++Under Section 7 of GPL version 3, you are granted additional ++permissions described in the GCC Runtime Library Exception, version ++3.1, as published by the Free Software Foundation. ++ ++You should have received a copy of the GNU General Public License and ++a copy of the GCC Runtime Library Exception along with this program; ++see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++<http://www.gnu.org/licenses/>. */ ++ ++/* 64bit helper functions for atomic operations; the compiler will ++ call these when the code is compiled for a CPU without ldrexd/strexd. ++ (If the CPU had those then the compiler inlines the operation). ++ ++ These helpers require a kernel helper that's only present on newer ++ kernels; we check for that in an init section and bail out rather ++ unceremoneously. */ ++ ++extern unsigned int __write (int fd, const void *buf, unsigned int count); ++extern void abort (void); ++ ++/* Kernel helper for compare-and-exchange. */ ++typedef int (__kernel_cmpxchg64_t) (const long long* oldval, ++ const long long* newval, ++ long long *ptr); ++#define __kernel_cmpxchg64 (*(__kernel_cmpxchg64_t *) 0xffff0f60) ++ ++/* Kernel helper page version number. */ ++#define __kernel_helper_version (*(unsigned int *)0xffff0ffc) ++ ++/* Check that the kernel has a new enough version at load. */ ++static void __check_for_sync8_kernelhelper (void) ++{ ++ if (__kernel_helper_version < 5) ++ { ++ const char err[] = "A newer kernel is required to run this binary. " ++ "(__kernel_cmpxchg64 helper)\n"; ++ /* At this point we need a way to crash with some information ++ for the user - I'm not sure I can rely on much else being ++ available at this point, so do the same as generic-morestack.c ++ write () and abort (). */ ++ __write (2 /* stderr. */, err, sizeof (err)); ++ abort (); ++ } ++}; ++ ++static void (*__sync8_kernelhelper_inithook[]) (void) ++ __attribute__ ((used, section (".init_array"))) = { ++ &__check_for_sync8_kernelhelper ++}; ++ ++#define HIDDEN __attribute__ ((visibility ("hidden"))) ++ ++#define FETCH_AND_OP_WORD64(OP, PFX_OP, INF_OP) \ ++ long long HIDDEN \ ++ __sync_fetch_and_##OP##_8 (long long *ptr, long long val) \ ++ { \ ++ int failure; \ ++ long long tmp,tmp2; \ ++ \ ++ do { \ ++ tmp = *ptr; \ ++ tmp2 = PFX_OP (tmp INF_OP val); \ ++ failure = __kernel_cmpxchg64 (&tmp, &tmp2, ptr); \ ++ } while (failure != 0); \ ++ \ ++ return tmp; \ ++ } ++ ++FETCH_AND_OP_WORD64 (add, , +) ++FETCH_AND_OP_WORD64 (sub, , -) ++FETCH_AND_OP_WORD64 (or, , |) ++FETCH_AND_OP_WORD64 (and, , &) ++FETCH_AND_OP_WORD64 (xor, , ^) ++FETCH_AND_OP_WORD64 (nand, ~, &) ++ ++#define NAME_oldval(OP, WIDTH) __sync_fetch_and_##OP##_##WIDTH ++#define NAME_newval(OP, WIDTH) __sync_##OP##_and_fetch_##WIDTH ++ ++/* Implement both __sync_<op>_and_fetch and __sync_fetch_and_<op> for ++ subword-sized quantities. 
*/ ++ ++#define OP_AND_FETCH_WORD64(OP, PFX_OP, INF_OP) \ ++ long long HIDDEN \ ++ __sync_##OP##_and_fetch_8 (long long *ptr, long long val) \ ++ { \ ++ int failure; \ ++ long long tmp,tmp2; \ ++ \ ++ do { \ ++ tmp = *ptr; \ ++ tmp2 = PFX_OP (tmp INF_OP val); \ ++ failure = __kernel_cmpxchg64 (&tmp, &tmp2, ptr); \ ++ } while (failure != 0); \ ++ \ ++ return tmp2; \ ++ } ++ ++OP_AND_FETCH_WORD64 (add, , +) ++OP_AND_FETCH_WORD64 (sub, , -) ++OP_AND_FETCH_WORD64 (or, , |) ++OP_AND_FETCH_WORD64 (and, , &) ++OP_AND_FETCH_WORD64 (xor, , ^) ++OP_AND_FETCH_WORD64 (nand, ~, &) ++ ++long long HIDDEN ++__sync_val_compare_and_swap_8 (long long *ptr, long long oldval, ++ long long newval) ++{ ++ int failure; ++ long long actual_oldval; ++ ++ while (1) ++ { ++ actual_oldval = *ptr; ++ ++ if (__builtin_expect (oldval != actual_oldval, 0)) ++ return actual_oldval; ++ ++ failure = __kernel_cmpxchg64 (&actual_oldval, &newval, ptr); ++ ++ if (__builtin_expect (!failure, 1)) ++ return oldval; ++ } ++} ++ ++typedef unsigned char bool; ++ ++bool HIDDEN ++__sync_bool_compare_and_swap_8 (long long *ptr, long long oldval, ++ long long newval) ++{ ++ int failure = __kernel_cmpxchg64 (&oldval, &newval, ptr); ++ return (failure == 0); ++} ++ ++long long HIDDEN ++__sync_lock_test_and_set_8 (long long *ptr, long long val) ++{ ++ int failure; ++ long long oldval; ++ ++ do { ++ oldval = *ptr; ++ failure = __kernel_cmpxchg64 (&oldval, &val, ptr); ++ } while (failure != 0); ++ ++ return oldval; ++} + +=== modified file 'gcc/config/arm/linux-atomic.c' +--- old/gcc/config/arm/linux-atomic.c 2011-01-03 20:52:22 +0000 ++++ new/gcc/config/arm/linux-atomic.c 2011-10-14 15:50:44 +0000 +@@ -32,8 +32,8 @@ + #define __kernel_dmb (*(__kernel_dmb_t *) 0xffff0fa0) + + /* Note: we implement byte, short and int versions of atomic operations using +- the above kernel helpers, but there is no support for "long long" (64-bit) +- operations as yet. */ ++ the above kernel helpers; see linux-atomic-64bit.c for "long long" (64-bit) ++ operations. */ + + #define HIDDEN __attribute__ ((visibility ("hidden"))) + +@@ -273,6 +273,7 @@ + *ptr = 0; \ + } + ++SYNC_LOCK_RELEASE (long long, 8) + SYNC_LOCK_RELEASE (int, 4) + SYNC_LOCK_RELEASE (short, 2) + SYNC_LOCK_RELEASE (char, 1) + +=== modified file 'gcc/config/arm/sync.md' +--- old/gcc/config/arm/sync.md 2010-12-31 13:25:33 +0000 ++++ new/gcc/config/arm/sync.md 2011-10-14 15:47:15 +0000 +@@ -1,6 +1,7 @@ + ;; Machine description for ARM processor synchronization primitives. + ;; Copyright (C) 2010 Free Software Foundation, Inc. + ;; Written by Marcus Shawcroft (marcus.shawcroft@arm.com) ++;; 64bit Atomics by Dave Gilbert (david.gilbert@linaro.org) + ;; + ;; This file is part of GCC. 
+ ;; +@@ -33,31 +34,24 @@ + MEM_VOLATILE_P (operands[0]) = 1; + }) + +-(define_expand "sync_compare_and_swapsi" +- [(set (match_operand:SI 0 "s_register_operand") +- (unspec_volatile:SI [(match_operand:SI 1 "memory_operand") +- (match_operand:SI 2 "s_register_operand") +- (match_operand:SI 3 "s_register_operand")] +- VUNSPEC_SYNC_COMPARE_AND_SWAP))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" +- { +- struct arm_sync_generator generator; +- generator.op = arm_sync_generator_omrn; +- generator.u.omrn = gen_arm_sync_compare_and_swapsi; +- arm_expand_sync (SImode, &generator, operands[0], operands[1], operands[2], +- operands[3]); +- DONE; +- }) + +-(define_mode_iterator NARROW [QI HI]) ++(define_mode_attr sync_predtab [(SI "TARGET_HAVE_LDREX && ++ TARGET_HAVE_MEMORY_BARRIER") ++ (QI "TARGET_HAVE_LDREXBH && ++ TARGET_HAVE_MEMORY_BARRIER") ++ (HI "TARGET_HAVE_LDREXBH && ++ TARGET_HAVE_MEMORY_BARRIER") ++ (DI "TARGET_HAVE_LDREXD && ++ ARM_DOUBLEWORD_ALIGN && ++ TARGET_HAVE_MEMORY_BARRIER")]) + + (define_expand "sync_compare_and_swap<mode>" +- [(set (match_operand:NARROW 0 "s_register_operand") +- (unspec_volatile:NARROW [(match_operand:NARROW 1 "memory_operand") +- (match_operand:NARROW 2 "s_register_operand") +- (match_operand:NARROW 3 "s_register_operand")] ++ [(set (match_operand:QHSD 0 "s_register_operand") ++ (unspec_volatile:QHSD [(match_operand:QHSD 1 "memory_operand") ++ (match_operand:QHSD 2 "s_register_operand") ++ (match_operand:QHSD 3 "s_register_operand")] + VUNSPEC_SYNC_COMPARE_AND_SWAP))] +- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" ++ "<sync_predtab>" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omrn; +@@ -67,25 +61,11 @@ + DONE; + }) + +-(define_expand "sync_lock_test_and_setsi" +- [(match_operand:SI 0 "s_register_operand") +- (match_operand:SI 1 "memory_operand") +- (match_operand:SI 2 "s_register_operand")] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" +- { +- struct arm_sync_generator generator; +- generator.op = arm_sync_generator_omn; +- generator.u.omn = gen_arm_sync_lock_test_and_setsi; +- arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL, +- operands[2]); +- DONE; +- }) +- + (define_expand "sync_lock_test_and_set<mode>" +- [(match_operand:NARROW 0 "s_register_operand") +- (match_operand:NARROW 1 "memory_operand") +- (match_operand:NARROW 2 "s_register_operand")] +- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" ++ [(match_operand:QHSD 0 "s_register_operand") ++ (match_operand:QHSD 1 "memory_operand") ++ (match_operand:QHSD 2 "s_register_operand")] ++ "<sync_predtab>" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; +@@ -115,51 +95,25 @@ + (plus "*") + (minus "*")]) + +-(define_expand "sync_<sync_optab>si" +- [(match_operand:SI 0 "memory_operand") +- (match_operand:SI 1 "s_register_operand") +- (syncop:SI (match_dup 0) (match_dup 1))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" +- { +- struct arm_sync_generator generator; +- generator.op = arm_sync_generator_omn; +- generator.u.omn = gen_arm_sync_new_<sync_optab>si; +- arm_expand_sync (SImode, &generator, NULL, operands[0], NULL, operands[1]); +- DONE; +- }) +- +-(define_expand "sync_nandsi" +- [(match_operand:SI 0 "memory_operand") +- (match_operand:SI 1 "s_register_operand") +- (not:SI (and:SI (match_dup 0) (match_dup 1)))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" +- { +- struct arm_sync_generator generator; +- generator.op = arm_sync_generator_omn; +- generator.u.omn = 
gen_arm_sync_new_nandsi; +- arm_expand_sync (SImode, &generator, NULL, operands[0], NULL, operands[1]); +- DONE; +- }) +- + (define_expand "sync_<sync_optab><mode>" +- [(match_operand:NARROW 0 "memory_operand") +- (match_operand:NARROW 1 "s_register_operand") +- (syncop:NARROW (match_dup 0) (match_dup 1))] +- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" ++ [(match_operand:QHSD 0 "memory_operand") ++ (match_operand:QHSD 1 "s_register_operand") ++ (syncop:QHSD (match_dup 0) (match_dup 1))] ++ "<sync_predtab>" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; + generator.u.omn = gen_arm_sync_new_<sync_optab><mode>; + arm_expand_sync (<MODE>mode, &generator, NULL, operands[0], NULL, +- operands[1]); ++ operands[1]); + DONE; + }) + + (define_expand "sync_nand<mode>" +- [(match_operand:NARROW 0 "memory_operand") +- (match_operand:NARROW 1 "s_register_operand") +- (not:NARROW (and:NARROW (match_dup 0) (match_dup 1)))] +- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" ++ [(match_operand:QHSD 0 "memory_operand") ++ (match_operand:QHSD 1 "s_register_operand") ++ (not:QHSD (and:QHSD (match_dup 0) (match_dup 1)))] ++ "<sync_predtab>" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; +@@ -169,57 +123,27 @@ + DONE; + }) + +-(define_expand "sync_new_<sync_optab>si" +- [(match_operand:SI 0 "s_register_operand") +- (match_operand:SI 1 "memory_operand") +- (match_operand:SI 2 "s_register_operand") +- (syncop:SI (match_dup 1) (match_dup 2))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" +- { +- struct arm_sync_generator generator; +- generator.op = arm_sync_generator_omn; +- generator.u.omn = gen_arm_sync_new_<sync_optab>si; +- arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL, +- operands[2]); +- DONE; +- }) +- +-(define_expand "sync_new_nandsi" +- [(match_operand:SI 0 "s_register_operand") +- (match_operand:SI 1 "memory_operand") +- (match_operand:SI 2 "s_register_operand") +- (not:SI (and:SI (match_dup 1) (match_dup 2)))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" +- { +- struct arm_sync_generator generator; +- generator.op = arm_sync_generator_omn; +- generator.u.omn = gen_arm_sync_new_nandsi; +- arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL, +- operands[2]); +- DONE; +- }) +- + (define_expand "sync_new_<sync_optab><mode>" +- [(match_operand:NARROW 0 "s_register_operand") +- (match_operand:NARROW 1 "memory_operand") +- (match_operand:NARROW 2 "s_register_operand") +- (syncop:NARROW (match_dup 1) (match_dup 2))] +- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" ++ [(match_operand:QHSD 0 "s_register_operand") ++ (match_operand:QHSD 1 "memory_operand") ++ (match_operand:QHSD 2 "s_register_operand") ++ (syncop:QHSD (match_dup 1) (match_dup 2))] ++ "<sync_predtab>" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; + generator.u.omn = gen_arm_sync_new_<sync_optab><mode>; + arm_expand_sync (<MODE>mode, &generator, operands[0], operands[1], +- NULL, operands[2]); ++ NULL, operands[2]); + DONE; + }) + + (define_expand "sync_new_nand<mode>" +- [(match_operand:NARROW 0 "s_register_operand") +- (match_operand:NARROW 1 "memory_operand") +- (match_operand:NARROW 2 "s_register_operand") +- (not:NARROW (and:NARROW (match_dup 1) (match_dup 2)))] +- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" ++ [(match_operand:QHSD 0 "s_register_operand") ++ (match_operand:QHSD 1 "memory_operand") ++ (match_operand:QHSD 2 "s_register_operand") ++ 
(not:QHSD (and:QHSD (match_dup 1) (match_dup 2)))] ++ "<sync_predtab>" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; +@@ -229,57 +153,27 @@ + DONE; + }); + +-(define_expand "sync_old_<sync_optab>si" +- [(match_operand:SI 0 "s_register_operand") +- (match_operand:SI 1 "memory_operand") +- (match_operand:SI 2 "s_register_operand") +- (syncop:SI (match_dup 1) (match_dup 2))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" +- { +- struct arm_sync_generator generator; +- generator.op = arm_sync_generator_omn; +- generator.u.omn = gen_arm_sync_old_<sync_optab>si; +- arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL, +- operands[2]); +- DONE; +- }) +- +-(define_expand "sync_old_nandsi" +- [(match_operand:SI 0 "s_register_operand") +- (match_operand:SI 1 "memory_operand") +- (match_operand:SI 2 "s_register_operand") +- (not:SI (and:SI (match_dup 1) (match_dup 2)))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" +- { +- struct arm_sync_generator generator; +- generator.op = arm_sync_generator_omn; +- generator.u.omn = gen_arm_sync_old_nandsi; +- arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL, +- operands[2]); +- DONE; +- }) +- + (define_expand "sync_old_<sync_optab><mode>" +- [(match_operand:NARROW 0 "s_register_operand") +- (match_operand:NARROW 1 "memory_operand") +- (match_operand:NARROW 2 "s_register_operand") +- (syncop:NARROW (match_dup 1) (match_dup 2))] +- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" ++ [(match_operand:QHSD 0 "s_register_operand") ++ (match_operand:QHSD 1 "memory_operand") ++ (match_operand:QHSD 2 "s_register_operand") ++ (syncop:QHSD (match_dup 1) (match_dup 2))] ++ "<sync_predtab>" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; + generator.u.omn = gen_arm_sync_old_<sync_optab><mode>; + arm_expand_sync (<MODE>mode, &generator, operands[0], operands[1], +- NULL, operands[2]); ++ NULL, operands[2]); + DONE; + }) + + (define_expand "sync_old_nand<mode>" +- [(match_operand:NARROW 0 "s_register_operand") +- (match_operand:NARROW 1 "memory_operand") +- (match_operand:NARROW 2 "s_register_operand") +- (not:NARROW (and:NARROW (match_dup 1) (match_dup 2)))] +- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" ++ [(match_operand:QHSD 0 "s_register_operand") ++ (match_operand:QHSD 1 "memory_operand") ++ (match_operand:QHSD 2 "s_register_operand") ++ (not:QHSD (and:QHSD (match_dup 1) (match_dup 2)))] ++ "<sync_predtab>" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; +@@ -289,22 +183,22 @@ + DONE; + }) + +-(define_insn "arm_sync_compare_and_swapsi" +- [(set (match_operand:SI 0 "s_register_operand" "=&r") +- (unspec_volatile:SI +- [(match_operand:SI 1 "arm_sync_memory_operand" "+Q") +- (match_operand:SI 2 "s_register_operand" "r") +- (match_operand:SI 3 "s_register_operand" "r")] +- VUNSPEC_SYNC_COMPARE_AND_SWAP)) +- (set (match_dup 1) (unspec_volatile:SI [(match_dup 2)] ++(define_insn "arm_sync_compare_and_swap<mode>" ++ [(set (match_operand:SIDI 0 "s_register_operand" "=&r") ++ (unspec_volatile:SIDI ++ [(match_operand:SIDI 1 "arm_sync_memory_operand" "+Q") ++ (match_operand:SIDI 2 "s_register_operand" "r") ++ (match_operand:SIDI 3 "s_register_operand" "r")] ++ VUNSPEC_SYNC_COMPARE_AND_SWAP)) ++ (set (match_dup 1) (unspec_volatile:SIDI [(match_dup 2)] + VUNSPEC_SYNC_COMPARE_AND_SWAP)) + (set (reg:CC CC_REGNUM) (unspec_volatile:CC [(match_dup 1)] + VUNSPEC_SYNC_COMPARE_AND_SWAP)) + ] +- "TARGET_HAVE_LDREX && 
TARGET_HAVE_MEMORY_BARRIER" ++ "<sync_predtab>" + { + return arm_output_sync_insn (insn, operands); +- } ++ } + [(set_attr "sync_result" "0") + (set_attr "sync_memory" "1") + (set_attr "sync_required_value" "2") +@@ -318,7 +212,7 @@ + (zero_extend:SI + (unspec_volatile:NARROW + [(match_operand:NARROW 1 "arm_sync_memory_operand" "+Q") +- (match_operand:SI 2 "s_register_operand" "r") ++ (match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "s_register_operand" "r")] + VUNSPEC_SYNC_COMPARE_AND_SWAP))) + (set (match_dup 1) (unspec_volatile:NARROW [(match_dup 2)] +@@ -326,10 +220,10 @@ + (set (reg:CC CC_REGNUM) (unspec_volatile:CC [(match_dup 1)] + VUNSPEC_SYNC_COMPARE_AND_SWAP)) + ] +- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" ++ "<sync_predtab>" + { + return arm_output_sync_insn (insn, operands); +- } ++ } + [(set_attr "sync_result" "0") + (set_attr "sync_memory" "1") + (set_attr "sync_required_value" "2") +@@ -338,18 +232,18 @@ + (set_attr "conds" "clob") + (set_attr "predicable" "no")]) + +-(define_insn "arm_sync_lock_test_and_setsi" +- [(set (match_operand:SI 0 "s_register_operand" "=&r") +- (match_operand:SI 1 "arm_sync_memory_operand" "+Q")) ++(define_insn "arm_sync_lock_test_and_set<mode>" ++ [(set (match_operand:SIDI 0 "s_register_operand" "=&r") ++ (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q")) + (set (match_dup 1) +- (unspec_volatile:SI [(match_operand:SI 2 "s_register_operand" "r")] +- VUNSPEC_SYNC_LOCK)) ++ (unspec_volatile:SIDI [(match_operand:SIDI 2 "s_register_operand" "r")] ++ VUNSPEC_SYNC_LOCK)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 "=&r"))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" ++ "<sync_predtab>" + { + return arm_output_sync_insn (insn, operands); +- } ++ } + [(set_attr "sync_release_barrier" "no") + (set_attr "sync_result" "0") + (set_attr "sync_memory" "1") +@@ -364,10 +258,10 @@ + (zero_extend:SI (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))) + (set (match_dup 1) + (unspec_volatile:NARROW [(match_operand:SI 2 "s_register_operand" "r")] +- VUNSPEC_SYNC_LOCK)) ++ VUNSPEC_SYNC_LOCK)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 "=&r"))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" ++ "<sync_predtab>" + { + return arm_output_sync_insn (insn, operands); + } +@@ -380,22 +274,48 @@ + (set_attr "conds" "clob") + (set_attr "predicable" "no")]) + +-(define_insn "arm_sync_new_<sync_optab>si" ++(define_insn "arm_sync_new_<sync_optab><mode>" ++ [(set (match_operand:SIDI 0 "s_register_operand" "=&r") ++ (unspec_volatile:SIDI [(syncop:SIDI ++ (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q") ++ (match_operand:SIDI 2 "s_register_operand" "r")) ++ ] ++ VUNSPEC_SYNC_NEW_OP)) ++ (set (match_dup 1) ++ (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)] ++ VUNSPEC_SYNC_NEW_OP)) ++ (clobber (reg:CC CC_REGNUM)) ++ (clobber (match_scratch:SI 3 "=&r"))] ++ "<sync_predtab>" ++ { ++ return arm_output_sync_insn (insn, operands); ++ } ++ [(set_attr "sync_result" "0") ++ (set_attr "sync_memory" "1") ++ (set_attr "sync_new_value" "2") ++ (set_attr "sync_t1" "0") ++ (set_attr "sync_t2" "3") ++ (set_attr "sync_op" "<sync_optab>") ++ (set_attr "conds" "clob") ++ (set_attr "predicable" "no")]) ++ ++(define_insn "arm_sync_new_<sync_optab><mode>" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (unspec_volatile:SI [(syncop:SI +- (match_operand:SI 1 "arm_sync_memory_operand" "+Q") +- (match_operand:SI 2 "s_register_operand" "r")) +- ] +- VUNSPEC_SYNC_NEW_OP)) ++ (zero_extend:SI ++ 
(match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) ++ (match_operand:SI 2 "s_register_operand" "r")) ++ ] ++ VUNSPEC_SYNC_NEW_OP)) + (set (match_dup 1) +- (unspec_volatile:SI [(match_dup 1) (match_dup 2)] +- VUNSPEC_SYNC_NEW_OP)) ++ (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] ++ VUNSPEC_SYNC_NEW_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 "=&r"))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" ++ "<sync_predtab>" + { + return arm_output_sync_insn (insn, operands); +- } ++ } + [(set_attr "sync_result" "0") + (set_attr "sync_memory" "1") + (set_attr "sync_new_value" "2") +@@ -405,22 +325,22 @@ + (set_attr "conds" "clob") + (set_attr "predicable" "no")]) + +-(define_insn "arm_sync_new_nandsi" +- [(set (match_operand:SI 0 "s_register_operand" "=&r") +- (unspec_volatile:SI [(not:SI (and:SI +- (match_operand:SI 1 "arm_sync_memory_operand" "+Q") +- (match_operand:SI 2 "s_register_operand" "r"))) +- ] +- VUNSPEC_SYNC_NEW_OP)) ++(define_insn "arm_sync_new_nand<mode>" ++ [(set (match_operand:SIDI 0 "s_register_operand" "=&r") ++ (unspec_volatile:SIDI [(not:SIDI (and:SIDI ++ (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q") ++ (match_operand:SIDI 2 "s_register_operand" "r"))) ++ ] ++ VUNSPEC_SYNC_NEW_OP)) + (set (match_dup 1) +- (unspec_volatile:SI [(match_dup 1) (match_dup 2)] +- VUNSPEC_SYNC_NEW_OP)) ++ (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)] ++ VUNSPEC_SYNC_NEW_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 "=&r"))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" ++ "<sync_predtab>" + { + return arm_output_sync_insn (insn, operands); +- } ++ } + [(set_attr "sync_result" "0") + (set_attr "sync_memory" "1") + (set_attr "sync_new_value" "2") +@@ -430,50 +350,24 @@ + (set_attr "conds" "clob") + (set_attr "predicable" "no")]) + +-(define_insn "arm_sync_new_<sync_optab><mode>" +- [(set (match_operand:SI 0 "s_register_operand" "=&r") +- (unspec_volatile:SI [(syncop:SI +- (zero_extend:SI +- (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) +- (match_operand:SI 2 "s_register_operand" "r")) +- ] +- VUNSPEC_SYNC_NEW_OP)) +- (set (match_dup 1) +- (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] +- VUNSPEC_SYNC_NEW_OP)) +- (clobber (reg:CC CC_REGNUM)) +- (clobber (match_scratch:SI 3 "=&r"))] +- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" +- { +- return arm_output_sync_insn (insn, operands); +- } +- [(set_attr "sync_result" "0") +- (set_attr "sync_memory" "1") +- (set_attr "sync_new_value" "2") +- (set_attr "sync_t1" "0") +- (set_attr "sync_t2" "3") +- (set_attr "sync_op" "<sync_optab>") +- (set_attr "conds" "clob") +- (set_attr "predicable" "no")]) +- + (define_insn "arm_sync_new_nand<mode>" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (unspec_volatile:SI + [(not:SI + (and:SI +- (zero_extend:SI +- (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) +- (match_operand:SI 2 "s_register_operand" "r"))) ++ (zero_extend:SI ++ (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) ++ (match_operand:SI 2 "s_register_operand" "r"))) + ] VUNSPEC_SYNC_NEW_OP)) + (set (match_dup 1) + (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] +- VUNSPEC_SYNC_NEW_OP)) ++ VUNSPEC_SYNC_NEW_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 "=&r"))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" ++ "<sync_predtab>" + { + return arm_output_sync_insn (insn, operands); +- } ++ } + [(set_attr "sync_result" "0") + (set_attr "sync_memory" "1") + (set_attr "sync_new_value" "2") +@@ -483,20 
+377,20 @@ + (set_attr "conds" "clob") + (set_attr "predicable" "no")]) + +-(define_insn "arm_sync_old_<sync_optab>si" +- [(set (match_operand:SI 0 "s_register_operand" "=&r") +- (unspec_volatile:SI [(syncop:SI +- (match_operand:SI 1 "arm_sync_memory_operand" "+Q") +- (match_operand:SI 2 "s_register_operand" "r")) +- ] +- VUNSPEC_SYNC_OLD_OP)) ++(define_insn "arm_sync_old_<sync_optab><mode>" ++ [(set (match_operand:SIDI 0 "s_register_operand" "=&r") ++ (unspec_volatile:SIDI [(syncop:SIDI ++ (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q") ++ (match_operand:SIDI 2 "s_register_operand" "r")) ++ ] ++ VUNSPEC_SYNC_OLD_OP)) + (set (match_dup 1) +- (unspec_volatile:SI [(match_dup 1) (match_dup 2)] +- VUNSPEC_SYNC_OLD_OP)) ++ (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)] ++ VUNSPEC_SYNC_OLD_OP)) + (clobber (reg:CC CC_REGNUM)) +- (clobber (match_scratch:SI 3 "=&r")) ++ (clobber (match_scratch:SIDI 3 "=&r")) + (clobber (match_scratch:SI 4 "<sync_clobber>"))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" ++ "<sync_predtab>" + { + return arm_output_sync_insn (insn, operands); + } +@@ -509,47 +403,21 @@ + (set_attr "conds" "clob") + (set_attr "predicable" "no")]) + +-(define_insn "arm_sync_old_nandsi" +- [(set (match_operand:SI 0 "s_register_operand" "=&r") +- (unspec_volatile:SI [(not:SI (and:SI +- (match_operand:SI 1 "arm_sync_memory_operand" "+Q") +- (match_operand:SI 2 "s_register_operand" "r"))) +- ] +- VUNSPEC_SYNC_OLD_OP)) +- (set (match_dup 1) +- (unspec_volatile:SI [(match_dup 1) (match_dup 2)] +- VUNSPEC_SYNC_OLD_OP)) +- (clobber (reg:CC CC_REGNUM)) +- (clobber (match_scratch:SI 3 "=&r")) +- (clobber (match_scratch:SI 4 "=&r"))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" +- { +- return arm_output_sync_insn (insn, operands); +- } +- [(set_attr "sync_result" "0") +- (set_attr "sync_memory" "1") +- (set_attr "sync_new_value" "2") +- (set_attr "sync_t1" "3") +- (set_attr "sync_t2" "4") +- (set_attr "sync_op" "nand") +- (set_attr "conds" "clob") +- (set_attr "predicable" "no")]) +- + (define_insn "arm_sync_old_<sync_optab><mode>" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (unspec_volatile:SI [(syncop:SI +- (zero_extend:SI +- (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) +- (match_operand:SI 2 "s_register_operand" "r")) +- ] +- VUNSPEC_SYNC_OLD_OP)) ++ (zero_extend:SI ++ (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) ++ (match_operand:SI 2 "s_register_operand" "r")) ++ ] ++ VUNSPEC_SYNC_OLD_OP)) + (set (match_dup 1) +- (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] +- VUNSPEC_SYNC_OLD_OP)) ++ (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] ++ VUNSPEC_SYNC_OLD_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 "=&r")) + (clobber (match_scratch:SI 4 "<sync_clobber>"))] +- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" ++ "<sync_predtab>" + { + return arm_output_sync_insn (insn, operands); + } +@@ -563,20 +431,46 @@ + (set_attr "predicable" "no")]) + + (define_insn "arm_sync_old_nand<mode>" ++ [(set (match_operand:SIDI 0 "s_register_operand" "=&r") ++ (unspec_volatile:SIDI [(not:SIDI (and:SIDI ++ (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q") ++ (match_operand:SIDI 2 "s_register_operand" "r"))) ++ ] ++ VUNSPEC_SYNC_OLD_OP)) ++ (set (match_dup 1) ++ (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)] ++ VUNSPEC_SYNC_OLD_OP)) ++ (clobber (reg:CC CC_REGNUM)) ++ (clobber (match_scratch:SIDI 3 "=&r")) ++ (clobber (match_scratch:SI 4 "=&r"))] ++ "<sync_predtab>" ++ { ++ return arm_output_sync_insn 
(insn, operands); ++ } ++ [(set_attr "sync_result" "0") ++ (set_attr "sync_memory" "1") ++ (set_attr "sync_new_value" "2") ++ (set_attr "sync_t1" "3") ++ (set_attr "sync_t2" "4") ++ (set_attr "sync_op" "nand") ++ (set_attr "conds" "clob") ++ (set_attr "predicable" "no")]) ++ ++(define_insn "arm_sync_old_nand<mode>" + [(set (match_operand:SI 0 "s_register_operand" "=&r") +- (unspec_volatile:SI [(not:SI (and:SI +- (zero_extend:SI +- (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) +- (match_operand:SI 2 "s_register_operand" "r"))) +- ] +- VUNSPEC_SYNC_OLD_OP)) ++ (unspec_volatile:SI [(not:SI (and:SI ++ (zero_extend:SI ++ (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) ++ (match_operand:SI 2 "s_register_operand" "r"))) ++ ] ++ VUNSPEC_SYNC_OLD_OP)) + (set (match_dup 1) +- (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] +- VUNSPEC_SYNC_OLD_OP)) ++ (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] ++ VUNSPEC_SYNC_OLD_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 "=&r")) + (clobber (match_scratch:SI 4 "=&r"))] +- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" ++ "<sync_predtab>" + { + return arm_output_sync_insn (insn, operands); + } + +=== modified file 'gcc/config/arm/t-linux-eabi' +--- old/gcc/config/arm/t-linux-eabi 2011-01-03 20:52:22 +0000 ++++ new/gcc/config/arm/t-linux-eabi 2011-10-14 15:50:44 +0000 +@@ -36,3 +36,4 @@ + EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o + + LIB2FUNCS_STATIC_EXTRA += $(srcdir)/config/arm/linux-atomic.c ++LIB2FUNCS_STATIC_EXTRA += $(srcdir)/config/arm/linux-atomic-64bit.c + +=== added file 'gcc/testsuite/gcc.dg/di-longlong64-sync-1.c' +--- old/gcc/testsuite/gcc.dg/di-longlong64-sync-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/di-longlong64-sync-1.c 2011-10-14 15:56:32 +0000 +@@ -0,0 +1,164 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target sync_longlong } */ ++/* { dg-options "-std=gnu99" } */ ++/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */ ++/* { dg-message "note: '__sync_nand_and_fetch' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */ ++ ++ ++/* Test basic functionality of the intrinsics. The operations should ++ not be optimized away if no one checks the return values. */ ++ ++/* Based on ia64-sync-[12].c, but 1) long on ARM is 32 bit so use long long ++ (an explicit 64bit type maybe a better bet) and 2) Use values that cross ++ the 32bit boundary and cause carries since the actual maths are done as ++ pairs of 32 bit instructions. */ ++ ++/* Note: This file is #included by some of the ARM tests. */ ++ ++__extension__ typedef __SIZE_TYPE__ size_t; ++ ++extern void abort (void); ++extern void *memcpy (void *, const void *, size_t); ++extern int memcmp (const void *, const void *, size_t); ++ ++/* Temporary space where the work actually gets done. */ ++static long long AL[24]; ++/* Values copied into AL before we start. */ ++static long long init_di[24] = { 0x100000002ll, 0x200000003ll, 0, 1, ++ ++ 0x100000002ll, 0x100000002ll, ++ 0x100000002ll, 0x100000002ll, ++ ++ 0, 0x1000e0de0000ll, ++ 42 , 0xc001c0de0000ll, ++ ++ -1ll, 0, 0xff00ff0000ll, -1ll, ++ ++ 0, 0x1000e0de0000ll, ++ 42 , 0xc001c0de0000ll, ++ ++ -1ll, 0, 0xff00ff0000ll, -1ll}; ++/* This is what should be in AL at the end. 
*/ ++static long long test_di[24] = { 0x1234567890ll, 0x1234567890ll, 1, 0, ++ ++ 0x100000002ll, 0x100000002ll, ++ 0x100000002ll, 0x100000002ll, ++ ++ 1, 0xc001c0de0000ll, ++ 20, 0x1000e0de0000ll, ++ ++ 0x300000007ll , 0x500000009ll, ++ 0xf100ff0001ll, ~0xa00000007ll, ++ ++ 1, 0xc001c0de0000ll, ++ 20, 0x1000e0de0000ll, ++ ++ 0x300000007ll , 0x500000009ll, ++ 0xf100ff0001ll, ~0xa00000007ll }; ++ ++/* First check they work in terms of what they do to memory. */ ++static void ++do_noret_di (void) ++{ ++ __sync_val_compare_and_swap (AL+0, 0x100000002ll, 0x1234567890ll); ++ __sync_bool_compare_and_swap (AL+1, 0x200000003ll, 0x1234567890ll); ++ __sync_lock_test_and_set (AL+2, 1); ++ __sync_lock_release (AL+3); ++ ++ /* The following tests should not change the value since the ++ original does NOT match. */ ++ __sync_val_compare_and_swap (AL+4, 0x000000002ll, 0x1234567890ll); ++ __sync_val_compare_and_swap (AL+5, 0x100000000ll, 0x1234567890ll); ++ __sync_bool_compare_and_swap (AL+6, 0x000000002ll, 0x1234567890ll); ++ __sync_bool_compare_and_swap (AL+7, 0x100000000ll, 0x1234567890ll); ++ ++ __sync_fetch_and_add (AL+8, 1); ++ __sync_fetch_and_add (AL+9, 0xb000e0000000ll); /* + to both halves & carry. */ ++ __sync_fetch_and_sub (AL+10, 22); ++ __sync_fetch_and_sub (AL+11, 0xb000e0000000ll); ++ ++ __sync_fetch_and_and (AL+12, 0x300000007ll); ++ __sync_fetch_and_or (AL+13, 0x500000009ll); ++ __sync_fetch_and_xor (AL+14, 0xe00000001ll); ++ __sync_fetch_and_nand (AL+15, 0xa00000007ll); ++ ++ /* These should be the same as the fetch_and_* cases except for ++ return value. */ ++ __sync_add_and_fetch (AL+16, 1); ++ /* add to both halves & carry. */ ++ __sync_add_and_fetch (AL+17, 0xb000e0000000ll); ++ __sync_sub_and_fetch (AL+18, 22); ++ __sync_sub_and_fetch (AL+19, 0xb000e0000000ll); ++ ++ __sync_and_and_fetch (AL+20, 0x300000007ll); ++ __sync_or_and_fetch (AL+21, 0x500000009ll); ++ __sync_xor_and_fetch (AL+22, 0xe00000001ll); ++ __sync_nand_and_fetch (AL+23, 0xa00000007ll); ++} ++ ++/* Now check return values. */ ++static void ++do_ret_di (void) ++{ ++ if (__sync_val_compare_and_swap (AL+0, 0x100000002ll, 0x1234567890ll) != ++ 0x100000002ll) abort (); ++ if (__sync_bool_compare_and_swap (AL+1, 0x200000003ll, 0x1234567890ll) != ++ 1) abort (); ++ if (__sync_lock_test_and_set (AL+2, 1) != 0) abort (); ++ __sync_lock_release (AL+3); /* no return value, but keep to match results. */ ++ ++ /* The following tests should not change the value since the ++ original does NOT match. 
*/ ++ if (__sync_val_compare_and_swap (AL+4, 0x000000002ll, 0x1234567890ll) != ++ 0x100000002ll) abort (); ++ if (__sync_val_compare_and_swap (AL+5, 0x100000000ll, 0x1234567890ll) != ++ 0x100000002ll) abort (); ++ if (__sync_bool_compare_and_swap (AL+6, 0x000000002ll, 0x1234567890ll) != ++ 0) abort (); ++ if (__sync_bool_compare_and_swap (AL+7, 0x100000000ll, 0x1234567890ll) != ++ 0) abort (); ++ ++ if (__sync_fetch_and_add (AL+8, 1) != 0) abort (); ++ if (__sync_fetch_and_add (AL+9, 0xb000e0000000ll) != 0x1000e0de0000ll) abort (); ++ if (__sync_fetch_and_sub (AL+10, 22) != 42) abort (); ++ if (__sync_fetch_and_sub (AL+11, 0xb000e0000000ll) != 0xc001c0de0000ll) ++ abort (); ++ ++ if (__sync_fetch_and_and (AL+12, 0x300000007ll) != -1ll) abort (); ++ if (__sync_fetch_and_or (AL+13, 0x500000009ll) != 0) abort (); ++ if (__sync_fetch_and_xor (AL+14, 0xe00000001ll) != 0xff00ff0000ll) abort (); ++ if (__sync_fetch_and_nand (AL+15, 0xa00000007ll) != -1ll) abort (); ++ ++ /* These should be the same as the fetch_and_* cases except for ++ return value. */ ++ if (__sync_add_and_fetch (AL+16, 1) != 1) abort (); ++ if (__sync_add_and_fetch (AL+17, 0xb000e0000000ll) != 0xc001c0de0000ll) ++ abort (); ++ if (__sync_sub_and_fetch (AL+18, 22) != 20) abort (); ++ if (__sync_sub_and_fetch (AL+19, 0xb000e0000000ll) != 0x1000e0de0000ll) ++ abort (); ++ ++ if (__sync_and_and_fetch (AL+20, 0x300000007ll) != 0x300000007ll) abort (); ++ if (__sync_or_and_fetch (AL+21, 0x500000009ll) != 0x500000009ll) abort (); ++ if (__sync_xor_and_fetch (AL+22, 0xe00000001ll) != 0xf100ff0001ll) abort (); ++ if (__sync_nand_and_fetch (AL+23, 0xa00000007ll) != ~0xa00000007ll) abort (); ++} ++ ++int main () ++{ ++ memcpy (AL, init_di, sizeof (init_di)); ++ ++ do_noret_di (); ++ ++ if (memcmp (AL, test_di, sizeof (test_di))) ++ abort (); ++ ++ memcpy (AL, init_di, sizeof (init_di)); ++ ++ do_ret_di (); ++ ++ if (memcmp (AL, test_di, sizeof (test_di))) ++ abort (); ++ ++ return 0; ++} + +=== added file 'gcc/testsuite/gcc.dg/di-sync-multithread.c' +--- old/gcc/testsuite/gcc.dg/di-sync-multithread.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/di-sync-multithread.c 2011-10-14 15:56:32 +0000 +@@ -0,0 +1,205 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target sync_longlong } */ ++/* { dg-require-effective-target pthread_h } */ ++/* { dg-require-effective-target pthread } */ ++/* { dg-options "-pthread -std=gnu99" } */ ++ ++/* test of long long atomic ops performed in parallel in 3 pthreads ++ david.gilbert@linaro.org */ ++ ++#include <pthread.h> ++#include <unistd.h> ++ ++/*#define DEBUGIT 1 */ ++ ++#ifdef DEBUGIT ++#include <stdio.h> ++ ++#define DOABORT(x,...) {\ ++ fprintf (stderr, x, __VA_ARGS__); fflush (stderr); abort ();\ ++ } ++ ++#else ++ ++#define DOABORT(x,...) abort (); ++ ++#endif ++ ++/* Passed to each thread to describe which bits it is going to work on. */ ++struct threadwork { ++ unsigned long long count; /* incremented each time the worker loops. */ ++ unsigned int thread; /* ID */ ++ unsigned int addlsb; /* 8 bit */ ++ unsigned int logic1lsb; /* 5 bit */ ++ unsigned int logic2lsb; /* 8 bit */ ++}; ++ ++/* The shared word where all the atomic work is done. */ ++static volatile long long workspace; ++ ++/* A shared word to tell the workers to quit when non-0. */ ++static long long doquit; ++ ++extern void abort (void); ++ ++/* Note this test doesn't test the return values much. 
*/ ++void* ++worker (void* data) ++{ ++ struct threadwork *tw = (struct threadwork*)data; ++ long long add1bit = 1ll << tw->addlsb; ++ long long logic1bit = 1ll << tw->logic1lsb; ++ long long logic2bit = 1ll << tw->logic2lsb; ++ ++ /* Clear the bits we use. */ ++ __sync_and_and_fetch (&workspace, ~(0xffll * add1bit)); ++ __sync_fetch_and_and (&workspace, ~(0x1fll * logic1bit)); ++ __sync_fetch_and_and (&workspace, ~(0xffll * logic2bit)); ++ ++ do ++ { ++ long long tmp1, tmp2, tmp3; ++ /* OK, lets try and do some stuff to the workspace - by the end ++ of the main loop our area should be the same as it is now - i.e. 0. */ ++ ++ /* Push the arithmetic section upto 128 - one of the threads will ++ case this to carry accross the 32bit boundary. */ ++ for (tmp2 = 0; tmp2 < 64; tmp2++) ++ { ++ /* Add 2 using the two different adds. */ ++ tmp1 = __sync_add_and_fetch (&workspace, add1bit); ++ tmp3 = __sync_fetch_and_add (&workspace, add1bit); ++ ++ /* The value should be the intermediate add value in both cases. */ ++ if ((tmp1 & (add1bit * 0xff)) != (tmp3 & (add1bit * 0xff))) ++ DOABORT ("Mismatch of add intermediates on thread %d " ++ "workspace=0x%llx tmp1=0x%llx " ++ "tmp2=0x%llx tmp3=0x%llx\n", ++ tw->thread, workspace, tmp1, tmp2, tmp3); ++ } ++ ++ /* Set the logic bits. */ ++ tmp2=__sync_or_and_fetch (&workspace, ++ 0x1fll * logic1bit | 0xffll * logic2bit); ++ ++ /* Check the logic bits are set and the arithmetic value is correct. */ ++ if ((tmp2 & (0x1fll * logic1bit | 0xffll * logic2bit ++ | 0xffll * add1bit)) ++ != (0x1fll * logic1bit | 0xffll * logic2bit | 0x80ll * add1bit)) ++ DOABORT ("Midloop check failed on thread %d " ++ "workspace=0x%llx tmp2=0x%llx " ++ "masktmp2=0x%llx expected=0x%llx\n", ++ tw->thread, workspace, tmp2, ++ tmp2 & (0x1fll * logic1bit | 0xffll * logic2bit | ++ 0xffll * add1bit), ++ (0x1fll * logic1bit | 0xffll * logic2bit | 0x80ll * add1bit)); ++ ++ /* Pull the arithmetic set back down to 0 - again this should cause a ++ carry across the 32bit boundary in one thread. */ ++ ++ for (tmp2 = 0; tmp2 < 64; tmp2++) ++ { ++ /* Subtract 2 using the two different subs. */ ++ tmp1=__sync_sub_and_fetch (&workspace, add1bit); ++ tmp3=__sync_fetch_and_sub (&workspace, add1bit); ++ ++ /* The value should be the intermediate sub value in both cases. */ ++ if ((tmp1 & (add1bit * 0xff)) != (tmp3 & (add1bit * 0xff))) ++ DOABORT ("Mismatch of sub intermediates on thread %d " ++ "workspace=0x%llx tmp1=0x%llx " ++ "tmp2=0x%llx tmp3=0x%llx\n", ++ tw->thread, workspace, tmp1, tmp2, tmp3); ++ } ++ ++ ++ /* Clear the logic bits. */ ++ __sync_fetch_and_xor (&workspace, 0x1fll * logic1bit); ++ tmp3=__sync_and_and_fetch (&workspace, ~(0xffll * logic2bit)); ++ ++ /* The logic bits and the arithmetic bits should be zero again. */ ++ if (tmp3 & (0x1fll * logic1bit | 0xffll * logic2bit | 0xffll * add1bit)) ++ DOABORT ("End of worker loop; bits none 0 on thread %d " ++ "workspace=0x%llx tmp3=0x%llx " ++ "mask=0x%llx maskedtmp3=0x%llx\n", ++ tw->thread, workspace, tmp3, (0x1fll * logic1bit | ++ 0xffll * logic2bit | 0xffll * add1bit), ++ tmp3 & (0x1fll * logic1bit | 0xffll * logic2bit | 0xffll * add1bit)); ++ ++ __sync_add_and_fetch (&tw->count, 1); ++ } ++ while (!__sync_bool_compare_and_swap (&doquit, 1, 1)); ++ ++ pthread_exit (0); ++} ++ ++int ++main () ++{ ++ /* We have 3 threads doing three sets of operations, an 8 bit ++ arithmetic field, a 5 bit logic field and an 8 bit logic ++ field (just to pack them all in). 
++ ++ 6 5 4 4 3 2 1 ++ 3 6 8 0 2 4 6 8 0 ++ |...,...|...,...|...,...|...,...|...,...|...,...|...,...|...,... ++ - T0 -- T1 -- T2 --T2 -- T0 -*- T2-- T1-- T1 -***- T0- ++ logic2 logic2 arith log2 arith log1 log1 arith log1 ++ ++ */ ++ unsigned int t; ++ long long tmp; ++ int err; ++ ++ struct threadwork tw[3]={ ++ { 0ll, 0, 27, 0, 56 }, ++ { 0ll, 1, 8,16, 48 }, ++ { 0ll, 2, 40,21, 35 } ++ }; ++ ++ pthread_t threads[3]; ++ ++ __sync_lock_release (&doquit); ++ ++ /* Get the work space into a known value - All 1's. */ ++ __sync_lock_release (&workspace); /* Now all 0. */ ++ tmp = __sync_val_compare_and_swap (&workspace, 0, -1ll); ++ if (tmp!=0) ++ DOABORT ("Initial __sync_val_compare_and_swap wasn't 0 workspace=0x%llx " ++ "tmp=0x%llx\n", workspace,tmp); ++ ++ for (t = 0; t < 3; t++) ++ { ++ err=pthread_create (&threads[t], NULL , worker, &tw[t]); ++ if (err) DOABORT ("pthread_create failed on thread %d with error %d\n", ++ t, err); ++ }; ++ ++ sleep (5); ++ ++ /* Stop please. */ ++ __sync_lock_test_and_set (&doquit, 1ll); ++ ++ for (t = 0; t < 3; t++) ++ { ++ err=pthread_join (threads[t], NULL); ++ if (err) ++ DOABORT ("pthread_join failed on thread %d with error %d\n", t, err); ++ }; ++ ++ __sync_synchronize (); ++ ++ /* OK, so all the workers have finished - ++ the workers should have zero'd their workspace, the unused areas ++ should still be 1. */ ++ if (!__sync_bool_compare_and_swap (&workspace, 0x040000e0ll, 0)) ++ DOABORT ("End of run workspace mismatch, got %llx\n", workspace); ++ ++ /* All the workers should have done some work. */ ++ for (t = 0; t < 3; t++) ++ { ++ if (tw[t].count == 0) DOABORT ("Worker %d gave 0 count\n", t); ++ }; ++ ++ return 0; ++} ++ + +=== added file 'gcc/testsuite/gcc.target/arm/di-longlong64-sync-withhelpers.c' +--- old/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withhelpers.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withhelpers.c 2011-10-14 15:56:32 +0000 +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_arch_v5_ok } */ ++/* { dg-options "-std=gnu99" } */ ++/* { dg-add-options arm_arch_v5 } */ ++/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */ ++/* { dg-message "note: '__sync_nand_and_fetch' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */ ++/* { dg-message "file included" "In file included" { target *-*-* } 0 } */ ++ ++#include "../../gcc.dg/di-longlong64-sync-1.c" ++ ++/* On an old ARM we have no ldrexd or strexd so we have to use helpers. 
*/ ++/* { dg-final { scan-assembler-not "ldrexd" } } */ ++/* { dg-final { scan-assembler-not "strexd" } } */ ++/* { dg-final { scan-assembler "__sync_" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c' +--- old/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c 2011-10-14 15:56:32 +0000 +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_arm_ok } */ ++/* { dg-options "-marm -std=gnu99" } */ ++/* { dg-require-effective-target arm_arch_v6k_ok } */ ++/* { dg-add-options arm_arch_v6k } */ ++/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */ ++/* { dg-message "note: '__sync_nand_and_fetch' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */ ++/* { dg-message "file included" "In file included" { target *-*-* } 0 } */ ++ ++#include "../../gcc.dg/di-longlong64-sync-1.c" ++ ++/* We should be using ldrexd, strexd and no helpers or shorter ldrex. */ ++/* { dg-final { scan-assembler-times "\tldrexd" 46 } } */ ++/* { dg-final { scan-assembler-times "\tstrexd" 46 } } */ ++/* { dg-final { scan-assembler-not "__sync_" } } */ ++/* { dg-final { scan-assembler-not "ldrex\t" } } */ ++/* { dg-final { scan-assembler-not "strex\t" } } */ + +=== modified file 'gcc/testsuite/lib/target-supports.exp' +--- old/gcc/testsuite/lib/target-supports.exp 2011-11-22 17:10:17 +0000 ++++ new/gcc/testsuite/lib/target-supports.exp 2011-11-28 15:07:01 +0000 +@@ -2000,6 +2000,47 @@ + check_effective_target_arm_fp16_ok_nocache] + } + ++# Creates a series of routines that return 1 if the given architecture ++# can be selected and a routine to give the flags to select that architecture ++# Note: Extra flags may be added to disable options from newer compilers ++# (Thumb in particular - but others may be added in the future) ++# Usage: /* { dg-require-effective-target arm_arch_v5_ok } */ ++# /* { dg-add-options arm_arch_v5 } */ ++foreach { armfunc armflag armdef } { v5 "-march=armv5 -marm" __ARM_ARCH_5__ ++ v6 "-march=armv6" __ARM_ARCH_6__ ++ v6k "-march=armv6k" __ARM_ARCH_6K__ ++ v7a "-march=armv7-a" __ARM_ARCH_7A__ } { ++ eval [string map [list FUNC $armfunc FLAG $armflag DEF $armdef ] { ++ proc check_effective_target_arm_arch_FUNC_ok { } { ++ if { [ string match "*-marm*" "FLAG" ] && ++ ![check_effective_target_arm_arm_ok] } { ++ return 0 ++ } ++ return [check_no_compiler_messages arm_arch_FUNC_ok assembly { ++ #if !defined (DEF) ++ #error FOO ++ #endif ++ } "FLAG" ] ++ } ++ ++ proc add_options_for_arm_arch_FUNC { flags } { ++ return "$flags FLAG" ++ } ++ }] ++} ++ ++# Return 1 if this is an ARM target where -marm causes ARM to be ++# used (not Thumb) ++ ++proc check_effective_target_arm_arm_ok { } { ++ return [check_no_compiler_messages arm_arm_ok assembly { ++ #if !defined (__arm__) || defined (__thumb__) || defined (__thumb2__) ++ #error FOO ++ #endif ++ } "-marm"] ++} ++ ++ + # Return 1 is this is an ARM target where -mthumb causes Thumb-1 to be + # used. 
+ +@@ -3384,6 +3425,31 @@ + return $et_sync_int_long_saved + } + ++# Return 1 if the target supports atomic operations on "long long" and can ++# execute them ++# So far only put checks in for ARM, others may want to add their own ++proc check_effective_target_sync_longlong { } { ++ return [check_runtime sync_longlong_runtime { ++ #include <stdlib.h> ++ int main () ++ { ++ long long l1; ++ ++ if (sizeof (long long) != 8) ++ exit (1); ++ ++ #ifdef __arm__ ++ /* Just check for native; checking for kernel fallback is tricky. */ ++ asm volatile ("ldrexd r0,r1, [%0]" : : "r" (&l1) : "r0", "r1"); ++ #else ++ # error "Add other suitable archs here" ++ #endif ++ ++ exit (0); ++ } ++ } "" ] ++} ++ + # Return 1 if the target supports atomic operations on "char" and "short". + + proc check_effective_target_sync_char_short { } { + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106846.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106846.patch new file mode 100644 index 0000000000..88d74c72f3 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106846.patch @@ -0,0 +1,487 @@ +2011-11-27 Ira Rosen <ira.rosen@linaro.org> + + Needs to be merged upstream: + + gcc/ + * tree-vect-patterns.c (widened_name_p): Rename to ... + (type_conversion_p): ... this. Add new argument to determine + if it's a promotion or demotion operation. Check for + CONVERT_EXPR_CODE_P instead of NOP_EXPR. + (vect_recog_dot_prod_pattern): Call type_conversion_p instead + widened_name_p. + (vect_recog_widen_mult_pattern, vect_recog_widen_sum_pattern, + vect_operation_fits_smaller_type, vect_recog_widen_shift_pattern): + Likewise. + (vect_recog_mixed_size_cond_pattern): Likewise and allow + non-constant then and else clauses. + + gcc/testsuite/ + * gcc.dg/vect/bb-slp-cond-3.c: New test. + * gcc.dg/vect/bb-slp-cond-4.c: New test. + +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-cond-3.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-cond-3.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-cond-3.c 2011-11-27 11:29:32 +0000 +@@ -0,0 +1,85 @@ ++/* { dg-require-effective-target vect_condition } */ ++ ++#include "tree-vect.h" ++ ++#define N 64 ++ ++/* Comparison in int, then/else and result in unsigned char. 
*/ ++ ++static inline unsigned char ++foo (int x, int y, int a, int b) ++{ ++ if (x >= y) ++ return a; ++ else ++ return b; ++} ++ ++__attribute__((noinline, noclone)) void ++bar (unsigned char * __restrict__ a, unsigned char * __restrict__ b, ++ unsigned char * __restrict__ c, unsigned char * __restrict__ d, ++ unsigned char * __restrict__ e, int stride, int w) ++{ ++ int i; ++ for (i = 0; i < N/stride; i++, a += stride, b += stride, c += stride, ++ d += stride, e += stride) ++ { ++ e[0] = foo (c[0], d[0], a[0] * w, b[0] * w); ++ e[1] = foo (c[1], d[1], a[1] * w, b[1] * w); ++ e[2] = foo (c[2], d[2], a[2] * w, b[2] * w); ++ e[3] = foo (c[3], d[3], a[3] * w, b[3] * w); ++ e[4] = foo (c[4], d[4], a[4] * w, b[4] * w); ++ e[5] = foo (c[5], d[5], a[5] * w, b[5] * w); ++ e[6] = foo (c[6], d[6], a[6] * w, b[6] * w); ++ e[7] = foo (c[7], d[7], a[7] * w, b[7] * w); ++ e[8] = foo (c[8], d[8], a[8] * w, b[8] * w); ++ e[9] = foo (c[9], d[9], a[9] * w, b[9] * w); ++ e[10] = foo (c[10], d[10], a[10] * w, b[10] * w); ++ e[11] = foo (c[11], d[11], a[11] * w, b[11] * w); ++ e[12] = foo (c[12], d[12], a[12] * w, b[12] * w); ++ e[13] = foo (c[13], d[13], a[13] * w, b[13] * w); ++ e[14] = foo (c[14], d[14], a[14] * w, b[14] * w); ++ e[15] = foo (c[15], d[15], a[15] * w, b[15] * w); ++ } ++} ++ ++ ++unsigned char a[N], b[N], c[N], d[N], e[N]; ++ ++int main () ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ a[i] = i; ++ b[i] = 5; ++ e[i] = 0; ++ ++ switch (i % 9) ++ { ++ case 0: asm (""); c[i] = i; d[i] = i + 1; break; ++ case 1: c[i] = 0; d[i] = 0; break; ++ case 2: c[i] = i + 1; d[i] = i - 1; break; ++ case 3: c[i] = i; d[i] = i + 7; break; ++ case 4: c[i] = i; d[i] = i; break; ++ case 5: c[i] = i + 16; d[i] = i + 3; break; ++ case 6: c[i] = i - 5; d[i] = i; break; ++ case 7: c[i] = i; d[i] = i; break; ++ case 8: c[i] = i; d[i] = i - 7; break; ++ } ++ } ++ ++ bar (a, b, c, d, e, 16, 2); ++ for (i = 0; i < N; i++) ++ if (e[i] != ((i % 3) == 0 ? 10 : 2 * i)) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_element_align && vect_int_mult } } } } */ ++/* { dg-final { cleanup-tree-dump "slp" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-cond-4.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-cond-4.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-cond-4.c 2011-11-27 11:29:32 +0000 +@@ -0,0 +1,85 @@ ++/* { dg-require-effective-target vect_condition } */ ++ ++#include "tree-vect.h" ++ ++#define N 64 ++ ++/* Comparison in short, then/else and result in int. 
*/ ++static inline int ++foo (short x, short y, int a, int b) ++{ ++ if (x >= y) ++ return a; ++ else ++ return b; ++} ++ ++__attribute__((noinline, noclone)) void ++bar (short * __restrict__ a, short * __restrict__ b, ++ short * __restrict__ c, short * __restrict__ d, ++ int * __restrict__ e, int stride, int w) ++{ ++ int i; ++ for (i = 0; i < N/stride; i++, a += stride, b += stride, c += stride, ++ d += stride, e += stride) ++ { ++ e[0] = foo (c[0], d[0], a[0], b[0]); ++ e[1] = foo (c[1], d[1], a[1], b[1]); ++ e[2] = foo (c[2], d[2], a[2], b[2]); ++ e[3] = foo (c[3], d[3], a[3], b[3]); ++ e[4] = foo (c[4], d[4], a[4], b[4]); ++ e[5] = foo (c[5], d[5], a[5], b[5]); ++ e[6] = foo (c[6], d[6], a[6], b[6]); ++ e[7] = foo (c[7], d[7], a[7], b[7]); ++ e[8] = foo (c[8], d[8], a[8], b[8]); ++ e[9] = foo (c[9], d[9], a[9], b[9]); ++ e[10] = foo (c[10], d[10], a[10], b[10]); ++ e[11] = foo (c[11], d[11], a[11], b[11]); ++ e[12] = foo (c[12], d[12], a[12], b[12]); ++ e[13] = foo (c[13], d[13], a[13], b[13]); ++ e[14] = foo (c[14], d[14], a[14], b[14]); ++ e[15] = foo (c[15], d[15], a[15], b[15]); ++ } ++} ++ ++ ++short a[N], b[N], c[N], d[N]; ++int e[N]; ++ ++int main () ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ a[i] = i; ++ b[i] = 5; ++ e[i] = 0; ++ ++ switch (i % 9) ++ { ++ case 0: asm (""); c[i] = - i - 1; d[i] = i + 1; break; ++ case 1: c[i] = 0; d[i] = 0; break; ++ case 2: c[i] = i + 1; d[i] = - i - 1; break; ++ case 3: c[i] = i; d[i] = i + 7; break; ++ case 4: c[i] = i; d[i] = i; break; ++ case 5: c[i] = i + 16; d[i] = i + 3; break; ++ case 6: c[i] = - i - 5; d[i] = - i; break; ++ case 7: c[i] = - i; d[i] = - i; break; ++ case 8: c[i] = - i; d[i] = - i - 7; break; ++ } ++ } ++ ++ bar (a, b, c, d, e, 16, 2); ++ for (i = 0; i < N; i++) ++ if (e[i] != ((i % 3) == 0 ? 5 : i)) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */ ++/* { dg-final { cleanup-tree-dump "slp" } } */ ++ + +=== modified file 'gcc/tree-vect-patterns.c' +--- old/gcc/tree-vect-patterns.c 2011-11-23 07:49:33 +0000 ++++ new/gcc/tree-vect-patterns.c 2011-11-27 12:17:31 +0000 +@@ -62,18 +62,16 @@ + vect_recog_mixed_size_cond_pattern}; + + +-/* Function widened_name_p +- +- Check whether NAME, an ssa-name used in USE_STMT, +- is a result of a type-promotion, such that: +- DEF_STMT: NAME = NOP (name0) +- where the type of name0 (HALF_TYPE) is smaller than the type of NAME. ++/* Check whether NAME, an ssa-name used in USE_STMT, ++ is a result of a type promotion or demotion, such that: ++ DEF_STMT: NAME = NOP (name0) ++ where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME. + If CHECK_SIGN is TRUE, check that either both types are signed or both are + unsigned. */ + + static bool +-widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt, +- bool check_sign) ++type_conversion_p (tree name, gimple use_stmt, bool check_sign, ++ tree *orig_type, gimple *def_stmt, bool *promotion) + { + tree dummy; + gimple dummy_gimple; +@@ -96,21 +94,27 @@ + && dt != vect_external_def && dt != vect_constant_def) + return false; + +- if (! 
*def_stmt) ++ if (!*def_stmt) + return false; + + if (!is_gimple_assign (*def_stmt)) + return false; + +- if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR) ++ if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt))) + return false; + + oprnd0 = gimple_assign_rhs1 (*def_stmt); + +- *half_type = TREE_TYPE (oprnd0); +- if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type) +- || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign) +- || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2))) ++ *orig_type = TREE_TYPE (oprnd0); ++ if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type) ++ || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign)) ++ return false; ++ ++ if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2)) ++ *promotion = true; ++ else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2)) ++ *promotion = false; ++ else + return false; + + if (!vect_is_simple_use (oprnd0, loop_vinfo, bb_vinfo, &dummy_gimple, &dummy, +@@ -192,6 +196,7 @@ + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); + struct loop *loop; + tree var, rhs; ++ bool promotion; + + if (!loop_info) + return NULL; +@@ -255,7 +260,9 @@ + return NULL; + stmt = last_stmt; + +- if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true)) ++ if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt, ++ &promotion) ++ && promotion) + { + stmt = def_stmt; + oprnd0 = gimple_assign_rhs1 (stmt); +@@ -310,10 +317,14 @@ + if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type) + || !types_compatible_p (TREE_TYPE (oprnd1), prod_type)) + return NULL; +- if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true)) ++ if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt, ++ &promotion) ++ || !promotion) + return NULL; + oprnd00 = gimple_assign_rhs1 (def_stmt); +- if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true)) ++ if (!type_conversion_p (oprnd0, stmt, true, &half_type1, &def_stmt, ++ &promotion) ++ || !promotion) + return NULL; + oprnd01 = gimple_assign_rhs1 (def_stmt); + if (!types_compatible_p (half_type0, half_type1)) +@@ -526,7 +537,7 @@ + enum tree_code dummy_code; + int dummy_int; + VEC (tree, heap) *dummy_vec; +- bool op1_ok; ++ bool op1_ok, promotion; + + if (!is_gimple_assign (last_stmt)) + return NULL; +@@ -546,12 +557,14 @@ + return NULL; + + /* Check argument 0. */ +- if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false)) ++ if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0, ++ &promotion) ++ || !promotion) + return NULL; +- /* Check argument 1. */ +- op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false); +- +- if (op1_ok) ++ /* Check argument 1. */ ++ op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1, ++ &def_stmt1, &promotion); ++ if (op1_ok && promotion) + { + oprnd0 = gimple_assign_rhs1 (def_stmt0); + oprnd1 = gimple_assign_rhs1 (def_stmt1); +@@ -793,6 +806,7 @@ + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); + struct loop *loop; + tree var; ++ bool promotion; + + if (!loop_info) + return NULL; +@@ -832,8 +846,10 @@ + Left to check that oprnd0 is defined by a cast from type 'type' to type + 'TYPE'. 
*/ + +- if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true)) +- return NULL; ++ if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt, ++ &promotion) ++ || !promotion) ++ return NULL; + + oprnd0 = gimple_assign_rhs1 (stmt); + *type_in = half_type; +@@ -899,6 +915,7 @@ + gimple def_stmt, new_stmt; + bool first = false; + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); ++ bool promotion; + bb_vec_info bb_info = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)); + struct loop *loop = NULL; + +@@ -933,7 +950,9 @@ + else + { + first = true; +- if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false) ++ if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt, ++ &promotion) ++ || !promotion + || !gimple_bb (def_stmt) + || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))) + || (!loop && gimple_bb (def_stmt) != BB_VINFO_BB (bb_info) +@@ -1327,6 +1346,7 @@ + VEC (tree, heap) * dummy_vec; + gimple use_stmt = NULL; + bool over_widen = false; ++ bool promotion; + + if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt)) + return NULL; +@@ -1381,8 +1401,10 @@ + return NULL; + + /* Check operand 0: it has to be defined by a type promotion. */ +- if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false)) +- return NULL; ++ if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0, ++ &promotion) ++ || !promotion) ++ return NULL; + + /* Check operand 1: has to be positive. We check that it fits the type + in vect_handle_widen_op_by_const (). */ +@@ -1492,9 +1514,9 @@ + S1 a_T = x_t CMP y_t ? b_T : c_T; + + where type 'TYPE' is an integral type which has different size +- from 'type'. b_T and c_T are constants and if 'TYPE' is wider ++ from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider + than 'type', the constants need to fit into an integer type +- with the same width as 'type'. ++ with the same width as 'type') or results of conversion from 'type'. 
+ + Input: + +@@ -1523,6 +1545,9 @@ + enum machine_mode cmpmode; + gimple pattern_stmt, def_stmt; + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); ++ tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE; ++ gimple def_stmt0 = NULL, def_stmt1 = NULL; ++ bool promotion; + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); + + if (!is_gimple_assign (last_stmt) +@@ -1535,25 +1560,40 @@ + then_clause = TREE_OPERAND (op, 1); + else_clause = TREE_OPERAND (op, 2); + +- if (TREE_CODE (then_clause) != INTEGER_CST +- || TREE_CODE (else_clause) != INTEGER_CST) +- return NULL; +- + if (!COMPARISON_CLASS_P (cond_expr)) + return NULL; + + type = gimple_expr_type (last_stmt); + comp_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0)); +- if (!INTEGRAL_TYPE_P (comp_type) +- || !INTEGRAL_TYPE_P (type)) +- return NULL; +- + comp_vectype = get_vectype_for_scalar_type (comp_type); + if (comp_vectype == NULL_TREE) + return NULL; + ++ if (types_compatible_p (type, comp_type) ++ || !INTEGRAL_TYPE_P (comp_type) ++ || !INTEGRAL_TYPE_P (type)) ++ return NULL; ++ ++ if ((TREE_CODE (then_clause) != INTEGER_CST ++ && !type_conversion_p (then_clause, last_stmt, false, &orig_type0, ++ &def_stmt0, &promotion)) ++ || (TREE_CODE (else_clause) != INTEGER_CST ++ && !type_conversion_p (else_clause, last_stmt, false, &orig_type1, ++ &def_stmt1, &promotion))) ++ return NULL; ++ ++ if (orig_type0 && orig_type1 ++ && (!types_compatible_p (orig_type0, orig_type1) ++ || !types_compatible_p (orig_type0, comp_type))) ++ return NULL; ++ ++ if (orig_type0) ++ then_clause = gimple_assign_rhs1 (def_stmt0); ++ ++ if (orig_type1) ++ else_clause = gimple_assign_rhs1 (def_stmt1); ++ + cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype)); +- + if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode)) + return NULL; + +@@ -1561,18 +1601,15 @@ + if (vectype == NULL_TREE) + return NULL; + +- if (types_compatible_p (vectype, comp_vectype)) +- return NULL; +- + if (!expand_vec_cond_expr_p (comp_vectype, TYPE_MODE (comp_vectype))) + return NULL; + +- if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode)) +- { +- if (!int_fits_type_p (then_clause, comp_type) +- || !int_fits_type_p (else_clause, comp_type)) +- return NULL; +- } ++ if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode) ++ && ((TREE_CODE (then_clause) == INTEGER_CST ++ && !int_fits_type_p (then_clause, comp_type)) ++ || (TREE_CODE (else_clause) == INTEGER_CST ++ && !int_fits_type_p (else_clause, comp_type)))) ++ return NULL; + + tmp = build3 (COND_EXPR, comp_type, unshare_expr (cond_expr), + fold_convert (comp_type, then_clause), + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106848.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106848.patch new file mode 100644 index 0000000000..43a2a4da96 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106848.patch @@ -0,0 +1,276 @@ +2011-12-05 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline -A15 tuning. + 2011-11-30 Matthew Gretton-Dann <matthew.gretton-dann@arm.com> + + * config/arm/arm.c (arm_issue_rate): Cortex-A15 can triple issue. + * config/arm/arm.md (mul64): New attribute. + (generic_sched): Cortex-A15 is not scheduled generically. + (cortex-a15.md): Include. + * config/arm/cortex-a15.md: New machine description. + * config/arm/t-arm (MD_INCLUDES): Add cortex-a15.md. 
+ + 2011-11-30 Matthew Gretton-Dann <matthew.gretton-dann@arm.com> + * config/arm/t-arm (MD_INCLUDES): Ensure all md files are listed. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-12-05 10:55:48 +0000 ++++ new/gcc/config/arm/arm.c 2011-12-05 12:33:25 +0000 +@@ -24056,6 +24056,9 @@ + { + switch (arm_tune) + { ++ case cortexa15: ++ return 3; ++ + case cortexr4: + case cortexr4f: + case cortexr5: + +=== modified file 'gcc/config/arm/arm.md' +--- old/gcc/config/arm/arm.md 2011-10-26 11:38:30 +0000 ++++ new/gcc/config/arm/arm.md 2011-12-02 00:38:59 +0000 +@@ -345,6 +345,13 @@ + (const_string "mult") + (const_string "alu"))) + ++; Is this an (integer side) multiply with a 64-bit result? ++(define_attr "mul64" "no,yes" ++ (if_then_else ++ (eq_attr "insn" "smlalxy,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals") ++ (const_string "yes") ++ (const_string "no"))) ++ + ; Load scheduling, set from the arm_ld_sched variable + ; initialized by arm_option_override() + (define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched"))) +@@ -511,7 +518,7 @@ + + (define_attr "generic_sched" "yes,no" + (const (if_then_else +- (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexm4") ++ (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexa15,cortexm4") + (eq_attr "tune_cortexr4" "yes")) + (const_string "no") + (const_string "yes")))) +@@ -537,6 +544,7 @@ + (include "cortex-a5.md") + (include "cortex-a8.md") + (include "cortex-a9.md") ++(include "cortex-a15.md") + (include "cortex-r4.md") + (include "cortex-r4f.md") + (include "cortex-m4.md") + +=== added file 'gcc/config/arm/cortex-a15.md' +--- old/gcc/config/arm/cortex-a15.md 1970-01-01 00:00:00 +0000 ++++ new/gcc/config/arm/cortex-a15.md 2011-12-02 00:38:59 +0000 +@@ -0,0 +1,186 @@ ++;; ARM Cortex-A15 pipeline description ++;; Copyright (C) 2011 Free Software Foundation, Inc. ++;; ++;; Written by Matthew Gretton-Dann <matthew.gretton-dann@arm.com> ++ ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, but ++;; WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++;; General Public License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; <http://www.gnu.org/licenses/>. ++ ++(define_automaton "cortex_a15") ++ ++;; The Cortex-A15 core is modelled as a triple issue pipeline that has ++;; the following dispatch units. ++;; 1. Two pipelines for simple integer operations: SX1, SX2 ++;; 2. Two pipelines for Neon and FP data-processing operations: CX1, CX2 ++;; 3. One pipeline for branch operations: BX ++;; 4. One pipeline for integer multiply and divide operations: MX ++;; 5. Two pipelines for load and store operations: LS1, LS2 ++;; ++;; We can issue into three pipelines per-cycle. ++;; ++;; We assume that where we have unit pairs xx1 is always filled before xx2. 
++ ++;; The three issue units ++(define_cpu_unit "ca15_i0, ca15_i1, ca15_i2" "cortex_a15") ++ ++(define_reservation "ca15_issue1" "(ca15_i0|ca15_i1|ca15_i2)") ++(define_reservation "ca15_issue2" "((ca15_i0+ca15_i1)|(ca15_i1+ca15_i2))") ++(define_reservation "ca15_issue3" "(ca15_i0+ca15_i1+ca15_i2)") ++(final_presence_set "ca15_i1" "ca15_i0") ++(final_presence_set "ca15_i2" "ca15_i1") ++ ++;; The main dispatch units ++(define_cpu_unit "ca15_sx1, ca15_sx2" "cortex_a15") ++(define_cpu_unit "ca15_cx1, ca15_cx2" "cortex_a15") ++(define_cpu_unit "ca15_ls1, ca15_ls2" "cortex_a15") ++(define_cpu_unit "ca15_bx, ca15_mx" "cortex_a15") ++ ++(define_reservation "ca15_ls" "(ca15_ls1|ca15_ls2)") ++ ++;; The extended load-store pipeline ++(define_cpu_unit "ca15_ldr, ca15_str" "cortex_a15") ++ ++;; The extended ALU pipeline ++(define_cpu_unit "ca15_sx1_alu, ca15_sx1_shf, ca15_sx1_sat" "cortex_a15") ++(define_cpu_unit "ca15_sx2_alu, ca15_sx2_shf, ca15_sx2_sat" "cortex_a15") ++ ++;; Simple Execution Unit: ++;; ++;; Simple ALU without shift ++(define_insn_reservation "cortex_a15_alu" 2 ++ (and (eq_attr "tune" "cortexa15") ++ (and (eq_attr "type" "alu") ++ (eq_attr "neon_type" "none"))) ++ "ca15_issue1,(ca15_sx1,ca15_sx1_alu)|(ca15_sx2,ca15_sx2_alu)") ++ ++;; ALU ops with immediate shift ++(define_insn_reservation "cortex_a15_alu_shift" 3 ++ (and (eq_attr "tune" "cortexa15") ++ (and (eq_attr "type" "alu_shift") ++ (eq_attr "neon_type" "none"))) ++ "ca15_issue1,(ca15_sx1,ca15_sx1+ca15_sx1_shf,ca15_sx1_alu)\ ++ |(ca15_sx2,ca15_sx2+ca15_sx2_shf,ca15_sx2_alu)") ++ ++;; ALU ops with register controlled shift ++(define_insn_reservation "cortex_a15_alu_shift_reg" 3 ++ (and (eq_attr "tune" "cortexa15") ++ (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "neon_type" "none"))) ++ "(ca15_issue2,ca15_sx1+ca15_sx2,ca15_sx1_shf,ca15_sx2_alu)\ ++ |(ca15_issue1,(ca15_issue1+ca15_sx2,ca15_sx1+ca15_sx2_shf)\ ++ |(ca15_issue1+ca15_sx1,ca15_sx1+ca15_sx1_shf),ca15_sx1_alu)") ++ ++;; Multiply Execution Unit: ++;; ++;; 32-bit multiplies ++(define_insn_reservation "cortex_a15_mult32" 3 ++ (and (eq_attr "tune" "cortexa15") ++ (and (eq_attr "type" "mult") ++ (and (eq_attr "neon_type" "none") ++ (eq_attr "mul64" "no")))) ++ "ca15_issue1,ca15_mx") ++ ++;; 64-bit multiplies ++(define_insn_reservation "cortex_a15_mult64" 4 ++ (and (eq_attr "tune" "cortexa15") ++ (and (eq_attr "type" "mult") ++ (and (eq_attr "neon_type" "none") ++ (eq_attr "mul64" "yes")))) ++ "ca15_issue1,ca15_mx*2") ++ ++;; Integer divide ++(define_insn_reservation "cortex_a15_udiv" 9 ++ (and (eq_attr "tune" "cortexa15") ++ (eq_attr "insn" "udiv")) ++ "ca15_issue1,ca15_mx") ++ ++(define_insn_reservation "cortex_a15_sdiv" 10 ++ (and (eq_attr "tune" "cortexa15") ++ (eq_attr "insn" "sdiv")) ++ "ca15_issue1,ca15_mx") ++ ++;; Block all issue pipes for a cycle ++(define_insn_reservation "cortex_a15_block" 1 ++ (and (eq_attr "tune" "cortexa15") ++ (and (eq_attr "type" "block") ++ (eq_attr "neon_type" "none"))) ++ "ca15_issue3") ++ ++;; Branch execution Unit ++;; ++;; Branches take one issue slot. ++;; No latency as there is no result ++(define_insn_reservation "cortex_a15_branch" 0 ++ (and (eq_attr "tune" "cortexa15") ++ (and (eq_attr "type" "branch") ++ (eq_attr "neon_type" "none"))) ++ "ca15_issue1,ca15_bx") ++ ++ ++;; We lie with calls. They take up all issue slots, and form a block in the ++;; pipeline. The result however is available the next cycle. ++;; ++;; Addition of new units requires this to be updated. 
++(define_insn_reservation "cortex_a15_call" 1 ++ (and (eq_attr "tune" "cortexa15") ++ (and (eq_attr "type" "call") ++ (eq_attr "neon_type" "none"))) ++ "ca15_issue3,\ ++ ca15_sx1+ca15_sx2+ca15_bx+ca15_mx+ca15_cx1+ca15_cx2+ca15_ls1+ca15_ls2,\ ++ ca15_sx1_alu+ca15_sx1_shf+ca15_sx1_sat+ca15_sx2_alu+ca15_sx2_shf\ ++ +ca15_sx2_sat+ca15_ldr+ca15_str") ++ ++;; Load-store execution Unit ++;; ++;; Loads of up to two words. ++(define_insn_reservation "cortex_a15_load1" 4 ++ (and (eq_attr "tune" "cortexa15") ++ (and (eq_attr "type" "load_byte,load1,load2") ++ (eq_attr "neon_type" "none"))) ++ "ca15_issue1,ca15_ls,ca15_ldr,nothing") ++ ++;; Loads of three or four words. ++(define_insn_reservation "cortex_a15_load3" 5 ++ (and (eq_attr "tune" "cortexa15") ++ (and (eq_attr "type" "load3,load4") ++ (eq_attr "neon_type" "none"))) ++ "ca15_issue2,ca15_ls1+ca15_ls2,ca15_ldr,ca15_ldr,nothing") ++ ++;; Stores of up to two words. ++(define_insn_reservation "cortex_a15_store1" 0 ++ (and (eq_attr "tune" "cortexa15") ++ (and (eq_attr "type" "store1,store2") ++ (eq_attr "neon_type" "none"))) ++ "ca15_issue1,ca15_ls,ca15_str") ++ ++;; Stores of three or four words. ++(define_insn_reservation "cortex_a15_store3" 0 ++ (and (eq_attr "tune" "cortexa15") ++ (and (eq_attr "type" "store3,store4") ++ (eq_attr "neon_type" "none"))) ++ "ca15_issue2,ca15_ls1+ca15_ls2,ca15_str,ca15_str") ++ ++;; Simple execution unit bypasses ++(define_bypass 1 "cortex_a15_alu" ++ "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg") ++(define_bypass 2 "cortex_a15_alu_shift" ++ "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg") ++(define_bypass 2 "cortex_a15_alu_shift_reg" ++ "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg") ++(define_bypass 1 "cortex_a15_alu" "cortex_a15_load1,cortex_a15_load3") ++(define_bypass 2 "cortex_a15_alu_shift" "cortex_a15_load1,cortex_a15_load3") ++(define_bypass 2 "cortex_a15_alu_shift_reg" ++ "cortex_a15_load1,cortex_a15_load3") + +=== modified file 'gcc/config/arm/t-arm' +--- old/gcc/config/arm/t-arm 2011-01-03 20:52:22 +0000 ++++ new/gcc/config/arm/t-arm 2011-12-02 00:38:59 +0000 +@@ -31,6 +31,16 @@ + $(srcdir)/config/arm/fmp626.md \ + $(srcdir)/config/arm/fa726te.md \ + $(srcdir)/config/arm/arm926ejs.md \ ++ $(srcdir)/config/arm/cortex-a15.md \ ++ $(srcdir)/config/arm/cortex-a5.md \ ++ $(srcdir)/config/arm/cortex-a8.md \ ++ $(srcdir)/config/arm/cortex-a8-neon.md \ ++ $(srcdir)/config/arm/cortex-a9.md \ ++ $(srcdir)/config/arm/cortex-a9-neon.md \ ++ $(srcdir)/config/arm/cortex-m4-fpu.md \ ++ $(srcdir)/config/arm/cortex-m4.md \ ++ $(srcdir)/config/arm/cortex-r4f.md \ ++ $(srcdir)/config/arm/cortex-r4.md \ + $(srcdir)/config/arm/cirrus.md \ + $(srcdir)/config/arm/fpa.md \ + $(srcdir)/config/arm/vec-common.md \ + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106853.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106853.patch new file mode 100644 index 0000000000..8c51c1d1f3 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106853.patch @@ -0,0 +1,69 @@ +2011-12-20 Ira Rosen <ira.rosen@linaro.org> + + Backport from mainline: + + 2011-11-29 Ira Rosen <ira.rosen@linaro.org> + + PR tree-optimization/51301 + gcc/ + * tree-vect-patterns.c (vect_recog_over_widening_pattern): Check that + the last statement doesn't convert to a bigger type than the original + type of the computation. + + gcc/testsuite/ + * gcc.dg/vect/pr51301.c: New test. 
+ +=== added file 'gcc/testsuite/gcc.dg/vect/pr51301.c' +--- old/gcc/testsuite/gcc.dg/vect/pr51301.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/pr51301.c 2011-11-30 17:54:51 +0000 +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++ ++typedef signed char int8_t; ++typedef signed long long int64_t; ++int64_t ++f0a (int8_t * __restrict__ arg1) ++{ ++ int idx; ++ int64_t result = 0; ++ for (idx = 0; idx < 416; idx += 1) ++ result += arg1[idx] << (arg1[idx] == arg1[idx]); ++ return result; ++} ++ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/tree-vect-patterns.c' +--- old/gcc/tree-vect-patterns.c 2011-11-27 12:17:31 +0000 ++++ new/gcc/tree-vect-patterns.c 2011-12-20 07:47:44 +0000 +@@ -1138,6 +1138,7 @@ + struct loop *loop = NULL; + bb_vec_info bb_vinfo; + stmt_vec_info stmt_vinfo; ++ tree type = NULL; + + stmt_vinfo = vinfo_for_stmt (stmt); + loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); +@@ -1207,6 +1208,7 @@ + print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); + } + ++ type = gimple_expr_type (stmt); + prev_stmt = stmt; + stmt = use_stmt; + +@@ -1222,9 +1224,11 @@ + { + use_lhs = gimple_assign_lhs (use_stmt); + use_type = TREE_TYPE (use_lhs); +- /* Support only type promotion or signedess change. */ ++ /* Support only type promotion or signedess change. Check that USE_TYPE ++ is not bigger than the original type. */ + if (!INTEGRAL_TYPE_P (use_type) +- || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type)) ++ || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type) ++ || TYPE_PRECISION (type) < TYPE_PRECISION (use_type)) + return NULL; + + if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type) + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106855.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106855.patch new file mode 100644 index 0000000000..c433fc73f1 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106855.patch @@ -0,0 +1,22 @@ +2012-01-05 Michael Hope <michael.hope@linaro.org> + + Backport from mainline r182271: + + 2011-12-13 Revital Eres <revital.eres@linaro.org> + + gcc/ + * modulo-sched.c (mark_loop_unsched): Free bbs. + +=== modified file 'gcc/modulo-sched.c' +--- old/gcc/modulo-sched.c 2011-10-30 05:31:00 +0000 ++++ new/gcc/modulo-sched.c 2012-01-05 02:45:23 +0000 +@@ -1204,6 +1204,8 @@ + + for (i = 0; i < loop->num_nodes; i++) + bbs[i]->flags |= BB_DISABLE_SCHEDULE; ++ ++ free (bbs); + } + + /* Return true if all the BBs of the loop are empty except the + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106860.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106860.patch new file mode 100644 index 0000000000..895d6a6cff --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106860.patch @@ -0,0 +1,104 @@ +2012-01-12 Ulrich Weigand <ulrich.weigand@linaro.org> + + LP 879725 + Backport from mainline: + + 2012-01-02 Revital Eres <revital.eres@linaro.org> + + gcc/ + * ddg.c (def_has_ccmode_p): New function. + (add_cross_iteration_register_deps, + create_ddg_dep_from_intra_loop_link): Call it. + + gcc/testsuite/ + * gcc.dg/sms-11.c: New file. + +=== modified file 'gcc/ddg.c' +--- old/gcc/ddg.c 2011-10-02 06:56:53 +0000 ++++ new/gcc/ddg.c 2012-01-10 16:05:14 +0000 +@@ -166,6 +166,24 @@ + return false; + } + ++/* Return true if one of the definitions in INSN has MODE_CC. Otherwise ++ return false. 
*/ ++static bool ++def_has_ccmode_p (rtx insn) ++{ ++ df_ref *def; ++ ++ for (def = DF_INSN_DEFS (insn); *def; def++) ++ { ++ enum machine_mode mode = GET_MODE (DF_REF_REG (*def)); ++ ++ if (GET_MODE_CLASS (mode) == MODE_CC) ++ return true; ++ } ++ ++ return false; ++} ++ + /* Computes the dependence parameters (latency, distance etc.), creates + a ddg_edge and adds it to the given DDG. */ + static void +@@ -202,6 +220,7 @@ + whose register has multiple defs in the loop. */ + if (flag_modulo_sched_allow_regmoves + && (t == ANTI_DEP && dt == REG_DEP) ++ && !def_has_ccmode_p (dest_node->insn) + && !autoinc_var_is_used_p (dest_node->insn, src_node->insn)) + { + rtx set; +@@ -335,7 +354,8 @@ + if (DF_REF_ID (last_def) != DF_REF_ID (first_def) + || !flag_modulo_sched_allow_regmoves + || JUMP_P (use_node->insn) +- || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn)) ++ || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn) ++ || def_has_ccmode_p (DF_REF_INSN (last_def))) + create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP, + REG_DEP, 1); + + +=== added file 'gcc/testsuite/gcc.dg/sms-11.c' +--- old/gcc/testsuite/gcc.dg/sms-11.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/sms-11.c 2012-01-10 16:05:14 +0000 +@@ -0,0 +1,37 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -fmodulo-sched -fmodulo-sched-allow-regmoves -fdump-rtl-sms" } */ ++ ++extern void abort (void); ++ ++float out[4][4] = { 6, 6, 7, 5, 6, 7, 5, 5, 6, 4, 4, 4, 6, 2, 3, 4 }; ++ ++void ++invert (void) ++{ ++ int i, j, k = 0, swap; ++ float tmp[4][4] = { 5, 6, 7, 5, 6, 7, 5, 5, 4, 4, 4, 4, 3, 2, 3, 4 }; ++ ++ for (i = 0; i < 4; i++) ++ { ++ for (j = i + 1; j < 4; j++) ++ if (tmp[j][i] > tmp[i][i]) ++ swap = j; ++ ++ if (swap != i) ++ tmp[i][k] = tmp[swap][k]; ++ } ++ ++ for (i = 0; i < 4; i++) ++ for (j = 0; j < 4; j++) ++ if (tmp[i][j] != out[i][j]) ++ abort (); ++} ++ ++int ++main () ++{ ++ invert (); ++ return 0; ++} ++ ++/* { dg-final { cleanup-rtl-dump "sms" } } */ + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106861.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106861.patch new file mode 100644 index 0000000000..0199f7b89b --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106861.patch @@ -0,0 +1,76 @@ +2012-01-18 Michael Hope <michael.hope@linaro.org> + + Backport from mainline r183126: + + 2012-01-12 Ira Rosen <irar@il.ibm.com> + + gcc/ + PR tree-optimization/51799 + * tree-vect-patterns.c (vect_recog_over_widening_pattern): Check + that the last operation is a type demotion. + + gcc/testsuite/ + * gcc.dg/vect/pr51799.c: New test. + * gcc.dg/vect/vect-widen-shift-u8.c: Expect two widening shift + patterns. 
+ +=== added file 'gcc/testsuite/gcc.dg/vect/pr51799.c' +--- old/gcc/testsuite/gcc.dg/vect/pr51799.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/pr51799.c 2012-01-18 01:53:19 +0000 +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++ ++typedef signed char int8_t; ++typedef unsigned char uint8_t; ++typedef signed short int16_t; ++typedef unsigned long uint32_t; ++void ++f0a (uint32_t * __restrict__ result, int8_t * __restrict__ arg1, ++ uint32_t * __restrict__ arg4, int8_t temp_6) ++{ ++ int idx; ++ for (idx = 0; idx < 416; idx += 1) ++ { ++ result[idx] = (uint8_t)(((arg1[idx] << 7) + arg4[idx]) * temp_6); ++ } ++} ++ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c 2011-10-23 13:33:07 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c 2012-01-18 01:53:19 +0000 +@@ -59,7 +59,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 2 "vect" { target vect_widen_shift } } } */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ +- + +=== modified file 'gcc/tree-vect-patterns.c' +--- old/gcc/tree-vect-patterns.c 2011-12-20 07:47:44 +0000 ++++ new/gcc/tree-vect-patterns.c 2012-01-18 01:53:19 +0000 +@@ -1224,13 +1224,15 @@ + { + use_lhs = gimple_assign_lhs (use_stmt); + use_type = TREE_TYPE (use_lhs); +- /* Support only type promotion or signedess change. Check that USE_TYPE +- is not bigger than the original type. */ ++ /* Support only type demotion or signedess change. */ + if (!INTEGRAL_TYPE_P (use_type) +- || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type) +- || TYPE_PRECISION (type) < TYPE_PRECISION (use_type)) ++ || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type)) + return NULL; + ++ /* Check that NEW_TYPE is not bigger than the conversion result. */ ++ if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type)) ++ return NULL; ++ + if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type) + || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type)) + { + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106862.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106862.patch new file mode 100644 index 0000000000..a20d889a56 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106862.patch @@ -0,0 +1,45 @@ +2012-01-16 Michael Hope <michael.hope@linaro.org> + + Backport from mainline r183011: + + 2012-01-09 Matthew Gretton-Dann <matthew.gretton-dann@arm.com> + + * config/arm/arm-cores.def (cortex-a15): Use cortex_a15_tune for + tuning parameters. + * config/arm/arm.c (arm_cortex_a15_tune): New static variable. 
+ +=== modified file 'gcc/config/arm/arm-cores.def' +--- old/gcc/config/arm/arm-cores.def 2011-10-19 16:46:51 +0000 ++++ new/gcc/config/arm/arm-cores.def 2012-01-15 22:02:31 +0000 +@@ -128,7 +128,7 @@ + ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, cortex_a5) + ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex) + ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9) +-ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex) ++ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) + ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, cortex) + ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, cortex) + ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2012-01-05 15:35:39 +0000 ++++ new/gcc/config/arm/arm.c 2012-01-15 22:02:31 +0000 +@@ -983,6 +983,17 @@ + arm_default_branch_cost + }; + ++const struct tune_params arm_cortex_a15_tune = ++{ ++ arm_9e_rtx_costs, ++ NULL, ++ 1, /* Constant limit. */ ++ 1, /* Max cond insns. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, /* TODO: Calculate correct values. */ ++ false, /* Prefer constant pool. */ ++ arm_cortex_a5_branch_cost ++}; ++ + const struct tune_params arm_fa726te_tune = + { + arm_9e_rtx_costs, + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106863.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106863.patch new file mode 100644 index 0000000000..e93493f17f --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106863.patch @@ -0,0 +1,47 @@ + 2012-01-16 Michael Hope <michael.hope@linaro.org> + + Backport from mainline r182561: + + 2011-12-20 Richard Henderson <rth@redhat.com> + + gcc/ + * config/arm/arm.md (*arm_cmpdi_unsigned): Enable for thumb2. + * config/arm/arm.c (arm_select_cc_mode): Use it. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2012-01-15 22:02:31 +0000 ++++ new/gcc/config/arm/arm.c 2012-01-23 00:06:27 +0000 +@@ -11602,7 +11602,7 @@ + return CC_Zmode; + + /* We can do an equality test in three Thumb instructions. */ +- if (!TARGET_ARM) ++ if (!TARGET_32BIT) + return CC_Zmode; + + /* FALLTHROUGH */ +@@ -11614,7 +11614,7 @@ + /* DImode unsigned comparisons can be implemented by cmp + + cmpeq without a scratch register. Not worth doing in + Thumb-2. 
*/ +- if (TARGET_ARM) ++ if (TARGET_32BIT) + return CC_CZmode; + + /* FALLTHROUGH */ + +=== modified file 'gcc/config/arm/arm.md' +--- old/gcc/config/arm/arm.md 2012-01-05 15:35:39 +0000 ++++ new/gcc/config/arm/arm.md 2012-01-15 21:02:00 +0000 +@@ -7515,8 +7515,8 @@ + [(set (reg:CC_CZ CC_REGNUM) + (compare:CC_CZ (match_operand:DI 0 "s_register_operand" "r") + (match_operand:DI 1 "arm_di_operand" "rDi")))] +- "TARGET_ARM" +- "cmp%?\\t%R0, %R1\;cmpeq\\t%Q0, %Q1" ++ "TARGET_32BIT" ++ "cmp\\t%R0, %R1\;it eq\;cmpeq\\t%Q0, %Q1" + [(set_attr "conds" "set") + (set_attr "length" "8")] + ) + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106864.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106864.patch new file mode 100644 index 0000000000..f15f37a583 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106864.patch @@ -0,0 +1,63 @@ + 2012-01-16 Michael Hope <michael.hope@linaro.org> + + Backport from mainline r181210: + + gcc/ + 2011-11-07 Matthew Gretton-Dann <matthew.gretton-dann@arm.com> + + * config/arm/arm-cores.def: Add -mcpu=cortex-a7. + * config/arm/arm-tables.opt: Regenerate. + * config/arm/arm-tune.md: Likewise. + * config/arm/bpabi.h (BE8_LINK_SPEC): Add Cortex A-7. + * doc/invoke.texi: Document -mcpu=cortex-a7. + +=== modified file 'gcc/config/arm/arm-cores.def' +--- old/gcc/config/arm/arm-cores.def 2012-01-15 22:02:31 +0000 ++++ new/gcc/config/arm/arm-cores.def 2012-01-23 00:36:02 +0000 +@@ -126,6 +126,7 @@ + ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, v6t2) + ARM_CORE("generic-armv7-a", genericv7a, 7A, FL_LDSCHED, cortex) + ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, cortex_a5) ++ARM_CORE("cortex-a7", cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex) + ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex) + ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9) + ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) + +=== modified file 'gcc/config/arm/arm-tune.md' +--- old/gcc/config/arm/arm-tune.md 2011-10-19 16:46:51 +0000 ++++ new/gcc/config/arm/arm-tune.md 2012-01-15 22:43:29 +0000 +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from arm-cores.def + (define_attr "tune" +- "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0" ++ 
"arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0" + (const (symbol_ref "((enum attr_tune) arm_tune)"))) + +=== modified file 'gcc/config/arm/bpabi.h' +--- old/gcc/config/arm/bpabi.h 2011-11-02 21:02:53 +0000 ++++ new/gcc/config/arm/bpabi.h 2012-01-15 22:43:29 +0000 +@@ -56,6 +56,7 @@ + "|march=armv4|mcpu=fa526|mcpu=fa626:--fix-v4bx}" + + #define BE8_LINK_SPEC " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5"\ ++ "|mcpu=cortex-a7"\ + "|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15|mcpu=generic-armv7-a"\ + ":%{!r:--be8}}}" + + +=== modified file 'gcc/doc/invoke.texi' +--- old/gcc/doc/invoke.texi 2012-01-05 15:35:39 +0000 ++++ new/gcc/doc/invoke.texi 2012-01-15 22:43:29 +0000 +@@ -10202,8 +10202,8 @@ + @samp{arm10e}, @samp{arm1020e}, @samp{arm1022e}, + @samp{arm1136j-s}, @samp{arm1136jf-s}, @samp{mpcore}, @samp{mpcorenovfp}, + @samp{arm1156t2-s}, @samp{arm1156t2f-s}, @samp{arm1176jz-s}, @samp{arm1176jzf-s}, +-@samp{cortex-a5}, @samp{cortex-a8}, @samp{cortex-a9}, @samp{cortex-a15}, +-@samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-r5}, ++@samp{cortex-a5}, @samp{cortex-a7}, @samp{cortex-a8}, @samp{cortex-a9}, ++@samp{cortex-a15}, @samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-r5}, + @samp{cortex-m4}, @samp{cortex-m3}, + @samp{cortex-m1}, + @samp{cortex-m0}, + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106865.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106865.patch new file mode 100644 index 0000000000..5b0735b004 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106865.patch @@ -0,0 +1,25 @@ +2012-01-20 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline + 2012-01-20 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + PR target/51819 + * config/arm/arm.c (arm_print_operand): Correct output of alignment + hints for neon loads and stores. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2012-01-23 00:06:27 +0000 ++++ new/gcc/config/arm/arm.c 2012-01-23 18:54:21 +0000 +@@ -17463,9 +17463,9 @@ + /* Only certain alignment specifiers are supported by the hardware. 
*/ + if (memsize == 16 && (align % 32) == 0) + align_bits = 256; +- else if ((memsize == 8 || memsize == 16) && (align % 16) == 0) ++ else if (memsize == 16 && (align % 16) == 0) + align_bits = 128; +- else if ((align % 8) == 0) ++ else if (memsize >= 8 && (align % 8) == 0) + align_bits = 64; + else + align_bits = 0; + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106869.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106869.patch new file mode 100644 index 0000000000..de3d2ff4cf --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106869.patch @@ -0,0 +1,2389 @@ +2012-02-08 Ulrich Weigand <ulrich.weigand@linaro.org> + + gcc/ + * config/arm/arm.c (arm_option_optimization_table): Enable + -fsched-pressure using -fsched-pressure-algorithm=model by + default when optimizing. + +2012-02-08 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + * sched-deps.c (fixup_sched_groups): Rename to... + (chain_to_prev_insn): ...this. + (chain_to_prev_insn_p): New function. + (deps_analyze_insn): Use it instead of SCHED_GROUP_P. + +2012-02-08 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + * sched-int.h (_haifa_insn_data): Move priority_status. + Add model_index. + (INSN_MODEL_INDEX): New macro. + * haifa-sched.c (insn_delay): New function. + (sched_regno_pressure_class): Update commentary. + (mark_regno_birth_or_death): Pass the liveness bitmap and + pressure array as arguments, instead of using curr_reg_live and + curr_reg_pressure. Only update the pressure if the bit in the + liveness set has changed. + (initiate_reg_pressure_info): Always trust the live-in set for + SCHED_PRESSURE_MODEL. + (initiate_bb_reg_pressure_info): Update call to + mark_regno_birth_or_death. + (dep_list_size): Take the list as argument. + (calculate_reg_deaths): New function, extracted from... + (setup_insn_reg_pressure_info): ...here. + (MODEL_BAR): New macro. + (model_pressure_data, model_insn_info, model_pressure_limit) + (model_pressure_group): New structures. + (model_schedule, model_worklist, model_insns, model_num_insns) + (model_curr_point, model_before_pressure, model_next_priority): + New variables. + (MODEL_PRESSURE_DATA, MODEL_MAX_PRESSURE, MODEL_REF_PRESSURE) + (MODEL_INSN_INFO, MODEL_INSN): New macros. + (model_index, model_update_limit_points_in_group): New functions. + (model_update_limit_points, model_last_use_except): Likewise. + (model_start_update_pressure, model_update_pressure): Likewise. + (model_recompute, model_spill_cost, model_excess_group_cost): Likewise. + (model_excess_cost, model_dump_pressure_points): Likewise. + (model_set_excess_costs): Likewise. + (rank_for_schedule): Extend SCHED_PRIORITY_WEIGHTED ordering to + SCHED_PRIORITY_MODEL. Use insn_delay. Use the order in the model + schedule as an alternative tie-breaker. Update the call to + dep_list_size. + (ready_sort): Call model_set_excess_costs. + (update_register_pressure): Update call to mark_regno_birth_or_death. + Rely on that function to check liveness rather than doing it here. + (model_classify_pressure, model_order_p, model_add_to_worklist_at) + (model_remove_from_worklist, model_add_to_worklist, model_promote_insn) + (model_add_to_schedule, model_analyze_insns, model_init_pressure_group) + (model_record_pressure, model_record_pressures): New functions. + (model_record_final_pressures, model_add_successors_to_worklist) + (model_promote_predecessors, model_choose_insn): Likewise. 
+ (model_reset_queue_indices, model_dump_pressure_summary): Likewise. + (model_start_schedule, model_finalize_pressure_group): Likewise. + (model_end_schedule): Likewise. + (schedule_insn): Say when we're scheduling the next instruction + in the model schedule. + (schedule_insn): Handle SCHED_PRESSURE_MODEL. + (queue_to_ready): Do not add instructions that are + MAX_SCHED_READY_INSNS beyond the current point of the model schedule. + Always allow the next instruction in the model schedule to be added. + (debug_ready_list): Print the INSN_REG_PRESSURE_EXCESS_COST_CHANGE + and delay for SCHED_PRESSURE_MODEL too. + (prune_ready_list): Extend SCHED_PRIORITY_WEIGHTED handling to + SCHED_PRIORITY_MODEL, but also take the DFA into account. + (schedule_block): Call model_start_schedule and model_end_schedule. + Extend SCHED_PRIORITY_WEIGHTED stall handling to SCHED_PRIORITY_MODEL. + (sched_init): Extend INSN_REG_PRESSURE_EXCESS_COST_CHANGE handling + to SCHED_PRESSURE_MODEL, but don't allocate saved_reg_live or + region_ref_regs. + (sched_finish): Update accordingly. + (fix_tick_ready): Extend INSN_REG_PRESSURE_EXCESS_COST_CHANGE handling + to SCHED_PRESSURE_MODEL. + (add_jump_dependencies): Update call to dep_list_size. + (haifa_finish_h_i_d): Fix leak of max_reg_pressure. + (haifa_init_insn): Extend INSN_REG_PRESSURE_EXCESS_COST_CHANGE handling + to SCHED_PRESSURE_MODEL. + * sched-deps.c (init_insn_reg_pressure_info): Likewise, but don't + allocate INSN_MAX_REG_PRESSURE for SCHED_PRESSURE_MODEL. + (sched_analyze_insn): Extend INSN_REG_PRESSURE_EXCESS_COST_CHANGE + handling to SCHED_PRESSURE_MODEL. + +2012-02-08 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + * common.opt (fsched-pressure-algorithm=): New option. + * flag-types.h (sched_pressure_algorithm): New enum. + * sched-int.h (sched_pressure_p): Replace with... + (sched_pressure): ...this new variable. + * haifa-sched.c (sched_pressure_p): Replace with... + (sched_pressure): ...this new variable. + (sched_regno_pressure_class, rank_for_schedule, ready_sort) + (update_reg_and_insn_max_reg_pressure, schedule_insn) + (debug_ready_list, schedule_block, sched_init, sched_finish) + (fix_tick_ready, haifa_init_insn): Update accordingly. + * sched-deps.c (init_insn_reg_pressure_info): Likewise. + * sched-rgn.c (schedule_region): Likewise. + +2012-02-08 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-04-01 Bernd Schmidt <bernds@codesourcery.com> + + * haifa-sched.c (prune_ready_list): New function, broken out of + schedule_block. + (schedule_block): Use it. 
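For context (not part of the patch): the common.opt hunk below adds a new
-fsched-pressure-algorithm= option whose values are "weighted" (the
pre-existing heuristic, and the Init value of the flag) and "model" (the new
model-schedule heuristic), while the arm.c hunk enables -fsched-pressure with
the model algorithm by default at -O1 and above on ARM. As an illustration
only, a build that wants the previous behaviour could pass something like
"-fsched-pressure -fsched-pressure-algorithm=weighted", or disable
pressure-sensitive scheduling entirely with -fno-sched-pressure.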
+ +=== modified file 'gcc/common.opt' +--- old/gcc/common.opt 2011-04-11 15:26:47 +0000 ++++ new/gcc/common.opt 2012-02-08 23:38:13 +0000 +@@ -1614,6 +1614,19 @@ + Common Report Var(flag_sched_pressure) Init(0) Optimization + Enable register pressure sensitive insn scheduling + ++fsched-pressure-algorithm= ++Common Joined RejectNegative Enum(sched_pressure_algorithm) Var(flag_sched_pressure_algorithm) Init(SCHED_PRESSURE_WEIGHTED) ++-fira-algorithm=[CB|priority] Set the used IRA algorithm ++ ++Enum ++Name(sched_pressure_algorithm) Type(enum sched_pressure_algorithm) UnknownError(unknown %<fsched-pressure%> algorithm %qs) ++ ++EnumValue ++Enum(sched_pressure_algorithm) String(weighted) Value(SCHED_PRESSURE_WEIGHTED) ++ ++EnumValue ++Enum(sched_pressure_algorithm) String(model) Value(SCHED_PRESSURE_MODEL) ++ + fsched-spec + Common Report Var(flag_schedule_speculative) Init(1) Optimization + Allow speculative motion of non-loads + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2012-02-01 14:13:07 +0000 ++++ new/gcc/config/arm/arm.c 2012-02-09 00:47:59 +0000 +@@ -311,6 +311,11 @@ + /* Set default optimization options. */ + static const struct default_options arm_option_optimization_table[] = + { ++ /* Enable -fsched-pressure using -fsched-pressure-algorithm=model ++ by default when optimizing. */ ++ { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 }, ++ { OPT_LEVELS_1_PLUS, OPT_fsched_pressure_algorithm_, ++ NULL, SCHED_PRESSURE_MODEL }, + /* Enable section anchors by default at -O1 or higher. */ + { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 }, + { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 }, + +=== modified file 'gcc/flag-types.h' +--- old/gcc/flag-types.h 2010-11-24 13:28:38 +0000 ++++ new/gcc/flag-types.h 2012-02-08 23:38:13 +0000 +@@ -106,6 +106,14 @@ + }; + #endif + ++/* The algorithm used to implement -fsched-pressure. */ ++enum sched_pressure_algorithm ++{ ++ SCHED_PRESSURE_NONE, ++ SCHED_PRESSURE_WEIGHTED, ++ SCHED_PRESSURE_MODEL ++}; ++ + /* The algorithm used for the integrated register allocator (IRA). */ + enum ira_algorithm + { + +=== modified file 'gcc/haifa-sched.c' +--- old/gcc/haifa-sched.c 2011-02-19 20:59:23 +0000 ++++ new/gcc/haifa-sched.c 2012-02-08 23:39:02 +0000 +@@ -348,6 +348,14 @@ + /* Create empty basic block after the specified block. */ + basic_block (* sched_create_empty_bb) (basic_block); + ++/* Return the number of cycles until INSN is expected to be ready. ++ Return zero if it already is. */ ++static int ++insn_delay (rtx insn) ++{ ++ return MAX (INSN_TICK (insn) - clock_var, 0); ++} ++ + static int + may_trap_exp (const_rtx x, int is_store) + { +@@ -571,10 +579,10 @@ + + /* Do register pressure sensitive insn scheduling if the flag is set + up. */ +-bool sched_pressure_p; ++enum sched_pressure_algorithm sched_pressure; + + /* Map regno -> its cover class. The map defined only when +- SCHED_PRESSURE_P is true. */ ++ SCHED_PRESSURE != SCHED_PRESSURE_NONE. */ + enum reg_class *sched_regno_cover_class; + + /* The current register pressure. Only elements corresponding cover +@@ -602,10 +610,12 @@ + bitmap_clear (region_ref_regs); + } + +-/* Update current register pressure related info after birth (if +- BIRTH_P) or death of register REGNO. */ +-static void +-mark_regno_birth_or_death (int regno, bool birth_p) ++/* PRESSURE[CL] describes the pressure on register class CL. Update it ++ for the birth (if BIRTH_P) or death (if !BIRTH_P) of register REGNO. 
++ LIVE tracks the set of live registers; if it is null, assume that ++ every birth or death is genuine. */ ++static inline void ++mark_regno_birth_or_death (bitmap live, int *pressure, int regno, bool birth_p) + { + enum reg_class cover_class; + +@@ -616,15 +626,17 @@ + { + if (birth_p) + { +- bitmap_set_bit (curr_reg_live, regno); +- curr_reg_pressure[cover_class] +- += ira_reg_class_nregs[cover_class][PSEUDO_REGNO_MODE (regno)]; ++ if (!live || bitmap_set_bit (live, regno)) ++ pressure[cover_class] ++ += (ira_reg_class_nregs ++ [cover_class][PSEUDO_REGNO_MODE (regno)]); + } + else + { +- bitmap_clear_bit (curr_reg_live, regno); +- curr_reg_pressure[cover_class] +- -= ira_reg_class_nregs[cover_class][PSEUDO_REGNO_MODE (regno)]; ++ if (!live || bitmap_clear_bit (live, regno)) ++ pressure[cover_class] ++ -= (ira_reg_class_nregs ++ [cover_class][PSEUDO_REGNO_MODE (regno)]); + } + } + } +@@ -633,13 +645,13 @@ + { + if (birth_p) + { +- bitmap_set_bit (curr_reg_live, regno); +- curr_reg_pressure[cover_class]++; ++ if (!live || bitmap_set_bit (live, regno)) ++ pressure[cover_class]++; + } + else + { +- bitmap_clear_bit (curr_reg_live, regno); +- curr_reg_pressure[cover_class]--; ++ if (!live || bitmap_clear_bit (live, regno)) ++ pressure[cover_class]--; + } + } + } +@@ -657,8 +669,10 @@ + curr_reg_pressure[ira_reg_class_cover[i]] = 0; + bitmap_clear (curr_reg_live); + EXECUTE_IF_SET_IN_BITMAP (live, 0, j, bi) +- if (current_nr_blocks == 1 || bitmap_bit_p (region_ref_regs, j)) +- mark_regno_birth_or_death (j, true); ++ if (sched_pressure == SCHED_PRESSURE_MODEL ++ || current_nr_blocks == 1 ++ || bitmap_bit_p (region_ref_regs, j)) ++ mark_regno_birth_or_death (curr_reg_live, curr_reg_pressure, j, true); + } + + /* Mark registers in X as mentioned in the current region. */ +@@ -712,7 +726,8 @@ + if (regno == INVALID_REGNUM) + break; + if (! bitmap_bit_p (df_get_live_in (bb), regno)) +- mark_regno_birth_or_death (regno, true); ++ mark_regno_birth_or_death (curr_reg_live, curr_reg_pressure, ++ regno, true); + } + #endif + } +@@ -956,19 +971,19 @@ + return true; + } + +-/* Compute the number of nondebug forward deps of an insn. */ ++/* Compute the number of nondebug deps in list LIST for INSN. */ + + static int +-dep_list_size (rtx insn) ++dep_list_size (rtx insn, sd_list_types_def list) + { + sd_iterator_def sd_it; + dep_t dep; + int dbgcount = 0, nodbgcount = 0; + + if (!MAY_HAVE_DEBUG_INSNS) +- return sd_lists_size (insn, SD_LIST_FORW); ++ return sd_lists_size (insn, list); + +- FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep) ++ FOR_EACH_DEP (insn, list, sd_it, dep) + { + if (DEBUG_INSN_P (DEP_CON (dep))) + dbgcount++; +@@ -976,7 +991,7 @@ + nodbgcount++; + } + +- gcc_assert (dbgcount + nodbgcount == sd_lists_size (insn, SD_LIST_FORW)); ++ gcc_assert (dbgcount + nodbgcount == sd_lists_size (insn, list)); + + return nodbgcount; + } +@@ -995,7 +1010,7 @@ + { + int this_priority = -1; + +- if (dep_list_size (insn) == 0) ++ if (dep_list_size (insn, SD_LIST_FORW) == 0) + /* ??? We should set INSN_PRIORITY to insn_cost when and insn has + some forward deps but all of them are ignored by + contributes_to_priority hook. At the moment we set priority of +@@ -1091,6 +1106,22 @@ + qsort (READY, N_READY, sizeof (rtx), rank_for_schedule); } \ + while (0) + ++/* For each cover class CL, set DEATH[CL] to the number of registers ++ in that class that die in INSN. 
*/ ++ ++static void ++calculate_reg_deaths (rtx insn, int *death) ++{ ++ int i; ++ struct reg_use_data *use; ++ ++ for (i = 0; i < ira_reg_class_cover_size; i++) ++ death[ira_reg_class_cover[i]] = 0; ++ for (use = INSN_REG_USE_LIST (insn); use != NULL; use = use->next_insn_use) ++ if (dying_use_p (use)) ++ mark_regno_birth_or_death (0, death, use->regno, true); ++} ++ + /* Setup info about the current register pressure impact of scheduling + INSN at the current scheduling point. */ + static void +@@ -1102,23 +1133,12 @@ + enum reg_class cl; + struct reg_pressure_data *pressure_info; + int *max_reg_pressure; +- struct reg_use_data *use; + static int death[N_REG_CLASSES]; + + gcc_checking_assert (!DEBUG_INSN_P (insn)); + + excess_cost_change = 0; +- for (i = 0; i < ira_reg_class_cover_size; i++) +- death[ira_reg_class_cover[i]] = 0; +- for (use = INSN_REG_USE_LIST (insn); use != NULL; use = use->next_insn_use) +- if (dying_use_p (use)) +- { +- cl = sched_regno_cover_class[use->regno]; +- if (use->regno < FIRST_PSEUDO_REGISTER) +- death[cl]++; +- else +- death[cl] += ira_reg_class_nregs[cl][PSEUDO_REGNO_MODE (use->regno)]; +- } ++ calculate_reg_deaths (insn, death); + pressure_info = INSN_REG_PRESSURE (insn); + max_reg_pressure = INSN_MAX_REG_PRESSURE (insn); + gcc_assert (pressure_info != NULL && max_reg_pressure != NULL); +@@ -1139,7 +1159,765 @@ + } + INSN_REG_PRESSURE_EXCESS_COST_CHANGE (insn) = excess_cost_change; + } +- ++ ++/* This is the first page of code related to SCHED_PRESSURE_MODEL. ++ It tries to make the scheduler take register pressure into account ++ without introducing too many unnecessary stalls. It hooks into the ++ main scheduling algorithm at several points: ++ ++ - Before scheduling starts, model_start_schedule constructs a ++ "model schedule" for the current block. This model schedule is ++ chosen solely to keep register pressure down. It does not take the ++ target's pipeline or the original instruction order into account, ++ except as a tie-breaker. It also doesn't work to a particular ++ pressure limit. ++ ++ This model schedule gives us an idea of what pressure can be ++ achieved for the block gives us an example of a schedule that ++ keeps to that pressure. It also makes the final schedule less ++ dependent on the original instruction order. This is important ++ because the original order can either be "wide" (many values live ++ at once, such as in user-scheduled code) or "narrow" (few values ++ live at once, such as after loop unrolling, where several ++ iterations are executed sequentially). ++ ++ We do not apply this model schedule to the rtx stream. We simply ++ record it in model_schedule. We also compute the maximum pressure, ++ MP, that was seen during this schedule. ++ ++ - Instructions are added to the ready queue even if they require ++ a stall. The length of the stall is instead computed as: ++ ++ MAX (INSN_TICK (INSN) - clock_var, 0) ++ ++ (= insn_delay). This allows rank_for_schedule to choose between ++ introducing a deliberate stall or increasing pressure. ++ ++ - Before sorting the ready queue, model_set_excess_costs assigns ++ a pressure-based cost to each ready instruction in the queue. ++ This is the instruction's INSN_REG_PRESSURE_EXCESS_COST_CHANGE ++ (ECC for short) and is effectively measured in cycles. 
++ ++ - rank_for_schedule ranks instructions based on: ++ ++ ECC (insn) + insn_delay (insn) ++ ++ then as: ++ ++ insn_delay (insn) ++ ++ So, for example, an instruction X1 with an ECC of 1 that can issue ++ now will win over an instruction X0 with an ECC of zero that would ++ introduce a stall of one cycle. However, an instruction X2 with an ++ ECC of 2 that can issue now will lose to X0. ++ ++ - When an instruction is scheduled, model_recompute updates the model ++ schedule with the new pressures (some of which might now exceed the ++ original maximum pressure MP). model_update_limit_points then searches ++ for the new point of maximum pressure, if not already known. */ ++ ++/* Used to separate high-verbosity debug information for SCHED_PRESSURE_MODEL ++ from surrounding debug information. */ ++#define MODEL_BAR \ ++ ";;\t\t+------------------------------------------------------\n" ++ ++/* Information about the pressure on a particular register class at a ++ particular point of the model schedule. */ ++struct model_pressure_data { ++ /* The pressure at this point of the model schedule, or -1 if the ++ point is associated with an instruction that has already been ++ scheduled. */ ++ int ref_pressure; ++ ++ /* The maximum pressure during or after this point of the model schedule. */ ++ int max_pressure; ++}; ++ ++/* Per-instruction information that is used while building the model ++ schedule. Here, "schedule" refers to the model schedule rather ++ than the main schedule. */ ++struct model_insn_info { ++ /* The instruction itself. */ ++ rtx insn; ++ ++ /* If this instruction is in model_worklist, these fields link to the ++ previous (higher-priority) and next (lower-priority) instructions ++ in the list. */ ++ struct model_insn_info *prev; ++ struct model_insn_info *next; ++ ++ /* While constructing the schedule, QUEUE_INDEX describes whether an ++ instruction has already been added to the schedule (QUEUE_SCHEDULED), ++ is in model_worklist (QUEUE_READY), or neither (QUEUE_NOWHERE). ++ old_queue records the value that QUEUE_INDEX had before scheduling ++ started, so that we can restore it once the schedule is complete. */ ++ int old_queue; ++ ++ /* The relative importance of an unscheduled instruction. Higher ++ values indicate greater importance. */ ++ unsigned int model_priority; ++ ++ /* The length of the longest path of satisfied true dependencies ++ that leads to this instruction. */ ++ unsigned int depth; ++ ++ /* The length of the longest path of dependencies of any kind ++ that leads from this instruction. */ ++ unsigned int alap; ++ ++ /* The number of predecessor nodes that must still be scheduled. */ ++ int unscheduled_preds; ++}; ++ ++/* Information about the pressure limit for a particular register class. ++ This structure is used when applying a model schedule to the main ++ schedule. */ ++struct model_pressure_limit { ++ /* The maximum register pressure seen in the original model schedule. */ ++ int orig_pressure; ++ ++ /* The maximum register pressure seen in the current model schedule ++ (which excludes instructions that have already been scheduled). */ ++ int pressure; ++ ++ /* The point of the current model schedule at which PRESSURE is first ++ reached. It is set to -1 if the value needs to be recomputed. */ ++ int point; ++}; ++ ++/* Describes a particular way of measuring register pressure. */ ++struct model_pressure_group { ++ /* Index CCI describes the maximum pressure on ira_reg_class_cover[CCI]. 
*/ ++ struct model_pressure_limit limits[N_REG_CLASSES]; ++ ++ /* Index (POINT * ira_num_pressure_classes + CCI) describes the pressure ++ on register class ira_reg_class_cover[CCI] at point POINT of the ++ current model schedule. A POINT of model_num_insns describes the ++ pressure at the end of the schedule. */ ++ struct model_pressure_data *model; ++}; ++ ++/* Index POINT gives the instruction at point POINT of the model schedule. ++ This array doesn't change during main scheduling. */ ++static VEC (rtx, heap) *model_schedule; ++ ++/* The list of instructions in the model worklist, sorted in order of ++ decreasing priority. */ ++static struct model_insn_info *model_worklist; ++ ++/* Index I describes the instruction with INSN_LUID I. */ ++static struct model_insn_info *model_insns; ++ ++/* The number of instructions in the model schedule. */ ++static int model_num_insns; ++ ++/* The index of the first instruction in model_schedule that hasn't yet been ++ added to the main schedule, or model_num_insns if all of them have. */ ++static int model_curr_point; ++ ++/* Describes the pressure before each instruction in the model schedule. */ ++static struct model_pressure_group model_before_pressure; ++ ++/* The first unused model_priority value (as used in model_insn_info). */ ++static unsigned int model_next_priority; ++ ++ ++/* The model_pressure_data for ira_reg_class_cover[CCI] in GROUP ++ at point POINT of the model schedule. */ ++#define MODEL_PRESSURE_DATA(GROUP, POINT, CCI) \ ++ (&(GROUP)->model[(POINT) * ira_reg_class_cover_size + (CCI)]) ++ ++/* The maximum pressure on ira_reg_class_cover[CCI] in GROUP at or ++ after point POINT of the model schedule. */ ++#define MODEL_MAX_PRESSURE(GROUP, POINT, CCI) \ ++ (MODEL_PRESSURE_DATA (GROUP, POINT, CCI)->max_pressure) ++ ++/* The pressure on ira_reg_class_cover[CCI] in GROUP at point POINT ++ of the model schedule. */ ++#define MODEL_REF_PRESSURE(GROUP, POINT, CCI) \ ++ (MODEL_PRESSURE_DATA (GROUP, POINT, CCI)->ref_pressure) ++ ++/* Information about INSN that is used when creating the model schedule. */ ++#define MODEL_INSN_INFO(INSN) \ ++ (&model_insns[INSN_LUID (INSN)]) ++ ++/* The instruction at point POINT of the model schedule. */ ++#define MODEL_INSN(POINT) \ ++ (VEC_index (rtx, model_schedule, POINT)) ++ ++ ++/* Return INSN's index in the model schedule, or model_num_insns if it ++ doesn't belong to that schedule. */ ++ ++static int ++model_index (rtx insn) ++{ ++ if (INSN_MODEL_INDEX (insn) == 0) ++ return model_num_insns; ++ return INSN_MODEL_INDEX (insn) - 1; ++} ++ ++/* Make sure that GROUP->limits is up-to-date for the current point ++ of the model schedule. */ ++ ++static void ++model_update_limit_points_in_group (struct model_pressure_group *group) ++{ ++ int cci, max_pressure, point; ++ ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ /* We may have passed the final point at which the pressure in ++ group->limits[cci].pressure was reached. Update the limit if so. */ ++ max_pressure = MODEL_MAX_PRESSURE (group, model_curr_point, cci); ++ group->limits[cci].pressure = max_pressure; ++ ++ /* Find the point at which MAX_PRESSURE is first reached. We need ++ to search in three cases: ++ ++ - We've already moved past the previous pressure point. ++ In this case we search forward from model_curr_point. ++ ++ - We scheduled the previous point of maximum pressure ahead of ++ its position in the model schedule, but doing so didn't bring ++ the pressure point earlier. 
In this case we search forward ++ from that previous pressure point. ++ ++ - Scheduling an instruction early caused the maximum pressure ++ to decrease. In this case we will have set the pressure ++ point to -1, and we search forward from model_curr_point. */ ++ point = MAX (group->limits[cci].point, model_curr_point); ++ while (point < model_num_insns ++ && MODEL_REF_PRESSURE (group, point, cci) < max_pressure) ++ point++; ++ group->limits[cci].point = point; ++ ++ gcc_assert (MODEL_REF_PRESSURE (group, point, cci) == max_pressure); ++ gcc_assert (MODEL_MAX_PRESSURE (group, point, cci) == max_pressure); ++ } ++} ++ ++/* Make sure that all register-pressure limits are up-to-date for the ++ current position in the model schedule. */ ++ ++static void ++model_update_limit_points (void) ++{ ++ model_update_limit_points_in_group (&model_before_pressure); ++} ++ ++/* Return the model_index of the last unscheduled use in chain USE ++ outside of USE's instruction. Return -1 if there are no other uses, ++ or model_num_insns if the register is live at the end of the block. */ ++ ++static int ++model_last_use_except (struct reg_use_data *use) ++{ ++ struct reg_use_data *next; ++ int last, index; ++ ++ last = -1; ++ for (next = use->next_regno_use; next != use; next = next->next_regno_use) ++ if (NONDEBUG_INSN_P (next->insn) ++ && QUEUE_INDEX (next->insn) != QUEUE_SCHEDULED) ++ { ++ index = model_index (next->insn); ++ if (index == model_num_insns) ++ return model_num_insns; ++ if (last < index) ++ last = index; ++ } ++ return last; ++} ++ ++/* An instruction with model_index POINT has just been scheduled, and it ++ adds DELTA to the pressure on ira_reg_class_cover[CCI] after POINT - 1. ++ Update MODEL_REF_PRESSURE (GROUP, POINT, CCI) and ++ MODEL_MAX_PRESSURE (GROUP, POINT, CCI) accordingly. */ ++ ++static void ++model_start_update_pressure (struct model_pressure_group *group, ++ int point, int cci, int delta) ++{ ++ int next_max_pressure; ++ ++ if (point == model_num_insns) ++ { ++ /* The instruction wasn't part of the model schedule; it was moved ++ from a different block. Update the pressure for the end of ++ the model schedule. */ ++ MODEL_REF_PRESSURE (group, point, cci) += delta; ++ MODEL_MAX_PRESSURE (group, point, cci) += delta; ++ } ++ else ++ { ++ /* Record that this instruction has been scheduled. Nothing now ++ changes between POINT and POINT + 1, so get the maximum pressure ++ from the latter. If the maximum pressure decreases, the new ++ pressure point may be before POINT. */ ++ MODEL_REF_PRESSURE (group, point, cci) = -1; ++ next_max_pressure = MODEL_MAX_PRESSURE (group, point + 1, cci); ++ if (MODEL_MAX_PRESSURE (group, point, cci) > next_max_pressure) ++ { ++ MODEL_MAX_PRESSURE (group, point, cci) = next_max_pressure; ++ if (group->limits[cci].point == point) ++ group->limits[cci].point = -1; ++ } ++ } ++} ++ ++/* Record that scheduling a later instruction has changed the pressure ++ at point POINT of the model schedule by DELTA (which might be 0). ++ Update GROUP accordingly. Return nonzero if these changes might ++ trigger changes to previous points as well. */ ++ ++static int ++model_update_pressure (struct model_pressure_group *group, ++ int point, int cci, int delta) ++{ ++ int ref_pressure, max_pressure, next_max_pressure; ++ ++ /* If POINT hasn't yet been scheduled, update its pressure. 
*/ ++ ref_pressure = MODEL_REF_PRESSURE (group, point, cci); ++ if (ref_pressure >= 0 && delta != 0) ++ { ++ ref_pressure += delta; ++ MODEL_REF_PRESSURE (group, point, cci) = ref_pressure; ++ ++ /* Check whether the maximum pressure in the overall schedule ++ has increased. (This means that the MODEL_MAX_PRESSURE of ++ every point <= POINT will need to increae too; see below.) */ ++ if (group->limits[cci].pressure < ref_pressure) ++ group->limits[cci].pressure = ref_pressure; ++ ++ /* If we are at maximum pressure, and the maximum pressure ++ point was previously unknown or later than POINT, ++ bring it forward. */ ++ if (group->limits[cci].pressure == ref_pressure ++ && !IN_RANGE (group->limits[cci].point, 0, point)) ++ group->limits[cci].point = point; ++ ++ /* If POINT used to be the point of maximum pressure, but isn't ++ any longer, we need to recalculate it using a forward walk. */ ++ if (group->limits[cci].pressure > ref_pressure ++ && group->limits[cci].point == point) ++ group->limits[cci].point = -1; ++ } ++ ++ /* Update the maximum pressure at POINT. Changes here might also ++ affect the maximum pressure at POINT - 1. */ ++ next_max_pressure = MODEL_MAX_PRESSURE (group, point + 1, cci); ++ max_pressure = MAX (ref_pressure, next_max_pressure); ++ if (MODEL_MAX_PRESSURE (group, point, cci) != max_pressure) ++ { ++ MODEL_MAX_PRESSURE (group, point, cci) = max_pressure; ++ return 1; ++ } ++ return 0; ++} ++ ++/* INSN has just been scheduled. Update the model schedule accordingly. */ ++ ++static void ++model_recompute (rtx insn) ++{ ++ struct { ++ int last_use; ++ int regno; ++ } uses[FIRST_PSEUDO_REGISTER + MAX_RECOG_OPERANDS]; ++ struct reg_use_data *use; ++ struct reg_pressure_data *reg_pressure; ++ int delta[N_REG_CLASSES]; ++ int cci, point, mix, new_last, cl, ref_pressure, queue; ++ unsigned int i, num_uses, num_pending_births; ++ bool print_p; ++ ++ /* The destinations of INSN were previously live from POINT onwards, but are ++ now live from model_curr_point onwards. Set up DELTA accordingly. */ ++ point = model_index (insn); ++ reg_pressure = INSN_REG_PRESSURE (insn); ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ delta[cl] = reg_pressure[cci].set_increase; ++ } ++ ++ /* Record which registers previously died at POINT, but which now die ++ before POINT. Adjust DELTA so that it represents the effect of ++ this change after POINT - 1. Set NUM_PENDING_BIRTHS to the number of ++ registers that will be born in the range [model_curr_point, POINT). */ ++ num_uses = 0; ++ num_pending_births = 0; ++ for (use = INSN_REG_USE_LIST (insn); use != NULL; use = use->next_insn_use) ++ { ++ new_last = model_last_use_except (use); ++ if (new_last < point) ++ { ++ gcc_assert (num_uses < ARRAY_SIZE (uses)); ++ uses[num_uses].last_use = new_last; ++ uses[num_uses].regno = use->regno; ++ /* This register is no longer live after POINT - 1. */ ++ mark_regno_birth_or_death (NULL, delta, use->regno, false); ++ num_uses++; ++ if (new_last >= 0) ++ num_pending_births++; ++ } ++ } ++ ++ /* Update the MODEL_REF_PRESSURE and MODEL_MAX_PRESSURE for POINT. ++ Also set each group pressure limit for POINT. */ ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ model_start_update_pressure (&model_before_pressure, ++ point, cci, delta[cl]); ++ } ++ ++ /* Walk the model schedule backwards, starting immediately before POINT. 
*/ ++ print_p = false; ++ if (point != model_curr_point) ++ do ++ { ++ point--; ++ insn = MODEL_INSN (point); ++ queue = QUEUE_INDEX (insn); ++ ++ if (queue != QUEUE_SCHEDULED) ++ { ++ /* DELTA describes the effect of the move on the register pressure ++ after POINT. Make it describe the effect on the pressure ++ before POINT. */ ++ i = 0; ++ while (i < num_uses) ++ { ++ if (uses[i].last_use == point) ++ { ++ /* This register is now live again. */ ++ mark_regno_birth_or_death (NULL, delta, ++ uses[i].regno, true); ++ ++ /* Remove this use from the array. */ ++ uses[i] = uses[num_uses - 1]; ++ num_uses--; ++ num_pending_births--; ++ } ++ else ++ i++; ++ } ++ ++ if (sched_verbose >= 5) ++ { ++ char buf[2048]; ++ ++ if (!print_p) ++ { ++ fprintf (sched_dump, MODEL_BAR); ++ fprintf (sched_dump, ";;\t\t| New pressure for model" ++ " schedule\n"); ++ fprintf (sched_dump, MODEL_BAR); ++ print_p = true; ++ } ++ ++ print_pattern (buf, PATTERN (insn), 0); ++ fprintf (sched_dump, ";;\t\t| %3d %4d %-30s ", ++ point, INSN_UID (insn), buf); ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ ref_pressure = MODEL_REF_PRESSURE (&model_before_pressure, ++ point, cci); ++ fprintf (sched_dump, " %s:[%d->%d]", ++ reg_class_names[ira_reg_class_cover[cci]], ++ ref_pressure, ref_pressure + delta[cl]); ++ } ++ fprintf (sched_dump, "\n"); ++ } ++ } ++ ++ /* Adjust the pressure at POINT. Set MIX to nonzero if POINT - 1 ++ might have changed as well. */ ++ mix = num_pending_births; ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ mix |= delta[cl]; ++ mix |= model_update_pressure (&model_before_pressure, ++ point, cci, delta[cl]); ++ } ++ } ++ while (mix && point > model_curr_point); ++ ++ if (print_p) ++ fprintf (sched_dump, MODEL_BAR); ++} ++ ++/* model_spill_cost (CL, P, P') returns the cost of increasing the ++ pressure on CL from P to P'. We use this to calculate a "base ECC", ++ baseECC (CL, X), for each cover class CL and each instruction X. ++ Supposing X changes the pressure on CL from P to P', and that the ++ maximum pressure on CL in the current model schedule is MP', then: ++ ++ * if X occurs before or at the next point of maximum pressure in ++ the model schedule and P' > MP', then: ++ ++ baseECC (CL, X) = model_spill_cost (CL, MP, P') ++ ++ The idea is that the pressure after scheduling a fixed set of ++ instructions -- in this case, the set up to and including the ++ next maximum pressure point -- is going to be the same regardless ++ of the order; we simply want to keep the intermediate pressure ++ under control. Thus X has a cost of zero unless scheduling it ++ now would exceed MP'. ++ ++ If all increases in the set are by the same amount, no zero-cost ++ instruction will ever cause the pressure to exceed MP'. However, ++ if X is instead moved past an instruction X' with pressure in the ++ range (MP' - (P' - P), MP'), the pressure at X' will increase ++ beyond MP'. Since baseECC is very much a heuristic anyway, ++ it doesn't seem worth the overhead of tracking cases like these. ++ ++ The cost of exceeding MP' is always based on the original maximum ++ pressure MP. This is so that going 2 registers over the original ++ limit has the same cost regardless of whether it comes from two ++ separate +1 deltas or from a single +2 delta. 
++ ++ * if X occurs after the next point of maximum pressure in the model ++ schedule and P' > P, then: ++ ++ baseECC (CL, X) = model_spill_cost (CL, MP, MP' + (P' - P)) ++ ++ That is, if we move X forward across a point of maximum pressure, ++ and if X increases the pressure by P' - P, then we conservatively ++ assume that scheduling X next would increase the maximum pressure ++ by P' - P. Again, the cost of doing this is based on the original ++ maximum pressure MP, for the same reason as above. ++ ++ * if P' < P, P > MP, and X occurs at or after the next point of ++ maximum pressure, then: ++ ++ baseECC (CL, X) = -model_spill_cost (CL, MAX (MP, P'), P) ++ ++ That is, if we have already exceeded the original maximum pressure MP, ++ and if X might reduce the maximum pressure again -- or at least push ++ it further back, and thus allow more scheduling freedom -- it is given ++ a negative cost to reflect the improvement. ++ ++ * otherwise, ++ ++ baseECC (CL, X) = 0 ++ ++ In this case, X is not expected to affect the maximum pressure MP', ++ so it has zero cost. ++ ++ We then create a combined value baseECC (X) that is the sum of ++ baseECC (CL, X) for each cover class CL. ++ ++ baseECC (X) could itself be used as the ECC value described above. ++ However, this is often too conservative, in the sense that it ++ tends to make high-priority instructions that increase pressure ++ wait too long in cases where introducing a spill would be better. ++ For this reason the final ECC is a priority-adjusted form of ++ baseECC (X). Specifically, we calculate: ++ ++ P (X) = INSN_PRIORITY (X) - insn_delay (X) - baseECC (X) ++ baseP = MAX { P (X) | baseECC (X) <= 0 } ++ ++ Then: ++ ++ ECC (X) = MAX (MIN (baseP - P (X), baseECC (X)), 0) ++ ++ Thus an instruction's effect on pressure is ignored if it has a high ++ enough priority relative to the ones that don't increase pressure. ++ Negative values of baseECC (X) do not increase the priority of X ++ itself, but they do make it harder for other instructions to ++ increase the pressure further. ++ ++ This pressure cost is deliberately timid. The intention has been ++ to choose a heuristic that rarely interferes with the normal list ++ scheduler in cases where that scheduler would produce good code. ++ We simply want to curb some of its worst excesses. */ ++ ++/* Return the cost of increasing the pressure in class CL from FROM to TO. ++ ++ Here we use the very simplistic cost model that every register above ++ ira_available_class_regs[CL] has a spill cost of 1. We could use other ++ measures instead, such as one based on MEMORY_MOVE_COST. However: ++ ++ (1) In order for an instruction to be scheduled, the higher cost ++ would need to be justified in a single saving of that many stalls. ++ This is overly pessimistic, because the benefit of spilling is ++ often to avoid a sequence of several short stalls rather than ++ a single long one. ++ ++ (2) The cost is still arbitrary. Because we are not allocating ++ registers during scheduling, we have no way of knowing for ++ sure how many memory accesses will be required by each spill, ++ where the spills will be placed within the block, or even ++ which block(s) will contain the spills. ++ ++ So a higher cost than 1 is often too conservative in practice, ++ forcing blocks to contain unnecessary stalls instead of spill code. ++ The simple cost below seems to be the best compromise. It reduces ++ the interference with the normal list scheduler, which helps make ++ it more suitable for a default-on option. 
*/ ++ ++static int ++model_spill_cost (int cl, int from, int to) ++{ ++ from = MAX (from, ira_available_class_regs[cl]); ++ return MAX (to, from) - from; ++} ++ ++/* Return baseECC (ira_reg_class_cover[CCI], POINT), given that ++ P = curr_reg_pressure[ira_reg_class_cover[CCI]] and that ++ P' = P + DELTA. */ ++ ++static int ++model_excess_group_cost (struct model_pressure_group *group, ++ int point, int cci, int delta) ++{ ++ int pressure, cl; ++ ++ cl = ira_reg_class_cover[cci]; ++ if (delta < 0 && point >= group->limits[cci].point) ++ { ++ pressure = MAX (group->limits[cci].orig_pressure, ++ curr_reg_pressure[cl] + delta); ++ return -model_spill_cost (cl, pressure, curr_reg_pressure[cl]); ++ } ++ ++ if (delta > 0) ++ { ++ if (point > group->limits[cci].point) ++ pressure = group->limits[cci].pressure + delta; ++ else ++ pressure = curr_reg_pressure[cl] + delta; ++ ++ if (pressure > group->limits[cci].pressure) ++ return model_spill_cost (cl, group->limits[cci].orig_pressure, ++ pressure); ++ } ++ ++ return 0; ++} ++ ++/* Return baseECC (MODEL_INSN (INSN)). Dump the costs to sched_dump ++ if PRINT_P. */ ++ ++static int ++model_excess_cost (rtx insn, bool print_p) ++{ ++ int point, cci, cl, cost, this_cost, delta; ++ struct reg_pressure_data *insn_reg_pressure; ++ int insn_death[N_REG_CLASSES]; ++ ++ calculate_reg_deaths (insn, insn_death); ++ point = model_index (insn); ++ insn_reg_pressure = INSN_REG_PRESSURE (insn); ++ cost = 0; ++ ++ if (print_p) ++ fprintf (sched_dump, ";;\t\t| %3d %4d | %4d %+3d |", point, ++ INSN_UID (insn), INSN_PRIORITY (insn), insn_delay (insn)); ++ ++ /* Sum up the individual costs for each register class. */ ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ delta = insn_reg_pressure[cci].set_increase - insn_death[cl]; ++ this_cost = model_excess_group_cost (&model_before_pressure, ++ point, cci, delta); ++ cost += this_cost; ++ if (print_p) ++ fprintf (sched_dump, " %s:[%d base cost %d]", ++ reg_class_names[cl], delta, this_cost); ++ } ++ ++ if (print_p) ++ fprintf (sched_dump, "\n"); ++ ++ return cost; ++} ++ ++/* Dump the next points of maximum pressure for GROUP. */ ++ ++static void ++model_dump_pressure_points (struct model_pressure_group *group) ++{ ++ int cci, cl; ++ ++ fprintf (sched_dump, ";;\t\t| pressure points"); ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ fprintf (sched_dump, " %s:[%d->%d at ", reg_class_names[cl], ++ curr_reg_pressure[cl], group->limits[cci].pressure); ++ if (group->limits[cci].point < model_num_insns) ++ fprintf (sched_dump, "%d:%d]", group->limits[cci].point, ++ INSN_UID (MODEL_INSN (group->limits[cci].point))); ++ else ++ fprintf (sched_dump, "end]"); ++ } ++ fprintf (sched_dump, "\n"); ++} ++ ++/* Set INSN_REG_PRESSURE_EXCESS_COST_CHANGE for INSNS[0...COUNT-1]. */ ++ ++static void ++model_set_excess_costs (rtx *insns, int count) ++{ ++ int i, cost, priority_base, priority; ++ bool print_p; ++ ++ /* Record the baseECC value for each instruction in the model schedule, ++ except that negative costs are converted to zero ones now rather thatn ++ later. Do not assign a cost to debug instructions, since they must ++ not change code-generation decisions. Experiments suggest we also ++ get better results by not assigning a cost to instructions from ++ a different block. ++ ++ Set PRIORITY_BASE to baseP in the block comment above. 
This is the ++ maximum priority of the "cheap" instructions, which should always ++ include the next model instruction. */ ++ priority_base = 0; ++ print_p = false; ++ for (i = 0; i < count; i++) ++ if (INSN_MODEL_INDEX (insns[i])) ++ { ++ if (sched_verbose >= 6 && !print_p) ++ { ++ fprintf (sched_dump, MODEL_BAR); ++ fprintf (sched_dump, ";;\t\t| Pressure costs for ready queue\n"); ++ model_dump_pressure_points (&model_before_pressure); ++ fprintf (sched_dump, MODEL_BAR); ++ print_p = true; ++ } ++ cost = model_excess_cost (insns[i], print_p); ++ if (cost <= 0) ++ { ++ priority = INSN_PRIORITY (insns[i]) - insn_delay (insns[i]) - cost; ++ priority_base = MAX (priority_base, priority); ++ cost = 0; ++ } ++ INSN_REG_PRESSURE_EXCESS_COST_CHANGE (insns[i]) = cost; ++ } ++ if (print_p) ++ fprintf (sched_dump, MODEL_BAR); ++ ++ /* Use MAX (baseECC, 0) and baseP to calculcate ECC for each ++ instruction. */ ++ for (i = 0; i < count; i++) ++ { ++ cost = INSN_REG_PRESSURE_EXCESS_COST_CHANGE (insns[i]); ++ priority = INSN_PRIORITY (insns[i]) - insn_delay (insns[i]); ++ if (cost > 0 && priority > priority_base) ++ { ++ cost += priority_base - priority; ++ INSN_REG_PRESSURE_EXCESS_COST_CHANGE (insns[i]) = MAX (cost, 0); ++ } ++ } ++} ++ + /* Returns a positive value if x is preferred; returns a negative value if + y is preferred. Should never return 0, since that will make the sort + unstable. */ +@@ -1170,23 +1948,20 @@ + /* Make sure that priority of TMP and TMP2 are initialized. */ + gcc_assert (INSN_PRIORITY_KNOWN (tmp) && INSN_PRIORITY_KNOWN (tmp2)); + +- if (sched_pressure_p) ++ if (sched_pressure != SCHED_PRESSURE_NONE) + { + int diff; + + /* Prefer insn whose scheduling results in the smallest register + pressure excess. */ + if ((diff = (INSN_REG_PRESSURE_EXCESS_COST_CHANGE (tmp) +- + (INSN_TICK (tmp) > clock_var +- ? INSN_TICK (tmp) - clock_var : 0) ++ + insn_delay (tmp) + - INSN_REG_PRESSURE_EXCESS_COST_CHANGE (tmp2) +- - (INSN_TICK (tmp2) > clock_var +- ? INSN_TICK (tmp2) - clock_var : 0))) != 0) ++ - insn_delay (tmp2)))) + return diff; + } + +- +- if (sched_pressure_p ++ if (sched_pressure != SCHED_PRESSURE_NONE + && (INSN_TICK (tmp2) > clock_var || INSN_TICK (tmp) > clock_var)) + { + if (INSN_TICK (tmp) <= clock_var) +@@ -1277,11 +2052,22 @@ + return val; + } + ++ /* Prefer instructions that occur earlier in the model schedule. */ ++ if (sched_pressure == SCHED_PRESSURE_MODEL) ++ { ++ int diff; ++ ++ diff = model_index (tmp) - model_index (tmp2); ++ if (diff != 0) ++ return diff; ++ } ++ + /* Prefer the insn which has more later insns that depend on it. + This gives the scheduler more freedom when scheduling later + instructions at the expense of added register pressure. 
*/ + +- val = (dep_list_size (tmp2) - dep_list_size (tmp)); ++ val = (dep_list_size (tmp2, SD_LIST_FORW) ++ - dep_list_size (tmp, SD_LIST_FORW)); + + if (flag_sched_dep_count_heuristic && val != 0) + return val; +@@ -1480,12 +2266,15 @@ + int i; + rtx *first = ready_lastpos (ready); + +- if (sched_pressure_p) ++ if (sched_pressure == SCHED_PRESSURE_WEIGHTED) + { + for (i = 0; i < ready->n_ready; i++) + if (!DEBUG_INSN_P (first[i])) + setup_insn_reg_pressure_info (first[i]); + } ++ if (sched_pressure == SCHED_PRESSURE_MODEL ++ && model_curr_point < model_num_insns) ++ model_set_excess_costs (first, ready->n_ready); + SCHED_SORT (first, ready->n_ready); + } + +@@ -1551,10 +2340,12 @@ + gcc_checking_assert (!DEBUG_INSN_P (insn)); + + for (use = INSN_REG_USE_LIST (insn); use != NULL; use = use->next_insn_use) +- if (dying_use_p (use) && bitmap_bit_p (curr_reg_live, use->regno)) +- mark_regno_birth_or_death (use->regno, false); ++ if (dying_use_p (use)) ++ mark_regno_birth_or_death (curr_reg_live, curr_reg_pressure, ++ use->regno, false); + for (set = INSN_REG_SET_LIST (insn); set != NULL; set = set->next_insn_set) +- mark_regno_birth_or_death (set->regno, true); ++ mark_regno_birth_or_death (curr_reg_live, curr_reg_pressure, ++ set->regno, true); + } + + /* Set up or update (if UPDATE_P) max register pressure (see its +@@ -1626,11 +2417,618 @@ + void + sched_setup_bb_reg_pressure_info (basic_block bb, rtx after) + { +- gcc_assert (sched_pressure_p); ++ gcc_assert (sched_pressure == SCHED_PRESSURE_WEIGHTED); + initiate_bb_reg_pressure_info (bb); + setup_insn_max_reg_pressure (after, false); + } +- ++ ++/* Return (in order): ++ ++ - positive if INSN adversely affects the pressure on one ++ register class ++ ++ - negative if INSN reduces the pressure on one register class ++ ++ - 0 if INSN doesn't affect the pressure on any register class. */ ++ ++static int ++model_classify_pressure (struct model_insn_info *insn) ++{ ++ struct reg_pressure_data *reg_pressure; ++ int death[N_REG_CLASSES]; ++ int cci, cl, sum; ++ ++ calculate_reg_deaths (insn->insn, death); ++ reg_pressure = INSN_REG_PRESSURE (insn->insn); ++ sum = 0; ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ if (death[cl] < reg_pressure[cci].set_increase) ++ return 1; ++ sum += reg_pressure[cci].set_increase - death[cl]; ++ } ++ return sum; ++} ++ ++/* Return true if INSN1 should come before INSN2 in the model schedule. */ ++ ++static int ++model_order_p (struct model_insn_info *insn1, struct model_insn_info *insn2) ++{ ++ unsigned int height1, height2; ++ unsigned int priority1, priority2; ++ ++ /* Prefer instructions with a higher model priority. */ ++ if (insn1->model_priority != insn2->model_priority) ++ return insn1->model_priority > insn2->model_priority; ++ ++ /* Combine the length of the longest path of satisfied true dependencies ++ that leads to each instruction (depth) with the length of the longest ++ path of any dependencies that leads from the instruction (alap). ++ Prefer instructions with the greatest combined length. If the combined ++ lengths are equal, prefer instructions with the greatest depth. ++ ++ The idea is that, if we have a set S of "equal" instructions that each ++ have ALAP value X, and we pick one such instruction I, any true-dependent ++ successors of I that have ALAP value X - 1 should be preferred over S. ++ This encourages the schedule to be "narrow" rather than "wide". 
++ However, if I is a low-priority instruction that we decided to ++ schedule because of its model_classify_pressure, and if there ++ is a set of higher-priority instructions T, the aforementioned ++ successors of I should not have the edge over T. */ ++ height1 = insn1->depth + insn1->alap; ++ height2 = insn2->depth + insn2->alap; ++ if (height1 != height2) ++ return height1 > height2; ++ if (insn1->depth != insn2->depth) ++ return insn1->depth > insn2->depth; ++ ++ /* We have no real preference between INSN1 an INSN2 as far as attempts ++ to reduce pressure go. Prefer instructions with higher priorities. */ ++ priority1 = INSN_PRIORITY (insn1->insn); ++ priority2 = INSN_PRIORITY (insn2->insn); ++ if (priority1 != priority2) ++ return priority1 > priority2; ++ ++ /* Use the original rtl sequence as a tie-breaker. */ ++ return insn1 < insn2; ++} ++ ++/* Add INSN to the model worklist immediately after PREV. Add it to the ++ beginning of the list if PREV is null. */ ++ ++static void ++model_add_to_worklist_at (struct model_insn_info *insn, ++ struct model_insn_info *prev) ++{ ++ gcc_assert (QUEUE_INDEX (insn->insn) == QUEUE_NOWHERE); ++ QUEUE_INDEX (insn->insn) = QUEUE_READY; ++ ++ insn->prev = prev; ++ if (prev) ++ { ++ insn->next = prev->next; ++ prev->next = insn; ++ } ++ else ++ { ++ insn->next = model_worklist; ++ model_worklist = insn; ++ } ++ if (insn->next) ++ insn->next->prev = insn; ++} ++ ++/* Remove INSN from the model worklist. */ ++ ++static void ++model_remove_from_worklist (struct model_insn_info *insn) ++{ ++ gcc_assert (QUEUE_INDEX (insn->insn) == QUEUE_READY); ++ QUEUE_INDEX (insn->insn) = QUEUE_NOWHERE; ++ ++ if (insn->prev) ++ insn->prev->next = insn->next; ++ else ++ model_worklist = insn->next; ++ if (insn->next) ++ insn->next->prev = insn->prev; ++} ++ ++/* Add INSN to the model worklist. Start looking for a suitable position ++ between neighbors PREV and NEXT, testing at most MAX_SCHED_READY_INSNS ++ insns either side. A null PREV indicates the beginning of the list and ++ a null NEXT indicates the end. */ ++ ++static void ++model_add_to_worklist (struct model_insn_info *insn, ++ struct model_insn_info *prev, ++ struct model_insn_info *next) ++{ ++ int count; ++ ++ count = MAX_SCHED_READY_INSNS; ++ if (count > 0 && prev && model_order_p (insn, prev)) ++ do ++ { ++ count--; ++ prev = prev->prev; ++ } ++ while (count > 0 && prev && model_order_p (insn, prev)); ++ else ++ while (count > 0 && next && model_order_p (next, insn)) ++ { ++ count--; ++ prev = next; ++ next = next->next; ++ } ++ model_add_to_worklist_at (insn, prev); ++} ++ ++/* INSN may now have a higher priority (in the model_order_p sense) ++ than before. Move it up the worklist if necessary. */ ++ ++static void ++model_promote_insn (struct model_insn_info *insn) ++{ ++ struct model_insn_info *prev; ++ int count; ++ ++ prev = insn->prev; ++ count = MAX_SCHED_READY_INSNS; ++ while (count > 0 && prev && model_order_p (insn, prev)) ++ { ++ count--; ++ prev = prev->prev; ++ } ++ if (prev != insn->prev) ++ { ++ model_remove_from_worklist (insn); ++ model_add_to_worklist_at (insn, prev); ++ } ++} ++ ++/* Add INSN to the end of the model schedule. 
*/ ++ ++static void ++model_add_to_schedule (rtx insn) ++{ ++ unsigned int point; ++ ++ gcc_assert (QUEUE_INDEX (insn) == QUEUE_NOWHERE); ++ QUEUE_INDEX (insn) = QUEUE_SCHEDULED; ++ ++ point = VEC_length (rtx, model_schedule); ++ VEC_quick_push (rtx, model_schedule, insn); ++ INSN_MODEL_INDEX (insn) = point + 1; ++} ++ ++/* Analyze the instructions that are to be scheduled, setting up ++ MODEL_INSN_INFO (...) and model_num_insns accordingly. Add ready ++ instructions to model_worklist. */ ++ ++static void ++model_analyze_insns (void) ++{ ++ rtx start, end, iter; ++ sd_iterator_def sd_it; ++ dep_t dep; ++ struct model_insn_info *insn, *con; ++ ++ model_num_insns = 0; ++ start = PREV_INSN (current_sched_info->next_tail); ++ end = current_sched_info->prev_head; ++ for (iter = start; iter != end; iter = PREV_INSN (iter)) ++ if (NONDEBUG_INSN_P (iter)) ++ { ++ insn = MODEL_INSN_INFO (iter); ++ insn->insn = iter; ++ FOR_EACH_DEP (iter, SD_LIST_FORW, sd_it, dep) ++ { ++ con = MODEL_INSN_INFO (DEP_CON (dep)); ++ if (con->insn && insn->alap < con->alap + 1) ++ insn->alap = con->alap + 1; ++ } ++ ++ insn->old_queue = QUEUE_INDEX (iter); ++ QUEUE_INDEX (iter) = QUEUE_NOWHERE; ++ ++ insn->unscheduled_preds = dep_list_size (iter, SD_LIST_HARD_BACK); ++ if (insn->unscheduled_preds == 0) ++ model_add_to_worklist (insn, NULL, model_worklist); ++ ++ model_num_insns++; ++ } ++} ++ ++/* The global state describes the register pressure at the start of the ++ model schedule. Initialize GROUP accordingly. */ ++ ++static void ++model_init_pressure_group (struct model_pressure_group *group) ++{ ++ int cci, cl; ++ ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ group->limits[cci].pressure = curr_reg_pressure[cl]; ++ group->limits[cci].point = 0; ++ } ++ /* Use index model_num_insns to record the state after the last ++ instruction in the model schedule. */ ++ group->model = XNEWVEC (struct model_pressure_data, ++ (model_num_insns + 1) * ira_reg_class_cover_size); ++} ++ ++/* Record that MODEL_REF_PRESSURE (GROUP, POINT, CCI) is PRESSURE. ++ Update the maximum pressure for the whole schedule. */ ++ ++static void ++model_record_pressure (struct model_pressure_group *group, ++ int point, int cci, int pressure) ++{ ++ MODEL_REF_PRESSURE (group, point, cci) = pressure; ++ if (group->limits[cci].pressure < pressure) ++ { ++ group->limits[cci].pressure = pressure; ++ group->limits[cci].point = point; ++ } ++} ++ ++/* INSN has just been added to the end of the model schedule. Record its ++ register-pressure information. 
*/ ++ ++static void ++model_record_pressures (struct model_insn_info *insn) ++{ ++ struct reg_pressure_data *reg_pressure; ++ int point, cci, cl, delta; ++ int death[N_REG_CLASSES]; ++ ++ point = model_index (insn->insn); ++ if (sched_verbose >= 2) ++ { ++ char buf[2048]; ++ ++ if (point == 0) ++ { ++ fprintf (sched_dump, "\n;;\tModel schedule:\n;;\n"); ++ fprintf (sched_dump, ";;\t| idx insn | mpri hght dpth prio |\n"); ++ } ++ print_pattern (buf, PATTERN (insn->insn), 0); ++ fprintf (sched_dump, ";;\t| %3d %4d | %4d %4d %4d %4d | %-30s ", ++ point, INSN_UID (insn->insn), insn->model_priority, ++ insn->depth + insn->alap, insn->depth, ++ INSN_PRIORITY (insn->insn), buf); ++ } ++ calculate_reg_deaths (insn->insn, death); ++ reg_pressure = INSN_REG_PRESSURE (insn->insn); ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ delta = reg_pressure[cci].set_increase - death[cl]; ++ if (sched_verbose >= 2) ++ fprintf (sched_dump, " %s:[%d,%+d]", reg_class_names[cl], ++ curr_reg_pressure[cl], delta); ++ model_record_pressure (&model_before_pressure, point, cci, ++ curr_reg_pressure[cl]); ++ } ++ if (sched_verbose >= 2) ++ fprintf (sched_dump, "\n"); ++} ++ ++/* All instructions have been added to the model schedule. Record the ++ final register pressure in GROUP and set up all MODEL_MAX_PRESSUREs. */ ++ ++static void ++model_record_final_pressures (struct model_pressure_group *group) ++{ ++ int point, cci, max_pressure, ref_pressure, cl; ++ ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ /* Record the final pressure for this class. */ ++ cl = ira_reg_class_cover[cci]; ++ point = model_num_insns; ++ ref_pressure = curr_reg_pressure[cl]; ++ model_record_pressure (group, point, cci, ref_pressure); ++ ++ /* Record the original maximum pressure. */ ++ group->limits[cci].orig_pressure = group->limits[cci].pressure; ++ ++ /* Update the MODEL_MAX_PRESSURE for every point of the schedule. */ ++ max_pressure = ref_pressure; ++ MODEL_MAX_PRESSURE (group, point, cci) = max_pressure; ++ while (point > 0) ++ { ++ point--; ++ ref_pressure = MODEL_REF_PRESSURE (group, point, cci); ++ max_pressure = MAX (max_pressure, ref_pressure); ++ MODEL_MAX_PRESSURE (group, point, cci) = max_pressure; ++ } ++ } ++} ++ ++/* Update all successors of INSN, given that INSN has just been scheduled. */ ++ ++static void ++model_add_successors_to_worklist (struct model_insn_info *insn) ++{ ++ sd_iterator_def sd_it; ++ struct model_insn_info *con; ++ dep_t dep; ++ ++ FOR_EACH_DEP (insn->insn, SD_LIST_FORW, sd_it, dep) ++ { ++ con = MODEL_INSN_INFO (DEP_CON (dep)); ++ /* Ignore debug instructions, and instructions from other blocks. */ ++ if (con->insn) ++ { ++ con->unscheduled_preds--; ++ ++ /* Update the depth field of each true-dependent successor. ++ Increasing the depth gives them a higher priority than ++ before. */ ++ if (DEP_TYPE (dep) == REG_DEP_TRUE && con->depth < insn->depth + 1) ++ { ++ con->depth = insn->depth + 1; ++ if (QUEUE_INDEX (con->insn) == QUEUE_READY) ++ model_promote_insn (con); ++ } ++ ++ /* If this is a true dependency, or if there are no remaining ++ dependencies for CON (meaning that CON only had non-true ++ dependencies), make sure that CON is on the worklist. ++ We don't bother otherwise because it would tend to fill the ++ worklist with a lot of low-priority instructions that are not ++ yet ready to issue. 
*/ ++ if ((con->depth > 0 || con->unscheduled_preds == 0) ++ && QUEUE_INDEX (con->insn) == QUEUE_NOWHERE) ++ model_add_to_worklist (con, insn, insn->next); ++ } ++ } ++} ++ ++/* Give INSN a higher priority than any current instruction, then give ++ unscheduled predecessors of INSN a higher priority still. If any of ++ those predecessors are not on the model worklist, do the same for its ++ predecessors, and so on. */ ++ ++static void ++model_promote_predecessors (struct model_insn_info *insn) ++{ ++ struct model_insn_info *pro, *first; ++ sd_iterator_def sd_it; ++ dep_t dep; ++ ++ if (sched_verbose >= 7) ++ fprintf (sched_dump, ";;\t+--- priority of %d = %d, priority of", ++ INSN_UID (insn->insn), model_next_priority); ++ insn->model_priority = model_next_priority++; ++ model_remove_from_worklist (insn); ++ model_add_to_worklist_at (insn, NULL); ++ ++ first = NULL; ++ for (;;) ++ { ++ FOR_EACH_DEP (insn->insn, SD_LIST_HARD_BACK, sd_it, dep) ++ { ++ pro = MODEL_INSN_INFO (DEP_PRO (dep)); ++ /* The first test is to ignore debug instructions, and instructions ++ from other blocks. */ ++ if (pro->insn ++ && pro->model_priority != model_next_priority ++ && QUEUE_INDEX (pro->insn) != QUEUE_SCHEDULED) ++ { ++ pro->model_priority = model_next_priority; ++ if (sched_verbose >= 7) ++ fprintf (sched_dump, " %d", INSN_UID (pro->insn)); ++ if (QUEUE_INDEX (pro->insn) == QUEUE_READY) ++ { ++ /* PRO is already in the worklist, but it now has ++ a higher priority than before. Move it at the ++ appropriate place. */ ++ model_remove_from_worklist (pro); ++ model_add_to_worklist (pro, NULL, model_worklist); ++ } ++ else ++ { ++ /* PRO isn't in the worklist. Recursively process ++ its predecessors until we find one that is. */ ++ pro->next = first; ++ first = pro; ++ } ++ } ++ } ++ if (!first) ++ break; ++ insn = first; ++ first = insn->next; ++ } ++ if (sched_verbose >= 7) ++ fprintf (sched_dump, " = %d\n", model_next_priority); ++ model_next_priority++; ++} ++ ++/* Pick one instruction from model_worklist and process it. */ ++ ++static void ++model_choose_insn (void) ++{ ++ struct model_insn_info *insn, *fallback; ++ int count; ++ ++ if (sched_verbose >= 7) ++ { ++ fprintf (sched_dump, ";;\t+--- worklist:\n"); ++ insn = model_worklist; ++ count = MAX_SCHED_READY_INSNS; ++ while (count > 0 && insn) ++ { ++ fprintf (sched_dump, ";;\t+--- %d [%d, %d, %d, %d]\n", ++ INSN_UID (insn->insn), insn->model_priority, ++ insn->depth + insn->alap, insn->depth, ++ INSN_PRIORITY (insn->insn)); ++ count--; ++ insn = insn->next; ++ } ++ } ++ ++ /* Look for a ready instruction whose model_classify_priority is zero ++ or negative, picking the highest-priority one. Adding such an ++ instruction to the schedule now should do no harm, and may actually ++ do some good. ++ ++ Failing that, see whether there is an instruction with the highest ++ extant model_priority that is not yet ready, but which would reduce ++ pressure if it became ready. This is designed to catch cases like: ++ ++ (set (mem (reg R1)) (reg R2)) ++ ++ where the instruction is the last remaining use of R1 and where the ++ value of R2 is not yet available (or vice versa). The death of R1 ++ means that this instruction already reduces pressure. It is of ++ course possible that the computation of R2 involves other registers ++ that are hard to kill, but such cases are rare enough for this ++ heuristic to be a win in general. ++ ++ Failing that, just pick the highest-priority instruction in the ++ worklist. 
*/ ++ count = MAX_SCHED_READY_INSNS; ++ insn = model_worklist; ++ fallback = 0; ++ for (;;) ++ { ++ if (count == 0 || !insn) ++ { ++ insn = fallback ? fallback : model_worklist; ++ break; ++ } ++ if (insn->unscheduled_preds) ++ { ++ if (model_worklist->model_priority == insn->model_priority ++ && !fallback ++ && model_classify_pressure (insn) < 0) ++ fallback = insn; ++ } ++ else ++ { ++ if (model_classify_pressure (insn) <= 0) ++ break; ++ } ++ count--; ++ insn = insn->next; ++ } ++ ++ if (sched_verbose >= 7 && insn != model_worklist) ++ { ++ if (insn->unscheduled_preds) ++ fprintf (sched_dump, ";;\t+--- promoting insn %d, with dependencies\n", ++ INSN_UID (insn->insn)); ++ else ++ fprintf (sched_dump, ";;\t+--- promoting insn %d, which is ready\n", ++ INSN_UID (insn->insn)); ++ } ++ if (insn->unscheduled_preds) ++ /* INSN isn't yet ready to issue. Give all its predecessors the ++ highest priority. */ ++ model_promote_predecessors (insn); ++ else ++ { ++ /* INSN is ready. Add it to the end of model_schedule and ++ process its successors. */ ++ model_add_successors_to_worklist (insn); ++ model_remove_from_worklist (insn); ++ model_add_to_schedule (insn->insn); ++ model_record_pressures (insn); ++ update_register_pressure (insn->insn); ++ } ++} ++ ++/* Restore all QUEUE_INDEXs to the values that they had before ++ model_start_schedule was called. */ ++ ++static void ++model_reset_queue_indices (void) ++{ ++ unsigned int i; ++ rtx insn; ++ ++ FOR_EACH_VEC_ELT (rtx, model_schedule, i, insn) ++ QUEUE_INDEX (insn) = MODEL_INSN_INFO (insn)->old_queue; ++} ++ ++/* We have calculated the model schedule and spill costs. Print a summary ++ to sched_dump. */ ++ ++static void ++model_dump_pressure_summary (void) ++{ ++ int cci, cl; ++ ++ fprintf (sched_dump, ";; Pressure summary:"); ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ fprintf (sched_dump, " %s:%d", reg_class_names[cl], ++ model_before_pressure.limits[cci].pressure); ++ } ++ fprintf (sched_dump, "\n\n"); ++} ++ ++/* Initialize the SCHED_PRESSURE_MODEL information for the current ++ scheduling region. */ ++ ++static void ++model_start_schedule (void) ++{ ++ basic_block bb; ++ ++ model_next_priority = 1; ++ model_schedule = VEC_alloc (rtx, heap, sched_max_luid); ++ model_insns = XCNEWVEC (struct model_insn_info, sched_max_luid); ++ ++ bb = BLOCK_FOR_INSN (NEXT_INSN (current_sched_info->prev_head)); ++ initiate_reg_pressure_info (df_get_live_in (bb)); ++ ++ model_analyze_insns (); ++ model_init_pressure_group (&model_before_pressure); ++ while (model_worklist) ++ model_choose_insn (); ++ gcc_assert (model_num_insns == (int) VEC_length (rtx, model_schedule)); ++ if (sched_verbose >= 2) ++ fprintf (sched_dump, "\n"); ++ ++ model_record_final_pressures (&model_before_pressure); ++ model_reset_queue_indices (); ++ ++ XDELETEVEC (model_insns); ++ ++ model_curr_point = 0; ++ initiate_reg_pressure_info (df_get_live_in (bb)); ++ if (sched_verbose >= 1) ++ model_dump_pressure_summary (); ++} ++ ++/* Free the information associated with GROUP. */ ++ ++static void ++model_finalize_pressure_group (struct model_pressure_group *group) ++{ ++ XDELETEVEC (group->model); ++} ++ ++/* Free the information created by model_start_schedule. */ ++ ++static void ++model_end_schedule (void) ++{ ++ model_finalize_pressure_group (&model_before_pressure); ++ VEC_free (rtx, heap, model_schedule); ++} ++ + /* INSN is the "currently executing insn". Launch each insn which was + waiting on INSN. 
READY is the ready list which contains the insns + that are ready to fire. CLOCK is the current cycle. The function +@@ -1667,10 +3065,14 @@ + reg_class_names[ira_reg_class_cover[i]], + pressure_info[i].set_increase, pressure_info[i].change); + } ++ if (sched_pressure == SCHED_PRESSURE_MODEL ++ && model_curr_point < model_num_insns ++ && model_index (insn) == model_curr_point) ++ fprintf (sched_dump, ":model %d", model_curr_point); + fputc ('\n', sched_dump); + } + +- if (sched_pressure_p && !DEBUG_INSN_P (insn)) ++ if (sched_pressure == SCHED_PRESSURE_WEIGHTED && !DEBUG_INSN_P (insn)) + update_reg_and_insn_max_reg_pressure (insn); + + /* Scheduling instruction should have all its dependencies resolved and +@@ -1728,6 +3130,24 @@ + gcc_assert (QUEUE_INDEX (insn) == QUEUE_NOWHERE); + QUEUE_INDEX (insn) = QUEUE_SCHEDULED; + ++ if (sched_pressure == SCHED_PRESSURE_MODEL ++ && model_curr_point < model_num_insns ++ && NONDEBUG_INSN_P (insn)) ++ { ++ if (model_index (insn) == model_curr_point) ++ do ++ model_curr_point++; ++ while (model_curr_point < model_num_insns ++ && (QUEUE_INDEX (MODEL_INSN (model_curr_point)) ++ == QUEUE_SCHEDULED)); ++ else ++ model_recompute (insn); ++ model_update_limit_points (); ++ update_register_pressure (insn); ++ if (sched_verbose >= 2) ++ print_curr_reg_pressure (); ++ } ++ + gcc_assert (INSN_TICK (insn) >= MIN_TICK); + if (INSN_TICK (insn) > clock_var) + /* INSN has been prematurely moved from the queue to the ready list. +@@ -2056,7 +3476,16 @@ + /* If the ready list is full, delay the insn for 1 cycle. + See the comment in schedule_block for the rationale. */ + if (!reload_completed +- && ready->n_ready - ready->n_debug > MAX_SCHED_READY_INSNS ++ && (ready->n_ready - ready->n_debug > MAX_SCHED_READY_INSNS ++ || (sched_pressure == SCHED_PRESSURE_MODEL ++ /* Limit pressure recalculations to MAX_SCHED_READY_INSNS ++ instructions too. */ ++ && model_index (insn) > (model_curr_point ++ + MAX_SCHED_READY_INSNS))) ++ && !(sched_pressure == SCHED_PRESSURE_MODEL ++ && model_curr_point < model_num_insns ++ /* Always allow the next model instruction to issue. */ ++ && model_index (insn) == model_curr_point) + && !SCHED_GROUP_P (insn) + && insn != skip_insn) + { +@@ -2293,12 +3722,12 @@ + fprintf (sched_dump, " %s:%d", + (*current_sched_info->print_insn) (p[i], 0), + INSN_LUID (p[i])); +- if (sched_pressure_p) ++ if (sched_pressure != SCHED_PRESSURE_NONE) + fprintf (sched_dump, "(cost=%d", + INSN_REG_PRESSURE_EXCESS_COST_CHANGE (p[i])); + if (INSN_TICK (p[i]) > clock_var) + fprintf (sched_dump, ":delay=%d", INSN_TICK (p[i]) - clock_var); +- if (sched_pressure_p) ++ if (sched_pressure != SCHED_PRESSURE_NONE) + fprintf (sched_dump, ")"); + } + fprintf (sched_dump, "\n"); +@@ -2609,8 +4038,8 @@ + { + if (state_dead_lock_p (state) + || insn_finishes_cycle_p (insn)) +- /* We won't issue any more instructions in the next +- choice_state. */ ++ /* We won't issue any more instructions in the next ++ choice_state. */ + top->rest = 0; + else + top->rest--; +@@ -2813,6 +4242,59 @@ + } + } + ++/* Examine all insns on the ready list and queue those which can't be ++ issued in this cycle. TEMP_STATE is temporary scheduler state we ++ can use as scratch space. If FIRST_CYCLE_INSN_P is true, no insns ++ have been issued for the current cycle, which means it is valid to ++ issue an asm statement. 
*/ ++ ++static void ++prune_ready_list (state_t temp_state, bool first_cycle_insn_p) ++{ ++ int i; ++ ++ restart: ++ for (i = 0; i < ready.n_ready; i++) ++ { ++ rtx insn = ready_element (&ready, i); ++ int cost = 0; ++ ++ if (recog_memoized (insn) < 0) ++ { ++ if (!first_cycle_insn_p ++ && (GET_CODE (PATTERN (insn)) == ASM_INPUT ++ || asm_noperands (PATTERN (insn)) >= 0)) ++ cost = 1; ++ } ++ else if (sched_pressure != SCHED_PRESSURE_NONE) ++ { ++ if (sched_pressure == SCHED_PRESSURE_MODEL ++ && INSN_TICK (insn) <= clock_var) ++ { ++ memcpy (temp_state, curr_state, dfa_state_size); ++ if (state_transition (temp_state, insn) >= 0) ++ INSN_TICK (insn) = clock_var + 1; ++ } ++ cost = 0; ++ } ++ else ++ { ++ memcpy (temp_state, curr_state, dfa_state_size); ++ cost = state_transition (temp_state, insn); ++ if (cost < 0) ++ cost = 0; ++ else if (cost == 0) ++ cost = 1; ++ } ++ if (cost >= 1) ++ { ++ ready_remove (&ready, i); ++ queue_insn (insn, cost); ++ goto restart; ++ } ++ } ++} ++ + /* Use forward list scheduling to rearrange insns of block pointed to by + TARGET_BB, possibly bringing insns from subsequent blocks in the same + region. */ +@@ -2882,6 +4364,9 @@ + in try_ready () (which is called through init_ready_list ()). */ + (*current_sched_info->init_ready_list) (); + ++ if (sched_pressure == SCHED_PRESSURE_MODEL) ++ model_start_schedule (); ++ + /* The algorithm is O(n^2) in the number of ready insns at any given + time in the worst case. Before reload we are more likely to have + big lists so truncate them to a reasonable size. */ +@@ -2963,6 +4448,10 @@ + } + while (advance > 0); + ++ prune_ready_list (temp_state, true); ++ if (ready.n_ready == 0) ++ continue; ++ + if (sort_p) + { + /* Sort the ready list based on priority. */ +@@ -3040,7 +4529,7 @@ + fprintf (sched_dump, ";;\tReady list (t = %3d): ", + clock_var); + debug_ready_list (&ready); +- if (sched_pressure_p) ++ if (sched_pressure == SCHED_PRESSURE_WEIGHTED) + print_curr_reg_pressure (); + } + +@@ -3084,7 +4573,8 @@ + else + insn = ready_remove_first (&ready); + +- if (sched_pressure_p && INSN_TICK (insn) > clock_var) ++ if (sched_pressure != SCHED_PRESSURE_NONE ++ && INSN_TICK (insn) > clock_var) + { + ready_add (&ready, insn, true); + advance = 1; +@@ -3112,44 +4602,6 @@ + } + + sort_p = TRUE; +- memcpy (temp_state, curr_state, dfa_state_size); +- if (recog_memoized (insn) < 0) +- { +- asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT +- || asm_noperands (PATTERN (insn)) >= 0); +- if (!first_cycle_insn_p && asm_p) +- /* This is asm insn which is tried to be issued on the +- cycle not first. Issue it on the next cycle. */ +- cost = 1; +- else +- /* A USE insn, or something else we don't need to +- understand. We can't pass these directly to +- state_transition because it will trigger a +- fatal error for unrecognizable insns. */ +- cost = 0; +- } +- else if (sched_pressure_p) +- cost = 0; +- else +- { +- cost = state_transition (temp_state, insn); +- if (cost < 0) +- cost = 0; +- else if (cost == 0) +- cost = 1; +- } +- +- if (cost >= 1) +- { +- queue_insn (insn, cost); +- if (SCHED_GROUP_P (insn)) +- { +- advance = cost; +- break; +- } +- +- continue; +- } + + if (current_sched_info->can_schedule_ready_p + && ! 
(*current_sched_info->can_schedule_ready_p) (insn)) +@@ -3200,11 +4652,17 @@ + reemit_notes (insn); + last_scheduled_insn = insn; + +- if (memcmp (curr_state, temp_state, dfa_state_size) != 0) +- { +- cycle_issued_insns++; +- memcpy (curr_state, temp_state, dfa_state_size); +- } ++ if (recog_memoized (insn) >= 0) ++ { ++ cost = state_transition (curr_state, insn); ++ if (sched_pressure != SCHED_PRESSURE_WEIGHTED) ++ gcc_assert (cost < 0); ++ cycle_issued_insns++; ++ asm_p = false; ++ } ++ else ++ asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT ++ || asm_noperands (PATTERN (insn)) >= 0); + + if (targetm.sched.variable_issue) + can_issue_more = +@@ -3225,6 +4683,9 @@ + + first_cycle_insn_p = false; + ++ if (ready.n_ready > 0) ++ prune_ready_list (temp_state, false); ++ + /* Sort the ready list based on priority. This must be + redone here, as schedule_insn may have readied additional + insns that will not be sorted correctly. */ +@@ -3321,6 +4782,9 @@ + } + } + ++ if (sched_pressure == SCHED_PRESSURE_MODEL) ++ model_end_schedule (); ++ + if (sched_verbose) + fprintf (sched_dump, ";; total time = %d\n", clock_var); + +@@ -3424,10 +4888,14 @@ + if (targetm.sched.dispatch (NULL_RTX, IS_DISPATCH_ON)) + targetm.sched.dispatch_do (NULL_RTX, DISPATCH_INIT); + +- sched_pressure_p = (flag_sched_pressure && ! reload_completed +- && common_sched_info->sched_pass_id == SCHED_RGN_PASS); ++ if (flag_sched_pressure ++ && !reload_completed ++ && common_sched_info->sched_pass_id == SCHED_RGN_PASS) ++ sched_pressure = flag_sched_pressure_algorithm; ++ else ++ sched_pressure = SCHED_PRESSURE_NONE; + +- if (sched_pressure_p) ++ if (sched_pressure != SCHED_PRESSURE_NONE) + ira_setup_eliminable_regset (); + + /* Initialize SPEC_INFO. */ +@@ -3504,7 +4972,7 @@ + if (targetm.sched.init_global) + targetm.sched.init_global (sched_dump, sched_verbose, get_max_uid () + 1); + +- if (sched_pressure_p) ++ if (sched_pressure != SCHED_PRESSURE_NONE) + { + int i, max_regno = max_reg_num (); + +@@ -3517,8 +4985,11 @@ + ? 
ira_class_translate[REGNO_REG_CLASS (i)] + : reg_cover_class (i)); + curr_reg_live = BITMAP_ALLOC (NULL); +- saved_reg_live = BITMAP_ALLOC (NULL); +- region_ref_regs = BITMAP_ALLOC (NULL); ++ if (sched_pressure == SCHED_PRESSURE_WEIGHTED) ++ { ++ saved_reg_live = BITMAP_ALLOC (NULL); ++ region_ref_regs = BITMAP_ALLOC (NULL); ++ } + } + + curr_state = xmalloc (dfa_state_size); +@@ -3618,12 +5089,15 @@ + sched_finish (void) + { + haifa_finish_h_i_d (); +- if (sched_pressure_p) ++ if (sched_pressure != SCHED_PRESSURE_NONE) + { ++ if (sched_pressure == SCHED_PRESSURE_WEIGHTED) ++ { ++ BITMAP_FREE (region_ref_regs); ++ BITMAP_FREE (saved_reg_live); ++ } ++ BITMAP_FREE (curr_reg_live); + free (sched_regno_cover_class); +- BITMAP_FREE (region_ref_regs); +- BITMAP_FREE (saved_reg_live); +- BITMAP_FREE (curr_reg_live); + } + free (curr_state); + +@@ -3936,7 +5410,7 @@ + INSN_TICK (next) = tick; + + delay = tick - clock_var; +- if (delay <= 0 || sched_pressure_p) ++ if (delay <= 0 || sched_pressure != SCHED_PRESSURE_NONE) + delay = QUEUE_READY; + + change_queue_index (next, delay); +@@ -5185,7 +6659,7 @@ + if (insn == jump) + break; + +- if (dep_list_size (insn) == 0) ++ if (dep_list_size (insn, SD_LIST_FORW) == 0) + { + dep_def _new_dep, *new_dep = &_new_dep; + +@@ -5556,6 +7030,7 @@ + + FOR_EACH_VEC_ELT (haifa_insn_data_def, h_i_d, i, data) + { ++ free (data->max_reg_pressure); + if (data->reg_pressure != NULL) + free (data->reg_pressure); + for (use = data->reg_use_list; use != NULL; use = next) + +=== modified file 'gcc/sched-deps.c' +--- old/gcc/sched-deps.c 2011-12-08 13:33:58 +0000 ++++ new/gcc/sched-deps.c 2012-02-08 23:39:45 +0000 +@@ -450,7 +450,7 @@ + static void add_dependence_list_and_free (struct deps_desc *, rtx, + rtx *, int, enum reg_note); + static void delete_all_dependences (rtx); +-static void fixup_sched_groups (rtx); ++static void chain_to_prev_insn (rtx); + + static void flush_pending_lists (struct deps_desc *, rtx, int, int); + static void sched_analyze_1 (struct deps_desc *, rtx, rtx); +@@ -1490,7 +1490,7 @@ + the previous nonnote insn. */ + + static void +-fixup_sched_groups (rtx insn) ++chain_to_prev_insn (rtx insn) + { + sd_iterator_def sd_it; + dep_t dep; +@@ -1999,7 +1999,7 @@ + static struct reg_pressure_data *pressure_info; + rtx link; + +- gcc_assert (sched_pressure_p); ++ gcc_assert (sched_pressure != SCHED_PRESSURE_NONE); + + if (! INSN_P (insn)) + return; +@@ -2030,8 +2030,9 @@ + len = sizeof (struct reg_pressure_data) * ira_reg_class_cover_size; + pressure_info + = INSN_REG_PRESSURE (insn) = (struct reg_pressure_data *) xmalloc (len); +- INSN_MAX_REG_PRESSURE (insn) = (int *) xcalloc (ira_reg_class_cover_size +- * sizeof (int), 1); ++ if (sched_pressure == SCHED_PRESSURE_WEIGHTED) ++ INSN_MAX_REG_PRESSURE (insn) = (int *) xcalloc (ira_reg_class_cover_size ++ * sizeof (int), 1); + for (i = 0; i < ira_reg_class_cover_size; i++) + { + cl = ira_reg_class_cover[i]; +@@ -2775,7 +2776,7 @@ + || (NONJUMP_INSN_P (insn) && control_flow_insn_p (insn))) + reg_pending_barrier = MOVE_BARRIER; + +- if (sched_pressure_p) ++ if (sched_pressure != SCHED_PRESSURE_NONE) + { + setup_insn_reg_uses (deps, insn); + setup_insn_reg_pressure_info (insn); +@@ -3076,7 +3077,7 @@ + instructions that follow seem like they should be part + of the call group. 
+ +- Also, if we did, fixup_sched_groups() would move the ++ Also, if we did, chain_to_prev_insn would move the + deps of the debug insn to the call insn, modifying + non-debug post-dependency counts of the debug insn + dependencies and otherwise messing with the scheduling +@@ -3222,6 +3223,37 @@ + return true; + } + ++/* Return true if INSN should be made dependent on the previous instruction ++ group, and if all INSN's dependencies should be moved to the first ++ instruction of that group. */ ++ ++static bool ++chain_to_prev_insn_p (rtx insn) ++{ ++ rtx prev, x; ++ ++ /* INSN forms a group with the previous instruction. */ ++ if (SCHED_GROUP_P (insn)) ++ return true; ++ ++ /* If the previous instruction clobbers a register R and this one sets ++ part of R, the clobber was added specifically to help us track the ++ liveness of R. There's no point scheduling the clobber and leaving ++ INSN behind, especially if we move the clobber to another block. */ ++ prev = prev_nonnote_nondebug_insn (insn); ++ if (prev ++ && INSN_P (prev) ++ && BLOCK_FOR_INSN (prev) == BLOCK_FOR_INSN (insn) ++ && GET_CODE (PATTERN (prev)) == CLOBBER) ++ { ++ x = XEXP (PATTERN (prev), 0); ++ if (set_of (x, insn)) ++ return true; ++ } ++ ++ return false; ++} ++ + /* Analyze INSN with DEPS as a context. */ + void + deps_analyze_insn (struct deps_desc *deps, rtx insn) +@@ -3358,8 +3390,9 @@ + + /* Fixup the dependencies in the sched group. */ + if ((NONJUMP_INSN_P (insn) || JUMP_P (insn)) +- && SCHED_GROUP_P (insn) && !sel_sched_p ()) +- fixup_sched_groups (insn); ++ && chain_to_prev_insn_p (insn) ++ && !sel_sched_p ()) ++ chain_to_prev_insn (insn); + } + + /* Initialize DEPS for the new block beginning with HEAD. */ + +=== modified file 'gcc/sched-int.h' +--- old/gcc/sched-int.h 2011-02-02 04:31:35 +0000 ++++ new/gcc/sched-int.h 2012-02-08 23:39:02 +0000 +@@ -651,7 +651,7 @@ + + /* Do register pressure sensitive insn scheduling if the flag is set + up. */ +-extern bool sched_pressure_p; ++extern enum sched_pressure_algorithm sched_pressure; + + /* Map regno -> its cover class. The map defined only when + SCHED_PRESSURE_P is true. */ +@@ -773,16 +773,16 @@ + + short cost; + ++ /* '> 0' if priority is valid, ++ '== 0' if priority was not yet computed, ++ '< 0' if priority in invalid and should be recomputed. */ ++ signed char priority_status; ++ + /* Set if there's DEF-USE dependence between some speculatively + moved load insn and this one. */ + unsigned int fed_by_spec_load : 1; + unsigned int is_load_insn : 1; + +- /* '> 0' if priority is valid, +- '== 0' if priority was not yet computed, +- '< 0' if priority in invalid and should be recomputed. */ +- signed char priority_status; +- + /* What speculations are necessary to apply to schedule the instruction. */ + ds_t todo_spec; + +@@ -817,6 +817,7 @@ + /* Info about how scheduling the insn changes cost of register + pressure excess (between source and target). 
*/ + int reg_pressure_excess_cost_change; ++ int model_index; + }; + + typedef struct _haifa_insn_data haifa_insn_data_def; +@@ -839,6 +840,7 @@ + #define INSN_REG_PRESSURE_EXCESS_COST_CHANGE(INSN) \ + (HID (INSN)->reg_pressure_excess_cost_change) + #define INSN_PRIORITY_STATUS(INSN) (HID (INSN)->priority_status) ++#define INSN_MODEL_INDEX(INSN) (HID (INSN)->model_index) + + typedef struct _haifa_deps_insn_data haifa_deps_insn_data_def; + typedef haifa_deps_insn_data_def *haifa_deps_insn_data_t; + +=== modified file 'gcc/sched-rgn.c' +--- old/gcc/sched-rgn.c 2011-06-04 10:15:48 +0000 ++++ new/gcc/sched-rgn.c 2012-02-08 23:38:13 +0000 +@@ -2943,7 +2943,7 @@ + + sched_extend_ready_list (rgn_n_insns); + +- if (sched_pressure_p) ++ if (sched_pressure == SCHED_PRESSURE_WEIGHTED) + { + sched_init_region_reg_pressure_info (); + for (bb = 0; bb < current_nr_blocks; bb++) + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106870.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106870.patch new file mode 100644 index 0000000000..1a5448403e --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106870.patch @@ -0,0 +1,28 @@ +2012-02-20 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/arm.c (arm_print_operand): Avoid null-pointer + dereference from MEM_SIZE. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2012-02-09 00:47:59 +0000 ++++ new/gcc/config/arm/arm.c 2012-02-20 15:32:26 +0000 +@@ -17446,6 +17446,7 @@ + rtx addr; + bool postinc = FALSE; + unsigned align, memsize, align_bits; ++ rtx memsize_rtx; + + gcc_assert (GET_CODE (x) == MEM); + addr = XEXP (x, 0); +@@ -17460,7 +17461,8 @@ + instruction (for some alignments) as an aid to the memory subsystem + of the target. */ + align = MEM_ALIGN (x) >> 3; +- memsize = INTVAL (MEM_SIZE (x)); ++ memsize_rtx = MEM_SIZE (x); ++ memsize = memsize_rtx ? INTVAL (memsize_rtx) : 0; + + /* Only certain alignment specifiers are supported by the hardware. */ + if (memsize == 16 && (align % 32) == 0) + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106872.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106872.patch new file mode 100644 index 0000000000..9bfd969df6 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106872.patch @@ -0,0 +1,126 @@ +2012-02-24 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline. + 2011-12-05 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + gcc/ + * config/arm/arm.c (vfp3_const_double_for_fract_bits): Define. + * config/arm/arm-protos.h (vfp3_const_double_for_fract_bits): Declare. + * config/arm/constraints.md ("Dt"): New constraint. + * config/arm/predicates.md (const_double_vcvt_power_of_two_reciprocal): + New. + * config/arm/vfp.md (*arm_combine_vcvt_f32_s32): New. + (*arm_combine_vcvt_f32_u32): New. + + LP:#900426 + + 2011-12-06 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + * config/arm/vfp.md (*combine_vcvt_f64_<FCVTI32typename>): Fix + formatting character for vmov.f64 case. + +2012-02-24 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + gcc/ + * config/arm/arm.c (arm_print_operand): Remove wrongly merged code. + (vfp3_const_double_for_fract_bits): Likewise. 
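(For context, illustrative only and not part of the patch text: r106872 teaches the ARM backend to fold an integer-to-float conversion multiplied by an exact power-of-two reciprocal into a single fixed-point vcvt. The sketch below uses a hypothetical function name and constant, and assumes hard-float -mfpu=vfp3 with the default -fno-rounding-math; it shows the kind of source the new vfp.md patterns can match.)

/* Illustrative sketch, not taken from the patch: x is read as a fixed-point
   value with 6 fractional bits.  1.0f/64 is the exact reciprocal of a power
   of two (2^-6), so the combiner can match the new SF-mode vcvt pattern and
   emit roughly "vcvt.f32.s32 s0, s0, #6" instead of a vcvt followed by a
   vmul.  */
float
scale_q6 (int x)
{
  return x * (1.0f / 64);
}

(The new vfp3_const_double_for_fract_bits helper recognises such 2^-n constants and supplies the fract-bits operand, 6 in this example.)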
+ +=== modified file 'gcc/config/arm/arm-protos.h' +--- old/gcc/config/arm/arm-protos.h 2011-12-06 10:42:29 +0000 ++++ new/gcc/config/arm/arm-protos.h 2012-02-22 13:31:54 +0000 +@@ -238,6 +238,7 @@ + }; + + extern const struct tune_params *current_tune; ++extern int vfp3_const_double_for_fract_bits (rtx); + #endif /* RTX_CODE */ + + #endif /* ! GCC_ARM_PROTOS_H */ + +=== modified file 'gcc/config/arm/constraints.md' +--- old/gcc/config/arm/constraints.md 2011-12-06 10:42:29 +0000 ++++ new/gcc/config/arm/constraints.md 2012-02-22 13:31:54 +0000 +@@ -29,7 +29,7 @@ + ;; in Thumb-1 state: I, J, K, L, M, N, O + + ;; The following multi-letter normal constraints have been used: +-;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz ++;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz + ;; in Thumb-1 state: Pa, Pb, Pc, Pd + ;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py + +@@ -291,6 +291,12 @@ + (and (match_code "const_double") + (match_test "TARGET_32BIT && TARGET_VFP_DOUBLE && vfp3_const_double_rtx (op)"))) + ++(define_constraint "Dt" ++ "@internal ++ In ARM/ Thumb2 a const_double which can be used with a vcvt.f32.s32 with fract bits operation" ++ (and (match_code "const_double") ++ (match_test "TARGET_32BIT && TARGET_VFP && vfp3_const_double_for_fract_bits (op)"))) ++ + (define_memory_constraint "Ut" + "@internal + In ARM/Thumb-2 state an address valid for loading/storing opaque structure + +=== modified file 'gcc/config/arm/predicates.md' +--- old/gcc/config/arm/predicates.md 2011-12-06 10:42:29 +0000 ++++ new/gcc/config/arm/predicates.md 2012-02-22 13:31:54 +0000 +@@ -725,6 +725,11 @@ + return true; + }) + ++(define_predicate "const_double_vcvt_power_of_two_reciprocal" ++ (and (match_code "const_double") ++ (match_test "TARGET_32BIT && TARGET_VFP ++ && vfp3_const_double_for_fract_bits (op)"))) ++ + (define_special_predicate "neon_struct_operand" + (and (match_code "mem") + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2)"))) + +=== modified file 'gcc/config/arm/vfp.md' +--- old/gcc/config/arm/vfp.md 2011-12-06 10:42:29 +0000 ++++ new/gcc/config/arm/vfp.md 2012-02-22 13:31:54 +0000 +@@ -1131,9 +1131,40 @@ + (set_attr "type" "fcmpd")] + ) + ++;; Fixed point to floating point conversions. ++(define_code_iterator FCVT [unsigned_float float]) ++(define_code_attr FCVTI32typename [(unsigned_float "u32") (float "s32")]) ++ ++(define_insn "*combine_vcvt_f32_<FCVTI32typename>" ++ [(set (match_operand:SF 0 "s_register_operand" "=t") ++ (mult:SF (FCVT:SF (match_operand:SI 1 "s_register_operand" "0")) ++ (match_operand 2 ++ "const_double_vcvt_power_of_two_reciprocal" "Dt")))] ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP3 && !flag_rounding_math" ++ "vcvt.f32.<FCVTI32typename>\\t%0, %1, %v2" ++ [(set_attr "predicable" "no") ++ (set_attr "type" "f_cvt")] ++) ++ ++;; Not the ideal way of implementing this. 
Ideally we would be able to split ++;; this into a move to a DP register and then a vcvt.f64.i32 ++(define_insn "*combine_vcvt_f64_<FCVTI32typename>" ++ [(set (match_operand:DF 0 "s_register_operand" "=x,x,w") ++ (mult:DF (FCVT:DF (match_operand:SI 1 "s_register_operand" "r,t,r")) ++ (match_operand 2 ++ "const_double_vcvt_power_of_two_reciprocal" "Dt,Dt,Dt")))] ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP3 && !flag_rounding_math ++ && !TARGET_VFP_SINGLE" ++ "@ ++ vmov.f32\\t%0, %1\;vcvt.f64.<FCVTI32typename>\\t%P0, %P0, %v2 ++ vmov.f32\\t%0, %1\;vcvt.f64.<FCVTI32typename>\\t%P0, %P0, %v2 ++ vmov.f64\\t%P0, %1, %1\; vcvt.f64.<FCVTI32typename>\\t%P0, %P0, %v2" ++ [(set_attr "predicable" "no") ++ (set_attr "type" "f_cvt") ++ (set_attr "length" "8")] ++) + + ;; Store multiple insn used in function prologue. +- + (define_insn "*push_multi_vfp" + [(match_parallel 2 "multi_register_push" + [(set (match_operand:BLK 0 "memory_operand" "=m") + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106873.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106873.patch new file mode 100644 index 0000000000..5ce71a5138 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106873.patch @@ -0,0 +1,80 @@ + 2012-02-24 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + LP:#922474 + gcc/ + * config/arm/sync.md (sync_lock_releasedi): Define. + (arm_sync_lock_releasedi): Likewise. + gcc/testsuite + Backport from mainline. + 2012-01-30 Greta Yorsh <Greta.Yorsh@arm.com> + * gcc.target/arm/di-longlong64-sync-withldrexd.c: Accept + new code generated for __sync_lock_release. + +=== modified file 'gcc/config/arm/arm.md' +--- old/gcc/config/arm/arm.md 2012-02-01 14:13:07 +0000 ++++ new/gcc/config/arm/arm.md 2012-02-22 18:37:56 +0000 +@@ -157,6 +157,7 @@ + (VUNSPEC_SYNC_OP 23) ; Represent a sync_<op> + (VUNSPEC_SYNC_NEW_OP 24) ; Represent a sync_new_<op> + (VUNSPEC_SYNC_OLD_OP 25) ; Represent a sync_old_<op> ++ (VUNSPEC_SYNC_RELEASE 26) ; Represent a sync_lock_release. 
+ ] + ) + + +=== modified file 'gcc/config/arm/sync.md' +--- old/gcc/config/arm/sync.md 2011-10-14 15:47:15 +0000 ++++ new/gcc/config/arm/sync.md 2012-02-22 18:37:56 +0000 +@@ -494,3 +494,36 @@ + (set_attr "conds" "unconditional") + (set_attr "predicable" "no")]) + ++(define_expand "sync_lock_releasedi" ++ [(match_operand:DI 0 "memory_operand") ++ (match_operand:DI 1 "s_register_operand")] ++ "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN && TARGET_HAVE_MEMORY_BARRIER" ++ { ++ struct arm_sync_generator generator; ++ rtx tmp1 = gen_reg_rtx (DImode); ++ generator.op = arm_sync_generator_omn; ++ generator.u.omn = gen_arm_sync_lock_releasedi; ++ arm_expand_sync (DImode, &generator, operands[1], operands[0], NULL, tmp1); ++ DONE; ++ } ++) ++ ++(define_insn "arm_sync_lock_releasedi" ++ [(set (match_operand:DI 2 "s_register_operand" "=&r") ++ (unspec_volatile:DI [(match_operand:DI 1 "arm_sync_memory_operand" "+Q") ++ (match_operand:DI 0 "s_register_operand" "r")] ++ VUNSPEC_SYNC_RELEASE)) ++ (clobber (reg:CC CC_REGNUM)) ++ (clobber (match_scratch:SI 3 "=&r"))] ++ "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN && TARGET_HAVE_MEMORY_BARRIER" ++ { ++ return arm_output_sync_insn (insn, operands); ++ } ++ [(set_attr "sync_memory" "1") ++ (set_attr "sync_result" "2") ++ (set_attr "sync_t1" "2") ++ (set_attr "sync_t2" "3") ++ (set_attr "sync_new_value" "0") ++ (set_attr "conds" "clob") ++ (set_attr "predicable" "no")] ++) + +=== modified file 'gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c' +--- old/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c 2011-10-14 15:56:32 +0000 ++++ new/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c 2012-02-22 18:37:56 +0000 +@@ -10,8 +10,8 @@ + #include "../../gcc.dg/di-longlong64-sync-1.c" + + /* We should be using ldrexd, strexd and no helpers or shorter ldrex. */ +-/* { dg-final { scan-assembler-times "\tldrexd" 46 } } */ +-/* { dg-final { scan-assembler-times "\tstrexd" 46 } } */ ++/* { dg-final { scan-assembler-times "\tldrexd" 48 } } */ ++/* { dg-final { scan-assembler-times "\tstrexd" 48 } } */ + /* { dg-final { scan-assembler-not "__sync_" } } */ + /* { dg-final { scan-assembler-not "ldrex\t" } } */ + /* { dg-final { scan-assembler-not "strex\t" } } */ + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106874.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106874.patch new file mode 100644 index 0000000000..092650dc9d --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106874.patch @@ -0,0 +1,46 @@ + 2012-02-24 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline. + gcc/ + 2012-02-21 Matthew Gretton-Dann <matthew.gretton-dann@arm.com> + + Revert r183011 + * config/arm/arm-cores.def (cortex-a15): Use generic Cortex tuning + parameters. + * config/arm/arm.c (arm_cortex_a15_tune): Remove. 
*/ + +=== modified file 'gcc/config/arm/arm-cores.def' +--- old/gcc/config/arm/arm-cores.def 2012-01-23 00:36:02 +0000 ++++ new/gcc/config/arm/arm-cores.def 2012-02-22 15:53:56 +0000 +@@ -129,7 +129,7 @@ + ARM_CORE("cortex-a7", cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex) + ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex) + ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9) +-ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) ++ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex) + ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, cortex) + ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, cortex) + ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2012-02-24 16:20:29 +0000 ++++ new/gcc/config/arm/arm.c 2012-02-24 17:33:58 +0000 +@@ -988,17 +988,6 @@ + arm_default_branch_cost + }; + +-const struct tune_params arm_cortex_a15_tune = +-{ +- arm_9e_rtx_costs, +- NULL, +- 1, /* Constant limit. */ +- 1, /* Max cond insns. */ +- ARM_PREFETCH_NOT_BENEFICIAL, /* TODO: Calculate correct values. */ +- false, /* Prefer constant pool. */ +- arm_cortex_a5_branch_cost +-}; +- + const struct tune_params arm_fa726te_tune = + { + arm_9e_rtx_costs, +