Diffstat (limited to 'toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro')
94 files changed, 45251 insertions, 0 deletions
diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/fix_linaro_106872.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/fix_linaro_106872.patch new file mode 100644 index 0000000000..ef33afff70 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/fix_linaro_106872.patch @@ -0,0 +1,45 @@ +Index: gcc-4_6-branch/gcc/config/arm/arm.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/arm.c 2012-03-05 17:14:09.901129286 -0800 ++++ gcc-4_6-branch/gcc/config/arm/arm.c 2012-03-05 17:18:23.061141606 -0800 +@@ -17525,6 +17525,13 @@ + } + return; + ++ case 'v': ++ { ++ gcc_assert (GET_CODE (x) == CONST_DOUBLE); ++ fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x)); ++ return; ++ } ++ + /* Register specifier for vld1.16/vst1.16. Translate the S register + number into a D register number and element index. */ + case 'z': +@@ -24925,4 +24932,26 @@ + return 4; + } + ++int ++vfp3_const_double_for_fract_bits (rtx operand) ++{ ++ REAL_VALUE_TYPE r0; ++ ++ if (GET_CODE (operand) != CONST_DOUBLE) ++ return 0; ++ ++ REAL_VALUE_FROM_CONST_DOUBLE (r0, operand); ++ if (exact_real_inverse (DFmode, &r0)) ++ { ++ if (exact_real_truncate (DFmode, &r0)) ++ { ++ HOST_WIDE_INT value = real_to_integer (&r0); ++ value = value & 0xffffffff; ++ if ((value != 0) && ( (value & (value - 1)) == 0)) ++ return int_log2 (value); ++ } ++ } ++ return 0; ++} ++ + #include "gt-arm.h" diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106720.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106720.patch new file mode 100644 index 0000000000..4c573f401e --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106720.patch @@ -0,0 +1,51 @@ +2011-02-21 Andrew Stubbs <ams@codesourcery.com> + Julian Brown <julian@codesourcery.com> + Mark Shinwell <shinwell@codesourcery.com> + + Forward-ported from Linaro GCC 4.5 (bzr99324). + + gcc/ + * config/arm/arm.h (arm_class_likely_spilled_p): Check against + LO_REGS only for Thumb-1. + (MODE_BASE_REG_CLASS): Restrict base registers to those which can + be used in short instructions when optimising for size on Thumb-2. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-01-29 03:20:57 +0000 ++++ new/gcc/config/arm/arm.c 2011-02-21 14:04:51 +0000 +@@ -22304,14 +22304,16 @@ + + /* Implement TARGET_CLASS_LIKELY_SPILLED_P. + +- We need to define this for LO_REGS on thumb. Otherwise we can end up +- using r0-r4 for function arguments, r7 for the stack frame and don't +- have enough left over to do doubleword arithmetic. */ +- ++ We need to define this for LO_REGS on Thumb-1. Otherwise we can end up ++ using r0-r4 for function arguments, r7 for the stack frame and don't have ++ enough left over to do doubleword arithmetic. For Thumb-2 all the ++ potentially problematic instructions accept high registers so this is not ++ necessary. Care needs to be taken to avoid adding new Thumb-2 patterns ++ that require many low registers. 
*/ + static bool + arm_class_likely_spilled_p (reg_class_t rclass) + { +- if ((TARGET_THUMB && rclass == LO_REGS) ++ if ((TARGET_THUMB1 && rclass == LO_REGS) + || rclass == CC_REG) + return true; + + +=== modified file 'gcc/config/arm/arm.h' +--- old/gcc/config/arm/arm.h 2011-01-29 03:20:57 +0000 ++++ new/gcc/config/arm/arm.h 2011-02-21 14:04:51 +0000 +@@ -1185,7 +1185,7 @@ + when addressing quantities in QI or HI mode; if we don't know the + mode, then we must be conservative. */ + #define MODE_BASE_REG_CLASS(MODE) \ +- (TARGET_32BIT ? CORE_REGS : \ ++ (TARGET_ARM || (TARGET_THUMB2 && !optimize_size) ? CORE_REGS : \ + (((MODE) == SImode) ? BASE_REGS : LO_REGS)) + + /* For Thumb we can not support SP+reg addressing, so we return LO_REGS + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106733.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106733.patch new file mode 100644 index 0000000000..4b0079e1dc --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106733.patch @@ -0,0 +1,653 @@ +2011-03-27 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * doc/invoke.texi (max-stores-to-sink): Document. + * params.h (MAX_STORES_TO_SINK): Define. + * opts.c (finish_options): Set MAX_STORES_TO_SINK to 0 + if either vectorization or if-conversion is disabled. + * tree-data-ref.c (dr_equal_offsets_p1): Moved and renamed from + tree-vect-data-refs.c vect_equal_offsets. + (dr_equal_offsets_p): New function. + (find_data_references_in_bb): Remove static. + * tree-data-ref.h (find_data_references_in_bb): Declare. + (dr_equal_offsets_p): Likewise. + * tree-vect-data-refs.c (vect_equal_offsets): Move to tree-data-ref.c. + (vect_drs_dependent_in_basic_block): Update calls to + vect_equal_offsets. + (vect_check_interleaving): Likewise. + * tree-ssa-phiopt.c: Include cfgloop.h and tree-data-ref.h. + (cond_if_else_store_replacement): Rename to... + (cond_if_else_store_replacement_1): ... this. Change arguments and + documentation. + (cond_if_else_store_replacement): New function. + * Makefile.in (tree-ssa-phiopt.o): Adjust dependencies. + * params.def (PARAM_MAX_STORES_TO_SINK): Define. + + gcc/testsuite/ + * gcc.dg/vect/vect-cselim-1.c: New test. + * gcc.dg/vect/vect-cselim-2.c: New test. + +=== modified file 'gcc/Makefile.in' +--- old/gcc/Makefile.in 2011-03-26 09:20:34 +0000 ++++ new/gcc/Makefile.in 2011-04-18 11:31:29 +0000 +@@ -2422,7 +2422,8 @@ + tree-ssa-phiopt.o : tree-ssa-phiopt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ + $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ + $(TREE_FLOW_H) $(TREE_PASS_H) $(TREE_DUMP_H) langhooks.h $(FLAGS_H) \ +- $(DIAGNOSTIC_H) $(TIMEVAR_H) pointer-set.h domwalk.h ++ $(DIAGNOSTIC_H) $(TIMEVAR_H) pointer-set.h domwalk.h $(CFGLOOP_H) \ ++ $(TREE_DATA_REF_H) + tree-nrv.o : tree-nrv.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ + $(TM_H) $(TREE_H) $(FUNCTION_H) $(BASIC_BLOCK_H) $(FLAGS_H) \ + $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TIMEVAR_H) $(TREE_DUMP_H) $(TREE_PASS_H) \ + +=== modified file 'gcc/doc/invoke.texi' +--- old/gcc/doc/invoke.texi 2011-03-29 14:24:42 +0000 ++++ new/gcc/doc/invoke.texi 2011-04-18 11:31:29 +0000 +@@ -8909,6 +8909,11 @@ + The maximum number of namespaces to consult for suggestions when C++ + name lookup fails for an identifier. The default is 1000. + ++@item max-stores-to-sink ++The maximum number of conditional stores paires that can be sunk. 
Set to 0 ++if either vectorization (@option{-ftree-vectorize}) or if-conversion ++(@option{-ftree-loop-if-convert}) is disabled. The default is 2. ++ + @end table + @end table + + +=== modified file 'gcc/opts.c' +--- old/gcc/opts.c 2011-02-17 22:51:57 +0000 ++++ new/gcc/opts.c 2011-03-27 09:38:18 +0000 +@@ -823,6 +823,12 @@ + opts->x_flag_split_stack = 0; + } + } ++ ++ /* Set PARAM_MAX_STORES_TO_SINK to 0 if either vectorization or if-conversion ++ is disabled. */ ++ if (!opts->x_flag_tree_vectorize || !opts->x_flag_tree_loop_if_convert) ++ maybe_set_param_value (PARAM_MAX_STORES_TO_SINK, 0, ++ opts->x_param_values, opts_set->x_param_values); + } + + #define LEFT_COLUMN 27 + +=== modified file 'gcc/params.def' +--- old/gcc/params.def 2011-03-26 09:20:34 +0000 ++++ new/gcc/params.def 2011-04-18 11:31:29 +0000 +@@ -883,6 +883,13 @@ + "name lookup fails", + 1000, 0, 0) + ++/* Maximum number of conditional store pairs that can be sunk. */ ++DEFPARAM (PARAM_MAX_STORES_TO_SINK, ++ "max-stores-to-sink", ++ "Maximum number of conditional store pairs that can be sunk", ++ 2, 0, 0) ++ ++ + /* + Local variables: + mode:c + +=== modified file 'gcc/params.h' +--- old/gcc/params.h 2011-01-13 13:41:03 +0000 ++++ new/gcc/params.h 2011-03-27 09:38:18 +0000 +@@ -206,4 +206,6 @@ + PARAM_VALUE (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO) + #define MIN_NONDEBUG_INSN_UID \ + PARAM_VALUE (PARAM_MIN_NONDEBUG_INSN_UID) ++#define MAX_STORES_TO_SINK \ ++ PARAM_VALUE (PARAM_MAX_STORES_TO_SINK) + #endif /* ! GCC_PARAMS_H */ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-cselim-1.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-cselim-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-cselim-1.c 2011-03-27 09:38:18 +0000 +@@ -0,0 +1,86 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 50 ++ ++typedef struct { ++ short a; ++ short b; ++} data; ++ ++data in1[N], in2[N], out[N]; ++short result[N*2] = {7,-7,9,-6,11,-5,13,-4,15,-3,17,-2,19,-1,21,0,23,1,25,2,27,3,29,4,31,5,33,6,35,7,37,8,39,9,41,10,43,11,45,12,47,13,49,14,51,15,53,16,55,17,57,18,59,19,61,20,63,21,65,22,67,23,69,24,71,25,73,26,75,27,77,28,79,29,81,30,83,31,85,32,87,33,89,34,91,35,93,36,95,37,97,38,99,39,101,40,103,41,105,42}; ++short out1[N], out2[N]; ++ ++__attribute__ ((noinline)) void ++foo () ++{ ++ int i; ++ short c, d; ++ ++ /* Vectorizable with conditional store sinking. */ ++ for (i = 0; i < N; i++) ++ { ++ c = in1[i].b; ++ d = in2[i].b; ++ ++ if (c >= d) ++ { ++ out[i].b = c; ++ out[i].a = d + 5; ++ } ++ else ++ { ++ out[i].b = d - 12; ++ out[i].a = c + d; ++ } ++ } ++ ++ /* Not vectorizable. */ ++ for (i = 0; i < N; i++) ++ { ++ c = in1[i].b; ++ d = in2[i].b; ++ ++ if (c >= d) ++ { ++ out1[i] = c; ++ } ++ else ++ { ++ out2[i] = c + d; ++ } ++ } ++} ++ ++int ++main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in1[i].a = i; ++ in1[i].b = i + 2; ++ in2[i].a = 5; ++ in2[i].b = i + 5; ++ __asm__ volatile (""); ++ } ++ ++ foo (); ++ ++ for (i = 0; i < N; i++) ++ { ++ if (out[i].a != result[2*i] || out[i].b != result[2*i+1]) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align || {! 
vect_strided } } } } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-cselim-2.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-cselim-2.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-cselim-2.c 2011-03-27 09:38:18 +0000 +@@ -0,0 +1,65 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 50 ++ ++int a[N], b[N], in1[N], in2[N]; ++int result[2*N] = {5,-7,7,-6,9,-5,11,-4,13,-3,15,-2,17,-1,19,0,21,1,23,2,25,3,27,4,29,5,31,6,33,7,35,8,37,9,39,10,41,11,43,12,45,13,47,14,49,15,51,16,53,17,55,18,57,19,59,20,61,21,63,22,65,23,67,24,69,25,71,26,73,27,75,28,77,29,79,30,81,31,83,32,85,33,87,34,89,35,91,36,93,37,95,38,97,39,99,40,101,41,103,42}; ++ ++__attribute__ ((noinline)) void ++foo (int *pa, int *pb) ++{ ++ int i; ++ int c, d; ++ ++ /* Store sinking should not work here since the pointers may alias. */ ++ for (i = 0; i < N; i++) ++ { ++ c = in1[i]; ++ d = in2[i]; ++ ++ if (c >= d) ++ { ++ *pa = c; ++ *pb = d + 5; ++ } ++ else ++ { ++ *pb = d - 12; ++ *pa = c + d; ++ } ++ ++ pa++; ++ pb++; ++ } ++} ++ ++int ++main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in1[i] = i; ++ in2[i] = i + 5; ++ __asm__ volatile (""); ++ } ++ ++ foo (a, b); ++ ++ for (i = 0; i < N; i++) ++ { ++ if (a[i] != result[2*i] || b[i] != result[2*i+1]) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/tree-data-ref.c' +--- old/gcc/tree-data-ref.c 2011-02-05 01:39:20 +0000 ++++ new/gcc/tree-data-ref.c 2011-03-27 09:38:18 +0000 +@@ -991,6 +991,48 @@ + return dr; + } + ++/* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical ++ expressions. */ ++static bool ++dr_equal_offsets_p1 (tree offset1, tree offset2) ++{ ++ bool res; ++ ++ STRIP_NOPS (offset1); ++ STRIP_NOPS (offset2); ++ ++ if (offset1 == offset2) ++ return true; ++ ++ if (TREE_CODE (offset1) != TREE_CODE (offset2) ++ || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1))) ++ return false; ++ ++ res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0), ++ TREE_OPERAND (offset2, 0)); ++ ++ if (!res || !BINARY_CLASS_P (offset1)) ++ return res; ++ ++ res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1), ++ TREE_OPERAND (offset2, 1)); ++ ++ return res; ++} ++ ++/* Check if DRA and DRB have equal offsets. */ ++bool ++dr_equal_offsets_p (struct data_reference *dra, ++ struct data_reference *drb) ++{ ++ tree offset1, offset2; ++ ++ offset1 = DR_OFFSET (dra); ++ offset2 = DR_OFFSET (drb); ++ ++ return dr_equal_offsets_p1 (offset1, offset2); ++} ++ + /* Returns true if FNA == FNB. */ + + static bool +@@ -4294,7 +4336,7 @@ + DATAREFS. Returns chrec_dont_know when failing to analyze a + difficult case, returns NULL_TREE otherwise. 
*/ + +-static tree ++tree + find_data_references_in_bb (struct loop *loop, basic_block bb, + VEC (data_reference_p, heap) **datarefs) + { + +=== modified file 'gcc/tree-data-ref.h' +--- old/gcc/tree-data-ref.h 2011-01-25 21:24:23 +0000 ++++ new/gcc/tree-data-ref.h 2011-03-27 09:38:18 +0000 +@@ -426,10 +426,14 @@ + extern void compute_all_dependences (VEC (data_reference_p, heap) *, + VEC (ddr_p, heap) **, VEC (loop_p, heap) *, + bool); ++extern tree find_data_references_in_bb (struct loop *, basic_block, ++ VEC (data_reference_p, heap) **); + + extern void create_rdg_vertices (struct graph *, VEC (gimple, heap) *); + extern bool dr_may_alias_p (const struct data_reference *, + const struct data_reference *); ++extern bool dr_equal_offsets_p (struct data_reference *, ++ struct data_reference *); + + + /* Return true when the base objects of data references A and B are + +=== modified file 'gcc/tree-ssa-phiopt.c' +--- old/gcc/tree-ssa-phiopt.c 2010-11-03 15:18:50 +0000 ++++ new/gcc/tree-ssa-phiopt.c 2011-03-27 09:38:18 +0000 +@@ -34,6 +34,8 @@ + #include "langhooks.h" + #include "pointer-set.h" + #include "domwalk.h" ++#include "cfgloop.h" ++#include "tree-data-ref.h" + + static unsigned int tree_ssa_phiopt (void); + static unsigned int tree_ssa_phiopt_worker (bool); +@@ -1292,35 +1294,18 @@ + return true; + } + +-/* Do the main work of conditional store replacement. We already know +- that the recognized pattern looks like so: +- +- split: +- if (cond) goto THEN_BB; else goto ELSE_BB (edge E1) +- THEN_BB: +- X = Y; +- goto JOIN_BB; +- ELSE_BB: +- X = Z; +- fallthrough (edge E0) +- JOIN_BB: +- some more +- +- We check that THEN_BB and ELSE_BB contain only one store +- that the stores have a "simple" RHS. */ ++/* Do the main work of conditional store replacement. */ + + static bool +-cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb, +- basic_block join_bb) ++cond_if_else_store_replacement_1 (basic_block then_bb, basic_block else_bb, ++ basic_block join_bb, gimple then_assign, ++ gimple else_assign) + { +- gimple then_assign = last_and_only_stmt (then_bb); +- gimple else_assign = last_and_only_stmt (else_bb); + tree lhs_base, lhs, then_rhs, else_rhs; + source_location then_locus, else_locus; + gimple_stmt_iterator gsi; + gimple newphi, new_stmt; + +- /* Check if then_bb and else_bb contain only one store each. */ + if (then_assign == NULL + || !gimple_assign_single_p (then_assign) + || else_assign == NULL +@@ -1385,6 +1370,190 @@ + return true; + } + ++/* Conditional store replacement. We already know ++ that the recognized pattern looks like so: ++ ++ split: ++ if (cond) goto THEN_BB; else goto ELSE_BB (edge E1) ++ THEN_BB: ++ ... ++ X = Y; ++ ... ++ goto JOIN_BB; ++ ELSE_BB: ++ ... ++ X = Z; ++ ... ++ fallthrough (edge E0) ++ JOIN_BB: ++ some more ++ ++ We check that it is safe to sink the store to JOIN_BB by verifying that ++ there are no read-after-write or write-after-write dependencies in ++ THEN_BB and ELSE_BB. 
*/ ++ ++static bool ++cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb, ++ basic_block join_bb) ++{ ++ gimple then_assign = last_and_only_stmt (then_bb); ++ gimple else_assign = last_and_only_stmt (else_bb); ++ VEC (data_reference_p, heap) *then_datarefs, *else_datarefs; ++ VEC (ddr_p, heap) *then_ddrs, *else_ddrs; ++ gimple then_store, else_store; ++ bool found, ok = false, res; ++ struct data_dependence_relation *ddr; ++ data_reference_p then_dr, else_dr; ++ int i, j; ++ tree then_lhs, else_lhs; ++ VEC (gimple, heap) *then_stores, *else_stores; ++ basic_block blocks[3]; ++ ++ if (MAX_STORES_TO_SINK == 0) ++ return false; ++ ++ /* Handle the case with single statement in THEN_BB and ELSE_BB. */ ++ if (then_assign && else_assign) ++ return cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb, ++ then_assign, else_assign); ++ ++ /* Find data references. */ ++ then_datarefs = VEC_alloc (data_reference_p, heap, 1); ++ else_datarefs = VEC_alloc (data_reference_p, heap, 1); ++ if ((find_data_references_in_bb (NULL, then_bb, &then_datarefs) ++ == chrec_dont_know) ++ || !VEC_length (data_reference_p, then_datarefs) ++ || (find_data_references_in_bb (NULL, else_bb, &else_datarefs) ++ == chrec_dont_know) ++ || !VEC_length (data_reference_p, else_datarefs)) ++ { ++ free_data_refs (then_datarefs); ++ free_data_refs (else_datarefs); ++ return false; ++ } ++ ++ /* Find pairs of stores with equal LHS. */ ++ then_stores = VEC_alloc (gimple, heap, 1); ++ else_stores = VEC_alloc (gimple, heap, 1); ++ FOR_EACH_VEC_ELT (data_reference_p, then_datarefs, i, then_dr) ++ { ++ if (DR_IS_READ (then_dr)) ++ continue; ++ ++ then_store = DR_STMT (then_dr); ++ then_lhs = gimple_assign_lhs (then_store); ++ found = false; ++ ++ FOR_EACH_VEC_ELT (data_reference_p, else_datarefs, j, else_dr) ++ { ++ if (DR_IS_READ (else_dr)) ++ continue; ++ ++ else_store = DR_STMT (else_dr); ++ else_lhs = gimple_assign_lhs (else_store); ++ ++ if (operand_equal_p (then_lhs, else_lhs, 0)) ++ { ++ found = true; ++ break; ++ } ++ } ++ ++ if (!found) ++ continue; ++ ++ VEC_safe_push (gimple, heap, then_stores, then_store); ++ VEC_safe_push (gimple, heap, else_stores, else_store); ++ } ++ ++ /* No pairs of stores found. */ ++ if (!VEC_length (gimple, then_stores) ++ || VEC_length (gimple, then_stores) > (unsigned) MAX_STORES_TO_SINK) ++ { ++ free_data_refs (then_datarefs); ++ free_data_refs (else_datarefs); ++ VEC_free (gimple, heap, then_stores); ++ VEC_free (gimple, heap, else_stores); ++ return false; ++ } ++ ++ /* Compute and check data dependencies in both basic blocks. */ ++ then_ddrs = VEC_alloc (ddr_p, heap, 1); ++ else_ddrs = VEC_alloc (ddr_p, heap, 1); ++ compute_all_dependences (then_datarefs, &then_ddrs, NULL, false); ++ compute_all_dependences (else_datarefs, &else_ddrs, NULL, false); ++ blocks[0] = then_bb; ++ blocks[1] = else_bb; ++ blocks[2] = join_bb; ++ renumber_gimple_stmt_uids_in_blocks (blocks, 3); ++ ++ /* Check that there are no read-after-write or write-after-write dependencies ++ in THEN_BB. 
*/ ++ FOR_EACH_VEC_ELT (ddr_p, then_ddrs, i, ddr) ++ { ++ struct data_reference *dra = DDR_A (ddr); ++ struct data_reference *drb = DDR_B (ddr); ++ ++ if (DDR_ARE_DEPENDENT (ddr) != chrec_known ++ && ((DR_IS_READ (dra) && DR_IS_WRITE (drb) ++ && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb))) ++ || (DR_IS_READ (drb) && DR_IS_WRITE (dra) ++ && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra))) ++ || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)))) ++ { ++ free_dependence_relations (then_ddrs); ++ free_dependence_relations (else_ddrs); ++ free_data_refs (then_datarefs); ++ free_data_refs (else_datarefs); ++ VEC_free (gimple, heap, then_stores); ++ VEC_free (gimple, heap, else_stores); ++ return false; ++ } ++ } ++ ++ /* Check that there are no read-after-write or write-after-write dependencies ++ in ELSE_BB. */ ++ FOR_EACH_VEC_ELT (ddr_p, else_ddrs, i, ddr) ++ { ++ struct data_reference *dra = DDR_A (ddr); ++ struct data_reference *drb = DDR_B (ddr); ++ ++ if (DDR_ARE_DEPENDENT (ddr) != chrec_known ++ && ((DR_IS_READ (dra) && DR_IS_WRITE (drb) ++ && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb))) ++ || (DR_IS_READ (drb) && DR_IS_WRITE (dra) ++ && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra))) ++ || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)))) ++ { ++ free_dependence_relations (then_ddrs); ++ free_dependence_relations (else_ddrs); ++ free_data_refs (then_datarefs); ++ free_data_refs (else_datarefs); ++ VEC_free (gimple, heap, then_stores); ++ VEC_free (gimple, heap, else_stores); ++ return false; ++ } ++ } ++ ++ /* Sink stores with same LHS. */ ++ FOR_EACH_VEC_ELT (gimple, then_stores, i, then_store) ++ { ++ else_store = VEC_index (gimple, else_stores, i); ++ res = cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb, ++ then_store, else_store); ++ ok = ok || res; ++ } ++ ++ free_dependence_relations (then_ddrs); ++ free_dependence_relations (else_ddrs); ++ free_data_refs (then_datarefs); ++ free_data_refs (else_datarefs); ++ VEC_free (gimple, heap, then_stores); ++ VEC_free (gimple, heap, else_stores); ++ ++ return ok; ++} ++ + /* Always do these optimizations if we have SSA + trees to work on. */ + static bool + +=== modified file 'gcc/tree-vect-data-refs.c' +--- old/gcc/tree-vect-data-refs.c 2011-02-25 11:18:14 +0000 ++++ new/gcc/tree-vect-data-refs.c 2011-03-27 09:38:18 +0000 +@@ -289,39 +289,6 @@ + } + } + +- +-/* Function vect_equal_offsets. +- +- Check if OFFSET1 and OFFSET2 are identical expressions. */ +- +-static bool +-vect_equal_offsets (tree offset1, tree offset2) +-{ +- bool res; +- +- STRIP_NOPS (offset1); +- STRIP_NOPS (offset2); +- +- if (offset1 == offset2) +- return true; +- +- if (TREE_CODE (offset1) != TREE_CODE (offset2) +- || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1))) +- return false; +- +- res = vect_equal_offsets (TREE_OPERAND (offset1, 0), +- TREE_OPERAND (offset2, 0)); +- +- if (!res || !BINARY_CLASS_P (offset1)) +- return res; +- +- res = vect_equal_offsets (TREE_OPERAND (offset1, 1), +- TREE_OPERAND (offset2, 1)); +- +- return res; +-} +- +- + /* Check dependence between DRA and DRB for basic block vectorization. + If the accesses share same bases and offsets, we can compare their initial + constant offsets to decide whether they differ or not. 
In case of a read- +@@ -352,7 +319,7 @@ + || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR + || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0) + != TREE_OPERAND (DR_BASE_ADDRESS (drb),0))) +- || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))) ++ || !dr_equal_offsets_p (dra, drb)) + return true; + + /* Check the types. */ +@@ -402,7 +369,7 @@ + || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR + || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0) + != TREE_OPERAND (DR_BASE_ADDRESS (drb),0))) +- || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb)) ++ || !dr_equal_offsets_p (dra, drb) + || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) + || DR_IS_READ (dra) != DR_IS_READ (drb)) + return false; + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106737.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106737.patch new file mode 100644 index 0000000000..017b1df7e3 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106737.patch @@ -0,0 +1,126 @@ +2011-04-21 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF: + + 2008-12-03 Daniel Jacobowitz <dan@codesourcery.com> + + gcc/testsuite/ + * gcc.dg/vect/vect-shift-3.c, gcc.dg/vect/vect-shift-4.c: New. + * lib/target-supports.exp (check_effective_target_vect_shift_char): New + function. + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-shift-3.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-shift-3.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-shift-3.c 2011-04-21 13:51:06 +0000 +@@ -0,0 +1,37 @@ ++/* { dg-require-effective-target vect_shift } */ ++/* { dg-require-effective-target vect_int } */ ++ ++#include "tree-vect.h" ++ ++#define N 32 ++ ++unsigned short dst[N] __attribute__((aligned(N))); ++unsigned short src[N] __attribute__((aligned(N))); ++ ++__attribute__ ((noinline)) ++void array_shift(void) ++{ ++ int i; ++ for (i = 0; i < N; i++) ++ dst[i] = src[i] >> 3; ++} ++ ++int main() ++{ ++ volatile int i; ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ src[i] = i << 3; ++ ++ array_shift (); ++ ++ for (i = 0; i < N; i++) ++ if (dst[i] != i) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-shift-4.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-shift-4.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-shift-4.c 2011-04-21 13:51:06 +0000 +@@ -0,0 +1,37 @@ ++/* { dg-require-effective-target vect_shift_char } */ ++/* { dg-require-effective-target vect_int } */ ++ ++#include "tree-vect.h" ++ ++#define N 32 ++ ++unsigned char dst[N] __attribute__((aligned(N))); ++unsigned char src[N] __attribute__((aligned(N))); ++ ++__attribute__ ((noinline)) ++void array_shift(void) ++{ ++ int i; ++ for (i = 0; i < N; i++) ++ dst[i] = src[i] >> 3; ++} ++ ++int main() ++{ ++ volatile int i; ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ src[i] = i << 3; ++ ++ array_shift (); ++ ++ for (i = 0; i < N; i++) ++ if (dst[i] != i) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/lib/target-supports.exp' +--- old/gcc/testsuite/lib/target-supports.exp 2011-02-19 15:31:15 +0000 ++++ new/gcc/testsuite/lib/target-supports.exp 2011-04-21 13:51:06 +0000 +@@ -2308,6 +2308,26 @@ + } + + ++# Return 1 if the target supports hardware vector 
shift operation for char. ++ ++proc check_effective_target_vect_shift_char { } { ++ global et_vect_shift_char_saved ++ ++ if [info exists et_vect_shift_char_saved] { ++ verbose "check_effective_target_vect_shift_char: using cached result" 2 ++ } else { ++ set et_vect_shift_char_saved 0 ++ if { ([istarget powerpc*-*-*] ++ && ![istarget powerpc-*-linux*paired*]) ++ || [check_effective_target_arm32] } { ++ set et_vect_shift_char_saved 1 ++ } ++ } ++ ++ verbose "check_effective_target_vect_shift_char: returning $et_vect_shift_char_saved" 2 ++ return $et_vect_shift_char_saved ++} ++ + # Return 1 if the target supports hardware vectors of long, 0 otherwise. + # + # This can change for different subtargets so do not cache the result. + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106738.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106738.patch new file mode 100644 index 0000000000..3dde3b29a1 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106738.patch @@ -0,0 +1,177 @@ +2011-04-27 Ira Rosen <ira.rosen@linaro.org> + + Backport from FSF: + + 2011-04-03 Richard Guenther <rguenther@suse.de> + Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-if-conv.c (memrefs_read_or_written_unconditionally): Strip all + non-variable offsets and compare the remaining bases of the two + accesses instead of looking for exact same data-ref. + + gcc/testsuite/ + * gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c: New test. + * gcc.dg/vect/vect.exp: Run if-cvt-stores-vect* tests with + -ftree-loop-if-convert-stores. + +=== added file 'gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c' +--- old/gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c 2011-04-24 07:45:49 +0000 +@@ -0,0 +1,69 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 50 ++ ++typedef struct { ++ short a; ++ short b; ++} data; ++ ++data in1[N], in2[N], out[N]; ++short result[N*2] = {10,-7,11,-6,12,-5,13,-4,14,-3,15,-2,16,-1,17,0,18,1,19,2,20,3,21,4,22,5,23,6,24,7,25,8,26,9,27,10,28,11,29,12,30,13,31,14,32,15,33,16,34,17,35,18,36,19,37,20,38,21,39,22,40,23,41,24,42,25,43,26,44,27,45,28,46,29,47,30,48,31,49,32,50,33,51,34,52,35,53,36,54,37,55,38,56,39,57,40,58,41,59,42}; ++short out1[N], out2[N]; ++ ++__attribute__ ((noinline)) void ++foo () ++{ ++ int i; ++ short c, d; ++ ++ for (i = 0; i < N; i++) ++ { ++ c = in1[i].b; ++ d = in2[i].b; ++ ++ if (c >= d) ++ { ++ out[i].b = in1[i].a; ++ out[i].a = d + 5; ++ } ++ else ++ { ++ out[i].b = d - 12; ++ out[i].a = in2[i].a + d; ++ } ++ } ++} ++ ++int ++main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in1[i].a = i; ++ in1[i].b = i + 2; ++ in2[i].a = 5; ++ in2[i].b = i + 5; ++ __asm__ volatile (""); ++ } ++ ++ foo (); ++ ++ for (i = 0; i < N; i++) ++ { ++ if (out[i].a != result[2*i] || out[i].b != result[2*i+1]) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align || {! 
vect_strided } } } } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect.exp' +--- old/gcc/testsuite/gcc.dg/vect/vect.exp 2010-11-22 21:49:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect.exp 2011-04-24 07:45:49 +0000 +@@ -210,6 +210,12 @@ + dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/ggc-*.\[cS\]]] \ + "" $DEFAULT_VECTCFLAGS + ++# -ftree-loop-if-convert-stores ++set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS ++lappend DEFAULT_VECTCFLAGS "-ftree-loop-if-convert-stores" ++dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/if-cvt-stores-vect-*.\[cS\]]] \ ++ "" $DEFAULT_VECTCFLAGS ++ + # With -O3. + # Don't allow IPA cloning, because it throws our counts out of whack. + set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS + +=== modified file 'gcc/tree-if-conv.c' +--- old/gcc/tree-if-conv.c 2011-02-23 16:49:52 +0000 ++++ new/gcc/tree-if-conv.c 2011-04-24 07:45:49 +0000 +@@ -464,8 +464,8 @@ + /* Returns true when the memory references of STMT are read or written + unconditionally. In other words, this function returns true when + for every data reference A in STMT there exist other accesses to +- the same data reference with predicates that add up (OR-up) to the +- true predicate: this ensures that the data reference A is touched ++ a data reference with the same base with predicates that add up (OR-up) to ++ the true predicate: this ensures that the data reference A is touched + (read or written) on every iteration of the if-converted loop. */ + + static bool +@@ -489,21 +489,38 @@ + continue; + + for (j = 0; VEC_iterate (data_reference_p, drs, j, b); j++) +- if (DR_STMT (b) != stmt +- && same_data_refs (a, b)) +- { +- tree cb = bb_predicate (gimple_bb (DR_STMT (b))); +- +- if (DR_RW_UNCONDITIONALLY (b) == 1 +- || is_true_predicate (cb) +- || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb), +- ca, cb))) +- { +- DR_RW_UNCONDITIONALLY (a) = 1; +- DR_RW_UNCONDITIONALLY (b) = 1; +- found = true; +- break; +- } ++ { ++ tree ref_base_a = DR_REF (a); ++ tree ref_base_b = DR_REF (b); ++ ++ if (DR_STMT (b) == stmt) ++ continue; ++ ++ while (TREE_CODE (ref_base_a) == COMPONENT_REF ++ || TREE_CODE (ref_base_a) == IMAGPART_EXPR ++ || TREE_CODE (ref_base_a) == REALPART_EXPR) ++ ref_base_a = TREE_OPERAND (ref_base_a, 0); ++ ++ while (TREE_CODE (ref_base_b) == COMPONENT_REF ++ || TREE_CODE (ref_base_b) == IMAGPART_EXPR ++ || TREE_CODE (ref_base_b) == REALPART_EXPR) ++ ref_base_b = TREE_OPERAND (ref_base_b, 0); ++ ++ if (!operand_equal_p (ref_base_a, ref_base_b, 0)) ++ { ++ tree cb = bb_predicate (gimple_bb (DR_STMT (b))); ++ ++ if (DR_RW_UNCONDITIONALLY (b) == 1 ++ || is_true_predicate (cb) ++ || is_true_predicate (ca ++ = fold_or_predicates (EXPR_LOCATION (cb), ca, cb))) ++ { ++ DR_RW_UNCONDITIONALLY (a) = 1; ++ DR_RW_UNCONDITIONALLY (b) = 1; ++ found = true; ++ break; ++ } ++ } + } + + if (!found) + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106739.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106739.patch new file mode 100644 index 0000000000..2c14ceb8cb --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106739.patch @@ -0,0 +1,140 @@ +2011-05-02 Ira Rosen <ira.rosen@linaro.org> + + Backport from FSF: + + 2011-03-27 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * config/arm/arm.c (arm_autovectorize_vector_sizes): New function. + (TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES): Define. 
+ + gcc/testsuite/ + * gcc.dg/vect/vect-outer-5.c: Reduce the distance between data + accesses to preserve the meaning of the test for doubleword vectors. + * gcc.dg/vect/no-vfa-pr29145.c: Likewise. + * gcc.dg/vect/slp-3.c: Reduce the loop bound for the same reason. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-03-02 11:38:43 +0000 ++++ new/gcc/config/arm/arm.c 2011-04-28 11:46:58 +0000 +@@ -250,6 +250,7 @@ + bool is_packed); + static void arm_conditional_register_usage (void); + static reg_class_t arm_preferred_rename_class (reg_class_t rclass); ++static unsigned int arm_autovectorize_vector_sizes (void); + + + /* Table of machine attributes. */ +@@ -395,6 +396,9 @@ + #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p + #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE + #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode ++#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES ++#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ ++ arm_autovectorize_vector_sizes + + #undef TARGET_MACHINE_DEPENDENT_REORG + #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg +@@ -23511,6 +23515,12 @@ + } + } + ++static unsigned int ++arm_autovectorize_vector_sizes (void) ++{ ++ return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0; ++} ++ + static bool + arm_vector_alignment_reachable (const_tree type, bool is_packed) + { + +=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c' +--- old/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2010-10-04 14:59:30 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2011-04-28 11:46:58 +0000 +@@ -8,7 +8,7 @@ + void with_restrict(int * __restrict p) + { + int i; +- int *q = p - 2; ++ int *q = p - 1; + + for (i = 0; i < 1000; ++i) { + p[i] = q[i]; +@@ -19,7 +19,7 @@ + void without_restrict(int * p) + { + int i; +- int *q = p - 2; ++ int *q = p - 1; + + for (i = 0; i < 1000; ++i) { + p[i] = q[i]; +@@ -38,8 +38,8 @@ + a[i] = b[i] = i; + } + +- with_restrict(a + 2); +- without_restrict(b + 2); ++ with_restrict(a + 1); ++ without_restrict(b + 1); + + for (i = 0; i < 1002; ++i) { + if (a[i] != b[i]) + +=== modified file 'gcc/testsuite/gcc.dg/vect/slp-3.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-3.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-3.c 2011-04-28 11:46:58 +0000 +@@ -4,9 +4,9 @@ + #include <stdarg.h> + #include "tree-vect.h" + +-#define N 8 ++#define N 12 + +-unsigned short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; ++unsigned short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; + + int + main1 () +@@ -101,7 +101,7 @@ + } + + /* SLP with unrolling by 8. 
*/ +- for (i = 0; i < N/2; i++) ++ for (i = 0; i < N/4; i++) + { + out[i*9] = in[i*9]; + out[i*9 + 1] = in[i*9 + 1]; +@@ -115,7 +115,7 @@ + } + + /* check results: */ +- for (i = 0; i < N/2; i++) ++ for (i = 0; i < N/4; i++) + { + if (out[i*9] != in[i*9] + || out[i*9 + 1] != in[i*9 + 1] + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-5.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2011-04-28 11:46:58 +0000 +@@ -17,7 +17,7 @@ + float B[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); + float C[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); + float D[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +- float E[4] = {0,1,2,480}; ++ float E[4] = {0,480,960,1440}; + float s; + + int i, j; +@@ -55,7 +55,7 @@ + s = 0; + for (j=0; j<N; j+=4) + s += C[j]; +- B[i+3] = B[i] + s; ++ B[i+1] = B[i] + s; + } + + /* check results: */ + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106741.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106741.patch new file mode 100644 index 0000000000..6e76c21272 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106741.patch @@ -0,0 +1,255 @@ +2011-04-26 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF: + + 2011-04-15 Maxim Kuvyrkov <maxim@codesourcery.com> + + gcc/ + * combine.c (subst, combine_simlify_rtx): Add new argument, use it + to track processing of conditionals. Update all callers. + (try_combine, simplify_if_then_else): Update. + + 2011-04-25 Maxim Kuvyrkov <maxim@codesourcery.com> + Eric Botcazou <ebotcazou@adacore.com> + + gcc/ + * combine.c (combine_simplify_rtx): Avoid mis-simplifying conditionals + for STORE_FLAG_VALUE==-1 case. + +=== modified file 'gcc/combine.c' +Index: gcc-4_6-branch/gcc/combine.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/combine.c 2012-03-05 00:16:20.000000000 -0800 ++++ gcc-4_6-branch/gcc/combine.c 2012-03-05 16:05:01.212928507 -0800 +@@ -391,8 +391,8 @@ + static void undo_all (void); + static void undo_commit (void); + static rtx *find_split_point (rtx *, rtx, bool); +-static rtx subst (rtx, rtx, rtx, int, int); +-static rtx combine_simplify_rtx (rtx, enum machine_mode, int); ++static rtx subst (rtx, rtx, rtx, int, int, int); ++static rtx combine_simplify_rtx (rtx, enum machine_mode, int, int); + static rtx simplify_if_then_else (rtx); + static rtx simplify_set (rtx); + static rtx simplify_logical (rtx); +@@ -3119,12 +3119,12 @@ + if (i1) + { + subst_low_luid = DF_INSN_LUID (i1); +- i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0); ++ i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0); + } + else + { + subst_low_luid = DF_INSN_LUID (i2); +- i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0); ++ i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0); + } + } + +@@ -3136,7 +3136,7 @@ + self-referential RTL when we will be substituting I1SRC for I1DEST + later. Likewise if I0 feeds into I2, either directly or indirectly + through I1, and I0DEST is in I0SRC. */ +- newpat = subst (PATTERN (i3), i2dest, i2src, 0, ++ newpat = subst (PATTERN (i3), i2dest, i2src, 0, 0, + (i1_feeds_i2_n && i1dest_in_i1src) + || ((i0_feeds_i2_n || (i0_feeds_i1_n && i1_feeds_i2_n)) + && i0dest_in_i0src)); +@@ -3180,7 +3180,7 @@ + copy of I1SRC each time we substitute it, in order to avoid creating + self-referential RTL when we will be substituting I0SRC for I0DEST + later. 
*/ +- newpat = subst (newpat, i1dest, i1src, 0, ++ newpat = subst (newpat, i1dest, i1src, 0, 0, + i0_feeds_i1_n && i0dest_in_i0src); + substed_i1 = 1; + +@@ -3214,7 +3214,7 @@ + + n_occurrences = 0; + subst_low_luid = DF_INSN_LUID (i0); +- newpat = subst (newpat, i0dest, i0src, 0, 0); ++ newpat = subst (newpat, i0dest, i0src, 0, 0, 0); + substed_i0 = 1; + } + +@@ -3276,7 +3276,7 @@ + { + rtx t = i1pat; + if (i0_feeds_i1_n) +- t = subst (t, i0dest, i0src_copy ? i0src_copy : i0src, 0, 0); ++ t = subst (t, i0dest, i0src_copy ? i0src_copy : i0src, 0, 0, 0); + + XVECEXP (newpat, 0, --total_sets) = t; + } +@@ -3284,10 +3284,10 @@ + { + rtx t = i2pat; + if (i1_feeds_i2_n) +- t = subst (t, i1dest, i1src_copy ? i1src_copy : i1src, 0, ++ t = subst (t, i1dest, i1src_copy ? i1src_copy : i1src, 0, 0, + i0_feeds_i1_n && i0dest_in_i0src); + if ((i0_feeds_i1_n && i1_feeds_i2_n) || i0_feeds_i2_n) +- t = subst (t, i0dest, i0src_copy2 ? i0src_copy2 : i0src, 0, 0); ++ t = subst (t, i0dest, i0src_copy2 ? i0src_copy2 : i0src, 0, 0, 0); + + XVECEXP (newpat, 0, --total_sets) = t; + } +@@ -4959,11 +4959,13 @@ + + IN_DEST is nonzero if we are processing the SET_DEST of a SET. + ++ IN_COND is nonzero if we are on top level of the condition. ++ + UNIQUE_COPY is nonzero if each substitution must be unique. We do this + by copying if `n_occurrences' is nonzero. */ + + static rtx +-subst (rtx x, rtx from, rtx to, int in_dest, int unique_copy) ++subst (rtx x, rtx from, rtx to, int in_dest, int in_cond, int unique_copy) + { + enum rtx_code code = GET_CODE (x); + enum machine_mode op0_mode = VOIDmode; +@@ -5024,7 +5026,7 @@ + && GET_CODE (XVECEXP (x, 0, 0)) == SET + && GET_CODE (SET_SRC (XVECEXP (x, 0, 0))) == ASM_OPERANDS) + { +- new_rtx = subst (XVECEXP (x, 0, 0), from, to, 0, unique_copy); ++ new_rtx = subst (XVECEXP (x, 0, 0), from, to, 0, 0, unique_copy); + + /* If this substitution failed, this whole thing fails. */ + if (GET_CODE (new_rtx) == CLOBBER +@@ -5041,7 +5043,7 @@ + && GET_CODE (dest) != CC0 + && GET_CODE (dest) != PC) + { +- new_rtx = subst (dest, from, to, 0, unique_copy); ++ new_rtx = subst (dest, from, to, 0, 0, unique_copy); + + /* If this substitution failed, this whole thing fails. */ + if (GET_CODE (new_rtx) == CLOBBER +@@ -5087,8 +5089,8 @@ + } + else + { +- new_rtx = subst (XVECEXP (x, i, j), from, to, 0, +- unique_copy); ++ new_rtx = subst (XVECEXP (x, i, j), from, to, 0, 0, ++ unique_copy); + + /* If this substitution failed, this whole thing + fails. */ +@@ -5165,7 +5167,9 @@ + && (code == SUBREG || code == STRICT_LOW_PART + || code == ZERO_EXTRACT)) + || code == SET) +- && i == 0), unique_copy); ++ && i == 0), ++ code == IF_THEN_ELSE && i == 0, ++ unique_copy); + + /* If we found that we will have to reject this combination, + indicate that by returning the CLOBBER ourselves, rather than +@@ -5222,7 +5226,7 @@ + /* If X is sufficiently simple, don't bother trying to do anything + with it. */ + if (code != CONST_INT && code != REG && code != CLOBBER) +- x = combine_simplify_rtx (x, op0_mode, in_dest); ++ x = combine_simplify_rtx (x, op0_mode, in_dest, in_cond); + + if (GET_CODE (x) == code) + break; +@@ -5242,10 +5246,12 @@ + expression. + + OP0_MODE is the original mode of XEXP (x, 0). IN_DEST is nonzero +- if we are inside a SET_DEST. */ ++ if we are inside a SET_DEST. IN_COND is nonzero if we are on the top level ++ of a condition. 
*/ + + static rtx +-combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest) ++combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest, ++ int in_cond) + { + enum rtx_code code = GET_CODE (x); + enum machine_mode mode = GET_MODE (x); +@@ -5300,8 +5306,8 @@ + false arms to store-flag values. Be careful to use copy_rtx + here since true_rtx or false_rtx might share RTL with x as a + result of the if_then_else_cond call above. */ +- true_rtx = subst (copy_rtx (true_rtx), pc_rtx, pc_rtx, 0, 0); +- false_rtx = subst (copy_rtx (false_rtx), pc_rtx, pc_rtx, 0, 0); ++ true_rtx = subst (copy_rtx (true_rtx), pc_rtx, pc_rtx, 0, 0, 0); ++ false_rtx = subst (copy_rtx (false_rtx), pc_rtx, pc_rtx, 0, 0, 0); + + /* If true_rtx and false_rtx are not general_operands, an if_then_else + is unlikely to be simpler. */ +@@ -5645,7 +5651,7 @@ + { + /* Try to simplify the expression further. */ + rtx tor = simplify_gen_binary (IOR, mode, XEXP (x, 0), XEXP (x, 1)); +- temp = combine_simplify_rtx (tor, VOIDmode, in_dest); ++ temp = combine_simplify_rtx (tor, VOIDmode, in_dest, 0); + + /* If we could, great. If not, do not go ahead with the IOR + replacement, since PLUS appears in many special purpose +@@ -5738,7 +5744,16 @@ + ZERO_EXTRACT is indeed appropriate, it will be placed back by + the call to make_compound_operation in the SET case. */ + +- if (STORE_FLAG_VALUE == 1 ++ if (in_cond) ++ /* Don't apply below optimizations if the caller would ++ prefer a comparison rather than a value. ++ E.g., for the condition in an IF_THEN_ELSE most targets need ++ an explicit comparison. */ ++ { ++ ; ++ } ++ ++ else if (STORE_FLAG_VALUE == 1 + && new_code == NE && GET_MODE_CLASS (mode) == MODE_INT + && op1 == const0_rtx + && mode == GET_MODE (op0) +@@ -5784,7 +5799,10 @@ + + /* If STORE_FLAG_VALUE is -1, we have cases similar to + those above. */ +- if (STORE_FLAG_VALUE == -1 ++ if (in_cond) ++ ; ++ ++ else if (STORE_FLAG_VALUE == -1 + && new_code == NE && GET_MODE_CLASS (mode) == MODE_INT + && op1 == const0_rtx + && (num_sign_bit_copies (op0, mode) +@@ -5982,11 +6000,11 @@ + if (reg_mentioned_p (from, true_rtx)) + true_rtx = subst (known_cond (copy_rtx (true_rtx), true_code, + from, true_val), +- pc_rtx, pc_rtx, 0, 0); ++ pc_rtx, pc_rtx, 0, 0, 0); + if (reg_mentioned_p (from, false_rtx)) + false_rtx = subst (known_cond (copy_rtx (false_rtx), false_code, + from, false_val), +- pc_rtx, pc_rtx, 0, 0); ++ pc_rtx, pc_rtx, 0, 0, 0); + + SUBST (XEXP (x, 1), swapped ? false_rtx : true_rtx); + SUBST (XEXP (x, 2), swapped ? 
true_rtx : false_rtx); +@@ -6203,11 +6221,11 @@ + { + temp = subst (simplify_gen_relational (true_code, m, VOIDmode, + cond_op0, cond_op1), +- pc_rtx, pc_rtx, 0, 0); ++ pc_rtx, pc_rtx, 0, 0, 0); + temp = simplify_gen_binary (MULT, m, temp, + simplify_gen_binary (MULT, m, c1, + const_true_rtx)); +- temp = subst (temp, pc_rtx, pc_rtx, 0, 0); ++ temp = subst (temp, pc_rtx, pc_rtx, 0, 0, 0); + temp = simplify_gen_binary (op, m, gen_lowpart (m, z), temp); + + if (extend_op != UNKNOWN) diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106742.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106742.patch new file mode 100644 index 0000000000..395c08cab7 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106742.patch @@ -0,0 +1,6125 @@ +2011-05-03 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/testsuite/ + From Richard Earnshaw <rearnsha@arm.com> + + PR target/46329 + * gcc.target/arm/pr46329.c: New test. + + gcc/ + PR target/46329 + * config/arm/arm.c (arm_legitimate_constant_p_1): Return false + for all Neon struct constants. + +2011-05-03 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + * targhooks.h (default_legitimate_constant_p); Declare. + * targhooks.c (default_legitimate_constant_p): New function. + + Backport from mainline: + 2011-04-21 Richard Sandiford <richard.sandiford@linaro.org> + + * target.def (legitimate_constant_p): New hook. + * doc/tm.texi.in (LEGITIMATE_CONSTANT_P): Replace with... + (TARGET_LEGITIMATE_CONSTANT_P): ...this. + * doc/tm.texi: Regenerate. + * calls.c (precompute_register_parameters): Replace uses of + LEGITIMATE_CONSTANT_P with targetm.legitimate_constant_p. + (emit_library_call_value_1): Likewise. + * expr.c (move_block_to_reg, can_store_by_pieces, emit_move_insn) + (compress_float_constant, emit_push_insn, expand_expr_real_1): Likewise. + * recog.c (general_operand, immediate_operand): Likewise. + * reload.c (find_reloads_toplev, find_reloads_address_part): Likewise. + * reload1.c (init_eliminable_invariants): Likewise. + + * config/arm/arm-protos.h (arm_cannot_force_const_mem): Delete. + * config/arm/arm.h (ARM_LEGITIMATE_CONSTANT_P): Likewise. + (THUMB_LEGITIMATE_CONSTANT_P, LEGITIMATE_CONSTANT_P): Likewise. + * config/arm/arm.c (TARGET_LEGITIMATE_CONSTANT_P): Define. + (arm_legitimate_constant_p_1, thumb_legitimate_constant_p) + (arm_legitimate_constant_p): New functions. + (arm_cannot_force_const_mem): Make static. + +2011-05-03 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-05-03 Richard Sandiford <richard.sandiford@linaro.org> + + * hooks.h (hook_bool_mode_uhwi_false): Declare. + * hooks.c (hook_bool_mode_uhwi_false): New function. + * target.def (array_mode_supported_p): New hook. + * doc/tm.texi.in (TARGET_ARRAY_MODE_SUPPORTED_P): Add @hook. + * doc/tm.texi: Regenerate. + * stor-layout.c (mode_for_array): New function. + (layout_type): Use it. + * config/arm/arm.c (arm_array_mode_supported_p): New function. + (TARGET_ARRAY_MODE_SUPPORTED_P): Define. + +2011-05-03 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-04-12 Richard Sandiford <richard.sandiford@linaro.org> + + * config/arm/arm.c (arm_print_operand): Use MEM_SIZE to get the + size of a '%A' memory reference. + (T_DREG, T_QREG): New neon_builtin_type_bits. + (arm_init_neon_builtins): Assert that the load and store operands + are neon_struct_operands. 
+ (locate_neon_builtin_icode): Provide the neon_builtin_type_bits. + (NEON_ARG_MEMORY): New builtin_arg. + (neon_dereference_pointer): New function. + (arm_expand_neon_args): Add a neon_builtin_type_bits argument. + Handle NEON_ARG_MEMORY. + (arm_expand_neon_builtin): Update after above interface changes. + Use NEON_ARG_MEMORY for loads and stores. + * config/arm/predicates.md (neon_struct_operand): New predicate. + * config/arm/iterators.md (V_two_elem): Tweak formatting. + (V_three_elem): Use BLKmode for accesses that have no associated mode. + (V_four_elem): Tweak formatting. + * config/arm/neon.md (neon_vld1<mode>, neon_vld1_dup<mode>) + (neon_vst1_lane<mode>, neon_vst1<mode>, neon_vld2<mode>) + (neon_vld2_lane<mode>, neon_vld2_dup<mode>, neon_vst2<mode>) + (neon_vst2_lane<mode>, neon_vld3<mode>, neon_vld3_lane<mode>) + (neon_vld3_dup<mode>, neon_vst3<mode>, neon_vst3_lane<mode>) + (neon_vld4<mode>, neon_vld4_lane<mode>, neon_vld4_dup<mode>) + (neon_vst4<mode>): Replace pointer operand with a memory operand. + Use %A in the output template. + (neon_vld3qa<mode>, neon_vld3qb<mode>, neon_vst3qa<mode>) + (neon_vst3qb<mode>, neon_vld4qa<mode>, neon_vld4qb<mode>) + (neon_vst4qa<mode>, neon_vst4qb<mode>): Likewise, but halve + the width of the memory access. Remove post-increment. + * config/arm/neon-testgen.ml: Allow addresses to have an alignment. + + gcc/testsuite/ + Backport from mainline: + + 2011-04-12 Richard Sandiford <richard.sandiford@linaro.org> + + * gcc.target/arm/neon-vld3-1.c: New test. + * gcc.target/arm/neon-vst3-1.c: New test. + * gcc.target/arm/neon/v*.c: Regenerate. + +2011-05-03 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-03-30 Richard Sandiford <richard.sandiford@linaro.org> + Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + PR target/43590 + * config/arm/neon.md (neon_vld3qa<mode>, neon_vld4qa<mode>): Remove + operand 1 and reshuffle the operands to match. + (neon_vld3<mode>, neon_vld4<mode>): Update accordingly. + +=== modified file 'gcc/calls.c' +Index: gcc-4_6-branch/gcc/calls.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/calls.c 2011-06-24 08:33:49.000000000 -0700 ++++ gcc-4_6-branch/gcc/calls.c 2011-09-16 20:16:00.217564705 -0700 +@@ -686,7 +686,7 @@ + /* If the value is a non-legitimate constant, force it into a + pseudo now. TLS symbols sometimes need a call to resolve. */ + if (CONSTANT_P (args[i].value) +- && !LEGITIMATE_CONSTANT_P (args[i].value)) ++ && !targetm.legitimate_constant_p (args[i].mode, args[i].value)) + args[i].value = force_reg (args[i].mode, args[i].value); + + /* If we are to promote the function arg to a wider mode, +@@ -3449,7 +3449,8 @@ + + /* Make sure it is a reasonable operand for a move or push insn. */ + if (!REG_P (addr) && !MEM_P (addr) +- && ! (CONSTANT_P (addr) && LEGITIMATE_CONSTANT_P (addr))) ++ && !(CONSTANT_P (addr) ++ && targetm.legitimate_constant_p (Pmode, addr))) + addr = force_operand (addr, NULL_RTX); + + argvec[count].value = addr; +@@ -3490,7 +3491,7 @@ + + /* Make sure it is a reasonable operand for a move or push insn. */ + if (!REG_P (val) && !MEM_P (val) +- && ! 
(CONSTANT_P (val) && LEGITIMATE_CONSTANT_P (val))) ++ && !(CONSTANT_P (val) && targetm.legitimate_constant_p (mode, val))) + val = force_operand (val, NULL_RTX); + + if (pass_by_reference (&args_so_far, mode, NULL_TREE, 1)) +Index: gcc-4_6-branch/gcc/config/arm/arm-protos.h +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/arm-protos.h 2011-06-24 08:33:37.000000000 -0700 ++++ gcc-4_6-branch/gcc/config/arm/arm-protos.h 2011-09-16 20:16:00.217564705 -0700 +@@ -81,7 +81,6 @@ + extern enum reg_class coproc_secondary_reload_class (enum machine_mode, rtx, + bool); + extern bool arm_tls_referenced_p (rtx); +-extern bool arm_cannot_force_const_mem (rtx); + + extern int cirrus_memory_offset (rtx); + extern int arm_coproc_mem_operand (rtx, bool); +Index: gcc-4_6-branch/gcc/config/arm/arm.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/arm.c 2011-09-16 20:14:34.000000000 -0700 ++++ gcc-4_6-branch/gcc/config/arm/arm.c 2011-09-16 20:16:00.237564275 -0700 +@@ -143,6 +143,8 @@ + static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, + tree); + static bool arm_have_conditional_execution (void); ++static bool arm_cannot_force_const_mem (rtx); ++static bool arm_legitimate_constant_p (enum machine_mode, rtx); + static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool); + static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *); + static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); +@@ -241,6 +243,8 @@ + static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *); + static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *); + static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *); ++static bool arm_array_mode_supported_p (enum machine_mode, ++ unsigned HOST_WIDE_INT); + static enum machine_mode arm_preferred_simd_mode (enum machine_mode); + static bool arm_class_likely_spilled_p (reg_class_t); + static bool arm_vector_alignment_reachable (const_tree type, bool is_packed); +@@ -394,6 +398,8 @@ + #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask + #undef TARGET_VECTOR_MODE_SUPPORTED_P + #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p ++#undef TARGET_ARRAY_MODE_SUPPORTED_P ++#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p + #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE + #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode + #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES +@@ -523,6 +529,9 @@ + #undef TARGET_HAVE_CONDITIONAL_EXECUTION + #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution + ++#undef TARGET_LEGITIMATE_CONSTANT_P ++#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p ++ + #undef TARGET_CANNOT_FORCE_CONST_MEM + #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem + +@@ -6539,9 +6548,47 @@ + return for_each_rtx (&x, arm_tls_operand_p_1, NULL); + } + ++/* Implement TARGET_LEGITIMATE_CONSTANT_P. ++ ++ On the ARM, allow any integer (invalid ones are removed later by insn ++ patterns), nice doubles and symbol_refs which refer to the function's ++ constant pool XXX. ++ ++ When generating pic allow anything. */ ++ ++static bool ++arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x) ++{ ++ /* At present, we have no support for Neon structure constants, so forbid ++ them here. It might be possible to handle simple cases like 0 and -1 ++ in future. 
*/ ++ if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)) ++ return false; ++ ++ return flag_pic || !label_mentioned_p (x); ++} ++ ++static bool ++thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) ++{ ++ return (GET_CODE (x) == CONST_INT ++ || GET_CODE (x) == CONST_DOUBLE ++ || CONSTANT_ADDRESS_P (x) ++ || flag_pic); ++} ++ ++static bool ++arm_legitimate_constant_p (enum machine_mode mode, rtx x) ++{ ++ return (!arm_cannot_force_const_mem (x) ++ && (TARGET_32BIT ++ ? arm_legitimate_constant_p_1 (mode, x) ++ : thumb_legitimate_constant_p (mode, x))); ++} ++ + /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ + +-bool ++static bool + arm_cannot_force_const_mem (rtx x) + { + rtx base, offset; +@@ -16598,7 +16645,7 @@ + { + rtx addr; + bool postinc = FALSE; +- unsigned align, modesize, align_bits; ++ unsigned align, memsize, align_bits; + + gcc_assert (GET_CODE (x) == MEM); + addr = XEXP (x, 0); +@@ -16613,12 +16660,12 @@ + instruction (for some alignments) as an aid to the memory subsystem + of the target. */ + align = MEM_ALIGN (x) >> 3; +- modesize = GET_MODE_SIZE (GET_MODE (x)); ++ memsize = INTVAL (MEM_SIZE (x)); + + /* Only certain alignment specifiers are supported by the hardware. */ +- if (modesize == 16 && (align % 32) == 0) ++ if (memsize == 16 && (align % 32) == 0) + align_bits = 256; +- else if ((modesize == 8 || modesize == 16) && (align % 16) == 0) ++ else if ((memsize == 8 || memsize == 16) && (align % 16) == 0) + align_bits = 128; + else if ((align % 8) == 0) + align_bits = 64; +@@ -18278,12 +18325,14 @@ + T_V2SI = 0x0004, + T_V2SF = 0x0008, + T_DI = 0x0010, ++ T_DREG = 0x001F, + T_V16QI = 0x0020, + T_V8HI = 0x0040, + T_V4SI = 0x0080, + T_V4SF = 0x0100, + T_V2DI = 0x0200, + T_TI = 0x0400, ++ T_QREG = 0x07E0, + T_EI = 0x0800, + T_OI = 0x1000 + }; +@@ -18929,10 +18978,9 @@ + if (is_load && k == 1) + { + /* Neon load patterns always have the memory operand +- (a SImode pointer) in the operand 1 position. We +- want a const pointer to the element type in that +- position. */ +- gcc_assert (insn_data[icode].operand[k].mode == SImode); ++ in the operand 1 position. */ ++ gcc_assert (insn_data[icode].operand[k].predicate ++ == neon_struct_operand); + + switch (1 << j) + { +@@ -18967,10 +19015,9 @@ + else if (is_store && k == 0) + { + /* Similarly, Neon store patterns use operand 0 as +- the memory location to store to (a SImode pointer). +- Use a pointer to the element type of the store in +- that position. */ +- gcc_assert (insn_data[icode].operand[k].mode == SImode); ++ the memory location to store to. 
*/ ++ gcc_assert (insn_data[icode].operand[k].predicate ++ == neon_struct_operand); + + switch (1 << j) + { +@@ -19290,12 +19337,13 @@ + } + + static enum insn_code +-locate_neon_builtin_icode (int fcode, neon_itype *itype) ++locate_neon_builtin_icode (int fcode, neon_itype *itype, ++ enum neon_builtin_type_bits *type_bit) + { + neon_builtin_datum key + = { NULL, (neon_itype) 0, 0, { CODE_FOR_nothing }, 0, 0 }; + neon_builtin_datum *found; +- int idx; ++ int idx, type, ntypes; + + key.base_fcode = fcode; + found = (neon_builtin_datum *) +@@ -19308,20 +19356,84 @@ + if (itype) + *itype = found->itype; + ++ if (type_bit) ++ { ++ ntypes = 0; ++ for (type = 0; type < T_MAX; type++) ++ if (found->bits & (1 << type)) ++ { ++ if (ntypes == idx) ++ break; ++ ntypes++; ++ } ++ gcc_assert (type < T_MAX); ++ *type_bit = (enum neon_builtin_type_bits) (1 << type); ++ } + return found->codes[idx]; + } + + typedef enum { + NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, ++ NEON_ARG_MEMORY, + NEON_ARG_STOP + } builtin_arg; + + #define NEON_MAX_BUILTIN_ARGS 5 + ++/* EXP is a pointer argument to a Neon load or store intrinsic. Derive ++ and return an expression for the accessed memory. ++ ++ The intrinsic function operates on a block of registers that has ++ mode REG_MODE. This block contains vectors of type TYPE_BIT. ++ The function references the memory at EXP in mode MEM_MODE; ++ this mode may be BLKmode if no more suitable mode is available. */ ++ ++static tree ++neon_dereference_pointer (tree exp, enum machine_mode mem_mode, ++ enum machine_mode reg_mode, ++ enum neon_builtin_type_bits type_bit) ++{ ++ HOST_WIDE_INT reg_size, vector_size, nvectors, nelems; ++ tree elem_type, upper_bound, array_type; ++ ++ /* Work out the size of the register block in bytes. */ ++ reg_size = GET_MODE_SIZE (reg_mode); ++ ++ /* Work out the size of each vector in bytes. */ ++ gcc_assert (type_bit & (T_DREG | T_QREG)); ++ vector_size = (type_bit & T_QREG ? 16 : 8); ++ ++ /* Work out how many vectors there are. */ ++ gcc_assert (reg_size % vector_size == 0); ++ nvectors = reg_size / vector_size; ++ ++ /* Work out how many elements are being loaded or stored. ++ MEM_MODE == REG_MODE implies a one-to-one mapping between register ++ and memory elements; anything else implies a lane load or store. */ ++ if (mem_mode == reg_mode) ++ nelems = vector_size * nvectors; ++ else ++ nelems = nvectors; ++ ++ /* Work out the type of each element. */ ++ gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp))); ++ elem_type = TREE_TYPE (TREE_TYPE (exp)); ++ ++ /* Create a type that describes the full access. */ ++ upper_bound = build_int_cst (size_type_node, nelems - 1); ++ array_type = build_array_type (elem_type, build_index_type (upper_bound)); ++ ++ /* Dereference EXP using that type. */ ++ exp = convert (build_pointer_type (array_type), exp); ++ return fold_build2 (MEM_REF, array_type, exp, ++ build_int_cst (TREE_TYPE (exp), 0)); ++} ++ + /* Expand a Neon builtin. */ + static rtx + arm_expand_neon_args (rtx target, int icode, int have_retval, ++ enum neon_builtin_type_bits type_bit, + tree exp, ...) 
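Editorial aside, not part of the patch: a minimal user-level sketch of the two access kinds that neon_dereference_pointer above distinguishes, a full register-block load where every element behind the pointer is read, and a lane load where only one element per vector is touched. The functions below are illustrative only and need an ARM target with NEON enabled.

#include <arm_neon.h>

/* Full load: vld1q_u8 reads all 16 bytes at P, so the intrinsic's pointer
   argument can be described as a 16-element array access when the call is
   expanded (mem_mode == reg_mode in the helper above).  */
uint8x16_t
load_block (const uint8_t *p)
{
  return vld1q_u8 (p);
}

/* Lane load: vld1q_lane_u8 reads a single byte at Q, so the derived memory
   operand covers just one element per vector (mem_mode != reg_mode).  */
uint8x16_t
load_one_lane (const uint8_t *q, uint8x16_t v)
{
  return vld1q_lane_u8 (q, v, 0);
}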
+ { + va_list ap; +@@ -19330,7 +19442,9 @@ + rtx op[NEON_MAX_BUILTIN_ARGS]; + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode[NEON_MAX_BUILTIN_ARGS]; ++ enum machine_mode other_mode; + int argc = 0; ++ int opno; + + if (have_retval + && (!target +@@ -19348,26 +19462,46 @@ + break; + else + { ++ opno = argc + have_retval; ++ mode[argc] = insn_data[icode].operand[opno].mode; + arg[argc] = CALL_EXPR_ARG (exp, argc); ++ if (thisarg == NEON_ARG_MEMORY) ++ { ++ other_mode = insn_data[icode].operand[1 - opno].mode; ++ arg[argc] = neon_dereference_pointer (arg[argc], mode[argc], ++ other_mode, type_bit); ++ } + op[argc] = expand_normal (arg[argc]); +- mode[argc] = insn_data[icode].operand[argc + have_retval].mode; + + switch (thisarg) + { + case NEON_ARG_COPY_TO_REG: + /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/ +- if (!(*insn_data[icode].operand[argc + have_retval].predicate) ++ if (!(*insn_data[icode].operand[opno].predicate) + (op[argc], mode[argc])) + op[argc] = copy_to_mode_reg (mode[argc], op[argc]); + break; + + case NEON_ARG_CONSTANT: + /* FIXME: This error message is somewhat unhelpful. */ +- if (!(*insn_data[icode].operand[argc + have_retval].predicate) ++ if (!(*insn_data[icode].operand[opno].predicate) + (op[argc], mode[argc])) + error ("argument must be a constant"); + break; + ++ case NEON_ARG_MEMORY: ++ gcc_assert (MEM_P (op[argc])); ++ PUT_MODE (op[argc], mode[argc]); ++ /* ??? arm_neon.h uses the same built-in functions for signed ++ and unsigned accesses, casting where necessary. This isn't ++ alias safe. */ ++ set_mem_alias_set (op[argc], 0); ++ if (!(*insn_data[icode].operand[opno].predicate) ++ (op[argc], mode[argc])) ++ op[argc] = (replace_equiv_address ++ (op[argc], force_reg (Pmode, XEXP (op[argc], 0)))); ++ break; ++ + case NEON_ARG_STOP: + gcc_unreachable (); + } +@@ -19446,14 +19580,15 @@ + arm_expand_neon_builtin (int fcode, tree exp, rtx target) + { + neon_itype itype; +- enum insn_code icode = locate_neon_builtin_icode (fcode, &itype); ++ enum neon_builtin_type_bits type_bit; ++ enum insn_code icode = locate_neon_builtin_icode (fcode, &itype, &type_bit); + + switch (itype) + { + case NEON_UNOP: + case NEON_CONVERT: + case NEON_DUPLANE: +- return arm_expand_neon_args (target, icode, 1, exp, ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_BINOP: +@@ -19463,90 +19598,90 @@ + case NEON_SCALARMULH: + case NEON_SHIFTINSERT: + case NEON_LOGICBINOP: +- return arm_expand_neon_args (target, icode, 1, exp, ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + + case NEON_TERNOP: +- return arm_expand_neon_args (target, icode, 1, exp, ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_GETLANE: + case NEON_FIXCONV: + case NEON_SHIFTIMM: +- return arm_expand_neon_args (target, icode, 1, exp, ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + + case NEON_CREATE: +- return arm_expand_neon_args (target, icode, 1, exp, ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_DUP: + case NEON_SPLIT: + case NEON_REINTERP: +- return arm_expand_neon_args (target, icode, 1, exp, 
++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_COMBINE: + case NEON_VTBL: +- return arm_expand_neon_args (target, icode, 1, exp, ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_RESULTPAIR: +- return arm_expand_neon_args (target, icode, 0, exp, ++ return arm_expand_neon_args (target, icode, 0, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_STOP); + + case NEON_LANEMUL: + case NEON_LANEMULL: + case NEON_LANEMULH: +- return arm_expand_neon_args (target, icode, 1, exp, ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_LANEMAC: +- return arm_expand_neon_args (target, icode, 1, exp, ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_SHIFTACC: +- return arm_expand_neon_args (target, icode, 1, exp, ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_SCALARMAC: +- return arm_expand_neon_args (target, icode, 1, exp, ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_SELECT: + case NEON_VTBX: +- return arm_expand_neon_args (target, icode, 1, exp, ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_STOP); + + case NEON_LOAD1: + case NEON_LOADSTRUCT: +- return arm_expand_neon_args (target, icode, 1, exp, +- NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ NEON_ARG_MEMORY, NEON_ARG_STOP); + + case NEON_LOAD1LANE: + case NEON_LOADSTRUCTLANE: +- return arm_expand_neon_args (target, icode, 1, exp, +- NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, ++ return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + + case NEON_STORE1: + case NEON_STORESTRUCT: +- return arm_expand_neon_args (target, icode, 0, exp, +- NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); ++ return arm_expand_neon_args (target, icode, 0, type_bit, exp, ++ NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_STORE1LANE: + case NEON_STORESTRUCTLANE: +- return arm_expand_neon_args (target, icode, 0, exp, +- NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, ++ return arm_expand_neon_args (target, icode, 0, type_bit, exp, ++ NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + } + +@@ -22265,6 +22400,20 @@ + return true; + + return false; ++} ++ ++/* Implements target hook array_mode_supported_p. 
*/ ++ ++static bool ++arm_array_mode_supported_p (enum machine_mode mode, ++ unsigned HOST_WIDE_INT nelems) ++{ ++ if (TARGET_NEON ++ && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)) ++ && (nelems >= 2 && nelems <= 4)) ++ return true; ++ ++ return false; + } + + /* Use the option -mvectorize-with-neon-quad to override the use of doubleword +Index: gcc-4_6-branch/gcc/config/arm/arm.h +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/arm.h 2011-09-16 20:14:33.000000000 -0700 ++++ gcc-4_6-branch/gcc/config/arm/arm.h 2011-09-16 20:16:00.237564275 -0700 +@@ -1777,27 +1777,6 @@ + #define TARGET_DEFAULT_WORD_RELOCATIONS 0 + #endif + +-/* Nonzero if the constant value X is a legitimate general operand. +- It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. +- +- On the ARM, allow any integer (invalid ones are removed later by insn +- patterns), nice doubles and symbol_refs which refer to the function's +- constant pool XXX. +- +- When generating pic allow anything. */ +-#define ARM_LEGITIMATE_CONSTANT_P(X) (flag_pic || ! label_mentioned_p (X)) +- +-#define THUMB_LEGITIMATE_CONSTANT_P(X) \ +- ( GET_CODE (X) == CONST_INT \ +- || GET_CODE (X) == CONST_DOUBLE \ +- || CONSTANT_ADDRESS_P (X) \ +- || flag_pic) +- +-#define LEGITIMATE_CONSTANT_P(X) \ +- (!arm_cannot_force_const_mem (X) \ +- && (TARGET_32BIT ? ARM_LEGITIMATE_CONSTANT_P (X) \ +- : THUMB_LEGITIMATE_CONSTANT_P (X))) +- + #ifndef SUBTARGET_NAME_ENCODING_LENGTHS + #define SUBTARGET_NAME_ENCODING_LENGTHS + #endif +Index: gcc-4_6-branch/gcc/config/arm/iterators.md +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/iterators.md 2011-06-24 08:33:37.000000000 -0700 ++++ gcc-4_6-branch/gcc/config/arm/iterators.md 2011-09-16 20:16:00.237564275 -0700 +@@ -194,24 +194,22 @@ + + ;; Mode of pair of elements for each vector mode, to define transfer + ;; size for structure lane/dup loads and stores. +-(define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI") +- (V4HI "SI") (V8HI "SI") ++(define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI") ++ (V4HI "SI") (V8HI "SI") + (V2SI "V2SI") (V4SI "V2SI") + (V2SF "V2SF") (V4SF "V2SF") + (DI "V2DI") (V2DI "V2DI")]) + + ;; Similar, for three elements. +-;; ??? Should we define extra modes so that sizes of all three-element +-;; accesses can be accurately represented? +-(define_mode_attr V_three_elem [(V8QI "SI") (V16QI "SI") +- (V4HI "V4HI") (V8HI "V4HI") +- (V2SI "V4SI") (V4SI "V4SI") +- (V2SF "V4SF") (V4SF "V4SF") +- (DI "EI") (V2DI "EI")]) ++(define_mode_attr V_three_elem [(V8QI "BLK") (V16QI "BLK") ++ (V4HI "BLK") (V8HI "BLK") ++ (V2SI "BLK") (V4SI "BLK") ++ (V2SF "BLK") (V4SF "BLK") ++ (DI "EI") (V2DI "EI")]) + + ;; Similar, for four elements. 
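Editorial aside, not part of the patch (the four-element attribute continues directly below): a hedged user-level sketch of why the three-element attribute now maps to BLK. A structure lane access touches one element from each of three vectors, for example 3 bytes for int8x8x3_t, a size that no integer or vector mode describes exactly, so the port uses BLKmode and lets the recorded memory size carry the exact footprint. Illustrative only; requires an ARM target with NEON.

#include <arm_neon.h>

/* Loads lane 0 of each of the three vectors in ACC: exactly 3 bytes at P.  */
int8x8x3_t
load_rgb_lane0 (const int8_t *p, int8x8x3_t acc)
{
  return vld3_lane_s8 (p, acc, 0);
}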
+ (define_mode_attr V_four_elem [(V8QI "SI") (V16QI "SI") +- (V4HI "V4HI") (V8HI "V4HI") ++ (V4HI "V4HI") (V8HI "V4HI") + (V2SI "V4SI") (V4SI "V4SI") + (V2SF "V4SF") (V4SF "V4SF") + (DI "OI") (V2DI "OI")]) +Index: gcc-4_6-branch/gcc/config/arm/neon-testgen.ml +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/neon-testgen.ml 2011-06-24 08:33:37.000000000 -0700 ++++ gcc-4_6-branch/gcc/config/arm/neon-testgen.ml 2011-09-16 20:16:00.237564275 -0700 +@@ -177,7 +177,7 @@ + let alt2 = commas (fun x -> x) (n_things n elt_regexp) "" in + "\\\\\\{((" ^ alt1 ^ ")|(" ^ alt2 ^ "))\\\\\\}" + | (PtrTo elt | CstPtrTo elt) -> +- "\\\\\\[" ^ (analyze_shape_elt elt) ^ "\\\\\\]" ++ "\\\\\\[" ^ (analyze_shape_elt elt) ^ "\\(:\\[0-9\\]+\\)?\\\\\\]" + | Element_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]" + | Element_of_qreg -> (analyze_shape_elt Qreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]" + | All_elements_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\\\\\]" +Index: gcc-4_6-branch/gcc/config/arm/neon.md +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/neon.md 2011-07-19 21:50:44.000000000 -0700 ++++ gcc-4_6-branch/gcc/config/arm/neon.md 2011-09-16 20:16:00.247564269 -0700 +@@ -4250,16 +4250,16 @@ + + (define_insn "neon_vld1<mode>" + [(set (match_operand:VDQX 0 "s_register_operand" "=w") +- (unspec:VDQX [(mem:VDQX (match_operand:SI 1 "s_register_operand" "r"))] ++ (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")] + UNSPEC_VLD1))] + "TARGET_NEON" +- "vld1.<V_sz_elem>\t%h0, [%1]" ++ "vld1.<V_sz_elem>\t%h0, %A1" + [(set_attr "neon_type" "neon_vld1_1_2_regs")] + ) + + (define_insn "neon_vld1_lane<mode>" + [(set (match_operand:VDX 0 "s_register_operand" "=w") +- (unspec:VDX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um") + (match_operand:VDX 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VLD1_LANE))] +@@ -4270,9 +4270,9 @@ + if (lane < 0 || lane >= max) + error ("lane out of range"); + if (max == 1) +- return "vld1.<V_sz_elem>\t%P0, [%1]"; ++ return "vld1.<V_sz_elem>\t%P0, %A1"; + else +- return "vld1.<V_sz_elem>\t{%P0[%c3]}, [%1]"; ++ return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1"; + } + [(set (attr "neon_type") + (if_then_else (eq (const_string "<V_mode_nunits>") (const_int 2)) +@@ -4282,7 +4282,7 @@ + + (define_insn "neon_vld1_lane<mode>" + [(set (match_operand:VQX 0 "s_register_operand" "=w") +- (unspec:VQX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um") + (match_operand:VQX 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VLD1_LANE))] +@@ -4301,9 +4301,9 @@ + } + operands[0] = gen_rtx_REG (<V_HALF>mode, regno); + if (max == 2) +- return "vld1.<V_sz_elem>\t%P0, [%1]"; ++ return "vld1.<V_sz_elem>\t%P0, %A1"; + else +- return "vld1.<V_sz_elem>\t{%P0[%c3]}, [%1]"; ++ return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1"; + } + [(set (attr "neon_type") + (if_then_else (eq (const_string "<V_mode_nunits>") (const_int 2)) +@@ -4313,14 +4313,14 @@ + + (define_insn "neon_vld1_dup<mode>" + [(set (match_operand:VDX 0 "s_register_operand" "=w") +- (unspec:VDX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand" "r"))] ++ (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")] + UNSPEC_VLD1_DUP))] + "TARGET_NEON" + { + if (GET_MODE_NUNITS 
(<MODE>mode) > 1) +- return "vld1.<V_sz_elem>\t{%P0[]}, [%1]"; ++ return "vld1.<V_sz_elem>\t{%P0[]}, %A1"; + else +- return "vld1.<V_sz_elem>\t%h0, [%1]"; ++ return "vld1.<V_sz_elem>\t%h0, %A1"; + } + [(set (attr "neon_type") + (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) +@@ -4330,14 +4330,14 @@ + + (define_insn "neon_vld1_dup<mode>" + [(set (match_operand:VQX 0 "s_register_operand" "=w") +- (unspec:VQX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand" "r"))] ++ (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")] + UNSPEC_VLD1_DUP))] + "TARGET_NEON" + { + if (GET_MODE_NUNITS (<MODE>mode) > 2) +- return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, [%1]"; ++ return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1"; + else +- return "vld1.<V_sz_elem>\t%h0, [%1]"; ++ return "vld1.<V_sz_elem>\t%h0, %A1"; + } + [(set (attr "neon_type") + (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) +@@ -4346,15 +4346,15 @@ + ) + + (define_insn "neon_vst1<mode>" +- [(set (mem:VDQX (match_operand:SI 0 "s_register_operand" "r")) ++ [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um") + (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")] + UNSPEC_VST1))] + "TARGET_NEON" +- "vst1.<V_sz_elem>\t%h1, [%0]" ++ "vst1.<V_sz_elem>\t%h1, %A0" + [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")]) + + (define_insn "neon_vst1_lane<mode>" +- [(set (mem:<V_elem> (match_operand:SI 0 "s_register_operand" "r")) ++ [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um") + (vec_select:<V_elem> + (match_operand:VDX 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "neon_lane_number" "i")])))] +@@ -4365,9 +4365,9 @@ + if (lane < 0 || lane >= max) + error ("lane out of range"); + if (max == 1) +- return "vst1.<V_sz_elem>\t{%P1}, [%0]"; ++ return "vst1.<V_sz_elem>\t{%P1}, %A0"; + else +- return "vst1.<V_sz_elem>\t{%P1[%c2]}, [%0]"; ++ return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"; + } + [(set (attr "neon_type") + (if_then_else (eq (const_string "<V_mode_nunits>") (const_int 1)) +@@ -4375,7 +4375,7 @@ + (const_string "neon_vst1_vst2_lane")))]) + + (define_insn "neon_vst1_lane<mode>" +- [(set (mem:<V_elem> (match_operand:SI 0 "s_register_operand" "r")) ++ [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um") + (vec_select:<V_elem> + (match_operand:VQX 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "neon_lane_number" "i")])))] +@@ -4394,24 +4394,24 @@ + } + operands[1] = gen_rtx_REG (<V_HALF>mode, regno); + if (max == 2) +- return "vst1.<V_sz_elem>\t{%P1}, [%0]"; ++ return "vst1.<V_sz_elem>\t{%P1}, %A0"; + else +- return "vst1.<V_sz_elem>\t{%P1[%c2]}, [%0]"; ++ return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"; + } + [(set_attr "neon_type" "neon_vst1_vst2_lane")] + ) + + (define_insn "neon_vld2<mode>" + [(set (match_operand:TI 0 "s_register_operand" "=w") +- (unspec:TI [(mem:TI (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD2))] + "TARGET_NEON" + { + if (<V_sz_elem> == 64) +- return "vld1.64\t%h0, [%1]"; ++ return "vld1.64\t%h0, %A1"; + else +- return "vld2.<V_sz_elem>\t%h0, [%1]"; ++ return "vld2.<V_sz_elem>\t%h0, %A1"; + } + [(set (attr "neon_type") + (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) +@@ -4421,16 +4421,16 @@ + + (define_insn "neon_vld2<mode>" + [(set (match_operand:OI 0 "s_register_operand" "=w") +- (unspec:OI [(mem:OI (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:OI 
[(match_operand:OI 1 "neon_struct_operand" "Um") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD2))] + "TARGET_NEON" +- "vld2.<V_sz_elem>\t%h0, [%1]" ++ "vld2.<V_sz_elem>\t%h0, %A1" + [(set_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")]) + + (define_insn "neon_vld2_lane<mode>" + [(set (match_operand:TI 0 "s_register_operand" "=w") +- (unspec:TI [(mem:<V_two_elem> (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um") + (match_operand:TI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +@@ -4447,7 +4447,7 @@ + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = operands[1]; + ops[3] = operands[3]; +- output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, [%2]", ops); ++ output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops); + return ""; + } + [(set_attr "neon_type" "neon_vld1_vld2_lane")] +@@ -4455,7 +4455,7 @@ + + (define_insn "neon_vld2_lane<mode>" + [(set (match_operand:OI 0 "s_register_operand" "=w") +- (unspec:OI [(mem:<V_two_elem> (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um") + (match_operand:OI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +@@ -4477,7 +4477,7 @@ + ops[1] = gen_rtx_REG (DImode, regno + 4); + ops[2] = operands[1]; + ops[3] = GEN_INT (lane); +- output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, [%2]", ops); ++ output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops); + return ""; + } + [(set_attr "neon_type" "neon_vld1_vld2_lane")] +@@ -4485,15 +4485,15 @@ + + (define_insn "neon_vld2_dup<mode>" + [(set (match_operand:TI 0 "s_register_operand" "=w") +- (unspec:TI [(mem:<V_two_elem> (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD2_DUP))] + "TARGET_NEON" + { + if (GET_MODE_NUNITS (<MODE>mode) > 1) +- return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, [%1]"; ++ return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1"; + else +- return "vld1.<V_sz_elem>\t%h0, [%1]"; ++ return "vld1.<V_sz_elem>\t%h0, %A1"; + } + [(set (attr "neon_type") + (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) +@@ -4502,16 +4502,16 @@ + ) + + (define_insn "neon_vst2<mode>" +- [(set (mem:TI (match_operand:SI 0 "s_register_operand" "r")) ++ [(set (match_operand:TI 0 "neon_struct_operand" "=Um") + (unspec:TI [(match_operand:TI 1 "s_register_operand" "w") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST2))] + "TARGET_NEON" + { + if (<V_sz_elem> == 64) +- return "vst1.64\t%h1, [%0]"; ++ return "vst1.64\t%h1, %A0"; + else +- return "vst2.<V_sz_elem>\t%h1, [%0]"; ++ return "vst2.<V_sz_elem>\t%h1, %A0"; + } + [(set (attr "neon_type") + (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) +@@ -4520,17 +4520,17 @@ + ) + + (define_insn "neon_vst2<mode>" +- [(set (mem:OI (match_operand:SI 0 "s_register_operand" "r")) ++ [(set (match_operand:OI 0 "neon_struct_operand" "=Um") + (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST2))] + "TARGET_NEON" +- "vst2.<V_sz_elem>\t%h1, [%0]" ++ "vst2.<V_sz_elem>\t%h1, %A0" + [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")] + ) + + (define_insn "neon_vst2_lane<mode>" +- 
[(set (mem:<V_two_elem> (match_operand:SI 0 "s_register_operand" "r")) ++ [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um") + (unspec:<V_two_elem> + [(match_operand:TI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") +@@ -4548,14 +4548,14 @@ + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 2); + ops[3] = operands[2]; +- output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, [%0]", ops); ++ output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops); + return ""; + } + [(set_attr "neon_type" "neon_vst1_vst2_lane")] + ) + + (define_insn "neon_vst2_lane<mode>" +- [(set (mem:<V_two_elem> (match_operand:SI 0 "s_register_operand" "r")) ++ [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um") + (unspec:<V_two_elem> + [(match_operand:OI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") +@@ -4578,7 +4578,7 @@ + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = GEN_INT (lane); +- output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, [%0]", ops); ++ output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops); + return ""; + } + [(set_attr "neon_type" "neon_vst1_vst2_lane")] +@@ -4586,15 +4586,15 @@ + + (define_insn "neon_vld3<mode>" + [(set (match_operand:EI 0 "s_register_operand" "=w") +- (unspec:EI [(mem:EI (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD3))] + "TARGET_NEON" + { + if (<V_sz_elem> == 64) +- return "vld1.64\t%h0, [%1]"; ++ return "vld1.64\t%h0, %A1"; + else +- return "vld3.<V_sz_elem>\t%h0, [%1]"; ++ return "vld3.<V_sz_elem>\t%h0, %A1"; + } + [(set (attr "neon_type") + (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) +@@ -4603,27 +4603,25 @@ + ) + + (define_expand "neon_vld3<mode>" +- [(match_operand:CI 0 "s_register_operand" "=w") +- (match_operand:SI 1 "s_register_operand" "+r") ++ [(match_operand:CI 0 "s_register_operand") ++ (match_operand:CI 1 "neon_struct_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" + { +- emit_insn (gen_neon_vld3qa<mode> (operands[0], operands[0], +- operands[1], operands[1])); +- emit_insn (gen_neon_vld3qb<mode> (operands[0], operands[0], +- operands[1], operands[1])); ++ rtx mem; ++ ++ mem = adjust_address (operands[1], EImode, 0); ++ emit_insn (gen_neon_vld3qa<mode> (operands[0], mem)); ++ mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode)); ++ emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0])); + DONE; + }) + + (define_insn "neon_vld3qa<mode>" + [(set (match_operand:CI 0 "s_register_operand" "=w") +- (unspec:CI [(mem:CI (match_operand:SI 3 "s_register_operand" "2")) +- (match_operand:CI 1 "s_register_operand" "0") ++ (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +- UNSPEC_VLD3A)) +- (set (match_operand:SI 2 "s_register_operand" "=r") +- (plus:SI (match_dup 3) +- (const_int 24)))] ++ UNSPEC_VLD3A))] + "TARGET_NEON" + { + int regno = REGNO (operands[0]); +@@ -4631,8 +4629,8 @@ + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 4); + ops[2] = gen_rtx_REG (DImode, regno + 8); +- ops[3] = operands[2]; +- output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, [%3]!", ops); ++ ops[3] = operands[1]; ++ output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops); + return ""; + } + [(set_attr 
"neon_type" "neon_vld3_vld4")] +@@ -4640,13 +4638,10 @@ + + (define_insn "neon_vld3qb<mode>" + [(set (match_operand:CI 0 "s_register_operand" "=w") +- (unspec:CI [(mem:CI (match_operand:SI 3 "s_register_operand" "2")) +- (match_operand:CI 1 "s_register_operand" "0") ++ (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um") ++ (match_operand:CI 2 "s_register_operand" "0") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +- UNSPEC_VLD3B)) +- (set (match_operand:SI 2 "s_register_operand" "=r") +- (plus:SI (match_dup 3) +- (const_int 24)))] ++ UNSPEC_VLD3B))] + "TARGET_NEON" + { + int regno = REGNO (operands[0]); +@@ -4654,8 +4649,8 @@ + ops[0] = gen_rtx_REG (DImode, regno + 2); + ops[1] = gen_rtx_REG (DImode, regno + 6); + ops[2] = gen_rtx_REG (DImode, regno + 10); +- ops[3] = operands[2]; +- output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, [%3]!", ops); ++ ops[3] = operands[1]; ++ output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops); + return ""; + } + [(set_attr "neon_type" "neon_vld3_vld4")] +@@ -4663,7 +4658,7 @@ + + (define_insn "neon_vld3_lane<mode>" + [(set (match_operand:EI 0 "s_register_operand" "=w") +- (unspec:EI [(mem:<V_three_elem> (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um") + (match_operand:EI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +@@ -4681,7 +4676,7 @@ + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = operands[1]; + ops[4] = operands[3]; +- output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, [%3]", ++ output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %A3", + ops); + return ""; + } +@@ -4690,7 +4685,7 @@ + + (define_insn "neon_vld3_lane<mode>" + [(set (match_operand:CI 0 "s_register_operand" "=w") +- (unspec:CI [(mem:<V_three_elem> (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um") + (match_operand:CI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +@@ -4713,7 +4708,7 @@ + ops[2] = gen_rtx_REG (DImode, regno + 8); + ops[3] = operands[1]; + ops[4] = GEN_INT (lane); +- output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, [%3]", ++ output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %A3", + ops); + return ""; + } +@@ -4722,7 +4717,7 @@ + + (define_insn "neon_vld3_dup<mode>" + [(set (match_operand:EI 0 "s_register_operand" "=w") +- (unspec:EI [(mem:<V_three_elem> (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD3_DUP))] + "TARGET_NEON" +@@ -4735,11 +4730,11 @@ + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = operands[1]; +- output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, [%3]", ops); ++ output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %A3", ops); + return ""; + } + else +- return "vld1.<V_sz_elem>\t%h0, [%1]"; ++ return "vld1.<V_sz_elem>\t%h0, %A1"; + } + [(set (attr "neon_type") + (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) +@@ -4747,16 +4742,16 @@ + (const_string "neon_vld1_1_2_regs")))]) + + (define_insn "neon_vst3<mode>" +- [(set (mem:EI (match_operand:SI 0 "s_register_operand" "r")) ++ [(set (match_operand:EI 0 "neon_struct_operand" "=Um") + 
(unspec:EI [(match_operand:EI 1 "s_register_operand" "w") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST3))] + "TARGET_NEON" + { + if (<V_sz_elem> == 64) +- return "vst1.64\t%h1, [%0]"; ++ return "vst1.64\t%h1, %A0"; + else +- return "vst3.<V_sz_elem>\t%h1, [%0]"; ++ return "vst3.<V_sz_elem>\t%h1, %A0"; + } + [(set (attr "neon_type") + (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) +@@ -4764,62 +4759,60 @@ + (const_string "neon_vst2_4_regs_vst3_vst4")))]) + + (define_expand "neon_vst3<mode>" +- [(match_operand:SI 0 "s_register_operand" "+r") +- (match_operand:CI 1 "s_register_operand" "w") ++ [(match_operand:CI 0 "neon_struct_operand") ++ (match_operand:CI 1 "s_register_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" + { +- emit_insn (gen_neon_vst3qa<mode> (operands[0], operands[0], operands[1])); +- emit_insn (gen_neon_vst3qb<mode> (operands[0], operands[0], operands[1])); ++ rtx mem; ++ ++ mem = adjust_address (operands[0], EImode, 0); ++ emit_insn (gen_neon_vst3qa<mode> (mem, operands[1])); ++ mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode)); ++ emit_insn (gen_neon_vst3qb<mode> (mem, operands[1])); + DONE; + }) + + (define_insn "neon_vst3qa<mode>" +- [(set (mem:EI (match_operand:SI 1 "s_register_operand" "0")) +- (unspec:EI [(match_operand:CI 2 "s_register_operand" "w") ++ [(set (match_operand:EI 0 "neon_struct_operand" "=Um") ++ (unspec:EI [(match_operand:CI 1 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +- UNSPEC_VST3A)) +- (set (match_operand:SI 0 "s_register_operand" "=r") +- (plus:SI (match_dup 1) +- (const_int 24)))] ++ UNSPEC_VST3A))] + "TARGET_NEON" + { +- int regno = REGNO (operands[2]); ++ int regno = REGNO (operands[1]); + rtx ops[4]; + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 8); +- output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, [%0]!", ops); ++ output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops); + return ""; + } + [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] + ) + + (define_insn "neon_vst3qb<mode>" +- [(set (mem:EI (match_operand:SI 1 "s_register_operand" "0")) +- (unspec:EI [(match_operand:CI 2 "s_register_operand" "w") ++ [(set (match_operand:EI 0 "neon_struct_operand" "=Um") ++ (unspec:EI [(match_operand:CI 1 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +- UNSPEC_VST3B)) +- (set (match_operand:SI 0 "s_register_operand" "=r") +- (plus:SI (match_dup 1) +- (const_int 24)))] ++ UNSPEC_VST3B))] + "TARGET_NEON" + { +- int regno = REGNO (operands[2]); ++ int regno = REGNO (operands[1]); + rtx ops[4]; + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 6); + ops[3] = gen_rtx_REG (DImode, regno + 10); +- output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, [%0]!", ops); ++ output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops); + return ""; + } + [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] + ) + + (define_insn "neon_vst3_lane<mode>" +- [(set (mem:<V_three_elem> (match_operand:SI 0 "s_register_operand" "r")) ++ [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um") + (unspec:<V_three_elem> + [(match_operand:EI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") +@@ -4838,7 +4831,7 @@ + ops[2] = gen_rtx_REG (DImode, regno + 2); + ops[3] = gen_rtx_REG (DImode, regno + 4); + ops[4] = operands[2]; +- 
output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, [%0]", ++ output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %A0", + ops); + return ""; + } +@@ -4846,7 +4839,7 @@ + ) + + (define_insn "neon_vst3_lane<mode>" +- [(set (mem:<V_three_elem> (match_operand:SI 0 "s_register_operand" "r")) ++ [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um") + (unspec:<V_three_elem> + [(match_operand:CI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") +@@ -4870,7 +4863,7 @@ + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 8); + ops[4] = GEN_INT (lane); +- output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, [%0]", ++ output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %A0", + ops); + return ""; + } +@@ -4878,15 +4871,15 @@ + + (define_insn "neon_vld4<mode>" + [(set (match_operand:OI 0 "s_register_operand" "=w") +- (unspec:OI [(mem:OI (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD4))] + "TARGET_NEON" + { + if (<V_sz_elem> == 64) +- return "vld1.64\t%h0, [%1]"; ++ return "vld1.64\t%h0, %A1"; + else +- return "vld4.<V_sz_elem>\t%h0, [%1]"; ++ return "vld4.<V_sz_elem>\t%h0, %A1"; + } + [(set (attr "neon_type") + (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) +@@ -4895,27 +4888,25 @@ + ) + + (define_expand "neon_vld4<mode>" +- [(match_operand:XI 0 "s_register_operand" "=w") +- (match_operand:SI 1 "s_register_operand" "+r") ++ [(match_operand:XI 0 "s_register_operand") ++ (match_operand:XI 1 "neon_struct_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" + { +- emit_insn (gen_neon_vld4qa<mode> (operands[0], operands[0], +- operands[1], operands[1])); +- emit_insn (gen_neon_vld4qb<mode> (operands[0], operands[0], +- operands[1], operands[1])); ++ rtx mem; ++ ++ mem = adjust_address (operands[1], OImode, 0); ++ emit_insn (gen_neon_vld4qa<mode> (operands[0], mem)); ++ mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode)); ++ emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0])); + DONE; + }) + + (define_insn "neon_vld4qa<mode>" + [(set (match_operand:XI 0 "s_register_operand" "=w") +- (unspec:XI [(mem:XI (match_operand:SI 3 "s_register_operand" "2")) +- (match_operand:XI 1 "s_register_operand" "0") ++ (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +- UNSPEC_VLD4A)) +- (set (match_operand:SI 2 "s_register_operand" "=r") +- (plus:SI (match_dup 3) +- (const_int 32)))] ++ UNSPEC_VLD4A))] + "TARGET_NEON" + { + int regno = REGNO (operands[0]); +@@ -4924,8 +4915,8 @@ + ops[1] = gen_rtx_REG (DImode, regno + 4); + ops[2] = gen_rtx_REG (DImode, regno + 8); + ops[3] = gen_rtx_REG (DImode, regno + 12); +- ops[4] = operands[2]; +- output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, [%4]!", ops); ++ ops[4] = operands[1]; ++ output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops); + return ""; + } + [(set_attr "neon_type" "neon_vld3_vld4")] +@@ -4933,13 +4924,10 @@ + + (define_insn "neon_vld4qb<mode>" + [(set (match_operand:XI 0 "s_register_operand" "=w") +- (unspec:XI [(mem:XI (match_operand:SI 3 "s_register_operand" "2")) +- (match_operand:XI 1 "s_register_operand" "0") ++ (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um") ++ (match_operand:XI 2 "s_register_operand" "0") + (unspec:VQ [(const_int 0)] 
UNSPEC_VSTRUCTDUMMY)] +- UNSPEC_VLD4B)) +- (set (match_operand:SI 2 "s_register_operand" "=r") +- (plus:SI (match_dup 3) +- (const_int 32)))] ++ UNSPEC_VLD4B))] + "TARGET_NEON" + { + int regno = REGNO (operands[0]); +@@ -4948,8 +4936,8 @@ + ops[1] = gen_rtx_REG (DImode, regno + 6); + ops[2] = gen_rtx_REG (DImode, regno + 10); + ops[3] = gen_rtx_REG (DImode, regno + 14); +- ops[4] = operands[2]; +- output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, [%4]!", ops); ++ ops[4] = operands[1]; ++ output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops); + return ""; + } + [(set_attr "neon_type" "neon_vld3_vld4")] +@@ -4957,7 +4945,7 @@ + + (define_insn "neon_vld4_lane<mode>" + [(set (match_operand:OI 0 "s_register_operand" "=w") +- (unspec:OI [(mem:<V_four_elem> (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um") + (match_operand:OI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +@@ -4976,7 +4964,7 @@ + ops[3] = gen_rtx_REG (DImode, regno + 6); + ops[4] = operands[1]; + ops[5] = operands[3]; +- output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, [%4]", ++ output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4", + ops); + return ""; + } +@@ -4985,7 +4973,7 @@ + + (define_insn "neon_vld4_lane<mode>" + [(set (match_operand:XI 0 "s_register_operand" "=w") +- (unspec:XI [(mem:<V_four_elem> (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um") + (match_operand:XI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +@@ -5009,7 +4997,7 @@ + ops[3] = gen_rtx_REG (DImode, regno + 12); + ops[4] = operands[1]; + ops[5] = GEN_INT (lane); +- output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, [%4]", ++ output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4", + ops); + return ""; + } +@@ -5018,7 +5006,7 @@ + + (define_insn "neon_vld4_dup<mode>" + [(set (match_operand:OI 0 "s_register_operand" "=w") +- (unspec:OI [(mem:<V_four_elem> (match_operand:SI 1 "s_register_operand" "r")) ++ (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD4_DUP))] + "TARGET_NEON" +@@ -5032,12 +5020,12 @@ + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 6); + ops[4] = operands[1]; +- output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, [%4]", ++ output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4", + ops); + return ""; + } + else +- return "vld1.<V_sz_elem>\t%h0, [%1]"; ++ return "vld1.<V_sz_elem>\t%h0, %A1"; + } + [(set (attr "neon_type") + (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) +@@ -5046,16 +5034,16 @@ + ) + + (define_insn "neon_vst4<mode>" +- [(set (mem:OI (match_operand:SI 0 "s_register_operand" "r")) ++ [(set (match_operand:OI 0 "neon_struct_operand" "=Um") + (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST4))] + "TARGET_NEON" + { + if (<V_sz_elem> == 64) +- return "vst1.64\t%h1, [%0]"; ++ return "vst1.64\t%h1, %A0"; + else +- return "vst4.<V_sz_elem>\t%h1, [%0]"; ++ return "vst4.<V_sz_elem>\t%h1, %A0"; + } + [(set (attr "neon_type") + (if_then_else (eq (const_string 
"<V_sz_elem>") (const_string "64")) +@@ -5064,64 +5052,62 @@ + ) + + (define_expand "neon_vst4<mode>" +- [(match_operand:SI 0 "s_register_operand" "+r") +- (match_operand:XI 1 "s_register_operand" "w") ++ [(match_operand:XI 0 "neon_struct_operand") ++ (match_operand:XI 1 "s_register_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" + { +- emit_insn (gen_neon_vst4qa<mode> (operands[0], operands[0], operands[1])); +- emit_insn (gen_neon_vst4qb<mode> (operands[0], operands[0], operands[1])); ++ rtx mem; ++ ++ mem = adjust_address (operands[0], OImode, 0); ++ emit_insn (gen_neon_vst4qa<mode> (mem, operands[1])); ++ mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode)); ++ emit_insn (gen_neon_vst4qb<mode> (mem, operands[1])); + DONE; + }) + + (define_insn "neon_vst4qa<mode>" +- [(set (mem:OI (match_operand:SI 1 "s_register_operand" "0")) +- (unspec:OI [(match_operand:XI 2 "s_register_operand" "w") ++ [(set (match_operand:OI 0 "neon_struct_operand" "=Um") ++ (unspec:OI [(match_operand:XI 1 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +- UNSPEC_VST4A)) +- (set (match_operand:SI 0 "s_register_operand" "=r") +- (plus:SI (match_dup 1) +- (const_int 32)))] ++ UNSPEC_VST4A))] + "TARGET_NEON" + { +- int regno = REGNO (operands[2]); ++ int regno = REGNO (operands[1]); + rtx ops[5]; + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 8); + ops[4] = gen_rtx_REG (DImode, regno + 12); +- output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, [%0]!", ops); ++ output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops); + return ""; + } + [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] + ) + + (define_insn "neon_vst4qb<mode>" +- [(set (mem:OI (match_operand:SI 1 "s_register_operand" "0")) +- (unspec:OI [(match_operand:XI 2 "s_register_operand" "w") ++ [(set (match_operand:OI 0 "neon_struct_operand" "=Um") ++ (unspec:OI [(match_operand:XI 1 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] +- UNSPEC_VST4B)) +- (set (match_operand:SI 0 "s_register_operand" "=r") +- (plus:SI (match_dup 1) +- (const_int 32)))] ++ UNSPEC_VST4B))] + "TARGET_NEON" + { +- int regno = REGNO (operands[2]); ++ int regno = REGNO (operands[1]); + rtx ops[5]; + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 6); + ops[3] = gen_rtx_REG (DImode, regno + 10); + ops[4] = gen_rtx_REG (DImode, regno + 14); +- output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, [%0]!", ops); ++ output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops); + return ""; + } + [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] + ) + + (define_insn "neon_vst4_lane<mode>" +- [(set (mem:<V_four_elem> (match_operand:SI 0 "s_register_operand" "r")) ++ [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um") + (unspec:<V_four_elem> + [(match_operand:OI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") +@@ -5141,7 +5127,7 @@ + ops[3] = gen_rtx_REG (DImode, regno + 4); + ops[4] = gen_rtx_REG (DImode, regno + 6); + ops[5] = operands[2]; +- output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, [%0]", ++ output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0", + ops); + return ""; + } +@@ -5149,7 +5135,7 @@ + ) + + (define_insn "neon_vst4_lane<mode>" +- [(set (mem:<V_four_elem> (match_operand:SI 0 "s_register_operand" 
"r")) ++ [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um") + (unspec:<V_four_elem> + [(match_operand:XI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") +@@ -5174,7 +5160,7 @@ + ops[3] = gen_rtx_REG (DImode, regno + 8); + ops[4] = gen_rtx_REG (DImode, regno + 12); + ops[5] = GEN_INT (lane); +- output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, [%0]", ++ output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0", + ops); + return ""; + } +Index: gcc-4_6-branch/gcc/config/arm/predicates.md +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/predicates.md 2011-09-16 19:58:21.000000000 -0700 ++++ gcc-4_6-branch/gcc/config/arm/predicates.md 2011-09-16 20:19:03.967834108 -0700 +@@ -686,3 +686,8 @@ + + (define_special_predicate "add_operator" + (match_code "plus")) ++ ++(define_special_predicate "neon_struct_operand" ++ (and (match_code "mem") ++ (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2)"))) ++ +Index: gcc-4_6-branch/gcc/doc/tm.texi +=================================================================== +--- gcc-4_6-branch.orig/gcc/doc/tm.texi 2011-06-24 08:13:00.000000000 -0700 ++++ gcc-4_6-branch/gcc/doc/tm.texi 2011-09-16 20:16:00.257564628 -0700 +@@ -2533,7 +2533,7 @@ + register, so @code{TARGET_PREFERRED_RELOAD_CLASS} returns @code{NO_REGS} when + @var{x} is a floating-point constant. If the constant can't be loaded + into any kind of register, code generation will be better if +-@code{LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead ++@code{TARGET_LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead + of using @code{TARGET_PREFERRED_RELOAD_CLASS}. + + If an insn has pseudos in it after register allocation, reload will go +@@ -2570,8 +2570,8 @@ + register, so @code{PREFERRED_RELOAD_CLASS} returns @code{NO_REGS} when + @var{x} is a floating-point constant. If the constant can't be loaded + into any kind of register, code generation will be better if +-@code{LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead +-of using @code{PREFERRED_RELOAD_CLASS}. ++@code{TARGET_LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead ++of using @code{TARGET_PREFERRED_RELOAD_CLASS}. + + If an insn has pseudos in it after register allocation, reload will go + through the alternatives and call repeatedly @code{PREFERRED_RELOAD_CLASS} +@@ -4319,6 +4319,34 @@ + must have move patterns for this mode. + @end deftypefn + ++@deftypefn {Target Hook} bool TARGET_ARRAY_MODE_SUPPORTED_P (enum machine_mode @var{mode}, unsigned HOST_WIDE_INT @var{nelems}) ++Return true if GCC should try to use a scalar mode to store an array ++of @var{nelems} elements, given that each element has mode @var{mode}. ++Returning true here overrides the usual @code{MAX_FIXED_MODE} limit ++and allows GCC to use any defined integer mode. ++ ++One use of this hook is to support vector load and store operations ++that operate on several homogeneous vectors. For example, ARM NEON ++has operations like: ++ ++@smallexample ++int8x8x3_t vld3_s8 (const int8_t *) ++@end smallexample ++ ++where the return type is defined as: ++ ++@smallexample ++typedef struct int8x8x3_t ++@{ ++ int8x8_t val[3]; ++@} int8x8x3_t; ++@end smallexample ++ ++If this hook allows @code{val} to have a scalar mode, then ++@code{int8x8x3_t} can have the same mode. GCC can then store ++@code{int8x8x3_t}s in registers rather than forcing them onto the stack. 
++@end deftypefn ++ + @deftypefn {Target Hook} bool TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P (enum machine_mode @var{mode}) + Define this to return nonzero for machine modes for which the port has + small register classes. If this target hook returns nonzero for a given +@@ -5577,13 +5605,13 @@ + @code{TARGET_MODE_DEPENDENT_ADDRESS_P} target hook. + @end defmac + +-@defmac LEGITIMATE_CONSTANT_P (@var{x}) +-A C expression that is nonzero if @var{x} is a legitimate constant for +-an immediate operand on the target machine. You can assume that +-@var{x} satisfies @code{CONSTANT_P}, so you need not check this. In fact, +-@samp{1} is a suitable definition for this macro on machines where +-anything @code{CONSTANT_P} is valid. +-@end defmac ++@deftypefn {Target Hook} bool TARGET_LEGITIMATE_CONSTANT_P (enum machine_mode @var{mode}, rtx @var{x}) ++This hook returns true if @var{x} is a legitimate constant for a ++@var{mode}-mode immediate operand on the target machine. You can assume that ++@var{x} satisfies @code{CONSTANT_P}, so you need not check this. ++ ++The default definition returns true. ++@end deftypefn + + @deftypefn {Target Hook} rtx TARGET_DELEGITIMIZE_ADDRESS (rtx @var{x}) + This hook is used to undo the possibly obfuscating effects of the +Index: gcc-4_6-branch/gcc/doc/tm.texi.in +=================================================================== +--- gcc-4_6-branch.orig/gcc/doc/tm.texi.in 2011-06-24 08:13:00.000000000 -0700 ++++ gcc-4_6-branch/gcc/doc/tm.texi.in 2011-09-16 20:16:00.257564628 -0700 +@@ -2521,7 +2521,7 @@ + register, so @code{TARGET_PREFERRED_RELOAD_CLASS} returns @code{NO_REGS} when + @var{x} is a floating-point constant. If the constant can't be loaded + into any kind of register, code generation will be better if +-@code{LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead ++@code{TARGET_LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead + of using @code{TARGET_PREFERRED_RELOAD_CLASS}. + + If an insn has pseudos in it after register allocation, reload will go +@@ -2558,8 +2558,8 @@ + register, so @code{PREFERRED_RELOAD_CLASS} returns @code{NO_REGS} when + @var{x} is a floating-point constant. If the constant can't be loaded + into any kind of register, code generation will be better if +-@code{LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead +-of using @code{PREFERRED_RELOAD_CLASS}. ++@code{TARGET_LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead ++of using @code{TARGET_PREFERRED_RELOAD_CLASS}. + + If an insn has pseudos in it after register allocation, reload will go + through the alternatives and call repeatedly @code{PREFERRED_RELOAD_CLASS} +@@ -4305,6 +4305,8 @@ + must have move patterns for this mode. + @end deftypefn + ++@hook TARGET_ARRAY_MODE_SUPPORTED_P ++ + @hook TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P + Define this to return nonzero for machine modes for which the port has + small register classes. If this target hook returns nonzero for a given +@@ -5555,13 +5557,13 @@ + @code{TARGET_MODE_DEPENDENT_ADDRESS_P} target hook. + @end defmac + +-@defmac LEGITIMATE_CONSTANT_P (@var{x}) +-A C expression that is nonzero if @var{x} is a legitimate constant for +-an immediate operand on the target machine. You can assume that +-@var{x} satisfies @code{CONSTANT_P}, so you need not check this. In fact, +-@samp{1} is a suitable definition for this macro on machines where +-anything @code{CONSTANT_P} is valid. 
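Editorial aside, not part of the patch: a small user-level example of what the TARGET_ARRAY_MODE_SUPPORTED_P documentation above describes. With the hook accepting blocks of two to four NEON vectors, the int8x8x3_t temporary below can be given a wide scalar mode and kept in a block of D registers between the load, the arithmetic and the store, rather than being forced onto the stack. Illustrative only; requires an ARM target with NEON, and the function name is made up for the example.

#include <arm_neon.h>

/* De-interleaved load of 24 bytes, add two of the channels, store back.  */
void
mix_channels (const int8_t *src, int8_t *dst)
{
  int8x8x3_t rgb = vld3_s8 (src);
  rgb.val[0] = vadd_s8 (rgb.val[0], rgb.val[1]);
  vst3_s8 (dst, rgb);
}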
+-@end defmac ++@hook TARGET_LEGITIMATE_CONSTANT_P ++This hook returns true if @var{x} is a legitimate constant for a ++@var{mode}-mode immediate operand on the target machine. You can assume that ++@var{x} satisfies @code{CONSTANT_P}, so you need not check this. ++ ++The default definition returns true. ++@end deftypefn + + @hook TARGET_DELEGITIMIZE_ADDRESS + This hook is used to undo the possibly obfuscating effects of the +Index: gcc-4_6-branch/gcc/expr.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/expr.c 2011-09-16 20:14:32.000000000 -0700 ++++ gcc-4_6-branch/gcc/expr.c 2011-09-16 20:16:00.267564792 -0700 +@@ -1497,7 +1497,7 @@ + if (nregs == 0) + return; + +- if (CONSTANT_P (x) && ! LEGITIMATE_CONSTANT_P (x)) ++ if (CONSTANT_P (x) && !targetm.legitimate_constant_p (mode, x)) + x = validize_mem (force_const_mem (mode, x)); + + /* See if the machine can do this with a load multiple insn. */ +@@ -2308,7 +2308,7 @@ + offset -= size; + + cst = (*constfun) (constfundata, offset, mode); +- if (!LEGITIMATE_CONSTANT_P (cst)) ++ if (!targetm.legitimate_constant_p (mode, cst)) + return 0; + + if (!reverse) +@@ -3363,7 +3363,7 @@ + + y_cst = y; + +- if (!LEGITIMATE_CONSTANT_P (y)) ++ if (!targetm.legitimate_constant_p (mode, y)) + { + y = force_const_mem (mode, y); + +@@ -3419,7 +3419,7 @@ + + REAL_VALUE_FROM_CONST_DOUBLE (r, y); + +- if (LEGITIMATE_CONSTANT_P (y)) ++ if (targetm.legitimate_constant_p (dstmode, y)) + oldcost = rtx_cost (y, SET, speed); + else + oldcost = rtx_cost (force_const_mem (dstmode, y), SET, speed); +@@ -3442,7 +3442,7 @@ + + trunc_y = CONST_DOUBLE_FROM_REAL_VALUE (r, srcmode); + +- if (LEGITIMATE_CONSTANT_P (trunc_y)) ++ if (targetm.legitimate_constant_p (srcmode, trunc_y)) + { + /* Skip if the target needs extra instructions to perform + the extension. */ +@@ -3855,7 +3855,7 @@ + by setting SKIP to 0. */ + skip = (reg_parm_stack_space == 0) ? 0 : not_stack; + +- if (CONSTANT_P (x) && ! LEGITIMATE_CONSTANT_P (x)) ++ if (CONSTANT_P (x) && !targetm.legitimate_constant_p (mode, x)) + x = validize_mem (force_const_mem (mode, x)); + + /* If X is a hard register in a non-integer mode, copy it into a pseudo; +@@ -9108,7 +9108,7 @@ + constant and we don't need a memory reference. */ + if (CONSTANT_P (op0) + && mode2 != BLKmode +- && LEGITIMATE_CONSTANT_P (op0) ++ && targetm.legitimate_constant_p (mode2, op0) + && !must_force_mem) + op0 = force_reg (mode2, op0); + +Index: gcc-4_6-branch/gcc/hooks.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/hooks.c 2011-06-24 08:33:48.000000000 -0700 ++++ gcc-4_6-branch/gcc/hooks.c 2011-09-16 20:16:00.267564792 -0700 +@@ -101,6 +101,15 @@ + return true; + } + ++/* Generic hook that takes (enum machine_mode, unsigned HOST_WIDE_INT) ++ and returns false. */ ++bool ++hook_bool_mode_uhwi_false (enum machine_mode mode ATTRIBUTE_UNUSED, ++ unsigned HOST_WIDE_INT value ATTRIBUTE_UNUSED) ++{ ++ return false; ++} ++ + /* Generic hook that takes (FILE *, const char *) and does nothing. 
*/ + void + hook_void_FILEptr_constcharptr (FILE *a ATTRIBUTE_UNUSED, const char *b ATTRIBUTE_UNUSED) +Index: gcc-4_6-branch/gcc/hooks.h +=================================================================== +--- gcc-4_6-branch.orig/gcc/hooks.h 2011-06-24 08:33:48.000000000 -0700 ++++ gcc-4_6-branch/gcc/hooks.h 2011-09-16 20:16:00.267564792 -0700 +@@ -34,6 +34,8 @@ + extern bool hook_bool_mode_true (enum machine_mode); + extern bool hook_bool_mode_const_rtx_false (enum machine_mode, const_rtx); + extern bool hook_bool_mode_const_rtx_true (enum machine_mode, const_rtx); ++extern bool hook_bool_mode_uhwi_false (enum machine_mode, ++ unsigned HOST_WIDE_INT); + extern bool hook_bool_tree_false (tree); + extern bool hook_bool_const_tree_false (const_tree); + extern bool hook_bool_tree_true (tree); +Index: gcc-4_6-branch/gcc/recog.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/recog.c 2011-06-24 08:33:49.000000000 -0700 ++++ gcc-4_6-branch/gcc/recog.c 2011-09-16 20:16:00.277564886 -0700 +@@ -930,7 +930,9 @@ + return ((GET_MODE (op) == VOIDmode || GET_MODE (op) == mode + || mode == VOIDmode) + && (! flag_pic || LEGITIMATE_PIC_OPERAND_P (op)) +- && LEGITIMATE_CONSTANT_P (op)); ++ && targetm.legitimate_constant_p (mode == VOIDmode ++ ? GET_MODE (op) ++ : mode, op)); + + /* Except for certain constants with VOIDmode, already checked for, + OP's mode must match MODE if MODE specifies a mode. */ +@@ -1107,7 +1109,9 @@ + && (GET_MODE (op) == mode || mode == VOIDmode + || GET_MODE (op) == VOIDmode) + && (! flag_pic || LEGITIMATE_PIC_OPERAND_P (op)) +- && LEGITIMATE_CONSTANT_P (op)); ++ && targetm.legitimate_constant_p (mode == VOIDmode ++ ? GET_MODE (op) ++ : mode, op)); + } + + /* Returns 1 if OP is an operand that is a CONST_INT. */ +Index: gcc-4_6-branch/gcc/reload.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/reload.c 2011-06-24 08:33:49.000000000 -0700 ++++ gcc-4_6-branch/gcc/reload.c 2011-09-16 20:16:00.277564886 -0700 +@@ -4721,7 +4721,8 @@ + simplify_gen_subreg (GET_MODE (x), reg_equiv_constant[regno], + GET_MODE (SUBREG_REG (x)), SUBREG_BYTE (x)); + gcc_assert (tem); +- if (CONSTANT_P (tem) && !LEGITIMATE_CONSTANT_P (tem)) ++ if (CONSTANT_P (tem) ++ && !targetm.legitimate_constant_p (GET_MODE (x), tem)) + { + tem = force_const_mem (GET_MODE (x), tem); + i = find_reloads_address (GET_MODE (tem), &tem, XEXP (tem, 0), +@@ -6049,7 +6050,7 @@ + enum reload_type type, int ind_levels) + { + if (CONSTANT_P (x) +- && (! LEGITIMATE_CONSTANT_P (x) ++ && (!targetm.legitimate_constant_p (mode, x) + || targetm.preferred_reload_class (x, rclass) == NO_REGS)) + { + x = force_const_mem (mode, x); +@@ -6059,7 +6060,7 @@ + + else if (GET_CODE (x) == PLUS + && CONSTANT_P (XEXP (x, 1)) +- && (! 
LEGITIMATE_CONSTANT_P (XEXP (x, 1)) ++ && (!targetm.legitimate_constant_p (GET_MODE (x), XEXP (x, 1)) + || targetm.preferred_reload_class (XEXP (x, 1), rclass) + == NO_REGS)) + { +Index: gcc-4_6-branch/gcc/reload1.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/reload1.c 2011-06-24 08:33:49.000000000 -0700 ++++ gcc-4_6-branch/gcc/reload1.c 2011-09-16 20:16:00.277564886 -0700 +@@ -4155,6 +4155,9 @@ + } + else if (function_invariant_p (x)) + { ++ enum machine_mode mode; ++ ++ mode = GET_MODE (SET_DEST (set)); + if (GET_CODE (x) == PLUS) + { + /* This is PLUS of frame pointer and a constant, +@@ -4167,12 +4170,11 @@ + reg_equiv_invariant[i] = x; + num_eliminable_invariants++; + } +- else if (LEGITIMATE_CONSTANT_P (x)) ++ else if (targetm.legitimate_constant_p (mode, x)) + reg_equiv_constant[i] = x; + else + { +- reg_equiv_memory_loc[i] +- = force_const_mem (GET_MODE (SET_DEST (set)), x); ++ reg_equiv_memory_loc[i] = force_const_mem (mode, x); + if (! reg_equiv_memory_loc[i]) + reg_equiv_init[i] = NULL_RTX; + } +Index: gcc-4_6-branch/gcc/stor-layout.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/stor-layout.c 2011-06-24 08:33:49.000000000 -0700 ++++ gcc-4_6-branch/gcc/stor-layout.c 2011-09-16 20:16:00.287564867 -0700 +@@ -546,6 +546,34 @@ + return MIN (BIGGEST_ALIGNMENT, MAX (1, mode_base_align[mode]*BITS_PER_UNIT)); + } + ++/* Return the natural mode of an array, given that it is SIZE bytes in ++ total and has elements of type ELEM_TYPE. */ ++ ++static enum machine_mode ++mode_for_array (tree elem_type, tree size) ++{ ++ tree elem_size; ++ unsigned HOST_WIDE_INT int_size, int_elem_size; ++ bool limit_p; ++ ++ /* One-element arrays get the component type's mode. */ ++ elem_size = TYPE_SIZE (elem_type); ++ if (simple_cst_equal (size, elem_size)) ++ return TYPE_MODE (elem_type); ++ ++ limit_p = true; ++ if (host_integerp (size, 1) && host_integerp (elem_size, 1)) ++ { ++ int_size = tree_low_cst (size, 1); ++ int_elem_size = tree_low_cst (elem_size, 1); ++ if (int_elem_size > 0 ++ && int_size % int_elem_size == 0 ++ && targetm.array_mode_supported_p (TYPE_MODE (elem_type), ++ int_size / int_elem_size)) ++ limit_p = false; ++ } ++ return mode_for_size_tree (size, MODE_INT, limit_p); ++} + + /* Subroutine of layout_decl: Force alignment required for the data type. + But if the decl itself wants greater alignment, don't override that. */ +@@ -2039,14 +2067,8 @@ + && (TYPE_MODE (TREE_TYPE (type)) != BLKmode + || TYPE_NO_FORCE_BLK (TREE_TYPE (type)))) + { +- /* One-element arrays get the component type's mode. */ +- if (simple_cst_equal (TYPE_SIZE (type), +- TYPE_SIZE (TREE_TYPE (type)))) +- SET_TYPE_MODE (type, TYPE_MODE (TREE_TYPE (type))); +- else +- SET_TYPE_MODE (type, mode_for_size_tree (TYPE_SIZE (type), +- MODE_INT, 1)); +- ++ SET_TYPE_MODE (type, mode_for_array (TREE_TYPE (type), ++ TYPE_SIZE (type))); + if (TYPE_MODE (type) != BLKmode + && STRICT_ALIGNMENT && TYPE_ALIGN (type) < BIGGEST_ALIGNMENT + && TYPE_ALIGN (type) < GET_MODE_ALIGNMENT (TYPE_MODE (type))) +Index: gcc-4_6-branch/gcc/target.def +=================================================================== +--- gcc-4_6-branch.orig/gcc/target.def 2011-06-24 08:33:48.000000000 -0700 ++++ gcc-4_6-branch/gcc/target.def 2011-09-16 20:16:00.287564867 -0700 +@@ -1344,6 +1344,13 @@ + unsigned, (unsigned nunroll, struct loop *loop), + NULL) + ++/* True if X is a legitimate MODE-mode immediate operand. 
*/ ++DEFHOOK ++(legitimate_constant_p, ++ "", ++ bool, (enum machine_mode mode, rtx x), ++ default_legitimate_constant_p) ++ + /* True if the constant X cannot be placed in the constant pool. */ + DEFHOOK + (cannot_force_const_mem, +@@ -1611,6 +1618,38 @@ + bool, (enum machine_mode mode), + hook_bool_mode_false) + ++/* True if we should try to use a scalar mode to represent an array, ++ overriding the usual MAX_FIXED_MODE limit. */ ++DEFHOOK ++(array_mode_supported_p, ++ "Return true if GCC should try to use a scalar mode to store an array\n\ ++of @var{nelems} elements, given that each element has mode @var{mode}.\n\ ++Returning true here overrides the usual @code{MAX_FIXED_MODE} limit\n\ ++and allows GCC to use any defined integer mode.\n\ ++\n\ ++One use of this hook is to support vector load and store operations\n\ ++that operate on several homogeneous vectors. For example, ARM NEON\n\ ++has operations like:\n\ ++\n\ ++@smallexample\n\ ++int8x8x3_t vld3_s8 (const int8_t *)\n\ ++@end smallexample\n\ ++\n\ ++where the return type is defined as:\n\ ++\n\ ++@smallexample\n\ ++typedef struct int8x8x3_t\n\ ++@{\n\ ++ int8x8_t val[3];\n\ ++@} int8x8x3_t;\n\ ++@end smallexample\n\ ++\n\ ++If this hook allows @code{val} to have a scalar mode, then\n\ ++@code{int8x8x3_t} can have the same mode. GCC can then store\n\ ++@code{int8x8x3_t}s in registers rather than forcing them onto the stack.", ++ bool, (enum machine_mode mode, unsigned HOST_WIDE_INT nelems), ++ hook_bool_mode_uhwi_false) ++ + /* Compute cost of moving data from a register of class FROM to one of + TO, using MODE. */ + DEFHOOK +Index: gcc-4_6-branch/gcc/targhooks.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/targhooks.c 2011-06-24 08:33:48.000000000 -0700 ++++ gcc-4_6-branch/gcc/targhooks.c 2011-09-16 20:16:00.287564867 -0700 +@@ -1519,4 +1519,15 @@ + { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + ++bool ++default_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, ++ rtx x ATTRIBUTE_UNUSED) ++{ ++#ifdef LEGITIMATE_CONSTANT_P ++ return LEGITIMATE_CONSTANT_P (x); ++#else ++ return true; ++#endif ++} ++ + #include "gt-targhooks.h" +Index: gcc-4_6-branch/gcc/targhooks.h +=================================================================== +--- gcc-4_6-branch.orig/gcc/targhooks.h 2011-06-24 08:33:48.000000000 -0700 ++++ gcc-4_6-branch/gcc/targhooks.h 2011-09-16 20:16:00.287564867 -0700 +@@ -183,3 +183,4 @@ + + extern void *default_get_pch_validity (size_t *); + extern const char *default_pch_valid_p (const void *, size_t); ++extern bool default_legitimate_constant_p (enum machine_mode, rtx); +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon-vld3-1.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon-vld3-1.c 2011-09-16 20:16:00.287564867 -0700 +@@ -0,0 +1,27 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_hw } */ ++/* { dg-options "-O2" } */ ++/* { dg-add-options arm_neon } */ ++ ++#include "arm_neon.h" ++ ++uint32_t buffer[12]; ++ ++void __attribute__((noinline)) ++foo (uint32_t *a) ++{ ++ uint32x4x3_t x; ++ ++ x = vld3q_u32 (a); ++ x.val[0] = vaddq_u32 (x.val[0], x.val[1]); ++ vst3q_u32 (a, x); ++} ++ ++int ++main (void) ++{ ++ buffer[0] = 1; ++ buffer[1] = 2; ++ foo (buffer); ++ return buffer[0] != 3; ++} +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon-vst3-1.c 
+=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon-vst3-1.c 2011-09-16 20:16:00.287564867 -0700 +@@ -0,0 +1,25 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_hw } */ ++/* { dg-options "-O2" } */ ++/* { dg-add-options arm_neon } */ ++ ++#include "arm_neon.h" ++ ++uint32_t buffer[64]; ++ ++void __attribute__((noinline)) ++foo (uint32_t *a) ++{ ++ uint32x4x3_t x; ++ ++ x = vld3q_u32 (a); ++ a[35] = 1; ++ vst3q_lane_u32 (a + 32, x, 1); ++} ++ ++int ++main (void) ++{ ++ foo (buffer); ++ return buffer[35] != 1; ++} +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupf32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupf32.c 2011-09-16 20:16:00.297564810 -0700 +@@ -15,5 +15,5 @@ + out_float32x4_t = vld1q_dup_f32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp16.c 2011-09-16 20:16:00.297564810 -0700 +@@ -15,5 +15,5 @@ + out_poly16x8_t = vld1q_dup_p16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp8.c 2011-09-16 20:16:00.297564810 -0700 +@@ -15,5 +15,5 @@ + out_poly8x16_t = vld1q_dup_p8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups16.c +=================================================================== +--- 
gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups16.c 2011-09-16 20:16:00.297564810 -0700 +@@ -15,5 +15,5 @@ + out_int16x8_t = vld1q_dup_s16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups32.c 2011-09-16 20:16:00.297564810 -0700 +@@ -15,5 +15,5 @@ + out_int32x4_t = vld1q_dup_s32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups64.c 2011-09-16 20:16:00.347564808 -0700 +@@ -15,5 +15,5 @@ + out_int64x2_t = vld1q_dup_s64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups8.c 2011-09-16 20:16:00.347564808 -0700 +@@ -15,5 +15,5 @@ + out_int8x16_t = vld1q_dup_s8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu16.c 
+=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu16.c 2011-09-16 20:16:00.347564808 -0700 +@@ -15,5 +15,5 @@ + out_uint16x8_t = vld1q_dup_u16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu32.c 2011-09-16 20:16:00.347564808 -0700 +@@ -15,5 +15,5 @@ + out_uint32x4_t = vld1q_dup_u32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu64.c 2011-09-16 20:16:00.347564808 -0700 +@@ -15,5 +15,5 @@ + out_uint64x2_t = vld1q_dup_u64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu8.c 2011-09-16 20:16:00.347564808 -0700 +@@ -15,5 +15,5 @@ + out_uint8x16_t = vld1q_dup_u8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: 
gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanef32.c 2011-09-16 20:16:00.347564808 -0700 +@@ -16,5 +16,5 @@ + out_float32x4_t = vld1q_lane_f32 (0, arg1_float32x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanep16.c 2011-09-16 20:16:00.347564808 -0700 +@@ -16,5 +16,5 @@ + out_poly16x8_t = vld1q_lane_p16 (0, arg1_poly16x8_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanep8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanep8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanep8.c 2011-09-16 20:16:00.347564808 -0700 +@@ -16,5 +16,5 @@ + out_poly8x16_t = vld1q_lane_p8 (0, arg1_poly8x16_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes16.c 2011-09-16 20:16:00.347564808 -0700 +@@ -16,5 +16,5 @@ + out_int16x8_t = vld1q_lane_s16 (0, arg1_int16x8_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: 
gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes32.c 2011-09-16 20:16:00.347564808 -0700 +@@ -16,5 +16,5 @@ + out_int32x4_t = vld1q_lane_s32 (0, arg1_int32x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes64.c 2011-09-16 20:16:00.347564808 -0700 +@@ -16,5 +16,5 @@ + out_int64x2_t = vld1q_lane_s64 (0, arg1_int64x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes8.c 2011-09-16 20:16:00.347564808 -0700 +@@ -16,5 +16,5 @@ + out_int8x16_t = vld1q_lane_s8 (0, arg1_int8x16_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu16.c 2011-09-16 20:16:00.347564808 -0700 +@@ -16,5 +16,5 @@ + out_uint16x8_t = vld1q_lane_u16 (0, arg1_uint16x8_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu32.c 
+=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu32.c 2011-09-16 20:16:00.347564808 -0700 +@@ -16,5 +16,5 @@ + out_uint32x4_t = vld1q_lane_u32 (0, arg1_uint32x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu64.c 2011-09-16 20:16:00.347564808 -0700 +@@ -16,5 +16,5 @@ + out_uint64x2_t = vld1q_lane_u64 (0, arg1_uint64x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu8.c 2011-09-16 20:16:00.347564808 -0700 +@@ -16,5 +16,5 @@ + out_uint8x16_t = vld1q_lane_u8 (0, arg1_uint8x16_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qf32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Qf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qf32.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_float32x4_t = vld1q_f32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Qp16.c 2011-06-24 
08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qp16.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_poly16x8_t = vld1q_p16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qp8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Qp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qp8.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_poly8x16_t = vld1q_p8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qs16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Qs16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qs16.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_int16x8_t = vld1q_s16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qs32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Qs32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qs32.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_int32x4_t = vld1q_s32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qs64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Qs64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qs64.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_int64x2_t = vld1q_s64 (0); + } + +-/* { dg-final { scan-assembler 
"vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qs8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Qs8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qs8.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_int8x16_t = vld1q_s8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Qu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qu16.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_uint16x8_t = vld1q_u16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Qu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qu32.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_uint32x4_t = vld1q_u32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qu64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Qu64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qu64.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_uint64x2_t = vld1q_u64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1Qu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1Qu8.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_uint8x16_t = vld1q_u8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupf32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_dupf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupf32.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_float32x2_t = vld1_dup_f32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_dupp16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupp16.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_poly16x4_t = vld1_dup_p16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupp8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_dupp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupp8.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_poly8x8_t = vld1_dup_p8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dups16.c 
+=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_dups16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dups16.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_int16x4_t = vld1_dup_s16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dups32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_dups32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dups32.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_int32x2_t = vld1_dup_s32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dups64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_dups64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dups64.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_int64x1_t = vld1_dup_s64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dups8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_dups8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dups8.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_int8x8_t = vld1_dup_s8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_dupu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupu16.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_uint16x4_t = vld1_dup_u16 
(0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_dupu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupu32.c 2011-09-16 20:16:00.357564842 -0700 +@@ -15,5 +15,5 @@ + out_uint32x2_t = vld1_dup_u32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupu64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_dupu64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupu64.c 2011-09-16 20:16:00.367564848 -0700 +@@ -15,5 +15,5 @@ + out_uint64x1_t = vld1_dup_u64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_dupu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_dupu8.c 2011-09-16 20:16:00.367564848 -0700 +@@ -15,5 +15,5 @@ + out_uint8x8_t = vld1_dup_u8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanef32.c 2011-09-16 20:16:00.367564848 -0700 +@@ -16,5 +16,5 @@ + out_float32x2_t = vld1_lane_f32 (0, arg1_float32x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ 
\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanep16.c 2011-09-16 20:16:00.367564848 -0700 +@@ -16,5 +16,5 @@ + out_poly16x4_t = vld1_lane_p16 (0, arg1_poly16x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanep8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_lanep8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanep8.c 2011-09-16 20:16:00.367564848 -0700 +@@ -16,5 +16,5 @@ + out_poly8x8_t = vld1_lane_p8 (0, arg1_poly8x8_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanes16.c 2011-09-16 20:16:00.367564848 -0700 +@@ -16,5 +16,5 @@ + out_int16x4_t = vld1_lane_s16 (0, arg1_int16x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanes32.c 2011-09-16 20:16:00.367564848 -0700 +@@ -16,5 +16,5 @@ + out_int32x2_t = vld1_lane_s32 (0, arg1_int32x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ 
\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanes64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_lanes64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanes64.c 2011-09-16 20:16:00.367564848 -0700 +@@ -16,5 +16,5 @@ + out_int64x1_t = vld1_lane_s64 (0, arg1_int64x1_t, 0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanes8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_lanes8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_lanes8.c 2011-09-16 20:16:00.367564848 -0700 +@@ -16,5 +16,5 @@ + out_int8x8_t = vld1_lane_s8 (0, arg1_int8x8_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_laneu16.c 2011-09-16 20:16:00.367564848 -0700 +@@ -16,5 +16,5 @@ + out_uint16x4_t = vld1_lane_u16 (0, arg1_uint16x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_laneu32.c 2011-09-16 20:16:00.367564848 -0700 +@@ -16,5 +16,5 @@ + out_uint32x2_t = vld1_lane_u32 (0, arg1_uint32x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 
\]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_laneu64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_laneu64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_laneu64.c 2011-09-16 20:16:00.367564848 -0700 +@@ -16,5 +16,5 @@ + out_uint64x1_t = vld1_lane_u64 (0, arg1_uint64x1_t, 0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_laneu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1_laneu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1_laneu8.c 2011-09-16 20:16:00.367564848 -0700 +@@ -16,5 +16,5 @@ + out_uint8x8_t = vld1_lane_u8 (0, arg1_uint8x8_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1f32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1f32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1f32.c 2011-09-16 20:16:00.367564848 -0700 +@@ -15,5 +15,5 @@ + out_float32x2_t = vld1_f32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1p16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1p16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1p16.c 2011-09-16 20:16:00.377564842 -0700 +@@ -15,5 +15,5 @@ + out_poly16x4_t = vld1_p16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1p8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1p8.c 2011-06-24 08:13:40.000000000 -0700 ++++ 
gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1p8.c 2011-09-16 20:16:00.377564842 -0700 +@@ -15,5 +15,5 @@ + out_poly8x8_t = vld1_p8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1s16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1s16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1s16.c 2011-09-16 20:16:00.377564842 -0700 +@@ -15,5 +15,5 @@ + out_int16x4_t = vld1_s16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1s32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1s32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1s32.c 2011-09-16 20:16:00.377564842 -0700 +@@ -15,5 +15,5 @@ + out_int32x2_t = vld1_s32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1s64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1s64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1s64.c 2011-09-16 20:16:00.377564842 -0700 +@@ -15,5 +15,5 @@ + out_int64x1_t = vld1_s64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1s8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1s8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1s8.c 2011-09-16 20:16:00.377564842 -0700 +@@ -15,5 +15,5 @@ + out_int8x8_t = vld1_s8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { 
cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1u16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1u16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1u16.c 2011-09-16 20:16:00.377564842 -0700 +@@ -15,5 +15,5 @@ + out_uint16x4_t = vld1_u16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1u32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1u32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1u32.c 2011-09-16 20:16:00.377564842 -0700 +@@ -15,5 +15,5 @@ + out_uint32x2_t = vld1_u32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1u64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1u64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1u64.c 2011-09-16 20:16:00.377564842 -0700 +@@ -15,5 +15,5 @@ + out_uint64x1_t = vld1_u64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1u8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld1u8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld1u8.c 2011-09-16 20:16:00.387564830 -0700 +@@ -15,5 +15,5 @@ + out_uint8x8_t = vld1_u8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanef32.c 2011-09-16 20:16:00.387564830 -0700 +@@ -16,5 +16,5 @@ + out_float32x4x2_t = vld2q_lane_f32 (0, arg1_float32x4x2_t, 1); + } + +-/* { 
dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanep16.c 2011-09-16 20:16:00.387564830 -0700 +@@ -16,5 +16,5 @@ + out_poly16x8x2_t = vld2q_lane_p16 (0, arg1_poly16x8x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanes16.c 2011-09-16 20:16:00.387564830 -0700 +@@ -16,5 +16,5 @@ + out_int16x8x2_t = vld2q_lane_s16 (0, arg1_int16x8x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanes32.c 2011-09-16 20:16:00.387564830 -0700 +@@ -16,5 +16,5 @@ + out_int32x4x2_t = vld2q_lane_s32 (0, arg1_int32x4x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { 
cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Q_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Q_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Q_laneu16.c 2011-09-16 20:16:00.397564843 -0700 +@@ -16,5 +16,5 @@ + out_uint16x8x2_t = vld2q_lane_u16 (0, arg1_uint16x8x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Q_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Q_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Q_laneu32.c 2011-09-16 20:16:00.397564843 -0700 +@@ -16,5 +16,5 @@ + out_uint32x4x2_t = vld2q_lane_u32 (0, arg1_uint32x4x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qf32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Qf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qf32.c 2011-09-16 20:16:00.397564843 -0700 +@@ -15,6 +15,6 @@ + out_float32x4x2_t = vld2q_f32 (0); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Qp16.c 2011-06-24 08:13:40.000000000 -0700 ++++ 
gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qp16.c 2011-09-16 20:16:00.397564843 -0700 +@@ -15,6 +15,6 @@ + out_poly16x8x2_t = vld2q_p16 (0); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qp8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Qp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qp8.c 2011-09-16 20:16:00.397564843 -0700 +@@ -15,6 +15,6 @@ + out_poly8x16x2_t = vld2q_p8 (0); + } + +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qs16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Qs16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qs16.c 2011-09-16 20:16:00.397564843 -0700 +@@ -15,6 +15,6 @@ + out_int16x8x2_t = vld2q_s16 (0); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qs32.c +=================================================================== +--- 
gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Qs32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qs32.c 2011-09-16 20:16:00.397564843 -0700 +@@ -15,6 +15,6 @@ + out_int32x4x2_t = vld2q_s32 (0); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qs8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Qs8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qs8.c 2011-09-16 20:16:00.397564843 -0700 +@@ -15,6 +15,6 @@ + out_int8x16x2_t = vld2q_s8 (0); + } + +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Qu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qu16.c 2011-09-16 20:16:00.397564843 -0700 +@@ -15,6 +15,6 @@ + out_uint16x8x2_t = vld2q_u16 (0); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: 
gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Qu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qu32.c 2011-09-16 20:16:00.407564879 -0700 +@@ -15,6 +15,6 @@ + out_uint32x4x2_t = vld2q_u32 (0); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2Qu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2Qu8.c 2011-09-16 20:16:00.407564879 -0700 +@@ -15,6 +15,6 @@ + out_uint8x16x2_t = vld2q_u8 (0); + } + +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupf32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_dupf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupf32.c 2011-09-16 20:16:00.407564879 -0700 +@@ -15,5 +15,5 @@ + out_float32x2x2_t = vld2_dup_f32 (0); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_dupp16.c 
2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupp16.c 2011-09-16 20:16:00.407564879 -0700 +@@ -15,5 +15,5 @@ + out_poly16x4x2_t = vld2_dup_p16 (0); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupp8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_dupp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupp8.c 2011-09-16 20:16:00.407564879 -0700 +@@ -15,5 +15,5 @@ + out_poly8x8x2_t = vld2_dup_p8 (0); + } + +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dups16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_dups16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dups16.c 2011-09-16 20:16:00.407564879 -0700 +@@ -15,5 +15,5 @@ + out_int16x4x2_t = vld2_dup_s16 (0); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dups32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_dups32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dups32.c 2011-09-16 20:16:00.407564879 -0700 +@@ -15,5 +15,5 @@ + out_int32x2x2_t = vld2_dup_s32 (0); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dups64.c 
+=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_dups64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dups64.c 2011-09-16 20:16:00.407564879 -0700 +@@ -15,5 +15,5 @@ + out_int64x1x2_t = vld2_dup_s64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dups8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_dups8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dups8.c 2011-09-16 20:16:00.417564906 -0700 +@@ -15,5 +15,5 @@ + out_int8x8x2_t = vld2_dup_s8 (0); + } + +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_dupu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupu16.c 2011-09-16 20:16:00.417564906 -0700 +@@ -15,5 +15,5 @@ + out_uint16x4x2_t = vld2_dup_u16 (0); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_dupu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupu32.c 2011-09-16 20:16:00.417564906 -0700 +@@ -15,5 +15,5 @@ + out_uint32x2x2_t = vld2_dup_u32 (0); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: 
gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupu64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_dupu64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupu64.c 2011-09-16 20:16:00.417564906 -0700 +@@ -15,5 +15,5 @@ + out_uint64x1x2_t = vld2_dup_u64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_dupu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_dupu8.c 2011-09-16 20:16:00.417564906 -0700 +@@ -15,5 +15,5 @@ + out_uint8x8x2_t = vld2_dup_u8 (0); + } + +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_lanef32.c 2011-09-16 20:16:00.417564906 -0700 +@@ -16,5 +16,5 @@ + out_float32x2x2_t = vld2_lane_f32 (0, arg1_float32x2x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_lanep16.c 2011-09-16 20:16:00.417564906 -0700 +@@ -16,5 +16,5 @@ + out_poly16x4x2_t = vld2_lane_p16 (0, arg1_poly16x4x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_lanep8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_lanep8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_lanep8.c 2011-09-16 20:16:00.417564906 -0700 +@@ -16,5 +16,5 @@ + out_poly8x8x2_t = vld2_lane_p8 (0, arg1_poly8x8x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_lanes16.c 2011-09-16 20:16:00.417564906 -0700 +@@ -16,5 +16,5 @@ + out_int16x4x2_t = vld2_lane_s16 (0, arg1_int16x4x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_lanes32.c 2011-09-16 20:16:00.417564906 -0700 +@@ -16,5 +16,5 @@ + out_int32x2x2_t = vld2_lane_s32 (0, arg1_int32x2x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_lanes8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_lanes8.c 2011-06-24 08:13:40.000000000 -0700 ++++ 
gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_lanes8.c 2011-09-16 20:16:00.417564906 -0700 +@@ -16,5 +16,5 @@ + out_int8x8x2_t = vld2_lane_s8 (0, arg1_int8x8x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_laneu16.c 2011-09-16 20:16:00.417564906 -0700 +@@ -16,5 +16,5 @@ + out_uint16x4x2_t = vld2_lane_u16 (0, arg1_uint16x4x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_laneu32.c 2011-09-16 20:16:00.417564906 -0700 +@@ -16,5 +16,5 @@ + out_uint32x2x2_t = vld2_lane_u32 (0, arg1_uint32x2x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_laneu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2_laneu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2_laneu8.c 2011-09-16 20:16:00.417564906 -0700 +@@ -16,5 +16,5 @@ + out_uint8x8x2_t = vld2_lane_u8 (0, arg1_uint8x8x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2f32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2f32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2f32.c 2011-09-16 20:16:00.427564921 -0700 +@@ -15,5 +15,5 @@ + out_float32x2x2_t = vld2_f32 (0); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2p16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2p16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2p16.c 2011-09-16 20:16:00.427564921 -0700 +@@ -15,5 +15,5 @@ + out_poly16x4x2_t = vld2_p16 (0); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2p8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2p8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2p8.c 2011-09-16 20:16:00.427564921 -0700 +@@ -15,5 +15,5 @@ + out_poly8x8x2_t = vld2_p8 (0); + } + +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2s16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2s16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2s16.c 2011-09-16 20:16:00.427564921 -0700 +@@ -15,5 +15,5 @@ + out_int16x4x2_t = vld2_s16 (0); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { 
dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2s32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2s32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2s32.c 2011-09-16 20:16:00.427564921 -0700 +@@ -15,5 +15,5 @@ + out_int32x2x2_t = vld2_s32 (0); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2s64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2s64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2s64.c 2011-09-16 20:16:00.427564921 -0700 +@@ -15,5 +15,5 @@ + out_int64x1x2_t = vld2_s64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2s8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2s8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2s8.c 2011-09-16 20:16:00.437564924 -0700 +@@ -15,5 +15,5 @@ + out_int8x8x2_t = vld2_s8 (0); + } + +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2u16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2u16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2u16.c 2011-09-16 20:16:00.437564924 -0700 +@@ -15,5 +15,5 @@ + out_uint16x4x2_t = vld2_u16 (0); + } + +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2u32.c +=================================================================== +--- 
gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2u32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2u32.c 2011-09-16 20:16:00.437564924 -0700 +@@ -15,5 +15,5 @@ + out_uint32x2x2_t = vld2_u32 (0); + } + +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2u64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2u64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2u64.c 2011-09-16 20:16:00.437564924 -0700 +@@ -15,5 +15,5 @@ + out_uint64x1x2_t = vld2_u64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2u8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld2u8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld2u8.c 2011-09-16 20:16:00.437564924 -0700 +@@ -15,5 +15,5 @@ + out_uint8x8x2_t = vld2_u8 (0); + } + +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanef32.c 2011-09-16 20:16:00.437564924 -0700 +@@ -16,5 +16,5 @@ + out_float32x4x3_t = vld3q_lane_f32 (0, arg1_float32x4x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanep16.c 
+=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanep16.c 2011-09-16 20:16:00.437564924 -0700 +@@ -16,5 +16,5 @@ + out_poly16x8x3_t = vld3q_lane_p16 (0, arg1_poly16x8x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes16.c 2011-09-16 20:16:00.447564932 -0700 +@@ -16,5 +16,5 @@ + out_int16x8x3_t = vld3q_lane_s16 (0, arg1_int16x8x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes32.c 2011-09-16 20:16:00.447564932 -0700 +@@ -16,5 +16,5 @@ + out_int32x4x3_t = vld3q_lane_s32 (0, arg1_int32x4x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu16.c 2011-09-16 20:16:00.447564932 -0700 +@@ -16,5 +16,5 @@ + out_uint16x8x3_t = vld3q_lane_u16 (0, 
arg1_uint16x8x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu32.c 2011-09-16 20:16:00.447564932 -0700 +@@ -16,5 +16,5 @@ + out_uint32x4x3_t = vld3q_lane_u32 (0, arg1_uint32x4x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qf32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Qf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qf32.c 2011-09-16 20:16:00.447564932 -0700 +@@ -15,6 +15,6 @@ + out_float32x4x3_t = vld3q_f32 (0); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Qp16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qp16.c 2011-09-16 20:16:00.447564932 -0700 +@@ -15,6 +15,6 @@ + out_poly16x8x3_t = vld3q_p16 (0); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, 
\\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qp8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Qp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qp8.c 2011-09-16 20:16:00.447564932 -0700 +@@ -15,6 +15,6 @@ + out_poly8x16x3_t = vld3q_p8 (0); + } + +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qs16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Qs16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qs16.c 2011-09-16 20:16:00.447564932 -0700 +@@ -15,6 +15,6 @@ + out_int16x8x3_t = vld3q_s16 (0); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qs32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Qs32.c 2011-06-24 08:13:40.000000000 -0700 ++++ 
gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qs32.c 2011-09-16 20:16:00.447564932 -0700 +@@ -15,6 +15,6 @@ + out_int32x4x3_t = vld3q_s32 (0); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qs8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Qs8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qs8.c 2011-09-16 20:16:00.447564932 -0700 +@@ -15,6 +15,6 @@ + out_int8x16x3_t = vld3q_s8 (0); + } + +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Qu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qu16.c 2011-09-16 20:16:00.447564932 -0700 +@@ -15,6 +15,6 @@ + out_uint16x8x3_t = vld3q_u16 (0); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ 
+ /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Qu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qu32.c 2011-09-16 20:16:00.447564932 -0700 +@@ -15,6 +15,6 @@ + out_uint32x4x3_t = vld3q_u32 (0); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3Qu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3Qu8.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,6 +15,6 @@ + out_uint8x16x3_t = vld3q_u8 (0); + } + +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupf32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_dupf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupf32.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_float32x2x3_t = vld3_dup_f32 (0); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { 
cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_dupp16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupp16.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_poly16x4x3_t = vld3_dup_p16 (0); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupp8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_dupp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupp8.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_poly8x8x3_t = vld3_dup_p8 (0); + } + +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dups16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_dups16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dups16.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_int16x4x3_t = vld3_dup_s16 (0); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dups32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_dups32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dups32.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_int32x2x3_t = vld3_dup_s32 (0); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } 
*/ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dups64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_dups64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dups64.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_int64x1x3_t = vld3_dup_s64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dups8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_dups8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dups8.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_int8x8x3_t = vld3_dup_s8 (0); + } + +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_dupu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupu16.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_uint16x4x3_t = vld3_dup_u16 (0); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_dupu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupu32.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_uint32x2x3_t = vld3_dup_u32 (0); + } + +-/* { dg-final { scan-assembler 
"vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupu64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_dupu64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupu64.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_uint64x1x3_t = vld3_dup_u64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_dupu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_dupu8.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_uint8x8x3_t = vld3_dup_u8 (0); + } + +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_lanef32.c 2011-09-16 20:16:00.457564944 -0700 +@@ -16,5 +16,5 @@ + out_float32x2x3_t = vld3_lane_f32 (0, arg1_float32x2x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_lanep16.c +=================================================================== 
+--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_lanep16.c 2011-09-16 20:16:00.457564944 -0700 +@@ -16,5 +16,5 @@ + out_poly16x4x3_t = vld3_lane_p16 (0, arg1_poly16x4x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_lanep8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_lanep8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_lanep8.c 2011-09-16 20:16:00.457564944 -0700 +@@ -16,5 +16,5 @@ + out_poly8x8x3_t = vld3_lane_p8 (0, arg1_poly8x8x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_lanes16.c 2011-09-16 20:16:00.457564944 -0700 +@@ -16,5 +16,5 @@ + out_int16x4x3_t = vld3_lane_s16 (0, arg1_int16x4x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_lanes32.c 2011-09-16 20:16:00.457564944 -0700 +@@ -16,5 +16,5 @@ + out_int32x2x3_t = vld3_lane_s32 (0, arg1_int32x2x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_lanes8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_lanes8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_lanes8.c 2011-09-16 20:16:00.457564944 -0700 +@@ -16,5 +16,5 @@ + out_int8x8x3_t = vld3_lane_s8 (0, arg1_int8x8x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_laneu16.c 2011-09-16 20:16:00.457564944 -0700 +@@ -16,5 +16,5 @@ + out_uint16x4x3_t = vld3_lane_u16 (0, arg1_uint16x4x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_laneu32.c 2011-09-16 20:16:00.457564944 -0700 +@@ -16,5 +16,5 @@ + out_uint32x2x3_t = vld3_lane_u32 (0, arg1_uint32x2x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_laneu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3_laneu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3_laneu8.c 2011-09-16 20:16:00.457564944 -0700 +@@ -16,5 +16,5 @@ + out_uint8x8x3_t = vld3_lane_u8 (0, arg1_uint8x8x3_t, 1); + } + +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3f32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3f32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3f32.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_float32x2x3_t = vld3_f32 (0); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3p16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3p16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3p16.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_poly16x4x3_t = vld3_p16 (0); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3p8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3p8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3p8.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_poly8x8x3_t = vld3_p8 (0); + } + +-/* { dg-final { scan-assembler "vld3\.8\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3s16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3s16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3s16.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_int16x4x3_t = vld3_s16 (0); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3s32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3s32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3s32.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_int32x2x3_t = vld3_s32 (0); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3s64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3s64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3s64.c 2011-09-16 20:16:00.457564944 -0700 +@@ -15,5 +15,5 @@ + out_int64x1x3_t = vld3_s64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3s8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3s8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3s8.c 2011-09-16 20:16:00.467564964 -0700 +@@ -15,5 +15,5 @@ + out_int8x8x3_t = vld3_s8 (0); + } + +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, 
\\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3u16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3u16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3u16.c 2011-09-16 20:16:00.467564964 -0700 +@@ -15,5 +15,5 @@ + out_uint16x4x3_t = vld3_u16 (0); + } + +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3u32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3u32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3u32.c 2011-09-16 20:16:00.467564964 -0700 +@@ -15,5 +15,5 @@ + out_uint32x2x3_t = vld3_u32 (0); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3u64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3u64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3u64.c 2011-09-16 20:16:00.467564964 -0700 +@@ -15,5 +15,5 @@ + out_uint64x1x3_t = vld3_u64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3u8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld3u8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld3u8.c 2011-09-16 20:16:00.467564964 -0700 +@@ -15,5 +15,5 @@ + out_uint8x8x3_t = vld3_u8 (0); + } + +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanef32.c 2011-09-16 20:16:00.467564964 -0700 +@@ -16,5 +16,5 @@ + out_float32x4x4_t = vld4q_lane_f32 (0, arg1_float32x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanep16.c 2011-09-16 20:16:00.467564964 -0700 +@@ -16,5 +16,5 @@ + out_poly16x8x4_t = vld4q_lane_p16 (0, arg1_poly16x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes16.c 2011-09-16 20:16:00.467564964 -0700 +@@ -16,5 +16,5 @@ + out_int16x8x4_t = vld4q_lane_s16 (0, arg1_int16x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes32.c 2011-09-16 20:16:00.467564964 -0700 +@@ -16,5 +16,5 @@ + out_int32x4x4_t = vld4q_lane_s32 (0, arg1_int32x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu16.c 2011-09-16 20:16:00.467564964 -0700 +@@ -16,5 +16,5 @@ + out_uint16x8x4_t = vld4q_lane_u16 (0, arg1_uint16x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu32.c 2011-09-16 20:16:00.467564964 -0700 +@@ -16,5 +16,5 @@ + out_uint32x4x4_t = vld4q_lane_u32 (0, arg1_uint32x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qf32.c 
+=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Qf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qf32.c 2011-09-16 20:16:00.467564964 -0700 +@@ -15,6 +15,6 @@ + out_float32x4x4_t = vld4q_f32 (0); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Qp16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qp16.c 2011-09-16 20:16:00.467564964 -0700 +@@ -15,6 +15,6 @@ + out_poly16x8x4_t = vld4q_p16 (0); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qp8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Qp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qp8.c 2011-09-16 20:16:00.467564964 -0700 +@@ -15,6 +15,6 @@ + out_poly8x16x4_t = vld4q_p8 (0); + } + +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qs16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Qs16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qs16.c 2011-09-16 20:16:00.477564991 -0700 +@@ -15,6 +15,6 @@ + out_int16x8x4_t = vld4q_s16 (0); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qs32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Qs32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qs32.c 2011-09-16 20:16:00.477564991 -0700 +@@ -15,6 +15,6 @@ + out_int32x4x4_t = vld4q_s32 (0); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qs8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Qs8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qs8.c 2011-09-16 20:16:00.477564991 -0700 +@@ -15,6 +15,6 @@ + out_int8x16x4_t = vld4q_s8 (0); 
+ } + +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Qu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qu16.c 2011-09-16 20:16:00.477564991 -0700 +@@ -15,6 +15,6 @@ + out_uint16x8x4_t = vld4q_u16 (0); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Qu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qu32.c 2011-09-16 20:16:00.477564991 -0700 +@@ -15,6 +15,6 @@ + out_uint32x4x4_t = vld4q_u32 (0); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, 
\\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4Qu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4Qu8.c 2011-09-16 20:16:00.477564991 -0700 +@@ -15,6 +15,6 @@ + out_uint8x16x4_t = vld4q_u8 (0); + } + +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupf32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_dupf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupf32.c 2011-09-16 20:16:00.477564991 -0700 +@@ -15,5 +15,5 @@ + out_float32x2x4_t = vld4_dup_f32 (0); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_dupp16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupp16.c 2011-09-16 20:16:00.477564991 -0700 +@@ -15,5 +15,5 @@ + out_poly16x4x4_t = vld4_dup_p16 (0); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupp8.c 
+=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_dupp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupp8.c 2011-09-16 20:16:00.477564991 -0700 +@@ -15,5 +15,5 @@ + out_poly8x8x4_t = vld4_dup_p8 (0); + } + +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dups16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_dups16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dups16.c 2011-09-16 20:16:00.477564991 -0700 +@@ -15,5 +15,5 @@ + out_int16x4x4_t = vld4_dup_s16 (0); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dups32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_dups32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dups32.c 2011-09-16 20:16:00.477564991 -0700 +@@ -15,5 +15,5 @@ + out_int32x2x4_t = vld4_dup_s32 (0); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dups64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_dups64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dups64.c 2011-09-16 20:16:00.487565006 -0700 +@@ -15,5 +15,5 @@ + out_int64x1x4_t = vld4_dup_s64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dups8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_dups8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dups8.c 2011-09-16 20:16:00.487565006 -0700 +@@ -15,5 +15,5 @@ + out_int8x8x4_t = vld4_dup_s8 (0); + } + +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_dupu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupu16.c 2011-09-16 20:16:00.487565006 -0700 +@@ -15,5 +15,5 @@ + out_uint16x4x4_t = vld4_dup_u16 (0); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_dupu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupu32.c 2011-09-16 20:16:00.487565006 -0700 +@@ -15,5 +15,5 @@ + out_uint32x2x4_t = vld4_dup_u32 (0); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupu64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_dupu64.c 2011-06-24 
08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupu64.c 2011-09-16 20:16:00.487565006 -0700 +@@ -15,5 +15,5 @@ + out_uint64x1x4_t = vld4_dup_u64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_dupu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_dupu8.c 2011-09-16 20:16:00.487565006 -0700 +@@ -15,5 +15,5 @@ + out_uint8x8x4_t = vld4_dup_u8 (0); + } + +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_lanef32.c 2011-09-16 20:16:00.487565006 -0700 +@@ -16,5 +16,5 @@ + out_float32x2x4_t = vld4_lane_f32 (0, arg1_float32x2x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_lanep16.c 2011-09-16 20:16:00.487565006 -0700 +@@ -16,5 +16,5 @@ + out_poly16x4x4_t = vld4_lane_p16 (0, arg1_poly16x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_lanep8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_lanep8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_lanep8.c 2011-09-16 20:16:00.487565006 -0700 +@@ -16,5 +16,5 @@ + out_poly8x8x4_t = vld4_lane_p8 (0, arg1_poly8x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_lanes16.c 2011-09-16 20:16:00.497565009 -0700 +@@ -16,5 +16,5 @@ + out_int16x4x4_t = vld4_lane_s16 (0, arg1_int16x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_lanes32.c 2011-09-16 20:16:00.497565009 -0700 +@@ -16,5 +16,5 @@ + out_int32x2x4_t = vld4_lane_s32 (0, arg1_int32x2x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_lanes8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_lanes8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_lanes8.c 2011-09-16 20:16:00.497565009 -0700 +@@ -16,5 +16,5 @@ + out_int8x8x4_t = vld4_lane_s8 (0, arg1_int8x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_laneu16.c 2011-09-16 20:16:00.497565009 -0700 +@@ -16,5 +16,5 @@ + out_uint16x4x4_t = vld4_lane_u16 (0, arg1_uint16x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_laneu32.c 2011-09-16 20:16:00.497565009 -0700 +@@ -16,5 +16,5 @@ + out_uint32x2x4_t = vld4_lane_u32 (0, arg1_uint32x2x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], 
\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_laneu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4_laneu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4_laneu8.c 2011-09-16 20:16:00.497565009 -0700 +@@ -16,5 +16,5 @@ + out_uint8x8x4_t = vld4_lane_u8 (0, arg1_uint8x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4f32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4f32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4f32.c 2011-09-16 20:16:00.497565009 -0700 +@@ -15,5 +15,5 @@ + out_float32x2x4_t = vld4_f32 (0); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4p16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4p16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4p16.c 2011-09-16 20:16:00.497565009 -0700 +@@ -15,5 +15,5 @@ + out_poly16x4x4_t = vld4_p16 (0); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4p8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4p8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4p8.c 2011-09-16 20:16:00.497565009 -0700 +@@ -15,5 +15,5 @@ + out_poly8x8x4_t = vld4_p8 (0); + } + +-/* { dg-final { scan-assembler "vld4\.8\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4s16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4s16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4s16.c 2011-09-16 20:16:00.497565009 -0700 +@@ -15,5 +15,5 @@ + out_int16x4x4_t = vld4_s16 (0); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4s32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4s32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4s32.c 2011-09-16 20:16:00.497565009 -0700 +@@ -15,5 +15,5 @@ + out_int32x2x4_t = vld4_s32 (0); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4s64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4s64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4s64.c 2011-09-16 20:16:00.497565009 -0700 +@@ -15,5 +15,5 @@ + out_int64x1x4_t = vld4_s64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4s8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4s8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4s8.c 2011-09-16 20:16:00.497565009 -0700 +@@ -15,5 +15,5 @@ + out_int8x8x4_t = vld4_s8 (0); + } + +-/* { dg-final { 
scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4u16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4u16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4u16.c 2011-09-16 20:16:00.497565009 -0700 +@@ -15,5 +15,5 @@ + out_uint16x4x4_t = vld4_u16 (0); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4u32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4u32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4u32.c 2011-09-16 20:16:00.507565013 -0700 +@@ -15,5 +15,5 @@ + out_uint32x2x4_t = vld4_u32 (0); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4u64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4u64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4u64.c 2011-09-16 20:16:00.507565013 -0700 +@@ -15,5 +15,5 @@ + out_uint64x1x4_t = vld4_u64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4u8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vld4u8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vld4u8.c 2011-09-16 20:16:00.507565013 -0700 +@@ -15,5 +15,5 @@ + out_uint8x8x4_t = vld4_u8 (0); + 
} + +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanef32.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_lane_f32 (arg0_float32_t, arg1_float32x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep16.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_lane_p16 (arg0_poly16_t, arg1_poly16x8_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep8.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_lane_p8 (arg0_poly8_t, arg1_poly8x16_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes16.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_lane_s16 (arg0_int16_t, arg1_int16x8_t, 1); + } + +-/* { 
dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes32.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_lane_s32 (arg0_int32_t, arg1_int32x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes64.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_lane_s64 (arg0_int64_t, arg1_int64x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes8.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_lane_s8 (arg0_int8_t, arg1_int8x16_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu16.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_lane_u16 (arg0_uint16_t, arg1_uint16x8_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu32.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_lane_u32 (arg0_uint32_t, arg1_uint32x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu64.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_lane_u64 (arg0_uint64_t, arg1_uint64x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu8.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_lane_u8 (arg0_uint8_t, arg1_uint8x16_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qf32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Qf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qf32.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_f32 (arg0_float32_t, arg1_float32x4_t); + } + +-/* { dg-final { scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, 
\\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Qp16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qp16.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_p16 (arg0_poly16_t, arg1_poly16x8_t); + } + +-/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qp8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Qp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qp8.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_p8 (arg0_poly8_t, arg1_poly8x16_t); + } + +-/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qs16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Qs16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qs16.c 2011-09-16 20:16:00.507565013 -0700 +@@ -16,5 +16,5 @@ + vst1q_s16 (arg0_int16_t, arg1_int16x8_t); + } + +-/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qs32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Qs32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qs32.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1q_s32 (arg0_int32_t, arg1_int32x4_t); + } + +-/* { dg-final { scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: 
gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qs64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Qs64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qs64.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1q_s64 (arg0_int64_t, arg1_int64x2_t); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qs8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Qs8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qs8.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1q_s8 (arg0_int8_t, arg1_int8x16_t); + } + +-/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Qu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qu16.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1q_u16 (arg0_uint16_t, arg1_uint16x8_t); + } + +-/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Qu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qu32.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1q_u32 (arg0_uint32_t, arg1_uint32x4_t); + } + +-/* { dg-final { scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qu64.c +=================================================================== +--- 
gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Qu64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qu64.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1q_u64 (arg0_uint64_t, arg1_uint64x2_t); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1Qu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1Qu8.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1q_u8 (arg0_uint8_t, arg1_uint8x16_t); + } + +-/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanef32.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_lane_f32 (arg0_float32_t, arg1_float32x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanep16.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_lane_p16 (arg0_poly16_t, arg1_poly16x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanep8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1_lanep8.c 2011-06-24 08:13:40.000000000 -0700 ++++ 
gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanep8.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_lane_p8 (arg0_poly8_t, arg1_poly8x8_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanes16.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_lane_s16 (arg0_int16_t, arg1_int16x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanes32.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_lane_s32 (arg0_int32_t, arg1_int32x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanes64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1_lanes64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanes64.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_lane_s64 (arg0_int64_t, arg1_int64x1_t, 0); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanes8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1_lanes8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_lanes8.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_lane_s8 (arg0_int8_t, arg1_int8x8_t, 
1); + } + +-/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_laneu16.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_lane_u16 (arg0_uint16_t, arg1_uint16x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_laneu32.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_lane_u32 (arg0_uint32_t, arg1_uint32x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_laneu64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1_laneu64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_laneu64.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_lane_u64 (arg0_uint64_t, arg1_uint64x1_t, 0); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_laneu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1_laneu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1_laneu8.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_lane_u8 (arg0_uint8_t, arg1_uint8x8_t, 1); + } + +-/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), 
\\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1f32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1f32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1f32.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_f32 (arg0_float32_t, arg1_float32x2_t); + } + +-/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1p16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1p16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1p16.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_p16 (arg0_poly16_t, arg1_poly16x4_t); + } + +-/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1p8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1p8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1p8.c 2011-09-16 20:16:00.517565031 -0700 +@@ -16,5 +16,5 @@ + vst1_p8 (arg0_poly8_t, arg1_poly8x8_t); + } + +-/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1s16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1s16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1s16.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst1_s16 (arg0_int16_t, arg1_int16x4_t); + } + +-/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1s32.c +=================================================================== 
+--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1s32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1s32.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst1_s32 (arg0_int32_t, arg1_int32x2_t); + } + +-/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1s64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1s64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1s64.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst1_s64 (arg0_int64_t, arg1_int64x1_t); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1s8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1s8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1s8.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst1_s8 (arg0_int8_t, arg1_int8x8_t); + } + +-/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1u16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1u16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1u16.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst1_u16 (arg0_uint16_t, arg1_uint16x4_t); + } + +-/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1u32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1u32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1u32.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst1_u32 (arg0_uint32_t, arg1_uint32x2_t); + } + +-/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { 
scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1u64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1u64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1u64.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst1_u64 (arg0_uint64_t, arg1_uint64x1_t); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1u8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst1u8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst1u8.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst1_u8 (arg0_uint8_t, arg1_uint8x8_t); + } + +-/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanef32.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst2q_lane_f32 (arg0_float32_t, arg1_float32x4x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanep16.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst2q_lane_p16 (arg0_poly16_t, arg1_poly16x8x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes16.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst2q_lane_s16 (arg0_int16_t, arg1_int16x8x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes32.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst2q_lane_s32 (arg0_int32_t, arg1_int32x4x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu16.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst2q_lane_u16 (arg0_uint16_t, arg1_uint16x8x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ 
gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu32.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,5 +16,5 @@ + vst2q_lane_u32 (arg0_uint32_t, arg1_uint32x4x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qf32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Qf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qf32.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,6 +16,6 @@ + vst2q_f32 (arg0_float32_t, arg1_float32x4x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Qp16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qp16.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,6 +16,6 @@ + vst2q_p16 (arg0_poly16_t, arg1_poly16x8x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qp8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Qp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qp8.c 2011-09-16 20:16:00.527565060 -0700 
+@@ -16,6 +16,6 @@ + vst2q_p8 (arg0_poly8_t, arg1_poly8x16x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qs16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Qs16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qs16.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,6 +16,6 @@ + vst2q_s16 (arg0_int16_t, arg1_int16x8x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qs32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Qs32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qs32.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,6 +16,6 @@ + vst2q_s32 (arg0_int32_t, arg1_int32x4x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qs8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Qs8.c 
2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qs8.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,6 +16,6 @@ + vst2q_s8 (arg0_int8_t, arg1_int8x16x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Qu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qu16.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,6 +16,6 @@ + vst2q_u16 (arg0_uint16_t, arg1_uint16x8x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Qu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qu32.c 2011-09-16 20:16:00.527565060 -0700 +@@ -16,6 +16,6 @@ + vst2q_u32 (arg0_uint32_t, arg1_uint32x4x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qu8.c 
+=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2Qu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2Qu8.c 2011-09-16 20:16:00.537565077 -0700 +@@ -16,6 +16,6 @@ + vst2q_u8 (arg0_uint8_t, arg1_uint8x16x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_lanef32.c 2011-09-16 20:16:00.537565077 -0700 +@@ -16,5 +16,5 @@ + vst2_lane_f32 (arg0_float32_t, arg1_float32x2x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_lanep16.c 2011-09-16 20:16:00.537565077 -0700 +@@ -16,5 +16,5 @@ + vst2_lane_p16 (arg0_poly16_t, arg1_poly16x4x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_lanep8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2_lanep8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_lanep8.c 2011-09-16 20:16:00.537565077 -0700 +@@ -16,5 +16,5 @@ + vst2_lane_p8 (arg0_poly8_t, 
arg1_poly8x8x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_lanes16.c 2011-09-16 20:16:00.537565077 -0700 +@@ -16,5 +16,5 @@ + vst2_lane_s16 (arg0_int16_t, arg1_int16x4x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_lanes32.c 2011-09-16 20:16:00.537565077 -0700 +@@ -16,5 +16,5 @@ + vst2_lane_s32 (arg0_int32_t, arg1_int32x2x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_lanes8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2_lanes8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_lanes8.c 2011-09-16 20:16:00.537565077 -0700 +@@ -16,5 +16,5 @@ + vst2_lane_s8 (arg0_int8_t, arg1_int8x8x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } 
*/ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_laneu16.c 2011-09-16 20:16:00.537565077 -0700 +@@ -16,5 +16,5 @@ + vst2_lane_u16 (arg0_uint16_t, arg1_uint16x4x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_laneu32.c 2011-09-16 20:16:00.537565077 -0700 +@@ -16,5 +16,5 @@ + vst2_lane_u32 (arg0_uint32_t, arg1_uint32x2x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_laneu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2_laneu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2_laneu8.c 2011-09-16 20:16:00.537565077 -0700 +@@ -16,5 +16,5 @@ + vst2_lane_u8 (arg0_uint8_t, arg1_uint8x8x2_t, 1); + } + +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2f32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2f32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2f32.c 2011-09-16 20:16:00.537565077 -0700 +@@ -16,5 +16,5 @@ + vst2_f32 (arg0_float32_t, arg1_float32x2x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2p16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2p16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2p16.c 2011-09-16 20:16:00.537565077 -0700 +@@ -16,5 +16,5 @@ + vst2_p16 (arg0_poly16_t, arg1_poly16x4x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2p8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2p8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2p8.c 2011-09-16 20:16:00.547565082 -0700 +@@ -16,5 +16,5 @@ + vst2_p8 (arg0_poly8_t, arg1_poly8x8x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2s16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2s16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2s16.c 2011-09-16 20:16:00.547565082 -0700 +@@ -16,5 +16,5 @@ + vst2_s16 (arg0_int16_t, arg1_int16x4x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2s32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2s32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2s32.c 2011-09-16 20:16:00.547565082 -0700 +@@ -16,5 +16,5 @@ + vst2_s32 (arg0_int32_t, arg1_int32x2x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, 
\\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2s64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2s64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2s64.c 2011-09-16 20:16:00.547565082 -0700 +@@ -16,5 +16,5 @@ + vst2_s64 (arg0_int64_t, arg1_int64x1x2_t); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2s8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2s8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2s8.c 2011-09-16 20:16:00.547565082 -0700 +@@ -16,5 +16,5 @@ + vst2_s8 (arg0_int8_t, arg1_int8x8x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2u16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2u16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2u16.c 2011-09-16 20:16:00.547565082 -0700 +@@ -16,5 +16,5 @@ + vst2_u16 (arg0_uint16_t, arg1_uint16x4x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2u32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2u32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2u32.c 2011-09-16 20:16:00.547565082 -0700 +@@ -16,5 +16,5 @@ + vst2_u32 (arg0_uint32_t, arg1_uint32x2x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2u64.c 
+=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2u64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2u64.c 2011-09-16 20:16:00.557565092 -0700 +@@ -16,5 +16,5 @@ + vst2_u64 (arg0_uint64_t, arg1_uint64x1x2_t); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2u8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst2u8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst2u8.c 2011-09-16 20:16:00.557565092 -0700 +@@ -16,5 +16,5 @@ + vst2_u8 (arg0_uint8_t, arg1_uint8x8x2_t); + } + +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanef32.c 2011-09-16 20:16:00.557565092 -0700 +@@ -16,5 +16,5 @@ + vst3q_lane_f32 (arg0_float32_t, arg1_float32x4x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanep16.c 2011-09-16 20:16:00.557565092 -0700 +@@ -16,5 +16,5 @@ + vst3q_lane_p16 (arg0_poly16_t, arg1_poly16x8x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], 
\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes16.c 2011-09-16 20:16:00.557565092 -0700 +@@ -16,5 +16,5 @@ + vst3q_lane_s16 (arg0_int16_t, arg1_int16x8x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes32.c 2011-09-16 20:16:00.557565092 -0700 +@@ -16,5 +16,5 @@ + vst3q_lane_s32 (arg0_int32_t, arg1_int32x4x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu16.c 2011-09-16 20:16:00.557565092 -0700 +@@ -16,5 +16,5 @@ + vst3q_lane_u16 (arg0_uint16_t, arg1_uint16x8x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu32.c +=================================================================== +--- 
gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu32.c 2011-09-16 20:16:00.557565092 -0700 +@@ -16,5 +16,5 @@ + vst3q_lane_u32 (arg0_uint32_t, arg1_uint32x4x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qf32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Qf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qf32.c 2011-09-16 20:16:00.557565092 -0700 +@@ -16,6 +16,6 @@ + vst3q_f32 (arg0_float32_t, arg1_float32x4x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Qp16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qp16.c 2011-09-16 20:16:00.567565108 -0700 +@@ -16,6 +16,6 @@ + vst3q_p16 (arg0_poly16_t, arg1_poly16x8x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: 
gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qp8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Qp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qp8.c 2011-09-16 20:16:00.567565108 -0700 +@@ -16,6 +16,6 @@ + vst3q_p8 (arg0_poly8_t, arg1_poly8x16x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qs16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Qs16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qs16.c 2011-09-16 20:16:00.567565108 -0700 +@@ -16,6 +16,6 @@ + vst3q_s16 (arg0_int16_t, arg1_int16x8x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qs32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Qs32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qs32.c 2011-09-16 20:16:00.567565108 -0700 +@@ -16,6 +16,6 @@ + vst3q_s32 (arg0_int32_t, arg1_int32x4x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, 
\\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qs8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Qs8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qs8.c 2011-09-16 20:16:00.567565108 -0700 +@@ -16,6 +16,6 @@ + vst3q_s8 (arg0_int8_t, arg1_int8x16x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Qu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qu16.c 2011-09-16 20:16:00.567565108 -0700 +@@ -16,6 +16,6 @@ + vst3q_u16 (arg0_uint16_t, arg1_uint16x8x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Qu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qu32.c 2011-09-16 20:16:00.567565108 -0700 +@@ -16,6 +16,6 @@ + vst3q_u32 (arg0_uint32_t, arg1_uint32x4x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { 
dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3Qu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3Qu8.c 2011-09-16 20:16:00.567565108 -0700 +@@ -16,6 +16,6 @@ + vst3q_u8 (arg0_uint8_t, arg1_uint8x16x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_lanef32.c 2011-09-16 20:16:00.567565108 -0700 +@@ -16,5 +16,5 @@ + vst3_lane_f32 (arg0_float32_t, arg1_float32x2x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_lanep16.c 2011-09-16 20:16:00.577565135 -0700 +@@ -16,5 +16,5 @@ + vst3_lane_p16 (arg0_poly16_t, arg1_poly16x4x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.16\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_lanep8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3_lanep8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_lanep8.c 2011-09-16 20:16:00.577565135 -0700 +@@ -16,5 +16,5 @@ + vst3_lane_p8 (arg0_poly8_t, arg1_poly8x8x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_lanes16.c 2011-09-16 20:16:00.577565135 -0700 +@@ -16,5 +16,5 @@ + vst3_lane_s16 (arg0_int16_t, arg1_int16x4x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_lanes32.c 2011-09-16 20:16:00.577565135 -0700 +@@ -16,5 +16,5 @@ + vst3_lane_s32 (arg0_int32_t, arg1_int32x2x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], 
\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_lanes8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3_lanes8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_lanes8.c 2011-09-16 20:16:00.577565135 -0700 +@@ -16,5 +16,5 @@ + vst3_lane_s8 (arg0_int8_t, arg1_int8x8x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_laneu16.c 2011-09-16 20:16:00.577565135 -0700 +@@ -16,5 +16,5 @@ + vst3_lane_u16 (arg0_uint16_t, arg1_uint16x4x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_laneu32.c 2011-09-16 20:16:00.577565135 -0700 +@@ -16,5 +16,5 @@ + vst3_lane_u32 (arg0_uint32_t, arg1_uint32x2x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_laneu8.c +=================================================================== +--- 
gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3_laneu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3_laneu8.c 2011-09-16 20:16:00.587565144 -0700 +@@ -16,5 +16,5 @@ + vst3_lane_u8 (arg0_uint8_t, arg1_uint8x8x3_t, 1); + } + +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3f32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3f32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3f32.c 2011-09-16 20:16:00.587565144 -0700 +@@ -16,5 +16,5 @@ + vst3_f32 (arg0_float32_t, arg1_float32x2x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3p16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3p16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3p16.c 2011-09-16 20:16:00.587565144 -0700 +@@ -16,5 +16,5 @@ + vst3_p16 (arg0_poly16_t, arg1_poly16x4x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3p8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3p8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3p8.c 2011-09-16 20:16:00.587565144 -0700 +@@ -16,5 +16,5 @@ + vst3_p8 (arg0_poly8_t, arg1_poly8x8x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3s16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3s16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3s16.c 2011-09-16 20:16:00.587565144 -0700 +@@ -16,5 +16,5 @@ + vst3_s16 (arg0_int16_t, arg1_int16x4x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3s32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3s32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3s32.c 2011-09-16 20:16:00.587565144 -0700 +@@ -16,5 +16,5 @@ + vst3_s32 (arg0_int32_t, arg1_int32x2x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3s64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3s64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3s64.c 2011-09-16 20:16:00.587565144 -0700 +@@ -16,5 +16,5 @@ + vst3_s64 (arg0_int64_t, arg1_int64x1x3_t); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3s8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3s8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3s8.c 2011-09-16 20:16:00.587565144 -0700 +@@ -16,5 +16,5 @@ + vst3_s8 (arg0_int8_t, arg1_int8x8x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3u16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3u16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3u16.c 2011-09-16 20:16:00.587565144 -0700 +@@ -16,5 +16,5 @@ + vst3_u16 (arg0_uint16_t, arg1_uint16x4x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3u32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3u32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3u32.c 2011-09-16 20:16:00.587565144 -0700 +@@ -16,5 +16,5 @@ + vst3_u32 (arg0_uint32_t, arg1_uint32x2x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3u64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3u64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3u64.c 2011-09-16 20:16:00.587565144 -0700 +@@ -16,5 +16,5 @@ + vst3_u64 (arg0_uint64_t, arg1_uint64x1x3_t); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3u8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst3u8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst3u8.c 2011-09-16 20:16:00.587565144 -0700 +@@ -16,5 +16,5 @@ + vst3_u8 (arg0_uint8_t, arg1_uint8x8x3_t); + } + +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanef32.c 2011-09-16 20:16:00.597565156 -0700 +@@ -16,5 +16,5 @@ + vst4q_lane_f32 (arg0_float32_t, arg1_float32x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanep16.c 2011-09-16 20:16:00.597565156 -0700 +@@ -16,5 +16,5 @@ + vst4q_lane_p16 (arg0_poly16_t, arg1_poly16x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes16.c 2011-09-16 20:16:00.597565156 -0700 +@@ -16,5 +16,5 @@ + vst4q_lane_s16 (arg0_int16_t, arg1_int16x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 
\]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes32.c 2011-09-16 20:16:00.597565156 -0700 +@@ -16,5 +16,5 @@ + vst4q_lane_s32 (arg0_int32_t, arg1_int32x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu16.c 2011-09-16 20:16:00.597565156 -0700 +@@ -16,5 +16,5 @@ + vst4q_lane_u16 (arg0_uint16_t, arg1_uint16x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu32.c 2011-09-16 20:16:00.597565156 -0700 +@@ -16,5 +16,5 @@ + vst4q_lane_u32 (arg0_uint32_t, arg1_uint32x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qf32.c 
+=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Qf32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qf32.c 2011-09-16 20:16:00.597565156 -0700 +@@ -16,6 +16,6 @@ + vst4q_f32 (arg0_float32_t, arg1_float32x4x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qp16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Qp16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qp16.c 2011-09-16 20:16:00.597565156 -0700 +@@ -16,6 +16,6 @@ + vst4q_p16 (arg0_poly16_t, arg1_poly16x8x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qp8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Qp8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qp8.c 2011-09-16 20:16:00.597565156 -0700 +@@ -16,6 +16,6 @@ + vst4q_p8 (arg0_poly8_t, arg1_poly8x16x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qs16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Qs16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qs16.c 2011-09-16 20:16:00.597565156 -0700 +@@ -16,6 +16,6 @@ + vst4q_s16 (arg0_int16_t, arg1_int16x8x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qs32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Qs32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qs32.c 2011-09-16 20:16:00.597565156 -0700 +@@ -16,6 +16,6 @@ + vst4q_s32 (arg0_int32_t, arg1_int32x4x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qs8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Qs8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qs8.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,6 +16,6 @@ + vst4q_s8 
(arg0_int8_t, arg1_int8x16x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Qu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qu16.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,6 +16,6 @@ + vst4q_u16 (arg0_uint16_t, arg1_uint16x8x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Qu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qu32.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,6 +16,6 @@ + vst4q_u32 (arg0_uint32_t, arg1_uint32x4x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qu8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4Qu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4Qu8.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,6 +16,6 @@ + vst4q_u8 (arg0_uint8_t, arg1_uint8x16x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_lanef32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4_lanef32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_lanef32.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_lane_f32 (arg0_float32_t, arg1_float32x2x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_lanep16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4_lanep16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_lanep16.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_lane_p16 (arg0_poly16_t, arg1_poly16x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], 
\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_lanep8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4_lanep8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_lanep8.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_lane_p8 (arg0_poly8_t, arg1_poly8x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_lanes16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4_lanes16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_lanes16.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_lane_s16 (arg0_int16_t, arg1_int16x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_lanes32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4_lanes32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_lanes32.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_lane_s32 (arg0_int32_t, arg1_int32x2x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { 
cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_lanes8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4_lanes8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_lanes8.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_lane_s8 (arg0_int8_t, arg1_int8x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_laneu16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4_laneu16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_laneu16.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_lane_u16 (arg0_uint16_t, arg1_uint16x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_laneu32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4_laneu32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_laneu32.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_lane_u32 (arg0_uint32_t, arg1_uint32x2x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_laneu8.c +=================================================================== +--- 
gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4_laneu8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4_laneu8.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_lane_u8 (arg0_uint8_t, arg1_uint8x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4f32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4f32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4f32.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_f32 (arg0_float32_t, arg1_float32x2x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4p16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4p16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4p16.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_p16 (arg0_poly16_t, arg1_poly16x4x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4p8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4p8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4p8.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_p8 (arg0_poly8_t, arg1_poly8x8x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4s16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4s16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4s16.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_s16 (arg0_int16_t, arg1_int16x4x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4s32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4s32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4s32.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_s32 (arg0_int32_t, arg1_int32x2x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4s64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4s64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4s64.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_s64 (arg0_int64_t, arg1_int64x1x4_t); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4s8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4s8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4s8.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_s8 (arg0_int8_t, arg1_int8x8x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4u16.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4u16.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4u16.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_u16 (arg0_uint16_t, arg1_uint16x4x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4u32.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4u32.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4u32.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_u32 (arg0_uint32_t, arg1_uint32x2x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4u64.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4u64.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4u64.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_u64 (arg0_uint64_t, arg1_uint64x1x4_t); + } + +-/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4u8.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/neon/vst4u8.c 2011-06-24 08:13:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/neon/vst4u8.c 2011-09-16 20:16:00.607565171 -0700 +@@ -16,5 +16,5 @@ + vst4_u8 (arg0_uint8_t, arg1_uint8x8x4_t); + } + +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/pr46329.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/pr46329.c 2011-09-16 20:16:00.617565191 -0700 +@@ -0,0 +1,9 @@ ++/* { dg-options "-O2" } */ ++/* { dg-add-options arm_neon } */ ++ ++int __attribute__ ((vector_size (32))) x; ++void ++foo (void) ++{ ++ x <<= x; ++} diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106744.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106744.patch new file mode 100644 index 0000000000..004f0131cf --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106744.patch @@ -0,0 +1,21 @@ +2011-05-06 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + From Sergey Grechanik <mouseentity@ispras.ru>, approved for mainline + + * config/arm/arm.c (coproc_secondary_reload_class): Return NO_REGS + for constant vectors. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-05-03 15:18:07 +0000 ++++ new/gcc/config/arm/arm.c 2011-05-06 11:33:02 +0000 +@@ -9193,7 +9193,7 @@ + /* The neon move patterns handle all legitimate vector and struct + addresses. */ + if (TARGET_NEON +- && MEM_P (x) ++ && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR) + && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT + || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT + || VALID_NEON_STRUCT_MODE (mode))) + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106746.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106746.patch new file mode 100644 index 0000000000..ce0272431d --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106746.patch @@ -0,0 +1,24 @@ +2011-05-12 Michael Hope <michael.hope@linaro.org> + + gcc/ + Backport from mainline: + + 2011-05-05 Michael Hope <michael.hope@linaro.org> + + PR pch/45979 + * config/host-linux.c (TRY_EMPTY_VM_SPACE): Define for + __ARM_EABI__ hosts. + +=== modified file 'gcc/config/host-linux.c' +--- old/gcc/config/host-linux.c 2010-11-29 14:09:41 +0000 ++++ new/gcc/config/host-linux.c 2011-05-06 20:19:30 +0000 +@@ -84,6 +84,8 @@ + # define TRY_EMPTY_VM_SPACE 0x60000000 + #elif defined(__mc68000__) + # define TRY_EMPTY_VM_SPACE 0x40000000 ++#elif defined(__ARM_EABI__) ++# define TRY_EMPTY_VM_SPACE 0x60000000 + #else + # define TRY_EMPTY_VM_SPACE 0 + #endif + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106747.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106747.patch new file mode 100644 index 0000000000..7885b7af49 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106747.patch @@ -0,0 +1,640 @@ +2011-05-13 Revital Eres <revital.eres@linaro.org> + + gcc/ + * loop-doloop.c (doloop_condition_get): Support new form of + doloop pattern and use prev_nondebug_insn instead of PREV_INSN. + * config/arm/thumb2.md (*thumb2_addsi3_compare0): Remove "*". + (doloop_end): New. + * config/arm/arm.md (*addsi3_compare0): Remove "*". + * params.def (sms-min-sc): New param flag. 
+ * doc/invoke.texi (sms-min-sc): Document it. + * ddg.c (create_ddg_dep_from_intra_loop_link): If a true dep edge + enters the branch create an anti edge in the opposite direction + to prevent the creation of reg-moves. + * modulo-sched.c: Adjust comment to reflect the fact we are + scheduling closing branch. + (PS_STAGE_COUNT): Rename to CALC_STAGE_COUNT and redefine. + (stage_count): New field in struct partial_schedule. + (calculate_stage_count): New function. + (normalize_sched_times): Rename to reset_sched_times and handle + incrementing the sched time of the nodes by a constant value + passed as parameter. + (duplicate_insns_of_cycles): Skip closing branch. + (sms_schedule_by_order): Schedule closing branch. + (ps_insn_find_column): Handle closing branch. + (sms_schedule): Call reset_sched_times and adjust the code to + support scheduling of the closing branch. Use sms-min-sc. + Support new form of doloop pattern. + (ps_insert_empty_row): Update calls to normalize_sched_times + and rotate_partial_schedule functions. + +=== modified file 'gcc/config/arm/arm.md' +--- old/gcc/config/arm/arm.md 2011-05-06 11:28:27 +0000 ++++ new/gcc/config/arm/arm.md 2011-05-13 13:42:39 +0000 +@@ -791,7 +791,7 @@ + "" + ) + +-(define_insn "*addsi3_compare0" ++(define_insn "addsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (match_operand:SI 1 "s_register_operand" "r, r") + +=== modified file 'gcc/config/arm/thumb2.md' +--- old/gcc/config/arm/thumb2.md 2011-01-03 20:52:22 +0000 ++++ new/gcc/config/arm/thumb2.md 2011-05-11 07:15:47 +0000 +@@ -836,7 +836,7 @@ + "operands[4] = GEN_INT (- INTVAL (operands[2]));" + ) + +-(define_insn "*thumb2_addsi3_compare0" ++(define_insn "thumb2_addsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (match_operand:SI 1 "s_register_operand" "l, 0, r") +@@ -1118,3 +1118,54 @@ + " + operands[2] = GEN_INT (32 - INTVAL (operands[2])); + ") ++ ++;; Define the subtract-one-and-jump insns so loop.c ++;; knows what to generate. ++(define_expand "doloop_end" ++ [(use (match_operand 0 "" "")) ; loop pseudo ++ (use (match_operand 1 "" "")) ; iterations; zero if unknown ++ (use (match_operand 2 "" "")) ; max iterations ++ (use (match_operand 3 "" "")) ; loop level ++ (use (match_operand 4 "" ""))] ; label ++ "TARGET_32BIT" ++ " ++ { ++ /* Currently SMS relies on the do-loop pattern to recognize loops ++ where (1) the control part consists of all insns defining and/or ++ using a certain 'count' register and (2) the loop count can be ++ adjusted by modifying this register prior to the loop. ++ ??? The possible introduction of a new block to initialize the ++ new IV can potentially affect branch optimizations. */ ++ if (optimize > 0 && flag_modulo_sched) ++ { ++ rtx s0; ++ rtx bcomp; ++ rtx loc_ref; ++ rtx cc_reg; ++ rtx insn; ++ rtx cmp; ++ ++ /* Only use this on innermost loops. 
*/ ++ if (INTVAL (operands[3]) > 1) ++ FAIL; ++ if (GET_MODE (operands[0]) != SImode) ++ FAIL; ++ ++ s0 = operands [0]; ++ if (TARGET_THUMB2) ++ insn = emit_insn (gen_thumb2_addsi3_compare0 (s0, s0, GEN_INT (-1))); ++ else ++ insn = emit_insn (gen_addsi3_compare0 (s0, s0, GEN_INT (-1))); ++ ++ cmp = XVECEXP (PATTERN (insn), 0, 0); ++ cc_reg = SET_DEST (cmp); ++ bcomp = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx); ++ loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands [4]); ++ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, ++ gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp, ++ loc_ref, pc_rtx))); ++ DONE; ++ }else ++ FAIL; ++}") ++ + +=== modified file 'gcc/ddg.c' +--- old/gcc/ddg.c 2010-11-30 11:41:24 +0000 ++++ new/gcc/ddg.c 2011-05-11 07:15:47 +0000 +@@ -197,6 +197,11 @@ + } + } + ++ /* If a true dep edge enters the branch create an anti edge in the ++ opposite direction to prevent the creation of reg-moves. */ ++ if ((DEP_TYPE (link) == REG_DEP_TRUE) && JUMP_P (dest_node->insn)) ++ create_ddg_dep_no_link (g, dest_node, src_node, ANTI_DEP, REG_DEP, 1); ++ + latency = dep_cost (link); + e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance); + add_edge_to_ddg (g, e); + +=== modified file 'gcc/doc/invoke.texi' +--- old/gcc/doc/invoke.texi 2011-04-18 11:31:29 +0000 ++++ new/gcc/doc/invoke.texi 2011-05-11 07:15:47 +0000 +@@ -8730,6 +8730,10 @@ + The maximum number of best instructions in the ready list that are considered + for renaming in the selective scheduler. The default value is 2. + ++@item sms-min-sc ++The minimum value of stage count that swing modulo scheduler will ++generate. The default value is 2. ++ + @item max-last-value-rtl + The maximum size measured as number of RTLs that can be recorded in an expression + in combiner for a pseudo register as last known value of that register. The default + +=== modified file 'gcc/loop-doloop.c' +--- old/gcc/loop-doloop.c 2010-11-30 11:41:24 +0000 ++++ new/gcc/loop-doloop.c 2011-05-11 07:15:47 +0000 +@@ -78,6 +78,8 @@ + rtx inc_src; + rtx condition; + rtx pattern; ++ rtx cc_reg = NULL_RTX; ++ rtx reg_orig = NULL_RTX; + + /* The canonical doloop pattern we expect has one of the following + forms: +@@ -96,7 +98,16 @@ + 2) (set (reg) (plus (reg) (const_int -1)) + (set (pc) (if_then_else (reg != 0) + (label_ref (label)) +- (pc))). */ ++ (pc))). ++ ++ Some targets (ARM) do the comparison before the branch, as in the ++ following form: ++ ++ 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0))) ++ (set (reg) (plus (reg) (const_int -1)))]) ++ (set (pc) (if_then_else (cc == NE) ++ (label_ref (label)) ++ (pc))) */ + + pattern = PATTERN (doloop_pat); + +@@ -104,19 +115,47 @@ + { + rtx cond; + rtx prev_insn = prev_nondebug_insn (doloop_pat); ++ rtx cmp_arg1, cmp_arg2; ++ rtx cmp_orig; + +- /* We expect the decrement to immediately precede the branch. */ ++ /* In case the pattern is not PARALLEL we expect two forms ++ of doloop which are cases 2) and 3) above: in case 2) the ++ decrement immediately precedes the branch, while in case 3) ++ the compare and decrement instructions immediately precede ++ the branch. */ + + if (prev_insn == NULL_RTX || !INSN_P (prev_insn)) + return 0; + + cmp = pattern; +- inc = PATTERN (PREV_INSN (doloop_pat)); ++ if (GET_CODE (PATTERN (prev_insn)) == PARALLEL) ++ { ++ /* The third case: the compare and decrement instructions ++ immediately precede the branch. 
*/ ++ cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0); ++ if (GET_CODE (cmp_orig) != SET) ++ return 0; ++ if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE) ++ return 0; ++ cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0); ++ cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1); ++ if (cmp_arg2 != const0_rtx ++ || GET_CODE (cmp_arg1) != PLUS) ++ return 0; ++ reg_orig = XEXP (cmp_arg1, 0); ++ if (XEXP (cmp_arg1, 1) != GEN_INT (-1) ++ || !REG_P (reg_orig)) ++ return 0; ++ cc_reg = SET_DEST (cmp_orig); ++ ++ inc = XVECEXP (PATTERN (prev_insn), 0, 1); ++ } ++ else ++ inc = PATTERN (prev_insn); + /* We expect the condition to be of the form (reg != 0) */ + cond = XEXP (SET_SRC (cmp), 0); + if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx) + return 0; +- + } + else + { +@@ -162,11 +201,15 @@ + return 0; + + if ((XEXP (condition, 0) == reg) ++ /* For the third case: */ ++ || ((cc_reg != NULL_RTX) ++ && (XEXP (condition, 0) == cc_reg) ++ && (reg_orig == reg)) + || (GET_CODE (XEXP (condition, 0)) == PLUS +- && XEXP (XEXP (condition, 0), 0) == reg)) ++ && XEXP (XEXP (condition, 0), 0) == reg)) + { + if (GET_CODE (pattern) != PARALLEL) +- /* The second form we expect: ++ /* For the second form we expect: + + (set (reg) (plus (reg) (const_int -1)) + (set (pc) (if_then_else (reg != 0) +@@ -181,7 +224,24 @@ + (set (reg) (plus (reg) (const_int -1))) + (additional clobbers and uses)]) + +- So we return that form instead. ++ For the third form we expect: ++ ++ (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0)) ++ (set (reg) (plus (reg) (const_int -1)))]) ++ (set (pc) (if_then_else (cc == NE) ++ (label_ref (label)) ++ (pc))) ++ ++ which is equivalent to the following: ++ ++ (parallel [(set (cc) (compare (reg, 1)) ++ (set (reg) (plus (reg) (const_int -1))) ++ (set (pc) (if_then_else (NE == cc) ++ (label_ref (label)) ++ (pc))))]) ++ ++ So we return the second form instead for the two cases. ++ + */ + condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx); + + +=== modified file 'gcc/modulo-sched.c' +--- old/gcc/modulo-sched.c 2011-02-14 17:59:10 +0000 ++++ new/gcc/modulo-sched.c 2011-05-11 07:15:47 +0000 +@@ -84,14 +84,13 @@ + II cycles (i.e. use register copies to prevent a def from overwriting + itself before reaching the use). + +- SMS works with countable loops (1) whose control part can be easily +- decoupled from the rest of the loop and (2) whose loop count can +- be easily adjusted. This is because we peel a constant number of +- iterations into a prologue and epilogue for which we want to avoid +- emitting the control part, and a kernel which is to iterate that +- constant number of iterations less than the original loop. So the +- control part should be a set of insns clearly identified and having +- its own iv, not otherwise used in the loop (at-least for now), which ++ SMS works with countable loops whose loop count can be easily ++ adjusted. This is because we peel a constant number of iterations ++ into a prologue and epilogue for which we want to avoid emitting ++ the control part, and a kernel which is to iterate that constant ++ number of iterations less than the original loop. So the control ++ part should be a set of insns clearly identified and having its ++ own iv, not otherwise used in the loop (at-least for now), which + initializes a register before the loop to the number of iterations. 
+ Currently SMS relies on the do-loop pattern to recognize such loops, + where (1) the control part comprises of all insns defining and/or +@@ -116,8 +115,10 @@ + + /* The number of different iterations the nodes in ps span, assuming + the stage boundaries are placed efficiently. */ +-#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \ +- + 1 + (ps)->ii - 1) / (ps)->ii) ++#define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \ ++ + 1 + ii - 1) / ii) ++/* The stage count of ps. */ ++#define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count) + + /* A single instruction in the partial schedule. */ + struct ps_insn +@@ -155,6 +156,8 @@ + int max_cycle; + + ddg_ptr g; /* The DDG of the insns in the partial schedule. */ ++ ++ int stage_count; /* The stage count of the partial schedule. */ + }; + + /* We use this to record all the register replacements we do in +@@ -195,7 +198,7 @@ + rtx, rtx); + static void duplicate_insns_of_cycles (partial_schedule_ptr, + int, int, int, rtx); +- ++static int calculate_stage_count (partial_schedule_ptr ps); + #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap) + #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time) + #define SCHED_FIRST_REG_MOVE(x) \ +@@ -310,10 +313,10 @@ + either a single (parallel) branch-on-count or a (non-parallel) + branch immediately preceded by a single (decrement) insn. */ + first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail +- : PREV_INSN (tail)); ++ : prev_nondebug_insn (tail)); + + for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn)) +- if (reg_mentioned_p (reg, insn)) ++ if (reg_mentioned_p (reg, insn) && !DEBUG_INSN_P (insn)) + { + if (dump_file) + { +@@ -569,13 +572,12 @@ + } + } + +-/* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values +- of SCHED_ROW and SCHED_STAGE. */ ++/* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of ++ SCHED_ROW and SCHED_STAGE. */ + static void +-normalize_sched_times (partial_schedule_ptr ps) ++reset_sched_times (partial_schedule_ptr ps, int amount) + { + int row; +- int amount = PS_MIN_CYCLE (ps); + int ii = ps->ii; + ps_insn_ptr crr_insn; + +@@ -584,19 +586,43 @@ + { + ddg_node_ptr u = crr_insn->node; + int normalized_time = SCHED_TIME (u) - amount; ++ int new_min_cycle = PS_MIN_CYCLE (ps) - amount; ++ int sc_until_cycle_zero, stage; + +- if (dump_file) +- fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\ +- min_cycle=%d\n", crr_insn->node->cuid, SCHED_TIME +- (u), ps->min_cycle); ++ if (dump_file) ++ { ++ /* Print the scheduling times after the rotation. */ ++ fprintf (dump_file, "crr_insn->node=%d (insn id %d), " ++ "crr_insn->cycle=%d, min_cycle=%d", crr_insn->node->cuid, ++ INSN_UID (crr_insn->node->insn), SCHED_TIME (u), ++ normalized_time); ++ if (JUMP_P (crr_insn->node->insn)) ++ fprintf (dump_file, " (branch)"); ++ fprintf (dump_file, "\n"); ++ } ++ + gcc_assert (SCHED_TIME (u) >= ps->min_cycle); + gcc_assert (SCHED_TIME (u) <= ps->max_cycle); + SCHED_TIME (u) = normalized_time; +- SCHED_ROW (u) = normalized_time % ii; +- SCHED_STAGE (u) = normalized_time / ii; ++ SCHED_ROW (u) = SMODULO (normalized_time, ii); ++ ++ /* The calculation of stage count is done adding the number ++ of stages before cycle zero and after cycle zero. 
*/ ++ sc_until_cycle_zero = CALC_STAGE_COUNT (-1, new_min_cycle, ii); ++ ++ if (SCHED_TIME (u) < 0) ++ { ++ stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii); ++ SCHED_STAGE (u) = sc_until_cycle_zero - stage; ++ } ++ else ++ { ++ stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii); ++ SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1; ++ } + } + } +- ++ + /* Set SCHED_COLUMN of each node according to its position in PS. */ + static void + set_columns_for_ps (partial_schedule_ptr ps) +@@ -646,9 +672,12 @@ + + /* Do not duplicate any insn which refers to count_reg as it + belongs to the control part. ++ The closing branch is scheduled as well and thus should ++ be ignored. + TODO: This should be done by analyzing the control part of + the loop. */ +- if (reg_mentioned_p (count_reg, u_node->insn)) ++ if (reg_mentioned_p (count_reg, u_node->insn) ++ || JUMP_P (ps_ij->node->insn)) + continue; + + if (for_prolog) +@@ -1009,9 +1038,11 @@ + continue; + } + +- /* Don't handle BBs with calls or barriers, or !single_set insns, +- or auto-increment insns (to avoid creating invalid reg-moves +- for the auto-increment insns). ++ /* Don't handle BBs with calls or barriers or auto-increment insns ++ (to avoid creating invalid reg-moves for the auto-increment insns), ++ or !single_set with the exception of instructions that include ++ count_reg---these instructions are part of the control part ++ that do-loop recognizes. + ??? Should handle auto-increment insns. + ??? Should handle insns defining subregs. */ + for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn)) +@@ -1021,7 +1052,8 @@ + if (CALL_P (insn) + || BARRIER_P (insn) + || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn) +- && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE) ++ && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE ++ && !reg_mentioned_p (count_reg, insn)) + || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0) + || (INSN_P (insn) && (set = single_set (insn)) + && GET_CODE (SET_DEST (set)) == SUBREG)) +@@ -1049,7 +1081,11 @@ + continue; + } + +- if (! (g = create_ddg (bb, 0))) ++ /* Always schedule the closing branch with the rest of the ++ instructions. The branch is rotated to be in row ii-1 at the ++ end of the scheduling procedure to make sure it's the last ++ instruction in the iteration. */ ++ if (! (g = create_ddg (bb, 1))) + { + if (dump_file) + fprintf (dump_file, "SMS create_ddg failed\n"); +@@ -1157,14 +1193,17 @@ + + ps = sms_schedule_by_order (g, mii, maxii, node_order); + +- if (ps){ +- stage_count = PS_STAGE_COUNT (ps); +- gcc_assert(stage_count >= 1); +- } ++ if (ps) ++ { ++ stage_count = calculate_stage_count (ps); ++ gcc_assert(stage_count >= 1); ++ PS_STAGE_COUNT(ps) = stage_count; ++ } + +- /* Stage count of 1 means that there is no interleaving between +- iterations, let the scheduling passes do the job. */ +- if (stage_count <= 1 ++ /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of ++ 1 means that there is no interleaving between iterations thus ++ we let the scheduling passes do the job in this case. */ ++ if (stage_count < (unsigned) PARAM_VALUE (PARAM_SMS_MIN_SC) + || (count_init && (loop_count <= stage_count)) + || (flag_branch_probabilities && (trip_count <= stage_count))) + { +@@ -1182,32 +1221,24 @@ + else + { + struct undo_replace_buff_elem *reg_move_replaces; +- +- if (dump_file) +- { ++ int amount = SCHED_TIME (g->closing_branch) + 1; ++ ++ /* Set the stage boundaries. The closing_branch was scheduled ++ and should appear in the last (ii-1) row. 
*/ ++ reset_sched_times (ps, amount); ++ rotate_partial_schedule (ps, amount); ++ set_columns_for_ps (ps); ++ ++ canon_loop (loop); ++ ++ if (dump_file) ++ { + fprintf (dump_file, + "SMS succeeded %d %d (with ii, sc)\n", ps->ii, + stage_count); + print_partial_schedule (ps, dump_file); +- fprintf (dump_file, +- "SMS Branch (%d) will later be scheduled at cycle %d.\n", +- g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1); + } +- +- /* Set the stage boundaries. If the DDG is built with closing_branch_deps, +- the closing_branch was scheduled and should appear in the last (ii-1) +- row. Otherwise, we are free to schedule the branch, and we let nodes +- that were scheduled at the first PS_MIN_CYCLE cycle appear in the first +- row; this should reduce stage_count to minimum. +- TODO: Revisit the issue of scheduling the insns of the +- control part relative to the branch when the control part +- has more than one insn. */ +- normalize_sched_times (ps); +- rotate_partial_schedule (ps, PS_MIN_CYCLE (ps)); +- set_columns_for_ps (ps); +- +- canon_loop (loop); +- ++ + /* case the BCT count is not known , Do loop-versioning */ + if (count_reg && ! count_init) + { +@@ -1760,12 +1791,6 @@ + continue; + } + +- if (JUMP_P (insn)) /* Closing branch handled later. */ +- { +- RESET_BIT (tobe_scheduled, u); +- continue; +- } +- + if (TEST_BIT (sched_nodes, u)) + continue; + +@@ -1893,8 +1918,8 @@ + if (dump_file) + fprintf (dump_file, "split_row=%d\n", split_row); + +- normalize_sched_times (ps); +- rotate_partial_schedule (ps, ps->min_cycle); ++ reset_sched_times (ps, PS_MIN_CYCLE (ps)); ++ rotate_partial_schedule (ps, PS_MIN_CYCLE (ps)); + + rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr)); + for (row = 0; row < split_row; row++) +@@ -2571,6 +2596,7 @@ + ps_insn_ptr next_ps_i; + ps_insn_ptr first_must_follow = NULL; + ps_insn_ptr last_must_precede = NULL; ++ ps_insn_ptr last_in_row = NULL; + int row; + + if (! ps_i) +@@ -2597,8 +2623,37 @@ + else + last_must_precede = next_ps_i; + } ++ /* The closing branch must be the last in the row. */ ++ if (must_precede ++ && TEST_BIT (must_precede, next_ps_i->node->cuid) ++ && JUMP_P (next_ps_i->node->insn)) ++ return false; ++ ++ last_in_row = next_ps_i; + } + ++ /* The closing branch is scheduled as well. Make sure there is no ++ dependent instruction after it as the branch should be the last ++ instruction in the row. */ ++ if (JUMP_P (ps_i->node->insn)) ++ { ++ if (first_must_follow) ++ return false; ++ if (last_in_row) ++ { ++ /* Make the branch the last in the row. New instructions ++ will be inserted at the beginning of the row or after the ++ last must_precede instruction thus the branch is guaranteed ++ to remain the last instruction in the row. */ ++ last_in_row->next_in_row = ps_i; ++ ps_i->prev_in_row = last_in_row; ++ ps_i->next_in_row = NULL; ++ } ++ else ++ ps->rows[row] = ps_i; ++ return true; ++ } ++ + /* Now insert the node after INSERT_AFTER_PSI. */ + + if (! last_must_precede) +@@ -2820,6 +2875,24 @@ + return ps_i; + } + ++/* Calculate the stage count of the partial schedule PS. The calculation ++ takes into account the rotation to bring the closing branch to row ++ ii-1. 
*/ ++int ++calculate_stage_count (partial_schedule_ptr ps) ++{ ++ int rotation_amount = (SCHED_TIME (ps->g->closing_branch)) + 1; ++ int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount; ++ int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount; ++ int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii); ++ ++ /* The calculation of stage count is done adding the number of stages ++ before cycle zero and after cycle zero. */ ++ stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii); ++ ++ return stage_count; ++} ++ + /* Rotate the rows of PS such that insns scheduled at time + START_CYCLE will appear in row 0. Updates max/min_cycles. */ + void + +=== modified file 'gcc/params.def' +--- old/gcc/params.def 2011-04-18 11:31:29 +0000 ++++ new/gcc/params.def 2011-05-11 07:15:47 +0000 +@@ -344,6 +344,11 @@ + "sms-max-ii-factor", + "A factor for tuning the upper bound that swing modulo scheduler uses for scheduling a loop", + 100, 0, 0) ++/* The minimum value of stage count that swing modulo scheduler will generate. */ ++DEFPARAM(PARAM_SMS_MIN_SC, ++ "sms-min-sc", ++ "The minimum value of stage count that swing modulo scheduler will generate.", ++ 2, 1, 1) + DEFPARAM(PARAM_SMS_DFA_HISTORY, + "sms-dfa-history", + "The number of cycles the swing modulo scheduler considers when checking conflicts using DFA", + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106750.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106750.patch new file mode 100644 index 0000000000..9c62102db5 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106750.patch @@ -0,0 +1,30 @@ + 2011-05-13 Revital Eres <revital.eres@linaro.org> + + gcc/ + * ddg.c (free_ddg_all_sccs): Free sccs field in struct ddg_all_sccs. + * modulo-sched.c (sms_schedule): Avoid unfreed memory when SMS fails. + +=== modified file 'gcc/ddg.c' +--- old/gcc/ddg.c 2011-05-11 07:15:47 +0000 ++++ new/gcc/ddg.c 2011-05-13 16:03:40 +0000 +@@ -1016,6 +1016,7 @@ + for (i = 0; i < all_sccs->num_sccs; i++) + free_scc (all_sccs->sccs[i]); + ++ free (all_sccs->sccs); + free (all_sccs); + } + + +=== modified file 'gcc/modulo-sched.c' +--- old/gcc/modulo-sched.c 2011-05-11 07:15:47 +0000 ++++ new/gcc/modulo-sched.c 2011-05-13 16:03:40 +0000 +@@ -1216,7 +1216,6 @@ + fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count); + fprintf (dump_file, ")\n"); + } +- continue; + } + else + { + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106751.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106751.patch new file mode 100644 index 0000000000..c26ee5bde4 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106751.patch @@ -0,0 +1,134 @@ +2011-06-02 Chung-Lin Tang <cltang@codesourcery.com> + + Backport from mainline: + + 2011-03-21 Chung-Lin Tang <cltang@codesourcery.com> + + gcc/ + * simplify-rtx.c (simplify_binary_operation_1): Handle + (xor (and A B) C) case when B and C are both constants. + + gcc/testsuite/ + * gcc.target/arm/xor-and.c: New. + + 2011-03-18 Chung-Lin Tang <cltang@codesourcery.com> + + gcc/ + * combine.c (try_combine): Do simplification only call of + subst() on i2 even when i1 is present. Update comments. + + gcc/testsuite/ + * gcc.target/arm/unsigned-extend-1.c: New. 
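As an aside on the r106751 entry above: the simplify-rtx.c hunk that follows rewrites (A & B) ^ C when B and C are constants and every bit set in B is also set in C, i.e. (~C & B) == 0. The stand-alone program below is only an illustrative sketch, not code from the patch; the constants B and C are made-up example values chosen to satisfy that precondition, and it simply brute-forces the identity the transformation relies on, (A & B) ^ C == (~A & C) | (~B & C).

/* Illustrative check only -- not part of the patches above.  It verifies,
   by brute force over 16-bit A, the identity used by the backported
   simplify-rtx.c change when B and C are constants and (~C & B) == 0:

       (A & B) ^ C  ==  (~A & C) | (~B & C)

   The constants below are arbitrary example values (assumption), chosen so
   that every bit set in B is also set in C.  */
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  const uint32_t b = 0x00f0;   /* example B: all of its bits are inside C */
  const uint32_t c = 0x00ff;   /* example C */
  uint32_t a;

  if ((~c & b) != 0)
    {
      puts ("precondition (~C & B) == 0 not met");
      return 1;
    }

  for (a = 0; a <= 0xffff; a++)
    if (((a & b) ^ c) != ((~a & c) | (~b & c)))
      {
        printf ("mismatch for A = 0x%x\n", a);
        return 1;
      }

  puts ("identity holds for all 16-bit A");
  return 0;
}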
+ +=== modified file 'gcc/combine.c' +--- old/gcc/combine.c 2011-05-06 11:28:27 +0000 ++++ new/gcc/combine.c 2011-05-27 14:31:18 +0000 +@@ -3089,7 +3089,7 @@ + /* It is possible that the source of I2 or I1 may be performing + an unneeded operation, such as a ZERO_EXTEND of something + that is known to have the high part zero. Handle that case +- by letting subst look at the innermost one of them. ++ by letting subst look at the inner insns. + + Another way to do this would be to have a function that tries + to simplify a single insn instead of merging two or more +@@ -3114,11 +3114,9 @@ + subst_low_luid = DF_INSN_LUID (i1); + i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0); + } +- else +- { +- subst_low_luid = DF_INSN_LUID (i2); +- i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0); +- } ++ ++ subst_low_luid = DF_INSN_LUID (i2); ++ i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0); + } + + n_occurrences = 0; /* `subst' counts here */ + +=== modified file 'gcc/simplify-rtx.c' +--- old/gcc/simplify-rtx.c 2011-03-26 09:24:06 +0000 ++++ new/gcc/simplify-rtx.c 2011-05-27 14:31:18 +0000 +@@ -2484,6 +2484,46 @@ + XEXP (op0, 1), mode), + op1); + ++ /* Given (xor (and A B) C), using P^Q == (~P&Q) | (~Q&P), ++ we can transform like this: ++ (A&B)^C == ~(A&B)&C | ~C&(A&B) ++ == (~A|~B)&C | ~C&(A&B) * DeMorgan's Law ++ == ~A&C | ~B&C | A&(~C&B) * Distribute and re-order ++ Attempt a few simplifications when B and C are both constants. */ ++ if (GET_CODE (op0) == AND ++ && CONST_INT_P (op1) ++ && CONST_INT_P (XEXP (op0, 1))) ++ { ++ rtx a = XEXP (op0, 0); ++ rtx b = XEXP (op0, 1); ++ rtx c = op1; ++ HOST_WIDE_INT bval = INTVAL (b); ++ HOST_WIDE_INT cval = INTVAL (c); ++ ++ rtx na_c ++ = simplify_binary_operation (AND, mode, ++ simplify_gen_unary (NOT, mode, a, mode), ++ c); ++ if ((~cval & bval) == 0) ++ { ++ /* Try to simplify ~A&C | ~B&C. */ ++ if (na_c != NULL_RTX) ++ return simplify_gen_binary (IOR, mode, na_c, ++ GEN_INT (~bval & cval)); ++ } ++ else ++ { ++ /* If ~A&C is zero, simplify A&(~C&B) | ~B&C. */ ++ if (na_c == const0_rtx) ++ { ++ rtx a_nc_b = simplify_gen_binary (AND, mode, a, ++ GEN_INT (~cval & bval)); ++ return simplify_gen_binary (IOR, mode, a_nc_b, ++ GEN_INT (~bval & cval)); ++ } ++ } ++ } ++ + /* (xor (comparison foo bar) (const_int 1)) can become the reversed + comparison if STORE_FLAG_VALUE is 1. 
*/ + if (STORE_FLAG_VALUE == 1 + +=== added file 'gcc/testsuite/gcc.target/arm/unsigned-extend-1.c' +--- old/gcc/testsuite/gcc.target/arm/unsigned-extend-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/unsigned-extend-1.c 2011-05-27 14:31:18 +0000 +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv6" } */ ++ ++unsigned char foo (unsigned char c) ++{ ++ return (c >= '0') && (c <= '9'); ++} ++ ++/* { dg-final { scan-assembler-not "uxtb" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/xor-and.c' +--- old/gcc/testsuite/gcc.target/arm/xor-and.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/xor-and.c 2011-05-27 14:31:18 +0000 +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -march=armv6" } */ ++ ++unsigned short foo (unsigned short x) ++{ ++ x ^= 0x4002; ++ x >>= 1; ++ x |= 0x8000; ++ return x; ++} ++ ++/* { dg-final { scan-assembler "orr" } } */ ++/* { dg-final { scan-assembler-not "mvn" } } */ ++/* { dg-final { scan-assembler-not "uxth" } } */ + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106753.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106753.patch new file mode 100644 index 0000000000..bda39e8faa --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106753.patch @@ -0,0 +1,5027 @@ +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + * gimple.c (gimple_build_call_internal_1): Add missing call to + gimple_call_reset_alias_info. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/testsuite/ + Backport from mainline: + + 2011-05-03 Richard Sandiford <richard.sandiford@linaro.org> + + * gcc.dg/vect/vect-strided-u16-i3.c: New test. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/testsuite/ + Backport from mainline: + + 2011-05-03 Richard Sandiford <richard.sandiford@linaro.org> + + * lib/target-supports.exp (check_effective_target_vect_strided): + Replace with... + (check_effective_target_vect_strided2) + (check_effective_target_vect_strided3) + (check_effective_target_vect_strided4) + (check_effective_target_vect_strided8): ...these new functions. + + * gcc.dg/vect/O3-pr39675-2.c: Update accordingly. + * gcc.dg/vect/costmodel/ppc/costmodel-slp-12.c: Likewise. + * gcc.dg/vect/fast-math-slp-27.c: Likewise. + * gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c: Likewise. + * gcc.dg/vect/pr37539.c: Likewise. + * gcc.dg/vect/slp-11a.c: Likewise. + * gcc.dg/vect/slp-11b.c: Likewise. + * gcc.dg/vect/slp-11c.c: Likewise. + * gcc.dg/vect/slp-12a.c: Likewise. + * gcc.dg/vect/slp-12b.c: Likewise. + * gcc.dg/vect/slp-18.c: Likewise. + * gcc.dg/vect/slp-19a.c: Likewise. + * gcc.dg/vect/slp-19b.c: Likewise. + * gcc.dg/vect/slp-21.c: Likewise. + * gcc.dg/vect/slp-23.c: Likewise. + * gcc.dg/vect/vect-cselim-1.c: Likewise. + + * gcc.dg/vect/fast-math-vect-complex-3.c: Use vect_stridedN + instead of vect_interleave && vect_extract_even_odd. + * gcc.dg/vect/no-scevccp-outer-10a.c: Likewise. + * gcc.dg/vect/no-scevccp-outer-10b.c: Likewise. + * gcc.dg/vect/no-scevccp-outer-20.c: Likewise. + * gcc.dg/vect/vect-1.c: Likewise. + * gcc.dg/vect/vect-10.c: Likewise. + * gcc.dg/vect/vect-98.c: Likewise. + * gcc.dg/vect/vect-107.c: Likewise. + * gcc.dg/vect/vect-strided-a-mult.c: Likewise. + * gcc.dg/vect/vect-strided-a-u16-i2.c: Likewise. + * gcc.dg/vect/vect-strided-a-u16-i4.c: Likewise. + * gcc.dg/vect/vect-strided-a-u16-mult.c: Likewise. 
+ * gcc.dg/vect/vect-strided-a-u32-mult.c: Likewise. + * gcc.dg/vect/vect-strided-a-u8-i2-gap.c: Likewise. + * gcc.dg/vect/vect-strided-a-u8-i8-gap2.c: Likewise. + * gcc.dg/vect/vect-strided-a-u8-i8-gap7.c: Likewise. + * gcc.dg/vect/vect-strided-float.c: Likewise. + * gcc.dg/vect/vect-strided-mult-char-ls.c: Likewise. + * gcc.dg/vect/vect-strided-mult.c: Likewise. + * gcc.dg/vect/vect-strided-same-dr.c: Likewise. + * gcc.dg/vect/vect-strided-u16-i2.c: Likewise. + * gcc.dg/vect/vect-strided-u16-i4.c: Likewise. + * gcc.dg/vect/vect-strided-u32-i4.c: Likewise. + * gcc.dg/vect/vect-strided-u32-i8.c: Likewise. + * gcc.dg/vect/vect-strided-u32-mult.c: Likewise. + * gcc.dg/vect/vect-strided-u8-i2-gap.c: Likewise. + * gcc.dg/vect/vect-strided-u8-i2.c: Likewise. + * gcc.dg/vect/vect-strided-u8-i8-gap2.c: Likewise. + * gcc.dg/vect/vect-strided-u8-i8-gap4.c: Likewise. + * gcc.dg/vect/vect-strided-u8-i8-gap7.c: Likewise. + * gcc.dg/vect/vect-strided-u8-i8.c: Likewise. + * gcc.dg/vect/vect-vfa-03.c: Likewise. + + * gcc.dg/vect/no-scevccp-outer-18.c: Add vect_stridedN to the + target condition. + * gcc.dg/vect/pr30843.c: Likewise. + * gcc.dg/vect/pr33866.c: Likewise. + * gcc.dg/vect/slp-reduc-6.c: Likewise. + * gcc.dg/vect/vect-strided-store-a-u8-i2.c: Likewise. + * gcc.dg/vect/vect-strided-store-u16-i4.c: Likewise. + * gcc.dg/vect/vect-strided-store-u32-i2.c: Likewise. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/testsuite/ + Backport from mainline: + + 2011-05-03 Richard Sandiford <richard.sandiford@linaro.org> + + * gcc.dg/vect/slp-11.c: Split into... + * gcc.dg/vect/slp-11a.c, gcc.dg/vect/slp-11b.c, + gcc.dg/vect/slp-11c.c: ...these tests. + * gcc.dg/vect/slp-12a.c: Split 4-stride loop into... + * gcc.dg/vect/slp-12c.c: ...this new test. + * gcc.dg/vect/slp-19.c: Split into... + * gcc.dg/vect/slp-19a.c, gcc.dg/vect/slp-19b.c, + gcc.dg/vect/slp-19c.c: ...these new tests. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/testsuite/ + Backport from mainline: + + 2011-05-03 Richard Sandiford <richard.sandiford@linaro.org> + + * lib/target-supports.exp + (check_effective_target_vect_extract_even_odd_wide): Delete. + (check_effective_target_vect_strided_wide): Likewise. + * gcc.dg/vect/O3-pr39675-2.c: Use the non-wide versions instead. + * gcc.dg/vect/fast-math-pr35982.c: Likewise. + * gcc.dg/vect/fast-math-vect-complex-3.c: Likewise. + * gcc.dg/vect/pr37539.c: Likewise. + * gcc.dg/vect/slp-11.c: Likewise. + * gcc.dg/vect/slp-12a.c: Likewise. + * gcc.dg/vect/slp-12b.c: Likewise. + * gcc.dg/vect/slp-19.c: Likewise. + * gcc.dg/vect/slp-23.c: Likewise. + * gcc.dg/vect/vect-1.c: Likewise. + * gcc.dg/vect/vect-98.c: Likewise. + * gcc.dg/vect/vect-107.c: Likewise. + * gcc.dg/vect/vect-strided-float.c: Likewise. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/testsuite/ + Backport from mainline: + + 2011-04-21 Richard Sandiford <richard.sandiford@linaro.org> + + * gcc.dg/vect/vect.exp: Run the main tests twice, one with -flto + and once without. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainlie: + + 2011-05-03 Richard Sandiford <richard.sandiford@linaro.org> + + * config/arm/neon.md (vec_load_lanes<mode><mode>): New expanders, + (vec_store_lanes<mode><mode>): Likewise. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-05-03 Richard Sandiford <richard.sandiford@linaro.org> + + * doc/md.texi (vec_load_lanes, vec_store_lanes): Document. 
+ * optabs.h (COI_vec_load_lanes, COI_vec_store_lanes): New + convert_optab_index values. + (vec_load_lanes_optab, vec_store_lanes_optab): New convert optabs. + * genopinit.c (optabs): Initialize the new optabs. + * internal-fn.def (LOAD_LANES, STORE_LANES): New internal functions. + * internal-fn.c (get_multi_vector_move, expand_LOAD_LANES) + (expand_STORE_LANES): New functions. + * tree.h (build_array_type_nelts): Declare. + * tree.c (build_array_type_nelts): New function. + * tree-vectorizer.h (vect_model_store_cost): Add a bool argument. + (vect_model_load_cost): Likewise. + (vect_store_lanes_supported, vect_load_lanes_supported) + (vect_record_strided_load_vectors): Declare. + * tree-vect-data-refs.c (vect_lanes_optab_supported_p) + (vect_store_lanes_supported, vect_load_lanes_supported): New functions. + (vect_transform_strided_load): Split out statement recording into... + (vect_record_strided_load_vectors): ...this new function. + * tree-vect-stmts.c (create_vector_array, read_vector_array) + (write_vector_array, create_array_ref): New functions. + (vect_model_store_cost): Add store_lanes_p argument. + (vect_model_load_cost): Add load_lanes_p argument. + (vectorizable_store): Try to use store-lanes functions for + interleaved stores. + (vectorizable_load): Likewise load-lanes and loads. + * tree-vect-slp.c (vect_get_and_check_slp_defs): Update call + to vect_model_store_cost. + (vect_build_slp_tree): Likewise vect_model_load_cost. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-04-20 Richard Sandiford <richard.sandiford@linaro.org> + + * tree-vect-stmts.c (vectorizable_store): Only chain one related + statement per copy. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + * tree-inline.c (estimate_num_insns): Likewise. + + Backport from mainline: + + 2011-04-20 Richard Sandiford <richard.sandiford@linaro.org> + + * Makefile.in (INTERNAL_FN_DEF, INTERNAL_FN_H): Define. + (GIMPLE_H): Include $(INTERNAL_FN_H). + (OBJS-common): Add internal-fn.o. + (internal-fn.o): New rule. + * internal-fn.def: New file. + * internal-fn.h: Likewise. + * internal-fn.c: Likewise. + * gimple.h: Include internal-fn.h. + (GF_CALL_INTERNAL): New gf_mask. + (gimple_statement_call): Put fntype into a union with a new + internal_fn field. + (gimple_build_call_internal): Declare. + (gimple_build_call_internal_vec): Likewise. + (gimple_call_same_target_p): Likewise. + (gimple_call_internal_p): New function. + (gimple_call_internal_fn): Likewise. + (gimple_call_set_fn): Assert that the function is not internal. + (gimple_call_set_fndecl): Likewise. + (gimple_call_set_internal_fn): New function. + (gimple_call_addr_fndecl): Handle null functions. + (gimple_call_return_type): Likewise. + [---- Plus backport adjustments: + (GF_CALL_INTERNAL_FN_SHIFT): New macro. + (GF_CALL_INTERNAL_FN): New gf_mask. + ----] + * gimple.c (gimple_build_call_internal_1): New function. + (gimple_build_call_internal): Likewise. + (gimple_build_call_internal_vec): Likewise. + (gimple_call_same_target_p): Likewise. + (gimple_call_flags): Handle calls to internal functions. + (gimple_call_fnspec): New function. + (gimple_call_arg_flags, gimple_call_return_flags): Use it. + (gimple_has_side_effects): Handle null functions. + (gimple_rhs_has_side_effects): Likewise. + (gimple_call_copy_skip_args): Handle calls to internal functions. + * cfgexpand.c (expand_call_stmt): Likewise. + * expr.c (expand_expr_real_1): Assert that the call isn't internal. 
+ * gimple-low.c (gimple_check_call_args): Handle calls to internal + functions. + * gimple-pretty-print.c (dump_gimple_call): Likewise. + * ipa-prop.c (ipa_analyze_call_uses): Handle null functions. + * tree-cfg.c (verify_gimple_call): Handle calls to internal functions. + (do_warn_unused_result): Likewise. + [---- Plus backport adjustments: + (verify_stmt): Likewise. + ----] + * tree-eh.c (same_handler_p): Use gimple_call_same_target_p. + * tree-ssa-ccp.c (ccp_fold_stmt): Handle calls to internal functions. + [---- Plus backport adjustments: + (fold_gimple_call): Likewise. + ----] + * tree-ssa-dom.c (hashable_expr): Use the gimple statement to record + the target of a call. + (initialize_hash_element): Update accordingly. + (hashable_expr_equal_p): Use gimple_call_same_target_p. + (iterative_hash_hashable_expr): Handle calls to internal functions. + (print_expr_hash_elt): Likewise. + * tree-ssa-pre.c (can_value_number_call): Likewise. + (eliminate): Handle null functions. + * tree-ssa-sccvn.c (visit_use): Handle calls to internal functions. + * tree-ssa-structalias.c (find_func_aliases): Likewise. + * value-prof.c (gimple_ic_transform): Likewise. + (gimple_indirect_call_to_profile): Likewise. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-04-14 Richard Sandiford <richard.sandiford@linaro.org> + + * tree-vectorizer.h (vect_strided_store_supported): Add a + HOST_WIDE_INT argument. + (vect_strided_load_supported): Likewise. + (vect_permute_store_chain): Return void. + (vect_transform_strided_load): Likewise. + (vect_permute_load_chain): Delete. + * tree-vect-data-refs.c (vect_strided_store_supported): Take a + count argument. Check that the count is a power of two. + (vect_strided_load_supported): Likewise. + (vect_permute_store_chain): Return void. Update after above changes. + Assert that the access is supported. + (vect_permute_load_chain): Likewise. + (vect_transform_strided_load): Return void. + * tree-vect-stmts.c (vectorizable_store): Update calls after + above interface changes. + (vectorizable_load): Likewise. + (vect_analyze_stmt): Don't check for strided powers of two here. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-04-14 Richard Sandiford <richard.sandiford@linaro.org> + + * tree-vectorizer.h (vect_create_data_ref_ptr): Add an extra + type parameter. + * tree-vect-data-refs.c (vect_create_data_ref_ptr): Add an aggr_type + parameter. Generalise code to handle arrays as well as vectors. + (vect_setup_realignment): Update accordingly. + * tree-vect-stmts.c (vectorizable_store): Likewise. + (vectorizable_load): Likewise. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-04-14 Richard Sandiford <richard.sandiford@linaro.org> + + * tree-vect-stmts.c (vectorizable_load): Allocate and free dr_chain + within the per-copy loop. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-04-14 Richard Sandiford <richard.sandiford@linaro.org> + + * tree-vect-stmts.c (vectorizable_load): Print the number of copies + in the dump file. + +2001-06-02 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-03-25 Richard Sandiford <richard.sandiford@linaro.org> + + * config/arm/arm.h (CANNOT_CHANGE_MODE_CLASS): Restrict FPA_REGS + case to VFPv1. 
+ +=== modified file 'gcc/Makefile.in' +--- old/gcc/Makefile.in 2011-05-26 14:27:33 +0000 ++++ new/gcc/Makefile.in 2011-06-02 12:12:00 +0000 +@@ -888,6 +888,8 @@ + READ_MD_H = $(OBSTACK_H) $(HASHTAB_H) read-md.h + PARAMS_H = params.h params.def + BUILTINS_DEF = builtins.def sync-builtins.def omp-builtins.def ++INTERNAL_FN_DEF = internal-fn.def ++INTERNAL_FN_H = internal-fn.h $(INTERNAL_FN_DEF) + TREE_H = tree.h all-tree.def tree.def c-family/c-common.def \ + $(lang_tree_files) $(MACHMODE_H) tree-check.h $(BUILTINS_DEF) \ + $(INPUT_H) statistics.h $(VEC_H) treestruct.def $(HASHTAB_H) \ +@@ -897,7 +899,7 @@ + BASIC_BLOCK_H = basic-block.h $(PREDICT_H) $(VEC_H) $(FUNCTION_H) cfghooks.h + GIMPLE_H = gimple.h gimple.def gsstruct.def pointer-set.h $(VEC_H) \ + $(GGC_H) $(BASIC_BLOCK_H) $(TARGET_H) tree-ssa-operands.h \ +- tree-ssa-alias.h vecir.h ++ tree-ssa-alias.h vecir.h $(INTERNAL_FN_H) + GCOV_IO_H = gcov-io.h gcov-iov.h auto-host.h + COVERAGE_H = coverage.h $(GCOV_IO_H) + DEMANGLE_H = $(srcdir)/../include/demangle.h +@@ -1269,6 +1271,7 @@ + init-regs.o \ + input.o \ + integrate.o \ ++ internal-fn.o \ + intl.o \ + ira.o \ + ira-build.o \ +@@ -2751,6 +2754,8 @@ + $(TM_H) $(TREE_H) $(DIAGNOSTIC_CORE_H) $(DIAGNOSTIC_H) $(TREE_FLOW_H) \ + $(TREE_PASS_H) tree-ssa-propagate.h tree-pretty-print.h \ + gimple-pretty-print.h ++internal-fn.o : internal-fn.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ ++ $(GIMPLE_H) $(TREE_H) $(EXPR_H) $(OPTABS_H) $(RECOG_H) + gimple.o : gimple.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TREE_H) \ + $(GGC_H) $(GIMPLE_H) $(DIAGNOSTIC_CORE_H) $(DIAGNOSTIC_H) gt-gimple.h \ + $(TREE_FLOW_H) value-prof.h $(FLAGS_H) $(DEMANGLE_H) \ + +=== modified file 'gcc/cfgexpand.c' +--- old/gcc/cfgexpand.c 2011-04-20 11:18:50 +0000 ++++ new/gcc/cfgexpand.c 2011-06-02 12:12:00 +0000 +@@ -1839,12 +1839,17 @@ + static void + expand_call_stmt (gimple stmt) + { +- tree exp; +- tree lhs = gimple_call_lhs (stmt); ++ tree exp, lhs; + size_t i; + bool builtin_p; + tree decl; + ++ if (gimple_call_internal_p (stmt)) ++ { ++ expand_internal_call (stmt); ++ return; ++ } ++ + exp = build_vl_exp (CALL_EXPR, gimple_call_num_args (stmt) + 3); + + CALL_EXPR_FN (exp) = gimple_call_fn (stmt); +@@ -1882,6 +1887,7 @@ + SET_EXPR_LOCATION (exp, gimple_location (stmt)); + TREE_BLOCK (exp) = gimple_block (stmt); + ++ lhs = gimple_call_lhs (stmt); + if (lhs) + expand_assignment (lhs, exp, false); + else + +=== modified file 'gcc/config/arm/arm.h' +--- old/gcc/config/arm/arm.h 2011-04-26 14:42:21 +0000 ++++ new/gcc/config/arm/arm.h 2011-06-02 12:12:00 +0000 +@@ -1169,12 +1169,12 @@ + } + + /* FPA registers can't do subreg as all values are reformatted to internal +- precision. VFP registers may only be accessed in the mode they +- were set. */ +-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ +- (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \ +- ? reg_classes_intersect_p (FPA_REGS, (CLASS)) \ +- || reg_classes_intersect_p (VFP_REGS, (CLASS)) \ ++ precision. In VFPv1, VFP registers could only be accessed in the mode ++ they were set, so subregs would be invalid there too. However, we don't ++ support VFPv1 at the moment, and the restriction was lifted in VFPv2. */ ++#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ ++ (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \ ++ ? reg_classes_intersect_p (FPA_REGS, (CLASS)) \ + : 0) + + /* The class value for index registers, and the one for base regs. 
*/ + +=== modified file 'gcc/config/arm/neon.md' +--- old/gcc/config/arm/neon.md 2011-05-06 11:28:27 +0000 ++++ new/gcc/config/arm/neon.md 2011-06-02 12:12:00 +0000 +@@ -4248,6 +4248,12 @@ + DONE; + }) + ++(define_expand "vec_load_lanes<mode><mode>" ++ [(set (match_operand:VDQX 0 "s_register_operand") ++ (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")] ++ UNSPEC_VLD1))] ++ "TARGET_NEON") ++ + (define_insn "neon_vld1<mode>" + [(set (match_operand:VDQX 0 "s_register_operand" "=w") + (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")] +@@ -4345,6 +4351,12 @@ + (const_string "neon_vld1_1_2_regs")))] + ) + ++(define_expand "vec_store_lanes<mode><mode>" ++ [(set (match_operand:VDQX 0 "neon_struct_operand") ++ (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")] ++ UNSPEC_VST1))] ++ "TARGET_NEON") ++ + (define_insn "neon_vst1<mode>" + [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um") + (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")] +@@ -4401,6 +4413,13 @@ + [(set_attr "neon_type" "neon_vst1_vst2_lane")] + ) + ++(define_expand "vec_load_lanesti<mode>" ++ [(set (match_operand:TI 0 "s_register_operand") ++ (unspec:TI [(match_operand:TI 1 "neon_struct_operand") ++ (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ UNSPEC_VLD2))] ++ "TARGET_NEON") ++ + (define_insn "neon_vld2<mode>" + [(set (match_operand:TI 0 "s_register_operand" "=w") + (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um") +@@ -4419,6 +4438,13 @@ + (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes")))] + ) + ++(define_expand "vec_load_lanesoi<mode>" ++ [(set (match_operand:OI 0 "s_register_operand") ++ (unspec:OI [(match_operand:OI 1 "neon_struct_operand") ++ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ UNSPEC_VLD2))] ++ "TARGET_NEON") ++ + (define_insn "neon_vld2<mode>" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") +@@ -4501,6 +4527,13 @@ + (const_string "neon_vld1_1_2_regs")))] + ) + ++(define_expand "vec_store_lanesti<mode>" ++ [(set (match_operand:TI 0 "neon_struct_operand") ++ (unspec:TI [(match_operand:TI 1 "s_register_operand") ++ (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ UNSPEC_VST2))] ++ "TARGET_NEON") ++ + (define_insn "neon_vst2<mode>" + [(set (match_operand:TI 0 "neon_struct_operand" "=Um") + (unspec:TI [(match_operand:TI 1 "s_register_operand" "w") +@@ -4519,6 +4552,13 @@ + (const_string "neon_vst1_1_2_regs_vst2_2_regs")))] + ) + ++(define_expand "vec_store_lanesoi<mode>" ++ [(set (match_operand:OI 0 "neon_struct_operand") ++ (unspec:OI [(match_operand:OI 1 "s_register_operand") ++ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ UNSPEC_VST2))] ++ "TARGET_NEON") ++ + (define_insn "neon_vst2<mode>" + [(set (match_operand:OI 0 "neon_struct_operand" "=Um") + (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") +@@ -4584,6 +4624,13 @@ + [(set_attr "neon_type" "neon_vst1_vst2_lane")] + ) + ++(define_expand "vec_load_lanesei<mode>" ++ [(set (match_operand:EI 0 "s_register_operand") ++ (unspec:EI [(match_operand:EI 1 "neon_struct_operand") ++ (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ UNSPEC_VLD3))] ++ "TARGET_NEON") ++ + (define_insn "neon_vld3<mode>" + [(set (match_operand:EI 0 "s_register_operand" "=w") + (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um") +@@ -4602,6 +4649,16 @@ + (const_string "neon_vld3_vld4")))] + ) + ++(define_expand "vec_load_lanesci<mode>" ++ [(match_operand:CI 0 "s_register_operand") ++ (match_operand:CI 1 
"neon_struct_operand") ++ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ "TARGET_NEON" ++{ ++ emit_insn (gen_neon_vld3<mode> (operands[0], operands[1])); ++ DONE; ++}) ++ + (define_expand "neon_vld3<mode>" + [(match_operand:CI 0 "s_register_operand") + (match_operand:CI 1 "neon_struct_operand") +@@ -4741,6 +4798,13 @@ + (const_string "neon_vld3_vld4_all_lanes") + (const_string "neon_vld1_1_2_regs")))]) + ++(define_expand "vec_store_lanesei<mode>" ++ [(set (match_operand:EI 0 "neon_struct_operand") ++ (unspec:EI [(match_operand:EI 1 "s_register_operand") ++ (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ UNSPEC_VST3))] ++ "TARGET_NEON") ++ + (define_insn "neon_vst3<mode>" + [(set (match_operand:EI 0 "neon_struct_operand" "=Um") + (unspec:EI [(match_operand:EI 1 "s_register_operand" "w") +@@ -4758,6 +4822,16 @@ + (const_string "neon_vst1_1_2_regs_vst2_2_regs") + (const_string "neon_vst2_4_regs_vst3_vst4")))]) + ++(define_expand "vec_store_lanesci<mode>" ++ [(match_operand:CI 0 "neon_struct_operand") ++ (match_operand:CI 1 "s_register_operand") ++ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ "TARGET_NEON" ++{ ++ emit_insn (gen_neon_vst3<mode> (operands[0], operands[1])); ++ DONE; ++}) ++ + (define_expand "neon_vst3<mode>" + [(match_operand:CI 0 "neon_struct_operand") + (match_operand:CI 1 "s_register_operand") +@@ -4869,6 +4943,13 @@ + } + [(set_attr "neon_type" "neon_vst3_vst4_lane")]) + ++(define_expand "vec_load_lanesoi<mode>" ++ [(set (match_operand:OI 0 "s_register_operand") ++ (unspec:OI [(match_operand:OI 1 "neon_struct_operand") ++ (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ UNSPEC_VLD4))] ++ "TARGET_NEON") ++ + (define_insn "neon_vld4<mode>" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") +@@ -4887,6 +4968,16 @@ + (const_string "neon_vld3_vld4")))] + ) + ++(define_expand "vec_load_lanesxi<mode>" ++ [(match_operand:XI 0 "s_register_operand") ++ (match_operand:XI 1 "neon_struct_operand") ++ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ "TARGET_NEON" ++{ ++ emit_insn (gen_neon_vld4<mode> (operands[0], operands[1])); ++ DONE; ++}) ++ + (define_expand "neon_vld4<mode>" + [(match_operand:XI 0 "s_register_operand") + (match_operand:XI 1 "neon_struct_operand") +@@ -5033,6 +5124,13 @@ + (const_string "neon_vld1_1_2_regs")))] + ) + ++(define_expand "vec_store_lanesoi<mode>" ++ [(set (match_operand:OI 0 "neon_struct_operand") ++ (unspec:OI [(match_operand:OI 1 "s_register_operand") ++ (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ UNSPEC_VST4))] ++ "TARGET_NEON") ++ + (define_insn "neon_vst4<mode>" + [(set (match_operand:OI 0 "neon_struct_operand" "=Um") + (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") +@@ -5051,6 +5149,16 @@ + (const_string "neon_vst2_4_regs_vst3_vst4")))] + ) + ++(define_expand "vec_store_lanesxi<mode>" ++ [(match_operand:XI 0 "neon_struct_operand") ++ (match_operand:XI 1 "s_register_operand") ++ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ "TARGET_NEON" ++{ ++ emit_insn (gen_neon_vst4<mode> (operands[0], operands[1])); ++ DONE; ++}) ++ + (define_expand "neon_vst4<mode>" + [(match_operand:XI 0 "neon_struct_operand") + (match_operand:XI 1 "s_register_operand") + +=== modified file 'gcc/doc/md.texi' +--- old/gcc/doc/md.texi 2011-01-03 20:52:22 +0000 ++++ new/gcc/doc/md.texi 2011-05-05 15:43:06 +0000 +@@ -3935,6 +3935,48 @@ + consecutive memory locations, operand 1 is the first register, and + operand 2 is a constant: the number of consecutive registers. 
+ ++@cindex @code{vec_load_lanes@var{m}@var{n}} instruction pattern ++@item @samp{vec_load_lanes@var{m}@var{n}} ++Perform an interleaved load of several vectors from memory operand 1 ++into register operand 0. Both operands have mode @var{m}. The register ++operand is viewed as holding consecutive vectors of mode @var{n}, ++while the memory operand is a flat array that contains the same number ++of elements. The operation is equivalent to: ++ ++@smallexample ++int c = GET_MODE_SIZE (@var{m}) / GET_MODE_SIZE (@var{n}); ++for (j = 0; j < GET_MODE_NUNITS (@var{n}); j++) ++ for (i = 0; i < c; i++) ++ operand0[i][j] = operand1[j * c + i]; ++@end smallexample ++ ++For example, @samp{vec_load_lanestiv4hi} loads 8 16-bit values ++from memory into a register of mode @samp{TI}@. The register ++contains two consecutive vectors of mode @samp{V4HI}@. ++ ++This pattern can only be used if: ++@smallexample ++TARGET_ARRAY_MODE_SUPPORTED_P (@var{n}, @var{c}) ++@end smallexample ++is true. GCC assumes that, if a target supports this kind of ++instruction for some mode @var{n}, it also supports unaligned ++loads for vectors of mode @var{n}. ++ ++@cindex @code{vec_store_lanes@var{m}@var{n}} instruction pattern ++@item @samp{vec_store_lanes@var{m}@var{n}} ++Equivalent to @samp{vec_load_lanes@var{m}@var{n}}, with the memory ++and register operands reversed. That is, the instruction is ++equivalent to: ++ ++@smallexample ++int c = GET_MODE_SIZE (@var{m}) / GET_MODE_SIZE (@var{n}); ++for (j = 0; j < GET_MODE_NUNITS (@var{n}); j++) ++ for (i = 0; i < c; i++) ++ operand0[j * c + i] = operand1[i][j]; ++@end smallexample ++ ++for a memory operand 0 and register operand 1. ++ + @cindex @code{vec_set@var{m}} instruction pattern + @item @samp{vec_set@var{m}} + Set given field in the vector value. 
Operand 0 is the vector to modify, + +=== modified file 'gcc/expr.c' +--- old/gcc/expr.c 2011-05-26 14:27:33 +0000 ++++ new/gcc/expr.c 2011-06-02 12:12:00 +0000 +@@ -8537,10 +8537,13 @@ + if (code == SSA_NAME + && (g = SSA_NAME_DEF_STMT (ssa_name)) + && gimple_code (g) == GIMPLE_CALL) +- pmode = promote_function_mode (type, mode, &unsignedp, +- TREE_TYPE +- (TREE_TYPE (gimple_call_fn (g))), +- 2); ++ { ++ gcc_assert (!gimple_call_internal_p (g)); ++ pmode = promote_function_mode (type, mode, &unsignedp, ++ TREE_TYPE ++ (TREE_TYPE (gimple_call_fn (g))), ++ 2); ++ } + else + pmode = promote_decl_mode (exp, &unsignedp); + gcc_assert (GET_MODE (decl_rtl) == pmode); + +=== modified file 'gcc/genopinit.c' +--- old/gcc/genopinit.c 2011-01-03 20:52:22 +0000 ++++ new/gcc/genopinit.c 2011-05-05 15:43:06 +0000 +@@ -74,6 +74,8 @@ + "set_convert_optab_handler (fractuns_optab, $B, $A, CODE_FOR_$(fractuns$Q$a$I$b2$))", + "set_convert_optab_handler (satfract_optab, $B, $A, CODE_FOR_$(satfract$a$Q$b2$))", + "set_convert_optab_handler (satfractuns_optab, $B, $A, CODE_FOR_$(satfractuns$I$a$Q$b2$))", ++ "set_convert_optab_handler (vec_load_lanes_optab, $A, $B, CODE_FOR_$(vec_load_lanes$a$b$))", ++ "set_convert_optab_handler (vec_store_lanes_optab, $A, $B, CODE_FOR_$(vec_store_lanes$a$b$))", + "set_optab_handler (add_optab, $A, CODE_FOR_$(add$P$a3$))", + "set_optab_handler (addv_optab, $A, CODE_FOR_$(add$F$a3$)),\n\ + set_optab_handler (add_optab, $A, CODE_FOR_$(add$F$a3$))", + +=== modified file 'gcc/gimple-low.c' +--- old/gcc/gimple-low.c 2011-02-08 11:15:53 +0000 ++++ new/gcc/gimple-low.c 2011-05-05 15:42:22 +0000 +@@ -218,6 +218,10 @@ + tree fndecl, parms, p; + unsigned int i, nargs; + ++ /* Calls to internal functions always match their signature. */ ++ if (gimple_call_internal_p (stmt)) ++ return true; ++ + nargs = gimple_call_num_args (stmt); + + /* Get argument types for verification. */ + +=== modified file 'gcc/gimple-pretty-print.c' +--- old/gcc/gimple-pretty-print.c 2011-02-15 18:36:16 +0000 ++++ new/gcc/gimple-pretty-print.c 2011-05-05 15:42:22 +0000 +@@ -596,8 +596,12 @@ + + if (flags & TDF_RAW) + { +- dump_gimple_fmt (buffer, spc, flags, "%G <%T, %T", +- gs, gimple_call_fn (gs), lhs); ++ if (gimple_call_internal_p (gs)) ++ dump_gimple_fmt (buffer, spc, flags, "%G <%s, %T", gs, ++ internal_fn_name (gimple_call_internal_fn (gs)), lhs); ++ else ++ dump_gimple_fmt (buffer, spc, flags, "%G <%T, %T", ++ gs, gimple_call_fn (gs), lhs); + if (gimple_call_num_args (gs) > 0) + { + pp_string (buffer, ", "); +@@ -617,7 +621,10 @@ + + pp_space (buffer); + } +- print_call_name (buffer, gimple_call_fn (gs), flags); ++ if (gimple_call_internal_p (gs)) ++ pp_string (buffer, internal_fn_name (gimple_call_internal_fn (gs))); ++ else ++ print_call_name (buffer, gimple_call_fn (gs), flags); + pp_string (buffer, " ("); + dump_gimple_call_args (buffer, gs, flags); + pp_character (buffer, ')'); + +=== modified file 'gcc/gimple.c' +--- old/gcc/gimple.c 2011-05-18 13:33:53 +0000 ++++ new/gcc/gimple.c 2011-06-02 12:12:00 +0000 +@@ -276,6 +276,59 @@ + } + + ++/* Helper for gimple_build_call_internal and gimple_build_call_internal_vec. ++ Build the basic components of a GIMPLE_CALL statement to internal ++ function FN with NARGS arguments. 
*/ ++ ++static inline gimple ++gimple_build_call_internal_1 (enum internal_fn fn, unsigned nargs) ++{ ++ gimple s = gimple_build_with_ops (GIMPLE_CALL, ERROR_MARK, nargs + 3); ++ s->gsbase.subcode |= GF_CALL_INTERNAL; ++ gimple_call_set_internal_fn (s, fn); ++ gimple_call_reset_alias_info (s); ++ return s; ++} ++ ++ ++/* Build a GIMPLE_CALL statement to internal function FN. NARGS is ++ the number of arguments. The ... are the arguments. */ ++ ++gimple ++gimple_build_call_internal (enum internal_fn fn, unsigned nargs, ...) ++{ ++ va_list ap; ++ gimple call; ++ unsigned i; ++ ++ call = gimple_build_call_internal_1 (fn, nargs); ++ va_start (ap, nargs); ++ for (i = 0; i < nargs; i++) ++ gimple_call_set_arg (call, i, va_arg (ap, tree)); ++ va_end (ap); ++ ++ return call; ++} ++ ++ ++/* Build a GIMPLE_CALL statement to internal function FN with the arguments ++ specified in vector ARGS. */ ++ ++gimple ++gimple_build_call_internal_vec (enum internal_fn fn, VEC(tree, heap) *args) ++{ ++ unsigned i, nargs; ++ gimple call; ++ ++ nargs = VEC_length (tree, args); ++ call = gimple_build_call_internal_1 (fn, nargs); ++ for (i = 0; i < nargs; i++) ++ gimple_call_set_arg (call, i, VEC_index (tree, args, i)); ++ ++ return call; ++} ++ ++ + /* Build a GIMPLE_CALL statement from CALL_EXPR T. Note that T is + assumed to be in GIMPLE form already. Minimal checking is done of + this fact. */ +@@ -1774,6 +1827,20 @@ + return (gimple_body (fndecl) || (fn && fn->cfg)); + } + ++/* Return true if calls C1 and C2 are known to go to the same function. */ ++ ++bool ++gimple_call_same_target_p (const_gimple c1, const_gimple c2) ++{ ++ if (gimple_call_internal_p (c1)) ++ return (gimple_call_internal_p (c2) ++ && gimple_call_internal_fn (c1) == gimple_call_internal_fn (c2)); ++ else ++ return (gimple_call_fn (c1) == gimple_call_fn (c2) ++ || (gimple_call_fndecl (c1) ++ && gimple_call_fndecl (c1) == gimple_call_fndecl (c2))); ++} ++ + /* Detect flags from a GIMPLE_CALL. This is just like + call_expr_flags, but for gimple tuples. */ + +@@ -1786,6 +1853,8 @@ + + if (decl) + flags = flags_from_decl_or_type (decl); ++ else if (gimple_call_internal_p (stmt)) ++ flags = internal_fn_flags (gimple_call_internal_fn (stmt)); + else + { + t = TREE_TYPE (gimple_call_fn (stmt)); +@@ -1801,18 +1870,35 @@ + return flags; + } + ++/* Return the "fn spec" string for call STMT. */ ++ ++static tree ++gimple_call_fnspec (const_gimple stmt) ++{ ++ tree fn, type, attr; ++ ++ fn = gimple_call_fn (stmt); ++ if (!fn) ++ return NULL_TREE; ++ ++ type = TREE_TYPE (TREE_TYPE (fn)); ++ if (!type) ++ return NULL_TREE; ++ ++ attr = lookup_attribute ("fn spec", TYPE_ATTRIBUTES (type)); ++ if (!attr) ++ return NULL_TREE; ++ ++ return TREE_VALUE (TREE_VALUE (attr)); ++} ++ + /* Detects argument flags for argument number ARG on call STMT. 
*/ + + int + gimple_call_arg_flags (const_gimple stmt, unsigned arg) + { +- tree type = TREE_TYPE (TREE_TYPE (gimple_call_fn (stmt))); +- tree attr = lookup_attribute ("fn spec", TYPE_ATTRIBUTES (type)); +- if (!attr) +- return 0; +- +- attr = TREE_VALUE (TREE_VALUE (attr)); +- if (1 + arg >= (unsigned) TREE_STRING_LENGTH (attr)) ++ tree attr = gimple_call_fnspec (stmt); ++ if (!attr || 1 + arg >= (unsigned) TREE_STRING_LENGTH (attr)) + return 0; + + switch (TREE_STRING_POINTER (attr)[1 + arg]) +@@ -1850,13 +1936,8 @@ + if (gimple_call_flags (stmt) & ECF_MALLOC) + return ERF_NOALIAS; + +- type = TREE_TYPE (TREE_TYPE (gimple_call_fn (stmt))); +- attr = lookup_attribute ("fn spec", TYPE_ATTRIBUTES (type)); +- if (!attr) +- return 0; +- +- attr = TREE_VALUE (TREE_VALUE (attr)); +- if (TREE_STRING_LENGTH (attr) < 1) ++ attr = gimple_call_fnspec (stmt); ++ if (!attr || TREE_STRING_LENGTH (attr) < 1) + return 0; + + switch (TREE_STRING_POINTER (attr)[0]) +@@ -2293,6 +2374,7 @@ + if (is_gimple_call (s)) + { + unsigned nargs = gimple_call_num_args (s); ++ tree fn; + + if (!(gimple_call_flags (s) & (ECF_CONST | ECF_PURE))) + return true; +@@ -2307,7 +2389,8 @@ + return true; + } + +- if (TREE_SIDE_EFFECTS (gimple_call_fn (s))) ++ fn = gimple_call_fn (s); ++ if (fn && TREE_SIDE_EFFECTS (fn)) + return true; + + for (i = 0; i < nargs; i++) +@@ -2349,14 +2432,15 @@ + if (is_gimple_call (s)) + { + unsigned nargs = gimple_call_num_args (s); ++ tree fn; + + if (!(gimple_call_flags (s) & (ECF_CONST | ECF_PURE))) + return true; + + /* We cannot use gimple_has_volatile_ops here, + because we must ignore a volatile LHS. */ +- if (TREE_SIDE_EFFECTS (gimple_call_fn (s)) +- || TREE_THIS_VOLATILE (gimple_call_fn (s))) ++ fn = gimple_call_fn (s); ++ if (fn && (TREE_SIDE_EFFECTS (fn) || TREE_THIS_VOLATILE (fn))) + { + gcc_assert (gimple_has_volatile_ops (s)); + return true; +@@ -3113,7 +3197,6 @@ + gimple_call_copy_skip_args (gimple stmt, bitmap args_to_skip) + { + int i; +- tree fn = gimple_call_fn (stmt); + int nargs = gimple_call_num_args (stmt); + VEC(tree, heap) *vargs = VEC_alloc (tree, heap, nargs); + gimple new_stmt; +@@ -3122,7 +3205,11 @@ + if (!bitmap_bit_p (args_to_skip, i)) + VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i)); + +- new_stmt = gimple_build_call_vec (fn, vargs); ++ if (gimple_call_internal_p (stmt)) ++ new_stmt = gimple_build_call_internal_vec (gimple_call_internal_fn (stmt), ++ vargs); ++ else ++ new_stmt = gimple_build_call_vec (gimple_call_fn (stmt), vargs); + VEC_free (tree, heap, vargs); + if (gimple_call_lhs (stmt)) + gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); + +=== modified file 'gcc/gimple.h' +--- old/gcc/gimple.h 2011-04-18 21:58:03 +0000 ++++ new/gcc/gimple.h 2011-06-02 12:12:00 +0000 +@@ -30,6 +30,7 @@ + #include "basic-block.h" + #include "tree-ssa-operands.h" + #include "tree-ssa-alias.h" ++#include "internal-fn.h" + + struct gimple_seq_node_d; + typedef struct gimple_seq_node_d *gimple_seq_node; +@@ -82,6 +83,8 @@ + name, a _DECL, a _REF, etc. */ + }; + ++#define GF_CALL_INTERNAL_FN_SHIFT 8 ++ + /* Specific flags for individual GIMPLE statements. These flags are + always stored in gimple_statement_base.subcode and they may only be + defined for statement codes that do not use sub-codes. 
+@@ -102,6 +105,8 @@ + GF_CALL_TAILCALL = 1 << 3, + GF_CALL_VA_ARG_PACK = 1 << 4, + GF_CALL_NOTHROW = 1 << 5, ++ GF_CALL_INTERNAL = 1 << 6, ++ GF_CALL_INTERNAL_FN = 0xff << GF_CALL_INTERNAL_FN_SHIFT, + GF_OMP_PARALLEL_COMBINED = 1 << 0, + + /* True on an GIMPLE_OMP_RETURN statement if the return does not require +@@ -817,6 +822,8 @@ + + gimple gimple_build_call_vec (tree, VEC(tree, heap) *); + gimple gimple_build_call (tree, unsigned, ...); ++gimple gimple_build_call_internal (enum internal_fn, unsigned, ...); ++gimple gimple_build_call_internal_vec (enum internal_fn, VEC(tree, heap) *); + gimple gimple_build_call_from_tree (tree); + gimple gimplify_assign (tree, tree, gimple_seq *); + gimple gimple_build_cond (enum tree_code, tree, tree, tree, tree); +@@ -861,6 +868,7 @@ + void gimple_seq_free (gimple_seq); + void gimple_seq_add_seq (gimple_seq *, gimple_seq); + gimple_seq gimple_seq_copy (gimple_seq); ++bool gimple_call_same_target_p (const_gimple, const_gimple); + int gimple_call_flags (const_gimple); + int gimple_call_return_flags (const_gimple); + int gimple_call_arg_flags (const_gimple, unsigned); +@@ -2012,6 +2020,27 @@ + } + + ++/* Return true if call GS calls an internal-only function, as enumerated ++ by internal_fn. */ ++ ++static inline bool ++gimple_call_internal_p (const_gimple gs) ++{ ++ GIMPLE_CHECK (gs, GIMPLE_CALL); ++ return (gs->gsbase.subcode & GF_CALL_INTERNAL) != 0; ++} ++ ++ ++/* Return the target of internal call GS. */ ++ ++static inline enum internal_fn ++gimple_call_internal_fn (const_gimple gs) ++{ ++ gcc_assert (gimple_call_internal_p (gs)); ++ return (enum internal_fn) (gs->gsbase.subcode >> GF_CALL_INTERNAL_FN_SHIFT); ++} ++ ++ + /* Return a pointer to the tree node representing the function called by call + statement GS. */ + +@@ -2029,6 +2058,7 @@ + gimple_call_set_fn (gimple gs, tree fn) + { + GIMPLE_CHECK (gs, GIMPLE_CALL); ++ gcc_assert (!gimple_call_internal_p (gs)); + gimple_set_op (gs, 1, fn); + } + +@@ -2039,10 +2069,23 @@ + gimple_call_set_fndecl (gimple gs, tree decl) + { + GIMPLE_CHECK (gs, GIMPLE_CALL); ++ gcc_assert (!gimple_call_internal_p (gs)); + gimple_set_op (gs, 1, build_fold_addr_expr_loc (gimple_location (gs), decl)); + } + + ++/* Set internal function FN to be the function called by call statement GS. */ ++ ++static inline void ++gimple_call_set_internal_fn (gimple gs, enum internal_fn fn) ++{ ++ GIMPLE_CHECK (gs, GIMPLE_CALL); ++ gcc_assert (gimple_call_internal_p (gs)); ++ gs->gsbase.subcode &= ~GF_CALL_INTERNAL_FN; ++ gs->gsbase.subcode |= (int) fn << GF_CALL_INTERNAL_FN_SHIFT; ++} ++ ++ + /* If a given GIMPLE_CALL's callee is a FUNCTION_DECL, return it. + Otherwise return NULL. This function is analogous to + get_callee_fndecl in tree land. */ +@@ -2051,7 +2094,7 @@ + gimple_call_fndecl (const_gimple gs) + { + tree addr = gimple_call_fn (gs); +- if (TREE_CODE (addr) == ADDR_EXPR) ++ if (addr && TREE_CODE (addr) == ADDR_EXPR) + { + tree fndecl = TREE_OPERAND (addr, 0); + if (TREE_CODE (fndecl) == MEM_REF) +@@ -2073,8 +2116,13 @@ + static inline tree + gimple_call_return_type (const_gimple gs) + { +- tree fn = gimple_call_fn (gs); +- tree type = TREE_TYPE (fn); ++ tree fn, type; ++ ++ fn = gimple_call_fn (gs); ++ if (fn == NULL_TREE) ++ return TREE_TYPE (gimple_call_lhs (gs)); ++ ++ type = TREE_TYPE (fn); + + /* See through the pointer. 
*/ + type = TREE_TYPE (type); + +=== added file 'gcc/internal-fn.c' +--- old/gcc/internal-fn.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/internal-fn.c 2011-05-05 15:43:06 +0000 +@@ -0,0 +1,147 @@ ++/* Internal functions. ++ Copyright (C) 2011 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "gimple.h" ++#include "tree.h" ++#include "expr.h" ++#include "optabs.h" ++#include "recog.h" ++ ++/* The names of each internal function, indexed by function number. */ ++const char *const internal_fn_name_array[] = { ++#define DEF_INTERNAL_FN(CODE, FLAGS) #CODE, ++#include "internal-fn.def" ++#undef DEF_INTERNAL_FN ++ "<invalid-fn>" ++}; ++ ++/* The ECF_* flags of each internal function, indexed by function number. */ ++const int internal_fn_flags_array[] = { ++#define DEF_INTERNAL_FN(CODE, FLAGS) FLAGS, ++#include "internal-fn.def" ++#undef DEF_INTERNAL_FN ++ 0 ++}; ++ ++/* ARRAY_TYPE is an array of vector modes. Return the associated insn ++ for load-lanes-style optab OPTAB. The insn must exist. */ ++ ++static enum insn_code ++get_multi_vector_move (tree array_type, convert_optab optab) ++{ ++ enum insn_code icode; ++ enum machine_mode imode; ++ enum machine_mode vmode; ++ ++ gcc_assert (TREE_CODE (array_type) == ARRAY_TYPE); ++ imode = TYPE_MODE (array_type); ++ vmode = TYPE_MODE (TREE_TYPE (array_type)); ++ ++ icode = convert_optab_handler (optab, imode, vmode); ++ gcc_assert (icode != CODE_FOR_nothing); ++ return icode; ++} ++ ++/* Expand LOAD_LANES call STMT. */ ++ ++static void ++expand_LOAD_LANES (gimple stmt) ++{ ++ tree type, lhs, rhs; ++ rtx target, mem; ++ enum insn_code icode; ++ const struct insn_operand_data *operand; ++ ++ lhs = gimple_call_lhs (stmt); ++ rhs = gimple_call_arg (stmt, 0); ++ type = TREE_TYPE (lhs); ++ ++ target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); ++ mem = expand_normal (rhs); ++ ++ gcc_assert (REG_P (target)); ++ gcc_assert (MEM_P (mem)); ++ PUT_MODE (mem, TYPE_MODE (type)); ++ ++ icode = get_multi_vector_move (type, vec_load_lanes_optab); ++ ++ operand = &insn_data[(int) icode].operand[1]; ++ if (operand->predicate && !operand->predicate (mem, operand->mode)) ++ mem = replace_equiv_address (mem, force_reg (Pmode, XEXP (mem, 0))); ++ ++ emit_insn (GEN_FCN (icode) (target, mem)); ++} ++ ++/* Expand STORE_LANES call STMT. 
*/ ++ ++static void ++expand_STORE_LANES (gimple stmt) ++{ ++ tree type, lhs, rhs; ++ rtx target, reg; ++ enum insn_code icode; ++ const struct insn_operand_data *operand; ++ ++ lhs = gimple_call_lhs (stmt); ++ rhs = gimple_call_arg (stmt, 0); ++ type = TREE_TYPE (rhs); ++ ++ target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); ++ reg = expand_normal (rhs); ++ ++ gcc_assert (MEM_P (target)); ++ PUT_MODE (target, TYPE_MODE (type)); ++ ++ icode = get_multi_vector_move (type, vec_store_lanes_optab); ++ ++ operand = &insn_data[(int) icode].operand[0]; ++ if (operand->predicate && !operand->predicate (target, operand->mode)) ++ target = replace_equiv_address (target, ++ force_reg (Pmode, XEXP (target, 0))); ++ ++ operand = &insn_data[(int) icode].operand[1]; ++ if (operand->predicate && !operand->predicate (reg, operand->mode)) ++ reg = force_reg (TYPE_MODE (type), reg); ++ ++ emit_insn (GEN_FCN (icode) (target, reg)); ++} ++ ++/* Routines to expand each internal function, indexed by function number. ++ Each routine has the prototype: ++ ++ expand_<NAME> (gimple stmt) ++ ++ where STMT is the statement that performs the call. */ ++static void (*const internal_fn_expanders[]) (gimple) = { ++#define DEF_INTERNAL_FN(CODE, FLAGS) expand_##CODE, ++#include "internal-fn.def" ++#undef DEF_INTERNAL_FN ++ 0 ++}; ++ ++/* Expand STMT, which is a call to internal function FN. */ ++ ++void ++expand_internal_call (gimple stmt) ++{ ++ internal_fn_expanders[(int) gimple_call_internal_fn (stmt)] (stmt); ++} + +=== added file 'gcc/internal-fn.def' +--- old/gcc/internal-fn.def 1970-01-01 00:00:00 +0000 ++++ new/gcc/internal-fn.def 2011-05-05 15:43:06 +0000 +@@ -0,0 +1,42 @@ ++/* Internal functions. ++ Copyright (C) 2011 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++/* This file specifies a list of internal "functions". These functions ++ differ from built-in functions in that they have no linkage and cannot ++ be called directly by the user. They represent operations that are only ++ synthesised by GCC itself. ++ ++ Internal functions are used instead of tree codes if the operation ++ and its operands are more naturally represented as a GIMPLE_CALL ++ than a GIMPLE_ASSIGN. ++ ++ Each entry in this file has the form: ++ ++ DEF_INTERNAL_FN (NAME, FLAGS) ++ ++ where NAME is the name of the function and FLAGS is a set of ++ ECF_* flags. Each entry must have a corresponding expander ++ of the form: ++ ++ void expand_NAME (gimple stmt) ++ ++ where STMT is the statement that performs the call. */ ++ ++DEF_INTERNAL_FN (LOAD_LANES, ECF_CONST | ECF_LEAF) ++DEF_INTERNAL_FN (STORE_LANES, ECF_CONST | ECF_LEAF) + +=== added file 'gcc/internal-fn.h' +--- old/gcc/internal-fn.h 1970-01-01 00:00:00 +0000 ++++ new/gcc/internal-fn.h 2011-05-05 15:42:22 +0000 +@@ -0,0 +1,52 @@ ++/* Internal functions. ++ Copyright (C) 2011 Free Software Foundation, Inc. ++ ++This file is part of GCC. 
++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++#ifndef GCC_INTERNAL_FN_H ++#define GCC_INTERNAL_FN_H ++ ++enum internal_fn { ++#define DEF_INTERNAL_FN(CODE, FLAGS) IFN_##CODE, ++#include "internal-fn.def" ++#undef DEF_INTERNAL_FN ++ IFN_LAST ++}; ++ ++extern const char *const internal_fn_name_array[]; ++extern const int internal_fn_flags_array[]; ++ ++/* Return the name of internal function FN. The name is only meaningful ++ for dumps; it has no linkage. */ ++ ++static inline const char * ++internal_fn_name (enum internal_fn fn) ++{ ++ return internal_fn_name_array[(int) fn]; ++} ++ ++/* Return the ECF_* flags for function FN. */ ++ ++static inline int ++internal_fn_flags (enum internal_fn fn) ++{ ++ return internal_fn_flags_array[(int) fn]; ++} ++ ++extern void expand_internal_call (gimple); ++ ++#endif + +=== modified file 'gcc/ipa-prop.c' +--- old/gcc/ipa-prop.c 2011-04-18 21:58:03 +0000 ++++ new/gcc/ipa-prop.c 2011-06-02 12:12:00 +0000 +@@ -1418,6 +1418,8 @@ + { + tree target = gimple_call_fn (call); + ++ if (!target) ++ return; + if (TREE_CODE (target) == SSA_NAME) + ipa_analyze_indirect_call_uses (node, info, parms_info, call, target); + else if (TREE_CODE (target) == OBJ_TYPE_REF) + +=== modified file 'gcc/optabs.h' +--- old/gcc/optabs.h 2011-01-03 20:52:22 +0000 ++++ new/gcc/optabs.h 2011-05-05 15:43:06 +0000 +@@ -578,6 +578,9 @@ + COI_satfract, + COI_satfractuns, + ++ COI_vec_load_lanes, ++ COI_vec_store_lanes, ++ + COI_MAX + }; + +@@ -598,6 +601,8 @@ + #define fractuns_optab (&convert_optab_table[COI_fractuns]) + #define satfract_optab (&convert_optab_table[COI_satfract]) + #define satfractuns_optab (&convert_optab_table[COI_satfractuns]) ++#define vec_load_lanes_optab (&convert_optab_table[COI_vec_load_lanes]) ++#define vec_store_lanes_optab (&convert_optab_table[COI_vec_store_lanes]) + + /* Contains the optab used for each rtx code. 
*/ + extern optab code_to_optab[NUM_RTX_CODE + 1]; + +=== modified file 'gcc/testsuite/gcc.dg/vect/O3-pr39675-2.c' +--- old/gcc/testsuite/gcc.dg/vect/O3-pr39675-2.c 2009-04-20 10:26:18 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/O3-pr39675-2.c 2011-05-05 15:46:10 +0000 +@@ -26,7 +26,7 @@ + } + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided_wide } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided_wide } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided4 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided4 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-12.c' +--- old/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-12.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-12.c 2011-05-05 15:46:10 +0000 +@@ -113,7 +113,7 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" {target { vect_strided && vect_int_mult } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target { vect_strided && vect_int_mult } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" {target { vect_strided8 && vect_int_mult } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target { vect_strided8 && vect_int_mult } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/fast-math-pr35982.c' +--- old/gcc/testsuite/gcc.dg/vect/fast-math-pr35982.c 2008-08-26 08:14:37 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/fast-math-pr35982.c 2011-05-05 15:44:00 +0000 +@@ -20,7 +20,7 @@ + return avg; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_extract_even_odd_wide } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail vect_extract_even_odd_wide } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_extract_even_odd } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail vect_extract_even_odd } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/fast-math-slp-27.c' +--- old/gcc/testsuite/gcc.dg/vect/fast-math-slp-27.c 2010-08-26 11:13:58 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/fast-math-slp-27.c 2011-05-05 15:46:10 +0000 +@@ -13,5 +13,5 @@ + } + } + +-/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target vect_strided } } } */ ++/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/fast-math-vect-complex-3.c' +--- old/gcc/testsuite/gcc.dg/vect/fast-math-vect-complex-3.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/fast-math-vect-complex-3.c 2011-05-05 15:46:10 +0000 +@@ -56,5 +56,5 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd_wide } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c' 
+--- old/gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c 2011-04-24 07:45:49 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c 2011-05-05 15:46:10 +0000 +@@ -65,5 +65,5 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align || {! vect_strided } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align || { ! vect_strided2 } } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/no-scevccp-outer-10a.c' +--- old/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-10a.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-10a.c 2011-05-05 15:46:10 +0000 +@@ -54,5 +54,5 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/no-scevccp-outer-10b.c' +--- old/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-10b.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-10b.c 2011-05-05 15:46:10 +0000 +@@ -53,5 +53,5 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/no-scevccp-outer-18.c' +--- old/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-18.c 2007-10-21 09:01:16 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-18.c 2011-05-05 15:46:10 +0000 +@@ -47,5 +47,5 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target vect_interleave } } } */ ++/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target { vect_interleave || vect_strided2 } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/no-scevccp-outer-20.c' +--- old/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-20.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-20.c 2011-05-05 15:46:10 +0000 +@@ -50,5 +50,5 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/pr30843.c' +--- old/gcc/testsuite/gcc.dg/vect/pr30843.c 2007-02-22 12:30:12 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/pr30843.c 2011-05-05 15:46:10 +0000 +@@ -20,6 +20,6 @@ + } + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave || vect_strided4 } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/pr33866.c' +--- old/gcc/testsuite/gcc.dg/vect/pr33866.c 2007-10-30 08:26:14 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/pr33866.c 2011-05-05 15:46:10 +0000 +@@ -27,6 +27,6 @@ + } + + /* Needs interleaving support. 
*/ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave || vect_strided2 } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/pr37539.c' +--- old/gcc/testsuite/gcc.dg/vect/pr37539.c 2009-11-26 02:03:50 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/pr37539.c 2011-05-05 15:46:10 +0000 +@@ -40,7 +40,7 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_strided_wide } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target { vect_strided4 && vect_strided2 } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + + +=== removed file 'gcc/testsuite/gcc.dg/vect/slp-11.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-11.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-11.c 1970-01-01 00:00:00 +0000 +@@ -1,113 +0,0 @@ +-/* { dg-require-effective-target vect_int } */ +- +-#include <stdarg.h> +-#include "tree-vect.h" +- +-#define N 8 +- +-int +-main1 () +-{ +- int i; +- unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7; +- unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; +- float out2[N*8]; +- +- /* Different operations - not SLPable. */ +- for (i = 0; i < N; i++) +- { +- a0 = in[i*8] + 5; +- a1 = in[i*8 + 1] * 6; +- a2 = in[i*8 + 2] + 7; +- a3 = in[i*8 + 3] + 8; +- a4 = in[i*8 + 4] + 9; +- a5 = in[i*8 + 5] + 10; +- a6 = in[i*8 + 6] + 11; +- a7 = in[i*8 + 7] + 12; +- +- b0 = a0 * 3; +- b1 = a1 * 2; +- b2 = a2 * 12; +- b3 = a3 * 5; +- b4 = a4 * 8; +- b5 = a5 * 4; +- b6 = a6 * 3; +- b7 = a7 * 2; +- +- out[i*8] = b0 - 2; +- out[i*8 + 1] = b1 - 3; +- out[i*8 + 2] = b2 - 2; +- out[i*8 + 3] = b3 - 1; +- out[i*8 + 4] = b4 - 8; +- out[i*8 + 5] = b5 - 7; +- out[i*8 + 6] = b6 - 3; +- out[i*8 + 7] = b7 - 7; +- } +- +- /* check results: */ +- for (i = 0; i < N; i++) +- { +- if (out[i*8] != (in[i*8] + 5) * 3 - 2 +- || out[i*8 + 1] != (in[i*8 + 1] * 6) * 2 - 3 +- || out[i*8 + 2] != (in[i*8 + 2] + 7) * 12 - 2 +- || out[i*8 + 3] != (in[i*8 + 3] + 8) * 5 - 1 +- || out[i*8 + 4] != (in[i*8 + 4] + 9) * 8 - 8 +- || out[i*8 + 5] != (in[i*8 + 5] + 10) * 4 - 7 +- || out[i*8 + 6] != (in[i*8 + 6] + 11) * 3 - 3 +- || out[i*8 + 7] != (in[i*8 + 7] + 12) * 2 - 7) +- abort (); +- } +- +- /* Requires permutation - not SLPable. */ +- for (i = 0; i < N*2; i++) +- { +- out[i*4] = (in[i*4] + 2) * 3; +- out[i*4 + 1] = (in[i*4 + 2] + 2) * 7; +- out[i*4 + 2] = (in[i*4 + 1] + 7) * 3; +- out[i*4 + 3] = (in[i*4 + 3] + 3) * 4; +- } +- +- /* check results: */ +- for (i = 0; i < N*2; i++) +- { +- if (out[i*4] != (in[i*4] + 2) * 3 +- || out[i*4 + 1] != (in[i*4 + 2] + 2) * 7 +- || out[i*4 + 2] != (in[i*4 + 1] + 7) * 3 +- || out[i*4 + 3] != (in[i*4 + 3] + 3) * 4) +- abort (); +- } +- +- /* Different operations - not SLPable. 
*/ +- for (i = 0; i < N*4; i++) +- { +- out2[i*2] = ((float) in[i*2] * 2 + 6) ; +- out2[i*2 + 1] = (float) (in[i*2 + 1] * 3 + 7); +- } +- +- /* check results: */ +- for (i = 0; i < N*4; i++) +- { +- if (out2[i*2] != ((float) in[i*2] * 2 + 6) +- || out2[i*2 + 1] != (float) (in[i*2 + 1] * 3 + 7)) +- abort (); +- } +- +- +- return 0; +-} +- +-int main (void) +-{ +- check_vect (); +- +- main1 (); +- +- return 0; +-} +- +-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target { { vect_uintfloat_cvt && vect_strided_wide } && vect_int_mult } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { { { ! vect_uintfloat_cvt } && vect_strided_wide } && vect_int_mult } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target { ! { vect_int_mult && vect_strided_wide } } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */ +-/* { dg-final { cleanup-tree-dump "vect" } } */ +- + +=== added file 'gcc/testsuite/gcc.dg/vect/slp-11a.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-11a.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-11a.c 2011-05-05 15:46:10 +0000 +@@ -0,0 +1,75 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 8 ++ ++int ++main1 () ++{ ++ int i; ++ unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7; ++ unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; ++ ++ /* Different operations - not SLPable. */ ++ for (i = 0; i < N; i++) ++ { ++ a0 = in[i*8] + 5; ++ a1 = in[i*8 + 1] * 6; ++ a2 = in[i*8 + 2] + 7; ++ a3 = in[i*8 + 3] + 8; ++ a4 = in[i*8 + 4] + 9; ++ a5 = in[i*8 + 5] + 10; ++ a6 = in[i*8 + 6] + 11; ++ a7 = in[i*8 + 7] + 12; ++ ++ b0 = a0 * 3; ++ b1 = a1 * 2; ++ b2 = a2 * 12; ++ b3 = a3 * 5; ++ b4 = a4 * 8; ++ b5 = a5 * 4; ++ b6 = a6 * 3; ++ b7 = a7 * 2; ++ ++ out[i*8] = b0 - 2; ++ out[i*8 + 1] = b1 - 3; ++ out[i*8 + 2] = b2 - 2; ++ out[i*8 + 3] = b3 - 1; ++ out[i*8 + 4] = b4 - 8; ++ out[i*8 + 5] = b5 - 7; ++ out[i*8 + 6] = b6 - 3; ++ out[i*8 + 7] = b7 - 7; ++ } ++ ++ /* check results: */ ++ for (i = 0; i < N; i++) ++ { ++ if (out[i*8] != (in[i*8] + 5) * 3 - 2 ++ || out[i*8 + 1] != (in[i*8 + 1] * 6) * 2 - 3 ++ || out[i*8 + 2] != (in[i*8 + 2] + 7) * 12 - 2 ++ || out[i*8 + 3] != (in[i*8 + 3] + 8) * 5 - 1 ++ || out[i*8 + 4] != (in[i*8 + 4] + 9) * 8 - 8 ++ || out[i*8 + 5] != (in[i*8 + 5] + 10) * 4 - 7 ++ || out[i*8 + 6] != (in[i*8 + 6] + 11) * 3 - 3 ++ || out[i*8 + 7] != (in[i*8 + 7] + 12) * 2 - 7) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++int main (void) ++{ ++ check_vect (); ++ ++ main1 (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided8 && vect_int_mult } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! 
{ vect_strided8 && vect_int_mult } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== added file 'gcc/testsuite/gcc.dg/vect/slp-11b.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-11b.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-11b.c 2011-05-05 15:46:10 +0000 +@@ -0,0 +1,49 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 8 ++ ++int ++main1 () ++{ ++ int i; ++ unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7; ++ unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; ++ ++ /* Requires permutation - not SLPable. */ ++ for (i = 0; i < N*2; i++) ++ { ++ out[i*4] = (in[i*4] + 2) * 3; ++ out[i*4 + 1] = (in[i*4 + 2] + 2) * 7; ++ out[i*4 + 2] = (in[i*4 + 1] + 7) * 3; ++ out[i*4 + 3] = (in[i*4 + 3] + 3) * 4; ++ } ++ ++ /* check results: */ ++ for (i = 0; i < N*2; i++) ++ { ++ if (out[i*4] != (in[i*4] + 2) * 3 ++ || out[i*4 + 1] != (in[i*4 + 2] + 2) * 7 ++ || out[i*4 + 2] != (in[i*4 + 1] + 7) * 3 ++ || out[i*4 + 3] != (in[i*4 + 3] + 3) * 4) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++int main (void) ++{ ++ check_vect (); ++ ++ main1 (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided4 && vect_int_mult } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided4 && vect_int_mult } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== added file 'gcc/testsuite/gcc.dg/vect/slp-11c.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-11c.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-11c.c 2011-05-05 15:46:10 +0000 +@@ -0,0 +1,46 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 8 ++ ++int ++main1 () ++{ ++ int i; ++ unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; ++ float out[N*8]; ++ ++ /* Different operations - not SLPable. */ ++ for (i = 0; i < N*4; i++) ++ { ++ out[i*2] = ((float) in[i*2] * 2 + 6) ; ++ out[i*2 + 1] = (float) (in[i*2 + 1] * 3 + 7); ++ } ++ ++ /* check results: */ ++ for (i = 0; i < N*4; i++) ++ { ++ if (out[i*2] != ((float) in[i*2] * 2 + 6) ++ || out[i*2 + 1] != (float) (in[i*2 + 1] * 3 + 7)) ++ abort (); ++ } ++ ++ ++ return 0; ++} ++ ++int main (void) ++{ ++ check_vect (); ++ ++ main1 (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { { vect_uintfloat_cvt && vect_strided2 } && vect_int_mult } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! 
{ { vect_uintfloat_cvt && vect_strided2 } && vect_int_mult } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/slp-12a.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-12a.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-12a.c 2011-05-05 15:46:10 +0000 +@@ -11,7 +11,7 @@ + int i; + unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7; + unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; +- unsigned int ia[N], ib[N*2]; ++ unsigned int ia[N]; + + for (i = 0; i < N; i++) + { +@@ -61,27 +61,6 @@ + abort (); + } + +- for (i = 0; i < N*2; i++) +- { +- out[i*4] = (in[i*4] + 2) * 3; +- out[i*4 + 1] = (in[i*4 + 1] + 2) * 7; +- out[i*4 + 2] = (in[i*4 + 2] + 7) * 3; +- out[i*4 + 3] = (in[i*4 + 3] + 7) * 7; +- +- ib[i] = 7; +- } +- +- /* check results: */ +- for (i = 0; i < N*2; i++) +- { +- if (out[i*4] != (in[i*4] + 2) * 3 +- || out[i*4 + 1] != (in[i*4 + 1] + 2) * 7 +- || out[i*4 + 2] != (in[i*4 + 2] + 7) * 3 +- || out[i*4 + 3] != (in[i*4 + 3] + 7) * 7 +- || ib[i] != 7) +- abort (); +- } +- + return 0; + } + +@@ -94,11 +73,8 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" {target { vect_strided_wide && vect_int_mult} } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {target { {! {vect_strided_wide}} && vect_int_mult } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target { ! vect_int_mult } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target { vect_strided_wide && vect_int_mult } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" {target { {! {vect_strided_wide}} && vect_int_mult } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target { ! vect_int_mult } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided8 && vect_int_mult } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided8 && vect_int_mult } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_strided8 && vect_int_mult } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided8 && vect_int_mult } } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ +- + +=== modified file 'gcc/testsuite/gcc.dg/vect/slp-12b.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-12b.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-12b.c 2011-05-05 15:46:10 +0000 +@@ -43,9 +43,9 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {target { vect_strided_wide && vect_int_mult } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target { { ! { vect_int_mult }} || { ! {vect_strided_wide}}} } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" {target { vect_strided_wide && vect_int_mult } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target { { ! { vect_int_mult }} || { ! 
{vect_strided_wide}}} } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided2 && vect_int_mult } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided2 && vect_int_mult } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_strided2 && vect_int_mult } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided2 && vect_int_mult } } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== added file 'gcc/testsuite/gcc.dg/vect/slp-12c.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-12c.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-12c.c 2011-05-05 15:44:41 +0000 +@@ -0,0 +1,53 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 8 ++ ++int ++main1 () ++{ ++ int i; ++ unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7; ++ unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; ++ unsigned int ia[N*2]; ++ ++ for (i = 0; i < N*2; i++) ++ { ++ out[i*4] = (in[i*4] + 2) * 3; ++ out[i*4 + 1] = (in[i*4 + 1] + 2) * 7; ++ out[i*4 + 2] = (in[i*4 + 2] + 7) * 3; ++ out[i*4 + 3] = (in[i*4 + 3] + 7) * 7; ++ ++ ia[i] = 7; ++ } ++ ++ /* check results: */ ++ for (i = 0; i < N*2; i++) ++ { ++ if (out[i*4] != (in[i*4] + 2) * 3 ++ || out[i*4 + 1] != (in[i*4 + 1] + 2) * 7 ++ || out[i*4 + 2] != (in[i*4 + 2] + 7) * 3 ++ || out[i*4 + 3] != (in[i*4 + 3] + 7) * 7 ++ || ia[i] != 7) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++int main (void) ++{ ++ check_vect (); ++ ++ main1 (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_int_mult } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_int_mult } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_int_mult } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! 
vect_int_mult } } } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/slp-18.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-18.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-18.c 2011-05-05 15:46:10 +0000 +@@ -91,7 +91,7 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_strided } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided8 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== removed file 'gcc/testsuite/gcc.dg/vect/slp-19.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-19.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-19.c 1970-01-01 00:00:00 +0000 +@@ -1,154 +0,0 @@ +-/* { dg-require-effective-target vect_int } */ +- +-#include <stdarg.h> +-#include "tree-vect.h" +- +-#define N 16 +- +-int +-main1 () +-{ +- unsigned int i; +- unsigned int out[N*8]; +- unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; +- unsigned int ia[N*2], a0, a1, a2, a3; +- +- for (i = 0; i < N; i++) +- { +- out[i*8] = in[i*8]; +- out[i*8 + 1] = in[i*8 + 1]; +- out[i*8 + 2] = in[i*8 + 2]; +- out[i*8 + 3] = in[i*8 + 3]; +- out[i*8 + 4] = in[i*8 + 4]; +- out[i*8 + 5] = in[i*8 + 5]; +- out[i*8 + 6] = in[i*8 + 6]; +- out[i*8 + 7] = in[i*8 + 7]; +- +- ia[i] = in[i*8 + 2]; +- } +- +- /* check results: */ +- for (i = 0; i < N; i++) +- { +- if (out[i*8] != in[i*8] +- || out[i*8 + 1] != in[i*8 + 1] +- || out[i*8 + 2] != in[i*8 + 2] +- || out[i*8 + 3] != in[i*8 + 3] +- || out[i*8 + 4] != in[i*8 + 4] +- || out[i*8 + 5] != in[i*8 + 5] +- || out[i*8 + 6] != in[i*8 + 6] +- || out[i*8 + 7] != in[i*8 + 7] +- || ia[i] != in[i*8 + 2]) +- abort (); +- } +- +- for (i = 0; i < N*2; i++) +- { +- a0 = in[i*4] + 1; +- a1 = in[i*4 + 1] + 2; +- a2 = in[i*4 + 2] + 3; +- a3 = in[i*4 + 3] + 4; +- +- out[i*4] = a0; +- out[i*4 + 1] = a1; +- out[i*4 + 2] = a2; +- out[i*4 + 3] = a3; +- +- ia[i] = a2; +- } +- +- /* check results: */ +- for (i = 0; i < N*2; i++) +- { +- if (out[i*4] != in[i*4] + 1 +- || out[i*4 + 1] != in[i*4 + 1] + 2 +- || out[i*4 + 2] != in[i*4 + 2] + 3 +- || out[i*4 + 3] != in[i*4 + 3] + 4 +- || ia[i] != in[i*4 + 2] + 3) +- abort (); +- } +- +- /* The last stmt requires interleaving of not power of 2 size - not +- vectorizable. 
*/ +- for (i = 0; i < N/2; i++) +- { +- out[i*12] = in[i*12]; +- out[i*12 + 1] = in[i*12 + 1]; +- out[i*12 + 2] = in[i*12 + 2]; +- out[i*12 + 3] = in[i*12 + 3]; +- out[i*12 + 4] = in[i*12 + 4]; +- out[i*12 + 5] = in[i*12 + 5]; +- out[i*12 + 6] = in[i*12 + 6]; +- out[i*12 + 7] = in[i*12 + 7]; +- out[i*12 + 8] = in[i*12 + 8]; +- out[i*12 + 9] = in[i*12 + 9]; +- out[i*12 + 10] = in[i*12 + 10]; +- out[i*12 + 11] = in[i*12 + 11]; +- +- ia[i] = in[i*12 + 7]; +- } +- +- /* check results: */ +- for (i = 0; i < N/2; i++) +- { +- if (out[i*12] != in[i*12] +- || out[i*12 + 1] != in[i*12 + 1] +- || out[i*12 + 2] != in[i*12 + 2] +- || out[i*12 + 3] != in[i*12 + 3] +- || out[i*12 + 4] != in[i*12 + 4] +- || out[i*12 + 5] != in[i*12 + 5] +- || out[i*12 + 6] != in[i*12 + 6] +- || out[i*12 + 7] != in[i*12 + 7] +- || out[i*12 + 8] != in[i*12 + 8] +- || out[i*12 + 9] != in[i*12 + 9] +- || out[i*12 + 10] != in[i*12 + 10] +- || out[i*12 + 11] != in[i*12 + 11] +- || ia[i] != in[i*12 + 7]) +- abort (); +- } +- +- /* Hybrid SLP with unrolling by 2. */ +- for (i = 0; i < N; i++) +- { +- out[i*6] = in[i*6]; +- out[i*6 + 1] = in[i*6 + 1]; +- out[i*6 + 2] = in[i*6 + 2]; +- out[i*6 + 3] = in[i*6 + 3]; +- out[i*6 + 4] = in[i*6 + 4]; +- out[i*6 + 5] = in[i*6 + 5]; +- +- ia[i] = i; +- } +- +- /* check results: */ +- for (i = 0; i < N/2; i++) +- { +- if (out[i*6] != in[i*6] +- || out[i*6 + 1] != in[i*6 + 1] +- || out[i*6 + 2] != in[i*6 + 2] +- || out[i*6 + 3] != in[i*6 + 3] +- || out[i*6 + 4] != in[i*6 + 4] +- || out[i*6 + 5] != in[i*6 + 5] +- || ia[i] != i) +- abort (); +- } +- +- +- return 0; +-} +- +-int main (void) +-{ +- check_vect (); +- +- main1 (); +- +- return 0; +-} +- +-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target vect_strided_wide } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided_wide } } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { target vect_strided_wide } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { ! 
{ vect_strided_wide } } } } } */ +-/* { dg-final { cleanup-tree-dump "vect" } } */ +- + +=== added file 'gcc/testsuite/gcc.dg/vect/slp-19a.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-19a.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-19a.c 2011-05-05 15:46:10 +0000 +@@ -0,0 +1,61 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 16 ++ ++int ++main1 () ++{ ++ unsigned int i; ++ unsigned int out[N*8]; ++ unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; ++ unsigned int ia[N*2]; ++ ++ for (i = 0; i < N; i++) ++ { ++ out[i*8] = in[i*8]; ++ out[i*8 + 1] = in[i*8 + 1]; ++ out[i*8 + 2] = in[i*8 + 2]; ++ out[i*8 + 3] = in[i*8 + 3]; ++ out[i*8 + 4] = in[i*8 + 4]; ++ out[i*8 + 5] = in[i*8 + 5]; ++ out[i*8 + 6] = in[i*8 + 6]; ++ out[i*8 + 7] = in[i*8 + 7]; ++ ++ ia[i] = in[i*8 + 2]; ++ } ++ ++ /* check results: */ ++ for (i = 0; i < N; i++) ++ { ++ if (out[i*8] != in[i*8] ++ || out[i*8 + 1] != in[i*8 + 1] ++ || out[i*8 + 2] != in[i*8 + 2] ++ || out[i*8 + 3] != in[i*8 + 3] ++ || out[i*8 + 4] != in[i*8 + 4] ++ || out[i*8 + 5] != in[i*8 + 5] ++ || out[i*8 + 6] != in[i*8 + 6] ++ || out[i*8 + 7] != in[i*8 + 7] ++ || ia[i] != in[i*8 + 2]) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++int main (void) ++{ ++ check_vect (); ++ ++ main1 (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_strided8 } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided8 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! 
vect_strided8} } } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== added file 'gcc/testsuite/gcc.dg/vect/slp-19b.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-19b.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-19b.c 2011-05-05 15:46:10 +0000 +@@ -0,0 +1,58 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 16 ++ ++int ++main1 () ++{ ++ unsigned int i; ++ unsigned int out[N*8]; ++ unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; ++ unsigned int ia[N*2], a0, a1, a2, a3; ++ ++ for (i = 0; i < N*2; i++) ++ { ++ a0 = in[i*4] + 1; ++ a1 = in[i*4 + 1] + 2; ++ a2 = in[i*4 + 2] + 3; ++ a3 = in[i*4 + 3] + 4; ++ ++ out[i*4] = a0; ++ out[i*4 + 1] = a1; ++ out[i*4 + 2] = a2; ++ out[i*4 + 3] = a3; ++ ++ ia[i] = a2; ++ } ++ ++ /* check results: */ ++ for (i = 0; i < N*2; i++) ++ { ++ if (out[i*4] != in[i*4] + 1 ++ || out[i*4 + 1] != in[i*4 + 1] + 2 ++ || out[i*4 + 2] != in[i*4 + 2] + 3 ++ || out[i*4 + 3] != in[i*4 + 3] + 4 ++ || ia[i] != in[i*4 + 2] + 3) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++int main (void) ++{ ++ check_vect (); ++ ++ main1 (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided4 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_strided4 } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided4 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_strided4 } } } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== added file 'gcc/testsuite/gcc.dg/vect/slp-19c.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-19c.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-19c.c 2011-05-05 15:44:41 +0000 +@@ -0,0 +1,95 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 16 ++ ++int ++main1 () ++{ ++ unsigned int i; ++ unsigned int out[N*8]; ++ unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; ++ unsigned int ia[N*2], a0, a1, a2, a3; ++ ++ /* The last stmt requires interleaving of not power of 2 size - not ++ vectorizable. 
*/ ++ for (i = 0; i < N/2; i++) ++ { ++ out[i*12] = in[i*12]; ++ out[i*12 + 1] = in[i*12 + 1]; ++ out[i*12 + 2] = in[i*12 + 2]; ++ out[i*12 + 3] = in[i*12 + 3]; ++ out[i*12 + 4] = in[i*12 + 4]; ++ out[i*12 + 5] = in[i*12 + 5]; ++ out[i*12 + 6] = in[i*12 + 6]; ++ out[i*12 + 7] = in[i*12 + 7]; ++ out[i*12 + 8] = in[i*12 + 8]; ++ out[i*12 + 9] = in[i*12 + 9]; ++ out[i*12 + 10] = in[i*12 + 10]; ++ out[i*12 + 11] = in[i*12 + 11]; ++ ++ ia[i] = in[i*12 + 7]; ++ } ++ ++ /* check results: */ ++ for (i = 0; i < N/2; i++) ++ { ++ if (out[i*12] != in[i*12] ++ || out[i*12 + 1] != in[i*12 + 1] ++ || out[i*12 + 2] != in[i*12 + 2] ++ || out[i*12 + 3] != in[i*12 + 3] ++ || out[i*12 + 4] != in[i*12 + 4] ++ || out[i*12 + 5] != in[i*12 + 5] ++ || out[i*12 + 6] != in[i*12 + 6] ++ || out[i*12 + 7] != in[i*12 + 7] ++ || out[i*12 + 8] != in[i*12 + 8] ++ || out[i*12 + 9] != in[i*12 + 9] ++ || out[i*12 + 10] != in[i*12 + 10] ++ || out[i*12 + 11] != in[i*12 + 11] ++ || ia[i] != in[i*12 + 7]) ++ abort (); ++ } ++ ++ /* Hybrid SLP with unrolling by 2. */ ++ for (i = 0; i < N; i++) ++ { ++ out[i*6] = in[i*6]; ++ out[i*6 + 1] = in[i*6 + 1]; ++ out[i*6 + 2] = in[i*6 + 2]; ++ out[i*6 + 3] = in[i*6 + 3]; ++ out[i*6 + 4] = in[i*6 + 4]; ++ out[i*6 + 5] = in[i*6 + 5]; ++ ++ ia[i] = i; ++ } ++ ++ /* check results: */ ++ for (i = 0; i < N/2; i++) ++ { ++ if (out[i*6] != in[i*6] ++ || out[i*6 + 1] != in[i*6 + 1] ++ || out[i*6 + 2] != in[i*6 + 2] ++ || out[i*6 + 3] != in[i*6 + 3] ++ || out[i*6 + 4] != in[i*6 + 4] ++ || out[i*6 + 5] != in[i*6 + 5] ++ || ia[i] != i) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++int main (void) ++{ ++ check_vect (); ++ ++ main1 (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/slp-21.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-21.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-21.c 2011-05-05 15:46:10 +0000 +@@ -199,9 +199,9 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" { target { vect_strided || vect_extract_even_odd } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided || vect_extract_even_odd } } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" { target { vect_strided4 || vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided4 || vect_extract_even_odd } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided4 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided4 } } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/slp-23.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-23.c 2011-01-10 12:51:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-23.c 2011-05-05 15:46:10 +0000 +@@ -106,8 +106,8 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { vect_strided_wide } && {! 
{ vect_no_align} } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided_wide || vect_no_align} } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { vect_strided8 && { ! { vect_no_align} } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided8 || vect_no_align } } } } } */ + /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/slp-reduc-6.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-reduc-6.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-reduc-6.c 2011-05-05 15:46:10 +0000 +@@ -42,7 +42,7 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail { vect_no_int_add || { ! vect_unpack } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail { vect_no_int_add || { ! { vect_unpack || vect_strided2 } } } } } } */ + /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */ + /* { dg-final { scan-tree-dump-times "different interleaving chains in one node" 1 "vect" { target { ! vect_no_int_add } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-1.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-1.c 2010-08-19 10:23:50 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-1.c 2011-05-05 15:46:10 +0000 +@@ -85,6 +85,6 @@ + fbar (a); + } + +-/* { dg-final { scan-tree-dump-times "vectorized 6 loops" 1 "vect" { target vect_extract_even_odd_wide } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 5 loops" 1 "vect" { xfail vect_extract_even_odd_wide } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 6 loops" 1 "vect" { target vect_strided2 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 5 loops" 1 "vect" { xfail vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-10.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-10.c 2010-05-27 12:23:45 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-10.c 2011-05-05 15:46:10 +0000 +@@ -22,5 +22,5 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { ! vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { ! vect_strided2 } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-107.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-107.c 2008-08-19 08:06:54 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-107.c 2011-05-05 15:46:10 +0000 +@@ -40,6 +40,6 @@ + return main1 (); + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_extract_even_odd_wide } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail vect_extract_even_odd_wide } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-98.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-98.c 2008-08-02 11:05:47 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-98.c 2011-05-05 15:46:10 +0000 +@@ -38,6 +38,6 @@ + } + + /* Needs interleaving support. 
*/ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd_wide } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { xfail { vect_interleave && vect_extract_even_odd_wide } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided4 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { xfail vect_strided4 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-cselim-1.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-cselim-1.c 2011-03-27 09:38:18 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-cselim-1.c 2011-05-05 15:46:10 +0000 +@@ -82,5 +82,5 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align || {! vect_strided } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align || { ! vect_strided2 } } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-a-mult.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-a-mult.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-a-mult.c 2011-05-05 15:46:10 +0000 +@@ -71,6 +71,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i2.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i2.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i2.c 2011-05-05 15:46:10 +0000 +@@ -55,6 +55,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c 2011-05-05 15:46:10 +0000 +@@ -68,6 +68,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided4 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-mult.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-mult.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-mult.c 2011-05-05 15:46:10 +0000 +@@ -62,6 +62,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-a-u32-mult.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-a-u32-mult.c 2010-05-27 12:23:45 +0000 ++++ 
new/gcc/testsuite/gcc.dg/vect/vect-strided-a-u32-mult.c 2011-05-05 15:46:10 +0000 +@@ -61,6 +61,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i2-gap.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i2-gap.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i2-gap.c 2011-05-05 15:46:10 +0000 +@@ -69,6 +69,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap2.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap2.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap2.c 2011-05-05 15:46:10 +0000 +@@ -76,6 +76,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap7.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap7.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap7.c 2011-05-05 15:46:10 +0000 +@@ -81,6 +81,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-float.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-float.c 2008-08-19 08:06:54 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-float.c 2011-05-05 15:46:10 +0000 +@@ -39,7 +39,7 @@ + } + + /* Needs interleaving support. 
*/ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd_wide } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail { vect_interleave && vect_extract_even_odd_wide } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-mult-char-ls.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-mult-char-ls.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-mult-char-ls.c 2011-05-05 15:46:10 +0000 +@@ -71,6 +71,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-mult.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-mult.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-mult.c 2011-05-05 15:46:10 +0000 +@@ -71,6 +71,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-same-dr.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-same-dr.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-same-dr.c 2011-05-05 15:46:10 +0000 +@@ -72,5 +72,5 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-store-a-u8-i2.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-store-a-u8-i2.c 2008-08-12 05:31:57 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-store-a-u8-i2.c 2011-05-05 15:46:10 +0000 +@@ -55,6 +55,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave || vect_strided2 } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-store-u16-i4.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-store-u16-i4.c 2007-10-21 09:01:16 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-store-u16-i4.c 2011-05-05 15:46:10 +0000 +@@ -65,8 +65,8 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target { vect_interleave && vect_pack_trunc } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { { ! 
{ vect_interleave } } && { vect_pack_trunc } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target { { vect_interleave || vect_strided4 } && vect_pack_trunc } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { { ! { vect_interleave || vect_strided4 } } && { vect_pack_trunc } } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-store-u32-i2.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-store-u32-i2.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-store-u32-i2.c 2011-05-05 15:46:10 +0000 +@@ -39,7 +39,7 @@ + } + + /* Needs interleaving support. */ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail { vect_interleave } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave || vect_strided2 } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail { vect_interleave || vect_strided2 } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-u16-i2.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i2.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i2.c 2011-05-05 15:46:10 +0000 +@@ -55,6 +55,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-strided-u16-i3.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i3.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i3.c 2011-05-05 15:46:25 +0000 +@@ -0,0 +1,112 @@ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 128 ++ ++typedef struct { ++ unsigned short a; ++ unsigned short b; ++ unsigned short c; ++} s; ++ ++#define A(I) (I) ++#define B(I) ((I) * 2) ++#define C(I) ((unsigned short) ~((I) ^ 0x18)) ++ ++void __attribute__ ((noinline)) ++check1 (s *res) ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ if (res[i].a != C (i) ++ || res[i].b != A (i) ++ || res[i].c != B (i)) ++ abort (); ++} ++ ++void __attribute__ ((noinline)) ++check2 (unsigned short *res) ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ if (res[i] != (unsigned short) (A (i) + B (i) + C (i))) ++ abort (); ++} ++ ++void __attribute__ ((noinline)) ++check3 (s *res) ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ if (res[i].a != i ++ || res[i].b != i ++ || res[i].c != i) ++ abort (); ++} ++ ++void __attribute__ ((noinline)) ++check4 (unsigned short *res) ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ if (res[i] != (unsigned short) (A (i) + B (i))) ++ abort (); ++} ++ ++void __attribute__ ((noinline)) ++main1 (s *arr) ++{ ++ int i; ++ s *ptr = arr; ++ s res1[N]; ++ unsigned short res2[N]; ++ ++ for (i = 0; i < N; i++) ++ { ++ res1[i].a = arr[i].c; ++ res1[i].b = arr[i].a; ++ res1[i].c = arr[i].b; ++ } ++ check1 (res1); ++ ++ for (i = 0; i < N; i++) ++ res2[i] = arr[i].a + arr[i].b + arr[i].c; ++ check2 (res2); ++ ++ for (i = 0; i < N; i++) ++ { ++ res1[i].a = i; ++ res1[i].b = i; ++ res1[i].c = i; ++ } ++ check3 (res1); ++ ++ for (i = 0; i < N; i++) ++ res2[i] = 
arr[i].a + arr[i].b; ++ check4 (res2); ++} ++ ++int main (void) ++{ ++ int i; ++ s arr[N]; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ arr[i].a = A (i); ++ arr[i].b = B (i); ++ arr[i].c = C (i); ++ } ++ main1 (arr); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" { target vect_strided3 } } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-u16-i4.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i4.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i4.c 2011-05-05 15:46:10 +0000 +@@ -68,6 +68,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided4 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-u32-i4.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-u32-i4.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-u32-i4.c 2011-05-05 15:46:10 +0000 +@@ -63,6 +63,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided4 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-u32-i8.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-u32-i8.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-u32-i8.c 2011-05-05 15:46:10 +0000 +@@ -77,6 +77,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-u32-mult.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-u32-mult.c 2010-05-27 12:23:45 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-u32-mult.c 2011-05-05 15:46:10 +0000 +@@ -60,6 +60,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2-gap.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2-gap.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2-gap.c 2011-05-05 15:46:10 +0000 +@@ -71,6 +71,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2.c 2011-05-05 15:46:10 +0000 +@@ -54,6 +54,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" 
{ target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap2.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap2.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap2.c 2011-05-05 15:46:10 +0000 +@@ -78,6 +78,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c 2011-05-05 15:46:10 +0000 +@@ -98,6 +98,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap7.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap7.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap7.c 2011-05-05 15:46:10 +0000 +@@ -83,6 +83,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8.c 2011-05-05 15:46:10 +0000 +@@ -85,6 +85,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-vfa-03.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-vfa-03.c 2007-09-09 07:46:12 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-vfa-03.c 2011-05-05 15:46:10 +0000 +@@ -53,6 +53,6 @@ + } + + /* Needs interleaving support. 
*/ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { xfail { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { xfail vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect.exp' +--- old/gcc/testsuite/gcc.dg/vect/vect.exp 2011-04-24 07:45:49 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect.exp 2011-05-05 15:43:31 +0000 +@@ -75,15 +75,20 @@ + lappend VECT_SLP_CFLAGS "-fdump-tree-slp-details" + + # Main loop. +-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/pr*.\[cS\]]] \ +- "" $DEFAULT_VECTCFLAGS +-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/vect-*.\[cS\]]] \ +- "" $DEFAULT_VECTCFLAGS +-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/slp-*.\[cS\]]] \ +- "" $DEFAULT_VECTCFLAGS +-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/bb-slp*.\[cS\]]] \ +- "" $VECT_SLP_CFLAGS +- ++set VECT_ADDITIONAL_FLAGS [list ""] ++if { [check_effective_target_lto] } { ++ lappend VECT_ADDITIONAL_FLAGS "-flto" ++} ++foreach flags $VECT_ADDITIONAL_FLAGS { ++ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/pr*.\[cS\]]] \ ++ $flags $DEFAULT_VECTCFLAGS ++ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/vect-*.\[cS\]]] \ ++ $flags $DEFAULT_VECTCFLAGS ++ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/slp-*.\[cS\]]] \ ++ $flags $DEFAULT_VECTCFLAGS ++ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/bb-slp*.\[cS\]]] \ ++ $flags $VECT_SLP_CFLAGS ++} + + #### Tests with special options + global SAVED_DEFAULT_VECTCFLAGS + +=== modified file 'gcc/testsuite/lib/target-supports.exp' +--- old/gcc/testsuite/lib/target-supports.exp 2011-05-06 11:28:27 +0000 ++++ new/gcc/testsuite/lib/target-supports.exp 2011-06-02 12:12:00 +0000 +@@ -3139,29 +3139,6 @@ + return $et_vect_extract_even_odd_saved + } + +-# Return 1 if the target supports vector even/odd elements extraction of +-# vectors with SImode elements or larger, 0 otherwise. +- +-proc check_effective_target_vect_extract_even_odd_wide { } { +- global et_vect_extract_even_odd_wide_saved +- +- if [info exists et_vect_extract_even_odd_wide_saved] { +- verbose "check_effective_target_vect_extract_even_odd_wide: using cached result" 2 +- } else { +- set et_vect_extract_even_odd_wide_saved 0 +- if { [istarget powerpc*-*-*] +- || [istarget i?86-*-*] +- || [istarget x86_64-*-*] +- || [istarget ia64-*-*] +- || [istarget spu-*-*] } { +- set et_vect_extract_even_odd_wide_saved 1 +- } +- } +- +- verbose "check_effective_target_vect_extract_even_wide_odd: returning $et_vect_extract_even_odd_wide_saved" 2 +- return $et_vect_extract_even_odd_wide_saved +-} +- + # Return 1 if the target supports vector interleaving, 0 otherwise. + + proc check_effective_target_vect_interleave { } { +@@ -3184,41 +3161,30 @@ + return $et_vect_interleave_saved + } + +-# Return 1 if the target supports vector interleaving and extract even/odd, 0 otherwise. 
+-proc check_effective_target_vect_strided { } { +- global et_vect_strided_saved +- +- if [info exists et_vect_strided_saved] { +- verbose "check_effective_target_vect_strided: using cached result" 2 +- } else { +- set et_vect_strided_saved 0 +- if { [check_effective_target_vect_interleave] +- && [check_effective_target_vect_extract_even_odd] } { +- set et_vect_strided_saved 1 +- } +- } +- +- verbose "check_effective_target_vect_strided: returning $et_vect_strided_saved" 2 +- return $et_vect_strided_saved +-} +- +-# Return 1 if the target supports vector interleaving and extract even/odd +-# for wide element types, 0 otherwise. +-proc check_effective_target_vect_strided_wide { } { +- global et_vect_strided_wide_saved +- +- if [info exists et_vect_strided_wide_saved] { +- verbose "check_effective_target_vect_strided_wide: using cached result" 2 +- } else { +- set et_vect_strided_wide_saved 0 +- if { [check_effective_target_vect_interleave] +- && [check_effective_target_vect_extract_even_odd_wide] } { +- set et_vect_strided_wide_saved 1 +- } +- } +- +- verbose "check_effective_target_vect_strided_wide: returning $et_vect_strided_wide_saved" 2 +- return $et_vect_strided_wide_saved ++foreach N {2 3 4 8} { ++ eval [string map [list N $N] { ++ # Return 1 if the target supports 2-vector interleaving ++ proc check_effective_target_vect_stridedN { } { ++ global et_vect_stridedN_saved ++ ++ if [info exists et_vect_stridedN_saved] { ++ verbose "check_effective_target_vect_stridedN: using cached result" 2 ++ } else { ++ set et_vect_stridedN_saved 0 ++ if { (N & -N) == N ++ && [check_effective_target_vect_interleave] ++ && [check_effective_target_vect_extract_even_odd] } { ++ set et_vect_stridedN_saved 1 ++ } ++ if { [istarget arm*-*-*] && N >= 2 && N <= 4 } { ++ set et_vect_stridedN_saved 1 ++ } ++ } ++ ++ verbose "check_effective_target_vect_stridedN: returning $et_vect_stridedN_saved" 2 ++ return $et_vect_stridedN_saved ++ } ++ }] + } + + # Return 1 if the target supports section-anchors + +=== modified file 'gcc/tree-cfg.c' +--- old/gcc/tree-cfg.c 2011-02-12 21:11:33 +0000 ++++ new/gcc/tree-cfg.c 2011-05-05 15:42:22 +0000 +@@ -3046,7 +3046,26 @@ + tree fntype; + unsigned i; + +- if (TREE_CODE (fn) != OBJ_TYPE_REF ++ if (gimple_call_internal_p (stmt)) ++ { ++ if (fn) ++ { ++ error ("gimple call has two targets"); ++ debug_generic_stmt (fn); ++ return true; ++ } ++ } ++ else ++ { ++ if (!fn) ++ { ++ error ("gimple call has no target"); ++ return true; ++ } ++ } ++ ++ if (fn ++ && TREE_CODE (fn) != OBJ_TYPE_REF + && !is_gimple_val (fn)) + { + error ("invalid function in gimple call"); +@@ -3054,9 +3073,10 @@ + return true; + } + +- if (!POINTER_TYPE_P (TREE_TYPE (fn)) +- || (TREE_CODE (TREE_TYPE (TREE_TYPE (fn))) != FUNCTION_TYPE +- && TREE_CODE (TREE_TYPE (TREE_TYPE (fn))) != METHOD_TYPE)) ++ if (fn ++ && (!POINTER_TYPE_P (TREE_TYPE (fn)) ++ || (TREE_CODE (TREE_TYPE (TREE_TYPE (fn))) != FUNCTION_TYPE ++ && TREE_CODE (TREE_TYPE (TREE_TYPE (fn))) != METHOD_TYPE))) + { + error ("non-function in gimple call"); + return true; +@@ -3076,8 +3096,12 @@ + return true; + } + +- fntype = TREE_TYPE (TREE_TYPE (fn)); +- if (gimple_call_lhs (stmt) ++ if (fn) ++ fntype = TREE_TYPE (TREE_TYPE (fn)); ++ else ++ fntype = NULL_TREE; ++ if (fntype ++ && gimple_call_lhs (stmt) + && !useless_type_conversion_p (TREE_TYPE (gimple_call_lhs (stmt)), + TREE_TYPE (fntype)) + /* ??? At least C++ misses conversions at assignments from +@@ -4130,9 +4154,10 @@ + didn't see a function declaration before the call. 
*/ + if (is_gimple_call (stmt)) + { +- tree decl; ++ tree fn, decl; + +- if (!is_gimple_call_addr (gimple_call_fn (stmt))) ++ fn = gimple_call_fn (stmt); ++ if (fn && !is_gimple_call_addr (fn)) + { + error ("invalid function in call statement"); + return true; +@@ -7484,6 +7509,8 @@ + case GIMPLE_CALL: + if (gimple_call_lhs (g)) + break; ++ if (gimple_call_internal_p (g)) ++ break; + + /* This is a naked call, as opposed to a GIMPLE_CALL with an + LHS. All calls whose value is ignored should be + +=== modified file 'gcc/tree-eh.c' +--- old/gcc/tree-eh.c 2011-05-10 06:31:59 +0000 ++++ new/gcc/tree-eh.c 2011-06-02 12:12:00 +0000 +@@ -2745,7 +2745,7 @@ + || gimple_call_lhs (twos) + || gimple_call_chain (ones) + || gimple_call_chain (twos) +- || !operand_equal_p (gimple_call_fn (ones), gimple_call_fn (twos), 0) ++ || !gimple_call_same_target_p (ones, twos) + || gimple_call_num_args (ones) != gimple_call_num_args (twos)) + return false; + + +=== modified file 'gcc/tree-inline.c' +--- old/gcc/tree-inline.c 2011-05-05 21:02:06 +0000 ++++ new/gcc/tree-inline.c 2011-06-02 12:12:00 +0000 +@@ -3471,10 +3471,13 @@ + { + tree decl = gimple_call_fndecl (stmt); + tree addr = gimple_call_fn (stmt); +- tree funtype = TREE_TYPE (addr); ++ tree funtype = NULL_TREE; + bool stdarg = false; + +- if (POINTER_TYPE_P (funtype)) ++ if (addr) ++ funtype = TREE_TYPE (addr); ++ ++ if (funtype && POINTER_TYPE_P (funtype)) + funtype = TREE_TYPE (funtype); + + /* Do not special case builtins where we see the body. +@@ -3514,7 +3517,7 @@ + if (decl) + funtype = TREE_TYPE (decl); + +- if (!VOID_TYPE_P (TREE_TYPE (funtype))) ++ if (funtype && !VOID_TYPE_P (TREE_TYPE (funtype))) + cost += estimate_move_cost (TREE_TYPE (funtype)); + + if (funtype) + +=== modified file 'gcc/tree-ssa-ccp.c' +--- old/gcc/tree-ssa-ccp.c 2011-01-31 16:52:22 +0000 ++++ new/gcc/tree-ssa-ccp.c 2011-05-05 15:42:22 +0000 +@@ -1279,7 +1279,10 @@ + + case GIMPLE_CALL: + { +- tree fn = valueize_op (gimple_call_fn (stmt)); ++ tree fn = gimple_call_fn (stmt); ++ if (!fn) ++ return NULL_TREE; ++ fn = valueize_op (fn); + if (TREE_CODE (fn) == ADDR_EXPR + && TREE_CODE (TREE_OPERAND (fn, 0)) == FUNCTION_DECL + && DECL_BUILT_IN (TREE_OPERAND (fn, 0))) +@@ -2310,6 +2313,11 @@ + return true; + } + ++ /* Internal calls provide no argument types, so the extra laxity ++ for normal calls does not apply. */ ++ if (gimple_call_internal_p (stmt)) ++ return false; ++ + /* Propagate into the call arguments. Compared to replace_uses_in + this can use the argument slot types for type verification + instead of the current argument type. We also can safely + +=== modified file 'gcc/tree-ssa-dom.c' +--- old/gcc/tree-ssa-dom.c 2011-02-14 17:59:10 +0000 ++++ new/gcc/tree-ssa-dom.c 2011-05-05 15:42:22 +0000 +@@ -64,7 +64,7 @@ + struct { enum tree_code op; tree opnd; } unary; + struct { enum tree_code op; tree opnd0, opnd1; } binary; + struct { enum tree_code op; tree opnd0, opnd1, opnd2; } ternary; +- struct { tree fn; bool pure; size_t nargs; tree *args; } call; ++ struct { gimple fn_from; bool pure; size_t nargs; tree *args; } call; + } ops; + }; + +@@ -258,7 +258,7 @@ + + expr->type = TREE_TYPE (gimple_call_lhs (stmt)); + expr->kind = EXPR_CALL; +- expr->ops.call.fn = gimple_call_fn (stmt); ++ expr->ops.call.fn_from = stmt; + + if (gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)) + expr->ops.call.pure = true; +@@ -422,8 +422,8 @@ + + /* If the calls are to different functions, then they + clearly cannot be equal. */ +- if (! 
operand_equal_p (expr0->ops.call.fn, +- expr1->ops.call.fn, 0)) ++ if (!gimple_call_same_target_p (expr0->ops.call.fn_from, ++ expr1->ops.call.fn_from)) + return false; + + if (! expr0->ops.call.pure) +@@ -503,9 +503,15 @@ + { + size_t i; + enum tree_code code = CALL_EXPR; ++ gimple fn_from; + + val = iterative_hash_object (code, val); +- val = iterative_hash_expr (expr->ops.call.fn, val); ++ fn_from = expr->ops.call.fn_from; ++ if (gimple_call_internal_p (fn_from)) ++ val = iterative_hash_hashval_t ++ ((hashval_t) gimple_call_internal_fn (fn_from), val); ++ else ++ val = iterative_hash_expr (gimple_call_fn (fn_from), val); + for (i = 0; i < expr->ops.call.nargs; i++) + val = iterative_hash_expr (expr->ops.call.args[i], val); + } +@@ -565,8 +571,14 @@ + { + size_t i; + size_t nargs = element->expr.ops.call.nargs; ++ gimple fn_from; + +- print_generic_expr (stream, element->expr.ops.call.fn, 0); ++ fn_from = element->expr.ops.call.fn_from; ++ if (gimple_call_internal_p (fn_from)) ++ fputs (internal_fn_name (gimple_call_internal_fn (fn_from)), ++ stream); ++ else ++ print_generic_expr (stream, gimple_call_fn (fn_from), 0); + fprintf (stream, " ("); + for (i = 0; i < nargs; i++) + { + +=== modified file 'gcc/tree-ssa-pre.c' +--- old/gcc/tree-ssa-pre.c 2011-02-15 13:04:47 +0000 ++++ new/gcc/tree-ssa-pre.c 2011-05-05 15:42:22 +0000 +@@ -2657,11 +2657,13 @@ + } + + /* Return true if we can value number the call in STMT. This is true +- if we have a pure or constant call. */ ++ if we have a pure or constant call to a real function. */ + + static bool + can_value_number_call (gimple stmt) + { ++ if (gimple_call_internal_p (stmt)) ++ return false; + if (gimple_call_flags (stmt) & (ECF_PURE | ECF_CONST)) + return true; + return false; +@@ -4187,6 +4189,7 @@ + gimple_stmt_iterator gsi; + gimple stmt; + unsigned i; ++ tree fn; + + FOR_EACH_BB (b) + { +@@ -4378,9 +4381,10 @@ + /* Visit indirect calls and turn them into direct calls if + possible. */ + if (is_gimple_call (stmt) +- && TREE_CODE (gimple_call_fn (stmt)) == SSA_NAME) ++ && (fn = gimple_call_fn (stmt)) ++ && TREE_CODE (fn) == SSA_NAME) + { +- tree fn = VN_INFO (gimple_call_fn (stmt))->valnum; ++ fn = VN_INFO (fn)->valnum; + if (TREE_CODE (fn) == ADDR_EXPR + && TREE_CODE (TREE_OPERAND (fn, 0)) == FUNCTION_DECL) + { + +=== modified file 'gcc/tree-ssa-sccvn.c' +--- old/gcc/tree-ssa-sccvn.c 2011-05-12 14:08:00 +0000 ++++ new/gcc/tree-ssa-sccvn.c 2011-06-02 12:12:00 +0000 +@@ -2982,7 +2982,8 @@ + /* ??? We should handle stores from calls. */ + else if (TREE_CODE (lhs) == SSA_NAME) + { +- if (gimple_call_flags (stmt) & (ECF_PURE | ECF_CONST)) ++ if (!gimple_call_internal_p (stmt) ++ && gimple_call_flags (stmt) & (ECF_PURE | ECF_CONST)) + changed = visit_reference_op_call (lhs, stmt); + else + changed = defs_to_varying (stmt); + +=== modified file 'gcc/tree-ssa-structalias.c' +--- old/gcc/tree-ssa-structalias.c 2011-02-10 15:29:52 +0000 ++++ new/gcc/tree-ssa-structalias.c 2011-05-05 15:42:22 +0000 +@@ -4319,6 +4319,7 @@ + /* Fallthru to general call handling. */; + } + if (!in_ipa_mode ++ || gimple_call_internal_p (t) + || (fndecl + && (!(fi = lookup_vi_for_tree (fndecl)) + || !fi->is_fn_info))) + +=== modified file 'gcc/tree-vect-data-refs.c' +--- old/gcc/tree-vect-data-refs.c 2011-05-06 11:28:27 +0000 ++++ new/gcc/tree-vect-data-refs.c 2011-06-02 12:12:00 +0000 +@@ -43,6 +43,45 @@ + #include "expr.h" + #include "optabs.h" + ++/* Return true if load- or store-lanes optab OPTAB is implemented for ++ COUNT vectors of type VECTYPE. 
NAME is the name of OPTAB. */ ++ ++static bool ++vect_lanes_optab_supported_p (const char *name, convert_optab optab, ++ tree vectype, unsigned HOST_WIDE_INT count) ++{ ++ enum machine_mode mode, array_mode; ++ bool limit_p; ++ ++ mode = TYPE_MODE (vectype); ++ limit_p = !targetm.array_mode_supported_p (mode, count); ++ array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode), ++ MODE_INT, limit_p); ++ ++ if (array_mode == BLKmode) ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]", ++ GET_MODE_NAME (mode), count); ++ return false; ++ } ++ ++ if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing) ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "cannot use %s<%s><%s>", ++ name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode)); ++ return false; ++ } ++ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "can use %s<%s><%s>", ++ name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode)); ++ ++ return true; ++} ++ ++ + /* Return the smallest scalar part of STMT. + This is used to determine the vectype of the stmt. We generally set the + vectype according to the type of the result (lhs). For stmts whose +@@ -2196,19 +2235,6 @@ + return false; + } + +- /* FORNOW: we handle only interleaving that is a power of 2. +- We don't fail here if it may be still possible to vectorize the +- group using SLP. If not, the size of the group will be checked in +- vect_analyze_operations, and the vectorization will fail. */ +- if (exact_log2 (stride) == -1) +- { +- if (vect_print_dump_info (REPORT_DETAILS)) +- fprintf (vect_dump, "interleaving is not a power of 2"); +- +- if (slp_impossible) +- return false; +- } +- + if (stride == 0) + stride = count; + +@@ -2911,31 +2937,33 @@ + + /* Function vect_create_data_ref_ptr. + +- Create a new pointer to vector type (vp), that points to the first location +- accessed in the loop by STMT, along with the def-use update chain to +- appropriately advance the pointer through the loop iterations. Also set +- aliasing information for the pointer. This vector pointer is used by the +- callers to this function to create a memory reference expression for vector +- load/store access. ++ Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first ++ location accessed in the loop by STMT, along with the def-use update ++ chain to appropriately advance the pointer through the loop iterations. ++ Also set aliasing information for the pointer. This pointer is used by ++ the callers to this function to create a memory reference expression for ++ vector load/store access. + + Input: + 1. STMT: a stmt that references memory. Expected to be of the form + GIMPLE_ASSIGN <name, data-ref> or + GIMPLE_ASSIGN <data-ref, name>. +- 2. AT_LOOP: the loop where the vector memref is to be created. +- 3. OFFSET (optional): an offset to be added to the initial address accessed ++ 2. AGGR_TYPE: the type of the reference, which should be either a vector ++ or an array. ++ 3. AT_LOOP: the loop where the vector memref is to be created. ++ 4. OFFSET (optional): an offset to be added to the initial address accessed + by the data-ref in STMT. +- 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain ++ 5. ONLY_INIT: indicate if vp is to be updated in the loop, or remain + pointing to the initial address. +- 5. TYPE: if not NULL indicates the required type of the data-ref. ++ 6. TYPE: if not NULL indicates the required type of the data-ref. 
+ + Output: + 1. Declare a new ptr to vector_type, and have it point to the base of the + data reference (initial addressed accessed by the data reference). + For example, for vector of type V8HI, the following code is generated: + +- v8hi *vp; +- vp = (v8hi *)initial_address; ++ v8hi *ap; ++ ap = (v8hi *)initial_address; + + if OFFSET is not supplied: + initial_address = &a[init]; +@@ -2955,7 +2983,7 @@ + 4. Return the pointer. */ + + tree +-vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop, ++vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop, + tree offset, tree *initial_address, gimple *ptr_incr, + bool only_init, bool *inv_p) + { +@@ -2965,17 +2993,16 @@ + struct loop *loop = NULL; + bool nested_in_vect_loop = false; + struct loop *containing_loop = NULL; +- tree vectype = STMT_VINFO_VECTYPE (stmt_info); +- tree vect_ptr_type; +- tree vect_ptr; ++ tree aggr_ptr_type; ++ tree aggr_ptr; + tree new_temp; + gimple vec_stmt; + gimple_seq new_stmt_list = NULL; + edge pe = NULL; + basic_block new_bb; +- tree vect_ptr_init; ++ tree aggr_ptr_init; + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); +- tree vptr; ++ tree aptr; + gimple_stmt_iterator incr_gsi; + bool insert_after; + bool negative; +@@ -2986,6 +3013,9 @@ + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + tree base; + ++ gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE ++ || TREE_CODE (aggr_type) == VECTOR_TYPE); ++ + if (loop_vinfo) + { + loop = LOOP_VINFO_LOOP (loop_vinfo); +@@ -3020,8 +3050,9 @@ + if (vect_print_dump_info (REPORT_DETAILS)) + { + tree data_ref_base = base_name; +- fprintf (vect_dump, "create vector-pointer variable to type: "); +- print_generic_expr (vect_dump, vectype, TDF_SLIM); ++ fprintf (vect_dump, "create %s-pointer variable to type: ", ++ tree_code_name[(int) TREE_CODE (aggr_type)]); ++ print_generic_expr (vect_dump, aggr_type, TDF_SLIM); + if (TREE_CODE (data_ref_base) == VAR_DECL + || TREE_CODE (data_ref_base) == ARRAY_REF) + fprintf (vect_dump, " vectorizing an array ref: "); +@@ -3032,27 +3063,28 @@ + print_generic_expr (vect_dump, base_name, TDF_SLIM); + } + +- /* (1) Create the new vector-pointer variable. */ +- vect_ptr_type = build_pointer_type (vectype); ++ /* (1) Create the new aggregate-pointer variable. */ ++ aggr_ptr_type = build_pointer_type (aggr_type); + base = get_base_address (DR_REF (dr)); + if (base + && TREE_CODE (base) == MEM_REF) +- vect_ptr_type +- = build_qualified_type (vect_ptr_type, ++ aggr_ptr_type ++ = build_qualified_type (aggr_ptr_type, + TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0)))); +- vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, ++ aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, + get_name (base_name)); + +- /* Vector types inherit the alias set of their component type by default so +- we need to use a ref-all pointer if the data reference does not conflict +- with the created vector data reference because it is not addressable. */ +- if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr), ++ /* Vector and array types inherit the alias set of their component ++ type by default so we need to use a ref-all pointer if the data ++ reference does not conflict with the created aggregated data ++ reference because it is not addressable. 
*/ ++ if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr), + get_alias_set (DR_REF (dr)))) + { +- vect_ptr_type +- = build_pointer_type_for_mode (vectype, +- TYPE_MODE (vect_ptr_type), true); +- vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, ++ aggr_ptr_type ++ = build_pointer_type_for_mode (aggr_type, ++ TYPE_MODE (aggr_ptr_type), true); ++ aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, + get_name (base_name)); + } + +@@ -3063,14 +3095,14 @@ + do + { + tree lhs = gimple_assign_lhs (orig_stmt); +- if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr), ++ if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr), + get_alias_set (lhs))) + { +- vect_ptr_type +- = build_pointer_type_for_mode (vectype, +- TYPE_MODE (vect_ptr_type), true); +- vect_ptr +- = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, ++ aggr_ptr_type ++ = build_pointer_type_for_mode (aggr_type, ++ TYPE_MODE (aggr_ptr_type), true); ++ aggr_ptr ++ = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, + get_name (base_name)); + break; + } +@@ -3080,7 +3112,7 @@ + while (orig_stmt); + } + +- add_referenced_var (vect_ptr); ++ add_referenced_var (aggr_ptr); + + /* Note: If the dataref is in an inner-loop nested in LOOP, and we are + vectorizing LOOP (i.e., outer-loop vectorization), we need to create two +@@ -3113,8 +3145,8 @@ + vp2 = vp1 + step + if () goto LOOP */ + +- /* (2) Calculate the initial address the vector-pointer, and set +- the vector-pointer to point to it before the loop. */ ++ /* (2) Calculate the initial address of the aggregate-pointer, and set ++ the aggregate-pointer to point to it before the loop. */ + + /* Create: (&(base[init_val+offset]) in the loop preheader. */ + +@@ -3133,17 +3165,17 @@ + + *initial_address = new_temp; + +- /* Create: p = (vectype *) initial_base */ ++ /* Create: p = (aggr_type *) initial_base */ + if (TREE_CODE (new_temp) != SSA_NAME +- || !useless_type_conversion_p (vect_ptr_type, TREE_TYPE (new_temp))) ++ || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp))) + { +- vec_stmt = gimple_build_assign (vect_ptr, +- fold_convert (vect_ptr_type, new_temp)); +- vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt); ++ vec_stmt = gimple_build_assign (aggr_ptr, ++ fold_convert (aggr_ptr_type, new_temp)); ++ aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt); + /* Copy the points-to information if it exists. */ + if (DR_PTR_INFO (dr)) +- duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr)); +- gimple_assign_set_lhs (vec_stmt, vect_ptr_init); ++ duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr)); ++ gimple_assign_set_lhs (vec_stmt, aggr_ptr_init); + if (pe) + { + new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt); +@@ -3153,19 +3185,19 @@ + gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT); + } + else +- vect_ptr_init = new_temp; ++ aggr_ptr_init = new_temp; + +- /* (3) Handle the updating of the vector-pointer inside the loop. ++ /* (3) Handle the updating of the aggregate-pointer inside the loop. + This is needed when ONLY_INIT is false, and also when AT_LOOP is the + inner-loop nested in LOOP (during outer-loop vectorization). */ + + /* No update in loop is required. */ + if (only_init && (!loop_vinfo || at_loop == loop)) +- vptr = vect_ptr_init; ++ aptr = aggr_ptr_init; + else + { +- /* The step of the vector pointer is the Vector Size. */ +- tree step = TYPE_SIZE_UNIT (vectype); ++ /* The step of the aggregate pointer is the type size. 
*/ ++ tree step = TYPE_SIZE_UNIT (aggr_type); + /* One exception to the above is when the scalar step of the load in + LOOP is zero. In this case the step here is also zero. */ + if (*inv_p) +@@ -3175,9 +3207,9 @@ + + standard_iv_increment_position (loop, &incr_gsi, &insert_after); + +- create_iv (vect_ptr_init, +- fold_convert (vect_ptr_type, step), +- vect_ptr, loop, &incr_gsi, insert_after, ++ create_iv (aggr_ptr_init, ++ fold_convert (aggr_ptr_type, step), ++ aggr_ptr, loop, &incr_gsi, insert_after, + &indx_before_incr, &indx_after_incr); + incr = gsi_stmt (incr_gsi); + set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL)); +@@ -3191,14 +3223,14 @@ + if (ptr_incr) + *ptr_incr = incr; + +- vptr = indx_before_incr; ++ aptr = indx_before_incr; + } + + if (!nested_in_vect_loop || only_init) +- return vptr; +- +- +- /* (4) Handle the updating of the vector-pointer inside the inner-loop ++ return aptr; ++ ++ ++ /* (4) Handle the updating of the aggregate-pointer inside the inner-loop + nested in LOOP, if exists. */ + + gcc_assert (nested_in_vect_loop); +@@ -3206,7 +3238,7 @@ + { + standard_iv_increment_position (containing_loop, &incr_gsi, + &insert_after); +- create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr, ++ create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr, + containing_loop, &incr_gsi, insert_after, &indx_before_incr, + &indx_after_incr); + incr = gsi_stmt (incr_gsi); +@@ -3343,13 +3375,22 @@ + and FALSE otherwise. */ + + bool +-vect_strided_store_supported (tree vectype) ++vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count) + { + optab interleave_high_optab, interleave_low_optab; + enum machine_mode mode; + + mode = TYPE_MODE (vectype); + ++ /* vect_permute_store_chain requires the group size to be a power of two. */ ++ if (exact_log2 (count) == -1) ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "the size of the group of strided accesses" ++ " is not a power of 2"); ++ return false; ++ } ++ + /* Check that the operation is supported. */ + interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR, + vectype, optab_default); +@@ -3374,6 +3415,18 @@ + } + + ++/* Return TRUE if vec_store_lanes is available for COUNT vectors of ++ type VECTYPE. */ ++ ++bool ++vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count) ++{ ++ return vect_lanes_optab_supported_p ("vec_store_lanes", ++ vec_store_lanes_optab, ++ vectype, count); ++} ++ ++ + /* Function vect_permute_store_chain. + + Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be +@@ -3435,7 +3488,7 @@ + I3: 4 12 20 28 5 13 21 30 + I4: 6 14 22 30 7 15 23 31. */ + +-bool ++void + vect_permute_store_chain (VEC(tree,heap) *dr_chain, + unsigned int length, + gimple stmt, +@@ -3449,9 +3502,7 @@ + unsigned int j; + enum tree_code high_code, low_code; + +- /* Check that the operation is supported. 
*/ +- if (!vect_strided_store_supported (vectype)) +- return false; ++ gcc_assert (vect_strided_store_supported (vectype, length)); + + *result_chain = VEC_copy (tree, heap, dr_chain); + +@@ -3504,7 +3555,6 @@ + } + dr_chain = VEC_copy (tree, heap, *result_chain); + } +- return true; + } + + /* Function vect_setup_realignment +@@ -3674,8 +3724,9 @@ + + gcc_assert (!compute_in_loop); + vec_dest = vect_create_destination_var (scalar_dest, vectype); +- ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE, +- &init_addr, &inc, true, &inv_p); ++ ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load, ++ NULL_TREE, &init_addr, &inc, ++ true, &inv_p); + new_stmt = gimple_build_assign_with_ops + (BIT_AND_EXPR, NULL_TREE, ptr, + build_int_cst (TREE_TYPE (ptr), +@@ -3780,13 +3831,22 @@ + and FALSE otherwise. */ + + bool +-vect_strided_load_supported (tree vectype) ++vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count) + { + optab perm_even_optab, perm_odd_optab; + enum machine_mode mode; + + mode = TYPE_MODE (vectype); + ++ /* vect_permute_load_chain requires the group size to be a power of two. */ ++ if (exact_log2 (count) == -1) ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "the size of the group of strided accesses" ++ " is not a power of 2"); ++ return false; ++ } ++ + perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype, + optab_default); + if (!perm_even_optab) +@@ -3821,6 +3881,16 @@ + return true; + } + ++/* Return TRUE if vec_load_lanes is available for COUNT vectors of ++ type VECTYPE. */ ++ ++bool ++vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count) ++{ ++ return vect_lanes_optab_supported_p ("vec_load_lanes", ++ vec_load_lanes_optab, ++ vectype, count); ++} + + /* Function vect_permute_load_chain. + +@@ -3898,7 +3968,7 @@ + 3rd vec (E2): 2 6 10 14 18 22 26 30 + 4th vec (E4): 3 7 11 15 19 23 27 31. */ + +-bool ++static void + vect_permute_load_chain (VEC(tree,heap) *dr_chain, + unsigned int length, + gimple stmt, +@@ -3911,9 +3981,7 @@ + int i; + unsigned int j; + +- /* Check that the operation is supported. */ +- if (!vect_strided_load_supported (vectype)) +- return false; ++ gcc_assert (vect_strided_load_supported (vectype, length)); + + *result_chain = VEC_copy (tree, heap, dr_chain); + for (i = 0; i < exact_log2 (length); i++) +@@ -3956,7 +4024,6 @@ + } + dr_chain = VEC_copy (tree, heap, *result_chain); + } +- return true; + } + + +@@ -3967,24 +4034,32 @@ + the scalar statements. + */ + +-bool ++void + vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size, + gimple_stmt_iterator *gsi) + { +- stmt_vec_info stmt_info = vinfo_for_stmt (stmt); +- gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info); +- gimple next_stmt, new_stmt; + VEC(tree,heap) *result_chain = NULL; +- unsigned int i, gap_count; +- tree tmp_data_ref; + + /* DR_CHAIN contains input data-refs that are a part of the interleaving. + RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted + vectors, that are ready for vector computation. */ + result_chain = VEC_alloc (tree, heap, size); +- /* Permute. 
*/ +- if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain)) +- return false; ++ vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain); ++ vect_record_strided_load_vectors (stmt, result_chain); ++ VEC_free (tree, heap, result_chain); ++} ++ ++/* RESULT_CHAIN contains the output of a group of strided loads that were ++ generated as part of the vectorization of STMT. Assign the statement ++ for each vector to the associated scalar statement. */ ++ ++void ++vect_record_strided_load_vectors (gimple stmt, VEC(tree,heap) *result_chain) ++{ ++ gimple first_stmt = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)); ++ gimple next_stmt, new_stmt; ++ unsigned int i, gap_count; ++ tree tmp_data_ref; + + /* Put a permuted data-ref in the VECTORIZED_STMT field. + Since we scan the chain starting from it's first node, their order +@@ -4046,9 +4121,6 @@ + break; + } + } +- +- VEC_free (tree, heap, result_chain); +- return true; + } + + /* Function vect_force_dr_alignment_p. + +=== modified file 'gcc/tree-vect-slp.c' +--- old/gcc/tree-vect-slp.c 2010-12-23 16:25:52 +0000 ++++ new/gcc/tree-vect-slp.c 2011-05-05 15:43:06 +0000 +@@ -215,7 +215,8 @@ + vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node); + else + /* Store. */ +- vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node); ++ vect_model_store_cost (stmt_info, ncopies_for_cost, false, ++ dt[0], slp_node); + } + + else +@@ -579,7 +580,7 @@ + + /* Analyze costs (for the first stmt in the group). */ + vect_model_load_cost (vinfo_for_stmt (stmt), +- ncopies_for_cost, *node); ++ ncopies_for_cost, false, *node); + } + + /* Store the place of this load in the interleaving chain. In + +=== modified file 'gcc/tree-vect-stmts.c' +--- old/gcc/tree-vect-stmts.c 2011-04-18 07:38:11 +0000 ++++ new/gcc/tree-vect-stmts.c 2011-06-02 12:12:00 +0000 +@@ -42,6 +42,82 @@ + #include "langhooks.h" + + ++/* Return a variable of type ELEM_TYPE[NELEMS]. */ ++ ++static tree ++create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems) ++{ ++ return create_tmp_var (build_array_type_nelts (elem_type, nelems), ++ "vect_array"); ++} ++ ++/* ARRAY is an array of vectors created by create_vector_array. ++ Return an SSA_NAME for the vector in index N. The reference ++ is part of the vectorization of STMT and the vector is associated ++ with scalar destination SCALAR_DEST. */ ++ ++static tree ++read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest, ++ tree array, unsigned HOST_WIDE_INT n) ++{ ++ tree vect_type, vect, vect_name, array_ref; ++ gimple new_stmt; ++ ++ gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE); ++ vect_type = TREE_TYPE (TREE_TYPE (array)); ++ vect = vect_create_destination_var (scalar_dest, vect_type); ++ array_ref = build4 (ARRAY_REF, vect_type, array, ++ build_int_cst (size_type_node, n), ++ NULL_TREE, NULL_TREE); ++ ++ new_stmt = gimple_build_assign (vect, array_ref); ++ vect_name = make_ssa_name (vect, new_stmt); ++ gimple_assign_set_lhs (new_stmt, vect_name); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ mark_symbols_for_renaming (new_stmt); ++ ++ return vect_name; ++} ++ ++/* ARRAY is an array of vectors created by create_vector_array. ++ Emit code to store SSA_NAME VECT in index N of the array. ++ The store is part of the vectorization of STMT. 
*/ ++ ++static void ++write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect, ++ tree array, unsigned HOST_WIDE_INT n) ++{ ++ tree array_ref; ++ gimple new_stmt; ++ ++ array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array, ++ build_int_cst (size_type_node, n), ++ NULL_TREE, NULL_TREE); ++ ++ new_stmt = gimple_build_assign (array_ref, vect); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ mark_symbols_for_renaming (new_stmt); ++} ++ ++/* PTR is a pointer to an array of type TYPE. Return a representation ++ of *PTR. The memory reference replaces those in FIRST_DR ++ (and its group). */ ++ ++static tree ++create_array_ref (tree type, tree ptr, struct data_reference *first_dr) ++{ ++ struct ptr_info_def *pi; ++ tree mem_ref, alias_ptr_type; ++ ++ alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr)); ++ mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0)); ++ /* Arrays have the same alignment as their type. */ ++ pi = get_ptr_info (ptr); ++ pi->align = TYPE_ALIGN_UNIT (type); ++ pi->misalign = 0; ++ return mem_ref; ++} ++ + /* Utility functions used by vect_mark_stmts_to_be_vectorized. */ + + /* Function vect_mark_relevant. +@@ -648,7 +724,8 @@ + + void + vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, +- enum vect_def_type dt, slp_tree slp_node) ++ bool store_lanes_p, enum vect_def_type dt, ++ slp_tree slp_node) + { + int group_size; + unsigned int inside_cost = 0, outside_cost = 0; +@@ -685,9 +762,11 @@ + first_dr = STMT_VINFO_DATA_REF (stmt_info); + } + +- /* Is this an access in a group of stores, which provide strided access? +- If so, add in the cost of the permutes. */ +- if (group_size > 1) ++ /* We assume that the cost of a single store-lanes instruction is ++ equivalent to the cost of GROUP_SIZE separate stores. If a strided ++ access is instead being provided by a permute-and-store operation, ++ include the cost of the permutes. */ ++ if (!store_lanes_p && group_size > 1) + { + /* Uses a high and low interleave operation for each needed permute. */ + inside_cost = ncopies * exact_log2(group_size) * group_size +@@ -763,8 +842,8 @@ + access scheme chosen. */ + + void +-vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node) +- ++vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p, ++ slp_tree slp_node) + { + int group_size; + gimple first_stmt; +@@ -789,9 +868,11 @@ + first_dr = dr; + } + +- /* Is this an access in a group of loads providing strided access? +- If so, add in the cost of the permutes. */ +- if (group_size > 1) ++ /* We assume that the cost of a single load-lanes instruction is ++ equivalent to the cost of GROUP_SIZE separate loads. If a strided ++ access is instead being provided by a load-and-permute operation, ++ include the cost of the permutes. */ ++ if (!load_lanes_p && group_size > 1) + { + /* Uses an even and odd extract operations for each needed permute. 
*/ + inside_cost = ncopies * exact_log2(group_size) * group_size +@@ -3329,6 +3410,7 @@ + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL; + tree vectype = STMT_VINFO_VECTYPE (stmt_info); ++ tree elem_type; + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = NULL; + enum machine_mode vec_mode; +@@ -3344,6 +3426,7 @@ + int j; + gimple next_stmt, first_stmt = NULL; + bool strided_store = false; ++ bool store_lanes_p = false; + unsigned int group_size, i; + VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL; + bool inv_p; +@@ -3351,6 +3434,7 @@ + bool slp = (slp_node != NULL); + unsigned int vec_num; + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); ++ tree aggr_type; + + if (loop_vinfo) + loop = LOOP_VINFO_LOOP (loop_vinfo); +@@ -3404,7 +3488,8 @@ + + /* The scalar rhs type needs to be trivially convertible to the vector + component type. This should always be the case. */ +- if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op))) ++ elem_type = TREE_TYPE (vectype); ++ if (!useless_type_conversion_p (elem_type, TREE_TYPE (op))) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "??? operands of different types"); +@@ -3431,9 +3516,14 @@ + { + strided_store = true; + first_stmt = DR_GROUP_FIRST_DR (stmt_info); +- if (!vect_strided_store_supported (vectype) +- && !PURE_SLP_STMT (stmt_info) && !slp) +- return false; ++ if (!slp && !PURE_SLP_STMT (stmt_info)) ++ { ++ group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)); ++ if (vect_store_lanes_supported (vectype, group_size)) ++ store_lanes_p = true; ++ else if (!vect_strided_store_supported (vectype, group_size)) ++ return false; ++ } + + if (first_stmt == stmt) + { +@@ -3459,7 +3549,7 @@ + if (!vec_stmt) /* transformation not required. */ + { + STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; +- vect_model_store_cost (stmt_info, ncopies, dt, NULL); ++ vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL); + return true; + } + +@@ -3514,6 +3604,16 @@ + + alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false); + gcc_assert (alignment_support_scheme); ++ /* Targets with store-lane instructions must not require explicit ++ realignment. */ ++ gcc_assert (!store_lanes_p ++ || alignment_support_scheme == dr_aligned ++ || alignment_support_scheme == dr_unaligned_supported); ++ ++ if (store_lanes_p) ++ aggr_type = build_array_type_nelts (elem_type, vec_num * nunits); ++ else ++ aggr_type = vectype; + + /* In case the vectorization factor (VF) is bigger than the number + of elements that we can fit in a vectype (nunits), we have to generate +@@ -3602,9 +3702,9 @@ + /* We should have catched mismatched types earlier. */ + gcc_assert (useless_type_conversion_p (vectype, + TREE_TYPE (vec_oprnd))); +- dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE, +- &dummy, &ptr_incr, false, +- &inv_p); ++ dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL, ++ NULL_TREE, &dummy, ++ &ptr_incr, false, &inv_p); + gcc_assert (bb_vinfo || !inv_p); + } + else +@@ -3625,76 +3725,101 @@ + VEC_replace(tree, dr_chain, i, vec_oprnd); + VEC_replace(tree, oprnds, i, vec_oprnd); + } +- dataref_ptr = +- bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE); +- } +- +- if (strided_store) +- { +- result_chain = VEC_alloc (tree, heap, group_size); +- /* Permute. 
*/ +- if (!vect_permute_store_chain (dr_chain, group_size, stmt, gsi, +- &result_chain)) +- return false; +- } +- +- next_stmt = first_stmt; +- for (i = 0; i < vec_num; i++) +- { +- struct ptr_info_def *pi; +- +- if (i > 0) +- /* Bump the vector pointer. */ +- dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, +- NULL_TREE); +- +- if (slp) +- vec_oprnd = VEC_index (tree, vec_oprnds, i); +- else if (strided_store) +- /* For strided stores vectorized defs are interleaved in +- vect_permute_store_chain(). */ +- vec_oprnd = VEC_index (tree, result_chain, i); +- +- data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr, +- build_int_cst (reference_alias_ptr_type +- (DR_REF (first_dr)), 0)); +- pi = get_ptr_info (dataref_ptr); +- pi->align = TYPE_ALIGN_UNIT (vectype); +- if (aligned_access_p (first_dr)) +- pi->misalign = 0; +- else if (DR_MISALIGNMENT (first_dr) == -1) +- { +- TREE_TYPE (data_ref) +- = build_aligned_type (TREE_TYPE (data_ref), +- TYPE_ALIGN (TREE_TYPE (vectype))); +- pi->align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype)); +- pi->misalign = 0; +- } +- else +- { +- TREE_TYPE (data_ref) +- = build_aligned_type (TREE_TYPE (data_ref), +- TYPE_ALIGN (TREE_TYPE (vectype))); +- pi->misalign = DR_MISALIGNMENT (first_dr); +- } +- +- /* Arguments are ready. Create the new vector stmt. */ +- new_stmt = gimple_build_assign (data_ref, vec_oprnd); ++ dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, ++ TYPE_SIZE_UNIT (aggr_type)); ++ } ++ ++ if (store_lanes_p) ++ { ++ tree vec_array; ++ ++ /* Combine all the vectors into an array. */ ++ vec_array = create_vector_array (vectype, vec_num); ++ for (i = 0; i < vec_num; i++) ++ { ++ vec_oprnd = VEC_index (tree, dr_chain, i); ++ write_vector_array (stmt, gsi, vec_oprnd, vec_array, i); ++ } ++ ++ /* Emit: ++ MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */ ++ data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr); ++ new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array); ++ gimple_call_set_lhs (new_stmt, data_ref); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + mark_symbols_for_renaming (new_stmt); +- +- if (slp) +- continue; +- +- if (j == 0) +- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; ++ } ++ else ++ { ++ new_stmt = NULL; ++ if (strided_store) ++ { ++ result_chain = VEC_alloc (tree, heap, group_size); ++ /* Permute. */ ++ vect_permute_store_chain (dr_chain, group_size, stmt, gsi, ++ &result_chain); ++ } ++ ++ next_stmt = first_stmt; ++ for (i = 0; i < vec_num; i++) ++ { ++ struct ptr_info_def *pi; ++ ++ if (i > 0) ++ /* Bump the vector pointer. */ ++ dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, ++ stmt, NULL_TREE); ++ ++ if (slp) ++ vec_oprnd = VEC_index (tree, vec_oprnds, i); ++ else if (strided_store) ++ /* For strided stores vectorized defs are interleaved in ++ vect_permute_store_chain(). 
*/ ++ vec_oprnd = VEC_index (tree, result_chain, i); ++ ++ data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr, ++ build_int_cst (reference_alias_ptr_type ++ (DR_REF (first_dr)), 0)); ++ pi = get_ptr_info (dataref_ptr); ++ pi->align = TYPE_ALIGN_UNIT (vectype); ++ if (aligned_access_p (first_dr)) ++ pi->misalign = 0; ++ else if (DR_MISALIGNMENT (first_dr) == -1) ++ { ++ TREE_TYPE (data_ref) ++ = build_aligned_type (TREE_TYPE (data_ref), ++ TYPE_ALIGN (elem_type)); ++ pi->align = TYPE_ALIGN_UNIT (elem_type); ++ pi->misalign = 0; ++ } ++ else ++ { ++ TREE_TYPE (data_ref) ++ = build_aligned_type (TREE_TYPE (data_ref), ++ TYPE_ALIGN (elem_type)); ++ pi->misalign = DR_MISALIGNMENT (first_dr); ++ } ++ ++ /* Arguments are ready. Create the new vector stmt. */ ++ new_stmt = gimple_build_assign (data_ref, vec_oprnd); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ mark_symbols_for_renaming (new_stmt); ++ ++ if (slp) ++ continue; ++ ++ next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt)); ++ if (!next_stmt) ++ break; ++ } ++ } ++ if (!slp) ++ { ++ if (j == 0) ++ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; + else + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; +- + prev_stmt_info = vinfo_for_stmt (new_stmt); +- next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt)); +- if (!next_stmt) +- break; + } + } + +@@ -3805,6 +3930,7 @@ + bool nested_in_vect_loop = false; + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr; + tree vectype = STMT_VINFO_VECTYPE (stmt_info); ++ tree elem_type; + tree new_temp; + enum machine_mode mode; + gimple new_stmt = NULL; +@@ -3821,6 +3947,7 @@ + gimple phi = NULL; + VEC(tree,heap) *dr_chain = NULL; + bool strided_load = false; ++ bool load_lanes_p = false; + gimple first_stmt; + tree scalar_type; + bool inv_p; +@@ -3833,6 +3960,7 @@ + enum tree_code code; + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); + int vf; ++ tree aggr_type; + + if (loop_vinfo) + { +@@ -3909,7 +4037,8 @@ + + /* The vector component type needs to be trivially convertible to the + scalar lhs. This should always be the case. */ +- if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), TREE_TYPE (vectype))) ++ elem_type = TREE_TYPE (vectype); ++ if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), elem_type)) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "??? operands of different types"); +@@ -3923,10 +4052,15 @@ + /* FORNOW */ + gcc_assert (! nested_in_vect_loop); + +- /* Check if interleaving is supported. */ +- if (!vect_strided_load_supported (vectype) +- && !PURE_SLP_STMT (stmt_info) && !slp) +- return false; ++ first_stmt = DR_GROUP_FIRST_DR (stmt_info); ++ if (!slp && !PURE_SLP_STMT (stmt_info)) ++ { ++ group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)); ++ if (vect_load_lanes_supported (vectype, group_size)) ++ load_lanes_p = true; ++ else if (!vect_strided_load_supported (vectype, group_size)) ++ return false; ++ } + } + + if (negative) +@@ -3951,12 +4085,12 @@ + if (!vec_stmt) /* transformation not required. */ + { + STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; +- vect_model_load_cost (stmt_info, ncopies, NULL); ++ vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL); + return true; + } + + if (vect_print_dump_info (REPORT_DETAILS)) +- fprintf (vect_dump, "transform load."); ++ fprintf (vect_dump, "transform load. ncopies = %d", ncopies); + + /** Transform. 
**/ + +@@ -3982,8 +4116,6 @@ + } + else + vec_num = group_size; +- +- dr_chain = VEC_alloc (tree, heap, vec_num); + } + else + { +@@ -3994,6 +4126,11 @@ + + alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false); + gcc_assert (alignment_support_scheme); ++ /* Targets with load-lane instructions must not require explicit ++ realignment. */ ++ gcc_assert (!load_lanes_p ++ || alignment_support_scheme == dr_aligned ++ || alignment_support_scheme == dr_unaligned_supported); + + /* In case the vectorization factor (VF) is bigger than the number + of elements that we can fit in a vectype (nunits), we have to generate +@@ -4125,208 +4262,252 @@ + if (negative) + offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1); + ++ if (load_lanes_p) ++ aggr_type = build_array_type_nelts (elem_type, vec_num * nunits); ++ else ++ aggr_type = vectype; ++ + prev_stmt_info = NULL; + for (j = 0; j < ncopies; j++) + { + /* 1. Create the vector pointer update chain. */ + if (j == 0) +- dataref_ptr = vect_create_data_ref_ptr (first_stmt, ++ dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, + at_loop, offset, + &dummy, &ptr_incr, false, + &inv_p); + else +- dataref_ptr = +- bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE); +- +- for (i = 0; i < vec_num; i++) ++ dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, ++ TYPE_SIZE_UNIT (aggr_type)); ++ ++ if (strided_load || slp_perm) ++ dr_chain = VEC_alloc (tree, heap, vec_num); ++ ++ if (load_lanes_p) + { +- if (i > 0) +- dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, +- NULL_TREE); +- +- /* 2. Create the vector-load in the loop. */ +- switch (alignment_support_scheme) +- { +- case dr_aligned: +- case dr_unaligned_supported: +- { +- struct ptr_info_def *pi; +- data_ref +- = build2 (MEM_REF, vectype, dataref_ptr, +- build_int_cst (reference_alias_ptr_type +- (DR_REF (first_dr)), 0)); +- pi = get_ptr_info (dataref_ptr); +- pi->align = TYPE_ALIGN_UNIT (vectype); +- if (alignment_support_scheme == dr_aligned) +- { +- gcc_assert (aligned_access_p (first_dr)); +- pi->misalign = 0; +- } +- else if (DR_MISALIGNMENT (first_dr) == -1) +- { +- TREE_TYPE (data_ref) +- = build_aligned_type (TREE_TYPE (data_ref), +- TYPE_ALIGN (TREE_TYPE (vectype))); +- pi->align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype)); +- pi->misalign = 0; +- } +- else +- { +- TREE_TYPE (data_ref) +- = build_aligned_type (TREE_TYPE (data_ref), +- TYPE_ALIGN (TREE_TYPE (vectype))); +- pi->misalign = DR_MISALIGNMENT (first_dr); +- } +- break; +- } +- case dr_explicit_realign: +- { +- tree ptr, bump; +- tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1); +- +- if (compute_in_loop) +- msq = vect_setup_realignment (first_stmt, gsi, +- &realignment_token, +- dr_explicit_realign, +- dataref_ptr, NULL); +- +- new_stmt = gimple_build_assign_with_ops +- (BIT_AND_EXPR, NULL_TREE, dataref_ptr, +- build_int_cst +- (TREE_TYPE (dataref_ptr), +- -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); +- ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt); +- gimple_assign_set_lhs (new_stmt, ptr); +- vect_finish_stmt_generation (stmt, new_stmt, gsi); +- data_ref +- = build2 (MEM_REF, vectype, ptr, +- build_int_cst (reference_alias_ptr_type +- (DR_REF (first_dr)), 0)); +- vec_dest = vect_create_destination_var (scalar_dest, vectype); +- new_stmt = gimple_build_assign (vec_dest, data_ref); +- new_temp = make_ssa_name (vec_dest, new_stmt); +- gimple_assign_set_lhs (new_stmt, new_temp); +- gimple_set_vdef (new_stmt, gimple_vdef (stmt)); +- 
gimple_set_vuse (new_stmt, gimple_vuse (stmt)); +- vect_finish_stmt_generation (stmt, new_stmt, gsi); +- msq = new_temp; +- +- bump = size_binop (MULT_EXPR, vs_minus_1, +- TYPE_SIZE_UNIT (scalar_type)); +- ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump); +- new_stmt = gimple_build_assign_with_ops +- (BIT_AND_EXPR, NULL_TREE, ptr, +- build_int_cst +- (TREE_TYPE (ptr), +- -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); +- ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt); +- gimple_assign_set_lhs (new_stmt, ptr); +- vect_finish_stmt_generation (stmt, new_stmt, gsi); +- data_ref +- = build2 (MEM_REF, vectype, ptr, +- build_int_cst (reference_alias_ptr_type +- (DR_REF (first_dr)), 0)); +- break; +- } +- case dr_explicit_realign_optimized: +- new_stmt = gimple_build_assign_with_ops +- (BIT_AND_EXPR, NULL_TREE, dataref_ptr, +- build_int_cst +- (TREE_TYPE (dataref_ptr), +- -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); +- new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt); +- gimple_assign_set_lhs (new_stmt, new_temp); +- vect_finish_stmt_generation (stmt, new_stmt, gsi); +- data_ref +- = build2 (MEM_REF, vectype, new_temp, +- build_int_cst (reference_alias_ptr_type +- (DR_REF (first_dr)), 0)); +- break; +- default: +- gcc_unreachable (); +- } +- vec_dest = vect_create_destination_var (scalar_dest, vectype); +- new_stmt = gimple_build_assign (vec_dest, data_ref); +- new_temp = make_ssa_name (vec_dest, new_stmt); +- gimple_assign_set_lhs (new_stmt, new_temp); ++ tree vec_array; ++ ++ vec_array = create_vector_array (vectype, vec_num); ++ ++ /* Emit: ++ VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */ ++ data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr); ++ new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref); ++ gimple_call_set_lhs (new_stmt, vec_array); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + mark_symbols_for_renaming (new_stmt); + +- /* 3. Handle explicit realignment if necessary/supported. Create in +- loop: vec_dest = realign_load (msq, lsq, realignment_token) */ +- if (alignment_support_scheme == dr_explicit_realign_optimized +- || alignment_support_scheme == dr_explicit_realign) +- { +- tree tmp; +- +- lsq = gimple_assign_lhs (new_stmt); +- if (!realignment_token) +- realignment_token = dataref_ptr; ++ /* Extract each vector into an SSA_NAME. */ ++ for (i = 0; i < vec_num; i++) ++ { ++ new_temp = read_vector_array (stmt, gsi, scalar_dest, ++ vec_array, i); ++ VEC_quick_push (tree, dr_chain, new_temp); ++ } ++ ++ /* Record the mapping between SSA_NAMEs and statements. */ ++ vect_record_strided_load_vectors (stmt, dr_chain); ++ } ++ else ++ { ++ for (i = 0; i < vec_num; i++) ++ { ++ if (i > 0) ++ dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, ++ stmt, NULL_TREE); ++ ++ /* 2. Create the vector-load in the loop. 
*/ ++ switch (alignment_support_scheme) ++ { ++ case dr_aligned: ++ case dr_unaligned_supported: ++ { ++ struct ptr_info_def *pi; ++ data_ref ++ = build2 (MEM_REF, vectype, dataref_ptr, ++ build_int_cst (reference_alias_ptr_type ++ (DR_REF (first_dr)), 0)); ++ pi = get_ptr_info (dataref_ptr); ++ pi->align = TYPE_ALIGN_UNIT (vectype); ++ if (alignment_support_scheme == dr_aligned) ++ { ++ gcc_assert (aligned_access_p (first_dr)); ++ pi->misalign = 0; ++ } ++ else if (DR_MISALIGNMENT (first_dr) == -1) ++ { ++ TREE_TYPE (data_ref) ++ = build_aligned_type (TREE_TYPE (data_ref), ++ TYPE_ALIGN (elem_type)); ++ pi->align = TYPE_ALIGN_UNIT (elem_type); ++ pi->misalign = 0; ++ } ++ else ++ { ++ TREE_TYPE (data_ref) ++ = build_aligned_type (TREE_TYPE (data_ref), ++ TYPE_ALIGN (elem_type)); ++ pi->misalign = DR_MISALIGNMENT (first_dr); ++ } ++ break; ++ } ++ case dr_explicit_realign: ++ { ++ tree ptr, bump; ++ tree vs_minus_1 ++ = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1); ++ ++ if (compute_in_loop) ++ msq = vect_setup_realignment (first_stmt, gsi, ++ &realignment_token, ++ dr_explicit_realign, ++ dataref_ptr, NULL); ++ ++ new_stmt = gimple_build_assign_with_ops ++ (BIT_AND_EXPR, NULL_TREE, dataref_ptr, ++ build_int_cst ++ (TREE_TYPE (dataref_ptr), ++ -(HOST_WIDE_INT) ++ TYPE_ALIGN_UNIT (vectype))); ++ ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt); ++ gimple_assign_set_lhs (new_stmt, ptr); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ data_ref ++ = build2 (MEM_REF, vectype, ptr, ++ build_int_cst (reference_alias_ptr_type ++ (DR_REF (first_dr)), 0)); ++ vec_dest = vect_create_destination_var (scalar_dest, ++ vectype); ++ new_stmt = gimple_build_assign (vec_dest, data_ref); ++ new_temp = make_ssa_name (vec_dest, new_stmt); ++ gimple_assign_set_lhs (new_stmt, new_temp); ++ gimple_set_vdef (new_stmt, gimple_vdef (stmt)); ++ gimple_set_vuse (new_stmt, gimple_vuse (stmt)); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ msq = new_temp; ++ ++ bump = size_binop (MULT_EXPR, vs_minus_1, ++ TYPE_SIZE_UNIT (scalar_type)); ++ ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump); ++ new_stmt = gimple_build_assign_with_ops ++ (BIT_AND_EXPR, NULL_TREE, ptr, ++ build_int_cst ++ (TREE_TYPE (ptr), ++ -(HOST_WIDE_INT) ++ TYPE_ALIGN_UNIT (vectype))); ++ ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt); ++ gimple_assign_set_lhs (new_stmt, ptr); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ data_ref ++ = build2 (MEM_REF, vectype, ptr, ++ build_int_cst (reference_alias_ptr_type ++ (DR_REF (first_dr)), 0)); ++ break; ++ } ++ case dr_explicit_realign_optimized: ++ new_stmt = gimple_build_assign_with_ops ++ (BIT_AND_EXPR, NULL_TREE, dataref_ptr, ++ build_int_cst ++ (TREE_TYPE (dataref_ptr), ++ -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); ++ new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr), ++ new_stmt); ++ gimple_assign_set_lhs (new_stmt, new_temp); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ data_ref ++ = build2 (MEM_REF, vectype, new_temp, ++ build_int_cst (reference_alias_ptr_type ++ (DR_REF (first_dr)), 0)); ++ break; ++ default: ++ gcc_unreachable (); ++ } + vec_dest = vect_create_destination_var (scalar_dest, vectype); +- tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, +- realignment_token); +- new_stmt = gimple_build_assign (vec_dest, tmp); ++ new_stmt = gimple_build_assign (vec_dest, data_ref); + new_temp = make_ssa_name (vec_dest, new_stmt); + gimple_assign_set_lhs (new_stmt, new_temp); + vect_finish_stmt_generation (stmt, new_stmt, 
gsi); +- +- if (alignment_support_scheme == dr_explicit_realign_optimized) +- { +- gcc_assert (phi); +- if (i == vec_num - 1 && j == ncopies - 1) +- add_phi_arg (phi, lsq, loop_latch_edge (containing_loop), +- UNKNOWN_LOCATION); +- msq = lsq; +- } +- } +- +- /* 4. Handle invariant-load. */ +- if (inv_p && !bb_vinfo) +- { +- gcc_assert (!strided_load); +- gcc_assert (nested_in_vect_loop_p (loop, stmt)); +- if (j == 0) +- { +- int k; +- tree t = NULL_TREE; +- tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type); +- +- /* CHECKME: bitpos depends on endianess? */ +- bitpos = bitsize_zero_node; +- vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp, +- bitsize, bitpos); +- vec_dest = +- vect_create_destination_var (scalar_dest, NULL_TREE); +- new_stmt = gimple_build_assign (vec_dest, vec_inv); +- new_temp = make_ssa_name (vec_dest, new_stmt); ++ mark_symbols_for_renaming (new_stmt); ++ ++ /* 3. Handle explicit realignment if necessary/supported. ++ Create in loop: ++ vec_dest = realign_load (msq, lsq, realignment_token) */ ++ if (alignment_support_scheme == dr_explicit_realign_optimized ++ || alignment_support_scheme == dr_explicit_realign) ++ { ++ tree tmp; ++ ++ lsq = gimple_assign_lhs (new_stmt); ++ if (!realignment_token) ++ realignment_token = dataref_ptr; ++ vec_dest = vect_create_destination_var (scalar_dest, vectype); ++ tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, ++ realignment_token); ++ new_stmt = gimple_build_assign (vec_dest, tmp); ++ new_temp = make_ssa_name (vec_dest, new_stmt); + gimple_assign_set_lhs (new_stmt, new_temp); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + +- for (k = nunits - 1; k >= 0; --k) +- t = tree_cons (NULL_TREE, new_temp, t); +- /* FIXME: use build_constructor directly. */ +- vec_inv = build_constructor_from_list (vectype, t); +- new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi); ++ if (alignment_support_scheme == dr_explicit_realign_optimized) ++ { ++ gcc_assert (phi); ++ if (i == vec_num - 1 && j == ncopies - 1) ++ add_phi_arg (phi, lsq, ++ loop_latch_edge (containing_loop), ++ UNKNOWN_LOCATION); ++ msq = lsq; ++ } ++ } ++ ++ /* 4. Handle invariant-load. */ ++ if (inv_p && !bb_vinfo) ++ { ++ gcc_assert (!strided_load); ++ gcc_assert (nested_in_vect_loop_p (loop, stmt)); ++ if (j == 0) ++ { ++ int k; ++ tree t = NULL_TREE; ++ tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type); ++ ++ /* CHECKME: bitpos depends on endianess? */ ++ bitpos = bitsize_zero_node; ++ vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp, ++ bitsize, bitpos); ++ vec_dest = vect_create_destination_var (scalar_dest, ++ NULL_TREE); ++ new_stmt = gimple_build_assign (vec_dest, vec_inv); ++ new_temp = make_ssa_name (vec_dest, new_stmt); ++ gimple_assign_set_lhs (new_stmt, new_temp); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ ++ for (k = nunits - 1; k >= 0; --k) ++ t = tree_cons (NULL_TREE, new_temp, t); ++ /* FIXME: use build_constructor directly. */ ++ vec_inv = build_constructor_from_list (vectype, t); ++ new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi); ++ new_stmt = SSA_NAME_DEF_STMT (new_temp); ++ } ++ else ++ gcc_unreachable (); /* FORNOW. */ ++ } ++ ++ if (negative) ++ { ++ new_temp = reverse_vec_elements (new_temp, stmt, gsi); + new_stmt = SSA_NAME_DEF_STMT (new_temp); + } +- else +- gcc_unreachable (); /* FORNOW. 
*/ +- } +- +- if (negative) +- { +- new_temp = reverse_vec_elements (new_temp, stmt, gsi); +- new_stmt = SSA_NAME_DEF_STMT (new_temp); +- } +- +- /* Collect vector loads and later create their permutation in +- vect_transform_strided_load (). */ +- if (strided_load || slp_perm) +- VEC_quick_push (tree, dr_chain, new_temp); +- +- /* Store vector loads in the corresponding SLP_NODE. */ +- if (slp && !slp_perm) +- VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt); ++ ++ /* Collect vector loads and later create their permutation in ++ vect_transform_strided_load (). */ ++ if (strided_load || slp_perm) ++ VEC_quick_push (tree, dr_chain, new_temp); ++ ++ /* Store vector loads in the corresponding SLP_NODE. */ ++ if (slp && !slp_perm) ++ VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), ++ new_stmt); ++ } + } + + if (slp && !slp_perm) +@@ -4345,12 +4526,9 @@ + { + if (strided_load) + { +- if (!vect_transform_strided_load (stmt, dr_chain, group_size, gsi)) +- return false; +- ++ if (!load_lanes_p) ++ vect_transform_strided_load (stmt, dr_chain, group_size, gsi); + *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); +- VEC_free (tree, heap, dr_chain); +- dr_chain = VEC_alloc (tree, heap, group_size); + } + else + { +@@ -4361,11 +4539,10 @@ + prev_stmt_info = vinfo_for_stmt (new_stmt); + } + } ++ if (dr_chain) ++ VEC_free (tree, heap, dr_chain); + } + +- if (dr_chain) +- VEC_free (tree, heap, dr_chain); +- + return true; + } + +@@ -4769,27 +4946,6 @@ + return false; + } + +- if (!PURE_SLP_STMT (stmt_info)) +- { +- /* Groups of strided accesses whose size is not a power of 2 are not +- vectorizable yet using loop-vectorization. Therefore, if this stmt +- feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and +- loop-based vectorized), the loop cannot be vectorized. 
*/ +- if (STMT_VINFO_STRIDED_ACCESS (stmt_info) +- && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt ( +- DR_GROUP_FIRST_DR (stmt_info)))) == -1) +- { +- if (vect_print_dump_info (REPORT_DETAILS)) +- { +- fprintf (vect_dump, "not vectorized: the size of group " +- "of strided accesses is not a power of 2"); +- print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); +- } +- +- return false; +- } +- } +- + return true; + } + + +=== modified file 'gcc/tree-vectorizer.h' +--- old/gcc/tree-vectorizer.h 2010-12-23 16:25:52 +0000 ++++ new/gcc/tree-vectorizer.h 2011-05-05 15:43:06 +0000 +@@ -788,9 +788,9 @@ + extern tree vectorizable_function (gimple, tree, tree); + extern void vect_model_simple_cost (stmt_vec_info, int, enum vect_def_type *, + slp_tree); +-extern void vect_model_store_cost (stmt_vec_info, int, enum vect_def_type, +- slp_tree); +-extern void vect_model_load_cost (stmt_vec_info, int, slp_tree); ++extern void vect_model_store_cost (stmt_vec_info, int, bool, ++ enum vect_def_type, slp_tree); ++extern void vect_model_load_cost (stmt_vec_info, int, bool, slp_tree); + extern void vect_finish_stmt_generation (gimple, gimple, + gimple_stmt_iterator *); + extern bool vect_mark_stmts_to_be_vectorized (loop_vec_info); +@@ -823,21 +823,22 @@ + extern bool vect_analyze_data_ref_accesses (loop_vec_info, bb_vec_info); + extern bool vect_prune_runtime_alias_test_list (loop_vec_info); + extern bool vect_analyze_data_refs (loop_vec_info, bb_vec_info, int *); +-extern tree vect_create_data_ref_ptr (gimple, struct loop *, tree, tree *, +- gimple *, bool, bool *); ++extern tree vect_create_data_ref_ptr (gimple, tree, struct loop *, tree, ++ tree *, gimple *, bool, bool *); + extern tree bump_vector_ptr (tree, gimple, gimple_stmt_iterator *, gimple, tree); + extern tree vect_create_destination_var (tree, tree); +-extern bool vect_strided_store_supported (tree); +-extern bool vect_strided_load_supported (tree); +-extern bool vect_permute_store_chain (VEC(tree,heap) *,unsigned int, gimple, ++extern bool vect_strided_store_supported (tree, unsigned HOST_WIDE_INT); ++extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT); ++extern bool vect_strided_load_supported (tree, unsigned HOST_WIDE_INT); ++extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT); ++extern void vect_permute_store_chain (VEC(tree,heap) *,unsigned int, gimple, + gimple_stmt_iterator *, VEC(tree,heap) **); + extern tree vect_setup_realignment (gimple, gimple_stmt_iterator *, tree *, + enum dr_alignment_support, tree, + struct loop **); +-extern bool vect_permute_load_chain (VEC(tree,heap) *,unsigned int, gimple, +- gimple_stmt_iterator *, VEC(tree,heap) **); +-extern bool vect_transform_strided_load (gimple, VEC(tree,heap) *, int, ++extern void vect_transform_strided_load (gimple, VEC(tree,heap) *, int, + gimple_stmt_iterator *); ++extern void vect_record_strided_load_vectors (gimple, VEC(tree,heap) *); + extern int vect_get_place_in_interleaving_chain (gimple, gimple); + extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); + extern tree vect_create_addr_base_for_vector_ref (gimple, gimple_seq *, + +=== modified file 'gcc/tree.c' +--- old/gcc/tree.c 2011-05-18 13:29:24 +0000 ++++ new/gcc/tree.c 2011-06-02 12:12:00 +0000 +@@ -7321,6 +7321,15 @@ + return build_array_type_1 (elt_type, index_type, false); + } + ++/* Return a representation of ELT_TYPE[NELTS], using indices of type ++ sizetype. 
*/ ++ ++tree ++build_array_type_nelts (tree elt_type, unsigned HOST_WIDE_INT nelts) ++{ ++ return build_array_type (elt_type, build_index_type (size_int (nelts - 1))); ++} ++ + /* Recursively examines the array elements of TYPE, until a non-array + element type is found. */ + + +=== modified file 'gcc/tree.h' +--- old/gcc/tree.h 2011-03-11 22:38:58 +0000 ++++ new/gcc/tree.h 2011-05-05 15:43:06 +0000 +@@ -4192,6 +4192,7 @@ + extern tree build_index_type (tree); + extern tree build_array_type (tree, tree); + extern tree build_nonshared_array_type (tree, tree); ++extern tree build_array_type_nelts (tree, unsigned HOST_WIDE_INT); + extern tree build_function_type (tree, tree); + extern tree build_function_type_list (tree, ...); + extern tree build_function_type_skip_args (tree, bitmap); + +=== modified file 'gcc/value-prof.c' +--- old/gcc/value-prof.c 2011-01-29 03:54:56 +0000 ++++ new/gcc/value-prof.c 2011-05-05 15:42:22 +0000 +@@ -1242,6 +1242,9 @@ + if (TREE_CODE (callee) == FUNCTION_DECL) + return false; + ++ if (gimple_call_internal_p (stmt)) ++ return false; ++ + histogram = gimple_histogram_value_of_type (cfun, stmt, HIST_TYPE_INDIR_CALL); + if (!histogram) + return false; +@@ -1630,6 +1633,7 @@ + tree callee; + + if (gimple_code (stmt) != GIMPLE_CALL ++ || gimple_call_internal_p (stmt) + || gimple_call_fndecl (stmt) != NULL_TREE) + return; + + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106754.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106754.patch new file mode 100644 index 0000000000..b64991836b --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106754.patch @@ -0,0 +1,329 @@ +2011-06-07 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF: + + 2011-06-07 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/arm.md (*maddhidi4tb, *maddhidi4tt): New define_insns. + (*maddhisi4tb, *maddhisi4tt): New define_insns. + + gcc/testsuite/ + * gcc.target/arm/smlatb-1.c: New file. + * gcc.target/arm/smlatt-1.c: New file. + * gcc.target/arm/smlaltb-1.c: New file. + * gcc.target/arm/smlaltt-1.c: New file. + +2011-06-07 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF: + + 2011-06-07 Bernd Schmidt <bernds@codesourcery.com> + Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * simplify-rtx.c (simplify_unary_operation_1): Canonicalize widening + multiplies. + * doc/md.texi (Canonicalization of Instructions): Document widening + multiply canonicalization. + + gcc/testsuite/ + * gcc.target/arm/mla-2.c: New test. 
+ +=== modified file 'gcc/config/arm/arm.md' +--- old/gcc/config/arm/arm.md 2011-05-13 13:42:39 +0000 ++++ new/gcc/config/arm/arm.md 2011-06-02 15:58:33 +0000 +@@ -1809,6 +1809,36 @@ + (set_attr "predicable" "yes")] + ) + ++;; Note: there is no maddhisi4ibt because this one is canonical form ++(define_insn "*maddhisi4tb" ++ [(set (match_operand:SI 0 "s_register_operand" "=r") ++ (plus:SI (mult:SI (ashiftrt:SI ++ (match_operand:SI 1 "s_register_operand" "r") ++ (const_int 16)) ++ (sign_extend:SI ++ (match_operand:HI 2 "s_register_operand" "r"))) ++ (match_operand:SI 3 "s_register_operand" "r")))] ++ "TARGET_DSP_MULTIPLY" ++ "smlatb%?\\t%0, %1, %2, %3" ++ [(set_attr "insn" "smlaxy") ++ (set_attr "predicable" "yes")] ++) ++ ++(define_insn "*maddhisi4tt" ++ [(set (match_operand:SI 0 "s_register_operand" "=r") ++ (plus:SI (mult:SI (ashiftrt:SI ++ (match_operand:SI 1 "s_register_operand" "r") ++ (const_int 16)) ++ (ashiftrt:SI ++ (match_operand:SI 2 "s_register_operand" "r") ++ (const_int 16))) ++ (match_operand:SI 3 "s_register_operand" "r")))] ++ "TARGET_DSP_MULTIPLY" ++ "smlatt%?\\t%0, %1, %2, %3" ++ [(set_attr "insn" "smlaxy") ++ (set_attr "predicable" "yes")] ++) ++ + (define_insn "*maddhidi4" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (plus:DI +@@ -1822,6 +1852,39 @@ + [(set_attr "insn" "smlalxy") + (set_attr "predicable" "yes")]) + ++;; Note: there is no maddhidi4ibt because this one is canonical form ++(define_insn "*maddhidi4tb" ++ [(set (match_operand:DI 0 "s_register_operand" "=r") ++ (plus:DI ++ (mult:DI (sign_extend:DI ++ (ashiftrt:SI ++ (match_operand:SI 1 "s_register_operand" "r") ++ (const_int 16))) ++ (sign_extend:DI ++ (match_operand:HI 2 "s_register_operand" "r"))) ++ (match_operand:DI 3 "s_register_operand" "0")))] ++ "TARGET_DSP_MULTIPLY" ++ "smlaltb%?\\t%Q0, %R0, %1, %2" ++ [(set_attr "insn" "smlalxy") ++ (set_attr "predicable" "yes")]) ++ ++(define_insn "*maddhidi4tt" ++ [(set (match_operand:DI 0 "s_register_operand" "=r") ++ (plus:DI ++ (mult:DI (sign_extend:DI ++ (ashiftrt:SI ++ (match_operand:SI 1 "s_register_operand" "r") ++ (const_int 16))) ++ (sign_extend:DI ++ (ashiftrt:SI ++ (match_operand:SI 2 "s_register_operand" "r") ++ (const_int 16)))) ++ (match_operand:DI 3 "s_register_operand" "0")))] ++ "TARGET_DSP_MULTIPLY" ++ "smlaltt%?\\t%Q0, %R0, %1, %2" ++ [(set_attr "insn" "smlalxy") ++ (set_attr "predicable" "yes")]) ++ + (define_expand "mulsf3" + [(set (match_operand:SF 0 "s_register_operand" "") + (mult:SF (match_operand:SF 1 "s_register_operand" "") + +=== modified file 'gcc/doc/md.texi' +--- old/gcc/doc/md.texi 2011-05-05 15:43:06 +0000 ++++ new/gcc/doc/md.texi 2011-06-07 11:18:20 +0000 +@@ -5929,6 +5929,23 @@ + will be written using @code{zero_extract} rather than the equivalent + @code{and} or @code{sign_extract} operations. + ++@cindex @code{mult}, canonicalization of ++@item ++@code{(sign_extend:@var{m1} (mult:@var{m2} (sign_extend:@var{m2} @var{x}) ++(sign_extend:@var{m2} @var{y})))} is converted to @code{(mult:@var{m1} ++(sign_extend:@var{m1} @var{x}) (sign_extend:@var{m1} @var{y}))}, and likewise ++for @code{zero_extend}. ++ ++@item ++@code{(sign_extend:@var{m1} (mult:@var{m2} (ashiftrt:@var{m2} ++@var{x} @var{s}) (sign_extend:@var{m2} @var{y})))} is converted ++to @code{(mult:@var{m1} (sign_extend:@var{m1} (ashiftrt:@var{m2} ++@var{x} @var{s})) (sign_extend:@var{m1} @var{y}))}, and likewise for ++patterns using @code{zero_extend} and @code{lshiftrt}. If the second ++operand of @code{mult} is also a shift, then that is extended also. 
++This transformation is only applied when it can be proven that the ++original operation had sufficient precision to prevent overflow. ++ + @end itemize + + Further canonicalization rules are defined in the function + +=== modified file 'gcc/simplify-rtx.c' +--- old/gcc/simplify-rtx.c 2011-05-27 14:31:18 +0000 ++++ new/gcc/simplify-rtx.c 2011-06-02 12:32:16 +0000 +@@ -1000,6 +1000,48 @@ + && GET_CODE (XEXP (XEXP (op, 0), 1)) == LABEL_REF) + return XEXP (op, 0); + ++ /* Extending a widening multiplication should be canonicalized to ++ a wider widening multiplication. */ ++ if (GET_CODE (op) == MULT) ++ { ++ rtx lhs = XEXP (op, 0); ++ rtx rhs = XEXP (op, 1); ++ enum rtx_code lcode = GET_CODE (lhs); ++ enum rtx_code rcode = GET_CODE (rhs); ++ ++ /* Widening multiplies usually extend both operands, but sometimes ++ they use a shift to extract a portion of a register. */ ++ if ((lcode == SIGN_EXTEND ++ || (lcode == ASHIFTRT && CONST_INT_P (XEXP (lhs, 1)))) ++ && (rcode == SIGN_EXTEND ++ || (rcode == ASHIFTRT && CONST_INT_P (XEXP (rhs, 1))))) ++ { ++ enum machine_mode lmode = GET_MODE (lhs); ++ enum machine_mode rmode = GET_MODE (rhs); ++ int bits; ++ ++ if (lcode == ASHIFTRT) ++ /* Number of bits not shifted off the end. */ ++ bits = GET_MODE_PRECISION (lmode) - INTVAL (XEXP (lhs, 1)); ++ else /* lcode == SIGN_EXTEND */ ++ /* Size of inner mode. */ ++ bits = GET_MODE_PRECISION (GET_MODE (XEXP (lhs, 0))); ++ ++ if (rcode == ASHIFTRT) ++ bits += GET_MODE_PRECISION (rmode) - INTVAL (XEXP (rhs, 1)); ++ else /* rcode == SIGN_EXTEND */ ++ bits += GET_MODE_PRECISION (GET_MODE (XEXP (rhs, 0))); ++ ++ /* We can only widen multiplies if the result is mathematiclly ++ equivalent. I.e. if overflow was impossible. */ ++ if (bits <= GET_MODE_PRECISION (GET_MODE (op))) ++ return simplify_gen_binary ++ (MULT, mode, ++ simplify_gen_unary (SIGN_EXTEND, mode, lhs, lmode), ++ simplify_gen_unary (SIGN_EXTEND, mode, rhs, rmode)); ++ } ++ } ++ + /* Check for a sign extension of a subreg of a promoted + variable, where the promotion is sign-extended, and the + target mode is the same as the variable's promotion. */ +@@ -1071,6 +1113,48 @@ + && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))) + return rtl_hooks.gen_lowpart_no_emit (mode, op); + ++ /* Extending a widening multiplication should be canonicalized to ++ a wider widening multiplication. */ ++ if (GET_CODE (op) == MULT) ++ { ++ rtx lhs = XEXP (op, 0); ++ rtx rhs = XEXP (op, 1); ++ enum rtx_code lcode = GET_CODE (lhs); ++ enum rtx_code rcode = GET_CODE (rhs); ++ ++ /* Widening multiplies usually extend both operands, but sometimes ++ they use a shift to extract a portion of a register. */ ++ if ((lcode == ZERO_EXTEND ++ || (lcode == LSHIFTRT && CONST_INT_P (XEXP (lhs, 1)))) ++ && (rcode == ZERO_EXTEND ++ || (rcode == LSHIFTRT && CONST_INT_P (XEXP (rhs, 1))))) ++ { ++ enum machine_mode lmode = GET_MODE (lhs); ++ enum machine_mode rmode = GET_MODE (rhs); ++ int bits; ++ ++ if (lcode == LSHIFTRT) ++ /* Number of bits not shifted off the end. */ ++ bits = GET_MODE_PRECISION (lmode) - INTVAL (XEXP (lhs, 1)); ++ else /* lcode == ZERO_EXTEND */ ++ /* Size of inner mode. */ ++ bits = GET_MODE_PRECISION (GET_MODE (XEXP (lhs, 0))); ++ ++ if (rcode == LSHIFTRT) ++ bits += GET_MODE_PRECISION (rmode) - INTVAL (XEXP (rhs, 1)); ++ else /* rcode == ZERO_EXTEND */ ++ bits += GET_MODE_PRECISION (GET_MODE (XEXP (rhs, 0))); ++ ++ /* We can only widen multiplies if the result is mathematiclly ++ equivalent. I.e. if overflow was impossible. 
*/ ++ if (bits <= GET_MODE_PRECISION (GET_MODE (op))) ++ return simplify_gen_binary ++ (MULT, mode, ++ simplify_gen_unary (ZERO_EXTEND, mode, lhs, lmode), ++ simplify_gen_unary (ZERO_EXTEND, mode, rhs, rmode)); ++ } ++ } ++ + /* (zero_extend:M (zero_extend:N <X>)) is (zero_extend:M <X>). */ + if (GET_CODE (op) == ZERO_EXTEND) + return simplify_gen_unary (ZERO_EXTEND, mode, XEXP (op, 0), + +=== added file 'gcc/testsuite/gcc.target/arm/mla-2.c' +--- old/gcc/testsuite/gcc.target/arm/mla-2.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/mla-2.c 2011-06-02 12:32:16 +0000 +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++long long foolong (long long x, short *a, short *b) ++{ ++ return x + *a * *b; ++} ++ ++/* { dg-final { scan-assembler "smlalbb" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/smlaltb-1.c' +--- old/gcc/testsuite/gcc.target/arm/smlaltb-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/smlaltb-1.c 2011-06-02 15:58:33 +0000 +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++long long int ++foo (long long x, int in) ++{ ++ short a = in & 0xffff; ++ short b = (in & 0xffff0000) >> 16; ++ ++ return x + b * a; ++} ++ ++/* { dg-final { scan-assembler "smlaltb" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/smlaltt-1.c' +--- old/gcc/testsuite/gcc.target/arm/smlaltt-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/smlaltt-1.c 2011-06-02 15:58:33 +0000 +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++long long int ++foo (long long x, int in1, int in2) ++{ ++ short a = (in1 & 0xffff0000) >> 16; ++ short b = (in2 & 0xffff0000) >> 16; ++ ++ return x + b * a; ++} ++ ++/* { dg-final { scan-assembler "smlaltt" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/smlatb-1.c' +--- old/gcc/testsuite/gcc.target/arm/smlatb-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/smlatb-1.c 2011-06-02 15:58:33 +0000 +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++int ++foo (int x, int in) ++{ ++ short a = in & 0xffff; ++ short b = (in & 0xffff0000) >> 16; ++ ++ return x + b * a; ++} ++ ++/* { dg-final { scan-assembler "smlatb" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/smlatt-1.c' +--- old/gcc/testsuite/gcc.target/arm/smlatt-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/smlatt-1.c 2011-06-02 15:58:33 +0000 +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++int ++foo (int x, int in1, int in2) ++{ ++ short a = (in1 & 0xffff0000) >> 16; ++ short b = (in2 & 0xffff0000) >> 16; ++ ++ return x + b * a; ++} ++ ++/* { dg-final { scan-assembler "smlatt" } } */ + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106755.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106755.patch new file mode 100644 index 0000000000..b8f587c9f4 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106755.patch @@ -0,0 +1,120 @@ +2011-06-10 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline: + gcc/ + 2011-06-02 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + * config/arm/neon.md (orndi3_neon): Actually split it. + + +2011-06-10 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline. 
+ gcc/ + 2011-05-26 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + * config/arm/neon.md ("orn<mode>3_neon"): Canonicalize not. + ("orndi3_neon"): Likewise. + ("bic<mode>3_neon"): Likewise. + + gcc/testsuite + 2011-05-26 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + * gcc.target/arm/neon-vorn-vbic.c: New test. + +=== modified file 'gcc/config/arm/neon.md' +--- old/gcc/config/arm/neon.md 2011-06-02 12:12:00 +0000 ++++ new/gcc/config/arm/neon.md 2011-06-04 00:04:47 +0000 +@@ -783,30 +783,57 @@ + + (define_insn "orn<mode>3_neon" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") +- (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w") +- (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))] ++ (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w")) ++ (match_operand:VDQ 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2" + [(set_attr "neon_type" "neon_int_1")] + ) + +-(define_insn "orndi3_neon" +- [(set (match_operand:DI 0 "s_register_operand" "=w,?=&r,?&r") +- (ior:DI (match_operand:DI 1 "s_register_operand" "w,r,0") +- (not:DI (match_operand:DI 2 "s_register_operand" "w,0,r"))))] ++;; TODO: investigate whether we should disable ++;; this and bicdi3_neon for the A8 in line with the other ++;; changes above. ++(define_insn_and_split "orndi3_neon" ++ [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r") ++ (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r")) ++ (match_operand:DI 1 "s_register_operand" "w,r,r,0")))] + "TARGET_NEON" + "@ + vorn\t%P0, %P1, %P2 + # ++ # + #" +- [(set_attr "neon_type" "neon_int_1,*,*") +- (set_attr "length" "*,8,8")] ++ "reload_completed && ++ (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))" ++ [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1))) ++ (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))] ++ " ++ { ++ if (TARGET_THUMB2) ++ { ++ operands[3] = gen_highpart (SImode, operands[0]); ++ operands[0] = gen_lowpart (SImode, operands[0]); ++ operands[4] = gen_highpart (SImode, operands[2]); ++ operands[2] = gen_lowpart (SImode, operands[2]); ++ operands[5] = gen_highpart (SImode, operands[1]); ++ operands[1] = gen_lowpart (SImode, operands[1]); ++ } ++ else ++ { ++ emit_insn (gen_one_cmpldi2 (operands[0], operands[2])); ++ emit_insn (gen_iordi3 (operands[0], operands[1], operands[0])); ++ DONE; ++ } ++ }" ++ [(set_attr "neon_type" "neon_int_1,*,*,*") ++ (set_attr "length" "*,16,8,8") ++ (set_attr "arch" "any,a,t2,t2")] + ) + + (define_insn "bic<mode>3_neon" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") +- (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w") +- (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))] ++ (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w")) ++ (match_operand:VDQ 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2" + [(set_attr "neon_type" "neon_int_1")] + +=== added file 'gcc/testsuite/gcc.target/arm/neon-vorn-vbic.c' +--- old/gcc/testsuite/gcc.target/arm/neon-vorn-vbic.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/neon-vorn-vbic.c 2011-06-03 23:50:02 +0000 +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++/* { dg-add-options arm_neon } */ ++ ++void bor (int *__restrict__ c, int *__restrict__ a, int *__restrict__ b) ++{ ++ int i; ++ for (i = 0; i < 9; i++) ++ c[i] = b[i] | (~a[i]); ++} ++void bic (int 
*__restrict__ c, int *__restrict__ a, int *__restrict__ b) ++{ ++ int i; ++ for (i = 0; i < 9; i++) ++ c[i] = b[i] & (~a[i]); ++} ++ ++/* { dg-final { scan-assembler "vorn\\t" } } */ ++/* { dg-final { scan-assembler "vbic\\t" } } */ + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106759.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106759.patch new file mode 100644 index 0000000000..c515767946 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106759.patch @@ -0,0 +1,545 @@ +2011-06-20 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline. + 2011-06-03 Julian Brown <julian@codesourcery.com> + + * config/arm/arm-cores.def (strongarm, strongarm110, strongarm1100) + (strongarm1110): Use strongarm tuning. + * config/arm/arm-protos.h (tune_params): Add max_insns_skipped + field. + * config/arm/arm.c (arm_strongarm_tune): New. + (arm_slowmul_tune, arm_fastmul_tune, arm_xscale_tune, arm_9e_tune) + (arm_v6t2_tune, arm_cortex_tune, arm_cortex_a5_tune) + (arm_cortex_a9_tune, arm_fa726te_tune): Add max_insns_skipped field + setting, using previous defaults or 1 for Cortex-A5. + (arm_option_override): Set max_insns_skipped from current tuning. + +2011-06-14 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline. + 2011-06-02 Julian Brown <julian@codesourcery.com> + + * config/arm/arm-cores.def (cortex-a5): Use cortex_a5 tuning. + * config/arm/arm.c (arm_cortex_a5_branch_cost): New. + (arm_cortex_a5_tune): New. + + 2011-06-02 Julian Brown <julian@codesourcery.com> + + * config/arm/arm-protos.h (tune_params): Add branch_cost hook. + * config/arm/arm.c (arm_default_branch_cost): New. + (arm_slowmul_tune, arm_fastmul_tune, arm_xscale_tune, arm_9e_tune) + (arm_v6t2_tune, arm_cortex_tune, arm_cortex_a9_tune) + (arm_fa726_tune): Set branch_cost field using + arm_default_branch_cost. + * config/arm/arm.h (BRANCH_COST): Use branch_cost hook from + current_tune structure. + * dojump.c (tm_p.h): Include file. + + 2011-06-02 Julian Brown <julian@codesourcery.com> + + * config/arm/arm-cores.def (arm1156t2-s, arm1156t2f-s): Use v6t2 + tuning. + (cortex-a5, cortex-a8, cortex-a15, cortex-r4, cortex-r4f, cortex-m4) + (cortex-m3, cortex-m1, cortex-m0): Use cortex tuning. + * config/arm/arm-protos.h (tune_params): Add prefer_constant_pool + field. + * config/arm/arm.c (arm_slowmul_tune, arm_fastmul_tune) + (arm_xscale_tune, arm_9e_tune, arm_cortex_a9_tune) + (arm_fa726te_tune): Add prefer_constant_pool setting. + (arm_v6t2_tune, arm_cortex_tune): New. + * config/arm/arm.h (TARGET_USE_MOVT): Make dependent on + prefer_constant_pool setting. + +2011-06-14 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline + 2011-06-01 Paul Brook <paul@cpodesourcery.com> + + * config/arm/arm-cores.def: Add cortex-r5. Add DIV flags to + Cortex-A15. + * config/arm/arm-tune.md: Regenerate. + * config/arm/arm.c (FL_DIV): Rename... + (FL_THUMB_DIV): ... to this. + (FL_ARM_DIV): Define. + (FL_FOR_ARCH7R, FL_FOR_ARCH7M): Use FL_THUMB_DIV. + (arm_arch_hwdiv): Remove. + (arm_arch_thumb_hwdiv, arm_arch_arm_hwdiv): New variables. + (arm_issue_rate): Add cortexr5. + * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Set + __ARM_ARCH_EXT_IDIV__. + (TARGET_IDIV): Define. + (arm_arch_hwdiv): Remove. + (arm_arch_arm_hwdiv, arm_arch_thumb_hwdiv): New prototypes. + * config/arm/arm.md (tune_cortexr4): Add cortexr5. + (divsi3, udivsi3): New patterns. 
+ * config/arm/thumb2.md (divsi3, udivsi3): Remove. + * doc/invoke.texi: Document ARM -mcpu=cortex-r5 + +=== modified file 'gcc/config/arm/arm-cores.def' +--- old/gcc/config/arm/arm-cores.def 2011-01-03 20:52:22 +0000 ++++ new/gcc/config/arm/arm-cores.def 2011-06-14 16:00:30 +0000 +@@ -70,10 +70,10 @@ + /* V4 Architecture Processors */ + ARM_CORE("arm8", arm8, 4, FL_MODE26 | FL_LDSCHED, fastmul) + ARM_CORE("arm810", arm810, 4, FL_MODE26 | FL_LDSCHED, fastmul) +-ARM_CORE("strongarm", strongarm, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) +-ARM_CORE("strongarm110", strongarm110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) +-ARM_CORE("strongarm1100", strongarm1100, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) +-ARM_CORE("strongarm1110", strongarm1110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) ++ARM_CORE("strongarm", strongarm, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) ++ARM_CORE("strongarm110", strongarm110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) ++ARM_CORE("strongarm1100", strongarm1100, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) ++ARM_CORE("strongarm1110", strongarm1110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) + ARM_CORE("fa526", fa526, 4, FL_LDSCHED, fastmul) + ARM_CORE("fa626", fa626, 4, FL_LDSCHED, fastmul) + +@@ -122,15 +122,16 @@ + ARM_CORE("arm1176jzf-s", arm1176jzfs, 6ZK, FL_LDSCHED | FL_VFPV2, 9e) + ARM_CORE("mpcorenovfp", mpcorenovfp, 6K, FL_LDSCHED, 9e) + ARM_CORE("mpcore", mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e) +-ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, 9e) +-ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, 9e) +-ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, 9e) +-ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, 9e) ++ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, v6t2) ++ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, v6t2) ++ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, cortex_a5) ++ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex) + ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9) +-ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED, 9e) +-ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, 9e) +-ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, 9e) +-ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, 9e) +-ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, 9e) +-ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, 9e) +-ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, 9e) ++ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex) ++ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, cortex) ++ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, cortex) ++ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) ++ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, cortex) ++ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex) ++ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, cortex) ++ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, cortex) + +=== modified file 'gcc/config/arm/arm-protos.h' +--- old/gcc/config/arm/arm-protos.h 2011-05-03 15:17:25 +0000 ++++ new/gcc/config/arm/arm-protos.h 2011-06-14 16:00:30 +0000 +@@ -219,9 +219,14 @@ + bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool); + bool (*sched_adjust_cost) (rtx, rtx, rtx, int *); + int constant_limit; ++ /* Maximum number of instructions to conditionalise in ++ arm_final_prescan_insn. 
*/ ++ int max_insns_skipped; + int num_prefetch_slots; + int l1_cache_size; + int l1_cache_line_size; ++ bool prefer_constant_pool; ++ int (*branch_cost) (bool, bool); + }; + + extern const struct tune_params *current_tune; + +=== modified file 'gcc/config/arm/arm-tune.md' +--- old/gcc/config/arm/arm-tune.md 2010-12-20 17:48:51 +0000 ++++ new/gcc/config/arm/arm-tune.md 2011-06-14 14:37:30 +0000 +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from arm-cores.def + (define_attr "tune" +- "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexm4,cortexm3,cortexm1,cortexm0" ++ "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0" + (const (symbol_ref "((enum attr_tune) arm_tune)"))) + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-05-11 14:49:48 +0000 ++++ new/gcc/config/arm/arm.c 2011-06-14 16:00:30 +0000 +@@ -255,6 +255,8 @@ + static void arm_conditional_register_usage (void); + static reg_class_t arm_preferred_rename_class (reg_class_t rclass); + static unsigned int arm_autovectorize_vector_sizes (void); ++static int arm_default_branch_cost (bool, bool); ++static int arm_cortex_a5_branch_cost (bool, bool); + + + /* Table of machine attributes. */ +@@ -672,12 +674,13 @@ + #define FL_THUMB2 (1 << 16) /* Thumb-2. */ + #define FL_NOTM (1 << 17) /* Instructions not present in the 'M' + profile. */ +-#define FL_DIV (1 << 18) /* Hardware divide. */ ++#define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */ + #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */ + #define FL_NEON (1 << 20) /* Neon instructions. */ + #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M + architecture. */ + #define FL_ARCH7 (1 << 22) /* Architecture 7. */ ++#define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */ + + #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". 
*/ + +@@ -704,8 +707,8 @@ + #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM) + #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7) + #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K) +-#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV) +-#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV) ++#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV) ++#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV) + #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM) + + /* The bits in this mask specify which +@@ -791,7 +794,8 @@ + int arm_arch_thumb2; + + /* Nonzero if chip supports integer division instruction. */ +-int arm_arch_hwdiv; ++int arm_arch_arm_hwdiv; ++int arm_arch_thumb_hwdiv; + + /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, + we must report the mode of the memory reference from +@@ -864,48 +868,117 @@ + { + arm_slowmul_rtx_costs, + NULL, +- 3, +- ARM_PREFETCH_NOT_BENEFICIAL ++ 3, /* Constant limit. */ ++ 5, /* Max cond insns. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, ++ true, /* Prefer constant pool. */ ++ arm_default_branch_cost + }; + + const struct tune_params arm_fastmul_tune = + { + arm_fastmul_rtx_costs, + NULL, +- 1, +- ARM_PREFETCH_NOT_BENEFICIAL ++ 1, /* Constant limit. */ ++ 5, /* Max cond insns. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, ++ true, /* Prefer constant pool. */ ++ arm_default_branch_cost ++}; ++ ++/* StrongARM has early execution of branches, so a sequence that is worth ++ skipping is shorter. Set max_insns_skipped to a lower value. */ ++ ++const struct tune_params arm_strongarm_tune = ++{ ++ arm_fastmul_rtx_costs, ++ NULL, ++ 1, /* Constant limit. */ ++ 3, /* Max cond insns. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, ++ true, /* Prefer constant pool. */ ++ arm_default_branch_cost + }; + + const struct tune_params arm_xscale_tune = + { + arm_xscale_rtx_costs, + xscale_sched_adjust_cost, +- 2, +- ARM_PREFETCH_NOT_BENEFICIAL ++ 2, /* Constant limit. */ ++ 3, /* Max cond insns. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, ++ true, /* Prefer constant pool. */ ++ arm_default_branch_cost + }; + + const struct tune_params arm_9e_tune = + { + arm_9e_rtx_costs, + NULL, +- 1, +- ARM_PREFETCH_NOT_BENEFICIAL ++ 1, /* Constant limit. */ ++ 5, /* Max cond insns. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, ++ true, /* Prefer constant pool. */ ++ arm_default_branch_cost ++}; ++ ++const struct tune_params arm_v6t2_tune = ++{ ++ arm_9e_rtx_costs, ++ NULL, ++ 1, /* Constant limit. */ ++ 5, /* Max cond insns. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, ++ false, /* Prefer constant pool. */ ++ arm_default_branch_cost ++}; ++ ++/* Generic Cortex tuning. Use more specific tunings if appropriate. */ ++const struct tune_params arm_cortex_tune = ++{ ++ arm_9e_rtx_costs, ++ NULL, ++ 1, /* Constant limit. */ ++ 5, /* Max cond insns. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, ++ false, /* Prefer constant pool. */ ++ arm_default_branch_cost ++}; ++ ++/* Branches can be dual-issued on Cortex-A5, so conditional execution is ++ less appealing. Set max_insns_skipped to a low value. */ ++ ++const struct tune_params arm_cortex_a5_tune = ++{ ++ arm_9e_rtx_costs, ++ NULL, ++ 1, /* Constant limit. */ ++ 1, /* Max cond insns. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, ++ false, /* Prefer constant pool. */ ++ arm_cortex_a5_branch_cost + }; + + const struct tune_params arm_cortex_a9_tune = + { + arm_9e_rtx_costs, + cortex_a9_sched_adjust_cost, +- 1, +- ARM_PREFETCH_BENEFICIAL(4,32,32) ++ 1, /* Constant limit. */ ++ 5, /* Max cond insns. */ ++ ARM_PREFETCH_BENEFICIAL(4,32,32), ++ false, /* Prefer constant pool. 
*/ ++ arm_default_branch_cost + }; + + const struct tune_params arm_fa726te_tune = + { + arm_9e_rtx_costs, + fa726te_sched_adjust_cost, +- 1, +- ARM_PREFETCH_NOT_BENEFICIAL ++ 1, /* Constant limit. */ ++ 5, /* Max cond insns. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, ++ true, /* Prefer constant pool. */ ++ arm_default_branch_cost + }; + + +@@ -1711,7 +1784,8 @@ + arm_tune_wbuf = (tune_flags & FL_WBUF) != 0; + arm_tune_xscale = (tune_flags & FL_XSCALE) != 0; + arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0; +- arm_arch_hwdiv = (insn_flags & FL_DIV) != 0; ++ arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0; ++ arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0; + arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; + + /* If we are not using the default (ARM mode) section anchor offset +@@ -1991,12 +2065,7 @@ + max_insns_skipped = 6; + } + else +- { +- /* StrongARM has early execution of branches, so a sequence +- that is worth skipping is shorter. */ +- if (arm_tune_strongarm) +- max_insns_skipped = 3; +- } ++ max_insns_skipped = current_tune->max_insns_skipped; + + /* Hot/Cold partitioning is not currently supported, since we can't + handle literal pool placement in that case. */ +@@ -8211,6 +8280,21 @@ + return cost; + } + ++static int ++arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED) ++{ ++ if (TARGET_32BIT) ++ return (TARGET_THUMB2 && !speed_p) ? 1 : 4; ++ else ++ return (optimize > 0) ? 2 : 0; ++} ++ ++static int ++arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p) ++{ ++ return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p); ++} ++ + static int fp_consts_inited = 0; + + /* Only zero is valid for VFP. Other values are also valid for FPA. */ +@@ -23123,6 +23207,7 @@ + { + case cortexr4: + case cortexr4f: ++ case cortexr5: + case cortexa5: + case cortexa8: + case cortexa9: + +=== modified file 'gcc/config/arm/arm.h' +--- old/gcc/config/arm/arm.h 2011-06-02 12:12:00 +0000 ++++ new/gcc/config/arm/arm.h 2011-06-14 14:53:07 +0000 +@@ -101,6 +101,8 @@ + builtin_define ("__ARM_PCS"); \ + builtin_define ("__ARM_EABI__"); \ + } \ ++ if (TARGET_IDIV) \ ++ builtin_define ("__ARM_ARCH_EXT_IDIV__"); \ + } while (0) + + /* The various ARM cores. */ +@@ -282,7 +284,8 @@ + (TARGET_32BIT && arm_arch6 && (arm_arch_notm || arm_arch7em)) + + /* Should MOVW/MOVT be used in preference to a constant pool. */ +-#define TARGET_USE_MOVT (arm_arch_thumb2 && !optimize_size) ++#define TARGET_USE_MOVT \ ++ (arm_arch_thumb2 && !optimize_size && !current_tune->prefer_constant_pool) + + /* We could use unified syntax for arm mode, but for now we just use it + for Thumb-2. */ +@@ -303,6 +306,10 @@ + /* Nonzero if this chip supports ldrex{bhd} and strex{bhd}. */ + #define TARGET_HAVE_LDREXBHD ((arm_arch6k && TARGET_ARM) || arm_arch7) + ++/* Nonzero if integer division instructions supported. */ ++#define TARGET_IDIV ((TARGET_ARM && arm_arch_arm_hwdiv) \ ++ || (TARGET_THUMB2 && arm_arch_thumb_hwdiv)) ++ + /* True iff the full BPABI is being used. If TARGET_BPABI is true, + then TARGET_AAPCS_BASED must be true -- but the converse does not + hold. TARGET_BPABI implies the use of the BPABI runtime library, +@@ -487,8 +494,11 @@ + /* Nonzero if chip supports Thumb 2. */ + extern int arm_arch_thumb2; + +-/* Nonzero if chip supports integer division instruction. */ +-extern int arm_arch_hwdiv; ++/* Nonzero if chip supports integer division instruction in ARM mode. */ ++extern int arm_arch_arm_hwdiv; ++ ++/* Nonzero if chip supports integer division instruction in Thumb mode. 
*/ ++extern int arm_arch_thumb_hwdiv; + + #ifndef TARGET_DEFAULT + #define TARGET_DEFAULT (MASK_APCS_FRAME) +@@ -2018,8 +2028,8 @@ + /* Try to generate sequences that don't involve branches, we can then use + conditional instructions */ + #define BRANCH_COST(speed_p, predictable_p) \ +- (TARGET_32BIT ? (TARGET_THUMB2 && !speed_p ? 1 : 4) \ +- : (optimize > 0 ? 2 : 0)) ++ (current_tune->branch_cost (speed_p, predictable_p)) ++ + + /* Position Independent Code. */ + /* We decide which register to use based on the compilation options and + +=== modified file 'gcc/config/arm/arm.md' +--- old/gcc/config/arm/arm.md 2011-06-02 15:58:33 +0000 ++++ new/gcc/config/arm/arm.md 2011-06-14 14:37:30 +0000 +@@ -490,7 +490,7 @@ + + (define_attr "tune_cortexr4" "yes,no" + (const (if_then_else +- (eq_attr "tune" "cortexr4,cortexr4f") ++ (eq_attr "tune" "cortexr4,cortexr4f,cortexr5") + (const_string "yes") + (const_string "no")))) + +@@ -3738,6 +3738,28 @@ + (set_attr "predicable" "yes")] + ) + ++ ++;; Division instructions ++(define_insn "divsi3" ++ [(set (match_operand:SI 0 "s_register_operand" "=r") ++ (div:SI (match_operand:SI 1 "s_register_operand" "r") ++ (match_operand:SI 2 "s_register_operand" "r")))] ++ "TARGET_IDIV" ++ "sdiv%?\t%0, %1, %2" ++ [(set_attr "predicable" "yes") ++ (set_attr "insn" "sdiv")] ++) ++ ++(define_insn "udivsi3" ++ [(set (match_operand:SI 0 "s_register_operand" "=r") ++ (udiv:SI (match_operand:SI 1 "s_register_operand" "r") ++ (match_operand:SI 2 "s_register_operand" "r")))] ++ "TARGET_IDIV" ++ "udiv%?\t%0, %1, %2" ++ [(set_attr "predicable" "yes") ++ (set_attr "insn" "udiv")] ++) ++ + + ;; Unary arithmetic insns + + +=== modified file 'gcc/config/arm/thumb2.md' +--- old/gcc/config/arm/thumb2.md 2011-05-11 07:15:47 +0000 ++++ new/gcc/config/arm/thumb2.md 2011-06-14 14:37:30 +0000 +@@ -779,26 +779,6 @@ + (set_attr "length" "2")] + ) + +-(define_insn "divsi3" +- [(set (match_operand:SI 0 "s_register_operand" "=r") +- (div:SI (match_operand:SI 1 "s_register_operand" "r") +- (match_operand:SI 2 "s_register_operand" "r")))] +- "TARGET_THUMB2 && arm_arch_hwdiv" +- "sdiv%?\t%0, %1, %2" +- [(set_attr "predicable" "yes") +- (set_attr "insn" "sdiv")] +-) +- +-(define_insn "udivsi3" +- [(set (match_operand:SI 0 "s_register_operand" "=r") +- (udiv:SI (match_operand:SI 1 "s_register_operand" "r") +- (match_operand:SI 2 "s_register_operand" "r")))] +- "TARGET_THUMB2 && arm_arch_hwdiv" +- "udiv%?\t%0, %1, %2" +- [(set_attr "predicable" "yes") +- (set_attr "insn" "udiv")] +-) +- + (define_insn "*thumb2_subsi_short" + [(set (match_operand:SI 0 "low_register_operand" "=l") + (minus:SI (match_operand:SI 1 "low_register_operand" "l") + +=== modified file 'gcc/doc/invoke.texi' +--- old/gcc/doc/invoke.texi 2011-05-11 07:15:47 +0000 ++++ new/gcc/doc/invoke.texi 2011-06-14 14:37:30 +0000 +@@ -10208,7 +10208,8 @@ + @samp{arm1136j-s}, @samp{arm1136jf-s}, @samp{mpcore}, @samp{mpcorenovfp}, + @samp{arm1156t2-s}, @samp{arm1156t2f-s}, @samp{arm1176jz-s}, @samp{arm1176jzf-s}, + @samp{cortex-a5}, @samp{cortex-a8}, @samp{cortex-a9}, @samp{cortex-a15}, +-@samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-m4}, @samp{cortex-m3}, ++@samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-r5}, ++@samp{cortex-m4}, @samp{cortex-m3}, + @samp{cortex-m1}, + @samp{cortex-m0}, + @samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}. 
+ +=== modified file 'gcc/dojump.c' +--- old/gcc/dojump.c 2010-05-19 19:09:57 +0000 ++++ new/gcc/dojump.c 2011-06-14 14:53:07 +0000 +@@ -36,6 +36,7 @@ + #include "ggc.h" + #include "basic-block.h" + #include "output.h" ++#include "tm_p.h" + + static bool prefer_and_bit_test (enum machine_mode, int); + static void do_jump_by_parts_greater (tree, tree, int, rtx, rtx, int); + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106762.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106762.patch new file mode 100644 index 0000000000..bbf9819ecd --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106762.patch @@ -0,0 +1,1355 @@ +2011-06-28 Ira Rosen <ira.rosen@linaro.org> + + Backport from FSF: + + 2011-06-07 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vectorizer.h (vect_recog_func_ptr): Make last argument to be + a pointer. + * tree-vect-patterns.c (vect_recog_widen_sum_pattern, + vect_recog_widen_mult_pattern, vect_recog_dot_prod_pattern, + vect_recog_pow_pattern): Likewise. + (vect_pattern_recog_1): Remove declaration. + (widened_name_p): Remove declaration. Add new argument to specify + whether to check that both types are either signed or unsigned. + (vect_recog_widen_mult_pattern): Update documentation. Handle + unsigned patterns and multiplication by constants. + (vect_pattern_recog_1): Update vect_recog_func references. Use + statement information from the statement returned from pattern + detection functions. + (vect_pattern_recog): Update vect_recog_func reference. + * tree-vect-stmts.c (vectorizable_type_promotion): For widening + multiplication by a constant use the type of the other operand. + + gcc/testsuite + * lib/target-supports.exp + (check_effective_target_vect_widen_mult_qi_to_hi): + Add NEON as supporting target. + (check_effective_target_vect_widen_mult_hi_to_si): Likewise. + (check_effective_target_vect_widen_mult_qi_to_hi_pattern): New. + (check_effective_target_vect_widen_mult_hi_to_si_pattern): New. + * gcc.dg/vect/vect-widen-mult-u8.c: Expect to be vectorized + using widening multiplication on targets that support it. + * gcc.dg/vect/vect-widen-mult-u16.c: Likewise. + * gcc.dg/vect/vect-widen-mult-const-s16.c: New test. + * gcc.dg/vect/vect-widen-mult-const-u16.c: New test. + + and + + 2011-06-15 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vect-loop-manip.c (remove_dead_stmts_from_loop): Remove. + (slpeel_tree_peel_loop_to_edge): Don't call + remove_dead_stmts_from_loop. + * tree-vect-loop.c (vect_determine_vectorization_factor): Don't + remove irrelevant pattern statements. For irrelevant statements + check if it is the last statement of a detected pattern, use + corresponding pattern statement instead. + (destroy_loop_vec_info): No need to remove pattern statements, + only free stmt_vec_info. + (vect_transform_loop): For irrelevant statements check if it is + the last statement of a detected pattern, use corresponding + pattern statement instead. + * tree-vect-patterns.c (vect_pattern_recog_1): Don't insert + pattern statements. Set basic block for the new statement. + (vect_pattern_recog): Update documentation. + * tree-vect-stmts.c (vect_mark_stmts_to_be_vectorized): Scan + operands of pattern statements. + (vectorizable_call): Fix printing. In case of a pattern statement + use the lhs of the original statement when creating a dummy + statement to replace the original call. 
+ (vect_analyze_stmt): For irrelevant statements check if it is + the last statement of a detected pattern, use corresponding + pattern statement instead. + * tree-vect-slp.c (vect_schedule_slp_instance): For pattern + statements use gsi of the original statement. + + and + 2011-06-21 Ira Rosen <ira.rosen@linaro.org> + + PR tree-optimization/49478 + gcc/ + + * tree-vect-loop.c (vectorizable_reduction): Handle DOT_PROD_EXPR + with constant operand. + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c 2011-06-19 10:59:13 +0000 +@@ -0,0 +1,60 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include "tree-vect.h" ++#include <stdlib.h> ++ ++#define N 32 ++ ++__attribute__ ((noinline)) void ++foo (int *__restrict a, ++ short *__restrict b, ++ int n) ++{ ++ int i; ++ ++ for (i = 0; i < n; i++) ++ a[i] = b[i] * 2333; ++ ++ for (i = 0; i < n; i++) ++ if (a[i] != b[i] * 2333) ++ abort (); ++} ++ ++__attribute__ ((noinline)) void ++bar (int *__restrict a, ++ short *__restrict b, ++ int n) ++{ ++ int i; ++ ++ for (i = 0; i < n; i++) ++ a[i] = b[i] * (short) 2333; ++ ++ for (i = 0; i < n; i++) ++ if (a[i] != b[i] * (short) 2333) ++ abort (); ++} ++ ++int main (void) ++{ ++ int i; ++ int a[N]; ++ short b[N]; ++ ++ for (i = 0; i < N; i++) ++ { ++ a[i] = 0; ++ b[i] = i; ++ __asm__ volatile (""); ++ } ++ ++ foo (a, b, N); ++ bar (a, b, N); ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_widen_mult_hi_to_si } } } */ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c 2011-06-19 10:59:13 +0000 +@@ -0,0 +1,77 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include "tree-vect.h" ++#include <stdlib.h> ++ ++#define N 32 ++ ++__attribute__ ((noinline)) void ++foo (unsigned int *__restrict a, ++ unsigned short *__restrict b, ++ int n) ++{ ++ int i; ++ ++ for (i = 0; i < n; i++) ++ a[i] = b[i] * 2333; ++ ++ for (i = 0; i < n; i++) ++ if (a[i] != b[i] * 2333) ++ abort (); ++} ++ ++__attribute__ ((noinline)) void ++bar (unsigned int *__restrict a, ++ unsigned short *__restrict b, ++ int n) ++{ ++ int i; ++ ++ for (i = 0; i < n; i++) ++ a[i] = (unsigned short) 2333 * b[i]; ++ ++ for (i = 0; i < n; i++) ++ if (a[i] != b[i] * (unsigned short) 2333) ++ abort (); ++} ++ ++__attribute__ ((noinline)) void ++baz (unsigned int *__restrict a, ++ unsigned short *__restrict b, ++ int n) ++{ ++ int i; ++ ++ for (i = 0; i < n; i++) ++ a[i] = b[i] * 233333333; ++ ++ for (i = 0; i < n; i++) ++ if (a[i] != b[i] * 233333333) ++ abort (); ++} ++ ++ ++int main (void) ++{ ++ int i; ++ unsigned int a[N]; ++ unsigned short b[N]; ++ ++ for (i = 0; i < N; i++) ++ { ++ a[i] = 0; ++ b[i] = i; ++ __asm__ volatile (""); ++ } ++ ++ foo (a, b, N); ++ bar (a, b, N); ++ baz (a, b, N); ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" { target vect_widen_mult_hi_to_si } } } */ ++/* { 
dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c 2010-05-27 12:23:45 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c 2011-06-19 10:59:13 +0000 +@@ -9,13 +9,11 @@ + unsigned short Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); + unsigned int result[N]; + +-/* short->int widening-mult */ ++/* unsigned short->unsigned int widening-mult. */ + __attribute__ ((noinline)) int + foo1(int len) { + int i; + +- /* Not vectorized because X[i] and Y[i] are casted to 'int' +- so the widening multiplication pattern is not recognized. */ + for (i=0; i<len; i++) { + result[i] = (unsigned int)(X[i] * Y[i]); + } +@@ -43,8 +41,8 @@ + return 0; + } + +-/*The induction loop is vectorized */ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail *-*-* } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c 2009-05-08 12:39:01 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c 2011-06-19 10:59:13 +0000 +@@ -9,7 +9,7 @@ + unsigned char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); + unsigned short result[N]; + +-/* char->short widening-mult */ ++/* unsigned char-> unsigned short widening-mult. 
*/ + __attribute__ ((noinline)) int + foo1(int len) { + int i; +@@ -28,8 +28,7 @@ + for (i=0; i<N; i++) { + X[i] = i; + Y[i] = 64-i; +- if (i%4 == 0) +- X[i] = 5; ++ __asm__ volatile (""); + } + + foo1 (N); +@@ -43,5 +42,7 @@ + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_qi_to_hi || vect_unpack } } } } */ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_qi_to_hi_pattern } } } */ ++/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_qi_to_hi_pattern } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/lib/target-supports.exp' +--- old/gcc/testsuite/lib/target-supports.exp 2011-06-02 12:12:00 +0000 ++++ new/gcc/testsuite/lib/target-supports.exp 2011-06-19 10:59:13 +0000 +@@ -2663,7 +2663,8 @@ + } else { + set et_vect_widen_mult_qi_to_hi_saved 0 + } +- if { [istarget powerpc*-*-*] } { ++ if { [istarget powerpc*-*-*] ++ || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { + set et_vect_widen_mult_qi_to_hi_saved 1 + } + } +@@ -2696,7 +2697,8 @@ + || [istarget spu-*-*] + || [istarget ia64-*-*] + || [istarget i?86-*-*] +- || [istarget x86_64-*-*] } { ++ || [istarget x86_64-*-*] ++ || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { + set et_vect_widen_mult_hi_to_si_saved 1 + } + } +@@ -2705,6 +2707,52 @@ + } + + # Return 1 if the target plus current options supports a vector ++# widening multiplication of *char* args into *short* result, 0 otherwise. ++# ++# This won't change for different subtargets so cache the result. ++ ++proc check_effective_target_vect_widen_mult_qi_to_hi_pattern { } { ++ global et_vect_widen_mult_qi_to_hi_pattern ++ ++ if [info exists et_vect_widen_mult_qi_to_hi_pattern_saved] { ++ verbose "check_effective_target_vect_widen_mult_qi_to_hi_pattern: using cached result" 2 ++ } else { ++ set et_vect_widen_mult_qi_to_hi_pattern_saved 0 ++ if { [istarget powerpc*-*-*] ++ || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { ++ set et_vect_widen_mult_qi_to_hi_pattern_saved 1 ++ } ++ } ++ verbose "check_effective_target_vect_widen_mult_qi_to_hi_pattern: returning $et_vect_widen_mult_qi_to_hi_pattern_saved" 2 ++ return $et_vect_widen_mult_qi_to_hi_pattern_saved ++} ++ ++# Return 1 if the target plus current options supports a vector ++# widening multiplication of *short* args into *int* result, 0 otherwise. ++# ++# This won't change for different subtargets so cache the result. ++ ++proc check_effective_target_vect_widen_mult_hi_to_si_pattern { } { ++ global et_vect_widen_mult_hi_to_si_pattern ++ ++ if [info exists et_vect_widen_mult_hi_to_si_pattern_saved] { ++ verbose "check_effective_target_vect_widen_mult_hi_to_si_pattern: using cached result" 2 ++ } else { ++ set et_vect_widen_mult_hi_to_si_pattern_saved 0 ++ if { [istarget powerpc*-*-*] ++ || [istarget spu-*-*] ++ || [istarget ia64-*-*] ++ || [istarget i?86-*-*] ++ || [istarget x86_64-*-*] ++ || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { ++ set et_vect_widen_mult_hi_to_si_pattern_saved 1 ++ } ++ } ++ verbose "check_effective_target_vect_widen_mult_hi_to_si_pattern: returning $et_vect_widen_mult_hi_to_si_pattern_saved" 2 ++ return $et_vect_widen_mult_hi_to_si_pattern_saved ++} ++ ++# Return 1 if the target plus current options supports a vector + # dot-product of signed chars, 0 otherwise. + # + # This won't change for different subtargets so cache the result. 
+ +=== modified file 'gcc/tree-vect-loop-manip.c' +--- old/gcc/tree-vect-loop-manip.c 2011-05-18 13:24:05 +0000 ++++ new/gcc/tree-vect-loop-manip.c 2011-06-19 10:59:13 +0000 +@@ -1105,35 +1105,6 @@ + first_niters = PHI_RESULT (newphi); + } + +- +-/* Remove dead assignments from loop NEW_LOOP. */ +- +-static void +-remove_dead_stmts_from_loop (struct loop *new_loop) +-{ +- basic_block *bbs = get_loop_body (new_loop); +- unsigned i; +- for (i = 0; i < new_loop->num_nodes; ++i) +- { +- gimple_stmt_iterator gsi; +- for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);) +- { +- gimple stmt = gsi_stmt (gsi); +- if (is_gimple_assign (stmt) +- && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME +- && has_zero_uses (gimple_assign_lhs (stmt))) +- { +- gsi_remove (&gsi, true); +- release_defs (stmt); +- } +- else +- gsi_next (&gsi); +- } +- } +- free (bbs); +-} +- +- + /* Function slpeel_tree_peel_loop_to_edge. + + Peel the first (last) iterations of LOOP into a new prolog (epilog) loop +@@ -1445,13 +1416,6 @@ + BITMAP_FREE (definitions); + delete_update_ssa (); + +- /* Remove all pattern statements from the loop copy. They will confuse +- the expander if DCE is disabled. +- ??? The pattern recognizer should be split into an analysis and +- a transformation phase that is then run only on the loop that is +- going to be transformed. */ +- remove_dead_stmts_from_loop (new_loop); +- + adjust_vec_debug_stmts (); + + return new_loop; + +=== modified file 'gcc/tree-vect-loop.c' +--- old/gcc/tree-vect-loop.c 2011-03-01 13:18:25 +0000 ++++ new/gcc/tree-vect-loop.c 2011-06-22 06:21:13 +0000 +@@ -244,7 +244,7 @@ + for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) + { + tree vf_vectype; +- gimple stmt = gsi_stmt (si); ++ gimple stmt = gsi_stmt (si), pattern_stmt; + stmt_info = vinfo_for_stmt (stmt); + + if (vect_print_dump_info (REPORT_DETAILS)) +@@ -259,9 +259,25 @@ + if (!STMT_VINFO_RELEVANT_P (stmt_info) + && !STMT_VINFO_LIVE_P (stmt_info)) + { +- if (vect_print_dump_info (REPORT_DETAILS)) +- fprintf (vect_dump, "skip."); +- continue; ++ if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) ++ { ++ stmt = pattern_stmt; ++ stmt_info = vinfo_for_stmt (pattern_stmt); ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "==> examining pattern statement: "); ++ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); ++ } ++ } ++ else ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "skip."); ++ continue; ++ } + } + + if (gimple_get_lhs (stmt) == NULL_TREE) +@@ -816,25 +832,17 @@ + + if (stmt_info) + { +- /* Check if this is a "pattern stmt" (introduced by the +- vectorizer during the pattern recognition pass). */ +- bool remove_stmt_p = false; +- gimple orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info); +- if (orig_stmt) +- { +- stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt); +- if (orig_stmt_info +- && STMT_VINFO_IN_PATTERN_P (orig_stmt_info)) +- remove_stmt_p = true; +- } ++ /* Check if this statement has a related "pattern stmt" ++ (introduced by the vectorizer during the pattern recognition ++ pass). Free pattern's stmt_vec_info. */ ++ if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info))) ++ free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info)); + + /* Free stmt_vec_info. 
*/ + free_stmt_vec_info (stmt); ++ } + +- /* Remove dead "pattern stmts". */ +- if (remove_stmt_p) +- gsi_remove (&si, true); +- } + gsi_next (&si); + } + } +@@ -4262,6 +4270,25 @@ + return false; + } + ++ /* In case of widenning multiplication by a constant, we update the type ++ of the constant to be the type of the other operand. We check that the ++ constant fits the type in the pattern recognition pass. */ ++ if (code == DOT_PROD_EXPR ++ && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1]))) ++ { ++ if (TREE_CODE (ops[0]) == INTEGER_CST) ++ ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]); ++ else if (TREE_CODE (ops[1]) == INTEGER_CST) ++ ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]); ++ else ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "invalid types in dot-prod"); ++ ++ return false; ++ } ++ } ++ + if (!vec_stmt) /* transformation not required. */ + { + STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; +@@ -4796,7 +4823,7 @@ + + for (si = gsi_start_bb (bb); !gsi_end_p (si);) + { +- gimple stmt = gsi_stmt (si); ++ gimple stmt = gsi_stmt (si), pattern_stmt; + bool is_store; + + if (vect_print_dump_info (REPORT_DETAILS)) +@@ -4821,14 +4848,25 @@ + + if (!STMT_VINFO_RELEVANT_P (stmt_info) + && !STMT_VINFO_LIVE_P (stmt_info)) +- { +- gsi_next (&si); +- continue; ++ { ++ if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) ++ { ++ stmt = pattern_stmt; ++ stmt_info = vinfo_for_stmt (stmt); ++ } ++ else ++ { ++ gsi_next (&si); ++ continue; ++ } + } + + gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); +- nunits = +- (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)); ++ nunits = (unsigned int) TYPE_VECTOR_SUBPARTS ( ++ STMT_VINFO_VECTYPE (stmt_info)); + if (!STMT_SLP_TYPE (stmt_info) + && nunits != (unsigned int) vectorization_factor + && vect_print_dump_info (REPORT_DETAILS)) + +=== modified file 'gcc/tree-vect-patterns.c' +--- old/gcc/tree-vect-patterns.c 2010-12-02 11:47:12 +0000 ++++ new/gcc/tree-vect-patterns.c 2011-06-22 12:10:44 +0000 +@@ -38,16 +38,11 @@ + #include "recog.h" + #include "diagnostic-core.h" + +-/* Function prototypes */ +-static void vect_pattern_recog_1 +- (gimple (* ) (gimple, tree *, tree *), gimple_stmt_iterator); +-static bool widened_name_p (tree, gimple, tree *, gimple *); +- + /* Pattern recognition functions */ +-static gimple vect_recog_widen_sum_pattern (gimple, tree *, tree *); +-static gimple vect_recog_widen_mult_pattern (gimple, tree *, tree *); +-static gimple vect_recog_dot_prod_pattern (gimple, tree *, tree *); +-static gimple vect_recog_pow_pattern (gimple, tree *, tree *); ++static gimple vect_recog_widen_sum_pattern (gimple *, tree *, tree *); ++static gimple vect_recog_widen_mult_pattern (gimple *, tree *, tree *); ++static gimple vect_recog_dot_prod_pattern (gimple *, tree *, tree *); ++static gimple vect_recog_pow_pattern (gimple *, tree *, tree *); + static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { + vect_recog_widen_mult_pattern, + vect_recog_widen_sum_pattern, +@@ -61,10 +56,12 @@ + is a result of a type-promotion, such that: + DEF_STMT: NAME = NOP (name0) + where the type of name0 (HALF_TYPE) is smaller than the type of NAME. +-*/ ++ If CHECK_SIGN is TRUE, check that either both types are signed or both are ++ unsigned. 
*/ + + static bool +-widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt) ++widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt, ++ bool check_sign) + { + tree dummy; + gimple dummy_gimple; +@@ -98,7 +95,7 @@ + + *half_type = TREE_TYPE (oprnd0); + if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type) +- || (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) ++ || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign) + || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2))) + return false; + +@@ -168,12 +165,12 @@ + inner-loop nested in an outer-loop that us being vectorized). */ + + static gimple +-vect_recog_dot_prod_pattern (gimple last_stmt, tree *type_in, tree *type_out) ++vect_recog_dot_prod_pattern (gimple *last_stmt, tree *type_in, tree *type_out) + { + gimple stmt; + tree oprnd0, oprnd1; + tree oprnd00, oprnd01; +- stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); ++ stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt); + tree type, half_type; + gimple pattern_stmt; + tree prod_type; +@@ -181,10 +178,10 @@ + struct loop *loop = LOOP_VINFO_LOOP (loop_info); + tree var, rhs; + +- if (!is_gimple_assign (last_stmt)) ++ if (!is_gimple_assign (*last_stmt)) + return NULL; + +- type = gimple_expr_type (last_stmt); ++ type = gimple_expr_type (*last_stmt); + + /* Look for the following pattern + DX = (TYPE1) X; +@@ -210,7 +207,7 @@ + /* Starting from LAST_STMT, follow the defs of its uses in search + of the above pattern. */ + +- if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) ++ if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR) + return NULL; + + if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) +@@ -231,14 +228,14 @@ + + if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) + return NULL; +- oprnd0 = gimple_assign_rhs1 (last_stmt); +- oprnd1 = gimple_assign_rhs2 (last_stmt); ++ oprnd0 = gimple_assign_rhs1 (*last_stmt); ++ oprnd1 = gimple_assign_rhs2 (*last_stmt); + if (!types_compatible_p (TREE_TYPE (oprnd0), type) + || !types_compatible_p (TREE_TYPE (oprnd1), type)) + return NULL; +- stmt = last_stmt; ++ stmt = *last_stmt; + +- if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt)) ++ if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true)) + { + stmt = def_stmt; + oprnd0 = gimple_assign_rhs1 (stmt); +@@ -293,10 +290,10 @@ + if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type) + || !types_compatible_p (TREE_TYPE (oprnd1), prod_type)) + return NULL; +- if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt)) ++ if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true)) + return NULL; + oprnd00 = gimple_assign_rhs1 (def_stmt); +- if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt)) ++ if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true)) + return NULL; + oprnd01 = gimple_assign_rhs1 (def_stmt); + if (!types_compatible_p (half_type0, half_type1)) +@@ -322,7 +319,7 @@ + + /* We don't allow changing the order of the computation in the inner-loop + when doing outer-loop vectorization. */ +- gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); ++ gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt)); + + return pattern_stmt; + } +@@ -342,24 +339,47 @@ + + where type 'TYPE' is at least double the size of type 'type'. + +- Input: +- +- * LAST_STMT: A stmt from which the pattern search begins. In the example, +- when this function is called with S5, the pattern {S3,S4,S5} is be detected. 
+- +- Output: +- +- * TYPE_IN: The type of the input arguments to the pattern. +- +- * TYPE_OUT: The type of the output of this pattern. +- +- * Return value: A new stmt that will be used to replace the sequence of +- stmts that constitute the pattern. In this case it will be: +- WIDEN_MULT <a_t, b_t> +-*/ ++ Also detect unsgigned cases: ++ ++ unsigned type a_t, b_t; ++ unsigned TYPE u_prod_T; ++ TYPE a_T, b_T, prod_T; ++ ++ S1 a_t = ; ++ S2 b_t = ; ++ S3 a_T = (TYPE) a_t; ++ S4 b_T = (TYPE) b_t; ++ S5 prod_T = a_T * b_T; ++ S6 u_prod_T = (unsigned TYPE) prod_T; ++ ++ and multiplication by constants: ++ ++ type a_t; ++ TYPE a_T, prod_T; ++ ++ S1 a_t = ; ++ S3 a_T = (TYPE) a_t; ++ S5 prod_T = a_T * CONST; ++ ++ Input: ++ ++ * LAST_STMT: A stmt from which the pattern search begins. In the example, ++ when this function is called with S5, the pattern {S3,S4,S5,(S6)} is ++ detected. ++ ++ Output: ++ ++ * TYPE_IN: The type of the input arguments to the pattern. ++ ++ * TYPE_OUT: The type of the output of this pattern. ++ ++ * Return value: A new stmt that will be used to replace the sequence of ++ stmts that constitute the pattern. In this case it will be: ++ WIDEN_MULT <a_t, b_t> ++ */ + + static gimple +-vect_recog_widen_mult_pattern (gimple last_stmt, ++vect_recog_widen_mult_pattern (gimple *last_stmt, + tree *type_in, + tree *type_out) + { +@@ -367,39 +387,112 @@ + tree oprnd0, oprnd1; + tree type, half_type0, half_type1; + gimple pattern_stmt; +- tree vectype, vectype_out; ++ tree vectype, vectype_out = NULL_TREE; + tree dummy; + tree var; + enum tree_code dummy_code; + int dummy_int; + VEC (tree, heap) *dummy_vec; ++ bool op0_ok, op1_ok; + +- if (!is_gimple_assign (last_stmt)) ++ if (!is_gimple_assign (*last_stmt)) + return NULL; + +- type = gimple_expr_type (last_stmt); ++ type = gimple_expr_type (*last_stmt); + + /* Starting from LAST_STMT, follow the defs of its uses in search + of the above pattern. */ + +- if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR) ++ if (gimple_assign_rhs_code (*last_stmt) != MULT_EXPR) + return NULL; + +- oprnd0 = gimple_assign_rhs1 (last_stmt); +- oprnd1 = gimple_assign_rhs2 (last_stmt); ++ oprnd0 = gimple_assign_rhs1 (*last_stmt); ++ oprnd1 = gimple_assign_rhs2 (*last_stmt); + if (!types_compatible_p (TREE_TYPE (oprnd0), type) + || !types_compatible_p (TREE_TYPE (oprnd1), type)) + return NULL; + +- /* Check argument 0 */ +- if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0)) +- return NULL; +- oprnd0 = gimple_assign_rhs1 (def_stmt0); +- +- /* Check argument 1 */ +- if (!widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1)) +- return NULL; +- oprnd1 = gimple_assign_rhs1 (def_stmt1); ++ /* Check argument 0. */ ++ op0_ok = widened_name_p (oprnd0, *last_stmt, &half_type0, &def_stmt0, false); ++ /* Check argument 1. */ ++ op1_ok = widened_name_p (oprnd1, *last_stmt, &half_type1, &def_stmt1, false); ++ ++ /* In case of multiplication by a constant one of the operands may not match ++ the pattern, but not both. */ ++ if (!op0_ok && !op1_ok) ++ return NULL; ++ ++ if (op0_ok && op1_ok) ++ { ++ oprnd0 = gimple_assign_rhs1 (def_stmt0); ++ oprnd1 = gimple_assign_rhs1 (def_stmt1); ++ } ++ else if (!op0_ok) ++ { ++ if (CONSTANT_CLASS_P (oprnd0) ++ && TREE_CODE (half_type1) == INTEGER_TYPE ++ && tree_int_cst_lt (oprnd0, TYPE_MAXVAL (half_type1)) ++ && tree_int_cst_lt (TYPE_MINVAL (half_type1), oprnd0)) ++ { ++ /* OPRND0 is a constant of HALF_TYPE1. 
*/ ++ half_type0 = half_type1; ++ oprnd1 = gimple_assign_rhs1 (def_stmt1); ++ } ++ else ++ return NULL; ++ } ++ else if (!op1_ok) ++ { ++ if (CONSTANT_CLASS_P (oprnd1) ++ && TREE_CODE (half_type0) == INTEGER_TYPE ++ && tree_int_cst_lt (oprnd1, TYPE_MAXVAL (half_type0)) ++ && tree_int_cst_lt (TYPE_MINVAL (half_type0), oprnd1)) ++ { ++ /* OPRND1 is a constant of HALF_TYPE0. */ ++ half_type1 = half_type0; ++ oprnd0 = gimple_assign_rhs1 (def_stmt0); ++ } ++ else ++ return NULL; ++ } ++ ++ /* Handle unsigned case. Look for ++ S6 u_prod_T = (unsigned TYPE) prod_T; ++ Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */ ++ if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0)) ++ { ++ tree lhs = gimple_assign_lhs (*last_stmt), use_lhs; ++ imm_use_iterator imm_iter; ++ use_operand_p use_p; ++ int nuses = 0; ++ gimple use_stmt = NULL; ++ tree use_type; ++ ++ if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1)) ++ return NULL; ++ ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) ++ { ++ if (is_gimple_debug (USE_STMT (use_p))) ++ continue; ++ use_stmt = USE_STMT (use_p); ++ nuses++; ++ } ++ ++ if (nuses != 1 || !is_gimple_assign (use_stmt) ++ || gimple_assign_rhs_code (use_stmt) != NOP_EXPR) ++ return NULL; ++ ++ use_lhs = gimple_assign_lhs (use_stmt); ++ use_type = TREE_TYPE (use_lhs); ++ if (!INTEGRAL_TYPE_P (use_type) ++ || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type)) ++ || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type))) ++ return NULL; ++ ++ type = use_type; ++ *last_stmt = use_stmt; ++ } + + if (!types_compatible_p (half_type0, half_type1)) + return NULL; +@@ -413,7 +506,7 @@ + vectype_out = get_vectype_for_scalar_type (type); + if (!vectype + || !vectype_out +- || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt, ++ || !supportable_widening_operation (WIDEN_MULT_EXPR, *last_stmt, + vectype_out, vectype, + &dummy, &dummy, &dummy_code, + &dummy_code, &dummy_int, &dummy_vec)) +@@ -462,16 +555,16 @@ + */ + + static gimple +-vect_recog_pow_pattern (gimple last_stmt, tree *type_in, tree *type_out) ++vect_recog_pow_pattern (gimple *last_stmt, tree *type_in, tree *type_out) + { + tree fn, base, exp = NULL; + gimple stmt; + tree var; + +- if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL) ++ if (!is_gimple_call (*last_stmt) || gimple_call_lhs (*last_stmt) == NULL) + return NULL; + +- fn = gimple_call_fndecl (last_stmt); ++ fn = gimple_call_fndecl (*last_stmt); + if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL) + return NULL; + +@@ -481,8 +574,8 @@ + case BUILT_IN_POWI: + case BUILT_IN_POWF: + case BUILT_IN_POW: +- base = gimple_call_arg (last_stmt, 0); +- exp = gimple_call_arg (last_stmt, 1); ++ base = gimple_call_arg (*last_stmt, 0); ++ exp = gimple_call_arg (*last_stmt, 1); + if (TREE_CODE (exp) != REAL_CST + && TREE_CODE (exp) != INTEGER_CST) + return NULL; +@@ -574,21 +667,21 @@ + inner-loop nested in an outer-loop that us being vectorized). 
*/ + + static gimple +-vect_recog_widen_sum_pattern (gimple last_stmt, tree *type_in, tree *type_out) ++vect_recog_widen_sum_pattern (gimple *last_stmt, tree *type_in, tree *type_out) + { + gimple stmt; + tree oprnd0, oprnd1; +- stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); ++ stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt); + tree type, half_type; + gimple pattern_stmt; + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); + struct loop *loop = LOOP_VINFO_LOOP (loop_info); + tree var; + +- if (!is_gimple_assign (last_stmt)) ++ if (!is_gimple_assign (*last_stmt)) + return NULL; + +- type = gimple_expr_type (last_stmt); ++ type = gimple_expr_type (*last_stmt); + + /* Look for the following pattern + DX = (TYPE) X; +@@ -600,25 +693,25 @@ + /* Starting from LAST_STMT, follow the defs of its uses in search + of the above pattern. */ + +- if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) ++ if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR) + return NULL; + + if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) + return NULL; + +- oprnd0 = gimple_assign_rhs1 (last_stmt); +- oprnd1 = gimple_assign_rhs2 (last_stmt); ++ oprnd0 = gimple_assign_rhs1 (*last_stmt); ++ oprnd1 = gimple_assign_rhs2 (*last_stmt); + if (!types_compatible_p (TREE_TYPE (oprnd0), type) + || !types_compatible_p (TREE_TYPE (oprnd1), type)) + return NULL; + +- /* So far so good. Since last_stmt was detected as a (summation) reduction, ++ /* So far so good. Since *last_stmt was detected as a (summation) reduction, + we know that oprnd1 is the reduction variable (defined by a loop-header + phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. + Left to check that oprnd0 is defined by a cast from type 'type' to type + 'TYPE'. */ + +- if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt)) ++ if (!widened_name_p (oprnd0, *last_stmt, &half_type, &stmt, true)) + return NULL; + + oprnd0 = gimple_assign_rhs1 (stmt); +@@ -639,7 +732,7 @@ + + /* We don't allow changing the order of the computation in the inner-loop + when doing outer-loop vectorization. */ +- gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); ++ gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt)); + + return pattern_stmt; + } +@@ -669,23 +762,27 @@ + + static void + vect_pattern_recog_1 ( +- gimple (* vect_recog_func) (gimple, tree *, tree *), ++ gimple (* vect_recog_func) (gimple *, tree *, tree *), + gimple_stmt_iterator si) + { + gimple stmt = gsi_stmt (si), pattern_stmt; +- stmt_vec_info stmt_info = vinfo_for_stmt (stmt); ++ stmt_vec_info stmt_info; + stmt_vec_info pattern_stmt_info; +- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); ++ loop_vec_info loop_vinfo; + tree pattern_vectype; + tree type_in, type_out; + enum tree_code code; + int i; + gimple next; + +- pattern_stmt = (* vect_recog_func) (stmt, &type_in, &type_out); ++ pattern_stmt = (* vect_recog_func) (&stmt, &type_in, &type_out); + if (!pattern_stmt) + return; + ++ si = gsi_for_stmt (stmt); ++ stmt_info = vinfo_for_stmt (stmt); ++ loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); ++ + if (VECTOR_MODE_P (TYPE_MODE (type_in))) + { + /* No need to check target support (already checked by the pattern +@@ -736,9 +833,9 @@ + } + + /* Mark the stmts that are involved in the pattern. 
*/ +- gsi_insert_before (&si, pattern_stmt, GSI_SAME_STMT); + set_vinfo_for_stmt (pattern_stmt, + new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL)); ++ gimple_set_bb (pattern_stmt, gimple_bb (stmt)); + pattern_stmt_info = vinfo_for_stmt (pattern_stmt); + + STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt; +@@ -761,8 +858,8 @@ + LOOP_VINFO - a struct_loop_info of a loop in which we want to look for + computation idioms. + +- Output - for each computation idiom that is detected we insert a new stmt +- that provides the same functionality and that can be vectorized. We ++ Output - for each computation idiom that is detected we create a new stmt ++ that provides the same functionality and that can be vectorized. We + also record some information in the struct_stmt_info of the relevant + stmts, as explained below: + +@@ -777,52 +874,48 @@ + S5: ... = ..use(a_0).. - - - + + Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be +- represented by a single stmt. We then: +- - create a new stmt S6 that will replace the pattern. +- - insert the new stmt S6 before the last stmt in the pattern ++ represented by a single stmt. We then: ++ - create a new stmt S6 equivalent to the pattern (the stmt is not ++ inserted into the code) + - fill in the STMT_VINFO fields as follows: + + in_pattern_p related_stmt vec_stmt + S1: a_i = .... - - - + S2: a_2 = ..use(a_i).. - - - + S3: a_1 = ..use(a_2).. - - - +- > S6: a_new = .... - S4 - + S4: a_0 = ..use(a_1).. true S6 - ++ '---> S6: a_new = .... - S4 - + S5: ... = ..use(a_0).. - - - + + (the last stmt in the pattern (S4) and the new pattern stmt (S6) point +- to each other through the RELATED_STMT field). ++ to each other through the RELATED_STMT field). + + S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead + of S4 because it will replace all its uses. Stmts {S1,S2,S3} will + remain irrelevant unless used by stmts other than S4. + + If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3} +- (because they are marked as irrelevant). It will vectorize S6, and record ++ (because they are marked as irrelevant). It will vectorize S6, and record + a pointer to the new vector stmt VS6 both from S6 (as usual), and also +- from S4. We do that so that when we get to vectorizing stmts that use the ++ from S4. We do that so that when we get to vectorizing stmts that use the + def of S4 (like S5 that uses a_0), we'll know where to take the relevant +- vector-def from. S4 will be skipped, and S5 will be vectorized as usual: ++ vector-def from. S4 will be skipped, and S5 will be vectorized as usual: + + in_pattern_p related_stmt vec_stmt + S1: a_i = .... - - - + S2: a_2 = ..use(a_i).. - - - + S3: a_1 = ..use(a_2).. - - - + > VS6: va_new = .... - - - +- S6: a_new = .... - S4 VS6 + S4: a_0 = ..use(a_1).. true S6 VS6 ++ '---> S6: a_new = .... - S4 VS6 + > VS5: ... = ..vuse(va_new).. - - - + S5: ... = ..use(a_0).. - - - + +- DCE could then get rid of {S1,S2,S3,S4,S5,S6} (if their defs are not used ++ DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used + elsewhere), and we'll end up with: + + VS6: va_new = .... +- VS5: ... = ..vuse(va_new).. +- +- If vectorization does not succeed, DCE will clean S6 away (its def is +- not used), and we'll end up with the original sequence. +-*/ ++ VS5: ... = ..vuse(va_new).. 
*/ + + void + vect_pattern_recog (loop_vec_info loop_vinfo) +@@ -832,7 +925,7 @@ + unsigned int nbbs = loop->num_nodes; + gimple_stmt_iterator si; + unsigned int i, j; +- gimple (* vect_recog_func_ptr) (gimple, tree *, tree *); ++ gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *); + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "=== vect_pattern_recog ==="); + +=== modified file 'gcc/tree-vect-slp.c' +--- old/gcc/tree-vect-slp.c 2011-05-05 15:43:06 +0000 ++++ new/gcc/tree-vect-slp.c 2011-06-19 10:59:13 +0000 +@@ -2510,6 +2510,8 @@ + && STMT_VINFO_STRIDED_ACCESS (stmt_info) + && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))) + si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance)); ++ else if (is_pattern_stmt_p (stmt_info)) ++ si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); + else + si = gsi_for_stmt (stmt); + + +=== modified file 'gcc/tree-vect-stmts.c' +--- old/gcc/tree-vect-stmts.c 2011-06-02 12:12:00 +0000 ++++ new/gcc/tree-vect-stmts.c 2011-06-22 06:21:13 +0000 +@@ -605,15 +605,76 @@ + break; + } + +- FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) +- { +- tree op = USE_FROM_PTR (use_p); +- if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist)) +- { +- VEC_free (gimple, heap, worklist); +- return false; +- } +- } ++ if (is_pattern_stmt_p (vinfo_for_stmt (stmt))) ++ { ++ /* Pattern statements are not inserted into the code, so ++ FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we ++ have to scan the RHS or function arguments instead. */ ++ if (is_gimple_assign (stmt)) ++ { ++ tree rhs = gimple_assign_rhs1 (stmt); ++ if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) ++ == GIMPLE_SINGLE_RHS) ++ { ++ unsigned int op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 ++ (stmt)); ++ for (i = 0; i < op_num; i++) ++ { ++ tree op = TREE_OPERAND (rhs, i); ++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ } ++ } ++ else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) ++ == GIMPLE_BINARY_RHS) ++ { ++ tree op = gimple_assign_rhs1 (stmt); ++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ op = gimple_assign_rhs2 (stmt); ++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ } ++ else ++ return false; ++ } ++ else if (is_gimple_call (stmt)) ++ { ++ for (i = 0; i < gimple_call_num_args (stmt); i++) ++ { ++ tree arg = gimple_call_arg (stmt, i); ++ if (!process_use (stmt, arg, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ } ++ } ++ } ++ else ++ FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) ++ { ++ tree op = USE_FROM_PTR (use_p); ++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ } + } /* while worklist */ + + VEC_free (gimple, heap, worklist); +@@ -1405,6 +1466,7 @@ + VEC(tree, heap) *vargs = NULL; + enum { NARROW, NONE, WIDEN } modifier; + size_t i, nargs; ++ tree lhs; + + /* FORNOW: unsupported in basic block SLP. */ + gcc_assert (loop_vinfo); +@@ -1542,7 +1604,7 @@ + /** Transform. **/ + + if (vect_print_dump_info (REPORT_DETAILS)) +- fprintf (vect_dump, "transform operation."); ++ fprintf (vect_dump, "transform call."); + + /* Handle def. 
*/ + scalar_dest = gimple_call_lhs (stmt); +@@ -1661,8 +1723,11 @@ + rhs of the statement with something harmless. */ + + type = TREE_TYPE (scalar_dest); +- new_stmt = gimple_build_assign (gimple_call_lhs (stmt), +- build_zero_cst (type)); ++ if (is_pattern_stmt_p (stmt_info)) ++ lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info)); ++ else ++ lhs = gimple_call_lhs (stmt); ++ new_stmt = gimple_build_assign (lhs, build_zero_cst (type)); + set_vinfo_for_stmt (new_stmt, stmt_info); + set_vinfo_for_stmt (stmt, NULL); + STMT_VINFO_STMT (stmt_info) = new_stmt; +@@ -3231,6 +3296,33 @@ + fprintf (vect_dump, "use not simple."); + return false; + } ++ ++ op_type = TREE_CODE_LENGTH (code); ++ if (op_type == binary_op) ++ { ++ bool ok; ++ ++ op1 = gimple_assign_rhs2 (stmt); ++ if (code == WIDEN_MULT_EXPR) ++ { ++ /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of ++ OP1. */ ++ if (CONSTANT_CLASS_P (op0)) ++ ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL, ++ &def_stmt, &def, &dt[1], &vectype_in); ++ else ++ ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, ++ &dt[1]); ++ ++ if (!ok) ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "use not simple."); ++ return false; ++ } ++ } ++ } ++ + /* If op0 is an external or constant def use a vector type with + the same size as the output vector type. */ + if (!vectype_in) +@@ -3263,18 +3355,6 @@ + + gcc_assert (ncopies >= 1); + +- op_type = TREE_CODE_LENGTH (code); +- if (op_type == binary_op) +- { +- op1 = gimple_assign_rhs2 (stmt); +- if (!vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, &dt[1])) +- { +- if (vect_print_dump_info (REPORT_DETAILS)) +- fprintf (vect_dump, "use not simple."); +- return false; +- } +- } +- + /* Supportable by target? */ + if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in, + &decl1, &decl2, &code1, &code2, +@@ -3300,6 +3380,14 @@ + fprintf (vect_dump, "transform type promotion operation. ncopies = %d.", + ncopies); + ++ if (code == WIDEN_MULT_EXPR) ++ { ++ if (CONSTANT_CLASS_P (op0)) ++ op0 = fold_convert (TREE_TYPE (op1), op0); ++ else if (CONSTANT_CLASS_P (op1)) ++ op1 = fold_convert (TREE_TYPE (op0), op1); ++ } ++ + /* Handle def. */ + /* In case of multi-step promotion, we first generate promotion operations + to the intermediate types, and then from that types to the final one. +@@ -4824,10 +4912,26 @@ + if (!STMT_VINFO_RELEVANT_P (stmt_info) + && !STMT_VINFO_LIVE_P (stmt_info)) + { +- if (vect_print_dump_info (REPORT_DETAILS)) +- fprintf (vect_dump, "irrelevant."); ++ gimple pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); ++ if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) ++ { ++ stmt = pattern_stmt; ++ stmt_info = vinfo_for_stmt (pattern_stmt); ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "==> examining pattern statement: "); ++ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); ++ } ++ } ++ else ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "irrelevant."); + +- return true; ++ return true; ++ } + } + + switch (STMT_VINFO_DEF_TYPE (stmt_info)) + +=== modified file 'gcc/tree-vectorizer.h' +--- old/gcc/tree-vectorizer.h 2011-05-05 15:43:06 +0000 ++++ new/gcc/tree-vectorizer.h 2011-06-19 10:59:13 +0000 +@@ -884,7 +884,7 @@ + /* Pattern recognition functions. + Additional pattern recognition functions can (and will) be added + in the future. 
*/ +-typedef gimple (* vect_recog_func_ptr) (gimple, tree *, tree *); ++typedef gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *); + #define NUM_PATTERNS 4 + void vect_pattern_recog (loop_vec_info); + + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106763.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106763.patch new file mode 100644 index 0000000000..4abfa02a77 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106763.patch @@ -0,0 +1,24 @@ +2011-06-28 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline. + gcc/ + 2011-06-24 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + PR target/49385 + * config/arm/thumb2.md (*thumb2_movhi_insn): Make sure atleast + one of the operands is a register. +Index: gcc-4_6-branch/gcc/config/arm/thumb2.md +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/thumb2.md 2011-09-16 20:22:40.000000000 -0700 ++++ gcc-4_6-branch/gcc/config/arm/thumb2.md 2011-09-16 20:28:47.648690433 -0700 +@@ -207,7 +207,9 @@ + (define_insn "*thumb2_movhi_insn" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,m,r") + (match_operand:HI 1 "general_operand" "rI,n,r,m"))] +- "TARGET_THUMB2" ++ "TARGET_THUMB2 ++ && (register_operand (operands[0], HImode) ++ || register_operand (operands[1], HImode))" + "@ + mov%?\\t%0, %1\\t%@ movhi + movw%?\\t%0, %L1\\t%@ movhi diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106764.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106764.patch new file mode 100644 index 0000000000..a548b1b683 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106764.patch @@ -0,0 +1,25 @@ +2011-06-30 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline. + LP 744754 + 2011-04-17 Chung-Lin Tang <cltang@codesourcery.com> + + * config/arm/arm.c (neon_struct_mem_operand): + Support POST_INC/PRE_DEC memory operands. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-06-27 22:14:07 +0000 ++++ new/gcc/config/arm/arm.c 2011-06-29 09:13:17 +0000 +@@ -9357,6 +9357,11 @@ + if (GET_CODE (ind) == REG) + return arm_address_register_rtx_p (ind, 0); + ++ /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */ ++ if (GET_CODE (ind) == POST_INC ++ || GET_CODE (ind) == PRE_DEC) ++ return arm_address_register_rtx_p (XEXP (ind, 0), 0); ++ + return FALSE; + } + + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106766.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106766.patch new file mode 100644 index 0000000000..15046a766e --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106766.patch @@ -0,0 +1,25 @@ +2011-07-03 Ira Rosen <ira.rosen@linaro.org> + + Backport from FSF: + 2011-06-12 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vect-data-refs.c (vect_peeling_hash_get_most_frequent): + Take number of iterations to peel into account for equally frequent + misalignment values. 
+ +=== modified file 'gcc/tree-vect-data-refs.c' +--- old/gcc/tree-vect-data-refs.c 2011-06-02 12:12:00 +0000 ++++ new/gcc/tree-vect-data-refs.c 2011-06-29 11:20:24 +0000 +@@ -1256,7 +1256,9 @@ + vect_peel_info elem = (vect_peel_info) *slot; + vect_peel_extended_info max = (vect_peel_extended_info) data; + +- if (elem->count > max->peel_info.count) ++ if (elem->count > max->peel_info.count ++ || (elem->count == max->peel_info.count ++ && max->peel_info.npeel > elem->npeel)) + { + max->peel_info.npeel = elem->npeel; + max->peel_info.count = elem->count; + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106768.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106768.patch new file mode 100644 index 0000000000..f1f7718eb5 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106768.patch @@ -0,0 +1,182 @@ +2011-07-07 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + * builtins.c (get_object_alignment): Fix comment. + * fold-const.c (get_pointer_modulus_and_residue): Remove + allow_func_align. Use get_object_alignment. + (fold_binary_loc): Update caller. + +2011-07-07 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-06-29 Richard Sandiford <richard.sandiford@linaro.org> + + PR tree-optimization/49545 + * builtins.c (get_object_alignment_1): Update function comment. + Do not use DECL_ALIGN for functions, but test + TARGET_PTRMEMFUNC_VBIT_LOCATION instead. + * fold-const.c (get_pointer_modulus_and_residue): Don't check + for functions here. + * tree-ssa-ccp.c (get_value_from_alignment): Likewise. + + gcc/testsuite/ + Backport from mainline: + + 2011-06-29 Richard Sandiford <richard.sandiford@linaro.org> + + * gcc.dg/torture/pr49169.c: Restrict to ARM and MIPS targets. + +2011-07-07 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-07-27 Richard Guenther <rguenther@suse.de> + + PR tree-optimization/49169 + * fold-const.c (get_pointer_modulus_and_residue): Don't rely on + the alignment of function decls. + + gcc/testsuite/ + Backport from mainline: + + 2011-07-27 Michael Hope <michael.hope@linaro.org> + Richard Sandiford <richard.sandiford@linaro.org> + + PR tree-optimization/49169 + * gcc.dg/torture/pr49169.c: New test. + +=== modified file 'gcc/builtins.c' +--- old/gcc/builtins.c 2011-03-03 21:56:58 +0000 ++++ new/gcc/builtins.c 2011-07-04 09:52:27 +0000 +@@ -264,7 +264,14 @@ + } + + /* Return the alignment in bits of EXP, an object. +- Don't return more than MAX_ALIGN no matter what. */ ++ Don't return more than MAX_ALIGN no matter what. ++ ++ Note that the address (and thus the alignment) computed here is based ++ on the address to which a symbol resolves, whereas DECL_ALIGN is based ++ on the address at which an object is actually located. These two ++ addresses are not always the same. For example, on ARM targets, ++ the address &foo of a Thumb function foo() has the lowest bit set, ++ whereas foo() itself starts on an even address. */ + + unsigned int + get_object_alignment (tree exp, unsigned int max_align) +@@ -286,7 +293,21 @@ + exp = DECL_INITIAL (exp); + if (DECL_P (exp) + && TREE_CODE (exp) != LABEL_DECL) +- align = DECL_ALIGN (exp); ++ { ++ if (TREE_CODE (exp) == FUNCTION_DECL) ++ { ++ /* Function addresses can encode extra information besides their ++ alignment. 
However, if TARGET_PTRMEMFUNC_VBIT_LOCATION ++ allows the low bit to be used as a virtual bit, we know ++ that the address itself must be 2-byte aligned. */ ++ if (TARGET_PTRMEMFUNC_VBIT_LOCATION == ptrmemfunc_vbit_in_pfn) ++ align = 2 * BITS_PER_UNIT; ++ else ++ align = BITS_PER_UNIT; ++ } ++ else ++ align = DECL_ALIGN (exp); ++ } + else if (CONSTANT_CLASS_P (exp)) + { + align = TYPE_ALIGN (TREE_TYPE (exp)); + +=== modified file 'gcc/fold-const.c' +--- old/gcc/fold-const.c 2011-05-23 20:37:18 +0000 ++++ new/gcc/fold-const.c 2011-07-04 09:52:27 +0000 +@@ -9232,15 +9232,10 @@ + 0 <= N < M as is common. In general, the precise value of P is unknown. + M is chosen as large as possible such that constant N can be determined. + +- Returns M and sets *RESIDUE to N. +- +- If ALLOW_FUNC_ALIGN is true, do take functions' DECL_ALIGN_UNIT into +- account. This is not always possible due to PR 35705. +- */ ++ Returns M and sets *RESIDUE to N. */ + + static unsigned HOST_WIDE_INT +-get_pointer_modulus_and_residue (tree expr, unsigned HOST_WIDE_INT *residue, +- bool allow_func_align) ++get_pointer_modulus_and_residue (tree expr, unsigned HOST_WIDE_INT *residue) + { + enum tree_code code; + +@@ -9270,9 +9265,8 @@ + } + } + +- if (DECL_P (expr) +- && (allow_func_align || TREE_CODE (expr) != FUNCTION_DECL)) +- return DECL_ALIGN_UNIT (expr); ++ if (DECL_P (expr)) ++ return get_object_alignment (expr, ~0U) / BITS_PER_UNIT; + } + else if (code == POINTER_PLUS_EXPR) + { +@@ -9282,8 +9276,7 @@ + + op0 = TREE_OPERAND (expr, 0); + STRIP_NOPS (op0); +- modulus = get_pointer_modulus_and_residue (op0, residue, +- allow_func_align); ++ modulus = get_pointer_modulus_and_residue (op0, residue); + + op1 = TREE_OPERAND (expr, 1); + STRIP_NOPS (op1); +@@ -11163,8 +11156,7 @@ + unsigned HOST_WIDE_INT modulus, residue; + unsigned HOST_WIDE_INT low = TREE_INT_CST_LOW (arg1); + +- modulus = get_pointer_modulus_and_residue (arg0, &residue, +- integer_onep (arg1)); ++ modulus = get_pointer_modulus_and_residue (arg0, &residue); + + /* This works because modulus is a power of 2. If this weren't the + case, we'd have to replace it by its greatest power-of-2 + +=== added file 'gcc/testsuite/gcc.dg/torture/pr49169.c' +--- old/gcc/testsuite/gcc.dg/torture/pr49169.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/torture/pr49169.c 2011-06-29 09:46:06 +0000 +@@ -0,0 +1,15 @@ ++/* { dg-do compile { target { arm*-*-* || mips*-*-* } } } */ ++ ++#include <stdlib.h> ++#include <stdint.h> ++ ++int ++main (void) ++{ ++ void *p = main; ++ if ((intptr_t) p & 1) ++ abort (); ++ return 0; ++} ++ ++/* { dg-final { scan-assembler "abort" } } */ + +=== modified file 'gcc/tree-ssa-ccp.c' +--- old/gcc/tree-ssa-ccp.c 2011-05-05 15:42:22 +0000 ++++ new/gcc/tree-ssa-ccp.c 2011-06-29 09:46:06 +0000 +@@ -522,10 +522,6 @@ + val = bit_value_binop (PLUS_EXPR, TREE_TYPE (expr), + TREE_OPERAND (base, 0), TREE_OPERAND (base, 1)); + else if (base +- /* ??? While function decls have DECL_ALIGN their addresses +- may encode extra information in the lower bits on some +- targets (PR47239). Simply punt for function decls for now. 
*/ +- && TREE_CODE (base) != FUNCTION_DECL + && ((align = get_object_alignment (base, BIGGEST_ALIGNMENT)) + > BITS_PER_UNIT)) + { + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106769.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106769.patch new file mode 100644 index 0000000000..3a149231f8 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106769.patch @@ -0,0 +1,1281 @@ +2011-07-11 Ira Rosen <ira.rosen@linaro.org> + + Backport from FSF: + 2011-06-16 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vectorizer.h (vect_recog_func_ptr): Change the first + argument to be a VEC of statements. + * tree-vect-loop.c (vect_determine_vectorization_factor): Remove the + assert that pattern statements have to have their vector type set. + * tree-vect-patterns.c (vect_recog_widen_sum_pattern): + Change the first argument to be a VEC of statements. Update + documentation. + (vect_recog_dot_prod_pattern, vect_recog_pow_pattern): Likewise. + (vect_handle_widen_mult_by_const): New function. + (vect_recog_widen_mult_pattern): Change the first argument to be a + VEC of statements. Update documentation. Check that the constant is + INTEGER_CST. Support multiplication by a constant that fits an + intermediate type - call vect_handle_widen_mult_by_const. + (vect_pattern_recog_1): Update vect_recog_func_ptr and its + call. Handle additional pattern statements if necessary. + + gcc/testsuite/ + * gcc.dg/vect/vect-widen-mult-half-u8.c: New test. + + and + 2011-06-30 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vect-loop.c (vect_determine_vectorization_factor): Handle + both pattern and original statements if necessary. + (vect_transform_loop): Likewise. + * tree-vect-patterns.c (vect_pattern_recog): Update documentation. + * tree-vect-stmts.c (vect_mark_relevant): Add new argument. + Mark the pattern statement only if the original statement doesn't + have its own uses. + (process_use): Call vect_mark_relevant with additional parameter. + (vect_mark_stmts_to_be_vectorized): Likewise. + (vect_get_vec_def_for_operand): Use vectorized pattern statement. + (vect_analyze_stmt): Handle both pattern and original statements + if necessary. + (vect_transform_stmt): Don't store vectorized pattern statement + in the original statement. + (vect_is_simple_use_1): Use related pattern statement only if the + original statement is irrelevant. + * tree-vect-slp.c (vect_get_and_check_slp_defs): Likewise. + + gcc/testsuite/ + * gcc.dg/vect/slp-widen-mult-half.c: New test. + * gcc.dg/vect/vect-widen-mult-half.c: New test. 
+ +=== added file 'gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c' +Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c 2012-01-09 15:03:29.156918805 -0800 +@@ -0,0 +1,52 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include "tree-vect.h" ++#include <stdlib.h> ++ ++#define N 32 ++#define COEF 32470 ++#define COEF2 324700 ++ ++unsigned char in[N]; ++int out[N]; ++int out2[N]; ++ ++__attribute__ ((noinline)) void ++foo () ++{ ++ int i; ++ ++ for (i = 0; i < N/2; i++) ++ { ++ out[2*i] = in[2*i] * COEF; ++ out2[2*i] = in[2*i] + COEF2; ++ out[2*i+1] = in[2*i+1] * COEF; ++ out2[2*i+1] = in[2*i+1] + COEF2; ++ } ++} ++ ++int main (void) ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ __asm__ volatile (""); ++ } ++ ++ foo (); ++ ++ for (i = 0; i < N; i++) ++ if (out[i] != in[i] * COEF || out2[i] != in[i] + COEF2) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_widen_mult_hi_to_si } } } */ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ +Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c 2012-01-09 15:03:29.156918805 -0800 +@@ -0,0 +1,59 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include "tree-vect.h" ++#include <stdlib.h> ++ ++#define N 32 ++#define COEF 32470 ++ ++unsigned char in[N]; ++int out[N]; ++ ++__attribute__ ((noinline)) void ++foo () ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ out[i] = in[i] * COEF; ++} ++ ++__attribute__ ((noinline)) void ++bar () ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ out[i] = COEF * in[i]; ++} ++ ++int main (void) ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ __asm__ volatile (""); ++ } ++ ++ foo (); ++ ++ for (i = 0; i < N; i++) ++ if (out[i] != in[i] * COEF) ++ abort (); ++ ++ bar (); ++ ++ for (i = 0; i < N; i++) ++ if (out[i] != in[i] * COEF) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_widen_mult_hi_to_si } } } */ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ +Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c 2012-01-09 15:03:29.160918806 -0800 +@@ -0,0 +1,49 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include "tree-vect.h" ++#include <stdlib.h> ++ ++#define N 32 
++#define COEF 32470 ++#define COEF2 324700 ++ ++unsigned char in[N]; ++int out[N]; ++int out2[N]; ++ ++__attribute__ ((noinline)) void ++foo (int a) ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ { ++ out[i] = in[i] * COEF; ++ out2[i] = in[i] + a; ++ } ++} ++ ++int main (void) ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ __asm__ volatile (""); ++ } ++ ++ foo (COEF2); ++ ++ for (i = 0; i < N; i++) ++ if (out[i] != in[i] * COEF || out2[i] != in[i] + COEF2) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ +Index: gcc-4_6-branch/gcc/tree-vect-loop.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/tree-vect-loop.c 2012-01-09 15:02:15.000000000 -0800 ++++ gcc-4_6-branch/gcc/tree-vect-loop.c 2012-01-09 15:03:29.160918806 -0800 +@@ -181,6 +181,8 @@ + stmt_vec_info stmt_info; + int i; + HOST_WIDE_INT dummy; ++ gimple stmt, pattern_stmt = NULL; ++ bool analyze_pattern_stmt = false; + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "=== vect_determine_vectorization_factor ==="); +@@ -241,12 +243,20 @@ + } + } + +- for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) ++ for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;) + { +- tree vf_vectype; +- gimple stmt = gsi_stmt (si), pattern_stmt; +- stmt_info = vinfo_for_stmt (stmt); ++ tree vf_vectype; ++ ++ if (analyze_pattern_stmt) ++ { ++ stmt = pattern_stmt; ++ analyze_pattern_stmt = false; ++ } ++ else ++ stmt = gsi_stmt (si); + ++ stmt_info = vinfo_for_stmt (stmt); ++ + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "==> examining statement: "); +@@ -276,10 +286,17 @@ + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "skip."); ++ gsi_next (&si); + continue; + } + } + ++ else if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) ++ analyze_pattern_stmt = true; ++ + if (gimple_get_lhs (stmt) == NULL_TREE) + { + if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) +@@ -311,9 +328,7 @@ + } + else + { +- gcc_assert (!STMT_VINFO_DATA_REF (stmt_info) +- && !is_pattern_stmt_p (stmt_info)); +- ++ gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)); + scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); + if (vect_print_dump_info (REPORT_DETAILS)) + { +@@ -385,6 +400,9 @@ + if (!vectorization_factor + || (nunits > vectorization_factor)) + vectorization_factor = nunits; ++ ++ if (!analyze_pattern_stmt) ++ gsi_next (&si); + } + } + +@@ -4740,6 +4758,8 @@ + tree cond_expr = NULL_TREE; + gimple_seq cond_expr_stmt_list = NULL; + bool do_peeling_for_loop_bound; ++ gimple stmt, pattern_stmt; ++ bool transform_pattern_stmt = false; + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "=== vec_transform_loop ==="); +@@ -4827,11 +4847,19 @@ + } + } + +- for (si = gsi_start_bb (bb); !gsi_end_p (si);) ++ pattern_stmt = NULL; ++ for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;) + { +- gimple stmt = gsi_stmt (si), 
pattern_stmt; + bool is_store; + ++ if (transform_pattern_stmt) ++ { ++ stmt = pattern_stmt; ++ transform_pattern_stmt = false; ++ } ++ else ++ stmt = gsi_stmt (si); ++ + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "------>vectorizing statement: "); +@@ -4869,6 +4897,11 @@ + continue; + } + } ++ else if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) ++ transform_pattern_stmt = true; + + gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); + nunits = (unsigned int) TYPE_VECTOR_SUBPARTS ( +@@ -4897,8 +4930,9 @@ + /* Hybrid SLP stmts must be vectorized in addition to SLP. */ + if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info)) + { +- gsi_next (&si); +- continue; ++ if (!transform_pattern_stmt) ++ gsi_next (&si); ++ continue; + } + } + +@@ -4917,7 +4951,7 @@ + the chain. */ + vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info)); + gsi_remove (&si, true); +- continue; ++ continue; + } + else + { +@@ -4927,7 +4961,9 @@ + continue; + } + } +- gsi_next (&si); ++ ++ if (!transform_pattern_stmt) ++ gsi_next (&si); + } /* stmts in BB */ + } /* BBs in loop */ + +Index: gcc-4_6-branch/gcc/tree-vect-patterns.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/tree-vect-patterns.c 2012-01-09 15:02:15.000000000 -0800 ++++ gcc-4_6-branch/gcc/tree-vect-patterns.c 2012-01-09 15:03:29.160918806 -0800 +@@ -39,10 +39,13 @@ + #include "diagnostic-core.h" + + /* Pattern recognition functions */ +-static gimple vect_recog_widen_sum_pattern (gimple *, tree *, tree *); +-static gimple vect_recog_widen_mult_pattern (gimple *, tree *, tree *); +-static gimple vect_recog_dot_prod_pattern (gimple *, tree *, tree *); +-static gimple vect_recog_pow_pattern (gimple *, tree *, tree *); ++static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *, ++ tree *); ++static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *, ++ tree *); ++static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *, ++ tree *); ++static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *); + static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { + vect_recog_widen_mult_pattern, + vect_recog_widen_sum_pattern, +@@ -142,9 +145,9 @@ + + Input: + +- * LAST_STMT: A stmt from which the pattern search begins. In the example, +- when this function is called with S7, the pattern {S3,S4,S5,S6,S7} will be +- detected. ++ * STMTS: Contains a stmt from which the pattern search begins. In the ++ example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7} ++ will be detected. + + Output: + +@@ -165,12 +168,13 @@ + inner-loop nested in an outer-loop that us being vectorized). 
*/ + + static gimple +-vect_recog_dot_prod_pattern (gimple *last_stmt, tree *type_in, tree *type_out) ++vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in, ++ tree *type_out) + { +- gimple stmt; ++ gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0); + tree oprnd0, oprnd1; + tree oprnd00, oprnd01; +- stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt); ++ stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); + tree type, half_type; + gimple pattern_stmt; + tree prod_type; +@@ -178,10 +182,10 @@ + struct loop *loop = LOOP_VINFO_LOOP (loop_info); + tree var, rhs; + +- if (!is_gimple_assign (*last_stmt)) ++ if (!is_gimple_assign (last_stmt)) + return NULL; + +- type = gimple_expr_type (*last_stmt); ++ type = gimple_expr_type (last_stmt); + + /* Look for the following pattern + DX = (TYPE1) X; +@@ -207,7 +211,7 @@ + /* Starting from LAST_STMT, follow the defs of its uses in search + of the above pattern. */ + +- if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR) ++ if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) + return NULL; + + if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) +@@ -228,12 +232,12 @@ + + if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) + return NULL; +- oprnd0 = gimple_assign_rhs1 (*last_stmt); +- oprnd1 = gimple_assign_rhs2 (*last_stmt); ++ oprnd0 = gimple_assign_rhs1 (last_stmt); ++ oprnd1 = gimple_assign_rhs2 (last_stmt); + if (!types_compatible_p (TREE_TYPE (oprnd0), type) + || !types_compatible_p (TREE_TYPE (oprnd1), type)) + return NULL; +- stmt = *last_stmt; ++ stmt = last_stmt; + + if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true)) + { +@@ -319,11 +323,79 @@ + + /* We don't allow changing the order of the computation in the inner-loop + when doing outer-loop vectorization. */ +- gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt)); ++ gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); + + return pattern_stmt; + } + ++/* Handle two cases of multiplication by a constant. The first one is when ++ the constant, CONST_OPRND, fits the type (HALF_TYPE) of the second ++ operand (OPRND). In that case, we can peform widen-mult from HALF_TYPE to ++ TYPE. ++ ++ Otherwise, if the type of the result (TYPE) is at least 4 times bigger than ++ HALF_TYPE, and CONST_OPRND fits an intermediate type (2 times smaller than ++ TYPE), we can perform widen-mult from the intermediate type to TYPE and ++ replace a_T = (TYPE) a_t; with a_it - (interm_type) a_t; */ ++ ++static bool ++vect_handle_widen_mult_by_const (tree const_oprnd, tree *oprnd, ++ VEC (gimple, heap) **stmts, tree type, ++ tree *half_type, gimple def_stmt) ++{ ++ tree new_type, new_oprnd, tmp; ++ gimple new_stmt; ++ ++ if (int_fits_type_p (const_oprnd, *half_type)) ++ { ++ /* CONST_OPRND is a constant of HALF_TYPE. */ ++ *oprnd = gimple_assign_rhs1 (def_stmt); ++ return true; ++ } ++ ++ if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4) ++ || !vinfo_for_stmt (def_stmt)) ++ return false; ++ ++ /* TYPE is 4 times bigger than HALF_TYPE, try widen-mult for ++ a type 2 times bigger than HALF_TYPE. */ ++ new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2, ++ TYPE_UNSIGNED (type)); ++ if (!int_fits_type_p (const_oprnd, new_type)) ++ return false; ++ ++ /* Use NEW_TYPE for widen_mult. */ ++ if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt))) ++ { ++ new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); ++ /* Check if the already created pattern stmt is what we need. 
*/ ++ if (!is_gimple_assign (new_stmt) ++ || gimple_assign_rhs_code (new_stmt) != NOP_EXPR ++ || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type) ++ return false; ++ ++ *oprnd = gimple_assign_lhs (new_stmt); ++ } ++ else ++ { ++ /* Create a_T = (NEW_TYPE) a_t; */ ++ *oprnd = gimple_assign_rhs1 (def_stmt); ++ tmp = create_tmp_var (new_type, NULL); ++ add_referenced_var (tmp); ++ new_oprnd = make_ssa_name (tmp, NULL); ++ new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd, ++ NULL_TREE); ++ SSA_NAME_DEF_STMT (new_oprnd) = new_stmt; ++ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt; ++ VEC_safe_push (gimple, heap, *stmts, def_stmt); ++ *oprnd = new_oprnd; ++ } ++ ++ *half_type = new_type; ++ return true; ++} ++ ++ + /* Function vect_recog_widen_mult_pattern + + Try to find the following pattern: +@@ -361,28 +433,47 @@ + S3 a_T = (TYPE) a_t; + S5 prod_T = a_T * CONST; + +- Input: ++ A special case of multiplication by constants is when 'TYPE' is 4 times ++ bigger than 'type', but CONST fits an intermediate type 2 times smaller ++ than 'TYPE'. In that case we create an additional pattern stmt for S3 ++ to create a variable of the intermediate type, and perform widen-mult ++ on the intermediate type as well: ++ ++ type a_t; ++ interm_type a_it; ++ TYPE a_T, prod_T, prod_T'; ++ ++ S1 a_t = ; ++ S3 a_T = (TYPE) a_t; ++ '--> a_it = (interm_type) a_t; ++ S5 prod_T = a_T * CONST; ++ '--> prod_T' = a_it w* CONST; ++ ++ Input/Output: + +- * LAST_STMT: A stmt from which the pattern search begins. In the example, +- when this function is called with S5, the pattern {S3,S4,S5,(S6)} is +- detected. ++ * STMTS: Contains a stmt from which the pattern search begins. In the ++ example, when this function is called with S5, the pattern {S3,S4,S5,(S6)} ++ is detected. In case of unsigned widen-mult, the original stmt (S5) is ++ replaced with S6 in STMTS. In case of multiplication by a constant ++ of an intermediate type (the last case above), STMTS also contains S3 ++ (inserted before S5). + +- Output: ++ Output: + +- * TYPE_IN: The type of the input arguments to the pattern. ++ * TYPE_IN: The type of the input arguments to the pattern. + +- * TYPE_OUT: The type of the output of this pattern. ++ * TYPE_OUT: The type of the output of this pattern. + +- * Return value: A new stmt that will be used to replace the sequence of +- stmts that constitute the pattern. In this case it will be: +- WIDEN_MULT <a_t, b_t> +- */ ++ * Return value: A new stmt that will be used to replace the sequence of ++ stmts that constitute the pattern. In this case it will be: ++ WIDEN_MULT <a_t, b_t> ++*/ + + static gimple +-vect_recog_widen_mult_pattern (gimple *last_stmt, +- tree *type_in, +- tree *type_out) ++vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts, ++ tree *type_in, tree *type_out) + { ++ gimple last_stmt = VEC_pop (gimple, *stmts); + gimple def_stmt0, def_stmt1; + tree oprnd0, oprnd1; + tree type, half_type0, half_type1; +@@ -395,27 +486,27 @@ + VEC (tree, heap) *dummy_vec; + bool op0_ok, op1_ok; + +- if (!is_gimple_assign (*last_stmt)) ++ if (!is_gimple_assign (last_stmt)) + return NULL; + +- type = gimple_expr_type (*last_stmt); ++ type = gimple_expr_type (last_stmt); + + /* Starting from LAST_STMT, follow the defs of its uses in search + of the above pattern. 
*/ + +- if (gimple_assign_rhs_code (*last_stmt) != MULT_EXPR) ++ if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR) + return NULL; + +- oprnd0 = gimple_assign_rhs1 (*last_stmt); +- oprnd1 = gimple_assign_rhs2 (*last_stmt); ++ oprnd0 = gimple_assign_rhs1 (last_stmt); ++ oprnd1 = gimple_assign_rhs2 (last_stmt); + if (!types_compatible_p (TREE_TYPE (oprnd0), type) + || !types_compatible_p (TREE_TYPE (oprnd1), type)) + return NULL; + + /* Check argument 0. */ +- op0_ok = widened_name_p (oprnd0, *last_stmt, &half_type0, &def_stmt0, false); ++ op0_ok = widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false); + /* Check argument 1. */ +- op1_ok = widened_name_p (oprnd1, *last_stmt, &half_type1, &def_stmt1, false); ++ op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false); + + /* In case of multiplication by a constant one of the operands may not match + the pattern, but not both. */ +@@ -429,29 +520,21 @@ + } + else if (!op0_ok) + { +- if (CONSTANT_CLASS_P (oprnd0) +- && TREE_CODE (half_type1) == INTEGER_TYPE +- && tree_int_cst_lt (oprnd0, TYPE_MAXVAL (half_type1)) +- && tree_int_cst_lt (TYPE_MINVAL (half_type1), oprnd0)) +- { +- /* OPRND0 is a constant of HALF_TYPE1. */ +- half_type0 = half_type1; +- oprnd1 = gimple_assign_rhs1 (def_stmt1); +- } ++ if (TREE_CODE (oprnd0) == INTEGER_CST ++ && TREE_CODE (half_type1) == INTEGER_TYPE ++ && vect_handle_widen_mult_by_const (oprnd0, &oprnd1, stmts, type, ++ &half_type1, def_stmt1)) ++ half_type0 = half_type1; + else + return NULL; + } + else if (!op1_ok) + { +- if (CONSTANT_CLASS_P (oprnd1) ++ if (TREE_CODE (oprnd1) == INTEGER_CST + && TREE_CODE (half_type0) == INTEGER_TYPE +- && tree_int_cst_lt (oprnd1, TYPE_MAXVAL (half_type0)) +- && tree_int_cst_lt (TYPE_MINVAL (half_type0), oprnd1)) +- { +- /* OPRND1 is a constant of HALF_TYPE0. */ +- half_type1 = half_type0; +- oprnd0 = gimple_assign_rhs1 (def_stmt0); +- } ++ && vect_handle_widen_mult_by_const (oprnd1, &oprnd0, stmts, type, ++ &half_type0, def_stmt0)) ++ half_type1 = half_type0; + else + return NULL; + } +@@ -461,7 +544,7 @@ + Use unsigned TYPE as the type for WIDEN_MULT_EXPR. 
*/ + if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0)) + { +- tree lhs = gimple_assign_lhs (*last_stmt), use_lhs; ++ tree lhs = gimple_assign_lhs (last_stmt), use_lhs; + imm_use_iterator imm_iter; + use_operand_p use_p; + int nuses = 0; +@@ -491,7 +574,7 @@ + return NULL; + + type = use_type; +- *last_stmt = use_stmt; ++ last_stmt = use_stmt; + } + + if (!types_compatible_p (half_type0, half_type1)) +@@ -506,7 +589,7 @@ + vectype_out = get_vectype_for_scalar_type (type); + if (!vectype + || !vectype_out +- || !supportable_widening_operation (WIDEN_MULT_EXPR, *last_stmt, ++ || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt, + vectype_out, vectype, + &dummy, &dummy, &dummy_code, + &dummy_code, &dummy_int, &dummy_vec)) +@@ -524,6 +607,7 @@ + if (vect_print_dump_info (REPORT_DETAILS)) + print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); + ++ VEC_safe_push (gimple, heap, *stmts, last_stmt); + return pattern_stmt; + } + +@@ -555,16 +639,17 @@ + */ + + static gimple +-vect_recog_pow_pattern (gimple *last_stmt, tree *type_in, tree *type_out) ++vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in, tree *type_out) + { ++ gimple last_stmt = VEC_index (gimple, *stmts, 0); + tree fn, base, exp = NULL; + gimple stmt; + tree var; + +- if (!is_gimple_call (*last_stmt) || gimple_call_lhs (*last_stmt) == NULL) ++ if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL) + return NULL; + +- fn = gimple_call_fndecl (*last_stmt); ++ fn = gimple_call_fndecl (last_stmt); + if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL) + return NULL; + +@@ -574,8 +659,8 @@ + case BUILT_IN_POWI: + case BUILT_IN_POWF: + case BUILT_IN_POW: +- base = gimple_call_arg (*last_stmt, 0); +- exp = gimple_call_arg (*last_stmt, 1); ++ base = gimple_call_arg (last_stmt, 0); ++ exp = gimple_call_arg (last_stmt, 1); + if (TREE_CODE (exp) != REAL_CST + && TREE_CODE (exp) != INTEGER_CST) + return NULL; +@@ -667,21 +752,23 @@ + inner-loop nested in an outer-loop that us being vectorized). */ + + static gimple +-vect_recog_widen_sum_pattern (gimple *last_stmt, tree *type_in, tree *type_out) ++vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in, ++ tree *type_out) + { ++ gimple last_stmt = VEC_index (gimple, *stmts, 0); + gimple stmt; + tree oprnd0, oprnd1; +- stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt); ++ stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); + tree type, half_type; + gimple pattern_stmt; + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); + struct loop *loop = LOOP_VINFO_LOOP (loop_info); + tree var; + +- if (!is_gimple_assign (*last_stmt)) ++ if (!is_gimple_assign (last_stmt)) + return NULL; + +- type = gimple_expr_type (*last_stmt); ++ type = gimple_expr_type (last_stmt); + + /* Look for the following pattern + DX = (TYPE) X; +@@ -693,25 +780,25 @@ + /* Starting from LAST_STMT, follow the defs of its uses in search + of the above pattern. */ + +- if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR) ++ if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) + return NULL; + + if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) + return NULL; + +- oprnd0 = gimple_assign_rhs1 (*last_stmt); +- oprnd1 = gimple_assign_rhs2 (*last_stmt); ++ oprnd0 = gimple_assign_rhs1 (last_stmt); ++ oprnd1 = gimple_assign_rhs2 (last_stmt); + if (!types_compatible_p (TREE_TYPE (oprnd0), type) + || !types_compatible_p (TREE_TYPE (oprnd1), type)) + return NULL; + +- /* So far so good. 
Since *last_stmt was detected as a (summation) reduction, ++ /* So far so good. Since last_stmt was detected as a (summation) reduction, + we know that oprnd1 is the reduction variable (defined by a loop-header + phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. + Left to check that oprnd0 is defined by a cast from type 'type' to type + 'TYPE'. */ + +- if (!widened_name_p (oprnd0, *last_stmt, &half_type, &stmt, true)) ++ if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true)) + return NULL; + + oprnd0 = gimple_assign_rhs1 (stmt); +@@ -732,8 +819,9 @@ + + /* We don't allow changing the order of the computation in the inner-loop + when doing outer-loop vectorization. */ +- gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt)); ++ gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); + ++ VEC_safe_push (gimple, heap, *stmts, last_stmt); + return pattern_stmt; + } + +@@ -762,7 +850,7 @@ + + static void + vect_pattern_recog_1 ( +- gimple (* vect_recog_func) (gimple *, tree *, tree *), ++ gimple (* vect_recog_func) (VEC (gimple, heap) **, tree *, tree *), + gimple_stmt_iterator si) + { + gimple stmt = gsi_stmt (si), pattern_stmt; +@@ -774,12 +862,14 @@ + enum tree_code code; + int i; + gimple next; ++ VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1); + +- pattern_stmt = (* vect_recog_func) (&stmt, &type_in, &type_out); ++ VEC_quick_push (gimple, stmts_to_replace, stmt); ++ pattern_stmt = (* vect_recog_func) (&stmts_to_replace, &type_in, &type_out); + if (!pattern_stmt) + return; + +- si = gsi_for_stmt (stmt); ++ stmt = VEC_last (gimple, stmts_to_replace); + stmt_info = vinfo_for_stmt (stmt); + loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + +@@ -849,6 +939,35 @@ + FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next) + if (next == stmt) + VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i); ++ ++ /* In case of widen-mult by a constant, it is possible that an additional ++ pattern stmt is created and inserted in STMTS_TO_REPLACE. We create a ++ stmt_info for it, and mark the relevant statements. */ ++ for (i = 0; VEC_iterate (gimple, stmts_to_replace, i, stmt) ++ && (unsigned) i < (VEC_length (gimple, stmts_to_replace) - 1); ++ i++) ++ { ++ stmt_info = vinfo_for_stmt (stmt); ++ pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "additional pattern stmt: "); ++ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); ++ } ++ ++ set_vinfo_for_stmt (pattern_stmt, ++ new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL)); ++ gimple_set_bb (pattern_stmt, gimple_bb (stmt)); ++ pattern_stmt_info = vinfo_for_stmt (pattern_stmt); ++ ++ STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt; ++ STMT_VINFO_DEF_TYPE (pattern_stmt_info) ++ = STMT_VINFO_DEF_TYPE (stmt_info); ++ STMT_VINFO_VECTYPE (pattern_stmt_info) = STMT_VINFO_VECTYPE (stmt_info); ++ STMT_VINFO_IN_PATTERN_P (stmt_info) = true; ++ } ++ ++ VEC_free (gimple, heap, stmts_to_replace); + } + + +@@ -896,10 +1015,8 @@ + + If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3} + (because they are marked as irrelevant). It will vectorize S6, and record +- a pointer to the new vector stmt VS6 both from S6 (as usual), and also +- from S4. We do that so that when we get to vectorizing stmts that use the +- def of S4 (like S5 that uses a_0), we'll know where to take the relevant +- vector-def from. 
S4 will be skipped, and S5 will be vectorized as usual: ++ a pointer to the new vector stmt VS6 from S6 (as usual). ++ S4 will be skipped, and S5 will be vectorized as usual: + + in_pattern_p related_stmt vec_stmt + S1: a_i = .... - - - +@@ -915,7 +1032,21 @@ + elsewhere), and we'll end up with: + + VS6: va_new = .... +- VS5: ... = ..vuse(va_new).. */ ++ VS5: ... = ..vuse(va_new).. ++ ++ In case of more than one pattern statements, e.g., widen-mult with ++ intermediate type: ++ ++ S1 a_t = ; ++ S2 a_T = (TYPE) a_t; ++ '--> S3: a_it = (interm_type) a_t; ++ S4 prod_T = a_T * CONST; ++ '--> S5: prod_T' = a_it w* CONST; ++ ++ there may be other users of a_T outside the pattern. In that case S2 will ++ be marked as relevant (as well as S3), and both S2 and S3 will be analyzed ++ and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will ++ be recorded in S3. */ + + void + vect_pattern_recog (loop_vec_info loop_vinfo) +@@ -925,7 +1056,7 @@ + unsigned int nbbs = loop->num_nodes; + gimple_stmt_iterator si; + unsigned int i, j; +- gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *); ++ gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "=== vect_pattern_recog ==="); +Index: gcc-4_6-branch/gcc/tree-vect-slp.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/tree-vect-slp.c 2012-01-09 15:02:15.000000000 -0800 ++++ gcc-4_6-branch/gcc/tree-vect-slp.c 2012-01-09 15:03:29.160918806 -0800 +@@ -152,7 +152,9 @@ + if (loop && def_stmt && gimple_bb (def_stmt) + && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) + && vinfo_for_stmt (def_stmt) +- && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))) ++ && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)) ++ && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt)) ++ && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) + { + if (!*first_stmt_dt0) + *pattern0 = true; +Index: gcc-4_6-branch/gcc/tree-vect-stmts.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/tree-vect-stmts.c 2012-01-09 15:02:15.000000000 -0800 ++++ gcc-4_6-branch/gcc/tree-vect-stmts.c 2012-01-09 15:06:23.636927250 -0800 +@@ -126,33 +126,72 @@ + + static void + vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt, +- enum vect_relevant relevant, bool live_p) ++ enum vect_relevant relevant, bool live_p, ++ bool used_in_pattern) + { + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info); + bool save_live_p = STMT_VINFO_LIVE_P (stmt_info); ++ gimple pattern_stmt; + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p); + ++ /* If this stmt is an original stmt in a pattern, we might need to mark its ++ related pattern stmt instead of the original stmt. However, such stmts ++ may have their own uses that are not in any pattern, in such cases the ++ stmt itself should be marked. */ + if (STMT_VINFO_IN_PATTERN_P (stmt_info)) + { +- gimple pattern_stmt; ++ bool found = false; ++ if (!used_in_pattern) ++ { ++ imm_use_iterator imm_iter; ++ use_operand_p use_p; ++ gimple use_stmt; ++ tree lhs; ++ ++ if (is_gimple_assign (stmt)) ++ lhs = gimple_assign_lhs (stmt); ++ else ++ lhs = gimple_call_lhs (stmt); ++ ++ /* This use is out of pattern use, if LHS has other uses that are ++ pattern uses, we should mark the stmt itself, and not the pattern ++ stmt. 
*/ ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) ++ { ++ if (is_gimple_debug (USE_STMT (use_p))) ++ continue; ++ use_stmt = USE_STMT (use_p); ++ ++ if (vinfo_for_stmt (use_stmt) ++ && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt))) ++ { ++ found = true; ++ break; ++ } ++ } ++ } ++ ++ if (!found) ++ { ++ /* This is the last stmt in a sequence that was detected as a ++ pattern that can potentially be vectorized. Don't mark the stmt ++ as relevant/live because it's not going to be vectorized. ++ Instead mark the pattern-stmt that replaces it. */ ++ ++ pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); + +- /* This is the last stmt in a sequence that was detected as a +- pattern that can potentially be vectorized. Don't mark the stmt +- as relevant/live because it's not going to be vectorized. +- Instead mark the pattern-stmt that replaces it. */ +- +- pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); +- +- if (vect_print_dump_info (REPORT_DETAILS)) +- fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live."); +- stmt_info = vinfo_for_stmt (pattern_stmt); +- gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt); +- save_relevant = STMT_VINFO_RELEVANT (stmt_info); +- save_live_p = STMT_VINFO_LIVE_P (stmt_info); +- stmt = pattern_stmt; ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "last stmt in pattern. don't mark" ++ " relevant/live."); ++ stmt_info = vinfo_for_stmt (pattern_stmt); ++ gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt); ++ save_relevant = STMT_VINFO_RELEVANT (stmt_info); ++ save_live_p = STMT_VINFO_LIVE_P (stmt_info); ++ stmt = pattern_stmt; ++ } + } + + STMT_VINFO_LIVE_P (stmt_info) |= live_p; +@@ -437,7 +476,8 @@ + } + } + +- vect_mark_relevant (worklist, def_stmt, relevant, live_p); ++ vect_mark_relevant (worklist, def_stmt, relevant, live_p, ++ is_pattern_stmt_p (stmt_vinfo)); + return true; + } + +@@ -494,7 +534,7 @@ + } + + if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p)) +- vect_mark_relevant (&worklist, phi, relevant, live_p); ++ vect_mark_relevant (&worklist, phi, relevant, live_p, false); + } + for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) + { +@@ -506,7 +546,7 @@ + } + + if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p)) +- vect_mark_relevant (&worklist, stmt, relevant, live_p); ++ vect_mark_relevant (&worklist, stmt, relevant, live_p, false); + } + } + +@@ -613,42 +653,55 @@ + if (is_gimple_assign (stmt)) + { + tree rhs = gimple_assign_rhs1 (stmt); +- if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) +- == GIMPLE_SINGLE_RHS) ++ unsigned int op_num; ++ tree op; ++ switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))) + { +- unsigned int op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 +- (stmt)); +- for (i = 0; i < op_num; i++) +- { +- tree op = TREE_OPERAND (rhs, i); +- if (!process_use (stmt, op, loop_vinfo, live_p, relevant, +- &worklist)) ++ case GIMPLE_SINGLE_RHS: ++ op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)); ++ for (i = 0; i < op_num; i++) + { +- VEC_free (gimple, heap, worklist); +- return false; ++ op = TREE_OPERAND (rhs, i); ++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } + } +- } +- } +- else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) +- == GIMPLE_BINARY_RHS) +- { +- tree op = gimple_assign_rhs1 (stmt); +- if (!process_use (stmt, op, loop_vinfo, live_p, relevant, +- &worklist)) +- { +- VEC_free (gimple, heap, worklist); +- 
return false; +- } +- op = gimple_assign_rhs2 (stmt); +- if (!process_use (stmt, op, loop_vinfo, live_p, relevant, +- &worklist)) +- { +- VEC_free (gimple, heap, worklist); +- return false; +- } ++ break; ++ ++ case GIMPLE_BINARY_RHS: ++ op = gimple_assign_rhs1 (stmt); ++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ op = gimple_assign_rhs2 (stmt); ++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ break; ++ ++ case GIMPLE_UNARY_RHS: ++ op = gimple_assign_rhs1 (stmt); ++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ ++ break; ++ ++ default: ++ return false; + } +- else +- return false; + } + else if (is_gimple_call (stmt)) + { +@@ -1210,7 +1263,14 @@ + + /* Get the def from the vectorized stmt. */ + def_stmt_info = vinfo_for_stmt (def_stmt); ++ + vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); ++ /* Get vectorized pattern statement. */ ++ if (!vec_stmt ++ && STMT_VINFO_IN_PATTERN_P (def_stmt_info) ++ && !STMT_VINFO_RELEVANT (def_stmt_info)) ++ vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt ( ++ STMT_VINFO_RELATED_STMT (def_stmt_info))); + gcc_assert (vec_stmt); + if (gimple_code (vec_stmt) == GIMPLE_PHI) + vec_oprnd = PHI_RESULT (vec_stmt); +@@ -4894,6 +4954,7 @@ + enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info); + bool ok; + tree scalar_type, vectype; ++ gimple pattern_stmt; + + if (vect_print_dump_info (REPORT_DETAILS)) + { +@@ -4915,16 +4976,22 @@ + - any LABEL_EXPRs in the loop + - computations that are used only for array indexing or loop control. + In basic blocks we only analyze statements that are a part of some SLP +- instance, therefore, all the statements are relevant. */ ++ instance, therefore, all the statements are relevant. ++ ++ Pattern statement need to be analyzed instead of the original statement ++ if the original statement is not relevant. Otherwise, we analyze both ++ statements. */ + ++ pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); + if (!STMT_VINFO_RELEVANT_P (stmt_info) + && !STMT_VINFO_LIVE_P (stmt_info)) + { +- gimple pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); + if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && pattern_stmt + && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) + || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) + { ++ /* Analyze PATTERN_STMT instead of the original stmt. */ + stmt = pattern_stmt; + stmt_info = vinfo_for_stmt (pattern_stmt); + if (vect_print_dump_info (REPORT_DETAILS)) +@@ -4941,6 +5008,21 @@ + return true; + } + } ++ else if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && pattern_stmt ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) ++ { ++ /* Analyze PATTERN_STMT too. 
*/ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "==> examining pattern statement: "); ++ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); ++ } ++ ++ if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node)) ++ return false; ++ } + + switch (STMT_VINFO_DEF_TYPE (stmt_info)) + { +@@ -5074,7 +5156,6 @@ + bool is_store = false; + gimple vec_stmt = NULL; + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); +- gimple orig_stmt_in_pattern; + bool done; + + switch (STMT_VINFO_TYPE (stmt_info)) +@@ -5213,21 +5294,7 @@ + } + + if (vec_stmt) +- { + STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; +- orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info); +- if (orig_stmt_in_pattern) +- { +- stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern); +- /* STMT was inserted by the vectorizer to replace a computation idiom. +- ORIG_STMT_IN_PATTERN is a stmt in the original sequence that +- computed this idiom. We need to record a pointer to VEC_STMT in +- the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the +- documentation of vect_pattern_recog. */ +- if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) +- STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt; +- } +- } + + return is_store; + } +@@ -5605,8 +5672,12 @@ + || *dt == vect_nested_cycle) + { + stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt); +- if (STMT_VINFO_IN_PATTERN_P (stmt_info)) ++ ++ if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && !STMT_VINFO_RELEVANT (stmt_info) ++ && !STMT_VINFO_LIVE_P (stmt_info)) + stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); ++ + *vectype = STMT_VINFO_VECTYPE (stmt_info); + gcc_assert (*vectype != NULL_TREE); + } +Index: gcc-4_6-branch/gcc/tree-vectorizer.h +=================================================================== +--- gcc-4_6-branch.orig/gcc/tree-vectorizer.h 2012-01-09 15:02:15.000000000 -0800 ++++ gcc-4_6-branch/gcc/tree-vectorizer.h 2012-01-09 15:03:29.164918806 -0800 +@@ -890,7 +890,7 @@ + /* Pattern recognition functions. + Additional pattern recognition functions can (and will) be added + in the future. */ +-typedef gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *); ++typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); + #define NUM_PATTERNS 4 + void vect_pattern_recog (loop_vec_info); + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106770.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106770.patch new file mode 100644 index 0000000000..82ae3a1327 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106770.patch @@ -0,0 +1,138 @@ +2011-07-11 Revital Eres <revital.eres@linaro.org> + + Backport from mainline -r175090. + gcc/ + * ddg.c (add_intra_loop_mem_dep): New function. + (build_intra_loop_deps): Call it. + + gcc/testsuite + * gcc.dg/sms-9.c: New file. + +=== modified file 'gcc/ddg.c' +--- old/gcc/ddg.c 2011-05-13 16:03:40 +0000 ++++ new/gcc/ddg.c 2011-07-04 11:00:06 +0000 +@@ -390,6 +390,33 @@ + &PATTERN (insn2)); + } + ++/* Given two nodes, analyze their RTL insns and add intra-loop mem deps ++ to ddg G. */ ++static void ++add_intra_loop_mem_dep (ddg_ptr g, ddg_node_ptr from, ddg_node_ptr to) ++{ ++ ++ if ((from->cuid == to->cuid) ++ || !insns_may_alias_p (from->insn, to->insn)) ++ /* Do not create edge if memory references have disjoint alias sets ++ or 'to' and 'from' are the same instruction. 
*/ ++ return; ++ ++ if (mem_write_insn_p (from->insn)) ++ { ++ if (mem_read_insn_p (to->insn)) ++ create_ddg_dep_no_link (g, from, to, ++ DEBUG_INSN_P (to->insn) ++ ? ANTI_DEP : TRUE_DEP, MEM_DEP, 0); ++ else ++ create_ddg_dep_no_link (g, from, to, ++ DEBUG_INSN_P (to->insn) ++ ? ANTI_DEP : OUTPUT_DEP, MEM_DEP, 0); ++ } ++ else if (!mem_read_insn_p (to->insn)) ++ create_ddg_dep_no_link (g, from, to, ANTI_DEP, MEM_DEP, 0); ++} ++ + /* Given two nodes, analyze their RTL insns and add inter-loop mem deps + to ddg G. */ + static void +@@ -477,10 +504,22 @@ + if (DEBUG_INSN_P (j_node->insn)) + continue; + if (mem_access_insn_p (j_node->insn)) +- /* Don't bother calculating inter-loop dep if an intra-loop dep +- already exists. */ ++ { ++ /* Don't bother calculating inter-loop dep if an intra-loop dep ++ already exists. */ + if (! TEST_BIT (dest_node->successors, j)) + add_inter_loop_mem_dep (g, dest_node, j_node); ++ /* If -fmodulo-sched-allow-regmoves ++ is set certain anti-dep edges are not created. ++ It might be that these anti-dep edges are on the ++ path from one memory instruction to another such that ++ removing these edges could cause a violation of the ++ memory dependencies. Thus we add intra edges between ++ every two memory instructions in this case. */ ++ if (flag_modulo_sched_allow_regmoves ++ && !TEST_BIT (dest_node->predecessors, j)) ++ add_intra_loop_mem_dep (g, j_node, dest_node); ++ } + } + } + } + +=== added file 'gcc/testsuite/gcc.dg/sms-9.c' +--- old/gcc/testsuite/gcc.dg/sms-9.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/sms-9.c 2011-07-04 11:00:06 +0000 +@@ -0,0 +1,60 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -fmodulo-sched -fno-auto-inc-dec -O2 -fmodulo-sched-allow-regmoves" } */ ++ ++#include <stdlib.h> ++#include <stdarg.h> ++ ++struct df_ref_info ++{ ++ unsigned int *begin; ++ unsigned int *count; ++}; ++ ++extern void *memset (void *s, int c, __SIZE_TYPE__ n); ++ ++ ++__attribute__ ((noinline)) ++ int ++ df_reorganize_refs_by_reg_by_insn (struct df_ref_info *ref_info, ++ int num, unsigned int start) ++{ ++ unsigned int m = num; ++ unsigned int offset = 77; ++ unsigned int r; ++ ++ for (r = start; r < m; r++) ++ { ++ ref_info->begin[r] = offset; ++ offset += ref_info->count[r]; ++ ref_info->count[r] = 0; ++ } ++ ++ return offset; ++} ++ ++int ++main () ++{ ++ struct df_ref_info temp; ++ int num = 100; ++ unsigned int start = 5; ++ int i, offset; ++ ++ temp.begin = malloc (100 * sizeof (unsigned int)); ++ temp.count = malloc (100 * sizeof (unsigned int)); ++ ++ memset (temp.begin, 0, sizeof (unsigned int) * num); ++ memset (temp.count, 0, sizeof (unsigned int) * num); ++ ++ for (i = 0; i < num; i++) ++ temp.count[i] = i + 1; ++ ++ offset = df_reorganize_refs_by_reg_by_insn (&temp, num, start); ++ ++ if (offset != 5112) ++ abort (); ++ ++ free (temp.begin); ++ free (temp.count); ++ return 0; ++} + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106771.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106771.patch new file mode 100644 index 0000000000..70c8638cd2 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106771.patch @@ -0,0 +1,211 @@ +2011-07-11 Revital Eres <revital.eres@linaro.org> + + Backport from mainline -r175091 + gcc/ + * modulo-sched.c (struct ps_insn): Remove row_rest_count + field. + (struct partial_schedule): Add rows_length field. + (verify_partial_schedule): Check rows_length. + (ps_insert_empty_row): Handle rows_length. 
+ (create_partial_schedule): Likewise. + (free_partial_schedule): Likewise. + (reset_partial_schedule): Likewise. + (create_ps_insn): Remove rest_count argument. + (remove_node_from_ps): Update rows_length. + (add_node_to_ps): Update rows_length and call create_ps_insn without + passing row_rest_count. + (rotate_partial_schedule): Update rows_length. + +=== modified file 'gcc/modulo-sched.c' +--- old/gcc/modulo-sched.c 2011-05-13 16:03:40 +0000 ++++ new/gcc/modulo-sched.c 2011-07-04 12:01:34 +0000 +@@ -134,8 +134,6 @@ + ps_insn_ptr next_in_row, + prev_in_row; + +- /* The number of nodes in the same row that come after this node. */ +- int row_rest_count; + }; + + /* Holds the partial schedule as an array of II rows. Each entry of the +@@ -149,6 +147,12 @@ + /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */ + ps_insn_ptr *rows; + ++ /* rows_length[i] holds the number of instructions in the row. ++ It is used only (as an optimization) to back off quickly from ++ trying to schedule a node in a full row; that is, to avoid running ++ through futile DFA state transitions. */ ++ int *rows_length; ++ + /* The earliest absolute cycle of an insn in the partial schedule. */ + int min_cycle; + +@@ -1907,6 +1911,7 @@ + int ii = ps->ii; + int new_ii = ii + 1; + int row; ++ int *rows_length_new; + + verify_partial_schedule (ps, sched_nodes); + +@@ -1921,9 +1926,11 @@ + rotate_partial_schedule (ps, PS_MIN_CYCLE (ps)); + + rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr)); ++ rows_length_new = (int *) xcalloc (new_ii, sizeof (int)); + for (row = 0; row < split_row; row++) + { + rows_new[row] = ps->rows[row]; ++ rows_length_new[row] = ps->rows_length[row]; + ps->rows[row] = NULL; + for (crr_insn = rows_new[row]; + crr_insn; crr_insn = crr_insn->next_in_row) +@@ -1944,6 +1951,7 @@ + for (row = split_row; row < ii; row++) + { + rows_new[row + 1] = ps->rows[row]; ++ rows_length_new[row + 1] = ps->rows_length[row]; + ps->rows[row] = NULL; + for (crr_insn = rows_new[row + 1]; + crr_insn; crr_insn = crr_insn->next_in_row) +@@ -1965,6 +1973,8 @@ + + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0); + free (ps->rows); + ps->rows = rows_new; ++ free (ps->rows_length); ++ ps->rows_length = rows_length_new; + ps->ii = new_ii; + gcc_assert (ps->min_cycle >= 0); + +@@ -2040,16 +2050,23 @@ + ps_insn_ptr crr_insn; + + for (row = 0; row < ps->ii; row++) +- for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) +- { +- ddg_node_ptr u = crr_insn->node; +- +- gcc_assert (TEST_BIT (sched_nodes, u->cuid)); +- /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by +- popcount (sched_nodes) == number of insns in ps. */ +- gcc_assert (SCHED_TIME (u) >= ps->min_cycle); +- gcc_assert (SCHED_TIME (u) <= ps->max_cycle); +- } ++ { ++ int length = 0; ++ ++ for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) ++ { ++ ddg_node_ptr u = crr_insn->node; ++ ++ length++; ++ gcc_assert (TEST_BIT (sched_nodes, u->cuid)); ++ /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by ++ popcount (sched_nodes) == number of insns in ps. 
*/ ++ gcc_assert (SCHED_TIME (u) >= ps->min_cycle); ++ gcc_assert (SCHED_TIME (u) <= ps->max_cycle); ++ } ++ ++ gcc_assert (ps->rows_length[row] == length); ++ } + } + + +@@ -2455,6 +2472,7 @@ + { + partial_schedule_ptr ps = XNEW (struct partial_schedule); + ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr)); ++ ps->rows_length = (int *) xcalloc (ii, sizeof (int)); + ps->ii = ii; + ps->history = history; + ps->min_cycle = INT_MAX; +@@ -2493,6 +2511,7 @@ + return; + free_ps_insns (ps); + free (ps->rows); ++ free (ps->rows_length); + free (ps); + } + +@@ -2510,6 +2529,8 @@ + ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii + * sizeof (ps_insn_ptr)); + memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr)); ++ ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int)); ++ memset (ps->rows_length, 0, new_ii * sizeof (int)); + ps->ii = new_ii; + ps->min_cycle = INT_MAX; + ps->max_cycle = INT_MIN; +@@ -2538,14 +2559,13 @@ + + /* Creates an object of PS_INSN and initializes it to the given parameters. */ + static ps_insn_ptr +-create_ps_insn (ddg_node_ptr node, int rest_count, int cycle) ++create_ps_insn (ddg_node_ptr node, int cycle) + { + ps_insn_ptr ps_i = XNEW (struct ps_insn); + + ps_i->node = node; + ps_i->next_in_row = NULL; + ps_i->prev_in_row = NULL; +- ps_i->row_rest_count = rest_count; + ps_i->cycle = cycle; + + return ps_i; +@@ -2578,6 +2598,8 @@ + if (ps_i->next_in_row) + ps_i->next_in_row->prev_in_row = ps_i->prev_in_row; + } ++ ++ ps->rows_length[row] -= 1; + free (ps_i); + return true; + } +@@ -2734,17 +2756,12 @@ + sbitmap must_precede, sbitmap must_follow) + { + ps_insn_ptr ps_i; +- int rest_count = 1; + int row = SMODULO (cycle, ps->ii); + +- if (ps->rows[row] +- && ps->rows[row]->row_rest_count >= issue_rate) ++ if (ps->rows_length[row] >= issue_rate) + return NULL; + +- if (ps->rows[row]) +- rest_count += ps->rows[row]->row_rest_count; +- +- ps_i = create_ps_insn (node, rest_count, cycle); ++ ps_i = create_ps_insn (node, cycle); + + /* Finds and inserts PS_I according to MUST_FOLLOW and + MUST_PRECEDE. */ +@@ -2754,6 +2771,7 @@ + return NULL; + } + ++ ps->rows_length[row] += 1; + return ps_i; + } + +@@ -2909,11 +2927,16 @@ + for (i = 0; i < backward_rotates; i++) + { + ps_insn_ptr first_row = ps->rows[0]; ++ int first_row_length = ps->rows_length[0]; + + for (row = 0; row < last_row; row++) +- ps->rows[row] = ps->rows[row+1]; ++ { ++ ps->rows[row] = ps->rows[row + 1]; ++ ps->rows_length[row] = ps->rows_length[row + 1]; ++ } + + ps->rows[last_row] = first_row; ++ ps->rows_length[last_row] = first_row_length; + } + + ps->max_cycle -= start_cycle; + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106772.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106772.patch new file mode 100644 index 0000000000..d918f9c58c --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106772.patch @@ -0,0 +1,350 @@ +2011-07-11 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + gcc/ + 2011-06-22 Dmitry Plotnikov <dplotnikov@ispras.ru> + Dmitry Melnik <dm@ispras.ru> + + * config/arm/arm.c (neon_immediate_valid_for_shift): New function. + (neon_output_shift_immediate): Ditto. + * config/arm/arm-protos.h (neon_immediate_valid_for_shift): New + prototype. + (neon_output_shift_immediate): Ditto. + * config/arm/neon.md (vashl<mode>3): Modified constraint. + (vashr<mode>3_imm): New insn pattern. + (vlshr<mode>3_imm): Ditto. + (vashr<mode>3): Modified constraint. 
+ (vlshr<mode>3): Ditto. + * config/arm/predicates.md (imm_for_neon_lshift_operand): New + predicate. + (imm_for_neon_rshift_operand): Ditto. + (imm_lshift_or_reg_neon): Ditto. + (imm_rshift_or_reg_neon): Ditto. + + * optabs.c (init_optabs): Init optab codes for vashl, vashr, vlshr. + +=== modified file 'gcc/config/arm/arm-protos.h' +--- old/gcc/config/arm/arm-protos.h 2011-06-14 16:00:30 +0000 ++++ new/gcc/config/arm/arm-protos.h 2011-07-04 14:03:49 +0000 +@@ -64,8 +64,12 @@ + extern int neon_immediate_valid_for_move (rtx, enum machine_mode, rtx *, int *); + extern int neon_immediate_valid_for_logic (rtx, enum machine_mode, int, rtx *, + int *); ++extern int neon_immediate_valid_for_shift (rtx, enum machine_mode, rtx *, ++ int *, bool); + extern char *neon_output_logic_immediate (const char *, rtx *, + enum machine_mode, int, int); ++extern char *neon_output_shift_immediate (const char *, char, rtx *, ++ enum machine_mode, int, bool); + extern void neon_pairwise_reduce (rtx, rtx, enum machine_mode, + rtx (*) (rtx, rtx, rtx)); + extern rtx neon_make_constant (rtx); + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-06-29 09:13:17 +0000 ++++ new/gcc/config/arm/arm.c 2011-07-04 14:03:49 +0000 +@@ -8863,6 +8863,66 @@ + return 1; + } + ++/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If ++ the immediate is valid, write a constant suitable for using as an operand ++ to VSHR/VSHL to *MODCONST and the corresponding element width to ++ *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift, ++ because they have different limitations. */ ++ ++int ++neon_immediate_valid_for_shift (rtx op, enum machine_mode mode, ++ rtx *modconst, int *elementwidth, ++ bool isleftshift) ++{ ++ unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode)); ++ unsigned int n_elts = CONST_VECTOR_NUNITS (op), i; ++ unsigned HOST_WIDE_INT last_elt = 0; ++ unsigned HOST_WIDE_INT maxshift; ++ ++ /* Split vector constant out into a byte vector. */ ++ for (i = 0; i < n_elts; i++) ++ { ++ rtx el = CONST_VECTOR_ELT (op, i); ++ unsigned HOST_WIDE_INT elpart; ++ ++ if (GET_CODE (el) == CONST_INT) ++ elpart = INTVAL (el); ++ else if (GET_CODE (el) == CONST_DOUBLE) ++ return 0; ++ else ++ gcc_unreachable (); ++ ++ if (i != 0 && elpart != last_elt) ++ return 0; ++ ++ last_elt = elpart; ++ } ++ ++ /* Shift less than element size. */ ++ maxshift = innersize * 8; ++ ++ if (isleftshift) ++ { ++ /* Left shift immediate value can be from 0 to <size>-1. */ ++ if (last_elt >= maxshift) ++ return 0; ++ } ++ else ++ { ++ /* Right shift immediate value can be from 1 to <size>. */ ++ if (last_elt == 0 || last_elt > maxshift) ++ return 0; ++ } ++ ++ if (elementwidth) ++ *elementwidth = innersize * 8; ++ ++ if (modconst) ++ *modconst = CONST_VECTOR_ELT (op, 0); ++ ++ return 1; ++} ++ + /* Return a string suitable for output of Neon immediate logic operation + MNEM. */ + +@@ -8885,6 +8945,28 @@ + return templ; + } + ++/* Return a string suitable for output of Neon immediate shift operation ++ (VSHR or VSHL) MNEM. 
*/ ++ ++char * ++neon_output_shift_immediate (const char *mnem, char sign, rtx *op2, ++ enum machine_mode mode, int quad, ++ bool isleftshift) ++{ ++ int width, is_valid; ++ static char templ[40]; ++ ++ is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift); ++ gcc_assert (is_valid != 0); ++ ++ if (quad) ++ sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width); ++ else ++ sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width); ++ ++ return templ; ++} ++ + /* Output a sequence of pairwise operations to implement a reduction. + NOTE: We do "too much work" here, because pairwise operations work on two + registers-worth of operands in one go. Unfortunately we can't exploit those + +=== modified file 'gcc/config/arm/neon.md' +--- old/gcc/config/arm/neon.md 2011-07-01 09:19:21 +0000 ++++ new/gcc/config/arm/neon.md 2011-07-04 14:03:49 +0000 +@@ -956,15 +956,57 @@ + ; SImode elements. + + (define_insn "vashl<mode>3" +- [(set (match_operand:VDQIW 0 "s_register_operand" "=w") +- (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") +- (match_operand:VDQIW 2 "s_register_operand" "w")))] +- "TARGET_NEON" +- "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" +- [(set (attr "neon_type") +- (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) +- (const_string "neon_vshl_ddd") +- (const_string "neon_shift_3")))] ++ [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w") ++ (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w") ++ (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))] ++ "TARGET_NEON" ++ { ++ switch (which_alternative) ++ { ++ case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"; ++ case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2], ++ <MODE>mode, ++ VALID_NEON_QREG_MODE (<MODE>mode), ++ true); ++ default: gcc_unreachable (); ++ } ++ } ++ [(set (attr "neon_type") ++ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) ++ (const_string "neon_vshl_ddd") ++ (const_string "neon_shift_3")))] ++) ++ ++(define_insn "vashr<mode>3_imm" ++ [(set (match_operand:VDQIW 0 "s_register_operand" "=w") ++ (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") ++ (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))] ++ "TARGET_NEON" ++ { ++ return neon_output_shift_immediate ("vshr", 's', &operands[2], ++ <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode), ++ false); ++ } ++ [(set (attr "neon_type") ++ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) ++ (const_string "neon_vshl_ddd") ++ (const_string "neon_shift_3")))] ++) ++ ++(define_insn "vlshr<mode>3_imm" ++ [(set (match_operand:VDQIW 0 "s_register_operand" "=w") ++ (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") ++ (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))] ++ "TARGET_NEON" ++ { ++ return neon_output_shift_immediate ("vshr", 'u', &operands[2], ++ <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode), ++ false); ++ } ++ [(set (attr "neon_type") ++ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) ++ (const_string "neon_vshl_ddd") ++ (const_string "neon_shift_3")))] + ) + + ; Used for implementing logical shift-right, which is a left-shift by a negative +@@ -1004,28 +1046,34 @@ + (define_expand "vashr<mode>3" + [(set (match_operand:VDQIW 0 "s_register_operand" "") + (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "") +- (match_operand:VDQIW 2 "s_register_operand" "")))] ++ (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))] + "TARGET_NEON" + { + rtx neg = gen_reg_rtx 
(<MODE>mode); +- +- emit_insn (gen_neg<mode>2 (neg, operands[2])); +- emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg)); +- ++ if (REG_P (operands[2])) ++ { ++ emit_insn (gen_neg<mode>2 (neg, operands[2])); ++ emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg)); ++ } ++ else ++ emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2])); + DONE; + }) + + (define_expand "vlshr<mode>3" + [(set (match_operand:VDQIW 0 "s_register_operand" "") + (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "") +- (match_operand:VDQIW 2 "s_register_operand" "")))] ++ (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))] + "TARGET_NEON" + { + rtx neg = gen_reg_rtx (<MODE>mode); +- +- emit_insn (gen_neg<mode>2 (neg, operands[2])); +- emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg)); +- ++ if (REG_P (operands[2])) ++ { ++ emit_insn (gen_neg<mode>2 (neg, operands[2])); ++ emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg)); ++ } ++ else ++ emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2])); + DONE; + }) + + +=== modified file 'gcc/config/arm/predicates.md' +--- old/gcc/config/arm/predicates.md 2011-06-22 15:50:23 +0000 ++++ new/gcc/config/arm/predicates.md 2011-07-04 14:03:49 +0000 +@@ -585,6 +585,26 @@ + return neon_immediate_valid_for_move (op, mode, NULL, NULL); + }) + ++(define_predicate "imm_for_neon_lshift_operand" ++ (match_code "const_vector") ++{ ++ return neon_immediate_valid_for_shift (op, mode, NULL, NULL, true); ++}) ++ ++(define_predicate "imm_for_neon_rshift_operand" ++ (match_code "const_vector") ++{ ++ return neon_immediate_valid_for_shift (op, mode, NULL, NULL, false); ++}) ++ ++(define_predicate "imm_lshift_or_reg_neon" ++ (ior (match_operand 0 "s_register_operand") ++ (match_operand 0 "imm_for_neon_lshift_operand"))) ++ ++(define_predicate "imm_rshift_or_reg_neon" ++ (ior (match_operand 0 "s_register_operand") ++ (match_operand 0 "imm_for_neon_rshift_operand"))) ++ + (define_predicate "imm_for_neon_logic_operand" + (match_code "const_vector") + { + +=== modified file 'gcc/optabs.c' +--- old/gcc/optabs.c 2011-03-04 10:27:10 +0000 ++++ new/gcc/optabs.c 2011-07-04 14:03:49 +0000 +@@ -6171,6 +6171,9 @@ + init_optab (usashl_optab, US_ASHIFT); + init_optab (ashr_optab, ASHIFTRT); + init_optab (lshr_optab, LSHIFTRT); ++ init_optabv (vashl_optab, ASHIFT); ++ init_optabv (vashr_optab, ASHIFTRT); ++ init_optabv (vlshr_optab, LSHIFTRT); + init_optab (rotl_optab, ROTATE); + init_optab (rotr_optab, ROTATERT); + init_optab (smin_optab, SMIN); + +=== added file 'gcc/testsuite/gcc.target/arm/neon-vlshr-imm-1.c' +--- old/gcc/testsuite/gcc.target/arm/neon-vlshr-imm-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/neon-vlshr-imm-1.c 2011-07-04 14:03:49 +0000 +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */ ++/* { dg-final { scan-assembler "vshr\.u32.*#3" } } */ ++ ++/* Verify that VSHR immediate is used. 
*/ ++void f1(int n, unsigned int x[], unsigned int y[]) { ++ int i; ++ for (i = 0; i < n; ++i) ++ y[i] = x[i] >> 3; ++} + +=== added file 'gcc/testsuite/gcc.target/arm/neon-vshl-imm-1.c' +--- old/gcc/testsuite/gcc.target/arm/neon-vshl-imm-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/neon-vshl-imm-1.c 2011-07-04 14:03:49 +0000 +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */ ++/* { dg-final { scan-assembler "vshl\.i32.*#3" } } */ ++ ++/* Verify that VSHR immediate is used. */ ++void f1(int n, int x[], int y[]) { ++ int i; ++ for (i = 0; i < n; ++i) ++ y[i] = x[i] << 3; ++} + +=== added file 'gcc/testsuite/gcc.target/arm/neon-vshr-imm-1.c' +--- old/gcc/testsuite/gcc.target/arm/neon-vshr-imm-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/neon-vshr-imm-1.c 2011-07-04 14:03:49 +0000 +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */ ++/* { dg-final { scan-assembler "vshr\.s32.*#3" } } */ ++ ++/* Verify that VSHR immediate is used. */ ++void f1(int n, int x[], int y[]) { ++ int i; ++ for (i = 0; i < n; ++i) ++ y[i] = x[i] >> 3; ++} + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106773.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106773.patch new file mode 100644 index 0000000000..de3f29e193 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106773.patch @@ -0,0 +1,119 @@ +2011-07-13 Richard Sandiford <richard.sandiford@linaro.org> + + Backport from mainline: + gcc/ + 2011-07-07 Richard Sandiford <richard.sandiford@linaro.org> + + * reload1.c (choose_reload_regs): Use mode sizes to check whether + an old reload register completely defines the required value. + + gcc/testsuite/ + 2011-07-07 Richard Sandiford <richard.sandiford@linaro.org> + + * gcc.target/arm/neon-modes-3.c: New test. + +=== modified file 'gcc/reload1.c' +--- old/gcc/reload1.c 2011-07-01 09:19:21 +0000 ++++ new/gcc/reload1.c 2011-07-11 10:06:50 +0000 +@@ -6451,6 +6451,8 @@ + + if (regno >= 0 + && reg_last_reload_reg[regno] != 0 ++ && (GET_MODE_SIZE (GET_MODE (reg_last_reload_reg[regno])) ++ >= GET_MODE_SIZE (mode) + byte) + #ifdef CANNOT_CHANGE_MODE_CLASS + /* Verify that the register it's in can be used in + mode MODE. */ +@@ -6462,24 +6464,12 @@ + { + enum reg_class rclass = rld[r].rclass, last_class; + rtx last_reg = reg_last_reload_reg[regno]; +- enum machine_mode need_mode; + + i = REGNO (last_reg); + i += subreg_regno_offset (i, GET_MODE (last_reg), byte, mode); + last_class = REGNO_REG_CLASS (i); + +- if (byte == 0) +- need_mode = mode; +- else +- need_mode +- = smallest_mode_for_size +- (GET_MODE_BITSIZE (mode) + byte * BITS_PER_UNIT, +- GET_MODE_CLASS (mode) == MODE_PARTIAL_INT +- ? 
MODE_INT : GET_MODE_CLASS (mode)); +- +- if ((GET_MODE_SIZE (GET_MODE (last_reg)) +- >= GET_MODE_SIZE (need_mode)) +- && reg_reloaded_contents[i] == regno ++ if (reg_reloaded_contents[i] == regno + && TEST_HARD_REG_BIT (reg_reloaded_valid, i) + && HARD_REGNO_MODE_OK (i, rld[r].mode) + && (TEST_HARD_REG_BIT (reg_class_contents[(int) rclass], i) + +=== added file 'gcc/testsuite/gcc.target/arm/neon-modes-3.c' +--- old/gcc/testsuite/gcc.target/arm/neon-modes-3.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/neon-modes-3.c 2011-07-11 10:06:50 +0000 +@@ -0,0 +1,61 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-O" } */ ++/* { dg-add-options arm_neon } */ ++ ++#include <arm_neon.h> ++ ++void f1 (volatile float32x4_t *dest, volatile float32x4x4_t *src, int n) ++{ ++ float32x4x4_t a5, a6, a7, a8, a9; ++ int i; ++ ++ a5 = *src; ++ a6 = *src; ++ a7 = *src; ++ a8 = *src; ++ a9 = *src; ++ while (n--) ++ { ++ for (i = 0; i < 8; i++) ++ { ++ float32x4x4_t a0, a1, a2, a3, a4; ++ ++ a0 = *src; ++ a1 = *src; ++ a2 = *src; ++ a3 = *src; ++ a4 = *src; ++ *src = a0; ++ *dest = a0.val[0]; ++ *dest = a0.val[3]; ++ *src = a1; ++ *dest = a1.val[0]; ++ *dest = a1.val[3]; ++ *src = a2; ++ *dest = a2.val[0]; ++ *dest = a2.val[3]; ++ *src = a3; ++ *dest = a3.val[0]; ++ *dest = a3.val[3]; ++ *src = a4; ++ *dest = a4.val[0]; ++ *dest = a4.val[3]; ++ } ++ *src = a5; ++ *dest = a5.val[0]; ++ *dest = a5.val[3]; ++ *src = a6; ++ *dest = a6.val[0]; ++ *dest = a6.val[3]; ++ *src = a7; ++ *dest = a7.val[0]; ++ *dest = a7.val[3]; ++ *src = a8; ++ *dest = a8.val[0]; ++ *dest = a8.val[3]; ++ *src = a9; ++ *dest = a9.val[0]; ++ *dest = a9.val[3]; ++ } ++} + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106775.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106775.patch new file mode 100644 index 0000000000..0b05c38240 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106775.patch @@ -0,0 +1,67 @@ +2011-07-15 Michael Hope <michael.hope@linaro.org> + + gcc/ + Backport from mainline: + + 2011-04-05 Eric Botcazou <ebotcazou@adacore.com> + + * ifcvt.c (cond_exec_process_insns): Disallow converting a block + that contains the prologue. + + gcc/testsuite/ + Backport from mainline: + + 2011-04-01 Bernd Schmidt <bernds@codesourcery.com> + + * gcc.c-torture/compile/20110401-1.c: New test. + +=== modified file 'gcc/ifcvt.c' +--- old/gcc/ifcvt.c 2010-12-14 00:23:40 +0000 ++++ new/gcc/ifcvt.c 2011-07-11 04:02:28 +0000 +@@ -1,5 +1,6 @@ + /* If-conversion support. +- Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010 ++ Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010, ++ 2011 + Free Software Foundation, Inc. + + This file is part of GCC. +@@ -304,6 +305,10 @@ + + for (insn = start; ; insn = NEXT_INSN (insn)) + { ++ /* dwarf2out can't cope with conditional prologues. 
*/ ++ if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END) ++ return FALSE; ++ + if (NOTE_P (insn) || DEBUG_INSN_P (insn)) + goto insn_done; + + +=== added file 'gcc/testsuite/gcc.c-torture/compile/20110401-1.c' +--- old/gcc/testsuite/gcc.c-torture/compile/20110401-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.c-torture/compile/20110401-1.c 2011-07-11 04:02:28 +0000 +@@ -0,0 +1,22 @@ ++void asn1_length_der (unsigned long int len, unsigned char *ans, int *ans_len) ++{ ++ int k; ++ unsigned char temp[4]; ++ if (len < 128) { ++ if (ans != ((void *) 0)) ++ ans[0] = (unsigned char) len; ++ *ans_len = 1; ++ } else { ++ k = 0; ++ while (len) { ++ temp[k++] = len & 0xFF; ++ len = len >> 8; ++ } ++ *ans_len = k + 1; ++ if (ans != ((void *) 0)) { ++ ans[0] = ((unsigned char) k & 0x7F) + 128; ++ while (k--) ++ ans[*ans_len - 1 - k] = temp[k]; ++ } ++ } ++} + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106776.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106776.patch new file mode 100644 index 0000000000..3d4d5c5049 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106776.patch @@ -0,0 +1,46 @@ +2011-07-15 Michael Hope <michael.hope@linaro.org> + + gcc/ + Backport from mainline: + 2011-03-22 Eric Botcazou <ebotcazou@adacore.com> + + * combine.c (simplify_set): Try harder to find the best CC mode when + simplifying a nested COMPARE on the RHS. + +=== modified file 'gcc/combine.c' +--- old/gcc/combine.c 2011-05-27 14:31:18 +0000 ++++ new/gcc/combine.c 2011-07-11 03:52:31 +0000 +@@ -6287,10 +6287,18 @@ + enum rtx_code new_code; + rtx op0, op1, tmp; + int other_changed = 0; ++ rtx inner_compare = NULL_RTX; + enum machine_mode compare_mode = GET_MODE (dest); + + if (GET_CODE (src) == COMPARE) +- op0 = XEXP (src, 0), op1 = XEXP (src, 1); ++ { ++ op0 = XEXP (src, 0), op1 = XEXP (src, 1); ++ if (GET_CODE (op0) == COMPARE && op1 == const0_rtx) ++ { ++ inner_compare = op0; ++ op0 = XEXP (inner_compare, 0), op1 = XEXP (inner_compare, 1); ++ } ++ } + else + op0 = src, op1 = CONST0_RTX (GET_MODE (src)); + +@@ -6332,6 +6340,12 @@ + need to use a different CC mode here. */ + if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC) + compare_mode = GET_MODE (op0); ++ else if (inner_compare ++ && GET_MODE_CLASS (GET_MODE (inner_compare)) == MODE_CC ++ && new_code == old_code ++ && op0 == XEXP (inner_compare, 0) ++ && op1 == XEXP (inner_compare, 1)) ++ compare_mode = GET_MODE (inner_compare); + else + compare_mode = SELECT_CC_MODE (new_code, op0, op1); + + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106777.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106777.patch new file mode 100644 index 0000000000..68b682b3c6 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106777.patch @@ -0,0 +1,192 @@ +2011-07-15 Michael Hope <michael.hope@linaro.org> + + gcc/ + Backport from mainline: + 2011-06-29 Nathan Sidwell <nathan@codesourcery.com> + + * config/arm/unwind-arm.c (enum __cxa_type_match_result): New. + (cxa_type_match): Correct declaration. + (__gnu_unwind_pr_common): Reconstruct + additional indirection when __cxa_type_match returns + succeeded_with_ptr_to_base. + + libstdc++-v3/ + Backport from mainline: + + 2011-06-29 Nathan Sidwell <nathan@codesourcery.com> + + * libsupc++/eh_arm.c (__cxa_type_match): Construct address of + thrown object here. Return succeded_with_ptr_to_base for all + pointer cases. 
+ +=== modified file 'gcc/config/arm/unwind-arm.c' +--- old/gcc/config/arm/unwind-arm.c 2011-03-22 10:59:10 +0000 ++++ new/gcc/config/arm/unwind-arm.c 2011-07-11 03:35:44 +0000 +@@ -32,13 +32,18 @@ + typedef unsigned char bool; + + typedef struct _ZSt9type_info type_info; /* This names C++ type_info type */ ++enum __cxa_type_match_result ++ { ++ ctm_failed = 0, ++ ctm_succeeded = 1, ++ ctm_succeeded_with_ptr_to_base = 2 ++ }; + + void __attribute__((weak)) __cxa_call_unexpected(_Unwind_Control_Block *ucbp); + bool __attribute__((weak)) __cxa_begin_cleanup(_Unwind_Control_Block *ucbp); +-bool __attribute__((weak)) __cxa_type_match(_Unwind_Control_Block *ucbp, +- const type_info *rttip, +- bool is_reference, +- void **matched_object); ++enum __cxa_type_match_result __attribute__((weak)) __cxa_type_match ++ (_Unwind_Control_Block *ucbp, const type_info *rttip, ++ bool is_reference, void **matched_object); + + _Unwind_Ptr __attribute__((weak)) + __gnu_Unwind_Find_exidx (_Unwind_Ptr, int *); +@@ -1107,6 +1112,7 @@ + _uw rtti; + bool is_reference = (data[0] & uint32_highbit) != 0; + void *matched; ++ enum __cxa_type_match_result match_type; + + /* Check for no-throw areas. */ + if (data[1] == (_uw) -2) +@@ -1118,17 +1124,31 @@ + { + /* Match a catch specification. */ + rtti = _Unwind_decode_target2 ((_uw) &data[1]); +- if (!__cxa_type_match (ucbp, (type_info *) rtti, +- is_reference, +- &matched)) +- matched = (void *)0; ++ match_type = __cxa_type_match (ucbp, ++ (type_info *) rtti, ++ is_reference, ++ &matched); + } ++ else ++ match_type = ctm_succeeded; + +- if (matched) ++ if (match_type) + { + ucbp->barrier_cache.sp = + _Unwind_GetGR (context, R_SP); +- ucbp->barrier_cache.bitpattern[0] = (_uw) matched; ++ // ctm_succeeded_with_ptr_to_base really ++ // means _c_t_m indirected the pointer ++ // object. We have to reconstruct the ++ // additional pointer layer by using a temporary. ++ if (match_type == ctm_succeeded_with_ptr_to_base) ++ { ++ ucbp->barrier_cache.bitpattern[2] ++ = (_uw) matched; ++ ucbp->barrier_cache.bitpattern[0] ++ = (_uw) &ucbp->barrier_cache.bitpattern[2]; ++ } ++ else ++ ucbp->barrier_cache.bitpattern[0] = (_uw) matched; + ucbp->barrier_cache.bitpattern[1] = (_uw) data; + return _URC_HANDLER_FOUND; + } + +=== modified file 'libstdc++-v3/libsupc++/eh_arm.cc' +--- old/libstdc++-v3/libsupc++/eh_arm.cc 2011-01-03 20:52:22 +0000 ++++ new/libstdc++-v3/libsupc++/eh_arm.cc 2011-07-11 03:35:44 +0000 +@@ -30,10 +30,11 @@ + using namespace __cxxabiv1; + + +-// Given the thrown type THROW_TYPE, pointer to a variable containing a +-// pointer to the exception object THROWN_PTR_P and a type CATCH_TYPE to +-// compare against, return whether or not there is a match and if so, +-// update *THROWN_PTR_P. ++// Given the thrown type THROW_TYPE, exception object UE_HEADER and a ++// type CATCH_TYPE to compare against, return whether or not there is ++// a match and if so, update *THROWN_PTR_P to point to either the ++// type-matched object, or in the case of a pointer type, the object ++// pointed to by the pointer. 
+ + extern "C" __cxa_type_match_result + __cxa_type_match(_Unwind_Exception* ue_header, +@@ -41,51 +42,51 @@ + bool is_reference __attribute__((__unused__)), + void** thrown_ptr_p) + { +- bool forced_unwind = __is_gxx_forced_unwind_class(ue_header->exception_class); +- bool foreign_exception = !forced_unwind && !__is_gxx_exception_class(ue_header->exception_class); +- bool dependent_exception = +- __is_dependent_exception(ue_header->exception_class); ++ bool forced_unwind ++ = __is_gxx_forced_unwind_class(ue_header->exception_class); ++ bool foreign_exception ++ = !forced_unwind && !__is_gxx_exception_class(ue_header->exception_class); ++ bool dependent_exception ++ = __is_dependent_exception(ue_header->exception_class); + __cxa_exception* xh = __get_exception_header_from_ue(ue_header); + __cxa_dependent_exception *dx = __get_dependent_exception_from_ue(ue_header); + const std::type_info* throw_type; ++ void *thrown_ptr = 0; + + if (forced_unwind) + throw_type = &typeid(abi::__forced_unwind); + else if (foreign_exception) + throw_type = &typeid(abi::__foreign_exception); +- else if (dependent_exception) +- throw_type = __get_exception_header_from_obj +- (dx->primaryException)->exceptionType; + else +- throw_type = xh->exceptionType; +- +- void* thrown_ptr = *thrown_ptr_p; ++ { ++ if (dependent_exception) ++ xh = __get_exception_header_from_obj (dx->primaryException); ++ throw_type = xh->exceptionType; ++ // We used to require the caller set the target of thrown_ptr_p, ++ // but that's incorrect -- the EHABI makes no such requirement ++ // -- and not all callers will set it. Fortunately callers that ++ // do initialize will always pass us the value we calculate ++ // here, so there's no backwards compatibility problem. ++ thrown_ptr = __get_object_from_ue (ue_header); ++ } ++ ++ __cxa_type_match_result result = ctm_succeeded; + + // Pointer types need to adjust the actual pointer, not + // the pointer to pointer that is the exception object. + // This also has the effect of passing pointer types + // "by value" through the __cxa_begin_catch return value. + if (throw_type->__is_pointer_p()) +- thrown_ptr = *(void**) thrown_ptr; ++ { ++ thrown_ptr = *(void**) thrown_ptr; ++ // We need to indicate the indirection to our caller. 
++ result = ctm_succeeded_with_ptr_to_base; ++ } + + if (catch_type->__do_catch(throw_type, &thrown_ptr, 1)) + { + *thrown_ptr_p = thrown_ptr; +- +- if (typeid(*catch_type) == typeid (typeid(void*))) +- { +- const __pointer_type_info *catch_pointer_type = +- static_cast<const __pointer_type_info *> (catch_type); +- const __pointer_type_info *throw_pointer_type = +- static_cast<const __pointer_type_info *> (throw_type); +- +- if (typeid (*catch_pointer_type->__pointee) != typeid (void) +- && (*catch_pointer_type->__pointee != +- *throw_pointer_type->__pointee)) +- return ctm_succeeded_with_ptr_to_base; +- } +- +- return ctm_succeeded; ++ return result; + } + + return ctm_failed; + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106778.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106778.patch new file mode 100644 index 0000000000..b42b425dc1 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106778.patch @@ -0,0 +1,225 @@ +2011-07-15 Michael Hope <michael.hope@linaro.org> + + Backport from mainline r174540 + LP: #807573 + + gcc/ + 2011-06-01 Richard Sandiford <rdsandiford@googlemail.com> + + PR rtl-optimization/48830 + PR rtl-optimization/48808 + PR rtl-optimization/48792 + * reload.c (push_reload): Check contains_reg_of_mode. + * reload1.c (strip_paradoxical_subreg): New function. + (gen_reload_chain_without_interm_reg_p): Use it to handle + paradoxical subregs. + (emit_output_reload_insns, gen_reload): Likewise. + + gcc/testsuite/ + 2011-06-01 Eric Botcazou <ebotcazou@adacore.com> + Hans-Peter Nilsson <hp@axis.com> + + PR rtl-optimization/48830 + * gcc.target/sparc/ultrasp12.c: New test. + +=== modified file 'gcc/reload.c' +--- old/gcc/reload.c 2011-07-01 09:19:21 +0000 ++++ new/gcc/reload.c 2011-07-13 02:09:08 +0000 +@@ -1017,6 +1017,7 @@ + #ifdef CANNOT_CHANGE_MODE_CLASS + && !CANNOT_CHANGE_MODE_CLASS (GET_MODE (SUBREG_REG (in)), inmode, rclass) + #endif ++ && contains_reg_of_mode[(int) rclass][(int) GET_MODE (SUBREG_REG (in))] + && (CONSTANT_P (SUBREG_REG (in)) + || GET_CODE (SUBREG_REG (in)) == PLUS + || strict_low +@@ -1123,6 +1124,7 @@ + #ifdef CANNOT_CHANGE_MODE_CLASS + && !CANNOT_CHANGE_MODE_CLASS (GET_MODE (SUBREG_REG (out)), outmode, rclass) + #endif ++ && contains_reg_of_mode[(int) rclass][(int) GET_MODE (SUBREG_REG (out))] + && (CONSTANT_P (SUBREG_REG (out)) + || strict_low + || (((REG_P (SUBREG_REG (out)) + +=== modified file 'gcc/reload1.c' +--- old/gcc/reload1.c 2011-07-11 10:06:50 +0000 ++++ new/gcc/reload1.c 2011-07-14 22:14:45 +0000 +@@ -4476,6 +4476,43 @@ + } + } + } ++ ++/* *OP_PTR and *OTHER_PTR are two operands to a conceptual reload. ++ If *OP_PTR is a paradoxical subreg, try to remove that subreg ++ and apply the corresponding narrowing subreg to *OTHER_PTR. ++ Return true if the operands were changed, false otherwise. */ ++ ++static bool ++strip_paradoxical_subreg (rtx *op_ptr, rtx *other_ptr) ++{ ++ rtx op, inner, other, tem; ++ ++ op = *op_ptr; ++ if (GET_CODE (op) != SUBREG) ++ return false; ++ ++ inner = SUBREG_REG (op); ++ if (GET_MODE_SIZE (GET_MODE (op)) <= GET_MODE_SIZE (GET_MODE (inner))) ++ return false; ++ ++ other = *other_ptr; ++ tem = gen_lowpart_common (GET_MODE (inner), other); ++ if (!tem) ++ return false; ++ ++ /* If the lowpart operation turned a hard register into a subreg, ++ rather than simplifying it to another hard register, then the ++ mode change cannot be properly represented. 
For example, OTHER ++ might be valid in its current mode, but not in the new one. */ ++ if (GET_CODE (tem) == SUBREG ++ && REG_P (other) ++ && HARD_REGISTER_P (other)) ++ return false; ++ ++ *op_ptr = inner; ++ *other_ptr = tem; ++ return true; ++} + + /* A subroutine of reload_as_needed. If INSN has a REG_EH_REGION note, + examine all of the reload insns between PREV and NEXT exclusive, and +@@ -5556,7 +5593,7 @@ + chain reloads or do need an intermediate hard registers. */ + bool result = true; + int regno, n, code; +- rtx out, in, tem, insn; ++ rtx out, in, insn; + rtx last = get_last_insn (); + + /* Make r2 a component of r1. */ +@@ -5575,11 +5612,7 @@ + + /* If IN is a paradoxical SUBREG, remove it and try to put the + opposite SUBREG on OUT. Likewise for a paradoxical SUBREG on OUT. */ +- if (GET_CODE (in) == SUBREG +- && (GET_MODE_SIZE (GET_MODE (in)) +- > GET_MODE_SIZE (GET_MODE (SUBREG_REG (in)))) +- && (tem = gen_lowpart_common (GET_MODE (SUBREG_REG (in)), out)) != 0) +- in = SUBREG_REG (in), out = tem; ++ strip_paradoxical_subreg (&in, &out); + + if (GET_CODE (in) == PLUS + && (REG_P (XEXP (in, 0)) +@@ -7571,7 +7604,6 @@ + if (tertiary_icode != CODE_FOR_nothing) + { + rtx third_reloadreg = rld[tertiary_reload].reg_rtx; +- rtx tem; + + /* Copy primary reload reg to secondary reload reg. + (Note that these have been swapped above, then +@@ -7580,13 +7612,7 @@ + /* If REAL_OLD is a paradoxical SUBREG, remove it + and try to put the opposite SUBREG on + RELOADREG. */ +- if (GET_CODE (real_old) == SUBREG +- && (GET_MODE_SIZE (GET_MODE (real_old)) +- > GET_MODE_SIZE (GET_MODE (SUBREG_REG (real_old)))) +- && 0 != (tem = gen_lowpart_common +- (GET_MODE (SUBREG_REG (real_old)), +- reloadreg))) +- real_old = SUBREG_REG (real_old), reloadreg = tem; ++ strip_paradoxical_subreg (&real_old, &reloadreg); + + gen_reload (reloadreg, second_reloadreg, + rl->opnum, rl->when_needed); +@@ -8402,16 +8428,8 @@ + + /* If IN is a paradoxical SUBREG, remove it and try to put the + opposite SUBREG on OUT. Likewise for a paradoxical SUBREG on OUT. */ +- if (GET_CODE (in) == SUBREG +- && (GET_MODE_SIZE (GET_MODE (in)) +- > GET_MODE_SIZE (GET_MODE (SUBREG_REG (in)))) +- && (tem = gen_lowpart_common (GET_MODE (SUBREG_REG (in)), out)) != 0) +- in = SUBREG_REG (in), out = tem; +- else if (GET_CODE (out) == SUBREG +- && (GET_MODE_SIZE (GET_MODE (out)) +- > GET_MODE_SIZE (GET_MODE (SUBREG_REG (out)))) +- && (tem = gen_lowpart_common (GET_MODE (SUBREG_REG (out)), in)) != 0) +- out = SUBREG_REG (out), in = tem; ++ if (!strip_paradoxical_subreg (&in, &out)) ++ strip_paradoxical_subreg (&out, &in); + + /* How to do this reload can get quite tricky. 
Normally, we are being + asked to reload a simple operand, such as a MEM, a constant, or a pseudo + +=== added file 'gcc/testsuite/gcc.target/sparc/ultrasp12.c' +--- old/gcc/testsuite/gcc.target/sparc/ultrasp12.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/sparc/ultrasp12.c 2011-07-13 02:09:08 +0000 +@@ -0,0 +1,64 @@ ++/* PR rtl-optimization/48830 */ ++/* Testcase by Hans-Peter Nilsson <hp@gcc.gnu.org> */ ++ ++/* { dg-do compile } */ ++/* { dg-require-effective-target lp64 } */ ++/* { dg-options "-O2 -mcpu=ultrasparc -mvis" } */ ++ ++typedef unsigned char uint8_t; ++typedef unsigned int uint32_t; ++typedef unsigned long int uint64_t; ++typedef unsigned long int uintmax_t; ++typedef unsigned char rc_vec_t __attribute__((__vector_size__(8))); ++typedef short rc_svec_type_ __attribute__((__vector_size__(8))); ++typedef unsigned char rc_vec4_type_ __attribute__((__vector_size__(4))); ++ ++void ++rc_stat_xsum_acc(const uint8_t *__restrict src1, int src1_dim, ++ const uint8_t *__restrict src2, int src2_dim, ++ int len, int height, uintmax_t sum[5]) ++{ ++ uint32_t s1 = 0; ++ uint32_t s2 = 0; ++ uintmax_t s11 = 0; ++ uintmax_t s22 = 0; ++ uintmax_t s12 = 0; ++ int full = len / ((1024) < (1024) ? (1024) : (1024)); ++ int rem = len % ((1024) < (1024) ? (1024) : (1024)); ++ int rem1 = rem / 1; ++ int y; ++ unsigned int rc_gsr_scale_ __attribute__ ((__unused__)) = 7; unsigned int rc_gsr_align_ __attribute__ ((__unused__)) = 4; unsigned int rc_gsr_set_ __attribute__ ((__unused__)) = 0; register unsigned int rc_gsr_fakedep_ __attribute__ ((__unused__)) = 0; unsigned int rc_gsr_ldinit_ __attribute__ ((__unused__)) = 0; ++ for (y = 0; y < height; y++) { ++ rc_vec_t a1, a2, a11, a22, a12; ++ int i1 = (y)*(src1_dim); ++ int i2 = (y)*(src2_dim); ++ int x; ++ ((a1) = ((rc_vec_t) {0})); ++ ((a2) = ((rc_vec_t) {0})); ++ ((a11) = ((rc_vec_t) {0})); ++ ((a22) = ((rc_vec_t) {0})); ++ ((a12) = ((rc_vec_t) {0})); ++ for (x = 0; x < full; x++) { ++ int k; ++ for (k = 0; k < ((1024) < (1024) ? 
(1024) : (1024)) / ++ 1; k++) ++ { ++ do { rc_vec_t v1, v2; ((v1) = *(const rc_vec_t*)(&(src1)[i1])); ((v2) = *(const rc_vec_t*)(&(src2)[i2])); ((a1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v1, ((rc_vec_t) {0}), (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)))).v)); ((a2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v2, ((rc_vec_t) {0}), (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)))).v)); do { rc_vec_t s1_ = (v1); rc_vec_t s2_ = (v1); rc_vec_t accvin_ = (a11); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = 
__builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a11) = accvout_; } while (0); do { rc_vec_t s1_ = (v2); rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a22); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = 
__builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a22) = accvout_; } while (0); do { rc_vec_t s1_ = (v1); rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a12); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || 
(unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a12) = accvout_; } while (0); (i1) += 8; (i2) += 8; } while (0); ++ ++ } ++ do { uint32_t t1, t2, t11, t22, t12; ((t1) = (((union { rc_vec_t v; uint64_t i; 
})(uint64_t)(a1)).i)); ((t2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a11); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t11) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a22); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t22) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a12); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t12) = maclo_ + machi_ * 256; } while (0); ((a1) = ((rc_vec_t) {0})); ((a2) = ((rc_vec_t) {0})); ((a11) = ((rc_vec_t) {0})); ((a22) = ((rc_vec_t) {0})); ((a12) = ((rc_vec_t) {0})); (s1) += t1; (s2) += t2; (s11) += t11; (s22) += t22; (s12) += t12; } while (0); ++ } ++ for (x = 0; x < rem1; x++) { ++ do { rc_vec_t v1, v2; ((v1) = *(const rc_vec_t*)(&(src1)[i1])); ((v2) = *(const rc_vec_t*)(&(src2)[i2])); ((a1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v1, ((rc_vec_t) {0}), (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)))).v)); ((a2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v2, ((rc_vec_t) {0}), (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)))).v)); do { rc_vec_t s1_ = (v1); rc_vec_t s2_ = (v1); rc_vec_t accvin_ = (a11); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] 
"=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a11) = accvout_; } while (0); do { rc_vec_t s1_ = (v2); rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a22); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; 
rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = 
__builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a22) = accvout_; } while (0); do { rc_vec_t s1_ = (v1); rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a12); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); 
mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a12) = accvout_; } while (0); (i1) += 8; (i2) += 8; } while (0); ++ } ++ do { uint32_t t1, t2, t11, t22, t12; ((t1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)); ((t2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a11); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t11) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a22); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t22) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a12); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t12) = maclo_ + machi_ * 256; } while (0); ((a1) = ((rc_vec_t) {0})); ((a2) = ((rc_vec_t) {0})); ((a11) = ((rc_vec_t) {0})); ((a22) = ((rc_vec_t) {0})); ((a12) = ((rc_vec_t) {0})); (s1) += t1; (s2) += t2; (s11) += t11; (s22) += t22; (s12) += t12; } while (0); ++ ++ do { uint32_t t1, t2, t11, t22, t12; ((t1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)); ((t2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)); do { rc_vec4_type_ accvhi4_, accvlo4_; 
uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a11); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t11) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a22); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t22) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a12); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t12) = maclo_ + machi_ * 256; } while (0); ((a1) = ((rc_vec_t) {0})); ((a2) = ((rc_vec_t) {0})); ((a11) = ((rc_vec_t) {0})); ((a22) = ((rc_vec_t) {0})); ((a12) = ((rc_vec_t) {0})); (s1) += t1; (s2) += t2; (s11) += t11; (s22) += t22; (s12) += t12; } while (0); ++ } ++ sum[0] = s1; ++ sum[1] = s2; ++ sum[2] = s11; ++ sum[3] = s22; ++ sum[4] = s12; ++ ; ++} + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106781.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106781.patch new file mode 100644 index 0000000000..a86ddfdec0 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106781.patch @@ -0,0 +1,741 @@ +2011-07-21 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + PR middle-end/49736 + * expr.c (all_zeros_p): Undo bogus part of last change. + +2011-07-21 Richard Sandiford <richard.sandiford@linaro.org> + + Backport from mainline: + gcc/cp/ + 2011-07-13 Richard Sandiford <richard.sandiford@linaro.org> + + * typeck2.c (split_nonconstant_init_1): Pass the initializer directly, + rather than a pointer to it. Return true if the whole of the value + was initialized by the generated statements. Use + complete_ctor_at_level_p instead of count_type_elements. + + gcc/ + 2011-07-13 Richard Sandiford <richard.sandiford@linaro.org> + + * tree.h (categorize_ctor_elements): Remove comment. Fix long line. + (count_type_elements): Delete. + (complete_ctor_at_level_p): Declare. + * expr.c (flexible_array_member_p): New function, split out from... + (count_type_elements): ...here. Make static. Replace allow_flexarr + parameter with for_ctor_p. When for_ctor_p is true, return the + number of elements that should appear in the top-level constructor, + otherwise return an estimate of the number of scalars. + (categorize_ctor_elements): Replace p_must_clear with p_complete. + (categorize_ctor_elements_1): Likewise. Use complete_ctor_at_level_p. + (complete_ctor_at_level_p): New function, borrowing union logic + from old categorize_ctor_elements_1. + (mostly_zeros_p): Return true if the constructor is not complete. + (all_zeros_p): Update call to categorize_ctor_elements. 
+ * gimplify.c (gimplify_init_constructor): Update call to + categorize_ctor_elements. Don't call count_type_elements. + Unconditionally prevent clearing for variable-sized types, + otherwise rely on categorize_ctor_elements to detect + incomplete initializers. + + gcc/testsuite/ + 2011-07-13 Chung-Lin Tang <cltang@codesourcery.com> + + * gcc.target/arm/pr48183.c: New test. + +=== modified file 'gcc/cp/typeck2.c' +--- old/gcc/cp/typeck2.c 2011-05-20 21:29:14 +0000 ++++ new/gcc/cp/typeck2.c 2011-07-13 13:17:31 +0000 +@@ -473,18 +473,20 @@ + + + /* The recursive part of split_nonconstant_init. DEST is an lvalue +- expression to which INIT should be assigned. INIT is a CONSTRUCTOR. */ ++ expression to which INIT should be assigned. INIT is a CONSTRUCTOR. ++ Return true if the whole of the value was initialized by the ++ generated statements. */ + +-static void +-split_nonconstant_init_1 (tree dest, tree *initp) ++static bool ++split_nonconstant_init_1 (tree dest, tree init) + { + unsigned HOST_WIDE_INT idx; +- tree init = *initp; + tree field_index, value; + tree type = TREE_TYPE (dest); + tree inner_type = NULL; + bool array_type_p = false; +- HOST_WIDE_INT num_type_elements, num_initialized_elements; ++ bool complete_p = true; ++ HOST_WIDE_INT num_split_elts = 0; + + switch (TREE_CODE (type)) + { +@@ -496,7 +498,6 @@ + case RECORD_TYPE: + case UNION_TYPE: + case QUAL_UNION_TYPE: +- num_initialized_elements = 0; + FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (init), idx, + field_index, value) + { +@@ -519,13 +520,14 @@ + sub = build3 (COMPONENT_REF, inner_type, dest, field_index, + NULL_TREE); + +- split_nonconstant_init_1 (sub, &value); ++ if (!split_nonconstant_init_1 (sub, value)) ++ complete_p = false; ++ num_split_elts++; + } + else if (!initializer_constant_valid_p (value, inner_type)) + { + tree code; + tree sub; +- HOST_WIDE_INT inner_elements; + + /* FIXME: Ordered removal is O(1) so the whole function is + worst-case quadratic. This could be fixed using an aside +@@ -549,21 +551,9 @@ + code = build_stmt (input_location, EXPR_STMT, code); + add_stmt (code); + +- inner_elements = count_type_elements (inner_type, true); +- if (inner_elements < 0) +- num_initialized_elements = -1; +- else if (num_initialized_elements >= 0) +- num_initialized_elements += inner_elements; +- continue; ++ num_split_elts++; + } + } +- +- num_type_elements = count_type_elements (type, true); +- /* If all elements of the initializer are non-constant and +- have been split out, we don't need the empty CONSTRUCTOR. */ +- if (num_type_elements > 0 +- && num_type_elements == num_initialized_elements) +- *initp = NULL; + break; + + case VECTOR_TYPE: +@@ -575,6 +565,7 @@ + code = build2 (MODIFY_EXPR, type, dest, cons); + code = build_stmt (input_location, EXPR_STMT, code); + add_stmt (code); ++ num_split_elts += CONSTRUCTOR_NELTS (init); + } + break; + +@@ -584,6 +575,8 @@ + + /* The rest of the initializer is now a constant. */ + TREE_CONSTANT (init) = 1; ++ return complete_p && complete_ctor_at_level_p (TREE_TYPE (init), ++ num_split_elts, inner_type); + } + + /* A subroutine of store_init_value. 
Splits non-constant static +@@ -599,7 +592,8 @@ + if (TREE_CODE (init) == CONSTRUCTOR) + { + code = push_stmt_list (); +- split_nonconstant_init_1 (dest, &init); ++ if (split_nonconstant_init_1 (dest, init)) ++ init = NULL_TREE; + code = pop_stmt_list (code); + DECL_INITIAL (dest) = init; + TREE_READONLY (dest) = 0; + +=== modified file 'gcc/expr.c' +--- old/gcc/expr.c 2011-06-02 12:12:00 +0000 ++++ new/gcc/expr.c 2011-07-14 11:52:32 +0000 +@@ -4866,16 +4866,136 @@ + return NULL_RTX; + } + ++/* Return true if field F of structure TYPE is a flexible array. */ ++ ++static bool ++flexible_array_member_p (const_tree f, const_tree type) ++{ ++ const_tree tf; ++ ++ tf = TREE_TYPE (f); ++ return (DECL_CHAIN (f) == NULL ++ && TREE_CODE (tf) == ARRAY_TYPE ++ && TYPE_DOMAIN (tf) ++ && TYPE_MIN_VALUE (TYPE_DOMAIN (tf)) ++ && integer_zerop (TYPE_MIN_VALUE (TYPE_DOMAIN (tf))) ++ && !TYPE_MAX_VALUE (TYPE_DOMAIN (tf)) ++ && int_size_in_bytes (type) >= 0); ++} ++ ++/* If FOR_CTOR_P, return the number of top-level elements that a constructor ++ must have in order for it to completely initialize a value of type TYPE. ++ Return -1 if the number isn't known. ++ ++ If !FOR_CTOR_P, return an estimate of the number of scalars in TYPE. */ ++ ++static HOST_WIDE_INT ++count_type_elements (const_tree type, bool for_ctor_p) ++{ ++ switch (TREE_CODE (type)) ++ { ++ case ARRAY_TYPE: ++ { ++ tree nelts; ++ ++ nelts = array_type_nelts (type); ++ if (nelts && host_integerp (nelts, 1)) ++ { ++ unsigned HOST_WIDE_INT n; ++ ++ n = tree_low_cst (nelts, 1) + 1; ++ if (n == 0 || for_ctor_p) ++ return n; ++ else ++ return n * count_type_elements (TREE_TYPE (type), false); ++ } ++ return for_ctor_p ? -1 : 1; ++ } ++ ++ case RECORD_TYPE: ++ { ++ unsigned HOST_WIDE_INT n; ++ tree f; ++ ++ n = 0; ++ for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f)) ++ if (TREE_CODE (f) == FIELD_DECL) ++ { ++ if (!for_ctor_p) ++ n += count_type_elements (TREE_TYPE (f), false); ++ else if (!flexible_array_member_p (f, type)) ++ /* Don't count flexible arrays, which are not supposed ++ to be initialized. */ ++ n += 1; ++ } ++ ++ return n; ++ } ++ ++ case UNION_TYPE: ++ case QUAL_UNION_TYPE: ++ { ++ tree f; ++ HOST_WIDE_INT n, m; ++ ++ gcc_assert (!for_ctor_p); ++ /* Estimate the number of scalars in each field and pick the ++ maximum. Other estimates would do instead; the idea is simply ++ to make sure that the estimate is not sensitive to the ordering ++ of the fields. */ ++ n = 1; ++ for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f)) ++ if (TREE_CODE (f) == FIELD_DECL) ++ { ++ m = count_type_elements (TREE_TYPE (f), false); ++ /* If the field doesn't span the whole union, add an extra ++ scalar for the rest. */ ++ if (simple_cst_equal (TYPE_SIZE (TREE_TYPE (f)), ++ TYPE_SIZE (type)) != 1) ++ m++; ++ if (n < m) ++ n = m; ++ } ++ return n; ++ } ++ ++ case COMPLEX_TYPE: ++ return 2; ++ ++ case VECTOR_TYPE: ++ return TYPE_VECTOR_SUBPARTS (type); ++ ++ case INTEGER_TYPE: ++ case REAL_TYPE: ++ case FIXED_POINT_TYPE: ++ case ENUMERAL_TYPE: ++ case BOOLEAN_TYPE: ++ case POINTER_TYPE: ++ case OFFSET_TYPE: ++ case REFERENCE_TYPE: ++ return 1; ++ ++ case ERROR_MARK: ++ return 0; ++ ++ case VOID_TYPE: ++ case METHOD_TYPE: ++ case FUNCTION_TYPE: ++ case LANG_TYPE: ++ default: ++ gcc_unreachable (); ++ } ++} ++ + /* Helper for categorize_ctor_elements. Identical interface. 
*/ + + static bool + categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts, +- HOST_WIDE_INT *p_elt_count, +- bool *p_must_clear) ++ HOST_WIDE_INT *p_init_elts, bool *p_complete) + { + unsigned HOST_WIDE_INT idx; +- HOST_WIDE_INT nz_elts, elt_count; +- tree value, purpose; ++ HOST_WIDE_INT nz_elts, init_elts, num_fields; ++ tree value, purpose, elt_type; + + /* Whether CTOR is a valid constant initializer, in accordance with what + initializer_constant_valid_p does. If inferred from the constructor +@@ -4884,7 +5004,9 @@ + bool const_p = const_from_elts_p ? true : TREE_STATIC (ctor); + + nz_elts = 0; +- elt_count = 0; ++ init_elts = 0; ++ num_fields = 0; ++ elt_type = NULL_TREE; + + FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), idx, purpose, value) + { +@@ -4899,6 +5021,8 @@ + mult = (tree_low_cst (hi_index, 1) + - tree_low_cst (lo_index, 1) + 1); + } ++ num_fields += mult; ++ elt_type = TREE_TYPE (value); + + switch (TREE_CODE (value)) + { +@@ -4906,11 +5030,11 @@ + { + HOST_WIDE_INT nz = 0, ic = 0; + +- bool const_elt_p +- = categorize_ctor_elements_1 (value, &nz, &ic, p_must_clear); ++ bool const_elt_p = categorize_ctor_elements_1 (value, &nz, &ic, ++ p_complete); + + nz_elts += mult * nz; +- elt_count += mult * ic; ++ init_elts += mult * ic; + + if (const_from_elts_p && const_p) + const_p = const_elt_p; +@@ -4922,12 +5046,12 @@ + case FIXED_CST: + if (!initializer_zerop (value)) + nz_elts += mult; +- elt_count += mult; ++ init_elts += mult; + break; + + case STRING_CST: + nz_elts += mult * TREE_STRING_LENGTH (value); +- elt_count += mult * TREE_STRING_LENGTH (value); ++ init_elts += mult * TREE_STRING_LENGTH (value); + break; + + case COMPLEX_CST: +@@ -4935,7 +5059,7 @@ + nz_elts += mult; + if (!initializer_zerop (TREE_IMAGPART (value))) + nz_elts += mult; +- elt_count += mult; ++ init_elts += mult; + break; + + case VECTOR_CST: +@@ -4945,65 +5069,31 @@ + { + if (!initializer_zerop (TREE_VALUE (v))) + nz_elts += mult; +- elt_count += mult; ++ init_elts += mult; + } + } + break; + + default: + { +- HOST_WIDE_INT tc = count_type_elements (TREE_TYPE (value), true); +- if (tc < 1) +- tc = 1; ++ HOST_WIDE_INT tc = count_type_elements (elt_type, false); + nz_elts += mult * tc; +- elt_count += mult * tc; ++ init_elts += mult * tc; + + if (const_from_elts_p && const_p) +- const_p = initializer_constant_valid_p (value, TREE_TYPE (value)) ++ const_p = initializer_constant_valid_p (value, elt_type) + != NULL_TREE; + } + break; + } + } + +- if (!*p_must_clear +- && (TREE_CODE (TREE_TYPE (ctor)) == UNION_TYPE +- || TREE_CODE (TREE_TYPE (ctor)) == QUAL_UNION_TYPE)) +- { +- tree init_sub_type; +- bool clear_this = true; +- +- if (!VEC_empty (constructor_elt, CONSTRUCTOR_ELTS (ctor))) +- { +- /* We don't expect more than one element of the union to be +- initialized. Not sure what we should do otherwise... */ +- gcc_assert (VEC_length (constructor_elt, CONSTRUCTOR_ELTS (ctor)) +- == 1); +- +- init_sub_type = TREE_TYPE (VEC_index (constructor_elt, +- CONSTRUCTOR_ELTS (ctor), +- 0)->value); +- +- /* ??? We could look at each element of the union, and find the +- largest element. Which would avoid comparing the size of the +- initialized element against any tail padding in the union. +- Doesn't seem worth the effort... */ +- if (simple_cst_equal (TYPE_SIZE (TREE_TYPE (ctor)), +- TYPE_SIZE (init_sub_type)) == 1) +- { +- /* And now we have to find out if the element itself is fully +- constructed. E.g. for union { struct { int a, b; } s; } u +- = { .s = { .a = 1 } }. 
*/ +- if (elt_count == count_type_elements (init_sub_type, false)) +- clear_this = false; +- } +- } +- +- *p_must_clear = clear_this; +- } ++ if (*p_complete && !complete_ctor_at_level_p (TREE_TYPE (ctor), ++ num_fields, elt_type)) ++ *p_complete = false; + + *p_nz_elts += nz_elts; +- *p_elt_count += elt_count; ++ *p_init_elts += init_elts; + + return const_p; + } +@@ -5013,111 +5103,50 @@ + and place it in *P_NZ_ELTS; + * how many scalar fields in total are in CTOR, + and place it in *P_ELT_COUNT. +- * if a type is a union, and the initializer from the constructor +- is not the largest element in the union, then set *p_must_clear. ++ * whether the constructor is complete -- in the sense that every ++ meaningful byte is explicitly given a value -- ++ and place it in *P_COMPLETE. + + Return whether or not CTOR is a valid static constant initializer, the same + as "initializer_constant_valid_p (CTOR, TREE_TYPE (CTOR)) != 0". */ + + bool + categorize_ctor_elements (const_tree ctor, HOST_WIDE_INT *p_nz_elts, +- HOST_WIDE_INT *p_elt_count, +- bool *p_must_clear) ++ HOST_WIDE_INT *p_init_elts, bool *p_complete) + { + *p_nz_elts = 0; +- *p_elt_count = 0; +- *p_must_clear = false; ++ *p_init_elts = 0; ++ *p_complete = true; + +- return +- categorize_ctor_elements_1 (ctor, p_nz_elts, p_elt_count, p_must_clear); ++ return categorize_ctor_elements_1 (ctor, p_nz_elts, p_init_elts, p_complete); + } + +-/* Count the number of scalars in TYPE. Return -1 on overflow or +- variable-sized. If ALLOW_FLEXARR is true, don't count flexible +- array member at the end of the structure. */ ++/* TYPE is initialized by a constructor with NUM_ELTS elements, the last ++ of which had type LAST_TYPE. Each element was itself a complete ++ initializer, in the sense that every meaningful byte was explicitly ++ given a value. Return true if the same is true for the constructor ++ as a whole. */ + +-HOST_WIDE_INT +-count_type_elements (const_tree type, bool allow_flexarr) ++bool ++complete_ctor_at_level_p (const_tree type, HOST_WIDE_INT num_elts, ++ const_tree last_type) + { +- const HOST_WIDE_INT max = ~((HOST_WIDE_INT)1 << (HOST_BITS_PER_WIDE_INT-1)); +- switch (TREE_CODE (type)) ++ if (TREE_CODE (type) == UNION_TYPE ++ || TREE_CODE (type) == QUAL_UNION_TYPE) + { +- case ARRAY_TYPE: +- { +- tree telts = array_type_nelts (type); +- if (telts && host_integerp (telts, 1)) +- { +- HOST_WIDE_INT n = tree_low_cst (telts, 1) + 1; +- HOST_WIDE_INT m = count_type_elements (TREE_TYPE (type), false); +- if (n == 0) +- return 0; +- else if (max / n > m) +- return n * m; +- } +- return -1; +- } +- +- case RECORD_TYPE: +- { +- HOST_WIDE_INT n = 0, t; +- tree f; +- +- for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f)) +- if (TREE_CODE (f) == FIELD_DECL) +- { +- t = count_type_elements (TREE_TYPE (f), false); +- if (t < 0) +- { +- /* Check for structures with flexible array member. 
*/ +- tree tf = TREE_TYPE (f); +- if (allow_flexarr +- && DECL_CHAIN (f) == NULL +- && TREE_CODE (tf) == ARRAY_TYPE +- && TYPE_DOMAIN (tf) +- && TYPE_MIN_VALUE (TYPE_DOMAIN (tf)) +- && integer_zerop (TYPE_MIN_VALUE (TYPE_DOMAIN (tf))) +- && !TYPE_MAX_VALUE (TYPE_DOMAIN (tf)) +- && int_size_in_bytes (type) >= 0) +- break; +- +- return -1; +- } +- n += t; +- } +- +- return n; +- } +- +- case UNION_TYPE: +- case QUAL_UNION_TYPE: +- return -1; +- +- case COMPLEX_TYPE: +- return 2; +- +- case VECTOR_TYPE: +- return TYPE_VECTOR_SUBPARTS (type); +- +- case INTEGER_TYPE: +- case REAL_TYPE: +- case FIXED_POINT_TYPE: +- case ENUMERAL_TYPE: +- case BOOLEAN_TYPE: +- case POINTER_TYPE: +- case OFFSET_TYPE: +- case REFERENCE_TYPE: +- return 1; +- +- case ERROR_MARK: +- return 0; +- +- case VOID_TYPE: +- case METHOD_TYPE: +- case FUNCTION_TYPE: +- case LANG_TYPE: +- default: +- gcc_unreachable (); ++ if (num_elts == 0) ++ return false; ++ ++ gcc_assert (num_elts == 1 && last_type); ++ ++ /* ??? We could look at each element of the union, and find the ++ largest element. Which would avoid comparing the size of the ++ initialized element against any tail padding in the union. ++ Doesn't seem worth the effort... */ ++ return simple_cst_equal (TYPE_SIZE (type), TYPE_SIZE (last_type)) == 1; + } ++ ++ return count_type_elements (type, true) == num_elts; + } + + /* Return 1 if EXP contains mostly (3/4) zeros. */ +@@ -5126,18 +5155,12 @@ + mostly_zeros_p (const_tree exp) + { + if (TREE_CODE (exp) == CONSTRUCTOR) +- + { +- HOST_WIDE_INT nz_elts, count, elts; +- bool must_clear; +- +- categorize_ctor_elements (exp, &nz_elts, &count, &must_clear); +- if (must_clear) +- return 1; +- +- elts = count_type_elements (TREE_TYPE (exp), false); +- +- return nz_elts < elts / 4; ++ HOST_WIDE_INT nz_elts, init_elts; ++ bool complete_p; ++ ++ categorize_ctor_elements (exp, &nz_elts, &init_elts, &complete_p); ++ return !complete_p || nz_elts < init_elts / 4; + } + + return initializer_zerop (exp); +@@ -5149,12 +5172,11 @@ + all_zeros_p (const_tree exp) + { + if (TREE_CODE (exp) == CONSTRUCTOR) +- + { +- HOST_WIDE_INT nz_elts, count; +- bool must_clear; ++ HOST_WIDE_INT nz_elts, init_elts; ++ bool complete_p; + +- categorize_ctor_elements (exp, &nz_elts, &count, &must_clear); ++ categorize_ctor_elements (exp, &nz_elts, &init_elts, &complete_p); + return nz_elts == 0; + } + + +=== modified file 'gcc/gimplify.c' +--- old/gcc/gimplify.c 2011-05-26 10:27:57 +0000 ++++ new/gcc/gimplify.c 2011-07-13 13:17:31 +0000 +@@ -3693,9 +3693,8 @@ + case ARRAY_TYPE: + { + struct gimplify_init_ctor_preeval_data preeval_data; +- HOST_WIDE_INT num_type_elements, num_ctor_elements; +- HOST_WIDE_INT num_nonzero_elements; +- bool cleared, valid_const_initializer; ++ HOST_WIDE_INT num_ctor_elements, num_nonzero_elements; ++ bool cleared, complete_p, valid_const_initializer; + + /* Aggregate types must lower constructors to initialization of + individual elements. The exception is that a CONSTRUCTOR node +@@ -3712,7 +3711,7 @@ + can only do so if it known to be a valid constant initializer. */ + valid_const_initializer + = categorize_ctor_elements (ctor, &num_nonzero_elements, +- &num_ctor_elements, &cleared); ++ &num_ctor_elements, &complete_p); + + /* If a const aggregate variable is being initialized, then it + should never be a lose to promote the variable to be static. */ +@@ -3750,26 +3749,29 @@ + parts in, then generate code for the non-constant parts. */ + /* TODO. There's code in cp/typeck.c to do this. 
*/ + +- num_type_elements = count_type_elements (type, true); ++ if (int_size_in_bytes (TREE_TYPE (ctor)) < 0) ++ /* store_constructor will ignore the clearing of variable-sized ++ objects. Initializers for such objects must explicitly set ++ every field that needs to be set. */ ++ cleared = false; ++ else if (!complete_p) ++ /* If the constructor isn't complete, clear the whole object ++ beforehand. + +- /* If count_type_elements could not determine number of type elements +- for a constant-sized object, assume clearing is needed. +- Don't do this for variable-sized objects, as store_constructor +- will ignore the clearing of variable-sized objects. */ +- if (num_type_elements < 0 && int_size_in_bytes (type) >= 0) ++ ??? This ought not to be needed. For any element not present ++ in the initializer, we should simply set them to zero. Except ++ we'd need to *find* the elements that are not present, and that ++ requires trickery to avoid quadratic compile-time behavior in ++ large cases or excessive memory use in small cases. */ + cleared = true; +- /* If there are "lots" of zeros, then block clear the object first. */ +- else if (num_type_elements - num_nonzero_elements ++ else if (num_ctor_elements - num_nonzero_elements + > CLEAR_RATIO (optimize_function_for_speed_p (cfun)) +- && num_nonzero_elements < num_type_elements/4) +- cleared = true; +- /* ??? This bit ought not be needed. For any element not present +- in the initializer, we should simply set them to zero. Except +- we'd need to *find* the elements that are not present, and that +- requires trickery to avoid quadratic compile-time behavior in +- large cases or excessive memory use in small cases. */ +- else if (num_ctor_elements < num_type_elements) +- cleared = true; ++ && num_nonzero_elements < num_ctor_elements / 4) ++ /* If there are "lots" of zeros, it's more efficient to clear ++ the memory and then set the nonzero elements. */ ++ cleared = true; ++ else ++ cleared = false; + + /* If there are "lots" of initialized elements, and all of them + are valid address constants, then the entire initializer can + +=== added file 'gcc/testsuite/gcc.target/arm/pr48183.c' +--- old/gcc/testsuite/gcc.target/arm/pr48183.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/pr48183.c 2011-07-13 13:17:31 +0000 +@@ -0,0 +1,25 @@ ++/* testsuite/gcc.target/arm/pr48183.c */ ++ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-O -g" } */ ++/* { dg-add-options arm_neon } */ ++ ++#include <arm_neon.h> ++ ++void move_16bit_to_32bit (int32_t *dst, const short *src, unsigned n) ++{ ++ unsigned i; ++ int16x4x2_t input; ++ int32x4x2_t mid; ++ int32x4x2_t output; ++ ++ for (i = 0; i < n/2; i += 8) { ++ input = vld2_s16(src + i); ++ mid.val[0] = vmovl_s16(input.val[0]); ++ mid.val[1] = vmovl_s16(input.val[1]); ++ output.val[0] = vshlq_n_s32(mid.val[0], 8); ++ output.val[1] = vshlq_n_s32(mid.val[1], 8); ++ vst2q_s32((int32_t *)dst + i, output); ++ } ++} + +=== modified file 'gcc/tree.h' +--- old/gcc/tree.h 2011-07-01 09:19:21 +0000 ++++ new/gcc/tree.h 2011-07-13 13:17:31 +0000 +@@ -4627,21 +4627,10 @@ + + extern VEC(tree,gc) *ctor_to_vec (tree); + +-/* Examine CTOR to discover: +- * how many scalar fields are set to nonzero values, +- and place it in *P_NZ_ELTS; +- * how many scalar fields in total are in CTOR, +- and place it in *P_ELT_COUNT. +- * if a type is a union, and the initializer from the constructor +- is not the largest element in the union, then set *p_must_clear. 
+- +- Return whether or not CTOR is a valid static constant initializer, the same +- as "initializer_constant_valid_p (CTOR, TREE_TYPE (CTOR)) != 0". */ +- +-extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *, HOST_WIDE_INT *, +- bool *); +- +-extern HOST_WIDE_INT count_type_elements (const_tree, bool); ++extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *, ++ HOST_WIDE_INT *, bool *); ++ ++extern bool complete_ctor_at_level_p (const_tree, HOST_WIDE_INT, const_tree); + + /* integer_zerop (tree x) is nonzero if X is an integer constant of value 0. */ + + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106782.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106782.patch new file mode 100644 index 0000000000..5335a9e375 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106782.patch @@ -0,0 +1,27 @@ +2011-07-21 Richard Sandiford <rdsandiford@googlemail.com> + + gcc/ + Backport from mainline: + + 2011-07-21 Richard Sandiford <richard.sandiford@linaro.org> + + * regcprop.c (maybe_mode_change): Check HARD_REGNO_MODE_OK. + +=== modified file 'gcc/regcprop.c' +--- old/gcc/regcprop.c 2010-12-17 22:51:25 +0000 ++++ new/gcc/regcprop.c 2011-07-21 11:30:53 +0000 +@@ -418,10 +418,9 @@ + + offset = ((WORDS_BIG_ENDIAN ? wordoffset : 0) + + (BYTES_BIG_ENDIAN ? byteoffset : 0)); +- return gen_rtx_raw_REG (new_mode, +- regno + subreg_regno_offset (regno, orig_mode, +- offset, +- new_mode)); ++ regno += subreg_regno_offset (regno, orig_mode, offset, new_mode); ++ if (HARD_REGNO_MODE_OK (regno, new_mode)) ++ return gen_rtx_raw_REG (new_mode, regno); + } + return NULL_RTX; + } + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106783.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106783.patch new file mode 100644 index 0000000000..61e3916375 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106783.patch @@ -0,0 +1,62 @@ +2011-07-31 Revital Eres <revital.eres@linaro.org> + + gcc/ + Backport from trunk -r176970: + + * modulo-sched.c: Change comment. + (reset_sched_times): Fix print message. + (print_partial_schedule): Add print info. + +=== modified file 'gcc/modulo-sched.c' +--- old/gcc/modulo-sched.c 2011-07-04 12:01:34 +0000 ++++ new/gcc/modulo-sched.c 2011-07-31 10:58:46 +0000 +@@ -84,13 +84,14 @@ + II cycles (i.e. use register copies to prevent a def from overwriting + itself before reaching the use). + +- SMS works with countable loops whose loop count can be easily +- adjusted. This is because we peel a constant number of iterations +- into a prologue and epilogue for which we want to avoid emitting +- the control part, and a kernel which is to iterate that constant +- number of iterations less than the original loop. So the control +- part should be a set of insns clearly identified and having its +- own iv, not otherwise used in the loop (at-least for now), which ++ SMS works with countable loops (1) whose control part can be easily ++ decoupled from the rest of the loop and (2) whose loop count can ++ be easily adjusted. This is because we peel a constant number of ++ iterations into a prologue and epilogue for which we want to avoid ++ emitting the control part, and a kernel which is to iterate that ++ constant number of iterations less than the original loop. 
So the ++ control part should be a set of insns clearly identified and having ++ its own iv, not otherwise used in the loop (at-least for now), which + initializes a register before the loop to the number of iterations. + Currently SMS relies on the do-loop pattern to recognize such loops, + where (1) the control part comprises of all insns defining and/or +@@ -598,8 +599,8 @@ + /* Print the scheduling times after the rotation. */ + fprintf (dump_file, "crr_insn->node=%d (insn id %d), " + "crr_insn->cycle=%d, min_cycle=%d", crr_insn->node->cuid, +- INSN_UID (crr_insn->node->insn), SCHED_TIME (u), +- normalized_time); ++ INSN_UID (crr_insn->node->insn), normalized_time, ++ new_min_cycle); + if (JUMP_P (crr_insn->node->insn)) + fprintf (dump_file, " (branch)"); + fprintf (dump_file, "\n"); +@@ -2550,8 +2551,13 @@ + fprintf (dump, "\n[ROW %d ]: ", i); + while (ps_i) + { +- fprintf (dump, "%d, ", +- INSN_UID (ps_i->node->insn)); ++ if (JUMP_P (ps_i->node->insn)) ++ fprintf (dump, "%d (branch), ", ++ INSN_UID (ps_i->node->insn)); ++ else ++ fprintf (dump, "%d, ", ++ INSN_UID (ps_i->node->insn)); ++ + ps_i = ps_i->next_in_row; + } + } + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106784.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106784.patch new file mode 100644 index 0000000000..b82fe76d41 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106784.patch @@ -0,0 +1,458 @@ +2011-08-09 Revital Eres <revital.eres@linaro.org> + + gcc/ + Backport from trunk -r177235. + * modulo-sched.c (calculate_stage_count, + calculate_must_precede_follow, get_sched_window, + try_scheduling_node_in_cycle, remove_node_from_ps): + Add declaration. + (update_node_sched_params, set_must_precede_follow, optimize_sc): + New functions. + (reset_sched_times): Call update_node_sched_params. + (sms_schedule): Call optimize_sc. + (get_sched_window): Change function arguments. + (sms_schedule_by_order): Update call to get_sched_window. + Call set_must_precede_follow. + (calculate_stage_count): Add function argument. + +=== modified file 'gcc/modulo-sched.c' +--- old/gcc/modulo-sched.c 2011-07-31 10:58:46 +0000 ++++ new/gcc/modulo-sched.c 2011-08-09 04:51:48 +0000 +@@ -203,7 +203,16 @@ + rtx, rtx); + static void duplicate_insns_of_cycles (partial_schedule_ptr, + int, int, int, rtx); +-static int calculate_stage_count (partial_schedule_ptr ps); ++static int calculate_stage_count (partial_schedule_ptr, int); ++static void calculate_must_precede_follow (ddg_node_ptr, int, int, ++ int, int, sbitmap, sbitmap, sbitmap); ++static int get_sched_window (partial_schedule_ptr, ddg_node_ptr, ++ sbitmap, int, int *, int *, int *); ++static bool try_scheduling_node_in_cycle (partial_schedule_ptr, ddg_node_ptr, ++ int, int, sbitmap, int *, sbitmap, ++ sbitmap); ++static bool remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr); ++ + #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap) + #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time) + #define SCHED_FIRST_REG_MOVE(x) \ +@@ -577,6 +586,36 @@ + } + } + ++/* Update the sched_params (time, row and stage) for node U using the II, ++ the CYCLE of U and MIN_CYCLE. ++ We're not simply taking the following ++ SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii); ++ because the stages may not be aligned on cycle 0. 
*/ ++static void ++update_node_sched_params (ddg_node_ptr u, int ii, int cycle, int min_cycle) ++{ ++ int sc_until_cycle_zero; ++ int stage; ++ ++ SCHED_TIME (u) = cycle; ++ SCHED_ROW (u) = SMODULO (cycle, ii); ++ ++ /* The calculation of stage count is done adding the number ++ of stages before cycle zero and after cycle zero. */ ++ sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii); ++ ++ if (SCHED_TIME (u) < 0) ++ { ++ stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii); ++ SCHED_STAGE (u) = sc_until_cycle_zero - stage; ++ } ++ else ++ { ++ stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii); ++ SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1; ++ } ++} ++ + /* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of + SCHED_ROW and SCHED_STAGE. */ + static void +@@ -592,7 +631,6 @@ + ddg_node_ptr u = crr_insn->node; + int normalized_time = SCHED_TIME (u) - amount; + int new_min_cycle = PS_MIN_CYCLE (ps) - amount; +- int sc_until_cycle_zero, stage; + + if (dump_file) + { +@@ -608,23 +646,9 @@ + + gcc_assert (SCHED_TIME (u) >= ps->min_cycle); + gcc_assert (SCHED_TIME (u) <= ps->max_cycle); +- SCHED_TIME (u) = normalized_time; +- SCHED_ROW (u) = SMODULO (normalized_time, ii); +- +- /* The calculation of stage count is done adding the number +- of stages before cycle zero and after cycle zero. */ +- sc_until_cycle_zero = CALC_STAGE_COUNT (-1, new_min_cycle, ii); +- +- if (SCHED_TIME (u) < 0) +- { +- stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii); +- SCHED_STAGE (u) = sc_until_cycle_zero - stage; +- } +- else +- { +- stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii); +- SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1; +- } ++ ++ crr_insn->cycle = normalized_time; ++ update_node_sched_params (u, ii, normalized_time, new_min_cycle); + } + } + +@@ -661,6 +685,206 @@ + PREV_INSN (last)); + } + ++/* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE ++ respectively only if cycle C falls on the border of the scheduling ++ window boundaries marked by START and END cycles. STEP is the ++ direction of the window. */ ++static inline void ++set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow, ++ sbitmap *tmp_precede, sbitmap must_precede, int c, ++ int start, int end, int step) ++{ ++ *tmp_precede = NULL; ++ *tmp_follow = NULL; ++ ++ if (c == start) ++ { ++ if (step == 1) ++ *tmp_precede = must_precede; ++ else /* step == -1. */ ++ *tmp_follow = must_follow; ++ } ++ if (c == end - step) ++ { ++ if (step == 1) ++ *tmp_follow = must_follow; ++ else /* step == -1. */ ++ *tmp_precede = must_precede; ++ } ++ ++} ++ ++/* Return True if the branch can be moved to row ii-1 while ++ normalizing the partial schedule PS to start from cycle zero and thus ++ optimize the SC. Otherwise return False. */ ++static bool ++optimize_sc (partial_schedule_ptr ps, ddg_ptr g) ++{ ++ int amount = PS_MIN_CYCLE (ps); ++ sbitmap sched_nodes = sbitmap_alloc (g->num_nodes); ++ int start, end, step; ++ int ii = ps->ii; ++ bool ok = false; ++ int stage_count, stage_count_curr; ++ ++ /* Compare the SC after normalization and SC after bringing the branch ++ to row ii-1. If they are equal just bail out. 
*/ ++ stage_count = calculate_stage_count (ps, amount); ++ stage_count_curr = ++ calculate_stage_count (ps, SCHED_TIME (g->closing_branch) - (ii - 1)); ++ ++ if (stage_count == stage_count_curr) ++ { ++ if (dump_file) ++ fprintf (dump_file, "SMS SC already optimized.\n"); ++ ++ ok = false; ++ goto clear; ++ } ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "SMS Trying to optimize branch location\n"); ++ fprintf (dump_file, "SMS partial schedule before trial:\n"); ++ print_partial_schedule (ps, dump_file); ++ } ++ ++ /* First, normalize the partial scheduling. */ ++ reset_sched_times (ps, amount); ++ rotate_partial_schedule (ps, amount); ++ if (dump_file) ++ { ++ fprintf (dump_file, ++ "SMS partial schedule after normalization (ii, %d, SC %d):\n", ++ ii, stage_count); ++ print_partial_schedule (ps, dump_file); ++ } ++ ++ if (SMODULO (SCHED_TIME (g->closing_branch), ii) == ii - 1) ++ { ++ ok = true; ++ goto clear; ++ } ++ ++ sbitmap_ones (sched_nodes); ++ ++ /* Calculate the new placement of the branch. It should be in row ++ ii-1 and fall into it's scheduling window. */ ++ if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start, ++ &step, &end) == 0) ++ { ++ bool success; ++ ps_insn_ptr next_ps_i; ++ int branch_cycle = SCHED_TIME (g->closing_branch); ++ int row = SMODULO (branch_cycle, ps->ii); ++ int num_splits = 0; ++ sbitmap must_precede, must_follow, tmp_precede, tmp_follow; ++ int c; ++ ++ if (dump_file) ++ fprintf (dump_file, "\nTrying to schedule node %d " ++ "INSN = %d in (%d .. %d) step %d\n", ++ g->closing_branch->cuid, ++ (INSN_UID (g->closing_branch->insn)), start, end, step); ++ ++ gcc_assert ((step > 0 && start < end) || (step < 0 && start > end)); ++ if (step == 1) ++ { ++ c = start + ii - SMODULO (start, ii) - 1; ++ gcc_assert (c >= start); ++ if (c >= end) ++ { ++ ok = false; ++ if (dump_file) ++ fprintf (dump_file, ++ "SMS failed to schedule branch at cycle: %d\n", c); ++ goto clear; ++ } ++ } ++ else ++ { ++ c = start - SMODULO (start, ii) - 1; ++ gcc_assert (c <= start); ++ ++ if (c <= end) ++ { ++ if (dump_file) ++ fprintf (dump_file, ++ "SMS failed to schedule branch at cycle: %d\n", c); ++ ok = false; ++ goto clear; ++ } ++ } ++ ++ must_precede = sbitmap_alloc (g->num_nodes); ++ must_follow = sbitmap_alloc (g->num_nodes); ++ ++ /* Try to schedule the branch is it's new cycle. */ ++ calculate_must_precede_follow (g->closing_branch, start, end, ++ step, ii, sched_nodes, ++ must_precede, must_follow); ++ ++ set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede, ++ must_precede, c, start, end, step); ++ ++ /* Find the element in the partial schedule related to the closing ++ branch so we can remove it from it's current cycle. */ ++ for (next_ps_i = ps->rows[row]; ++ next_ps_i; next_ps_i = next_ps_i->next_in_row) ++ if (next_ps_i->node->cuid == g->closing_branch->cuid) ++ break; ++ ++ gcc_assert (next_ps_i); ++ gcc_assert (remove_node_from_ps (ps, next_ps_i)); ++ success = ++ try_scheduling_node_in_cycle (ps, g->closing_branch, ++ g->closing_branch->cuid, c, ++ sched_nodes, &num_splits, ++ tmp_precede, tmp_follow); ++ gcc_assert (num_splits == 0); ++ if (!success) ++ { ++ if (dump_file) ++ fprintf (dump_file, ++ "SMS failed to schedule branch at cycle: %d, " ++ "bringing it back to cycle %d\n", c, branch_cycle); ++ ++ /* The branch was failed to be placed in row ii - 1. ++ Put it back in it's original place in the partial ++ schedualing. 
*/ ++ set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede, ++ must_precede, branch_cycle, start, end, ++ step); ++ success = ++ try_scheduling_node_in_cycle (ps, g->closing_branch, ++ g->closing_branch->cuid, ++ branch_cycle, sched_nodes, ++ &num_splits, tmp_precede, ++ tmp_follow); ++ gcc_assert (success && (num_splits == 0)); ++ ok = false; ++ } ++ else ++ { ++ /* The branch is placed in row ii - 1. */ ++ if (dump_file) ++ fprintf (dump_file, ++ "SMS success in moving branch to cycle %d\n", c); ++ ++ update_node_sched_params (g->closing_branch, ii, c, ++ PS_MIN_CYCLE (ps)); ++ ok = true; ++ } ++ ++ free (must_precede); ++ free (must_follow); ++ } ++ ++clear: ++ free (sched_nodes); ++ return ok; ++} ++ + static void + duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage, + int to_stage, int for_prolog, rtx count_reg) +@@ -1116,6 +1340,7 @@ + int mii, rec_mii; + unsigned stage_count = 0; + HOST_WIDEST_INT loop_count = 0; ++ bool opt_sc_p = false; + + if (! (g = g_arr[loop->num])) + continue; +@@ -1197,14 +1422,32 @@ + set_node_sched_params (g); + + ps = sms_schedule_by_order (g, mii, maxii, node_order); +- +- if (ps) +- { +- stage_count = calculate_stage_count (ps); +- gcc_assert(stage_count >= 1); +- PS_STAGE_COUNT(ps) = stage_count; +- } +- ++ ++ if (ps) ++ { ++ /* Try to achieve optimized SC by normalizing the partial ++ schedule (having the cycles start from cycle zero). ++ The branch location must be placed in row ii-1 in the ++ final scheduling. If failed, shift all instructions to ++ position the branch in row ii-1. */ ++ opt_sc_p = optimize_sc (ps, g); ++ if (opt_sc_p) ++ stage_count = calculate_stage_count (ps, 0); ++ else ++ { ++ /* Bring the branch to cycle ii-1. */ ++ int amount = SCHED_TIME (g->closing_branch) - (ps->ii - 1); ++ ++ if (dump_file) ++ fprintf (dump_file, "SMS schedule branch at cycle ii-1\n"); ++ ++ stage_count = calculate_stage_count (ps, amount); ++ } ++ ++ gcc_assert (stage_count >= 1); ++ PS_STAGE_COUNT (ps) = stage_count; ++ } ++ + /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of + 1 means that there is no interleaving between iterations thus + we let the scheduling passes do the job in this case. */ +@@ -1225,12 +1468,16 @@ + else + { + struct undo_replace_buff_elem *reg_move_replaces; +- int amount = SCHED_TIME (g->closing_branch) + 1; ++ ++ if (!opt_sc_p) ++ { ++ /* Rotate the partial schedule to have the branch in row ii-1. */ ++ int amount = SCHED_TIME (g->closing_branch) - (ps->ii - 1); ++ ++ reset_sched_times (ps, amount); ++ rotate_partial_schedule (ps, amount); ++ } + +- /* Set the stage boundaries. The closing_branch was scheduled +- and should appear in the last (ii-1) row. */ +- reset_sched_times (ps, amount); +- rotate_partial_schedule (ps, amount); + set_columns_for_ps (ps); + + canon_loop (loop); +@@ -1382,13 +1629,11 @@ + scheduling window is empty and zero otherwise. */ + + static int +-get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i, ++get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node, + sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p) + { + int start, step, end; + ddg_edge_ptr e; +- int u = nodes_order [i]; +- ddg_node_ptr u_node = &ps->g->nodes[u]; + sbitmap psp = sbitmap_alloc (ps->g->num_nodes); + sbitmap pss = sbitmap_alloc (ps->g->num_nodes); + sbitmap u_node_preds = NODE_PREDECESSORS (u_node); +@@ -1800,7 +2045,7 @@ + + /* Try to get non-empty scheduling window. 
*/ + success = 0; +- if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start, ++ if (get_sched_window (ps, u_node, sched_nodes, ii, &start, + &step, &end) == 0) + { + if (dump_file) +@@ -1817,24 +2062,11 @@ + + for (c = start; c != end; c += step) + { +- sbitmap tmp_precede = NULL; +- sbitmap tmp_follow = NULL; +- +- if (c == start) +- { +- if (step == 1) +- tmp_precede = must_precede; +- else /* step == -1. */ +- tmp_follow = must_follow; +- } +- if (c == end - step) +- { +- if (step == 1) +- tmp_follow = must_follow; +- else /* step == -1. */ +- tmp_precede = must_precede; +- } +- ++ sbitmap tmp_precede, tmp_follow; ++ ++ set_must_precede_follow (&tmp_follow, must_follow, ++ &tmp_precede, must_precede, ++ c, start, end, step); + success = + try_scheduling_node_in_cycle (ps, u_node, u, c, + sched_nodes, +@@ -2899,12 +3131,10 @@ + } + + /* Calculate the stage count of the partial schedule PS. The calculation +- takes into account the rotation to bring the closing branch to row +- ii-1. */ ++ takes into account the rotation amount passed in ROTATION_AMOUNT. */ + int +-calculate_stage_count (partial_schedule_ptr ps) ++calculate_stage_count (partial_schedule_ptr ps, int rotation_amount) + { +- int rotation_amount = (SCHED_TIME (ps->g->closing_branch)) + 1; + int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount; + int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount; + int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii); + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106785.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106785.patch new file mode 100644 index 0000000000..608dd18336 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106785.patch @@ -0,0 +1,39 @@ + 2011-08-09 Revital Eres <revital.eres@linaro.org> + + gcc/ + Backport from trunk -r176972: + + * ddg.c (create_ddg_dep_from_intra_loop_link): Remove + the creation of anti-dep edge from a branch. + (add_cross_iteration_register_deps): + Create anti-dep edge from a branch. + +=== modified file 'gcc/ddg.c' +--- old/gcc/ddg.c 2011-07-04 11:00:06 +0000 ++++ new/gcc/ddg.c 2011-07-31 11:29:10 +0000 +@@ -197,11 +197,6 @@ + } + } + +- /* If a true dep edge enters the branch create an anti edge in the +- opposite direction to prevent the creation of reg-moves. */ +- if ((DEP_TYPE (link) == REG_DEP_TRUE) && JUMP_P (dest_node->insn)) +- create_ddg_dep_no_link (g, dest_node, src_node, ANTI_DEP, REG_DEP, 1); +- + latency = dep_cost (link); + e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance); + add_edge_to_ddg (g, e); +@@ -306,8 +301,11 @@ + + gcc_assert (first_def_node); + ++ /* Always create the edge if the use node is a branch in ++ order to prevent the creation of reg-moves. 
*/ + if (DF_REF_ID (last_def) != DF_REF_ID (first_def) +- || !flag_modulo_sched_allow_regmoves) ++ || !flag_modulo_sched_allow_regmoves ++ || JUMP_P (use_node->insn)) + create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP, + REG_DEP, 1); + + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106786.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106786.patch new file mode 100644 index 0000000000..55b7cc1078 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106786.patch @@ -0,0 +1,94 @@ +2011-08-11 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + gcc/ + Backport from mainline: + + 2011-07-28 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + * config/arm/vfp.md ("*movdf_vfp"): Handle the VFP constraints + before the core constraints. Adjust attributes. + (*thumb2_movdf_vfp"): Likewise. + +=== modified file 'gcc/config/arm/vfp.md' +--- old/gcc/config/arm/vfp.md 2011-01-20 22:03:29 +0000 ++++ new/gcc/config/arm/vfp.md 2011-07-27 12:59:19 +0000 +@@ -401,8 +401,8 @@ + ;; DFmode moves + + (define_insn "*movdf_vfp" +- [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,r, m,w ,Uv,w,r") +- (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,mF,r,UvF,w, w,r"))] ++ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w ,Uv,r, m,w,r") ++ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,UvF,w ,mF,r,w,r"))] + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP + && ( register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode))" +@@ -418,9 +418,9 @@ + gcc_assert (TARGET_VFP_DOUBLE); + return \"fconstd%?\\t%P0, #%G1\"; + case 3: case 4: ++ return output_move_vfp (operands); ++ case 5: case 6: + return output_move_double (operands); +- case 5: case 6: +- return output_move_vfp (operands); + case 7: + if (TARGET_VFP_SINGLE) + return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\"; +@@ -435,7 +435,7 @@ + " + [(set_attr "type" + "r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*") +- (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8) ++ (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8) + (eq_attr "alternative" "7") + (if_then_else + (eq (symbol_ref "TARGET_VFP_SINGLE") +@@ -449,8 +449,8 @@ + ) + + (define_insn "*thumb2_movdf_vfp" +- [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,r, m,w ,Uv,w,r") +- (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,mF,r,UvF,w, w,r"))] ++ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w ,Uv,r ,m,w,r") ++ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,UvF,w, mF,r, w,r"))] + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP" + "* + { +@@ -463,10 +463,10 @@ + case 2: + gcc_assert (TARGET_VFP_DOUBLE); + return \"fconstd%?\\t%P0, #%G1\"; +- case 3: case 4: case 8: ++ case 3: case 4: ++ return output_move_vfp (operands); ++ case 5: case 6: case 8: + return output_move_double (operands); +- case 5: case 6: +- return output_move_vfp (operands); + case 7: + if (TARGET_VFP_SINGLE) + return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\"; +@@ -478,8 +478,8 @@ + } + " + [(set_attr "type" +- "r_2_f,f_2_r,fconstd,load2,store2,f_loadd,f_stored,ffarithd,*") +- (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8) ++ "r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*") ++ (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8) + (eq_attr "alternative" "7") + (if_then_else + (eq (symbol_ref 
"TARGET_VFP_SINGLE") +@@ -487,8 +487,8 @@ + (const_int 8) + (const_int 4))] + (const_int 4))) +- (set_attr "pool_range" "*,*,*,4096,*,1020,*,*,*") +- (set_attr "neg_pool_range" "*,*,*,0,*,1008,*,*,*")] ++ (set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*") ++ (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")] + ) + + + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106787.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106787.patch new file mode 100644 index 0000000000..bdb48ad1e6 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106787.patch @@ -0,0 +1,30 @@ +2011-08-15 Michael Hope <michael.hope@linaro.org> + + Backport from mainline r177357 + + gcc/testsuite/ + 2011-08-04 Ian Bolton <ian.bolton@arm.com> + + * gcc.target/arm/vfp-1.c: no large negative offsets on Thumb2. + +=== modified file 'gcc/testsuite/gcc.target/arm/vfp-1.c' +--- old/gcc/testsuite/gcc.target/arm/vfp-1.c 2011-01-01 08:52:03 +0000 ++++ new/gcc/testsuite/gcc.target/arm/vfp-1.c 2011-08-09 23:22:51 +0000 +@@ -127,13 +127,13 @@ + + void test_ldst (float f[], double d[]) { + /* { dg-final { scan-assembler "flds.+ \\\[r0, #1020\\\]" } } */ +- /* { dg-final { scan-assembler "flds.+ \\\[r0, #-1020\\\]" } } */ ++ /* { dg-final { scan-assembler "flds.+ \\\[r\[0-9\], #-1020\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */ + /* { dg-final { scan-assembler "add.+ r0, #1024" } } */ +- /* { dg-final { scan-assembler "fsts.+ \\\[r0, #0\\\]\n" } } */ ++ /* { dg-final { scan-assembler "fsts.+ \\\[r\[0-9\], #0\\\]\n" } } */ + f[256] = f[255] + f[-255]; + + /* { dg-final { scan-assembler "fldd.+ \\\[r1, #1016\\\]" } } */ +- /* { dg-final { scan-assembler "fldd.+ \\\[r1, #-1016\\\]" } } */ ++ /* { dg-final { scan-assembler "fldd.+ \\\[r\[1-9\], #-1016\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */ + /* { dg-final { scan-assembler "fstd.+ \\\[r1, #256\\\]" } } */ + d[32] = d[127] + d[-127]; + } + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106789.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106789.patch new file mode 100644 index 0000000000..a384ce7a18 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106789.patch @@ -0,0 +1,33 @@ +2011-08-15 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + * config/rs6000/rs6000.c (paired_expand_vector_init): Don't create + CONST_VECTORs with symbolic elements. + (rs6000_expand_vector_init): Likewise. 
+ +=== modified file 'gcc/config/rs6000/rs6000.c' +--- old/gcc/config/rs6000/rs6000.c 2011-07-27 18:17:15 +0000 ++++ new/gcc/config/rs6000/rs6000.c 2011-08-16 08:59:36 +0000 +@@ -5134,7 +5134,9 @@ + for (i = 0; i < n_elts; ++i) + { + x = XVECEXP (vals, 0, i); +- if (!CONSTANT_P (x)) ++ if (!(CONST_INT_P (x) ++ || GET_CODE (x) == CONST_DOUBLE ++ || GET_CODE (x) == CONST_FIXED)) + ++n_var; + } + if (n_var == 0) +@@ -5286,7 +5288,9 @@ + for (i = 0; i < n_elts; ++i) + { + x = XVECEXP (vals, 0, i); +- if (!CONSTANT_P (x)) ++ if (!(CONST_INT_P (x) ++ || GET_CODE (x) == CONST_DOUBLE ++ || GET_CODE (x) == CONST_FIXED)) + ++n_var, one_var = i; + else if (x != CONST0_RTX (inner_mode)) + all_const_zero = false; + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106792.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106792.patch new file mode 100644 index 0000000000..12c578c2b4 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106792.patch @@ -0,0 +1,61 @@ +2011-08-18 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-08-12 Richard Sandiford <rdsandiford@googlemail.com> + + * config/arm/arm.c (get_label_padding): New function. + (create_fix_barrier, arm_reorg): Use it. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-07-04 14:03:49 +0000 ++++ new/gcc/config/arm/arm.c 2011-08-12 08:08:31 +0000 +@@ -11769,6 +11769,19 @@ + return 0; + } + ++/* Return the maximum amount of padding that will be inserted before ++ label LABEL. */ ++ ++static HOST_WIDE_INT ++get_label_padding (rtx label) ++{ ++ HOST_WIDE_INT align, min_insn_size; ++ ++ align = 1 << label_to_alignment (label); ++ min_insn_size = TARGET_THUMB ? 2 : 4; ++ return align > min_insn_size ? align - min_insn_size : 0; ++} ++ + /* Move a minipool fix MP from its current location to before MAX_MP. + If MAX_MP is NULL, then MP doesn't need moving, but the addressing + constraints may need updating. */ +@@ -12315,8 +12328,12 @@ + within range. */ + gcc_assert (GET_CODE (from) != BARRIER); + +- /* Count the length of this insn. */ +- count += get_attr_length (from); ++ /* Count the length of this insn. This must stay in sync with the ++ code that pushes minipool fixes. */ ++ if (LABEL_P (from)) ++ count += get_label_padding (from); ++ else ++ count += get_attr_length (from); + + /* If there is a jump table, add its length. */ + tmp = is_jump_table (from); +@@ -12736,6 +12753,11 @@ + insn = table; + } + } ++ else if (LABEL_P (insn)) ++ /* Add the worst-case padding due to alignment. We don't add ++ the _current_ padding because the minipool insertions ++ themselves might change it. */ ++ address += get_label_padding (insn); + } + + fix = minipool_fix_head; + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106794.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106794.patch new file mode 100644 index 0000000000..29663c64a0 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106794.patch @@ -0,0 +1,2648 @@ +2011-08-24 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + gcc/ + * config/arm/arm.c (arm_init_neon_builtins): Use + n_operands instead of n_generator_args. + +2011-08-24 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline + 2011-04-18 Jie Zhang <jie@codesourcery.com> + Richard Earnshaw <rearnsha@arm.com> + + * arm.c (neon_builtin_type_bits): Remove. 
+ (typedef enum neon_builtin_mode): New. + (T_MAX): Don't define. + (typedef enum neon_builtin_datum): Remove bits, codes[], + num_vars and base_fcode. Add mode, code and fcode. + (VAR1, VAR2, VAR3, VAR4, VAR5, VAR6, VAR7, VAR8, VAR9 + VAR10): Change accordingly. + (neon_builtin_data[]): Change accordingly + (arm_init_neon_builtins): Change accordingly. + (neon_builtin_compare): Remove. + (locate_neon_builtin_icode): Remove. + (arm_expand_neon_builtin): Change accordingly. + + * arm.h (enum arm_builtins): Move to ... + * arm.c (enum arm_builtins): ... here; and rearrange builtin code. + + * arm.c (arm_builtin_decl): Declare. + (TARGET_BUILTIN_DECL): Define. + (enum arm_builtins): Correct ARM_BUILTIN_MAX. + (arm_builtin_decls[]): New. + (arm_init_neon_builtins): Store builtin declarations in + arm_builtin_decls[]. + (arm_init_tls_builtins): Likewise. + (arm_init_iwmmxt_builtins): Likewise. Refactor initialization code. + (arm_builtin_decl): New. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-08-18 13:53:37 +0000 ++++ new/gcc/config/arm/arm.c 2011-08-24 17:35:16 +0000 +@@ -162,6 +162,7 @@ + static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx); + static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int); + static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int); ++static tree arm_builtin_decl (unsigned, bool); + static void emit_constant_insn (rtx cond, rtx pattern); + static rtx emit_set_insn (rtx, rtx); + static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode, +@@ -415,6 +416,8 @@ + #define TARGET_INIT_BUILTINS arm_init_builtins + #undef TARGET_EXPAND_BUILTIN + #define TARGET_EXPAND_BUILTIN arm_expand_builtin ++#undef TARGET_BUILTIN_DECL ++#define TARGET_BUILTIN_DECL arm_builtin_decl + + #undef TARGET_INIT_LIBFUNCS + #define TARGET_INIT_LIBFUNCS arm_init_libfuncs +@@ -18147,505 +18150,31 @@ + return value; + } + +-#define def_mbuiltin(MASK, NAME, TYPE, CODE) \ +- do \ +- { \ +- if ((MASK) & insn_flags) \ +- add_builtin_function ((NAME), (TYPE), (CODE), \ +- BUILT_IN_MD, NULL, NULL_TREE); \ +- } \ +- while (0) +- +-struct builtin_description +-{ +- const unsigned int mask; +- const enum insn_code icode; +- const char * const name; +- const enum arm_builtins code; +- const enum rtx_code comparison; +- const unsigned int flag; +-}; +- +-static const struct builtin_description bdesc_2arg[] = +-{ +-#define IWMMXT_BUILTIN(code, string, builtin) \ +- { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \ +- ARM_BUILTIN_##builtin, UNKNOWN, 0 }, +- +- IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB) +- IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH) +- IWMMXT_BUILTIN (addv2si3, "waddw", WADDW) +- IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB) +- IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH) +- IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW) +- IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB) +- IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH) +- IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW) +- IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB) +- IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH) +- IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW) +- IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB) +- IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH) +- IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW) +- IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB) +- IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH) +- IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW) +- IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL) +- IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM) 
+- IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM) +- IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB) +- IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH) +- IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW) +- IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB) +- IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH) +- IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW) +- IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB) +- IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH) +- IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW) +- IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB) +- IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB) +- IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH) +- IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH) +- IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW) +- IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW) +- IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB) +- IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB) +- IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH) +- IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH) +- IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW) +- IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW) +- IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND) +- IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN) +- IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR) +- IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR) +- IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B) +- IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H) +- IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR) +- IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR) +- IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB) +- IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH) +- IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW) +- IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB) +- IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH) +- IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW) +- IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS) +- IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU) +- +-#define IWMMXT_BUILTIN2(code, builtin) \ +- { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 }, +- +- IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS) +- IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS) +- IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS) +- IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS) +- IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS) +- IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS) +- IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH) +- IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI) +- IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW) +- IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI) +- IWMMXT_BUILTIN2 (ashldi3_di, WSLLD) +- IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI) +- IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH) +- IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI) +- IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW) +- IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI) +- IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD) +- IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI) +- IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH) +- IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI) +- IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW) +- IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI) +- IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD) +- IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI) +- IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH) +- IWMMXT_BUILTIN2 (rorv4hi3, WRORHI) +- IWMMXT_BUILTIN2 (rorv2si3_di, WRORW) +- IWMMXT_BUILTIN2 (rorv2si3, WRORWI) +- IWMMXT_BUILTIN2 (rordi3_di, WRORD) +- IWMMXT_BUILTIN2 (rordi3, WRORDI) +- IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ) +- IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ) +-}; +- +-static const struct builtin_description bdesc_1arg[] = +-{ +- IWMMXT_BUILTIN 
(iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB) +- IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH) +- IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW) +- IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB) +- IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH) +- IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW) +- IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB) +- IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH) +- IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW) +- IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB) +- IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH) +- IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW) +- IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB) +- IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH) +- IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW) +- IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB) +- IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH) +- IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW) +-}; +- +-/* Set up all the iWMMXt builtins. This is +- not called if TARGET_IWMMXT is zero. */ +- +-static void +-arm_init_iwmmxt_builtins (void) +-{ +- const struct builtin_description * d; +- size_t i; +- tree endlink = void_list_node; +- +- tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode); +- tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode); +- tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode); +- +- tree int_ftype_int +- = build_function_type (integer_type_node, +- tree_cons (NULL_TREE, integer_type_node, endlink)); +- tree v8qi_ftype_v8qi_v8qi_int +- = build_function_type (V8QI_type_node, +- tree_cons (NULL_TREE, V8QI_type_node, +- tree_cons (NULL_TREE, V8QI_type_node, +- tree_cons (NULL_TREE, +- integer_type_node, +- endlink)))); +- tree v4hi_ftype_v4hi_int +- = build_function_type (V4HI_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- tree_cons (NULL_TREE, integer_type_node, +- endlink))); +- tree v2si_ftype_v2si_int +- = build_function_type (V2SI_type_node, +- tree_cons (NULL_TREE, V2SI_type_node, +- tree_cons (NULL_TREE, integer_type_node, +- endlink))); +- tree v2si_ftype_di_di +- = build_function_type (V2SI_type_node, +- tree_cons (NULL_TREE, long_long_integer_type_node, +- tree_cons (NULL_TREE, long_long_integer_type_node, +- endlink))); +- tree di_ftype_di_int +- = build_function_type (long_long_integer_type_node, +- tree_cons (NULL_TREE, long_long_integer_type_node, +- tree_cons (NULL_TREE, integer_type_node, +- endlink))); +- tree di_ftype_di_int_int +- = build_function_type (long_long_integer_type_node, +- tree_cons (NULL_TREE, long_long_integer_type_node, +- tree_cons (NULL_TREE, integer_type_node, +- tree_cons (NULL_TREE, +- integer_type_node, +- endlink)))); +- tree int_ftype_v8qi +- = build_function_type (integer_type_node, +- tree_cons (NULL_TREE, V8QI_type_node, +- endlink)); +- tree int_ftype_v4hi +- = build_function_type (integer_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- endlink)); +- tree int_ftype_v2si +- = build_function_type (integer_type_node, +- tree_cons (NULL_TREE, V2SI_type_node, +- endlink)); +- tree int_ftype_v8qi_int +- = build_function_type (integer_type_node, +- tree_cons (NULL_TREE, V8QI_type_node, +- tree_cons (NULL_TREE, integer_type_node, +- endlink))); +- tree int_ftype_v4hi_int +- = build_function_type (integer_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- tree_cons (NULL_TREE, 
integer_type_node, +- endlink))); +- tree int_ftype_v2si_int +- = build_function_type (integer_type_node, +- tree_cons (NULL_TREE, V2SI_type_node, +- tree_cons (NULL_TREE, integer_type_node, +- endlink))); +- tree v8qi_ftype_v8qi_int_int +- = build_function_type (V8QI_type_node, +- tree_cons (NULL_TREE, V8QI_type_node, +- tree_cons (NULL_TREE, integer_type_node, +- tree_cons (NULL_TREE, +- integer_type_node, +- endlink)))); +- tree v4hi_ftype_v4hi_int_int +- = build_function_type (V4HI_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- tree_cons (NULL_TREE, integer_type_node, +- tree_cons (NULL_TREE, +- integer_type_node, +- endlink)))); +- tree v2si_ftype_v2si_int_int +- = build_function_type (V2SI_type_node, +- tree_cons (NULL_TREE, V2SI_type_node, +- tree_cons (NULL_TREE, integer_type_node, +- tree_cons (NULL_TREE, +- integer_type_node, +- endlink)))); +- /* Miscellaneous. */ +- tree v8qi_ftype_v4hi_v4hi +- = build_function_type (V8QI_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- endlink))); +- tree v4hi_ftype_v2si_v2si +- = build_function_type (V4HI_type_node, +- tree_cons (NULL_TREE, V2SI_type_node, +- tree_cons (NULL_TREE, V2SI_type_node, +- endlink))); +- tree v2si_ftype_v4hi_v4hi +- = build_function_type (V2SI_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- endlink))); +- tree v2si_ftype_v8qi_v8qi +- = build_function_type (V2SI_type_node, +- tree_cons (NULL_TREE, V8QI_type_node, +- tree_cons (NULL_TREE, V8QI_type_node, +- endlink))); +- tree v4hi_ftype_v4hi_di +- = build_function_type (V4HI_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- tree_cons (NULL_TREE, +- long_long_integer_type_node, +- endlink))); +- tree v2si_ftype_v2si_di +- = build_function_type (V2SI_type_node, +- tree_cons (NULL_TREE, V2SI_type_node, +- tree_cons (NULL_TREE, +- long_long_integer_type_node, +- endlink))); +- tree void_ftype_int_int +- = build_function_type (void_type_node, +- tree_cons (NULL_TREE, integer_type_node, +- tree_cons (NULL_TREE, integer_type_node, +- endlink))); +- tree di_ftype_void +- = build_function_type (long_long_unsigned_type_node, endlink); +- tree di_ftype_v8qi +- = build_function_type (long_long_integer_type_node, +- tree_cons (NULL_TREE, V8QI_type_node, +- endlink)); +- tree di_ftype_v4hi +- = build_function_type (long_long_integer_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- endlink)); +- tree di_ftype_v2si +- = build_function_type (long_long_integer_type_node, +- tree_cons (NULL_TREE, V2SI_type_node, +- endlink)); +- tree v2si_ftype_v4hi +- = build_function_type (V2SI_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- endlink)); +- tree v4hi_ftype_v8qi +- = build_function_type (V4HI_type_node, +- tree_cons (NULL_TREE, V8QI_type_node, +- endlink)); +- +- tree di_ftype_di_v4hi_v4hi +- = build_function_type (long_long_unsigned_type_node, +- tree_cons (NULL_TREE, +- long_long_unsigned_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- tree_cons (NULL_TREE, +- V4HI_type_node, +- endlink)))); +- +- tree di_ftype_v4hi_v4hi +- = build_function_type (long_long_unsigned_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- endlink))); +- +- /* Normal vector binops. 
*/ +- tree v8qi_ftype_v8qi_v8qi +- = build_function_type (V8QI_type_node, +- tree_cons (NULL_TREE, V8QI_type_node, +- tree_cons (NULL_TREE, V8QI_type_node, +- endlink))); +- tree v4hi_ftype_v4hi_v4hi +- = build_function_type (V4HI_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- endlink))); +- tree v2si_ftype_v2si_v2si +- = build_function_type (V2SI_type_node, +- tree_cons (NULL_TREE, V2SI_type_node, +- tree_cons (NULL_TREE, V2SI_type_node, +- endlink))); +- tree di_ftype_di_di +- = build_function_type (long_long_unsigned_type_node, +- tree_cons (NULL_TREE, long_long_unsigned_type_node, +- tree_cons (NULL_TREE, +- long_long_unsigned_type_node, +- endlink))); +- +- /* Add all builtins that are more or less simple operations on two +- operands. */ +- for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) +- { +- /* Use one of the operands; the target can have a different mode for +- mask-generating compares. */ +- enum machine_mode mode; +- tree type; +- +- if (d->name == 0) +- continue; +- +- mode = insn_data[d->icode].operand[1].mode; +- +- switch (mode) +- { +- case V8QImode: +- type = v8qi_ftype_v8qi_v8qi; +- break; +- case V4HImode: +- type = v4hi_ftype_v4hi_v4hi; +- break; +- case V2SImode: +- type = v2si_ftype_v2si_v2si; +- break; +- case DImode: +- type = di_ftype_di_di; +- break; +- +- default: +- gcc_unreachable (); +- } +- +- def_mbuiltin (d->mask, d->name, type, d->code); +- } +- +- /* Add the remaining MMX insns with somewhat more complicated types. */ +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, 
ARM_BUILTIN_WRORH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW); +- 
def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ); +- +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB); +- def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT); +-} +- +-static void +-arm_init_tls_builtins (void) +-{ +- tree ftype, decl; +- +- ftype = build_function_type (ptr_type_node, void_list_node); +- decl = add_builtin_function ("__builtin_thread_pointer", ftype, +- ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD, +- NULL, NULL_TREE); +- TREE_NOTHROW (decl) = 1; +- TREE_READONLY (decl) = 1; +-} +- +-enum neon_builtin_type_bits { +- T_V8QI = 0x0001, +- T_V4HI = 0x0002, +- T_V2SI = 0x0004, +- T_V2SF = 0x0008, +- T_DI = 0x0010, +- T_DREG = 0x001F, +- T_V16QI = 0x0020, +- T_V8HI = 0x0040, +- T_V4SI = 0x0080, +- T_V4SF = 0x0100, +- T_V2DI = 0x0200, +- T_TI = 0x0400, +- T_QREG = 0x07E0, +- T_EI = 0x0800, +- T_OI = 0x1000 +-}; ++typedef enum { ++ T_V8QI, ++ T_V4HI, ++ T_V2SI, ++ T_V2SF, ++ T_DI, ++ T_V16QI, ++ T_V8HI, ++ T_V4SI, ++ T_V4SF, ++ T_V2DI, ++ T_TI, ++ T_EI, ++ T_OI, ++ T_MAX /* Size of enum. Keep last. 
*/ ++} neon_builtin_type_mode; ++ ++#define TYPE_MODE_BIT(X) (1 << (X)) ++ ++#define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \ ++ | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \ ++ | TYPE_MODE_BIT (T_DI)) ++#define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \ ++ | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \ ++ | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI)) + + #define v8qi_UP T_V8QI + #define v4hi_UP T_V4HI +@@ -18663,8 +18192,6 @@ + + #define UP(X) X##_UP + +-#define T_MAX 13 +- + typedef enum { + NEON_BINOP, + NEON_TERNOP, +@@ -18708,49 +18235,42 @@ + typedef struct { + const char *name; + const neon_itype itype; +- const int bits; +- const enum insn_code codes[T_MAX]; +- const unsigned int num_vars; +- unsigned int base_fcode; ++ const neon_builtin_type_mode mode; ++ const enum insn_code code; ++ unsigned int fcode; + } neon_builtin_datum; + + #define CF(N,X) CODE_FOR_neon_##N##X + + #define VAR1(T, N, A) \ +- #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0 ++ {#N, NEON_##T, UP (A), CF (N, A), 0} + #define VAR2(T, N, A, B) \ +- #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0 ++ VAR1 (T, N, A), \ ++ {#N, NEON_##T, UP (B), CF (N, B), 0} + #define VAR3(T, N, A, B, C) \ +- #N, NEON_##T, UP (A) | UP (B) | UP (C), \ +- { CF (N, A), CF (N, B), CF (N, C) }, 3, 0 ++ VAR2 (T, N, A, B), \ ++ {#N, NEON_##T, UP (C), CF (N, C), 0} + #define VAR4(T, N, A, B, C, D) \ +- #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \ +- { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0 ++ VAR3 (T, N, A, B, C), \ ++ {#N, NEON_##T, UP (D), CF (N, D), 0} + #define VAR5(T, N, A, B, C, D, E) \ +- #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \ +- { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0 ++ VAR4 (T, N, A, B, C, D), \ ++ {#N, NEON_##T, UP (E), CF (N, E), 0} + #define VAR6(T, N, A, B, C, D, E, F) \ +- #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \ +- { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0 ++ VAR5 (T, N, A, B, C, D, E), \ ++ {#N, NEON_##T, UP (F), CF (N, F), 0} + #define VAR7(T, N, A, B, C, D, E, F, G) \ +- #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \ +- { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \ +- CF (N, G) }, 7, 0 ++ VAR6 (T, N, A, B, C, D, E, F), \ ++ {#N, NEON_##T, UP (G), CF (N, G), 0} + #define VAR8(T, N, A, B, C, D, E, F, G, H) \ +- #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \ +- | UP (H), \ +- { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \ +- CF (N, G), CF (N, H) }, 8, 0 ++ VAR7 (T, N, A, B, C, D, E, F, G), \ ++ {#N, NEON_##T, UP (H), CF (N, H), 0} + #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \ +- #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \ +- | UP (H) | UP (I), \ +- { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \ +- CF (N, G), CF (N, H), CF (N, I) }, 9, 0 ++ VAR8 (T, N, A, B, C, D, E, F, G, H), \ ++ {#N, NEON_##T, UP (I), CF (N, I), 0} + #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \ +- #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \ +- | UP (H) | UP (I) | UP (J), \ +- { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \ +- CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0 ++ VAR9 (T, N, A, B, C, D, E, F, G, H, I), \ ++ {#N, NEON_##T, UP (J), CF (N, J), 0} + + /* The mode entries in the following table correspond to the "key" type of the + 
instruction variant, i.e. equivalent to that which would be specified after +@@ -18758,192 +18278,190 @@ + (Signed/unsigned/polynomial types are not differentiated between though, and + are all mapped onto the same mode for a given element size.) The modes + listed per instruction should be the same as those defined for that +- instruction's pattern in neon.md. +- WARNING: Variants should be listed in the same increasing order as +- neon_builtin_type_bits. */ ++ instruction's pattern in neon.md. */ + + static neon_builtin_datum neon_builtin_data[] = + { +- { VAR10 (BINOP, vadd, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) }, +- { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) }, +- { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, +- { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, +- { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) }, +- { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) }, +- { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) }, +- { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) }, +- { VAR2 (TERNOP, vqdmlal, v4hi, v2si) }, +- { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) }, +- { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) }, +- { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) }, +- { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) }, +- { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) }, +- { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) }, +- { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) }, +- { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) }, +- { VAR2 (BINOP, vqdmull, v4hi, v2si) }, +- { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, +- { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, +- { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, +- { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) }, +- { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) }, +- { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) }, +- { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, +- { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, +- { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, +- { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) }, +- { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, +- { VAR10 (BINOP, vsub, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) }, +- { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) }, +- { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, +- { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, +- { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) }, +- { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR2 (BINOP, vcage, v2sf, v4sf) }, +- { VAR2 (BINOP, vcagt, v2sf, v4sf) }, +- { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, +- { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) }, +- { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, +- { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) }, +- { VAR8 (BINOP, vmax, 
v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) }, +- { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, +- { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, +- { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) }, +- { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) }, +- { VAR2 (BINOP, vrecps, v2sf, v4sf) }, +- { VAR2 (BINOP, vrsqrts, v2sf, v4sf) }, +- { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, +- { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, +- { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, +- { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, +- { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, +- { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, +- { VAR2 (UNOP, vcnt, v8qi, v16qi) }, +- { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) }, +- { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) }, +- { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, ++ VAR10 (BINOP, vadd, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR3 (BINOP, vaddl, v8qi, v4hi, v2si), ++ VAR3 (BINOP, vaddw, v8qi, v4hi, v2si), ++ VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si), ++ VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), ++ VAR3 (BINOP, vaddhn, v8hi, v4si, v2di), ++ VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si), ++ VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si), ++ VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si), ++ VAR2 (TERNOP, vqdmlal, v4hi, v2si), ++ VAR2 (TERNOP, vqdmlsl, v4hi, v2si), ++ VAR3 (BINOP, vmull, v8qi, v4hi, v2si), ++ VAR2 (SCALARMULL, vmull_n, v4hi, v2si), ++ VAR2 (LANEMULL, vmull_lane, v4hi, v2si), ++ VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si), ++ VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si), ++ VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si), ++ VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si), ++ VAR2 (BINOP, vqdmull, v4hi, v2si), ++ VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), ++ VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), ++ VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), ++ VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di), ++ VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di), ++ VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di), ++ VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), ++ VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), ++ VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), ++ VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si), ++ VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), ++ VAR10 (BINOP, vsub, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR3 (BINOP, vsubl, v8qi, v4hi, v2si), ++ VAR3 (BINOP, vsubw, v8qi, v4hi, v2si), ++ VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), ++ VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si), ++ VAR3 (BINOP, vsubhn, v8hi, v4si, v2di), ++ VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ 
VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR2 (BINOP, vcage, v2sf, v4sf), ++ VAR2 (BINOP, vcagt, v2sf, v4sf), ++ VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si), ++ VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR3 (BINOP, vabdl, v8qi, v4hi, v2si), ++ VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si), ++ VAR3 (TERNOP, vabal, v8qi, v4hi, v2si), ++ VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf), ++ VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si), ++ VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si), ++ VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf), ++ VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf), ++ VAR2 (BINOP, vrecps, v2sf, v4sf), ++ VAR2 (BINOP, vrsqrts, v2sf, v4sf), ++ VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), ++ VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), ++ VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si), ++ VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si), ++ VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si), ++ VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si), ++ VAR2 (UNOP, vcnt, v8qi, v16qi), ++ VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf), ++ VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf), ++ VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si), + /* FIXME: vget_lane supports more variants than this! */ +- { VAR10 (GETLANE, vget_lane, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR10 (SETLANE, vset_lane, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) }, +- { VAR10 (DUP, vdup_n, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR10 (DUPLANE, vdup_lane, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) }, +- { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) }, +- { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) }, +- { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) }, +- { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) }, +- { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, +- { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, +- { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) }, +- { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) }, +- { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, +- { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) }, +- { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) }, +- { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, +- { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, +- { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) }, +- { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) }, +- { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, +- { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) }, +- { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) }, +- { VAR10 (BINOP, vext, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR4 (UNOP, 
vrev32, v8qi, v4hi, v16qi, v8hi) }, +- { VAR2 (UNOP, vrev16, v8qi, v16qi) }, +- { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) }, +- { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) }, +- { VAR10 (SELECT, vbsl, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR1 (VTBL, vtbl1, v8qi) }, +- { VAR1 (VTBL, vtbl2, v8qi) }, +- { VAR1 (VTBL, vtbl3, v8qi) }, +- { VAR1 (VTBL, vtbl4, v8qi) }, +- { VAR1 (VTBX, vtbx1, v8qi) }, +- { VAR1 (VTBX, vtbx2, v8qi) }, +- { VAR1 (VTBX, vtbx3, v8qi) }, +- { VAR1 (VTBX, vtbx4, v8qi) }, +- { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, +- { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) }, +- { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) }, +- { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) }, +- { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) }, +- { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) }, +- { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR10 (LOAD1, vld1, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR10 (LOAD1LANE, vld1_lane, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR10 (LOAD1, vld1_dup, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR10 (STORE1, vst1, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR10 (STORE1LANE, vst1_lane, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR9 (LOADSTRUCT, +- vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) }, +- { VAR7 (LOADSTRUCTLANE, vld2_lane, +- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, +- { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) }, +- { VAR9 (STORESTRUCT, vst2, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) }, +- { VAR7 (STORESTRUCTLANE, vst2_lane, +- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, +- { VAR9 (LOADSTRUCT, +- vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) }, +- { VAR7 (LOADSTRUCTLANE, vld3_lane, +- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, +- { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) }, +- { VAR9 (STORESTRUCT, vst3, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) }, +- { VAR7 (STORESTRUCTLANE, vst3_lane, +- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, +- { VAR9 (LOADSTRUCT, vld4, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) }, +- { VAR7 (LOADSTRUCTLANE, vld4_lane, +- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, +- { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) }, +- { VAR9 (STORESTRUCT, vst4, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) }, +- { VAR7 (STORESTRUCTLANE, vst4_lane, +- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, +- { VAR10 (LOGICBINOP, vand, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR10 (LOGICBINOP, vorr, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR10 (BINOP, veor, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR10 (LOGICBINOP, vbic, +- v8qi, v4hi, v2si, v2sf, di, 
v16qi, v8hi, v4si, v4sf, v2di) }, +- { VAR10 (LOGICBINOP, vorn, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) } ++ VAR10 (GETLANE, vget_lane, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR10 (SETLANE, vset_lane, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di), ++ VAR10 (DUP, vdup_n, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR10 (DUPLANE, vdup_lane, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di), ++ VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR3 (UNOP, vmovn, v8hi, v4si, v2di), ++ VAR3 (UNOP, vqmovn, v8hi, v4si, v2di), ++ VAR3 (UNOP, vqmovun, v8hi, v4si, v2di), ++ VAR3 (UNOP, vmovl, v8qi, v4hi, v2si), ++ VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf), ++ VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf), ++ VAR2 (LANEMAC, vmlal_lane, v4hi, v2si), ++ VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si), ++ VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf), ++ VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si), ++ VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si), ++ VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf), ++ VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf), ++ VAR2 (SCALARMAC, vmlal_n, v4hi, v2si), ++ VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si), ++ VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf), ++ VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si), ++ VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si), ++ VAR10 (BINOP, vext, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi), ++ VAR2 (UNOP, vrev16, v8qi, v16qi), ++ VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf), ++ VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf), ++ VAR10 (SELECT, vbsl, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR1 (VTBL, vtbl1, v8qi), ++ VAR1 (VTBL, vtbl2, v8qi), ++ VAR1 (VTBL, vtbl3, v8qi), ++ VAR1 (VTBL, vtbl4, v8qi), ++ VAR1 (VTBX, vtbx1, v8qi), ++ VAR1 (VTBX, vtbx2, v8qi), ++ VAR1 (VTBX, vtbx3, v8qi), ++ VAR1 (VTBX, vtbx4, v8qi), ++ VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ++ VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di), ++ VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di), ++ VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di), ++ VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di), ++ VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di), ++ VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR10 (LOAD1, vld1, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR10 (LOAD1LANE, vld1_lane, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR10 (LOAD1, vld1_dup, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR10 (STORE1, vst1, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, 
v2di), ++ VAR10 (STORE1LANE, vst1_lane, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR9 (LOADSTRUCT, ++ vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), ++ VAR7 (LOADSTRUCTLANE, vld2_lane, ++ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), ++ VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di), ++ VAR9 (STORESTRUCT, vst2, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), ++ VAR7 (STORESTRUCTLANE, vst2_lane, ++ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), ++ VAR9 (LOADSTRUCT, ++ vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), ++ VAR7 (LOADSTRUCTLANE, vld3_lane, ++ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), ++ VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di), ++ VAR9 (STORESTRUCT, vst3, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), ++ VAR7 (STORESTRUCTLANE, vst3_lane, ++ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), ++ VAR9 (LOADSTRUCT, vld4, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), ++ VAR7 (LOADSTRUCTLANE, vld4_lane, ++ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), ++ VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di), ++ VAR9 (STORESTRUCT, vst4, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), ++ VAR7 (STORESTRUCTLANE, vst4_lane, ++ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), ++ VAR10 (LOGICBINOP, vand, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR10 (LOGICBINOP, vorr, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR10 (BINOP, veor, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR10 (LOGICBINOP, vbic, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++ VAR10 (LOGICBINOP, vorn, ++ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) + }; + + #undef CF +@@ -18958,10 +18476,185 @@ + #undef VAR9 + #undef VAR10 + ++/* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have ++ symbolic names defined here (which would require too much duplication). ++ FIXME? 
*/ ++enum arm_builtins ++{ ++ ARM_BUILTIN_GETWCX, ++ ARM_BUILTIN_SETWCX, ++ ++ ARM_BUILTIN_WZERO, ++ ++ ARM_BUILTIN_WAVG2BR, ++ ARM_BUILTIN_WAVG2HR, ++ ARM_BUILTIN_WAVG2B, ++ ARM_BUILTIN_WAVG2H, ++ ++ ARM_BUILTIN_WACCB, ++ ARM_BUILTIN_WACCH, ++ ARM_BUILTIN_WACCW, ++ ++ ARM_BUILTIN_WMACS, ++ ARM_BUILTIN_WMACSZ, ++ ARM_BUILTIN_WMACU, ++ ARM_BUILTIN_WMACUZ, ++ ++ ARM_BUILTIN_WSADB, ++ ARM_BUILTIN_WSADBZ, ++ ARM_BUILTIN_WSADH, ++ ARM_BUILTIN_WSADHZ, ++ ++ ARM_BUILTIN_WALIGN, ++ ++ ARM_BUILTIN_TMIA, ++ ARM_BUILTIN_TMIAPH, ++ ARM_BUILTIN_TMIABB, ++ ARM_BUILTIN_TMIABT, ++ ARM_BUILTIN_TMIATB, ++ ARM_BUILTIN_TMIATT, ++ ++ ARM_BUILTIN_TMOVMSKB, ++ ARM_BUILTIN_TMOVMSKH, ++ ARM_BUILTIN_TMOVMSKW, ++ ++ ARM_BUILTIN_TBCSTB, ++ ARM_BUILTIN_TBCSTH, ++ ARM_BUILTIN_TBCSTW, ++ ++ ARM_BUILTIN_WMADDS, ++ ARM_BUILTIN_WMADDU, ++ ++ ARM_BUILTIN_WPACKHSS, ++ ARM_BUILTIN_WPACKWSS, ++ ARM_BUILTIN_WPACKDSS, ++ ARM_BUILTIN_WPACKHUS, ++ ARM_BUILTIN_WPACKWUS, ++ ARM_BUILTIN_WPACKDUS, ++ ++ ARM_BUILTIN_WADDB, ++ ARM_BUILTIN_WADDH, ++ ARM_BUILTIN_WADDW, ++ ARM_BUILTIN_WADDSSB, ++ ARM_BUILTIN_WADDSSH, ++ ARM_BUILTIN_WADDSSW, ++ ARM_BUILTIN_WADDUSB, ++ ARM_BUILTIN_WADDUSH, ++ ARM_BUILTIN_WADDUSW, ++ ARM_BUILTIN_WSUBB, ++ ARM_BUILTIN_WSUBH, ++ ARM_BUILTIN_WSUBW, ++ ARM_BUILTIN_WSUBSSB, ++ ARM_BUILTIN_WSUBSSH, ++ ARM_BUILTIN_WSUBSSW, ++ ARM_BUILTIN_WSUBUSB, ++ ARM_BUILTIN_WSUBUSH, ++ ARM_BUILTIN_WSUBUSW, ++ ++ ARM_BUILTIN_WAND, ++ ARM_BUILTIN_WANDN, ++ ARM_BUILTIN_WOR, ++ ARM_BUILTIN_WXOR, ++ ++ ARM_BUILTIN_WCMPEQB, ++ ARM_BUILTIN_WCMPEQH, ++ ARM_BUILTIN_WCMPEQW, ++ ARM_BUILTIN_WCMPGTUB, ++ ARM_BUILTIN_WCMPGTUH, ++ ARM_BUILTIN_WCMPGTUW, ++ ARM_BUILTIN_WCMPGTSB, ++ ARM_BUILTIN_WCMPGTSH, ++ ARM_BUILTIN_WCMPGTSW, ++ ++ ARM_BUILTIN_TEXTRMSB, ++ ARM_BUILTIN_TEXTRMSH, ++ ARM_BUILTIN_TEXTRMSW, ++ ARM_BUILTIN_TEXTRMUB, ++ ARM_BUILTIN_TEXTRMUH, ++ ARM_BUILTIN_TEXTRMUW, ++ ARM_BUILTIN_TINSRB, ++ ARM_BUILTIN_TINSRH, ++ ARM_BUILTIN_TINSRW, ++ ++ ARM_BUILTIN_WMAXSW, ++ ARM_BUILTIN_WMAXSH, ++ ARM_BUILTIN_WMAXSB, ++ ARM_BUILTIN_WMAXUW, ++ ARM_BUILTIN_WMAXUH, ++ ARM_BUILTIN_WMAXUB, ++ ARM_BUILTIN_WMINSW, ++ ARM_BUILTIN_WMINSH, ++ ARM_BUILTIN_WMINSB, ++ ARM_BUILTIN_WMINUW, ++ ARM_BUILTIN_WMINUH, ++ ARM_BUILTIN_WMINUB, ++ ++ ARM_BUILTIN_WMULUM, ++ ARM_BUILTIN_WMULSM, ++ ARM_BUILTIN_WMULUL, ++ ++ ARM_BUILTIN_PSADBH, ++ ARM_BUILTIN_WSHUFH, ++ ++ ARM_BUILTIN_WSLLH, ++ ARM_BUILTIN_WSLLW, ++ ARM_BUILTIN_WSLLD, ++ ARM_BUILTIN_WSRAH, ++ ARM_BUILTIN_WSRAW, ++ ARM_BUILTIN_WSRAD, ++ ARM_BUILTIN_WSRLH, ++ ARM_BUILTIN_WSRLW, ++ ARM_BUILTIN_WSRLD, ++ ARM_BUILTIN_WRORH, ++ ARM_BUILTIN_WRORW, ++ ARM_BUILTIN_WRORD, ++ ARM_BUILTIN_WSLLHI, ++ ARM_BUILTIN_WSLLWI, ++ ARM_BUILTIN_WSLLDI, ++ ARM_BUILTIN_WSRAHI, ++ ARM_BUILTIN_WSRAWI, ++ ARM_BUILTIN_WSRADI, ++ ARM_BUILTIN_WSRLHI, ++ ARM_BUILTIN_WSRLWI, ++ ARM_BUILTIN_WSRLDI, ++ ARM_BUILTIN_WRORHI, ++ ARM_BUILTIN_WRORWI, ++ ARM_BUILTIN_WRORDI, ++ ++ ARM_BUILTIN_WUNPCKIHB, ++ ARM_BUILTIN_WUNPCKIHH, ++ ARM_BUILTIN_WUNPCKIHW, ++ ARM_BUILTIN_WUNPCKILB, ++ ARM_BUILTIN_WUNPCKILH, ++ ARM_BUILTIN_WUNPCKILW, ++ ++ ARM_BUILTIN_WUNPCKEHSB, ++ ARM_BUILTIN_WUNPCKEHSH, ++ ARM_BUILTIN_WUNPCKEHSW, ++ ARM_BUILTIN_WUNPCKEHUB, ++ ARM_BUILTIN_WUNPCKEHUH, ++ ARM_BUILTIN_WUNPCKEHUW, ++ ARM_BUILTIN_WUNPCKELSB, ++ ARM_BUILTIN_WUNPCKELSH, ++ ARM_BUILTIN_WUNPCKELSW, ++ ARM_BUILTIN_WUNPCKELUB, ++ ARM_BUILTIN_WUNPCKELUH, ++ ARM_BUILTIN_WUNPCKELUW, ++ ++ ARM_BUILTIN_THREAD_POINTER, ++ ++ ARM_BUILTIN_NEON_BASE, ++ ++ ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data) ++}; ++ ++static GTY(()) tree 
arm_builtin_decls[ARM_BUILTIN_MAX]; ++ + static void + arm_init_neon_builtins (void) + { +- unsigned int i, fcode = ARM_BUILTIN_NEON_BASE; ++ unsigned int i, fcode; ++ tree decl; + + tree neon_intQI_type_node; + tree neon_intHI_type_node; +@@ -19209,250 +18902,740 @@ + } + } + +- for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++) ++ for (i = 0, fcode = ARM_BUILTIN_NEON_BASE; ++ i < ARRAY_SIZE (neon_builtin_data); ++ i++, fcode++) + { + neon_builtin_datum *d = &neon_builtin_data[i]; +- unsigned int j, codeidx = 0; +- +- d->base_fcode = fcode; +- +- for (j = 0; j < T_MAX; j++) +- { +- const char* const modenames[] = { +- "v8qi", "v4hi", "v2si", "v2sf", "di", +- "v16qi", "v8hi", "v4si", "v4sf", "v2di" +- }; +- char namebuf[60]; +- tree ftype = NULL; +- enum insn_code icode; +- int is_load = 0, is_store = 0; +- +- if ((d->bits & (1 << j)) == 0) +- continue; +- +- icode = d->codes[codeidx++]; +- +- switch (d->itype) +- { +- case NEON_LOAD1: +- case NEON_LOAD1LANE: +- case NEON_LOADSTRUCT: +- case NEON_LOADSTRUCTLANE: +- is_load = 1; +- /* Fall through. */ +- case NEON_STORE1: +- case NEON_STORE1LANE: +- case NEON_STORESTRUCT: +- case NEON_STORESTRUCTLANE: +- if (!is_load) +- is_store = 1; +- /* Fall through. */ +- case NEON_UNOP: +- case NEON_BINOP: +- case NEON_LOGICBINOP: +- case NEON_SHIFTINSERT: +- case NEON_TERNOP: +- case NEON_GETLANE: +- case NEON_SETLANE: +- case NEON_CREATE: +- case NEON_DUP: +- case NEON_DUPLANE: +- case NEON_SHIFTIMM: +- case NEON_SHIFTACC: +- case NEON_COMBINE: +- case NEON_SPLIT: +- case NEON_CONVERT: +- case NEON_FIXCONV: +- case NEON_LANEMUL: +- case NEON_LANEMULL: +- case NEON_LANEMULH: +- case NEON_LANEMAC: +- case NEON_SCALARMUL: +- case NEON_SCALARMULL: +- case NEON_SCALARMULH: +- case NEON_SCALARMAC: +- case NEON_SELECT: +- case NEON_VTBL: +- case NEON_VTBX: +- { +- int k; +- tree return_type = void_type_node, args = void_list_node; +- +- /* Build a function type directly from the insn_data for this +- builtin. The build_function_type() function takes care of +- removing duplicates for us. */ +- for (k = insn_data[icode].n_operands - 1; k >= 0; k--) +- { +- tree eltype; +- +- if (is_load && k == 1) +- { +- /* Neon load patterns always have the memory operand +- in the operand 1 position. */ +- gcc_assert (insn_data[icode].operand[k].predicate +- == neon_struct_operand); +- +- switch (1 << j) +- { +- case T_V8QI: +- case T_V16QI: +- eltype = const_intQI_pointer_node; +- break; +- +- case T_V4HI: +- case T_V8HI: +- eltype = const_intHI_pointer_node; +- break; +- +- case T_V2SI: +- case T_V4SI: +- eltype = const_intSI_pointer_node; +- break; +- +- case T_V2SF: +- case T_V4SF: +- eltype = const_float_pointer_node; +- break; +- +- case T_DI: +- case T_V2DI: +- eltype = const_intDI_pointer_node; +- break; +- +- default: gcc_unreachable (); +- } +- } +- else if (is_store && k == 0) +- { +- /* Similarly, Neon store patterns use operand 0 as +- the memory location to store to. 
*/ +- gcc_assert (insn_data[icode].operand[k].predicate +- == neon_struct_operand); +- +- switch (1 << j) +- { +- case T_V8QI: +- case T_V16QI: +- eltype = intQI_pointer_node; +- break; +- +- case T_V4HI: +- case T_V8HI: +- eltype = intHI_pointer_node; +- break; +- +- case T_V2SI: +- case T_V4SI: +- eltype = intSI_pointer_node; +- break; +- +- case T_V2SF: +- case T_V4SF: +- eltype = float_pointer_node; +- break; +- +- case T_DI: +- case T_V2DI: +- eltype = intDI_pointer_node; +- break; +- +- default: gcc_unreachable (); +- } +- } +- else +- { +- switch (insn_data[icode].operand[k].mode) +- { +- case VOIDmode: eltype = void_type_node; break; +- /* Scalars. */ +- case QImode: eltype = neon_intQI_type_node; break; +- case HImode: eltype = neon_intHI_type_node; break; +- case SImode: eltype = neon_intSI_type_node; break; +- case SFmode: eltype = neon_float_type_node; break; +- case DImode: eltype = neon_intDI_type_node; break; +- case TImode: eltype = intTI_type_node; break; +- case EImode: eltype = intEI_type_node; break; +- case OImode: eltype = intOI_type_node; break; +- case CImode: eltype = intCI_type_node; break; +- case XImode: eltype = intXI_type_node; break; +- /* 64-bit vectors. */ +- case V8QImode: eltype = V8QI_type_node; break; +- case V4HImode: eltype = V4HI_type_node; break; +- case V2SImode: eltype = V2SI_type_node; break; +- case V2SFmode: eltype = V2SF_type_node; break; +- /* 128-bit vectors. */ +- case V16QImode: eltype = V16QI_type_node; break; +- case V8HImode: eltype = V8HI_type_node; break; +- case V4SImode: eltype = V4SI_type_node; break; +- case V4SFmode: eltype = V4SF_type_node; break; +- case V2DImode: eltype = V2DI_type_node; break; +- default: gcc_unreachable (); +- } +- } +- +- if (k == 0 && !is_store) +- return_type = eltype; +- else +- args = tree_cons (NULL_TREE, eltype, args); +- } +- +- ftype = build_function_type (return_type, args); +- } +- break; +- +- case NEON_RESULTPAIR: +- { +- switch (insn_data[icode].operand[1].mode) +- { +- case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break; +- case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break; +- case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break; +- case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break; +- case DImode: ftype = void_ftype_pdi_di_di; break; +- case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break; +- case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break; +- case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break; +- case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break; +- case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break; +- default: gcc_unreachable (); +- } +- } +- break; +- +- case NEON_REINTERP: +- { +- /* We iterate over 5 doubleword types, then 5 quadword +- types. 
*/ +- int rhs = j % 5; +- switch (insn_data[icode].operand[0].mode) +- { +- case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break; +- case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break; +- case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break; +- case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break; +- case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break; +- case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break; +- case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break; +- case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break; +- case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break; +- case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break; +- default: gcc_unreachable (); +- } +- } +- break; +- +- default: +- gcc_unreachable (); +- } +- +- gcc_assert (ftype != NULL); +- +- sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]); +- +- add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL, +- NULL_TREE); +- } +- } ++ ++ const char* const modenames[] = { ++ "v8qi", "v4hi", "v2si", "v2sf", "di", ++ "v16qi", "v8hi", "v4si", "v4sf", "v2di", ++ "ti", "ei", "oi" ++ }; ++ char namebuf[60]; ++ tree ftype = NULL; ++ int is_load = 0, is_store = 0; ++ ++ gcc_assert (ARRAY_SIZE (modenames) == T_MAX); ++ ++ d->fcode = fcode; ++ ++ switch (d->itype) ++ { ++ case NEON_LOAD1: ++ case NEON_LOAD1LANE: ++ case NEON_LOADSTRUCT: ++ case NEON_LOADSTRUCTLANE: ++ is_load = 1; ++ /* Fall through. */ ++ case NEON_STORE1: ++ case NEON_STORE1LANE: ++ case NEON_STORESTRUCT: ++ case NEON_STORESTRUCTLANE: ++ if (!is_load) ++ is_store = 1; ++ /* Fall through. */ ++ case NEON_UNOP: ++ case NEON_BINOP: ++ case NEON_LOGICBINOP: ++ case NEON_SHIFTINSERT: ++ case NEON_TERNOP: ++ case NEON_GETLANE: ++ case NEON_SETLANE: ++ case NEON_CREATE: ++ case NEON_DUP: ++ case NEON_DUPLANE: ++ case NEON_SHIFTIMM: ++ case NEON_SHIFTACC: ++ case NEON_COMBINE: ++ case NEON_SPLIT: ++ case NEON_CONVERT: ++ case NEON_FIXCONV: ++ case NEON_LANEMUL: ++ case NEON_LANEMULL: ++ case NEON_LANEMULH: ++ case NEON_LANEMAC: ++ case NEON_SCALARMUL: ++ case NEON_SCALARMULL: ++ case NEON_SCALARMULH: ++ case NEON_SCALARMAC: ++ case NEON_SELECT: ++ case NEON_VTBL: ++ case NEON_VTBX: ++ { ++ int k; ++ tree return_type = void_type_node, args = void_list_node; ++ ++ /* Build a function type directly from the insn_data for ++ this builtin. The build_function_type() function takes ++ care of removing duplicates for us. */ ++ for (k = insn_data[d->code].n_operands - 1; k >= 0; k--) ++ { ++ tree eltype; ++ ++ if (is_load && k == 1) ++ { ++ /* Neon load patterns always have the memory ++ operand in the operand 1 position. */ ++ gcc_assert (insn_data[d->code].operand[k].predicate ++ == neon_struct_operand); ++ ++ switch (d->mode) ++ { ++ case T_V8QI: ++ case T_V16QI: ++ eltype = const_intQI_pointer_node; ++ break; ++ ++ case T_V4HI: ++ case T_V8HI: ++ eltype = const_intHI_pointer_node; ++ break; ++ ++ case T_V2SI: ++ case T_V4SI: ++ eltype = const_intSI_pointer_node; ++ break; ++ ++ case T_V2SF: ++ case T_V4SF: ++ eltype = const_float_pointer_node; ++ break; ++ ++ case T_DI: ++ case T_V2DI: ++ eltype = const_intDI_pointer_node; ++ break; ++ ++ default: gcc_unreachable (); ++ } ++ } ++ else if (is_store && k == 0) ++ { ++ /* Similarly, Neon store patterns use operand 0 as ++ the memory location to store to. 
*/ ++ gcc_assert (insn_data[d->code].operand[k].predicate ++ == neon_struct_operand); ++ ++ switch (d->mode) ++ { ++ case T_V8QI: ++ case T_V16QI: ++ eltype = intQI_pointer_node; ++ break; ++ ++ case T_V4HI: ++ case T_V8HI: ++ eltype = intHI_pointer_node; ++ break; ++ ++ case T_V2SI: ++ case T_V4SI: ++ eltype = intSI_pointer_node; ++ break; ++ ++ case T_V2SF: ++ case T_V4SF: ++ eltype = float_pointer_node; ++ break; ++ ++ case T_DI: ++ case T_V2DI: ++ eltype = intDI_pointer_node; ++ break; ++ ++ default: gcc_unreachable (); ++ } ++ } ++ else ++ { ++ switch (insn_data[d->code].operand[k].mode) ++ { ++ case VOIDmode: eltype = void_type_node; break; ++ /* Scalars. */ ++ case QImode: eltype = neon_intQI_type_node; break; ++ case HImode: eltype = neon_intHI_type_node; break; ++ case SImode: eltype = neon_intSI_type_node; break; ++ case SFmode: eltype = neon_float_type_node; break; ++ case DImode: eltype = neon_intDI_type_node; break; ++ case TImode: eltype = intTI_type_node; break; ++ case EImode: eltype = intEI_type_node; break; ++ case OImode: eltype = intOI_type_node; break; ++ case CImode: eltype = intCI_type_node; break; ++ case XImode: eltype = intXI_type_node; break; ++ /* 64-bit vectors. */ ++ case V8QImode: eltype = V8QI_type_node; break; ++ case V4HImode: eltype = V4HI_type_node; break; ++ case V2SImode: eltype = V2SI_type_node; break; ++ case V2SFmode: eltype = V2SF_type_node; break; ++ /* 128-bit vectors. */ ++ case V16QImode: eltype = V16QI_type_node; break; ++ case V8HImode: eltype = V8HI_type_node; break; ++ case V4SImode: eltype = V4SI_type_node; break; ++ case V4SFmode: eltype = V4SF_type_node; break; ++ case V2DImode: eltype = V2DI_type_node; break; ++ default: gcc_unreachable (); ++ } ++ } ++ ++ if (k == 0 && !is_store) ++ return_type = eltype; ++ else ++ args = tree_cons (NULL_TREE, eltype, args); ++ } ++ ++ ftype = build_function_type (return_type, args); ++ } ++ break; ++ ++ case NEON_RESULTPAIR: ++ { ++ switch (insn_data[d->code].operand[1].mode) ++ { ++ case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break; ++ case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break; ++ case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break; ++ case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break; ++ case DImode: ftype = void_ftype_pdi_di_di; break; ++ case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break; ++ case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break; ++ case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break; ++ case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break; ++ case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break; ++ default: gcc_unreachable (); ++ } ++ } ++ break; ++ ++ case NEON_REINTERP: ++ { ++ /* We iterate over 5 doubleword types, then 5 quadword ++ types. 
*/ ++ int rhs = d->mode % 5; ++ switch (insn_data[d->code].operand[0].mode) ++ { ++ case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break; ++ case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break; ++ case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break; ++ case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break; ++ case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break; ++ case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break; ++ case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break; ++ case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break; ++ case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break; ++ case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break; ++ default: gcc_unreachable (); ++ } ++ } ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ gcc_assert (ftype != NULL); ++ ++ sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]); ++ ++ decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL, ++ NULL_TREE); ++ arm_builtin_decls[fcode] = decl; ++ } ++} ++ ++#define def_mbuiltin(MASK, NAME, TYPE, CODE) \ ++ do \ ++ { \ ++ if ((MASK) & insn_flags) \ ++ { \ ++ tree bdecl; \ ++ bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \ ++ BUILT_IN_MD, NULL, NULL_TREE); \ ++ arm_builtin_decls[CODE] = bdecl; \ ++ } \ ++ } \ ++ while (0) ++ ++struct builtin_description ++{ ++ const unsigned int mask; ++ const enum insn_code icode; ++ const char * const name; ++ const enum arm_builtins code; ++ const enum rtx_code comparison; ++ const unsigned int flag; ++}; ++ ++static const struct builtin_description bdesc_2arg[] = ++{ ++#define IWMMXT_BUILTIN(code, string, builtin) \ ++ { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \ ++ ARM_BUILTIN_##builtin, UNKNOWN, 0 }, ++ ++ IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB) ++ IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH) ++ IWMMXT_BUILTIN (addv2si3, "waddw", WADDW) ++ IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB) ++ IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH) ++ IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW) ++ IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB) ++ IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH) ++ IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW) ++ IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB) ++ IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH) ++ IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW) ++ IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB) ++ IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH) ++ IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW) ++ IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB) ++ IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH) ++ IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW) ++ IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL) ++ IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM) ++ IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM) ++ IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB) ++ IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH) ++ IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW) ++ IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB) ++ IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH) ++ IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW) ++ IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB) ++ IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH) ++ IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW) ++ IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB) ++ IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB) ++ IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH) ++ IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH) ++ IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW) ++ IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW) ++ 
IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB) ++ IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB) ++ IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH) ++ IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH) ++ IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW) ++ IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW) ++ IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND) ++ IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN) ++ IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR) ++ IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR) ++ IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B) ++ IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H) ++ IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR) ++ IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR) ++ IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB) ++ IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH) ++ IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW) ++ IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB) ++ IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH) ++ IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW) ++ IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS) ++ IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU) ++ ++#define IWMMXT_BUILTIN2(code, builtin) \ ++ { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 }, ++ ++ IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS) ++ IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS) ++ IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS) ++ IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS) ++ IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS) ++ IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS) ++ IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH) ++ IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI) ++ IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW) ++ IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI) ++ IWMMXT_BUILTIN2 (ashldi3_di, WSLLD) ++ IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI) ++ IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH) ++ IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI) ++ IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW) ++ IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI) ++ IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD) ++ IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI) ++ IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH) ++ IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI) ++ IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW) ++ IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI) ++ IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD) ++ IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI) ++ IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH) ++ IWMMXT_BUILTIN2 (rorv4hi3, WRORHI) ++ IWMMXT_BUILTIN2 (rorv2si3_di, WRORW) ++ IWMMXT_BUILTIN2 (rorv2si3, WRORWI) ++ IWMMXT_BUILTIN2 (rordi3_di, WRORD) ++ IWMMXT_BUILTIN2 (rordi3, WRORDI) ++ IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ) ++ IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ) ++}; ++ ++static const struct builtin_description bdesc_1arg[] = ++{ ++ IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB) ++ IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH) ++ IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW) ++ IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB) ++ IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH) ++ IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW) ++ IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB) ++ IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH) ++ IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW) ++ IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB) ++ IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH) ++ IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW) ++ IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB) ++ IWMMXT_BUILTIN (iwmmxt_wunpckeluh, 
"wunpckeluh", WUNPCKELUH) ++ IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW) ++ IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB) ++ IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH) ++ IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW) ++}; ++ ++/* Set up all the iWMMXt builtins. This is not called if ++ TARGET_IWMMXT is zero. */ ++ ++static void ++arm_init_iwmmxt_builtins (void) ++{ ++ const struct builtin_description * d; ++ size_t i; ++ tree endlink = void_list_node; ++ ++ tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode); ++ tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode); ++ tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode); ++ ++ tree int_ftype_int ++ = build_function_type (integer_type_node, ++ tree_cons (NULL_TREE, integer_type_node, endlink)); ++ tree v8qi_ftype_v8qi_v8qi_int ++ = build_function_type (V8QI_type_node, ++ tree_cons (NULL_TREE, V8QI_type_node, ++ tree_cons (NULL_TREE, V8QI_type_node, ++ tree_cons (NULL_TREE, ++ integer_type_node, ++ endlink)))); ++ tree v4hi_ftype_v4hi_int ++ = build_function_type (V4HI_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ tree_cons (NULL_TREE, integer_type_node, ++ endlink))); ++ tree v2si_ftype_v2si_int ++ = build_function_type (V2SI_type_node, ++ tree_cons (NULL_TREE, V2SI_type_node, ++ tree_cons (NULL_TREE, integer_type_node, ++ endlink))); ++ tree v2si_ftype_di_di ++ = build_function_type (V2SI_type_node, ++ tree_cons (NULL_TREE, long_long_integer_type_node, ++ tree_cons (NULL_TREE, ++ long_long_integer_type_node, ++ endlink))); ++ tree di_ftype_di_int ++ = build_function_type (long_long_integer_type_node, ++ tree_cons (NULL_TREE, long_long_integer_type_node, ++ tree_cons (NULL_TREE, integer_type_node, ++ endlink))); ++ tree di_ftype_di_int_int ++ = build_function_type (long_long_integer_type_node, ++ tree_cons (NULL_TREE, long_long_integer_type_node, ++ tree_cons (NULL_TREE, integer_type_node, ++ tree_cons (NULL_TREE, ++ integer_type_node, ++ endlink)))); ++ tree int_ftype_v8qi ++ = build_function_type (integer_type_node, ++ tree_cons (NULL_TREE, V8QI_type_node, ++ endlink)); ++ tree int_ftype_v4hi ++ = build_function_type (integer_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ endlink)); ++ tree int_ftype_v2si ++ = build_function_type (integer_type_node, ++ tree_cons (NULL_TREE, V2SI_type_node, ++ endlink)); ++ tree int_ftype_v8qi_int ++ = build_function_type (integer_type_node, ++ tree_cons (NULL_TREE, V8QI_type_node, ++ tree_cons (NULL_TREE, integer_type_node, ++ endlink))); ++ tree int_ftype_v4hi_int ++ = build_function_type (integer_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ tree_cons (NULL_TREE, integer_type_node, ++ endlink))); ++ tree int_ftype_v2si_int ++ = build_function_type (integer_type_node, ++ tree_cons (NULL_TREE, V2SI_type_node, ++ tree_cons (NULL_TREE, integer_type_node, ++ endlink))); ++ tree v8qi_ftype_v8qi_int_int ++ = build_function_type (V8QI_type_node, ++ tree_cons (NULL_TREE, V8QI_type_node, ++ tree_cons (NULL_TREE, integer_type_node, ++ tree_cons (NULL_TREE, ++ integer_type_node, ++ endlink)))); ++ tree v4hi_ftype_v4hi_int_int ++ = build_function_type (V4HI_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ tree_cons (NULL_TREE, integer_type_node, ++ tree_cons (NULL_TREE, ++ integer_type_node, ++ endlink)))); ++ tree v2si_ftype_v2si_int_int ++ = build_function_type (V2SI_type_node, ++ tree_cons (NULL_TREE, V2SI_type_node, ++ tree_cons (NULL_TREE, 
integer_type_node, ++ tree_cons (NULL_TREE, ++ integer_type_node, ++ endlink)))); ++ /* Miscellaneous. */ ++ tree v8qi_ftype_v4hi_v4hi ++ = build_function_type (V8QI_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ endlink))); ++ tree v4hi_ftype_v2si_v2si ++ = build_function_type (V4HI_type_node, ++ tree_cons (NULL_TREE, V2SI_type_node, ++ tree_cons (NULL_TREE, V2SI_type_node, ++ endlink))); ++ tree v2si_ftype_v4hi_v4hi ++ = build_function_type (V2SI_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ endlink))); ++ tree v2si_ftype_v8qi_v8qi ++ = build_function_type (V2SI_type_node, ++ tree_cons (NULL_TREE, V8QI_type_node, ++ tree_cons (NULL_TREE, V8QI_type_node, ++ endlink))); ++ tree v4hi_ftype_v4hi_di ++ = build_function_type (V4HI_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ tree_cons (NULL_TREE, ++ long_long_integer_type_node, ++ endlink))); ++ tree v2si_ftype_v2si_di ++ = build_function_type (V2SI_type_node, ++ tree_cons (NULL_TREE, V2SI_type_node, ++ tree_cons (NULL_TREE, ++ long_long_integer_type_node, ++ endlink))); ++ tree void_ftype_int_int ++ = build_function_type (void_type_node, ++ tree_cons (NULL_TREE, integer_type_node, ++ tree_cons (NULL_TREE, integer_type_node, ++ endlink))); ++ tree di_ftype_void ++ = build_function_type (long_long_unsigned_type_node, endlink); ++ tree di_ftype_v8qi ++ = build_function_type (long_long_integer_type_node, ++ tree_cons (NULL_TREE, V8QI_type_node, ++ endlink)); ++ tree di_ftype_v4hi ++ = build_function_type (long_long_integer_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ endlink)); ++ tree di_ftype_v2si ++ = build_function_type (long_long_integer_type_node, ++ tree_cons (NULL_TREE, V2SI_type_node, ++ endlink)); ++ tree v2si_ftype_v4hi ++ = build_function_type (V2SI_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ endlink)); ++ tree v4hi_ftype_v8qi ++ = build_function_type (V4HI_type_node, ++ tree_cons (NULL_TREE, V8QI_type_node, ++ endlink)); ++ ++ tree di_ftype_di_v4hi_v4hi ++ = build_function_type (long_long_unsigned_type_node, ++ tree_cons (NULL_TREE, ++ long_long_unsigned_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ tree_cons (NULL_TREE, ++ V4HI_type_node, ++ endlink)))); ++ ++ tree di_ftype_v4hi_v4hi ++ = build_function_type (long_long_unsigned_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ endlink))); ++ ++ /* Normal vector binops. */ ++ tree v8qi_ftype_v8qi_v8qi ++ = build_function_type (V8QI_type_node, ++ tree_cons (NULL_TREE, V8QI_type_node, ++ tree_cons (NULL_TREE, V8QI_type_node, ++ endlink))); ++ tree v4hi_ftype_v4hi_v4hi ++ = build_function_type (V4HI_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ tree_cons (NULL_TREE, V4HI_type_node, ++ endlink))); ++ tree v2si_ftype_v2si_v2si ++ = build_function_type (V2SI_type_node, ++ tree_cons (NULL_TREE, V2SI_type_node, ++ tree_cons (NULL_TREE, V2SI_type_node, ++ endlink))); ++ tree di_ftype_di_di ++ = build_function_type (long_long_unsigned_type_node, ++ tree_cons (NULL_TREE, long_long_unsigned_type_node, ++ tree_cons (NULL_TREE, ++ long_long_unsigned_type_node, ++ endlink))); ++ ++ /* Add all builtins that are more or less simple operations on two ++ operands. */ ++ for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) ++ { ++ /* Use one of the operands; the target can have a different mode for ++ mask-generating compares. 
*/ ++ enum machine_mode mode; ++ tree type; ++ ++ if (d->name == 0) ++ continue; ++ ++ mode = insn_data[d->icode].operand[1].mode; ++ ++ switch (mode) ++ { ++ case V8QImode: ++ type = v8qi_ftype_v8qi_v8qi; ++ break; ++ case V4HImode: ++ type = v4hi_ftype_v4hi_v4hi; ++ break; ++ case V2SImode: ++ type = v2si_ftype_v2si_v2si; ++ break; ++ case DImode: ++ type = di_ftype_di_di; ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ def_mbuiltin (d->mask, d->name, type, d->code); ++ } ++ ++ /* Add the remaining MMX insns with somewhat more complicated types. */ ++#define iwmmx_mbuiltin(NAME, TYPE, CODE) \ ++ def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \ ++ ARM_BUILTIN_ ## CODE) ++ ++ iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO); ++ iwmmx_mbuiltin ("setwcx", void_ftype_int_int, SETWCX); ++ iwmmx_mbuiltin ("getwcx", int_ftype_int, GETWCX); ++ ++ iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH); ++ iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW); ++ iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD); ++ iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI); ++ iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI); ++ iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI); ++ ++ iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH); ++ iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW); ++ iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD); ++ iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI); ++ iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI); ++ iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI); ++ ++ iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH); ++ iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW); ++ iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD); ++ iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI); ++ iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI); ++ iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI); ++ ++ iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH); ++ iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW); ++ iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD); ++ iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI); ++ iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI); ++ iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI); ++ ++ iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH); ++ ++ iwmmx_mbuiltin ("wsadb", v2si_ftype_v8qi_v8qi, WSADB); ++ iwmmx_mbuiltin ("wsadh", v2si_ftype_v4hi_v4hi, WSADH); ++ iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ); ++ iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ); ++ ++ iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB); ++ iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH); ++ iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW); ++ iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB); ++ iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH); ++ iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW); ++ iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB); ++ iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH); ++ iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW); ++ ++ iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB); ++ iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH); ++ iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW); ++ ++ iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB); ++ iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH); ++ iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW); ++ ++ iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS); ++ iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS); 
++ iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS); ++ iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS); ++ iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS); ++ iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS); ++ ++ iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB); ++ iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH); ++ iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW); ++ iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB); ++ iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH); ++ iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW); ++ iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB); ++ iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH); ++ iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW); ++ iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB); ++ iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH); ++ iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW); ++ ++ iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS); ++ iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ); ++ iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU); ++ iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ); ++ ++ iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGN); ++ iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA); ++ iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH); ++ iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB); ++ iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT); ++ iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB); ++ iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT); ++ ++#undef iwmmx_mbuiltin ++} ++ ++static void ++arm_init_tls_builtins (void) ++{ ++ tree ftype, decl; ++ ++ ftype = build_function_type (ptr_type_node, void_list_node); ++ decl = add_builtin_function ("__builtin_thread_pointer", ftype, ++ ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD, ++ NULL, NULL_TREE); ++ TREE_NOTHROW (decl) = 1; ++ TREE_READONLY (decl) = 1; ++ arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl; + } + + static void +@@ -19479,6 +19662,17 @@ + arm_init_fp16_builtins (); + } + ++/* Return the ARM builtin for CODE. */ ++ ++static tree ++arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) ++{ ++ if (code >= ARM_BUILTIN_MAX) ++ return error_mark_node; ++ ++ return arm_builtin_decls[code]; ++} ++ + /* Implement TARGET_INVALID_PARAMETER_TYPE. 
*/ + + static const char * +@@ -19630,58 +19824,6 @@ + return target; + } + +-static int +-neon_builtin_compare (const void *a, const void *b) +-{ +- const neon_builtin_datum *const key = (const neon_builtin_datum *) a; +- const neon_builtin_datum *const memb = (const neon_builtin_datum *) b; +- unsigned int soughtcode = key->base_fcode; +- +- if (soughtcode >= memb->base_fcode +- && soughtcode < memb->base_fcode + memb->num_vars) +- return 0; +- else if (soughtcode < memb->base_fcode) +- return -1; +- else +- return 1; +-} +- +-static enum insn_code +-locate_neon_builtin_icode (int fcode, neon_itype *itype, +- enum neon_builtin_type_bits *type_bit) +-{ +- neon_builtin_datum key +- = { NULL, (neon_itype) 0, 0, { CODE_FOR_nothing }, 0, 0 }; +- neon_builtin_datum *found; +- int idx, type, ntypes; +- +- key.base_fcode = fcode; +- found = (neon_builtin_datum *) +- bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data), +- sizeof (neon_builtin_data[0]), neon_builtin_compare); +- gcc_assert (found); +- idx = fcode - (int) found->base_fcode; +- gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars); +- +- if (itype) +- *itype = found->itype; +- +- if (type_bit) +- { +- ntypes = 0; +- for (type = 0; type < T_MAX; type++) +- if (found->bits & (1 << type)) +- { +- if (ntypes == idx) +- break; +- ntypes++; +- } +- gcc_assert (type < T_MAX); +- *type_bit = (enum neon_builtin_type_bits) (1 << type); +- } +- return found->codes[idx]; +-} +- + typedef enum { + NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, +@@ -19695,14 +19837,14 @@ + and return an expression for the accessed memory. + + The intrinsic function operates on a block of registers that has +- mode REG_MODE. This block contains vectors of type TYPE_BIT. ++ mode REG_MODE. This block contains vectors of type TYPE_MODE. + The function references the memory at EXP in mode MEM_MODE; + this mode may be BLKmode if no more suitable mode is available. */ + + static tree + neon_dereference_pointer (tree exp, enum machine_mode mem_mode, + enum machine_mode reg_mode, +- enum neon_builtin_type_bits type_bit) ++ neon_builtin_type_mode type_mode) + { + HOST_WIDE_INT reg_size, vector_size, nvectors, nelems; + tree elem_type, upper_bound, array_type; +@@ -19711,8 +19853,8 @@ + reg_size = GET_MODE_SIZE (reg_mode); + + /* Work out the size of each vector in bytes. */ +- gcc_assert (type_bit & (T_DREG | T_QREG)); +- vector_size = (type_bit & T_QREG ? 16 : 8); ++ gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG)); ++ vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8); + + /* Work out how many vectors there are. */ + gcc_assert (reg_size % vector_size == 0); +@@ -19743,7 +19885,7 @@ + /* Expand a Neon builtin. */ + static rtx + arm_expand_neon_args (rtx target, int icode, int have_retval, +- enum neon_builtin_type_bits type_bit, ++ neon_builtin_type_mode type_mode, + tree exp, ...) 
+ { + va_list ap; +@@ -19779,7 +19921,7 @@ + { + other_mode = insn_data[icode].operand[1 - opno].mode; + arg[argc] = neon_dereference_pointer (arg[argc], mode[argc], +- other_mode, type_bit); ++ other_mode, type_mode); + } + op[argc] = expand_normal (arg[argc]); + +@@ -19889,16 +20031,17 @@ + static rtx + arm_expand_neon_builtin (int fcode, tree exp, rtx target) + { +- neon_itype itype; +- enum neon_builtin_type_bits type_bit; +- enum insn_code icode = locate_neon_builtin_icode (fcode, &itype, &type_bit); ++ neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE]; ++ neon_itype itype = d->itype; ++ enum insn_code icode = d->code; ++ neon_builtin_type_mode type_mode = d->mode; + + switch (itype) + { + case NEON_UNOP: + case NEON_CONVERT: + case NEON_DUPLANE: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_BINOP: +@@ -19908,89 +20051,89 @@ + case NEON_SCALARMULH: + case NEON_SHIFTINSERT: + case NEON_LOGICBINOP: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + + case NEON_TERNOP: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_GETLANE: + case NEON_FIXCONV: + case NEON_SHIFTIMM: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + + case NEON_CREATE: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_DUP: + case NEON_SPLIT: + case NEON_REINTERP: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_COMBINE: + case NEON_VTBL: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_RESULTPAIR: +- return arm_expand_neon_args (target, icode, 0, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 0, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_STOP); + + case NEON_LANEMUL: + case NEON_LANEMULL: + case NEON_LANEMULH: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_LANEMAC: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_SHIFTACC: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + 
NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_SCALARMAC: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_SELECT: + case NEON_VTBX: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_STOP); + + case NEON_LOAD1: + case NEON_LOADSTRUCT: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_MEMORY, NEON_ARG_STOP); + + case NEON_LOAD1LANE: + case NEON_LOADSTRUCTLANE: +- return arm_expand_neon_args (target, icode, 1, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 1, type_mode, exp, + NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + + case NEON_STORE1: + case NEON_STORESTRUCT: +- return arm_expand_neon_args (target, icode, 0, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 0, type_mode, exp, + NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_STORE1LANE: + case NEON_STORESTRUCTLANE: +- return arm_expand_neon_args (target, icode, 0, type_bit, exp, ++ return arm_expand_neon_args (target, icode, 0, type_mode, exp, + NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + } + +=== modified file 'gcc/config/arm/arm.h' +--- old/gcc/config/arm/arm.h 2011-08-13 08:32:32 +0000 ++++ new/gcc/config/arm/arm.h 2011-08-24 17:35:16 +0000 +@@ -2269,178 +2269,6 @@ + : arm_gen_return_addr_mask ()) + + +-/* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have +- symbolic names defined here (which would require too much duplication). +- FIXME? 
*/ +-enum arm_builtins +-{ +- ARM_BUILTIN_GETWCX, +- ARM_BUILTIN_SETWCX, +- +- ARM_BUILTIN_WZERO, +- +- ARM_BUILTIN_WAVG2BR, +- ARM_BUILTIN_WAVG2HR, +- ARM_BUILTIN_WAVG2B, +- ARM_BUILTIN_WAVG2H, +- +- ARM_BUILTIN_WACCB, +- ARM_BUILTIN_WACCH, +- ARM_BUILTIN_WACCW, +- +- ARM_BUILTIN_WMACS, +- ARM_BUILTIN_WMACSZ, +- ARM_BUILTIN_WMACU, +- ARM_BUILTIN_WMACUZ, +- +- ARM_BUILTIN_WSADB, +- ARM_BUILTIN_WSADBZ, +- ARM_BUILTIN_WSADH, +- ARM_BUILTIN_WSADHZ, +- +- ARM_BUILTIN_WALIGN, +- +- ARM_BUILTIN_TMIA, +- ARM_BUILTIN_TMIAPH, +- ARM_BUILTIN_TMIABB, +- ARM_BUILTIN_TMIABT, +- ARM_BUILTIN_TMIATB, +- ARM_BUILTIN_TMIATT, +- +- ARM_BUILTIN_TMOVMSKB, +- ARM_BUILTIN_TMOVMSKH, +- ARM_BUILTIN_TMOVMSKW, +- +- ARM_BUILTIN_TBCSTB, +- ARM_BUILTIN_TBCSTH, +- ARM_BUILTIN_TBCSTW, +- +- ARM_BUILTIN_WMADDS, +- ARM_BUILTIN_WMADDU, +- +- ARM_BUILTIN_WPACKHSS, +- ARM_BUILTIN_WPACKWSS, +- ARM_BUILTIN_WPACKDSS, +- ARM_BUILTIN_WPACKHUS, +- ARM_BUILTIN_WPACKWUS, +- ARM_BUILTIN_WPACKDUS, +- +- ARM_BUILTIN_WADDB, +- ARM_BUILTIN_WADDH, +- ARM_BUILTIN_WADDW, +- ARM_BUILTIN_WADDSSB, +- ARM_BUILTIN_WADDSSH, +- ARM_BUILTIN_WADDSSW, +- ARM_BUILTIN_WADDUSB, +- ARM_BUILTIN_WADDUSH, +- ARM_BUILTIN_WADDUSW, +- ARM_BUILTIN_WSUBB, +- ARM_BUILTIN_WSUBH, +- ARM_BUILTIN_WSUBW, +- ARM_BUILTIN_WSUBSSB, +- ARM_BUILTIN_WSUBSSH, +- ARM_BUILTIN_WSUBSSW, +- ARM_BUILTIN_WSUBUSB, +- ARM_BUILTIN_WSUBUSH, +- ARM_BUILTIN_WSUBUSW, +- +- ARM_BUILTIN_WAND, +- ARM_BUILTIN_WANDN, +- ARM_BUILTIN_WOR, +- ARM_BUILTIN_WXOR, +- +- ARM_BUILTIN_WCMPEQB, +- ARM_BUILTIN_WCMPEQH, +- ARM_BUILTIN_WCMPEQW, +- ARM_BUILTIN_WCMPGTUB, +- ARM_BUILTIN_WCMPGTUH, +- ARM_BUILTIN_WCMPGTUW, +- ARM_BUILTIN_WCMPGTSB, +- ARM_BUILTIN_WCMPGTSH, +- ARM_BUILTIN_WCMPGTSW, +- +- ARM_BUILTIN_TEXTRMSB, +- ARM_BUILTIN_TEXTRMSH, +- ARM_BUILTIN_TEXTRMSW, +- ARM_BUILTIN_TEXTRMUB, +- ARM_BUILTIN_TEXTRMUH, +- ARM_BUILTIN_TEXTRMUW, +- ARM_BUILTIN_TINSRB, +- ARM_BUILTIN_TINSRH, +- ARM_BUILTIN_TINSRW, +- +- ARM_BUILTIN_WMAXSW, +- ARM_BUILTIN_WMAXSH, +- ARM_BUILTIN_WMAXSB, +- ARM_BUILTIN_WMAXUW, +- ARM_BUILTIN_WMAXUH, +- ARM_BUILTIN_WMAXUB, +- ARM_BUILTIN_WMINSW, +- ARM_BUILTIN_WMINSH, +- ARM_BUILTIN_WMINSB, +- ARM_BUILTIN_WMINUW, +- ARM_BUILTIN_WMINUH, +- ARM_BUILTIN_WMINUB, +- +- ARM_BUILTIN_WMULUM, +- ARM_BUILTIN_WMULSM, +- ARM_BUILTIN_WMULUL, +- +- ARM_BUILTIN_PSADBH, +- ARM_BUILTIN_WSHUFH, +- +- ARM_BUILTIN_WSLLH, +- ARM_BUILTIN_WSLLW, +- ARM_BUILTIN_WSLLD, +- ARM_BUILTIN_WSRAH, +- ARM_BUILTIN_WSRAW, +- ARM_BUILTIN_WSRAD, +- ARM_BUILTIN_WSRLH, +- ARM_BUILTIN_WSRLW, +- ARM_BUILTIN_WSRLD, +- ARM_BUILTIN_WRORH, +- ARM_BUILTIN_WRORW, +- ARM_BUILTIN_WRORD, +- ARM_BUILTIN_WSLLHI, +- ARM_BUILTIN_WSLLWI, +- ARM_BUILTIN_WSLLDI, +- ARM_BUILTIN_WSRAHI, +- ARM_BUILTIN_WSRAWI, +- ARM_BUILTIN_WSRADI, +- ARM_BUILTIN_WSRLHI, +- ARM_BUILTIN_WSRLWI, +- ARM_BUILTIN_WSRLDI, +- ARM_BUILTIN_WRORHI, +- ARM_BUILTIN_WRORWI, +- ARM_BUILTIN_WRORDI, +- +- ARM_BUILTIN_WUNPCKIHB, +- ARM_BUILTIN_WUNPCKIHH, +- ARM_BUILTIN_WUNPCKIHW, +- ARM_BUILTIN_WUNPCKILB, +- ARM_BUILTIN_WUNPCKILH, +- ARM_BUILTIN_WUNPCKILW, +- +- ARM_BUILTIN_WUNPCKEHSB, +- ARM_BUILTIN_WUNPCKEHSH, +- ARM_BUILTIN_WUNPCKEHSW, +- ARM_BUILTIN_WUNPCKEHUB, +- ARM_BUILTIN_WUNPCKEHUH, +- ARM_BUILTIN_WUNPCKEHUW, +- ARM_BUILTIN_WUNPCKELSB, +- ARM_BUILTIN_WUNPCKELSH, +- ARM_BUILTIN_WUNPCKELSW, +- ARM_BUILTIN_WUNPCKELUB, +- ARM_BUILTIN_WUNPCKELUH, +- ARM_BUILTIN_WUNPCKELUW, +- +- ARM_BUILTIN_THREAD_POINTER, +- +- ARM_BUILTIN_NEON_BASE, +- +- ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE /* FIXME: Wrong! */ +-}; +- + /* Do not emit .note.GNU-stack by default. 
*/ + #ifndef NEED_INDICATE_EXEC_STACK + #define NEED_INDICATE_EXEC_STACK 0 + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106796.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106796.patch new file mode 100644 index 0000000000..1a940975f3 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106796.patch @@ -0,0 +1,1255 @@ +2011-08-25 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF mainline: + + 2011-08-19 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * tree-ssa-math-opts.c (is_widening_mult_rhs_p): Handle constants + beyond conversions. + (convert_mult_to_widen): Convert constant inputs to the right type. + (convert_plusminus_to_widen): Don't automatically reject inputs that + are not an SSA_NAME. + Convert constant inputs to the right type. + + gcc/testsuite/ + * gcc.target/arm/wmul-11.c: New file. + * gcc.target/arm/wmul-12.c: New file. + * gcc.target/arm/wmul-13.c: New file. + + 2011-08-19 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * tree-ssa-math-opts.c (convert_plusminus_to_widen): Convert add_rhs + to the correct type. + + gcc/testsuite/ + * gcc.target/arm/wmul-10.c: New file. + + 2011-08-19 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * tree-ssa-math-opts.c (convert_mult_to_widen): Better handle + unsigned inputs of different modes. + (convert_plusminus_to_widen): Likewise. + + gcc/testsuite/ + * gcc.target/arm/wmul-9.c: New file. + * gcc.target/arm/wmul-bitfield-2.c: New file. + + 2011-08-19 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * tree-ssa-math-opts.c (is_widening_mult_rhs_p): Add new argument + 'type'. + Use 'type' from caller, not inferred from 'rhs'. + Don't reject non-conversion statements. Do return lhs in this case. + (is_widening_mult_p): Add new argument 'type'. + Use 'type' from caller, not inferred from 'stmt'. + Pass type to is_widening_mult_rhs_p. + (convert_mult_to_widen): Pass type to is_widening_mult_p. + (convert_plusminus_to_widen): Likewise. + + gcc/testsuite/ + * gcc.target/arm/wmul-8.c: New file. + + 2011-08-19 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * tree-ssa-math-opts.c (is_widening_mult_p): Remove FIXME. + Ensure the the larger type is the first operand. + + gcc/testsuite/ + * gcc.target/arm/wmul-7.c: New file. + + 2011-08-19 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * tree-ssa-math-opts.c (convert_mult_to_widen): Convert + unsupported unsigned multiplies to signed. + (convert_plusminus_to_widen): Likewise. + + gcc/testsuite/ + * gcc.target/arm/wmul-6.c: New file. + + 2011-08-19 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * tree-ssa-math-opts.c (convert_plusminus_to_widen): Permit a single + conversion statement separating multiply-and-accumulate. + + gcc/testsuite/ + * gcc.target/arm/wmul-5.c: New file. + * gcc.target/arm/no-wmla-1.c: New file. + + 2011-08-19 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/arm.md (maddhidi4): Remove '*' from name. + * expr.c (expand_expr_real_2): Use find_widening_optab_handler. + * optabs.c (find_widening_optab_handler_and_mode): New function. + (expand_widen_pattern_expr): Use find_widening_optab_handler. + (expand_binop_directly): Likewise. + (expand_binop): Likewise. + * optabs.h (find_widening_optab_handler): New macro define. + (find_widening_optab_handler_and_mode): New prototype. + * tree-cfg.c (verify_gimple_assign_binary): Adjust WIDEN_MULT_EXPR + type precision rules. + (verify_gimple_assign_ternary): Likewise for WIDEN_MULT_PLUS_EXPR. 
+ * tree-ssa-math-opts.c (build_and_insert_cast): New function. + (is_widening_mult_rhs_p): Allow widening by more than one mode. + Explicitly disallow mis-matched input types. + (convert_mult_to_widen): Use find_widening_optab_handler, and cast + input types to fit the new handler. + (convert_plusminus_to_widen): Likewise. + + gcc/testsuite/ + * gcc.target/arm/wmul-bitfield-1.c: New file. + + 2011-08-19 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * expr.c (expand_expr_real_2): Use widening_optab_handler. + * genopinit.c (optabs): Use set_widening_optab_handler for $N. + (gen_insn): $N now means $a must be wider than $b, not consecutive. + * optabs.c (widened_mode): New function. + (expand_widen_pattern_expr): Use widening_optab_handler. + (expand_binop_directly): Likewise. + (expand_binop): Likewise. + * optabs.h (widening_optab_handlers): New struct. + (optab_d): New member, 'widening'. + (widening_optab_handler): New function. + (set_widening_optab_handler): New function. + * tree-ssa-math-opts.c (convert_mult_to_widen): Use + widening_optab_handler. + (convert_plusminus_to_widen): Likewise. + +=== modified file 'gcc/config/arm/arm.md' +--- old/gcc/config/arm/arm.md 2011-08-13 08:32:32 +0000 ++++ new/gcc/config/arm/arm.md 2011-08-25 11:42:09 +0000 +@@ -1839,7 +1839,7 @@ + (set_attr "predicable" "yes")] + ) + +-(define_insn "*maddhidi4" ++(define_insn "maddhidi4" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (plus:DI + (mult:DI (sign_extend:DI + +=== modified file 'gcc/expr.c' +--- old/gcc/expr.c 2011-08-13 08:32:32 +0000 ++++ new/gcc/expr.c 2011-08-25 11:42:09 +0000 +@@ -7688,18 +7688,16 @@ + { + enum machine_mode innermode = TYPE_MODE (TREE_TYPE (treeop0)); + this_optab = usmul_widen_optab; +- if (mode == GET_MODE_2XWIDER_MODE (innermode)) ++ if (find_widening_optab_handler (this_optab, mode, innermode, 0) ++ != CODE_FOR_nothing) + { +- if (optab_handler (this_optab, mode) != CODE_FOR_nothing) +- { +- if (TYPE_UNSIGNED (TREE_TYPE (treeop0))) +- expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, +- EXPAND_NORMAL); +- else +- expand_operands (treeop0, treeop1, NULL_RTX, &op1, &op0, +- EXPAND_NORMAL); +- goto binop3; +- } ++ if (TYPE_UNSIGNED (TREE_TYPE (treeop0))) ++ expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, ++ EXPAND_NORMAL); ++ else ++ expand_operands (treeop0, treeop1, NULL_RTX, &op1, &op0, ++ EXPAND_NORMAL); ++ goto binop3; + } + } + /* Check for a multiplication with matching signedness. */ +@@ -7714,10 +7712,10 @@ + optab other_optab = zextend_p ? smul_widen_optab : umul_widen_optab; + this_optab = zextend_p ? umul_widen_optab : smul_widen_optab; + +- if (mode == GET_MODE_2XWIDER_MODE (innermode) +- && TREE_CODE (treeop0) != INTEGER_CST) ++ if (TREE_CODE (treeop0) != INTEGER_CST) + { +- if (optab_handler (this_optab, mode) != CODE_FOR_nothing) ++ if (find_widening_optab_handler (this_optab, mode, innermode, 0) ++ != CODE_FOR_nothing) + { + expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, + EXPAND_NORMAL); +@@ -7725,7 +7723,8 @@ + unsignedp, this_optab); + return REDUCE_BIT_FIELD (temp); + } +- if (optab_handler (other_optab, mode) != CODE_FOR_nothing ++ if (find_widening_optab_handler (other_optab, mode, innermode, 0) ++ != CODE_FOR_nothing + && innermode == word_mode) + { + rtx htem, hipart; + +=== modified file 'gcc/genopinit.c' +--- old/gcc/genopinit.c 2011-05-05 15:43:06 +0000 ++++ new/gcc/genopinit.c 2011-07-15 13:06:31 +0000 +@@ -46,10 +46,12 @@ + used. 
$A and $B are replaced with the full name of the mode; $a and $b + are replaced with the short form of the name, as above. + +- If $N is present in the pattern, it means the two modes must be consecutive +- widths in the same mode class (e.g, QImode and HImode). $I means that +- only full integer modes should be considered for the next mode, and $F +- means that only float modes should be considered. ++ If $N is present in the pattern, it means the two modes must be in ++ the same mode class, and $b must be greater than $a (e.g, QImode ++ and HImode). ++ ++ $I means that only full integer modes should be considered for the ++ next mode, and $F means that only float modes should be considered. + $P means that both full and partial integer modes should be considered. + $Q means that only fixed-point modes should be considered. + +@@ -99,17 +101,17 @@ + "set_optab_handler (smulv_optab, $A, CODE_FOR_$(mulv$I$a3$))", + "set_optab_handler (umul_highpart_optab, $A, CODE_FOR_$(umul$a3_highpart$))", + "set_optab_handler (smul_highpart_optab, $A, CODE_FOR_$(smul$a3_highpart$))", +- "set_optab_handler (smul_widen_optab, $B, CODE_FOR_$(mul$a$b3$)$N)", +- "set_optab_handler (umul_widen_optab, $B, CODE_FOR_$(umul$a$b3$)$N)", +- "set_optab_handler (usmul_widen_optab, $B, CODE_FOR_$(usmul$a$b3$)$N)", +- "set_optab_handler (smadd_widen_optab, $B, CODE_FOR_$(madd$a$b4$)$N)", +- "set_optab_handler (umadd_widen_optab, $B, CODE_FOR_$(umadd$a$b4$)$N)", +- "set_optab_handler (ssmadd_widen_optab, $B, CODE_FOR_$(ssmadd$a$b4$)$N)", +- "set_optab_handler (usmadd_widen_optab, $B, CODE_FOR_$(usmadd$a$b4$)$N)", +- "set_optab_handler (smsub_widen_optab, $B, CODE_FOR_$(msub$a$b4$)$N)", +- "set_optab_handler (umsub_widen_optab, $B, CODE_FOR_$(umsub$a$b4$)$N)", +- "set_optab_handler (ssmsub_widen_optab, $B, CODE_FOR_$(ssmsub$a$b4$)$N)", +- "set_optab_handler (usmsub_widen_optab, $B, CODE_FOR_$(usmsub$a$b4$)$N)", ++ "set_widening_optab_handler (smul_widen_optab, $B, $A, CODE_FOR_$(mul$a$b3$)$N)", ++ "set_widening_optab_handler (umul_widen_optab, $B, $A, CODE_FOR_$(umul$a$b3$)$N)", ++ "set_widening_optab_handler (usmul_widen_optab, $B, $A, CODE_FOR_$(usmul$a$b3$)$N)", ++ "set_widening_optab_handler (smadd_widen_optab, $B, $A, CODE_FOR_$(madd$a$b4$)$N)", ++ "set_widening_optab_handler (umadd_widen_optab, $B, $A, CODE_FOR_$(umadd$a$b4$)$N)", ++ "set_widening_optab_handler (ssmadd_widen_optab, $B, $A, CODE_FOR_$(ssmadd$a$b4$)$N)", ++ "set_widening_optab_handler (usmadd_widen_optab, $B, $A, CODE_FOR_$(usmadd$a$b4$)$N)", ++ "set_widening_optab_handler (smsub_widen_optab, $B, $A, CODE_FOR_$(msub$a$b4$)$N)", ++ "set_widening_optab_handler (umsub_widen_optab, $B, $A, CODE_FOR_$(umsub$a$b4$)$N)", ++ "set_widening_optab_handler (ssmsub_widen_optab, $B, $A, CODE_FOR_$(ssmsub$a$b4$)$N)", ++ "set_widening_optab_handler (usmsub_widen_optab, $B, $A, CODE_FOR_$(usmsub$a$b4$)$N)", + "set_optab_handler (sdiv_optab, $A, CODE_FOR_$(div$a3$))", + "set_optab_handler (ssdiv_optab, $A, CODE_FOR_$(ssdiv$Q$a3$))", + "set_optab_handler (sdivv_optab, $A, CODE_FOR_$(div$V$I$a3$))", +@@ -304,7 +306,7 @@ + { + int force_float = 0, force_int = 0, force_partial_int = 0; + int force_fixed = 0; +- int force_consec = 0; ++ int force_wider = 0; + int matches = 1; + + for (pp = optabs[pindex]; pp[0] != '$' || pp[1] != '('; pp++) +@@ -322,7 +324,7 @@ + switch (*++pp) + { + case 'N': +- force_consec = 1; ++ force_wider = 1; + break; + case 'I': + force_int = 1; +@@ -391,7 +393,10 @@ + || mode_class[i] == MODE_VECTOR_FRACT + || mode_class[i] == MODE_VECTOR_UFRACT + 
|| mode_class[i] == MODE_VECTOR_ACCUM +- || mode_class[i] == MODE_VECTOR_UACCUM)) ++ || mode_class[i] == MODE_VECTOR_UACCUM) ++ && (! force_wider ++ || *pp == 'a' ++ || m1 < i)) + break; + } + +@@ -411,8 +416,7 @@ + } + + if (matches && pp[0] == '$' && pp[1] == ')' +- && *np == 0 +- && (! force_consec || (int) GET_MODE_WIDER_MODE(m1) == m2)) ++ && *np == 0) + break; + } + + +=== modified file 'gcc/optabs.c' +--- old/gcc/optabs.c 2011-07-04 14:03:49 +0000 ++++ new/gcc/optabs.c 2011-08-11 15:46:01 +0000 +@@ -225,6 +225,61 @@ + return 1; + } + ++/* Given two input operands, OP0 and OP1, determine what the correct from_mode ++ for a widening operation would be. In most cases this would be OP0, but if ++ that's a constant it'll be VOIDmode, which isn't useful. */ ++ ++static enum machine_mode ++widened_mode (enum machine_mode to_mode, rtx op0, rtx op1) ++{ ++ enum machine_mode m0 = GET_MODE (op0); ++ enum machine_mode m1 = GET_MODE (op1); ++ enum machine_mode result; ++ ++ if (m0 == VOIDmode && m1 == VOIDmode) ++ return to_mode; ++ else if (m0 == VOIDmode || GET_MODE_SIZE (m0) < GET_MODE_SIZE (m1)) ++ result = m1; ++ else ++ result = m0; ++ ++ if (GET_MODE_SIZE (result) > GET_MODE_SIZE (to_mode)) ++ return to_mode; ++ ++ return result; ++} ++ ++/* Find a widening optab even if it doesn't widen as much as we want. ++ E.g. if from_mode is HImode, and to_mode is DImode, and there is no ++ direct HI->SI insn, then return SI->DI, if that exists. ++ If PERMIT_NON_WIDENING is non-zero then this can be used with ++ non-widening optabs also. */ ++ ++enum insn_code ++find_widening_optab_handler_and_mode (optab op, enum machine_mode to_mode, ++ enum machine_mode from_mode, ++ int permit_non_widening, ++ enum machine_mode *found_mode) ++{ ++ for (; (permit_non_widening || from_mode != to_mode) ++ && GET_MODE_SIZE (from_mode) <= GET_MODE_SIZE (to_mode) ++ && from_mode != VOIDmode; ++ from_mode = GET_MODE_WIDER_MODE (from_mode)) ++ { ++ enum insn_code handler = widening_optab_handler (op, to_mode, ++ from_mode); ++ ++ if (handler != CODE_FOR_nothing) ++ { ++ if (found_mode) ++ *found_mode = from_mode; ++ return handler; ++ } ++ } ++ ++ return CODE_FOR_nothing; ++} ++ + /* Widen OP to MODE and return the rtx for the widened operand. UNSIGNEDP + says whether OP is signed or unsigned. NO_EXTEND is nonzero if we need + not actually do a sign-extend or zero-extend, but can leave the +@@ -517,8 +572,9 @@ + optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), optab_default); + if (ops->code == WIDEN_MULT_PLUS_EXPR + || ops->code == WIDEN_MULT_MINUS_EXPR) +- icode = (int) optab_handler (widen_pattern_optab, +- TYPE_MODE (TREE_TYPE (ops->op2))); ++ icode = (int) find_widening_optab_handler (widen_pattern_optab, ++ TYPE_MODE (TREE_TYPE (ops->op2)), ++ tmode0, 0); + else + icode = (int) optab_handler (widen_pattern_optab, tmode0); + gcc_assert (icode != CODE_FOR_nothing); +@@ -1389,7 +1445,9 @@ + rtx target, int unsignedp, enum optab_methods methods, + rtx last) + { +- int icode = (int) optab_handler (binoptab, mode); ++ enum machine_mode from_mode = widened_mode (mode, op0, op1); ++ int icode = (int) find_widening_optab_handler (binoptab, mode, ++ from_mode, 1); + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + enum machine_mode mode1 = insn_data[icode].operand[2].mode; + enum machine_mode tmp_mode; +@@ -1546,7 +1604,9 @@ + /* If we can do it with a three-operand insn, do so. 
*/ + + if (methods != OPTAB_MUST_WIDEN +- && optab_handler (binoptab, mode) != CODE_FOR_nothing) ++ && find_widening_optab_handler (binoptab, mode, ++ widened_mode (mode, op0, op1), 1) ++ != CODE_FOR_nothing) + { + temp = expand_binop_directly (mode, binoptab, op0, op1, target, + unsignedp, methods, last); +@@ -1586,8 +1646,9 @@ + + if (binoptab == smul_optab + && GET_MODE_WIDER_MODE (mode) != VOIDmode +- && (optab_handler ((unsignedp ? umul_widen_optab : smul_widen_optab), +- GET_MODE_WIDER_MODE (mode)) ++ && (widening_optab_handler ((unsignedp ? umul_widen_optab ++ : smul_widen_optab), ++ GET_MODE_WIDER_MODE (mode), mode) + != CODE_FOR_nothing)) + { + temp = expand_binop (GET_MODE_WIDER_MODE (mode), +@@ -1618,9 +1679,11 @@ + if (optab_handler (binoptab, wider_mode) != CODE_FOR_nothing + || (binoptab == smul_optab + && GET_MODE_WIDER_MODE (wider_mode) != VOIDmode +- && (optab_handler ((unsignedp ? umul_widen_optab +- : smul_widen_optab), +- GET_MODE_WIDER_MODE (wider_mode)) ++ && (find_widening_optab_handler ((unsignedp ++ ? umul_widen_optab ++ : smul_widen_optab), ++ GET_MODE_WIDER_MODE (wider_mode), ++ mode, 0) + != CODE_FOR_nothing))) + { + rtx xop0 = op0, xop1 = op1; +@@ -2043,8 +2106,8 @@ + && optab_handler (add_optab, word_mode) != CODE_FOR_nothing) + { + rtx product = NULL_RTX; +- +- if (optab_handler (umul_widen_optab, mode) != CODE_FOR_nothing) ++ if (widening_optab_handler (umul_widen_optab, mode, word_mode) ++ != CODE_FOR_nothing) + { + product = expand_doubleword_mult (mode, op0, op1, target, + true, methods); +@@ -2053,7 +2116,8 @@ + } + + if (product == NULL_RTX +- && optab_handler (smul_widen_optab, mode) != CODE_FOR_nothing) ++ && widening_optab_handler (smul_widen_optab, mode, word_mode) ++ != CODE_FOR_nothing) + { + product = expand_doubleword_mult (mode, op0, op1, target, + false, methods); +@@ -2144,7 +2208,8 @@ + wider_mode != VOIDmode; + wider_mode = GET_MODE_WIDER_MODE (wider_mode)) + { +- if (optab_handler (binoptab, wider_mode) != CODE_FOR_nothing ++ if (find_widening_optab_handler (binoptab, wider_mode, mode, 1) ++ != CODE_FOR_nothing + || (methods == OPTAB_LIB + && optab_libfunc (binoptab, wider_mode))) + { + +=== modified file 'gcc/optabs.h' +--- old/gcc/optabs.h 2011-05-05 15:43:06 +0000 ++++ new/gcc/optabs.h 2011-07-27 14:12:45 +0000 +@@ -42,6 +42,11 @@ + int insn_code; + }; + ++struct widening_optab_handlers ++{ ++ struct optab_handlers handlers[NUM_MACHINE_MODES][NUM_MACHINE_MODES]; ++}; ++ + struct optab_d + { + enum rtx_code code; +@@ -50,6 +55,7 @@ + void (*libcall_gen)(struct optab_d *, const char *name, char suffix, + enum machine_mode); + struct optab_handlers handlers[NUM_MACHINE_MODES]; ++ struct widening_optab_handlers *widening; + }; + typedef struct optab_d * optab; + +@@ -799,6 +805,15 @@ + extern void emit_unop_insn (int, rtx, rtx, enum rtx_code); + extern bool maybe_emit_unop_insn (int, rtx, rtx, enum rtx_code); + ++/* Find a widening optab even if it doesn't widen as much as we want. */ ++#define find_widening_optab_handler(A,B,C,D) \ ++ find_widening_optab_handler_and_mode (A, B, C, D, NULL) ++extern enum insn_code find_widening_optab_handler_and_mode (optab, ++ enum machine_mode, ++ enum machine_mode, ++ int, ++ enum machine_mode *); ++ + /* An extra flag to control optab_for_tree_code's behavior. This is needed to + distinguish between machines with a vector shift that takes a scalar for the + shift amount vs. machines that take a vector for the shift amount. 
*/ +@@ -874,6 +889,23 @@ + + (int) CODE_FOR_nothing); + } + ++/* Like optab_handler, but for widening_operations that have a TO_MODE and ++ a FROM_MODE. */ ++ ++static inline enum insn_code ++widening_optab_handler (optab op, enum machine_mode to_mode, ++ enum machine_mode from_mode) ++{ ++ if (to_mode == from_mode || from_mode == VOIDmode) ++ return optab_handler (op, to_mode); ++ ++ if (op->widening) ++ return (enum insn_code) (op->widening->handlers[(int) to_mode][(int) from_mode].insn_code ++ + (int) CODE_FOR_nothing); ++ ++ return CODE_FOR_nothing; ++} ++ + /* Record that insn CODE should be used to implement mode MODE of OP. */ + + static inline void +@@ -882,6 +914,26 @@ + op->handlers[(int) mode].insn_code = (int) code - (int) CODE_FOR_nothing; + } + ++/* Like set_optab_handler, but for widening operations that have a TO_MODE ++ and a FROM_MODE. */ ++ ++static inline void ++set_widening_optab_handler (optab op, enum machine_mode to_mode, ++ enum machine_mode from_mode, enum insn_code code) ++{ ++ if (to_mode == from_mode) ++ set_optab_handler (op, to_mode, code); ++ else ++ { ++ if (op->widening == NULL) ++ op->widening = (struct widening_optab_handlers *) ++ xcalloc (1, sizeof (struct widening_optab_handlers)); ++ ++ op->widening->handlers[(int) to_mode][(int) from_mode].insn_code ++ = (int) code - (int) CODE_FOR_nothing; ++ } ++} ++ + /* Return the insn used to perform conversion OP from mode FROM_MODE + to mode TO_MODE; return CODE_FOR_nothing if the target does not have + such an insn. */ + +=== added file 'gcc/testsuite/gcc.target/arm/no-wmla-1.c' +--- old/gcc/testsuite/gcc.target/arm/no-wmla-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/no-wmla-1.c 2011-07-15 13:52:38 +0000 +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++int ++foo (int a, short b, short c) ++{ ++ int bc = b * c; ++ return a + (short)bc; ++} ++ ++/* { dg-final { scan-assembler "mul" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/wmul-10.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-10.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-10.c 2011-07-18 12:56:20 +0000 +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++unsigned long long ++foo (unsigned short a, unsigned short *b, unsigned short *c) ++{ ++ return (unsigned)a + (unsigned long long)*b * (unsigned long long)*c; ++} ++ ++/* { dg-final { scan-assembler "umlal" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/wmul-11.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-11.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-11.c 2011-07-22 15:46:42 +0000 +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++long long ++foo (int *b) ++{ ++ return 10 * (long long)*b; ++} ++ ++/* { dg-final { scan-assembler "smull" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/wmul-12.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-12.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-12.c 2011-07-22 15:46:42 +0000 +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++long long ++foo (int *b, int *c) ++{ ++ int tmp = *b * *c; ++ return 10 + (long long)tmp; ++} ++ ++/* { dg-final { scan-assembler "smlal" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/wmul-13.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-13.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-13.c 2011-07-22 15:46:42 +0000 +@@ 
-0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++long long ++foo (int *a, int *b) ++{ ++ return *a + (long long)*b * 10; ++} ++ ++/* { dg-final { scan-assembler "smlal" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/wmul-5.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-5.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-5.c 2011-07-15 13:52:38 +0000 +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++long long ++foo (long long a, char *b, char *c) ++{ ++ return a + *b * *c; ++} ++ ++/* { dg-final { scan-assembler "umlal" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/wmul-6.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-6.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-6.c 2011-07-15 13:59:11 +0000 +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++long long ++foo (long long a, unsigned char *b, signed char *c) ++{ ++ return a + (long long)*b * (long long)*c; ++} ++ ++/* { dg-final { scan-assembler "smlal" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/wmul-7.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-7.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-7.c 2011-07-15 14:11:23 +0000 +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++unsigned long long ++foo (unsigned long long a, unsigned char *b, unsigned short *c) ++{ ++ return a + *b * *c; ++} ++ ++/* { dg-final { scan-assembler "umlal" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/wmul-8.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-8.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-8.c 2011-07-15 14:16:54 +0000 +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++long long ++foo (long long a, int *b, int *c) ++{ ++ return a + *b * *c; ++} ++ ++/* { dg-final { scan-assembler "smlal" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/wmul-9.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-9.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-9.c 2011-07-15 14:22:39 +0000 +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++long long ++foo (long long a, short *b, char *c) ++{ ++ return a + *b * *c; ++} ++ ++/* { dg-final { scan-assembler "smlalbb" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/wmul-bitfield-1.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-bitfield-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-bitfield-1.c 2011-07-15 13:44:50 +0000 +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++struct bf ++{ ++ int a : 3; ++ int b : 15; ++ int c : 3; ++}; ++ ++long long ++foo (long long a, struct bf b, struct bf c) ++{ ++ return a + b.b * c.b; ++} ++ ++/* { dg-final { scan-assembler "smlalbb" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/wmul-bitfield-2.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-bitfield-2.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-bitfield-2.c 2011-07-15 14:22:39 +0000 +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++struct bf ++{ ++ int a : 3; ++ unsigned int b : 15; ++ int c : 3; ++}; ++ ++long long ++foo (long long a, struct bf b, struct bf c) ++{ ++ return a + b.b * c.c; ++} ++ ++/* { dg-final { scan-assembler "smlalbb" } } */ + +=== modified file 'gcc/tree-cfg.c' +--- 
old/gcc/tree-cfg.c 2011-07-01 09:19:21 +0000 ++++ new/gcc/tree-cfg.c 2011-07-15 13:44:50 +0000 +@@ -3574,7 +3574,7 @@ + case WIDEN_MULT_EXPR: + if (TREE_CODE (lhs_type) != INTEGER_TYPE) + return true; +- return ((2 * TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (lhs_type)) ++ return ((2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type)) + || (TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (rhs2_type))); + + case WIDEN_SUM_EXPR: +@@ -3667,7 +3667,7 @@ + && !FIXED_POINT_TYPE_P (rhs1_type)) + || !useless_type_conversion_p (rhs1_type, rhs2_type) + || !useless_type_conversion_p (lhs_type, rhs3_type) +- || 2 * TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (lhs_type) ++ || 2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type) + || TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (rhs2_type)) + { + error ("type mismatch in widening multiply-accumulate expression"); + +=== modified file 'gcc/tree-ssa-math-opts.c' +--- old/gcc/tree-ssa-math-opts.c 2011-03-11 16:36:16 +0000 ++++ new/gcc/tree-ssa-math-opts.c 2011-08-09 10:26:48 +0000 +@@ -1266,39 +1266,67 @@ + } + }; + +-/* Return true if RHS is a suitable operand for a widening multiplication. ++/* Build a gimple assignment to cast VAL to TARGET. Insert the statement ++ prior to GSI's current position, and return the fresh SSA name. */ ++ ++static tree ++build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc, ++ tree target, tree val) ++{ ++ tree result = make_ssa_name (target, NULL); ++ gimple stmt = gimple_build_assign_with_ops (CONVERT_EXPR, result, val, NULL); ++ gimple_set_location (stmt, loc); ++ gsi_insert_before (gsi, stmt, GSI_SAME_STMT); ++ return result; ++} ++ ++/* Return true if RHS is a suitable operand for a widening multiplication, ++ assuming a target type of TYPE. + There are two cases: + +- - RHS makes some value twice as wide. Store that value in *NEW_RHS_OUT +- if so, and store its type in *TYPE_OUT. ++ - RHS makes some value at least twice as wide. Store that value ++ in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT. + + - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so, + but leave *TYPE_OUT untouched. */ + + static bool +-is_widening_mult_rhs_p (tree rhs, tree *type_out, tree *new_rhs_out) ++is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out, ++ tree *new_rhs_out) + { + gimple stmt; +- tree type, type1, rhs1; ++ tree type1, rhs1; + enum tree_code rhs_code; + + if (TREE_CODE (rhs) == SSA_NAME) + { +- type = TREE_TYPE (rhs); + stmt = SSA_NAME_DEF_STMT (rhs); +- if (!is_gimple_assign (stmt)) +- return false; +- +- rhs_code = gimple_assign_rhs_code (stmt); +- if (TREE_CODE (type) == INTEGER_TYPE +- ? !CONVERT_EXPR_CODE_P (rhs_code) +- : rhs_code != FIXED_CONVERT_EXPR) +- return false; +- +- rhs1 = gimple_assign_rhs1 (stmt); ++ if (is_gimple_assign (stmt)) ++ { ++ rhs_code = gimple_assign_rhs_code (stmt); ++ if (TREE_CODE (type) == INTEGER_TYPE ++ ? !CONVERT_EXPR_CODE_P (rhs_code) ++ : rhs_code != FIXED_CONVERT_EXPR) ++ rhs1 = rhs; ++ else ++ { ++ rhs1 = gimple_assign_rhs1 (stmt); ++ ++ if (TREE_CODE (rhs1) == INTEGER_CST) ++ { ++ *new_rhs_out = rhs1; ++ *type_out = NULL; ++ return true; ++ } ++ } ++ } ++ else ++ rhs1 = rhs; ++ + type1 = TREE_TYPE (rhs1); ++ + if (TREE_CODE (type1) != TREE_CODE (type) +- || TYPE_PRECISION (type1) * 2 != TYPE_PRECISION (type)) ++ || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type)) + return false; + + *new_rhs_out = rhs1; +@@ -1316,28 +1344,27 @@ + return false; + } + +-/* Return true if STMT performs a widening multiplication. 
If so, +- store the unwidened types of the operands in *TYPE1_OUT and *TYPE2_OUT +- respectively. Also fill *RHS1_OUT and *RHS2_OUT such that converting +- those operands to types *TYPE1_OUT and *TYPE2_OUT would give the +- operands of the multiplication. */ ++/* Return true if STMT performs a widening multiplication, assuming the ++ output type is TYPE. If so, store the unwidened types of the operands ++ in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and ++ *RHS2_OUT such that converting those operands to types *TYPE1_OUT ++ and *TYPE2_OUT would give the operands of the multiplication. */ + + static bool +-is_widening_mult_p (gimple stmt, ++is_widening_mult_p (tree type, gimple stmt, + tree *type1_out, tree *rhs1_out, + tree *type2_out, tree *rhs2_out) + { +- tree type; +- +- type = TREE_TYPE (gimple_assign_lhs (stmt)); + if (TREE_CODE (type) != INTEGER_TYPE + && TREE_CODE (type) != FIXED_POINT_TYPE) + return false; + +- if (!is_widening_mult_rhs_p (gimple_assign_rhs1 (stmt), type1_out, rhs1_out)) ++ if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out, ++ rhs1_out)) + return false; + +- if (!is_widening_mult_rhs_p (gimple_assign_rhs2 (stmt), type2_out, rhs2_out)) ++ if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out, ++ rhs2_out)) + return false; + + if (*type1_out == NULL) +@@ -1354,6 +1381,18 @@ + *type2_out = *type1_out; + } + ++ /* Ensure that the larger of the two operands comes first. */ ++ if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out)) ++ { ++ tree tmp; ++ tmp = *type1_out; ++ *type1_out = *type2_out; ++ *type2_out = tmp; ++ tmp = *rhs1_out; ++ *rhs1_out = *rhs2_out; ++ *rhs2_out = tmp; ++ } ++ + return true; + } + +@@ -1362,31 +1401,100 @@ + value is true iff we converted the statement. 
*/ + + static bool +-convert_mult_to_widen (gimple stmt) ++convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi) + { +- tree lhs, rhs1, rhs2, type, type1, type2; ++ tree lhs, rhs1, rhs2, type, type1, type2, tmp = NULL; + enum insn_code handler; ++ enum machine_mode to_mode, from_mode, actual_mode; ++ optab op; ++ int actual_precision; ++ location_t loc = gimple_location (stmt); ++ bool from_unsigned1, from_unsigned2; + + lhs = gimple_assign_lhs (stmt); + type = TREE_TYPE (lhs); + if (TREE_CODE (type) != INTEGER_TYPE) + return false; + +- if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2)) ++ if (!is_widening_mult_p (type, stmt, &type1, &rhs1, &type2, &rhs2)) + return false; + +- if (TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2)) +- handler = optab_handler (umul_widen_optab, TYPE_MODE (type)); +- else if (!TYPE_UNSIGNED (type1) && !TYPE_UNSIGNED (type2)) +- handler = optab_handler (smul_widen_optab, TYPE_MODE (type)); ++ to_mode = TYPE_MODE (type); ++ from_mode = TYPE_MODE (type1); ++ from_unsigned1 = TYPE_UNSIGNED (type1); ++ from_unsigned2 = TYPE_UNSIGNED (type2); ++ ++ if (from_unsigned1 && from_unsigned2) ++ op = umul_widen_optab; ++ else if (!from_unsigned1 && !from_unsigned2) ++ op = smul_widen_optab; + else +- handler = optab_handler (usmul_widen_optab, TYPE_MODE (type)); ++ op = usmul_widen_optab; ++ ++ handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode, ++ 0, &actual_mode); + + if (handler == CODE_FOR_nothing) +- return false; +- +- gimple_assign_set_rhs1 (stmt, fold_convert (type1, rhs1)); +- gimple_assign_set_rhs2 (stmt, fold_convert (type2, rhs2)); ++ { ++ if (op != smul_widen_optab) ++ { ++ /* We can use a signed multiply with unsigned types as long as ++ there is a wider mode to use, or it is the smaller of the two ++ types that is unsigned. Note that type1 >= type2, always. */ ++ if ((TYPE_UNSIGNED (type1) ++ && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode)) ++ || (TYPE_UNSIGNED (type2) ++ && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode))) ++ { ++ from_mode = GET_MODE_WIDER_MODE (from_mode); ++ if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode)) ++ return false; ++ } ++ ++ op = smul_widen_optab; ++ handler = find_widening_optab_handler_and_mode (op, to_mode, ++ from_mode, 0, ++ &actual_mode); ++ ++ if (handler == CODE_FOR_nothing) ++ return false; ++ ++ from_unsigned1 = from_unsigned2 = false; ++ } ++ else ++ return false; ++ } ++ ++ /* Ensure that the inputs to the handler are in the correct precison ++ for the opcode. This will be the full mode size. */ ++ actual_precision = GET_MODE_PRECISION (actual_mode); ++ if (actual_precision != TYPE_PRECISION (type1) ++ || from_unsigned1 != TYPE_UNSIGNED (type1)) ++ { ++ tmp = create_tmp_var (build_nonstandard_integer_type ++ (actual_precision, from_unsigned1), ++ NULL); ++ rhs1 = build_and_insert_cast (gsi, loc, tmp, rhs1); ++ } ++ if (actual_precision != TYPE_PRECISION (type2) ++ || from_unsigned2 != TYPE_UNSIGNED (type2)) ++ { ++ /* Reuse the same type info, if possible. */ ++ if (!tmp || from_unsigned1 != from_unsigned2) ++ tmp = create_tmp_var (build_nonstandard_integer_type ++ (actual_precision, from_unsigned2), ++ NULL); ++ rhs2 = build_and_insert_cast (gsi, loc, tmp, rhs2); ++ } ++ ++ /* Handle constants. 
*/ ++ if (TREE_CODE (rhs1) == INTEGER_CST) ++ rhs1 = fold_convert (type1, rhs1); ++ if (TREE_CODE (rhs2) == INTEGER_CST) ++ rhs2 = fold_convert (type2, rhs2); ++ ++ gimple_assign_set_rhs1 (stmt, rhs1); ++ gimple_assign_set_rhs2 (stmt, rhs2); + gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR); + update_stmt (stmt); + return true; +@@ -1403,11 +1511,17 @@ + enum tree_code code) + { + gimple rhs1_stmt = NULL, rhs2_stmt = NULL; +- tree type, type1, type2; ++ gimple conv1_stmt = NULL, conv2_stmt = NULL, conv_stmt; ++ tree type, type1, type2, optype, tmp = NULL; + tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs; + enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK; + optab this_optab; + enum tree_code wmult_code; ++ enum insn_code handler; ++ enum machine_mode to_mode, from_mode, actual_mode; ++ location_t loc = gimple_location (stmt); ++ int actual_precision; ++ bool from_unsigned1, from_unsigned2; + + lhs = gimple_assign_lhs (stmt); + type = TREE_TYPE (lhs); +@@ -1429,8 +1543,6 @@ + if (is_gimple_assign (rhs1_stmt)) + rhs1_code = gimple_assign_rhs_code (rhs1_stmt); + } +- else +- return false; + + if (TREE_CODE (rhs2) == SSA_NAME) + { +@@ -1438,57 +1550,160 @@ + if (is_gimple_assign (rhs2_stmt)) + rhs2_code = gimple_assign_rhs_code (rhs2_stmt); + } +- else +- return false; +- +- if (code == PLUS_EXPR && rhs1_code == MULT_EXPR) +- { +- if (!is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1, +- &type2, &mult_rhs2)) +- return false; +- add_rhs = rhs2; +- } +- else if (rhs2_code == MULT_EXPR) +- { +- if (!is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1, +- &type2, &mult_rhs2)) +- return false; +- add_rhs = rhs1; +- } +- else if (code == PLUS_EXPR && rhs1_code == WIDEN_MULT_EXPR) +- { +- mult_rhs1 = gimple_assign_rhs1 (rhs1_stmt); +- mult_rhs2 = gimple_assign_rhs2 (rhs1_stmt); +- type1 = TREE_TYPE (mult_rhs1); +- type2 = TREE_TYPE (mult_rhs2); +- add_rhs = rhs2; +- } +- else if (rhs2_code == WIDEN_MULT_EXPR) +- { +- mult_rhs1 = gimple_assign_rhs1 (rhs2_stmt); +- mult_rhs2 = gimple_assign_rhs2 (rhs2_stmt); +- type1 = TREE_TYPE (mult_rhs1); +- type2 = TREE_TYPE (mult_rhs2); +- add_rhs = rhs1; +- } +- else +- return false; +- +- if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2)) +- return false; ++ ++ /* Allow for one conversion statement between the multiply ++ and addition/subtraction statement. If there are more than ++ one conversions then we assume they would invalidate this ++ transformation. If that's not the case then they should have ++ been folded before now. */ ++ if (CONVERT_EXPR_CODE_P (rhs1_code)) ++ { ++ conv1_stmt = rhs1_stmt; ++ rhs1 = gimple_assign_rhs1 (rhs1_stmt); ++ if (TREE_CODE (rhs1) == SSA_NAME) ++ { ++ rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); ++ if (is_gimple_assign (rhs1_stmt)) ++ rhs1_code = gimple_assign_rhs_code (rhs1_stmt); ++ } ++ else ++ return false; ++ } ++ if (CONVERT_EXPR_CODE_P (rhs2_code)) ++ { ++ conv2_stmt = rhs2_stmt; ++ rhs2 = gimple_assign_rhs1 (rhs2_stmt); ++ if (TREE_CODE (rhs2) == SSA_NAME) ++ { ++ rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); ++ if (is_gimple_assign (rhs2_stmt)) ++ rhs2_code = gimple_assign_rhs_code (rhs2_stmt); ++ } ++ else ++ return false; ++ } ++ ++ /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call ++ is_widening_mult_p, but we still need the rhs returns. ++ ++ It might also appear that it would be sufficient to use the existing ++ operands of the widening multiply, but that would limit the choice of ++ multiply-and-accumulate instructions. 
*/ ++ if (code == PLUS_EXPR ++ && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR)) ++ { ++ if (!is_widening_mult_p (type, rhs1_stmt, &type1, &mult_rhs1, ++ &type2, &mult_rhs2)) ++ return false; ++ add_rhs = rhs2; ++ conv_stmt = conv1_stmt; ++ } ++ else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR) ++ { ++ if (!is_widening_mult_p (type, rhs2_stmt, &type1, &mult_rhs1, ++ &type2, &mult_rhs2)) ++ return false; ++ add_rhs = rhs1; ++ conv_stmt = conv2_stmt; ++ } ++ else ++ return false; ++ ++ to_mode = TYPE_MODE (type); ++ from_mode = TYPE_MODE (type1); ++ from_unsigned1 = TYPE_UNSIGNED (type1); ++ from_unsigned2 = TYPE_UNSIGNED (type2); ++ ++ /* There's no such thing as a mixed sign madd yet, so use a wider mode. */ ++ if (from_unsigned1 != from_unsigned2) ++ { ++ /* We can use a signed multiply with unsigned types as long as ++ there is a wider mode to use, or it is the smaller of the two ++ types that is unsigned. Note that type1 >= type2, always. */ ++ if ((from_unsigned1 ++ && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode)) ++ || (from_unsigned2 ++ && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode))) ++ { ++ from_mode = GET_MODE_WIDER_MODE (from_mode); ++ if (GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode)) ++ return false; ++ } ++ ++ from_unsigned1 = from_unsigned2 = false; ++ } ++ ++ /* If there was a conversion between the multiply and addition ++ then we need to make sure it fits a multiply-and-accumulate. ++ The should be a single mode change which does not change the ++ value. */ ++ if (conv_stmt) ++ { ++ /* We use the original, unmodified data types for this. */ ++ tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt)); ++ tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt)); ++ int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2); ++ bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2); ++ ++ if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type)) ++ { ++ /* Conversion is a truncate. */ ++ if (TYPE_PRECISION (to_type) < data_size) ++ return false; ++ } ++ else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)) ++ { ++ /* Conversion is an extend. Check it's the right sort. */ ++ if (TYPE_UNSIGNED (from_type) != is_unsigned ++ && !(is_unsigned && TYPE_PRECISION (from_type) > data_size)) ++ return false; ++ } ++ /* else convert is a no-op for our purposes. */ ++ } + + /* Verify that the machine can perform a widening multiply + accumulate in this mode/signedness combination, otherwise + this transformation is likely to pessimize code. */ +- this_optab = optab_for_tree_code (wmult_code, type1, optab_default); +- if (optab_handler (this_optab, TYPE_MODE (type)) == CODE_FOR_nothing) ++ optype = build_nonstandard_integer_type (from_mode, from_unsigned1); ++ this_optab = optab_for_tree_code (wmult_code, optype, optab_default); ++ handler = find_widening_optab_handler_and_mode (this_optab, to_mode, ++ from_mode, 0, &actual_mode); ++ ++ if (handler == CODE_FOR_nothing) + return false; + +- /* ??? May need some type verification here? */ +- +- gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code, +- fold_convert (type1, mult_rhs1), +- fold_convert (type2, mult_rhs2), ++ /* Ensure that the inputs to the handler are in the correct precison ++ for the opcode. This will be the full mode size. 
*/ ++ actual_precision = GET_MODE_PRECISION (actual_mode); ++ if (actual_precision != TYPE_PRECISION (type1) ++ || from_unsigned1 != TYPE_UNSIGNED (type1)) ++ { ++ tmp = create_tmp_var (build_nonstandard_integer_type ++ (actual_precision, from_unsigned1), ++ NULL); ++ mult_rhs1 = build_and_insert_cast (gsi, loc, tmp, mult_rhs1); ++ } ++ if (actual_precision != TYPE_PRECISION (type2) ++ || from_unsigned2 != TYPE_UNSIGNED (type2)) ++ { ++ if (!tmp || from_unsigned1 != from_unsigned2) ++ tmp = create_tmp_var (build_nonstandard_integer_type ++ (actual_precision, from_unsigned2), ++ NULL); ++ mult_rhs2 = build_and_insert_cast (gsi, loc, tmp, mult_rhs2); ++ } ++ ++ if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs))) ++ add_rhs = build_and_insert_cast (gsi, loc, create_tmp_var (type, NULL), ++ add_rhs); ++ ++ /* Handle constants. */ ++ if (TREE_CODE (mult_rhs1) == INTEGER_CST) ++ rhs1 = fold_convert (type1, mult_rhs1); ++ if (TREE_CODE (mult_rhs2) == INTEGER_CST) ++ rhs2 = fold_convert (type2, mult_rhs2); ++ ++ gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code, mult_rhs1, mult_rhs2, + add_rhs); + update_stmt (gsi_stmt (*gsi)); + return true; +@@ -1696,7 +1911,7 @@ + switch (code) + { + case MULT_EXPR: +- if (!convert_mult_to_widen (stmt) ++ if (!convert_mult_to_widen (stmt, &gsi) + && convert_mult_to_fma (stmt, + gimple_assign_rhs1 (stmt), + gimple_assign_rhs2 (stmt))) + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106797.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106797.patch new file mode 100644 index 0000000000..8230beb91e --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106797.patch @@ -0,0 +1,23 @@ +2011-08-26 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-08-26 Richard Sandiford <richard.sandiford@linaro.org> + + * df-problems.c (df_note_bb_compute): Pass uses rather than defs + to df_set_dead_notes_for_mw. + +=== modified file 'gcc/df-problems.c' +--- old/gcc/df-problems.c 2011-07-07 19:10:01 +0000 ++++ new/gcc/df-problems.c 2011-08-26 14:32:47 +0000 +@@ -3375,7 +3375,7 @@ + while (*mws_rec) + { + struct df_mw_hardreg *mws = *mws_rec; +- if ((DF_MWS_REG_DEF_P (mws)) ++ if (DF_MWS_REG_USE_P (mws) + && !df_ignore_stack_reg (mws->start_regno)) + { + bool really_add_notes = debug_insn != 0; + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106798.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106798.patch new file mode 100644 index 0000000000..aa067b7113 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106798.patch @@ -0,0 +1,23 @@ +2011-09-05 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + gcc/ + + 2011-08-12 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + PR target/48328 + * config/arm/arm.h (CASE_VECTOR_SHORTEN_MODE): Fix distance + for tbh instructions. + +=== modified file 'gcc/config/arm/arm.h' +--- old/gcc/config/arm/arm.h 2011-08-24 17:35:16 +0000 ++++ new/gcc/config/arm/arm.h 2011-09-05 14:32:11 +0000 +@@ -1961,7 +1961,7 @@ + : min >= -4096 && max < 4096 \ + ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, HImode) \ + : SImode) \ +- : ((min < 0 || max >= 0x2000 || !TARGET_THUMB2) ? SImode \ ++ : ((min < 0 || max >= 0x20000 || !TARGET_THUMB2) ? SImode \ + : (max >= 0x200) ? 
HImode \ + : QImode)) + + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106799.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106799.patch new file mode 100644 index 0000000000..c440db91e1 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106799.patch @@ -0,0 +1,75 @@ + 2011-09-05 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline. + 2011-08-26 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> + + * config/arm/cortex-a9.md ("cortex_a9_mult_long"): New. + ("cortex_a9_multiply_long"): New and use above. Handle all + long multiply cases. + ("cortex_a9_multiply"): Handle smmul and smmulr. + ("cortex_a9_mac"): Handle smmla. + +=== modified file 'gcc/config/arm/cortex-a9.md' +--- old/gcc/config/arm/cortex-a9.md 2011-01-18 15:28:08 +0000 ++++ new/gcc/config/arm/cortex-a9.md 2011-08-26 08:52:15 +0000 +@@ -68,7 +68,8 @@ + "cortex_a9_mac_m1*2, cortex_a9_mac_m2, cortex_a9_p0_wb") + (define_reservation "cortex_a9_mac" + "cortex_a9_multcycle1*2 ,cortex_a9_mac_m2, cortex_a9_p0_wb") +- ++(define_reservation "cortex_a9_mult_long" ++ "cortex_a9_mac_m1*3, cortex_a9_mac_m2, cortex_a9_p0_wb") + + ;; Issue at the same time along the load store pipeline and + ;; the VFP / Neon pipeline is not possible. +@@ -139,29 +140,35 @@ + (eq_attr "insn" "smlaxy")) + "cortex_a9_mac16") + +- + (define_insn_reservation "cortex_a9_multiply" 4 + (and (eq_attr "tune" "cortexa9") +- (eq_attr "insn" "mul")) ++ (eq_attr "insn" "mul,smmul,smmulr")) + "cortex_a9_mult") + + (define_insn_reservation "cortex_a9_mac" 4 + (and (eq_attr "tune" "cortexa9") +- (eq_attr "insn" "mla")) ++ (eq_attr "insn" "mla,smmla")) + "cortex_a9_mac") + ++(define_insn_reservation "cortex_a9_multiply_long" 5 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "insn" "smull,umull,smulls,umulls,smlal,smlals,umlal,umlals")) ++ "cortex_a9_mult_long") ++ + ;; An instruction with a result in E2 can be forwarded + ;; to E2 or E1 or M1 or the load store unit in the next cycle. + + (define_bypass 1 "cortex_a9_dp" + "cortex_a9_dp_shift, cortex_a9_multiply, + cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2, +- cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4") ++ cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4, ++ cortex_a9_multiply_long") + + (define_bypass 2 "cortex_a9_dp_shift" + "cortex_a9_dp_shift, cortex_a9_multiply, + cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2, +- cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4") ++ cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4, ++ cortex_a9_multiply_long") + + ;; An instruction in the load store pipeline can provide + ;; read access to a DP instruction in the P0 default pipeline +@@ -212,7 +219,7 @@ + + (define_bypass 1 + "cortex_a9_fps" +- "cortex_a9_fadd, cortex_a9_fps, cortex_a9_fcmp, cortex_a9_dp, cortex_a9_dp_shift, cortex_a9_multiply") ++ "cortex_a9_fadd, cortex_a9_fps, cortex_a9_fcmp, cortex_a9_dp, cortex_a9_dp_shift, cortex_a9_multiply, cortex_a9_multiply_long") + + ;; Scheduling on the FP_ADD pipeline. 
+ (define_reservation "ca9fp_add" "ca9_issue_vfp_neon + ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4") + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106800.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106800.patch new file mode 100644 index 0000000000..dfdeec7245 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106800.patch @@ -0,0 +1,1270 @@ +2011-09-07 Ira Rosen <ira.rosen@linaro.org> + + Backport from mainline: + + 2011-08-04 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vectorizer.h (struct _stmt_vec_info): Add new field for + pattern def statement, and its access macro. + (NUM_PATTERNS): Set to 5. + * tree-vect-loop.c (vect_determine_vectorization_factor): Handle + pattern def statement. + (vect_transform_loop): Likewise. + * tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add new + function vect_recog_over_widening_pattern (). + (vect_operation_fits_smaller_type): New function. + (vect_recog_over_widening_pattern, vect_mark_pattern_stmts): + Likewise. + (vect_pattern_recog_1): Move the code that marks pattern + statements to vect_mark_pattern_stmts (), and call it. Update + documentation. + * tree-vect-stmts.c (vect_supportable_shift): New function. + (vect_analyze_stmt): Handle pattern def statement. + (new_stmt_vec_info): Initialize pattern def statement. + + gcc/testsuite/ + * gcc.dg/vect/vect-over-widen-1.c: New test. + * gcc.dg/vect/vect-over-widen-2.c: New test. + * gcc.dg/vect/vect-over-widen-3.c: New test. + * gcc.dg/vect/vect-over-widen-4.c: New test. + + + 2011-08-09 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + PR tree-optimization/50014 + * tree-vect-loop.c (vectorizable_reduction): Get def type before + calling vect_get_vec_def_for_stmt_copy (). + + gcc/testsuite/ + PR tree-optimization/50014 + * gcc.dg/vect/pr50014.c: New test. + + + 2011-08-11 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + PR tree-optimization/50039 + * tree-vect-patterns.c (vect_operation_fits_smaller_type): Check + that DEF_STMT has a stmt_vec_info. + + gcc/testsuite/ + PR tree-optimization/50039 + * gcc.dg/vect/vect.exp: Run no-tree-fre-* tests with -fno-tree-fre. + * gcc.dg/vect/no-tree-fre-pr50039.c: New test. + + + 2011-09-04 Jakub Jelinek <jakub@redhat.com> + Ira Rosen <ira.rosen@linaro.org> + + gcc/ + PR tree-optimization/50208 + * tree-vect-patterns.c (vect_handle_widen_mult_by_const): Add an + argument. Check that def_stmt is inside the loop. + (vect_recog_widen_mult_pattern): Update calls to + vect_handle_widen_mult_by_cons. + (vect_operation_fits_smaller_type): Check that def_stmt is + inside the loop. + + gcc/testsuite/ + PR tree-optimization/50208 + * gcc.dg/vect/no-fre-pre-pr50208.c: New test. + * gcc.dg/vect/vect.exp: Run no-fre-pre-*.c tests with + -fno-tree-fre -fno-tree-pre. 
+ +=== added file 'gcc/testsuite/gcc.dg/vect/no-fre-pre-pr50208.c' +--- old/gcc/testsuite/gcc.dg/vect/no-fre-pre-pr50208.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-fre-pre-pr50208.c 2011-09-05 06:23:37 +0000 +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++ ++char c; ++int a, b; ++ ++void foo (int j) ++{ ++ int i; ++ while (--j) ++ { ++ b = 3; ++ for (i = 0; i < 2; ++i) ++ a = b ^ c; ++ } ++} ++ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== added file 'gcc/testsuite/gcc.dg/vect/no-tree-fre-pr50039.c' +--- old/gcc/testsuite/gcc.dg/vect/no-tree-fre-pr50039.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-tree-fre-pr50039.c 2011-09-05 06:23:37 +0000 +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++ ++extern unsigned char g_5; ++extern int g_31, g_76; ++int main(void) { ++ int i, j; ++ for (j=0; j < 2; ++j) { ++ g_31 = -3; ++ for (i=0; i < 2; ++i) ++ g_76 = (g_31 ? g_31+1 : 0) ^ g_5; ++ } ++} ++ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/pr50014.c' +--- old/gcc/testsuite/gcc.dg/vect/pr50014.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/pr50014.c 2011-09-05 06:23:37 +0000 +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target vect_int } */ ++ ++int f(unsigned char *s, int n) ++{ ++ int sum = 0; ++ int i; ++ ++ for (i = 0; i < n; i++) ++ sum += 256 * s[i]; ++ ++ return sum; ++} ++ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-over-widen-1.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-over-widen-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-over-widen-1.c 2011-09-05 06:23:37 +0000 +@@ -0,0 +1,64 @@ ++/* { dg-require-effective-target vect_int } */ ++/* { dg-require-effective-target vect_shift } */ ++ ++#include <stdlib.h> ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 64 ++ ++/* Modified rgb to rgb conversion from FFmpeg. 
*/ ++__attribute__ ((noinline)) void ++foo (unsigned char *src, unsigned char *dst) ++{ ++ unsigned char *s = src; ++ unsigned short *d = (unsigned short *)dst; ++ int i; ++ ++ for (i = 0; i < N/4; i++) ++ { ++ const int b = *s++; ++ const int g = *s++; ++ const int r = *s++; ++ const int a = *s++; ++ *d = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)); ++ d++; ++ } ++ ++ s = src; ++ d = (unsigned short *)dst; ++ for (i = 0; i < N/4; i++) ++ { ++ const int b = *s++; ++ const int g = *s++; ++ const int r = *s++; ++ const int a = *s++; ++ if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5))) ++ abort (); ++ d++; ++ } ++} ++ ++int main (void) ++{ ++ int i; ++ unsigned char in[N], out[N]; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ out[i] = 255; ++ __asm__ volatile (""); ++ } ++ ++ foo (in, out); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 4 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-over-widen-2.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-over-widen-2.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-over-widen-2.c 2011-09-05 06:23:37 +0000 +@@ -0,0 +1,65 @@ ++/* { dg-require-effective-target vect_int } */ ++/* { dg-require-effective-target vect_shift } */ ++ ++#include <stdlib.h> ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 64 ++ ++/* Modified rgb to rgb conversion from FFmpeg. */ ++__attribute__ ((noinline)) void ++foo (unsigned char *src, unsigned char *dst) ++{ ++ unsigned char *s = src; ++ int *d = (int *)dst; ++ int i; ++ ++ for (i = 0; i < N/4; i++) ++ { ++ const int b = *s++; ++ const int g = *s++; ++ const int r = *s++; ++ const int a = *s++; ++ *d = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)); ++ d++; ++ } ++ ++ s = src; ++ d = (int *)dst; ++ for (i = 0; i < N/4; i++) ++ { ++ const int b = *s++; ++ const int g = *s++; ++ const int r = *s++; ++ const int a = *s++; ++ if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5))) ++ abort (); ++ d++; ++ } ++} ++ ++int main (void) ++{ ++ int i; ++ unsigned char in[N], out[N]; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ out[i] = 255; ++ __asm__ volatile (""); ++ } ++ ++ foo (in, out); ++ ++ return 0; ++} ++ ++/* Final value stays in int, so no over-widening is detected at the moment. */ ++/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 0 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-over-widen-3.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-over-widen-3.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-over-widen-3.c 2011-09-05 06:23:37 +0000 +@@ -0,0 +1,64 @@ ++/* { dg-require-effective-target vect_int } */ ++/* { dg-require-effective-target vect_shift } */ ++ ++#include <stdlib.h> ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 64 ++ ++/* Modified rgb to rgb conversion from FFmpeg. 
*/ ++__attribute__ ((noinline)) void ++foo (unsigned char *src, unsigned char *dst) ++{ ++ unsigned char *s = src; ++ unsigned short *d = (unsigned short *)dst; ++ int i; ++ ++ for (i = 0; i < N/4; i++) ++ { ++ const int b = *s++; ++ const int g = *s++; ++ const int r = *s++; ++ const int a = *s++; ++ *d = ((b>>3) | ((g&0xFFC)<<3) | ((r+0xF8)>>8) | (a<<9)); ++ d++; ++ } ++ ++ s = src; ++ d = (unsigned short *)dst; ++ for (i = 0; i < N/4; i++) ++ { ++ const int b = *s++; ++ const int g = *s++; ++ const int r = *s++; ++ const int a = *s++; ++ if (*d != ((b>>3) | ((g&0xFFC)<<3) | ((r+0xF8)>>8) | (a<<9))) ++ abort (); ++ d++; ++ } ++} ++ ++int main (void) ++{ ++ int i; ++ unsigned char in[N], out[N]; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ out[i] = 255; ++ __asm__ volatile (""); ++ } ++ ++ foo (in, out); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-over-widen-4.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-over-widen-4.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-over-widen-4.c 2011-09-05 06:23:37 +0000 +@@ -0,0 +1,68 @@ ++/* { dg-require-effective-target vect_int } */ ++/* { dg-require-effective-target vect_shift } */ ++ ++#include <stdlib.h> ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 64 ++ ++/* Modified rgb to rgb conversion from FFmpeg. */ ++__attribute__ ((noinline)) int ++foo (unsigned char *src, unsigned char *dst) ++{ ++ unsigned char *s = src; ++ unsigned short *d = (unsigned short *)dst, res; ++ int i, result = 0; ++ ++ for (i = 0; i < N/4; i++) ++ { ++ const int b = *s++; ++ const int g = *s++; ++ const int r = *s++; ++ const int a = *s++; ++ res = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)); ++ *d = res; ++ result += res; ++ d++; ++ } ++ ++ s = src; ++ d = (unsigned short *)dst; ++ for (i = 0; i < N/4; i++) ++ { ++ const int b = *s++; ++ const int g = *s++; ++ const int r = *s++; ++ const int a = *s++; ++ if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5))) ++ abort (); ++ d++; ++ } ++ ++ return result; ++} ++ ++int main (void) ++{ ++ int i; ++ unsigned char in[N], out[N]; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ out[i] = 255; ++ __asm__ volatile (""); ++ } ++ ++ foo (in, out); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 4 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect.exp' +--- old/gcc/testsuite/gcc.dg/vect/vect.exp 2011-05-05 15:43:31 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect.exp 2011-09-05 06:23:37 +0000 +@@ -245,6 +245,18 @@ + dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-tree-reassoc-bb-slp-*.\[cS\]]] \ + "" $VECT_SLP_CFLAGS + ++# -fno-tree-fre ++set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS ++lappend DEFAULT_VECTCFLAGS "-fno-tree-fre" ++dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-tree-fre-*.\[cS\]]] \ ++ "" $DEFAULT_VECTCFLAGS ++ ++# -fno-tree-fre -fno-tree-pre ++set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS ++lappend DEFAULT_VECTCFLAGS "-fno-tree-fre" "-fno-tree-pre" ++dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-fre-pre*.\[cS\]]] \ ++ "" $DEFAULT_VECTCFLAGS ++ 
+ # Clean up. + set dg-do-what-default ${save-dg-do-what-default} + + +=== modified file 'gcc/tree-vect-loop.c' +--- old/gcc/tree-vect-loop.c 2011-07-11 11:02:55 +0000 ++++ new/gcc/tree-vect-loop.c 2011-09-05 06:23:37 +0000 +@@ -181,8 +181,8 @@ + stmt_vec_info stmt_info; + int i; + HOST_WIDE_INT dummy; +- gimple stmt, pattern_stmt = NULL; +- bool analyze_pattern_stmt = false; ++ gimple stmt, pattern_stmt = NULL, pattern_def_stmt = NULL; ++ bool analyze_pattern_stmt = false, pattern_def = false; + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "=== vect_determine_vectorization_factor ==="); +@@ -297,6 +297,29 @@ + || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) + analyze_pattern_stmt = true; + ++ /* If a pattern statement has a def stmt, analyze it too. */ ++ if (is_pattern_stmt_p (stmt_info) ++ && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info)) ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))) ++ { ++ if (pattern_def) ++ pattern_def = false; ++ else ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "==> examining pattern def stmt: "); ++ print_gimple_stmt (vect_dump, pattern_def_stmt, 0, ++ TDF_SLIM); ++ } ++ ++ pattern_def = true; ++ stmt = pattern_def_stmt; ++ stmt_info = vinfo_for_stmt (stmt); ++ } ++ } ++ + if (gimple_get_lhs (stmt) == NULL_TREE) + { + if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) +@@ -401,7 +424,7 @@ + || (nunits > vectorization_factor)) + vectorization_factor = nunits; + +- if (!analyze_pattern_stmt) ++ if (!analyze_pattern_stmt && !pattern_def) + gsi_next (&si); + } + } +@@ -3985,7 +4008,7 @@ + VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vect_defs = NULL; + VEC (gimple, heap) *phis = NULL; + int vec_num; +- tree def0, def1, tem; ++ tree def0, def1, tem, op0, op1 = NULL_TREE; + + if (nested_in_vect_loop_p (loop, stmt)) + { +@@ -4418,8 +4441,6 @@ + /* Handle uses. */ + if (j == 0) + { +- tree op0, op1 = NULL_TREE; +- + op0 = ops[!reduc_index]; + if (op_type == ternary_op) + { +@@ -4449,11 +4470,19 @@ + { + if (!slp_node) + { +- enum vect_def_type dt = vect_unknown_def_type; /* Dummy */ +- loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0); ++ enum vect_def_type dt; ++ gimple dummy_stmt; ++ tree dummy; ++ ++ vect_is_simple_use (ops[!reduc_index], loop_vinfo, NULL, ++ &dummy_stmt, &dummy, &dt); ++ loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, ++ loop_vec_def0); + VEC_replace (tree, vec_oprnds0, 0, loop_vec_def0); + if (op_type == ternary_op) + { ++ vect_is_simple_use (op1, loop_vinfo, NULL, &dummy_stmt, ++ &dummy, &dt); + loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, + loop_vec_def1); + VEC_replace (tree, vec_oprnds1, 0, loop_vec_def1); +@@ -4758,8 +4787,8 @@ + tree cond_expr = NULL_TREE; + gimple_seq cond_expr_stmt_list = NULL; + bool do_peeling_for_loop_bound; +- gimple stmt, pattern_stmt; +- bool transform_pattern_stmt = false; ++ gimple stmt, pattern_stmt, pattern_def_stmt; ++ bool transform_pattern_stmt = false, pattern_def = false; + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "=== vec_transform_loop ==="); +@@ -4903,6 +4932,30 @@ + || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) + transform_pattern_stmt = true; + ++ /* If pattern statement has a def stmt, vectorize it too. 
*/ ++ if (is_pattern_stmt_p (stmt_info) ++ && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info)) ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))) ++ { ++ if (pattern_def) ++ pattern_def = false; ++ else ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "==> vectorizing pattern def" ++ " stmt: "); ++ print_gimple_stmt (vect_dump, pattern_def_stmt, 0, ++ TDF_SLIM); ++ } ++ ++ pattern_def = true; ++ stmt = pattern_def_stmt; ++ stmt_info = vinfo_for_stmt (stmt); ++ } ++ } ++ + gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); + nunits = (unsigned int) TYPE_VECTOR_SUBPARTS ( + STMT_VINFO_VECTYPE (stmt_info)); +@@ -4930,7 +4983,7 @@ + /* Hybrid SLP stmts must be vectorized in addition to SLP. */ + if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info)) + { +- if (!transform_pattern_stmt) ++ if (!transform_pattern_stmt && !pattern_def) + gsi_next (&si); + continue; + } +@@ -4962,7 +5015,7 @@ + } + } + +- if (!transform_pattern_stmt) ++ if (!transform_pattern_stmt && !pattern_def) + gsi_next (&si); + } /* stmts in BB */ + } /* BBs in loop */ + +=== modified file 'gcc/tree-vect-patterns.c' +--- old/gcc/tree-vect-patterns.c 2011-07-06 12:04:10 +0000 ++++ new/gcc/tree-vect-patterns.c 2011-09-05 06:23:37 +0000 +@@ -46,11 +46,14 @@ + static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *, + tree *); + static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *); ++static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *, ++ tree *); + static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { + vect_recog_widen_mult_pattern, + vect_recog_widen_sum_pattern, + vect_recog_dot_prod_pattern, +- vect_recog_pow_pattern}; ++ vect_recog_pow_pattern, ++ vect_recog_over_widening_pattern}; + + + /* Function widened_name_p +@@ -339,12 +342,14 @@ + replace a_T = (TYPE) a_t; with a_it - (interm_type) a_t; */ + + static bool +-vect_handle_widen_mult_by_const (tree const_oprnd, tree *oprnd, ++vect_handle_widen_mult_by_const (gimple stmt, tree const_oprnd, tree *oprnd, + VEC (gimple, heap) **stmts, tree type, + tree *half_type, gimple def_stmt) + { + tree new_type, new_oprnd, tmp; + gimple new_stmt; ++ loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); ++ struct loop *loop = LOOP_VINFO_LOOP (loop_info); + + if (int_fits_type_p (const_oprnd, *half_type)) + { +@@ -354,6 +359,8 @@ + } + + if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4) ++ || !gimple_bb (def_stmt) ++ || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) + || !vinfo_for_stmt (def_stmt)) + return false; + +@@ -522,7 +529,8 @@ + { + if (TREE_CODE (oprnd0) == INTEGER_CST + && TREE_CODE (half_type1) == INTEGER_TYPE +- && vect_handle_widen_mult_by_const (oprnd0, &oprnd1, stmts, type, ++ && vect_handle_widen_mult_by_const (last_stmt, oprnd0, &oprnd1, ++ stmts, type, + &half_type1, def_stmt1)) + half_type0 = half_type1; + else +@@ -532,7 +540,8 @@ + { + if (TREE_CODE (oprnd1) == INTEGER_CST + && TREE_CODE (half_type0) == INTEGER_TYPE +- && vect_handle_widen_mult_by_const (oprnd1, &oprnd0, stmts, type, ++ && vect_handle_widen_mult_by_const (last_stmt, oprnd1, &oprnd0, ++ stmts, type, + &half_type0, def_stmt0)) + half_type1 = half_type0; + else +@@ -826,6 +835,424 @@ + } + + ++/* Return TRUE if the operation in STMT can be performed on a smaller type. ++ ++ Input: ++ STMT - a statement to check. 
++ DEF - we support operations with two operands, one of which is constant. ++ The other operand can be defined by a demotion operation, or by a ++ previous statement in a sequence of over-promoted operations. In the ++ later case DEF is used to replace that operand. (It is defined by a ++ pattern statement we created for the previous statement in the ++ sequence). ++ ++ Input/output: ++ NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not ++ NULL, it's the type of DEF. ++ STMTS - additional pattern statements. If a pattern statement (type ++ conversion) is created in this function, its original statement is ++ added to STMTS. ++ ++ Output: ++ OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new ++ operands to use in the new pattern statement for STMT (will be created ++ in vect_recog_over_widening_pattern ()). ++ NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern ++ statements for STMT: the first one is a type promotion and the second ++ one is the operation itself. We return the type promotion statement ++ in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_STMT of ++ the second pattern statement. */ ++ ++static bool ++vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type, ++ tree *op0, tree *op1, gimple *new_def_stmt, ++ VEC (gimple, heap) **stmts) ++{ ++ enum tree_code code; ++ tree const_oprnd, oprnd; ++ tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type; ++ gimple def_stmt, new_stmt; ++ bool first = false; ++ loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); ++ struct loop *loop = LOOP_VINFO_LOOP (loop_info); ++ ++ *new_def_stmt = NULL; ++ ++ if (!is_gimple_assign (stmt)) ++ return false; ++ ++ code = gimple_assign_rhs_code (stmt); ++ if (code != LSHIFT_EXPR && code != RSHIFT_EXPR ++ && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR) ++ return false; ++ ++ oprnd = gimple_assign_rhs1 (stmt); ++ const_oprnd = gimple_assign_rhs2 (stmt); ++ type = gimple_expr_type (stmt); ++ ++ if (TREE_CODE (oprnd) != SSA_NAME ++ || TREE_CODE (const_oprnd) != INTEGER_CST) ++ return false; ++ ++ /* If we are in the middle of a sequence, we use DEF from a previous ++ statement. Otherwise, OPRND has to be a result of type promotion. */ ++ if (*new_type) ++ { ++ half_type = *new_type; ++ oprnd = def; ++ } ++ else ++ { ++ first = true; ++ if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false) ++ || !gimple_bb (def_stmt) ++ || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) ++ || !vinfo_for_stmt (def_stmt)) ++ return false; ++ } ++ ++ /* Can we perform the operation on a smaller type? */ ++ switch (code) ++ { ++ case BIT_IOR_EXPR: ++ case BIT_XOR_EXPR: ++ case BIT_AND_EXPR: ++ if (!int_fits_type_p (const_oprnd, half_type)) ++ { ++ /* HALF_TYPE is not enough. Try a bigger type if possible. */ ++ if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) ++ return false; ++ ++ interm_type = build_nonstandard_integer_type ( ++ TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); ++ if (!int_fits_type_p (const_oprnd, interm_type)) ++ return false; ++ } ++ ++ break; ++ ++ case LSHIFT_EXPR: ++ /* Try intermediate type - HALF_TYPE is not enough for sure. */ ++ if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) ++ return false; ++ ++ /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size. ++ (e.g., if the original value was char, the shift amount is at most 8 ++ if we want to use short). 
*/ ++ if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1) ++ return false; ++ ++ interm_type = build_nonstandard_integer_type ( ++ TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); ++ ++ if (!vect_supportable_shift (code, interm_type)) ++ return false; ++ ++ break; ++ ++ case RSHIFT_EXPR: ++ if (vect_supportable_shift (code, half_type)) ++ break; ++ ++ /* Try intermediate type - HALF_TYPE is not supported. */ ++ if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) ++ return false; ++ ++ interm_type = build_nonstandard_integer_type ( ++ TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); ++ ++ if (!vect_supportable_shift (code, interm_type)) ++ return false; ++ ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ /* There are four possible cases: ++ 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's ++ the first statement in the sequence) ++ a. The original, HALF_TYPE, is not enough - we replace the promotion ++ from HALF_TYPE to TYPE with a promotion to INTERM_TYPE. ++ b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original ++ promotion. ++ 2. OPRND is defined by a pattern statement we created. ++ a. Its type is not sufficient for the operation, we create a new stmt: ++ a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store ++ this statement in NEW_DEF_STMT, and it is later put in ++ STMT_VINFO_PATTERN_DEF_STMT of the pattern statement for STMT. ++ b. OPRND is good to use in the new statement. */ ++ if (first) ++ { ++ if (interm_type) ++ { ++ /* Replace the original type conversion HALF_TYPE->TYPE with ++ HALF_TYPE->INTERM_TYPE. */ ++ if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt))) ++ { ++ new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); ++ /* Check if the already created pattern stmt is what we need. */ ++ if (!is_gimple_assign (new_stmt) ++ || gimple_assign_rhs_code (new_stmt) != NOP_EXPR ++ || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type) ++ return false; ++ ++ oprnd = gimple_assign_lhs (new_stmt); ++ } ++ else ++ { ++ /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */ ++ oprnd = gimple_assign_rhs1 (def_stmt); ++ tmp = create_tmp_reg (interm_type, NULL); ++ add_referenced_var (tmp); ++ new_oprnd = make_ssa_name (tmp, NULL); ++ new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, ++ oprnd, NULL_TREE); ++ SSA_NAME_DEF_STMT (new_oprnd) = new_stmt; ++ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt; ++ VEC_safe_push (gimple, heap, *stmts, def_stmt); ++ oprnd = new_oprnd; ++ } ++ } ++ else ++ { ++ /* Retrieve the operand before the type promotion. */ ++ oprnd = gimple_assign_rhs1 (def_stmt); ++ } ++ } ++ else ++ { ++ if (interm_type) ++ { ++ /* Create a type conversion HALF_TYPE->INTERM_TYPE. */ ++ tmp = create_tmp_reg (interm_type, NULL); ++ add_referenced_var (tmp); ++ new_oprnd = make_ssa_name (tmp, NULL); ++ new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, ++ oprnd, NULL_TREE); ++ SSA_NAME_DEF_STMT (new_oprnd) = new_stmt; ++ oprnd = new_oprnd; ++ *new_def_stmt = new_stmt; ++ } ++ ++ /* Otherwise, OPRND is already set. 
*/ ++ } ++ ++ if (interm_type) ++ *new_type = interm_type; ++ else ++ *new_type = half_type; ++ ++ *op0 = oprnd; ++ *op1 = fold_convert (*new_type, const_oprnd); ++ ++ return true; ++} ++ ++ ++/* Try to find a statement or a sequence of statements that can be performed ++ on a smaller type: ++ ++ type x_t; ++ TYPE x_T, res0_T, res1_T; ++ loop: ++ S1 x_t = *p; ++ S2 x_T = (TYPE) x_t; ++ S3 res0_T = op (x_T, C0); ++ S4 res1_T = op (res0_T, C1); ++ S5 ... = () res1_T; - type demotion ++ ++ where type 'TYPE' is at least double the size of type 'type', C0 and C1 are ++ constants. ++ Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either ++ be 'type' or some intermediate type. For now, we expect S5 to be a type ++ demotion operation. We also check that S3 and S4 have only one use. ++. ++ ++*/ ++static gimple ++vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts, ++ tree *type_in, tree *type_out) ++{ ++ gimple stmt = VEC_pop (gimple, *stmts); ++ gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL; ++ tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type; ++ imm_use_iterator imm_iter; ++ use_operand_p use_p; ++ int nuses = 0; ++ tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd; ++ bool first; ++ struct loop *loop = (gimple_bb (stmt))->loop_father; ++ ++ first = true; ++ while (1) ++ { ++ if (!vinfo_for_stmt (stmt) ++ || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt))) ++ return NULL; ++ ++ new_def_stmt = NULL; ++ if (!vect_operation_fits_smaller_type (stmt, var, &new_type, ++ &op0, &op1, &new_def_stmt, ++ stmts)) ++ { ++ if (first) ++ return NULL; ++ else ++ break; ++ } ++ ++ /* STMT can be performed on a smaller type. Check its uses. */ ++ lhs = gimple_assign_lhs (stmt); ++ nuses = 0; ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) ++ { ++ if (is_gimple_debug (USE_STMT (use_p))) ++ continue; ++ use_stmt = USE_STMT (use_p); ++ nuses++; ++ } ++ ++ if (nuses != 1 || !is_gimple_assign (use_stmt) ++ || !gimple_bb (use_stmt) ++ || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) ++ return NULL; ++ ++ /* Create pattern statement for STMT. */ ++ vectype = get_vectype_for_scalar_type (new_type); ++ if (!vectype) ++ return NULL; ++ ++ /* We want to collect all the statements for which we create pattern ++ statetments, except for the case when the last statement in the ++ sequence doesn't have a corresponding pattern statement. In such ++ case we associate the last pattern statement with the last statement ++ in the sequence. Therefore, we only add an original statetement to ++ the list if we know that it is not the last. */ ++ if (prev_stmt) ++ VEC_safe_push (gimple, heap, *stmts, prev_stmt); ++ ++ var = vect_recog_temp_ssa_var (new_type, NULL); ++ pattern_stmt = gimple_build_assign_with_ops ( ++ gimple_assign_rhs_code (stmt), var, op0, op1); ++ SSA_NAME_DEF_STMT (var) = pattern_stmt; ++ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt; ++ STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (stmt)) = new_def_stmt; ++ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "created pattern stmt: "); ++ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); ++ } ++ ++ prev_stmt = stmt; ++ stmt = use_stmt; ++ ++ first = false; ++ } ++ ++ /* We got a sequence. We expect it to end with a type demotion operation. ++ Otherwise, we quit (for now). 
There are three possible cases: the ++ conversion is to NEW_TYPE (we don't do anything), the conversion is to ++ a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and ++ NEW_TYPE differs (we create a new conversion statement). */ ++ if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))) ++ { ++ use_lhs = gimple_assign_lhs (use_stmt); ++ use_type = TREE_TYPE (use_lhs); ++ /* Support only type promotion or signedess change. */ ++ if (!INTEGRAL_TYPE_P (use_type) ++ || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type)) ++ return NULL; ++ ++ if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type) ++ || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type)) ++ { ++ /* Create NEW_TYPE->USE_TYPE conversion. */ ++ tmp = create_tmp_reg (use_type, NULL); ++ add_referenced_var (tmp); ++ new_oprnd = make_ssa_name (tmp, NULL); ++ pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, ++ var, NULL_TREE); ++ SSA_NAME_DEF_STMT (new_oprnd) = pattern_stmt; ++ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt; ++ ++ *type_in = get_vectype_for_scalar_type (new_type); ++ *type_out = get_vectype_for_scalar_type (use_type); ++ ++ /* We created a pattern statement for the last statement in the ++ sequence, so we don't need to associate it with the pattern ++ statement created for PREV_STMT. Therefore, we add PREV_STMT ++ to the list in order to mark it later in vect_pattern_recog_1. */ ++ if (prev_stmt) ++ VEC_safe_push (gimple, heap, *stmts, prev_stmt); ++ } ++ else ++ { ++ if (prev_stmt) ++ STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (use_stmt)) ++ = STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (prev_stmt)); ++ ++ *type_in = vectype; ++ *type_out = NULL_TREE; ++ } ++ ++ VEC_safe_push (gimple, heap, *stmts, use_stmt); ++ } ++ else ++ /* TODO: support general case, create a conversion to the correct type. */ ++ return NULL; ++ ++ /* Pattern detected. */ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: "); ++ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); ++ } ++ ++ return pattern_stmt; ++} ++ ++ ++/* Mark statements that are involved in a pattern. 
*/ ++ ++static inline void ++vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt, ++ tree pattern_vectype) ++{ ++ stmt_vec_info pattern_stmt_info, def_stmt_info; ++ stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt); ++ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info); ++ gimple def_stmt; ++ ++ set_vinfo_for_stmt (pattern_stmt, ++ new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL)); ++ gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt)); ++ pattern_stmt_info = vinfo_for_stmt (pattern_stmt); ++ ++ STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt; ++ STMT_VINFO_DEF_TYPE (pattern_stmt_info) ++ = STMT_VINFO_DEF_TYPE (orig_stmt_info); ++ STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype; ++ STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true; ++ STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt; ++ STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info) ++ = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info); ++ if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)) ++ { ++ def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info); ++ set_vinfo_for_stmt (def_stmt, ++ new_stmt_vec_info (def_stmt, loop_vinfo, NULL)); ++ gimple_set_bb (def_stmt, gimple_bb (orig_stmt)); ++ def_stmt_info = vinfo_for_stmt (def_stmt); ++ STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt; ++ STMT_VINFO_DEF_TYPE (def_stmt_info) ++ = STMT_VINFO_DEF_TYPE (orig_stmt_info); ++ STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype; ++ } ++} ++ + /* Function vect_pattern_recog_1 + + Input: +@@ -855,7 +1282,6 @@ + { + gimple stmt = gsi_stmt (si), pattern_stmt; + stmt_vec_info stmt_info; +- stmt_vec_info pattern_stmt_info; + loop_vec_info loop_vinfo; + tree pattern_vectype; + tree type_in, type_out; +@@ -923,16 +1349,7 @@ + } + + /* Mark the stmts that are involved in the pattern. */ +- set_vinfo_for_stmt (pattern_stmt, +- new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL)); +- gimple_set_bb (pattern_stmt, gimple_bb (stmt)); +- pattern_stmt_info = vinfo_for_stmt (pattern_stmt); +- +- STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt; +- STMT_VINFO_DEF_TYPE (pattern_stmt_info) = STMT_VINFO_DEF_TYPE (stmt_info); +- STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype; +- STMT_VINFO_IN_PATTERN_P (stmt_info) = true; +- STMT_VINFO_RELATED_STMT (stmt_info) = pattern_stmt; ++ vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype); + + /* Patterns cannot be vectorized using SLP, because they change the order of + computation. */ +@@ -940,9 +1357,9 @@ + if (next == stmt) + VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i); + +- /* In case of widen-mult by a constant, it is possible that an additional +- pattern stmt is created and inserted in STMTS_TO_REPLACE. We create a +- stmt_info for it, and mark the relevant statements. */ ++ /* It is possible that additional pattern stmts are created and inserted in ++ STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the ++ relevant statements. 
*/ + for (i = 0; VEC_iterate (gimple, stmts_to_replace, i, stmt) + && (unsigned) i < (VEC_length (gimple, stmts_to_replace) - 1); + i++) +@@ -955,16 +1372,7 @@ + print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); + } + +- set_vinfo_for_stmt (pattern_stmt, +- new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL)); +- gimple_set_bb (pattern_stmt, gimple_bb (stmt)); +- pattern_stmt_info = vinfo_for_stmt (pattern_stmt); +- +- STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt; +- STMT_VINFO_DEF_TYPE (pattern_stmt_info) +- = STMT_VINFO_DEF_TYPE (stmt_info); +- STMT_VINFO_VECTYPE (pattern_stmt_info) = STMT_VINFO_VECTYPE (stmt_info); +- STMT_VINFO_IN_PATTERN_P (stmt_info) = true; ++ vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE); + } + + VEC_free (gimple, heap, stmts_to_replace); + +=== modified file 'gcc/tree-vect-stmts.c' +--- old/gcc/tree-vect-stmts.c 2011-07-06 12:04:10 +0000 ++++ new/gcc/tree-vect-stmts.c 2011-09-05 06:23:37 +0000 +@@ -2246,6 +2246,42 @@ + } + + ++/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE ++ either as shift by a scalar or by a vector. */ ++ ++bool ++vect_supportable_shift (enum tree_code code, tree scalar_type) ++{ ++ ++ enum machine_mode vec_mode; ++ optab optab; ++ int icode; ++ tree vectype; ++ ++ vectype = get_vectype_for_scalar_type (scalar_type); ++ if (!vectype) ++ return false; ++ ++ optab = optab_for_tree_code (code, vectype, optab_scalar); ++ if (!optab ++ || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing) ++ { ++ optab = optab_for_tree_code (code, vectype, optab_vector); ++ if (!optab ++ || (optab_handler (optab, TYPE_MODE (vectype)) ++ == CODE_FOR_nothing)) ++ return false; ++ } ++ ++ vec_mode = TYPE_MODE (vectype); ++ icode = (int) optab_handler (optab, vec_mode); ++ if (icode == CODE_FOR_nothing) ++ return false; ++ ++ return true; ++} ++ ++ + /* Function vectorizable_shift. + + Check if STMT performs a shift operation that can be vectorized. +@@ -4946,7 +4982,7 @@ + enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info); + bool ok; + tree scalar_type, vectype; +- gimple pattern_stmt; ++ gimple pattern_stmt, pattern_def_stmt; + + if (vect_print_dump_info (REPORT_DETAILS)) + { +@@ -5016,6 +5052,23 @@ + return false; + } + ++ if (is_pattern_stmt_p (stmt_info) ++ && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info)) ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))) ++ { ++ /* Analyze def stmt of STMT if it's a pattern stmt. */ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "==> examining pattern def statement: "); ++ print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM); ++ } ++ ++ if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node)) ++ return false; ++ } ++ ++ + switch (STMT_VINFO_DEF_TYPE (stmt_info)) + { + case vect_internal_def: +@@ -5336,6 +5389,7 @@ + STMT_VINFO_VECTORIZABLE (res) = true; + STMT_VINFO_IN_PATTERN_P (res) = false; + STMT_VINFO_RELATED_STMT (res) = NULL; ++ STMT_VINFO_PATTERN_DEF_STMT (res) = NULL; + STMT_VINFO_DATA_REF (res) = NULL; + + STMT_VINFO_DR_BASE_ADDRESS (res) = NULL; + +=== modified file 'gcc/tree-vectorizer.h' +--- old/gcc/tree-vectorizer.h 2011-07-11 11:02:55 +0000 ++++ new/gcc/tree-vectorizer.h 2011-09-05 06:23:37 +0000 +@@ -464,6 +464,9 @@ + pattern). */ + gimple related_stmt; + ++ /* Used to keep a def stmt of a pattern stmt if such exists. 
*/ ++ gimple pattern_def_stmt; ++ + /* List of datarefs that are known to have the same alignment as the dataref + of this stmt. */ + VEC(dr_p,heap) *same_align_refs; +@@ -531,6 +534,7 @@ + + #define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p + #define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt ++#define STMT_VINFO_PATTERN_DEF_STMT(S) (S)->pattern_def_stmt + #define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs + #define STMT_VINFO_DEF_TYPE(S) (S)->def_type + #define STMT_VINFO_DR_GROUP_FIRST_DR(S) (S)->first_dr +@@ -814,6 +818,7 @@ + extern void vect_get_load_cost (struct data_reference *, int, bool, + unsigned int *, unsigned int *); + extern void vect_get_store_cost (struct data_reference *, int, unsigned int *); ++extern bool vect_supportable_shift (enum tree_code, tree); + + /* In tree-vect-data-refs.c. */ + extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int); +@@ -891,7 +896,7 @@ + Additional pattern recognition functions can (and will) be added + in the future. */ + typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); +-#define NUM_PATTERNS 4 ++#define NUM_PATTERNS 5 + void vect_pattern_recog (loop_vec_info); + + /* In tree-vectorizer.c. */ + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106802.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106802.patch new file mode 100644 index 0000000000..ade96fdd11 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106802.patch @@ -0,0 +1,948 @@ +2011-09-12 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF mainline: + + 2011-08-30 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/arm.c (optimal_immediate_sequence_1): Make b1, b2, + b3 and b4 unsigned. + + 2011-08-30 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/arm.c (arm_gen_constant): Set can_negate correctly + when code is SET. + + 2011-08-26 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/arm.c (struct four_ints): New type. + (count_insns_for_constant): Delete function. + (find_best_start): Delete function. + (optimal_immediate_sequence): New function. + (optimal_immediate_sequence_1): New function. + (arm_gen_constant): Move constant splitting code to + optimal_immediate_sequence. + Rewrite constant negation/invertion code. + + gcc/testsuite/ + * gcc.target/arm/thumb2-replicated-constant1.c: New file. + * gcc.target/arm/thumb2-replicated-constant2.c: New file. + * gcc.target/arm/thumb2-replicated-constant3.c: New file. + * gcc.target/arm/thumb2-replicated-constant4.c: New file. + + 2011-08-26 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/arm-protos.h (const_ok_for_op): Add prototype. + * config/arm/arm.c (const_ok_for_op): Add support for addw/subw. + Remove prototype. Remove static function type. + * config/arm/arm.md (*arm_addsi3): Add addw/subw support. + Add arch attribute. + * config/arm/constraints.md (Pj, PJ): New constraints. + + 2011-04-20 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/arm.c (arm_gen_constant): Move mowv support .... + (const_ok_for_op): ... to here. + + 2011-04-20 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/arm.c (arm_gen_constant): Remove redundant can_invert. 
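To make the effect of this rework concrete, the kinds of constants it targets are sketched below. This is an illustrative fragment written for this note, not code from the patch (the function name is invented). 0xfff is accepted by the extended const_ok_for_op for PLUS because Thumb-2 addw takes a 12-bit immediate, and 0x44554455, which is not a valid modified immediate on its own, should now be synthesizable from two replicated immediates (0x44444444 and 0x00110011) rather than four 8-bit chunks.

/* Illustrative sketch only, not part of the patch.  Each constant below
   is one the reworked optimal_immediate_sequence / const_ok_for_op code
   is expected to materialize more cheaply when compiling for Thumb-2.  */
unsigned int
cheap_constants (unsigned int r)
{
  unsigned int a = r + 0xfff;        /* 12-bit immediate: single addw.  */
  unsigned int b = r | 0x44554455;   /* should split into two replicated immediates.  */
  return a ^ b;
}
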
+ + +=== modified file 'gcc/config/arm/arm-protos.h' +--- old/gcc/config/arm/arm-protos.h 2011-07-04 14:03:49 +0000 ++++ new/gcc/config/arm/arm-protos.h 2011-08-25 13:26:58 +0000 +@@ -46,6 +46,7 @@ + extern bool arm_small_register_classes_for_mode_p (enum machine_mode); + extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode); + extern int const_ok_for_arm (HOST_WIDE_INT); ++extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code); + extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx, + HOST_WIDE_INT, rtx, rtx, int); + extern RTX_CODE arm_canonicalize_comparison (RTX_CODE, rtx *, rtx *); + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-08-24 17:35:16 +0000 ++++ new/gcc/config/arm/arm.c 2011-09-06 12:57:56 +0000 +@@ -63,6 +63,11 @@ + + void (*arm_lang_output_object_attributes_hook)(void); + ++struct four_ints ++{ ++ int i[4]; ++}; ++ + /* Forward function declarations. */ + static bool arm_needs_doubleword_align (enum machine_mode, const_tree); + static int arm_compute_static_chain_stack_bytes (void); +@@ -81,7 +86,6 @@ + static bool arm_legitimate_address_p (enum machine_mode, rtx, bool); + static int thumb_far_jump_used_p (void); + static bool thumb_force_lr_save (void); +-static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code); + static rtx emit_sfm (int, int); + static unsigned arm_size_return_regs (void); + static bool arm_assemble_integer (rtx, unsigned int, int); +@@ -129,7 +133,13 @@ + static int arm_comp_type_attributes (const_tree, const_tree); + static void arm_set_default_type_attributes (tree); + static int arm_adjust_cost (rtx, rtx, rtx, int); +-static int count_insns_for_constant (HOST_WIDE_INT, int); ++static int optimal_immediate_sequence (enum rtx_code code, ++ unsigned HOST_WIDE_INT val, ++ struct four_ints *return_sequence); ++static int optimal_immediate_sequence_1 (enum rtx_code code, ++ unsigned HOST_WIDE_INT val, ++ struct four_ints *return_sequence, ++ int i); + static int arm_get_strip_length (int); + static bool arm_function_ok_for_sibcall (tree, tree); + static enum machine_mode arm_promote_function_mode (const_tree, +@@ -2525,7 +2535,7 @@ + } + + /* Return true if I is a valid constant for the operation CODE. */ +-static int ++int + const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code) + { + if (const_ok_for_arm (i)) +@@ -2533,7 +2543,21 @@ + + switch (code) + { ++ case SET: ++ /* See if we can use movw. */ ++ if (arm_arch_thumb2 && (i & 0xffff0000) == 0) ++ return 1; ++ else ++ return 0; ++ + case PLUS: ++ /* See if we can use addw or subw. */ ++ if (TARGET_THUMB2 ++ && ((i & 0xfffff000) == 0 ++ || ((-i) & 0xfffff000) == 0)) ++ return 1; ++ /* else fall through. */ ++ + case COMPARE: + case EQ: + case NE: +@@ -2649,68 +2673,41 @@ + 1); + } + +-/* Return the number of instructions required to synthesize the given +- constant, if we start emitting them from bit-position I. */ +-static int +-count_insns_for_constant (HOST_WIDE_INT remainder, int i) +-{ +- HOST_WIDE_INT temp1; +- int step_size = TARGET_ARM ? 2 : 1; +- int num_insns = 0; +- +- gcc_assert (TARGET_ARM || i == 0); +- +- do +- { +- int end; +- +- if (i <= 0) +- i += 32; +- if (remainder & (((1 << step_size) - 1) << (i - step_size))) +- { +- end = i - 8; +- if (end < 0) +- end += 32; +- temp1 = remainder & ((0x0ff << end) +- | ((i < end) ? 
(0xff >> (32 - end)) : 0)); +- remainder &= ~temp1; +- num_insns++; +- i -= 8 - step_size; +- } +- i -= step_size; +- } while (remainder); +- return num_insns; +-} +- +-static int +-find_best_start (unsigned HOST_WIDE_INT remainder) ++/* Return a sequence of integers, in RETURN_SEQUENCE that fit into ++ ARM/THUMB2 immediates, and add up to VAL. ++ Thr function return value gives the number of insns required. */ ++static int ++optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val, ++ struct four_ints *return_sequence) + { + int best_consecutive_zeros = 0; + int i; + int best_start = 0; ++ int insns1, insns2; ++ struct four_ints tmp_sequence; + + /* If we aren't targetting ARM, the best place to start is always at +- the bottom. */ +- if (! TARGET_ARM) +- return 0; +- +- for (i = 0; i < 32; i += 2) ++ the bottom, otherwise look more closely. */ ++ if (TARGET_ARM) + { +- int consecutive_zeros = 0; +- +- if (!(remainder & (3 << i))) ++ for (i = 0; i < 32; i += 2) + { +- while ((i < 32) && !(remainder & (3 << i))) +- { +- consecutive_zeros += 2; +- i += 2; +- } +- if (consecutive_zeros > best_consecutive_zeros) +- { +- best_consecutive_zeros = consecutive_zeros; +- best_start = i - consecutive_zeros; +- } +- i -= 2; ++ int consecutive_zeros = 0; ++ ++ if (!(val & (3 << i))) ++ { ++ while ((i < 32) && !(val & (3 << i))) ++ { ++ consecutive_zeros += 2; ++ i += 2; ++ } ++ if (consecutive_zeros > best_consecutive_zeros) ++ { ++ best_consecutive_zeros = consecutive_zeros; ++ best_start = i - consecutive_zeros; ++ } ++ i -= 2; ++ } + } + } + +@@ -2737,13 +2734,161 @@ + the constant starting from `best_start', and also starting from + zero (i.e. with bit 31 first to be output). If `best_start' doesn't + yield a shorter sequence, we may as well use zero. */ ++ insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start); + if (best_start != 0 +- && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder) +- && (count_insns_for_constant (remainder, 0) <= +- count_insns_for_constant (remainder, best_start))) +- best_start = 0; +- +- return best_start; ++ && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val)) ++ { ++ insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0); ++ if (insns2 <= insns1) ++ { ++ *return_sequence = tmp_sequence; ++ insns1 = insns2; ++ } ++ } ++ ++ return insns1; ++} ++ ++/* As for optimal_immediate_sequence, but starting at bit-position I. */ ++static int ++optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val, ++ struct four_ints *return_sequence, int i) ++{ ++ int remainder = val & 0xffffffff; ++ int insns = 0; ++ ++ /* Try and find a way of doing the job in either two or three ++ instructions. ++ ++ In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned ++ location. We start at position I. This may be the MSB, or ++ optimial_immediate_sequence may have positioned it at the largest block ++ of zeros that are aligned on a 2-bit boundary. We then fill up the temps, ++ wrapping around to the top of the word when we drop off the bottom. ++ In the worst case this code should produce no more than four insns. ++ ++ In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit ++ constants, shifted to any arbitrary location. We should always start ++ at the MSB. 
*/ ++ do ++ { ++ int end; ++ unsigned int b1, b2, b3, b4; ++ unsigned HOST_WIDE_INT result; ++ int loc; ++ ++ gcc_assert (insns < 4); ++ ++ if (i <= 0) ++ i += 32; ++ ++ /* First, find the next normal 12/8-bit shifted/rotated immediate. */ ++ if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1))))) ++ { ++ loc = i; ++ if (i <= 12 && TARGET_THUMB2 && code == PLUS) ++ /* We can use addw/subw for the last 12 bits. */ ++ result = remainder; ++ else ++ { ++ /* Use an 8-bit shifted/rotated immediate. */ ++ end = i - 8; ++ if (end < 0) ++ end += 32; ++ result = remainder & ((0x0ff << end) ++ | ((i < end) ? (0xff >> (32 - end)) ++ : 0)); ++ i -= 8; ++ } ++ } ++ else ++ { ++ /* Arm allows rotates by a multiple of two. Thumb-2 allows ++ arbitrary shifts. */ ++ i -= TARGET_ARM ? 2 : 1; ++ continue; ++ } ++ ++ /* Next, see if we can do a better job with a thumb2 replicated ++ constant. ++ ++ We do it this way around to catch the cases like 0x01F001E0 where ++ two 8-bit immediates would work, but a replicated constant would ++ make it worse. ++ ++ TODO: 16-bit constants that don't clear all the bits, but still win. ++ TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */ ++ if (TARGET_THUMB2) ++ { ++ b1 = (remainder & 0xff000000) >> 24; ++ b2 = (remainder & 0x00ff0000) >> 16; ++ b3 = (remainder & 0x0000ff00) >> 8; ++ b4 = remainder & 0xff; ++ ++ if (loc > 24) ++ { ++ /* The 8-bit immediate already found clears b1 (and maybe b2), ++ but must leave b3 and b4 alone. */ ++ ++ /* First try to find a 32-bit replicated constant that clears ++ almost everything. We can assume that we can't do it in one, ++ or else we wouldn't be here. */ ++ unsigned int tmp = b1 & b2 & b3 & b4; ++ unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16) ++ + (tmp << 24); ++ unsigned int matching_bytes = (tmp == b1) + (tmp == b2) ++ + (tmp == b3) + (tmp == b4); ++ if (tmp ++ && (matching_bytes >= 3 ++ || (matching_bytes == 2 ++ && const_ok_for_op (remainder & ~tmp2, code)))) ++ { ++ /* At least 3 of the bytes match, and the fourth has at ++ least as many bits set, or two of the bytes match ++ and it will only require one more insn to finish. */ ++ result = tmp2; ++ i = tmp != b1 ? 32 ++ : tmp != b2 ? 24 ++ : tmp != b3 ? 16 ++ : 8; ++ } ++ ++ /* Second, try to find a 16-bit replicated constant that can ++ leave three of the bytes clear. If b2 or b4 is already ++ zero, then we can. If the 8-bit from above would not ++ clear b2 anyway, then we still win. */ ++ else if (b1 == b3 && (!b2 || !b4 ++ || (remainder & 0x00ff0000 & ~result))) ++ { ++ result = remainder & 0xff00ff00; ++ i = 24; ++ } ++ } ++ else if (loc > 16) ++ { ++ /* The 8-bit immediate already found clears b2 (and maybe b3) ++ and we don't get here unless b1 is alredy clear, but it will ++ leave b4 unchanged. */ ++ ++ /* If we can clear b2 and b4 at once, then we win, since the ++ 8-bits couldn't possibly reach that far. */ ++ if (b2 == b4) ++ { ++ result = remainder & 0x00ff00ff; ++ i = 16; ++ } ++ } ++ } ++ ++ return_sequence->i[insns++] = result; ++ remainder &= ~result; ++ ++ if (code == SET || code == MINUS) ++ code = PLUS; ++ } ++ while (remainder); ++ ++ return insns; + } + + /* Emit an instruction with the indicated PATTERN. If COND is +@@ -2760,7 +2905,6 @@ + + /* As above, but extra parameter GENERATE which, if clear, suppresses + RTL generation. */ +-/* ??? This needs more work for thumb2. 
*/ + + static int + arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, +@@ -2772,15 +2916,15 @@ + int final_invert = 0; + int can_negate_initial = 0; + int i; +- int num_bits_set = 0; + int set_sign_bit_copies = 0; + int clear_sign_bit_copies = 0; + int clear_zero_bit_copies = 0; + int set_zero_bit_copies = 0; +- int insns = 0; ++ int insns = 0, neg_insns, inv_insns; + unsigned HOST_WIDE_INT temp1, temp2; + unsigned HOST_WIDE_INT remainder = val & 0xffffffff; +- int step_size = TARGET_ARM ? 2 : 1; ++ struct four_ints *immediates; ++ struct four_ints pos_immediates, neg_immediates, inv_immediates; + + /* Find out which operations are safe for a given CODE. Also do a quick + check for degenerate cases; these can occur when DImode operations +@@ -2789,7 +2933,6 @@ + { + case SET: + can_invert = 1; +- can_negate = 1; + break; + + case PLUS: +@@ -2817,9 +2960,6 @@ + gen_rtx_SET (VOIDmode, target, source)); + return 1; + } +- +- if (TARGET_THUMB2) +- can_invert = 1; + break; + + case AND: +@@ -2861,6 +3001,7 @@ + gen_rtx_NOT (mode, source))); + return 1; + } ++ final_invert = 1; + break; + + case MINUS: +@@ -2883,7 +3024,6 @@ + source))); + return 1; + } +- can_negate = 1; + + break; + +@@ -2892,9 +3032,7 @@ + } + + /* If we can do it in one insn get out quickly. */ +- if (const_ok_for_arm (val) +- || (can_negate_initial && const_ok_for_arm (-val)) +- || (can_invert && const_ok_for_arm (~val))) ++ if (const_ok_for_op (val, code)) + { + if (generate) + emit_constant_insn (cond, +@@ -2947,15 +3085,6 @@ + switch (code) + { + case SET: +- /* See if we can use movw. */ +- if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0) +- { +- if (generate) +- emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target, +- GEN_INT (val))); +- return 1; +- } +- + /* See if we can do this by sign_extending a constant that is known + to be negative. This is a good, way of doing it, since the shift + may well merge into a subsequent insn. */ +@@ -3306,121 +3435,97 @@ + break; + } + +- for (i = 0; i < 32; i++) +- if (remainder & (1 << i)) +- num_bits_set++; +- +- if ((code == AND) +- || (code != IOR && can_invert && num_bits_set > 16)) +- remainder ^= 0xffffffff; +- else if (code == PLUS && num_bits_set > 16) +- remainder = (-remainder) & 0xffffffff; +- +- /* For XOR, if more than half the bits are set and there's a sequence +- of more than 8 consecutive ones in the pattern then we can XOR by the +- inverted constant and then invert the final result; this may save an +- instruction and might also lead to the final mvn being merged with +- some other operation. */ +- else if (code == XOR && num_bits_set > 16 +- && (count_insns_for_constant (remainder ^ 0xffffffff, +- find_best_start +- (remainder ^ 0xffffffff)) +- < count_insns_for_constant (remainder, +- find_best_start (remainder)))) +- { +- remainder ^= 0xffffffff; +- final_invert = 1; ++ /* Calculate what the instruction sequences would be if we generated it ++ normally, negated, or inverted. */ ++ if (code == AND) ++ /* AND cannot be split into multiple insns, so invert and use BIC. 
*/ ++ insns = 99; ++ else ++ insns = optimal_immediate_sequence (code, remainder, &pos_immediates); ++ ++ if (can_negate) ++ neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff, ++ &neg_immediates); ++ else ++ neg_insns = 99; ++ ++ if (can_invert || final_invert) ++ inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff, ++ &inv_immediates); ++ else ++ inv_insns = 99; ++ ++ immediates = &pos_immediates; ++ ++ /* Is the negated immediate sequence more efficient? */ ++ if (neg_insns < insns && neg_insns <= inv_insns) ++ { ++ insns = neg_insns; ++ immediates = &neg_immediates; ++ } ++ else ++ can_negate = 0; ++ ++ /* Is the inverted immediate sequence more efficient? ++ We must allow for an extra NOT instruction for XOR operations, although ++ there is some chance that the final 'mvn' will get optimized later. */ ++ if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns)) ++ { ++ insns = inv_insns; ++ immediates = &inv_immediates; + } + else + { + can_invert = 0; +- can_negate = 0; ++ final_invert = 0; + } + +- /* Now try and find a way of doing the job in either two or three +- instructions. +- We start by looking for the largest block of zeros that are aligned on +- a 2-bit boundary, we then fill up the temps, wrapping around to the +- top of the word when we drop off the bottom. +- In the worst case this code should produce no more than four insns. +- Thumb-2 constants are shifted, not rotated, so the MSB is always the +- best place to start. */ +- +- /* ??? Use thumb2 replicated constants when the high and low halfwords are +- the same. */ +- { +- /* Now start emitting the insns. */ +- i = find_best_start (remainder); +- do +- { +- int end; +- +- if (i <= 0) +- i += 32; +- if (remainder & (3 << (i - 2))) +- { +- end = i - 8; +- if (end < 0) +- end += 32; +- temp1 = remainder & ((0x0ff << end) +- | ((i < end) ? (0xff >> (32 - end)) : 0)); +- remainder &= ~temp1; +- +- if (generate) +- { +- rtx new_src, temp1_rtx; +- +- if (code == SET || code == MINUS) +- { +- new_src = (subtargets ? gen_reg_rtx (mode) : target); +- if (can_invert && code != MINUS) +- temp1 = ~temp1; +- } +- else +- { +- if ((final_invert || remainder) && subtargets) +- new_src = gen_reg_rtx (mode); +- else +- new_src = target; +- if (can_invert) +- temp1 = ~temp1; +- else if (can_negate) +- temp1 = -temp1; +- } +- +- temp1 = trunc_int_for_mode (temp1, mode); +- temp1_rtx = GEN_INT (temp1); +- +- if (code == SET) +- ; +- else if (code == MINUS) +- temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source); +- else +- temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx); +- +- emit_constant_insn (cond, +- gen_rtx_SET (VOIDmode, new_src, +- temp1_rtx)); +- source = new_src; +- } +- +- if (code == SET) +- { +- can_invert = 0; +- code = PLUS; +- } +- else if (code == MINUS) ++ /* Now output the chosen sequence as instructions. */ ++ if (generate) ++ { ++ for (i = 0; i < insns; i++) ++ { ++ rtx new_src, temp1_rtx; ++ ++ temp1 = immediates->i[i]; ++ ++ if (code == SET || code == MINUS) ++ new_src = (subtargets ? 
gen_reg_rtx (mode) : target); ++ else if ((final_invert || i < (insns - 1)) && subtargets) ++ new_src = gen_reg_rtx (mode); ++ else ++ new_src = target; ++ ++ if (can_invert) ++ temp1 = ~temp1; ++ else if (can_negate) ++ temp1 = -temp1; ++ ++ temp1 = trunc_int_for_mode (temp1, mode); ++ temp1_rtx = GEN_INT (temp1); ++ ++ if (code == SET) ++ ; ++ else if (code == MINUS) ++ temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source); ++ else ++ temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx); ++ ++ emit_constant_insn (cond, ++ gen_rtx_SET (VOIDmode, new_src, ++ temp1_rtx)); ++ source = new_src; ++ ++ if (code == SET) ++ { ++ can_negate = can_invert; ++ can_invert = 0; + code = PLUS; +- +- insns++; +- i -= 8 - step_size; +- } +- /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary +- shifts. */ +- i -= step_size; +- } +- while (remainder); +- } ++ } ++ else if (code == MINUS) ++ code = PLUS; ++ } ++ } + + if (final_invert) + { + +=== modified file 'gcc/config/arm/arm.md' +--- old/gcc/config/arm/arm.md 2011-08-25 11:42:09 +0000 ++++ new/gcc/config/arm/arm.md 2011-08-25 13:26:58 +0000 +@@ -701,21 +701,24 @@ + ;; (plus (reg rN) (reg sp)) into (reg rN). In this case reload will + ;; put the duplicated register first, and not try the commutative version. + (define_insn_and_split "*arm_addsi3" +- [(set (match_operand:SI 0 "s_register_operand" "=r, k,r,r, k,r") +- (plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k,rk") +- (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,L, L,?n")))] ++ [(set (match_operand:SI 0 "s_register_operand" "=r, k,r,r, k, r, k,r, k, r") ++ (plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k, rk,k,rk,k, rk") ++ (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,Pj,Pj,L, L,PJ,PJ,?n")))] + "TARGET_32BIT" + "@ + add%?\\t%0, %1, %2 + add%?\\t%0, %1, %2 + add%?\\t%0, %2, %1 +- sub%?\\t%0, %1, #%n2 +- sub%?\\t%0, %1, #%n2 ++ addw%?\\t%0, %1, %2 ++ addw%?\\t%0, %1, %2 ++ sub%?\\t%0, %1, #%n2 ++ sub%?\\t%0, %1, #%n2 ++ subw%?\\t%0, %1, #%n2 ++ subw%?\\t%0, %1, #%n2 + #" + "TARGET_32BIT + && GET_CODE (operands[2]) == CONST_INT +- && !(const_ok_for_arm (INTVAL (operands[2])) +- || const_ok_for_arm (-INTVAL (operands[2]))) ++ && !const_ok_for_op (INTVAL (operands[2]), PLUS) + && (reload_completed || !arm_eliminable_register (operands[1]))" + [(clobber (const_int 0))] + " +@@ -724,8 +727,9 @@ + operands[1], 0); + DONE; + " +- [(set_attr "length" "4,4,4,4,4,16") +- (set_attr "predicable" "yes")] ++ [(set_attr "length" "4,4,4,4,4,4,4,4,4,16") ++ (set_attr "predicable" "yes") ++ (set_attr "arch" "*,*,*,t2,t2,*,*,t2,t2,*")] + ) + + (define_insn_and_split "*thumb1_addsi3" + +=== modified file 'gcc/config/arm/constraints.md' +--- old/gcc/config/arm/constraints.md 2011-01-03 20:52:22 +0000 ++++ new/gcc/config/arm/constraints.md 2011-08-25 13:26:58 +0000 +@@ -31,7 +31,7 @@ + ;; The following multi-letter normal constraints have been used: + ;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz + ;; in Thumb-1 state: Pa, Pb, Pc, Pd +-;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px ++;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px + + ;; The following memory constraints have been used: + ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us +@@ -74,6 +74,18 @@ + (and (match_code "const_int") + (match_test "(ival & 0xffff0000) == 0"))))) + ++(define_constraint "Pj" ++ "@internal A 12-bit constant suitable for an ADDW or SUBW instruction. 
(Thumb-2)" ++ (and (match_code "const_int") ++ (and (match_test "TARGET_THUMB2") ++ (match_test "(ival & 0xfffff000) == 0")))) ++ ++(define_constraint "PJ" ++ "@internal A constant that satisfies the Pj constrant if negated." ++ (and (match_code "const_int") ++ (and (match_test "TARGET_THUMB2") ++ (match_test "((-ival) & 0xfffff000) == 0")))) ++ + (define_register_constraint "k" "STACK_REG" + "@internal The stack register.") + + +=== added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c' +--- old/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c 2011-08-25 13:31:00 +0000 +@@ -0,0 +1,27 @@ ++/* Ensure simple replicated constant immediates work. */ ++/* { dg-options "-mthumb -O2" } */ ++/* { dg-require-effective-target arm_thumb2_ok } */ ++ ++int ++foo1 (int a) ++{ ++ return a + 0xfefefefe; ++} ++ ++/* { dg-final { scan-assembler "add.*#-16843010" } } */ ++ ++int ++foo2 (int a) ++{ ++ return a - 0xab00ab00; ++} ++ ++/* { dg-final { scan-assembler "sub.*#-1426019584" } } */ ++ ++int ++foo3 (int a) ++{ ++ return a & 0x00cd00cd; ++} ++ ++/* { dg-final { scan-assembler "and.*#13435085" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c' +--- old/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c 2011-08-25 13:31:00 +0000 +@@ -0,0 +1,75 @@ ++/* Ensure split constants can use replicated patterns. */ ++/* { dg-options "-mthumb -O2" } */ ++/* { dg-require-effective-target arm_thumb2_ok } */ ++ ++int ++foo1 (int a) ++{ ++ return a + 0xfe00fe01; ++} ++ ++/* { dg-final { scan-assembler "add.*#-33489408" } } */ ++/* { dg-final { scan-assembler "add.*#1" } } */ ++ ++int ++foo2 (int a) ++{ ++ return a + 0xdd01dd00; ++} ++ ++/* { dg-final { scan-assembler "add.*#-587145984" } } */ ++/* { dg-final { scan-assembler "add.*#65536" } } */ ++ ++int ++foo3 (int a) ++{ ++ return a + 0x00443344; ++} ++ ++/* { dg-final { scan-assembler "add.*#4456516" } } */ ++/* { dg-final { scan-assembler "add.*#13056" } } */ ++ ++int ++foo4 (int a) ++{ ++ return a + 0x77330033; ++} ++ ++/* { dg-final { scan-assembler "add.*#1996488704" } } */ ++/* { dg-final { scan-assembler "add.*#3342387" } } */ ++ ++int ++foo5 (int a) ++{ ++ return a + 0x11221122; ++} ++ ++/* { dg-final { scan-assembler "add.*#285217024" } } */ ++/* { dg-final { scan-assembler "add.*#2228258" } } */ ++ ++int ++foo6 (int a) ++{ ++ return a + 0x66666677; ++} ++ ++/* { dg-final { scan-assembler "add.*#1717986918" } } */ ++/* { dg-final { scan-assembler "add.*#17" } } */ ++ ++int ++foo7 (int a) ++{ ++ return a + 0x99888888; ++} ++ ++/* { dg-final { scan-assembler "add.*#-2004318072" } } */ ++/* { dg-final { scan-assembler "add.*#285212672" } } */ ++ ++int ++foo8 (int a) ++{ ++ return a + 0xdddddfff; ++} ++ ++/* { dg-final { scan-assembler "add.*#-572662307" } } */ ++/* { dg-final { scan-assembler "addw.*#546" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c' +--- old/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c 2011-08-25 13:31:00 +0000 +@@ -0,0 +1,28 @@ ++/* Ensure negated/inverted replicated constant immediates work. 
*/ ++/* { dg-options "-mthumb -O2" } */ ++/* { dg-require-effective-target arm_thumb2_ok } */ ++ ++int ++foo1 (int a) ++{ ++ return a | 0xffffff00; ++} ++ ++/* { dg-final { scan-assembler "orn.*#255" } } */ ++ ++int ++foo2 (int a) ++{ ++ return a & 0xffeeffee; ++} ++ ++/* { dg-final { scan-assembler "bic.*#1114129" } } */ ++ ++int ++foo3 (int a) ++{ ++ return a & 0xaaaaaa00; ++} ++ ++/* { dg-final { scan-assembler "and.*#-1431655766" } } */ ++/* { dg-final { scan-assembler "bic.*#170" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c' +--- old/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c 2011-08-25 13:31:00 +0000 +@@ -0,0 +1,22 @@ ++/* Ensure replicated constants don't make things worse. */ ++/* { dg-options "-mthumb -O2" } */ ++/* { dg-require-effective-target arm_thumb2_ok } */ ++ ++int ++foo1 (int a) ++{ ++ /* It might be tempting to use 0x01000100, but it wouldn't help. */ ++ return a + 0x01f001e0; ++} ++ ++/* { dg-final { scan-assembler "add.*#32505856" } } */ ++/* { dg-final { scan-assembler "add.*#480" } } */ ++ ++int ++foo2 (int a) ++{ ++ return a + 0x0f100e10; ++} ++ ++/* { dg-final { scan-assembler "add.*#252706816" } } */ ++/* { dg-final { scan-assembler "add.*#3600" } } */ + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106803.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106803.patch new file mode 100644 index 0000000000..5b36959b6b --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106803.patch @@ -0,0 +1,201 @@ +2011-09-12 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + PR target/49030 + * config/arm/arm-protos.h (maybe_get_arm_condition_code): Declare. + * config/arm/arm.c (maybe_get_arm_condition_code): New function, + reusing the old code from get_arm_condition_code. Return ARM_NV + for invalid comparison codes. + (get_arm_condition_code): Redefine in terms of + maybe_get_arm_condition_code. + * config/arm/predicates.md (arm_comparison_operator): Use + maybe_get_arm_condition_code. + + gcc/testsuite/ + PR target/49030 + * gcc.dg/torture/pr49030.c: New test. + +=== modified file 'gcc/config/arm/arm-protos.h' +--- old/gcc/config/arm/arm-protos.h 2011-08-25 13:26:58 +0000 ++++ new/gcc/config/arm/arm-protos.h 2011-09-12 11:03:11 +0000 +@@ -179,6 +179,7 @@ + #endif + extern int thumb_shiftable_const (unsigned HOST_WIDE_INT); + #ifdef RTX_CODE ++extern enum arm_cond_code maybe_get_arm_condition_code (rtx); + extern void thumb1_final_prescan_insn (rtx); + extern void thumb2_final_prescan_insn (rtx); + extern const char *thumb_load_double_from_address (rtx *); + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-09-06 12:57:56 +0000 ++++ new/gcc/config/arm/arm.c 2011-09-12 11:03:11 +0000 +@@ -17494,10 +17494,10 @@ + decremented/zeroed by arm_asm_output_opcode as the insns are output. */ + + /* Returns the index of the ARM condition code string in +- `arm_condition_codes'. COMPARISON should be an rtx like +- `(eq (...) (...))'. */ +-static enum arm_cond_code +-get_arm_condition_code (rtx comparison) ++ `arm_condition_codes', or ARM_NV if the comparison is invalid. ++ COMPARISON should be an rtx like `(eq (...) (...))'. 
*/ ++enum arm_cond_code ++maybe_get_arm_condition_code (rtx comparison) + { + enum machine_mode mode = GET_MODE (XEXP (comparison, 0)); + enum arm_cond_code code; +@@ -17521,11 +17521,11 @@ + case CC_DLTUmode: code = ARM_CC; + + dominance: +- gcc_assert (comp_code == EQ || comp_code == NE); +- + if (comp_code == EQ) + return ARM_INVERSE_CONDITION_CODE (code); +- return code; ++ if (comp_code == NE) ++ return code; ++ return ARM_NV; + + case CC_NOOVmode: + switch (comp_code) +@@ -17534,7 +17534,7 @@ + case EQ: return ARM_EQ; + case GE: return ARM_PL; + case LT: return ARM_MI; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_Zmode: +@@ -17542,7 +17542,7 @@ + { + case NE: return ARM_NE; + case EQ: return ARM_EQ; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_Nmode: +@@ -17550,7 +17550,7 @@ + { + case NE: return ARM_MI; + case EQ: return ARM_PL; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CCFPEmode: +@@ -17575,7 +17575,7 @@ + /* UNEQ and LTGT do not have a representation. */ + case UNEQ: /* Fall through. */ + case LTGT: /* Fall through. */ +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_SWPmode: +@@ -17591,7 +17591,7 @@ + case GTU: return ARM_CC; + case LEU: return ARM_CS; + case LTU: return ARM_HI; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_Cmode: +@@ -17599,7 +17599,7 @@ + { + case LTU: return ARM_CS; + case GEU: return ARM_CC; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_CZmode: +@@ -17611,7 +17611,7 @@ + case GTU: return ARM_HI; + case LEU: return ARM_LS; + case LTU: return ARM_CC; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_NCVmode: +@@ -17621,7 +17621,7 @@ + case LT: return ARM_LT; + case GEU: return ARM_CS; + case LTU: return ARM_CC; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CCmode: +@@ -17637,13 +17637,22 @@ + case GTU: return ARM_HI; + case LEU: return ARM_LS; + case LTU: return ARM_CC; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + default: gcc_unreachable (); + } + } + ++/* Like maybe_get_arm_condition_code, but never return ARM_NV. */ ++static enum arm_cond_code ++get_arm_condition_code (rtx comparison) ++{ ++ enum arm_cond_code code = maybe_get_arm_condition_code (comparison); ++ gcc_assert (code != ARM_NV); ++ return code; ++} ++ + /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed + instructions. */ + void + +=== modified file 'gcc/config/arm/predicates.md' +--- old/gcc/config/arm/predicates.md 2011-08-13 08:40:36 +0000 ++++ new/gcc/config/arm/predicates.md 2011-09-05 09:40:19 +0000 +@@ -242,10 +242,9 @@ + ;; True for integer comparisons and, if FP is active, for comparisons + ;; other than LTGT or UNEQ. 
+ (define_special_predicate "arm_comparison_operator" +- (ior (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu") +- (and (match_test "TARGET_32BIT && TARGET_HARD_FLOAT +- && (TARGET_FPA || TARGET_VFP)") +- (match_code "unordered,ordered,unlt,unle,unge,ungt")))) ++ (and (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu, ++ unordered,ordered,unlt,unle,unge,ungt") ++ (match_test "maybe_get_arm_condition_code (op) != ARM_NV"))) + + (define_special_predicate "lt_ge_comparison_operator" + (match_code "lt,ge")) + +=== added file 'gcc/testsuite/gcc.dg/torture/pr49030.c' +--- old/gcc/testsuite/gcc.dg/torture/pr49030.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/torture/pr49030.c 2011-09-05 09:40:19 +0000 +@@ -0,0 +1,19 @@ ++void ++sample_move_d32u24_sS (char *dst, float *src, unsigned long nsamples, ++ unsigned long dst_skip) ++{ ++ long long y; ++ while (nsamples--) ++ { ++ y = (long long) (*src * 8388608.0f) << 8; ++ if (y > 2147483647) { ++ *(int *) dst = 2147483647; ++ } else if (y < -2147483647 - 1) { ++ *(int *) dst = -2147483647 - 1; ++ } else { ++ *(int *) dst = (int) y; ++ } ++ dst += dst_skip; ++ src++; ++ } ++} + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106804.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106804.patch new file mode 100644 index 0000000000..09d0767914 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106804.patch @@ -0,0 +1,38 @@ +2011-09-01 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/predicates.md (shift_amount_operand): Ensure shift + amount is positive. + + gcc/testsuite/ + * gcc.dg/pr50193-1.c: New file. + +=== modified file 'gcc/config/arm/predicates.md' +--- old/gcc/config/arm/predicates.md 2011-09-05 09:40:19 +0000 ++++ new/gcc/config/arm/predicates.md 2011-09-12 11:24:34 +0000 +@@ -132,7 +132,8 @@ + (define_predicate "shift_amount_operand" + (ior (and (match_test "TARGET_ARM") + (match_operand 0 "s_register_operand")) +- (match_operand 0 "const_int_operand"))) ++ (and (match_operand 0 "const_int_operand") ++ (match_test "INTVAL (op) > 0")))) + + (define_predicate "arm_add_operand" + (ior (match_operand 0 "arm_rhs_operand") + +=== added file 'gcc/testsuite/gcc.dg/pr50193-1.c' +--- old/gcc/testsuite/gcc.dg/pr50193-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/pr50193-1.c 2011-09-01 12:22:14 +0000 +@@ -0,0 +1,10 @@ ++/* PR 50193: ARM: ICE on a | (b << negative-constant) */ ++/* Ensure that the compiler doesn't ICE. */ ++ ++/* { dg-options "-O2" } */ ++ ++int ++foo(int a, int b) ++{ ++ return a | (b << -3); /* { dg-warning "left shift count is negative" } */ ++} + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106805.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106805.patch new file mode 100644 index 0000000000..5dec32fa71 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106805.patch @@ -0,0 +1,47 @@ +2011-09-12 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF mainline: + + 2011-09-08 Andrew Stubbs <ams@codesourcery.com> + + PR tree-optimization/50318 + + gcc/ + * tree-ssa-math-opts.c (convert_plusminus_to_widen): Correct + typo in use of mult_rhs1 and mult_rhs2. + + gcc/testsuite/ + * gcc.target/arm/pr50318-1.c: New file. 
+ +=== added file 'gcc/testsuite/gcc.target/arm/pr50318-1.c' +--- old/gcc/testsuite/gcc.target/arm/pr50318-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/pr50318-1.c 2011-09-08 20:11:43 +0000 +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++/* { dg-require-effective-target arm_dsp } */ ++ ++long long test (unsigned int sec, unsigned long long nsecs) ++{ ++ return (long long)(long)sec * 1000000000L + (long long)(unsigned ++ long)nsecs; ++} ++ ++/* { dg-final { scan-assembler "umlal" } } */ + +=== modified file 'gcc/tree-ssa-math-opts.c' +--- old/gcc/tree-ssa-math-opts.c 2011-08-09 10:26:48 +0000 ++++ new/gcc/tree-ssa-math-opts.c 2011-09-08 20:11:43 +0000 +@@ -1699,9 +1699,9 @@ + + /* Handle constants. */ + if (TREE_CODE (mult_rhs1) == INTEGER_CST) +- rhs1 = fold_convert (type1, mult_rhs1); ++ mult_rhs1 = fold_convert (type1, mult_rhs1); + if (TREE_CODE (mult_rhs2) == INTEGER_CST) +- rhs2 = fold_convert (type2, mult_rhs2); ++ mult_rhs2 = fold_convert (type2, mult_rhs2); + + gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code, mult_rhs1, mult_rhs2, + add_rhs); + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106806.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106806.patch new file mode 100644 index 0000000000..2b96854c95 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106806.patch @@ -0,0 +1,92 @@ +2011-09-12 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + gcc/testsuite/ + * gcc.target/arm/pr50099.c: Fix testcase from previous commit. + +2011-09-12 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + LP:838994 + gcc/ + Backport from mainline. + + 2011-09-06 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + PR target/50099 + * config/arm/iterators.md (qhs_zextenddi_cstr): New. + (qhs_zextenddi_op): New. + * config/arm/arm.md ("zero_extend<mode>di2"): Use them. + * config/arm/predicates.md ("arm_extendqisi_mem_op"): + Distinguish between ARM and Thumb2 states. + + gcc/testsuite/ + * gcc.target/arm/pr50099.c: New test. 
+ +=== modified file 'gcc/config/arm/arm.md' +--- old/gcc/config/arm/arm.md 2011-08-25 13:26:58 +0000 ++++ new/gcc/config/arm/arm.md 2011-09-12 12:32:29 +0000 +@@ -4136,8 +4136,8 @@ + + (define_insn "zero_extend<mode>di2" + [(set (match_operand:DI 0 "s_register_operand" "=r") +- (zero_extend:DI (match_operand:QHSI 1 "<qhs_extenddi_op>" +- "<qhs_extenddi_cstr>")))] ++ (zero_extend:DI (match_operand:QHSI 1 "<qhs_zextenddi_op>" ++ "<qhs_zextenddi_cstr>")))] + "TARGET_32BIT <qhs_zextenddi_cond>" + "#" + [(set_attr "length" "8") + +=== modified file 'gcc/config/arm/iterators.md' +--- old/gcc/config/arm/iterators.md 2011-05-03 15:14:56 +0000 ++++ new/gcc/config/arm/iterators.md 2011-09-06 14:29:24 +0000 +@@ -379,10 +379,14 @@ + (define_mode_attr qhs_zextenddi_cond [(SI "") (HI "&& arm_arch6") (QI "")]) + (define_mode_attr qhs_sextenddi_cond [(SI "") (HI "&& arm_arch6") + (QI "&& arm_arch6")]) ++(define_mode_attr qhs_zextenddi_op [(SI "s_register_operand") ++ (HI "nonimmediate_operand") ++ (QI "nonimmediate_operand")]) + (define_mode_attr qhs_extenddi_op [(SI "s_register_operand") + (HI "nonimmediate_operand") +- (QI "nonimmediate_operand")]) +-(define_mode_attr qhs_extenddi_cstr [(SI "r") (HI "rm") (QI "rm")]) ++ (QI "arm_reg_or_extendqisi_mem_op")]) ++(define_mode_attr qhs_extenddi_cstr [(SI "r") (HI "rm") (QI "rUq")]) ++(define_mode_attr qhs_zextenddi_cstr [(SI "r") (HI "rm") (QI "rm")]) + + ;;---------------------------------------------------------------------------- + ;; Code attributes + +=== modified file 'gcc/config/arm/predicates.md' +--- old/gcc/config/arm/predicates.md 2011-09-12 11:24:34 +0000 ++++ new/gcc/config/arm/predicates.md 2011-09-12 12:32:29 +0000 +@@ -289,8 +289,11 @@ + + (define_special_predicate "arm_extendqisi_mem_op" + (and (match_operand 0 "memory_operand") +- (match_test "arm_legitimate_address_outer_p (mode, XEXP (op, 0), +- SIGN_EXTEND, 0)"))) ++ (match_test "TARGET_ARM ? arm_legitimate_address_outer_p (mode, ++ XEXP (op, 0), ++ SIGN_EXTEND, ++ 0) ++ : memory_address_p (QImode, XEXP (op, 0))"))) + + (define_special_predicate "arm_reg_or_extendqisi_mem_op" + (ior (match_operand 0 "arm_extendqisi_mem_op") + +=== added file 'gcc/testsuite/gcc.target/arm/pr50099.c' +--- old/gcc/testsuite/gcc.target/arm/pr50099.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/pr50099.c 2011-09-09 16:42:45 +0000 +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++long long foo (signed char * arg) ++{ ++ long long temp_1; ++ ++ temp_1 = arg[256]; ++ return temp_1; ++} + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106807.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106807.patch new file mode 100644 index 0000000000..9474030f57 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106807.patch @@ -0,0 +1,767 @@ +2011-08-26 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from FSF mainline: + 2011-04-06 Wei Guozhi <carrot@google.com> + + PR target/47855 + gcc/ + * config/arm/arm.md (arm_cmpsi_insn): Compute attr "length". + (arm_cond_branch): Likewise. + (arm_cond_branch_reversed): Likewise. + (arm_jump): Likewise. + (push_multi): Likewise. + * config/arm/constraints.md (Py): New constraint. + + 2011-04-08 Wei Guozhi <carrot@google.com> + + PR target/47855 + * config/arm/arm-protos.h (arm_attr_length_push_multi): New prototype. + * config/arm/arm.c (arm_attr_length_push_multi): New function. 
+ * config/arm/arm.md (*push_multi): Change the length computation to
+ call a C function.
+
+2011-08-26 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
+
+ Backport from FSF mainline:
+
+ 2011-08-18 Jiangning Liu <jiangning.liu@arm.com>
+
+ gcc/
+ * config/arm/arm.md (*ior_scc_scc): Enable for Thumb2 as well.
+ (*ior_scc_scc_cmp): Likewise.
+ (*and_scc_scc): Likewise.
+ (*and_scc_scc_cmp): Likewise.
+ (*and_scc_scc_nodom): Likewise.
+ (*cmp_ite0, *cmp_ite1, *cmp_and, *cmp_ior): Handle Thumb2.
+
+ gcc/testsuite/
+ * gcc.target/arm/thumb2-cond-cmp-1.c: New. Make sure conditional
+ compare can be generated.
+ * gcc.target/arm/thumb2-cond-cmp-2.c: Likewise.
+ * gcc.target/arm/thumb2-cond-cmp-3.c: Likewise.
+ * gcc.target/arm/thumb2-cond-cmp-4.c: Likewise.
+
+=== modified file 'gcc/config/arm/arm-protos.h'
+--- old/gcc/config/arm/arm-protos.h 2011-09-12 11:03:11 +0000
++++ new/gcc/config/arm/arm-protos.h 2011-09-12 14:14:00 +0000
+@@ -156,6 +156,7 @@
+ extern const char *arm_output_memory_barrier (rtx *);
+ extern const char *arm_output_sync_insn (rtx, rtx *);
+ extern unsigned int arm_sync_loop_insns (rtx , rtx *);
++extern int arm_attr_length_push_multi(rtx, rtx);
+
+ #if defined TREE_CODE
+ extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
+
+=== modified file 'gcc/config/arm/arm.c'
+--- old/gcc/config/arm/arm.c 2011-09-12 11:03:11 +0000
++++ new/gcc/config/arm/arm.c 2011-09-12 14:14:00 +0000
+@@ -24391,4 +24391,30 @@
+ return NO_REGS;
+ }
+
++/* Compute the attribute "length" of insn "*push_multi".
++ So this function MUST be kept in sync with that insn pattern. */
++int
++arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
++{
++ int i, regno, hi_reg;
++ int num_saves = XVECLEN (parallel_op, 0);
++
++ /* ARM mode. */
++ if (TARGET_ARM)
++ return 4;
++
++ /* Thumb2 mode. */
++ regno = REGNO (first_op);
++ hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
++ for (i = 1; i < num_saves && !hi_reg; i++)
++ {
++ regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
++ hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
++ }
++
++ if (!hi_reg)
++ return 2;
++ return 4;
++}
++
+ #include "gt-arm.h"
+
+=== modified file 'gcc/config/arm/arm.md'
+--- old/gcc/config/arm/arm.md 2011-09-12 12:32:29 +0000
++++ new/gcc/config/arm/arm.md 2011-09-12 14:14:00 +0000
+@@ -48,6 +48,15 @@
+ (DOM_CC_X_OR_Y 2)
+ ]
+ )
++;; conditional compare combination
++(define_constants
++ [(CMP_CMP 0)
++ (CMN_CMP 1)
++ (CMP_CMN 2)
++ (CMN_CMN 3)
++ (NUM_OF_COND_CMP 4)
++ ]
++)
+
+ ;; UNSPEC Usage:
+ ;; Note: sin and cos are no-longer used.
+@@ -7198,13 +7207,17 @@ + + (define_insn "*arm_cmpsi_insn" + [(set (reg:CC CC_REGNUM) +- (compare:CC (match_operand:SI 0 "s_register_operand" "r,r") +- (match_operand:SI 1 "arm_add_operand" "rI,L")))] ++ (compare:CC (match_operand:SI 0 "s_register_operand" "l,r,r,r") ++ (match_operand:SI 1 "arm_add_operand" "Py,r,rI,L")))] + "TARGET_32BIT" + "@ + cmp%?\\t%0, %1 ++ cmp%?\\t%0, %1 ++ cmp%?\\t%0, %1 + cmn%?\\t%0, #%n1" +- [(set_attr "conds" "set")] ++ [(set_attr "conds" "set") ++ (set_attr "arch" "t2,t2,any,any") ++ (set_attr "length" "2,2,4,4")] + ) + + (define_insn "*cmpsi_shiftsi" +@@ -7375,7 +7388,14 @@ + return \"b%d1\\t%l0\"; + " + [(set_attr "conds" "use") +- (set_attr "type" "branch")] ++ (set_attr "type" "branch") ++ (set (attr "length") ++ (if_then_else ++ (and (ne (symbol_ref "TARGET_THUMB2") (const_int 0)) ++ (and (ge (minus (match_dup 0) (pc)) (const_int -250)) ++ (le (minus (match_dup 0) (pc)) (const_int 256)))) ++ (const_int 2) ++ (const_int 4)))] + ) + + (define_insn "*arm_cond_branch_reversed" +@@ -7394,7 +7414,14 @@ + return \"b%D1\\t%l0\"; + " + [(set_attr "conds" "use") +- (set_attr "type" "branch")] ++ (set_attr "type" "branch") ++ (set (attr "length") ++ (if_then_else ++ (and (ne (symbol_ref "TARGET_THUMB2") (const_int 0)) ++ (and (ge (minus (match_dup 0) (pc)) (const_int -250)) ++ (le (minus (match_dup 0) (pc)) (const_int 256)))) ++ (const_int 2) ++ (const_int 4)))] + ) + + +@@ -7846,7 +7873,14 @@ + return \"b%?\\t%l0\"; + } + " +- [(set_attr "predicable" "yes")] ++ [(set_attr "predicable" "yes") ++ (set (attr "length") ++ (if_then_else ++ (and (ne (symbol_ref "TARGET_THUMB2") (const_int 0)) ++ (and (ge (minus (match_dup 0) (pc)) (const_int -2044)) ++ (le (minus (match_dup 0) (pc)) (const_int 2048)))) ++ (const_int 2) ++ (const_int 4)))] + ) + + (define_insn "*thumb_jump" +@@ -8931,40 +8965,85 @@ + (set_attr "length" "8,12")] + ) + +-;; ??? Is it worth using these conditional patterns in Thumb-2 mode? 
+ (define_insn "*cmp_ite0" + [(set (match_operand 6 "dominant_cc_register" "") + (compare + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" +- [(match_operand:SI 0 "s_register_operand" "r,r,r,r") +- (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")]) ++ [(match_operand:SI 0 "s_register_operand" ++ "l,l,l,r,r,r,r,r,r") ++ (match_operand:SI 1 "arm_add_operand" ++ "lPy,lPy,lPy,rI,L,rI,L,rI,L")]) + (match_operator:SI 5 "arm_comparison_operator" +- [(match_operand:SI 2 "s_register_operand" "r,r,r,r") +- (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")]) ++ [(match_operand:SI 2 "s_register_operand" ++ "l,r,r,l,l,r,r,r,r") ++ (match_operand:SI 3 "arm_add_operand" ++ "lPy,rI,L,lPy,lPy,rI,rI,L,L")]) + (const_int 0)) + (const_int 0)))] +- "TARGET_ARM" ++ "TARGET_32BIT" + "* + { +- static const char * const opcodes[4][2] = +- { +- {\"cmp\\t%2, %3\;cmp%d5\\t%0, %1\", +- \"cmp\\t%0, %1\;cmp%d4\\t%2, %3\"}, +- {\"cmp\\t%2, %3\;cmn%d5\\t%0, #%n1\", +- \"cmn\\t%0, #%n1\;cmp%d4\\t%2, %3\"}, +- {\"cmn\\t%2, #%n3\;cmp%d5\\t%0, %1\", +- \"cmp\\t%0, %1\;cmn%d4\\t%2, #%n3\"}, +- {\"cmn\\t%2, #%n3\;cmn%d5\\t%0, #%n1\", +- \"cmn\\t%0, #%n1\;cmn%d4\\t%2, #%n3\"} +- }; ++ static const char * const cmp1[NUM_OF_COND_CMP][2] = ++ { ++ {\"cmp%d5\\t%0, %1\", ++ \"cmp%d4\\t%2, %3\"}, ++ {\"cmn%d5\\t%0, #%n1\", ++ \"cmp%d4\\t%2, %3\"}, ++ {\"cmp%d5\\t%0, %1\", ++ \"cmn%d4\\t%2, #%n3\"}, ++ {\"cmn%d5\\t%0, #%n1\", ++ \"cmn%d4\\t%2, #%n3\"} ++ }; ++ static const char * const cmp2[NUM_OF_COND_CMP][2] = ++ { ++ {\"cmp\\t%2, %3\", ++ \"cmp\\t%0, %1\"}, ++ {\"cmp\\t%2, %3\", ++ \"cmn\\t%0, #%n1\"}, ++ {\"cmn\\t%2, #%n3\", ++ \"cmp\\t%0, %1\"}, ++ {\"cmn\\t%2, #%n3\", ++ \"cmn\\t%0, #%n1\"} ++ }; ++ static const char * const ite[2] = ++ { ++ \"it\\t%d5\", ++ \"it\\t%d4\" ++ }; ++ static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN, ++ CMP_CMP, CMN_CMP, CMP_CMP, ++ CMN_CMP, CMP_CMN, CMN_CMN}; + int swap = + comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4])); + +- return opcodes[which_alternative][swap]; ++ output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands); ++ if (TARGET_THUMB2) { ++ output_asm_insn (ite[swap], operands); ++ } ++ output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands); ++ return \"\"; + }" + [(set_attr "conds" "set") +- (set_attr "length" "8")] ++ (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any") ++ (set_attr_alternative "length" ++ [(const_int 6) ++ (const_int 8) ++ (const_int 8) ++ (const_int 8) ++ (const_int 8) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10)) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10)) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10)) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10))])] + ) + + (define_insn "*cmp_ite1" +@@ -8972,35 +9051,81 @@ + (compare + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" +- [(match_operand:SI 0 "s_register_operand" "r,r,r,r") +- (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")]) ++ [(match_operand:SI 0 "s_register_operand" ++ "l,l,l,r,r,r,r,r,r") ++ (match_operand:SI 1 "arm_add_operand" ++ "lPy,lPy,lPy,rI,L,rI,L,rI,L")]) + (match_operator:SI 5 "arm_comparison_operator" +- [(match_operand:SI 2 "s_register_operand" "r,r,r,r") +- (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")]) ++ [(match_operand:SI 2 "s_register_operand" ++ "l,r,r,l,l,r,r,r,r") ++ (match_operand:SI 3 "arm_add_operand" ++ "lPy,rI,L,lPy,lPy,rI,rI,L,L")]) + (const_int 1)) + (const_int 
0)))] +- "TARGET_ARM" ++ "TARGET_32BIT" + "* + { +- static const char * const opcodes[4][2] = +- { +- {\"cmp\\t%0, %1\;cmp%d4\\t%2, %3\", +- \"cmp\\t%2, %3\;cmp%D5\\t%0, %1\"}, +- {\"cmn\\t%0, #%n1\;cmp%d4\\t%2, %3\", +- \"cmp\\t%2, %3\;cmn%D5\\t%0, #%n1\"}, +- {\"cmp\\t%0, %1\;cmn%d4\\t%2, #%n3\", +- \"cmn\\t%2, #%n3\;cmp%D5\\t%0, %1\"}, +- {\"cmn\\t%0, #%n1\;cmn%d4\\t%2, #%n3\", +- \"cmn\\t%2, #%n3\;cmn%D5\\t%0, #%n1\"} +- }; ++ static const char * const cmp1[NUM_OF_COND_CMP][2] = ++ { ++ {\"cmp\\t%0, %1\", ++ \"cmp\\t%2, %3\"}, ++ {\"cmn\\t%0, #%n1\", ++ \"cmp\\t%2, %3\"}, ++ {\"cmp\\t%0, %1\", ++ \"cmn\\t%2, #%n3\"}, ++ {\"cmn\\t%0, #%n1\", ++ \"cmn\\t%2, #%n3\"} ++ }; ++ static const char * const cmp2[NUM_OF_COND_CMP][2] = ++ { ++ {\"cmp%d4\\t%2, %3\", ++ \"cmp%D5\\t%0, %1\"}, ++ {\"cmp%d4\\t%2, %3\", ++ \"cmn%D5\\t%0, #%n1\"}, ++ {\"cmn%d4\\t%2, #%n3\", ++ \"cmp%D5\\t%0, %1\"}, ++ {\"cmn%d4\\t%2, #%n3\", ++ \"cmn%D5\\t%0, #%n1\"} ++ }; ++ static const char * const ite[2] = ++ { ++ \"it\\t%d4\", ++ \"it\\t%D5\" ++ }; ++ static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN, ++ CMP_CMP, CMN_CMP, CMP_CMP, ++ CMN_CMP, CMP_CMN, CMN_CMN}; + int swap = + comparison_dominates_p (GET_CODE (operands[5]), + reverse_condition (GET_CODE (operands[4]))); + +- return opcodes[which_alternative][swap]; ++ output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands); ++ if (TARGET_THUMB2) { ++ output_asm_insn (ite[swap], operands); ++ } ++ output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands); ++ return \"\"; + }" + [(set_attr "conds" "set") +- (set_attr "length" "8")] ++ (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any") ++ (set_attr_alternative "length" ++ [(const_int 6) ++ (const_int 8) ++ (const_int 8) ++ (const_int 8) ++ (const_int 8) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10)) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10)) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10)) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10))])] + ) + + (define_insn "*cmp_and" +@@ -9008,34 +9133,80 @@ + (compare + (and:SI + (match_operator 4 "arm_comparison_operator" +- [(match_operand:SI 0 "s_register_operand" "r,r,r,r") +- (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")]) ++ [(match_operand:SI 0 "s_register_operand" ++ "l,l,l,r,r,r,r,r,r") ++ (match_operand:SI 1 "arm_add_operand" ++ "lPy,lPy,lPy,rI,L,rI,L,rI,L")]) + (match_operator:SI 5 "arm_comparison_operator" +- [(match_operand:SI 2 "s_register_operand" "r,r,r,r") +- (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")])) ++ [(match_operand:SI 2 "s_register_operand" ++ "l,r,r,l,l,r,r,r,r") ++ (match_operand:SI 3 "arm_add_operand" ++ "lPy,rI,L,lPy,lPy,rI,rI,L,L")])) + (const_int 0)))] +- "TARGET_ARM" ++ "TARGET_32BIT" + "* + { +- static const char *const opcodes[4][2] = +- { +- {\"cmp\\t%2, %3\;cmp%d5\\t%0, %1\", +- \"cmp\\t%0, %1\;cmp%d4\\t%2, %3\"}, +- {\"cmp\\t%2, %3\;cmn%d5\\t%0, #%n1\", +- \"cmn\\t%0, #%n1\;cmp%d4\\t%2, %3\"}, +- {\"cmn\\t%2, #%n3\;cmp%d5\\t%0, %1\", +- \"cmp\\t%0, %1\;cmn%d4\\t%2, #%n3\"}, +- {\"cmn\\t%2, #%n3\;cmn%d5\\t%0, #%n1\", +- \"cmn\\t%0, #%n1\;cmn%d4\\t%2, #%n3\"} +- }; ++ static const char *const cmp1[NUM_OF_COND_CMP][2] = ++ { ++ {\"cmp%d5\\t%0, %1\", ++ \"cmp%d4\\t%2, %3\"}, ++ {\"cmn%d5\\t%0, #%n1\", ++ \"cmp%d4\\t%2, %3\"}, ++ {\"cmp%d5\\t%0, %1\", ++ \"cmn%d4\\t%2, #%n3\"}, ++ {\"cmn%d5\\t%0, #%n1\", ++ \"cmn%d4\\t%2, #%n3\"} ++ }; ++ static const char *const 
cmp2[NUM_OF_COND_CMP][2] = ++ { ++ {\"cmp\\t%2, %3\", ++ \"cmp\\t%0, %1\"}, ++ {\"cmp\\t%2, %3\", ++ \"cmn\\t%0, #%n1\"}, ++ {\"cmn\\t%2, #%n3\", ++ \"cmp\\t%0, %1\"}, ++ {\"cmn\\t%2, #%n3\", ++ \"cmn\\t%0, #%n1\"} ++ }; ++ static const char *const ite[2] = ++ { ++ \"it\\t%d5\", ++ \"it\\t%d4\" ++ }; ++ static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN, ++ CMP_CMP, CMN_CMP, CMP_CMP, ++ CMN_CMP, CMP_CMN, CMN_CMN}; + int swap = + comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4])); + +- return opcodes[which_alternative][swap]; ++ output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands); ++ if (TARGET_THUMB2) { ++ output_asm_insn (ite[swap], operands); ++ } ++ output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands); ++ return \"\"; + }" + [(set_attr "conds" "set") + (set_attr "predicable" "no") +- (set_attr "length" "8")] ++ (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any") ++ (set_attr_alternative "length" ++ [(const_int 6) ++ (const_int 8) ++ (const_int 8) ++ (const_int 8) ++ (const_int 8) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10)) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10)) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10)) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10))])] + ) + + (define_insn "*cmp_ior" +@@ -9043,34 +9214,80 @@ + (compare + (ior:SI + (match_operator 4 "arm_comparison_operator" +- [(match_operand:SI 0 "s_register_operand" "r,r,r,r") +- (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")]) ++ [(match_operand:SI 0 "s_register_operand" ++ "l,l,l,r,r,r,r,r,r") ++ (match_operand:SI 1 "arm_add_operand" ++ "lPy,lPy,lPy,rI,L,rI,L,rI,L")]) + (match_operator:SI 5 "arm_comparison_operator" +- [(match_operand:SI 2 "s_register_operand" "r,r,r,r") +- (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")])) ++ [(match_operand:SI 2 "s_register_operand" ++ "l,r,r,l,l,r,r,r,r") ++ (match_operand:SI 3 "arm_add_operand" ++ "lPy,rI,L,lPy,lPy,rI,rI,L,L")])) + (const_int 0)))] +- "TARGET_ARM" ++ "TARGET_32BIT" + "* +-{ +- static const char *const opcodes[4][2] = + { +- {\"cmp\\t%0, %1\;cmp%D4\\t%2, %3\", +- \"cmp\\t%2, %3\;cmp%D5\\t%0, %1\"}, +- {\"cmn\\t%0, #%n1\;cmp%D4\\t%2, %3\", +- \"cmp\\t%2, %3\;cmn%D5\\t%0, #%n1\"}, +- {\"cmp\\t%0, %1\;cmn%D4\\t%2, #%n3\", +- \"cmn\\t%2, #%n3\;cmp%D5\\t%0, %1\"}, +- {\"cmn\\t%0, #%n1\;cmn%D4\\t%2, #%n3\", +- \"cmn\\t%2, #%n3\;cmn%D5\\t%0, #%n1\"} +- }; +- int swap = +- comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4])); ++ static const char *const cmp1[NUM_OF_COND_CMP][2] = ++ { ++ {\"cmp\\t%0, %1\", ++ \"cmp\\t%2, %3\"}, ++ {\"cmn\\t%0, #%n1\", ++ \"cmp\\t%2, %3\"}, ++ {\"cmp\\t%0, %1\", ++ \"cmn\\t%2, #%n3\"}, ++ {\"cmn\\t%0, #%n1\", ++ \"cmn\\t%2, #%n3\"} ++ }; ++ static const char *const cmp2[NUM_OF_COND_CMP][2] = ++ { ++ {\"cmp%D4\\t%2, %3\", ++ \"cmp%D5\\t%0, %1\"}, ++ {\"cmp%D4\\t%2, %3\", ++ \"cmn%D5\\t%0, #%n1\"}, ++ {\"cmn%D4\\t%2, #%n3\", ++ \"cmp%D5\\t%0, %1\"}, ++ {\"cmn%D4\\t%2, #%n3\", ++ \"cmn%D5\\t%0, #%n1\"} ++ }; ++ static const char *const ite[2] = ++ { ++ \"it\\t%D4\", ++ \"it\\t%D5\" ++ }; ++ static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN, ++ CMP_CMP, CMN_CMP, CMP_CMP, ++ CMN_CMP, CMP_CMN, CMN_CMN}; ++ int swap = ++ comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4])); + +- return opcodes[which_alternative][swap]; +-} +-" ++ output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands); ++ if 
(TARGET_THUMB2) { ++ output_asm_insn (ite[swap], operands); ++ } ++ output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands); ++ return \"\"; ++ } ++ " + [(set_attr "conds" "set") +- (set_attr "length" "8")] ++ (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any") ++ (set_attr_alternative "length" ++ [(const_int 6) ++ (const_int 8) ++ (const_int 8) ++ (const_int 8) ++ (const_int 8) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10)) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10)) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10)) ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 8) ++ (const_int 10))])] + ) + + (define_insn_and_split "*ior_scc_scc" +@@ -9082,11 +9299,11 @@ + [(match_operand:SI 4 "s_register_operand" "r") + (match_operand:SI 5 "arm_add_operand" "rIL")]))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM ++ "TARGET_32BIT + && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_OR_Y) + != CCmode)" + "#" +- "TARGET_ARM && reload_completed" ++ "TARGET_32BIT && reload_completed" + [(set (match_dup 7) + (compare + (ior:SI +@@ -9115,9 +9332,9 @@ + (set (match_operand:SI 7 "s_register_operand" "=r") + (ior:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)]) + (match_op_dup 6 [(match_dup 4) (match_dup 5)])))] +- "TARGET_ARM" ++ "TARGET_32BIT" + "#" +- "TARGET_ARM && reload_completed" ++ "TARGET_32BIT && reload_completed" + [(set (match_dup 0) + (compare + (ior:SI +@@ -9138,11 +9355,11 @@ + [(match_operand:SI 4 "s_register_operand" "r") + (match_operand:SI 5 "arm_add_operand" "rIL")]))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM ++ "TARGET_32BIT + && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y) + != CCmode)" + "#" +- "TARGET_ARM && reload_completed ++ "TARGET_32BIT && reload_completed + && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y) + != CCmode)" + [(set (match_dup 7) +@@ -9173,9 +9390,9 @@ + (set (match_operand:SI 7 "s_register_operand" "=r") + (and:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)]) + (match_op_dup 6 [(match_dup 4) (match_dup 5)])))] +- "TARGET_ARM" ++ "TARGET_32BIT" + "#" +- "TARGET_ARM && reload_completed" ++ "TARGET_32BIT && reload_completed" + [(set (match_dup 0) + (compare + (and:SI +@@ -9200,11 +9417,11 @@ + [(match_operand:SI 4 "s_register_operand" "r,r,r") + (match_operand:SI 5 "arm_add_operand" "rIL,rIL,rIL")]))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM ++ "TARGET_32BIT + && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y) + == CCmode)" + "#" +- "TARGET_ARM && reload_completed" ++ "TARGET_32BIT && reload_completed" + [(parallel [(set (match_dup 0) + (match_op_dup 3 [(match_dup 1) (match_dup 2)])) + (clobber (reg:CC CC_REGNUM))]) +@@ -10314,6 +10531,8 @@ + ;; Push multiple registers to the stack. Registers are in parallel (use ...) + ;; expressions. For simplicity, the first register is also in the unspec + ;; part. ++;; To avoid the usage of GNU extension, the length attribute is computed ++;; in a C function arm_attr_length_push_multi. 
+ (define_insn "*push_multi" + [(match_parallel 2 "multi_register_push" + [(set (match_operand:BLK 0 "memory_operand" "=m") +@@ -10353,7 +10572,9 @@ + + return \"\"; + }" +- [(set_attr "type" "store4")] ++ [(set_attr "type" "store4") ++ (set (attr "length") ++ (symbol_ref "arm_attr_length_push_multi (operands[2], operands[1])"))] + ) + + (define_insn "stack_tie" + +=== modified file 'gcc/config/arm/constraints.md' +--- old/gcc/config/arm/constraints.md 2011-08-25 13:26:58 +0000 ++++ new/gcc/config/arm/constraints.md 2011-09-12 14:14:00 +0000 +@@ -31,7 +31,7 @@ + ;; The following multi-letter normal constraints have been used: + ;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz + ;; in Thumb-1 state: Pa, Pb, Pc, Pd +-;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px ++;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py + + ;; The following memory constraints have been used: + ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us +@@ -201,6 +201,11 @@ + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1"))) + ++(define_constraint "Py" ++ "@internal In Thumb-2 state a constant in the range 0 to 255" ++ (and (match_code "const_int") ++ (match_test "TARGET_THUMB2 && ival >= 0 && ival <= 255"))) ++ + (define_constraint "G" + "In ARM/Thumb-2 state a valid FPA immediate constant." + (and (match_code "const_double") + +=== added file 'gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-1.c' +--- old/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-1.c 2011-09-12 14:14:00 +0000 +@@ -0,0 +1,13 @@ ++/* Use conditional compare */ ++/* { dg-options "-O2" } */ ++/* { dg-skip-if "" { arm_thumb1_ok } } */ ++/* { dg-final { scan-assembler "cmpne" } } */ ++ ++int f(int i, int j) ++{ ++ if ( (i == '+') || (j == '-') ) { ++ return 1; ++ } else { ++ return 0; ++ } ++} + +=== added file 'gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-2.c' +--- old/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-2.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-2.c 2011-09-12 14:14:00 +0000 +@@ -0,0 +1,13 @@ ++/* Use conditional compare */ ++/* { dg-options "-O2" } */ ++/* { dg-skip-if "" { arm_thumb1_ok } } */ ++/* { dg-final { scan-assembler "cmpeq" } } */ ++ ++int f(int i, int j) ++{ ++ if ( (i == '+') && (j == '-') ) { ++ return 1; ++ } else { ++ return 0; ++ } ++} + +=== added file 'gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-3.c' +--- old/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-3.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-3.c 2011-09-12 14:14:00 +0000 +@@ -0,0 +1,12 @@ ++/* Use conditional compare */ ++/* { dg-options "-O2" } */ ++/* { dg-skip-if "" { arm_thumb1_ok } } */ ++/* { dg-final { scan-assembler "cmpgt" } } */ ++ ++int f(int i, int j) ++{ ++ if ( (i >= '+') ? (j > '-') : 0) ++ return 1; ++ else ++ return 0; ++} + +=== added file 'gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-4.c' +--- old/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-4.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-4.c 2011-09-12 14:14:00 +0000 +@@ -0,0 +1,12 @@ ++/* Use conditional compare */ ++/* { dg-options "-O2" } */ ++/* { dg-skip-if "" { arm_thumb1_ok } } */ ++/* { dg-final { scan-assembler "cmpgt" } } */ ++ ++int f(int i, int j) ++{ ++ if ( (i >= '+') ? 
(j <= '-') : 1) ++ return 1; ++ else ++ return 0; ++} + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106811.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106811.patch new file mode 100644 index 0000000000..41b5c6dbf9 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106811.patch @@ -0,0 +1,203 @@ +2011-09-15 Richard Sandiford <richard.sandiford@linaro.org> + + Revert: + + gcc/ + PR target/49030 + * config/arm/arm-protos.h (maybe_get_arm_condition_code): Declare. + * config/arm/arm.c (maybe_get_arm_condition_code): New function, + reusing the old code from get_arm_condition_code. Return ARM_NV + for invalid comparison codes. + (get_arm_condition_code): Redefine in terms of + maybe_get_arm_condition_code. + * config/arm/predicates.md (arm_comparison_operator): Use + maybe_get_arm_condition_code. + + gcc/testsuite/ + PR target/49030 + * gcc.dg/torture/pr49030.c: New test. + +=== modified file 'gcc/config/arm/arm-protos.h' +--- old/gcc/config/arm/arm-protos.h 2011-09-12 14:14:00 +0000 ++++ new/gcc/config/arm/arm-protos.h 2011-09-15 09:45:31 +0000 +@@ -180,7 +180,6 @@ + #endif + extern int thumb_shiftable_const (unsigned HOST_WIDE_INT); + #ifdef RTX_CODE +-extern enum arm_cond_code maybe_get_arm_condition_code (rtx); + extern void thumb1_final_prescan_insn (rtx); + extern void thumb2_final_prescan_insn (rtx); + extern const char *thumb_load_double_from_address (rtx *); + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-09-12 14:14:00 +0000 ++++ new/gcc/config/arm/arm.c 2011-09-15 09:45:31 +0000 +@@ -17494,10 +17494,10 @@ + decremented/zeroed by arm_asm_output_opcode as the insns are output. */ + + /* Returns the index of the ARM condition code string in +- `arm_condition_codes', or ARM_NV if the comparison is invalid. +- COMPARISON should be an rtx like `(eq (...) (...))'. */ +-enum arm_cond_code +-maybe_get_arm_condition_code (rtx comparison) ++ `arm_condition_codes'. COMPARISON should be an rtx like ++ `(eq (...) (...))'. */ ++static enum arm_cond_code ++get_arm_condition_code (rtx comparison) + { + enum machine_mode mode = GET_MODE (XEXP (comparison, 0)); + enum arm_cond_code code; +@@ -17521,11 +17521,11 @@ + case CC_DLTUmode: code = ARM_CC; + + dominance: ++ gcc_assert (comp_code == EQ || comp_code == NE); ++ + if (comp_code == EQ) + return ARM_INVERSE_CONDITION_CODE (code); +- if (comp_code == NE) +- return code; +- return ARM_NV; ++ return code; + + case CC_NOOVmode: + switch (comp_code) +@@ -17534,7 +17534,7 @@ + case EQ: return ARM_EQ; + case GE: return ARM_PL; + case LT: return ARM_MI; +- default: return ARM_NV; ++ default: gcc_unreachable (); + } + + case CC_Zmode: +@@ -17542,7 +17542,7 @@ + { + case NE: return ARM_NE; + case EQ: return ARM_EQ; +- default: return ARM_NV; ++ default: gcc_unreachable (); + } + + case CC_Nmode: +@@ -17550,7 +17550,7 @@ + { + case NE: return ARM_MI; + case EQ: return ARM_PL; +- default: return ARM_NV; ++ default: gcc_unreachable (); + } + + case CCFPEmode: +@@ -17575,7 +17575,7 @@ + /* UNEQ and LTGT do not have a representation. */ + case UNEQ: /* Fall through. */ + case LTGT: /* Fall through. 
*/ +- default: return ARM_NV; ++ default: gcc_unreachable (); + } + + case CC_SWPmode: +@@ -17591,7 +17591,7 @@ + case GTU: return ARM_CC; + case LEU: return ARM_CS; + case LTU: return ARM_HI; +- default: return ARM_NV; ++ default: gcc_unreachable (); + } + + case CC_Cmode: +@@ -17599,7 +17599,7 @@ + { + case LTU: return ARM_CS; + case GEU: return ARM_CC; +- default: return ARM_NV; ++ default: gcc_unreachable (); + } + + case CC_CZmode: +@@ -17611,7 +17611,7 @@ + case GTU: return ARM_HI; + case LEU: return ARM_LS; + case LTU: return ARM_CC; +- default: return ARM_NV; ++ default: gcc_unreachable (); + } + + case CC_NCVmode: +@@ -17621,7 +17621,7 @@ + case LT: return ARM_LT; + case GEU: return ARM_CS; + case LTU: return ARM_CC; +- default: return ARM_NV; ++ default: gcc_unreachable (); + } + + case CCmode: +@@ -17637,22 +17637,13 @@ + case GTU: return ARM_HI; + case LEU: return ARM_LS; + case LTU: return ARM_CC; +- default: return ARM_NV; ++ default: gcc_unreachable (); + } + + default: gcc_unreachable (); + } + } + +-/* Like maybe_get_arm_condition_code, but never return ARM_NV. */ +-static enum arm_cond_code +-get_arm_condition_code (rtx comparison) +-{ +- enum arm_cond_code code = maybe_get_arm_condition_code (comparison); +- gcc_assert (code != ARM_NV); +- return code; +-} +- + /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed + instructions. */ + void + +=== modified file 'gcc/config/arm/predicates.md' +--- old/gcc/config/arm/predicates.md 2011-09-12 12:32:29 +0000 ++++ new/gcc/config/arm/predicates.md 2011-09-15 09:45:31 +0000 +@@ -243,9 +243,10 @@ + ;; True for integer comparisons and, if FP is active, for comparisons + ;; other than LTGT or UNEQ. + (define_special_predicate "arm_comparison_operator" +- (and (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu, +- unordered,ordered,unlt,unle,unge,ungt") +- (match_test "maybe_get_arm_condition_code (op) != ARM_NV"))) ++ (ior (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu") ++ (and (match_test "TARGET_32BIT && TARGET_HARD_FLOAT ++ && (TARGET_FPA || TARGET_VFP)") ++ (match_code "unordered,ordered,unlt,unle,unge,ungt")))) + + (define_special_predicate "lt_ge_comparison_operator" + (match_code "lt,ge")) + +=== removed file 'gcc/testsuite/gcc.dg/torture/pr49030.c' +--- old/gcc/testsuite/gcc.dg/torture/pr49030.c 2011-09-05 09:40:19 +0000 ++++ new/gcc/testsuite/gcc.dg/torture/pr49030.c 1970-01-01 00:00:00 +0000 +@@ -1,19 +0,0 @@ +-void +-sample_move_d32u24_sS (char *dst, float *src, unsigned long nsamples, +- unsigned long dst_skip) +-{ +- long long y; +- while (nsamples--) +- { +- y = (long long) (*src * 8388608.0f) << 8; +- if (y > 2147483647) { +- *(int *) dst = 2147483647; +- } else if (y < -2147483647 - 1) { +- *(int *) dst = -2147483647 - 1; +- } else { +- *(int *) dst = (int) y; +- } +- dst += dst_skip; +- src++; +- } +-} + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106814.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106814.patch new file mode 100644 index 0000000000..91b2191cb8 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106814.patch @@ -0,0 +1,80 @@ +2011-09-22 Revital Eres <revital.eres@linaro.org> + + gcc/ + Backport from trunk -r178804: + modulo-sched.c (remove_node_from_ps): Return void + instead of bool. + (optimize_sc): Adjust call to remove_node_from_ps. + (sms_schedule): Add print info. 
+ +=== modified file 'gcc/modulo-sched.c' +--- old/gcc/modulo-sched.c 2011-08-09 04:51:48 +0000 ++++ new/gcc/modulo-sched.c 2011-09-14 11:06:06 +0000 +@@ -211,7 +211,7 @@ + static bool try_scheduling_node_in_cycle (partial_schedule_ptr, ddg_node_ptr, + int, int, sbitmap, int *, sbitmap, + sbitmap); +-static bool remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr); ++static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr); + + #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap) + #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time) +@@ -834,8 +834,7 @@ + if (next_ps_i->node->cuid == g->closing_branch->cuid) + break; + +- gcc_assert (next_ps_i); +- gcc_assert (remove_node_from_ps (ps, next_ps_i)); ++ remove_node_from_ps (ps, next_ps_i); + success = + try_scheduling_node_in_cycle (ps, g->closing_branch, + g->closing_branch->cuid, c, +@@ -1485,8 +1484,8 @@ + if (dump_file) + { + fprintf (dump_file, +- "SMS succeeded %d %d (with ii, sc)\n", ps->ii, +- stage_count); ++ "%s:%d SMS succeeded %d %d (with ii, sc)\n", ++ insn_file (tail), insn_line (tail), ps->ii, stage_count); + print_partial_schedule (ps, dump_file); + } + +@@ -2810,22 +2809,18 @@ + } + + +-/* Removes the given PS_INSN from the partial schedule. Returns false if the +- node is not found in the partial schedule, else returns true. */ +-static bool ++/* Removes the given PS_INSN from the partial schedule. */ ++static void + remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i) + { + int row; + +- if (!ps || !ps_i) +- return false; +- ++ gcc_assert (ps && ps_i); ++ + row = SMODULO (ps_i->cycle, ps->ii); + if (! ps_i->prev_in_row) + { +- if (ps_i != ps->rows[row]) +- return false; +- ++ gcc_assert (ps_i == ps->rows[row]); + ps->rows[row] = ps_i->next_in_row; + if (ps->rows[row]) + ps->rows[row]->prev_in_row = NULL; +@@ -2839,7 +2834,7 @@ + + ps->rows_length[row] -= 1; + free (ps_i); +- return true; ++ return; + } + + /* Unlike what literature describes for modulo scheduling (which focuses + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106815.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106815.patch new file mode 100644 index 0000000000..16779bbf1e --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106815.patch @@ -0,0 +1,528 @@ +2011-09-25 Ira Rosen <ira.rosen@linaro.org> + + gcc/testsuite/ + * lib/target-supports.exp (check_effective_target_vect_multiple_sizes): + Replace check_effective_target_arm_neon with + check_effective_target_arm_neon_ok. + + Backport from mainline: + + 2011-09-06 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * config/arm/arm.c (arm_preferred_simd_mode): Check + TARGET_NEON_VECTORIZE_DOUBLE instead of + TARGET_NEON_VECTORIZE_QUAD. + (arm_autovectorize_vector_sizes): Likewise. + * config/arm/arm.opt (mvectorize-with-neon-quad): Make inverse + mask of mvectorize-with-neon-double. Add RejectNegative. + (mvectorize-with-neon-double): New. + + gcc/testsuite/ + * lib/target-supports.exp (check_effective_target_vect_multiple_sizes): + New procedure. + (add_options_for_quad_vectors): Replace with ... + (add_options_for_double_vectors): ... this. + * gfortran.dg/vect/pr19049.f90: Expect more printings on targets that + support multiple vector sizes since the vectorizer attempts to + vectorize with both vector sizes. 
+ * gcc.dg/vect/no-vfa-vect-79.c, + gcc.dg/vect/no-vfa-vect-102a.c, gcc.dg/vect/vect-outer-1a.c, + gcc.dg/vect/vect-outer-1b.c, gcc.dg/vect/vect-outer-2b.c, + gcc.dg/vect/vect-outer-3a.c, gcc.dg/vect/no-vfa-vect-37.c, + gcc.dg/vect/vect-outer-3b.c, gcc.dg/vect/no-vfa-vect-101.c, + gcc.dg/vect/no-vfa-vect-102.c, gcc.dg/vect/vect-reduc-dot-s8b.c, + gcc.dg/vect/vect-outer-1.c, gcc.dg/vect/vect-104.c: Likewise. + * gcc.dg/vect/vect-42.c: Run with 64 bit vectors if applicable. + * gcc.dg/vect/vect-multitypes-6.c, gcc.dg/vect/vect-52.c, + gcc.dg/vect/vect-54.c, gcc.dg/vect/vect-46.c, gcc.dg/vect/vect-48.c, + gcc.dg/vect/vect-96.c, gcc.dg/vect/vect-multitypes-3.c, + gcc.dg/vect/vect-40.c: Likewise. + * gcc.dg/vect/vect-outer-5.c: Remove quad-vectors option as + redundant. + * gcc.dg/vect/vect-109.c, gcc.dg/vect/vect-peel-1.c, + gcc.dg/vect/vect-peel-2.c, gcc.dg/vect/slp-25.c, + gcc.dg/vect/vect-multitypes-1.c, gcc.dg/vect/slp-3.c, + gcc.dg/vect/no-vfa-pr29145.c, gcc.dg/vect/vect-multitypes-4.c: + Likewise. + * gcc.dg/vect/vect-peel-4.c: Make ia global. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-09-15 09:45:31 +0000 ++++ new/gcc/config/arm/arm.c 2011-09-19 07:44:24 +0000 +@@ -22974,7 +22974,7 @@ + return false; + } + +-/* Use the option -mvectorize-with-neon-quad to override the use of doubleword ++/* Use the option -mvectorize-with-neon-double to override the use of quardword + registers when autovectorizing for Neon, at least until multiple vector + widths are supported properly by the middle-end. */ + +@@ -22985,15 +22985,15 @@ + switch (mode) + { + case SFmode: +- return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode; ++ return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode; + case SImode: +- return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode; ++ return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode; + case HImode: +- return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode; ++ return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode; + case QImode: +- return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode; ++ return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode; + case DImode: +- if (TARGET_NEON_VECTORIZE_QUAD) ++ if (!TARGET_NEON_VECTORIZE_DOUBLE) + return V2DImode; + break; + +@@ -24226,7 +24226,7 @@ + static unsigned int + arm_autovectorize_vector_sizes (void) + { +- return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0; ++ return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8); + } + + static bool + +=== modified file 'gcc/config/arm/arm.opt' +--- old/gcc/config/arm/arm.opt 2009-06-18 11:24:10 +0000 ++++ new/gcc/config/arm/arm.opt 2011-09-19 07:44:24 +0000 +@@ -158,9 +158,13 @@ + Assume big endian bytes, little endian words + + mvectorize-with-neon-quad +-Target Report Mask(NEON_VECTORIZE_QUAD) ++Target Report RejectNegative InverseMask(NEON_VECTORIZE_DOUBLE) + Use Neon quad-word (rather than double-word) registers for vectorization + ++mvectorize-with-neon-double ++Target Report RejectNegative Mask(NEON_VECTORIZE_DOUBLE) ++Use Neon double-word (rather than quad-word) registers for vectorization ++ + mword-relocations + Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS) + Only generate absolute relocations on word sized values. 
+ +=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c' +--- old/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2011-04-28 11:46:58 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c' +--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c 2011-09-19 07:44:24 +0000 +@@ -45,6 +45,7 @@ + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c' +--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c 2007-09-12 07:48:44 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c 2011-09-19 07:44:24 +0000 +@@ -53,6 +53,7 @@ + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c' +--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c 2007-09-12 07:48:44 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c 2011-09-19 07:44:24 +0000 +@@ -53,6 +53,7 @@ + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c' +--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c 2009-05-08 12:39:01 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c 2011-09-19 07:44:24 +0000 +@@ -58,5 +58,6 @@ + If/when the aliasing problems are resolved, unalignment may + prevent vectorization on some targets. 
*/ + /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { xfail *-*-* } } } */ +-/* { dg-final { scan-tree-dump-times "can't determine dependence between" 2 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "can't determine dependence" 4 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c' +--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c 2009-05-08 12:39:01 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c 2011-09-19 07:44:24 +0000 +@@ -46,5 +46,6 @@ + If/when the aliasing problems are resolved, unalignment may + prevent vectorization on some targets. */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ +-/* { dg-final { scan-tree-dump-times "can't determine dependence between" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/slp-25.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-25.c 2010-10-04 14:59:30 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-25.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/slp-3.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-3.c 2011-04-28 11:46:58 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-3.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-104.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-104.c 2007-09-12 07:48:44 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-104.c 2011-09-19 07:44:24 +0000 +@@ -64,6 +64,7 @@ + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-109.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-109.c 2010-10-04 14:59:30 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-109.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-40.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-40.c 2009-05-25 14:18:21 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-40.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_float } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-42.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-42.c 2010-10-04 14:59:30 +0000 ++++ 
new/gcc/testsuite/gcc.dg/vect/vect-42.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_float } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-46.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-46.c 2009-05-25 14:18:21 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-46.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_float } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-48.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-48.c 2009-11-04 10:22:22 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-48.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_float } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-52.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-52.c 2009-11-04 10:22:22 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-52.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_float } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-54.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-54.c 2009-10-27 11:46:07 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-54.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_float } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-96.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-96.c 2010-10-04 14:59:30 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-96.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_int } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c 2010-10-04 14:59:30 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c 2009-11-04 10:22:22 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_int } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c 2010-10-04 14:59:30 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c 2009-11-10 18:01:22 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_int } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + 
#include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-1.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-outer-1.c 2009-05-08 12:39:01 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-outer-1.c 2011-09-19 07:44:24 +0000 +@@ -22,5 +22,6 @@ + } + + /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ +-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-1a.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c 2009-06-16 06:21:12 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c 2011-09-19 07:44:24 +0000 +@@ -20,5 +20,6 @@ + } + + /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ +-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-1b.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-outer-1b.c 2007-08-19 11:02:48 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-outer-1b.c 2011-09-19 07:44:24 +0000 +@@ -22,5 +22,6 @@ + } + + /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ +-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-2b.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-outer-2b.c 2009-05-08 12:39:01 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-outer-2b.c 2011-09-19 07:44:24 +0000 +@@ -37,5 +37,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "strided access in outer loop." 
1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-3a.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-outer-3a.c 2009-05-08 12:39:01 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-outer-3a.c 2011-09-19 07:44:24 +0000 +@@ -49,5 +49,6 @@ + } + + /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */ +-/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 2 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 2 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 3 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-3b.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-outer-3b.c 2009-05-08 12:39:01 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-outer-3b.c 2011-09-19 07:44:24 +0000 +@@ -49,5 +49,6 @@ + } + + /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ +-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 4 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-5.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2011-04-28 11:46:58 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_float } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include <signal.h> + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-peel-1.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-peel-1.c 2011-01-10 12:41:40 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-peel-1.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-peel-2.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-peel-2.c 2011-01-10 12:41:40 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-peel-2.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-peel-4.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-peel-4.c 2011-01-10 12:41:40 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-peel-4.c 2011-09-19 07:44:24 +0000 +@@ -6,12 +6,12 @@ + #define N 128 + + int ib[N+7]; ++int ia[N+1]; + + __attribute__ ((noinline)) + int main1 () + { + int i; +- int ia[N+1]; + + /* Don't peel keeping one load and the store aligned. 
*/ + for (i = 0; i <= N; i++) + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c 2010-05-27 12:23:45 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c 2011-09-19 07:44:24 +0000 +@@ -58,7 +58,8 @@ + } + + /* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" { xfail *-*-* } } } */ +-/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_multiple_sizes } } } */ + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ + + +=== modified file 'gcc/testsuite/gfortran.dg/vect/pr19049.f90' +--- old/gcc/testsuite/gfortran.dg/vect/pr19049.f90 2005-07-25 11:05:07 +0000 ++++ new/gcc/testsuite/gfortran.dg/vect/pr19049.f90 2011-09-19 07:44:24 +0000 +@@ -19,6 +19,7 @@ + end + + ! { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } +-! { dg-final { scan-tree-dump-times "complicated access pattern" 1 "vect" } } ++! { dg-final { scan-tree-dump-times "complicated access pattern" 1 "vect" { xfail vect_multiple_sizes } } } ++! { dg-final { scan-tree-dump-times "complicated access pattern" 2 "vect" { target vect_multiple_sizes } } } + ! { dg-final { cleanup-tree-dump "vect" } } + + +=== modified file 'gcc/testsuite/lib/target-supports.exp' +--- old/gcc/testsuite/lib/target-supports.exp 2011-08-13 08:32:32 +0000 ++++ new/gcc/testsuite/lib/target-supports.exp 2011-09-20 07:54:28 +0000 +@@ -3265,6 +3265,24 @@ + }] + } + ++# Return 1 if the target supports multiple vector sizes ++ ++proc check_effective_target_vect_multiple_sizes { } { ++ global et_vect_multiple_sizes ++ ++ if [info exists et_vect_multiple_sizes_saved] { ++ verbose "check_effective_target_vect_multiple_sizes: using cached result" 2 ++ } else { ++ set et_vect_multiple_sizes_saved 0 ++ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } { ++ set et_vect_multiple_sizes_saved 1 ++ } ++ } ++ ++ verbose "check_effective_target_vect_multiple_sizes: returning $et_vect_multiple_sizes_saved" 2 ++ return $et_vect_multiple_sizes_saved ++} ++ + # Return 1 if the target supports section-anchors + + proc check_effective_target_section_anchors { } { +@@ -3648,11 +3666,11 @@ + return $flags + } + +-# Add to FLAGS the flags needed to enable 128-bit vectors. ++# Add to FLAGS the flags needed to enable 64-bit vectors. + +-proc add_options_for_quad_vectors { flags } { ++proc add_options_for_double_vectors { flags } { + if [is-effective-target arm_neon_ok] { +- return "$flags -mvectorize-with-neon-quad" ++ return "$flags -mvectorize-with-neon-double" + } + + return $flags + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106816.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106816.patch new file mode 100644 index 0000000000..2f70b1b9c2 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106816.patch @@ -0,0 +1,387 @@ +2011-09-28 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-09-28 Richard Sandiford <richard.sandiford@linaro.org> + + * config/arm/neon.md (neon_move_lo_quad_<mode>): Delete. + (neon_move_hi_quad_<mode>): Likewise. 
+ (move_hi_quad_<mode>, move_lo_quad_<mode>): Use subreg moves. + +2011-09-28 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-09-27 Richard Sandiford <richard.sandiford@linaro.org> + + * config/arm/neon.md (neon_vget_highv16qi, neon_vget_highv8hi) + (neon_vget_highv4si, neon_vget_highv4sf, neon_vget_highv2di) + (neon_vget_lowv16qi, neon_vget_lowv8hi, neon_vget_lowv4si) + (neon_vget_lowv4sf, neon_vget_lowv2di): Turn into define_expands + that produce subreg moves. Define using VQX iterators. + +2011-09-28 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-09-14 Richard Sandiford <richard.sandiford@linaro.org> + + * simplify-rtx.c (simplify_subreg): Check that the inner mode is + a scalar integer before applying integer-only optimisations to + inner arithmetic. + +=== modified file 'gcc/config/arm/neon.md' +--- old/gcc/config/arm/neon.md 2011-07-04 14:03:49 +0000 ++++ new/gcc/config/arm/neon.md 2011-09-28 15:14:59 +0000 +@@ -1235,66 +1235,14 @@ + (const_string "neon_int_1") (const_string "neon_int_5")))] + ) + +-; FIXME: We wouldn't need the following insns if we could write subregs of +-; vector registers. Make an attempt at removing unnecessary moves, though +-; we're really at the mercy of the register allocator. +- +-(define_insn "neon_move_lo_quad_<mode>" +- [(set (match_operand:ANY128 0 "s_register_operand" "+w") +- (vec_concat:ANY128 +- (match_operand:<V_HALF> 1 "s_register_operand" "w") +- (vec_select:<V_HALF> +- (match_dup 0) +- (match_operand:ANY128 2 "vect_par_constant_high" ""))))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src) +- return "vmov\t%e0, %P1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_move_hi_quad_<mode>" +- [(set (match_operand:ANY128 0 "s_register_operand" "+w") +- (vec_concat:ANY128 +- (vec_select:<V_HALF> +- (match_dup 0) +- (match_operand:ANY128 2 "vect_par_constant_low" "")) +- (match_operand:<V_HALF> 1 "s_register_operand" "w")))] +- +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src) +- return "vmov\t%f0, %P1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- + (define_expand "move_hi_quad_<mode>" + [(match_operand:ANY128 0 "s_register_operand" "") + (match_operand:<V_HALF> 1 "s_register_operand" "")] + "TARGET_NEON" + { +- rtvec v = rtvec_alloc (<V_mode_nunits>/2); +- rtx t1; +- int i; +- +- for (i=0; i < (<V_mode_nunits>/2); i++) +- RTVEC_ELT (v, i) = GEN_INT (i); +- +- t1 = gen_rtx_PARALLEL (<MODE>mode, v); +- emit_insn (gen_neon_move_hi_quad_<mode> (operands[0], operands[1], t1)); +- ++ emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode, ++ GET_MODE_SIZE (<V_HALF>mode)), ++ operands[1]); + DONE; + }) + +@@ -1303,16 +1251,9 @@ + (match_operand:<V_HALF> 1 "s_register_operand" "")] + "TARGET_NEON" + { +- rtvec v = rtvec_alloc (<V_mode_nunits>/2); +- rtx t1; +- int i; +- +- for (i=0; i < (<V_mode_nunits>/2); i++) +- RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i); +- +- t1 = gen_rtx_PARALLEL (<MODE>mode, v); +- emit_insn (gen_neon_move_lo_quad_<mode> (operands[0], operands[1], t1)); +- ++ emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], ++ <MODE>mode, 0), ++ operands[1]); + DONE; + }) + +@@ -2950,183 +2891,27 @@ + (set_attr "neon_type" "neon_bp_simple")] + ) + +-(define_insn "neon_vget_highv16qi" +- [(set 
(match_operand:V8QI 0 "s_register_operand" "=w") +- (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w") +- (parallel [(const_int 8) (const_int 9) +- (const_int 10) (const_int 11) +- (const_int 12) (const_int 13) +- (const_int 14) (const_int 15)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src + 2) +- return "vmov\t%P0, %f1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_highv8hi" +- [(set (match_operand:V4HI 0 "s_register_operand" "=w") +- (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w") +- (parallel [(const_int 4) (const_int 5) +- (const_int 6) (const_int 7)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src + 2) +- return "vmov\t%P0, %f1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_highv4si" +- [(set (match_operand:V2SI 0 "s_register_operand" "=w") +- (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w") +- (parallel [(const_int 2) (const_int 3)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src + 2) +- return "vmov\t%P0, %f1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_highv4sf" +- [(set (match_operand:V2SF 0 "s_register_operand" "=w") +- (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w") +- (parallel [(const_int 2) (const_int 3)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src + 2) +- return "vmov\t%P0, %f1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_highv2di" +- [(set (match_operand:DI 0 "s_register_operand" "=w") +- (vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w") +- (parallel [(const_int 1)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src + 2) +- return "vmov\t%P0, %f1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_lowv16qi" +- [(set (match_operand:V8QI 0 "s_register_operand" "=w") +- (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w") +- (parallel [(const_int 0) (const_int 1) +- (const_int 2) (const_int 3) +- (const_int 4) (const_int 5) +- (const_int 6) (const_int 7)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src) +- return "vmov\t%P0, %e1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_lowv8hi" +- [(set (match_operand:V4HI 0 "s_register_operand" "=w") +- (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w") +- (parallel [(const_int 0) (const_int 1) +- (const_int 2) (const_int 3)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src) +- return "vmov\t%P0, %e1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_lowv4si" +- [(set (match_operand:V2SI 0 "s_register_operand" "=w") +- (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w") +- (parallel [(const_int 0) (const_int 1)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src) +- return "vmov\t%P0, 
%e1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_lowv4sf" +- [(set (match_operand:V2SF 0 "s_register_operand" "=w") +- (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w") +- (parallel [(const_int 0) (const_int 1)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src) +- return "vmov\t%P0, %e1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_lowv2di" +- [(set (match_operand:DI 0 "s_register_operand" "=w") +- (vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w") +- (parallel [(const_int 0)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src) +- return "vmov\t%P0, %e1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) ++(define_expand "neon_vget_high<mode>" ++ [(match_operand:<V_HALF> 0 "s_register_operand") ++ (match_operand:VQX 1 "s_register_operand")] ++ "TARGET_NEON" ++{ ++ emit_move_insn (operands[0], ++ simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, ++ GET_MODE_SIZE (<V_HALF>mode))); ++ DONE; ++}) ++ ++(define_expand "neon_vget_low<mode>" ++ [(match_operand:<V_HALF> 0 "s_register_operand") ++ (match_operand:VQX 1 "s_register_operand")] ++ "TARGET_NEON" ++{ ++ emit_move_insn (operands[0], ++ simplify_gen_subreg (<V_HALF>mode, operands[1], ++ <MODE>mode, 0)); ++ DONE; ++}) + + (define_insn "neon_vcvt<mode>" + [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") + +=== modified file 'gcc/simplify-rtx.c' +--- old/gcc/simplify-rtx.c 2011-08-13 08:32:32 +0000 ++++ new/gcc/simplify-rtx.c 2011-09-28 15:11:59 +0000 +@@ -5567,6 +5567,7 @@ + /* Optimize SUBREG truncations of zero and sign extended values. */ + if ((GET_CODE (op) == ZERO_EXTEND + || GET_CODE (op) == SIGN_EXTEND) ++ && SCALAR_INT_MODE_P (innermode) + && GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode)) + { + unsigned int bitpos = subreg_lsb_1 (outermode, innermode, byte); +@@ -5605,6 +5606,7 @@ + if ((GET_CODE (op) == LSHIFTRT + || GET_CODE (op) == ASHIFTRT) + && SCALAR_INT_MODE_P (outermode) ++ && SCALAR_INT_MODE_P (innermode) + /* Ensure that OUTERMODE is at least twice as wide as the INNERMODE + to avoid the possibility that an outer LSHIFTRT shifts by more + than the sign extension's sign_bit_copies and introduces zeros +@@ -5624,6 +5626,7 @@ + if ((GET_CODE (op) == LSHIFTRT + || GET_CODE (op) == ASHIFTRT) + && SCALAR_INT_MODE_P (outermode) ++ && SCALAR_INT_MODE_P (innermode) + && GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode) + && CONST_INT_P (XEXP (op, 1)) + && GET_CODE (XEXP (op, 0)) == ZERO_EXTEND +@@ -5638,6 +5641,7 @@ + the outer subreg is effectively a truncation to the original mode. */ + if (GET_CODE (op) == ASHIFT + && SCALAR_INT_MODE_P (outermode) ++ && SCALAR_INT_MODE_P (innermode) + && GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode) + && CONST_INT_P (XEXP (op, 1)) + && (GET_CODE (XEXP (op, 0)) == ZERO_EXTEND +@@ -5651,7 +5655,7 @@ + /* Recognize a word extraction from a multi-word subreg. 
*/ + if ((GET_CODE (op) == LSHIFTRT + || GET_CODE (op) == ASHIFTRT) +- && SCALAR_INT_MODE_P (outermode) ++ && SCALAR_INT_MODE_P (innermode) + && GET_MODE_BITSIZE (outermode) >= BITS_PER_WORD + && GET_MODE_BITSIZE (innermode) >= (2 * GET_MODE_BITSIZE (outermode)) + && CONST_INT_P (XEXP (op, 1)) +@@ -5673,6 +5677,7 @@ + + if ((GET_CODE (op) == LSHIFTRT + || GET_CODE (op) == ASHIFTRT) ++ && SCALAR_INT_MODE_P (innermode) + && MEM_P (XEXP (op, 0)) + && CONST_INT_P (XEXP (op, 1)) + && GET_MODE_SIZE (outermode) < GET_MODE_SIZE (GET_MODE (op)) + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106817.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106817.patch new file mode 100644 index 0000000000..d44f8cf1a5 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106817.patch @@ -0,0 +1,290 @@ +2011-10-01 Revital Eres <revital.eres@linaro.org> + + gcc/ + Backport from mainline -r179380 and -r179381 + + * ddg.c (autoinc_var_is_used_p): New function. + (create_ddg_dep_from_intra_loop_link, + add_cross_iteration_register_deps): Call it. + * ddg.h (autoinc_var_is_used_p): Declare. + * modulo-sched.c (sms_schedule): Handle instructions with REG_INC. + (generate_reg_moves): Call autoinc_var_is_used_p. Skip + instructions that do not set a register and verify no regmoves + are created for !single_set instructions. + + gcc/testsuite/ + + * gcc.dg/sms-10.c: New file + +=== modified file 'gcc/ddg.c' +--- old/gcc/ddg.c 2011-07-31 11:29:10 +0000 ++++ new/gcc/ddg.c 2011-10-02 06:56:53 +0000 +@@ -145,6 +145,27 @@ + return rtx_mem_access_p (PATTERN (insn)); + } + ++/* Return true if DEF_INSN contains address being auto-inc or auto-dec ++ which is used in USE_INSN. Otherwise return false. The result is ++ being used to decide whether to remove the edge between def_insn and ++ use_insn when -fmodulo-sched-allow-regmoves is set. This function ++ doesn't need to consider the specific address register; no reg_moves ++ will be allowed for any life range defined by def_insn and used ++ by use_insn, if use_insn uses an address register auto-inc'ed by ++ def_insn. */ ++bool ++autoinc_var_is_used_p (rtx def_insn, rtx use_insn) ++{ ++ rtx note; ++ ++ for (note = REG_NOTES (def_insn); note; note = XEXP (note, 1)) ++ if (REG_NOTE_KIND (note) == REG_INC ++ && reg_referenced_p (XEXP (note, 0), PATTERN (use_insn))) ++ return true; ++ ++ return false; ++} ++ + /* Computes the dependence parameters (latency, distance etc.), creates + a ddg_edge and adds it to the given DDG. */ + static void +@@ -173,10 +194,15 @@ + compensate for that by generating reg-moves based on the life-range + analysis. The anti-deps that will be deleted are the ones which + have true-deps edges in the opposite direction (in other words +- the kernel has only one def of the relevant register). TODO: +- support the removal of all anti-deps edges, i.e. including those ++ the kernel has only one def of the relevant register). ++ If the address that is being auto-inc or auto-dec in DEST_NODE ++ is used in SRC_NODE then do not remove the edge to make sure ++ reg-moves will not be created for this address. ++ TODO: support the removal of all anti-deps edges, i.e. including those + whose register has multiple defs in the loop. 
*/ +- if (flag_modulo_sched_allow_regmoves && (t == ANTI_DEP && dt == REG_DEP)) ++ if (flag_modulo_sched_allow_regmoves ++ && (t == ANTI_DEP && dt == REG_DEP) ++ && !autoinc_var_is_used_p (dest_node->insn, src_node->insn)) + { + rtx set; + +@@ -302,10 +328,14 @@ + gcc_assert (first_def_node); + + /* Always create the edge if the use node is a branch in +- order to prevent the creation of reg-moves. */ ++ order to prevent the creation of reg-moves. ++ If the address that is being auto-inc or auto-dec in LAST_DEF ++ is used in USE_INSN then do not remove the edge to make sure ++ reg-moves will not be created for that address. */ + if (DF_REF_ID (last_def) != DF_REF_ID (first_def) + || !flag_modulo_sched_allow_regmoves +- || JUMP_P (use_node->insn)) ++ || JUMP_P (use_node->insn) ++ || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn)) + create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP, + REG_DEP, 1); + + +=== modified file 'gcc/ddg.h' +--- old/gcc/ddg.h 2009-11-25 10:55:54 +0000 ++++ new/gcc/ddg.h 2011-10-02 06:56:53 +0000 +@@ -186,4 +186,6 @@ + int find_nodes_on_paths (sbitmap result, ddg_ptr, sbitmap from, sbitmap to); + int longest_simple_path (ddg_ptr, int from, int to, sbitmap via); + ++bool autoinc_var_is_used_p (rtx, rtx); ++ + #endif /* GCC_DDG_H */ + +=== modified file 'gcc/modulo-sched.c' +--- old/gcc/modulo-sched.c 2011-09-14 11:06:06 +0000 ++++ new/gcc/modulo-sched.c 2011-10-02 06:56:53 +0000 +@@ -477,7 +477,12 @@ + sbitmap *uses_of_defs; + rtx last_reg_move; + rtx prev_reg, old_reg; +- ++ rtx set = single_set (u->insn); ++ ++ /* Skip instructions that do not set a register. */ ++ if ((set && !REG_P (SET_DEST (set)))) ++ continue; ++ + /* Compute the number of reg_moves needed for u, by looking at life + ranges started at u (excluding self-loops). */ + for (e = u->out; e; e = e->next_out) +@@ -494,6 +499,20 @@ + && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src)) + nreg_moves4e--; + ++ if (nreg_moves4e >= 1) ++ { ++ /* !single_set instructions are not supported yet and ++ thus we do not except to encounter them in the loop ++ except from the doloop part. For the latter case ++ we assume no regmoves are generated as the doloop ++ instructions are tied to the branch with an edge. */ ++ gcc_assert (set); ++ /* If the instruction contains auto-inc register then ++ validate that the regmov is being generated for the ++ target regsiter rather then the inc'ed register. */ ++ gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn)); ++ } ++ + nreg_moves = MAX (nreg_moves, nreg_moves4e); + } + +@@ -1266,12 +1285,10 @@ + continue; + } + +- /* Don't handle BBs with calls or barriers or auto-increment insns +- (to avoid creating invalid reg-moves for the auto-increment insns), ++ /* Don't handle BBs with calls or barriers + or !single_set with the exception of instructions that include + count_reg---these instructions are part of the control part + that do-loop recognizes. +- ??? Should handle auto-increment insns. + ??? Should handle insns defining subregs. 
*/ + for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn)) + { +@@ -1282,7 +1299,6 @@ + || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn) + && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE + && !reg_mentioned_p (count_reg, insn)) +- || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0) + || (INSN_P (insn) && (set = single_set (insn)) + && GET_CODE (SET_DEST (set)) == SUBREG)) + break; +@@ -1296,8 +1312,6 @@ + fprintf (dump_file, "SMS loop-with-call\n"); + else if (BARRIER_P (insn)) + fprintf (dump_file, "SMS loop-with-barrier\n"); +- else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0) +- fprintf (dump_file, "SMS reg inc\n"); + else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn) + && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)) + fprintf (dump_file, "SMS loop-with-not-single-set\n"); + +=== added file 'gcc/testsuite/gcc.dg/sms-10.c' +--- old/gcc/testsuite/gcc.dg/sms-10.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/sms-10.c 2011-10-02 06:56:53 +0000 +@@ -0,0 +1,118 @@ ++ /* { dg-do run } */ ++ /* { dg-options "-O2 -fmodulo-sched -fmodulo-sched-allow-regmoves -fdump-rtl-sms" } */ ++ ++ ++typedef __SIZE_TYPE__ size_t; ++extern void *malloc (size_t); ++extern void free (void *); ++extern void abort (void); ++ ++struct regstat_n_sets_and_refs_t ++{ ++ int sets; ++ int refs; ++}; ++ ++struct regstat_n_sets_and_refs_t *regstat_n_sets_and_refs; ++ ++struct df_reg_info ++{ ++ unsigned int n_refs; ++}; ++ ++struct df_d ++{ ++ struct df_reg_info **def_regs; ++ struct df_reg_info **use_regs; ++}; ++struct df_d *df; ++ ++static inline int ++REG_N_SETS (int regno) ++{ ++ return regstat_n_sets_and_refs[regno].sets; ++} ++ ++__attribute__ ((noinline)) ++ int max_reg_num (void) ++{ ++ return 100; ++} ++ ++__attribute__ ((noinline)) ++ void regstat_init_n_sets_and_refs (void) ++{ ++ unsigned int i; ++ unsigned int max_regno = max_reg_num (); ++ ++ for (i = 0; i < max_regno; i++) ++ { ++ (regstat_n_sets_and_refs[i].sets = (df->def_regs[(i)]->n_refs)); ++ (regstat_n_sets_and_refs[i].refs = ++ (df->use_regs[(i)]->n_refs) + REG_N_SETS (i)); ++ } ++} ++ ++int a_sets[100] = ++ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ++ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, ++ 40, 41, 42, ++ 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, ++ 62, 63, 64, ++ 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, ++ 84, 85, 86, ++ 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99 ++}; ++ ++int a_refs[100] = ++ { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, ++ 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, ++ 78, 80, 82, ++ 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, ++ 118, 120, ++ 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, ++ 152, 154, 156, ++ 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180, 182, 184, 186, ++ 188, 190, 192, ++ 194, 196, 198 ++}; ++ ++int ++main () ++{ ++ struct df_reg_info *b[100], *c[100]; ++ struct df_d df1; ++ size_t s = sizeof (struct df_reg_info); ++ struct regstat_n_sets_and_refs_t a[100]; ++ ++ df = &df1; ++ regstat_n_sets_and_refs = a; ++ int i; ++ ++ for (i = 0; i < 100; i++) ++ { ++ b[i] = (struct df_reg_info *) malloc (s); ++ b[i]->n_refs = i; ++ c[i] = (struct df_reg_info *) malloc (s); ++ c[i]->n_refs = i; ++ } ++ ++ df1.def_regs = b; ++ df1.use_regs = c; ++ regstat_init_n_sets_and_refs (); ++ ++ for (i = 0; i < 100; 
i++) ++ if ((a[i].sets != a_sets[i]) || (a[i].refs != a_refs[i])) ++ abort (); ++ ++ for (i = 0; i < 100; i++) ++ { ++ free (b[i]); ++ free (c[i]); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-rtl-dump-times "SMS succeeded" 1 "sms" { target powerpc*-*-* } } } */ ++/* { dg-final { cleanup-rtl-dump "sms" } } */ + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106818.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106818.patch new file mode 100644 index 0000000000..ef98142bc4 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106818.patch @@ -0,0 +1,105 @@ +2011-10-03 Michael Hope <michael.hope@linaro.org> + + Backport from mainline: + + 2011-09-13 Sevak Sargsyan <sevak.sargsyan@ispras.ru> + + gcc/ + * config/arm/neon.md (neon_vabd<mode>_2, neon_vabd<mode>_3): New + define_insn patterns for combine. + + gcc/testsuite/ + * gcc.target/arm/neon-combine-sub-abs-into-vabd.c: New test. + +=== modified file 'gcc/config/arm/neon.md' +--- old/gcc/config/arm/neon.md 2011-09-28 15:14:59 +0000 ++++ new/gcc/config/arm/neon.md 2011-10-03 01:32:17 +0000 +@@ -5428,3 +5428,32 @@ + emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg)); + DONE; + }) ++ ++(define_insn "neon_vabd<mode>_2" ++ [(set (match_operand:VDQ 0 "s_register_operand" "=w") ++ (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") ++ (match_operand:VDQ 2 "s_register_operand" "w"))))] ++ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" ++ "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" ++ [(set (attr "neon_type") ++ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0)) ++ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) ++ (const_string "neon_fp_vadd_ddd_vabs_dd") ++ (const_string "neon_fp_vadd_qqq_vabs_qq")) ++ (const_string "neon_int_5")))] ++) ++ ++(define_insn "neon_vabd<mode>_3" ++ [(set (match_operand:VDQ 0 "s_register_operand" "=w") ++ (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w") ++ (match_operand:VDQ 2 "s_register_operand" "w")] ++ UNSPEC_VSUB)))] ++ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" ++ "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" ++ [(set (attr "neon_type") ++ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0)) ++ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) ++ (const_string "neon_fp_vadd_ddd_vabs_dd") ++ (const_string "neon_fp_vadd_qqq_vabs_qq")) ++ (const_string "neon_int_5")))] ++) + +=== added file 'gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c' +--- old/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c 2011-10-03 01:32:17 +0000 +@@ -0,0 +1,50 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-O2 -funsafe-math-optimizations" } */ ++/* { dg-add-options arm_neon } */ ++ ++#include <arm_neon.h> ++float32x2_t f_sub_abs_to_vabd_32() ++{ ++ float32x2_t val1 = vdup_n_f32 (10); ++ float32x2_t val2 = vdup_n_f32 (30); ++ float32x2_t sres = vsub_f32(val1, val2); ++ float32x2_t res = vabs_f32 (sres); ++ ++ return res; ++} ++/* { dg-final { scan-assembler "vabd\.f32" } }*/ ++ ++#include <arm_neon.h> ++int8x8_t sub_abs_to_vabd_8() ++{ ++ int8x8_t val1 = vdup_n_s8 (10); ++ int8x8_t val2 = vdup_n_s8 (30); ++ int8x8_t sres = vsub_s8(val1, val2); ++ int8x8_t res = vabs_s8 (sres); ++ ++ return res; ++} ++/* { 
dg-final { scan-assembler "vabd\.s8" } }*/ ++ ++int16x4_t sub_abs_to_vabd_16() ++{ ++ int16x4_t val1 = vdup_n_s16 (10); ++ int16x4_t val2 = vdup_n_s16 (30); ++ int16x4_t sres = vsub_s16(val1, val2); ++ int16x4_t res = vabs_s16 (sres); ++ ++ return res; ++} ++/* { dg-final { scan-assembler "vabd\.s16" } }*/ ++ ++int32x2_t sub_abs_to_vabd_32() ++{ ++ int32x2_t val1 = vdup_n_s32 (10); ++ int32x2_t val2 = vdup_n_s32 (30); ++ int32x2_t sres = vsub_s32(val1, val2); ++ int32x2_t res = vabs_s32 (sres); ++ ++ return res; ++} ++/* { dg-final { scan-assembler "vabd\.s32" } }*/ + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106819.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106819.patch new file mode 100644 index 0000000000..e097ec27fa --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106819.patch @@ -0,0 +1,436 @@ +2011-10-03 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-09-22 Richard Sandiford <richard.sandiford@linaro.org> + + * config/arm/predicates.md (expandable_comparison_operator): New + predicate, extracted from... + (arm_comparison_operator): ...here. + * config/arm/arm.md (cbranchsi4, cbranchsf4, cbranchdf4, cbranchdi4) + (cstoresi4, cstoresf4, cstoredf4, cstoredi4, movsicc, movsfcc) + (movdfcc): Use expandable_comparison_operator. + + gcc/testsuite/ + Backport from mainline: + + 2011-09-22 Richard Sandiford <richard.sandiford@linaro.org> + + * gcc.target/arm/cmp-1.c: New test. + * gcc.target/arm/cmp-2.c: Likewise. + +2011-10-03 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-09-07 Richard Sandiford <richard.sandiford@linaro.org> + + PR target/49030 + * config/arm/arm-protos.h (maybe_get_arm_condition_code): Declare. + * config/arm/arm.c (maybe_get_arm_condition_code): New function, + reusing the old code from get_arm_condition_code. Return ARM_NV + for invalid comparison codes. + (get_arm_condition_code): Redefine in terms of + maybe_get_arm_condition_code. + * config/arm/predicates.md (arm_comparison_operator): Use + maybe_get_arm_condition_code. + + gcc/testsuite/ + Backport from mainline: + + 2011-09-07 Richard Sandiford <richard.sandiford@linaro.org> + + PR target/49030 + * gcc.dg/torture/pr49030.c: New test. + +=== modified file 'gcc/config/arm/arm-protos.h' +--- old/gcc/config/arm/arm-protos.h 2011-09-15 09:45:31 +0000 ++++ new/gcc/config/arm/arm-protos.h 2011-10-03 09:46:40 +0000 +@@ -180,6 +180,7 @@ + #endif + extern int thumb_shiftable_const (unsigned HOST_WIDE_INT); + #ifdef RTX_CODE ++extern enum arm_cond_code maybe_get_arm_condition_code (rtx); + extern void thumb1_final_prescan_insn (rtx); + extern void thumb2_final_prescan_insn (rtx); + extern const char *thumb_load_double_from_address (rtx *); + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-09-19 07:44:24 +0000 ++++ new/gcc/config/arm/arm.c 2011-10-03 09:46:40 +0000 +@@ -17494,10 +17494,10 @@ + decremented/zeroed by arm_asm_output_opcode as the insns are output. */ + + /* Returns the index of the ARM condition code string in +- `arm_condition_codes'. COMPARISON should be an rtx like +- `(eq (...) (...))'. */ +-static enum arm_cond_code +-get_arm_condition_code (rtx comparison) ++ `arm_condition_codes', or ARM_NV if the comparison is invalid. ++ COMPARISON should be an rtx like `(eq (...) (...))'. 
*/ ++enum arm_cond_code ++maybe_get_arm_condition_code (rtx comparison) + { + enum machine_mode mode = GET_MODE (XEXP (comparison, 0)); + enum arm_cond_code code; +@@ -17521,11 +17521,11 @@ + case CC_DLTUmode: code = ARM_CC; + + dominance: +- gcc_assert (comp_code == EQ || comp_code == NE); +- + if (comp_code == EQ) + return ARM_INVERSE_CONDITION_CODE (code); +- return code; ++ if (comp_code == NE) ++ return code; ++ return ARM_NV; + + case CC_NOOVmode: + switch (comp_code) +@@ -17534,7 +17534,7 @@ + case EQ: return ARM_EQ; + case GE: return ARM_PL; + case LT: return ARM_MI; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_Zmode: +@@ -17542,7 +17542,7 @@ + { + case NE: return ARM_NE; + case EQ: return ARM_EQ; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_Nmode: +@@ -17550,7 +17550,7 @@ + { + case NE: return ARM_MI; + case EQ: return ARM_PL; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CCFPEmode: +@@ -17575,7 +17575,7 @@ + /* UNEQ and LTGT do not have a representation. */ + case UNEQ: /* Fall through. */ + case LTGT: /* Fall through. */ +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_SWPmode: +@@ -17591,7 +17591,7 @@ + case GTU: return ARM_CC; + case LEU: return ARM_CS; + case LTU: return ARM_HI; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_Cmode: +@@ -17599,7 +17599,7 @@ + { + case LTU: return ARM_CS; + case GEU: return ARM_CC; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_CZmode: +@@ -17611,7 +17611,7 @@ + case GTU: return ARM_HI; + case LEU: return ARM_LS; + case LTU: return ARM_CC; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_NCVmode: +@@ -17621,7 +17621,7 @@ + case LT: return ARM_LT; + case GEU: return ARM_CS; + case LTU: return ARM_CC; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CCmode: +@@ -17637,13 +17637,22 @@ + case GTU: return ARM_HI; + case LEU: return ARM_LS; + case LTU: return ARM_CC; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + default: gcc_unreachable (); + } + } + ++/* Like maybe_get_arm_condition_code, but never return ARM_NV. */ ++static enum arm_cond_code ++get_arm_condition_code (rtx comparison) ++{ ++ enum arm_cond_code code = maybe_get_arm_condition_code (comparison); ++ gcc_assert (code != ARM_NV); ++ return code; ++} ++ + /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed + instructions. 
*/ + void + +=== modified file 'gcc/config/arm/arm.md' +--- old/gcc/config/arm/arm.md 2011-09-12 14:14:00 +0000 ++++ new/gcc/config/arm/arm.md 2011-10-03 09:47:33 +0000 +@@ -6543,7 +6543,7 @@ + + (define_expand "cbranchsi4" + [(set (pc) (if_then_else +- (match_operator 0 "arm_comparison_operator" ++ (match_operator 0 "expandable_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")]) + (label_ref (match_operand 3 "" "")) +@@ -6594,7 +6594,7 @@ + + (define_expand "cbranchsf4" + [(set (pc) (if_then_else +- (match_operator 0 "arm_comparison_operator" ++ (match_operator 0 "expandable_comparison_operator" + [(match_operand:SF 1 "s_register_operand" "") + (match_operand:SF 2 "arm_float_compare_operand" "")]) + (label_ref (match_operand 3 "" "")) +@@ -6606,7 +6606,7 @@ + + (define_expand "cbranchdf4" + [(set (pc) (if_then_else +- (match_operator 0 "arm_comparison_operator" ++ (match_operator 0 "expandable_comparison_operator" + [(match_operand:DF 1 "s_register_operand" "") + (match_operand:DF 2 "arm_float_compare_operand" "")]) + (label_ref (match_operand 3 "" "")) +@@ -6618,7 +6618,7 @@ + + (define_expand "cbranchdi4" + [(set (pc) (if_then_else +- (match_operator 0 "arm_comparison_operator" ++ (match_operator 0 "expandable_comparison_operator" + [(match_operand:DI 1 "cmpdi_operand" "") + (match_operand:DI 2 "cmpdi_operand" "")]) + (label_ref (match_operand 3 "" "")) +@@ -7473,7 +7473,7 @@ + + (define_expand "cstoresi4" + [(set (match_operand:SI 0 "s_register_operand" "") +- (match_operator:SI 1 "arm_comparison_operator" ++ (match_operator:SI 1 "expandable_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "reg_or_int_operand" "")]))] + "TARGET_32BIT || TARGET_THUMB1" +@@ -7609,7 +7609,7 @@ + + (define_expand "cstoresf4" + [(set (match_operand:SI 0 "s_register_operand" "") +- (match_operator:SI 1 "arm_comparison_operator" ++ (match_operator:SI 1 "expandable_comparison_operator" + [(match_operand:SF 2 "s_register_operand" "") + (match_operand:SF 3 "arm_float_compare_operand" "")]))] + "TARGET_32BIT && TARGET_HARD_FLOAT" +@@ -7619,7 +7619,7 @@ + + (define_expand "cstoredf4" + [(set (match_operand:SI 0 "s_register_operand" "") +- (match_operator:SI 1 "arm_comparison_operator" ++ (match_operator:SI 1 "expandable_comparison_operator" + [(match_operand:DF 2 "s_register_operand" "") + (match_operand:DF 3 "arm_float_compare_operand" "")]))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" +@@ -7629,7 +7629,7 @@ + + (define_expand "cstoredi4" + [(set (match_operand:SI 0 "s_register_operand" "") +- (match_operator:SI 1 "arm_comparison_operator" ++ (match_operator:SI 1 "expandable_comparison_operator" + [(match_operand:DI 2 "cmpdi_operand" "") + (match_operand:DI 3 "cmpdi_operand" "")]))] + "TARGET_32BIT" +@@ -7749,7 +7749,7 @@ + + (define_expand "movsicc" + [(set (match_operand:SI 0 "s_register_operand" "") +- (if_then_else:SI (match_operand 1 "arm_comparison_operator" "") ++ (if_then_else:SI (match_operand 1 "expandable_comparison_operator" "") + (match_operand:SI 2 "arm_not_operand" "") + (match_operand:SI 3 "arm_not_operand" "")))] + "TARGET_32BIT" +@@ -7769,7 +7769,7 @@ + + (define_expand "movsfcc" + [(set (match_operand:SF 0 "s_register_operand" "") +- (if_then_else:SF (match_operand 1 "arm_comparison_operator" "") ++ (if_then_else:SF (match_operand 1 "expandable_comparison_operator" "") + (match_operand:SF 2 "s_register_operand" "") + (match_operand:SF 3 "nonmemory_operand" "")))] + 
"TARGET_32BIT && TARGET_HARD_FLOAT" +@@ -7795,7 +7795,7 @@ + + (define_expand "movdfcc" + [(set (match_operand:DF 0 "s_register_operand" "") +- (if_then_else:DF (match_operand 1 "arm_comparison_operator" "") ++ (if_then_else:DF (match_operand 1 "expandable_comparison_operator" "") + (match_operand:DF 2 "s_register_operand" "") + (match_operand:DF 3 "arm_float_add_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)" + +=== modified file 'gcc/config/arm/predicates.md' +--- old/gcc/config/arm/predicates.md 2011-09-15 09:45:31 +0000 ++++ new/gcc/config/arm/predicates.md 2011-10-03 09:47:33 +0000 +@@ -242,11 +242,15 @@ + + ;; True for integer comparisons and, if FP is active, for comparisons + ;; other than LTGT or UNEQ. ++(define_special_predicate "expandable_comparison_operator" ++ (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu, ++ unordered,ordered,unlt,unle,unge,ungt")) ++ ++;; Likewise, but only accept comparisons that are directly supported ++;; by ARM condition codes. + (define_special_predicate "arm_comparison_operator" +- (ior (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu") +- (and (match_test "TARGET_32BIT && TARGET_HARD_FLOAT +- && (TARGET_FPA || TARGET_VFP)") +- (match_code "unordered,ordered,unlt,unle,unge,ungt")))) ++ (and (match_operand 0 "expandable_comparison_operator") ++ (match_test "maybe_get_arm_condition_code (op) != ARM_NV"))) + + (define_special_predicate "lt_ge_comparison_operator" + (match_code "lt,ge")) + +=== added file 'gcc/testsuite/gcc.dg/torture/pr49030.c' +--- old/gcc/testsuite/gcc.dg/torture/pr49030.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/torture/pr49030.c 2011-10-03 09:46:40 +0000 +@@ -0,0 +1,19 @@ ++void ++sample_move_d32u24_sS (char *dst, float *src, unsigned long nsamples, ++ unsigned long dst_skip) ++{ ++ long long y; ++ while (nsamples--) ++ { ++ y = (long long) (*src * 8388608.0f) << 8; ++ if (y > 2147483647) { ++ *(int *) dst = 2147483647; ++ } else if (y < -2147483647 - 1) { ++ *(int *) dst = -2147483647 - 1; ++ } else { ++ *(int *) dst = (int) y; ++ } ++ dst += dst_skip; ++ src++; ++ } ++} + +=== added file 'gcc/testsuite/gcc.target/arm/cmp-1.c' +--- old/gcc/testsuite/gcc.target/arm/cmp-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/cmp-1.c 2011-10-03 09:47:33 +0000 +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O" } */ ++/* { dg-final { scan-assembler-not "\tbl\t" } } */ ++/* { dg-final { scan-assembler-not "__aeabi" } } */ ++int x, y; ++ ++#define TEST_EXPR(NAME, ARGS, EXPR) \ ++ int NAME##1 ARGS { return (EXPR); } \ ++ int NAME##2 ARGS { return !(EXPR); } \ ++ int NAME##3 ARGS { return (EXPR) ? 
x : y; } \ ++ void NAME##4 ARGS { if (EXPR) x++; } \ ++ void NAME##5 ARGS { if (!(EXPR)) x++; } ++ ++#define TEST(NAME, TYPE, OPERATOR) \ ++ TEST_EXPR (NAME##_rr, (TYPE a1, TYPE a2), a1 OPERATOR a2) \ ++ TEST_EXPR (NAME##_rm, (TYPE a1, TYPE *a2), a1 OPERATOR *a2) \ ++ TEST_EXPR (NAME##_mr, (TYPE *a1, TYPE a2), *a1 OPERATOR a2) \ ++ TEST_EXPR (NAME##_mm, (TYPE *a1, TYPE *a2), *a1 OPERATOR *a2) \ ++ TEST_EXPR (NAME##_rc, (TYPE a1), a1 OPERATOR 100) \ ++ TEST_EXPR (NAME##_cr, (TYPE a1), 100 OPERATOR a1) ++ ++#define TEST_OP(NAME, OPERATOR) \ ++ TEST (sc_##NAME, signed char, OPERATOR) \ ++ TEST (uc_##NAME, unsigned char, OPERATOR) \ ++ TEST (ss_##NAME, short, OPERATOR) \ ++ TEST (us_##NAME, unsigned short, OPERATOR) \ ++ TEST (si_##NAME, int, OPERATOR) \ ++ TEST (ui_##NAME, unsigned int, OPERATOR) \ ++ TEST (sll_##NAME, long long, OPERATOR) \ ++ TEST (ull_##NAME, unsigned long long, OPERATOR) ++ ++TEST_OP (eq, ==) ++TEST_OP (ne, !=) ++TEST_OP (lt, <) ++TEST_OP (gt, >) ++TEST_OP (le, <=) ++TEST_OP (ge, >=) + +=== added file 'gcc/testsuite/gcc.target/arm/cmp-2.c' +--- old/gcc/testsuite/gcc.target/arm/cmp-2.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/cmp-2.c 2011-10-03 09:47:33 +0000 +@@ -0,0 +1,49 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_vfp_ok } */ ++/* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */ ++/* { dg-options "-O -mfpu=vfp -mfloat-abi=softfp" } */ ++/* { dg-final { scan-assembler-not "\tbl\t" } } */ ++/* { dg-final { scan-assembler-not "__aeabi" } } */ ++int x, y; ++ ++#define EQ(X, Y) ((X) == (Y)) ++#define NE(X, Y) ((X) != (Y)) ++#define LT(X, Y) ((X) < (Y)) ++#define GT(X, Y) ((X) > (Y)) ++#define LE(X, Y) ((X) <= (Y)) ++#define GE(X, Y) ((X) >= (Y)) ++ ++#define TEST_EXPR(NAME, ARGS, EXPR) \ ++ int NAME##1 ARGS { return (EXPR); } \ ++ int NAME##2 ARGS { return !(EXPR); } \ ++ int NAME##3 ARGS { return (EXPR) ? x : y; } \ ++ void NAME##4 ARGS { if (EXPR) x++; } \ ++ void NAME##5 ARGS { if (!(EXPR)) x++; } ++ ++#define TEST(NAME, TYPE, OPERATOR) \ ++ TEST_EXPR (NAME##_rr, (TYPE a1, TYPE a2), OPERATOR (a1, a2)) \ ++ TEST_EXPR (NAME##_rm, (TYPE a1, TYPE *a2), OPERATOR (a1, *a2)) \ ++ TEST_EXPR (NAME##_mr, (TYPE *a1, TYPE a2), OPERATOR (*a1, a2)) \ ++ TEST_EXPR (NAME##_mm, (TYPE *a1, TYPE *a2), OPERATOR (*a1, *a2)) \ ++ TEST_EXPR (NAME##_rc, (TYPE a1), OPERATOR (a1, 100)) \ ++ TEST_EXPR (NAME##_cr, (TYPE a1), OPERATOR (100, a1)) ++ ++#define TEST_OP(NAME, OPERATOR) \ ++ TEST (f_##NAME, float, OPERATOR) \ ++ TEST (d_##NAME, double, OPERATOR) \ ++ TEST (ld_##NAME, long double, OPERATOR) ++ ++TEST_OP (eq, EQ) ++TEST_OP (ne, NE) ++TEST_OP (lt, LT) ++TEST_OP (gt, GT) ++TEST_OP (le, LE) ++TEST_OP (ge, GE) ++TEST_OP (blt, __builtin_isless) ++TEST_OP (bgt, __builtin_isgreater) ++TEST_OP (ble, __builtin_islessequal) ++TEST_OP (bge, __builtin_isgreaterequal) ++/* This one should be expanded into separate ordered and equality ++ comparisons. 
*/ ++TEST_OP (blg, __builtin_islessgreater) ++TEST_OP (bun, __builtin_isunordered) + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106820.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106820.patch new file mode 100644 index 0000000000..4a886ce56d --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106820.patch @@ -0,0 +1,378 @@ +2011-10-06 Ira Rosen <ira.rosen@linaro.org> + + Backport from mainline: + + 2011-09-25 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-data-ref.c (dr_analyze_innermost): Add new argument. + Allow not simple iv if analyzing basic block. + (create_data_ref): Update call to dr_analyze_innermost. + (stmt_with_adjacent_zero_store_dr_p, ref_base_address): Likewise. + * tree-loop-distribution.c (generate_memset_zero): Likewise. + * tree-predcom.c (find_looparound_phi): Likewise. + * tree-data-ref.h (dr_analyze_innermost): Add new argument. + + gcc/testsuite/ + * gcc.dg/vect/bb-slp-24.c: New. + + + 2011-09-15 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vect-data-refs.c (vect_analyze_data_ref_dependence): Allow + read-after-read dependencies in basic block SLP. + + gcc/testsuite/ + * gcc.dg/vect/bb-slp-25.c: New. + + + 2011-04-21 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + * tree-vect-data-refs.c (vect_drs_dependent_in_basic_block): Use + operand_equal_p to compare DR_BASE_ADDRESSes. + (vect_check_interleaving): Likewise. + + gcc/testsuite/ + * gcc.dg/vect/vect-119.c: New test. + +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-24.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-24.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-24.c 2011-10-02 08:43:10 +0000 +@@ -0,0 +1,59 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define A 3 ++#define N 256 ++ ++short src[N], dst[N]; ++ ++void foo (short * __restrict__ dst, short * __restrict__ src, int h, ++ int stride, int dummy) ++{ ++ int i; ++ h /= 8; ++ for (i = 0; i < h; i++) ++ { ++ dst[0] += A*src[0]; ++ dst[1] += A*src[1]; ++ dst[2] += A*src[2]; ++ dst[3] += A*src[3]; ++ dst[4] += A*src[4]; ++ dst[5] += A*src[5]; ++ dst[6] += A*src[6]; ++ dst[7] += A*src[7]; ++ dst += stride; ++ src += stride; ++ if (dummy == 32) ++ abort (); ++ } ++} ++ ++ ++int main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ dst[i] = 0; ++ src[i] = i; ++ } ++ ++ foo (dst, src, N, 8, 0); ++ ++ for (i = 0; i < N; i++) ++ { ++ if (dst[i] != A * i) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */ ++/* { dg-final { cleanup-tree-dump "slp" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-25.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-25.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-25.c 2011-10-02 08:43:10 +0000 +@@ -0,0 +1,59 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define A 3 ++#define B 4 ++#define N 256 ++ ++short src[N], dst[N]; ++ ++void foo (short * __restrict__ dst, short * __restrict__ src, int h, int stride, int dummy) ++{ ++ int i; ++ h /= 16; ++ for (i = 0; i < h; i++) ++ { ++ dst[0] += A*src[0] + src[stride]; ++ dst[1] += A*src[1] + src[1+stride]; ++ dst[2] += A*src[2] + src[2+stride]; ++ dst[3] += A*src[3] + src[3+stride]; ++ dst[4] += A*src[4] + src[4+stride]; ++ dst[5] += A*src[5] + 
src[5+stride]; ++ dst[6] += A*src[6] + src[6+stride]; ++ dst[7] += A*src[7] + src[7+stride]; ++ dst += 8; ++ src += 8; ++ if (dummy == 32) ++ abort (); ++ } ++} ++ ++ ++int main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ dst[i] = 0; ++ src[i] = i; ++ } ++ ++ foo (dst, src, N, 8, 0); ++ ++ for (i = 0; i < N/2; i++) ++ { ++ if (dst[i] != A * i + i + 8) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */ ++/* { dg-final { cleanup-tree-dump "slp" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-119.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-119.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-119.c 2011-10-02 08:43:10 +0000 +@@ -0,0 +1,28 @@ ++/* { dg-do compile } */ ++ ++#define OUTER 32 ++#define INNER 40 ++ ++static unsigned int ++bar (const unsigned int x[INNER][2], unsigned int sum) ++{ ++ int i; ++ ++ for (i = 0; i < INNER; i++) ++ sum += x[i][0] * x[i][0] + x[i][1] * x[i][1]; ++ return sum; ++} ++ ++unsigned int foo (const unsigned int x[OUTER][INNER][2]) ++{ ++ int i; ++ unsigned int sum; ++ ++ sum = 0.0f; ++ for (i = 0; i < OUTER; i++) ++ sum = bar (x[i], sum); ++ return sum; ++} ++ ++/* { dg-final { scan-tree-dump-times "Detected interleaving of size 2" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/tree-data-ref.c' +--- old/gcc/tree-data-ref.c 2011-05-26 14:27:33 +0000 ++++ new/gcc/tree-data-ref.c 2011-10-02 08:43:10 +0000 +@@ -721,11 +721,11 @@ + } + + /* Analyzes the behavior of the memory reference DR in the innermost loop or +- basic block that contains it. Returns true if analysis succeed or false ++ basic block that contains it. Returns true if analysis succeed or false + otherwise. 
*/ + + bool +-dr_analyze_innermost (struct data_reference *dr) ++dr_analyze_innermost (struct data_reference *dr, struct loop *nest) + { + gimple stmt = DR_STMT (dr); + struct loop *loop = loop_containing_stmt (stmt); +@@ -768,14 +768,25 @@ + } + else + base = build_fold_addr_expr (base); ++ + if (in_loop) + { + if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv, + false)) + { +- if (dump_file && (dump_flags & TDF_DETAILS)) +- fprintf (dump_file, "failed: evolution of base is not affine.\n"); +- return false; ++ if (nest) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "failed: evolution of base is not" ++ " affine.\n"); ++ return false; ++ } ++ else ++ { ++ base_iv.base = base; ++ base_iv.step = ssize_int (0); ++ base_iv.no_overflow = true; ++ } + } + } + else +@@ -800,10 +811,18 @@ + else if (!simple_iv (loop, loop_containing_stmt (stmt), + poffset, &offset_iv, false)) + { +- if (dump_file && (dump_flags & TDF_DETAILS)) +- fprintf (dump_file, "failed: evolution of offset is not" +- " affine.\n"); +- return false; ++ if (nest) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "failed: evolution of offset is not" ++ " affine.\n"); ++ return false; ++ } ++ else ++ { ++ offset_iv.base = poffset; ++ offset_iv.step = ssize_int (0); ++ } + } + } + +@@ -967,7 +986,7 @@ + DR_REF (dr) = memref; + DR_IS_READ (dr) = is_read; + +- dr_analyze_innermost (dr); ++ dr_analyze_innermost (dr, nest); + dr_analyze_indices (dr, nest, loop); + dr_analyze_alias (dr); + +@@ -5185,7 +5204,7 @@ + DR_STMT (dr) = stmt; + DR_REF (dr) = op0; + +- res = dr_analyze_innermost (dr) ++ res = dr_analyze_innermost (dr, loop_containing_stmt (stmt)) + && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)); + + free_data_ref (dr); +@@ -5225,7 +5244,7 @@ + + DR_STMT (dr) = stmt; + DR_REF (dr) = *ref->pos; +- dr_analyze_innermost (dr); ++ dr_analyze_innermost (dr, loop_containing_stmt (stmt)); + base_address = DR_BASE_ADDRESS (dr); + + if (!base_address) + +=== modified file 'gcc/tree-data-ref.h' +--- old/gcc/tree-data-ref.h 2011-03-27 09:38:18 +0000 ++++ new/gcc/tree-data-ref.h 2011-10-02 08:43:10 +0000 +@@ -386,7 +386,7 @@ + DEF_VEC_ALLOC_O (data_ref_loc, heap); + + bool get_references_in_stmt (gimple, VEC (data_ref_loc, heap) **); +-bool dr_analyze_innermost (struct data_reference *); ++bool dr_analyze_innermost (struct data_reference *, struct loop *); + extern bool compute_data_dependences_for_loop (struct loop *, bool, + VEC (loop_p, heap) **, + VEC (data_reference_p, heap) **, + +=== modified file 'gcc/tree-loop-distribution.c' +--- old/gcc/tree-loop-distribution.c 2011-05-11 13:07:54 +0000 ++++ new/gcc/tree-loop-distribution.c 2011-10-02 08:43:10 +0000 +@@ -267,7 +267,7 @@ + + DR_STMT (dr) = stmt; + DR_REF (dr) = op0; +- res = dr_analyze_innermost (dr); ++ res = dr_analyze_innermost (dr, loop_containing_stmt (stmt)); + gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0))); + + nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); + +=== modified file 'gcc/tree-predcom.c' +--- old/gcc/tree-predcom.c 2011-02-11 14:19:44 +0000 ++++ new/gcc/tree-predcom.c 2011-10-02 08:43:10 +0000 +@@ -1114,7 +1114,7 @@ + memset (&init_dr, 0, sizeof (struct data_reference)); + DR_REF (&init_dr) = init_ref; + DR_STMT (&init_dr) = phi; +- if (!dr_analyze_innermost (&init_dr)) ++ if (!dr_analyze_innermost (&init_dr, loop)) + return NULL; + + if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref)) + +=== modified file 
'gcc/tree-vect-data-refs.c' +--- old/gcc/tree-vect-data-refs.c 2011-07-04 11:13:51 +0000 ++++ new/gcc/tree-vect-data-refs.c 2011-10-02 08:43:10 +0000 +@@ -353,11 +353,7 @@ + + /* Check that the data-refs have same bases and offsets. If not, we can't + determine if they are dependent. */ +- if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb) +- && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR +- || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR +- || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0) +- != TREE_OPERAND (DR_BASE_ADDRESS (drb),0))) ++ if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0) + || !dr_equal_offsets_p (dra, drb)) + return true; + +@@ -403,11 +399,7 @@ + + /* Check that the data-refs have same first location (except init) and they + are both either store or load (not load and store). */ +- if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb) +- && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR +- || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR +- || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0) +- != TREE_OPERAND (DR_BASE_ADDRESS (drb),0))) ++ if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0) + || !dr_equal_offsets_p (dra, drb) + || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) + || DR_IS_READ (dra) != DR_IS_READ (drb)) +@@ -615,6 +607,11 @@ + if (vect_check_interleaving (dra, drb)) + return false; + ++ /* Read-read is OK (we need this check here, after checking for ++ interleaving). */ ++ if (DR_IS_READ (dra) && DR_IS_READ (drb)) ++ return false; ++ + if (vect_print_dump_info (REPORT_DR_DETAILS)) + { + fprintf (vect_dump, "can't determine dependence between "); + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106821.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106821.patch new file mode 100644 index 0000000000..f25a37858d --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106821.patch @@ -0,0 +1,240 @@ +2011-10-06 Ira Rosen <ira.rosen@linaro.org> + + gcc/testsuite/ + * gcc.dg/vect/bb-slp-26.c: Simplify to make the basic block + vectorizable. + + Backport from mainline: + + 2011-09-25 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vect-slp.c (vect_slp_analyze_bb_1): Split out core part + of vect_analyze_bb here. + (vect_analyze_bb): Loop over vector sizes calling vect_analyze_bb_1. + + gcc/testsuite/ + * lib/target-supports.exp (check_effective_target_vect64): New. + * gcc.dg/vect/bb-slp-11.c: Expect the error message twice in case + of multiple vector sizes. + * gcc.dg/vect/bb-slp-26.c: New. 
+ +=== modified file 'gcc/testsuite/gcc.dg/vect/bb-slp-11.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2011-10-02 10:40:34 +0000 +@@ -49,6 +49,7 @@ + } + + /* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */ +-/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" } } */ ++/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2 "slp" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "slp" } } */ + + +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-26.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-26.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-26.c 2011-10-02 10:40:34 +0000 +@@ -0,0 +1,60 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define A 3 ++#define B 4 ++#define N 256 ++ ++char src[N], dst[N]; ++ ++void foo (char * __restrict__ dst, char * __restrict__ src, int h, ++ int stride, int dummy) ++{ ++ int i; ++ h /= 16; ++ for (i = 0; i < h; i++) ++ { ++ dst[0] += A*src[0]; ++ dst[1] += A*src[1]; ++ dst[2] += A*src[2]; ++ dst[3] += A*src[3]; ++ dst[4] += A*src[4]; ++ dst[5] += A*src[5]; ++ dst[6] += A*src[6]; ++ dst[7] += A*src[7]; ++ dst += 8; ++ src += 8; ++ if (dummy == 32) ++ abort (); ++ } ++} ++ ++ ++int main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ dst[i] = 0; ++ src[i] = i/8; ++ } ++ ++ foo (dst, src, N, 8, 0); ++ ++ for (i = 0; i < N/2; i++) ++ { ++ if (dst[i] != A * src[i]) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect64 } } } */ ++/* { dg-final { cleanup-tree-dump "slp" } } */ ++ + +=== modified file 'gcc/testsuite/lib/target-supports.exp' +--- old/gcc/testsuite/lib/target-supports.exp 2011-09-20 07:54:28 +0000 ++++ new/gcc/testsuite/lib/target-supports.exp 2011-10-02 10:40:34 +0000 +@@ -3283,6 +3283,24 @@ + return $et_vect_multiple_sizes_saved + } + ++# Return 1 if the target supports vectors of 64 bits. ++ ++proc check_effective_target_vect64 { } { ++ global et_vect64 ++ ++ if [info exists et_vect64_saved] { ++ verbose "check_effective_target_vect64: using cached result" 2 ++ } else { ++ set et_vect64_saved 0 ++ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } { ++ set et_vect64_saved 1 ++ } ++ } ++ ++ verbose "check_effective_target_vect64: returning $et_vect64_saved" 2 ++ return $et_vect64_saved ++} ++ + # Return 1 if the target supports section-anchors + + proc check_effective_target_section_anchors { } { + +=== modified file 'gcc/tree-vect-slp.c' +--- old/gcc/tree-vect-slp.c 2011-07-06 12:04:10 +0000 ++++ new/gcc/tree-vect-slp.c 2011-10-02 10:40:34 +0000 +@@ -1664,42 +1664,18 @@ + + /* Check if the basic block can be vectorized. 
*/ + +-bb_vec_info +-vect_slp_analyze_bb (basic_block bb) ++static bb_vec_info ++vect_slp_analyze_bb_1 (basic_block bb) + { + bb_vec_info bb_vinfo; + VEC (ddr_p, heap) *ddrs; + VEC (slp_instance, heap) *slp_instances; + slp_instance instance; +- int i, insns = 0; +- gimple_stmt_iterator gsi; ++ int i; + int min_vf = 2; + int max_vf = MAX_VECTORIZATION_FACTOR; + bool data_dependence_in_bb = false; + +- current_vector_size = 0; +- +- if (vect_print_dump_info (REPORT_DETAILS)) +- fprintf (vect_dump, "===vect_slp_analyze_bb===\n"); +- +- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) +- { +- gimple stmt = gsi_stmt (gsi); +- if (!is_gimple_debug (stmt) +- && !gimple_nop_p (stmt) +- && gimple_code (stmt) != GIMPLE_LABEL) +- insns++; +- } +- +- if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) +- { +- if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) +- fprintf (vect_dump, "not vectorized: too many instructions in basic " +- "block.\n"); +- +- return NULL; +- } +- + bb_vinfo = new_bb_vec_info (bb); + if (!bb_vinfo) + return NULL; +@@ -1819,6 +1795,61 @@ + } + + ++bb_vec_info ++vect_slp_analyze_bb (basic_block bb) ++{ ++ bb_vec_info bb_vinfo; ++ int insns = 0; ++ gimple_stmt_iterator gsi; ++ unsigned int vector_sizes; ++ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "===vect_slp_analyze_bb===\n"); ++ ++ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ gimple stmt = gsi_stmt (gsi); ++ if (!is_gimple_debug (stmt) ++ && !gimple_nop_p (stmt) ++ && gimple_code (stmt) != GIMPLE_LABEL) ++ insns++; ++ } ++ ++ if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) ++ { ++ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) ++ fprintf (vect_dump, "not vectorized: too many instructions in basic " ++ "block.\n"); ++ ++ return NULL; ++ } ++ ++ /* Autodetect first vector size we try. */ ++ current_vector_size = 0; ++ vector_sizes = targetm.vectorize.autovectorize_vector_sizes (); ++ ++ while (1) ++ { ++ bb_vinfo = vect_slp_analyze_bb_1 (bb); ++ if (bb_vinfo) ++ return bb_vinfo; ++ ++ destroy_bb_vec_info (bb_vinfo); ++ ++ vector_sizes &= ~current_vector_size; ++ if (vector_sizes == 0 ++ || current_vector_size == 0) ++ return NULL; ++ ++ /* Try the next biggest vector size. */ ++ current_vector_size = 1 << floor_log2 (vector_sizes); ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "***** Re-trying analysis with " ++ "vector size %d\n", current_vector_size); ++ } ++} ++ ++ + /* SLP costs are calculated according to SLP instance unrolling factor (i.e., + the number of created vector stmts depends on the unrolling factor). + However, the actual number of vector stmts for every SLP node depends on + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106825.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106825.patch new file mode 100644 index 0000000000..13e6fd26e5 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106825.patch @@ -0,0 +1,124 @@ +2011-10-13 Andrew Stubbs <ams@codesourcery.com> + + Backport from mainline: + + 2011-10-07 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/predicates.md (shift_amount_operand): Remove constant + range check. + (shift_operator): Check range of constants for all shift operators. + + gcc/testsuite/ + * gcc.dg/pr50193-1.c: New file. + * gcc.target/arm/shiftable.c: New file. 
+ +=== modified file 'gcc/config/arm/predicates.md' +--- old/gcc/config/arm/predicates.md 2011-10-03 09:47:33 +0000 ++++ new/gcc/config/arm/predicates.md 2011-10-10 11:43:28 +0000 +@@ -129,11 +129,12 @@ + (ior (match_operand 0 "arm_rhs_operand") + (match_operand 0 "memory_operand"))) + ++;; This doesn't have to do much because the constant is already checked ++;; in the shift_operator predicate. + (define_predicate "shift_amount_operand" + (ior (and (match_test "TARGET_ARM") + (match_operand 0 "s_register_operand")) +- (and (match_operand 0 "const_int_operand") +- (match_test "INTVAL (op) > 0")))) ++ (match_operand 0 "const_int_operand"))) + + (define_predicate "arm_add_operand" + (ior (match_operand 0 "arm_rhs_operand") +@@ -219,13 +220,20 @@ + (match_test "mode == GET_MODE (op)"))) + + ;; True for shift operators. ++;; Notes: ++;; * mult is only permitted with a constant shift amount ++;; * patterns that permit register shift amounts only in ARM mode use ++;; shift_amount_operand, patterns that always allow registers do not, ++;; so we don't have to worry about that sort of thing here. + (define_special_predicate "shift_operator" + (and (ior (ior (and (match_code "mult") + (match_test "power_of_two_operand (XEXP (op, 1), mode)")) + (and (match_code "rotate") + (match_test "GET_CODE (XEXP (op, 1)) == CONST_INT + && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))) +- (match_code "ashift,ashiftrt,lshiftrt,rotatert")) ++ (and (match_code "ashift,ashiftrt,lshiftrt,rotatert") ++ (match_test "GET_CODE (XEXP (op, 1)) != CONST_INT ++ || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))) + (match_test "mode == GET_MODE (op)"))) + + ;; True for MULT, to identify which variant of shift_operator is in use. + +=== added file 'gcc/testsuite/gcc.target/arm/shiftable.c' +--- old/gcc/testsuite/gcc.target/arm/shiftable.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/shiftable.c 2011-10-10 11:43:28 +0000 +@@ -0,0 +1,63 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++/* { dg-require-effective-target arm32 } */ ++ ++/* ARM has shift-and-alu insns. Depending on the ALU op GCC represents some ++ of these as a left shift, others as a multiply. Check that we match the ++ right one. 
*/ ++ ++int ++plus (int a, int b) ++{ ++ return (a * 64) + b; ++} ++ ++/* { dg-final { scan-assembler "add.*\[al]sl #6" } } */ ++ ++int ++minus (int a, int b) ++{ ++ return a - (b * 64); ++} ++ ++/* { dg-final { scan-assembler "sub.*\[al]sl #6" } } */ ++ ++int ++ior (int a, int b) ++{ ++ return (a * 64) | b; ++} ++ ++/* { dg-final { scan-assembler "orr.*\[al]sl #6" } } */ ++ ++int ++xor (int a, int b) ++{ ++ return (a * 64) ^ b; ++} ++ ++/* { dg-final { scan-assembler "eor.*\[al]sl #6" } } */ ++ ++int ++and (int a, int b) ++{ ++ return (a * 64) & b; ++} ++ ++/* { dg-final { scan-assembler "and.*\[al]sl #6" } } */ ++ ++int ++rsb (int a, int b) ++{ ++ return (a * 64) - b; ++} ++ ++/* { dg-final { scan-assembler "rsb.*\[al]sl #6" } } */ ++ ++int ++mvn (int a, int b) ++{ ++ return ~(a * 64); ++} ++ ++/* { dg-final { scan-assembler "mvn.*\[al]sl #6" } } */ + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106826.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106826.patch new file mode 100644 index 0000000000..6642f8a667 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106826.patch @@ -0,0 +1,362 @@ +2011-10-16 Ira Rosen <ira.rosen@linaro.org> + + Backport from mainline: + + 2011-09-27 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vect-stmts.c (vectorizable_type_demotion): Handle basic block + vectorization. + (vectorizable_type_promotion): Likewise. + (vect_analyze_stmt): Call vectorizable_type_demotion and + vectorizable_type_promotion for basic blocks. + (supportable_widening_operation): Don't assume loop vectorization. + * tree-vect-slp.c (vect_build_slp_tree): Allow multiple types for + basic blocks. Update vectorization factor for basic block + vectorization. + (vect_analyze_slp_instance): Allow multiple types for basic block + vectorization. Recheck unrolling factor after construction of SLP + instance. + + gcc/testsuite/ + * gcc.dg/vect/bb-slp-11.c: Expect to get vectorized with 64-bit + vectors. + * gcc.dg/vect/bb-slp-27.c: New. + * gcc.dg/vect/bb-slp-28.c: New. + + + 2011-10-04 Ira Rosen <ira.rosen@linaro.org> + + gcc/testsuite/ + * lib/target-supports.exp (check_effective_target_vect_multiple_sizes): + Make et_vect_multiple_sizes_saved global. + (check_effective_target_vect64): Make et_vect64_saved global. 
+ +=== modified file 'gcc/testsuite/gcc.dg/vect/bb-slp-11.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2011-10-02 10:40:34 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2011-10-06 11:08:08 +0000 +@@ -48,8 +48,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */ +-/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" { xfail vect_multiple_sizes } } } */ +-/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2 "slp" { target vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect64 } } } */ + /* { dg-final { cleanup-tree-dump "slp" } } */ + + +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-27.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-27.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-27.c 2011-10-06 11:08:08 +0000 +@@ -0,0 +1,49 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define A 3 ++#define N 16 ++ ++short src[N], dst[N]; ++ ++void foo (int a) ++{ ++ dst[0] += a*src[0]; ++ dst[1] += a*src[1]; ++ dst[2] += a*src[2]; ++ dst[3] += a*src[3]; ++ dst[4] += a*src[4]; ++ dst[5] += a*src[5]; ++ dst[6] += a*src[6]; ++ dst[7] += a*src[7]; ++} ++ ++ ++int main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ dst[i] = 0; ++ src[i] = i; ++ } ++ ++ foo (A); ++ ++ for (i = 0; i < 8; i++) ++ { ++ if (dst[i] != A * i) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult && { vect_unpack && vect_pack_trunc } } } } } */ ++/* { dg-final { cleanup-tree-dump "slp" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-28.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-28.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-28.c 2011-10-06 11:08:08 +0000 +@@ -0,0 +1,71 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define A 300 ++#define N 16 ++ ++char src[N]; ++short dst[N]; ++short src1[N], dst1[N]; ++ ++void foo (int a) ++{ ++ dst[0] = (short) (a * (int) src[0]); ++ dst[1] = (short) (a * (int) src[1]); ++ dst[2] = (short) (a * (int) src[2]); ++ dst[3] = (short) (a * (int) src[3]); ++ dst[4] = (short) (a * (int) src[4]); ++ dst[5] = (short) (a * (int) src[5]); ++ dst[6] = (short) (a * (int) src[6]); ++ dst[7] = (short) (a * (int) src[7]); ++ dst[8] = (short) (a * (int) src[8]); ++ dst[9] = (short) (a * (int) src[9]); ++ dst[10] = (short) (a * (int) src[10]); ++ dst[11] = (short) (a * (int) src[11]); ++ dst[12] = (short) (a * (int) src[12]); ++ dst[13] = (short) (a * (int) src[13]); ++ dst[14] = (short) (a * (int) src[14]); ++ dst[15] = (short) (a * (int) src[15]); ++ ++ dst1[0] += src1[0]; ++ dst1[1] += src1[1]; ++ dst1[2] += src1[2]; ++ dst1[3] += src1[3]; ++ dst1[4] += src1[4]; ++ dst1[5] += src1[5]; ++ dst1[6] += src1[6]; ++ dst1[7] += src1[7]; ++} ++ ++ ++int main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ dst[i] = 2; ++ dst1[i] = 0; ++ src[i] = i; ++ src1[i] = i+2; ++ } ++ ++ foo (A); ++ ++ for (i = 0; i < N; i++) ++ { ++ if (dst[i] != A * i ++ || (i < N/2 && dst1[i] != i + 2)) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult && { vect_pack_trunc && vect_unpack } } } } } */ ++/* { dg-final { cleanup-tree-dump "slp" 
} } */ ++ + +=== modified file 'gcc/testsuite/lib/target-supports.exp' +--- old/gcc/testsuite/lib/target-supports.exp 2011-10-02 10:40:34 +0000 ++++ new/gcc/testsuite/lib/target-supports.exp 2011-10-06 11:08:08 +0000 +@@ -3268,7 +3268,7 @@ + # Return 1 if the target supports multiple vector sizes + + proc check_effective_target_vect_multiple_sizes { } { +- global et_vect_multiple_sizes ++ global et_vect_multiple_sizes_saved + + if [info exists et_vect_multiple_sizes_saved] { + verbose "check_effective_target_vect_multiple_sizes: using cached result" 2 +@@ -3286,7 +3286,7 @@ + # Return 1 if the target supports vectors of 64 bits. + + proc check_effective_target_vect64 { } { +- global et_vect64 ++ global et_vect64_saved + + if [info exists et_vect64_saved] { + verbose "check_effective_target_vect64: using cached result" 2 + +=== modified file 'gcc/tree-vect-slp.c' +--- old/gcc/tree-vect-slp.c 2011-10-02 10:40:34 +0000 ++++ new/gcc/tree-vect-slp.c 2011-10-06 11:08:08 +0000 +@@ -386,20 +386,15 @@ + return false; + } + +- ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype); +- if (ncopies != 1) +- { +- if (vect_print_dump_info (REPORT_SLP)) +- fprintf (vect_dump, "SLP with multiple types "); +- +- /* FORNOW: multiple types are unsupported in BB SLP. */ +- if (bb_vinfo) +- return false; +- } +- + /* In case of multiple types we need to detect the smallest type. */ + if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype)) +- *max_nunits = TYPE_VECTOR_SUBPARTS (vectype); ++ { ++ *max_nunits = TYPE_VECTOR_SUBPARTS (vectype); ++ if (bb_vinfo) ++ vectorization_factor = *max_nunits; ++ } ++ ++ ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype); + + if (is_gimple_call (stmt)) + rhs_code = CALL_EXPR; +@@ -1183,7 +1178,6 @@ + if (loop_vinfo) + vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + else +- /* No multitypes in BB SLP. */ + vectorization_factor = nunits; + + /* Calculate the unrolling factor. */ +@@ -1246,16 +1240,23 @@ + &max_nunits, &load_permutation, &loads, + vectorization_factor)) + { ++ /* Calculate the unrolling factor based on the smallest type. */ ++ if (max_nunits > nunits) ++ unrolling_factor = least_common_multiple (max_nunits, group_size) ++ / group_size; ++ ++ if (unrolling_factor != 1 && !loop_vinfo) ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ fprintf (vect_dump, "Build SLP failed: unrolling required in basic" ++ " block SLP"); ++ return false; ++ } ++ + /* Create a new SLP instance. */ + new_instance = XNEW (struct _slp_instance); + SLP_INSTANCE_TREE (new_instance) = node; + SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size; +- /* Calculate the unrolling factor based on the smallest type in the +- loop. */ +- if (max_nunits > nunits) +- unrolling_factor = least_common_multiple (max_nunits, group_size) +- / group_size; +- + SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor; + SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost; + SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost; + +=== modified file 'gcc/tree-vect-stmts.c' +--- old/gcc/tree-vect-stmts.c 2011-10-04 08:57:25 +0000 ++++ new/gcc/tree-vect-stmts.c 2011-10-16 12:16:07 +0000 +@@ -3081,11 +3081,9 @@ + VEC (tree, heap) *vec_oprnds0 = NULL; + VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL; + tree last_oprnd, intermediate_type; +- +- /* FORNOW: not supported by basic block SLP vectorization. 
*/ +- gcc_assert (loop_vinfo); +- +- if (!STMT_VINFO_RELEVANT_P (stmt_info)) ++ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); ++ ++ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) + return false; + + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) +@@ -3113,7 +3111,7 @@ + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)) + && CONVERT_EXPR_CODE_P (code)))) + return false; +- if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL, ++ if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo, + &def_stmt, &def, &dt[0], &vectype_in)) + { + if (vect_print_dump_info (REPORT_DETAILS)) +@@ -3360,11 +3358,9 @@ + int multi_step_cvt = 0; + VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL; + VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL; +- +- /* FORNOW: not supported by basic block SLP vectorization. */ +- gcc_assert (loop_vinfo); +- +- if (!STMT_VINFO_RELEVANT_P (stmt_info)) ++ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); ++ ++ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) + return false; + + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) +@@ -3393,7 +3389,7 @@ + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)) + && CONVERT_EXPR_CODE_P (code)))) + return false; +- if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL, ++ if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo, + &def_stmt, &def, &dt[0], &vectype_in)) + { + if (vect_print_dump_info (REPORT_DETAILS)) +@@ -5153,7 +5149,9 @@ + else + { + if (bb_vinfo) +- ok = (vectorizable_shift (stmt, NULL, NULL, node) ++ ok = (vectorizable_type_promotion (stmt, NULL, NULL, node) ++ || vectorizable_type_demotion (stmt, NULL, NULL, node) ++ || vectorizable_shift (stmt, NULL, NULL, node) + || vectorizable_operation (stmt, NULL, NULL, node) + || vectorizable_assignment (stmt, NULL, NULL, node) + || vectorizable_load (stmt, NULL, NULL, node, NULL) +@@ -5780,7 +5778,7 @@ + { + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info); +- struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); ++ struct loop *vect_loop = NULL; + bool ordered_p; + enum machine_mode vec_mode; + enum insn_code icode1, icode2; +@@ -5789,6 +5787,9 @@ + tree wide_vectype = vectype_out; + enum tree_code c1, c2; + ++ if (loop_info) ++ vect_loop = LOOP_VINFO_LOOP (loop_info); ++ + /* The result of a vectorized widening operation usually requires two vectors + (because the widened results do not fit int one vector). The generated + vector results would normally be expected to be generated in the same +@@ -5809,7 +5810,8 @@ + iterations in parallel). We therefore don't allow to change the order + of the computation in the inner-loop during outer-loop vectorization. */ + +- if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction ++ if (vect_loop ++ && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction + && !nested_in_vect_loop_p (vect_loop, stmt)) + ordered_p = false; + else + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106827.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106827.patch new file mode 100644 index 0000000000..28caa40076 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106827.patch @@ -0,0 +1,622 @@ +2011-10-17 Michael Hope <michael.hope@linaro.org> + + Backport from mainline r178852: + + 2011-09-14 Julian Brown <julian@codesourcery.com> + + gcc/ + * config/arm/arm.c (arm_override_options): Add unaligned_access + support. 
+ (arm_file_start): Emit attribute for unaligned access as appropriate. + * config/arm/arm.md (UNSPEC_UNALIGNED_LOAD) + (UNSPEC_UNALIGNED_STORE): Add constants for unspecs. + (insv, extzv): Add unaligned-access support. + (extv): Change to expander. Likewise. + (extzv_t1, extv_regsi): Add helpers. + (unaligned_loadsi, unaligned_loadhis, unaligned_loadhiu) + (unaligned_storesi, unaligned_storehi): New. + (*extv_reg): New (previous extv implementation). + * config/arm/arm.opt (munaligned_access): Add option. + * config/arm/constraints.md (Uw): New constraint. + * expmed.c (store_bit_field_1): Adjust bitfield numbering according + to size of access, not size of unit, when BITS_BIG_ENDIAN != + BYTES_BIG_ENDIAN. Don't use bitfield accesses for + volatile accesses when -fstrict-volatile-bitfields is in effect. + (extract_bit_field_1): Likewise. + + Backport from mainline r172697: + + 2011-04-19 Wei Guozhi <carrot@google.com> + + PR target/47855 + gcc/ + * config/arm/arm-protos.h (thumb1_legitimate_address_p): New prototype. + * config/arm/arm.c (thumb1_legitimate_address_p): Remove the static + linkage. + * config/arm/constraints.md (Uu): New constraint. + * config/arm/arm.md (*arm_movqi_insn): Compute attr "length". + +=== modified file 'gcc/config/arm/arm-protos.h' +Index: gcc-4_6-branch/gcc/config/arm/arm-protos.h +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/arm-protos.h 2012-03-05 16:07:15.000000000 -0800 ++++ gcc-4_6-branch/gcc/config/arm/arm-protos.h 2012-03-05 16:07:50.392936694 -0800 +@@ -59,6 +59,7 @@ + int); + extern rtx thumb_legitimize_reload_address (rtx *, enum machine_mode, int, int, + int); ++extern int thumb1_legitimate_address_p (enum machine_mode, rtx, int); + extern int arm_const_double_rtx (rtx); + extern int neg_const_double_rtx_ok_for_fpa (rtx); + extern int vfp3_const_double_rtx (rtx); +Index: gcc-4_6-branch/gcc/config/arm/arm.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/arm.c 2012-03-05 16:07:15.000000000 -0800 ++++ gcc-4_6-branch/gcc/config/arm/arm.c 2012-03-05 16:07:50.400936694 -0800 +@@ -2065,6 +2065,28 @@ + fix_cm3_ldrd = 0; + } + ++ /* Enable -munaligned-access by default for ++ - all ARMv6 architecture-based processors ++ - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors. ++ ++ Disable -munaligned-access by default for ++ - all pre-ARMv6 architecture-based processors ++ - ARMv6-M architecture-based processors. */ ++ ++ if (unaligned_access == 2) ++ { ++ if (arm_arch6 && (arm_arch_notm || arm_arch7)) ++ unaligned_access = 1; ++ else ++ unaligned_access = 0; ++ } ++ else if (unaligned_access == 1 ++ && !(arm_arch6 && (arm_arch_notm || arm_arch7))) ++ { ++ warning (0, "target CPU does not support unaligned accesses"); ++ unaligned_access = 0; ++ } ++ + if (TARGET_THUMB1 && flag_schedule_insns) + { + /* Don't warn since it's on by default in -O2. */ +@@ -6123,7 +6145,7 @@ + addresses based on the frame pointer or arg pointer until the + reload pass starts. This is so that eliminating such addresses + into stack based ones won't produce impossible code. */ +-static int ++int + thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p) + { + /* ??? Not clear if this is right. Experiment. */ +@@ -22251,6 +22273,10 @@ + val = 6; + asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val); + ++ /* Tag_CPU_unaligned_access. 
*/ ++ asm_fprintf (asm_out_file, "\t.eabi_attribute 34, %d\n", ++ unaligned_access); ++ + /* Tag_ABI_FP_16bit_format. */ + if (arm_fp16_format) + asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n", +Index: gcc-4_6-branch/gcc/config/arm/arm.md +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/arm.md 2012-03-05 16:07:15.000000000 -0800 ++++ gcc-4_6-branch/gcc/config/arm/arm.md 2012-03-05 16:09:26.284941314 -0800 +@@ -114,6 +114,10 @@ + ; another symbolic address. + (UNSPEC_MEMORY_BARRIER 28) ; Represent a memory barrier. + (UNSPEC_PIC_UNIFIED 29) ; Create a common pic addressing form. ++ (UNSPEC_UNALIGNED_LOAD 30) ; Used to represent ldr/ldrh instructions that access ++ ; unaligned locations, on architectures which support ++ ; that. ++ (UNSPEC_UNALIGNED_STORE 31) ; Same for str/strh. + ] + ) + +@@ -2461,10 +2465,10 @@ + ;;; this insv pattern, so this pattern needs to be reevalutated. + + (define_expand "insv" +- [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "") +- (match_operand:SI 1 "general_operand" "") +- (match_operand:SI 2 "general_operand" "")) +- (match_operand:SI 3 "reg_or_int_operand" ""))] ++ [(set (zero_extract (match_operand 0 "nonimmediate_operand" "") ++ (match_operand 1 "general_operand" "") ++ (match_operand 2 "general_operand" "")) ++ (match_operand 3 "reg_or_int_operand" ""))] + "TARGET_ARM || arm_arch_thumb2" + " + { +@@ -2475,35 +2479,70 @@ + + if (arm_arch_thumb2) + { +- bool use_bfi = TRUE; +- +- if (GET_CODE (operands[3]) == CONST_INT) ++ if (unaligned_access && MEM_P (operands[0]) ++ && s_register_operand (operands[3], GET_MODE (operands[3])) ++ && (width == 16 || width == 32) && (start_bit % BITS_PER_UNIT) == 0) + { +- HOST_WIDE_INT val = INTVAL (operands[3]) & mask; ++ rtx base_addr; ++ ++ if (BYTES_BIG_ENDIAN) ++ start_bit = GET_MODE_BITSIZE (GET_MODE (operands[3])) - width ++ - start_bit; + +- if (val == 0) ++ if (width == 32) + { +- emit_insn (gen_insv_zero (operands[0], operands[1], +- operands[2])); +- DONE; ++ base_addr = adjust_address (operands[0], SImode, ++ start_bit / BITS_PER_UNIT); ++ emit_insn (gen_unaligned_storesi (base_addr, operands[3])); + } ++ else ++ { ++ rtx tmp = gen_reg_rtx (HImode); + +- /* See if the set can be done with a single orr instruction. */ +- if (val == mask && const_ok_for_arm (val << start_bit)) +- use_bfi = FALSE; ++ base_addr = adjust_address (operands[0], HImode, ++ start_bit / BITS_PER_UNIT); ++ emit_move_insn (tmp, gen_lowpart (HImode, operands[3])); ++ emit_insn (gen_unaligned_storehi (base_addr, tmp)); ++ } ++ DONE; + } +- +- if (use_bfi) ++ else if (s_register_operand (operands[0], GET_MODE (operands[0]))) + { +- if (GET_CODE (operands[3]) != REG) +- operands[3] = force_reg (SImode, operands[3]); ++ bool use_bfi = TRUE; + +- emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2], +- operands[3])); +- DONE; ++ if (GET_CODE (operands[3]) == CONST_INT) ++ { ++ HOST_WIDE_INT val = INTVAL (operands[3]) & mask; ++ ++ if (val == 0) ++ { ++ emit_insn (gen_insv_zero (operands[0], operands[1], ++ operands[2])); ++ DONE; ++ } ++ ++ /* See if the set can be done with a single orr instruction. 
*/ ++ if (val == mask && const_ok_for_arm (val << start_bit)) ++ use_bfi = FALSE; ++ } ++ ++ if (use_bfi) ++ { ++ if (GET_CODE (operands[3]) != REG) ++ operands[3] = force_reg (SImode, operands[3]); ++ ++ emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2], ++ operands[3])); ++ DONE; ++ } + } ++ else ++ FAIL; + } + ++ if (!s_register_operand (operands[0], GET_MODE (operands[0]))) ++ FAIL; ++ + target = copy_rtx (operands[0]); + /* Avoid using a subreg as a subtarget, and avoid writing a paradoxical + subreg as the final target. */ +@@ -3695,12 +3734,10 @@ + ;; to reduce register pressure later on. + + (define_expand "extzv" +- [(set (match_dup 4) +- (ashift:SI (match_operand:SI 1 "register_operand" "") +- (match_operand:SI 2 "const_int_operand" ""))) +- (set (match_operand:SI 0 "register_operand" "") +- (lshiftrt:SI (match_dup 4) +- (match_operand:SI 3 "const_int_operand" "")))] ++ [(set (match_operand 0 "s_register_operand" "") ++ (zero_extract (match_operand 1 "nonimmediate_operand" "") ++ (match_operand 2 "const_int_operand" "") ++ (match_operand 3 "const_int_operand" "")))] + "TARGET_THUMB1 || arm_arch_thumb2" + " + { +@@ -3709,10 +3746,57 @@ + + if (arm_arch_thumb2) + { +- emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2], +- operands[3])); +- DONE; ++ HOST_WIDE_INT width = INTVAL (operands[2]); ++ HOST_WIDE_INT bitpos = INTVAL (operands[3]); ++ ++ if (unaligned_access && MEM_P (operands[1]) ++ && (width == 16 || width == 32) && (bitpos % BITS_PER_UNIT) == 0) ++ { ++ rtx base_addr; ++ ++ if (BYTES_BIG_ENDIAN) ++ bitpos = GET_MODE_BITSIZE (GET_MODE (operands[0])) - width ++ - bitpos; ++ ++ if (width == 32) ++ { ++ base_addr = adjust_address (operands[1], SImode, ++ bitpos / BITS_PER_UNIT); ++ emit_insn (gen_unaligned_loadsi (operands[0], base_addr)); ++ } ++ else ++ { ++ rtx dest = operands[0]; ++ rtx tmp = gen_reg_rtx (SImode); ++ ++ /* We may get a paradoxical subreg here. Strip it off. */ ++ if (GET_CODE (dest) == SUBREG ++ && GET_MODE (dest) == SImode ++ && GET_MODE (SUBREG_REG (dest)) == HImode) ++ dest = SUBREG_REG (dest); ++ ++ if (GET_MODE_BITSIZE (GET_MODE (dest)) != width) ++ FAIL; ++ ++ base_addr = adjust_address (operands[1], HImode, ++ bitpos / BITS_PER_UNIT); ++ emit_insn (gen_unaligned_loadhiu (tmp, base_addr)); ++ emit_move_insn (gen_lowpart (SImode, dest), tmp); ++ } ++ DONE; ++ } ++ else if (s_register_operand (operands[1], GET_MODE (operands[1]))) ++ { ++ emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2], ++ operands[3])); ++ DONE; ++ } ++ else ++ FAIL; + } ++ ++ if (!s_register_operand (operands[1], GET_MODE (operands[1]))) ++ FAIL; + + operands[3] = GEN_INT (rshift); + +@@ -3722,12 +3806,154 @@ + DONE; + } + +- operands[2] = GEN_INT (lshift); +- operands[4] = gen_reg_rtx (SImode); ++ emit_insn (gen_extzv_t1 (operands[0], operands[1], GEN_INT (lshift), ++ operands[3], gen_reg_rtx (SImode))); ++ DONE; + }" + ) + +-(define_insn "extv" ++;; Helper for extzv, for the Thumb-1 register-shifts case. 
++ ++(define_expand "extzv_t1" ++ [(set (match_operand:SI 4 "s_register_operand" "") ++ (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "") ++ (match_operand:SI 2 "const_int_operand" ""))) ++ (set (match_operand:SI 0 "s_register_operand" "") ++ (lshiftrt:SI (match_dup 4) ++ (match_operand:SI 3 "const_int_operand" "")))] ++ "TARGET_THUMB1" ++ "") ++ ++(define_expand "extv" ++ [(set (match_operand 0 "s_register_operand" "") ++ (sign_extract (match_operand 1 "nonimmediate_operand" "") ++ (match_operand 2 "const_int_operand" "") ++ (match_operand 3 "const_int_operand" "")))] ++ "arm_arch_thumb2" ++{ ++ HOST_WIDE_INT width = INTVAL (operands[2]); ++ HOST_WIDE_INT bitpos = INTVAL (operands[3]); ++ ++ if (unaligned_access && MEM_P (operands[1]) && (width == 16 || width == 32) ++ && (bitpos % BITS_PER_UNIT) == 0) ++ { ++ rtx base_addr; ++ ++ if (BYTES_BIG_ENDIAN) ++ bitpos = GET_MODE_BITSIZE (GET_MODE (operands[0])) - width - bitpos; ++ ++ if (width == 32) ++ { ++ base_addr = adjust_address (operands[1], SImode, ++ bitpos / BITS_PER_UNIT); ++ emit_insn (gen_unaligned_loadsi (operands[0], base_addr)); ++ } ++ else ++ { ++ rtx dest = operands[0]; ++ rtx tmp = gen_reg_rtx (SImode); ++ ++ /* We may get a paradoxical subreg here. Strip it off. */ ++ if (GET_CODE (dest) == SUBREG ++ && GET_MODE (dest) == SImode ++ && GET_MODE (SUBREG_REG (dest)) == HImode) ++ dest = SUBREG_REG (dest); ++ ++ if (GET_MODE_BITSIZE (GET_MODE (dest)) != width) ++ FAIL; ++ ++ base_addr = adjust_address (operands[1], HImode, ++ bitpos / BITS_PER_UNIT); ++ emit_insn (gen_unaligned_loadhis (tmp, base_addr)); ++ emit_move_insn (gen_lowpart (SImode, dest), tmp); ++ } ++ ++ DONE; ++ } ++ else if (!s_register_operand (operands[1], GET_MODE (operands[1]))) ++ FAIL; ++ else if (GET_MODE (operands[0]) == SImode ++ && GET_MODE (operands[1]) == SImode) ++ { ++ emit_insn (gen_extv_regsi (operands[0], operands[1], operands[2], ++ operands[3])); ++ DONE; ++ } ++ ++ FAIL; ++}) ++ ++; Helper to expand register forms of extv with the proper modes. ++ ++(define_expand "extv_regsi" ++ [(set (match_operand:SI 0 "s_register_operand" "") ++ (sign_extract:SI (match_operand:SI 1 "s_register_operand" "") ++ (match_operand 2 "const_int_operand" "") ++ (match_operand 3 "const_int_operand" "")))] ++ "" ++{ ++}) ++ ++; ARMv6+ unaligned load/store instructions (used for packed structure accesses). 
++ ++(define_insn "unaligned_loadsi" ++ [(set (match_operand:SI 0 "s_register_operand" "=l,r") ++ (unspec:SI [(match_operand:SI 1 "memory_operand" "Uw,m")] ++ UNSPEC_UNALIGNED_LOAD))] ++ "unaligned_access && TARGET_32BIT" ++ "ldr%?\t%0, %1\t@ unaligned" ++ [(set_attr "arch" "t2,any") ++ (set_attr "length" "2,4") ++ (set_attr "predicable" "yes") ++ (set_attr "type" "load1")]) ++ ++(define_insn "unaligned_loadhis" ++ [(set (match_operand:SI 0 "s_register_operand" "=l,r") ++ (sign_extend:SI ++ (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")] ++ UNSPEC_UNALIGNED_LOAD)))] ++ "unaligned_access && TARGET_32BIT" ++ "ldr%(sh%)\t%0, %1\t@ unaligned" ++ [(set_attr "arch" "t2,any") ++ (set_attr "length" "2,4") ++ (set_attr "predicable" "yes") ++ (set_attr "type" "load_byte")]) ++ ++(define_insn "unaligned_loadhiu" ++ [(set (match_operand:SI 0 "s_register_operand" "=l,r") ++ (zero_extend:SI ++ (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")] ++ UNSPEC_UNALIGNED_LOAD)))] ++ "unaligned_access && TARGET_32BIT" ++ "ldr%(h%)\t%0, %1\t@ unaligned" ++ [(set_attr "arch" "t2,any") ++ (set_attr "length" "2,4") ++ (set_attr "predicable" "yes") ++ (set_attr "type" "load_byte")]) ++ ++(define_insn "unaligned_storesi" ++ [(set (match_operand:SI 0 "memory_operand" "=Uw,m") ++ (unspec:SI [(match_operand:SI 1 "s_register_operand" "l,r")] ++ UNSPEC_UNALIGNED_STORE))] ++ "unaligned_access && TARGET_32BIT" ++ "str%?\t%1, %0\t@ unaligned" ++ [(set_attr "arch" "t2,any") ++ (set_attr "length" "2,4") ++ (set_attr "predicable" "yes") ++ (set_attr "type" "store1")]) ++ ++(define_insn "unaligned_storehi" ++ [(set (match_operand:HI 0 "memory_operand" "=Uw,m") ++ (unspec:HI [(match_operand:HI 1 "s_register_operand" "l,r")] ++ UNSPEC_UNALIGNED_STORE))] ++ "unaligned_access && TARGET_32BIT" ++ "str%(h%)\t%1, %0\t@ unaligned" ++ [(set_attr "arch" "t2,any") ++ (set_attr "length" "2,4") ++ (set_attr "predicable" "yes") ++ (set_attr "type" "store1")]) ++ ++(define_insn "*extv_reg" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "const_int_operand" "M") +@@ -6069,8 +6295,8 @@ + + + (define_insn "*arm_movqi_insn" +- [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m") +- (match_operand:QI 1 "general_operand" "rI,K,m,r"))] ++ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,l,Uu,r,m") ++ (match_operand:QI 1 "general_operand" "rI,K,Uu,l,m,r"))] + "TARGET_32BIT + && ( register_operand (operands[0], QImode) + || register_operand (operands[1], QImode))" +@@ -6078,10 +6304,14 @@ + mov%?\\t%0, %1 + mvn%?\\t%0, #%B1 + ldr%(b%)\\t%0, %1 ++ str%(b%)\\t%1, %0 ++ ldr%(b%)\\t%0, %1 + str%(b%)\\t%1, %0" +- [(set_attr "type" "*,*,load1,store1") +- (set_attr "insn" "mov,mvn,*,*") +- (set_attr "predicable" "yes")] ++ [(set_attr "type" "*,*,load1,store1,load1,store1") ++ (set_attr "insn" "mov,mvn,*,*,*,*") ++ (set_attr "predicable" "yes") ++ (set_attr "arch" "any,any,t2,t2,any,any") ++ (set_attr "length" "4,4,2,2,4,4")] + ) + + (define_insn "*thumb1_movqi_insn" +Index: gcc-4_6-branch/gcc/config/arm/arm.opt +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/arm.opt 2012-03-05 16:07:14.000000000 -0800 ++++ gcc-4_6-branch/gcc/config/arm/arm.opt 2012-03-05 16:07:50.404936697 -0800 +@@ -173,3 +173,7 @@ + Target Report Var(fix_cm3_ldrd) Init(2) + Avoid overlapping destination and address registers on LDRD instructions + that may trigger Cortex-M3 errata. 
++ ++munaligned-access ++Target Report Var(unaligned_access) Init(2) ++Enable unaligned word and halfword accesses to packed data. +Index: gcc-4_6-branch/gcc/config/arm/constraints.md +=================================================================== +--- gcc-4_6-branch.orig/gcc/config/arm/constraints.md 2012-03-05 16:07:14.000000000 -0800 ++++ gcc-4_6-branch/gcc/config/arm/constraints.md 2012-03-05 16:07:50.404936697 -0800 +@@ -36,6 +36,7 @@ + ;; The following memory constraints have been used: + ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us + ;; in ARM state: Uq ++;; in Thumb state: Uu, Uw + + + (define_register_constraint "f" "TARGET_ARM ? FPA_REGS : NO_REGS" +@@ -344,6 +345,27 @@ + (and (match_code "mem") + (match_test "REG_P (XEXP (op, 0))"))) + ++(define_memory_constraint "Uu" ++ "@internal ++ In Thumb state an address that is valid in 16bit encoding." ++ (and (match_code "mem") ++ (match_test "TARGET_THUMB ++ && thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0), ++ 0)"))) ++ ++; The 16-bit post-increment LDR/STR accepted by thumb1_legitimate_address_p ++; are actually LDM/STM instructions, so cannot be used to access unaligned ++; data. ++(define_memory_constraint "Uw" ++ "@internal ++ In Thumb state an address that is valid in 16bit encoding, and that can be ++ used for unaligned accesses." ++ (and (match_code "mem") ++ (match_test "TARGET_THUMB ++ && thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0), ++ 0) ++ && GET_CODE (XEXP (op, 0)) != POST_INC"))) ++ + ;; We used to have constraint letters for S and R in ARM state, but + ;; all uses of these now appear to have been removed. + +Index: gcc-4_6-branch/gcc/expmed.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/expmed.c 2012-01-04 15:37:51.000000000 -0800 ++++ gcc-4_6-branch/gcc/expmed.c 2012-03-05 16:07:50.404936697 -0800 +@@ -657,6 +657,10 @@ + && GET_MODE (value) != BLKmode + && bitsize > 0 + && GET_MODE_BITSIZE (op_mode) >= bitsize ++ /* Do not use insv for volatile bitfields when ++ -fstrict-volatile-bitfields is in effect. */ ++ && !(MEM_P (op0) && MEM_VOLATILE_P (op0) ++ && flag_strict_volatile_bitfields > 0) + && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG) + && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))) + && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize), +@@ -700,19 +704,21 @@ + copy_back = true; + } + +- /* On big-endian machines, we count bits from the most significant. +- If the bit field insn does not, we must invert. */ +- +- if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) +- xbitpos = unit - bitsize - xbitpos; +- + /* We have been counting XBITPOS within UNIT. + Count instead within the size of the register. */ +- if (BITS_BIG_ENDIAN && !MEM_P (xop0)) ++ if (BYTES_BIG_ENDIAN && !MEM_P (xop0)) + xbitpos += GET_MODE_BITSIZE (op_mode) - unit; + + unit = GET_MODE_BITSIZE (op_mode); + ++ /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count ++ "backwards" from the size of the unit we are inserting into. ++ Otherwise, we count bits from the most significant on a ++ BYTES/BITS_BIG_ENDIAN machine. */ ++ ++ if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) ++ xbitpos = unit - bitsize - xbitpos; ++ + /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. 
*/ + value1 = value; + if (GET_MODE (value) != op_mode) +@@ -1528,6 +1534,10 @@ + if (ext_mode != MAX_MACHINE_MODE + && bitsize > 0 + && GET_MODE_BITSIZE (ext_mode) >= bitsize ++ /* Do not use extv/extzv for volatile bitfields when ++ -fstrict-volatile-bitfields is in effect. */ ++ && !(MEM_P (op0) && MEM_VOLATILE_P (op0) ++ && flag_strict_volatile_bitfields > 0) + /* If op0 is a register, we need it in EXT_MODE to make it + acceptable to the format of ext(z)v. */ + && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode) +@@ -1552,17 +1562,20 @@ + /* Get ref to first byte containing part of the field. */ + xop0 = adjust_address (xop0, byte_mode, xoffset); + +- /* On big-endian machines, we count bits from the most significant. +- If the bit field insn does not, we must invert. */ +- if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) +- xbitpos = unit - bitsize - xbitpos; +- + /* Now convert from counting within UNIT to counting in EXT_MODE. */ +- if (BITS_BIG_ENDIAN && !MEM_P (xop0)) ++ if (BYTES_BIG_ENDIAN && !MEM_P (xop0)) + xbitpos += GET_MODE_BITSIZE (ext_mode) - unit; + + unit = GET_MODE_BITSIZE (ext_mode); + ++ /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count ++ "backwards" from the size of the unit we are extracting from. ++ Otherwise, we count bits from the most significant on a ++ BYTES/BITS_BIG_ENDIAN machine. */ ++ ++ if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) ++ xbitpos = unit - bitsize - xbitpos; ++ + if (xtarget == 0) + xtarget = xspec_target = gen_reg_rtx (tmode); + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106828.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106828.patch new file mode 100644 index 0000000000..3c0ff00856 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106828.patch @@ -0,0 +1,1951 @@ +2011-10-17 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-10-10 Richard Sandiford <richard.sandiford@linaro.org> + + * modulo-sched.c (ps_reg_move_info): Add num_consecutive_stages. + (SCHED_FIRST_REG_MOVE, SCHED_NREG_MOVES): Delete. + (node_sched_params): Remove first_reg_move and nreg_moves. + (ps_num_consecutive_stages, extend_node_sched_params): New functions. + (update_node_sched_params): Move up file. + (print_node_sched_params): Print the stage. Don't dump info related + to first_reg_move and nreg_moves. + (set_columns_for_row): New function. + (set_columns_for_ps): Move up file and use set_columns_for_row. + (schedule_reg_move): New function. + (schedule_reg_moves): Call extend_node_sched_params and + schedule_reg_move. Extend size of uses bitmap. Initialize + num_consecutive_stages. Return false if a move could not be + scheduled. + (apply_reg_moves): Don't emit moves here. + (permute_partial_schedule): Handle register moves. + (duplicate_insns_of_cycles): Remove for_prolog. Emit moves according + to the same stage-count test as ddg nodes. + (generate_prolog_epilog): Update calls accordingly. + (sms_schedule): Allow move-scheduling to add a new first stage. + +2011-10-17 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-10-10 Richard Sandiford <richard.sandiford@linaro.org> + + * modulo-sched.c (ps_insn): Adjust comment. + (ps_reg_move_info): New structure. + (partial_schedule): Add reg_moves field. + (SCHED_PARAMS): Use node_sched_param_vec instead of node_sched_params. + (node_sched_params): Turn first_reg_move into an identifier. 
+ (ps_reg_move): New function. + (ps_rtl_insn): Cope with register moves. + (ps_first_note): Adjust comment and assert that the instruction + isn't a register move. + (node_sched_params): Replace with... + (node_sched_param_vec): ...this vector. + (set_node_sched_params): Adjust accordingly. + (print_node_sched_params): Take a partial schedule instead of a ddg. + Use ps_rtl_insn and ps_reg_move. + (generate_reg_moves): Rename to... + (schedule_reg_moves): ...this. Remove rescan parameter. Record each + move in the partial schedule, but don't emit it here. Don't perform + register substitutions here either. + (apply_reg_moves): New function. + (duplicate_insns_of_cycles): Use register indices directly, + rather than finding instructions using PREV_INSN. Use ps_reg_move. + (sms_schedule): Call schedule_reg_moves before committing to + a partial schedule. Try the next ii if the schedule fails. + Use apply_reg_moves instead of generate_reg_moves. Adjust + call to print_node_sched_params. Free node_sched_param_vec + instead of node_sched_params. + (create_partial_schedule): Initialize reg_moves. + (free_partial_schedule): Free reg_moves. + +2011-10-17 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-10-10 Richard Sandiford <richard.sandiford@linaro.org> + + * modulo-sched.c (ps_insn): Replace node field with an identifier. + (SCHED_ASAP): Replace with.. + (NODE_ASAP): ...this macro. + (SCHED_PARAMS): New macro. + (SCHED_TIME, SCHED_FIRST_REG_MOVE, SCHED_NREG_MOVES, SCHED_ROW) + (SCHED_STAGE, SCHED_COLUMN): Redefine using SCHED_PARAMS. + (node_sched_params): Remove asap. + (ps_rtl_insn, ps_first_note): New functions. + (set_node_sched_params): Use XCNEWVEC. Don't copy across the + asap values. + (print_node_sched_params): Use SCHED_PARAMS and NODE_ASAP. + (generate_reg_moves): Pass ids to the SCHED_* macros. + (update_node_sched_params): Take a ps insn identifier rather than + a node as parameter. Use ps_rtl_insn. + (set_columns_for_ps): Update for above field and SCHED_* macro changes. + (permute_partial_schedule): Use ps_rtl_insn and ps_first_note. + (optimize_sc): Update for above field and SCHED_* macro changes. + Update calls to try_scheduling_node_in_cycle and + update_node_sched_params. + (duplicate_insns_of_cycles): Adjust for above field and SCHED_* + macro changes. Use ps_rtl_insn and ps_first_note. + (sms_schedule): Pass ids to the SCHED_* macros. + (get_sched_window): Adjust for above field and SCHED_* macro changes. + Use NODE_ASAP instead of SCHED_ASAP. + (try_scheduling_node_in_cycle): Remove node parameter. Update + call to ps_add_node_check_conflicts. Pass ids to the SCHED_* + macros. + (sms_schedule_by_order): Update call to try_scheduling_node_in_cycle. + (ps_insert_empty_row): Adjust for above field changes. + (compute_split_row): Use ids rather than nodes. + (verify_partial_schedule): Adjust for above field changes. + (print_partial_schedule): Use ps_rtl_insn. + (create_ps_insn): Take an id rather than a node. + (ps_insn_find_column): Adjust for above field changes. + Use ps_rtl_insn. + (ps_insn_advance_column): Adjust for above field changes. + (add_node_to_ps): Remove node parameter. Update call to + create_ps_insn. + (ps_has_conflicts): Use ps_rtl_insn. + (ps_add_node_check_conflicts): Replace node parameter than an id. 
+ +2011-10-17 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-10-10 Richard Sandiford <richard.sandiford@linaro.org> + + * modulo-sched.c (undo_replace_buff_elem): Delete. + (generate_reg_moves): Don't build and return an undo list. + (free_undo_replace_buff): Delete. + (sms_schedule): Adjust call to generate_reg_moves. + Don't call free_undo_replace_buff. + +2011-10-17 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-08-08 Richard Sandiford <richard.sandiford@linaro.org> + + * modulo-sched.c (get_sched_window): Use a table for the debug output. + Print the current ii. + (sms_schedule_by_order): Reduce whitespace in dump line. + +2011-10-17 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-08-08 Richard Sandiford <richard.sandiford@linaro.org> + + * modulo-sched.c (get_sched_window): Use just one loop for predecessors + and one loop for successors. Fix upper bound of memory range. + +=== modified file 'gcc/modulo-sched.c' +--- old/gcc/modulo-sched.c 2011-10-02 06:56:53 +0000 ++++ new/gcc/modulo-sched.c 2011-10-10 14:35:32 +0000 +@@ -124,8 +124,10 @@ + /* A single instruction in the partial schedule. */ + struct ps_insn + { +- /* The corresponding DDG_NODE. */ +- ddg_node_ptr node; ++ /* Identifies the instruction to be scheduled. Values smaller than ++ the ddg's num_nodes refer directly to ddg nodes. A value of ++ X - num_nodes refers to register move X. */ ++ int id; + + /* The (absolute) cycle in which the PS instruction is scheduled. + Same as SCHED_TIME (node). */ +@@ -137,6 +139,33 @@ + + }; + ++/* Information about a register move that has been added to a partial ++ schedule. */ ++struct ps_reg_move_info ++{ ++ /* The source of the move is defined by the ps_insn with id DEF. ++ The destination is used by the ps_insns with the ids in USES. */ ++ int def; ++ sbitmap uses; ++ ++ /* The original form of USES' instructions used OLD_REG, but they ++ should now use NEW_REG. */ ++ rtx old_reg; ++ rtx new_reg; ++ ++ /* The number of consecutive stages that the move occupies. */ ++ int num_consecutive_stages; ++ ++ /* An instruction that sets NEW_REG to the correct value. The first ++ move associated with DEF will have an rhs of OLD_REG; later moves ++ use the result of the previous move. */ ++ rtx insn; ++}; ++ ++typedef struct ps_reg_move_info ps_reg_move_info; ++DEF_VEC_O (ps_reg_move_info); ++DEF_VEC_ALLOC_O (ps_reg_move_info, heap); ++ + /* Holds the partial schedule as an array of II rows. Each entry of the + array points to a linked list of PS_INSNs, which represents the + instructions that are scheduled for that row. */ +@@ -148,6 +177,10 @@ + /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */ + ps_insn_ptr *rows; + ++ /* All the moves added for this partial schedule. Index X has ++ a ps_insn id of X + g->num_nodes. */ ++ VEC (ps_reg_move_info, heap) *reg_moves; ++ + /* rows_length[i] holds the number of instructions in the row. + It is used only (as an optimization) to back off quickly from + trying to schedule a node in a full row; that is, to avoid running +@@ -165,17 +198,6 @@ + int stage_count; /* The stage count of the partial schedule. */ + }; + +-/* We use this to record all the register replacements we do in +- the kernel so we can undo SMS if it is not profitable. 
*/ +-struct undo_replace_buff_elem +-{ +- rtx insn; +- rtx orig_reg; +- rtx new_reg; +- struct undo_replace_buff_elem *next; +-}; +- +- + + static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history); + static void free_partial_schedule (partial_schedule_ptr); +@@ -183,9 +205,7 @@ + void print_partial_schedule (partial_schedule_ptr, FILE *); + static void verify_partial_schedule (partial_schedule_ptr, sbitmap); + static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr, +- ddg_node_ptr node, int cycle, +- sbitmap must_precede, +- sbitmap must_follow); ++ int, int, sbitmap, sbitmap); + static void rotate_partial_schedule (partial_schedule_ptr, int); + void set_row_column_for_ps (partial_schedule_ptr); + static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap); +@@ -201,43 +221,27 @@ + static void permute_partial_schedule (partial_schedule_ptr, rtx); + static void generate_prolog_epilog (partial_schedule_ptr, struct loop *, + rtx, rtx); +-static void duplicate_insns_of_cycles (partial_schedule_ptr, +- int, int, int, rtx); + static int calculate_stage_count (partial_schedule_ptr, int); + static void calculate_must_precede_follow (ddg_node_ptr, int, int, + int, int, sbitmap, sbitmap, sbitmap); + static int get_sched_window (partial_schedule_ptr, ddg_node_ptr, + sbitmap, int, int *, int *, int *); +-static bool try_scheduling_node_in_cycle (partial_schedule_ptr, ddg_node_ptr, +- int, int, sbitmap, int *, sbitmap, +- sbitmap); ++static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int, ++ sbitmap, int *, sbitmap, sbitmap); + static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr); + +-#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap) +-#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time) +-#define SCHED_FIRST_REG_MOVE(x) \ +- (((node_sched_params_ptr)(x)->aux.info)->first_reg_move) +-#define SCHED_NREG_MOVES(x) \ +- (((node_sched_params_ptr)(x)->aux.info)->nreg_moves) +-#define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row) +-#define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage) +-#define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column) ++#define NODE_ASAP(node) ((node)->aux.count) ++ ++#define SCHED_PARAMS(x) VEC_index (node_sched_params, node_sched_param_vec, x) ++#define SCHED_TIME(x) (SCHED_PARAMS (x)->time) ++#define SCHED_ROW(x) (SCHED_PARAMS (x)->row) ++#define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage) ++#define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column) + + /* The scheduling parameters held for each node. */ + typedef struct node_sched_params + { +- int asap; /* A lower-bound on the absolute scheduling cycle. */ +- int time; /* The absolute scheduling cycle (time >= asap). */ +- +- /* The following field (first_reg_move) is a pointer to the first +- register-move instruction added to handle the modulo-variable-expansion +- of the register defined by this node. This register-move copies the +- original register defined by the node. */ +- rtx first_reg_move; +- +- /* The number of register-move instructions added, immediately preceding +- first_reg_move. */ +- int nreg_moves; ++ int time; /* The absolute scheduling cycle. */ + + int row; /* Holds time % ii. */ + int stage; /* Holds time / ii. 
*/ +@@ -247,6 +251,9 @@ + int column; + } *node_sched_params_ptr; + ++typedef struct node_sched_params node_sched_params; ++DEF_VEC_O (node_sched_params); ++DEF_VEC_ALLOC_O (node_sched_params, heap); + + /* The following three functions are copied from the current scheduler + code in order to use sched_analyze() for computing the dependencies. +@@ -296,6 +303,49 @@ + 0 + }; + ++/* Partial schedule instruction ID in PS is a register move. Return ++ information about it. */ ++static struct ps_reg_move_info * ++ps_reg_move (partial_schedule_ptr ps, int id) ++{ ++ gcc_checking_assert (id >= ps->g->num_nodes); ++ return VEC_index (ps_reg_move_info, ps->reg_moves, id - ps->g->num_nodes); ++} ++ ++/* Return the rtl instruction that is being scheduled by partial schedule ++ instruction ID, which belongs to schedule PS. */ ++static rtx ++ps_rtl_insn (partial_schedule_ptr ps, int id) ++{ ++ if (id < ps->g->num_nodes) ++ return ps->g->nodes[id].insn; ++ else ++ return ps_reg_move (ps, id)->insn; ++} ++ ++/* Partial schedule instruction ID, which belongs to PS, occured in ++ the original (unscheduled) loop. Return the first instruction ++ in the loop that was associated with ps_rtl_insn (PS, ID). ++ If the instruction had some notes before it, this is the first ++ of those notes. */ ++static rtx ++ps_first_note (partial_schedule_ptr ps, int id) ++{ ++ gcc_assert (id < ps->g->num_nodes); ++ return ps->g->nodes[id].first_note; ++} ++ ++/* Return the number of consecutive stages that are occupied by ++ partial schedule instruction ID in PS. */ ++static int ++ps_num_consecutive_stages (partial_schedule_ptr ps, int id) ++{ ++ if (id < ps->g->num_nodes) ++ return 1; ++ else ++ return ps_reg_move (ps, id)->num_consecutive_stages; ++} ++ + /* Given HEAD and TAIL which are the first and last insns in a loop; + return the register which controls the loop. Return zero if it has + more than one occurrence in the loop besides the control part or the +@@ -396,35 +446,59 @@ + } + + +-/* Points to the array that contains the sched data for each node. */ +-static node_sched_params_ptr node_sched_params; ++/* A vector that contains the sched data for each ps_insn. */ ++static VEC (node_sched_params, heap) *node_sched_param_vec; + +-/* Allocate sched_params for each node and initialize it. Assumes that +- the aux field of each node contain the asap bound (computed earlier), +- and copies it into the sched_params field. */ ++/* Allocate sched_params for each node and initialize it. */ + static void + set_node_sched_params (ddg_ptr g) + { +- int i; +- +- /* Allocate for each node in the DDG a place to hold the "sched_data". */ +- /* Initialize ASAP/ALAP/HIGHT to zero. */ +- node_sched_params = (node_sched_params_ptr) +- xcalloc (g->num_nodes, +- sizeof (struct node_sched_params)); +- +- /* Set the pointer of the general data of the node to point to the +- appropriate sched_params structure. */ +- for (i = 0; i < g->num_nodes; i++) +- { +- /* Watch out for aliasing problems? */ +- node_sched_params[i].asap = g->nodes[i].aux.count; +- g->nodes[i].aux.info = &node_sched_params[i]; +- } +-} +- +-static void +-print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g) ++ VEC_truncate (node_sched_params, node_sched_param_vec, 0); ++ VEC_safe_grow_cleared (node_sched_params, heap, ++ node_sched_param_vec, g->num_nodes); ++} ++ ++/* Make sure that node_sched_param_vec has an entry for every move in PS. 
*/ ++static void ++extend_node_sched_params (partial_schedule_ptr ps) ++{ ++ VEC_safe_grow_cleared (node_sched_params, heap, node_sched_param_vec, ++ ps->g->num_nodes + VEC_length (ps_reg_move_info, ++ ps->reg_moves)); ++} ++ ++/* Update the sched_params (time, row and stage) for node U using the II, ++ the CYCLE of U and MIN_CYCLE. ++ We're not simply taking the following ++ SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii); ++ because the stages may not be aligned on cycle 0. */ ++static void ++update_node_sched_params (int u, int ii, int cycle, int min_cycle) ++{ ++ int sc_until_cycle_zero; ++ int stage; ++ ++ SCHED_TIME (u) = cycle; ++ SCHED_ROW (u) = SMODULO (cycle, ii); ++ ++ /* The calculation of stage count is done adding the number ++ of stages before cycle zero and after cycle zero. */ ++ sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii); ++ ++ if (SCHED_TIME (u) < 0) ++ { ++ stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii); ++ SCHED_STAGE (u) = sc_until_cycle_zero - stage; ++ } ++ else ++ { ++ stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii); ++ SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1; ++ } ++} ++ ++static void ++print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps) + { + int i; + +@@ -432,22 +506,170 @@ + return; + for (i = 0; i < num_nodes; i++) + { +- node_sched_params_ptr nsp = &node_sched_params[i]; +- rtx reg_move = nsp->first_reg_move; +- int j; ++ node_sched_params_ptr nsp = SCHED_PARAMS (i); + + fprintf (file, "Node = %d; INSN = %d\n", i, +- (INSN_UID (g->nodes[i].insn))); +- fprintf (file, " asap = %d:\n", nsp->asap); ++ INSN_UID (ps_rtl_insn (ps, i))); ++ fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i])); + fprintf (file, " time = %d:\n", nsp->time); +- fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves); +- for (j = 0; j < nsp->nreg_moves; j++) ++ fprintf (file, " stage = %d:\n", nsp->stage); ++ } ++} ++ ++/* Set SCHED_COLUMN for each instruction in row ROW of PS. */ ++static void ++set_columns_for_row (partial_schedule_ptr ps, int row) ++{ ++ ps_insn_ptr cur_insn; ++ int column; ++ ++ column = 0; ++ for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row) ++ SCHED_COLUMN (cur_insn->id) = column++; ++} ++ ++/* Set SCHED_COLUMN for each instruction in PS. */ ++static void ++set_columns_for_ps (partial_schedule_ptr ps) ++{ ++ int row; ++ ++ for (row = 0; row < ps->ii; row++) ++ set_columns_for_row (ps, row); ++} ++ ++/* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS. ++ Its single predecessor has already been scheduled, as has its ++ ddg node successors. (The move may have also another move as its ++ successor, in which case that successor will be scheduled later.) ++ ++ The move is part of a chain that satisfies register dependencies ++ between a producing ddg node and various consuming ddg nodes. ++ If some of these dependencies have a distance of 1 (meaning that ++ the use is upward-exposoed) then DISTANCE1_USES is nonnull and ++ contains the set of uses with distance-1 dependencies. ++ DISTANCE1_USES is null otherwise. ++ ++ MUST_FOLLOW is a scratch bitmap that is big enough to hold ++ all current ps_insn ids. ++ ++ Return true on success. 
*/ ++static bool ++schedule_reg_move (partial_schedule_ptr ps, int i_reg_move, ++ sbitmap distance1_uses, sbitmap must_follow) ++{ ++ unsigned int u; ++ int this_time, this_distance, this_start, this_end, this_latency; ++ int start, end, c, ii; ++ sbitmap_iterator sbi; ++ ps_reg_move_info *move; ++ rtx this_insn; ++ ps_insn_ptr psi; ++ ++ move = ps_reg_move (ps, i_reg_move); ++ ii = ps->ii; ++ if (dump_file) ++ { ++ fprintf (dump_file, "Scheduling register move INSN %d; ii = %d" ++ ", min cycle = %d\n\n", INSN_UID (move->insn), ii, ++ PS_MIN_CYCLE (ps)); ++ print_rtl_single (dump_file, move->insn); ++ fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time"); ++ fprintf (dump_file, "=========== =========== =====\n"); ++ } ++ ++ start = INT_MIN; ++ end = INT_MAX; ++ ++ /* For dependencies of distance 1 between a producer ddg node A ++ and consumer ddg node B, we have a chain of dependencies: ++ ++ A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B ++ ++ where Mi is the ith move. For dependencies of distance 0 between ++ a producer ddg node A and consumer ddg node C, we have a chain of ++ dependencies: ++ ++ A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C ++ ++ where Mi' occupies the same position as Mi but occurs a stage later. ++ We can only schedule each move once, so if we have both types of ++ chain, we model the second as: ++ ++ A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C ++ ++ First handle the dependencies between the previously-scheduled ++ predecessor and the move. */ ++ this_insn = ps_rtl_insn (ps, move->def); ++ this_latency = insn_latency (this_insn, move->insn); ++ this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0; ++ this_time = SCHED_TIME (move->def) - this_distance * ii; ++ this_start = this_time + this_latency; ++ this_end = this_time + ii; ++ if (dump_file) ++ fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n", ++ this_start, this_end, SCHED_TIME (move->def), ++ INSN_UID (this_insn), this_latency, this_distance, ++ INSN_UID (move->insn)); ++ ++ if (start < this_start) ++ start = this_start; ++ if (end > this_end) ++ end = this_end; ++ ++ /* Handle the dependencies between the move and previously-scheduled ++ successors. 
*/ ++ EXECUTE_IF_SET_IN_SBITMAP (move->uses, 0, u, sbi) ++ { ++ this_insn = ps_rtl_insn (ps, u); ++ this_latency = insn_latency (move->insn, this_insn); ++ if (distance1_uses && !TEST_BIT (distance1_uses, u)) ++ this_distance = -1; ++ else ++ this_distance = 0; ++ this_time = SCHED_TIME (u) + this_distance * ii; ++ this_start = this_time - ii; ++ this_end = this_time - this_latency; ++ if (dump_file) ++ fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n", ++ this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn), ++ this_latency, this_distance, INSN_UID (this_insn)); ++ ++ if (start < this_start) ++ start = this_start; ++ if (end > this_end) ++ end = this_end; ++ } ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "----------- ----------- -----\n"); ++ fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)"); ++ } ++ ++ sbitmap_zero (must_follow); ++ SET_BIT (must_follow, move->def); ++ ++ start = MAX (start, end - (ii - 1)); ++ for (c = end; c >= start; c--) ++ { ++ psi = ps_add_node_check_conflicts (ps, i_reg_move, c, ++ move->uses, must_follow); ++ if (psi) + { +- fprintf (file, " reg_move = "); +- print_rtl_single (file, reg_move); +- reg_move = PREV_INSN (reg_move); ++ update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps)); ++ if (dump_file) ++ fprintf (dump_file, "\nScheduled register move INSN %d at" ++ " time %d, row %d\n\n", INSN_UID (move->insn), c, ++ SCHED_ROW (i_reg_move)); ++ return true; + } + } ++ ++ if (dump_file) ++ fprintf (dump_file, "\nNo available slot\n\n"); ++ ++ return false; + } + + /* +@@ -461,22 +683,23 @@ + nreg_moves = ----------------------------------- + 1 - { dependence. + ii { 1 if not. + */ +-static struct undo_replace_buff_elem * +-generate_reg_moves (partial_schedule_ptr ps, bool rescan) ++static bool ++schedule_reg_moves (partial_schedule_ptr ps) + { + ddg_ptr g = ps->g; + int ii = ps->ii; + int i; +- struct undo_replace_buff_elem *reg_move_replaces = NULL; + + for (i = 0; i < g->num_nodes; i++) + { + ddg_node_ptr u = &g->nodes[i]; + ddg_edge_ptr e; + int nreg_moves = 0, i_reg_move; +- sbitmap *uses_of_defs; +- rtx last_reg_move; + rtx prev_reg, old_reg; ++ int first_move; ++ int distances[2]; ++ sbitmap must_follow; ++ sbitmap distance1_uses; + rtx set = single_set (u->insn); + + /* Skip instructions that do not set a register. */ +@@ -485,18 +708,21 @@ + + /* Compute the number of reg_moves needed for u, by looking at life + ranges started at u (excluding self-loops). */ ++ distances[0] = distances[1] = false; + for (e = u->out; e; e = e->next_out) + if (e->type == TRUE_DEP && e->dest != e->src) + { +- int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii; ++ int nreg_moves4e = (SCHED_TIME (e->dest->cuid) ++ - SCHED_TIME (e->src->cuid)) / ii; + + if (e->distance == 1) +- nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii; ++ nreg_moves4e = (SCHED_TIME (e->dest->cuid) ++ - SCHED_TIME (e->src->cuid) + ii) / ii; + + /* If dest precedes src in the schedule of the kernel, then dest + will read before src writes and we can save one reg_copy. 
*/ +- if (SCHED_ROW (e->dest) == SCHED_ROW (e->src) +- && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src)) ++ if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid) ++ && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid)) + nreg_moves4e--; + + if (nreg_moves4e >= 1) +@@ -513,125 +739,105 @@ + gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn)); + } + ++ if (nreg_moves4e) ++ { ++ gcc_assert (e->distance < 2); ++ distances[e->distance] = true; ++ } + nreg_moves = MAX (nreg_moves, nreg_moves4e); + } + + if (nreg_moves == 0) + continue; + ++ /* Create NREG_MOVES register moves. */ ++ first_move = VEC_length (ps_reg_move_info, ps->reg_moves); ++ VEC_safe_grow_cleared (ps_reg_move_info, heap, ps->reg_moves, ++ first_move + nreg_moves); ++ extend_node_sched_params (ps); ++ ++ /* Record the moves associated with this node. */ ++ first_move += ps->g->num_nodes; ++ ++ /* Generate each move. */ ++ old_reg = prev_reg = SET_DEST (single_set (u->insn)); ++ for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++) ++ { ++ ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move); ++ ++ move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i; ++ move->uses = sbitmap_alloc (first_move + nreg_moves); ++ move->old_reg = old_reg; ++ move->new_reg = gen_reg_rtx (GET_MODE (prev_reg)); ++ move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1; ++ move->insn = gen_move_insn (move->new_reg, copy_rtx (prev_reg)); ++ sbitmap_zero (move->uses); ++ ++ prev_reg = move->new_reg; ++ } ++ ++ distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL; ++ + /* Every use of the register defined by node may require a different + copy of this register, depending on the time the use is scheduled. +- Set a bitmap vector, telling which nodes use each copy of this +- register. */ +- uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes); +- sbitmap_vector_zero (uses_of_defs, nreg_moves); ++ Record which uses require which move results. */ + for (e = u->out; e; e = e->next_out) + if (e->type == TRUE_DEP && e->dest != e->src) + { +- int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii; ++ int dest_copy = (SCHED_TIME (e->dest->cuid) ++ - SCHED_TIME (e->src->cuid)) / ii; + + if (e->distance == 1) +- dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii; ++ dest_copy = (SCHED_TIME (e->dest->cuid) ++ - SCHED_TIME (e->src->cuid) + ii) / ii; + +- if (SCHED_ROW (e->dest) == SCHED_ROW (e->src) +- && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src)) ++ if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid) ++ && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid)) + dest_copy--; + + if (dest_copy) +- SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid); ++ { ++ ps_reg_move_info *move; ++ ++ move = ps_reg_move (ps, first_move + dest_copy - 1); ++ SET_BIT (move->uses, e->dest->cuid); ++ if (e->distance == 1) ++ SET_BIT (distance1_uses, e->dest->cuid); ++ } + } + +- /* Now generate the reg_moves, attaching relevant uses to them. */ +- SCHED_NREG_MOVES (u) = nreg_moves; +- old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn))); +- /* Insert the reg-moves right before the notes which precede +- the insn they relates to. 
*/ +- last_reg_move = u->first_note; +- ++ must_follow = sbitmap_alloc (first_move + nreg_moves); + for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++) ++ if (!schedule_reg_move (ps, first_move + i_reg_move, ++ distance1_uses, must_follow)) ++ break; ++ sbitmap_free (must_follow); ++ if (distance1_uses) ++ sbitmap_free (distance1_uses); ++ if (i_reg_move < nreg_moves) ++ return false; ++ } ++ return true; ++} ++ ++/* Emit the moves associatied with PS. Apply the substitutions ++ associated with them. */ ++static void ++apply_reg_moves (partial_schedule_ptr ps) ++{ ++ ps_reg_move_info *move; ++ int i; ++ ++ FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move) ++ { ++ unsigned int i_use; ++ sbitmap_iterator sbi; ++ ++ EXECUTE_IF_SET_IN_SBITMAP (move->uses, 0, i_use, sbi) + { +- unsigned int i_use = 0; +- rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg)); +- rtx reg_move = gen_move_insn (new_reg, prev_reg); +- sbitmap_iterator sbi; +- +- add_insn_before (reg_move, last_reg_move, NULL); +- last_reg_move = reg_move; +- +- if (!SCHED_FIRST_REG_MOVE (u)) +- SCHED_FIRST_REG_MOVE (u) = reg_move; +- +- EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi) +- { +- struct undo_replace_buff_elem *rep; +- +- rep = (struct undo_replace_buff_elem *) +- xcalloc (1, sizeof (struct undo_replace_buff_elem)); +- rep->insn = g->nodes[i_use].insn; +- rep->orig_reg = old_reg; +- rep->new_reg = new_reg; +- +- if (! reg_move_replaces) +- reg_move_replaces = rep; +- else +- { +- rep->next = reg_move_replaces; +- reg_move_replaces = rep; +- } +- +- replace_rtx (g->nodes[i_use].insn, old_reg, new_reg); +- if (rescan) +- df_insn_rescan (g->nodes[i_use].insn); +- } +- +- prev_reg = new_reg; ++ replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg); ++ df_insn_rescan (ps->g->nodes[i_use].insn); + } +- sbitmap_vector_free (uses_of_defs); +- } +- return reg_move_replaces; +-} +- +-/* Free memory allocated for the undo buffer. */ +-static void +-free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces) +-{ +- +- while (reg_move_replaces) +- { +- struct undo_replace_buff_elem *rep = reg_move_replaces; +- +- reg_move_replaces = reg_move_replaces->next; +- free (rep); +- } +-} +- +-/* Update the sched_params (time, row and stage) for node U using the II, +- the CYCLE of U and MIN_CYCLE. +- We're not simply taking the following +- SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii); +- because the stages may not be aligned on cycle 0. */ +-static void +-update_node_sched_params (ddg_node_ptr u, int ii, int cycle, int min_cycle) +-{ +- int sc_until_cycle_zero; +- int stage; +- +- SCHED_TIME (u) = cycle; +- SCHED_ROW (u) = SMODULO (cycle, ii); +- +- /* The calculation of stage count is done adding the number +- of stages before cycle zero and after cycle zero. */ +- sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii); +- +- if (SCHED_TIME (u) < 0) +- { +- stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii); +- SCHED_STAGE (u) = sc_until_cycle_zero - stage; +- } +- else +- { +- stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii); +- SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1; + } + } + +@@ -647,18 +853,19 @@ + for (row = 0; row < ii; row++) + for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) + { +- ddg_node_ptr u = crr_insn->node; ++ int u = crr_insn->id; + int normalized_time = SCHED_TIME (u) - amount; + int new_min_cycle = PS_MIN_CYCLE (ps) - amount; + + if (dump_file) + { + /* Print the scheduling times after the rotation. 
*/ ++ rtx insn = ps_rtl_insn (ps, u); ++ + fprintf (dump_file, "crr_insn->node=%d (insn id %d), " +- "crr_insn->cycle=%d, min_cycle=%d", crr_insn->node->cuid, +- INSN_UID (crr_insn->node->insn), normalized_time, +- new_min_cycle); +- if (JUMP_P (crr_insn->node->insn)) ++ "crr_insn->cycle=%d, min_cycle=%d", u, ++ INSN_UID (insn), normalized_time, new_min_cycle); ++ if (JUMP_P (insn)) + fprintf (dump_file, " (branch)"); + fprintf (dump_file, "\n"); + } +@@ -671,22 +878,6 @@ + } + } + +-/* Set SCHED_COLUMN of each node according to its position in PS. */ +-static void +-set_columns_for_ps (partial_schedule_ptr ps) +-{ +- int row; +- +- for (row = 0; row < ps->ii; row++) +- { +- ps_insn_ptr cur_insn = ps->rows[row]; +- int column = 0; +- +- for (; cur_insn; cur_insn = cur_insn->next_in_row) +- SCHED_COLUMN (cur_insn->node) = column++; +- } +-} +- + /* Permute the insns according to their order in PS, from row 0 to + row ii-1, and position them right before LAST. This schedules + the insns of the loop kernel. */ +@@ -699,9 +890,18 @@ + + for (row = 0; row < ii ; row++) + for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row) +- if (PREV_INSN (last) != ps_ij->node->insn) +- reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn, +- PREV_INSN (last)); ++ { ++ rtx insn = ps_rtl_insn (ps, ps_ij->id); ++ ++ if (PREV_INSN (last) != insn) ++ { ++ if (ps_ij->id < ps->g->num_nodes) ++ reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn, ++ PREV_INSN (last)); ++ else ++ add_insn_before (insn, last, NULL); ++ } ++ } + } + + /* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE +@@ -750,7 +950,7 @@ + to row ii-1. If they are equal just bail out. */ + stage_count = calculate_stage_count (ps, amount); + stage_count_curr = +- calculate_stage_count (ps, SCHED_TIME (g->closing_branch) - (ii - 1)); ++ calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1)); + + if (stage_count == stage_count_curr) + { +@@ -779,7 +979,7 @@ + print_partial_schedule (ps, dump_file); + } + +- if (SMODULO (SCHED_TIME (g->closing_branch), ii) == ii - 1) ++ if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1) + { + ok = true; + goto clear; +@@ -794,7 +994,7 @@ + { + bool success; + ps_insn_ptr next_ps_i; +- int branch_cycle = SCHED_TIME (g->closing_branch); ++ int branch_cycle = SCHED_TIME (g->closing_branch->cuid); + int row = SMODULO (branch_cycle, ps->ii); + int num_splits = 0; + sbitmap must_precede, must_follow, tmp_precede, tmp_follow; +@@ -850,13 +1050,12 @@ + branch so we can remove it from it's current cycle. 
*/ + for (next_ps_i = ps->rows[row]; + next_ps_i; next_ps_i = next_ps_i->next_in_row) +- if (next_ps_i->node->cuid == g->closing_branch->cuid) ++ if (next_ps_i->id == g->closing_branch->cuid) + break; + + remove_node_from_ps (ps, next_ps_i); + success = +- try_scheduling_node_in_cycle (ps, g->closing_branch, +- g->closing_branch->cuid, c, ++ try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c, + sched_nodes, &num_splits, + tmp_precede, tmp_follow); + gcc_assert (num_splits == 0); +@@ -874,8 +1073,7 @@ + must_precede, branch_cycle, start, end, + step); + success = +- try_scheduling_node_in_cycle (ps, g->closing_branch, +- g->closing_branch->cuid, ++ try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, + branch_cycle, sched_nodes, + &num_splits, tmp_precede, + tmp_follow); +@@ -889,7 +1087,7 @@ + fprintf (dump_file, + "SMS success in moving branch to cycle %d\n", c); + +- update_node_sched_params (g->closing_branch, ii, c, ++ update_node_sched_params (g->closing_branch->cuid, ii, c, + PS_MIN_CYCLE (ps)); + ok = true; + } +@@ -905,7 +1103,7 @@ + + static void + duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage, +- int to_stage, int for_prolog, rtx count_reg) ++ int to_stage, rtx count_reg) + { + int row; + ps_insn_ptr ps_ij; +@@ -913,9 +1111,9 @@ + for (row = 0; row < ps->ii; row++) + for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row) + { +- ddg_node_ptr u_node = ps_ij->node; +- int j, i_reg_moves; +- rtx reg_move = NULL_RTX; ++ int u = ps_ij->id; ++ int first_u, last_u; ++ rtx u_insn; + + /* Do not duplicate any insn which refers to count_reg as it + belongs to the control part. +@@ -923,52 +1121,20 @@ + be ignored. + TODO: This should be done by analyzing the control part of + the loop. */ +- if (reg_mentioned_p (count_reg, u_node->insn) +- || JUMP_P (ps_ij->node->insn)) ++ u_insn = ps_rtl_insn (ps, u); ++ if (reg_mentioned_p (count_reg, u_insn) ++ || JUMP_P (u_insn)) + continue; + +- if (for_prolog) +- { +- /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing +- number of reg_moves starting with the second occurrence of +- u_node, which is generated if its SCHED_STAGE <= to_stage. */ +- i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1; +- i_reg_moves = MAX (i_reg_moves, 0); +- i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node)); +- +- /* The reg_moves start from the *first* reg_move backwards. */ +- if (i_reg_moves) +- { +- reg_move = SCHED_FIRST_REG_MOVE (u_node); +- for (j = 1; j < i_reg_moves; j++) +- reg_move = PREV_INSN (reg_move); +- } +- } +- else /* It's for the epilog. */ +- { +- /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves, +- starting to decrease one stage after u_node no longer occurs; +- that is, generate all reg_moves until +- SCHED_STAGE (u_node) == from_stage - 1. */ +- i_reg_moves = SCHED_NREG_MOVES (u_node) +- - (from_stage - SCHED_STAGE (u_node) - 1); +- i_reg_moves = MAX (i_reg_moves, 0); +- i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node)); +- +- /* The reg_moves start from the *last* reg_move forwards. 
*/ +- if (i_reg_moves) +- { +- reg_move = SCHED_FIRST_REG_MOVE (u_node); +- for (j = 1; j < SCHED_NREG_MOVES (u_node); j++) +- reg_move = PREV_INSN (reg_move); +- } +- } +- +- for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move)) +- emit_insn (copy_rtx (PATTERN (reg_move))); +- if (SCHED_STAGE (u_node) >= from_stage +- && SCHED_STAGE (u_node) <= to_stage) +- duplicate_insn_chain (u_node->first_note, u_node->insn); ++ first_u = SCHED_STAGE (u); ++ last_u = first_u + ps_num_consecutive_stages (ps, u) - 1; ++ if (from_stage <= last_u && to_stage >= first_u) ++ { ++ if (u < ps->g->num_nodes) ++ duplicate_insn_chain (ps_first_note (ps, u), u_insn); ++ else ++ emit_insn (copy_rtx (PATTERN (u_insn))); ++ } + } + } + +@@ -1002,7 +1168,7 @@ + } + + for (i = 0; i < last_stage; i++) +- duplicate_insns_of_cycles (ps, 0, i, 1, count_reg); ++ duplicate_insns_of_cycles (ps, 0, i, count_reg); + + /* Put the prolog on the entry edge. */ + e = loop_preheader_edge (loop); +@@ -1014,7 +1180,7 @@ + start_sequence (); + + for (i = 0; i < last_stage; i++) +- duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg); ++ duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg); + + /* Put the epilogue on the exit edge. */ + gcc_assert (single_exit (loop)); +@@ -1350,10 +1516,9 @@ + { + rtx head, tail; + rtx count_reg, count_init; +- int mii, rec_mii; +- unsigned stage_count = 0; ++ int mii, rec_mii, stage_count, min_cycle; + HOST_WIDEST_INT loop_count = 0; +- bool opt_sc_p = false; ++ bool opt_sc_p; + + if (! (g = g_arr[loop->num])) + continue; +@@ -1430,62 +1595,63 @@ + fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n", + rec_mii, mii, maxii); + +- /* After sms_order_nodes and before sms_schedule_by_order, to copy over +- ASAP. */ +- set_node_sched_params (g); +- +- ps = sms_schedule_by_order (g, mii, maxii, node_order); +- +- if (ps) ++ for (;;) + { +- /* Try to achieve optimized SC by normalizing the partial +- schedule (having the cycles start from cycle zero). +- The branch location must be placed in row ii-1 in the +- final scheduling. If failed, shift all instructions to +- position the branch in row ii-1. */ +- opt_sc_p = optimize_sc (ps, g); +- if (opt_sc_p) +- stage_count = calculate_stage_count (ps, 0); +- else ++ set_node_sched_params (g); ++ ++ stage_count = 0; ++ opt_sc_p = false; ++ ps = sms_schedule_by_order (g, mii, maxii, node_order); ++ ++ if (ps) + { +- /* Bring the branch to cycle ii-1. */ +- int amount = SCHED_TIME (g->closing_branch) - (ps->ii - 1); ++ /* Try to achieve optimized SC by normalizing the partial ++ schedule (having the cycles start from cycle zero). ++ The branch location must be placed in row ii-1 in the ++ final scheduling. If failed, shift all instructions to ++ position the branch in row ii-1. */ ++ opt_sc_p = optimize_sc (ps, g); ++ if (opt_sc_p) ++ stage_count = calculate_stage_count (ps, 0); ++ else ++ { ++ /* Bring the branch to cycle ii-1. */ ++ int amount = (SCHED_TIME (g->closing_branch->cuid) ++ - (ps->ii - 1)); + ++ if (dump_file) ++ fprintf (dump_file, "SMS schedule branch at cycle ii-1\n"); ++ ++ stage_count = calculate_stage_count (ps, amount); ++ } ++ ++ gcc_assert (stage_count >= 1); ++ } ++ ++ /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of ++ 1 means that there is no interleaving between iterations thus ++ we let the scheduling passes do the job in this case. 
*/ ++ if (stage_count < PARAM_VALUE (PARAM_SMS_MIN_SC) ++ || (count_init && (loop_count <= stage_count)) ++ || (flag_branch_probabilities && (trip_count <= stage_count))) ++ { + if (dump_file) +- fprintf (dump_file, "SMS schedule branch at cycle ii-1\n"); +- +- stage_count = calculate_stage_count (ps, amount); +- } +- +- gcc_assert (stage_count >= 1); +- PS_STAGE_COUNT (ps) = stage_count; +- } +- +- /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of +- 1 means that there is no interleaving between iterations thus +- we let the scheduling passes do the job in this case. */ +- if (stage_count < (unsigned) PARAM_VALUE (PARAM_SMS_MIN_SC) +- || (count_init && (loop_count <= stage_count)) +- || (flag_branch_probabilities && (trip_count <= stage_count))) +- { +- if (dump_file) +- { +- fprintf (dump_file, "SMS failed... \n"); +- fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count); +- fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count); +- fprintf (dump_file, ", trip-count="); +- fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count); +- fprintf (dump_file, ")\n"); +- } +- } +- else +- { +- struct undo_replace_buff_elem *reg_move_replaces; ++ { ++ fprintf (dump_file, "SMS failed... \n"); ++ fprintf (dump_file, "SMS sched-failed (stage-count=%d," ++ " loop-count=", stage_count); ++ fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count); ++ fprintf (dump_file, ", trip-count="); ++ fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count); ++ fprintf (dump_file, ")\n"); ++ } ++ break; ++ } + + if (!opt_sc_p) + { + /* Rotate the partial schedule to have the branch in row ii-1. */ +- int amount = SCHED_TIME (g->closing_branch) - (ps->ii - 1); ++ int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1); + + reset_sched_times (ps, amount); + rotate_partial_schedule (ps, amount); +@@ -1493,6 +1659,29 @@ + + set_columns_for_ps (ps); + ++ min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii); ++ if (!schedule_reg_moves (ps)) ++ { ++ mii = ps->ii + 1; ++ free_partial_schedule (ps); ++ continue; ++ } ++ ++ /* Moves that handle incoming values might have been added ++ to a new first stage. Bump the stage count if so. ++ ++ ??? Perhaps we could consider rotating the schedule here ++ instead? */ ++ if (PS_MIN_CYCLE (ps) < min_cycle) ++ { ++ reset_sched_times (ps, 0); ++ stage_count++; ++ } ++ ++ /* The stage count should now be correct without rotation. */ ++ gcc_checking_assert (stage_count == calculate_stage_count (ps, 0)); ++ PS_STAGE_COUNT (ps) = stage_count; ++ + canon_loop (loop); + + if (dump_file) +@@ -1531,17 +1720,16 @@ + /* The life-info is not valid any more. */ + df_set_bb_dirty (g->bb); + +- reg_move_replaces = generate_reg_moves (ps, true); ++ apply_reg_moves (ps); + if (dump_file) +- print_node_sched_params (dump_file, g->num_nodes, g); ++ print_node_sched_params (dump_file, g->num_nodes, ps); + /* Generate prolog and epilog. 
*/ + generate_prolog_epilog (ps, loop, count_reg, count_init); +- +- free_undo_replace_buff (reg_move_replaces); ++ break; + } + + free_partial_schedule (ps); +- free (node_sched_params); ++ VEC_free (node_sched_params, heap, node_sched_param_vec); + free (node_order); + free_ddg (g); + } +@@ -1643,9 +1831,11 @@ + + static int + get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node, +- sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p) ++ sbitmap sched_nodes, int ii, int *start_p, int *step_p, ++ int *end_p) + { + int start, step, end; ++ int early_start, late_start; + ddg_edge_ptr e; + sbitmap psp = sbitmap_alloc (ps->g->num_nodes); + sbitmap pss = sbitmap_alloc (ps->g->num_nodes); +@@ -1653,6 +1843,8 @@ + sbitmap u_node_succs = NODE_SUCCESSORS (u_node); + int psp_not_empty; + int pss_not_empty; ++ int count_preds; ++ int count_succs; + + /* 1. compute sched window for u (start, end, step). */ + sbitmap_zero (psp); +@@ -1660,214 +1852,119 @@ + psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes); + pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes); + +- if (psp_not_empty && !pss_not_empty) +- { +- int early_start = INT_MIN; +- +- end = INT_MAX; +- for (e = u_node->in; e != 0; e = e->next_in) +- { +- ddg_node_ptr v_node = e->src; +- +- if (dump_file) +- { +- fprintf (dump_file, "\nProcessing edge: "); +- print_ddg_edge (dump_file, e); +- fprintf (dump_file, +- "\nScheduling %d (%d) in psp_not_empty," +- " checking p %d (%d): ", u_node->cuid, +- INSN_UID (u_node->insn), v_node->cuid, INSN_UID +- (v_node->insn)); +- } +- +- if (TEST_BIT (sched_nodes, v_node->cuid)) +- { +- int p_st = SCHED_TIME (v_node); +- +- early_start = +- MAX (early_start, p_st + e->latency - (e->distance * ii)); +- +- if (dump_file) +- fprintf (dump_file, +- "pred st = %d; early_start = %d; latency: %d", +- p_st, early_start, e->latency); +- +- if (e->data_type == MEM_DEP) +- end = MIN (end, SCHED_TIME (v_node) + ii - 1); +- } +- else if (dump_file) +- fprintf (dump_file, "the node is not scheduled\n"); +- } +- start = early_start; +- end = MIN (end, early_start + ii); +- /* Schedule the node close to it's predecessors. */ +- step = 1; +- +- if (dump_file) +- fprintf (dump_file, +- "\nScheduling %d (%d) in a window (%d..%d) with step %d\n", +- u_node->cuid, INSN_UID (u_node->insn), start, end, step); +- } +- +- else if (!psp_not_empty && pss_not_empty) +- { +- int late_start = INT_MAX; +- +- end = INT_MIN; +- for (e = u_node->out; e != 0; e = e->next_out) +- { +- ddg_node_ptr v_node = e->dest; +- +- if (dump_file) +- { +- fprintf (dump_file, "\nProcessing edge:"); +- print_ddg_edge (dump_file, e); +- fprintf (dump_file, +- "\nScheduling %d (%d) in pss_not_empty," +- " checking s %d (%d): ", u_node->cuid, +- INSN_UID (u_node->insn), v_node->cuid, INSN_UID +- (v_node->insn)); +- } +- +- if (TEST_BIT (sched_nodes, v_node->cuid)) +- { +- int s_st = SCHED_TIME (v_node); +- +- late_start = MIN (late_start, +- s_st - e->latency + (e->distance * ii)); +- +- if (dump_file) +- fprintf (dump_file, +- "succ st = %d; late_start = %d; latency = %d", +- s_st, late_start, e->latency); +- +- if (e->data_type == MEM_DEP) +- end = MAX (end, SCHED_TIME (v_node) - ii + 1); +- if (dump_file) +- fprintf (dump_file, "end = %d\n", end); +- +- } +- else if (dump_file) +- fprintf (dump_file, "the node is not scheduled\n"); +- +- } +- start = late_start; +- end = MAX (end, late_start - ii); +- /* Schedule the node close to it's successors. 
*/ ++ /* We first compute a forward range (start <= end), then decide whether ++ to reverse it. */ ++ early_start = INT_MIN; ++ late_start = INT_MAX; ++ start = INT_MIN; ++ end = INT_MAX; ++ step = 1; ++ ++ count_preds = 0; ++ count_succs = 0; ++ ++ if (dump_file && (psp_not_empty || pss_not_empty)) ++ { ++ fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)" ++ "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii); ++ fprintf (dump_file, "%11s %11s %11s %11s %5s\n", ++ "start", "early start", "late start", "end", "time"); ++ fprintf (dump_file, "=========== =========== =========== ===========" ++ " =====\n"); ++ } ++ /* Calculate early_start and limit end. Both bounds are inclusive. */ ++ if (psp_not_empty) ++ for (e = u_node->in; e != 0; e = e->next_in) ++ { ++ int v = e->src->cuid; ++ ++ if (TEST_BIT (sched_nodes, v)) ++ { ++ int p_st = SCHED_TIME (v); ++ int earliest = p_st + e->latency - (e->distance * ii); ++ int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX); ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "%11s %11d %11s %11d %5d", ++ "", earliest, "", latest, p_st); ++ print_ddg_edge (dump_file, e); ++ fprintf (dump_file, "\n"); ++ } ++ ++ early_start = MAX (early_start, earliest); ++ end = MIN (end, latest); ++ ++ if (e->type == TRUE_DEP && e->data_type == REG_DEP) ++ count_preds++; ++ } ++ } ++ ++ /* Calculate late_start and limit start. Both bounds are inclusive. */ ++ if (pss_not_empty) ++ for (e = u_node->out; e != 0; e = e->next_out) ++ { ++ int v = e->dest->cuid; ++ ++ if (TEST_BIT (sched_nodes, v)) ++ { ++ int s_st = SCHED_TIME (v); ++ int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN); ++ int latest = s_st - e->latency + (e->distance * ii); ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "%11d %11s %11d %11s %5d", ++ earliest, "", latest, "", s_st); ++ print_ddg_edge (dump_file, e); ++ fprintf (dump_file, "\n"); ++ } ++ ++ start = MAX (start, earliest); ++ late_start = MIN (late_start, latest); ++ ++ if (e->type == TRUE_DEP && e->data_type == REG_DEP) ++ count_succs++; ++ } ++ } ++ ++ if (dump_file && (psp_not_empty || pss_not_empty)) ++ { ++ fprintf (dump_file, "----------- ----------- ----------- -----------" ++ " -----\n"); ++ fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n", ++ start, early_start, late_start, end, "", ++ "(max, max, min, min)"); ++ } ++ ++ /* Get a target scheduling window no bigger than ii. */ ++ if (early_start == INT_MIN && late_start == INT_MAX) ++ early_start = NODE_ASAP (u_node); ++ else if (early_start == INT_MIN) ++ early_start = late_start - (ii - 1); ++ late_start = MIN (late_start, early_start + (ii - 1)); ++ ++ /* Apply memory dependence limits. */ ++ start = MAX (start, early_start); ++ end = MIN (end, late_start); ++ ++ if (dump_file && (psp_not_empty || pss_not_empty)) ++ fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n", ++ "", start, end, "", ""); ++ ++ /* If there are at least as many successors as predecessors, schedule the ++ node close to its successors. 
*/ ++ if (pss_not_empty && count_succs >= count_preds) ++ { ++ int tmp = end; ++ end = start; ++ start = tmp; + step = -1; +- +- if (dump_file) +- fprintf (dump_file, +- "\nScheduling %d (%d) in a window (%d..%d) with step %d\n", +- u_node->cuid, INSN_UID (u_node->insn), start, end, step); +- +- } +- +- else if (psp_not_empty && pss_not_empty) +- { +- int early_start = INT_MIN; +- int late_start = INT_MAX; +- int count_preds = 0; +- int count_succs = 0; +- +- start = INT_MIN; +- end = INT_MAX; +- for (e = u_node->in; e != 0; e = e->next_in) +- { +- ddg_node_ptr v_node = e->src; +- +- if (dump_file) +- { +- fprintf (dump_file, "\nProcessing edge:"); +- print_ddg_edge (dump_file, e); +- fprintf (dump_file, +- "\nScheduling %d (%d) in psp_pss_not_empty," +- " checking p %d (%d): ", u_node->cuid, INSN_UID +- (u_node->insn), v_node->cuid, INSN_UID +- (v_node->insn)); +- } +- +- if (TEST_BIT (sched_nodes, v_node->cuid)) +- { +- int p_st = SCHED_TIME (v_node); +- +- early_start = MAX (early_start, +- p_st + e->latency +- - (e->distance * ii)); +- +- if (dump_file) +- fprintf (dump_file, +- "pred st = %d; early_start = %d; latency = %d", +- p_st, early_start, e->latency); +- +- if (e->type == TRUE_DEP && e->data_type == REG_DEP) +- count_preds++; +- +- if (e->data_type == MEM_DEP) +- end = MIN (end, SCHED_TIME (v_node) + ii - 1); +- } +- else if (dump_file) +- fprintf (dump_file, "the node is not scheduled\n"); +- +- } +- for (e = u_node->out; e != 0; e = e->next_out) +- { +- ddg_node_ptr v_node = e->dest; +- +- if (dump_file) +- { +- fprintf (dump_file, "\nProcessing edge:"); +- print_ddg_edge (dump_file, e); +- fprintf (dump_file, +- "\nScheduling %d (%d) in psp_pss_not_empty," +- " checking s %d (%d): ", u_node->cuid, INSN_UID +- (u_node->insn), v_node->cuid, INSN_UID +- (v_node->insn)); +- } +- +- if (TEST_BIT (sched_nodes, v_node->cuid)) +- { +- int s_st = SCHED_TIME (v_node); +- +- late_start = MIN (late_start, +- s_st - e->latency +- + (e->distance * ii)); +- +- if (dump_file) +- fprintf (dump_file, +- "succ st = %d; late_start = %d; latency = %d", +- s_st, late_start, e->latency); +- +- if (e->type == TRUE_DEP && e->data_type == REG_DEP) +- count_succs++; +- +- if (e->data_type == MEM_DEP) +- start = MAX (start, SCHED_TIME (v_node) - ii + 1); +- } +- else if (dump_file) +- fprintf (dump_file, "the node is not scheduled\n"); +- +- } +- start = MAX (start, early_start); +- end = MIN (end, MIN (early_start + ii, late_start + 1)); +- step = 1; +- /* If there are more successors than predecessors schedule the +- node close to it's successors. */ +- if (count_succs >= count_preds) +- { +- int old_start = start; +- +- start = end - 1; +- end = old_start - 1; +- step = -1; +- } +- } +- else /* psp is empty && pss is empty. */ +- { +- start = SCHED_ASAP (u_node); +- end = start + ii; +- step = 1; +- } ++ } ++ ++ /* Now that we've finalized the window, make END an exclusive rather ++ than an inclusive bound. 
*/ ++ end += step; + + *start_p = start; + *step_p = step; +@@ -1880,10 +1977,10 @@ + if (dump_file) + fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n", + start, end, step); +- return -1; ++ return -1; + } + +- return 0; ++ return 0; + } + + /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the +@@ -1939,7 +2036,7 @@ + SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */ + for (e = u_node->in; e != 0; e = e->next_in) + if (TEST_BIT (sched_nodes, e->src->cuid) +- && ((SCHED_TIME (e->src) - (e->distance * ii)) == ++ && ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) == + first_cycle_in_window)) + { + if (dump_file) +@@ -1964,7 +2061,7 @@ + SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */ + for (e = u_node->out; e != 0; e = e->next_out) + if (TEST_BIT (sched_nodes, e->dest->cuid) +- && ((SCHED_TIME (e->dest) + (e->distance * ii)) == ++ && ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) == + last_cycle_in_window)) + { + if (dump_file) +@@ -1988,7 +2085,7 @@ + last row of the scheduling window) */ + + static bool +-try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node, ++try_scheduling_node_in_cycle (partial_schedule_ptr ps, + int u, int cycle, sbitmap sched_nodes, + int *num_splits, sbitmap must_precede, + sbitmap must_follow) +@@ -1997,11 +2094,10 @@ + bool success = 0; + + verify_partial_schedule (ps, sched_nodes); +- psi = ps_add_node_check_conflicts (ps, u_node, cycle, +- must_precede, must_follow); ++ psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow); + if (psi) + { +- SCHED_TIME (u_node) = cycle; ++ SCHED_TIME (u) = cycle; + SET_BIT (sched_nodes, u); + success = 1; + *num_splits = 0; +@@ -2062,8 +2158,8 @@ + &step, &end) == 0) + { + if (dump_file) +- fprintf (dump_file, "\nTrying to schedule node %d \ +- INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID ++ fprintf (dump_file, "\nTrying to schedule node %d " ++ "INSN = %d in (%d .. 
%d) step %d\n", u, (INSN_UID + (g->nodes[u].insn)), start, end, step); + + gcc_assert ((step > 0 && start < end) +@@ -2081,7 +2177,7 @@ + &tmp_precede, must_precede, + c, start, end, step); + success = +- try_scheduling_node_in_cycle (ps, u_node, u, c, ++ try_scheduling_node_in_cycle (ps, u, c, + sched_nodes, + &num_splits, tmp_precede, + tmp_follow); +@@ -2181,7 +2277,7 @@ + for (crr_insn = rows_new[row]; + crr_insn; crr_insn = crr_insn->next_in_row) + { +- ddg_node_ptr u = crr_insn->node; ++ int u = crr_insn->id; + int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii); + + SCHED_TIME (u) = new_time; +@@ -2202,7 +2298,7 @@ + for (crr_insn = rows_new[row + 1]; + crr_insn; crr_insn = crr_insn->next_in_row) + { +- ddg_node_ptr u = crr_insn->node; ++ int u = crr_insn->id; + int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1; + + SCHED_TIME (u) = new_time; +@@ -2242,24 +2338,24 @@ + { + ddg_edge_ptr e; + int lower = INT_MIN, upper = INT_MAX; +- ddg_node_ptr crit_pred = NULL; +- ddg_node_ptr crit_succ = NULL; ++ int crit_pred = -1; ++ int crit_succ = -1; + int crit_cycle; + + for (e = u_node->in; e != 0; e = e->next_in) + { +- ddg_node_ptr v_node = e->src; ++ int v = e->src->cuid; + +- if (TEST_BIT (sched_nodes, v_node->cuid) +- && (low == SCHED_TIME (v_node) + e->latency - (e->distance * ii))) +- if (SCHED_TIME (v_node) > lower) ++ if (TEST_BIT (sched_nodes, v) ++ && (low == SCHED_TIME (v) + e->latency - (e->distance * ii))) ++ if (SCHED_TIME (v) > lower) + { +- crit_pred = v_node; +- lower = SCHED_TIME (v_node); ++ crit_pred = v; ++ lower = SCHED_TIME (v); + } + } + +- if (crit_pred != NULL) ++ if (crit_pred >= 0) + { + crit_cycle = SCHED_TIME (crit_pred) + 1; + return SMODULO (crit_cycle, ii); +@@ -2267,17 +2363,18 @@ + + for (e = u_node->out; e != 0; e = e->next_out) + { +- ddg_node_ptr v_node = e->dest; +- if (TEST_BIT (sched_nodes, v_node->cuid) +- && (up == SCHED_TIME (v_node) - e->latency + (e->distance * ii))) +- if (SCHED_TIME (v_node) < upper) ++ int v = e->dest->cuid; ++ ++ if (TEST_BIT (sched_nodes, v) ++ && (up == SCHED_TIME (v) - e->latency + (e->distance * ii))) ++ if (SCHED_TIME (v) < upper) + { +- crit_succ = v_node; +- upper = SCHED_TIME (v_node); ++ crit_succ = v; ++ upper = SCHED_TIME (v); + } + } + +- if (crit_succ != NULL) ++ if (crit_succ >= 0) + { + crit_cycle = SCHED_TIME (crit_succ); + return SMODULO (crit_cycle, ii); +@@ -2301,10 +2398,10 @@ + + for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) + { +- ddg_node_ptr u = crr_insn->node; ++ int u = crr_insn->id; + + length++; +- gcc_assert (TEST_BIT (sched_nodes, u->cuid)); ++ gcc_assert (TEST_BIT (sched_nodes, u)); + /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by + popcount (sched_nodes) == number of insns in ps. 
*/ + gcc_assert (SCHED_TIME (u) >= ps->min_cycle); +@@ -2719,6 +2816,7 @@ + partial_schedule_ptr ps = XNEW (struct partial_schedule); + ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr)); + ps->rows_length = (int *) xcalloc (ii, sizeof (int)); ++ ps->reg_moves = NULL; + ps->ii = ii; + ps->history = history; + ps->min_cycle = INT_MAX; +@@ -2753,8 +2851,16 @@ + static void + free_partial_schedule (partial_schedule_ptr ps) + { ++ ps_reg_move_info *move; ++ unsigned int i; ++ + if (!ps) + return; ++ ++ FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move) ++ sbitmap_free (move->uses); ++ VEC_free (ps_reg_move_info, heap, ps->reg_moves); ++ + free_ps_insns (ps); + free (ps->rows); + free (ps->rows_length); +@@ -2796,12 +2902,12 @@ + fprintf (dump, "\n[ROW %d ]: ", i); + while (ps_i) + { +- if (JUMP_P (ps_i->node->insn)) +- fprintf (dump, "%d (branch), ", +- INSN_UID (ps_i->node->insn)); ++ rtx insn = ps_rtl_insn (ps, ps_i->id); ++ ++ if (JUMP_P (insn)) ++ fprintf (dump, "%d (branch), ", INSN_UID (insn)); + else +- fprintf (dump, "%d, ", +- INSN_UID (ps_i->node->insn)); ++ fprintf (dump, "%d, ", INSN_UID (insn)); + + ps_i = ps_i->next_in_row; + } +@@ -2810,11 +2916,11 @@ + + /* Creates an object of PS_INSN and initializes it to the given parameters. */ + static ps_insn_ptr +-create_ps_insn (ddg_node_ptr node, int cycle) ++create_ps_insn (int id, int cycle) + { + ps_insn_ptr ps_i = XNEW (struct ps_insn); + +- ps_i->node = node; ++ ps_i->id = id; + ps_i->next_in_row = NULL; + ps_i->prev_in_row = NULL; + ps_i->cycle = cycle; +@@ -2879,10 +2985,11 @@ + next_ps_i; + next_ps_i = next_ps_i->next_in_row) + { +- if (must_follow && TEST_BIT (must_follow, next_ps_i->node->cuid) ++ if (must_follow ++ && TEST_BIT (must_follow, next_ps_i->id) + && ! first_must_follow) + first_must_follow = next_ps_i; +- if (must_precede && TEST_BIT (must_precede, next_ps_i->node->cuid)) ++ if (must_precede && TEST_BIT (must_precede, next_ps_i->id)) + { + /* If we have already met a node that must follow, then + there is no possible column. */ +@@ -2893,8 +3000,8 @@ + } + /* The closing branch must be the last in the row. */ + if (must_precede +- && TEST_BIT (must_precede, next_ps_i->node->cuid) +- && JUMP_P (next_ps_i->node->insn)) ++ && TEST_BIT (must_precede, next_ps_i->id) ++ && JUMP_P (ps_rtl_insn (ps, next_ps_i->id))) + return false; + + last_in_row = next_ps_i; +@@ -2903,7 +3010,7 @@ + /* The closing branch is scheduled as well. Make sure there is no + dependent instruction after it as the branch should be the last + instruction in the row. */ +- if (JUMP_P (ps_i->node->insn)) ++ if (JUMP_P (ps_rtl_insn (ps, ps_i->id))) + { + if (first_must_follow) + return false; +@@ -2954,7 +3061,6 @@ + { + ps_insn_ptr prev, next; + int row; +- ddg_node_ptr next_node; + + if (!ps || !ps_i) + return false; +@@ -2964,11 +3070,9 @@ + if (! ps_i->next_in_row) + return false; + +- next_node = ps_i->next_in_row->node; +- + /* Check if next_in_row is dependent on ps_i, both having same sched + times (typically ANTI_DEP). If so, ps_i cannot skip over it. */ +- if (must_follow && TEST_BIT (must_follow, next_node->cuid)) ++ if (must_follow && TEST_BIT (must_follow, ps_i->next_in_row->id)) + return false; + + /* Advance PS_I over its next_in_row in the doubly linked list. */ +@@ -2999,7 +3103,7 @@ + before/after (respectively) the node pointed to by PS_I when scheduled + in the same cycle. 
*/ + static ps_insn_ptr +-add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle, ++add_node_to_ps (partial_schedule_ptr ps, int id, int cycle, + sbitmap must_precede, sbitmap must_follow) + { + ps_insn_ptr ps_i; +@@ -3008,7 +3112,7 @@ + if (ps->rows_length[row] >= issue_rate) + return NULL; + +- ps_i = create_ps_insn (node, cycle); ++ ps_i = create_ps_insn (id, cycle); + + /* Finds and inserts PS_I according to MUST_FOLLOW and + MUST_PRECEDE. */ +@@ -3060,7 +3164,7 @@ + crr_insn; + crr_insn = crr_insn->next_in_row) + { +- rtx insn = crr_insn->node->insn; ++ rtx insn = ps_rtl_insn (ps, crr_insn->id); + + if (!NONDEBUG_INSN_P (insn)) + continue; +@@ -3097,7 +3201,7 @@ + cuid N must be come before/after (respectively) the node pointed to by + PS_I when scheduled in the same cycle. */ + ps_insn_ptr +-ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n, ++ps_add_node_check_conflicts (partial_schedule_ptr ps, int n, + int c, sbitmap must_precede, + sbitmap must_follow) + { + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106829.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106829.patch new file mode 100644 index 0000000000..02f8e51779 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106829.patch @@ -0,0 +1,147 @@ +2011-10-19 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF: + + 2011-09-09 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/arm-cores.def (generic-armv7-a): New architecture. + * config/arm/arm-tables.opt: Regenerate. + * config/arm/arm-tune.md: Regenerate. + * config/arm/arm.c (arm_file_start): Output .arch directive when + user passes -mcpu=generic-*. + (arm_issue_rate): Add genericv7a support. + * config/arm/arm.h (EXTRA_SPECS): Add asm_cpu_spec. + (ASM_CPU_SPEC): New define. + * config/arm/elf.h (ASM_SPEC): Use %(asm_cpu_spec). + * config/arm/semi.h (ASM_SPEC): Likewise. + * doc/invoke.texi (ARM Options): Document -mcpu=generic-* + and -mtune=generic-*. 
+ +=== modified file 'gcc/config/arm/arm-cores.def' +--- old/gcc/config/arm/arm-cores.def 2011-06-14 16:00:30 +0000 ++++ new/gcc/config/arm/arm-cores.def 2011-10-19 16:46:51 +0000 +@@ -124,6 +124,7 @@ + ARM_CORE("mpcore", mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e) + ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, v6t2) + ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, v6t2) ++ARM_CORE("generic-armv7-a", genericv7a, 7A, FL_LDSCHED, cortex) + ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, cortex_a5) + ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex) + ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9) +@@ -135,3 +136,4 @@ + ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex) + ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, cortex) + ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, cortex) ++ + +=== modified file 'gcc/config/arm/arm-tune.md' +--- old/gcc/config/arm/arm-tune.md 2011-06-14 14:37:30 +0000 ++++ new/gcc/config/arm/arm-tune.md 2011-10-19 16:46:51 +0000 +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from arm-cores.def + (define_attr "tune" +- "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0" ++ "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0" + (const (symbol_ref "((enum attr_tune) arm_tune)"))) + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-10-11 02:31:01 +0000 ++++ new/gcc/config/arm/arm.c 2011-10-19 16:46:51 +0000 +@@ -22185,6 +22185,8 @@ + const char *fpu_name; + if (arm_selected_arch) + asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name); ++ else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0) ++ asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8); + else + asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name); + +@@ -23717,6 +23719,7 @@ + case cortexr4: + case cortexr4f: + case cortexr5: ++ case genericv7a: + case cortexa5: + case cortexa8: + case cortexa9: + +=== modified file 'gcc/config/arm/arm.h' +--- old/gcc/config/arm/arm.h 2011-09-05 14:32:11 +0000 ++++ new/gcc/config/arm/arm.h 2011-10-19 16:46:51 +0000 +@@ -198,6 +198,7 @@ + Do not define this macro if it does not need to do anything. 
*/ + #define EXTRA_SPECS \ + { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }, \ ++ { "asm_cpu_spec", ASM_CPU_SPEC }, \ + SUBTARGET_EXTRA_SPECS + + #ifndef SUBTARGET_EXTRA_SPECS +@@ -2278,4 +2279,8 @@ + instruction. */ + #define MAX_LDM_STM_OPS 4 + ++#define ASM_CPU_SPEC \ ++ " %{mcpu=generic-*:-march=%*;" \ ++ " :%{mcpu=*:-mcpu=%*} %{march=*:-march=%*}}" ++ + #endif /* ! GCC_ARM_H */ + +=== modified file 'gcc/config/arm/elf.h' +--- old/gcc/config/arm/elf.h 2009-06-21 19:48:15 +0000 ++++ new/gcc/config/arm/elf.h 2011-10-19 16:46:51 +0000 +@@ -56,8 +56,7 @@ + #define ASM_SPEC "\ + %{mbig-endian:-EB} \ + %{mlittle-endian:-EL} \ +-%{mcpu=*:-mcpu=%*} \ +-%{march=*:-march=%*} \ ++%(asm_cpu_spec) \ + %{mapcs-*:-mapcs-%*} \ + %(subtarget_asm_float_spec) \ + %{mthumb-interwork:-mthumb-interwork} \ + +=== modified file 'gcc/config/arm/semi.h' +--- old/gcc/config/arm/semi.h 2007-08-02 09:49:31 +0000 ++++ new/gcc/config/arm/semi.h 2011-10-19 16:46:51 +0000 +@@ -65,8 +65,7 @@ + #define ASM_SPEC "\ + %{fpic|fpie: -k} %{fPIC|fPIE: -k} \ + %{mbig-endian:-EB} \ +-%{mcpu=*:-mcpu=%*} \ +-%{march=*:-march=%*} \ ++%(arm_cpu_spec) \ + %{mapcs-float:-mfloat} \ + %{msoft-float:-mfloat-abi=soft} %{mhard-float:-mfloat-abi=hard} \ + %{mfloat-abi=*} %{mfpu=*} \ + +=== modified file 'gcc/doc/invoke.texi' +--- old/gcc/doc/invoke.texi 2011-08-13 08:32:32 +0000 ++++ new/gcc/doc/invoke.texi 2011-10-19 16:46:51 +0000 +@@ -10215,6 +10215,10 @@ + @samp{cortex-m0}, + @samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}. + ++@option{-mcpu=generic-@var{arch}} is also permissible, and is ++equivalent to @option{-march=@var{arch} -mtune=generic-@var{arch}}. ++See @option{-mtune} for more information. ++ + @item -mtune=@var{name} + @opindex mtune + This option is very similar to the @option{-mcpu=} option, except that +@@ -10226,6 +10230,13 @@ + For some ARM implementations better performance can be obtained by using + this option. + ++@option{-mtune=generic-@var{arch}} specifies that GCC should tune the ++performance for a blend of processors within architecture @var{arch}. ++The aim is to generate code that run well on the current most popular ++processors, balancing between optimizations that benefit some CPUs in the ++range, and avoiding performance pitfalls of other CPUs. The effects of ++this option may change in future GCC versions as CPU models come and go. ++ + @item -march=@var{name} + @opindex march + This specifies the name of the target ARM architecture. GCC uses this + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106830.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106830.patch new file mode 100644 index 0000000000..695aa85591 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106830.patch @@ -0,0 +1,304 @@ +2011-10-19 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF: + + 2011-10-18 Andrew Stubbs <ams@codesourcery.com> + + * config/arm/driver-arm.c (host_detect_local_cpu): Close the file + before exiting. + + 2011-10-18 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config.host (arm*-*-linux*): Add driver-arm.o and x-arm. + * config/arm/arm.opt: Add 'native' processor_type and + arm_arch enum values. + * config/arm/arm.h (host_detect_local_cpu): New prototype. + (EXTRA_SPEC_FUNCTIONS): New define. + (MCPU_MTUNE_NATIVE_SPECS): New define. + (DRIVER_SELF_SPECS): New define. + * config/arm/driver-arm.c: New file. + * config/arm/x-arm: New file. 
+ * doc/invoke.texi (ARM Options): Document -mcpu=native, + -mtune=native and -march=native. + +=== modified file 'gcc/config.host' +--- old/gcc/config.host 2011-02-15 09:49:14 +0000 ++++ new/gcc/config.host 2011-10-19 17:01:50 +0000 +@@ -100,6 +100,14 @@ + esac + + case ${host} in ++ arm*-*-linux*) ++ case ${target} in ++ arm*-*-*) ++ host_extra_gcc_objs="driver-arm.o" ++ host_xmake_file="${host_xmake_file} arm/x-arm" ++ ;; ++ esac ++ ;; + alpha*-*-linux*) + case ${target} in + alpha*-*-linux*) + +=== modified file 'gcc/config/arm/arm.h' +--- old/gcc/config/arm/arm.h 2011-10-19 16:46:51 +0000 ++++ new/gcc/config/arm/arm.h 2011-10-19 17:01:50 +0000 +@@ -2283,4 +2283,21 @@ + " %{mcpu=generic-*:-march=%*;" \ + " :%{mcpu=*:-mcpu=%*} %{march=*:-march=%*}}" + ++/* -mcpu=native handling only makes sense with compiler running on ++ an ARM chip. */ ++#if defined(__arm__) ++extern const char *host_detect_local_cpu (int argc, const char **argv); ++# define EXTRA_SPEC_FUNCTIONS \ ++ { "local_cpu_detect", host_detect_local_cpu }, ++ ++# define MCPU_MTUNE_NATIVE_SPECS \ ++ " %{march=native:%<march=native %:local_cpu_detect(arch)}" \ ++ " %{mcpu=native:%<mcpu=native %:local_cpu_detect(cpu)}" \ ++ " %{mtune=native:%<mtune=native %:local_cpu_detect(tune)}" ++#else ++# define MCPU_MTUNE_NATIVE_SPECS "" ++#endif ++ ++#define DRIVER_SELF_SPECS MCPU_MTUNE_NATIVE_SPECS ++ + #endif /* ! GCC_ARM_H */ + +=== modified file 'gcc/config/arm/arm.opt' +--- old/gcc/config/arm/arm.opt 2011-10-11 02:31:01 +0000 ++++ new/gcc/config/arm/arm.opt 2011-10-19 17:01:50 +0000 +@@ -48,6 +48,11 @@ + Target RejectNegative Joined + Specify the name of the target architecture + ++; Other arm_arch values are loaded from arm-tables.opt ++; but that is a generated file and this is an odd-one-out. ++EnumValue ++Enum(arm_arch) String(native) Value(-1) DriverOnly ++ + marm + Target RejectNegative InverseMask(THUMB) Undocumented + +@@ -153,6 +158,11 @@ + Target RejectNegative Joined + Tune code for the given processor + ++; Other processor_type values are loaded from arm-tables.opt ++; but that is a generated file and this is an odd-one-out. ++EnumValue ++Enum(processor_type) String(native) Value(-1) DriverOnly ++ + mwords-little-endian + Target Report RejectNegative Mask(LITTLE_WORDS) + Assume big endian bytes, little endian words + +=== added file 'gcc/config/arm/driver-arm.c' +--- old/gcc/config/arm/driver-arm.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/config/arm/driver-arm.c 2011-10-19 17:07:55 +0000 +@@ -0,0 +1,149 @@ ++/* Subroutines for the gcc driver. ++ Copyright (C) 2011 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. 
*/ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "configargs.h" ++ ++struct vendor_cpu { ++ const char *part_no; ++ const char *arch_name; ++ const char *cpu_name; ++}; ++ ++static struct vendor_cpu arm_cpu_table[] = { ++ {"0x926", "armv5te", "arm926ej-s"}, ++ {"0xa26", "armv5te", "arm1026ej-s"}, ++ {"0xb02", "armv6k", "mpcore"}, ++ {"0xb36", "armv6j", "arm1136j-s"}, ++ {"0xb56", "armv6t2", "arm1156t2-s"}, ++ {"0xb76", "armv6zk", "arm1176jz-s"}, ++ {"0xc05", "armv7-a", "cortex-a5"}, ++ {"0xc08", "armv7-a", "cortex-a8"}, ++ {"0xc09", "armv7-a", "cortex-a9"}, ++ {"0xc0f", "armv7-a", "cortex-a15"}, ++ {"0xc14", "armv7-r", "cortex-r4"}, ++ {"0xc15", "armv7-r", "cortex-r5"}, ++ {"0xc20", "armv6-m", "cortex-m0"}, ++ {"0xc21", "armv6-m", "cortex-m1"}, ++ {"0xc23", "armv7-m", "cortex-m3"}, ++ {"0xc24", "armv7e-m", "cortex-m4"}, ++ {NULL, NULL, NULL} ++}; ++ ++struct { ++ const char *vendor_no; ++ const struct vendor_cpu *vendor_parts; ++} vendors[] = { ++ {"0x41", arm_cpu_table}, ++ {NULL, NULL} ++}; ++ ++/* This will be called by the spec parser in gcc.c when it sees ++ a %:local_cpu_detect(args) construct. Currently it will be called ++ with either "arch", "cpu" or "tune" as argument depending on if ++ -march=native, -mcpu=native or -mtune=native is to be substituted. ++ ++ It returns a string containing new command line parameters to be ++ put at the place of the above two options, depending on what CPU ++ this is executed. E.g. "-march=armv7-a" on a Cortex-A8 for ++ -march=native. If the routine can't detect a known processor, ++ the -march or -mtune option is discarded. ++ ++ ARGC and ARGV are set depending on the actual arguments given ++ in the spec. */ ++const char * ++host_detect_local_cpu (int argc, const char **argv) ++{ ++ const char *val = NULL; ++ char buf[128]; ++ FILE *f = NULL; ++ bool arch; ++ const struct vendor_cpu *cpu_table = NULL; ++ ++ if (argc < 1) ++ goto not_found; ++ ++ arch = strcmp (argv[0], "arch") == 0; ++ if (!arch && strcmp (argv[0], "cpu") != 0 && strcmp (argv[0], "tune")) ++ goto not_found; ++ ++ f = fopen ("/proc/cpuinfo", "r"); ++ if (f == NULL) ++ goto not_found; ++ ++ while (fgets (buf, sizeof (buf), f) != NULL) ++ { ++ /* Ensure that CPU implementer is ARM (0x41). */ ++ if (strncmp (buf, "CPU implementer", sizeof ("CPU implementer") - 1) == 0) ++ { ++ int i; ++ for (i = 0; vendors[i].vendor_no != NULL; i++) ++ if (strstr (buf, vendors[i].vendor_no) != NULL) ++ { ++ cpu_table = vendors[i].vendor_parts; ++ break; ++ } ++ } ++ ++ /* Detect arch/cpu. */ ++ if (strncmp (buf, "CPU part", sizeof ("CPU part") - 1) == 0) ++ { ++ int i; ++ ++ if (cpu_table == NULL) ++ goto not_found; ++ ++ for (i = 0; cpu_table[i].part_no != NULL; i++) ++ if (strstr (buf, cpu_table[i].part_no) != NULL) ++ { ++ val = arch ? 
cpu_table[i].arch_name : cpu_table[i].cpu_name; ++ break; ++ } ++ break; ++ } ++ } ++ ++ fclose (f); ++ ++ if (val == NULL) ++ goto not_found; ++ ++ return concat ("-m", argv[0], "=", val, NULL); ++ ++not_found: ++ { ++ unsigned int i; ++ unsigned int opt; ++ const char *search[] = {NULL, "arch"}; ++ ++ if (f) ++ fclose (f); ++ ++ search[0] = argv[0]; ++ for (opt = 0; opt < ARRAY_SIZE (search); opt++) ++ for (i = 0; i < ARRAY_SIZE (configure_default_options); i++) ++ if (strcmp (configure_default_options[i].name, search[opt]) == 0) ++ return concat ("-m", search[opt], "=", ++ configure_default_options[i].value, NULL); ++ return NULL; ++ } ++} + +=== added file 'gcc/config/arm/x-arm' +--- old/gcc/config/arm/x-arm 1970-01-01 00:00:00 +0000 ++++ new/gcc/config/arm/x-arm 2011-10-19 17:01:50 +0000 +@@ -0,0 +1,3 @@ ++driver-arm.o: $(srcdir)/config/arm/driver-arm.c \ ++ $(CONFIG_H) $(SYSTEM_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + +=== modified file 'gcc/doc/invoke.texi' +--- old/gcc/doc/invoke.texi 2011-10-19 16:46:51 +0000 ++++ new/gcc/doc/invoke.texi 2011-10-19 17:01:50 +0000 +@@ -10215,10 +10215,16 @@ + @samp{cortex-m0}, + @samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}. + ++ + @option{-mcpu=generic-@var{arch}} is also permissible, and is + equivalent to @option{-march=@var{arch} -mtune=generic-@var{arch}}. + See @option{-mtune} for more information. + ++@option{-mcpu=native} causes the compiler to auto-detect the CPU ++of the build computer. At present, this feature is only supported on ++Linux, and not all architectures are recognised. If the auto-detect is ++unsuccessful the option has no effect. ++ + @item -mtune=@var{name} + @opindex mtune + This option is very similar to the @option{-mcpu=} option, except that +@@ -10237,6 +10243,11 @@ + range, and avoiding performance pitfalls of other CPUs. The effects of + this option may change in future GCC versions as CPU models come and go. + ++@option{-mtune=native} causes the compiler to auto-detect the CPU ++of the build computer. At present, this feature is only supported on ++Linux, and not all architectures are recognised. If the auto-detect is ++unsuccessful the option has no effect. ++ + @item -march=@var{name} + @opindex march + This specifies the name of the target ARM architecture. GCC uses this +@@ -10250,6 +10261,11 @@ + @samp{armv7}, @samp{armv7-a}, @samp{armv7-r}, @samp{armv7-m}, + @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}. + ++@option{-march=native} causes the compiler to auto-detect the architecture ++of the build computer. At present, this feature is only supported on ++Linux, and not all architectures are recognised. If the auto-detect is ++unsuccessful the option has no effect. ++ + @item -mfpu=@var{name} + @itemx -mfpe=@var{number} + @itemx -mfp=@var{number} + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106831.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106831.patch new file mode 100644 index 0000000000..ad91d77366 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106831.patch @@ -0,0 +1,123 @@ +2011-10-19 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF: + + 2011-10-18 Andrew Stubbs <ams@codesourcery.com> + + PR tree-optimization/50717 + + gcc/ + * tree-ssa-math-opts.c (is_widening_mult_p): Remove the 'type' + parameter. Calculate 'type' from stmt. + (convert_mult_to_widen): Update call the is_widening_mult_p. + (convert_plusminus_to_widen): Likewise. 
+ + gcc/testsuite/ + * gcc.dg/pr50717-1.c: New file. + * gcc.target/arm/wmul-12.c: Correct types. + * gcc.target/arm/wmul-8.c: Correct types. + +=== added file 'gcc/testsuite/gcc.dg/pr50717-1.c' +--- old/gcc/testsuite/gcc.dg/pr50717-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/pr50717-1.c 2011-10-19 14:42:50 +0000 +@@ -0,0 +1,26 @@ ++/* PR tree-optimization/50717 */ ++/* Ensure that widening multiply-and-accumulate is not used where integer ++ type promotion or users' casts should prevent it. */ ++ ++/* { dg-options "-O2 -fdump-tree-widening_mul" } */ ++ ++long long ++f (unsigned int a, char b, long long c) ++{ ++ return (a * b) + c; ++} ++ ++int ++g (short a, short b, int c) ++{ ++ return (short)(a * b) + c; ++} ++ ++int ++h (char a, char b, int c) ++{ ++ return (char)(a * b) + c; ++} ++ ++/* { dg-final { scan-tree-dump-times "WIDEN_MULT_PLUS_EXPR" 0 "widening_mul" } } */ ++/* { dg-final { cleanup-tree-dump "widening_mul" } } */ + +=== modified file 'gcc/testsuite/gcc.target/arm/wmul-12.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-12.c 2011-07-22 15:46:42 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-12.c 2011-10-19 14:42:50 +0000 +@@ -4,8 +4,8 @@ + long long + foo (int *b, int *c) + { +- int tmp = *b * *c; +- return 10 + (long long)tmp; ++ long long tmp = (long long)*b * *c; ++ return 10 + tmp; + } + + /* { dg-final { scan-assembler "smlal" } } */ + +=== modified file 'gcc/testsuite/gcc.target/arm/wmul-8.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-8.c 2011-07-15 14:16:54 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-8.c 2011-10-19 14:42:50 +0000 +@@ -4,7 +4,7 @@ + long long + foo (long long a, int *b, int *c) + { +- return a + *b * *c; ++ return a + (long long)*b * *c; + } + + /* { dg-final { scan-assembler "smlal" } } */ + +=== modified file 'gcc/tree-ssa-math-opts.c' +--- old/gcc/tree-ssa-math-opts.c 2011-09-08 20:11:43 +0000 ++++ new/gcc/tree-ssa-math-opts.c 2011-10-19 14:42:50 +0000 +@@ -1351,10 +1351,12 @@ + and *TYPE2_OUT would give the operands of the multiplication. 
*/ + + static bool +-is_widening_mult_p (tree type, gimple stmt, ++is_widening_mult_p (gimple stmt, + tree *type1_out, tree *rhs1_out, + tree *type2_out, tree *rhs2_out) + { ++ tree type = TREE_TYPE (gimple_assign_lhs (stmt)); ++ + if (TREE_CODE (type) != INTEGER_TYPE + && TREE_CODE (type) != FIXED_POINT_TYPE) + return false; +@@ -1416,7 +1418,7 @@ + if (TREE_CODE (type) != INTEGER_TYPE) + return false; + +- if (!is_widening_mult_p (type, stmt, &type1, &rhs1, &type2, &rhs2)) ++ if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2)) + return false; + + to_mode = TYPE_MODE (type); +@@ -1592,7 +1594,7 @@ + if (code == PLUS_EXPR + && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR)) + { +- if (!is_widening_mult_p (type, rhs1_stmt, &type1, &mult_rhs1, ++ if (!is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1, + &type2, &mult_rhs2)) + return false; + add_rhs = rhs2; +@@ -1600,7 +1602,7 @@ + } + else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR) + { +- if (!is_widening_mult_p (type, rhs2_stmt, &type1, &mult_rhs1, ++ if (!is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1, + &type2, &mult_rhs2)) + return false; + add_rhs = rhs1; + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106832.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106832.patch new file mode 100644 index 0000000000..843f1cff25 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106832.patch @@ -0,0 +1,24 @@ +2011-10-21 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF mainline: + + 2011-10-21 Andrew Stubbs <ams@codesourcery.com> + + PR target/50809 + + gcc/ + * config/arm/driver-arm.c (vendors): Make static. + +=== modified file 'gcc/config/arm/driver-arm.c' +--- old/gcc/config/arm/driver-arm.c 2011-10-19 17:07:55 +0000 ++++ new/gcc/config/arm/driver-arm.c 2011-10-21 19:27:47 +0000 +@@ -49,7 +49,7 @@ + {NULL, NULL, NULL} + }; + +-struct { ++static struct { + const char *vendor_no; + const struct vendor_cpu *vendor_parts; + } vendors[] = { + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106833.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106833.patch new file mode 100644 index 0000000000..1ad48e512e --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106833.patch @@ -0,0 +1,453 @@ +2011-10-27 Ira Rosen <ira.rosen@linaro.org> + + Backport from mainline: + + 2011-10-16 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vect-stmts.c (vectorizable_load): For SLP without permutation + treat the first load of the node as the first element in its + interleaving chain. + * tree-vect-slp.c (vect_get_and_check_slp_defs): Swap the operands if + necessary and possible. + (vect_build_slp_tree): Add new argument. Allow load groups of any size + in basic blocks. Keep all the loads for further permutation check. + Use the new argument to determine if there is a permutation. Update + the recursive calls. + (vect_supported_load_permutation_p): Allow subchains of interleaving + chains in basic block vectorization. + (vect_analyze_slp_instance): Update the call to vect_build_slp_tree. + Check load permutation based on the new parameter. + (vect_schedule_slp_instance): Don't start from the first element in + interleaving chain unless the loads are permuted. + + gcc/testsuite/ + * gcc.dg/vect/bb-slp-29.c: New test. 
+ +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-29.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-29.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-29.c 2011-10-23 11:29:25 +0000 +@@ -0,0 +1,59 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define A 3 ++#define B 4 ++#define N 256 ++ ++short src[N], dst[N]; ++ ++void foo (short * __restrict__ dst, short * __restrict__ src, int h, int stride, int dummy) ++{ ++ int i; ++ h /= 16; ++ for (i = 0; i < h; i++) ++ { ++ dst[0] = A*src[0] + B*src[1]; ++ dst[1] = A*src[1] + B*src[2]; ++ dst[2] = A*src[2] + B*src[3]; ++ dst[3] = A*src[3] + B*src[4]; ++ dst[4] = A*src[4] + B*src[5]; ++ dst[5] = A*src[5] + B*src[6]; ++ dst[6] = A*src[6] + B*src[7]; ++ dst[7] = A*src[7] + B*src[8]; ++ dst += stride; ++ src += stride; ++ if (dummy == 32) ++ abort (); ++ } ++} ++ ++ ++int main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ dst[i] = 0; ++ src[i] = i; ++ } ++ ++ foo (dst, src, N, 8, 0); ++ ++ for (i = 0; i < N/2; i++) ++ { ++ if (dst[i] != A * src[i] + B * src[i+1]) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult && vect_element_align } } } } */ ++/* { dg-final { cleanup-tree-dump "slp" } } */ ++ + +=== modified file 'gcc/tree-vect-slp.c' +--- old/gcc/tree-vect-slp.c 2011-10-06 11:08:08 +0000 ++++ new/gcc/tree-vect-slp.c 2011-10-23 11:29:25 +0000 +@@ -115,13 +115,15 @@ + { + tree oprnd; + unsigned int i, number_of_oprnds; +- tree def; ++ tree def[2]; + gimple def_stmt; + enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; + stmt_vec_info stmt_info = + vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0)); + enum gimple_rhs_class rhs_class; + struct loop *loop = NULL; ++ enum tree_code rhs_code; ++ bool different_types = false; + + if (loop_vinfo) + loop = LOOP_VINFO_LOOP (loop_vinfo); +@@ -133,7 +135,7 @@ + { + oprnd = gimple_op (stmt, i + 1); + +- if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def, ++ if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def[i], + &dt[i]) + || (!def_stmt && dt[i] != vect_constant_def)) + { +@@ -188,11 +190,11 @@ + switch (gimple_code (def_stmt)) + { + case GIMPLE_PHI: +- def = gimple_phi_result (def_stmt); ++ def[i] = gimple_phi_result (def_stmt); + break; + + case GIMPLE_ASSIGN: +- def = gimple_assign_lhs (def_stmt); ++ def[i] = gimple_assign_lhs (def_stmt); + break; + + default: +@@ -206,8 +208,8 @@ + { + /* op0 of the first stmt of the group - store its info. */ + *first_stmt_dt0 = dt[i]; +- if (def) +- *first_stmt_def0_type = TREE_TYPE (def); ++ if (def[i]) ++ *first_stmt_def0_type = TREE_TYPE (def[i]); + else + *first_stmt_const_oprnd = oprnd; + +@@ -227,8 +229,8 @@ + { + /* op1 of the first stmt of the group - store its info. */ + *first_stmt_dt1 = dt[i]; +- if (def) +- *first_stmt_def1_type = TREE_TYPE (def); ++ if (def[i]) ++ *first_stmt_def1_type = TREE_TYPE (def[i]); + else + { + /* We assume that the stmt contains only one constant +@@ -249,22 +251,53 @@ + the def-stmt/s of the first stmt. 
*/ + if ((i == 0 + && (*first_stmt_dt0 != dt[i] +- || (*first_stmt_def0_type && def ++ || (*first_stmt_def0_type && def[0] + && !types_compatible_p (*first_stmt_def0_type, +- TREE_TYPE (def))))) ++ TREE_TYPE (def[0]))))) + || (i == 1 + && (*first_stmt_dt1 != dt[i] +- || (*first_stmt_def1_type && def ++ || (*first_stmt_def1_type && def[1] + && !types_compatible_p (*first_stmt_def1_type, +- TREE_TYPE (def))))) +- || (!def ++ TREE_TYPE (def[1]))))) ++ || (!def[i] + && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd), +- TREE_TYPE (oprnd)))) ++ TREE_TYPE (oprnd))) ++ || different_types) + { +- if (vect_print_dump_info (REPORT_SLP)) +- fprintf (vect_dump, "Build SLP failed: different types "); ++ if (i != number_of_oprnds - 1) ++ different_types = true; ++ else ++ { ++ if (is_gimple_assign (stmt) ++ && (rhs_code = gimple_assign_rhs_code (stmt)) ++ && TREE_CODE_CLASS (rhs_code) == tcc_binary ++ && commutative_tree_code (rhs_code) ++ && *first_stmt_dt0 == dt[1] ++ && *first_stmt_dt1 == dt[0] ++ && def[0] && def[1] ++ && !(*first_stmt_def0_type ++ && !types_compatible_p (*first_stmt_def0_type, ++ TREE_TYPE (def[1]))) ++ && !(*first_stmt_def1_type ++ && !types_compatible_p (*first_stmt_def1_type, ++ TREE_TYPE (def[0])))) ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ { ++ fprintf (vect_dump, "Swapping operands of "); ++ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); ++ } ++ swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt), ++ gimple_assign_rhs2_ptr (stmt)); ++ } ++ else ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ fprintf (vect_dump, "Build SLP failed: different types "); + +- return false; ++ return false; ++ } ++ } + } + } + } +@@ -278,10 +311,10 @@ + + case vect_internal_def: + case vect_reduction_def: +- if (i == 0) ++ if ((i == 0 && !different_types) || (i == 1 && different_types)) + VEC_safe_push (gimple, heap, *def_stmts0, def_stmt); + else +- VEC_safe_push (gimple, heap, *def_stmts1, def_stmt); ++ VEC_safe_push (gimple, heap, *def_stmts1, def_stmt); + break; + + default: +@@ -289,7 +322,7 @@ + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "Build SLP failed: illegal type of def "); +- print_generic_expr (vect_dump, def, TDF_SLIM); ++ print_generic_expr (vect_dump, def[i], TDF_SLIM); + } + + return false; +@@ -312,7 +345,7 @@ + int ncopies_for_cost, unsigned int *max_nunits, + VEC (int, heap) **load_permutation, + VEC (slp_tree, heap) **loads, +- unsigned int vectorization_factor) ++ unsigned int vectorization_factor, bool *loads_permuted) + { + VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size); + VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size); +@@ -523,7 +556,9 @@ + + /* Check that the size of interleaved loads group is not + greater than the SLP group size. */ +- if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size) ++ if (loop_vinfo ++ && DR_GROUP_SIZE (vinfo_for_stmt (stmt)) ++ > ncopies * group_size) + { + if (vect_print_dump_info (REPORT_SLP)) + { +@@ -644,19 +679,22 @@ + /* Strided loads were reached - stop the recursion. */ + if (stop_recursion) + { ++ VEC_safe_push (slp_tree, heap, *loads, *node); + if (permutation) + { +- VEC_safe_push (slp_tree, heap, *loads, *node); ++ ++ *loads_permuted = true; + *inside_cost + += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0) + * group_size; + } + else +- { +- /* We don't check here complex numbers chains, so we keep them in +- LOADS for further check in vect_supported_load_permutation_p. 
*/ ++ { ++ /* We don't check here complex numbers chains, so we set ++ LOADS_PERMUTED for further check in ++ vect_supported_load_permutation_p. */ + if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR) +- VEC_safe_push (slp_tree, heap, *loads, *node); ++ *loads_permuted = true; + } + + return true; +@@ -675,7 +713,7 @@ + if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size, + inside_cost, outside_cost, ncopies_for_cost, + max_nunits, load_permutation, loads, +- vectorization_factor)) ++ vectorization_factor, loads_permuted)) + return false; + + SLP_TREE_LEFT (*node) = left_node; +@@ -693,7 +731,7 @@ + if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size, + inside_cost, outside_cost, ncopies_for_cost, + max_nunits, load_permutation, loads, +- vectorization_factor)) ++ vectorization_factor, loads_permuted)) + return false; + + SLP_TREE_RIGHT (*node) = right_node; +@@ -879,8 +917,10 @@ + bool supported, bad_permutation = false; + sbitmap load_index; + slp_tree node, other_complex_node; +- gimple stmt, first = NULL, other_node_first; ++ gimple stmt, first = NULL, other_node_first, load, next_load, first_load; + unsigned complex_numbers = 0; ++ struct data_reference *dr; ++ bb_vec_info bb_vinfo; + + /* FORNOW: permutations are only supported in SLP. */ + if (!slp_instn) +@@ -1040,6 +1080,76 @@ + } + } + ++ /* In basic block vectorization we allow any subchain of an interleaving ++ chain. ++ FORNOW: not supported in loop SLP because of realignment compications. */ ++ bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)); ++ bad_permutation = false; ++ /* Check that for every node in the instance teh loads form a subchain. */ ++ if (bb_vinfo) ++ { ++ FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node) ++ { ++ next_load = NULL; ++ first_load = NULL; ++ FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load) ++ { ++ if (!first_load) ++ first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (load)); ++ else if (first_load ++ != DR_GROUP_FIRST_DR (vinfo_for_stmt (load))) ++ { ++ bad_permutation = true; ++ break; ++ } ++ ++ if (j != 0 && next_load != load) ++ { ++ bad_permutation = true; ++ break; ++ } ++ ++ next_load = DR_GROUP_NEXT_DR (vinfo_for_stmt (load)); ++ } ++ ++ if (bad_permutation) ++ break; ++ } ++ ++ /* Check that the alignment of the first load in every subchain, i.e., ++ the first statement in every load node, is supported. */ ++ if (!bad_permutation) ++ { ++ FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node) ++ { ++ first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0); ++ if (first_load ++ != DR_GROUP_FIRST_DR (vinfo_for_stmt (first_load))) ++ { ++ dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load)); ++ if (vect_supportable_dr_alignment (dr, false) ++ == dr_unaligned_unsupported) ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ { ++ fprintf (vect_dump, "unsupported unaligned load "); ++ print_gimple_stmt (vect_dump, first_load, 0, ++ TDF_SLIM); ++ } ++ bad_permutation = true; ++ break; ++ } ++ } ++ } ++ ++ if (!bad_permutation) ++ { ++ VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn)); ++ return true; ++ } ++ } ++ } ++ + /* FORNOW: the only supported permutation is 0..01..1.. of length equal to + GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as + well (unless it's reduction). 
*/ +@@ -1149,6 +1259,7 @@ + VEC (int, heap) *load_permutation; + VEC (slp_tree, heap) *loads; + struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); ++ bool loads_permuted = false; + + if (dr) + { +@@ -1238,7 +1349,7 @@ + if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size, + &inside_cost, &outside_cost, ncopies_for_cost, + &max_nunits, &load_permutation, &loads, +- vectorization_factor)) ++ vectorization_factor, &loads_permuted)) + { + /* Calculate the unrolling factor based on the smallest type. */ + if (max_nunits > nunits) +@@ -1263,7 +1374,8 @@ + SLP_INSTANCE_LOADS (new_instance) = loads; + SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL; + SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation; +- if (VEC_length (slp_tree, loads)) ++ ++ if (loads_permuted) + { + if (!vect_supported_load_permutation_p (new_instance, group_size, + load_permutation)) +@@ -2542,10 +2654,11 @@ + /* Loads should be inserted before the first load. */ + if (SLP_INSTANCE_FIRST_LOAD_STMT (instance) + && STMT_VINFO_STRIDED_ACCESS (stmt_info) +- && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))) ++ && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)) ++ && SLP_INSTANCE_LOAD_PERMUTATION (instance)) + si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance)); + else if (is_pattern_stmt_p (stmt_info)) +- si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); ++ si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); + else + si = gsi_for_stmt (stmt); + + +=== modified file 'gcc/tree-vect-stmts.c' +--- old/gcc/tree-vect-stmts.c 2011-10-16 12:16:07 +0000 ++++ new/gcc/tree-vect-stmts.c 2011-10-23 11:29:25 +0000 +@@ -4285,6 +4285,11 @@ + if (strided_load) + { + first_stmt = DR_GROUP_FIRST_DR (stmt_info); ++ if (slp ++ && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance) ++ && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0)) ++ first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0); ++ + /* Check if the chain of loads is already vectorized. */ + if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))) + { + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106834.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106834.patch new file mode 100644 index 0000000000..421a8fe3a9 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106834.patch @@ -0,0 +1,1505 @@ + 2011-10-18 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * doc/md.texi (vec_widen_ushiftl_hi, vec_widen_ushiftl_lo, + vec_widen_sshiftl_hi, vec_widen_sshiftl_lo): Document. + * tree-pretty-print.c (dump_generic_node): Handle WIDEN_LSHIFT_EXPR, + VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR. + (op_code_prio): Likewise. + (op_symbol_code): Handle WIDEN_LSHIFT_EXPR. + * optabs.c (optab_for_tree_code): Handle + VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR. + (init-optabs): Initialize optab codes for vec_widen_u/sshiftl_hi/lo. + * optabs.h (enum optab_index): Add OTI_vec_widen_u/sshiftl_hi/lo. + * genopinit.c (optabs): Initialize the new optabs. + * expr.c (expand_expr_real_2): Handle + VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR. + * gimple-pretty-print.c (dump_binary_rhs): Likewise. + * tree-vectorizer.h (NUM_PATTERNS): Increase to 8. + * tree.def (WIDEN_LSHIFT_EXPR, VEC_WIDEN_LSHIFT_HI_EXPR, + VEC_WIDEN_LSHIFT_LO_EXPR): New. + * cfgexpand.c (expand_debug_expr): Handle new tree codes. + * tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add + vect_recog_widen_shift_pattern. 
+ (vect_handle_widen_mult_by_const): Rename... + (vect_handle_widen_op_by_const): ...to this. Handle shifts. + Add a new argument, update documentation. + (vect_recog_widen_mult_pattern): Assume that only second + operand can be constant. Update call to + vect_handle_widen_op_by_const. + (vect_recog_over_widening_pattern): Fix typo. + (vect_recog_widen_shift_pattern): New. + * tree-vect-stmts.c (vectorizable_type_promotion): Handle + widening shifts. + (supportable_widening_operation): Likewise. + * tree-inline.c (estimate_operator_cost): Handle new tree codes. + * tree-vect-generic.c (expand_vector_operations_1): Likewise. + * tree-cfg.c (verify_gimple_assign_binary): Likewise. + * config/arm/neon.md (neon_vec_<US>shiftl_<mode>): New. + (vec_widen_<US>shiftl_lo_<mode>, neon_vec_<US>shiftl_hi_<mode>, + vec_widen_<US>shiftl_hi_<mode>, neon_vec_<US>shift_left_<mode>): + Likewise. + * config/arm/predicates.md (const_neon_scalar_shift_amount_operand): + New. + * config/arm/iterators.md (V_innermode): New. + * tree-vect-slp.c (vect_build_slp_tree): Require same shift operand + for widening shift. + + gcc/testsuite + * testsuite/lib/target-supports.exp + (check_effective_target_vect_widen_shift): New. + * gcc.dg/vect/vect-widen-shift-s16.c: New. + * gcc.dg/vect/vect-widen-shift-s8.c: New. + * gcc.dg/vect/vect-widen-shift-u16.c: New. + * gcc.dg/vect/vect-widen-shift-u8.c: New. + + 2011-10-06 Jakub Jelinek <jakub@redhat.com> + + gcc/ + * tree-vect-patterns.c (vect_pattern_recog_1): Use + vect_recog_func_ptr typedef for the first argument. + (vect_pattern_recog): Rename vect_recog_func_ptr variable + to vect_recog_func, use vect_recog_func_ptr typedef for it. + + 2011-10-16 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + PR tree-optimization/50727 + * tree-vect-patterns.c (vect_operation_fits_smaller_type): Add + DEF_STMT to the list of statements to be replaced by the + pattern statements. + + 2011-10-09 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + PR tree-optimization/50635 + * tree-vect-patterns.c (vect_handle_widen_mult_by_const): Add + DEF_STMT to the list of statements to be replaced by the + pattern statements. + (vect_handle_widen_mult_by_const): Don't check TYPE_OUT. + +=== modified file 'gcc/cfgexpand.c' +--- old/gcc/cfgexpand.c 2011-07-01 09:19:21 +0000 ++++ new/gcc/cfgexpand.c 2011-10-23 13:33:07 +0000 +@@ -3215,6 +3215,8 @@ + case VEC_UNPACK_LO_EXPR: + case VEC_WIDEN_MULT_HI_EXPR: + case VEC_WIDEN_MULT_LO_EXPR: ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ case VEC_WIDEN_LSHIFT_LO_EXPR: + return NULL; + + /* Misc codes. */ + +=== modified file 'gcc/config/arm/iterators.md' +--- old/gcc/config/arm/iterators.md 2011-09-06 14:29:24 +0000 ++++ new/gcc/config/arm/iterators.md 2011-10-23 13:33:07 +0000 +@@ -388,6 +388,9 @@ + (define_mode_attr qhs_extenddi_cstr [(SI "r") (HI "rm") (QI "rUq")]) + (define_mode_attr qhs_zextenddi_cstr [(SI "r") (HI "rm") (QI "rm")]) + ++;; Mode attribute for vshll. 
++(define_mode_attr V_innermode [(V8QI "QI") (V4HI "HI") (V2SI "SI")]) ++ + ;;---------------------------------------------------------------------------- + ;; Code attributes + ;;---------------------------------------------------------------------------- + +=== modified file 'gcc/config/arm/neon.md' +--- old/gcc/config/arm/neon.md 2011-10-03 01:32:17 +0000 ++++ new/gcc/config/arm/neon.md 2011-10-23 13:33:07 +0000 +@@ -5316,6 +5316,44 @@ + } + ) + ++(define_insn "neon_vec_<US>shiftl_<mode>" ++ [(set (match_operand:<V_widen> 0 "register_operand" "=w") ++ (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w") ++ (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))] ++ "TARGET_NEON" ++{ ++ return "vshll.<US><V_sz_elem> %q0, %P1, %2"; ++} ++ [(set_attr "neon_type" "neon_shift_1")] ++) ++ ++(define_expand "vec_widen_<US>shiftl_lo_<mode>" ++ [(match_operand:<V_unpack> 0 "register_operand" "") ++ (SE:<V_unpack> (match_operand:VU 1 "register_operand" "")) ++ (match_operand:SI 2 "immediate_operand" "i")] ++ "TARGET_NEON && !BYTES_BIG_ENDIAN" ++ { ++ emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], ++ simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0), ++ operands[2])); ++ DONE; ++ } ++) ++ ++(define_expand "vec_widen_<US>shiftl_hi_<mode>" ++ [(match_operand:<V_unpack> 0 "register_operand" "") ++ (SE:<V_unpack> (match_operand:VU 1 "register_operand" "")) ++ (match_operand:SI 2 "immediate_operand" "i")] ++ "TARGET_NEON && !BYTES_BIG_ENDIAN" ++ { ++ emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], ++ simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, ++ GET_MODE_SIZE (<V_HALF>mode)), ++ operands[2])); ++ DONE; ++ } ++) ++ + ;; Vectorize for non-neon-quad case + (define_insn "neon_unpack<US>_<mode>" + [(set (match_operand:<V_widen> 0 "register_operand" "=w") +@@ -5392,6 +5430,34 @@ + } + ) + ++(define_expand "vec_widen_<US>shiftl_hi_<mode>" ++ [(match_operand:<V_double_width> 0 "register_operand" "") ++ (SE:<V_double_width> (match_operand:VDI 1 "register_operand" "")) ++ (match_operand:SI 2 "immediate_operand" "i")] ++ "TARGET_NEON" ++ { ++ rtx tmpreg = gen_reg_rtx (<V_widen>mode); ++ emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2])); ++ emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg)); ++ ++ DONE; ++ } ++) ++ ++(define_expand "vec_widen_<US>shiftl_lo_<mode>" ++ [(match_operand:<V_double_width> 0 "register_operand" "") ++ (SE:<V_double_width> (match_operand:VDI 1 "register_operand" "")) ++ (match_operand:SI 2 "immediate_operand" "i")] ++ "TARGET_NEON" ++ { ++ rtx tmpreg = gen_reg_rtx (<V_widen>mode); ++ emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2])); ++ emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg)); ++ ++ DONE; ++ } ++) ++ + ;; The case when using all quad registers. 
+ (define_insn "vec_pack_trunc_<mode>" + [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w") + +=== modified file 'gcc/config/arm/predicates.md' +--- old/gcc/config/arm/predicates.md 2011-10-10 11:43:28 +0000 ++++ new/gcc/config/arm/predicates.md 2011-10-23 13:33:07 +0000 +@@ -136,6 +136,11 @@ + (match_operand 0 "s_register_operand")) + (match_operand 0 "const_int_operand"))) + ++(define_predicate "const_neon_scalar_shift_amount_operand" ++ (and (match_code "const_int") ++ (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)) <= GET_MODE_BITSIZE (mode) ++ && ((unsigned HOST_WIDE_INT) INTVAL (op)) > 0"))) ++ + (define_predicate "arm_add_operand" + (ior (match_operand 0 "arm_rhs_operand") + (match_operand 0 "arm_neg_immediate_operand"))) + +=== modified file 'gcc/doc/md.texi' +--- old/gcc/doc/md.texi 2011-08-13 08:32:32 +0000 ++++ new/gcc/doc/md.texi 2011-10-23 13:33:07 +0000 +@@ -4230,6 +4230,17 @@ + elements of the two vectors, and put the N/2 products of size 2*S in the + output vector (operand 0). + ++@cindex @code{vec_widen_ushiftl_hi_@var{m}} instruction pattern ++@cindex @code{vec_widen_ushiftl_lo_@var{m}} instruction pattern ++@cindex @code{vec_widen_sshiftl_hi_@var{m}} instruction pattern ++@cindex @code{vec_widen_sshiftl_lo_@var{m}} instruction pattern ++@item @samp{vec_widen_ushiftl_hi_@var{m}}, @samp{vec_widen_ushiftl_lo_@var{m}} ++@itemx @samp{vec_widen_sshiftl_hi_@var{m}}, @samp{vec_widen_sshiftl_lo_@var{m}} ++Signed/Unsigned widening shift left. The first input (operand 1) is a vector ++with N signed/unsigned elements of size S@. Operand 2 is a constant. Shift ++the high/low elements of operand 1, and put the N/2 results of size 2*S in the ++output vector (operand 0). ++ + @cindex @code{mulhisi3} instruction pattern + @item @samp{mulhisi3} + Multiply operands 1 and 2, which have mode @code{HImode}, and store + +=== modified file 'gcc/expr.c' +--- old/gcc/expr.c 2011-08-25 11:42:09 +0000 ++++ new/gcc/expr.c 2011-10-23 13:33:07 +0000 +@@ -8290,6 +8290,19 @@ + return target; + } + ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ case VEC_WIDEN_LSHIFT_LO_EXPR: ++ { ++ tree oprnd0 = treeop0; ++ tree oprnd1 = treeop1; ++ ++ expand_operands (oprnd0, oprnd1, NULL_RTX, &op0, &op1, EXPAND_NORMAL); ++ target = expand_widen_pattern_expr (ops, op0, op1, NULL_RTX, ++ target, unsignedp); ++ gcc_assert (target); ++ return target; ++ } ++ + case VEC_PACK_TRUNC_EXPR: + case VEC_PACK_SAT_EXPR: + case VEC_PACK_FIX_TRUNC_EXPR: + +=== modified file 'gcc/genopinit.c' +--- old/gcc/genopinit.c 2011-07-15 13:06:31 +0000 ++++ new/gcc/genopinit.c 2011-10-23 13:33:07 +0000 +@@ -268,6 +268,10 @@ + "set_optab_handler (vec_widen_umult_lo_optab, $A, CODE_FOR_$(vec_widen_umult_lo_$a$))", + "set_optab_handler (vec_widen_smult_hi_optab, $A, CODE_FOR_$(vec_widen_smult_hi_$a$))", + "set_optab_handler (vec_widen_smult_lo_optab, $A, CODE_FOR_$(vec_widen_smult_lo_$a$))", ++ "set_optab_handler (vec_widen_ushiftl_hi_optab, $A, CODE_FOR_$(vec_widen_ushiftl_hi_$a$))", ++ "set_optab_handler (vec_widen_ushiftl_lo_optab, $A, CODE_FOR_$(vec_widen_ushiftl_lo_$a$))", ++ "set_optab_handler (vec_widen_sshiftl_hi_optab, $A, CODE_FOR_$(vec_widen_sshiftl_hi_$a$))", ++ "set_optab_handler (vec_widen_sshiftl_lo_optab, $A, CODE_FOR_$(vec_widen_sshiftl_lo_$a$))", + "set_optab_handler (vec_unpacks_hi_optab, $A, CODE_FOR_$(vec_unpacks_hi_$a$))", + "set_optab_handler (vec_unpacks_lo_optab, $A, CODE_FOR_$(vec_unpacks_lo_$a$))", + "set_optab_handler (vec_unpacku_hi_optab, $A, CODE_FOR_$(vec_unpacku_hi_$a$))", + +=== modified file 
'gcc/gimple-pretty-print.c' +--- old/gcc/gimple-pretty-print.c 2011-05-05 15:42:22 +0000 ++++ new/gcc/gimple-pretty-print.c 2011-10-23 13:33:07 +0000 +@@ -343,6 +343,8 @@ + case VEC_EXTRACT_ODD_EXPR: + case VEC_INTERLEAVE_HIGH_EXPR: + case VEC_INTERLEAVE_LOW_EXPR: ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ case VEC_WIDEN_LSHIFT_LO_EXPR: + for (p = tree_code_name [(int) code]; *p; p++) + pp_character (buffer, TOUPPER (*p)); + pp_string (buffer, " <"); + +=== modified file 'gcc/optabs.c' +--- old/gcc/optabs.c 2011-08-11 15:46:01 +0000 ++++ new/gcc/optabs.c 2011-10-23 13:33:07 +0000 +@@ -454,6 +454,14 @@ + return TYPE_UNSIGNED (type) ? + vec_widen_umult_lo_optab : vec_widen_smult_lo_optab; + ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ return TYPE_UNSIGNED (type) ? ++ vec_widen_ushiftl_hi_optab : vec_widen_sshiftl_hi_optab; ++ ++ case VEC_WIDEN_LSHIFT_LO_EXPR: ++ return TYPE_UNSIGNED (type) ? ++ vec_widen_ushiftl_lo_optab : vec_widen_sshiftl_lo_optab; ++ + case VEC_UNPACK_HI_EXPR: + return TYPE_UNSIGNED (type) ? + vec_unpacku_hi_optab : vec_unpacks_hi_optab; +@@ -6351,6 +6359,10 @@ + init_optab (vec_widen_umult_lo_optab, UNKNOWN); + init_optab (vec_widen_smult_hi_optab, UNKNOWN); + init_optab (vec_widen_smult_lo_optab, UNKNOWN); ++ init_optab (vec_widen_ushiftl_hi_optab, UNKNOWN); ++ init_optab (vec_widen_ushiftl_lo_optab, UNKNOWN); ++ init_optab (vec_widen_sshiftl_hi_optab, UNKNOWN); ++ init_optab (vec_widen_sshiftl_lo_optab, UNKNOWN); + init_optab (vec_unpacks_hi_optab, UNKNOWN); + init_optab (vec_unpacks_lo_optab, UNKNOWN); + init_optab (vec_unpacku_hi_optab, UNKNOWN); + +=== modified file 'gcc/optabs.h' +--- old/gcc/optabs.h 2011-07-27 14:12:45 +0000 ++++ new/gcc/optabs.h 2011-10-23 13:33:07 +0000 +@@ -350,6 +350,12 @@ + OTI_vec_widen_umult_lo, + OTI_vec_widen_smult_hi, + OTI_vec_widen_smult_lo, ++ /* Widening shift left. ++ The high/low part of the resulting vector is returned. */ ++ OTI_vec_widen_ushiftl_hi, ++ OTI_vec_widen_ushiftl_lo, ++ OTI_vec_widen_sshiftl_hi, ++ OTI_vec_widen_sshiftl_lo, + /* Extract and widen the high/low part of a vector of signed or + floating point elements. 
*/ + OTI_vec_unpacks_hi, +@@ -542,6 +548,10 @@ + #define vec_widen_umult_lo_optab (&optab_table[OTI_vec_widen_umult_lo]) + #define vec_widen_smult_hi_optab (&optab_table[OTI_vec_widen_smult_hi]) + #define vec_widen_smult_lo_optab (&optab_table[OTI_vec_widen_smult_lo]) ++#define vec_widen_ushiftl_hi_optab (&optab_table[OTI_vec_widen_ushiftl_hi]) ++#define vec_widen_ushiftl_lo_optab (&optab_table[OTI_vec_widen_ushiftl_lo]) ++#define vec_widen_sshiftl_hi_optab (&optab_table[OTI_vec_widen_sshiftl_hi]) ++#define vec_widen_sshiftl_lo_optab (&optab_table[OTI_vec_widen_sshiftl_lo]) + #define vec_unpacks_hi_optab (&optab_table[OTI_vec_unpacks_hi]) + #define vec_unpacks_lo_optab (&optab_table[OTI_vec_unpacks_lo]) + #define vec_unpacku_hi_optab (&optab_table[OTI_vec_unpacku_hi]) + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-s16.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s16.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s16.c 2011-10-23 13:33:07 +0000 +@@ -0,0 +1,107 @@ ++/* { dg-require-effective-target vect_int } */ ++/* { dg-require-effective-target vect_shift } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 64 ++#define C 16 ++ ++__attribute__ ((noinline)) void ++foo (short *src, int *dst) ++{ ++ int i; ++ short b, b0, b1, b2, b3, *s = src; ++ int *d = dst; ++ ++ for (i = 0; i < N/4; i++) ++ { ++ b0 = *s++; ++ b1 = *s++; ++ b2 = *s++; ++ b3 = *s++; ++ *d = b0 << C; ++ d++; ++ *d = b1 << C; ++ d++; ++ *d = b2 << C; ++ d++; ++ *d = b3 << C; ++ d++; ++ } ++ ++ s = src; ++ d = dst; ++ for (i = 0; i < N; i++) ++ { ++ b = *s++; ++ if (*d != b << C) ++ abort (); ++ d++; ++ } ++ ++ s = src; ++ d = dst; ++ for (i = 0; i < N/4; i++) ++ { ++ b0 = *s++; ++ b1 = *s++; ++ b2 = *s++; ++ b3 = *s++; ++ *d = b0 << C; ++ d++; ++ *d = b1 << C; ++ d++; ++ *d = b2 << C; ++ d++; ++ *d = b3 << 6; ++ d++; ++ } ++ ++ s = src; ++ d = dst; ++ for (i = 0; i < N/4; i++) ++ { ++ b = *s++; ++ if (*d != b << C) ++ abort (); ++ d++; ++ b = *s++; ++ if (*d != b << C) ++ abort (); ++ d++; ++ b = *s++; ++ if (*d != b << C) ++ abort (); ++ d++; ++ b = *s++; ++ if (*d != b << 6) ++ abort (); ++ d++; ++ } ++} ++ ++int main (void) ++{ ++ int i; ++ short in[N]; ++ int out[N]; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ out[i] = 255; ++ __asm__ volatile (""); ++ } ++ ++ foo (in, out); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 8 "vect" { target vect_widen_shift } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-s8.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s8.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s8.c 2011-10-23 13:33:07 +0000 +@@ -0,0 +1,58 @@ ++/* { dg-require-effective-target vect_int } */ ++/* { dg-require-effective-target vect_shift } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 64 ++#define C 12 ++ ++__attribute__ ((noinline)) void ++foo (char *src, int *dst) ++{ ++ int i; ++ char b, *s = src; ++ int *d = dst; ++ ++ for (i = 0; i < N; i++) ++ { ++ b = *s++; ++ *d = b << C; ++ d++; ++ } ++ ++ s = src; ++ d = dst; ++ for (i = 0; i < N; i++) ++ { ++ b = *s++; ++ if (*d != b << C) ++ abort (); ++ d++; ++ } ++} ++ ++int main (void) ++{ ++ int i; ++ char in[N]; ++ int out[N]; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ 
out[i] = 255; ++ __asm__ volatile (""); ++ } ++ ++ foo (in, out); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-u16.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u16.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u16.c 2011-10-23 13:33:07 +0000 +@@ -0,0 +1,58 @@ ++/* { dg-require-effective-target vect_int } */ ++/* { dg-require-effective-target vect_shift } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 64 ++#define C 7 ++ ++__attribute__ ((noinline)) void ++foo (unsigned short *src, unsigned int *dst) ++{ ++ int i; ++ unsigned short b, *s = src; ++ unsigned int *d = dst; ++ ++ for (i = 0; i < N; i++) ++ { ++ b = *s++; ++ *d = b << C; ++ d++; ++ } ++ ++ s = src; ++ d = dst; ++ for (i = 0; i < N; i++) ++ { ++ b = *s++; ++ if (*d != b << C) ++ abort (); ++ d++; ++ } ++} ++ ++int main (void) ++{ ++ int i; ++ unsigned short in[N]; ++ unsigned int out[N]; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ out[i] = 255; ++ __asm__ volatile (""); ++ } ++ ++ foo (in, out); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c 2011-10-23 13:33:07 +0000 +@@ -0,0 +1,65 @@ ++/* { dg-require-effective-target vect_int } */ ++/* { dg-require-effective-target vect_shift } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 64 ++#define C1 10 ++#define C2 5 ++ ++__attribute__ ((noinline)) void ++foo (unsigned char *src, unsigned int *dst1, unsigned int *dst2) ++{ ++ int i; ++ unsigned char b, *s = src; ++ unsigned int *d1 = dst1, *d2 = dst2; ++ ++ for (i = 0; i < N; i++) ++ { ++ b = *s++; ++ *d1 = b << C1; ++ d1++; ++ *d2 = b << C2; ++ d2++; ++ } ++ ++ s = src; ++ d1 = dst1; ++ d2 = dst2; ++ for (i = 0; i < N; i++) ++ { ++ b = *s++; ++ if (*d1 != b << C1 || *d2 != b << C2) ++ abort (); ++ d1++; ++ d2++; ++ } ++} ++ ++int main (void) ++{ ++ int i; ++ unsigned char in[N]; ++ unsigned int out1[N]; ++ unsigned int out2[N]; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ out1[i] = 255; ++ out2[i] = 255; ++ __asm__ volatile (""); ++ } ++ ++ foo (in, out1, out2); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== modified file 'gcc/testsuite/lib/target-supports.exp' +--- old/gcc/testsuite/lib/target-supports.exp 2011-10-06 11:08:08 +0000 ++++ new/gcc/testsuite/lib/target-supports.exp 2011-10-23 13:33:07 +0000 +@@ -2783,6 +2783,26 @@ + } + + # Return 1 if the target plus current options supports a vector ++# widening shift, 0 otherwise. ++# ++# This won't change for different subtargets so cache the result. 
++ ++proc check_effective_target_vect_widen_shift { } { ++ global et_vect_widen_shift_saved ++ ++ if [info exists et_vect_shift_saved] { ++ verbose "check_effective_target_vect_widen_shift: using cached result" 2 ++ } else { ++ set et_vect_widen_shift_saved 0 ++ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } { ++ set et_vect_widen_shift_saved 1 ++ } ++ } ++ verbose "check_effective_target_vect_widen_shift: returning $et_vect_widen_shift_saved" 2 ++ return $et_vect_widen_shift_saved ++} ++ ++# Return 1 if the target plus current options supports a vector + # dot-product of signed chars, 0 otherwise. + # + # This won't change for different subtargets so cache the result. + +=== modified file 'gcc/tree-cfg.c' +--- old/gcc/tree-cfg.c 2011-07-15 13:44:50 +0000 ++++ new/gcc/tree-cfg.c 2011-10-23 13:33:07 +0000 +@@ -3473,6 +3473,44 @@ + return false; + } + ++ case WIDEN_LSHIFT_EXPR: ++ { ++ if (!INTEGRAL_TYPE_P (lhs_type) ++ || !INTEGRAL_TYPE_P (rhs1_type) ++ || TREE_CODE (rhs2) != INTEGER_CST ++ || (2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type))) ++ { ++ error ("type mismatch in widening vector shift expression"); ++ debug_generic_expr (lhs_type); ++ debug_generic_expr (rhs1_type); ++ debug_generic_expr (rhs2_type); ++ return true; ++ } ++ ++ return false; ++ } ++ ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ case VEC_WIDEN_LSHIFT_LO_EXPR: ++ { ++ if (TREE_CODE (rhs1_type) != VECTOR_TYPE ++ || TREE_CODE (lhs_type) != VECTOR_TYPE ++ || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1_type)) ++ || !INTEGRAL_TYPE_P (TREE_TYPE (lhs_type)) ++ || TREE_CODE (rhs2) != INTEGER_CST ++ || (2 * TYPE_PRECISION (TREE_TYPE (rhs1_type)) ++ > TYPE_PRECISION (TREE_TYPE (lhs_type)))) ++ { ++ error ("type mismatch in widening vector shift expression"); ++ debug_generic_expr (lhs_type); ++ debug_generic_expr (rhs1_type); ++ debug_generic_expr (rhs2_type); ++ return true; ++ } ++ ++ return false; ++ } ++ + case PLUS_EXPR: + case MINUS_EXPR: + { + +=== modified file 'gcc/tree-inline.c' +--- old/gcc/tree-inline.c 2011-08-13 08:32:32 +0000 ++++ new/gcc/tree-inline.c 2011-10-23 13:33:07 +0000 +@@ -3343,6 +3343,7 @@ + case DOT_PROD_EXPR: + case WIDEN_MULT_PLUS_EXPR: + case WIDEN_MULT_MINUS_EXPR: ++ case WIDEN_LSHIFT_EXPR: + + case VEC_WIDEN_MULT_HI_EXPR: + case VEC_WIDEN_MULT_LO_EXPR: +@@ -3357,6 +3358,8 @@ + case VEC_EXTRACT_ODD_EXPR: + case VEC_INTERLEAVE_HIGH_EXPR: + case VEC_INTERLEAVE_LOW_EXPR: ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ case VEC_WIDEN_LSHIFT_LO_EXPR: + + return 1; + + +=== modified file 'gcc/tree-pretty-print.c' +--- old/gcc/tree-pretty-print.c 2010-11-05 09:00:50 +0000 ++++ new/gcc/tree-pretty-print.c 2011-10-23 13:33:07 +0000 +@@ -1539,6 +1539,7 @@ + case RROTATE_EXPR: + case VEC_LSHIFT_EXPR: + case VEC_RSHIFT_EXPR: ++ case WIDEN_LSHIFT_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + case BIT_AND_EXPR: +@@ -2209,6 +2210,22 @@ + pp_string (buffer, " > "); + break; + ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ pp_string (buffer, " VEC_WIDEN_LSHIFT_HI_EXPR < "); ++ dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); ++ pp_string (buffer, ", "); ++ dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false); ++ pp_string (buffer, " > "); ++ break; ++ ++ case VEC_WIDEN_LSHIFT_LO_EXPR: ++ pp_string (buffer, " VEC_WIDEN_LSHIFT_HI_EXPR < "); ++ dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); ++ pp_string (buffer, ", "); ++ dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false); ++ pp_string (buffer, " > "); ++ break; ++ + case 
VEC_UNPACK_HI_EXPR: + pp_string (buffer, " VEC_UNPACK_HI_EXPR < "); + dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); +@@ -2531,6 +2548,9 @@ + case RSHIFT_EXPR: + case LROTATE_EXPR: + case RROTATE_EXPR: ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ case VEC_WIDEN_LSHIFT_LO_EXPR: ++ case WIDEN_LSHIFT_EXPR: + return 11; + + case WIDEN_SUM_EXPR: +@@ -2706,6 +2726,9 @@ + case VEC_RSHIFT_EXPR: + return "v>>"; + ++ case WIDEN_LSHIFT_EXPR: ++ return "w<<"; ++ + case POINTER_PLUS_EXPR: + return "+"; + + +=== modified file 'gcc/tree-vect-generic.c' +--- old/gcc/tree-vect-generic.c 2011-02-08 14:16:50 +0000 ++++ new/gcc/tree-vect-generic.c 2011-10-23 13:33:07 +0000 +@@ -552,7 +552,9 @@ + || code == VEC_UNPACK_LO_EXPR + || code == VEC_PACK_TRUNC_EXPR + || code == VEC_PACK_SAT_EXPR +- || code == VEC_PACK_FIX_TRUNC_EXPR) ++ || code == VEC_PACK_FIX_TRUNC_EXPR ++ || code == VEC_WIDEN_LSHIFT_HI_EXPR ++ || code == VEC_WIDEN_LSHIFT_LO_EXPR) + type = TREE_TYPE (rhs1); + + /* Optabs will try converting a negation into a subtraction, so + +=== modified file 'gcc/tree-vect-patterns.c' +--- old/gcc/tree-vect-patterns.c 2011-09-05 06:23:37 +0000 ++++ new/gcc/tree-vect-patterns.c 2011-10-23 13:33:07 +0000 +@@ -48,12 +48,15 @@ + static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *); + static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *, + tree *); ++static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **, ++ tree *, tree *); + static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { + vect_recog_widen_mult_pattern, + vect_recog_widen_sum_pattern, + vect_recog_dot_prod_pattern, + vect_recog_pow_pattern, +- vect_recog_over_widening_pattern}; ++ vect_recog_over_widening_pattern, ++ vect_recog_widen_shift_pattern}; + + + /* Function widened_name_p +@@ -331,27 +334,38 @@ + return pattern_stmt; + } + +-/* Handle two cases of multiplication by a constant. The first one is when +- the constant, CONST_OPRND, fits the type (HALF_TYPE) of the second +- operand (OPRND). In that case, we can peform widen-mult from HALF_TYPE to +- TYPE. ++ ++/* Handle widening operation by a constant. At the moment we support MULT_EXPR ++ and LSHIFT_EXPR. ++ ++ For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR ++ we check that CONST_OPRND is less or equal to the size of HALF_TYPE. 
+ + Otherwise, if the type of the result (TYPE) is at least 4 times bigger than +- HALF_TYPE, and CONST_OPRND fits an intermediate type (2 times smaller than +- TYPE), we can perform widen-mult from the intermediate type to TYPE and +- replace a_T = (TYPE) a_t; with a_it - (interm_type) a_t; */ ++ HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE) ++ that satisfies the above restrictions, we can perform a widening opeartion ++ from the intermediate type to TYPE and replace a_T = (TYPE) a_t; ++ with a_it = (interm_type) a_t; */ + + static bool +-vect_handle_widen_mult_by_const (gimple stmt, tree const_oprnd, tree *oprnd, +- VEC (gimple, heap) **stmts, tree type, +- tree *half_type, gimple def_stmt) ++vect_handle_widen_op_by_const (gimple stmt, enum tree_code code, ++ tree const_oprnd, tree *oprnd, ++ VEC (gimple, heap) **stmts, tree type, ++ tree *half_type, gimple def_stmt) + { + tree new_type, new_oprnd, tmp; + gimple new_stmt; + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); + struct loop *loop = LOOP_VINFO_LOOP (loop_info); + +- if (int_fits_type_p (const_oprnd, *half_type)) ++ if (code != MULT_EXPR && code != LSHIFT_EXPR) ++ return false; ++ ++ if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type)) ++ || (code == LSHIFT_EXPR ++ && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type)) ++ != 1)) ++ && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2)) + { + /* CONST_OPRND is a constant of HALF_TYPE. */ + *oprnd = gimple_assign_rhs1 (def_stmt); +@@ -364,14 +378,16 @@ + || !vinfo_for_stmt (def_stmt)) + return false; + +- /* TYPE is 4 times bigger than HALF_TYPE, try widen-mult for ++ /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for + a type 2 times bigger than HALF_TYPE. */ + new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2, + TYPE_UNSIGNED (type)); +- if (!int_fits_type_p (const_oprnd, new_type)) ++ if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type)) ++ || (code == LSHIFT_EXPR ++ && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1)) + return false; + +- /* Use NEW_TYPE for widen_mult. */ ++ /* Use NEW_TYPE for widening operation. */ + if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt))) + { + new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); +@@ -381,6 +397,7 @@ + || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type) + return false; + ++ VEC_safe_push (gimple, heap, *stmts, def_stmt); + *oprnd = gimple_assign_lhs (new_stmt); + } + else +@@ -392,7 +409,6 @@ + new_oprnd = make_ssa_name (tmp, NULL); + new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd, + NULL_TREE); +- SSA_NAME_DEF_STMT (new_oprnd) = new_stmt; + STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt; + VEC_safe_push (gimple, heap, *stmts, def_stmt); + *oprnd = new_oprnd; +@@ -402,7 +418,6 @@ + return true; + } + +- + /* Function vect_recog_widen_mult_pattern + + Try to find the following pattern: +@@ -491,7 +506,7 @@ + enum tree_code dummy_code; + int dummy_int; + VEC (tree, heap) *dummy_vec; +- bool op0_ok, op1_ok; ++ bool op1_ok; + + if (!is_gimple_assign (last_stmt)) + return NULL; +@@ -511,38 +526,23 @@ + return NULL; + + /* Check argument 0. */ +- op0_ok = widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false); ++ if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false)) ++ return NULL; + /* Check argument 1. 
*/ + op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false); + +- /* In case of multiplication by a constant one of the operands may not match +- the pattern, but not both. */ +- if (!op0_ok && !op1_ok) +- return NULL; +- +- if (op0_ok && op1_ok) ++ if (op1_ok) + { + oprnd0 = gimple_assign_rhs1 (def_stmt0); + oprnd1 = gimple_assign_rhs1 (def_stmt1); + } +- else if (!op0_ok) +- { +- if (TREE_CODE (oprnd0) == INTEGER_CST +- && TREE_CODE (half_type1) == INTEGER_TYPE +- && vect_handle_widen_mult_by_const (last_stmt, oprnd0, &oprnd1, +- stmts, type, +- &half_type1, def_stmt1)) +- half_type0 = half_type1; +- else +- return NULL; +- } +- else if (!op1_ok) ++ else + { + if (TREE_CODE (oprnd1) == INTEGER_CST + && TREE_CODE (half_type0) == INTEGER_TYPE +- && vect_handle_widen_mult_by_const (last_stmt, oprnd1, &oprnd0, +- stmts, type, +- &half_type0, def_stmt0)) ++ && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1, ++ &oprnd0, stmts, type, ++ &half_type0, def_stmt0)) + half_type1 = half_type0; + else + return NULL; +@@ -998,6 +998,7 @@ + || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type) + return false; + ++ VEC_safe_push (gimple, heap, *stmts, def_stmt); + oprnd = gimple_assign_lhs (new_stmt); + } + else +@@ -1128,7 +1129,7 @@ + statetments, except for the case when the last statement in the + sequence doesn't have a corresponding pattern statement. In such + case we associate the last pattern statement with the last statement +- in the sequence. Therefore, we only add an original statetement to ++ in the sequence. Therefore, we only add the original statement to + the list if we know that it is not the last. */ + if (prev_stmt) + VEC_safe_push (gimple, heap, *stmts, prev_stmt); +@@ -1215,6 +1216,231 @@ + } + + ++/* Detect widening shift pattern: ++ ++ type a_t; ++ TYPE a_T, res_T; ++ ++ S1 a_t = ; ++ S2 a_T = (TYPE) a_t; ++ S3 res_T = a_T << CONST; ++ ++ where type 'TYPE' is at least double the size of type 'type'. ++ ++ Also detect unsigned cases: ++ ++ unsigned type a_t; ++ unsigned TYPE u_res_T; ++ TYPE a_T, res_T; ++ ++ S1 a_t = ; ++ S2 a_T = (TYPE) a_t; ++ S3 res_T = a_T << CONST; ++ S4 u_res_T = (unsigned TYPE) res_T; ++ ++ And a case when 'TYPE' is 4 times bigger than 'type'. In that case we ++ create an additional pattern stmt for S2 to create a variable of an ++ intermediate type, and perform widen-shift on the intermediate type: ++ ++ type a_t; ++ interm_type a_it; ++ TYPE a_T, res_T, res_T'; ++ ++ S1 a_t = ; ++ S2 a_T = (TYPE) a_t; ++ '--> a_it = (interm_type) a_t; ++ S3 res_T = a_T << CONST; ++ '--> res_T' = a_it <<* CONST; ++ ++ Input/Output: ++ ++ * STMTS: Contains a stmt from which the pattern search begins. ++ In case of unsigned widen-shift, the original stmt (S3) is replaced with S4 ++ in STMTS. When an intermediate type is used and a pattern statement is ++ created for S2, we also put S2 here (before S3). ++ ++ Output: ++ ++ * TYPE_IN: The type of the input arguments to the pattern. ++ ++ * TYPE_OUT: The type of the output of this pattern. ++ ++ * Return value: A new stmt that will be used to replace the sequence of ++ stmts that constitute the pattern. In this case it will be: ++ WIDEN_LSHIFT_EXPR <a_t, CONST>. 
*/ ++ ++static gimple ++vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts, ++ tree *type_in, tree *type_out) ++{ ++ gimple last_stmt = VEC_pop (gimple, *stmts); ++ gimple def_stmt0; ++ tree oprnd0, oprnd1; ++ tree type, half_type0; ++ gimple pattern_stmt, orig_stmt = NULL; ++ tree vectype, vectype_out = NULL_TREE; ++ tree dummy; ++ tree var; ++ enum tree_code dummy_code; ++ int dummy_int; ++ VEC (tree, heap) * dummy_vec; ++ gimple use_stmt = NULL; ++ bool over_widen = false; ++ ++ if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt)) ++ return NULL; ++ ++ orig_stmt = last_stmt; ++ if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt))) ++ { ++ /* This statement was also detected as over-widening operation (it can't ++ be any other pattern, because only over-widening detects shifts). ++ LAST_STMT is the final type demotion statement, but its related ++ statement is shift. We analyze the related statement to catch cases: ++ ++ orig code: ++ type a_t; ++ itype res; ++ TYPE a_T, res_T; ++ ++ S1 a_T = (TYPE) a_t; ++ S2 res_T = a_T << CONST; ++ S3 res = (itype)res_T; ++ ++ (size of type * 2 <= size of itype ++ and size of itype * 2 <= size of TYPE) ++ ++ code after over-widening pattern detection: ++ ++ S1 a_T = (TYPE) a_t; ++ --> a_it = (itype) a_t; ++ S2 res_T = a_T << CONST; ++ S3 res = (itype)res_T; <--- LAST_STMT ++ --> res = a_it << CONST; ++ ++ after widen_shift: ++ ++ S1 a_T = (TYPE) a_t; ++ --> a_it = (itype) a_t; - redundant ++ S2 res_T = a_T << CONST; ++ S3 res = (itype)res_T; ++ --> res = a_t w<< CONST; ++ ++ i.e., we replace the three statements with res = a_t w<< CONST. */ ++ last_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_stmt)); ++ over_widen = true; ++ } ++ ++ if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR) ++ return NULL; ++ ++ oprnd0 = gimple_assign_rhs1 (last_stmt); ++ oprnd1 = gimple_assign_rhs2 (last_stmt); ++ if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST) ++ return NULL; ++ ++ /* Check operand 0: it has to be defined by a type promotion. */ ++ if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false)) ++ return NULL; ++ ++ /* Check operand 1: has to be positive. We check that it fits the type ++ in vect_handle_widen_op_by_const (). */ ++ if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0) ++ return NULL; ++ ++ oprnd0 = gimple_assign_rhs1 (def_stmt0); ++ type = gimple_expr_type (last_stmt); ++ ++ /* Check if this a widening operation. */ ++ if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1, ++ &oprnd0, stmts, ++ type, &half_type0, def_stmt0)) ++ return NULL; ++ ++ /* Handle unsigned case. Look for ++ S4 u_res_T = (unsigned TYPE) res_T; ++ Use unsigned TYPE as the type for WIDEN_LSHIFT_EXPR. */ ++ if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0)) ++ { ++ tree lhs = gimple_assign_lhs (last_stmt), use_lhs; ++ imm_use_iterator imm_iter; ++ use_operand_p use_p; ++ int nuses = 0; ++ tree use_type; ++ ++ if (over_widen) ++ { ++ /* In case of over-widening pattern, S4 should be ORIG_STMT itself. ++ We check here that TYPE is the correct type for the operation, ++ i.e., it's the type of the original result. 
*/ ++ tree orig_type = gimple_expr_type (orig_stmt); ++ if ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (orig_type)) ++ || (TYPE_PRECISION (type) != TYPE_PRECISION (orig_type))) ++ return NULL; ++ } ++ else ++ { ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) ++ { ++ if (is_gimple_debug (USE_STMT (use_p))) ++ continue; ++ use_stmt = USE_STMT (use_p); ++ nuses++; ++ } ++ ++ if (nuses != 1 || !is_gimple_assign (use_stmt) ++ || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))) ++ return NULL; ++ ++ use_lhs = gimple_assign_lhs (use_stmt); ++ use_type = TREE_TYPE (use_lhs); ++ ++ if (!INTEGRAL_TYPE_P (use_type) ++ || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type)) ++ || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type))) ++ return NULL; ++ ++ type = use_type; ++ } ++ } ++ ++ /* Pattern detected. */ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: "); ++ ++ /* Check target support. */ ++ vectype = get_vectype_for_scalar_type (half_type0); ++ vectype_out = get_vectype_for_scalar_type (type); ++ ++ if (!vectype ++ || !vectype_out ++ || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt, ++ vectype_out, vectype, ++ &dummy, &dummy, &dummy_code, ++ &dummy_code, &dummy_int, ++ &dummy_vec)) ++ return NULL; ++ ++ *type_in = vectype; ++ *type_out = vectype_out; ++ ++ /* Pattern supported. Create a stmt to be used to replace the pattern. */ ++ var = vect_recog_temp_ssa_var (type, NULL); ++ pattern_stmt = ++ gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1); ++ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); ++ ++ if (use_stmt) ++ last_stmt = use_stmt; ++ else ++ last_stmt = orig_stmt; ++ ++ VEC_safe_push (gimple, heap, *stmts, last_stmt); ++ return pattern_stmt; ++} ++ + /* Mark statements that are involved in a pattern. */ + + static inline void +@@ -1278,7 +1504,8 @@ + static void + vect_pattern_recog_1 ( + gimple (* vect_recog_func) (VEC (gimple, heap) **, tree *, tree *), +- gimple_stmt_iterator si) ++ gimple_stmt_iterator si, ++ VEC (gimple, heap) **stmts_to_replace) + { + gimple stmt = gsi_stmt (si), pattern_stmt; + stmt_vec_info stmt_info; +@@ -1288,14 +1515,14 @@ + enum tree_code code; + int i; + gimple next; +- VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1); + +- VEC_quick_push (gimple, stmts_to_replace, stmt); +- pattern_stmt = (* vect_recog_func) (&stmts_to_replace, &type_in, &type_out); ++ VEC_truncate (gimple, *stmts_to_replace, 0); ++ VEC_quick_push (gimple, *stmts_to_replace, stmt); ++ pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out); + if (!pattern_stmt) + return; + +- stmt = VEC_last (gimple, stmts_to_replace); ++ stmt = VEC_last (gimple, *stmts_to_replace); + stmt_info = vinfo_for_stmt (stmt); + loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + +@@ -1303,8 +1530,6 @@ + { + /* No need to check target support (already checked by the pattern + recognition function). */ +- if (type_out) +- gcc_assert (VECTOR_MODE_P (TYPE_MODE (type_out))); + pattern_vectype = type_out ? type_out : type_in; + } + else +@@ -1360,8 +1585,8 @@ + /* It is possible that additional pattern stmts are created and inserted in + STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the + relevant statements. 
*/ +- for (i = 0; VEC_iterate (gimple, stmts_to_replace, i, stmt) +- && (unsigned) i < (VEC_length (gimple, stmts_to_replace) - 1); ++ for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt) ++ && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1); + i++) + { + stmt_info = vinfo_for_stmt (stmt); +@@ -1374,8 +1599,6 @@ + + vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE); + } +- +- VEC_free (gimple, heap, stmts_to_replace); + } + + +@@ -1465,6 +1688,7 @@ + gimple_stmt_iterator si; + unsigned int i, j; + gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); ++ VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1); + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "=== vect_pattern_recog ==="); +@@ -1480,8 +1704,11 @@ + for (j = 0; j < NUM_PATTERNS; j++) + { + vect_recog_func_ptr = vect_vect_recog_func_ptrs[j]; +- vect_pattern_recog_1 (vect_recog_func_ptr, si); ++ vect_pattern_recog_1 (vect_recog_func_ptr, si, ++ &stmts_to_replace); + } + } + } ++ ++ VEC_free (gimple, heap, stmts_to_replace); + } + +=== modified file 'gcc/tree-vect-slp.c' +--- old/gcc/tree-vect-slp.c 2011-10-23 11:29:25 +0000 ++++ new/gcc/tree-vect-slp.c 2011-10-27 11:27:59 +0000 +@@ -480,6 +480,11 @@ + } + } + } ++ else if (rhs_code == WIDEN_LSHIFT_EXPR) ++ { ++ need_same_oprnds = true; ++ first_op1 = gimple_assign_rhs2 (stmt); ++ } + } + else + { + +=== modified file 'gcc/tree-vect-stmts.c' +--- old/gcc/tree-vect-stmts.c 2011-10-23 11:29:25 +0000 ++++ new/gcc/tree-vect-stmts.c 2011-10-27 11:27:59 +0000 +@@ -3359,6 +3359,7 @@ + VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL; + VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL; + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); ++ unsigned int k; + + if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) + return false; +@@ -3375,7 +3376,8 @@ + + code = gimple_assign_rhs_code (stmt); + if (!CONVERT_EXPR_CODE_P (code) +- && code != WIDEN_MULT_EXPR) ++ && code != WIDEN_MULT_EXPR ++ && code != WIDEN_LSHIFT_EXPR) + return false; + + scalar_dest = gimple_assign_lhs (stmt); +@@ -3403,7 +3405,7 @@ + bool ok; + + op1 = gimple_assign_rhs2 (stmt); +- if (code == WIDEN_MULT_EXPR) ++ if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR) + { + /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of + OP1. */ +@@ -3480,7 +3482,7 @@ + fprintf (vect_dump, "transform type promotion operation. ncopies = %d.", + ncopies); + +- if (code == WIDEN_MULT_EXPR) ++ if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR) + { + if (CONSTANT_CLASS_P (op0)) + op0 = fold_convert (TREE_TYPE (op1), op0); +@@ -3521,6 +3523,8 @@ + if (op_type == binary_op) + vec_oprnds1 = VEC_alloc (tree, heap, 1); + } ++ else if (code == WIDEN_LSHIFT_EXPR) ++ vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size); + + /* In case the vectorization factor (VF) is bigger than the number + of elements that we can fit in a vectype (nunits), we have to generate +@@ -3534,15 +3538,33 @@ + if (j == 0) + { + if (slp_node) +- vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, +- &vec_oprnds1, -1); +- else ++ { ++ if (code == WIDEN_LSHIFT_EXPR) ++ { ++ vec_oprnd1 = op1; ++ /* Store vec_oprnd1 for every vector stmt to be created ++ for SLP_NODE. We check during the analysis that all ++ the shift arguments are the same. 
*/ ++ for (k = 0; k < slp_node->vec_stmts_size - 1; k++) ++ VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); ++ ++ vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, ++ -1); ++ } ++ else ++ vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, ++ &vec_oprnds1, -1); ++ } ++ else + { + vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); + VEC_quick_push (tree, vec_oprnds0, vec_oprnd0); + if (op_type == binary_op) + { +- vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL); ++ if (code == WIDEN_LSHIFT_EXPR) ++ vec_oprnd1 = op1; ++ else ++ vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL); + VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); + } + } +@@ -3553,7 +3575,10 @@ + VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0); + if (op_type == binary_op) + { +- vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1); ++ if (code == WIDEN_LSHIFT_EXPR) ++ vec_oprnd1 = op1; ++ else ++ vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1); + VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1); + } + } +@@ -5853,6 +5878,19 @@ + } + break; + ++ case WIDEN_LSHIFT_EXPR: ++ if (BYTES_BIG_ENDIAN) ++ { ++ c1 = VEC_WIDEN_LSHIFT_HI_EXPR; ++ c2 = VEC_WIDEN_LSHIFT_LO_EXPR; ++ } ++ else ++ { ++ c2 = VEC_WIDEN_LSHIFT_HI_EXPR; ++ c1 = VEC_WIDEN_LSHIFT_LO_EXPR; ++ } ++ break; ++ + CASE_CONVERT: + if (BYTES_BIG_ENDIAN) + { + +=== modified file 'gcc/tree-vectorizer.h' +--- old/gcc/tree-vectorizer.h 2011-09-05 06:23:37 +0000 ++++ new/gcc/tree-vectorizer.h 2011-10-23 13:33:07 +0000 +@@ -896,7 +896,7 @@ + Additional pattern recognition functions can (and will) be added + in the future. */ + typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); +-#define NUM_PATTERNS 5 ++#define NUM_PATTERNS 6 + void vect_pattern_recog (loop_vec_info); + + /* In tree-vectorizer.c. */ + +=== modified file 'gcc/tree.def' +--- old/gcc/tree.def 2011-01-21 14:14:12 +0000 ++++ new/gcc/tree.def 2011-10-23 13:33:07 +0000 +@@ -1092,6 +1092,19 @@ + is subtracted from t3. */ + DEFTREECODE (WIDEN_MULT_MINUS_EXPR, "widen_mult_minus_expr", tcc_expression, 3) + ++/* Widening shift left. ++ The first operand is of type t1. ++ The second operand is the number of bits to shift by; it need not be the ++ same type as the first operand and result. ++ Note that the result is undefined if the second operand is larger ++ than or equal to the first operand's type size. ++ The type of the entire expression is t2, such that t2 is at least twice ++ the size of t1. ++ WIDEN_LSHIFT_EXPR is equivalent to first widening (promoting) ++ the first argument from type t1 to type t2, and then shifting it ++ by the second argument. */ ++DEFTREECODE (WIDEN_LSHIFT_EXPR, "widen_lshift_expr", tcc_binary, 2) ++ + /* Fused multiply-add. + All operands and the result are of the same type. No intermediate + rounding is performed after multiplying operand one with operand two +@@ -1147,6 +1160,16 @@ + DEFTREECODE (VEC_INTERLEAVE_HIGH_EXPR, "vec_interleavehigh_expr", tcc_binary, 2) + DEFTREECODE (VEC_INTERLEAVE_LOW_EXPR, "vec_interleavelow_expr", tcc_binary, 2) + ++/* Widening vector shift left in bits. ++ Operand 0 is a vector to be shifted with N elements of size S. ++ Operand 1 is an integer shift amount in bits. ++ The result of the operation is N elements of size 2*S. ++ VEC_WIDEN_LSHIFT_HI_EXPR computes the N/2 high results. ++ VEC_WIDEN_LSHIFT_LO_EXPR computes the N/2 low results. 
++ */ ++DEFTREECODE (VEC_WIDEN_LSHIFT_HI_EXPR, "widen_lshift_hi_expr", tcc_binary, 2) ++DEFTREECODE (VEC_WIDEN_LSHIFT_LO_EXPR, "widen_lshift_lo_expr", tcc_binary, 2) ++ + /* PREDICT_EXPR. Specify hint for branch prediction. The + PREDICT_EXPR_PREDICTOR specify predictor and PREDICT_EXPR_OUTCOME the + outcome (0 for not taken and 1 for taken). Once the profile is guessed + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106836.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106836.patch new file mode 100644 index 0000000000..95b9ea9b83 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106836.patch @@ -0,0 +1,61 @@ +2011-11-04 Revital Eres <revital.eres@linaro.org> + + Backport from mainline -r180673: + + gcc/ + * modulo-sched.c (generate_prolog_epilog): Mark prolog + and epilog as BB_DISABLE_SCHEDULE. + (mark_loop_unsched): New function. + (sms_schedule): Call it. + +=== modified file 'gcc/modulo-sched.c' +--- old/gcc/modulo-sched.c 2011-10-10 14:35:32 +0000 ++++ new/gcc/modulo-sched.c 2011-10-30 05:31:00 +0000 +@@ -1173,6 +1173,8 @@ + /* Put the prolog on the entry edge. */ + e = loop_preheader_edge (loop); + split_edge_and_insert (e, get_insns ()); ++ if (!flag_resched_modulo_sched) ++ e->dest->flags |= BB_DISABLE_SCHEDULE; + + end_sequence (); + +@@ -1186,9 +1188,24 @@ + gcc_assert (single_exit (loop)); + e = single_exit (loop); + split_edge_and_insert (e, get_insns ()); ++ if (!flag_resched_modulo_sched) ++ e->dest->flags |= BB_DISABLE_SCHEDULE; ++ + end_sequence (); + } + ++/* Mark LOOP as software pipelined so the later ++ scheduling passes don't touch it. */ ++static void ++mark_loop_unsched (struct loop *loop) ++{ ++ unsigned i; ++ basic_block *bbs = get_loop_body (loop); ++ ++ for (i = 0; i < loop->num_nodes; i++) ++ bbs[i]->flags |= BB_DISABLE_SCHEDULE; ++} ++ + /* Return true if all the BBs of the loop are empty except the + loop header. */ + static bool +@@ -1714,9 +1731,10 @@ + permute_partial_schedule (ps, g->closing_branch->first_note); + + /* Mark this loop as software pipelined so the later +- scheduling passes doesn't touch it. */ ++ scheduling passes don't touch it. */ + if (! flag_resched_modulo_sched) +- g->bb->flags |= BB_DISABLE_SCHEDULE; ++ mark_loop_unsched (loop); ++ + /* The life-info is not valid any more. */ + df_set_bb_dirty (g->bb); + + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106839.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106839.patch new file mode 100644 index 0000000000..dcffe1945c --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106839.patch @@ -0,0 +1,23 @@ +2011-11-02 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF mainline: + + 2011-11-01 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/bpabi.h (BE8_LINK_SPEC): Recognize generic-armv7 tuning. + +=== modified file 'gcc/config/arm/bpabi.h' +--- old/gcc/config/arm/bpabi.h 2010-12-20 17:48:51 +0000 ++++ new/gcc/config/arm/bpabi.h 2011-11-02 21:02:53 +0000 +@@ -56,7 +56,8 @@ + "|march=armv4|mcpu=fa526|mcpu=fa626:--fix-v4bx}" + + #define BE8_LINK_SPEC " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5"\ +- "|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15:%{!r:--be8}}}" ++ "|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15|mcpu=generic-armv7-a"\ ++ ":%{!r:--be8}}}" + + /* Tell the assembler to build BPABI binaries. 
*/ + #undef SUBTARGET_EXTRA_ASM_SPEC + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106840.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106840.patch new file mode 100644 index 0000000000..70a7bdfa2b --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106840.patch @@ -0,0 +1,1400 @@ +2011-11-17 Ira Rosen <ira.rosen@linaro.org> + + Backport from mainline: + + 2011-11-03 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vectorizer.h (slp_void_p): New. + (struct _slp_tree): Replace left and right with children. Update + documentation. + (struct _slp_oprnd_info): New. + (vect_get_vec_defs): Declare. + (vect_get_slp_defs): Update arguments. + * tree-vect-loop.c (vect_create_epilog_for_reduction): Call + vect_get_vec_defs instead of vect_get_slp_defs. + (vectorizable_reduction): Likewise. + * tree-vect-stmts.c (vect_get_vec_defs): Remove static, add argument. + Update call to vect_get_slp_defs. + (vectorizable_conversion): Update call to vect_get_vec_defs. + (vectorizable_assignment, vectorizable_shift, + vectorizable_operation): Likewise. + (vectorizable_type_demotion): Call vect_get_vec_defs instead of + vect_get_slp_defs. + (vectorizable_type_promotion, vectorizable_store): Likewise. + (vect_analyze_stmt): Fix typo. + * tree-vect-slp.c (vect_free_slp_tree): Update SLP tree traversal. + (vect_print_slp_tree, vect_mark_slp_stmts, + vect_mark_slp_stmts_relevant, vect_slp_rearrange_stmts, + vect_detect_hybrid_slp_stmts, vect_slp_analyze_node_operations, + vect_schedule_slp_instance): Likewise. + (vect_create_new_slp_node): New. + (vect_create_oprnd_info, vect_free_oprnd_info): Likewise. + (vect_get_and_check_slp_defs): Pass information about defs using + oprnds_info, allow any number of operands. + (vect_build_slp_tree): Likewise. Update calls to + vect_get_and_check_slp_defs. Fix comments. + (vect_analyze_slp_instance): Move node creation to + vect_create_new_slp_node. + (vect_get_slp_defs): Allow any number of operands. + + 2011-11-11 Jakub Jelinek <jakub@redhat.com> + + gcc/ + * tree-vect-slp.c (vect_free_slp_tree): Also free SLP_TREE_CHILDREN + vector. + (vect_create_new_slp_node): Don't allocate node before checking stmt + type. + (vect_free_oprnd_info): Remove FREE_DEF_STMTS argument, always + free def_stmts vectors and additionally free oprnd_info. + (vect_build_slp_tree): Adjust callers. Call it even if + stop_recursion. If vect_create_new_slp_node or + vect_build_slp_tree fails, properly handle freeing memory. + If it succeeded, clear def_stmts in oprnd_info. + +=== modified file 'gcc/tree-vect-loop.c' +--- old/gcc/tree-vect-loop.c 2011-09-05 06:23:37 +0000 ++++ new/gcc/tree-vect-loop.c 2011-11-14 11:38:08 +0000 +@@ -3282,8 +3282,8 @@ + + /* Get the loop-entry arguments. 
*/ + if (slp_node) +- vect_get_slp_defs (reduction_op, NULL_TREE, slp_node, &vec_initial_defs, +- NULL, reduc_index); ++ vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs, ++ NULL, slp_node, reduc_index); + else + { + vec_initial_defs = VEC_alloc (tree, heap, 1); +@@ -4451,8 +4451,8 @@ + } + + if (slp_node) +- vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, &vec_oprnds1, +- -1); ++ vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, ++ slp_node, -1); + else + { + loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index], + +=== modified file 'gcc/tree-vect-slp.c' +--- old/gcc/tree-vect-slp.c 2011-10-27 11:27:59 +0000 ++++ new/gcc/tree-vect-slp.c 2011-11-14 11:38:08 +0000 +@@ -67,15 +67,16 @@ + static void + vect_free_slp_tree (slp_tree node) + { ++ int i; ++ slp_void_p child; ++ + if (!node) + return; + +- if (SLP_TREE_LEFT (node)) +- vect_free_slp_tree (SLP_TREE_LEFT (node)); +- +- if (SLP_TREE_RIGHT (node)) +- vect_free_slp_tree (SLP_TREE_RIGHT (node)); +- ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_free_slp_tree ((slp_tree) child); ++ ++ VEC_free (slp_void_p, heap, SLP_TREE_CHILDREN (node)); + VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node)); + + if (SLP_TREE_VEC_STMTS (node)) +@@ -96,48 +97,116 @@ + } + + +-/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that +- they are of a legal type and that they match the defs of the first stmt of +- the SLP group (stored in FIRST_STMT_...). */ ++/* Create an SLP node for SCALAR_STMTS. */ ++ ++static slp_tree ++vect_create_new_slp_node (VEC (gimple, heap) *scalar_stmts) ++{ ++ slp_tree node; ++ gimple stmt = VEC_index (gimple, scalar_stmts, 0); ++ unsigned int nops; ++ ++ if (is_gimple_call (stmt)) ++ nops = gimple_call_num_args (stmt); ++ else if (is_gimple_assign (stmt)) ++ nops = gimple_num_ops (stmt) - 1; ++ else ++ return NULL; ++ ++ node = XNEW (struct _slp_tree); ++ SLP_TREE_SCALAR_STMTS (node) = scalar_stmts; ++ SLP_TREE_VEC_STMTS (node) = NULL; ++ SLP_TREE_CHILDREN (node) = VEC_alloc (slp_void_p, heap, nops); ++ SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0; ++ SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0; ++ ++ return node; ++} ++ ++ ++/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each ++ operand. */ ++static VEC (slp_oprnd_info, heap) * ++vect_create_oprnd_info (int nops, int group_size) ++{ ++ int i; ++ slp_oprnd_info oprnd_info; ++ VEC (slp_oprnd_info, heap) *oprnds_info; ++ ++ oprnds_info = VEC_alloc (slp_oprnd_info, heap, nops); ++ for (i = 0; i < nops; i++) ++ { ++ oprnd_info = XNEW (struct _slp_oprnd_info); ++ oprnd_info->def_stmts = VEC_alloc (gimple, heap, group_size); ++ oprnd_info->first_dt = vect_uninitialized_def; ++ oprnd_info->first_def_type = NULL_TREE; ++ oprnd_info->first_const_oprnd = NULL_TREE; ++ oprnd_info->first_pattern = false; ++ VEC_quick_push (slp_oprnd_info, oprnds_info, oprnd_info); ++ } ++ ++ return oprnds_info; ++} ++ ++ ++/* Free operands info. */ ++ ++static void ++vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info) ++{ ++ int i; ++ slp_oprnd_info oprnd_info; ++ ++ FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info) ++ { ++ VEC_free (gimple, heap, oprnd_info->def_stmts); ++ XDELETE (oprnd_info); ++ } ++ ++ VEC_free (slp_oprnd_info, heap, *oprnds_info); ++} ++ ++ ++/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that ++ they are of a valid type and that they match the defs of the first stmt of ++ the SLP group (stored in OPRNDS_INFO). 
*/ + + static bool + vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, + slp_tree slp_node, gimple stmt, +- VEC (gimple, heap) **def_stmts0, +- VEC (gimple, heap) **def_stmts1, +- enum vect_def_type *first_stmt_dt0, +- enum vect_def_type *first_stmt_dt1, +- tree *first_stmt_def0_type, +- tree *first_stmt_def1_type, +- tree *first_stmt_const_oprnd, +- int ncopies_for_cost, +- bool *pattern0, bool *pattern1) ++ int ncopies_for_cost, bool first, ++ VEC (slp_oprnd_info, heap) **oprnds_info) + { + tree oprnd; + unsigned int i, number_of_oprnds; +- tree def[2]; ++ tree def, def_op0 = NULL_TREE; + gimple def_stmt; +- enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; +- stmt_vec_info stmt_info = +- vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0)); +- enum gimple_rhs_class rhs_class; ++ enum vect_def_type dt = vect_uninitialized_def; ++ enum vect_def_type dt_op0 = vect_uninitialized_def; ++ stmt_vec_info stmt_info = vinfo_for_stmt (stmt); ++ tree lhs = gimple_get_lhs (stmt); + struct loop *loop = NULL; + enum tree_code rhs_code; + bool different_types = false; ++ bool pattern = false; ++ slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info; + + if (loop_vinfo) + loop = LOOP_VINFO_LOOP (loop_vinfo); + +- rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt)); +- number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */ ++ if (is_gimple_call (stmt)) ++ number_of_oprnds = gimple_call_num_args (stmt); ++ else ++ number_of_oprnds = gimple_num_ops (stmt) - 1; + + for (i = 0; i < number_of_oprnds; i++) + { + oprnd = gimple_op (stmt, i + 1); ++ oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i); + +- if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def[i], +- &dt[i]) +- || (!def_stmt && dt[i] != vect_constant_def)) ++ if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def, ++ &dt) ++ || (!def_stmt && dt != vect_constant_def)) + { + if (vect_print_dump_info (REPORT_SLP)) + { +@@ -158,29 +227,24 @@ + && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt)) + && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) + { +- if (!*first_stmt_dt0) +- *pattern0 = true; +- else +- { +- if (i == 1 && !*first_stmt_dt1) +- *pattern1 = true; +- else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1)) +- { +- if (vect_print_dump_info (REPORT_DETAILS)) +- { +- fprintf (vect_dump, "Build SLP failed: some of the stmts" +- " are in a pattern, and others are not "); +- print_generic_expr (vect_dump, oprnd, TDF_SLIM); +- } ++ pattern = true; ++ if (!first && !oprnd_info->first_pattern) ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "Build SLP failed: some of the stmts" ++ " are in a pattern, and others are not "); ++ print_generic_expr (vect_dump, oprnd, TDF_SLIM); ++ } + +- return false; +- } ++ return false; + } + + def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); +- dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt)); ++ dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt)); + +- if (*dt == vect_unknown_def_type) ++ if (dt == vect_unknown_def_type ++ || STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (def_stmt))) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "Unsupported pattern."); +@@ -190,11 +254,11 @@ + switch (gimple_code (def_stmt)) + { + case GIMPLE_PHI: +- def[i] = gimple_phi_result (def_stmt); ++ def = gimple_phi_result (def_stmt); + break; + + case GIMPLE_ASSIGN: +- def[i] = gimple_assign_lhs (def_stmt); ++ def = 
gimple_assign_lhs (def_stmt); + break; + + default: +@@ -204,117 +268,125 @@ + } + } + +- if (!*first_stmt_dt0) ++ if (first) + { +- /* op0 of the first stmt of the group - store its info. */ +- *first_stmt_dt0 = dt[i]; +- if (def[i]) +- *first_stmt_def0_type = TREE_TYPE (def[i]); +- else +- *first_stmt_const_oprnd = oprnd; ++ oprnd_info->first_dt = dt; ++ oprnd_info->first_pattern = pattern; ++ if (def) ++ { ++ oprnd_info->first_def_type = TREE_TYPE (def); ++ oprnd_info->first_const_oprnd = NULL_TREE; ++ } ++ else ++ { ++ oprnd_info->first_def_type = NULL_TREE; ++ oprnd_info->first_const_oprnd = oprnd; ++ } + +- /* Analyze costs (for the first stmt of the group only). */ +- if (rhs_class != GIMPLE_SINGLE_RHS) +- /* Not memory operation (we don't call this functions for loads). */ +- vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node); +- else +- /* Store. */ +- vect_model_store_cost (stmt_info, ncopies_for_cost, false, +- dt[0], slp_node); ++ if (i == 0) ++ { ++ def_op0 = def; ++ dt_op0 = dt; ++ /* Analyze costs (for the first stmt of the group only). */ ++ if (REFERENCE_CLASS_P (lhs)) ++ /* Store. */ ++ vect_model_store_cost (stmt_info, ncopies_for_cost, false, ++ dt, slp_node); ++ else ++ /* Not memory operation (we don't call this function for ++ loads). */ ++ vect_model_simple_cost (stmt_info, ncopies_for_cost, &dt, ++ slp_node); ++ } + } + + else + { +- if (!*first_stmt_dt1 && i == 1) +- { +- /* op1 of the first stmt of the group - store its info. */ +- *first_stmt_dt1 = dt[i]; +- if (def[i]) +- *first_stmt_def1_type = TREE_TYPE (def[i]); +- else +- { +- /* We assume that the stmt contains only one constant +- operand. We fail otherwise, to be on the safe side. */ +- if (*first_stmt_const_oprnd) +- { +- if (vect_print_dump_info (REPORT_SLP)) +- fprintf (vect_dump, "Build SLP failed: two constant " +- "oprnds in stmt"); +- return false; +- } +- *first_stmt_const_oprnd = oprnd; +- } +- } +- else +- { +- /* Not first stmt of the group, check that the def-stmt/s match +- the def-stmt/s of the first stmt. */ +- if ((i == 0 +- && (*first_stmt_dt0 != dt[i] +- || (*first_stmt_def0_type && def[0] +- && !types_compatible_p (*first_stmt_def0_type, +- TREE_TYPE (def[0]))))) +- || (i == 1 +- && (*first_stmt_dt1 != dt[i] +- || (*first_stmt_def1_type && def[1] +- && !types_compatible_p (*first_stmt_def1_type, +- TREE_TYPE (def[1]))))) +- || (!def[i] +- && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd), +- TREE_TYPE (oprnd))) +- || different_types) +- { +- if (i != number_of_oprnds - 1) +- different_types = true; ++ /* Not first stmt of the group, check that the def-stmt/s match ++ the def-stmt/s of the first stmt. Allow different definition ++ types for reduction chains: the first stmt must be a ++ vect_reduction_def (a phi node), and the rest ++ vect_internal_def. */ ++ if (((oprnd_info->first_dt != dt ++ && !(oprnd_info->first_dt == vect_reduction_def ++ && dt == vect_internal_def)) ++ || (oprnd_info->first_def_type != NULL_TREE ++ && def ++ && !types_compatible_p (oprnd_info->first_def_type, ++ TREE_TYPE (def)))) ++ || (!def ++ && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd), ++ TREE_TYPE (oprnd))) ++ || different_types) ++ { ++ if (number_of_oprnds != 2) ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ fprintf (vect_dump, "Build SLP failed: different types "); ++ ++ return false; ++ } ++ ++ /* Try to swap operands in case of binary operation. 
*/ ++ if (i == 0) ++ different_types = true; ++ else ++ { ++ oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0); ++ if (is_gimple_assign (stmt) ++ && (rhs_code = gimple_assign_rhs_code (stmt)) ++ && TREE_CODE_CLASS (rhs_code) == tcc_binary ++ && commutative_tree_code (rhs_code) ++ && oprnd0_info->first_dt == dt ++ && oprnd_info->first_dt == dt_op0 ++ && def_op0 && def ++ && !(oprnd0_info->first_def_type ++ && !types_compatible_p (oprnd0_info->first_def_type, ++ TREE_TYPE (def))) ++ && !(oprnd_info->first_def_type ++ && !types_compatible_p (oprnd_info->first_def_type, ++ TREE_TYPE (def_op0)))) ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ { ++ fprintf (vect_dump, "Swapping operands of "); ++ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); ++ } ++ ++ swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt), ++ gimple_assign_rhs2_ptr (stmt)); ++ } + else +- { +- if (is_gimple_assign (stmt) +- && (rhs_code = gimple_assign_rhs_code (stmt)) +- && TREE_CODE_CLASS (rhs_code) == tcc_binary +- && commutative_tree_code (rhs_code) +- && *first_stmt_dt0 == dt[1] +- && *first_stmt_dt1 == dt[0] +- && def[0] && def[1] +- && !(*first_stmt_def0_type +- && !types_compatible_p (*first_stmt_def0_type, +- TREE_TYPE (def[1]))) +- && !(*first_stmt_def1_type +- && !types_compatible_p (*first_stmt_def1_type, +- TREE_TYPE (def[0])))) +- { +- if (vect_print_dump_info (REPORT_SLP)) +- { +- fprintf (vect_dump, "Swapping operands of "); +- print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); +- } +- swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt), +- gimple_assign_rhs2_ptr (stmt)); +- } +- else +- { +- if (vect_print_dump_info (REPORT_SLP)) +- fprintf (vect_dump, "Build SLP failed: different types "); +- +- return false; +- } +- } ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ fprintf (vect_dump, "Build SLP failed: different types "); ++ ++ return false; ++ } + } + } + } + + /* Check the types of the definitions. 
*/ +- switch (dt[i]) ++ switch (dt) + { + case vect_constant_def: + case vect_external_def: ++ case vect_reduction_def: + break; + + case vect_internal_def: +- case vect_reduction_def: +- if ((i == 0 && !different_types) || (i == 1 && different_types)) +- VEC_safe_push (gimple, heap, *def_stmts0, def_stmt); ++ if (different_types) ++ { ++ oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0); ++ oprnd1_info = VEC_index (slp_oprnd_info, *oprnds_info, 0); ++ if (i == 0) ++ VEC_quick_push (gimple, oprnd1_info->def_stmts, def_stmt); ++ else ++ VEC_quick_push (gimple, oprnd0_info->def_stmts, def_stmt); ++ } + else +- VEC_safe_push (gimple, heap, *def_stmts1, def_stmt); ++ VEC_quick_push (gimple, oprnd_info->def_stmts, def_stmt); + break; + + default: +@@ -322,7 +394,7 @@ + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "Build SLP failed: illegal type of def "); +- print_generic_expr (vect_dump, def[i], TDF_SLIM); ++ print_generic_expr (vect_dump, def, TDF_SLIM); + } + + return false; +@@ -347,15 +419,10 @@ + VEC (slp_tree, heap) **loads, + unsigned int vectorization_factor, bool *loads_permuted) + { +- VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size); +- VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size); + unsigned int i; + VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node); + gimple stmt = VEC_index (gimple, stmts, 0); +- enum vect_def_type first_stmt_dt0 = vect_uninitialized_def; +- enum vect_def_type first_stmt_dt1 = vect_uninitialized_def; + enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK; +- tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE; + tree lhs; + bool stop_recursion = false, need_same_oprnds = false; + tree vectype, scalar_type, first_op1 = NULL_TREE; +@@ -364,13 +431,21 @@ + int icode; + enum machine_mode optab_op2_mode; + enum machine_mode vec_mode; +- tree first_stmt_const_oprnd = NULL_TREE; + struct data_reference *first_dr; +- bool pattern0 = false, pattern1 = false; + HOST_WIDE_INT dummy; + bool permutation = false; + unsigned int load_place; + gimple first_load, prev_first_load = NULL; ++ VEC (slp_oprnd_info, heap) *oprnds_info; ++ unsigned int nops; ++ slp_oprnd_info oprnd_info; ++ ++ if (is_gimple_call (stmt)) ++ nops = gimple_call_num_args (stmt); ++ else ++ nops = gimple_num_ops (stmt) - 1; ++ ++ oprnds_info = vect_create_oprnd_info (nops, group_size); + + /* For every stmt in NODE find its def stmt/s. 
*/ + FOR_EACH_VEC_ELT (gimple, stmts, i, stmt) +@@ -391,6 +466,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + +@@ -400,10 +476,11 @@ + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, +- "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL"); ++ "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL "); + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + +@@ -416,6 +493,8 @@ + fprintf (vect_dump, "Build SLP failed: unsupported data-type "); + print_generic_expr (vect_dump, scalar_type, TDF_SLIM); + } ++ ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + +@@ -462,6 +541,7 @@ + { + if (vect_print_dump_info (REPORT_SLP)) + fprintf (vect_dump, "Build SLP failed: no optab."); ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + icode = (int) optab_handler (optab, vec_mode); +@@ -470,6 +550,7 @@ + if (vect_print_dump_info (REPORT_SLP)) + fprintf (vect_dump, "Build SLP failed: " + "op not supported by target."); ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + optab_op2_mode = insn_data[icode].operand[2].mode; +@@ -506,6 +587,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + +@@ -519,6 +601,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + } +@@ -530,15 +613,12 @@ + { + /* Store. */ + if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, +- stmt, &def_stmts0, &def_stmts1, +- &first_stmt_dt0, +- &first_stmt_dt1, +- &first_stmt_def0_type, +- &first_stmt_def1_type, +- &first_stmt_const_oprnd, +- ncopies_for_cost, +- &pattern0, &pattern1)) +- return false; ++ stmt, ncopies_for_cost, ++ (i == 0), &oprnds_info)) ++ { ++ vect_free_oprnd_info (&oprnds_info); ++ return false; ++ } + } + else + { +@@ -556,6 +636,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + +@@ -573,6 +654,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + +@@ -593,6 +675,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + } +@@ -612,6 +695,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + +@@ -639,7 +723,7 @@ + { + if (TREE_CODE_CLASS (rhs_code) == tcc_reference) + { +- /* Not strided load. */ ++ /* Not strided load. */ + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "Build SLP failed: not strided load "); +@@ -647,6 +731,7 @@ + } + + /* FORNOW: Not strided loads are not supported. */ ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + +@@ -661,19 +746,18 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + + /* Find the def-stmts. 
*/ + if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt, +- &def_stmts0, &def_stmts1, +- &first_stmt_dt0, &first_stmt_dt1, +- &first_stmt_def0_type, +- &first_stmt_def1_type, +- &first_stmt_const_oprnd, +- ncopies_for_cost, +- &pattern0, &pattern1)) +- return false; ++ ncopies_for_cost, (i == 0), ++ &oprnds_info)) ++ { ++ vect_free_oprnd_info (&oprnds_info); ++ return false; ++ } + } + } + +@@ -702,46 +786,37 @@ + *loads_permuted = true; + } + ++ vect_free_oprnd_info (&oprnds_info); + return true; + } + + /* Create SLP_TREE nodes for the definition node/s. */ +- if (first_stmt_dt0 == vect_internal_def) +- { +- slp_tree left_node = XNEW (struct _slp_tree); +- SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0; +- SLP_TREE_VEC_STMTS (left_node) = NULL; +- SLP_TREE_LEFT (left_node) = NULL; +- SLP_TREE_RIGHT (left_node) = NULL; +- SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0; +- SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0; +- if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size, +- inside_cost, outside_cost, ncopies_for_cost, +- max_nunits, load_permutation, loads, +- vectorization_factor, loads_permuted)) +- return false; +- +- SLP_TREE_LEFT (*node) = left_node; +- } +- +- if (first_stmt_dt1 == vect_internal_def) +- { +- slp_tree right_node = XNEW (struct _slp_tree); +- SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1; +- SLP_TREE_VEC_STMTS (right_node) = NULL; +- SLP_TREE_LEFT (right_node) = NULL; +- SLP_TREE_RIGHT (right_node) = NULL; +- SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0; +- SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0; +- if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size, +- inside_cost, outside_cost, ncopies_for_cost, +- max_nunits, load_permutation, loads, +- vectorization_factor, loads_permuted)) +- return false; +- +- SLP_TREE_RIGHT (*node) = right_node; +- } +- ++ FOR_EACH_VEC_ELT (slp_oprnd_info, oprnds_info, i, oprnd_info) ++ { ++ slp_tree child; ++ ++ if (oprnd_info->first_dt != vect_internal_def) ++ continue; ++ ++ child = vect_create_new_slp_node (oprnd_info->def_stmts); ++ if (!child ++ || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size, ++ inside_cost, outside_cost, ncopies_for_cost, ++ max_nunits, load_permutation, loads, ++ vectorization_factor, loads_permuted)) ++ { ++ if (child) ++ oprnd_info->def_stmts = NULL; ++ vect_free_slp_tree (child); ++ vect_free_oprnd_info (&oprnds_info); ++ return false; ++ } ++ ++ oprnd_info->def_stmts = NULL; ++ VEC_quick_push (slp_void_p, SLP_TREE_CHILDREN (*node), child); ++ } ++ ++ vect_free_oprnd_info (&oprnds_info); + return true; + } + +@@ -751,6 +826,7 @@ + { + int i; + gimple stmt; ++ slp_void_p child; + + if (!node) + return; +@@ -763,8 +839,8 @@ + } + fprintf (vect_dump, "\n"); + +- vect_print_slp_tree (SLP_TREE_LEFT (node)); +- vect_print_slp_tree (SLP_TREE_RIGHT (node)); ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_print_slp_tree ((slp_tree) child); + } + + +@@ -778,6 +854,7 @@ + { + int i; + gimple stmt; ++ slp_void_p child; + + if (!node) + return; +@@ -786,8 +863,8 @@ + if (j < 0 || i == j) + STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark; + +- vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j); +- vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j); ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_mark_slp_stmts ((slp_tree) child, mark, j); + } + + +@@ -799,6 +876,7 @@ + int i; + gimple stmt; + stmt_vec_info stmt_info; ++ slp_void_p child; + + if (!node) + return; +@@ -811,8 +889,8 @@ + 
STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope; + } + +- vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node)); +- vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node)); ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_mark_slp_stmts_relevant ((slp_tree) child); + } + + +@@ -885,12 +963,13 @@ + gimple stmt; + VEC (gimple, heap) *tmp_stmts; + unsigned int index, i; ++ slp_void_p child; + + if (!node) + return; + +- vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation); +- vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation); ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation); + + gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))); + tmp_stmts = VEC_alloc (gimple, heap, group_size); +@@ -1253,7 +1332,7 @@ + gimple stmt) + { + slp_instance new_instance; +- slp_tree node = XNEW (struct _slp_tree); ++ slp_tree node; + unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt)); + unsigned int unrolling_factor = 1, nunits; + tree vectype, scalar_type = NULL_TREE; +@@ -1265,6 +1344,7 @@ + VEC (slp_tree, heap) *loads; + struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); + bool loads_permuted = false; ++ VEC (gimple, heap) *scalar_stmts; + + if (dr) + { +@@ -1308,39 +1388,26 @@ + } + + /* Create a node (a root of the SLP tree) for the packed strided stores. */ +- SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size); ++ scalar_stmts = VEC_alloc (gimple, heap, group_size); + next = stmt; + if (dr) + { + /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */ + while (next) + { +- VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next); ++ VEC_safe_push (gimple, heap, scalar_stmts, next); + next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); + } + } + else + { + /* Collect reduction statements. 
*/ +- for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, +- next); +- i++) +- { +- VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next); +- if (vect_print_dump_info (REPORT_DETAILS)) +- { +- fprintf (vect_dump, "pushing reduction into node: "); +- print_gimple_stmt (vect_dump, next, 0, TDF_SLIM); +- } +- } ++ VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo); ++ for (i = 0; VEC_iterate (gimple, reductions, i, next); i++) ++ VEC_safe_push (gimple, heap, scalar_stmts, next); + } + +- SLP_TREE_VEC_STMTS (node) = NULL; +- SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0; +- SLP_TREE_LEFT (node) = NULL; +- SLP_TREE_RIGHT (node) = NULL; +- SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0; +- SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0; ++ node = vect_create_new_slp_node (scalar_stmts); + + /* Calculate the number of vector stmts to create based on the unrolling + factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is +@@ -1517,6 +1584,7 @@ + imm_use_iterator imm_iter; + gimple use_stmt; + stmt_vec_info stmt_vinfo; ++ slp_void_p child; + + if (!node) + return; +@@ -1534,8 +1602,8 @@ + == vect_reduction_def)) + vect_mark_slp_stmts (node, hybrid, i); + +- vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node)); +- vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node)); ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_detect_hybrid_slp_stmts ((slp_tree) child); + } + + +@@ -1625,13 +1693,14 @@ + bool dummy; + int i; + gimple stmt; ++ slp_void_p child; + + if (!node) + return true; + +- if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node)) +- || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node))) +- return false; ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child)) ++ return false; + + FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt) + { +@@ -2207,88 +2276,102 @@ + If the scalar definitions are loop invariants or constants, collect them and + call vect_get_constant_vectors() to create vector stmts. + Otherwise, the def-stmts must be already vectorized and the vectorized stmts +- must be stored in the LEFT/RIGHT node of SLP_NODE, and we call +- vect_get_slp_vect_defs() to retrieve them. +- If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from +- the right node. This is used when the second operand must remain scalar. */ ++ must be stored in the corresponding child of SLP_NODE, and we call ++ vect_get_slp_vect_defs () to retrieve them. */ + + void +-vect_get_slp_defs (tree op0, tree op1, slp_tree slp_node, +- VEC (tree,heap) **vec_oprnds0, +- VEC (tree,heap) **vec_oprnds1, int reduc_index) ++vect_get_slp_defs (VEC (tree, heap) *ops, slp_tree slp_node, ++ VEC (slp_void_p, heap) **vec_oprnds, int reduc_index) + { +- gimple first_stmt; +- enum tree_code code; +- int number_of_vects; ++ gimple first_stmt, first_def; ++ int number_of_vects = 0, i; ++ unsigned int child_index = 0; + HOST_WIDE_INT lhs_size_unit, rhs_size_unit; ++ slp_tree child = NULL; ++ VEC (tree, heap) *vec_defs; ++ tree oprnd, def_lhs; ++ bool vectorized_defs; + + first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0); +- /* The number of vector defs is determined by the number of vector statements +- in the node from which we get those statements. 
*/ +- if (SLP_TREE_LEFT (slp_node)) +- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node)); +- else +- { +- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); +- /* Number of vector stmts was calculated according to LHS in +- vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if +- necessary. See vect_get_smallest_scalar_type () for details. */ +- vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit, +- &rhs_size_unit); +- if (rhs_size_unit != lhs_size_unit) +- { +- number_of_vects *= rhs_size_unit; +- number_of_vects /= lhs_size_unit; +- } ++ FOR_EACH_VEC_ELT (tree, ops, i, oprnd) ++ { ++ /* For each operand we check if it has vectorized definitions in a child ++ node or we need to create them (for invariants and constants). We ++ check if the LHS of the first stmt of the next child matches OPRND. ++ If it does, we found the correct child. Otherwise, we call ++ vect_get_constant_vectors (), and not advance CHILD_INDEX in order ++ to check this child node for the next operand. */ ++ vectorized_defs = false; ++ if (VEC_length (slp_void_p, SLP_TREE_CHILDREN (slp_node)) > child_index) ++ { ++ child = (slp_tree) VEC_index (slp_void_p, ++ SLP_TREE_CHILDREN (slp_node), ++ child_index); ++ first_def = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (child), 0); ++ ++ /* In the end of a pattern sequence we have a use of the original stmt, ++ so we need to compare OPRND with the original def. */ ++ if (is_pattern_stmt_p (vinfo_for_stmt (first_def)) ++ && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt)) ++ && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt))) ++ first_def = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def)); ++ ++ if (is_gimple_call (first_def)) ++ def_lhs = gimple_call_lhs (first_def); ++ else ++ def_lhs = gimple_assign_lhs (first_def); ++ ++ if (operand_equal_p (oprnd, def_lhs, 0)) ++ { ++ /* The number of vector defs is determined by the number of ++ vector statements in the node from which we get those ++ statements. */ ++ number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child); ++ vectorized_defs = true; ++ child_index++; ++ } ++ } ++ ++ if (!vectorized_defs) ++ { ++ if (i == 0) ++ { ++ number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++ /* Number of vector stmts was calculated according to LHS in ++ vect_schedule_slp_instance (), fix it by replacing LHS with ++ RHS, if necessary. See vect_get_smallest_scalar_type () for ++ details. */ ++ vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit, ++ &rhs_size_unit); ++ if (rhs_size_unit != lhs_size_unit) ++ { ++ number_of_vects *= rhs_size_unit; ++ number_of_vects /= lhs_size_unit; ++ } ++ } ++ } ++ ++ /* Allocate memory for vectorized defs. */ ++ vec_defs = VEC_alloc (tree, heap, number_of_vects); ++ ++ /* For reduction defs we call vect_get_constant_vectors (), since we are ++ looking for initial loop invariant values. */ ++ if (vectorized_defs && reduc_index == -1) ++ /* The defs are already vectorized. */ ++ vect_get_slp_vect_defs (child, &vec_defs); ++ else ++ /* Build vectors from scalar defs. */ ++ vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i, ++ number_of_vects, reduc_index); ++ ++ VEC_quick_push (slp_void_p, *vec_oprnds, (slp_void_p) vec_defs); ++ ++ /* For reductions, we only need initial values. */ ++ if (reduc_index != -1) ++ return; + } +- +- /* Allocate memory for vectorized defs. */ +- *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects); +- +- /* SLP_NODE corresponds either to a group of stores or to a group of +- unary/binary operations. 
We don't call this function for loads. +- For reduction defs we call vect_get_constant_vectors(), since we are +- looking for initial loop invariant values. */ +- if (SLP_TREE_LEFT (slp_node) && reduc_index == -1) +- /* The defs are already vectorized. */ +- vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0); +- else +- /* Build vectors from scalar defs. */ +- vect_get_constant_vectors (op0, slp_node, vec_oprnds0, 0, number_of_vects, +- reduc_index); +- +- if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))) +- /* Since we don't call this function with loads, this is a group of +- stores. */ +- return; +- +- /* For reductions, we only need initial values. */ +- if (reduc_index != -1) +- return; +- +- code = gimple_assign_rhs_code (first_stmt); +- if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1) +- return; +- +- /* The number of vector defs is determined by the number of vector statements +- in the node from which we get those statements. */ +- if (SLP_TREE_RIGHT (slp_node)) +- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node)); +- else +- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); +- +- *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects); +- +- if (SLP_TREE_RIGHT (slp_node)) +- /* The defs are already vectorized. */ +- vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1); +- else +- /* Build vectors from scalar defs. */ +- vect_get_constant_vectors (op1, slp_node, vec_oprnds1, 1, number_of_vects, +- -1); + } + +- + /* Create NCOPIES permutation statements using the mask MASK_BYTES (by + building a vector of type MASK_TYPE from it) and two input vectors placed in + DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and +@@ -2605,14 +2688,14 @@ + tree vectype; + int i; + slp_tree loads_node; ++ slp_void_p child; + + if (!node) + return false; + +- vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance, +- vectorization_factor); +- vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance, +- vectorization_factor); ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_schedule_slp_instance ((slp_tree) child, instance, ++ vectorization_factor); + + stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0); + stmt_info = vinfo_for_stmt (stmt); + +=== modified file 'gcc/tree-vect-stmts.c' +--- old/gcc/tree-vect-stmts.c 2011-10-27 11:27:59 +0000 ++++ new/gcc/tree-vect-stmts.c 2011-11-14 11:38:08 +0000 +@@ -1419,16 +1419,35 @@ + } + + +-/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not +- NULL. */ ++/* Get vectorized definitions for OP0 and OP1. ++ REDUC_INDEX is the index of reduction operand in case of reduction, ++ and -1 otherwise. */ + +-static void ++void + vect_get_vec_defs (tree op0, tree op1, gimple stmt, +- VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1, +- slp_tree slp_node) ++ VEC (tree, heap) **vec_oprnds0, ++ VEC (tree, heap) **vec_oprnds1, ++ slp_tree slp_node, int reduc_index) + { + if (slp_node) +- vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1, -1); ++ { ++ int nops = (op1 == NULL_TREE) ? 
1 : 2; ++ VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops); ++ VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops); ++ ++ VEC_quick_push (tree, ops, op0); ++ if (op1) ++ VEC_quick_push (tree, ops, op1); ++ ++ vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index); ++ ++ *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0); ++ if (op1) ++ *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1); ++ ++ VEC_free (tree, heap, ops); ++ VEC_free (slp_void_p, heap, vec_defs); ++ } + else + { + tree vec_oprnd; +@@ -2016,7 +2035,8 @@ + for (j = 0; j < ncopies; j++) + { + if (j == 0) +- vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node); ++ vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node, ++ -1); + else + vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL); + +@@ -2221,7 +2241,7 @@ + { + /* Handle uses. */ + if (j == 0) +- vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node); ++ vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1); + else + vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL); + +@@ -2576,10 +2596,10 @@ + operand 1 should be of a vector type (the usual case). */ + if (vec_oprnd1) + vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, +- slp_node); ++ slp_node, -1); + else + vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, +- slp_node); ++ slp_node, -1); + } + else + vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1); +@@ -2887,10 +2907,10 @@ + { + if (op_type == binary_op || op_type == ternary_op) + vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, +- slp_node); ++ slp_node, -1); + else + vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, +- slp_node); ++ slp_node, -1); + if (op_type == ternary_op) + { + vec_oprnds2 = VEC_alloc (tree, heap, 1); +@@ -3202,7 +3222,8 @@ + { + /* Handle uses. */ + if (slp_node) +- vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, -1); ++ vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, ++ slp_node, -1); + else + { + VEC_free (tree, heap, vec_oprnds0); +@@ -3548,12 +3569,12 @@ + for (k = 0; k < slp_node->vec_stmts_size - 1; k++) + VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); + +- vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, +- -1); ++ vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, ++ slp_node, -1); + } + else +- vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, +- &vec_oprnds1, -1); ++ vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, ++ &vec_oprnds1, slp_node, -1); + } + else + { +@@ -3796,6 +3817,7 @@ + vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0); + first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); ++ op = gimple_assign_rhs1 (first_stmt); + } + else + /* VEC_NUM is the number of vect stmts to be created for this +@@ -3878,8 +3900,8 @@ + if (slp) + { + /* Get vectorized arguments for SLP_NODE. */ +- vect_get_slp_defs (NULL_TREE, NULL_TREE, slp_node, &vec_oprnds, +- NULL, -1); ++ vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, ++ NULL, slp_node, -1); + + vec_oprnd = VEC_index (tree, vec_oprnds, 0); + } +@@ -5040,7 +5062,7 @@ + In basic blocks we only analyze statements that are a part of some SLP + instance, therefore, all the statements are relevant. 
+ +- Pattern statement need to be analyzed instead of the original statement ++ Pattern statement needs to be analyzed instead of the original statement + if the original statement is not relevant. Otherwise, we analyze both + statements. */ + + +=== modified file 'gcc/tree-vectorizer.h' +--- old/gcc/tree-vectorizer.h 2011-10-23 13:33:07 +0000 ++++ new/gcc/tree-vectorizer.h 2011-11-14 11:38:08 +0000 +@@ -73,15 +73,15 @@ + /************************************************************************ + SLP + ************************************************************************/ ++typedef void *slp_void_p; ++DEF_VEC_P (slp_void_p); ++DEF_VEC_ALLOC_P (slp_void_p, heap); + +-/* A computation tree of an SLP instance. Each node corresponds to a group of ++/* A computation tree of an SLP instance. Each node corresponds to a group of + stmts to be packed in a SIMD stmt. */ + typedef struct _slp_tree { +- /* Only binary and unary operations are supported. LEFT child corresponds to +- the first operand and RIGHT child to the second if the operation is +- binary. */ +- struct _slp_tree *left; +- struct _slp_tree *right; ++ /* Nodes that contain def-stmts of this node statements operands. */ ++ VEC (slp_void_p, heap) *children; + /* A group of scalar stmts to be vectorized together. */ + VEC (gimple, heap) *stmts; + /* Vectorized stmt/s. */ +@@ -146,14 +146,32 @@ + #define SLP_INSTANCE_LOADS(S) (S)->loads + #define SLP_INSTANCE_FIRST_LOAD_STMT(S) (S)->first_load + +-#define SLP_TREE_LEFT(S) (S)->left +-#define SLP_TREE_RIGHT(S) (S)->right ++#define SLP_TREE_CHILDREN(S) (S)->children + #define SLP_TREE_SCALAR_STMTS(S) (S)->stmts + #define SLP_TREE_VEC_STMTS(S) (S)->vec_stmts + #define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size + #define SLP_TREE_OUTSIDE_OF_LOOP_COST(S) (S)->cost.outside_of_loop + #define SLP_TREE_INSIDE_OF_LOOP_COST(S) (S)->cost.inside_of_loop + ++/* This structure is used in creation of an SLP tree. Each instance ++ corresponds to the same operand in a group of scalar stmts in an SLP ++ node. */ ++typedef struct _slp_oprnd_info ++{ ++ /* Def-stmts for the operands. */ ++ VEC (gimple, heap) *def_stmts; ++ /* Information about the first statement, its vector def-type, type, the ++ operand itself in case it's constant, and an indication if it's a pattern ++ stmt. */ ++ enum vect_def_type first_dt; ++ tree first_def_type; ++ tree first_const_oprnd; ++ bool first_pattern; ++} *slp_oprnd_info; ++ ++DEF_VEC_P(slp_oprnd_info); ++DEF_VEC_ALLOC_P(slp_oprnd_info, heap); ++ + + typedef struct _vect_peel_info + { +@@ -819,6 +837,8 @@ + unsigned int *, unsigned int *); + extern void vect_get_store_cost (struct data_reference *, int, unsigned int *); + extern bool vect_supportable_shift (enum tree_code, tree); ++extern void vect_get_vec_defs (tree, tree, gimple, VEC (tree, heap) **, ++ VEC (tree, heap) **, slp_tree, int); + + /* In tree-vect-data-refs.c. 
*/ + extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int); +@@ -885,8 +905,9 @@ + extern bool vect_analyze_slp (loop_vec_info, bb_vec_info); + extern void vect_make_slp_decision (loop_vec_info); + extern void vect_detect_hybrid_slp (loop_vec_info); +-extern void vect_get_slp_defs (tree, tree, slp_tree, VEC (tree,heap) **, +- VEC (tree,heap) **, int); ++extern void vect_get_slp_defs (VEC (tree, heap) *, slp_tree, ++ VEC (slp_void_p, heap) **, int); ++ + extern LOC find_bb_location (basic_block); + extern bb_vec_info vect_slp_analyze_bb (basic_block); + extern void vect_slp_transform_bb (basic_block); + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106841.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106841.patch new file mode 100644 index 0000000000..d72446919f --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106841.patch @@ -0,0 +1,515 @@ +2011-11-21 Michael Hope <michael.hope@linaro.org> + + Backport from mainline r180131: + + 2011-10-18 Julian Brown <julian@codesourcery.com> + + gcc/ + * config/arm/arm.c (arm_block_move_unaligned_straight) + (arm_adjust_block_mem, arm_block_move_unaligned_loop) + (arm_movmemqi_unaligned): New. + (arm_gen_movmemqi): Support unaligned block copies. + + gcc/testsuite/ + * lib/target-supports.exp (check_effective_target_arm_unaligned): New. + * gcc.target/arm/unaligned-memcpy-1.c: New. + * gcc.target/arm/unaligned-memcpy-2.c: New. + * gcc.target/arm/unaligned-memcpy-3.c: New. + * gcc.target/arm/unaligned-memcpy-4.c: New. + + 2011-09-15 James Greenhalgh <james.greenhalgh@arm.com> + + gcc/ + * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): New builtin macro. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-10-26 11:38:30 +0000 ++++ new/gcc/config/arm/arm.c 2011-11-21 01:45:54 +0000 +@@ -10803,6 +10803,335 @@ + return true; + } + ++/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit ++ unaligned copies on processors which support unaligned semantics for those ++ instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency ++ (using more registers) by doing e.g. load/load/store/store for a factor of 2. ++ An interleave factor of 1 (the minimum) will perform no interleaving. ++ Load/store multiple are used for aligned addresses where possible. */ ++ ++static void ++arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, ++ HOST_WIDE_INT length, ++ unsigned int interleave_factor) ++{ ++ rtx *regs = XALLOCAVEC (rtx, interleave_factor); ++ int *regnos = XALLOCAVEC (int, interleave_factor); ++ HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD; ++ HOST_WIDE_INT i, j; ++ HOST_WIDE_INT remaining = length, words; ++ rtx halfword_tmp = NULL, byte_tmp = NULL; ++ rtx dst, src; ++ bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD; ++ bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD; ++ HOST_WIDE_INT srcoffset, dstoffset; ++ HOST_WIDE_INT src_autoinc, dst_autoinc; ++ rtx mem, addr; ++ ++ gcc_assert (1 <= interleave_factor && interleave_factor <= 4); ++ ++ /* Use hard registers if we have aligned source or destination so we can use ++ load/store multiple with contiguous registers. 
*/ ++ if (dst_aligned || src_aligned) ++ for (i = 0; i < interleave_factor; i++) ++ regs[i] = gen_rtx_REG (SImode, i); ++ else ++ for (i = 0; i < interleave_factor; i++) ++ regs[i] = gen_reg_rtx (SImode); ++ ++ dst = copy_addr_to_reg (XEXP (dstbase, 0)); ++ src = copy_addr_to_reg (XEXP (srcbase, 0)); ++ ++ srcoffset = dstoffset = 0; ++ ++ /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST. ++ For copying the last bytes we want to subtract this offset again. */ ++ src_autoinc = dst_autoinc = 0; ++ ++ for (i = 0; i < interleave_factor; i++) ++ regnos[i] = i; ++ ++ /* Copy BLOCK_SIZE_BYTES chunks. */ ++ ++ for (i = 0; i + block_size_bytes <= length; i += block_size_bytes) ++ { ++ /* Load words. */ ++ if (src_aligned && interleave_factor > 1) ++ { ++ emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src, ++ TRUE, srcbase, &srcoffset)); ++ src_autoinc += UNITS_PER_WORD * interleave_factor; ++ } ++ else ++ { ++ for (j = 0; j < interleave_factor; j++) ++ { ++ addr = plus_constant (src, srcoffset + j * UNITS_PER_WORD ++ - src_autoinc); ++ mem = adjust_automodify_address (srcbase, SImode, addr, ++ srcoffset + j * UNITS_PER_WORD); ++ emit_insn (gen_unaligned_loadsi (regs[j], mem)); ++ } ++ srcoffset += block_size_bytes; ++ } ++ ++ /* Store words. */ ++ if (dst_aligned && interleave_factor > 1) ++ { ++ emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst, ++ TRUE, dstbase, &dstoffset)); ++ dst_autoinc += UNITS_PER_WORD * interleave_factor; ++ } ++ else ++ { ++ for (j = 0; j < interleave_factor; j++) ++ { ++ addr = plus_constant (dst, dstoffset + j * UNITS_PER_WORD ++ - dst_autoinc); ++ mem = adjust_automodify_address (dstbase, SImode, addr, ++ dstoffset + j * UNITS_PER_WORD); ++ emit_insn (gen_unaligned_storesi (mem, regs[j])); ++ } ++ dstoffset += block_size_bytes; ++ } ++ ++ remaining -= block_size_bytes; ++ } ++ ++ /* Copy any whole words left (note these aren't interleaved with any ++ subsequent halfword/byte load/stores in the interests of simplicity). */ ++ ++ words = remaining / UNITS_PER_WORD; ++ ++ gcc_assert (words < interleave_factor); ++ ++ if (src_aligned && words > 1) ++ { ++ emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase, ++ &srcoffset)); ++ src_autoinc += UNITS_PER_WORD * words; ++ } ++ else ++ { ++ for (j = 0; j < words; j++) ++ { ++ addr = plus_constant (src, ++ srcoffset + j * UNITS_PER_WORD - src_autoinc); ++ mem = adjust_automodify_address (srcbase, SImode, addr, ++ srcoffset + j * UNITS_PER_WORD); ++ emit_insn (gen_unaligned_loadsi (regs[j], mem)); ++ } ++ srcoffset += words * UNITS_PER_WORD; ++ } ++ ++ if (dst_aligned && words > 1) ++ { ++ emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase, ++ &dstoffset)); ++ dst_autoinc += words * UNITS_PER_WORD; ++ } ++ else ++ { ++ for (j = 0; j < words; j++) ++ { ++ addr = plus_constant (dst, ++ dstoffset + j * UNITS_PER_WORD - dst_autoinc); ++ mem = adjust_automodify_address (dstbase, SImode, addr, ++ dstoffset + j * UNITS_PER_WORD); ++ emit_insn (gen_unaligned_storesi (mem, regs[j])); ++ } ++ dstoffset += words * UNITS_PER_WORD; ++ } ++ ++ remaining -= words * UNITS_PER_WORD; ++ ++ gcc_assert (remaining < 4); ++ ++ /* Copy a halfword if necessary. 
*/ ++ ++ if (remaining >= 2) ++ { ++ halfword_tmp = gen_reg_rtx (SImode); ++ ++ addr = plus_constant (src, srcoffset - src_autoinc); ++ mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset); ++ emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem)); ++ ++ /* Either write out immediately, or delay until we've loaded the last ++ byte, depending on interleave factor. */ ++ if (interleave_factor == 1) ++ { ++ addr = plus_constant (dst, dstoffset - dst_autoinc); ++ mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset); ++ emit_insn (gen_unaligned_storehi (mem, ++ gen_lowpart (HImode, halfword_tmp))); ++ halfword_tmp = NULL; ++ dstoffset += 2; ++ } ++ ++ remaining -= 2; ++ srcoffset += 2; ++ } ++ ++ gcc_assert (remaining < 2); ++ ++ /* Copy last byte. */ ++ ++ if ((remaining & 1) != 0) ++ { ++ byte_tmp = gen_reg_rtx (SImode); ++ ++ addr = plus_constant (src, srcoffset - src_autoinc); ++ mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset); ++ emit_move_insn (gen_lowpart (QImode, byte_tmp), mem); ++ ++ if (interleave_factor == 1) ++ { ++ addr = plus_constant (dst, dstoffset - dst_autoinc); ++ mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset); ++ emit_move_insn (mem, gen_lowpart (QImode, byte_tmp)); ++ byte_tmp = NULL; ++ dstoffset++; ++ } ++ ++ remaining--; ++ srcoffset++; ++ } ++ ++ /* Store last halfword if we haven't done so already. */ ++ ++ if (halfword_tmp) ++ { ++ addr = plus_constant (dst, dstoffset - dst_autoinc); ++ mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset); ++ emit_insn (gen_unaligned_storehi (mem, ++ gen_lowpart (HImode, halfword_tmp))); ++ dstoffset += 2; ++ } ++ ++ /* Likewise for last byte. */ ++ ++ if (byte_tmp) ++ { ++ addr = plus_constant (dst, dstoffset - dst_autoinc); ++ mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset); ++ emit_move_insn (mem, gen_lowpart (QImode, byte_tmp)); ++ dstoffset++; ++ } ++ ++ gcc_assert (remaining == 0 && srcoffset == dstoffset); ++} ++ ++/* From mips_adjust_block_mem: ++ ++ Helper function for doing a loop-based block operation on memory ++ reference MEM. Each iteration of the loop will operate on LENGTH ++ bytes of MEM. ++ ++ Create a new base register for use within the loop and point it to ++ the start of MEM. Create a new memory reference that uses this ++ register. Store them in *LOOP_REG and *LOOP_MEM respectively. */ ++ ++static void ++arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg, ++ rtx *loop_mem) ++{ ++ *loop_reg = copy_addr_to_reg (XEXP (mem, 0)); ++ ++ /* Although the new mem does not refer to a known location, ++ it does keep up to LENGTH bytes of alignment. */ ++ *loop_mem = change_address (mem, BLKmode, *loop_reg); ++ set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT)); ++} ++ ++/* From mips_block_move_loop: ++ ++ Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER ++ bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that ++ the memory regions do not overlap. */ ++ ++static void ++arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length, ++ unsigned int interleave_factor, ++ HOST_WIDE_INT bytes_per_iter) ++{ ++ rtx label, src_reg, dest_reg, final_src, test; ++ HOST_WIDE_INT leftover; ++ ++ leftover = length % bytes_per_iter; ++ length -= leftover; ++ ++ /* Create registers and memory references for use within the loop. 
*/ ++ arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src); ++ arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest); ++ ++ /* Calculate the value that SRC_REG should have after the last iteration of ++ the loop. */ ++ final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length), ++ 0, 0, OPTAB_WIDEN); ++ ++ /* Emit the start of the loop. */ ++ label = gen_label_rtx (); ++ emit_label (label); ++ ++ /* Emit the loop body. */ ++ arm_block_move_unaligned_straight (dest, src, bytes_per_iter, ++ interleave_factor); ++ ++ /* Move on to the next block. */ ++ emit_move_insn (src_reg, plus_constant (src_reg, bytes_per_iter)); ++ emit_move_insn (dest_reg, plus_constant (dest_reg, bytes_per_iter)); ++ ++ /* Emit the loop condition. */ ++ test = gen_rtx_NE (VOIDmode, src_reg, final_src); ++ emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label)); ++ ++ /* Mop up any left-over bytes. */ ++ if (leftover) ++ arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor); ++} ++ ++/* Emit a block move when either the source or destination is unaligned (not ++ aligned to a four-byte boundary). This may need further tuning depending on ++ core type, optimize_size setting, etc. */ ++ ++static int ++arm_movmemqi_unaligned (rtx *operands) ++{ ++ HOST_WIDE_INT length = INTVAL (operands[2]); ++ ++ if (optimize_size) ++ { ++ bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD; ++ bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD; ++ /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit ++ size of code if optimizing for size. We'll use ldm/stm if src_aligned ++ or dst_aligned though: allow more interleaving in those cases since the ++ resulting code can be smaller. */ ++ unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1; ++ HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4; ++ ++ if (length > 12) ++ arm_block_move_unaligned_loop (operands[0], operands[1], length, ++ interleave_factor, bytes_per_iter); ++ else ++ arm_block_move_unaligned_straight (operands[0], operands[1], length, ++ interleave_factor); ++ } ++ else ++ { ++ /* Note that the loop created by arm_block_move_unaligned_loop may be ++ subject to loop unrolling, which makes tuning this condition a little ++ redundant. */ ++ if (length > 32) ++ arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16); ++ else ++ arm_block_move_unaligned_straight (operands[0], operands[1], length, 4); ++ } ++ ++ return 1; ++} ++ + int + arm_gen_movmemqi (rtx *operands) + { +@@ -10815,8 +11144,13 @@ + + if (GET_CODE (operands[2]) != CONST_INT + || GET_CODE (operands[3]) != CONST_INT +- || INTVAL (operands[2]) > 64 +- || INTVAL (operands[3]) & 3) ++ || INTVAL (operands[2]) > 64) ++ return 0; ++ ++ if (unaligned_access && (INTVAL (operands[3]) & 3) != 0) ++ return arm_movmemqi_unaligned (operands); ++ ++ if (INTVAL (operands[3]) & 3) + return 0; + + dstbase = operands[0]; + +=== modified file 'gcc/config/arm/arm.h' +--- old/gcc/config/arm/arm.h 2011-10-19 17:01:50 +0000 ++++ new/gcc/config/arm/arm.h 2011-11-21 01:45:54 +0000 +@@ -47,6 +47,8 @@ + { \ + if (TARGET_DSP_MULTIPLY) \ + builtin_define ("__ARM_FEATURE_DSP"); \ ++ if (unaligned_access) \ ++ builtin_define ("__ARM_FEATURE_UNALIGNED"); \ + /* Define __arm__ even when in thumb mode, for \ + consistency with armcc. 
*/ \ + builtin_define ("__arm__"); \ + +=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c' +--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c 2011-10-19 22:56:19 +0000 +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_unaligned } */ ++/* { dg-options "-O2" } */ ++ ++#include <string.h> ++ ++void unknown_alignment (char *dest, char *src) ++{ ++ memcpy (dest, src, 15); ++} ++ ++/* We should see three unaligned word loads and store pairs, one unaligned ++ ldrh/strh pair, and an ldrb/strb pair. Sanity check that. */ ++ ++/* { dg-final { scan-assembler-times "@ unaligned" 8 } } */ ++/* { dg-final { scan-assembler-times "ldrh" 1 } } */ ++/* { dg-final { scan-assembler-times "strh" 1 } } */ ++/* { dg-final { scan-assembler-times "ldrb" 1 } } */ ++/* { dg-final { scan-assembler-times "strb" 1 } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c' +--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c 2011-10-19 22:56:19 +0000 +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_unaligned } */ ++/* { dg-options "-O2" } */ ++ ++#include <string.h> ++ ++char dest[16]; ++ ++void aligned_dest (char *src) ++{ ++ memcpy (dest, src, 15); ++} ++ ++/* Expect a multi-word store for the main part of the copy, but subword ++ loads/stores for the remainder. */ ++ ++/* { dg-final { scan-assembler-times "stmia" 1 } } */ ++/* { dg-final { scan-assembler-times "ldrh" 1 } } */ ++/* { dg-final { scan-assembler-times "strh" 1 } } */ ++/* { dg-final { scan-assembler-times "ldrb" 1 } } */ ++/* { dg-final { scan-assembler-times "strb" 1 } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c' +--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c 2011-10-19 22:56:19 +0000 +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_unaligned } */ ++/* { dg-options "-O2" } */ ++ ++#include <string.h> ++ ++char src[16]; ++ ++void aligned_src (char *dest) ++{ ++ memcpy (dest, src, 15); ++} ++ ++/* Expect a multi-word load for the main part of the copy, but subword ++ loads/stores for the remainder. */ ++ ++/* { dg-final { scan-assembler-times "ldmia" 1 } } */ ++/* { dg-final { scan-assembler-times "ldrh" 1 } } */ ++/* { dg-final { scan-assembler-times "strh" 1 } } */ ++/* { dg-final { scan-assembler-times "ldrb" 1 } } */ ++/* { dg-final { scan-assembler-times "strb" 1 } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c' +--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c 2011-10-19 22:56:19 +0000 +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_unaligned } */ ++/* { dg-options "-O2" } */ ++ ++#include <string.h> ++ ++char src[16]; ++char dest[16]; ++ ++void aligned_both (void) ++{ ++ memcpy (dest, src, 15); ++} ++ ++/* We know both src and dest to be aligned: expect multiword loads/stores. 
*/ ++ ++/* { dg-final { scan-assembler-times "ldmia" 1 } } */ ++/* { dg-final { scan-assembler-times "stmia" 1 } } */ + +=== modified file 'gcc/testsuite/lib/target-supports.exp' +--- old/gcc/testsuite/lib/target-supports.exp 2011-10-23 13:33:07 +0000 ++++ new/gcc/testsuite/lib/target-supports.exp 2011-11-21 01:45:54 +0000 +@@ -1894,6 +1894,18 @@ + }] + } + ++# Return 1 if this is an ARM target that supports unaligned word/halfword ++# load/store instructions. ++ ++proc check_effective_target_arm_unaligned { } { ++ return [check_no_compiler_messages arm_unaligned assembly { ++ #ifndef __ARM_FEATURE_UNALIGNED ++ #error no unaligned support ++ #endif ++ int i; ++ }] ++} ++ + # Add the options needed for NEON. We need either -mfloat-abi=softfp + # or -mfloat-abi=hard, but if one is already specified by the + # multilib, use it. Similarly, if a -mfpu option already enables + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106842.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106842.patch new file mode 100644 index 0000000000..2cf2741ba6 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106842.patch @@ -0,0 +1,375 @@ +2011-11-22 Ira Rosen <ira.rosen@linaro.org> + + Backport from mainline: + + 2011-10-06 Jakub Jelinek <jakub@redhat.com> + + gcc/ + PR tree-optimization/50596 + * tree-vectorizer.h (vect_is_simple_cond): New prototype. + (NUM_PATTERNS): Change to 6. + * tree-vect-patterns.c (vect_recog_mixed_size_cond_pattern): New + function. + (vect_vect_recog_func_ptrs): Add vect_recog_mixed_size_cond_pattern. + (vect_mark_pattern_stmts): Don't create stmt_vinfo for def_stmt + if it already has one, and don't set STMT_VINFO_VECTYPE in it + if it is already set. + * tree-vect-stmts.c (vect_mark_stmts_to_be_vectorized): Handle + COND_EXPR in pattern stmts. + (vect_is_simple_cond): No longer static. + + gcc/testsuite: + PR tree-optimization/50596 + * gcc.dg/vect/vect-cond-8.c: New test. + + 2011-10-07 Jakub Jelinek <jakub@redhat.com> + + gcc/ + PR tree-optimization/50650 + * tree-vect-patterns.c (vect_recog_mixed_size_cond_pattern): Don't + call vect_is_simple_cond here, instead fail if cond_expr isn't + COMPARISON_CLASS_P or if get_vectype_for_scalar_type returns NULL + for cond_expr's first operand. + * tree-vect-stmts.c (vect_is_simple_cond): Static again. + * tree-vectorizer.h (vect_is_simple_cond): Remove prototype. + + + gcc/ + * tree-vect-patterns.c (vect_recog_mixed_size_cond_pattern): Reduce + it to integral types only. + + gcc/testsuite/ + * gcc.dg/vect/pr30858.c: Expect the error message twice for targets + with multiple vector sizes. + * gcc.dg/vect/vect-cond-8.c: Rename to... + * gcc.dg/vect/vect-cond-8a.c: ... this and change the type from float + to int. + * lib/target-supports.exp (check_effective_target_vect_condition): + Return true for NEON. + +=== modified file 'gcc/testsuite/gcc.dg/vect/pr30858.c' +Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/pr30858.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/testsuite/gcc.dg/vect/pr30858.c 2012-01-04 15:33:52.000000000 -0800 ++++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/pr30858.c 2012-03-05 16:23:47.748983031 -0800 +@@ -11,5 +11,6 @@ + } + + /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "Unknown def-use cycle pattern." 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "Unknown def-use cycle pattern." 
1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "Unknown def-use cycle pattern." 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ +Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-cond-8a.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-cond-8a.c 2012-03-05 16:23:47.748983031 -0800 +@@ -0,0 +1,75 @@ ++/* { dg-require-effective-target vect_condition } */ ++ ++#include "tree-vect.h" ++ ++#define N 1024 ++int a[N], b[N], c[N]; ++char d[N], e[N], f[N]; ++unsigned char k[N]; ++ ++__attribute__((noinline, noclone)) void ++f1 (void) ++{ ++ int i; ++ for (i = 0; i < N; ++i) ++ k[i] = a[i] < b[i] ? 17 : 0; ++} ++ ++__attribute__((noinline, noclone)) void ++f2 (void) ++{ ++ int i; ++ for (i = 0; i < N; ++i) ++ k[i] = a[i] < b[i] ? 0 : 24; ++} ++ ++__attribute__((noinline, noclone)) void ++f3 (void) ++{ ++ int i; ++ for (i = 0; i < N; ++i) ++ k[i] = a[i] < b[i] ? 51 : 12; ++} ++ ++int ++main () ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ switch (i % 9) ++ { ++ case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break; ++ case 1: a[i] = 0; b[i] = 0; break; ++ case 2: a[i] = i + 1; b[i] = - i - 1; break; ++ case 3: a[i] = i; b[i] = i + 7; break; ++ case 4: a[i] = i; b[i] = i; break; ++ case 5: a[i] = i + 16; b[i] = i + 3; break; ++ case 6: a[i] = - i - 5; b[i] = - i; break; ++ case 7: a[i] = - i; b[i] = - i; break; ++ case 8: a[i] = - i; b[i] = - i - 7; break; ++ } ++ d[i] = i; ++ e[i] = 2 * i; ++ } ++ f1 (); ++ for (i = 0; i < N; i++) ++ if (k[i] != ((i % 3) == 0 ? 17 : 0)) ++ abort (); ++ f2 (); ++ for (i = 0; i < N; i++) ++ if (k[i] != ((i % 3) == 0 ? 0 : 24)) ++ abort (); ++ f3 (); ++ for (i = 0; i < N; i++) ++ if (k[i] != ((i % 3) == 0 ? 51 : 12)) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 3 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ +Index: gcc-4_6-branch/gcc/tree-vect-patterns.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/tree-vect-patterns.c 2012-03-05 16:23:10.000000000 -0800 ++++ gcc-4_6-branch/gcc/tree-vect-patterns.c 2012-03-05 16:23:47.748983031 -0800 +@@ -50,13 +50,16 @@ + tree *); + static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **, + tree *, tree *); ++static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **, ++ tree *, tree *); + static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { + vect_recog_widen_mult_pattern, + vect_recog_widen_sum_pattern, + vect_recog_dot_prod_pattern, + vect_recog_pow_pattern, + vect_recog_over_widening_pattern, +- vect_recog_widen_shift_pattern}; ++ vect_recog_widen_shift_pattern, ++ vect_recog_mixed_size_cond_pattern}; + + + /* Function widened_name_p +@@ -1441,6 +1444,118 @@ + return pattern_stmt; + } + ++/* Function vect_recog_mixed_size_cond_pattern ++ ++ Try to find the following pattern: ++ ++ type x_t, y_t; ++ TYPE a_T, b_T, c_T; ++ loop: ++ S1 a_T = x_t CMP y_t ? b_T : c_T; ++ ++ where type 'TYPE' is an integral type which has different size ++ from 'type'. b_T and c_T are constants and if 'TYPE' is wider ++ than 'type', the constants need to fit into an integer type ++ with the same width as 'type'. ++ ++ Input: ++ ++ * LAST_STMT: A stmt from which the pattern search begins. 
++ ++ Output: ++ ++ * TYPE_IN: The type of the input arguments to the pattern. ++ ++ * TYPE_OUT: The type of the output of this pattern. ++ ++ * Return value: A new stmt that will be used to replace the pattern. ++ Additionally a def_stmt is added. ++ ++ a_it = x_t CMP y_t ? b_it : c_it; ++ a_T = (TYPE) a_it; */ ++ ++static gimple ++vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in, ++ tree *type_out) ++{ ++ gimple last_stmt = VEC_index (gimple, *stmts, 0); ++ tree cond_expr, then_clause, else_clause; ++ stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info; ++ tree type, vectype, comp_vectype, comp_type, op, tmp; ++ enum machine_mode cmpmode; ++ gimple pattern_stmt, def_stmt; ++ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); ++ ++ if (!is_gimple_assign (last_stmt) ++ || gimple_assign_rhs_code (last_stmt) != COND_EXPR ++ || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def) ++ return NULL; ++ ++ op = gimple_assign_rhs1 (last_stmt); ++ cond_expr = TREE_OPERAND (op, 0); ++ then_clause = TREE_OPERAND (op, 1); ++ else_clause = TREE_OPERAND (op, 2); ++ ++ if (TREE_CODE (then_clause) != INTEGER_CST ++ || TREE_CODE (else_clause) != INTEGER_CST) ++ return NULL; ++ ++ if (!COMPARISON_CLASS_P (cond_expr)) ++ return NULL; ++ ++ type = gimple_expr_type (last_stmt); ++ comp_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0)); ++ if (!INTEGRAL_TYPE_P (comp_type) ++ || !INTEGRAL_TYPE_P (type)) ++ return NULL; ++ ++ comp_vectype = get_vectype_for_scalar_type (comp_type); ++ if (comp_vectype == NULL_TREE) ++ return NULL; ++ ++ cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype)); ++ ++ if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode)) ++ return NULL; ++ ++ vectype = get_vectype_for_scalar_type (type); ++ if (vectype == NULL_TREE) ++ return NULL; ++ ++ if (types_compatible_p (vectype, comp_vectype)) ++ return NULL; ++ ++ if (!expand_vec_cond_expr_p (comp_vectype, TYPE_MODE (comp_vectype))) ++ return NULL; ++ ++ if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode)) ++ { ++ if (!int_fits_type_p (then_clause, comp_type) ++ || !int_fits_type_p (else_clause, comp_type)) ++ return NULL; ++ } ++ ++ tmp = build3 (COND_EXPR, comp_type, unshare_expr (cond_expr), ++ fold_convert (comp_type, then_clause), ++ fold_convert (comp_type, else_clause)); ++ def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (comp_type, NULL), tmp); ++ ++ pattern_stmt ++ = gimple_build_assign_with_ops (NOP_EXPR, ++ vect_recog_temp_ssa_var (type, NULL), ++ gimple_assign_lhs (def_stmt), NULL_TREE); ++ ++ STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt; ++ def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); ++ set_vinfo_for_stmt (def_stmt, def_stmt_info); ++ STMT_VINFO_VECTYPE (def_stmt_info) = comp_vectype; ++ *type_in = vectype; ++ *type_out = vectype; ++ ++ return pattern_stmt; ++} ++ ++ + /* Mark statements that are involved in a pattern. 
*/ + + static inline void +@@ -1468,14 +1583,18 @@ + if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)) + { + def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info); +- set_vinfo_for_stmt (def_stmt, +- new_stmt_vec_info (def_stmt, loop_vinfo, NULL)); +- gimple_set_bb (def_stmt, gimple_bb (orig_stmt)); + def_stmt_info = vinfo_for_stmt (def_stmt); ++ if (def_stmt_info == NULL) ++ { ++ def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); ++ set_vinfo_for_stmt (def_stmt, def_stmt_info); ++ } ++ gimple_set_bb (def_stmt, gimple_bb (orig_stmt)); + STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt; + STMT_VINFO_DEF_TYPE (def_stmt_info) + = STMT_VINFO_DEF_TYPE (orig_stmt_info); +- STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype; ++ if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE) ++ STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype; + } + } + +Index: gcc-4_6-branch/gcc/tree-vect-stmts.c +=================================================================== +--- gcc-4_6-branch.orig/gcc/tree-vect-stmts.c 2012-03-05 16:23:11.000000000 -0800 ++++ gcc-4_6-branch/gcc/tree-vect-stmts.c 2012-03-05 16:23:47.748983031 -0800 +@@ -655,20 +655,40 @@ + tree rhs = gimple_assign_rhs1 (stmt); + unsigned int op_num; + tree op; ++ enum tree_code rhs_code; + switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))) + { + case GIMPLE_SINGLE_RHS: +- op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)); +- for (i = 0; i < op_num; i++) +- { +- op = TREE_OPERAND (rhs, i); +- if (!process_use (stmt, op, loop_vinfo, live_p, relevant, +- &worklist)) +- { +- VEC_free (gimple, heap, worklist); +- return false; +- } +- } ++ op = gimple_assign_rhs1 (stmt); ++ rhs_code = gimple_assign_rhs_code (stmt); ++ i = 0; ++ if (rhs_code == COND_EXPR ++ && COMPARISON_CLASS_P (TREE_OPERAND (op, 0))) ++ { ++ op = TREE_OPERAND (op, 0); ++ if (!process_use (stmt, TREE_OPERAND (op, 0), ++ loop_vinfo, ++ live_p, relevant, &worklist) ++ || !process_use (stmt, TREE_OPERAND (op, 1), ++ loop_vinfo, ++ live_p, relevant, &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ i = 1; ++ } ++ op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)); ++ for (i; i < op_num; i++) ++ { ++ op = TREE_OPERAND (rhs, i); ++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ } + break; + + case GIMPLE_BINARY_RHS: +Index: gcc-4_6-branch/gcc/tree-vectorizer.h +=================================================================== +--- gcc-4_6-branch.orig/gcc/tree-vectorizer.h 2012-03-05 16:23:11.000000000 -0800 ++++ gcc-4_6-branch/gcc/tree-vectorizer.h 2012-03-05 16:23:47.748983031 -0800 +@@ -917,7 +917,7 @@ + Additional pattern recognition functions can (and will) be added + in the future. */ + typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); +-#define NUM_PATTERNS 6 ++#define NUM_PATTERNS 7 + void vect_pattern_recog (loop_vec_info); + + /* In tree-vectorizer.c. */ diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106843.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106843.patch new file mode 100644 index 0000000000..ea42ad6eeb --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106843.patch @@ -0,0 +1,805 @@ +2011-11-22 Ira Rosen <ira.rosen@linaro.org> + Backport from mainline: + + 2011-11-06 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vectorizer.h (vectorizable_condition): Add argument. 
+ * tree-vect-loop.c (vectorizable_reduction): Fail for condition + in SLP. Update calls to vectorizable_condition. + * tree-vect-stmts.c (vect_is_simple_cond): Add basic block info to + the arguments. Pass it to vect_is_simple_use_1. + (vectorizable_condition): Add slp_node to the arguments. Support + vectorization of basic blocks. Fail for reduction in SLP. Update + calls to vect_is_simple_cond and vect_is_simple_use. Support SLP: + call vect_get_slp_defs to get vector operands. + (vect_analyze_stmt): Update calls to vectorizable_condition. + (vect_transform_stmt): Likewise. + * tree-vect-slp.c (vect_create_new_slp_node): Handle COND_EXPR. + (vect_get_and_check_slp_defs): Handle COND_EXPR. Allow pattern + def stmts. + (vect_build_slp_tree): Handle COND_EXPR. + (vect_analyze_slp_instance): Push pattern statements to root node. + (vect_get_constant_vectors): Fix comments. Handle COND_EXPR. + + gcc/testsuite/ + * gcc.dg/vect/bb-slp-cond-1.c: New test. + * gcc.dg/vect/slp-cond-1.c: New test. + +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c 2011-11-20 08:24:08 +0000 +@@ -0,0 +1,46 @@ ++/* { dg-require-effective-target vect_condition } */ ++ ++#include "tree-vect.h" ++ ++#define N 128 ++ ++__attribute__((noinline, noclone)) void ++foo (int *a, int stride) ++{ ++ int i; ++ ++ for (i = 0; i < N/stride; i++, a += stride) ++ { ++ a[0] = a[0] ? 1 : 5; ++ a[1] = a[1] ? 2 : 6; ++ a[2] = a[2] ? 3 : 7; ++ a[3] = a[3] ? 4 : 8; ++ } ++} ++ ++ ++int a[N]; ++int main () ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ a[i] = i; ++ ++ foo (a, 4); ++ ++ for (i = 1; i < N; i++) ++ if (a[i] != i%4 + 1) ++ abort (); ++ ++ if (a[0] != 5) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */ ++/* { dg-final { cleanup-tree-dump "slp" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/slp-cond-1.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-cond-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-cond-1.c 2011-11-20 08:24:08 +0000 +@@ -0,0 +1,126 @@ ++/* { dg-require-effective-target vect_condition } */ ++#include "tree-vect.h" ++ ++#define N 32 ++int a[N], b[N]; ++int d[N], e[N]; ++int k[N]; ++ ++__attribute__((noinline, noclone)) void ++f1 (void) ++{ ++ int i; ++ for (i = 0; i < N/4; i++) ++ { ++ k[4*i] = a[4*i] < b[4*i] ? 17 : 0; ++ k[4*i+1] = a[4*i+1] < b[4*i+1] ? 17 : 0; ++ k[4*i+2] = a[4*i+2] < b[4*i+2] ? 17 : 0; ++ k[4*i+3] = a[4*i+3] < b[4*i+3] ? 17 : 0; ++ } ++} ++ ++__attribute__((noinline, noclone)) void ++f2 (void) ++{ ++ int i; ++ for (i = 0; i < N/2; ++i) ++ { ++ k[2*i] = a[2*i] < b[2*i] ? 0 : 24; ++ k[2*i+1] = a[2*i+1] < b[2*i+1] ? 7 : 4; ++ } ++} ++ ++__attribute__((noinline, noclone)) void ++f3 (void) ++{ ++ int i; ++ for (i = 0; i < N/2; ++i) ++ { ++ k[2*i] = a[2*i] < b[2*i] ? 51 : 12; ++ k[2*i+1] = a[2*i+1] > b[2*i+1] ? 51 : 12; ++ } ++} ++ ++__attribute__((noinline, noclone)) void ++f4 (void) ++{ ++ int i; ++ for (i = 0; i < N/2; ++i) ++ { ++ int d0 = d[2*i], e0 = e[2*i]; ++ int d1 = d[2*i+1], e1 = e[2*i+1]; ++ k[2*i] = a[2*i] >= b[2*i] ? d0 : e0; ++ k[2*i+1] = a[2*i+1] >= b[2*i+1] ? 
d1 : e1; ++ } ++} ++ ++int ++main () ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ switch (i % 9) ++ { ++ case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break; ++ case 1: a[i] = 0; b[i] = 0; break; ++ case 2: a[i] = i + 1; b[i] = - i - 1; break; ++ case 3: a[i] = i; b[i] = i + 7; break; ++ case 4: a[i] = i; b[i] = i; break; ++ case 5: a[i] = i + 16; b[i] = i + 3; break; ++ case 6: a[i] = - i - 5; b[i] = - i; break; ++ case 7: a[i] = - i; b[i] = - i; break; ++ case 8: a[i] = - i; b[i] = - i - 7; break; ++ } ++ d[i] = i; ++ e[i] = 2 * i; ++ } ++ f1 (); ++ for (i = 0; i < N; i++) ++ if (k[i] != ((i % 3) == 0 ? 17 : 0)) ++ abort (); ++ ++ f2 (); ++ for (i = 0; i < N; i++) ++ { ++ switch (i % 9) ++ { ++ case 0: ++ case 6: ++ if (k[i] != ((i/9 % 2) == 0 ? 0 : 7)) ++ abort (); ++ break; ++ case 1: ++ case 5: ++ case 7: ++ if (k[i] != ((i/9 % 2) == 0 ? 4 : 24)) ++ abort (); ++ break; ++ case 2: ++ case 4: ++ case 8: ++ if (k[i] != ((i/9 % 2) == 0 ? 24 : 4)) ++ abort (); ++ break; ++ case 3: ++ if (k[i] != ((i/9 % 2) == 0 ? 7 : 0)) ++ abort (); ++ break; ++ } ++ } ++ ++ f3 (); ++ ++ f4 (); ++ for (i = 0; i < N; i++) ++ if (k[i] != ((i % 3) == 0 ? e[i] : d[i])) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/tree-vect-loop.c' +--- old/gcc/tree-vect-loop.c 2011-11-14 11:38:08 +0000 ++++ new/gcc/tree-vect-loop.c 2011-11-20 08:24:08 +0000 +@@ -4087,6 +4087,9 @@ + gcc_unreachable (); + } + ++ if (code == COND_EXPR && slp_node) ++ return false; ++ + scalar_dest = gimple_assign_lhs (stmt); + scalar_type = TREE_TYPE (scalar_dest); + if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type) +@@ -4161,7 +4164,7 @@ + + if (code == COND_EXPR) + { +- if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0)) ++ if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL)) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "unsupported condition in reduction"); +@@ -4433,7 +4436,7 @@ + gcc_assert (!slp_node); + vectorizable_condition (stmt, gsi, vec_stmt, + PHI_RESULT (VEC_index (gimple, phis, 0)), +- reduc_index); ++ reduc_index, NULL); + /* Multiple types are not supported for condition. 
*/ + break; + } + +=== modified file 'gcc/tree-vect-slp.c' +--- old/gcc/tree-vect-slp.c 2011-11-14 11:38:08 +0000 ++++ new/gcc/tree-vect-slp.c 2011-11-21 06:58:40 +0000 +@@ -109,7 +109,11 @@ + if (is_gimple_call (stmt)) + nops = gimple_call_num_args (stmt); + else if (is_gimple_assign (stmt)) +- nops = gimple_num_ops (stmt) - 1; ++ { ++ nops = gimple_num_ops (stmt) - 1; ++ if (gimple_assign_rhs_code (stmt) == COND_EXPR) ++ nops = 4; ++ } + else + return NULL; + +@@ -190,20 +194,51 @@ + bool different_types = false; + bool pattern = false; + slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info; ++ int op_idx = 1; ++ tree compare_rhs = NULL_TREE, rhs = NULL_TREE; ++ int cond_idx = -1; + + if (loop_vinfo) + loop = LOOP_VINFO_LOOP (loop_vinfo); + + if (is_gimple_call (stmt)) + number_of_oprnds = gimple_call_num_args (stmt); ++ else if (is_gimple_assign (stmt)) ++ { ++ number_of_oprnds = gimple_num_ops (stmt) - 1; ++ if (gimple_assign_rhs_code (stmt) == COND_EXPR) ++ { ++ number_of_oprnds = 4; ++ cond_idx = 0; ++ rhs = gimple_assign_rhs1 (stmt); ++ } ++ } + else +- number_of_oprnds = gimple_num_ops (stmt) - 1; ++ return false; + + for (i = 0; i < number_of_oprnds; i++) + { +- oprnd = gimple_op (stmt, i + 1); ++ if (compare_rhs) ++ oprnd = compare_rhs; ++ else ++ oprnd = gimple_op (stmt, op_idx++); ++ + oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i); + ++ if (-1 < cond_idx && cond_idx < 4) ++ { ++ if (compare_rhs) ++ compare_rhs = NULL_TREE; ++ else ++ oprnd = TREE_OPERAND (rhs, cond_idx++); ++ } ++ ++ if (COMPARISON_CLASS_P (oprnd)) ++ { ++ compare_rhs = TREE_OPERAND (oprnd, 1); ++ oprnd = TREE_OPERAND (oprnd, 0); ++ } ++ + if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def, + &dt) + || (!def_stmt && dt != vect_constant_def)) +@@ -243,8 +278,7 @@ + def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); + dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt)); + +- if (dt == vect_unknown_def_type +- || STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (def_stmt))) ++ if (dt == vect_unknown_def_type) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "Unsupported pattern."); +@@ -423,6 +457,7 @@ + VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node); + gimple stmt = VEC_index (gimple, stmts, 0); + enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK; ++ enum tree_code first_cond_code = ERROR_MARK; + tree lhs; + bool stop_recursion = false, need_same_oprnds = false; + tree vectype, scalar_type, first_op1 = NULL_TREE; +@@ -439,11 +474,18 @@ + VEC (slp_oprnd_info, heap) *oprnds_info; + unsigned int nops; + slp_oprnd_info oprnd_info; ++ tree cond; + + if (is_gimple_call (stmt)) + nops = gimple_call_num_args (stmt); ++ else if (is_gimple_assign (stmt)) ++ { ++ nops = gimple_num_ops (stmt) - 1; ++ if (gimple_assign_rhs_code (stmt) == COND_EXPR) ++ nops = 4; ++ } + else +- nops = gimple_num_ops (stmt) - 1; ++ return false; + + oprnds_info = vect_create_oprnd_info (nops, group_size); + +@@ -484,6 +526,22 @@ + return false; + } + ++ if (is_gimple_assign (stmt) ++ && gimple_assign_rhs_code (stmt) == COND_EXPR ++ && (cond = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0)) ++ && !COMPARISON_CLASS_P (cond)) ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ { ++ fprintf (vect_dump, ++ "Build SLP failed: condition is not comparison "); ++ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); ++ } ++ ++ vect_free_oprnd_info (&oprnds_info); ++ return false; ++ } ++ + scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy); + vectype = 
get_vectype_for_scalar_type (scalar_type); + if (!vectype) +@@ -737,7 +795,8 @@ + + /* Not memory operation. */ + if (TREE_CODE_CLASS (rhs_code) != tcc_binary +- && TREE_CODE_CLASS (rhs_code) != tcc_unary) ++ && TREE_CODE_CLASS (rhs_code) != tcc_unary ++ && rhs_code != COND_EXPR) + { + if (vect_print_dump_info (REPORT_SLP)) + { +@@ -750,6 +809,26 @@ + return false; + } + ++ if (rhs_code == COND_EXPR) ++ { ++ tree cond_expr = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0); ++ ++ if (i == 0) ++ first_cond_code = TREE_CODE (cond_expr); ++ else if (first_cond_code != TREE_CODE (cond_expr)) ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ { ++ fprintf (vect_dump, "Build SLP failed: different" ++ " operation"); ++ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); ++ } ++ ++ vect_free_oprnd_info (&oprnds_info); ++ return false; ++ } ++ } ++ + /* Find the def-stmts. */ + if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt, + ncopies_for_cost, (i == 0), +@@ -1395,7 +1474,12 @@ + /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */ + while (next) + { +- VEC_safe_push (gimple, heap, scalar_stmts, next); ++ if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next)) ++ && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next))) ++ VEC_safe_push (gimple, heap, scalar_stmts, ++ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next))); ++ else ++ VEC_safe_push (gimple, heap, scalar_stmts, next); + next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); + } + } +@@ -1404,7 +1488,7 @@ + /* Collect reduction statements. */ + VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo); + for (i = 0; VEC_iterate (gimple, reductions, i, next); i++) +- VEC_safe_push (gimple, heap, scalar_stmts, next); ++ VEC_safe_push (gimple, heap, scalar_stmts, next); + } + + node = vect_create_new_slp_node (scalar_stmts); +@@ -2160,15 +2244,15 @@ + + For example, we have two scalar operands, s1 and s2 (e.g., group of + strided accesses of size two), while NUNITS is four (i.e., four scalars +- of this type can be packed in a vector). The output vector will contain +- two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES ++ of this type can be packed in a vector). The output vector will contain ++ two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES + will be 2). + + If GROUP_SIZE > NUNITS, the scalars will be split into several vectors + containing the operands. + + For example, NUNITS is four as before, and the group size is 8 +- (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and ++ (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and + {s5, s6, s7, s8}. */ + + number_of_copies = least_common_multiple (nunits, group_size) / group_size; +@@ -2180,8 +2264,18 @@ + { + if (is_store) + op = gimple_assign_rhs1 (stmt); +- else ++ else if (gimple_assign_rhs_code (stmt) != COND_EXPR) + op = gimple_op (stmt, op_num + 1); ++ else ++ { ++ if (op_num == 0 || op_num == 1) ++ { ++ tree cond = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0); ++ op = TREE_OPERAND (cond, op_num); ++ } ++ else ++ op = TREE_OPERAND (gimple_assign_rhs1 (stmt), op_num - 1); ++ } + + if (reduc_index != -1) + { + +=== modified file 'gcc/tree-vect-stmts.c' +--- old/gcc/tree-vect-stmts.c 2011-11-22 16:52:23 +0000 ++++ new/gcc/tree-vect-stmts.c 2011-11-22 17:10:17 +0000 +@@ -4816,7 +4816,7 @@ + condition operands are supportable using vec_is_simple_use. 
*/ + + static bool +-vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo) ++vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) + { + tree lhs, rhs; + tree def; +@@ -4831,7 +4831,7 @@ + if (TREE_CODE (lhs) == SSA_NAME) + { + gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs); +- if (!vect_is_simple_use (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def, ++ if (!vect_is_simple_use (lhs, loop_vinfo, bb_vinfo, &lhs_def_stmt, &def, + &dt)) + return false; + } +@@ -4842,7 +4842,7 @@ + if (TREE_CODE (rhs) == SSA_NAME) + { + gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs); +- if (!vect_is_simple_use (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def, ++ if (!vect_is_simple_use (rhs, loop_vinfo, bb_vinfo, &rhs_def_stmt, &def, + &dt)) + return false; + } +@@ -4868,7 +4868,8 @@ + + bool + vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi, +- gimple *vec_stmt, tree reduc_def, int reduc_index) ++ gimple *vec_stmt, tree reduc_def, int reduc_index, ++ slp_tree slp_node) + { + tree scalar_dest = NULL_TREE; + tree vec_dest = NULL_TREE; +@@ -4885,19 +4886,24 @@ + tree def; + enum vect_def_type dt, dts[4]; + int nunits = TYPE_VECTOR_SUBPARTS (vectype); +- int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; ++ int ncopies; + enum tree_code code; + stmt_vec_info prev_stmt_info = NULL; +- int j; ++ int i, j; ++ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); ++ VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL; ++ VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL; + +- /* FORNOW: unsupported in basic block SLP. */ +- gcc_assert (loop_vinfo); ++ if (slp_node || PURE_SLP_STMT (stmt_info)) ++ ncopies = 1; ++ else ++ ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; + + gcc_assert (ncopies >= 1); +- if (reduc_index && ncopies > 1) ++ if (reduc_index && (ncopies > 1 || STMT_SLP_TYPE (stmt_info))) + return false; /* FORNOW */ + +- if (!STMT_VINFO_RELEVANT_P (stmt_info)) ++ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) + return false; + + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def +@@ -4905,10 +4911,6 @@ + && reduc_def)) + return false; + +- /* FORNOW: SLP not supported. */ +- if (STMT_SLP_TYPE (stmt_info)) +- return false; +- + /* FORNOW: not yet supported. */ + if (STMT_VINFO_LIVE_P (stmt_info)) + { +@@ -4932,7 +4934,7 @@ + then_clause = TREE_OPERAND (op, 1); + else_clause = TREE_OPERAND (op, 2); + +- if (!vect_is_simple_cond (cond_expr, loop_vinfo)) ++ if (!vect_is_simple_cond (cond_expr, loop_vinfo, bb_vinfo)) + return false; + + /* We do not handle two different vector types for the condition +@@ -4944,7 +4946,7 @@ + if (TREE_CODE (then_clause) == SSA_NAME) + { + gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause); +- if (!vect_is_simple_use (then_clause, loop_vinfo, NULL, ++ if (!vect_is_simple_use (then_clause, loop_vinfo, bb_vinfo, + &then_def_stmt, &def, &dt)) + return false; + } +@@ -4956,7 +4958,7 @@ + if (TREE_CODE (else_clause) == SSA_NAME) + { + gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause); +- if (!vect_is_simple_use (else_clause, loop_vinfo, NULL, ++ if (!vect_is_simple_use (else_clause, loop_vinfo, bb_vinfo, + &else_def_stmt, &def, &dt)) + return false; + } +@@ -4974,7 +4976,15 @@ + return expand_vec_cond_expr_p (TREE_TYPE (op), vec_mode); + } + +- /* Transform */ ++ /* Transform. */ ++ ++ if (!slp_node) ++ { ++ vec_oprnds0 = VEC_alloc (tree, heap, 1); ++ vec_oprnds1 = VEC_alloc (tree, heap, 1); ++ vec_oprnds2 = VEC_alloc (tree, heap, 1); ++ vec_oprnds3 = VEC_alloc (tree, heap, 1); ++ } + + /* Handle def. 
*/ + scalar_dest = gimple_assign_lhs (stmt); +@@ -4983,67 +4993,118 @@ + /* Handle cond expr. */ + for (j = 0; j < ncopies; j++) + { +- gimple new_stmt; ++ gimple new_stmt = NULL; + if (j == 0) + { +- gimple gtemp; +- vec_cond_lhs = ++ if (slp_node) ++ { ++ VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4); ++ VEC (slp_void_p, heap) *vec_defs; ++ ++ vec_defs = VEC_alloc (slp_void_p, heap, 4); ++ VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0)); ++ VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1)); ++ VEC_safe_push (tree, heap, ops, then_clause); ++ VEC_safe_push (tree, heap, ops, else_clause); ++ vect_get_slp_defs (ops, slp_node, &vec_defs, -1); ++ vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs); ++ vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs); ++ vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs); ++ vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs); ++ ++ VEC_free (tree, heap, ops); ++ VEC_free (slp_void_p, heap, vec_defs); ++ } ++ else ++ { ++ gimple gtemp; ++ vec_cond_lhs = + vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), + stmt, NULL); +- vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo, ++ vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo, + NULL, >emp, &def, &dts[0]); +- vec_cond_rhs = +- vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), +- stmt, NULL); +- vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo, +- NULL, >emp, &def, &dts[1]); +- if (reduc_index == 1) +- vec_then_clause = reduc_def; +- else +- { +- vec_then_clause = vect_get_vec_def_for_operand (then_clause, +- stmt, NULL); +- vect_is_simple_use (then_clause, loop_vinfo, +- NULL, >emp, &def, &dts[2]); +- } +- if (reduc_index == 2) +- vec_else_clause = reduc_def; +- else +- { +- vec_else_clause = vect_get_vec_def_for_operand (else_clause, +- stmt, NULL); +- vect_is_simple_use (else_clause, loop_vinfo, ++ ++ vec_cond_rhs = ++ vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), ++ stmt, NULL); ++ vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo, ++ NULL, >emp, &def, &dts[1]); ++ if (reduc_index == 1) ++ vec_then_clause = reduc_def; ++ else ++ { ++ vec_then_clause = vect_get_vec_def_for_operand (then_clause, ++ stmt, NULL); ++ vect_is_simple_use (then_clause, loop_vinfo, ++ NULL, >emp, &def, &dts[2]); ++ } ++ if (reduc_index == 2) ++ vec_else_clause = reduc_def; ++ else ++ { ++ vec_else_clause = vect_get_vec_def_for_operand (else_clause, ++ stmt, NULL); ++ vect_is_simple_use (else_clause, loop_vinfo, + NULL, >emp, &def, &dts[3]); ++ } + } + } + else + { +- vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], vec_cond_lhs); +- vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], vec_cond_rhs); ++ vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], ++ VEC_pop (tree, vec_oprnds0)); ++ vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], ++ VEC_pop (tree, vec_oprnds1)); + vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2], +- vec_then_clause); ++ VEC_pop (tree, vec_oprnds2)); + vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3], +- vec_else_clause); ++ VEC_pop (tree, vec_oprnds3)); ++ } ++ ++ if (!slp_node) ++ { ++ VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs); ++ VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs); ++ VEC_quick_push (tree, vec_oprnds2, vec_then_clause); ++ VEC_quick_push (tree, vec_oprnds3, vec_else_clause); + } + + /* Arguments are ready. Create the new vector stmt. 
*/ +- vec_compare = build2 (TREE_CODE (cond_expr), vectype, +- vec_cond_lhs, vec_cond_rhs); +- vec_cond_expr = build3 (VEC_COND_EXPR, vectype, +- vec_compare, vec_then_clause, vec_else_clause); +- +- new_stmt = gimple_build_assign (vec_dest, vec_cond_expr); +- new_temp = make_ssa_name (vec_dest, new_stmt); +- gimple_assign_set_lhs (new_stmt, new_temp); +- vect_finish_stmt_generation (stmt, new_stmt, gsi); +- if (j == 0) +- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; +- else +- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; +- +- prev_stmt_info = vinfo_for_stmt (new_stmt); ++ FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs) ++ { ++ vec_cond_rhs = VEC_index (tree, vec_oprnds1, i); ++ vec_then_clause = VEC_index (tree, vec_oprnds2, i); ++ vec_else_clause = VEC_index (tree, vec_oprnds3, i); ++ ++ vec_compare = build2 (TREE_CODE (cond_expr), vectype, ++ vec_cond_lhs, vec_cond_rhs); ++ vec_cond_expr = build3 (VEC_COND_EXPR, vectype, ++ vec_compare, vec_then_clause, vec_else_clause); ++ ++ new_stmt = gimple_build_assign (vec_dest, vec_cond_expr); ++ new_temp = make_ssa_name (vec_dest, new_stmt); ++ gimple_assign_set_lhs (new_stmt, new_temp); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ if (slp_node) ++ VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt); ++ } ++ ++ if (slp_node) ++ continue; ++ ++ if (j == 0) ++ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; ++ else ++ STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; ++ ++ prev_stmt_info = vinfo_for_stmt (new_stmt); + } + ++ VEC_free (tree, heap, vec_oprnds0); ++ VEC_free (tree, heap, vec_oprnds1); ++ VEC_free (tree, heap, vec_oprnds2); ++ VEC_free (tree, heap, vec_oprnds3); ++ + return true; + } + +@@ -5217,7 +5278,7 @@ + || vectorizable_call (stmt, NULL, NULL) + || vectorizable_store (stmt, NULL, NULL, NULL) + || vectorizable_reduction (stmt, NULL, NULL, NULL) +- || vectorizable_condition (stmt, NULL, NULL, NULL, 0)); ++ || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL)); + else + { + if (bb_vinfo) +@@ -5227,7 +5288,8 @@ + || vectorizable_operation (stmt, NULL, NULL, node) + || vectorizable_assignment (stmt, NULL, NULL, node) + || vectorizable_load (stmt, NULL, NULL, node, NULL) +- || vectorizable_store (stmt, NULL, NULL, node)); ++ || vectorizable_store (stmt, NULL, NULL, node) ++ || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)); + } + + if (!ok) +@@ -5343,8 +5405,7 @@ + break; + + case condition_vec_info_type: +- gcc_assert (!slp_node); +- done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0); ++ done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node); + gcc_assert (done); + break; + + +=== modified file 'gcc/tree-vectorizer.h' +--- old/gcc/tree-vectorizer.h 2011-11-22 16:52:23 +0000 ++++ new/gcc/tree-vectorizer.h 2011-11-22 17:10:17 +0000 +@@ -832,7 +832,7 @@ + extern void vect_remove_stores (gimple); + extern bool vect_analyze_stmt (gimple, bool *, slp_tree); + extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *, +- tree, int); ++ tree, int, slp_tree); + extern void vect_get_load_cost (struct data_reference *, int, bool, + unsigned int *, unsigned int *); + extern void vect_get_store_cost (struct data_reference *, int, unsigned int *); + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106844.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106844.patch new file mode 100644 index 0000000000..e501959c7a --- /dev/null +++ 
b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106844.patch @@ -0,0 +1,495 @@ +2011-11-27 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vectorizer.h (vect_pattern_recog): Add new argument. + * tree-vect-loop.c (vect_analyze_loop_2): Update call to + vect_pattern_recog. + * tree-vect-patterns.c (widened_name_p): Pass basic block + info to vect_is_simple_use. + (vect_recog_dot_prod_pattern): Fail for basic blocks. + (vect_recog_widen_sum_pattern): Likewise. + (vect_handle_widen_op_by_const): Support basic blocks. + (vect_operation_fits_smaller_type, + vect_recog_over_widening_pattern): Likewise. + (vect_recog_mixed_size_cond_pattern): Support basic blocks. + Add printing. + (vect_mark_pattern_stmts): Update calls to new_stmt_vec_info. + (vect_pattern_recog_1): Check for reduction only in loops. + (vect_pattern_recog): Add new argument. Support basic blocks. + * tree-vect-stmts.c (vectorizable_conversion): Pass basic block + info to vect_is_simple_use_1. + * tree-vect-slp.c (vect_get_and_check_slp_defs): Support basic + blocks. + (vect_slp_analyze_bb_1): Call vect_pattern_recog. + + gcc/testsuite/ + * gcc.dg/vect/bb-slp-pattern-1.c: New test. + * gcc.dg/vect/bb-slp-pattern-2.c: New test. + +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c 2011-11-23 06:37:10 +0000 +@@ -0,0 +1,55 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 8 ++ ++unsigned short X[N]; ++unsigned short Y[N]; ++unsigned int result[N]; ++ ++/* unsigned short->unsigned int widening-mult. */ ++__attribute__ ((noinline, noclone)) void ++foo (void) ++{ ++ result[0] = (unsigned int)(X[0] * Y[0]); ++ result[1] = (unsigned int)(X[1] * Y[1]); ++ result[2] = (unsigned int)(X[2] * Y[2]); ++ result[3] = (unsigned int)(X[3] * Y[3]); ++ result[4] = (unsigned int)(X[4] * Y[4]); ++ result[5] = (unsigned int)(X[5] * Y[5]); ++ result[6] = (unsigned int)(X[6] * Y[6]); ++ result[7] = (unsigned int)(X[7] * Y[7]); ++} ++ ++int main (void) ++{ ++ int i, tmp; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ X[i] = i; ++ Y[i] = 64-i; ++ } ++ ++ foo (); ++ ++ for (i = 0; i < N; i++) ++ { ++ __asm__ volatile (""); ++ tmp = X[i] * Y[i]; ++ if (result[i] != tmp) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "slp" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 8 "slp" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { scan-tree-dump-times "pattern recognized" 8 "slp" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { cleanup-tree-dump "slp" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c 2011-11-23 06:37:10 +0000 +@@ -0,0 +1,53 @@ ++/* { dg-require-effective-target vect_condition } */ ++ ++#include "tree-vect.h" ++ ++#define N 128 ++ ++__attribute__((noinline, noclone)) void ++foo (short * __restrict__ a, int * __restrict__ b, int stride) ++{ ++ int i; ++ ++ for (i = 0; i < N/stride; i++, a += stride, b += stride) ++ { ++ a[0] = b[0] ? 1 : 7; ++ a[1] = b[1] ? 2 : 0; ++ a[2] = b[2] ? 3 : 0; ++ a[3] = b[3] ? 4 : 0; ++ a[4] = b[4] ? 5 : 0; ++ a[5] = b[5] ? 
6 : 0; ++ a[6] = b[6] ? 7 : 0; ++ a[7] = b[7] ? 8 : 0; ++ } ++} ++ ++short a[N]; ++int b[N]; ++int main () ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ a[i] = i; ++ b[i] = -i; ++ } ++ ++ foo (a, b, 8); ++ ++ for (i = 1; i < N; i++) ++ if (a[i] != i%8 + 1) ++ abort (); ++ ++ if (a[0] != 7) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_element_align && vect_pack_trunc } } } } */ ++/* { dg-final { cleanup-tree-dump "slp" } } */ ++ + +=== modified file 'gcc/tree-vect-loop.c' +--- old/gcc/tree-vect-loop.c 2011-11-20 08:24:08 +0000 ++++ new/gcc/tree-vect-loop.c 2011-11-23 06:47:35 +0000 +@@ -1458,7 +1458,7 @@ + + vect_analyze_scalar_cycles (loop_vinfo); + +- vect_pattern_recog (loop_vinfo); ++ vect_pattern_recog (loop_vinfo, NULL); + + /* Data-flow analysis to detect stmts that do not need to be vectorized. */ + + +=== modified file 'gcc/tree-vect-patterns.c' +--- old/gcc/tree-vect-patterns.c 2011-11-20 09:11:09 +0000 ++++ new/gcc/tree-vect-patterns.c 2011-11-23 07:49:33 +0000 +@@ -83,11 +83,13 @@ + tree oprnd0; + enum vect_def_type dt; + tree def; ++ bb_vec_info bb_vinfo; + + stmt_vinfo = vinfo_for_stmt (use_stmt); + loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); ++ bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); + +- if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt)) ++ if (!vect_is_simple_use (name, loop_vinfo, bb_vinfo, def_stmt, &def, &dt)) + return false; + + if (dt != vect_internal_def +@@ -111,7 +113,7 @@ + || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2))) + return false; + +- if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy, ++ if (!vect_is_simple_use (oprnd0, loop_vinfo, bb_vinfo, &dummy_gimple, &dummy, + &dt)) + return false; + +@@ -188,9 +190,14 @@ + gimple pattern_stmt; + tree prod_type; + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); +- struct loop *loop = LOOP_VINFO_LOOP (loop_info); ++ struct loop *loop; + tree var, rhs; + ++ if (!loop_info) ++ return NULL; ++ ++ loop = LOOP_VINFO_LOOP (loop_info); ++ + if (!is_gimple_assign (last_stmt)) + return NULL; + +@@ -358,8 +365,16 @@ + { + tree new_type, new_oprnd, tmp; + gimple new_stmt; +- loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); +- struct loop *loop = LOOP_VINFO_LOOP (loop_info); ++ loop_vec_info loop_vinfo; ++ struct loop *loop = NULL; ++ bb_vec_info bb_vinfo; ++ stmt_vec_info stmt_vinfo; ++ ++ stmt_vinfo = vinfo_for_stmt (stmt); ++ loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); ++ bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); ++ if (loop_vinfo) ++ loop = LOOP_VINFO_LOOP (loop_vinfo); + + if (code != MULT_EXPR && code != LSHIFT_EXPR) + return false; +@@ -377,7 +392,9 @@ + + if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4) + || !gimple_bb (def_stmt) +- || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) ++ || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))) ++ || (!loop && gimple_bb (def_stmt) != BB_VINFO_BB (bb_vinfo) ++ && gimple_code (def_stmt) != GIMPLE_PHI) + || !vinfo_for_stmt (def_stmt)) + return false; + +@@ -774,9 +791,14 @@ + tree type, half_type; + gimple pattern_stmt; + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); +- struct loop *loop = LOOP_VINFO_LOOP (loop_info); ++ struct loop *loop; + tree var; + ++ if (!loop_info) ++ return NULL; ++ ++ loop = LOOP_VINFO_LOOP (loop_info); ++ + if (!is_gimple_assign (last_stmt)) + return NULL; + +@@ -877,7 +899,11 @@ + 
gimple def_stmt, new_stmt; + bool first = false; + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); +- struct loop *loop = LOOP_VINFO_LOOP (loop_info); ++ bb_vec_info bb_info = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)); ++ struct loop *loop = NULL; ++ ++ if (loop_info) ++ loop = LOOP_VINFO_LOOP (loop_info); + + *new_def_stmt = NULL; + +@@ -909,7 +935,9 @@ + first = true; + if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false) + || !gimple_bb (def_stmt) +- || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) ++ || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))) ++ || (!loop && gimple_bb (def_stmt) != BB_VINFO_BB (bb_info) ++ && gimple_code (def_stmt) != GIMPLE_PHI) + || !vinfo_for_stmt (def_stmt)) + return false; + } +@@ -1087,7 +1115,16 @@ + int nuses = 0; + tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd; + bool first; +- struct loop *loop = (gimple_bb (stmt))->loop_father; ++ loop_vec_info loop_vinfo; ++ struct loop *loop = NULL; ++ bb_vec_info bb_vinfo; ++ stmt_vec_info stmt_vinfo; ++ ++ stmt_vinfo = vinfo_for_stmt (stmt); ++ loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); ++ bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); ++ if (loop_vinfo) ++ loop = LOOP_VINFO_LOOP (loop_vinfo); + + first = true; + while (1) +@@ -1120,7 +1157,8 @@ + + if (nuses != 1 || !is_gimple_assign (use_stmt) + || !gimple_bb (use_stmt) +- || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) ++ || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) ++ || (!loop && gimple_bb (use_stmt) != BB_VINFO_BB (bb_vinfo))) + return NULL; + + /* Create pattern statement for STMT. */ +@@ -1485,6 +1523,7 @@ + enum machine_mode cmpmode; + gimple pattern_stmt, def_stmt; + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); ++ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); + + if (!is_gimple_assign (last_stmt) + || gimple_assign_rhs_code (last_stmt) != COND_EXPR +@@ -1538,7 +1577,8 @@ + tmp = build3 (COND_EXPR, comp_type, unshare_expr (cond_expr), + fold_convert (comp_type, then_clause), + fold_convert (comp_type, else_clause)); +- def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (comp_type, NULL), tmp); ++ def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (comp_type, NULL), ++ tmp); + + pattern_stmt + = gimple_build_assign_with_ops (NOP_EXPR, +@@ -1546,12 +1586,15 @@ + gimple_assign_lhs (def_stmt), NULL_TREE); + + STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt; +- def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); ++ def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); + set_vinfo_for_stmt (def_stmt, def_stmt_info); + STMT_VINFO_VECTYPE (def_stmt_info) = comp_vectype; + *type_in = vectype; + *type_out = vectype; + ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "vect_recog_mixed_size_cond_pattern: detected: "); ++ + return pattern_stmt; + } + +@@ -1565,10 +1608,11 @@ + stmt_vec_info pattern_stmt_info, def_stmt_info; + stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt); + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info); ++ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info); + gimple def_stmt; + + set_vinfo_for_stmt (pattern_stmt, +- new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL)); ++ new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo)); + gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt)); + pattern_stmt_info = vinfo_for_stmt (pattern_stmt); + +@@ -1586,7 +1630,7 @@ + def_stmt_info = vinfo_for_stmt (def_stmt); + if (def_stmt_info 
== NULL) + { +- def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); ++ def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); + set_vinfo_for_stmt (def_stmt, def_stmt_info); + } + gimple_set_bb (def_stmt, gimple_bb (orig_stmt)); +@@ -1697,9 +1741,10 @@ + + /* Patterns cannot be vectorized using SLP, because they change the order of + computation. */ +- FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next) +- if (next == stmt) +- VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i); ++ if (loop_vinfo) ++ FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next) ++ if (next == stmt) ++ VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i); + + /* It is possible that additional pattern stmts are created and inserted in + STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the +@@ -1799,26 +1844,46 @@ + be recorded in S3. */ + + void +-vect_pattern_recog (loop_vec_info loop_vinfo) ++vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) + { +- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); +- basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); +- unsigned int nbbs = loop->num_nodes; ++ struct loop *loop; ++ basic_block *bbs, bb; ++ unsigned int nbbs; + gimple_stmt_iterator si; + unsigned int i, j; + gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); + VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1); ++ gimple stmt; + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "=== vect_pattern_recog ==="); + +- /* Scan through the loop stmts, applying the pattern recognition ++ if (loop_vinfo) ++ { ++ loop = LOOP_VINFO_LOOP (loop_vinfo); ++ bbs = LOOP_VINFO_BBS (loop_vinfo); ++ nbbs = loop->num_nodes; ++ } ++ else ++ { ++ bb = BB_VINFO_BB (bb_vinfo); ++ nbbs = 1; ++ bbs = XNEW (basic_block); ++ bbs[0] = bb; ++ } ++ ++ /* Scan through the stmts, applying the pattern recognition + functions starting at each stmt visited: */ + for (i = 0; i < nbbs; i++) + { + basic_block bb = bbs[i]; + for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) + { ++ if (bb_vinfo && (stmt = gsi_stmt (si)) ++ && vinfo_for_stmt (stmt) ++ && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt))) ++ continue; ++ + /* Scan over all generic vect_recog_xxx_pattern functions. */ + for (j = 0; j < NUM_PATTERNS; j++) + { +@@ -1830,4 +1895,6 @@ + } + + VEC_free (gimple, heap, stmts_to_replace); ++ if (bb_vinfo) ++ free (bbs); + } + +=== modified file 'gcc/tree-vect-slp.c' +--- old/gcc/tree-vect-slp.c 2011-11-21 06:58:40 +0000 ++++ new/gcc/tree-vect-slp.c 2011-11-23 06:47:35 +0000 +@@ -255,12 +255,14 @@ + /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt + from the pattern. Check that all the stmts of the node are in the + pattern. 
*/ +- if (loop && def_stmt && gimple_bb (def_stmt) +- && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) ++ if (def_stmt && gimple_bb (def_stmt) ++ && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))) ++ || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo) ++ && gimple_code (def_stmt) != GIMPLE_PHI)) + && vinfo_for_stmt (def_stmt) + && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)) +- && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt)) +- && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) ++ && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt)) ++ && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) + { + pattern = true; + if (!first && !oprnd_info->first_pattern) +@@ -1972,6 +1974,8 @@ + return NULL; + } + ++ vect_pattern_recog (NULL, bb_vinfo); ++ + if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf, + &data_dependence_in_bb) + || min_vf > max_vf + +=== modified file 'gcc/tree-vect-stmts.c' +--- old/gcc/tree-vect-stmts.c 2011-11-22 17:10:17 +0000 ++++ new/gcc/tree-vect-stmts.c 2011-11-23 06:47:35 +0000 +@@ -3451,11 +3451,11 @@ + /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of + OP1. */ + if (CONSTANT_CLASS_P (op0)) +- ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL, ++ ok = vect_is_simple_use_1 (op1, loop_vinfo, bb_vinfo, + &def_stmt, &def, &dt[1], &vectype_in); + else +- ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, +- &dt[1]); ++ ok = vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, ++ &def, &dt[1]); + + if (!ok) + { + +=== modified file 'gcc/tree-vectorizer.h' +--- old/gcc/tree-vectorizer.h 2011-11-22 17:10:17 +0000 ++++ new/gcc/tree-vectorizer.h 2011-11-23 06:47:35 +0000 +@@ -918,7 +918,7 @@ + in the future. */ + typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); + #define NUM_PATTERNS 7 +-void vect_pattern_recog (loop_vec_info); ++void vect_pattern_recog (loop_vec_info, bb_vec_info); + + /* In tree-vectorizer.c. */ + unsigned vectorize_loops (void); + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106845.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106845.patch new file mode 100644 index 0000000000..17cfd10682 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106845.patch @@ -0,0 +1,1818 @@ +2011-11-28 David Alan Gilbert <david.gilbert@linaro.org> + + Backport from mainline (svn r19983): + + 2011-10-14 David Alan Gilbert <david.gilbert@linaro.org> + + gcc/testsuite/ + * gcc.dg/di-longlong64-sync-1.c: New test. + * gcc.dg/di-sync-multithread.c: New test. + * gcc.target/arm/di-longlong64-sync-withhelpers.c: New test. + * gcc.target/arm/di-longlong64-sync-withldrexd.c: New test. + * lib/target-supports.exp: (arm_arch_*_ok): Series of effective-target + tests for v5, v6, v6k, and v7-a, and add-options helpers. + (check_effective_target_arm_arm_ok): New helper. + (check_effective_target_sync_longlong): New helper. + +2011-11-28 David Alan Gilbert <david.gilbert@linaro.org> + + Backport from mainline (svn r19982): + + 2011-10-14 David Alan Gilbert <david.gilbert@linaro.org> + + gcc/ + * config/arm/linux-atomic-64bit.c: New (based on linux-atomic.c). + * config/arm/linux-atomic.c: Change comment to point to 64bit version. + (SYNC_LOCK_RELEASE): Instantiate 64bit version. + * config/arm/t-linux-eabi: Pull in linux-atomic-64bit.c. 
+ +2011-11-28 David Alan Gilbert <david.gilbert@linaro.org> + + Backport from mainline (svn r19981): + + 2011-10-14 David Alan Gilbert <david.gilbert@linaro.org> + + gcc/ + * config/arm/arm.c (arm_output_ldrex): Support ldrexd. + (arm_output_strex): Support strexd. + (arm_output_it): New helper to output it in Thumb2 mode only. + (arm_output_sync_loop): Support DI mode. Change comment to + not support const_int. + (arm_expand_sync): Support DI mode. + * config/arm/arm.h (TARGET_HAVE_LDREXBHD): Split into LDREXBH + and LDREXD. + * config/arm/iterators.md (NARROW): move from sync.md. + (QHSD): New iterator for all current ARM integer modes. + (SIDI): New iterator for SI and DI modes only. + * config/arm/sync.md (sync_predtab): New mode_attr. + (sync_compare_and_swapsi): Fold into sync_compare_and_swap<mode>. + (sync_lock_test_and_setsi): Fold into sync_lock_test_and_setsi<mode>. + (sync_<sync_optab>si): Fold into sync_<sync_optab><mode>. + (sync_nandsi): Fold into sync_nand<mode>. + (sync_new_<sync_optab>si): Fold into sync_new_<sync_optab><mode>. + (sync_new_nandsi): Fold into sync_new_nand<mode>. + (sync_old_<sync_optab>si): Fold into sync_old_<sync_optab><mode>. + (sync_old_nandsi): Fold into sync_old_nand<mode>. + (sync_compare_and_swap<mode>): Support SI & DI. + (sync_lock_test_and_set<mode>): Likewise. + (sync_<sync_optab><mode>): Likewise. + (sync_nand<mode>): Likewise. + (sync_new_<sync_optab><mode>): Likewise. + (sync_new_nand<mode>): Likewise. + (sync_old_<sync_optab><mode>): Likewise. + (sync_old_nand<mode>): Likewise. + (arm_sync_compare_and_swapsi): Turn into iterator on SI & DI. + (arm_sync_lock_test_and_setsi): Likewise. + (arm_sync_new_<sync_optab>si): Likewise. + (arm_sync_new_nandsi): Likewise. + (arm_sync_old_<sync_optab>si): Likewise. + (arm_sync_old_nandsi): Likewise. + (arm_sync_compare_and_swap<mode> NARROW): use sync_predtab, fix indent. + (arm_sync_lock_test_and_setsi<mode> NARROW): Likewise. + (arm_sync_new_<sync_optab><mode> NARROW): Likewise. + (arm_sync_new_nand<mode> NARROW): Likewise. + (arm_sync_old_<sync_optab><mode> NARROW): Likewise. + (arm_sync_old_nand<mode> NARROW): Likewise. + +2011-11-28 David Alan Gilbert <david.gilbert@linaro.org> + + Backport from mainline (svn r19980): + + 2011-10-14 David Alan Gilbert <david.gilbert@linaro.org> + + PR target/48126 + + gcc/ + * config/arm/arm.c (arm_output_sync_loop): Move label before barrier. + +2011-11-28 David Alan Gilbert <david.gilbert@linaro.org> + + Backport from mainline (svn r19979): + + 2011-10-14 David Alan Gilbert <david.gilbert@linaro.org> + + gcc/ + * config/arm/arm.h (TARGET_HAVE_DMB_MCR): MCR Not available in Thumb1. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-11-21 01:45:54 +0000 ++++ new/gcc/config/arm/arm.c 2011-11-28 15:07:01 +0000 +@@ -24307,12 +24307,26 @@ + rtx target, + rtx memory) + { +- const char *suffix = arm_ldrex_suffix (mode); +- rtx operands[2]; ++ rtx operands[3]; + + operands[0] = target; +- operands[1] = memory; +- arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix); ++ if (mode != DImode) ++ { ++ const char *suffix = arm_ldrex_suffix (mode); ++ operands[1] = memory; ++ arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix); ++ } ++ else ++ { ++ /* The restrictions on target registers in ARM mode are that the two ++ registers are consecutive and the first one is even; Thumb is ++ actually more flexible, but DI should give us this anyway. ++ Note that the 1st register always gets the lowest word in memory. 
*/ ++ gcc_assert ((REGNO (target) & 1) == 0); ++ operands[1] = gen_rtx_REG (SImode, REGNO (target) + 1); ++ operands[2] = memory; ++ arm_output_asm_insn (emit, 0, operands, "ldrexd\t%%0, %%1, %%C2"); ++ } + } + + /* Emit a strex{b,h,d, } instruction appropriate for the specified +@@ -24325,14 +24339,41 @@ + rtx value, + rtx memory) + { +- const char *suffix = arm_ldrex_suffix (mode); +- rtx operands[3]; ++ rtx operands[4]; + + operands[0] = result; + operands[1] = value; +- operands[2] = memory; +- arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix, +- cc); ++ if (mode != DImode) ++ { ++ const char *suffix = arm_ldrex_suffix (mode); ++ operands[2] = memory; ++ arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", ++ suffix, cc); ++ } ++ else ++ { ++ /* The restrictions on target registers in ARM mode are that the two ++ registers are consecutive and the first one is even; Thumb is ++ actually more flexible, but DI should give us this anyway. ++ Note that the 1st register always gets the lowest word in memory. */ ++ gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2); ++ operands[2] = gen_rtx_REG (SImode, REGNO (value) + 1); ++ operands[3] = memory; ++ arm_output_asm_insn (emit, 0, operands, "strexd%s\t%%0, %%1, %%2, %%C3", ++ cc); ++ } ++} ++ ++/* Helper to emit an it instruction in Thumb2 mode only; although the assembler ++ will ignore it in ARM mode, emitting it will mess up instruction counts we ++ sometimes keep 'flags' are the extra t's and e's if it's more than one ++ instruction that is conditional. */ ++static void ++arm_output_it (emit_f emit, const char *flags, const char *cond) ++{ ++ rtx operands[1]; /* Don't actually use the operand. */ ++ if (TARGET_THUMB2) ++ arm_output_asm_insn (emit, 0, operands, "it%s\t%s", flags, cond); + } + + /* Helper to emit a two operand instruction. */ +@@ -24374,7 +24415,7 @@ + + required_value: + +- RTX register or const_int representing the required old_value for ++ RTX register representing the required old_value for + the modify to continue, if NULL no comparsion is performed. */ + static void + arm_output_sync_loop (emit_f emit, +@@ -24388,7 +24429,13 @@ + enum attr_sync_op sync_op, + int early_barrier_required) + { +- rtx operands[1]; ++ rtx operands[2]; ++ /* We'll use the lo for the normal rtx in the none-DI case ++ as well as the least-sig word in the DI case. */ ++ rtx old_value_lo, required_value_lo, new_value_lo, t1_lo; ++ rtx old_value_hi, required_value_hi, new_value_hi, t1_hi; ++ ++ bool is_di = mode == DImode; + + gcc_assert (t1 != t2); + +@@ -24399,82 +24446,142 @@ + + arm_output_ldrex (emit, mode, old_value, memory); + ++ if (is_di) ++ { ++ old_value_lo = gen_lowpart (SImode, old_value); ++ old_value_hi = gen_highpart (SImode, old_value); ++ if (required_value) ++ { ++ required_value_lo = gen_lowpart (SImode, required_value); ++ required_value_hi = gen_highpart (SImode, required_value); ++ } ++ else ++ { ++ /* Silence false potentially unused warning. */ ++ required_value_lo = NULL_RTX; ++ required_value_hi = NULL_RTX; ++ } ++ new_value_lo = gen_lowpart (SImode, new_value); ++ new_value_hi = gen_highpart (SImode, new_value); ++ t1_lo = gen_lowpart (SImode, t1); ++ t1_hi = gen_highpart (SImode, t1); ++ } ++ else ++ { ++ old_value_lo = old_value; ++ new_value_lo = new_value; ++ required_value_lo = required_value; ++ t1_lo = t1; ++ ++ /* Silence false potentially unused warning. 
*/ ++ t1_hi = NULL_RTX; ++ new_value_hi = NULL_RTX; ++ required_value_hi = NULL_RTX; ++ old_value_hi = NULL_RTX; ++ } ++ + if (required_value) + { +- rtx operands[2]; ++ operands[0] = old_value_lo; ++ operands[1] = required_value_lo; + +- operands[0] = old_value; +- operands[1] = required_value; + arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1"); ++ if (is_di) ++ { ++ arm_output_it (emit, "", "eq"); ++ arm_output_op2 (emit, "cmpeq", old_value_hi, required_value_hi); ++ } + arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX); + } + + switch (sync_op) + { + case SYNC_OP_ADD: +- arm_output_op3 (emit, "add", t1, old_value, new_value); ++ arm_output_op3 (emit, is_di ? "adds" : "add", ++ t1_lo, old_value_lo, new_value_lo); ++ if (is_di) ++ arm_output_op3 (emit, "adc", t1_hi, old_value_hi, new_value_hi); + break; + + case SYNC_OP_SUB: +- arm_output_op3 (emit, "sub", t1, old_value, new_value); ++ arm_output_op3 (emit, is_di ? "subs" : "sub", ++ t1_lo, old_value_lo, new_value_lo); ++ if (is_di) ++ arm_output_op3 (emit, "sbc", t1_hi, old_value_hi, new_value_hi); + break; + + case SYNC_OP_IOR: +- arm_output_op3 (emit, "orr", t1, old_value, new_value); ++ arm_output_op3 (emit, "orr", t1_lo, old_value_lo, new_value_lo); ++ if (is_di) ++ arm_output_op3 (emit, "orr", t1_hi, old_value_hi, new_value_hi); + break; + + case SYNC_OP_XOR: +- arm_output_op3 (emit, "eor", t1, old_value, new_value); ++ arm_output_op3 (emit, "eor", t1_lo, old_value_lo, new_value_lo); ++ if (is_di) ++ arm_output_op3 (emit, "eor", t1_hi, old_value_hi, new_value_hi); + break; + + case SYNC_OP_AND: +- arm_output_op3 (emit,"and", t1, old_value, new_value); ++ arm_output_op3 (emit,"and", t1_lo, old_value_lo, new_value_lo); ++ if (is_di) ++ arm_output_op3 (emit, "and", t1_hi, old_value_hi, new_value_hi); + break; + + case SYNC_OP_NAND: +- arm_output_op3 (emit, "and", t1, old_value, new_value); +- arm_output_op2 (emit, "mvn", t1, t1); ++ arm_output_op3 (emit, "and", t1_lo, old_value_lo, new_value_lo); ++ if (is_di) ++ arm_output_op3 (emit, "and", t1_hi, old_value_hi, new_value_hi); ++ arm_output_op2 (emit, "mvn", t1_lo, t1_lo); ++ if (is_di) ++ arm_output_op2 (emit, "mvn", t1_hi, t1_hi); + break; + + case SYNC_OP_NONE: + t1 = new_value; ++ t1_lo = new_value_lo; ++ if (is_di) ++ t1_hi = new_value_hi; + break; + } + ++ /* Note that the result of strex is a 0/1 flag that's always 1 register. */ + if (t2) + { +- arm_output_strex (emit, mode, "", t2, t1, memory); +- operands[0] = t2; +- arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0"); +- arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", +- LOCAL_LABEL_PREFIX); ++ arm_output_strex (emit, mode, "", t2, t1, memory); ++ operands[0] = t2; ++ arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0"); ++ arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", ++ LOCAL_LABEL_PREFIX); + } + else + { + /* Use old_value for the return value because for some operations + the old_value can easily be restored. This saves one register. */ +- arm_output_strex (emit, mode, "", old_value, t1, memory); +- operands[0] = old_value; ++ arm_output_strex (emit, mode, "", old_value_lo, t1, memory); ++ operands[0] = old_value_lo; + arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0"); + arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", + LOCAL_LABEL_PREFIX); + ++ /* Note that we only used the _lo half of old_value as a temporary ++ so in DI we don't have to restore the _hi part. 
*/ + switch (sync_op) + { + case SYNC_OP_ADD: +- arm_output_op3 (emit, "sub", old_value, t1, new_value); ++ arm_output_op3 (emit, "sub", old_value_lo, t1_lo, new_value_lo); + break; + + case SYNC_OP_SUB: +- arm_output_op3 (emit, "add", old_value, t1, new_value); ++ arm_output_op3 (emit, "add", old_value_lo, t1_lo, new_value_lo); + break; + + case SYNC_OP_XOR: +- arm_output_op3 (emit, "eor", old_value, t1, new_value); ++ arm_output_op3 (emit, "eor", old_value_lo, t1_lo, new_value_lo); + break; + + case SYNC_OP_NONE: +- arm_output_op2 (emit, "mov", old_value, required_value); ++ arm_output_op2 (emit, "mov", old_value_lo, required_value_lo); + break; + + default: +@@ -24482,8 +24589,11 @@ + } + } + ++ /* Note: label is before barrier so that in cmp failure case we still get ++ a barrier to stop subsequent loads floating upwards past the ldrex ++ PR target/48126. */ ++ arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX); + arm_process_output_memory_barrier (emit, NULL); +- arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX); + } + + static rtx +@@ -24577,7 +24687,7 @@ + target = gen_reg_rtx (mode); + + memory = arm_legitimize_sync_memory (memory); +- if (mode != SImode) ++ if (mode != SImode && mode != DImode) + { + rtx load_temp = gen_reg_rtx (SImode); + + +=== modified file 'gcc/config/arm/arm.h' +--- old/gcc/config/arm/arm.h 2011-11-21 01:45:54 +0000 ++++ new/gcc/config/arm/arm.h 2011-11-28 15:07:01 +0000 +@@ -300,7 +300,8 @@ + #define TARGET_HAVE_DMB (arm_arch7) + + /* Nonzero if this chip implements a memory barrier via CP15. */ +-#define TARGET_HAVE_DMB_MCR (arm_arch6k && ! TARGET_HAVE_DMB) ++#define TARGET_HAVE_DMB_MCR (arm_arch6 && ! TARGET_HAVE_DMB \ ++ && ! TARGET_THUMB1) + + /* Nonzero if this chip implements a memory barrier instruction. */ + #define TARGET_HAVE_MEMORY_BARRIER (TARGET_HAVE_DMB || TARGET_HAVE_DMB_MCR) +@@ -308,8 +309,12 @@ + /* Nonzero if this chip supports ldrex and strex */ + #define TARGET_HAVE_LDREX ((arm_arch6 && TARGET_ARM) || arm_arch7) + +-/* Nonzero if this chip supports ldrex{bhd} and strex{bhd}. */ +-#define TARGET_HAVE_LDREXBHD ((arm_arch6k && TARGET_ARM) || arm_arch7) ++/* Nonzero if this chip supports ldrex{bh} and strex{bh}. */ ++#define TARGET_HAVE_LDREXBH ((arm_arch6k && TARGET_ARM) || arm_arch7) ++ ++/* Nonzero if this chip supports ldrexd and strexd. */ ++#define TARGET_HAVE_LDREXD (((arm_arch6k && TARGET_ARM) || arm_arch7) \ ++ && arm_arch_notm) + + /* Nonzero if integer division instructions supported. */ + #define TARGET_IDIV ((TARGET_ARM && arm_arch_arm_hwdiv) \ + +=== modified file 'gcc/config/arm/iterators.md' +--- old/gcc/config/arm/iterators.md 2011-10-23 13:33:07 +0000 ++++ new/gcc/config/arm/iterators.md 2011-11-28 15:07:01 +0000 +@@ -33,6 +33,15 @@ + ;; A list of integer modes that are up to one word long + (define_mode_iterator QHSI [QI HI SI]) + ++;; A list of integer modes that are less than a word ++(define_mode_iterator NARROW [QI HI]) ++ ++;; A list of all the integer modes upto 64bit ++(define_mode_iterator QHSD [QI HI SI DI]) ++ ++;; A list of the 32bit and 64bit integer modes ++(define_mode_iterator SIDI [SI DI]) ++ + ;; Integer element sizes implemented by IWMMXT. + (define_mode_iterator VMMX [V2SI V4HI V8QI]) + + +=== added file 'gcc/config/arm/linux-atomic-64bit.c' +--- old/gcc/config/arm/linux-atomic-64bit.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/config/arm/linux-atomic-64bit.c 2011-10-14 15:50:44 +0000 +@@ -0,0 +1,166 @@ ++/* 64bit Linux-specific atomic operations for ARM EABI. 
++ Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc. ++ Based on linux-atomic.c ++ ++ 64 bit additions david.gilbert@linaro.org ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++Under Section 7 of GPL version 3, you are granted additional ++permissions described in the GCC Runtime Library Exception, version ++3.1, as published by the Free Software Foundation. ++ ++You should have received a copy of the GNU General Public License and ++a copy of the GCC Runtime Library Exception along with this program; ++see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++<http://www.gnu.org/licenses/>. */ ++ ++/* 64bit helper functions for atomic operations; the compiler will ++ call these when the code is compiled for a CPU without ldrexd/strexd. ++ (If the CPU had those then the compiler inlines the operation). ++ ++ These helpers require a kernel helper that's only present on newer ++ kernels; we check for that in an init section and bail out rather ++ unceremoneously. */ ++ ++extern unsigned int __write (int fd, const void *buf, unsigned int count); ++extern void abort (void); ++ ++/* Kernel helper for compare-and-exchange. */ ++typedef int (__kernel_cmpxchg64_t) (const long long* oldval, ++ const long long* newval, ++ long long *ptr); ++#define __kernel_cmpxchg64 (*(__kernel_cmpxchg64_t *) 0xffff0f60) ++ ++/* Kernel helper page version number. */ ++#define __kernel_helper_version (*(unsigned int *)0xffff0ffc) ++ ++/* Check that the kernel has a new enough version at load. */ ++static void __check_for_sync8_kernelhelper (void) ++{ ++ if (__kernel_helper_version < 5) ++ { ++ const char err[] = "A newer kernel is required to run this binary. " ++ "(__kernel_cmpxchg64 helper)\n"; ++ /* At this point we need a way to crash with some information ++ for the user - I'm not sure I can rely on much else being ++ available at this point, so do the same as generic-morestack.c ++ write () and abort (). */ ++ __write (2 /* stderr. */, err, sizeof (err)); ++ abort (); ++ } ++}; ++ ++static void (*__sync8_kernelhelper_inithook[]) (void) ++ __attribute__ ((used, section (".init_array"))) = { ++ &__check_for_sync8_kernelhelper ++}; ++ ++#define HIDDEN __attribute__ ((visibility ("hidden"))) ++ ++#define FETCH_AND_OP_WORD64(OP, PFX_OP, INF_OP) \ ++ long long HIDDEN \ ++ __sync_fetch_and_##OP##_8 (long long *ptr, long long val) \ ++ { \ ++ int failure; \ ++ long long tmp,tmp2; \ ++ \ ++ do { \ ++ tmp = *ptr; \ ++ tmp2 = PFX_OP (tmp INF_OP val); \ ++ failure = __kernel_cmpxchg64 (&tmp, &tmp2, ptr); \ ++ } while (failure != 0); \ ++ \ ++ return tmp; \ ++ } ++ ++FETCH_AND_OP_WORD64 (add, , +) ++FETCH_AND_OP_WORD64 (sub, , -) ++FETCH_AND_OP_WORD64 (or, , |) ++FETCH_AND_OP_WORD64 (and, , &) ++FETCH_AND_OP_WORD64 (xor, , ^) ++FETCH_AND_OP_WORD64 (nand, ~, &) ++ ++#define NAME_oldval(OP, WIDTH) __sync_fetch_and_##OP##_##WIDTH ++#define NAME_newval(OP, WIDTH) __sync_##OP##_and_fetch_##WIDTH ++ ++/* Implement both __sync_<op>_and_fetch and __sync_fetch_and_<op> for ++ subword-sized quantities. 
*/ ++ ++#define OP_AND_FETCH_WORD64(OP, PFX_OP, INF_OP) \ ++ long long HIDDEN \ ++ __sync_##OP##_and_fetch_8 (long long *ptr, long long val) \ ++ { \ ++ int failure; \ ++ long long tmp,tmp2; \ ++ \ ++ do { \ ++ tmp = *ptr; \ ++ tmp2 = PFX_OP (tmp INF_OP val); \ ++ failure = __kernel_cmpxchg64 (&tmp, &tmp2, ptr); \ ++ } while (failure != 0); \ ++ \ ++ return tmp2; \ ++ } ++ ++OP_AND_FETCH_WORD64 (add, , +) ++OP_AND_FETCH_WORD64 (sub, , -) ++OP_AND_FETCH_WORD64 (or, , |) ++OP_AND_FETCH_WORD64 (and, , &) ++OP_AND_FETCH_WORD64 (xor, , ^) ++OP_AND_FETCH_WORD64 (nand, ~, &) ++ ++long long HIDDEN ++__sync_val_compare_and_swap_8 (long long *ptr, long long oldval, ++ long long newval) ++{ ++ int failure; ++ long long actual_oldval; ++ ++ while (1) ++ { ++ actual_oldval = *ptr; ++ ++ if (__builtin_expect (oldval != actual_oldval, 0)) ++ return actual_oldval; ++ ++ failure = __kernel_cmpxchg64 (&actual_oldval, &newval, ptr); ++ ++ if (__builtin_expect (!failure, 1)) ++ return oldval; ++ } ++} ++ ++typedef unsigned char bool; ++ ++bool HIDDEN ++__sync_bool_compare_and_swap_8 (long long *ptr, long long oldval, ++ long long newval) ++{ ++ int failure = __kernel_cmpxchg64 (&oldval, &newval, ptr); ++ return (failure == 0); ++} ++ ++long long HIDDEN ++__sync_lock_test_and_set_8 (long long *ptr, long long val) ++{ ++ int failure; ++ long long oldval; ++ ++ do { ++ oldval = *ptr; ++ failure = __kernel_cmpxchg64 (&oldval, &val, ptr); ++ } while (failure != 0); ++ ++ return oldval; ++} + +=== modified file 'gcc/config/arm/linux-atomic.c' +--- old/gcc/config/arm/linux-atomic.c 2011-01-03 20:52:22 +0000 ++++ new/gcc/config/arm/linux-atomic.c 2011-10-14 15:50:44 +0000 +@@ -32,8 +32,8 @@ + #define __kernel_dmb (*(__kernel_dmb_t *) 0xffff0fa0) + + /* Note: we implement byte, short and int versions of atomic operations using +- the above kernel helpers, but there is no support for "long long" (64-bit) +- operations as yet. */ ++ the above kernel helpers; see linux-atomic-64bit.c for "long long" (64-bit) ++ operations. */ + + #define HIDDEN __attribute__ ((visibility ("hidden"))) + +@@ -273,6 +273,7 @@ + *ptr = 0; \ + } + ++SYNC_LOCK_RELEASE (long long, 8) + SYNC_LOCK_RELEASE (int, 4) + SYNC_LOCK_RELEASE (short, 2) + SYNC_LOCK_RELEASE (char, 1) + +=== modified file 'gcc/config/arm/sync.md' +--- old/gcc/config/arm/sync.md 2010-12-31 13:25:33 +0000 ++++ new/gcc/config/arm/sync.md 2011-10-14 15:47:15 +0000 +@@ -1,6 +1,7 @@ + ;; Machine description for ARM processor synchronization primitives. + ;; Copyright (C) 2010 Free Software Foundation, Inc. + ;; Written by Marcus Shawcroft (marcus.shawcroft@arm.com) ++;; 64bit Atomics by Dave Gilbert (david.gilbert@linaro.org) + ;; + ;; This file is part of GCC. 
+ ;; +@@ -33,31 +34,24 @@ + MEM_VOLATILE_P (operands[0]) = 1; + }) + +-(define_expand "sync_compare_and_swapsi" +- [(set (match_operand:SI 0 "s_register_operand") +- (unspec_volatile:SI [(match_operand:SI 1 "memory_operand") +- (match_operand:SI 2 "s_register_operand") +- (match_operand:SI 3 "s_register_operand")] +- VUNSPEC_SYNC_COMPARE_AND_SWAP))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" +- { +- struct arm_sync_generator generator; +- generator.op = arm_sync_generator_omrn; +- generator.u.omrn = gen_arm_sync_compare_and_swapsi; +- arm_expand_sync (SImode, &generator, operands[0], operands[1], operands[2], +- operands[3]); +- DONE; +- }) + +-(define_mode_iterator NARROW [QI HI]) ++(define_mode_attr sync_predtab [(SI "TARGET_HAVE_LDREX && ++ TARGET_HAVE_MEMORY_BARRIER") ++ (QI "TARGET_HAVE_LDREXBH && ++ TARGET_HAVE_MEMORY_BARRIER") ++ (HI "TARGET_HAVE_LDREXBH && ++ TARGET_HAVE_MEMORY_BARRIER") ++ (DI "TARGET_HAVE_LDREXD && ++ ARM_DOUBLEWORD_ALIGN && ++ TARGET_HAVE_MEMORY_BARRIER")]) + + (define_expand "sync_compare_and_swap<mode>" +- [(set (match_operand:NARROW 0 "s_register_operand") +- (unspec_volatile:NARROW [(match_operand:NARROW 1 "memory_operand") +- (match_operand:NARROW 2 "s_register_operand") +- (match_operand:NARROW 3 "s_register_operand")] ++ [(set (match_operand:QHSD 0 "s_register_operand") ++ (unspec_volatile:QHSD [(match_operand:QHSD 1 "memory_operand") ++ (match_operand:QHSD 2 "s_register_operand") ++ (match_operand:QHSD 3 "s_register_operand")] + VUNSPEC_SYNC_COMPARE_AND_SWAP))] +- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" ++ "<sync_predtab>" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omrn; +@@ -67,25 +61,11 @@ + DONE; + }) + +-(define_expand "sync_lock_test_and_setsi" +- [(match_operand:SI 0 "s_register_operand") +- (match_operand:SI 1 "memory_operand") +- (match_operand:SI 2 "s_register_operand")] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" +- { +- struct arm_sync_generator generator; +- generator.op = arm_sync_generator_omn; +- generator.u.omn = gen_arm_sync_lock_test_and_setsi; +- arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL, +- operands[2]); +- DONE; +- }) +- + (define_expand "sync_lock_test_and_set<mode>" +- [(match_operand:NARROW 0 "s_register_operand") +- (match_operand:NARROW 1 "memory_operand") +- (match_operand:NARROW 2 "s_register_operand")] +- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" ++ [(match_operand:QHSD 0 "s_register_operand") ++ (match_operand:QHSD 1 "memory_operand") ++ (match_operand:QHSD 2 "s_register_operand")] ++ "<sync_predtab>" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; +@@ -115,51 +95,25 @@ + (plus "*") + (minus "*")]) + +-(define_expand "sync_<sync_optab>si" +- [(match_operand:SI 0 "memory_operand") +- (match_operand:SI 1 "s_register_operand") +- (syncop:SI (match_dup 0) (match_dup 1))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" +- { +- struct arm_sync_generator generator; +- generator.op = arm_sync_generator_omn; +- generator.u.omn = gen_arm_sync_new_<sync_optab>si; +- arm_expand_sync (SImode, &generator, NULL, operands[0], NULL, operands[1]); +- DONE; +- }) +- +-(define_expand "sync_nandsi" +- [(match_operand:SI 0 "memory_operand") +- (match_operand:SI 1 "s_register_operand") +- (not:SI (and:SI (match_dup 0) (match_dup 1)))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" +- { +- struct arm_sync_generator generator; +- generator.op = arm_sync_generator_omn; +- generator.u.omn = 
gen_arm_sync_new_nandsi; +- arm_expand_sync (SImode, &generator, NULL, operands[0], NULL, operands[1]); +- DONE; +- }) +- + (define_expand "sync_<sync_optab><mode>" +- [(match_operand:NARROW 0 "memory_operand") +- (match_operand:NARROW 1 "s_register_operand") +- (syncop:NARROW (match_dup 0) (match_dup 1))] +- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" ++ [(match_operand:QHSD 0 "memory_operand") ++ (match_operand:QHSD 1 "s_register_operand") ++ (syncop:QHSD (match_dup 0) (match_dup 1))] ++ "<sync_predtab>" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; + generator.u.omn = gen_arm_sync_new_<sync_optab><mode>; + arm_expand_sync (<MODE>mode, &generator, NULL, operands[0], NULL, +- operands[1]); ++ operands[1]); + DONE; + }) + + (define_expand "sync_nand<mode>" +- [(match_operand:NARROW 0 "memory_operand") +- (match_operand:NARROW 1 "s_register_operand") +- (not:NARROW (and:NARROW (match_dup 0) (match_dup 1)))] +- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" ++ [(match_operand:QHSD 0 "memory_operand") ++ (match_operand:QHSD 1 "s_register_operand") ++ (not:QHSD (and:QHSD (match_dup 0) (match_dup 1)))] ++ "<sync_predtab>" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; +@@ -169,57 +123,27 @@ + DONE; + }) + +-(define_expand "sync_new_<sync_optab>si" +- [(match_operand:SI 0 "s_register_operand") +- (match_operand:SI 1 "memory_operand") +- (match_operand:SI 2 "s_register_operand") +- (syncop:SI (match_dup 1) (match_dup 2))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" +- { +- struct arm_sync_generator generator; +- generator.op = arm_sync_generator_omn; +- generator.u.omn = gen_arm_sync_new_<sync_optab>si; +- arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL, +- operands[2]); +- DONE; +- }) +- +-(define_expand "sync_new_nandsi" +- [(match_operand:SI 0 "s_register_operand") +- (match_operand:SI 1 "memory_operand") +- (match_operand:SI 2 "s_register_operand") +- (not:SI (and:SI (match_dup 1) (match_dup 2)))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" +- { +- struct arm_sync_generator generator; +- generator.op = arm_sync_generator_omn; +- generator.u.omn = gen_arm_sync_new_nandsi; +- arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL, +- operands[2]); +- DONE; +- }) +- + (define_expand "sync_new_<sync_optab><mode>" +- [(match_operand:NARROW 0 "s_register_operand") +- (match_operand:NARROW 1 "memory_operand") +- (match_operand:NARROW 2 "s_register_operand") +- (syncop:NARROW (match_dup 1) (match_dup 2))] +- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" ++ [(match_operand:QHSD 0 "s_register_operand") ++ (match_operand:QHSD 1 "memory_operand") ++ (match_operand:QHSD 2 "s_register_operand") ++ (syncop:QHSD (match_dup 1) (match_dup 2))] ++ "<sync_predtab>" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; + generator.u.omn = gen_arm_sync_new_<sync_optab><mode>; + arm_expand_sync (<MODE>mode, &generator, operands[0], operands[1], +- NULL, operands[2]); ++ NULL, operands[2]); + DONE; + }) + + (define_expand "sync_new_nand<mode>" +- [(match_operand:NARROW 0 "s_register_operand") +- (match_operand:NARROW 1 "memory_operand") +- (match_operand:NARROW 2 "s_register_operand") +- (not:NARROW (and:NARROW (match_dup 1) (match_dup 2)))] +- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" ++ [(match_operand:QHSD 0 "s_register_operand") ++ (match_operand:QHSD 1 "memory_operand") ++ (match_operand:QHSD 2 "s_register_operand") ++ 
(not:QHSD (and:QHSD (match_dup 1) (match_dup 2)))] ++ "<sync_predtab>" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; +@@ -229,57 +153,27 @@ + DONE; + }); + +-(define_expand "sync_old_<sync_optab>si" +- [(match_operand:SI 0 "s_register_operand") +- (match_operand:SI 1 "memory_operand") +- (match_operand:SI 2 "s_register_operand") +- (syncop:SI (match_dup 1) (match_dup 2))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" +- { +- struct arm_sync_generator generator; +- generator.op = arm_sync_generator_omn; +- generator.u.omn = gen_arm_sync_old_<sync_optab>si; +- arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL, +- operands[2]); +- DONE; +- }) +- +-(define_expand "sync_old_nandsi" +- [(match_operand:SI 0 "s_register_operand") +- (match_operand:SI 1 "memory_operand") +- (match_operand:SI 2 "s_register_operand") +- (not:SI (and:SI (match_dup 1) (match_dup 2)))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" +- { +- struct arm_sync_generator generator; +- generator.op = arm_sync_generator_omn; +- generator.u.omn = gen_arm_sync_old_nandsi; +- arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL, +- operands[2]); +- DONE; +- }) +- + (define_expand "sync_old_<sync_optab><mode>" +- [(match_operand:NARROW 0 "s_register_operand") +- (match_operand:NARROW 1 "memory_operand") +- (match_operand:NARROW 2 "s_register_operand") +- (syncop:NARROW (match_dup 1) (match_dup 2))] +- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" ++ [(match_operand:QHSD 0 "s_register_operand") ++ (match_operand:QHSD 1 "memory_operand") ++ (match_operand:QHSD 2 "s_register_operand") ++ (syncop:QHSD (match_dup 1) (match_dup 2))] ++ "<sync_predtab>" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; + generator.u.omn = gen_arm_sync_old_<sync_optab><mode>; + arm_expand_sync (<MODE>mode, &generator, operands[0], operands[1], +- NULL, operands[2]); ++ NULL, operands[2]); + DONE; + }) + + (define_expand "sync_old_nand<mode>" +- [(match_operand:NARROW 0 "s_register_operand") +- (match_operand:NARROW 1 "memory_operand") +- (match_operand:NARROW 2 "s_register_operand") +- (not:NARROW (and:NARROW (match_dup 1) (match_dup 2)))] +- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" ++ [(match_operand:QHSD 0 "s_register_operand") ++ (match_operand:QHSD 1 "memory_operand") ++ (match_operand:QHSD 2 "s_register_operand") ++ (not:QHSD (and:QHSD (match_dup 1) (match_dup 2)))] ++ "<sync_predtab>" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; +@@ -289,22 +183,22 @@ + DONE; + }) + +-(define_insn "arm_sync_compare_and_swapsi" +- [(set (match_operand:SI 0 "s_register_operand" "=&r") +- (unspec_volatile:SI +- [(match_operand:SI 1 "arm_sync_memory_operand" "+Q") +- (match_operand:SI 2 "s_register_operand" "r") +- (match_operand:SI 3 "s_register_operand" "r")] +- VUNSPEC_SYNC_COMPARE_AND_SWAP)) +- (set (match_dup 1) (unspec_volatile:SI [(match_dup 2)] ++(define_insn "arm_sync_compare_and_swap<mode>" ++ [(set (match_operand:SIDI 0 "s_register_operand" "=&r") ++ (unspec_volatile:SIDI ++ [(match_operand:SIDI 1 "arm_sync_memory_operand" "+Q") ++ (match_operand:SIDI 2 "s_register_operand" "r") ++ (match_operand:SIDI 3 "s_register_operand" "r")] ++ VUNSPEC_SYNC_COMPARE_AND_SWAP)) ++ (set (match_dup 1) (unspec_volatile:SIDI [(match_dup 2)] + VUNSPEC_SYNC_COMPARE_AND_SWAP)) + (set (reg:CC CC_REGNUM) (unspec_volatile:CC [(match_dup 1)] + VUNSPEC_SYNC_COMPARE_AND_SWAP)) + ] +- "TARGET_HAVE_LDREX && 
TARGET_HAVE_MEMORY_BARRIER" ++ "<sync_predtab>" + { + return arm_output_sync_insn (insn, operands); +- } ++ } + [(set_attr "sync_result" "0") + (set_attr "sync_memory" "1") + (set_attr "sync_required_value" "2") +@@ -318,7 +212,7 @@ + (zero_extend:SI + (unspec_volatile:NARROW + [(match_operand:NARROW 1 "arm_sync_memory_operand" "+Q") +- (match_operand:SI 2 "s_register_operand" "r") ++ (match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "s_register_operand" "r")] + VUNSPEC_SYNC_COMPARE_AND_SWAP))) + (set (match_dup 1) (unspec_volatile:NARROW [(match_dup 2)] +@@ -326,10 +220,10 @@ + (set (reg:CC CC_REGNUM) (unspec_volatile:CC [(match_dup 1)] + VUNSPEC_SYNC_COMPARE_AND_SWAP)) + ] +- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" ++ "<sync_predtab>" + { + return arm_output_sync_insn (insn, operands); +- } ++ } + [(set_attr "sync_result" "0") + (set_attr "sync_memory" "1") + (set_attr "sync_required_value" "2") +@@ -338,18 +232,18 @@ + (set_attr "conds" "clob") + (set_attr "predicable" "no")]) + +-(define_insn "arm_sync_lock_test_and_setsi" +- [(set (match_operand:SI 0 "s_register_operand" "=&r") +- (match_operand:SI 1 "arm_sync_memory_operand" "+Q")) ++(define_insn "arm_sync_lock_test_and_set<mode>" ++ [(set (match_operand:SIDI 0 "s_register_operand" "=&r") ++ (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q")) + (set (match_dup 1) +- (unspec_volatile:SI [(match_operand:SI 2 "s_register_operand" "r")] +- VUNSPEC_SYNC_LOCK)) ++ (unspec_volatile:SIDI [(match_operand:SIDI 2 "s_register_operand" "r")] ++ VUNSPEC_SYNC_LOCK)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 "=&r"))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" ++ "<sync_predtab>" + { + return arm_output_sync_insn (insn, operands); +- } ++ } + [(set_attr "sync_release_barrier" "no") + (set_attr "sync_result" "0") + (set_attr "sync_memory" "1") +@@ -364,10 +258,10 @@ + (zero_extend:SI (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))) + (set (match_dup 1) + (unspec_volatile:NARROW [(match_operand:SI 2 "s_register_operand" "r")] +- VUNSPEC_SYNC_LOCK)) ++ VUNSPEC_SYNC_LOCK)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 "=&r"))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" ++ "<sync_predtab>" + { + return arm_output_sync_insn (insn, operands); + } +@@ -380,22 +274,48 @@ + (set_attr "conds" "clob") + (set_attr "predicable" "no")]) + +-(define_insn "arm_sync_new_<sync_optab>si" ++(define_insn "arm_sync_new_<sync_optab><mode>" ++ [(set (match_operand:SIDI 0 "s_register_operand" "=&r") ++ (unspec_volatile:SIDI [(syncop:SIDI ++ (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q") ++ (match_operand:SIDI 2 "s_register_operand" "r")) ++ ] ++ VUNSPEC_SYNC_NEW_OP)) ++ (set (match_dup 1) ++ (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)] ++ VUNSPEC_SYNC_NEW_OP)) ++ (clobber (reg:CC CC_REGNUM)) ++ (clobber (match_scratch:SI 3 "=&r"))] ++ "<sync_predtab>" ++ { ++ return arm_output_sync_insn (insn, operands); ++ } ++ [(set_attr "sync_result" "0") ++ (set_attr "sync_memory" "1") ++ (set_attr "sync_new_value" "2") ++ (set_attr "sync_t1" "0") ++ (set_attr "sync_t2" "3") ++ (set_attr "sync_op" "<sync_optab>") ++ (set_attr "conds" "clob") ++ (set_attr "predicable" "no")]) ++ ++(define_insn "arm_sync_new_<sync_optab><mode>" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (unspec_volatile:SI [(syncop:SI +- (match_operand:SI 1 "arm_sync_memory_operand" "+Q") +- (match_operand:SI 2 "s_register_operand" "r")) +- ] +- VUNSPEC_SYNC_NEW_OP)) ++ (zero_extend:SI ++ 
(match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) ++ (match_operand:SI 2 "s_register_operand" "r")) ++ ] ++ VUNSPEC_SYNC_NEW_OP)) + (set (match_dup 1) +- (unspec_volatile:SI [(match_dup 1) (match_dup 2)] +- VUNSPEC_SYNC_NEW_OP)) ++ (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] ++ VUNSPEC_SYNC_NEW_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 "=&r"))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" ++ "<sync_predtab>" + { + return arm_output_sync_insn (insn, operands); +- } ++ } + [(set_attr "sync_result" "0") + (set_attr "sync_memory" "1") + (set_attr "sync_new_value" "2") +@@ -405,22 +325,22 @@ + (set_attr "conds" "clob") + (set_attr "predicable" "no")]) + +-(define_insn "arm_sync_new_nandsi" +- [(set (match_operand:SI 0 "s_register_operand" "=&r") +- (unspec_volatile:SI [(not:SI (and:SI +- (match_operand:SI 1 "arm_sync_memory_operand" "+Q") +- (match_operand:SI 2 "s_register_operand" "r"))) +- ] +- VUNSPEC_SYNC_NEW_OP)) ++(define_insn "arm_sync_new_nand<mode>" ++ [(set (match_operand:SIDI 0 "s_register_operand" "=&r") ++ (unspec_volatile:SIDI [(not:SIDI (and:SIDI ++ (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q") ++ (match_operand:SIDI 2 "s_register_operand" "r"))) ++ ] ++ VUNSPEC_SYNC_NEW_OP)) + (set (match_dup 1) +- (unspec_volatile:SI [(match_dup 1) (match_dup 2)] +- VUNSPEC_SYNC_NEW_OP)) ++ (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)] ++ VUNSPEC_SYNC_NEW_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 "=&r"))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" ++ "<sync_predtab>" + { + return arm_output_sync_insn (insn, operands); +- } ++ } + [(set_attr "sync_result" "0") + (set_attr "sync_memory" "1") + (set_attr "sync_new_value" "2") +@@ -430,50 +350,24 @@ + (set_attr "conds" "clob") + (set_attr "predicable" "no")]) + +-(define_insn "arm_sync_new_<sync_optab><mode>" +- [(set (match_operand:SI 0 "s_register_operand" "=&r") +- (unspec_volatile:SI [(syncop:SI +- (zero_extend:SI +- (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) +- (match_operand:SI 2 "s_register_operand" "r")) +- ] +- VUNSPEC_SYNC_NEW_OP)) +- (set (match_dup 1) +- (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] +- VUNSPEC_SYNC_NEW_OP)) +- (clobber (reg:CC CC_REGNUM)) +- (clobber (match_scratch:SI 3 "=&r"))] +- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" +- { +- return arm_output_sync_insn (insn, operands); +- } +- [(set_attr "sync_result" "0") +- (set_attr "sync_memory" "1") +- (set_attr "sync_new_value" "2") +- (set_attr "sync_t1" "0") +- (set_attr "sync_t2" "3") +- (set_attr "sync_op" "<sync_optab>") +- (set_attr "conds" "clob") +- (set_attr "predicable" "no")]) +- + (define_insn "arm_sync_new_nand<mode>" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (unspec_volatile:SI + [(not:SI + (and:SI +- (zero_extend:SI +- (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) +- (match_operand:SI 2 "s_register_operand" "r"))) ++ (zero_extend:SI ++ (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) ++ (match_operand:SI 2 "s_register_operand" "r"))) + ] VUNSPEC_SYNC_NEW_OP)) + (set (match_dup 1) + (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] +- VUNSPEC_SYNC_NEW_OP)) ++ VUNSPEC_SYNC_NEW_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 "=&r"))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" ++ "<sync_predtab>" + { + return arm_output_sync_insn (insn, operands); +- } ++ } + [(set_attr "sync_result" "0") + (set_attr "sync_memory" "1") + (set_attr "sync_new_value" "2") +@@ -483,20 
+377,20 @@ + (set_attr "conds" "clob") + (set_attr "predicable" "no")]) + +-(define_insn "arm_sync_old_<sync_optab>si" +- [(set (match_operand:SI 0 "s_register_operand" "=&r") +- (unspec_volatile:SI [(syncop:SI +- (match_operand:SI 1 "arm_sync_memory_operand" "+Q") +- (match_operand:SI 2 "s_register_operand" "r")) +- ] +- VUNSPEC_SYNC_OLD_OP)) ++(define_insn "arm_sync_old_<sync_optab><mode>" ++ [(set (match_operand:SIDI 0 "s_register_operand" "=&r") ++ (unspec_volatile:SIDI [(syncop:SIDI ++ (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q") ++ (match_operand:SIDI 2 "s_register_operand" "r")) ++ ] ++ VUNSPEC_SYNC_OLD_OP)) + (set (match_dup 1) +- (unspec_volatile:SI [(match_dup 1) (match_dup 2)] +- VUNSPEC_SYNC_OLD_OP)) ++ (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)] ++ VUNSPEC_SYNC_OLD_OP)) + (clobber (reg:CC CC_REGNUM)) +- (clobber (match_scratch:SI 3 "=&r")) ++ (clobber (match_scratch:SIDI 3 "=&r")) + (clobber (match_scratch:SI 4 "<sync_clobber>"))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" ++ "<sync_predtab>" + { + return arm_output_sync_insn (insn, operands); + } +@@ -509,47 +403,21 @@ + (set_attr "conds" "clob") + (set_attr "predicable" "no")]) + +-(define_insn "arm_sync_old_nandsi" +- [(set (match_operand:SI 0 "s_register_operand" "=&r") +- (unspec_volatile:SI [(not:SI (and:SI +- (match_operand:SI 1 "arm_sync_memory_operand" "+Q") +- (match_operand:SI 2 "s_register_operand" "r"))) +- ] +- VUNSPEC_SYNC_OLD_OP)) +- (set (match_dup 1) +- (unspec_volatile:SI [(match_dup 1) (match_dup 2)] +- VUNSPEC_SYNC_OLD_OP)) +- (clobber (reg:CC CC_REGNUM)) +- (clobber (match_scratch:SI 3 "=&r")) +- (clobber (match_scratch:SI 4 "=&r"))] +- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" +- { +- return arm_output_sync_insn (insn, operands); +- } +- [(set_attr "sync_result" "0") +- (set_attr "sync_memory" "1") +- (set_attr "sync_new_value" "2") +- (set_attr "sync_t1" "3") +- (set_attr "sync_t2" "4") +- (set_attr "sync_op" "nand") +- (set_attr "conds" "clob") +- (set_attr "predicable" "no")]) +- + (define_insn "arm_sync_old_<sync_optab><mode>" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (unspec_volatile:SI [(syncop:SI +- (zero_extend:SI +- (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) +- (match_operand:SI 2 "s_register_operand" "r")) +- ] +- VUNSPEC_SYNC_OLD_OP)) ++ (zero_extend:SI ++ (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) ++ (match_operand:SI 2 "s_register_operand" "r")) ++ ] ++ VUNSPEC_SYNC_OLD_OP)) + (set (match_dup 1) +- (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] +- VUNSPEC_SYNC_OLD_OP)) ++ (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] ++ VUNSPEC_SYNC_OLD_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 "=&r")) + (clobber (match_scratch:SI 4 "<sync_clobber>"))] +- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" ++ "<sync_predtab>" + { + return arm_output_sync_insn (insn, operands); + } +@@ -563,20 +431,46 @@ + (set_attr "predicable" "no")]) + + (define_insn "arm_sync_old_nand<mode>" ++ [(set (match_operand:SIDI 0 "s_register_operand" "=&r") ++ (unspec_volatile:SIDI [(not:SIDI (and:SIDI ++ (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q") ++ (match_operand:SIDI 2 "s_register_operand" "r"))) ++ ] ++ VUNSPEC_SYNC_OLD_OP)) ++ (set (match_dup 1) ++ (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)] ++ VUNSPEC_SYNC_OLD_OP)) ++ (clobber (reg:CC CC_REGNUM)) ++ (clobber (match_scratch:SIDI 3 "=&r")) ++ (clobber (match_scratch:SI 4 "=&r"))] ++ "<sync_predtab>" ++ { ++ return arm_output_sync_insn 
(insn, operands); ++ } ++ [(set_attr "sync_result" "0") ++ (set_attr "sync_memory" "1") ++ (set_attr "sync_new_value" "2") ++ (set_attr "sync_t1" "3") ++ (set_attr "sync_t2" "4") ++ (set_attr "sync_op" "nand") ++ (set_attr "conds" "clob") ++ (set_attr "predicable" "no")]) ++ ++(define_insn "arm_sync_old_nand<mode>" + [(set (match_operand:SI 0 "s_register_operand" "=&r") +- (unspec_volatile:SI [(not:SI (and:SI +- (zero_extend:SI +- (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) +- (match_operand:SI 2 "s_register_operand" "r"))) +- ] +- VUNSPEC_SYNC_OLD_OP)) ++ (unspec_volatile:SI [(not:SI (and:SI ++ (zero_extend:SI ++ (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) ++ (match_operand:SI 2 "s_register_operand" "r"))) ++ ] ++ VUNSPEC_SYNC_OLD_OP)) + (set (match_dup 1) +- (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] +- VUNSPEC_SYNC_OLD_OP)) ++ (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] ++ VUNSPEC_SYNC_OLD_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 "=&r")) + (clobber (match_scratch:SI 4 "=&r"))] +- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" ++ "<sync_predtab>" + { + return arm_output_sync_insn (insn, operands); + } + +=== modified file 'gcc/config/arm/t-linux-eabi' +--- old/gcc/config/arm/t-linux-eabi 2011-01-03 20:52:22 +0000 ++++ new/gcc/config/arm/t-linux-eabi 2011-10-14 15:50:44 +0000 +@@ -36,3 +36,4 @@ + EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o + + LIB2FUNCS_STATIC_EXTRA += $(srcdir)/config/arm/linux-atomic.c ++LIB2FUNCS_STATIC_EXTRA += $(srcdir)/config/arm/linux-atomic-64bit.c + +=== added file 'gcc/testsuite/gcc.dg/di-longlong64-sync-1.c' +--- old/gcc/testsuite/gcc.dg/di-longlong64-sync-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/di-longlong64-sync-1.c 2011-10-14 15:56:32 +0000 +@@ -0,0 +1,164 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target sync_longlong } */ ++/* { dg-options "-std=gnu99" } */ ++/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */ ++/* { dg-message "note: '__sync_nand_and_fetch' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */ ++ ++ ++/* Test basic functionality of the intrinsics. The operations should ++ not be optimized away if no one checks the return values. */ ++ ++/* Based on ia64-sync-[12].c, but 1) long on ARM is 32 bit so use long long ++ (an explicit 64bit type maybe a better bet) and 2) Use values that cross ++ the 32bit boundary and cause carries since the actual maths are done as ++ pairs of 32 bit instructions. */ ++ ++/* Note: This file is #included by some of the ARM tests. */ ++ ++__extension__ typedef __SIZE_TYPE__ size_t; ++ ++extern void abort (void); ++extern void *memcpy (void *, const void *, size_t); ++extern int memcmp (const void *, const void *, size_t); ++ ++/* Temporary space where the work actually gets done. */ ++static long long AL[24]; ++/* Values copied into AL before we start. */ ++static long long init_di[24] = { 0x100000002ll, 0x200000003ll, 0, 1, ++ ++ 0x100000002ll, 0x100000002ll, ++ 0x100000002ll, 0x100000002ll, ++ ++ 0, 0x1000e0de0000ll, ++ 42 , 0xc001c0de0000ll, ++ ++ -1ll, 0, 0xff00ff0000ll, -1ll, ++ ++ 0, 0x1000e0de0000ll, ++ 42 , 0xc001c0de0000ll, ++ ++ -1ll, 0, 0xff00ff0000ll, -1ll}; ++/* This is what should be in AL at the end. 
*/ ++static long long test_di[24] = { 0x1234567890ll, 0x1234567890ll, 1, 0, ++ ++ 0x100000002ll, 0x100000002ll, ++ 0x100000002ll, 0x100000002ll, ++ ++ 1, 0xc001c0de0000ll, ++ 20, 0x1000e0de0000ll, ++ ++ 0x300000007ll , 0x500000009ll, ++ 0xf100ff0001ll, ~0xa00000007ll, ++ ++ 1, 0xc001c0de0000ll, ++ 20, 0x1000e0de0000ll, ++ ++ 0x300000007ll , 0x500000009ll, ++ 0xf100ff0001ll, ~0xa00000007ll }; ++ ++/* First check they work in terms of what they do to memory. */ ++static void ++do_noret_di (void) ++{ ++ __sync_val_compare_and_swap (AL+0, 0x100000002ll, 0x1234567890ll); ++ __sync_bool_compare_and_swap (AL+1, 0x200000003ll, 0x1234567890ll); ++ __sync_lock_test_and_set (AL+2, 1); ++ __sync_lock_release (AL+3); ++ ++ /* The following tests should not change the value since the ++ original does NOT match. */ ++ __sync_val_compare_and_swap (AL+4, 0x000000002ll, 0x1234567890ll); ++ __sync_val_compare_and_swap (AL+5, 0x100000000ll, 0x1234567890ll); ++ __sync_bool_compare_and_swap (AL+6, 0x000000002ll, 0x1234567890ll); ++ __sync_bool_compare_and_swap (AL+7, 0x100000000ll, 0x1234567890ll); ++ ++ __sync_fetch_and_add (AL+8, 1); ++ __sync_fetch_and_add (AL+9, 0xb000e0000000ll); /* + to both halves & carry. */ ++ __sync_fetch_and_sub (AL+10, 22); ++ __sync_fetch_and_sub (AL+11, 0xb000e0000000ll); ++ ++ __sync_fetch_and_and (AL+12, 0x300000007ll); ++ __sync_fetch_and_or (AL+13, 0x500000009ll); ++ __sync_fetch_and_xor (AL+14, 0xe00000001ll); ++ __sync_fetch_and_nand (AL+15, 0xa00000007ll); ++ ++ /* These should be the same as the fetch_and_* cases except for ++ return value. */ ++ __sync_add_and_fetch (AL+16, 1); ++ /* add to both halves & carry. */ ++ __sync_add_and_fetch (AL+17, 0xb000e0000000ll); ++ __sync_sub_and_fetch (AL+18, 22); ++ __sync_sub_and_fetch (AL+19, 0xb000e0000000ll); ++ ++ __sync_and_and_fetch (AL+20, 0x300000007ll); ++ __sync_or_and_fetch (AL+21, 0x500000009ll); ++ __sync_xor_and_fetch (AL+22, 0xe00000001ll); ++ __sync_nand_and_fetch (AL+23, 0xa00000007ll); ++} ++ ++/* Now check return values. */ ++static void ++do_ret_di (void) ++{ ++ if (__sync_val_compare_and_swap (AL+0, 0x100000002ll, 0x1234567890ll) != ++ 0x100000002ll) abort (); ++ if (__sync_bool_compare_and_swap (AL+1, 0x200000003ll, 0x1234567890ll) != ++ 1) abort (); ++ if (__sync_lock_test_and_set (AL+2, 1) != 0) abort (); ++ __sync_lock_release (AL+3); /* no return value, but keep to match results. */ ++ ++ /* The following tests should not change the value since the ++ original does NOT match. 
*/ ++ if (__sync_val_compare_and_swap (AL+4, 0x000000002ll, 0x1234567890ll) != ++ 0x100000002ll) abort (); ++ if (__sync_val_compare_and_swap (AL+5, 0x100000000ll, 0x1234567890ll) != ++ 0x100000002ll) abort (); ++ if (__sync_bool_compare_and_swap (AL+6, 0x000000002ll, 0x1234567890ll) != ++ 0) abort (); ++ if (__sync_bool_compare_and_swap (AL+7, 0x100000000ll, 0x1234567890ll) != ++ 0) abort (); ++ ++ if (__sync_fetch_and_add (AL+8, 1) != 0) abort (); ++ if (__sync_fetch_and_add (AL+9, 0xb000e0000000ll) != 0x1000e0de0000ll) abort (); ++ if (__sync_fetch_and_sub (AL+10, 22) != 42) abort (); ++ if (__sync_fetch_and_sub (AL+11, 0xb000e0000000ll) != 0xc001c0de0000ll) ++ abort (); ++ ++ if (__sync_fetch_and_and (AL+12, 0x300000007ll) != -1ll) abort (); ++ if (__sync_fetch_and_or (AL+13, 0x500000009ll) != 0) abort (); ++ if (__sync_fetch_and_xor (AL+14, 0xe00000001ll) != 0xff00ff0000ll) abort (); ++ if (__sync_fetch_and_nand (AL+15, 0xa00000007ll) != -1ll) abort (); ++ ++ /* These should be the same as the fetch_and_* cases except for ++ return value. */ ++ if (__sync_add_and_fetch (AL+16, 1) != 1) abort (); ++ if (__sync_add_and_fetch (AL+17, 0xb000e0000000ll) != 0xc001c0de0000ll) ++ abort (); ++ if (__sync_sub_and_fetch (AL+18, 22) != 20) abort (); ++ if (__sync_sub_and_fetch (AL+19, 0xb000e0000000ll) != 0x1000e0de0000ll) ++ abort (); ++ ++ if (__sync_and_and_fetch (AL+20, 0x300000007ll) != 0x300000007ll) abort (); ++ if (__sync_or_and_fetch (AL+21, 0x500000009ll) != 0x500000009ll) abort (); ++ if (__sync_xor_and_fetch (AL+22, 0xe00000001ll) != 0xf100ff0001ll) abort (); ++ if (__sync_nand_and_fetch (AL+23, 0xa00000007ll) != ~0xa00000007ll) abort (); ++} ++ ++int main () ++{ ++ memcpy (AL, init_di, sizeof (init_di)); ++ ++ do_noret_di (); ++ ++ if (memcmp (AL, test_di, sizeof (test_di))) ++ abort (); ++ ++ memcpy (AL, init_di, sizeof (init_di)); ++ ++ do_ret_di (); ++ ++ if (memcmp (AL, test_di, sizeof (test_di))) ++ abort (); ++ ++ return 0; ++} + +=== added file 'gcc/testsuite/gcc.dg/di-sync-multithread.c' +--- old/gcc/testsuite/gcc.dg/di-sync-multithread.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/di-sync-multithread.c 2011-10-14 15:56:32 +0000 +@@ -0,0 +1,205 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target sync_longlong } */ ++/* { dg-require-effective-target pthread_h } */ ++/* { dg-require-effective-target pthread } */ ++/* { dg-options "-pthread -std=gnu99" } */ ++ ++/* test of long long atomic ops performed in parallel in 3 pthreads ++ david.gilbert@linaro.org */ ++ ++#include <pthread.h> ++#include <unistd.h> ++ ++/*#define DEBUGIT 1 */ ++ ++#ifdef DEBUGIT ++#include <stdio.h> ++ ++#define DOABORT(x,...) {\ ++ fprintf (stderr, x, __VA_ARGS__); fflush (stderr); abort ();\ ++ } ++ ++#else ++ ++#define DOABORT(x,...) abort (); ++ ++#endif ++ ++/* Passed to each thread to describe which bits it is going to work on. */ ++struct threadwork { ++ unsigned long long count; /* incremented each time the worker loops. */ ++ unsigned int thread; /* ID */ ++ unsigned int addlsb; /* 8 bit */ ++ unsigned int logic1lsb; /* 5 bit */ ++ unsigned int logic2lsb; /* 8 bit */ ++}; ++ ++/* The shared word where all the atomic work is done. */ ++static volatile long long workspace; ++ ++/* A shared word to tell the workers to quit when non-0. */ ++static long long doquit; ++ ++extern void abort (void); ++ ++/* Note this test doesn't test the return values much. 
*/ ++void* ++worker (void* data) ++{ ++ struct threadwork *tw = (struct threadwork*)data; ++ long long add1bit = 1ll << tw->addlsb; ++ long long logic1bit = 1ll << tw->logic1lsb; ++ long long logic2bit = 1ll << tw->logic2lsb; ++ ++ /* Clear the bits we use. */ ++ __sync_and_and_fetch (&workspace, ~(0xffll * add1bit)); ++ __sync_fetch_and_and (&workspace, ~(0x1fll * logic1bit)); ++ __sync_fetch_and_and (&workspace, ~(0xffll * logic2bit)); ++ ++ do ++ { ++ long long tmp1, tmp2, tmp3; ++ /* OK, lets try and do some stuff to the workspace - by the end ++ of the main loop our area should be the same as it is now - i.e. 0. */ ++ ++ /* Push the arithmetic section upto 128 - one of the threads will ++ case this to carry accross the 32bit boundary. */ ++ for (tmp2 = 0; tmp2 < 64; tmp2++) ++ { ++ /* Add 2 using the two different adds. */ ++ tmp1 = __sync_add_and_fetch (&workspace, add1bit); ++ tmp3 = __sync_fetch_and_add (&workspace, add1bit); ++ ++ /* The value should be the intermediate add value in both cases. */ ++ if ((tmp1 & (add1bit * 0xff)) != (tmp3 & (add1bit * 0xff))) ++ DOABORT ("Mismatch of add intermediates on thread %d " ++ "workspace=0x%llx tmp1=0x%llx " ++ "tmp2=0x%llx tmp3=0x%llx\n", ++ tw->thread, workspace, tmp1, tmp2, tmp3); ++ } ++ ++ /* Set the logic bits. */ ++ tmp2=__sync_or_and_fetch (&workspace, ++ 0x1fll * logic1bit | 0xffll * logic2bit); ++ ++ /* Check the logic bits are set and the arithmetic value is correct. */ ++ if ((tmp2 & (0x1fll * logic1bit | 0xffll * logic2bit ++ | 0xffll * add1bit)) ++ != (0x1fll * logic1bit | 0xffll * logic2bit | 0x80ll * add1bit)) ++ DOABORT ("Midloop check failed on thread %d " ++ "workspace=0x%llx tmp2=0x%llx " ++ "masktmp2=0x%llx expected=0x%llx\n", ++ tw->thread, workspace, tmp2, ++ tmp2 & (0x1fll * logic1bit | 0xffll * logic2bit | ++ 0xffll * add1bit), ++ (0x1fll * logic1bit | 0xffll * logic2bit | 0x80ll * add1bit)); ++ ++ /* Pull the arithmetic set back down to 0 - again this should cause a ++ carry across the 32bit boundary in one thread. */ ++ ++ for (tmp2 = 0; tmp2 < 64; tmp2++) ++ { ++ /* Subtract 2 using the two different subs. */ ++ tmp1=__sync_sub_and_fetch (&workspace, add1bit); ++ tmp3=__sync_fetch_and_sub (&workspace, add1bit); ++ ++ /* The value should be the intermediate sub value in both cases. */ ++ if ((tmp1 & (add1bit * 0xff)) != (tmp3 & (add1bit * 0xff))) ++ DOABORT ("Mismatch of sub intermediates on thread %d " ++ "workspace=0x%llx tmp1=0x%llx " ++ "tmp2=0x%llx tmp3=0x%llx\n", ++ tw->thread, workspace, tmp1, tmp2, tmp3); ++ } ++ ++ ++ /* Clear the logic bits. */ ++ __sync_fetch_and_xor (&workspace, 0x1fll * logic1bit); ++ tmp3=__sync_and_and_fetch (&workspace, ~(0xffll * logic2bit)); ++ ++ /* The logic bits and the arithmetic bits should be zero again. */ ++ if (tmp3 & (0x1fll * logic1bit | 0xffll * logic2bit | 0xffll * add1bit)) ++ DOABORT ("End of worker loop; bits none 0 on thread %d " ++ "workspace=0x%llx tmp3=0x%llx " ++ "mask=0x%llx maskedtmp3=0x%llx\n", ++ tw->thread, workspace, tmp3, (0x1fll * logic1bit | ++ 0xffll * logic2bit | 0xffll * add1bit), ++ tmp3 & (0x1fll * logic1bit | 0xffll * logic2bit | 0xffll * add1bit)); ++ ++ __sync_add_and_fetch (&tw->count, 1); ++ } ++ while (!__sync_bool_compare_and_swap (&doquit, 1, 1)); ++ ++ pthread_exit (0); ++} ++ ++int ++main () ++{ ++ /* We have 3 threads doing three sets of operations, an 8 bit ++ arithmetic field, a 5 bit logic field and an 8 bit logic ++ field (just to pack them all in). 
++ ++ 6 5 4 4 3 2 1 ++ 3 6 8 0 2 4 6 8 0 ++ |...,...|...,...|...,...|...,...|...,...|...,...|...,...|...,... ++ - T0 -- T1 -- T2 --T2 -- T0 -*- T2-- T1-- T1 -***- T0- ++ logic2 logic2 arith log2 arith log1 log1 arith log1 ++ ++ */ ++ unsigned int t; ++ long long tmp; ++ int err; ++ ++ struct threadwork tw[3]={ ++ { 0ll, 0, 27, 0, 56 }, ++ { 0ll, 1, 8,16, 48 }, ++ { 0ll, 2, 40,21, 35 } ++ }; ++ ++ pthread_t threads[3]; ++ ++ __sync_lock_release (&doquit); ++ ++ /* Get the work space into a known value - All 1's. */ ++ __sync_lock_release (&workspace); /* Now all 0. */ ++ tmp = __sync_val_compare_and_swap (&workspace, 0, -1ll); ++ if (tmp!=0) ++ DOABORT ("Initial __sync_val_compare_and_swap wasn't 0 workspace=0x%llx " ++ "tmp=0x%llx\n", workspace,tmp); ++ ++ for (t = 0; t < 3; t++) ++ { ++ err=pthread_create (&threads[t], NULL , worker, &tw[t]); ++ if (err) DOABORT ("pthread_create failed on thread %d with error %d\n", ++ t, err); ++ }; ++ ++ sleep (5); ++ ++ /* Stop please. */ ++ __sync_lock_test_and_set (&doquit, 1ll); ++ ++ for (t = 0; t < 3; t++) ++ { ++ err=pthread_join (threads[t], NULL); ++ if (err) ++ DOABORT ("pthread_join failed on thread %d with error %d\n", t, err); ++ }; ++ ++ __sync_synchronize (); ++ ++ /* OK, so all the workers have finished - ++ the workers should have zero'd their workspace, the unused areas ++ should still be 1. */ ++ if (!__sync_bool_compare_and_swap (&workspace, 0x040000e0ll, 0)) ++ DOABORT ("End of run workspace mismatch, got %llx\n", workspace); ++ ++ /* All the workers should have done some work. */ ++ for (t = 0; t < 3; t++) ++ { ++ if (tw[t].count == 0) DOABORT ("Worker %d gave 0 count\n", t); ++ }; ++ ++ return 0; ++} ++ + +=== added file 'gcc/testsuite/gcc.target/arm/di-longlong64-sync-withhelpers.c' +--- old/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withhelpers.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withhelpers.c 2011-10-14 15:56:32 +0000 +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_arch_v5_ok } */ ++/* { dg-options "-std=gnu99" } */ ++/* { dg-add-options arm_arch_v5 } */ ++/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */ ++/* { dg-message "note: '__sync_nand_and_fetch' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */ ++/* { dg-message "file included" "In file included" { target *-*-* } 0 } */ ++ ++#include "../../gcc.dg/di-longlong64-sync-1.c" ++ ++/* On an old ARM we have no ldrexd or strexd so we have to use helpers. 
*/ ++/* { dg-final { scan-assembler-not "ldrexd" } } */ ++/* { dg-final { scan-assembler-not "strexd" } } */ ++/* { dg-final { scan-assembler "__sync_" } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c' +--- old/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c 2011-10-14 15:56:32 +0000 +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_arm_ok } */ ++/* { dg-options "-marm -std=gnu99" } */ ++/* { dg-require-effective-target arm_arch_v6k_ok } */ ++/* { dg-add-options arm_arch_v6k } */ ++/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */ ++/* { dg-message "note: '__sync_nand_and_fetch' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */ ++/* { dg-message "file included" "In file included" { target *-*-* } 0 } */ ++ ++#include "../../gcc.dg/di-longlong64-sync-1.c" ++ ++/* We should be using ldrexd, strexd and no helpers or shorter ldrex. */ ++/* { dg-final { scan-assembler-times "\tldrexd" 46 } } */ ++/* { dg-final { scan-assembler-times "\tstrexd" 46 } } */ ++/* { dg-final { scan-assembler-not "__sync_" } } */ ++/* { dg-final { scan-assembler-not "ldrex\t" } } */ ++/* { dg-final { scan-assembler-not "strex\t" } } */ + +=== modified file 'gcc/testsuite/lib/target-supports.exp' +--- old/gcc/testsuite/lib/target-supports.exp 2011-11-22 17:10:17 +0000 ++++ new/gcc/testsuite/lib/target-supports.exp 2011-11-28 15:07:01 +0000 +@@ -2000,6 +2000,47 @@ + check_effective_target_arm_fp16_ok_nocache] + } + ++# Creates a series of routines that return 1 if the given architecture ++# can be selected and a routine to give the flags to select that architecture ++# Note: Extra flags may be added to disable options from newer compilers ++# (Thumb in particular - but others may be added in the future) ++# Usage: /* { dg-require-effective-target arm_arch_v5_ok } */ ++# /* { dg-add-options arm_arch_v5 } */ ++foreach { armfunc armflag armdef } { v5 "-march=armv5 -marm" __ARM_ARCH_5__ ++ v6 "-march=armv6" __ARM_ARCH_6__ ++ v6k "-march=armv6k" __ARM_ARCH_6K__ ++ v7a "-march=armv7-a" __ARM_ARCH_7A__ } { ++ eval [string map [list FUNC $armfunc FLAG $armflag DEF $armdef ] { ++ proc check_effective_target_arm_arch_FUNC_ok { } { ++ if { [ string match "*-marm*" "FLAG" ] && ++ ![check_effective_target_arm_arm_ok] } { ++ return 0 ++ } ++ return [check_no_compiler_messages arm_arch_FUNC_ok assembly { ++ #if !defined (DEF) ++ #error FOO ++ #endif ++ } "FLAG" ] ++ } ++ ++ proc add_options_for_arm_arch_FUNC { flags } { ++ return "$flags FLAG" ++ } ++ }] ++} ++ ++# Return 1 if this is an ARM target where -marm causes ARM to be ++# used (not Thumb) ++ ++proc check_effective_target_arm_arm_ok { } { ++ return [check_no_compiler_messages arm_arm_ok assembly { ++ #if !defined (__arm__) || defined (__thumb__) || defined (__thumb2__) ++ #error FOO ++ #endif ++ } "-marm"] ++} ++ ++ + # Return 1 is this is an ARM target where -mthumb causes Thumb-1 to be + # used. 
+ +@@ -3384,6 +3425,31 @@ + return $et_sync_int_long_saved + } + ++# Return 1 if the target supports atomic operations on "long long" and can ++# execute them ++# So far only put checks in for ARM, others may want to add their own ++proc check_effective_target_sync_longlong { } { ++ return [check_runtime sync_longlong_runtime { ++ #include <stdlib.h> ++ int main () ++ { ++ long long l1; ++ ++ if (sizeof (long long) != 8) ++ exit (1); ++ ++ #ifdef __arm__ ++ /* Just check for native; checking for kernel fallback is tricky. */ ++ asm volatile ("ldrexd r0,r1, [%0]" : : "r" (&l1) : "r0", "r1"); ++ #else ++ # error "Add other suitable archs here" ++ #endif ++ ++ exit (0); ++ } ++ } "" ] ++} ++ + # Return 1 if the target supports atomic operations on "char" and "short". + + proc check_effective_target_sync_char_short { } { + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106846.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106846.patch new file mode 100644 index 0000000000..88d74c72f3 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106846.patch @@ -0,0 +1,487 @@ +2011-11-27 Ira Rosen <ira.rosen@linaro.org> + + Needs to be merged upstream: + + gcc/ + * tree-vect-patterns.c (widened_name_p): Rename to ... + (type_conversion_p): ... this. Add new argument to determine + if it's a promotion or demotion operation. Check for + CONVERT_EXPR_CODE_P instead of NOP_EXPR. + (vect_recog_dot_prod_pattern): Call type_conversion_p instead + widened_name_p. + (vect_recog_widen_mult_pattern, vect_recog_widen_sum_pattern, + vect_operation_fits_smaller_type, vect_recog_widen_shift_pattern): + Likewise. + (vect_recog_mixed_size_cond_pattern): Likewise and allow + non-constant then and else clauses. + + gcc/testsuite/ + * gcc.dg/vect/bb-slp-cond-3.c: New test. + * gcc.dg/vect/bb-slp-cond-4.c: New test. + +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-cond-3.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-cond-3.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-cond-3.c 2011-11-27 11:29:32 +0000 +@@ -0,0 +1,85 @@ ++/* { dg-require-effective-target vect_condition } */ ++ ++#include "tree-vect.h" ++ ++#define N 64 ++ ++/* Comparison in int, then/else and result in unsigned char. 
*/ ++ ++static inline unsigned char ++foo (int x, int y, int a, int b) ++{ ++ if (x >= y) ++ return a; ++ else ++ return b; ++} ++ ++__attribute__((noinline, noclone)) void ++bar (unsigned char * __restrict__ a, unsigned char * __restrict__ b, ++ unsigned char * __restrict__ c, unsigned char * __restrict__ d, ++ unsigned char * __restrict__ e, int stride, int w) ++{ ++ int i; ++ for (i = 0; i < N/stride; i++, a += stride, b += stride, c += stride, ++ d += stride, e += stride) ++ { ++ e[0] = foo (c[0], d[0], a[0] * w, b[0] * w); ++ e[1] = foo (c[1], d[1], a[1] * w, b[1] * w); ++ e[2] = foo (c[2], d[2], a[2] * w, b[2] * w); ++ e[3] = foo (c[3], d[3], a[3] * w, b[3] * w); ++ e[4] = foo (c[4], d[4], a[4] * w, b[4] * w); ++ e[5] = foo (c[5], d[5], a[5] * w, b[5] * w); ++ e[6] = foo (c[6], d[6], a[6] * w, b[6] * w); ++ e[7] = foo (c[7], d[7], a[7] * w, b[7] * w); ++ e[8] = foo (c[8], d[8], a[8] * w, b[8] * w); ++ e[9] = foo (c[9], d[9], a[9] * w, b[9] * w); ++ e[10] = foo (c[10], d[10], a[10] * w, b[10] * w); ++ e[11] = foo (c[11], d[11], a[11] * w, b[11] * w); ++ e[12] = foo (c[12], d[12], a[12] * w, b[12] * w); ++ e[13] = foo (c[13], d[13], a[13] * w, b[13] * w); ++ e[14] = foo (c[14], d[14], a[14] * w, b[14] * w); ++ e[15] = foo (c[15], d[15], a[15] * w, b[15] * w); ++ } ++} ++ ++ ++unsigned char a[N], b[N], c[N], d[N], e[N]; ++ ++int main () ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ a[i] = i; ++ b[i] = 5; ++ e[i] = 0; ++ ++ switch (i % 9) ++ { ++ case 0: asm (""); c[i] = i; d[i] = i + 1; break; ++ case 1: c[i] = 0; d[i] = 0; break; ++ case 2: c[i] = i + 1; d[i] = i - 1; break; ++ case 3: c[i] = i; d[i] = i + 7; break; ++ case 4: c[i] = i; d[i] = i; break; ++ case 5: c[i] = i + 16; d[i] = i + 3; break; ++ case 6: c[i] = i - 5; d[i] = i; break; ++ case 7: c[i] = i; d[i] = i; break; ++ case 8: c[i] = i; d[i] = i - 7; break; ++ } ++ } ++ ++ bar (a, b, c, d, e, 16, 2); ++ for (i = 0; i < N; i++) ++ if (e[i] != ((i % 3) == 0 ? 10 : 2 * i)) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_element_align && vect_int_mult } } } } */ ++/* { dg-final { cleanup-tree-dump "slp" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-cond-4.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-cond-4.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-cond-4.c 2011-11-27 11:29:32 +0000 +@@ -0,0 +1,85 @@ ++/* { dg-require-effective-target vect_condition } */ ++ ++#include "tree-vect.h" ++ ++#define N 64 ++ ++/* Comparison in short, then/else and result in int. 
*/ ++static inline int ++foo (short x, short y, int a, int b) ++{ ++ if (x >= y) ++ return a; ++ else ++ return b; ++} ++ ++__attribute__((noinline, noclone)) void ++bar (short * __restrict__ a, short * __restrict__ b, ++ short * __restrict__ c, short * __restrict__ d, ++ int * __restrict__ e, int stride, int w) ++{ ++ int i; ++ for (i = 0; i < N/stride; i++, a += stride, b += stride, c += stride, ++ d += stride, e += stride) ++ { ++ e[0] = foo (c[0], d[0], a[0], b[0]); ++ e[1] = foo (c[1], d[1], a[1], b[1]); ++ e[2] = foo (c[2], d[2], a[2], b[2]); ++ e[3] = foo (c[3], d[3], a[3], b[3]); ++ e[4] = foo (c[4], d[4], a[4], b[4]); ++ e[5] = foo (c[5], d[5], a[5], b[5]); ++ e[6] = foo (c[6], d[6], a[6], b[6]); ++ e[7] = foo (c[7], d[7], a[7], b[7]); ++ e[8] = foo (c[8], d[8], a[8], b[8]); ++ e[9] = foo (c[9], d[9], a[9], b[9]); ++ e[10] = foo (c[10], d[10], a[10], b[10]); ++ e[11] = foo (c[11], d[11], a[11], b[11]); ++ e[12] = foo (c[12], d[12], a[12], b[12]); ++ e[13] = foo (c[13], d[13], a[13], b[13]); ++ e[14] = foo (c[14], d[14], a[14], b[14]); ++ e[15] = foo (c[15], d[15], a[15], b[15]); ++ } ++} ++ ++ ++short a[N], b[N], c[N], d[N]; ++int e[N]; ++ ++int main () ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ a[i] = i; ++ b[i] = 5; ++ e[i] = 0; ++ ++ switch (i % 9) ++ { ++ case 0: asm (""); c[i] = - i - 1; d[i] = i + 1; break; ++ case 1: c[i] = 0; d[i] = 0; break; ++ case 2: c[i] = i + 1; d[i] = - i - 1; break; ++ case 3: c[i] = i; d[i] = i + 7; break; ++ case 4: c[i] = i; d[i] = i; break; ++ case 5: c[i] = i + 16; d[i] = i + 3; break; ++ case 6: c[i] = - i - 5; d[i] = - i; break; ++ case 7: c[i] = - i; d[i] = - i; break; ++ case 8: c[i] = - i; d[i] = - i - 7; break; ++ } ++ } ++ ++ bar (a, b, c, d, e, 16, 2); ++ for (i = 0; i < N; i++) ++ if (e[i] != ((i % 3) == 0 ? 5 : i)) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */ ++/* { dg-final { cleanup-tree-dump "slp" } } */ ++ + +=== modified file 'gcc/tree-vect-patterns.c' +--- old/gcc/tree-vect-patterns.c 2011-11-23 07:49:33 +0000 ++++ new/gcc/tree-vect-patterns.c 2011-11-27 12:17:31 +0000 +@@ -62,18 +62,16 @@ + vect_recog_mixed_size_cond_pattern}; + + +-/* Function widened_name_p +- +- Check whether NAME, an ssa-name used in USE_STMT, +- is a result of a type-promotion, such that: +- DEF_STMT: NAME = NOP (name0) +- where the type of name0 (HALF_TYPE) is smaller than the type of NAME. ++/* Check whether NAME, an ssa-name used in USE_STMT, ++ is a result of a type promotion or demotion, such that: ++ DEF_STMT: NAME = NOP (name0) ++ where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME. + If CHECK_SIGN is TRUE, check that either both types are signed or both are + unsigned. */ + + static bool +-widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt, +- bool check_sign) ++type_conversion_p (tree name, gimple use_stmt, bool check_sign, ++ tree *orig_type, gimple *def_stmt, bool *promotion) + { + tree dummy; + gimple dummy_gimple; +@@ -96,21 +94,27 @@ + && dt != vect_external_def && dt != vect_constant_def) + return false; + +- if (! 
*def_stmt) ++ if (!*def_stmt) + return false; + + if (!is_gimple_assign (*def_stmt)) + return false; + +- if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR) ++ if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt))) + return false; + + oprnd0 = gimple_assign_rhs1 (*def_stmt); + +- *half_type = TREE_TYPE (oprnd0); +- if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type) +- || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign) +- || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2))) ++ *orig_type = TREE_TYPE (oprnd0); ++ if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type) ++ || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign)) ++ return false; ++ ++ if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2)) ++ *promotion = true; ++ else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2)) ++ *promotion = false; ++ else + return false; + + if (!vect_is_simple_use (oprnd0, loop_vinfo, bb_vinfo, &dummy_gimple, &dummy, +@@ -192,6 +196,7 @@ + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); + struct loop *loop; + tree var, rhs; ++ bool promotion; + + if (!loop_info) + return NULL; +@@ -255,7 +260,9 @@ + return NULL; + stmt = last_stmt; + +- if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true)) ++ if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt, ++ &promotion) ++ && promotion) + { + stmt = def_stmt; + oprnd0 = gimple_assign_rhs1 (stmt); +@@ -310,10 +317,14 @@ + if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type) + || !types_compatible_p (TREE_TYPE (oprnd1), prod_type)) + return NULL; +- if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true)) ++ if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt, ++ &promotion) ++ || !promotion) + return NULL; + oprnd00 = gimple_assign_rhs1 (def_stmt); +- if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true)) ++ if (!type_conversion_p (oprnd0, stmt, true, &half_type1, &def_stmt, ++ &promotion) ++ || !promotion) + return NULL; + oprnd01 = gimple_assign_rhs1 (def_stmt); + if (!types_compatible_p (half_type0, half_type1)) +@@ -526,7 +537,7 @@ + enum tree_code dummy_code; + int dummy_int; + VEC (tree, heap) *dummy_vec; +- bool op1_ok; ++ bool op1_ok, promotion; + + if (!is_gimple_assign (last_stmt)) + return NULL; +@@ -546,12 +557,14 @@ + return NULL; + + /* Check argument 0. */ +- if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false)) ++ if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0, ++ &promotion) ++ || !promotion) + return NULL; +- /* Check argument 1. */ +- op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false); +- +- if (op1_ok) ++ /* Check argument 1. */ ++ op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1, ++ &def_stmt1, &promotion); ++ if (op1_ok && promotion) + { + oprnd0 = gimple_assign_rhs1 (def_stmt0); + oprnd1 = gimple_assign_rhs1 (def_stmt1); +@@ -793,6 +806,7 @@ + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); + struct loop *loop; + tree var; ++ bool promotion; + + if (!loop_info) + return NULL; +@@ -832,8 +846,10 @@ + Left to check that oprnd0 is defined by a cast from type 'type' to type + 'TYPE'. 
*/ + +- if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true)) +- return NULL; ++ if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt, ++ &promotion) ++ || !promotion) ++ return NULL; + + oprnd0 = gimple_assign_rhs1 (stmt); + *type_in = half_type; +@@ -899,6 +915,7 @@ + gimple def_stmt, new_stmt; + bool first = false; + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); ++ bool promotion; + bb_vec_info bb_info = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)); + struct loop *loop = NULL; + +@@ -933,7 +950,9 @@ + else + { + first = true; +- if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false) ++ if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt, ++ &promotion) ++ || !promotion + || !gimple_bb (def_stmt) + || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))) + || (!loop && gimple_bb (def_stmt) != BB_VINFO_BB (bb_info) +@@ -1327,6 +1346,7 @@ + VEC (tree, heap) * dummy_vec; + gimple use_stmt = NULL; + bool over_widen = false; ++ bool promotion; + + if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt)) + return NULL; +@@ -1381,8 +1401,10 @@ + return NULL; + + /* Check operand 0: it has to be defined by a type promotion. */ +- if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false)) +- return NULL; ++ if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0, ++ &promotion) ++ || !promotion) ++ return NULL; + + /* Check operand 1: has to be positive. We check that it fits the type + in vect_handle_widen_op_by_const (). */ +@@ -1492,9 +1514,9 @@ + S1 a_T = x_t CMP y_t ? b_T : c_T; + + where type 'TYPE' is an integral type which has different size +- from 'type'. b_T and c_T are constants and if 'TYPE' is wider ++ from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider + than 'type', the constants need to fit into an integer type +- with the same width as 'type'. ++ with the same width as 'type') or results of conversion from 'type'. 
+ + Input: + +@@ -1523,6 +1545,9 @@ + enum machine_mode cmpmode; + gimple pattern_stmt, def_stmt; + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); ++ tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE; ++ gimple def_stmt0 = NULL, def_stmt1 = NULL; ++ bool promotion; + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); + + if (!is_gimple_assign (last_stmt) +@@ -1535,25 +1560,40 @@ + then_clause = TREE_OPERAND (op, 1); + else_clause = TREE_OPERAND (op, 2); + +- if (TREE_CODE (then_clause) != INTEGER_CST +- || TREE_CODE (else_clause) != INTEGER_CST) +- return NULL; +- + if (!COMPARISON_CLASS_P (cond_expr)) + return NULL; + + type = gimple_expr_type (last_stmt); + comp_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0)); +- if (!INTEGRAL_TYPE_P (comp_type) +- || !INTEGRAL_TYPE_P (type)) +- return NULL; +- + comp_vectype = get_vectype_for_scalar_type (comp_type); + if (comp_vectype == NULL_TREE) + return NULL; + ++ if (types_compatible_p (type, comp_type) ++ || !INTEGRAL_TYPE_P (comp_type) ++ || !INTEGRAL_TYPE_P (type)) ++ return NULL; ++ ++ if ((TREE_CODE (then_clause) != INTEGER_CST ++ && !type_conversion_p (then_clause, last_stmt, false, &orig_type0, ++ &def_stmt0, &promotion)) ++ || (TREE_CODE (else_clause) != INTEGER_CST ++ && !type_conversion_p (else_clause, last_stmt, false, &orig_type1, ++ &def_stmt1, &promotion))) ++ return NULL; ++ ++ if (orig_type0 && orig_type1 ++ && (!types_compatible_p (orig_type0, orig_type1) ++ || !types_compatible_p (orig_type0, comp_type))) ++ return NULL; ++ ++ if (orig_type0) ++ then_clause = gimple_assign_rhs1 (def_stmt0); ++ ++ if (orig_type1) ++ else_clause = gimple_assign_rhs1 (def_stmt1); ++ + cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype)); +- + if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode)) + return NULL; + +@@ -1561,18 +1601,15 @@ + if (vectype == NULL_TREE) + return NULL; + +- if (types_compatible_p (vectype, comp_vectype)) +- return NULL; +- + if (!expand_vec_cond_expr_p (comp_vectype, TYPE_MODE (comp_vectype))) + return NULL; + +- if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode)) +- { +- if (!int_fits_type_p (then_clause, comp_type) +- || !int_fits_type_p (else_clause, comp_type)) +- return NULL; +- } ++ if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode) ++ && ((TREE_CODE (then_clause) == INTEGER_CST ++ && !int_fits_type_p (then_clause, comp_type)) ++ || (TREE_CODE (else_clause) == INTEGER_CST ++ && !int_fits_type_p (else_clause, comp_type)))) ++ return NULL; + + tmp = build3 (COND_EXPR, comp_type, unshare_expr (cond_expr), + fold_convert (comp_type, then_clause), + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106848.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106848.patch new file mode 100644 index 0000000000..43a2a4da96 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106848.patch @@ -0,0 +1,276 @@ +2011-12-05 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline -A15 tuning. + 2011-11-30 Matthew Gretton-Dann <matthew.gretton-dann@arm.com> + + * config/arm/arm.c (arm_issue_rate): Cortex-A15 can triple issue. + * config/arm/arm.md (mul64): New attribute. + (generic_sched): Cortex-A15 is not scheduled generically. + (cortex-a15.md): Include. + * config/arm/cortex-a15.md: New machine description. + * config/arm/t-arm (MD_INCLUDES): Add cortex-a15.md. 
+ + 2011-11-30 Matthew Gretton-Dann <matthew.gretton-dann@arm.com> + * config/arm/t-arm (MD_INCLUDES): Ensure all md files are listed. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-12-05 10:55:48 +0000 ++++ new/gcc/config/arm/arm.c 2011-12-05 12:33:25 +0000 +@@ -24056,6 +24056,9 @@ + { + switch (arm_tune) + { ++ case cortexa15: ++ return 3; ++ + case cortexr4: + case cortexr4f: + case cortexr5: + +=== modified file 'gcc/config/arm/arm.md' +--- old/gcc/config/arm/arm.md 2011-10-26 11:38:30 +0000 ++++ new/gcc/config/arm/arm.md 2011-12-02 00:38:59 +0000 +@@ -345,6 +345,13 @@ + (const_string "mult") + (const_string "alu"))) + ++; Is this an (integer side) multiply with a 64-bit result? ++(define_attr "mul64" "no,yes" ++ (if_then_else ++ (eq_attr "insn" "smlalxy,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals") ++ (const_string "yes") ++ (const_string "no"))) ++ + ; Load scheduling, set from the arm_ld_sched variable + ; initialized by arm_option_override() + (define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched"))) +@@ -511,7 +518,7 @@ + + (define_attr "generic_sched" "yes,no" + (const (if_then_else +- (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexm4") ++ (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexa15,cortexm4") + (eq_attr "tune_cortexr4" "yes")) + (const_string "no") + (const_string "yes")))) +@@ -537,6 +544,7 @@ + (include "cortex-a5.md") + (include "cortex-a8.md") + (include "cortex-a9.md") ++(include "cortex-a15.md") + (include "cortex-r4.md") + (include "cortex-r4f.md") + (include "cortex-m4.md") + +=== added file 'gcc/config/arm/cortex-a15.md' +--- old/gcc/config/arm/cortex-a15.md 1970-01-01 00:00:00 +0000 ++++ new/gcc/config/arm/cortex-a15.md 2011-12-02 00:38:59 +0000 +@@ -0,0 +1,186 @@ ++;; ARM Cortex-A15 pipeline description ++;; Copyright (C) 2011 Free Software Foundation, Inc. ++;; ++;; Written by Matthew Gretton-Dann <matthew.gretton-dann@arm.com> ++ ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, but ++;; WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++;; General Public License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; <http://www.gnu.org/licenses/>. ++ ++(define_automaton "cortex_a15") ++ ++;; The Cortex-A15 core is modelled as a triple issue pipeline that has ++;; the following dispatch units. ++;; 1. Two pipelines for simple integer operations: SX1, SX2 ++;; 2. Two pipelines for Neon and FP data-processing operations: CX1, CX2 ++;; 3. One pipeline for branch operations: BX ++;; 4. One pipeline for integer multiply and divide operations: MX ++;; 5. Two pipelines for load and store operations: LS1, LS2 ++;; ++;; We can issue into three pipelines per-cycle. ++;; ++;; We assume that where we have unit pairs xx1 is always filled before xx2. 
++ ++;; The three issue units ++(define_cpu_unit "ca15_i0, ca15_i1, ca15_i2" "cortex_a15") ++ ++(define_reservation "ca15_issue1" "(ca15_i0|ca15_i1|ca15_i2)") ++(define_reservation "ca15_issue2" "((ca15_i0+ca15_i1)|(ca15_i1+ca15_i2))") ++(define_reservation "ca15_issue3" "(ca15_i0+ca15_i1+ca15_i2)") ++(final_presence_set "ca15_i1" "ca15_i0") ++(final_presence_set "ca15_i2" "ca15_i1") ++ ++;; The main dispatch units ++(define_cpu_unit "ca15_sx1, ca15_sx2" "cortex_a15") ++(define_cpu_unit "ca15_cx1, ca15_cx2" "cortex_a15") ++(define_cpu_unit "ca15_ls1, ca15_ls2" "cortex_a15") ++(define_cpu_unit "ca15_bx, ca15_mx" "cortex_a15") ++ ++(define_reservation "ca15_ls" "(ca15_ls1|ca15_ls2)") ++ ++;; The extended load-store pipeline ++(define_cpu_unit "ca15_ldr, ca15_str" "cortex_a15") ++ ++;; The extended ALU pipeline ++(define_cpu_unit "ca15_sx1_alu, ca15_sx1_shf, ca15_sx1_sat" "cortex_a15") ++(define_cpu_unit "ca15_sx2_alu, ca15_sx2_shf, ca15_sx2_sat" "cortex_a15") ++ ++;; Simple Execution Unit: ++;; ++;; Simple ALU without shift ++(define_insn_reservation "cortex_a15_alu" 2 ++ (and (eq_attr "tune" "cortexa15") ++ (and (eq_attr "type" "alu") ++ (eq_attr "neon_type" "none"))) ++ "ca15_issue1,(ca15_sx1,ca15_sx1_alu)|(ca15_sx2,ca15_sx2_alu)") ++ ++;; ALU ops with immediate shift ++(define_insn_reservation "cortex_a15_alu_shift" 3 ++ (and (eq_attr "tune" "cortexa15") ++ (and (eq_attr "type" "alu_shift") ++ (eq_attr "neon_type" "none"))) ++ "ca15_issue1,(ca15_sx1,ca15_sx1+ca15_sx1_shf,ca15_sx1_alu)\ ++ |(ca15_sx2,ca15_sx2+ca15_sx2_shf,ca15_sx2_alu)") ++ ++;; ALU ops with register controlled shift ++(define_insn_reservation "cortex_a15_alu_shift_reg" 3 ++ (and (eq_attr "tune" "cortexa15") ++ (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "neon_type" "none"))) ++ "(ca15_issue2,ca15_sx1+ca15_sx2,ca15_sx1_shf,ca15_sx2_alu)\ ++ |(ca15_issue1,(ca15_issue1+ca15_sx2,ca15_sx1+ca15_sx2_shf)\ ++ |(ca15_issue1+ca15_sx1,ca15_sx1+ca15_sx1_shf),ca15_sx1_alu)") ++ ++;; Multiply Execution Unit: ++;; ++;; 32-bit multiplies ++(define_insn_reservation "cortex_a15_mult32" 3 ++ (and (eq_attr "tune" "cortexa15") ++ (and (eq_attr "type" "mult") ++ (and (eq_attr "neon_type" "none") ++ (eq_attr "mul64" "no")))) ++ "ca15_issue1,ca15_mx") ++ ++;; 64-bit multiplies ++(define_insn_reservation "cortex_a15_mult64" 4 ++ (and (eq_attr "tune" "cortexa15") ++ (and (eq_attr "type" "mult") ++ (and (eq_attr "neon_type" "none") ++ (eq_attr "mul64" "yes")))) ++ "ca15_issue1,ca15_mx*2") ++ ++;; Integer divide ++(define_insn_reservation "cortex_a15_udiv" 9 ++ (and (eq_attr "tune" "cortexa15") ++ (eq_attr "insn" "udiv")) ++ "ca15_issue1,ca15_mx") ++ ++(define_insn_reservation "cortex_a15_sdiv" 10 ++ (and (eq_attr "tune" "cortexa15") ++ (eq_attr "insn" "sdiv")) ++ "ca15_issue1,ca15_mx") ++ ++;; Block all issue pipes for a cycle ++(define_insn_reservation "cortex_a15_block" 1 ++ (and (eq_attr "tune" "cortexa15") ++ (and (eq_attr "type" "block") ++ (eq_attr "neon_type" "none"))) ++ "ca15_issue3") ++ ++;; Branch execution Unit ++;; ++;; Branches take one issue slot. ++;; No latency as there is no result ++(define_insn_reservation "cortex_a15_branch" 0 ++ (and (eq_attr "tune" "cortexa15") ++ (and (eq_attr "type" "branch") ++ (eq_attr "neon_type" "none"))) ++ "ca15_issue1,ca15_bx") ++ ++ ++;; We lie with calls. They take up all issue slots, and form a block in the ++;; pipeline. The result however is available the next cycle. ++;; ++;; Addition of new units requires this to be updated. 
++(define_insn_reservation "cortex_a15_call" 1 ++ (and (eq_attr "tune" "cortexa15") ++ (and (eq_attr "type" "call") ++ (eq_attr "neon_type" "none"))) ++ "ca15_issue3,\ ++ ca15_sx1+ca15_sx2+ca15_bx+ca15_mx+ca15_cx1+ca15_cx2+ca15_ls1+ca15_ls2,\ ++ ca15_sx1_alu+ca15_sx1_shf+ca15_sx1_sat+ca15_sx2_alu+ca15_sx2_shf\ ++ +ca15_sx2_sat+ca15_ldr+ca15_str") ++ ++;; Load-store execution Unit ++;; ++;; Loads of up to two words. ++(define_insn_reservation "cortex_a15_load1" 4 ++ (and (eq_attr "tune" "cortexa15") ++ (and (eq_attr "type" "load_byte,load1,load2") ++ (eq_attr "neon_type" "none"))) ++ "ca15_issue1,ca15_ls,ca15_ldr,nothing") ++ ++;; Loads of three or four words. ++(define_insn_reservation "cortex_a15_load3" 5 ++ (and (eq_attr "tune" "cortexa15") ++ (and (eq_attr "type" "load3,load4") ++ (eq_attr "neon_type" "none"))) ++ "ca15_issue2,ca15_ls1+ca15_ls2,ca15_ldr,ca15_ldr,nothing") ++ ++;; Stores of up to two words. ++(define_insn_reservation "cortex_a15_store1" 0 ++ (and (eq_attr "tune" "cortexa15") ++ (and (eq_attr "type" "store1,store2") ++ (eq_attr "neon_type" "none"))) ++ "ca15_issue1,ca15_ls,ca15_str") ++ ++;; Stores of three or four words. ++(define_insn_reservation "cortex_a15_store3" 0 ++ (and (eq_attr "tune" "cortexa15") ++ (and (eq_attr "type" "store3,store4") ++ (eq_attr "neon_type" "none"))) ++ "ca15_issue2,ca15_ls1+ca15_ls2,ca15_str,ca15_str") ++ ++;; Simple execution unit bypasses ++(define_bypass 1 "cortex_a15_alu" ++ "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg") ++(define_bypass 2 "cortex_a15_alu_shift" ++ "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg") ++(define_bypass 2 "cortex_a15_alu_shift_reg" ++ "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg") ++(define_bypass 1 "cortex_a15_alu" "cortex_a15_load1,cortex_a15_load3") ++(define_bypass 2 "cortex_a15_alu_shift" "cortex_a15_load1,cortex_a15_load3") ++(define_bypass 2 "cortex_a15_alu_shift_reg" ++ "cortex_a15_load1,cortex_a15_load3") + +=== modified file 'gcc/config/arm/t-arm' +--- old/gcc/config/arm/t-arm 2011-01-03 20:52:22 +0000 ++++ new/gcc/config/arm/t-arm 2011-12-02 00:38:59 +0000 +@@ -31,6 +31,16 @@ + $(srcdir)/config/arm/fmp626.md \ + $(srcdir)/config/arm/fa726te.md \ + $(srcdir)/config/arm/arm926ejs.md \ ++ $(srcdir)/config/arm/cortex-a15.md \ ++ $(srcdir)/config/arm/cortex-a5.md \ ++ $(srcdir)/config/arm/cortex-a8.md \ ++ $(srcdir)/config/arm/cortex-a8-neon.md \ ++ $(srcdir)/config/arm/cortex-a9.md \ ++ $(srcdir)/config/arm/cortex-a9-neon.md \ ++ $(srcdir)/config/arm/cortex-m4-fpu.md \ ++ $(srcdir)/config/arm/cortex-m4.md \ ++ $(srcdir)/config/arm/cortex-r4f.md \ ++ $(srcdir)/config/arm/cortex-r4.md \ + $(srcdir)/config/arm/cirrus.md \ + $(srcdir)/config/arm/fpa.md \ + $(srcdir)/config/arm/vec-common.md \ + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106853.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106853.patch new file mode 100644 index 0000000000..8c51c1d1f3 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106853.patch @@ -0,0 +1,69 @@ +2011-12-20 Ira Rosen <ira.rosen@linaro.org> + + Backport from mainline: + + 2011-11-29 Ira Rosen <ira.rosen@linaro.org> + + PR tree-optimization/51301 + gcc/ + * tree-vect-patterns.c (vect_recog_over_widening_pattern): Check that + the last statement doesn't convert to a bigger type than the original + type of the computation. + + gcc/testsuite/ + * gcc.dg/vect/pr51301.c: New test. 
+ +=== added file 'gcc/testsuite/gcc.dg/vect/pr51301.c' +--- old/gcc/testsuite/gcc.dg/vect/pr51301.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/pr51301.c 2011-11-30 17:54:51 +0000 +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++ ++typedef signed char int8_t; ++typedef signed long long int64_t; ++int64_t ++f0a (int8_t * __restrict__ arg1) ++{ ++ int idx; ++ int64_t result = 0; ++ for (idx = 0; idx < 416; idx += 1) ++ result += arg1[idx] << (arg1[idx] == arg1[idx]); ++ return result; ++} ++ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/tree-vect-patterns.c' +--- old/gcc/tree-vect-patterns.c 2011-11-27 12:17:31 +0000 ++++ new/gcc/tree-vect-patterns.c 2011-12-20 07:47:44 +0000 +@@ -1138,6 +1138,7 @@ + struct loop *loop = NULL; + bb_vec_info bb_vinfo; + stmt_vec_info stmt_vinfo; ++ tree type = NULL; + + stmt_vinfo = vinfo_for_stmt (stmt); + loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); +@@ -1207,6 +1208,7 @@ + print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); + } + ++ type = gimple_expr_type (stmt); + prev_stmt = stmt; + stmt = use_stmt; + +@@ -1222,9 +1224,11 @@ + { + use_lhs = gimple_assign_lhs (use_stmt); + use_type = TREE_TYPE (use_lhs); +- /* Support only type promotion or signedess change. */ ++ /* Support only type promotion or signedess change. Check that USE_TYPE ++ is not bigger than the original type. */ + if (!INTEGRAL_TYPE_P (use_type) +- || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type)) ++ || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type) ++ || TYPE_PRECISION (type) < TYPE_PRECISION (use_type)) + return NULL; + + if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type) + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106855.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106855.patch new file mode 100644 index 0000000000..c433fc73f1 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106855.patch @@ -0,0 +1,22 @@ +2012-01-05 Michael Hope <michael.hope@linaro.org> + + Backport from mainline r182271: + + 2011-12-13 Revital Eres <revital.eres@linaro.org> + + gcc/ + * modulo-sched.c (mark_loop_unsched): Free bbs. + +=== modified file 'gcc/modulo-sched.c' +--- old/gcc/modulo-sched.c 2011-10-30 05:31:00 +0000 ++++ new/gcc/modulo-sched.c 2012-01-05 02:45:23 +0000 +@@ -1204,6 +1204,8 @@ + + for (i = 0; i < loop->num_nodes; i++) + bbs[i]->flags |= BB_DISABLE_SCHEDULE; ++ ++ free (bbs); + } + + /* Return true if all the BBs of the loop are empty except the + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106860.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106860.patch new file mode 100644 index 0000000000..895d6a6cff --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106860.patch @@ -0,0 +1,104 @@ +2012-01-12 Ulrich Weigand <ulrich.weigand@linaro.org> + + LP 879725 + Backport from mainline: + + 2012-01-02 Revital Eres <revital.eres@linaro.org> + + gcc/ + * ddg.c (def_has_ccmode_p): New function. + (add_cross_iteration_register_deps, + create_ddg_dep_from_intra_loop_link): Call it. + + gcc/testsuite/ + * gcc.dg/sms-11.c: New file. + +=== modified file 'gcc/ddg.c' +--- old/gcc/ddg.c 2011-10-02 06:56:53 +0000 ++++ new/gcc/ddg.c 2012-01-10 16:05:14 +0000 +@@ -166,6 +166,24 @@ + return false; + } + ++/* Return true if one of the definitions in INSN has MODE_CC. Otherwise ++ return false. 
*/ ++static bool ++def_has_ccmode_p (rtx insn) ++{ ++ df_ref *def; ++ ++ for (def = DF_INSN_DEFS (insn); *def; def++) ++ { ++ enum machine_mode mode = GET_MODE (DF_REF_REG (*def)); ++ ++ if (GET_MODE_CLASS (mode) == MODE_CC) ++ return true; ++ } ++ ++ return false; ++} ++ + /* Computes the dependence parameters (latency, distance etc.), creates + a ddg_edge and adds it to the given DDG. */ + static void +@@ -202,6 +220,7 @@ + whose register has multiple defs in the loop. */ + if (flag_modulo_sched_allow_regmoves + && (t == ANTI_DEP && dt == REG_DEP) ++ && !def_has_ccmode_p (dest_node->insn) + && !autoinc_var_is_used_p (dest_node->insn, src_node->insn)) + { + rtx set; +@@ -335,7 +354,8 @@ + if (DF_REF_ID (last_def) != DF_REF_ID (first_def) + || !flag_modulo_sched_allow_regmoves + || JUMP_P (use_node->insn) +- || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn)) ++ || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn) ++ || def_has_ccmode_p (DF_REF_INSN (last_def))) + create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP, + REG_DEP, 1); + + +=== added file 'gcc/testsuite/gcc.dg/sms-11.c' +--- old/gcc/testsuite/gcc.dg/sms-11.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/sms-11.c 2012-01-10 16:05:14 +0000 +@@ -0,0 +1,37 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -fmodulo-sched -fmodulo-sched-allow-regmoves -fdump-rtl-sms" } */ ++ ++extern void abort (void); ++ ++float out[4][4] = { 6, 6, 7, 5, 6, 7, 5, 5, 6, 4, 4, 4, 6, 2, 3, 4 }; ++ ++void ++invert (void) ++{ ++ int i, j, k = 0, swap; ++ float tmp[4][4] = { 5, 6, 7, 5, 6, 7, 5, 5, 4, 4, 4, 4, 3, 2, 3, 4 }; ++ ++ for (i = 0; i < 4; i++) ++ { ++ for (j = i + 1; j < 4; j++) ++ if (tmp[j][i] > tmp[i][i]) ++ swap = j; ++ ++ if (swap != i) ++ tmp[i][k] = tmp[swap][k]; ++ } ++ ++ for (i = 0; i < 4; i++) ++ for (j = 0; j < 4; j++) ++ if (tmp[i][j] != out[i][j]) ++ abort (); ++} ++ ++int ++main () ++{ ++ invert (); ++ return 0; ++} ++ ++/* { dg-final { cleanup-rtl-dump "sms" } } */ + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106861.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106861.patch new file mode 100644 index 0000000000..0199f7b89b --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106861.patch @@ -0,0 +1,76 @@ +2012-01-18 Michael Hope <michael.hope@linaro.org> + + Backport from mainline r183126: + + 2012-01-12 Ira Rosen <irar@il.ibm.com> + + gcc/ + PR tree-optimization/51799 + * tree-vect-patterns.c (vect_recog_over_widening_pattern): Check + that the last operation is a type demotion. + + gcc/testsuite/ + * gcc.dg/vect/pr51799.c: New test. + * gcc.dg/vect/vect-widen-shift-u8.c: Expect two widening shift + patterns. 
+ +=== added file 'gcc/testsuite/gcc.dg/vect/pr51799.c' +--- old/gcc/testsuite/gcc.dg/vect/pr51799.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/pr51799.c 2012-01-18 01:53:19 +0000 +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++ ++typedef signed char int8_t; ++typedef unsigned char uint8_t; ++typedef signed short int16_t; ++typedef unsigned long uint32_t; ++void ++f0a (uint32_t * __restrict__ result, int8_t * __restrict__ arg1, ++ uint32_t * __restrict__ arg4, int8_t temp_6) ++{ ++ int idx; ++ for (idx = 0; idx < 416; idx += 1) ++ { ++ result[idx] = (uint8_t)(((arg1[idx] << 7) + arg4[idx]) * temp_6); ++ } ++} ++ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c 2011-10-23 13:33:07 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c 2012-01-18 01:53:19 +0000 +@@ -59,7 +59,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 2 "vect" { target vect_widen_shift } } } */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ +- + +=== modified file 'gcc/tree-vect-patterns.c' +--- old/gcc/tree-vect-patterns.c 2011-12-20 07:47:44 +0000 ++++ new/gcc/tree-vect-patterns.c 2012-01-18 01:53:19 +0000 +@@ -1224,13 +1224,15 @@ + { + use_lhs = gimple_assign_lhs (use_stmt); + use_type = TREE_TYPE (use_lhs); +- /* Support only type promotion or signedess change. Check that USE_TYPE +- is not bigger than the original type. */ ++ /* Support only type demotion or signedess change. */ + if (!INTEGRAL_TYPE_P (use_type) +- || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type) +- || TYPE_PRECISION (type) < TYPE_PRECISION (use_type)) ++ || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type)) + return NULL; + ++ /* Check that NEW_TYPE is not bigger than the conversion result. */ ++ if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type)) ++ return NULL; ++ + if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type) + || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type)) + { + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106862.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106862.patch new file mode 100644 index 0000000000..a20d889a56 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106862.patch @@ -0,0 +1,45 @@ +2012-01-16 Michael Hope <michael.hope@linaro.org> + + Backport from mainline r183011: + + 2012-01-09 Matthew Gretton-Dann <matthew.gretton-dann@arm.com> + + * config/arm/arm-cores.def (cortex-a15): Use cortex_a15_tune for + tuning parameters. + * config/arm/arm.c (arm_cortex_a15_tune): New static variable. 
+ +=== modified file 'gcc/config/arm/arm-cores.def' +--- old/gcc/config/arm/arm-cores.def 2011-10-19 16:46:51 +0000 ++++ new/gcc/config/arm/arm-cores.def 2012-01-15 22:02:31 +0000 +@@ -128,7 +128,7 @@ + ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, cortex_a5) + ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex) + ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9) +-ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex) ++ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) + ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, cortex) + ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, cortex) + ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2012-01-05 15:35:39 +0000 ++++ new/gcc/config/arm/arm.c 2012-01-15 22:02:31 +0000 +@@ -983,6 +983,17 @@ + arm_default_branch_cost + }; + ++const struct tune_params arm_cortex_a15_tune = ++{ ++ arm_9e_rtx_costs, ++ NULL, ++ 1, /* Constant limit. */ ++ 1, /* Max cond insns. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, /* TODO: Calculate correct values. */ ++ false, /* Prefer constant pool. */ ++ arm_cortex_a5_branch_cost ++}; ++ + const struct tune_params arm_fa726te_tune = + { + arm_9e_rtx_costs, + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106863.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106863.patch new file mode 100644 index 0000000000..e93493f17f --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106863.patch @@ -0,0 +1,47 @@ + 2012-01-16 Michael Hope <michael.hope@linaro.org> + + Backport from mainline r182561: + + 2011-12-20 Richard Henderson <rth@redhat.com> + + gcc/ + * config/arm/arm.md (*arm_cmpdi_unsigned): Enable for thumb2. + * config/arm/arm.c (arm_select_cc_mode): Use it. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2012-01-15 22:02:31 +0000 ++++ new/gcc/config/arm/arm.c 2012-01-23 00:06:27 +0000 +@@ -11602,7 +11602,7 @@ + return CC_Zmode; + + /* We can do an equality test in three Thumb instructions. */ +- if (!TARGET_ARM) ++ if (!TARGET_32BIT) + return CC_Zmode; + + /* FALLTHROUGH */ +@@ -11614,7 +11614,7 @@ + /* DImode unsigned comparisons can be implemented by cmp + + cmpeq without a scratch register. Not worth doing in + Thumb-2. 
*/ +- if (TARGET_ARM) ++ if (TARGET_32BIT) + return CC_CZmode; + + /* FALLTHROUGH */ + +=== modified file 'gcc/config/arm/arm.md' +--- old/gcc/config/arm/arm.md 2012-01-05 15:35:39 +0000 ++++ new/gcc/config/arm/arm.md 2012-01-15 21:02:00 +0000 +@@ -7515,8 +7515,8 @@ + [(set (reg:CC_CZ CC_REGNUM) + (compare:CC_CZ (match_operand:DI 0 "s_register_operand" "r") + (match_operand:DI 1 "arm_di_operand" "rDi")))] +- "TARGET_ARM" +- "cmp%?\\t%R0, %R1\;cmpeq\\t%Q0, %Q1" ++ "TARGET_32BIT" ++ "cmp\\t%R0, %R1\;it eq\;cmpeq\\t%Q0, %Q1" + [(set_attr "conds" "set") + (set_attr "length" "8")] + ) + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106864.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106864.patch new file mode 100644 index 0000000000..f15f37a583 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106864.patch @@ -0,0 +1,63 @@ + 2012-01-16 Michael Hope <michael.hope@linaro.org> + + Backport from mainline r181210: + + gcc/ + 2011-11-07 Matthew Gretton-Dann <matthew.gretton-dann@arm.com> + + * config/arm/arm-cores.def: Add -mcpu=cortex-a7. + * config/arm/arm-tables.opt: Regenerate. + * config/arm/arm-tune.md: Likewise. + * config/arm/bpabi.h (BE8_LINK_SPEC): Add Cortex A-7. + * doc/invoke.texi: Document -mcpu=cortex-a7. + +=== modified file 'gcc/config/arm/arm-cores.def' +--- old/gcc/config/arm/arm-cores.def 2012-01-15 22:02:31 +0000 ++++ new/gcc/config/arm/arm-cores.def 2012-01-23 00:36:02 +0000 +@@ -126,6 +126,7 @@ + ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, v6t2) + ARM_CORE("generic-armv7-a", genericv7a, 7A, FL_LDSCHED, cortex) + ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, cortex_a5) ++ARM_CORE("cortex-a7", cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex) + ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex) + ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9) + ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) + +=== modified file 'gcc/config/arm/arm-tune.md' +--- old/gcc/config/arm/arm-tune.md 2011-10-19 16:46:51 +0000 ++++ new/gcc/config/arm/arm-tune.md 2012-01-15 22:43:29 +0000 +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from arm-cores.def + (define_attr "tune" +- "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0" ++ 
"arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0" + (const (symbol_ref "((enum attr_tune) arm_tune)"))) + +=== modified file 'gcc/config/arm/bpabi.h' +--- old/gcc/config/arm/bpabi.h 2011-11-02 21:02:53 +0000 ++++ new/gcc/config/arm/bpabi.h 2012-01-15 22:43:29 +0000 +@@ -56,6 +56,7 @@ + "|march=armv4|mcpu=fa526|mcpu=fa626:--fix-v4bx}" + + #define BE8_LINK_SPEC " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5"\ ++ "|mcpu=cortex-a7"\ + "|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15|mcpu=generic-armv7-a"\ + ":%{!r:--be8}}}" + + +=== modified file 'gcc/doc/invoke.texi' +--- old/gcc/doc/invoke.texi 2012-01-05 15:35:39 +0000 ++++ new/gcc/doc/invoke.texi 2012-01-15 22:43:29 +0000 +@@ -10202,8 +10202,8 @@ + @samp{arm10e}, @samp{arm1020e}, @samp{arm1022e}, + @samp{arm1136j-s}, @samp{arm1136jf-s}, @samp{mpcore}, @samp{mpcorenovfp}, + @samp{arm1156t2-s}, @samp{arm1156t2f-s}, @samp{arm1176jz-s}, @samp{arm1176jzf-s}, +-@samp{cortex-a5}, @samp{cortex-a8}, @samp{cortex-a9}, @samp{cortex-a15}, +-@samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-r5}, ++@samp{cortex-a5}, @samp{cortex-a7}, @samp{cortex-a8}, @samp{cortex-a9}, ++@samp{cortex-a15}, @samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-r5}, + @samp{cortex-m4}, @samp{cortex-m3}, + @samp{cortex-m1}, + @samp{cortex-m0}, + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106865.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106865.patch new file mode 100644 index 0000000000..5b0735b004 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106865.patch @@ -0,0 +1,25 @@ +2012-01-20 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline + 2012-01-20 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + PR target/51819 + * config/arm/arm.c (arm_print_operand): Correct output of alignment + hints for neon loads and stores. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2012-01-23 00:06:27 +0000 ++++ new/gcc/config/arm/arm.c 2012-01-23 18:54:21 +0000 +@@ -17463,9 +17463,9 @@ + /* Only certain alignment specifiers are supported by the hardware. 
*/ + if (memsize == 16 && (align % 32) == 0) + align_bits = 256; +- else if ((memsize == 8 || memsize == 16) && (align % 16) == 0) ++ else if (memsize == 16 && (align % 16) == 0) + align_bits = 128; +- else if ((align % 8) == 0) ++ else if (memsize >= 8 && (align % 8) == 0) + align_bits = 64; + else + align_bits = 0; + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106869.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106869.patch new file mode 100644 index 0000000000..de3d2ff4cf --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106869.patch @@ -0,0 +1,2389 @@ +2012-02-08 Ulrich Weigand <ulrich.weigand@linaro.org> + + gcc/ + * config/arm/arm.c (arm_option_optimization_table): Enable + -fsched-pressure using -fsched-pressure-algorithm=model by + default when optimizing. + +2012-02-08 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + * sched-deps.c (fixup_sched_groups): Rename to... + (chain_to_prev_insn): ...this. + (chain_to_prev_insn_p): New function. + (deps_analyze_insn): Use it instead of SCHED_GROUP_P. + +2012-02-08 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + * sched-int.h (_haifa_insn_data): Move priority_status. + Add model_index. + (INSN_MODEL_INDEX): New macro. + * haifa-sched.c (insn_delay): New function. + (sched_regno_pressure_class): Update commentary. + (mark_regno_birth_or_death): Pass the liveness bitmap and + pressure array as arguments, instead of using curr_reg_live and + curr_reg_pressure. Only update the pressure if the bit in the + liveness set has changed. + (initiate_reg_pressure_info): Always trust the live-in set for + SCHED_PRESSURE_MODEL. + (initiate_bb_reg_pressure_info): Update call to + mark_regno_birth_or_death. + (dep_list_size): Take the list as argument. + (calculate_reg_deaths): New function, extracted from... + (setup_insn_reg_pressure_info): ...here. + (MODEL_BAR): New macro. + (model_pressure_data, model_insn_info, model_pressure_limit) + (model_pressure_group): New structures. + (model_schedule, model_worklist, model_insns, model_num_insns) + (model_curr_point, model_before_pressure, model_next_priority): + New variables. + (MODEL_PRESSURE_DATA, MODEL_MAX_PRESSURE, MODEL_REF_PRESSURE) + (MODEL_INSN_INFO, MODEL_INSN): New macros. + (model_index, model_update_limit_points_in_group): New functions. + (model_update_limit_points, model_last_use_except): Likewise. + (model_start_update_pressure, model_update_pressure): Likewise. + (model_recompute, model_spill_cost, model_excess_group_cost): Likewise. + (model_excess_cost, model_dump_pressure_points): Likewise. + (model_set_excess_costs): Likewise. + (rank_for_schedule): Extend SCHED_PRIORITY_WEIGHTED ordering to + SCHED_PRIORITY_MODEL. Use insn_delay. Use the order in the model + schedule as an alternative tie-breaker. Update the call to + dep_list_size. + (ready_sort): Call model_set_excess_costs. + (update_register_pressure): Update call to mark_regno_birth_or_death. + Rely on that function to check liveness rather than doing it here. + (model_classify_pressure, model_order_p, model_add_to_worklist_at) + (model_remove_from_worklist, model_add_to_worklist, model_promote_insn) + (model_add_to_schedule, model_analyze_insns, model_init_pressure_group) + (model_record_pressure, model_record_pressures): New functions. + (model_record_final_pressures, model_add_successors_to_worklist) + (model_promote_predecessors, model_choose_insn): Likewise. 
+ (model_reset_queue_indices, model_dump_pressure_summary): Likewise. + (model_start_schedule, model_finalize_pressure_group): Likewise. + (model_end_schedule): Likewise. + (schedule_insn): Say when we're scheduling the next instruction + in the model schedule. + (schedule_insn): Handle SCHED_PRESSURE_MODEL. + (queue_to_ready): Do not add instructions that are + MAX_SCHED_READY_INSNS beyond the current point of the model schedule. + Always allow the next instruction in the model schedule to be added. + (debug_ready_list): Print the INSN_REG_PRESSURE_EXCESS_COST_CHANGE + and delay for SCHED_PRESSURE_MODEL too. + (prune_ready_list): Extend SCHED_PRIORITY_WEIGHTED handling to + SCHED_PRIORITY_MODEL, but also take the DFA into account. + (schedule_block): Call model_start_schedule and model_end_schedule. + Extend SCHED_PRIORITY_WEIGHTED stall handling to SCHED_PRIORITY_MODEL. + (sched_init): Extend INSN_REG_PRESSURE_EXCESS_COST_CHANGE handling + to SCHED_PRESSURE_MODEL, but don't allocate saved_reg_live or + region_ref_regs. + (sched_finish): Update accordingly. + (fix_tick_ready): Extend INSN_REG_PRESSURE_EXCESS_COST_CHANGE handling + to SCHED_PRESSURE_MODEL. + (add_jump_dependencies): Update call to dep_list_size. + (haifa_finish_h_i_d): Fix leak of max_reg_pressure. + (haifa_init_insn): Extend INSN_REG_PRESSURE_EXCESS_COST_CHANGE handling + to SCHED_PRESSURE_MODEL. + * sched-deps.c (init_insn_reg_pressure_info): Likewise, but don't + allocate INSN_MAX_REG_PRESSURE for SCHED_PRESSURE_MODEL. + (sched_analyze_insn): Extend INSN_REG_PRESSURE_EXCESS_COST_CHANGE + handling to SCHED_PRESSURE_MODEL. + +2012-02-08 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + * common.opt (fsched-pressure-algorithm=): New option. + * flag-types.h (sched_pressure_algorithm): New enum. + * sched-int.h (sched_pressure_p): Replace with... + (sched_pressure): ...this new variable. + * haifa-sched.c (sched_pressure_p): Replace with... + (sched_pressure): ...this new variable. + (sched_regno_pressure_class, rank_for_schedule, ready_sort) + (update_reg_and_insn_max_reg_pressure, schedule_insn) + (debug_ready_list, schedule_block, sched_init, sched_finish) + (fix_tick_ready, haifa_init_insn): Update accordingly. + * sched-deps.c (init_insn_reg_pressure_info): Likewise. + * sched-rgn.c (schedule_region): Likewise. + +2012-02-08 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-04-01 Bernd Schmidt <bernds@codesourcery.com> + + * haifa-sched.c (prune_ready_list): New function, broken out of + schedule_block. + (schedule_block): Use it. 
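For context (not part of the patch): the common.opt hunk below adds a new
-fsched-pressure-algorithm= option whose values are "weighted" (the
pre-existing heuristic, and the Init value of the flag) and "model" (the new
model-schedule heuristic), while the arm.c hunk enables -fsched-pressure with
the model algorithm by default at -O1 and above on ARM. As an illustration
only, a build that wants the previous behaviour could pass something like
"-fsched-pressure -fsched-pressure-algorithm=weighted", or disable
pressure-sensitive scheduling entirely with -fno-sched-pressure.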
+ +=== modified file 'gcc/common.opt' +--- old/gcc/common.opt 2011-04-11 15:26:47 +0000 ++++ new/gcc/common.opt 2012-02-08 23:38:13 +0000 +@@ -1614,6 +1614,19 @@ + Common Report Var(flag_sched_pressure) Init(0) Optimization + Enable register pressure sensitive insn scheduling + ++fsched-pressure-algorithm= ++Common Joined RejectNegative Enum(sched_pressure_algorithm) Var(flag_sched_pressure_algorithm) Init(SCHED_PRESSURE_WEIGHTED) ++-fira-algorithm=[CB|priority] Set the used IRA algorithm ++ ++Enum ++Name(sched_pressure_algorithm) Type(enum sched_pressure_algorithm) UnknownError(unknown %<fsched-pressure%> algorithm %qs) ++ ++EnumValue ++Enum(sched_pressure_algorithm) String(weighted) Value(SCHED_PRESSURE_WEIGHTED) ++ ++EnumValue ++Enum(sched_pressure_algorithm) String(model) Value(SCHED_PRESSURE_MODEL) ++ + fsched-spec + Common Report Var(flag_schedule_speculative) Init(1) Optimization + Allow speculative motion of non-loads + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2012-02-01 14:13:07 +0000 ++++ new/gcc/config/arm/arm.c 2012-02-09 00:47:59 +0000 +@@ -311,6 +311,11 @@ + /* Set default optimization options. */ + static const struct default_options arm_option_optimization_table[] = + { ++ /* Enable -fsched-pressure using -fsched-pressure-algorithm=model ++ by default when optimizing. */ ++ { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 }, ++ { OPT_LEVELS_1_PLUS, OPT_fsched_pressure_algorithm_, ++ NULL, SCHED_PRESSURE_MODEL }, + /* Enable section anchors by default at -O1 or higher. */ + { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 }, + { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 }, + +=== modified file 'gcc/flag-types.h' +--- old/gcc/flag-types.h 2010-11-24 13:28:38 +0000 ++++ new/gcc/flag-types.h 2012-02-08 23:38:13 +0000 +@@ -106,6 +106,14 @@ + }; + #endif + ++/* The algorithm used to implement -fsched-pressure. */ ++enum sched_pressure_algorithm ++{ ++ SCHED_PRESSURE_NONE, ++ SCHED_PRESSURE_WEIGHTED, ++ SCHED_PRESSURE_MODEL ++}; ++ + /* The algorithm used for the integrated register allocator (IRA). */ + enum ira_algorithm + { + +=== modified file 'gcc/haifa-sched.c' +--- old/gcc/haifa-sched.c 2011-02-19 20:59:23 +0000 ++++ new/gcc/haifa-sched.c 2012-02-08 23:39:02 +0000 +@@ -348,6 +348,14 @@ + /* Create empty basic block after the specified block. */ + basic_block (* sched_create_empty_bb) (basic_block); + ++/* Return the number of cycles until INSN is expected to be ready. ++ Return zero if it already is. */ ++static int ++insn_delay (rtx insn) ++{ ++ return MAX (INSN_TICK (insn) - clock_var, 0); ++} ++ + static int + may_trap_exp (const_rtx x, int is_store) + { +@@ -571,10 +579,10 @@ + + /* Do register pressure sensitive insn scheduling if the flag is set + up. */ +-bool sched_pressure_p; ++enum sched_pressure_algorithm sched_pressure; + + /* Map regno -> its cover class. The map defined only when +- SCHED_PRESSURE_P is true. */ ++ SCHED_PRESSURE != SCHED_PRESSURE_NONE. */ + enum reg_class *sched_regno_cover_class; + + /* The current register pressure. Only elements corresponding cover +@@ -602,10 +610,12 @@ + bitmap_clear (region_ref_regs); + } + +-/* Update current register pressure related info after birth (if +- BIRTH_P) or death of register REGNO. */ +-static void +-mark_regno_birth_or_death (int regno, bool birth_p) ++/* PRESSURE[CL] describes the pressure on register class CL. Update it ++ for the birth (if BIRTH_P) or death (if !BIRTH_P) of register REGNO. 
++ LIVE tracks the set of live registers; if it is null, assume that ++ every birth or death is genuine. */ ++static inline void ++mark_regno_birth_or_death (bitmap live, int *pressure, int regno, bool birth_p) + { + enum reg_class cover_class; + +@@ -616,15 +626,17 @@ + { + if (birth_p) + { +- bitmap_set_bit (curr_reg_live, regno); +- curr_reg_pressure[cover_class] +- += ira_reg_class_nregs[cover_class][PSEUDO_REGNO_MODE (regno)]; ++ if (!live || bitmap_set_bit (live, regno)) ++ pressure[cover_class] ++ += (ira_reg_class_nregs ++ [cover_class][PSEUDO_REGNO_MODE (regno)]); + } + else + { +- bitmap_clear_bit (curr_reg_live, regno); +- curr_reg_pressure[cover_class] +- -= ira_reg_class_nregs[cover_class][PSEUDO_REGNO_MODE (regno)]; ++ if (!live || bitmap_clear_bit (live, regno)) ++ pressure[cover_class] ++ -= (ira_reg_class_nregs ++ [cover_class][PSEUDO_REGNO_MODE (regno)]); + } + } + } +@@ -633,13 +645,13 @@ + { + if (birth_p) + { +- bitmap_set_bit (curr_reg_live, regno); +- curr_reg_pressure[cover_class]++; ++ if (!live || bitmap_set_bit (live, regno)) ++ pressure[cover_class]++; + } + else + { +- bitmap_clear_bit (curr_reg_live, regno); +- curr_reg_pressure[cover_class]--; ++ if (!live || bitmap_clear_bit (live, regno)) ++ pressure[cover_class]--; + } + } + } +@@ -657,8 +669,10 @@ + curr_reg_pressure[ira_reg_class_cover[i]] = 0; + bitmap_clear (curr_reg_live); + EXECUTE_IF_SET_IN_BITMAP (live, 0, j, bi) +- if (current_nr_blocks == 1 || bitmap_bit_p (region_ref_regs, j)) +- mark_regno_birth_or_death (j, true); ++ if (sched_pressure == SCHED_PRESSURE_MODEL ++ || current_nr_blocks == 1 ++ || bitmap_bit_p (region_ref_regs, j)) ++ mark_regno_birth_or_death (curr_reg_live, curr_reg_pressure, j, true); + } + + /* Mark registers in X as mentioned in the current region. */ +@@ -712,7 +726,8 @@ + if (regno == INVALID_REGNUM) + break; + if (! bitmap_bit_p (df_get_live_in (bb), regno)) +- mark_regno_birth_or_death (regno, true); ++ mark_regno_birth_or_death (curr_reg_live, curr_reg_pressure, ++ regno, true); + } + #endif + } +@@ -956,19 +971,19 @@ + return true; + } + +-/* Compute the number of nondebug forward deps of an insn. */ ++/* Compute the number of nondebug deps in list LIST for INSN. */ + + static int +-dep_list_size (rtx insn) ++dep_list_size (rtx insn, sd_list_types_def list) + { + sd_iterator_def sd_it; + dep_t dep; + int dbgcount = 0, nodbgcount = 0; + + if (!MAY_HAVE_DEBUG_INSNS) +- return sd_lists_size (insn, SD_LIST_FORW); ++ return sd_lists_size (insn, list); + +- FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep) ++ FOR_EACH_DEP (insn, list, sd_it, dep) + { + if (DEBUG_INSN_P (DEP_CON (dep))) + dbgcount++; +@@ -976,7 +991,7 @@ + nodbgcount++; + } + +- gcc_assert (dbgcount + nodbgcount == sd_lists_size (insn, SD_LIST_FORW)); ++ gcc_assert (dbgcount + nodbgcount == sd_lists_size (insn, list)); + + return nodbgcount; + } +@@ -995,7 +1010,7 @@ + { + int this_priority = -1; + +- if (dep_list_size (insn) == 0) ++ if (dep_list_size (insn, SD_LIST_FORW) == 0) + /* ??? We should set INSN_PRIORITY to insn_cost when and insn has + some forward deps but all of them are ignored by + contributes_to_priority hook. At the moment we set priority of +@@ -1091,6 +1106,22 @@ + qsort (READY, N_READY, sizeof (rtx), rank_for_schedule); } \ + while (0) + ++/* For each cover class CL, set DEATH[CL] to the number of registers ++ in that class that die in INSN. 
*/ ++ ++static void ++calculate_reg_deaths (rtx insn, int *death) ++{ ++ int i; ++ struct reg_use_data *use; ++ ++ for (i = 0; i < ira_reg_class_cover_size; i++) ++ death[ira_reg_class_cover[i]] = 0; ++ for (use = INSN_REG_USE_LIST (insn); use != NULL; use = use->next_insn_use) ++ if (dying_use_p (use)) ++ mark_regno_birth_or_death (0, death, use->regno, true); ++} ++ + /* Setup info about the current register pressure impact of scheduling + INSN at the current scheduling point. */ + static void +@@ -1102,23 +1133,12 @@ + enum reg_class cl; + struct reg_pressure_data *pressure_info; + int *max_reg_pressure; +- struct reg_use_data *use; + static int death[N_REG_CLASSES]; + + gcc_checking_assert (!DEBUG_INSN_P (insn)); + + excess_cost_change = 0; +- for (i = 0; i < ira_reg_class_cover_size; i++) +- death[ira_reg_class_cover[i]] = 0; +- for (use = INSN_REG_USE_LIST (insn); use != NULL; use = use->next_insn_use) +- if (dying_use_p (use)) +- { +- cl = sched_regno_cover_class[use->regno]; +- if (use->regno < FIRST_PSEUDO_REGISTER) +- death[cl]++; +- else +- death[cl] += ira_reg_class_nregs[cl][PSEUDO_REGNO_MODE (use->regno)]; +- } ++ calculate_reg_deaths (insn, death); + pressure_info = INSN_REG_PRESSURE (insn); + max_reg_pressure = INSN_MAX_REG_PRESSURE (insn); + gcc_assert (pressure_info != NULL && max_reg_pressure != NULL); +@@ -1139,7 +1159,765 @@ + } + INSN_REG_PRESSURE_EXCESS_COST_CHANGE (insn) = excess_cost_change; + } +- ++ ++/* This is the first page of code related to SCHED_PRESSURE_MODEL. ++ It tries to make the scheduler take register pressure into account ++ without introducing too many unnecessary stalls. It hooks into the ++ main scheduling algorithm at several points: ++ ++ - Before scheduling starts, model_start_schedule constructs a ++ "model schedule" for the current block. This model schedule is ++ chosen solely to keep register pressure down. It does not take the ++ target's pipeline or the original instruction order into account, ++ except as a tie-breaker. It also doesn't work to a particular ++ pressure limit. ++ ++ This model schedule gives us an idea of what pressure can be ++ achieved for the block gives us an example of a schedule that ++ keeps to that pressure. It also makes the final schedule less ++ dependent on the original instruction order. This is important ++ because the original order can either be "wide" (many values live ++ at once, such as in user-scheduled code) or "narrow" (few values ++ live at once, such as after loop unrolling, where several ++ iterations are executed sequentially). ++ ++ We do not apply this model schedule to the rtx stream. We simply ++ record it in model_schedule. We also compute the maximum pressure, ++ MP, that was seen during this schedule. ++ ++ - Instructions are added to the ready queue even if they require ++ a stall. The length of the stall is instead computed as: ++ ++ MAX (INSN_TICK (INSN) - clock_var, 0) ++ ++ (= insn_delay). This allows rank_for_schedule to choose between ++ introducing a deliberate stall or increasing pressure. ++ ++ - Before sorting the ready queue, model_set_excess_costs assigns ++ a pressure-based cost to each ready instruction in the queue. ++ This is the instruction's INSN_REG_PRESSURE_EXCESS_COST_CHANGE ++ (ECC for short) and is effectively measured in cycles. 
++ ++ - rank_for_schedule ranks instructions based on: ++ ++ ECC (insn) + insn_delay (insn) ++ ++ then as: ++ ++ insn_delay (insn) ++ ++ So, for example, an instruction X1 with an ECC of 1 that can issue ++ now will win over an instruction X0 with an ECC of zero that would ++ introduce a stall of one cycle. However, an instruction X2 with an ++ ECC of 2 that can issue now will lose to X0. ++ ++ - When an instruction is scheduled, model_recompute updates the model ++ schedule with the new pressures (some of which might now exceed the ++ original maximum pressure MP). model_update_limit_points then searches ++ for the new point of maximum pressure, if not already known. */ ++ ++/* Used to separate high-verbosity debug information for SCHED_PRESSURE_MODEL ++ from surrounding debug information. */ ++#define MODEL_BAR \ ++ ";;\t\t+------------------------------------------------------\n" ++ ++/* Information about the pressure on a particular register class at a ++ particular point of the model schedule. */ ++struct model_pressure_data { ++ /* The pressure at this point of the model schedule, or -1 if the ++ point is associated with an instruction that has already been ++ scheduled. */ ++ int ref_pressure; ++ ++ /* The maximum pressure during or after this point of the model schedule. */ ++ int max_pressure; ++}; ++ ++/* Per-instruction information that is used while building the model ++ schedule. Here, "schedule" refers to the model schedule rather ++ than the main schedule. */ ++struct model_insn_info { ++ /* The instruction itself. */ ++ rtx insn; ++ ++ /* If this instruction is in model_worklist, these fields link to the ++ previous (higher-priority) and next (lower-priority) instructions ++ in the list. */ ++ struct model_insn_info *prev; ++ struct model_insn_info *next; ++ ++ /* While constructing the schedule, QUEUE_INDEX describes whether an ++ instruction has already been added to the schedule (QUEUE_SCHEDULED), ++ is in model_worklist (QUEUE_READY), or neither (QUEUE_NOWHERE). ++ old_queue records the value that QUEUE_INDEX had before scheduling ++ started, so that we can restore it once the schedule is complete. */ ++ int old_queue; ++ ++ /* The relative importance of an unscheduled instruction. Higher ++ values indicate greater importance. */ ++ unsigned int model_priority; ++ ++ /* The length of the longest path of satisfied true dependencies ++ that leads to this instruction. */ ++ unsigned int depth; ++ ++ /* The length of the longest path of dependencies of any kind ++ that leads from this instruction. */ ++ unsigned int alap; ++ ++ /* The number of predecessor nodes that must still be scheduled. */ ++ int unscheduled_preds; ++}; ++ ++/* Information about the pressure limit for a particular register class. ++ This structure is used when applying a model schedule to the main ++ schedule. */ ++struct model_pressure_limit { ++ /* The maximum register pressure seen in the original model schedule. */ ++ int orig_pressure; ++ ++ /* The maximum register pressure seen in the current model schedule ++ (which excludes instructions that have already been scheduled). */ ++ int pressure; ++ ++ /* The point of the current model schedule at which PRESSURE is first ++ reached. It is set to -1 if the value needs to be recomputed. */ ++ int point; ++}; ++ ++/* Describes a particular way of measuring register pressure. */ ++struct model_pressure_group { ++ /* Index CCI describes the maximum pressure on ira_reg_class_cover[CCI]. 
*/ ++ struct model_pressure_limit limits[N_REG_CLASSES]; ++ ++ /* Index (POINT * ira_num_pressure_classes + CCI) describes the pressure ++ on register class ira_reg_class_cover[CCI] at point POINT of the ++ current model schedule. A POINT of model_num_insns describes the ++ pressure at the end of the schedule. */ ++ struct model_pressure_data *model; ++}; ++ ++/* Index POINT gives the instruction at point POINT of the model schedule. ++ This array doesn't change during main scheduling. */ ++static VEC (rtx, heap) *model_schedule; ++ ++/* The list of instructions in the model worklist, sorted in order of ++ decreasing priority. */ ++static struct model_insn_info *model_worklist; ++ ++/* Index I describes the instruction with INSN_LUID I. */ ++static struct model_insn_info *model_insns; ++ ++/* The number of instructions in the model schedule. */ ++static int model_num_insns; ++ ++/* The index of the first instruction in model_schedule that hasn't yet been ++ added to the main schedule, or model_num_insns if all of them have. */ ++static int model_curr_point; ++ ++/* Describes the pressure before each instruction in the model schedule. */ ++static struct model_pressure_group model_before_pressure; ++ ++/* The first unused model_priority value (as used in model_insn_info). */ ++static unsigned int model_next_priority; ++ ++ ++/* The model_pressure_data for ira_reg_class_cover[CCI] in GROUP ++ at point POINT of the model schedule. */ ++#define MODEL_PRESSURE_DATA(GROUP, POINT, CCI) \ ++ (&(GROUP)->model[(POINT) * ira_reg_class_cover_size + (CCI)]) ++ ++/* The maximum pressure on ira_reg_class_cover[CCI] in GROUP at or ++ after point POINT of the model schedule. */ ++#define MODEL_MAX_PRESSURE(GROUP, POINT, CCI) \ ++ (MODEL_PRESSURE_DATA (GROUP, POINT, CCI)->max_pressure) ++ ++/* The pressure on ira_reg_class_cover[CCI] in GROUP at point POINT ++ of the model schedule. */ ++#define MODEL_REF_PRESSURE(GROUP, POINT, CCI) \ ++ (MODEL_PRESSURE_DATA (GROUP, POINT, CCI)->ref_pressure) ++ ++/* Information about INSN that is used when creating the model schedule. */ ++#define MODEL_INSN_INFO(INSN) \ ++ (&model_insns[INSN_LUID (INSN)]) ++ ++/* The instruction at point POINT of the model schedule. */ ++#define MODEL_INSN(POINT) \ ++ (VEC_index (rtx, model_schedule, POINT)) ++ ++ ++/* Return INSN's index in the model schedule, or model_num_insns if it ++ doesn't belong to that schedule. */ ++ ++static int ++model_index (rtx insn) ++{ ++ if (INSN_MODEL_INDEX (insn) == 0) ++ return model_num_insns; ++ return INSN_MODEL_INDEX (insn) - 1; ++} ++ ++/* Make sure that GROUP->limits is up-to-date for the current point ++ of the model schedule. */ ++ ++static void ++model_update_limit_points_in_group (struct model_pressure_group *group) ++{ ++ int cci, max_pressure, point; ++ ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ /* We may have passed the final point at which the pressure in ++ group->limits[cci].pressure was reached. Update the limit if so. */ ++ max_pressure = MODEL_MAX_PRESSURE (group, model_curr_point, cci); ++ group->limits[cci].pressure = max_pressure; ++ ++ /* Find the point at which MAX_PRESSURE is first reached. We need ++ to search in three cases: ++ ++ - We've already moved past the previous pressure point. ++ In this case we search forward from model_curr_point. ++ ++ - We scheduled the previous point of maximum pressure ahead of ++ its position in the model schedule, but doing so didn't bring ++ the pressure point earlier. 
In this case we search forward ++ from that previous pressure point. ++ ++ - Scheduling an instruction early caused the maximum pressure ++ to decrease. In this case we will have set the pressure ++ point to -1, and we search forward from model_curr_point. */ ++ point = MAX (group->limits[cci].point, model_curr_point); ++ while (point < model_num_insns ++ && MODEL_REF_PRESSURE (group, point, cci) < max_pressure) ++ point++; ++ group->limits[cci].point = point; ++ ++ gcc_assert (MODEL_REF_PRESSURE (group, point, cci) == max_pressure); ++ gcc_assert (MODEL_MAX_PRESSURE (group, point, cci) == max_pressure); ++ } ++} ++ ++/* Make sure that all register-pressure limits are up-to-date for the ++ current position in the model schedule. */ ++ ++static void ++model_update_limit_points (void) ++{ ++ model_update_limit_points_in_group (&model_before_pressure); ++} ++ ++/* Return the model_index of the last unscheduled use in chain USE ++ outside of USE's instruction. Return -1 if there are no other uses, ++ or model_num_insns if the register is live at the end of the block. */ ++ ++static int ++model_last_use_except (struct reg_use_data *use) ++{ ++ struct reg_use_data *next; ++ int last, index; ++ ++ last = -1; ++ for (next = use->next_regno_use; next != use; next = next->next_regno_use) ++ if (NONDEBUG_INSN_P (next->insn) ++ && QUEUE_INDEX (next->insn) != QUEUE_SCHEDULED) ++ { ++ index = model_index (next->insn); ++ if (index == model_num_insns) ++ return model_num_insns; ++ if (last < index) ++ last = index; ++ } ++ return last; ++} ++ ++/* An instruction with model_index POINT has just been scheduled, and it ++ adds DELTA to the pressure on ira_reg_class_cover[CCI] after POINT - 1. ++ Update MODEL_REF_PRESSURE (GROUP, POINT, CCI) and ++ MODEL_MAX_PRESSURE (GROUP, POINT, CCI) accordingly. */ ++ ++static void ++model_start_update_pressure (struct model_pressure_group *group, ++ int point, int cci, int delta) ++{ ++ int next_max_pressure; ++ ++ if (point == model_num_insns) ++ { ++ /* The instruction wasn't part of the model schedule; it was moved ++ from a different block. Update the pressure for the end of ++ the model schedule. */ ++ MODEL_REF_PRESSURE (group, point, cci) += delta; ++ MODEL_MAX_PRESSURE (group, point, cci) += delta; ++ } ++ else ++ { ++ /* Record that this instruction has been scheduled. Nothing now ++ changes between POINT and POINT + 1, so get the maximum pressure ++ from the latter. If the maximum pressure decreases, the new ++ pressure point may be before POINT. */ ++ MODEL_REF_PRESSURE (group, point, cci) = -1; ++ next_max_pressure = MODEL_MAX_PRESSURE (group, point + 1, cci); ++ if (MODEL_MAX_PRESSURE (group, point, cci) > next_max_pressure) ++ { ++ MODEL_MAX_PRESSURE (group, point, cci) = next_max_pressure; ++ if (group->limits[cci].point == point) ++ group->limits[cci].point = -1; ++ } ++ } ++} ++ ++/* Record that scheduling a later instruction has changed the pressure ++ at point POINT of the model schedule by DELTA (which might be 0). ++ Update GROUP accordingly. Return nonzero if these changes might ++ trigger changes to previous points as well. */ ++ ++static int ++model_update_pressure (struct model_pressure_group *group, ++ int point, int cci, int delta) ++{ ++ int ref_pressure, max_pressure, next_max_pressure; ++ ++ /* If POINT hasn't yet been scheduled, update its pressure. 
*/ ++ ref_pressure = MODEL_REF_PRESSURE (group, point, cci); ++ if (ref_pressure >= 0 && delta != 0) ++ { ++ ref_pressure += delta; ++ MODEL_REF_PRESSURE (group, point, cci) = ref_pressure; ++ ++ /* Check whether the maximum pressure in the overall schedule ++ has increased. (This means that the MODEL_MAX_PRESSURE of ++ every point <= POINT will need to increae too; see below.) */ ++ if (group->limits[cci].pressure < ref_pressure) ++ group->limits[cci].pressure = ref_pressure; ++ ++ /* If we are at maximum pressure, and the maximum pressure ++ point was previously unknown or later than POINT, ++ bring it forward. */ ++ if (group->limits[cci].pressure == ref_pressure ++ && !IN_RANGE (group->limits[cci].point, 0, point)) ++ group->limits[cci].point = point; ++ ++ /* If POINT used to be the point of maximum pressure, but isn't ++ any longer, we need to recalculate it using a forward walk. */ ++ if (group->limits[cci].pressure > ref_pressure ++ && group->limits[cci].point == point) ++ group->limits[cci].point = -1; ++ } ++ ++ /* Update the maximum pressure at POINT. Changes here might also ++ affect the maximum pressure at POINT - 1. */ ++ next_max_pressure = MODEL_MAX_PRESSURE (group, point + 1, cci); ++ max_pressure = MAX (ref_pressure, next_max_pressure); ++ if (MODEL_MAX_PRESSURE (group, point, cci) != max_pressure) ++ { ++ MODEL_MAX_PRESSURE (group, point, cci) = max_pressure; ++ return 1; ++ } ++ return 0; ++} ++ ++/* INSN has just been scheduled. Update the model schedule accordingly. */ ++ ++static void ++model_recompute (rtx insn) ++{ ++ struct { ++ int last_use; ++ int regno; ++ } uses[FIRST_PSEUDO_REGISTER + MAX_RECOG_OPERANDS]; ++ struct reg_use_data *use; ++ struct reg_pressure_data *reg_pressure; ++ int delta[N_REG_CLASSES]; ++ int cci, point, mix, new_last, cl, ref_pressure, queue; ++ unsigned int i, num_uses, num_pending_births; ++ bool print_p; ++ ++ /* The destinations of INSN were previously live from POINT onwards, but are ++ now live from model_curr_point onwards. Set up DELTA accordingly. */ ++ point = model_index (insn); ++ reg_pressure = INSN_REG_PRESSURE (insn); ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ delta[cl] = reg_pressure[cci].set_increase; ++ } ++ ++ /* Record which registers previously died at POINT, but which now die ++ before POINT. Adjust DELTA so that it represents the effect of ++ this change after POINT - 1. Set NUM_PENDING_BIRTHS to the number of ++ registers that will be born in the range [model_curr_point, POINT). */ ++ num_uses = 0; ++ num_pending_births = 0; ++ for (use = INSN_REG_USE_LIST (insn); use != NULL; use = use->next_insn_use) ++ { ++ new_last = model_last_use_except (use); ++ if (new_last < point) ++ { ++ gcc_assert (num_uses < ARRAY_SIZE (uses)); ++ uses[num_uses].last_use = new_last; ++ uses[num_uses].regno = use->regno; ++ /* This register is no longer live after POINT - 1. */ ++ mark_regno_birth_or_death (NULL, delta, use->regno, false); ++ num_uses++; ++ if (new_last >= 0) ++ num_pending_births++; ++ } ++ } ++ ++ /* Update the MODEL_REF_PRESSURE and MODEL_MAX_PRESSURE for POINT. ++ Also set each group pressure limit for POINT. */ ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ model_start_update_pressure (&model_before_pressure, ++ point, cci, delta[cl]); ++ } ++ ++ /* Walk the model schedule backwards, starting immediately before POINT. 
*/ ++ print_p = false; ++ if (point != model_curr_point) ++ do ++ { ++ point--; ++ insn = MODEL_INSN (point); ++ queue = QUEUE_INDEX (insn); ++ ++ if (queue != QUEUE_SCHEDULED) ++ { ++ /* DELTA describes the effect of the move on the register pressure ++ after POINT. Make it describe the effect on the pressure ++ before POINT. */ ++ i = 0; ++ while (i < num_uses) ++ { ++ if (uses[i].last_use == point) ++ { ++ /* This register is now live again. */ ++ mark_regno_birth_or_death (NULL, delta, ++ uses[i].regno, true); ++ ++ /* Remove this use from the array. */ ++ uses[i] = uses[num_uses - 1]; ++ num_uses--; ++ num_pending_births--; ++ } ++ else ++ i++; ++ } ++ ++ if (sched_verbose >= 5) ++ { ++ char buf[2048]; ++ ++ if (!print_p) ++ { ++ fprintf (sched_dump, MODEL_BAR); ++ fprintf (sched_dump, ";;\t\t| New pressure for model" ++ " schedule\n"); ++ fprintf (sched_dump, MODEL_BAR); ++ print_p = true; ++ } ++ ++ print_pattern (buf, PATTERN (insn), 0); ++ fprintf (sched_dump, ";;\t\t| %3d %4d %-30s ", ++ point, INSN_UID (insn), buf); ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ ref_pressure = MODEL_REF_PRESSURE (&model_before_pressure, ++ point, cci); ++ fprintf (sched_dump, " %s:[%d->%d]", ++ reg_class_names[ira_reg_class_cover[cci]], ++ ref_pressure, ref_pressure + delta[cl]); ++ } ++ fprintf (sched_dump, "\n"); ++ } ++ } ++ ++ /* Adjust the pressure at POINT. Set MIX to nonzero if POINT - 1 ++ might have changed as well. */ ++ mix = num_pending_births; ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ mix |= delta[cl]; ++ mix |= model_update_pressure (&model_before_pressure, ++ point, cci, delta[cl]); ++ } ++ } ++ while (mix && point > model_curr_point); ++ ++ if (print_p) ++ fprintf (sched_dump, MODEL_BAR); ++} ++ ++/* model_spill_cost (CL, P, P') returns the cost of increasing the ++ pressure on CL from P to P'. We use this to calculate a "base ECC", ++ baseECC (CL, X), for each cover class CL and each instruction X. ++ Supposing X changes the pressure on CL from P to P', and that the ++ maximum pressure on CL in the current model schedule is MP', then: ++ ++ * if X occurs before or at the next point of maximum pressure in ++ the model schedule and P' > MP', then: ++ ++ baseECC (CL, X) = model_spill_cost (CL, MP, P') ++ ++ The idea is that the pressure after scheduling a fixed set of ++ instructions -- in this case, the set up to and including the ++ next maximum pressure point -- is going to be the same regardless ++ of the order; we simply want to keep the intermediate pressure ++ under control. Thus X has a cost of zero unless scheduling it ++ now would exceed MP'. ++ ++ If all increases in the set are by the same amount, no zero-cost ++ instruction will ever cause the pressure to exceed MP'. However, ++ if X is instead moved past an instruction X' with pressure in the ++ range (MP' - (P' - P), MP'), the pressure at X' will increase ++ beyond MP'. Since baseECC is very much a heuristic anyway, ++ it doesn't seem worth the overhead of tracking cases like these. ++ ++ The cost of exceeding MP' is always based on the original maximum ++ pressure MP. This is so that going 2 registers over the original ++ limit has the same cost regardless of whether it comes from two ++ separate +1 deltas or from a single +2 delta. 
++ ++ * if X occurs after the next point of maximum pressure in the model ++ schedule and P' > P, then: ++ ++ baseECC (CL, X) = model_spill_cost (CL, MP, MP' + (P' - P)) ++ ++ That is, if we move X forward across a point of maximum pressure, ++ and if X increases the pressure by P' - P, then we conservatively ++ assume that scheduling X next would increase the maximum pressure ++ by P' - P. Again, the cost of doing this is based on the original ++ maximum pressure MP, for the same reason as above. ++ ++ * if P' < P, P > MP, and X occurs at or after the next point of ++ maximum pressure, then: ++ ++ baseECC (CL, X) = -model_spill_cost (CL, MAX (MP, P'), P) ++ ++ That is, if we have already exceeded the original maximum pressure MP, ++ and if X might reduce the maximum pressure again -- or at least push ++ it further back, and thus allow more scheduling freedom -- it is given ++ a negative cost to reflect the improvement. ++ ++ * otherwise, ++ ++ baseECC (CL, X) = 0 ++ ++ In this case, X is not expected to affect the maximum pressure MP', ++ so it has zero cost. ++ ++ We then create a combined value baseECC (X) that is the sum of ++ baseECC (CL, X) for each cover class CL. ++ ++ baseECC (X) could itself be used as the ECC value described above. ++ However, this is often too conservative, in the sense that it ++ tends to make high-priority instructions that increase pressure ++ wait too long in cases where introducing a spill would be better. ++ For this reason the final ECC is a priority-adjusted form of ++ baseECC (X). Specifically, we calculate: ++ ++ P (X) = INSN_PRIORITY (X) - insn_delay (X) - baseECC (X) ++ baseP = MAX { P (X) | baseECC (X) <= 0 } ++ ++ Then: ++ ++ ECC (X) = MAX (MIN (baseP - P (X), baseECC (X)), 0) ++ ++ Thus an instruction's effect on pressure is ignored if it has a high ++ enough priority relative to the ones that don't increase pressure. ++ Negative values of baseECC (X) do not increase the priority of X ++ itself, but they do make it harder for other instructions to ++ increase the pressure further. ++ ++ This pressure cost is deliberately timid. The intention has been ++ to choose a heuristic that rarely interferes with the normal list ++ scheduler in cases where that scheduler would produce good code. ++ We simply want to curb some of its worst excesses. */ ++ ++/* Return the cost of increasing the pressure in class CL from FROM to TO. ++ ++ Here we use the very simplistic cost model that every register above ++ ira_available_class_regs[CL] has a spill cost of 1. We could use other ++ measures instead, such as one based on MEMORY_MOVE_COST. However: ++ ++ (1) In order for an instruction to be scheduled, the higher cost ++ would need to be justified in a single saving of that many stalls. ++ This is overly pessimistic, because the benefit of spilling is ++ often to avoid a sequence of several short stalls rather than ++ a single long one. ++ ++ (2) The cost is still arbitrary. Because we are not allocating ++ registers during scheduling, we have no way of knowing for ++ sure how many memory accesses will be required by each spill, ++ where the spills will be placed within the block, or even ++ which block(s) will contain the spills. ++ ++ So a higher cost than 1 is often too conservative in practice, ++ forcing blocks to contain unnecessary stalls instead of spill code. ++ The simple cost below seems to be the best compromise. It reduces ++ the interference with the normal list scheduler, which helps make ++ it more suitable for a default-on option. 
*/ ++ ++static int ++model_spill_cost (int cl, int from, int to) ++{ ++ from = MAX (from, ira_available_class_regs[cl]); ++ return MAX (to, from) - from; ++} ++ ++/* Return baseECC (ira_reg_class_cover[CCI], POINT), given that ++ P = curr_reg_pressure[ira_reg_class_cover[CCI]] and that ++ P' = P + DELTA. */ ++ ++static int ++model_excess_group_cost (struct model_pressure_group *group, ++ int point, int cci, int delta) ++{ ++ int pressure, cl; ++ ++ cl = ira_reg_class_cover[cci]; ++ if (delta < 0 && point >= group->limits[cci].point) ++ { ++ pressure = MAX (group->limits[cci].orig_pressure, ++ curr_reg_pressure[cl] + delta); ++ return -model_spill_cost (cl, pressure, curr_reg_pressure[cl]); ++ } ++ ++ if (delta > 0) ++ { ++ if (point > group->limits[cci].point) ++ pressure = group->limits[cci].pressure + delta; ++ else ++ pressure = curr_reg_pressure[cl] + delta; ++ ++ if (pressure > group->limits[cci].pressure) ++ return model_spill_cost (cl, group->limits[cci].orig_pressure, ++ pressure); ++ } ++ ++ return 0; ++} ++ ++/* Return baseECC (MODEL_INSN (INSN)). Dump the costs to sched_dump ++ if PRINT_P. */ ++ ++static int ++model_excess_cost (rtx insn, bool print_p) ++{ ++ int point, cci, cl, cost, this_cost, delta; ++ struct reg_pressure_data *insn_reg_pressure; ++ int insn_death[N_REG_CLASSES]; ++ ++ calculate_reg_deaths (insn, insn_death); ++ point = model_index (insn); ++ insn_reg_pressure = INSN_REG_PRESSURE (insn); ++ cost = 0; ++ ++ if (print_p) ++ fprintf (sched_dump, ";;\t\t| %3d %4d | %4d %+3d |", point, ++ INSN_UID (insn), INSN_PRIORITY (insn), insn_delay (insn)); ++ ++ /* Sum up the individual costs for each register class. */ ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ delta = insn_reg_pressure[cci].set_increase - insn_death[cl]; ++ this_cost = model_excess_group_cost (&model_before_pressure, ++ point, cci, delta); ++ cost += this_cost; ++ if (print_p) ++ fprintf (sched_dump, " %s:[%d base cost %d]", ++ reg_class_names[cl], delta, this_cost); ++ } ++ ++ if (print_p) ++ fprintf (sched_dump, "\n"); ++ ++ return cost; ++} ++ ++/* Dump the next points of maximum pressure for GROUP. */ ++ ++static void ++model_dump_pressure_points (struct model_pressure_group *group) ++{ ++ int cci, cl; ++ ++ fprintf (sched_dump, ";;\t\t| pressure points"); ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ fprintf (sched_dump, " %s:[%d->%d at ", reg_class_names[cl], ++ curr_reg_pressure[cl], group->limits[cci].pressure); ++ if (group->limits[cci].point < model_num_insns) ++ fprintf (sched_dump, "%d:%d]", group->limits[cci].point, ++ INSN_UID (MODEL_INSN (group->limits[cci].point))); ++ else ++ fprintf (sched_dump, "end]"); ++ } ++ fprintf (sched_dump, "\n"); ++} ++ ++/* Set INSN_REG_PRESSURE_EXCESS_COST_CHANGE for INSNS[0...COUNT-1]. */ ++ ++static void ++model_set_excess_costs (rtx *insns, int count) ++{ ++ int i, cost, priority_base, priority; ++ bool print_p; ++ ++ /* Record the baseECC value for each instruction in the model schedule, ++ except that negative costs are converted to zero ones now rather thatn ++ later. Do not assign a cost to debug instructions, since they must ++ not change code-generation decisions. Experiments suggest we also ++ get better results by not assigning a cost to instructions from ++ a different block. ++ ++ Set PRIORITY_BASE to baseP in the block comment above. 
This is the ++ maximum priority of the "cheap" instructions, which should always ++ include the next model instruction. */ ++ priority_base = 0; ++ print_p = false; ++ for (i = 0; i < count; i++) ++ if (INSN_MODEL_INDEX (insns[i])) ++ { ++ if (sched_verbose >= 6 && !print_p) ++ { ++ fprintf (sched_dump, MODEL_BAR); ++ fprintf (sched_dump, ";;\t\t| Pressure costs for ready queue\n"); ++ model_dump_pressure_points (&model_before_pressure); ++ fprintf (sched_dump, MODEL_BAR); ++ print_p = true; ++ } ++ cost = model_excess_cost (insns[i], print_p); ++ if (cost <= 0) ++ { ++ priority = INSN_PRIORITY (insns[i]) - insn_delay (insns[i]) - cost; ++ priority_base = MAX (priority_base, priority); ++ cost = 0; ++ } ++ INSN_REG_PRESSURE_EXCESS_COST_CHANGE (insns[i]) = cost; ++ } ++ if (print_p) ++ fprintf (sched_dump, MODEL_BAR); ++ ++ /* Use MAX (baseECC, 0) and baseP to calculcate ECC for each ++ instruction. */ ++ for (i = 0; i < count; i++) ++ { ++ cost = INSN_REG_PRESSURE_EXCESS_COST_CHANGE (insns[i]); ++ priority = INSN_PRIORITY (insns[i]) - insn_delay (insns[i]); ++ if (cost > 0 && priority > priority_base) ++ { ++ cost += priority_base - priority; ++ INSN_REG_PRESSURE_EXCESS_COST_CHANGE (insns[i]) = MAX (cost, 0); ++ } ++ } ++} ++ + /* Returns a positive value if x is preferred; returns a negative value if + y is preferred. Should never return 0, since that will make the sort + unstable. */ +@@ -1170,23 +1948,20 @@ + /* Make sure that priority of TMP and TMP2 are initialized. */ + gcc_assert (INSN_PRIORITY_KNOWN (tmp) && INSN_PRIORITY_KNOWN (tmp2)); + +- if (sched_pressure_p) ++ if (sched_pressure != SCHED_PRESSURE_NONE) + { + int diff; + + /* Prefer insn whose scheduling results in the smallest register + pressure excess. */ + if ((diff = (INSN_REG_PRESSURE_EXCESS_COST_CHANGE (tmp) +- + (INSN_TICK (tmp) > clock_var +- ? INSN_TICK (tmp) - clock_var : 0) ++ + insn_delay (tmp) + - INSN_REG_PRESSURE_EXCESS_COST_CHANGE (tmp2) +- - (INSN_TICK (tmp2) > clock_var +- ? INSN_TICK (tmp2) - clock_var : 0))) != 0) ++ - insn_delay (tmp2)))) + return diff; + } + +- +- if (sched_pressure_p ++ if (sched_pressure != SCHED_PRESSURE_NONE + && (INSN_TICK (tmp2) > clock_var || INSN_TICK (tmp) > clock_var)) + { + if (INSN_TICK (tmp) <= clock_var) +@@ -1277,11 +2052,22 @@ + return val; + } + ++ /* Prefer instructions that occur earlier in the model schedule. */ ++ if (sched_pressure == SCHED_PRESSURE_MODEL) ++ { ++ int diff; ++ ++ diff = model_index (tmp) - model_index (tmp2); ++ if (diff != 0) ++ return diff; ++ } ++ + /* Prefer the insn which has more later insns that depend on it. + This gives the scheduler more freedom when scheduling later + instructions at the expense of added register pressure. 
*/ + +- val = (dep_list_size (tmp2) - dep_list_size (tmp)); ++ val = (dep_list_size (tmp2, SD_LIST_FORW) ++ - dep_list_size (tmp, SD_LIST_FORW)); + + if (flag_sched_dep_count_heuristic && val != 0) + return val; +@@ -1480,12 +2266,15 @@ + int i; + rtx *first = ready_lastpos (ready); + +- if (sched_pressure_p) ++ if (sched_pressure == SCHED_PRESSURE_WEIGHTED) + { + for (i = 0; i < ready->n_ready; i++) + if (!DEBUG_INSN_P (first[i])) + setup_insn_reg_pressure_info (first[i]); + } ++ if (sched_pressure == SCHED_PRESSURE_MODEL ++ && model_curr_point < model_num_insns) ++ model_set_excess_costs (first, ready->n_ready); + SCHED_SORT (first, ready->n_ready); + } + +@@ -1551,10 +2340,12 @@ + gcc_checking_assert (!DEBUG_INSN_P (insn)); + + for (use = INSN_REG_USE_LIST (insn); use != NULL; use = use->next_insn_use) +- if (dying_use_p (use) && bitmap_bit_p (curr_reg_live, use->regno)) +- mark_regno_birth_or_death (use->regno, false); ++ if (dying_use_p (use)) ++ mark_regno_birth_or_death (curr_reg_live, curr_reg_pressure, ++ use->regno, false); + for (set = INSN_REG_SET_LIST (insn); set != NULL; set = set->next_insn_set) +- mark_regno_birth_or_death (set->regno, true); ++ mark_regno_birth_or_death (curr_reg_live, curr_reg_pressure, ++ set->regno, true); + } + + /* Set up or update (if UPDATE_P) max register pressure (see its +@@ -1626,11 +2417,618 @@ + void + sched_setup_bb_reg_pressure_info (basic_block bb, rtx after) + { +- gcc_assert (sched_pressure_p); ++ gcc_assert (sched_pressure == SCHED_PRESSURE_WEIGHTED); + initiate_bb_reg_pressure_info (bb); + setup_insn_max_reg_pressure (after, false); + } +- ++ ++/* Return (in order): ++ ++ - positive if INSN adversely affects the pressure on one ++ register class ++ ++ - negative if INSN reduces the pressure on one register class ++ ++ - 0 if INSN doesn't affect the pressure on any register class. */ ++ ++static int ++model_classify_pressure (struct model_insn_info *insn) ++{ ++ struct reg_pressure_data *reg_pressure; ++ int death[N_REG_CLASSES]; ++ int cci, cl, sum; ++ ++ calculate_reg_deaths (insn->insn, death); ++ reg_pressure = INSN_REG_PRESSURE (insn->insn); ++ sum = 0; ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ if (death[cl] < reg_pressure[cci].set_increase) ++ return 1; ++ sum += reg_pressure[cci].set_increase - death[cl]; ++ } ++ return sum; ++} ++ ++/* Return true if INSN1 should come before INSN2 in the model schedule. */ ++ ++static int ++model_order_p (struct model_insn_info *insn1, struct model_insn_info *insn2) ++{ ++ unsigned int height1, height2; ++ unsigned int priority1, priority2; ++ ++ /* Prefer instructions with a higher model priority. */ ++ if (insn1->model_priority != insn2->model_priority) ++ return insn1->model_priority > insn2->model_priority; ++ ++ /* Combine the length of the longest path of satisfied true dependencies ++ that leads to each instruction (depth) with the length of the longest ++ path of any dependencies that leads from the instruction (alap). ++ Prefer instructions with the greatest combined length. If the combined ++ lengths are equal, prefer instructions with the greatest depth. ++ ++ The idea is that, if we have a set S of "equal" instructions that each ++ have ALAP value X, and we pick one such instruction I, any true-dependent ++ successors of I that have ALAP value X - 1 should be preferred over S. ++ This encourages the schedule to be "narrow" rather than "wide". 
++ However, if I is a low-priority instruction that we decided to ++ schedule because of its model_classify_pressure, and if there ++ is a set of higher-priority instructions T, the aforementioned ++ successors of I should not have the edge over T. */ ++ height1 = insn1->depth + insn1->alap; ++ height2 = insn2->depth + insn2->alap; ++ if (height1 != height2) ++ return height1 > height2; ++ if (insn1->depth != insn2->depth) ++ return insn1->depth > insn2->depth; ++ ++ /* We have no real preference between INSN1 an INSN2 as far as attempts ++ to reduce pressure go. Prefer instructions with higher priorities. */ ++ priority1 = INSN_PRIORITY (insn1->insn); ++ priority2 = INSN_PRIORITY (insn2->insn); ++ if (priority1 != priority2) ++ return priority1 > priority2; ++ ++ /* Use the original rtl sequence as a tie-breaker. */ ++ return insn1 < insn2; ++} ++ ++/* Add INSN to the model worklist immediately after PREV. Add it to the ++ beginning of the list if PREV is null. */ ++ ++static void ++model_add_to_worklist_at (struct model_insn_info *insn, ++ struct model_insn_info *prev) ++{ ++ gcc_assert (QUEUE_INDEX (insn->insn) == QUEUE_NOWHERE); ++ QUEUE_INDEX (insn->insn) = QUEUE_READY; ++ ++ insn->prev = prev; ++ if (prev) ++ { ++ insn->next = prev->next; ++ prev->next = insn; ++ } ++ else ++ { ++ insn->next = model_worklist; ++ model_worklist = insn; ++ } ++ if (insn->next) ++ insn->next->prev = insn; ++} ++ ++/* Remove INSN from the model worklist. */ ++ ++static void ++model_remove_from_worklist (struct model_insn_info *insn) ++{ ++ gcc_assert (QUEUE_INDEX (insn->insn) == QUEUE_READY); ++ QUEUE_INDEX (insn->insn) = QUEUE_NOWHERE; ++ ++ if (insn->prev) ++ insn->prev->next = insn->next; ++ else ++ model_worklist = insn->next; ++ if (insn->next) ++ insn->next->prev = insn->prev; ++} ++ ++/* Add INSN to the model worklist. Start looking for a suitable position ++ between neighbors PREV and NEXT, testing at most MAX_SCHED_READY_INSNS ++ insns either side. A null PREV indicates the beginning of the list and ++ a null NEXT indicates the end. */ ++ ++static void ++model_add_to_worklist (struct model_insn_info *insn, ++ struct model_insn_info *prev, ++ struct model_insn_info *next) ++{ ++ int count; ++ ++ count = MAX_SCHED_READY_INSNS; ++ if (count > 0 && prev && model_order_p (insn, prev)) ++ do ++ { ++ count--; ++ prev = prev->prev; ++ } ++ while (count > 0 && prev && model_order_p (insn, prev)); ++ else ++ while (count > 0 && next && model_order_p (next, insn)) ++ { ++ count--; ++ prev = next; ++ next = next->next; ++ } ++ model_add_to_worklist_at (insn, prev); ++} ++ ++/* INSN may now have a higher priority (in the model_order_p sense) ++ than before. Move it up the worklist if necessary. */ ++ ++static void ++model_promote_insn (struct model_insn_info *insn) ++{ ++ struct model_insn_info *prev; ++ int count; ++ ++ prev = insn->prev; ++ count = MAX_SCHED_READY_INSNS; ++ while (count > 0 && prev && model_order_p (insn, prev)) ++ { ++ count--; ++ prev = prev->prev; ++ } ++ if (prev != insn->prev) ++ { ++ model_remove_from_worklist (insn); ++ model_add_to_worklist_at (insn, prev); ++ } ++} ++ ++/* Add INSN to the end of the model schedule. 
*/ ++ ++static void ++model_add_to_schedule (rtx insn) ++{ ++ unsigned int point; ++ ++ gcc_assert (QUEUE_INDEX (insn) == QUEUE_NOWHERE); ++ QUEUE_INDEX (insn) = QUEUE_SCHEDULED; ++ ++ point = VEC_length (rtx, model_schedule); ++ VEC_quick_push (rtx, model_schedule, insn); ++ INSN_MODEL_INDEX (insn) = point + 1; ++} ++ ++/* Analyze the instructions that are to be scheduled, setting up ++ MODEL_INSN_INFO (...) and model_num_insns accordingly. Add ready ++ instructions to model_worklist. */ ++ ++static void ++model_analyze_insns (void) ++{ ++ rtx start, end, iter; ++ sd_iterator_def sd_it; ++ dep_t dep; ++ struct model_insn_info *insn, *con; ++ ++ model_num_insns = 0; ++ start = PREV_INSN (current_sched_info->next_tail); ++ end = current_sched_info->prev_head; ++ for (iter = start; iter != end; iter = PREV_INSN (iter)) ++ if (NONDEBUG_INSN_P (iter)) ++ { ++ insn = MODEL_INSN_INFO (iter); ++ insn->insn = iter; ++ FOR_EACH_DEP (iter, SD_LIST_FORW, sd_it, dep) ++ { ++ con = MODEL_INSN_INFO (DEP_CON (dep)); ++ if (con->insn && insn->alap < con->alap + 1) ++ insn->alap = con->alap + 1; ++ } ++ ++ insn->old_queue = QUEUE_INDEX (iter); ++ QUEUE_INDEX (iter) = QUEUE_NOWHERE; ++ ++ insn->unscheduled_preds = dep_list_size (iter, SD_LIST_HARD_BACK); ++ if (insn->unscheduled_preds == 0) ++ model_add_to_worklist (insn, NULL, model_worklist); ++ ++ model_num_insns++; ++ } ++} ++ ++/* The global state describes the register pressure at the start of the ++ model schedule. Initialize GROUP accordingly. */ ++ ++static void ++model_init_pressure_group (struct model_pressure_group *group) ++{ ++ int cci, cl; ++ ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ group->limits[cci].pressure = curr_reg_pressure[cl]; ++ group->limits[cci].point = 0; ++ } ++ /* Use index model_num_insns to record the state after the last ++ instruction in the model schedule. */ ++ group->model = XNEWVEC (struct model_pressure_data, ++ (model_num_insns + 1) * ira_reg_class_cover_size); ++} ++ ++/* Record that MODEL_REF_PRESSURE (GROUP, POINT, CCI) is PRESSURE. ++ Update the maximum pressure for the whole schedule. */ ++ ++static void ++model_record_pressure (struct model_pressure_group *group, ++ int point, int cci, int pressure) ++{ ++ MODEL_REF_PRESSURE (group, point, cci) = pressure; ++ if (group->limits[cci].pressure < pressure) ++ { ++ group->limits[cci].pressure = pressure; ++ group->limits[cci].point = point; ++ } ++} ++ ++/* INSN has just been added to the end of the model schedule. Record its ++ register-pressure information. 
*/ ++ ++static void ++model_record_pressures (struct model_insn_info *insn) ++{ ++ struct reg_pressure_data *reg_pressure; ++ int point, cci, cl, delta; ++ int death[N_REG_CLASSES]; ++ ++ point = model_index (insn->insn); ++ if (sched_verbose >= 2) ++ { ++ char buf[2048]; ++ ++ if (point == 0) ++ { ++ fprintf (sched_dump, "\n;;\tModel schedule:\n;;\n"); ++ fprintf (sched_dump, ";;\t| idx insn | mpri hght dpth prio |\n"); ++ } ++ print_pattern (buf, PATTERN (insn->insn), 0); ++ fprintf (sched_dump, ";;\t| %3d %4d | %4d %4d %4d %4d | %-30s ", ++ point, INSN_UID (insn->insn), insn->model_priority, ++ insn->depth + insn->alap, insn->depth, ++ INSN_PRIORITY (insn->insn), buf); ++ } ++ calculate_reg_deaths (insn->insn, death); ++ reg_pressure = INSN_REG_PRESSURE (insn->insn); ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ delta = reg_pressure[cci].set_increase - death[cl]; ++ if (sched_verbose >= 2) ++ fprintf (sched_dump, " %s:[%d,%+d]", reg_class_names[cl], ++ curr_reg_pressure[cl], delta); ++ model_record_pressure (&model_before_pressure, point, cci, ++ curr_reg_pressure[cl]); ++ } ++ if (sched_verbose >= 2) ++ fprintf (sched_dump, "\n"); ++} ++ ++/* All instructions have been added to the model schedule. Record the ++ final register pressure in GROUP and set up all MODEL_MAX_PRESSUREs. */ ++ ++static void ++model_record_final_pressures (struct model_pressure_group *group) ++{ ++ int point, cci, max_pressure, ref_pressure, cl; ++ ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ /* Record the final pressure for this class. */ ++ cl = ira_reg_class_cover[cci]; ++ point = model_num_insns; ++ ref_pressure = curr_reg_pressure[cl]; ++ model_record_pressure (group, point, cci, ref_pressure); ++ ++ /* Record the original maximum pressure. */ ++ group->limits[cci].orig_pressure = group->limits[cci].pressure; ++ ++ /* Update the MODEL_MAX_PRESSURE for every point of the schedule. */ ++ max_pressure = ref_pressure; ++ MODEL_MAX_PRESSURE (group, point, cci) = max_pressure; ++ while (point > 0) ++ { ++ point--; ++ ref_pressure = MODEL_REF_PRESSURE (group, point, cci); ++ max_pressure = MAX (max_pressure, ref_pressure); ++ MODEL_MAX_PRESSURE (group, point, cci) = max_pressure; ++ } ++ } ++} ++ ++/* Update all successors of INSN, given that INSN has just been scheduled. */ ++ ++static void ++model_add_successors_to_worklist (struct model_insn_info *insn) ++{ ++ sd_iterator_def sd_it; ++ struct model_insn_info *con; ++ dep_t dep; ++ ++ FOR_EACH_DEP (insn->insn, SD_LIST_FORW, sd_it, dep) ++ { ++ con = MODEL_INSN_INFO (DEP_CON (dep)); ++ /* Ignore debug instructions, and instructions from other blocks. */ ++ if (con->insn) ++ { ++ con->unscheduled_preds--; ++ ++ /* Update the depth field of each true-dependent successor. ++ Increasing the depth gives them a higher priority than ++ before. */ ++ if (DEP_TYPE (dep) == REG_DEP_TRUE && con->depth < insn->depth + 1) ++ { ++ con->depth = insn->depth + 1; ++ if (QUEUE_INDEX (con->insn) == QUEUE_READY) ++ model_promote_insn (con); ++ } ++ ++ /* If this is a true dependency, or if there are no remaining ++ dependencies for CON (meaning that CON only had non-true ++ dependencies), make sure that CON is on the worklist. ++ We don't bother otherwise because it would tend to fill the ++ worklist with a lot of low-priority instructions that are not ++ yet ready to issue. 
*/ ++ if ((con->depth > 0 || con->unscheduled_preds == 0) ++ && QUEUE_INDEX (con->insn) == QUEUE_NOWHERE) ++ model_add_to_worklist (con, insn, insn->next); ++ } ++ } ++} ++ ++/* Give INSN a higher priority than any current instruction, then give ++ unscheduled predecessors of INSN a higher priority still. If any of ++ those predecessors are not on the model worklist, do the same for its ++ predecessors, and so on. */ ++ ++static void ++model_promote_predecessors (struct model_insn_info *insn) ++{ ++ struct model_insn_info *pro, *first; ++ sd_iterator_def sd_it; ++ dep_t dep; ++ ++ if (sched_verbose >= 7) ++ fprintf (sched_dump, ";;\t+--- priority of %d = %d, priority of", ++ INSN_UID (insn->insn), model_next_priority); ++ insn->model_priority = model_next_priority++; ++ model_remove_from_worklist (insn); ++ model_add_to_worklist_at (insn, NULL); ++ ++ first = NULL; ++ for (;;) ++ { ++ FOR_EACH_DEP (insn->insn, SD_LIST_HARD_BACK, sd_it, dep) ++ { ++ pro = MODEL_INSN_INFO (DEP_PRO (dep)); ++ /* The first test is to ignore debug instructions, and instructions ++ from other blocks. */ ++ if (pro->insn ++ && pro->model_priority != model_next_priority ++ && QUEUE_INDEX (pro->insn) != QUEUE_SCHEDULED) ++ { ++ pro->model_priority = model_next_priority; ++ if (sched_verbose >= 7) ++ fprintf (sched_dump, " %d", INSN_UID (pro->insn)); ++ if (QUEUE_INDEX (pro->insn) == QUEUE_READY) ++ { ++ /* PRO is already in the worklist, but it now has ++ a higher priority than before. Move it at the ++ appropriate place. */ ++ model_remove_from_worklist (pro); ++ model_add_to_worklist (pro, NULL, model_worklist); ++ } ++ else ++ { ++ /* PRO isn't in the worklist. Recursively process ++ its predecessors until we find one that is. */ ++ pro->next = first; ++ first = pro; ++ } ++ } ++ } ++ if (!first) ++ break; ++ insn = first; ++ first = insn->next; ++ } ++ if (sched_verbose >= 7) ++ fprintf (sched_dump, " = %d\n", model_next_priority); ++ model_next_priority++; ++} ++ ++/* Pick one instruction from model_worklist and process it. */ ++ ++static void ++model_choose_insn (void) ++{ ++ struct model_insn_info *insn, *fallback; ++ int count; ++ ++ if (sched_verbose >= 7) ++ { ++ fprintf (sched_dump, ";;\t+--- worklist:\n"); ++ insn = model_worklist; ++ count = MAX_SCHED_READY_INSNS; ++ while (count > 0 && insn) ++ { ++ fprintf (sched_dump, ";;\t+--- %d [%d, %d, %d, %d]\n", ++ INSN_UID (insn->insn), insn->model_priority, ++ insn->depth + insn->alap, insn->depth, ++ INSN_PRIORITY (insn->insn)); ++ count--; ++ insn = insn->next; ++ } ++ } ++ ++ /* Look for a ready instruction whose model_classify_priority is zero ++ or negative, picking the highest-priority one. Adding such an ++ instruction to the schedule now should do no harm, and may actually ++ do some good. ++ ++ Failing that, see whether there is an instruction with the highest ++ extant model_priority that is not yet ready, but which would reduce ++ pressure if it became ready. This is designed to catch cases like: ++ ++ (set (mem (reg R1)) (reg R2)) ++ ++ where the instruction is the last remaining use of R1 and where the ++ value of R2 is not yet available (or vice versa). The death of R1 ++ means that this instruction already reduces pressure. It is of ++ course possible that the computation of R2 involves other registers ++ that are hard to kill, but such cases are rare enough for this ++ heuristic to be a win in general. ++ ++ Failing that, just pick the highest-priority instruction in the ++ worklist. 
*/ ++ count = MAX_SCHED_READY_INSNS; ++ insn = model_worklist; ++ fallback = 0; ++ for (;;) ++ { ++ if (count == 0 || !insn) ++ { ++ insn = fallback ? fallback : model_worklist; ++ break; ++ } ++ if (insn->unscheduled_preds) ++ { ++ if (model_worklist->model_priority == insn->model_priority ++ && !fallback ++ && model_classify_pressure (insn) < 0) ++ fallback = insn; ++ } ++ else ++ { ++ if (model_classify_pressure (insn) <= 0) ++ break; ++ } ++ count--; ++ insn = insn->next; ++ } ++ ++ if (sched_verbose >= 7 && insn != model_worklist) ++ { ++ if (insn->unscheduled_preds) ++ fprintf (sched_dump, ";;\t+--- promoting insn %d, with dependencies\n", ++ INSN_UID (insn->insn)); ++ else ++ fprintf (sched_dump, ";;\t+--- promoting insn %d, which is ready\n", ++ INSN_UID (insn->insn)); ++ } ++ if (insn->unscheduled_preds) ++ /* INSN isn't yet ready to issue. Give all its predecessors the ++ highest priority. */ ++ model_promote_predecessors (insn); ++ else ++ { ++ /* INSN is ready. Add it to the end of model_schedule and ++ process its successors. */ ++ model_add_successors_to_worklist (insn); ++ model_remove_from_worklist (insn); ++ model_add_to_schedule (insn->insn); ++ model_record_pressures (insn); ++ update_register_pressure (insn->insn); ++ } ++} ++ ++/* Restore all QUEUE_INDEXs to the values that they had before ++ model_start_schedule was called. */ ++ ++static void ++model_reset_queue_indices (void) ++{ ++ unsigned int i; ++ rtx insn; ++ ++ FOR_EACH_VEC_ELT (rtx, model_schedule, i, insn) ++ QUEUE_INDEX (insn) = MODEL_INSN_INFO (insn)->old_queue; ++} ++ ++/* We have calculated the model schedule and spill costs. Print a summary ++ to sched_dump. */ ++ ++static void ++model_dump_pressure_summary (void) ++{ ++ int cci, cl; ++ ++ fprintf (sched_dump, ";; Pressure summary:"); ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ fprintf (sched_dump, " %s:%d", reg_class_names[cl], ++ model_before_pressure.limits[cci].pressure); ++ } ++ fprintf (sched_dump, "\n\n"); ++} ++ ++/* Initialize the SCHED_PRESSURE_MODEL information for the current ++ scheduling region. */ ++ ++static void ++model_start_schedule (void) ++{ ++ basic_block bb; ++ ++ model_next_priority = 1; ++ model_schedule = VEC_alloc (rtx, heap, sched_max_luid); ++ model_insns = XCNEWVEC (struct model_insn_info, sched_max_luid); ++ ++ bb = BLOCK_FOR_INSN (NEXT_INSN (current_sched_info->prev_head)); ++ initiate_reg_pressure_info (df_get_live_in (bb)); ++ ++ model_analyze_insns (); ++ model_init_pressure_group (&model_before_pressure); ++ while (model_worklist) ++ model_choose_insn (); ++ gcc_assert (model_num_insns == (int) VEC_length (rtx, model_schedule)); ++ if (sched_verbose >= 2) ++ fprintf (sched_dump, "\n"); ++ ++ model_record_final_pressures (&model_before_pressure); ++ model_reset_queue_indices (); ++ ++ XDELETEVEC (model_insns); ++ ++ model_curr_point = 0; ++ initiate_reg_pressure_info (df_get_live_in (bb)); ++ if (sched_verbose >= 1) ++ model_dump_pressure_summary (); ++} ++ ++/* Free the information associated with GROUP. */ ++ ++static void ++model_finalize_pressure_group (struct model_pressure_group *group) ++{ ++ XDELETEVEC (group->model); ++} ++ ++/* Free the information created by model_start_schedule. */ ++ ++static void ++model_end_schedule (void) ++{ ++ model_finalize_pressure_group (&model_before_pressure); ++ VEC_free (rtx, heap, model_schedule); ++} ++ + /* INSN is the "currently executing insn". Launch each insn which was + waiting on INSN. 
READY is the ready list which contains the insns + that are ready to fire. CLOCK is the current cycle. The function +@@ -1667,10 +3065,14 @@ + reg_class_names[ira_reg_class_cover[i]], + pressure_info[i].set_increase, pressure_info[i].change); + } ++ if (sched_pressure == SCHED_PRESSURE_MODEL ++ && model_curr_point < model_num_insns ++ && model_index (insn) == model_curr_point) ++ fprintf (sched_dump, ":model %d", model_curr_point); + fputc ('\n', sched_dump); + } + +- if (sched_pressure_p && !DEBUG_INSN_P (insn)) ++ if (sched_pressure == SCHED_PRESSURE_WEIGHTED && !DEBUG_INSN_P (insn)) + update_reg_and_insn_max_reg_pressure (insn); + + /* Scheduling instruction should have all its dependencies resolved and +@@ -1728,6 +3130,24 @@ + gcc_assert (QUEUE_INDEX (insn) == QUEUE_NOWHERE); + QUEUE_INDEX (insn) = QUEUE_SCHEDULED; + ++ if (sched_pressure == SCHED_PRESSURE_MODEL ++ && model_curr_point < model_num_insns ++ && NONDEBUG_INSN_P (insn)) ++ { ++ if (model_index (insn) == model_curr_point) ++ do ++ model_curr_point++; ++ while (model_curr_point < model_num_insns ++ && (QUEUE_INDEX (MODEL_INSN (model_curr_point)) ++ == QUEUE_SCHEDULED)); ++ else ++ model_recompute (insn); ++ model_update_limit_points (); ++ update_register_pressure (insn); ++ if (sched_verbose >= 2) ++ print_curr_reg_pressure (); ++ } ++ + gcc_assert (INSN_TICK (insn) >= MIN_TICK); + if (INSN_TICK (insn) > clock_var) + /* INSN has been prematurely moved from the queue to the ready list. +@@ -2056,7 +3476,16 @@ + /* If the ready list is full, delay the insn for 1 cycle. + See the comment in schedule_block for the rationale. */ + if (!reload_completed +- && ready->n_ready - ready->n_debug > MAX_SCHED_READY_INSNS ++ && (ready->n_ready - ready->n_debug > MAX_SCHED_READY_INSNS ++ || (sched_pressure == SCHED_PRESSURE_MODEL ++ /* Limit pressure recalculations to MAX_SCHED_READY_INSNS ++ instructions too. */ ++ && model_index (insn) > (model_curr_point ++ + MAX_SCHED_READY_INSNS))) ++ && !(sched_pressure == SCHED_PRESSURE_MODEL ++ && model_curr_point < model_num_insns ++ /* Always allow the next model instruction to issue. */ ++ && model_index (insn) == model_curr_point) + && !SCHED_GROUP_P (insn) + && insn != skip_insn) + { +@@ -2293,12 +3722,12 @@ + fprintf (sched_dump, " %s:%d", + (*current_sched_info->print_insn) (p[i], 0), + INSN_LUID (p[i])); +- if (sched_pressure_p) ++ if (sched_pressure != SCHED_PRESSURE_NONE) + fprintf (sched_dump, "(cost=%d", + INSN_REG_PRESSURE_EXCESS_COST_CHANGE (p[i])); + if (INSN_TICK (p[i]) > clock_var) + fprintf (sched_dump, ":delay=%d", INSN_TICK (p[i]) - clock_var); +- if (sched_pressure_p) ++ if (sched_pressure != SCHED_PRESSURE_NONE) + fprintf (sched_dump, ")"); + } + fprintf (sched_dump, "\n"); +@@ -2609,8 +4038,8 @@ + { + if (state_dead_lock_p (state) + || insn_finishes_cycle_p (insn)) +- /* We won't issue any more instructions in the next +- choice_state. */ ++ /* We won't issue any more instructions in the next ++ choice_state. */ + top->rest = 0; + else + top->rest--; +@@ -2813,6 +4242,59 @@ + } + } + ++/* Examine all insns on the ready list and queue those which can't be ++ issued in this cycle. TEMP_STATE is temporary scheduler state we ++ can use as scratch space. If FIRST_CYCLE_INSN_P is true, no insns ++ have been issued for the current cycle, which means it is valid to ++ issue an asm statement. 
*/ ++ ++static void ++prune_ready_list (state_t temp_state, bool first_cycle_insn_p) ++{ ++ int i; ++ ++ restart: ++ for (i = 0; i < ready.n_ready; i++) ++ { ++ rtx insn = ready_element (&ready, i); ++ int cost = 0; ++ ++ if (recog_memoized (insn) < 0) ++ { ++ if (!first_cycle_insn_p ++ && (GET_CODE (PATTERN (insn)) == ASM_INPUT ++ || asm_noperands (PATTERN (insn)) >= 0)) ++ cost = 1; ++ } ++ else if (sched_pressure != SCHED_PRESSURE_NONE) ++ { ++ if (sched_pressure == SCHED_PRESSURE_MODEL ++ && INSN_TICK (insn) <= clock_var) ++ { ++ memcpy (temp_state, curr_state, dfa_state_size); ++ if (state_transition (temp_state, insn) >= 0) ++ INSN_TICK (insn) = clock_var + 1; ++ } ++ cost = 0; ++ } ++ else ++ { ++ memcpy (temp_state, curr_state, dfa_state_size); ++ cost = state_transition (temp_state, insn); ++ if (cost < 0) ++ cost = 0; ++ else if (cost == 0) ++ cost = 1; ++ } ++ if (cost >= 1) ++ { ++ ready_remove (&ready, i); ++ queue_insn (insn, cost); ++ goto restart; ++ } ++ } ++} ++ + /* Use forward list scheduling to rearrange insns of block pointed to by + TARGET_BB, possibly bringing insns from subsequent blocks in the same + region. */ +@@ -2882,6 +4364,9 @@ + in try_ready () (which is called through init_ready_list ()). */ + (*current_sched_info->init_ready_list) (); + ++ if (sched_pressure == SCHED_PRESSURE_MODEL) ++ model_start_schedule (); ++ + /* The algorithm is O(n^2) in the number of ready insns at any given + time in the worst case. Before reload we are more likely to have + big lists so truncate them to a reasonable size. */ +@@ -2963,6 +4448,10 @@ + } + while (advance > 0); + ++ prune_ready_list (temp_state, true); ++ if (ready.n_ready == 0) ++ continue; ++ + if (sort_p) + { + /* Sort the ready list based on priority. */ +@@ -3040,7 +4529,7 @@ + fprintf (sched_dump, ";;\tReady list (t = %3d): ", + clock_var); + debug_ready_list (&ready); +- if (sched_pressure_p) ++ if (sched_pressure == SCHED_PRESSURE_WEIGHTED) + print_curr_reg_pressure (); + } + +@@ -3084,7 +4573,8 @@ + else + insn = ready_remove_first (&ready); + +- if (sched_pressure_p && INSN_TICK (insn) > clock_var) ++ if (sched_pressure != SCHED_PRESSURE_NONE ++ && INSN_TICK (insn) > clock_var) + { + ready_add (&ready, insn, true); + advance = 1; +@@ -3112,44 +4602,6 @@ + } + + sort_p = TRUE; +- memcpy (temp_state, curr_state, dfa_state_size); +- if (recog_memoized (insn) < 0) +- { +- asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT +- || asm_noperands (PATTERN (insn)) >= 0); +- if (!first_cycle_insn_p && asm_p) +- /* This is asm insn which is tried to be issued on the +- cycle not first. Issue it on the next cycle. */ +- cost = 1; +- else +- /* A USE insn, or something else we don't need to +- understand. We can't pass these directly to +- state_transition because it will trigger a +- fatal error for unrecognizable insns. */ +- cost = 0; +- } +- else if (sched_pressure_p) +- cost = 0; +- else +- { +- cost = state_transition (temp_state, insn); +- if (cost < 0) +- cost = 0; +- else if (cost == 0) +- cost = 1; +- } +- +- if (cost >= 1) +- { +- queue_insn (insn, cost); +- if (SCHED_GROUP_P (insn)) +- { +- advance = cost; +- break; +- } +- +- continue; +- } + + if (current_sched_info->can_schedule_ready_p + && ! 
(*current_sched_info->can_schedule_ready_p) (insn)) +@@ -3200,11 +4652,17 @@ + reemit_notes (insn); + last_scheduled_insn = insn; + +- if (memcmp (curr_state, temp_state, dfa_state_size) != 0) +- { +- cycle_issued_insns++; +- memcpy (curr_state, temp_state, dfa_state_size); +- } ++ if (recog_memoized (insn) >= 0) ++ { ++ cost = state_transition (curr_state, insn); ++ if (sched_pressure != SCHED_PRESSURE_WEIGHTED) ++ gcc_assert (cost < 0); ++ cycle_issued_insns++; ++ asm_p = false; ++ } ++ else ++ asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT ++ || asm_noperands (PATTERN (insn)) >= 0); + + if (targetm.sched.variable_issue) + can_issue_more = +@@ -3225,6 +4683,9 @@ + + first_cycle_insn_p = false; + ++ if (ready.n_ready > 0) ++ prune_ready_list (temp_state, false); ++ + /* Sort the ready list based on priority. This must be + redone here, as schedule_insn may have readied additional + insns that will not be sorted correctly. */ +@@ -3321,6 +4782,9 @@ + } + } + ++ if (sched_pressure == SCHED_PRESSURE_MODEL) ++ model_end_schedule (); ++ + if (sched_verbose) + fprintf (sched_dump, ";; total time = %d\n", clock_var); + +@@ -3424,10 +4888,14 @@ + if (targetm.sched.dispatch (NULL_RTX, IS_DISPATCH_ON)) + targetm.sched.dispatch_do (NULL_RTX, DISPATCH_INIT); + +- sched_pressure_p = (flag_sched_pressure && ! reload_completed +- && common_sched_info->sched_pass_id == SCHED_RGN_PASS); ++ if (flag_sched_pressure ++ && !reload_completed ++ && common_sched_info->sched_pass_id == SCHED_RGN_PASS) ++ sched_pressure = flag_sched_pressure_algorithm; ++ else ++ sched_pressure = SCHED_PRESSURE_NONE; + +- if (sched_pressure_p) ++ if (sched_pressure != SCHED_PRESSURE_NONE) + ira_setup_eliminable_regset (); + + /* Initialize SPEC_INFO. */ +@@ -3504,7 +4972,7 @@ + if (targetm.sched.init_global) + targetm.sched.init_global (sched_dump, sched_verbose, get_max_uid () + 1); + +- if (sched_pressure_p) ++ if (sched_pressure != SCHED_PRESSURE_NONE) + { + int i, max_regno = max_reg_num (); + +@@ -3517,8 +4985,11 @@ + ? 
ira_class_translate[REGNO_REG_CLASS (i)] + : reg_cover_class (i)); + curr_reg_live = BITMAP_ALLOC (NULL); +- saved_reg_live = BITMAP_ALLOC (NULL); +- region_ref_regs = BITMAP_ALLOC (NULL); ++ if (sched_pressure == SCHED_PRESSURE_WEIGHTED) ++ { ++ saved_reg_live = BITMAP_ALLOC (NULL); ++ region_ref_regs = BITMAP_ALLOC (NULL); ++ } + } + + curr_state = xmalloc (dfa_state_size); +@@ -3618,12 +5089,15 @@ + sched_finish (void) + { + haifa_finish_h_i_d (); +- if (sched_pressure_p) ++ if (sched_pressure != SCHED_PRESSURE_NONE) + { ++ if (sched_pressure == SCHED_PRESSURE_WEIGHTED) ++ { ++ BITMAP_FREE (region_ref_regs); ++ BITMAP_FREE (saved_reg_live); ++ } ++ BITMAP_FREE (curr_reg_live); + free (sched_regno_cover_class); +- BITMAP_FREE (region_ref_regs); +- BITMAP_FREE (saved_reg_live); +- BITMAP_FREE (curr_reg_live); + } + free (curr_state); + +@@ -3936,7 +5410,7 @@ + INSN_TICK (next) = tick; + + delay = tick - clock_var; +- if (delay <= 0 || sched_pressure_p) ++ if (delay <= 0 || sched_pressure != SCHED_PRESSURE_NONE) + delay = QUEUE_READY; + + change_queue_index (next, delay); +@@ -5185,7 +6659,7 @@ + if (insn == jump) + break; + +- if (dep_list_size (insn) == 0) ++ if (dep_list_size (insn, SD_LIST_FORW) == 0) + { + dep_def _new_dep, *new_dep = &_new_dep; + +@@ -5556,6 +7030,7 @@ + + FOR_EACH_VEC_ELT (haifa_insn_data_def, h_i_d, i, data) + { ++ free (data->max_reg_pressure); + if (data->reg_pressure != NULL) + free (data->reg_pressure); + for (use = data->reg_use_list; use != NULL; use = next) + +=== modified file 'gcc/sched-deps.c' +--- old/gcc/sched-deps.c 2011-12-08 13:33:58 +0000 ++++ new/gcc/sched-deps.c 2012-02-08 23:39:45 +0000 +@@ -450,7 +450,7 @@ + static void add_dependence_list_and_free (struct deps_desc *, rtx, + rtx *, int, enum reg_note); + static void delete_all_dependences (rtx); +-static void fixup_sched_groups (rtx); ++static void chain_to_prev_insn (rtx); + + static void flush_pending_lists (struct deps_desc *, rtx, int, int); + static void sched_analyze_1 (struct deps_desc *, rtx, rtx); +@@ -1490,7 +1490,7 @@ + the previous nonnote insn. */ + + static void +-fixup_sched_groups (rtx insn) ++chain_to_prev_insn (rtx insn) + { + sd_iterator_def sd_it; + dep_t dep; +@@ -1999,7 +1999,7 @@ + static struct reg_pressure_data *pressure_info; + rtx link; + +- gcc_assert (sched_pressure_p); ++ gcc_assert (sched_pressure != SCHED_PRESSURE_NONE); + + if (! INSN_P (insn)) + return; +@@ -2030,8 +2030,9 @@ + len = sizeof (struct reg_pressure_data) * ira_reg_class_cover_size; + pressure_info + = INSN_REG_PRESSURE (insn) = (struct reg_pressure_data *) xmalloc (len); +- INSN_MAX_REG_PRESSURE (insn) = (int *) xcalloc (ira_reg_class_cover_size +- * sizeof (int), 1); ++ if (sched_pressure == SCHED_PRESSURE_WEIGHTED) ++ INSN_MAX_REG_PRESSURE (insn) = (int *) xcalloc (ira_reg_class_cover_size ++ * sizeof (int), 1); + for (i = 0; i < ira_reg_class_cover_size; i++) + { + cl = ira_reg_class_cover[i]; +@@ -2775,7 +2776,7 @@ + || (NONJUMP_INSN_P (insn) && control_flow_insn_p (insn))) + reg_pending_barrier = MOVE_BARRIER; + +- if (sched_pressure_p) ++ if (sched_pressure != SCHED_PRESSURE_NONE) + { + setup_insn_reg_uses (deps, insn); + setup_insn_reg_pressure_info (insn); +@@ -3076,7 +3077,7 @@ + instructions that follow seem like they should be part + of the call group. 
+ +- Also, if we did, fixup_sched_groups() would move the ++ Also, if we did, chain_to_prev_insn would move the + deps of the debug insn to the call insn, modifying + non-debug post-dependency counts of the debug insn + dependencies and otherwise messing with the scheduling +@@ -3222,6 +3223,37 @@ + return true; + } + ++/* Return true if INSN should be made dependent on the previous instruction ++ group, and if all INSN's dependencies should be moved to the first ++ instruction of that group. */ ++ ++static bool ++chain_to_prev_insn_p (rtx insn) ++{ ++ rtx prev, x; ++ ++ /* INSN forms a group with the previous instruction. */ ++ if (SCHED_GROUP_P (insn)) ++ return true; ++ ++ /* If the previous instruction clobbers a register R and this one sets ++ part of R, the clobber was added specifically to help us track the ++ liveness of R. There's no point scheduling the clobber and leaving ++ INSN behind, especially if we move the clobber to another block. */ ++ prev = prev_nonnote_nondebug_insn (insn); ++ if (prev ++ && INSN_P (prev) ++ && BLOCK_FOR_INSN (prev) == BLOCK_FOR_INSN (insn) ++ && GET_CODE (PATTERN (prev)) == CLOBBER) ++ { ++ x = XEXP (PATTERN (prev), 0); ++ if (set_of (x, insn)) ++ return true; ++ } ++ ++ return false; ++} ++ + /* Analyze INSN with DEPS as a context. */ + void + deps_analyze_insn (struct deps_desc *deps, rtx insn) +@@ -3358,8 +3390,9 @@ + + /* Fixup the dependencies in the sched group. */ + if ((NONJUMP_INSN_P (insn) || JUMP_P (insn)) +- && SCHED_GROUP_P (insn) && !sel_sched_p ()) +- fixup_sched_groups (insn); ++ && chain_to_prev_insn_p (insn) ++ && !sel_sched_p ()) ++ chain_to_prev_insn (insn); + } + + /* Initialize DEPS for the new block beginning with HEAD. */ + +=== modified file 'gcc/sched-int.h' +--- old/gcc/sched-int.h 2011-02-02 04:31:35 +0000 ++++ new/gcc/sched-int.h 2012-02-08 23:39:02 +0000 +@@ -651,7 +651,7 @@ + + /* Do register pressure sensitive insn scheduling if the flag is set + up. */ +-extern bool sched_pressure_p; ++extern enum sched_pressure_algorithm sched_pressure; + + /* Map regno -> its cover class. The map defined only when + SCHED_PRESSURE_P is true. */ +@@ -773,16 +773,16 @@ + + short cost; + ++ /* '> 0' if priority is valid, ++ '== 0' if priority was not yet computed, ++ '< 0' if priority in invalid and should be recomputed. */ ++ signed char priority_status; ++ + /* Set if there's DEF-USE dependence between some speculatively + moved load insn and this one. */ + unsigned int fed_by_spec_load : 1; + unsigned int is_load_insn : 1; + +- /* '> 0' if priority is valid, +- '== 0' if priority was not yet computed, +- '< 0' if priority in invalid and should be recomputed. */ +- signed char priority_status; +- + /* What speculations are necessary to apply to schedule the instruction. */ + ds_t todo_spec; + +@@ -817,6 +817,7 @@ + /* Info about how scheduling the insn changes cost of register + pressure excess (between source and target). 
*/ + int reg_pressure_excess_cost_change; ++ int model_index; + }; + + typedef struct _haifa_insn_data haifa_insn_data_def; +@@ -839,6 +840,7 @@ + #define INSN_REG_PRESSURE_EXCESS_COST_CHANGE(INSN) \ + (HID (INSN)->reg_pressure_excess_cost_change) + #define INSN_PRIORITY_STATUS(INSN) (HID (INSN)->priority_status) ++#define INSN_MODEL_INDEX(INSN) (HID (INSN)->model_index) + + typedef struct _haifa_deps_insn_data haifa_deps_insn_data_def; + typedef haifa_deps_insn_data_def *haifa_deps_insn_data_t; + +=== modified file 'gcc/sched-rgn.c' +--- old/gcc/sched-rgn.c 2011-06-04 10:15:48 +0000 ++++ new/gcc/sched-rgn.c 2012-02-08 23:38:13 +0000 +@@ -2943,7 +2943,7 @@ + + sched_extend_ready_list (rgn_n_insns); + +- if (sched_pressure_p) ++ if (sched_pressure == SCHED_PRESSURE_WEIGHTED) + { + sched_init_region_reg_pressure_info (); + for (bb = 0; bb < current_nr_blocks; bb++) + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106870.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106870.patch new file mode 100644 index 0000000000..1a5448403e --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106870.patch @@ -0,0 +1,28 @@ +2012-02-20 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/arm.c (arm_print_operand): Avoid null-pointer + dereference from MEM_SIZE. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2012-02-09 00:47:59 +0000 ++++ new/gcc/config/arm/arm.c 2012-02-20 15:32:26 +0000 +@@ -17446,6 +17446,7 @@ + rtx addr; + bool postinc = FALSE; + unsigned align, memsize, align_bits; ++ rtx memsize_rtx; + + gcc_assert (GET_CODE (x) == MEM); + addr = XEXP (x, 0); +@@ -17460,7 +17461,8 @@ + instruction (for some alignments) as an aid to the memory subsystem + of the target. */ + align = MEM_ALIGN (x) >> 3; +- memsize = INTVAL (MEM_SIZE (x)); ++ memsize_rtx = MEM_SIZE (x); ++ memsize = memsize_rtx ? INTVAL (memsize_rtx) : 0; + + /* Only certain alignment specifiers are supported by the hardware. */ + if (memsize == 16 && (align % 32) == 0) + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106872.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106872.patch new file mode 100644 index 0000000000..9bfd969df6 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106872.patch @@ -0,0 +1,126 @@ +2012-02-24 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline. + 2011-12-05 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + gcc/ + * config/arm/arm.c (vfp3_const_double_for_fract_bits): Define. + * config/arm/arm-protos.h (vfp3_const_double_for_fract_bits): Declare. + * config/arm/constraints.md ("Dt"): New constraint. + * config/arm/predicates.md (const_double_vcvt_power_of_two_reciprocal): + New. + * config/arm/vfp.md (*arm_combine_vcvt_f32_s32): New. + (*arm_combine_vcvt_f32_u32): New. + + LP:#900426 + + 2011-12-06 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + * config/arm/vfp.md (*combine_vcvt_f64_<FCVTI32typename>): Fix + formatting character for vmov.f64 case. + +2012-02-24 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + gcc/ + * config/arm/arm.c (arm_print_operand): Remove wrongly merged code. + (vfp3_const_double_for_fract_bits): Likewise. 
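(For context, illustrative only and not part of the patch text: r106872 teaches the ARM backend to fold an integer-to-float conversion multiplied by an exact power-of-two reciprocal into a single fixed-point vcvt. The sketch below uses a hypothetical function name and constant, and assumes hard-float -mfpu=vfp3 with the default -fno-rounding-math; it shows the kind of source the new vfp.md patterns can match.)

/* Illustrative sketch, not taken from the patch: x is read as a fixed-point
   value with 6 fractional bits.  1.0f/64 is the exact reciprocal of a power
   of two (2^-6), so the combiner can match the new SF-mode vcvt pattern and
   emit roughly "vcvt.f32.s32 s0, s0, #6" instead of a vcvt followed by a
   vmul.  */
float
scale_q6 (int x)
{
  return x * (1.0f / 64);
}

(The new vfp3_const_double_for_fract_bits helper recognises such 2^-n constants and supplies the fract-bits operand, 6 in this example.)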
+ +=== modified file 'gcc/config/arm/arm-protos.h' +--- old/gcc/config/arm/arm-protos.h 2011-12-06 10:42:29 +0000 ++++ new/gcc/config/arm/arm-protos.h 2012-02-22 13:31:54 +0000 +@@ -238,6 +238,7 @@ + }; + + extern const struct tune_params *current_tune; ++extern int vfp3_const_double_for_fract_bits (rtx); + #endif /* RTX_CODE */ + + #endif /* ! GCC_ARM_PROTOS_H */ + +=== modified file 'gcc/config/arm/constraints.md' +--- old/gcc/config/arm/constraints.md 2011-12-06 10:42:29 +0000 ++++ new/gcc/config/arm/constraints.md 2012-02-22 13:31:54 +0000 +@@ -29,7 +29,7 @@ + ;; in Thumb-1 state: I, J, K, L, M, N, O + + ;; The following multi-letter normal constraints have been used: +-;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz ++;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz + ;; in Thumb-1 state: Pa, Pb, Pc, Pd + ;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py + +@@ -291,6 +291,12 @@ + (and (match_code "const_double") + (match_test "TARGET_32BIT && TARGET_VFP_DOUBLE && vfp3_const_double_rtx (op)"))) + ++(define_constraint "Dt" ++ "@internal ++ In ARM/ Thumb2 a const_double which can be used with a vcvt.f32.s32 with fract bits operation" ++ (and (match_code "const_double") ++ (match_test "TARGET_32BIT && TARGET_VFP && vfp3_const_double_for_fract_bits (op)"))) ++ + (define_memory_constraint "Ut" + "@internal + In ARM/Thumb-2 state an address valid for loading/storing opaque structure + +=== modified file 'gcc/config/arm/predicates.md' +--- old/gcc/config/arm/predicates.md 2011-12-06 10:42:29 +0000 ++++ new/gcc/config/arm/predicates.md 2012-02-22 13:31:54 +0000 +@@ -725,6 +725,11 @@ + return true; + }) + ++(define_predicate "const_double_vcvt_power_of_two_reciprocal" ++ (and (match_code "const_double") ++ (match_test "TARGET_32BIT && TARGET_VFP ++ && vfp3_const_double_for_fract_bits (op)"))) ++ + (define_special_predicate "neon_struct_operand" + (and (match_code "mem") + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2)"))) + +=== modified file 'gcc/config/arm/vfp.md' +--- old/gcc/config/arm/vfp.md 2011-12-06 10:42:29 +0000 ++++ new/gcc/config/arm/vfp.md 2012-02-22 13:31:54 +0000 +@@ -1131,9 +1131,40 @@ + (set_attr "type" "fcmpd")] + ) + ++;; Fixed point to floating point conversions. ++(define_code_iterator FCVT [unsigned_float float]) ++(define_code_attr FCVTI32typename [(unsigned_float "u32") (float "s32")]) ++ ++(define_insn "*combine_vcvt_f32_<FCVTI32typename>" ++ [(set (match_operand:SF 0 "s_register_operand" "=t") ++ (mult:SF (FCVT:SF (match_operand:SI 1 "s_register_operand" "0")) ++ (match_operand 2 ++ "const_double_vcvt_power_of_two_reciprocal" "Dt")))] ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP3 && !flag_rounding_math" ++ "vcvt.f32.<FCVTI32typename>\\t%0, %1, %v2" ++ [(set_attr "predicable" "no") ++ (set_attr "type" "f_cvt")] ++) ++ ++;; Not the ideal way of implementing this. 
Ideally we would be able to split ++;; this into a move to a DP register and then a vcvt.f64.i32 ++(define_insn "*combine_vcvt_f64_<FCVTI32typename>" ++ [(set (match_operand:DF 0 "s_register_operand" "=x,x,w") ++ (mult:DF (FCVT:DF (match_operand:SI 1 "s_register_operand" "r,t,r")) ++ (match_operand 2 ++ "const_double_vcvt_power_of_two_reciprocal" "Dt,Dt,Dt")))] ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP3 && !flag_rounding_math ++ && !TARGET_VFP_SINGLE" ++ "@ ++ vmov.f32\\t%0, %1\;vcvt.f64.<FCVTI32typename>\\t%P0, %P0, %v2 ++ vmov.f32\\t%0, %1\;vcvt.f64.<FCVTI32typename>\\t%P0, %P0, %v2 ++ vmov.f64\\t%P0, %1, %1\; vcvt.f64.<FCVTI32typename>\\t%P0, %P0, %v2" ++ [(set_attr "predicable" "no") ++ (set_attr "type" "f_cvt") ++ (set_attr "length" "8")] ++) + + ;; Store multiple insn used in function prologue. +- + (define_insn "*push_multi_vfp" + [(match_parallel 2 "multi_register_push" + [(set (match_operand:BLK 0 "memory_operand" "=m") + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106873.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106873.patch new file mode 100644 index 0000000000..5ce71a5138 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106873.patch @@ -0,0 +1,80 @@ + 2012-02-24 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + LP:#922474 + gcc/ + * config/arm/sync.md (sync_lock_releasedi): Define. + (arm_sync_lock_releasedi): Likewise. + gcc/testsuite + Backport from mainline. + 2012-01-30 Greta Yorsh <Greta.Yorsh@arm.com> + * gcc.target/arm/di-longlong64-sync-withldrexd.c: Accept + new code generated for __sync_lock_release. + +=== modified file 'gcc/config/arm/arm.md' +--- old/gcc/config/arm/arm.md 2012-02-01 14:13:07 +0000 ++++ new/gcc/config/arm/arm.md 2012-02-22 18:37:56 +0000 +@@ -157,6 +157,7 @@ + (VUNSPEC_SYNC_OP 23) ; Represent a sync_<op> + (VUNSPEC_SYNC_NEW_OP 24) ; Represent a sync_new_<op> + (VUNSPEC_SYNC_OLD_OP 25) ; Represent a sync_old_<op> ++ (VUNSPEC_SYNC_RELEASE 26) ; Represent a sync_lock_release. 
+ ] + ) + + +=== modified file 'gcc/config/arm/sync.md' +--- old/gcc/config/arm/sync.md 2011-10-14 15:47:15 +0000 ++++ new/gcc/config/arm/sync.md 2012-02-22 18:37:56 +0000 +@@ -494,3 +494,36 @@ + (set_attr "conds" "unconditional") + (set_attr "predicable" "no")]) + ++(define_expand "sync_lock_releasedi" ++ [(match_operand:DI 0 "memory_operand") ++ (match_operand:DI 1 "s_register_operand")] ++ "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN && TARGET_HAVE_MEMORY_BARRIER" ++ { ++ struct arm_sync_generator generator; ++ rtx tmp1 = gen_reg_rtx (DImode); ++ generator.op = arm_sync_generator_omn; ++ generator.u.omn = gen_arm_sync_lock_releasedi; ++ arm_expand_sync (DImode, &generator, operands[1], operands[0], NULL, tmp1); ++ DONE; ++ } ++) ++ ++(define_insn "arm_sync_lock_releasedi" ++ [(set (match_operand:DI 2 "s_register_operand" "=&r") ++ (unspec_volatile:DI [(match_operand:DI 1 "arm_sync_memory_operand" "+Q") ++ (match_operand:DI 0 "s_register_operand" "r")] ++ VUNSPEC_SYNC_RELEASE)) ++ (clobber (reg:CC CC_REGNUM)) ++ (clobber (match_scratch:SI 3 "=&r"))] ++ "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN && TARGET_HAVE_MEMORY_BARRIER" ++ { ++ return arm_output_sync_insn (insn, operands); ++ } ++ [(set_attr "sync_memory" "1") ++ (set_attr "sync_result" "2") ++ (set_attr "sync_t1" "2") ++ (set_attr "sync_t2" "3") ++ (set_attr "sync_new_value" "0") ++ (set_attr "conds" "clob") ++ (set_attr "predicable" "no")] ++) + +=== modified file 'gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c' +--- old/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c 2011-10-14 15:56:32 +0000 ++++ new/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c 2012-02-22 18:37:56 +0000 +@@ -10,8 +10,8 @@ + #include "../../gcc.dg/di-longlong64-sync-1.c" + + /* We should be using ldrexd, strexd and no helpers or shorter ldrex. */ +-/* { dg-final { scan-assembler-times "\tldrexd" 46 } } */ +-/* { dg-final { scan-assembler-times "\tstrexd" 46 } } */ ++/* { dg-final { scan-assembler-times "\tldrexd" 48 } } */ ++/* { dg-final { scan-assembler-times "\tstrexd" 48 } } */ + /* { dg-final { scan-assembler-not "__sync_" } } */ + /* { dg-final { scan-assembler-not "ldrex\t" } } */ + /* { dg-final { scan-assembler-not "strex\t" } } */ + diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106874.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106874.patch new file mode 100644 index 0000000000..092650dc9d --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106874.patch @@ -0,0 +1,46 @@ + 2012-02-24 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> + + Backport from mainline. + gcc/ + 2012-02-21 Matthew Gretton-Dann <matthew.gretton-dann@arm.com> + + Revert r183011 + * config/arm/arm-cores.def (cortex-a15): Use generic Cortex tuning + parameters. + * config/arm/arm.c (arm_cortex_a15_tune): Remove. 
*/ + +=== modified file 'gcc/config/arm/arm-cores.def' +--- old/gcc/config/arm/arm-cores.def 2012-01-23 00:36:02 +0000 ++++ new/gcc/config/arm/arm-cores.def 2012-02-22 15:53:56 +0000 +@@ -129,7 +129,7 @@ + ARM_CORE("cortex-a7", cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex) + ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex) + ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9) +-ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) ++ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex) + ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, cortex) + ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, cortex) + ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2012-02-24 16:20:29 +0000 ++++ new/gcc/config/arm/arm.c 2012-02-24 17:33:58 +0000 +@@ -988,17 +988,6 @@ + arm_default_branch_cost + }; + +-const struct tune_params arm_cortex_a15_tune = +-{ +- arm_9e_rtx_costs, +- NULL, +- 1, /* Constant limit. */ +- 1, /* Max cond insns. */ +- ARM_PREFETCH_NOT_BENEFICIAL, /* TODO: Calculate correct values. */ +- false, /* Prefer constant pool. */ +- arm_cortex_a5_branch_cost +-}; +- + const struct tune_params arm_fa726te_tune = + { + arm_9e_rtx_costs, +