1 files changed, 948 insertions, 0 deletions
diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106802.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106802.patch
new file mode 100644
index 0000000000..ade96fdd11
--- /dev/null
+++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106802.patch
@@ -0,0 +1,948 @@
+2011-09-12  Andrew Stubbs  <ams@codesourcery.com>
+
+	Backport from FSF mainline:
+
+	2011-08-30  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* config/arm/arm.c (optimal_immediate_sequence_1): Make b1, b2,
+	b3 and b4 unsigned.
+
+	2011-08-30  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* config/arm/arm.c (arm_gen_constant): Set can_negate correctly
+	when code is SET.
+
+	2011-08-26  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* config/arm/arm.c (struct four_ints): New type.
+	(count_insns_for_constant): Delete function.
+	(find_best_start): Delete function.
+	(optimal_immediate_sequence): New function.
+	(optimal_immediate_sequence_1): New function.
+	(arm_gen_constant): Move constant splitting code to
+	optimal_immediate_sequence.
+	Rewrite constant negation/invertion code.
+
+	gcc/testsuite/
+	* gcc.target/arm/thumb2-replicated-constant1.c: New file.
+	* gcc.target/arm/thumb2-replicated-constant2.c: New file.
+	* gcc.target/arm/thumb2-replicated-constant3.c: New file.
+	* gcc.target/arm/thumb2-replicated-constant4.c: New file.
+
+	2011-08-26  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* config/arm/arm-protos.h (const_ok_for_op): Add prototype.
+	* config/arm/arm.c (const_ok_for_op): Add support for addw/subw.
+	Remove prototype. Remove static function type.
+	* config/arm/arm.md (*arm_addsi3): Add addw/subw support.
+	Add arch attribute.
+	* config/arm/constraints.md (Pj, PJ): New constraints.
+
+	2011-04-20  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* config/arm/arm.c (arm_gen_constant): Move mowv support ....
+	(const_ok_for_op): ... to here.
+
+	2011-04-20  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* config/arm/arm.c (arm_gen_constant): Remove redundant can_invert.
+
+
+=== modified file 'gcc/config/arm/arm-protos.h'
+--- old/gcc/config/arm/arm-protos.h	2011-07-04 14:03:49 +0000
++++ new/gcc/config/arm/arm-protos.h	2011-08-25 13:26:58 +0000
+@@ -46,6 +46,7 @@
+ extern bool arm_small_register_classes_for_mode_p (enum machine_mode);
+ extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode);
+ extern int const_ok_for_arm (HOST_WIDE_INT);
++extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
+ extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx,
+ 			       HOST_WIDE_INT, rtx, rtx, int);
+ extern RTX_CODE arm_canonicalize_comparison (RTX_CODE, rtx *, rtx *);
+
+=== modified file 'gcc/config/arm/arm.c'
+--- old/gcc/config/arm/arm.c	2011-08-24 17:35:16 +0000
++++ new/gcc/config/arm/arm.c	2011-09-06 12:57:56 +0000
+@@ -63,6 +63,11 @@
+ 
+ void (*arm_lang_output_object_attributes_hook)(void);
+ 
++struct four_ints
++{
++  int i[4];
++};
++
+ /* Forward function declarations.  */
+ static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
+ static int arm_compute_static_chain_stack_bytes (void);
+@@ -81,7 +86,6 @@
+ static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
+ static int thumb_far_jump_used_p (void);
+ static bool thumb_force_lr_save (void);
+-static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
+ static rtx emit_sfm (int, int);
+ static unsigned arm_size_return_regs (void);
+ static bool arm_assemble_integer (rtx, unsigned int, int);
+@@ -129,7 +133,13 @@
+ static int arm_comp_type_attributes (const_tree, const_tree);
+ static void arm_set_default_type_attributes (tree);
+ static int arm_adjust_cost (rtx, rtx, rtx, int);
+-static int count_insns_for_constant (HOST_WIDE_INT, int);
++static int optimal_immediate_sequence (enum rtx_code code,
++				       unsigned HOST_WIDE_INT val,
++				       struct four_ints *return_sequence);
++static int optimal_immediate_sequence_1 (enum rtx_code code,
++					 unsigned HOST_WIDE_INT val,
++					 struct four_ints *return_sequence,
++					 int i);
+ static int arm_get_strip_length (int);
+ static bool arm_function_ok_for_sibcall (tree, tree);
+ static enum machine_mode arm_promote_function_mode (const_tree,
+@@ -2525,7 +2535,7 @@
+ }
+ 
+ /* Return true if I is a valid constant for the operation CODE.  */
+-static int
++int
+ const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
+ {
+   if (const_ok_for_arm (i))
+@@ -2533,7 +2543,21 @@
+ 
+   switch (code)
+     {
++    case SET:
++      /* See if we can use movw.  */
++      if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
++	return 1;
++      else
++	return 0;
++
+     case PLUS:
++      /* See if we can use addw or subw.  */
++      if (TARGET_THUMB2
++	  && ((i & 0xfffff000) == 0
++	      || ((-i) & 0xfffff000) == 0))
++	return 1;
++      /* else fall through.  */
++
+     case COMPARE:
+     case EQ:
+     case NE:
+@@ -2649,68 +2673,41 @@
+ 			   1);
+ }
+ 
+-/* Return the number of instructions required to synthesize the given
+-   constant, if we start emitting them from bit-position I.  */
+-static int
+-count_insns_for_constant (HOST_WIDE_INT remainder, int i)
+-{
+-  HOST_WIDE_INT temp1;
+-  int step_size = TARGET_ARM ? 2 : 1;
+-  int num_insns = 0;
+-
+-  gcc_assert (TARGET_ARM || i == 0);
+-
+-  do
+-    {
+-      int end;
+-
+-      if (i <= 0)
+-	i += 32;
+-      if (remainder & (((1 << step_size) - 1) << (i - step_size)))
+-	{
+-	  end = i - 8;
+-	  if (end < 0)
+-	    end += 32;
+-	  temp1 = remainder & ((0x0ff << end)
+-				    | ((i < end) ? (0xff >> (32 - end)) : 0));
+-	  remainder &= ~temp1;
+-	  num_insns++;
+-	  i -= 8 - step_size;
+-	}
+-      i -= step_size;
+-    } while (remainder);
+-  return num_insns;
+-}
+-
+-static int
+-find_best_start (unsigned HOST_WIDE_INT remainder)
++/* Return a sequence of integers, in RETURN_SEQUENCE that fit into
++   ARM/THUMB2 immediates, and add up to VAL.
++   Thr function return value gives the number of insns required.  */
++static int
++optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
++			    struct four_ints *return_sequence)
+ {
+   int best_consecutive_zeros = 0;
+   int i;
+   int best_start = 0;
++  int insns1, insns2;
++  struct four_ints tmp_sequence;
+ 
+   /* If we aren't targetting ARM, the best place to start is always at
+-     the bottom.  */
+-  if (! TARGET_ARM)
+-    return 0;
+-
+-  for (i = 0; i < 32; i += 2)
++     the bottom, otherwise look more closely.  */
++  if (TARGET_ARM)
+     {
+-      int consecutive_zeros = 0;
+-
+-      if (!(remainder & (3 << i)))
++      for (i = 0; i < 32; i += 2)
+ 	{
+-	  while ((i < 32) && !(remainder & (3 << i)))
+-	    {
+-	      consecutive_zeros += 2;
+-	      i += 2;
+-	    }
+-	  if (consecutive_zeros > best_consecutive_zeros)
+-	    {
+-	      best_consecutive_zeros = consecutive_zeros;
+-	      best_start = i - consecutive_zeros;
+-	    }
+-	  i -= 2;
++	  int consecutive_zeros = 0;
++
++	  if (!(val & (3 << i)))
++	    {
++	      while ((i < 32) && !(val & (3 << i)))
++		{
++		  consecutive_zeros += 2;
++		  i += 2;
++		}
++	      if (consecutive_zeros > best_consecutive_zeros)
++		{
++		  best_consecutive_zeros = consecutive_zeros;
++		  best_start = i - consecutive_zeros;
++		}
++	      i -= 2;
++	    }
+ 	}
+     }
+ 
+@@ -2737,13 +2734,161 @@
+      the constant starting from `best_start', and also starting from
+      zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
+      yield a shorter sequence, we may as well use zero.  */
++  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
+   if (best_start != 0
+-      && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
+-      && (count_insns_for_constant (remainder, 0) <=
+-	  count_insns_for_constant (remainder, best_start)))
+-    best_start = 0;
+-
+-  return best_start;
++      && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
++    {
++      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
++      if (insns2 <= insns1)
++	{
++	  *return_sequence = tmp_sequence;
++	  insns1 = insns2;
++	}
++    }
++
++  return insns1;
++}
++
++/* As for optimal_immediate_sequence, but starting at bit-position I.  */
++static int
++optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
++			     struct four_ints *return_sequence, int i)
++{
++  int remainder = val & 0xffffffff;
++  int insns = 0;
++
++  /* Try and find a way of doing the job in either two or three
++     instructions.
++     
++     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
++     location.  We start at position I.  This may be the MSB, or
++     optimial_immediate_sequence may have positioned it at the largest block 
++     of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
++     wrapping around to the top of the word when we drop off the bottom.
++     In the worst case this code should produce no more than four insns.
++
++     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
++     constants, shifted to any arbitrary location.  We should always start
++     at the MSB.  */
++  do
++    {
++      int end;
++      unsigned int b1, b2, b3, b4;
++      unsigned HOST_WIDE_INT result;
++      int loc;
++
++      gcc_assert (insns < 4);
++
++      if (i <= 0)
++	i += 32;
++
++      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
++      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
++	{
++	  loc = i;
++	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
++	    /* We can use addw/subw for the last 12 bits.  */
++	    result = remainder;
++	  else
++	    {
++	      /* Use an 8-bit shifted/rotated immediate.  */
++	      end = i - 8;
++	      if (end < 0)
++		end += 32;
++	      result = remainder & ((0x0ff << end)
++				   | ((i < end) ? (0xff >> (32 - end))
++						: 0));
++	      i -= 8;
++	    }
++	}
++      else
++	{
++	  /* Arm allows rotates by a multiple of two. Thumb-2 allows
++	     arbitrary shifts.  */
++	  i -= TARGET_ARM ? 2 : 1;
++	  continue;
++	}
++
++      /* Next, see if we can do a better job with a thumb2 replicated
++	 constant.
++       
++         We do it this way around to catch the cases like 0x01F001E0 where
++	 two 8-bit immediates would work, but a replicated constant would
++	 make it worse.
++       
++         TODO: 16-bit constants that don't clear all the bits, but still win.
++         TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
++      if (TARGET_THUMB2)
++	{
++	  b1 = (remainder & 0xff000000) >> 24;
++	  b2 = (remainder & 0x00ff0000) >> 16;
++	  b3 = (remainder & 0x0000ff00) >> 8;
++	  b4 = remainder & 0xff;
++
++	  if (loc > 24)
++	    {
++	      /* The 8-bit immediate already found clears b1 (and maybe b2),
++		 but must leave b3 and b4 alone.  */
++
++	      /* First try to find a 32-bit replicated constant that clears
++		 almost everything.  We can assume that we can't do it in one,
++		 or else we wouldn't be here.  */
++	      unsigned int tmp = b1 & b2 & b3 & b4;
++	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
++				  + (tmp << 24);
++	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
++					    + (tmp == b3) + (tmp == b4);
++	      if (tmp
++		  && (matching_bytes >= 3
++		      || (matching_bytes == 2
++			  && const_ok_for_op (remainder & ~tmp2, code))))
++		{
++		  /* At least 3 of the bytes match, and the fourth has at 
++		     least as many bits set, or two of the bytes match
++		     and it will only require one more insn to finish.  */
++		  result = tmp2;
++		  i = tmp != b1 ? 32
++		      : tmp != b2 ? 24
++		      : tmp != b3 ? 16
++		      : 8;
++		}
++
++	      /* Second, try to find a 16-bit replicated constant that can
++		 leave three of the bytes clear.  If b2 or b4 is already
++		 zero, then we can.  If the 8-bit from above would not
++		 clear b2 anyway, then we still win.  */
++	      else if (b1 == b3 && (!b2 || !b4
++			       || (remainder & 0x00ff0000 & ~result)))
++		{
++		  result = remainder & 0xff00ff00;
++		  i = 24;
++		}
++	    }
++	  else if (loc > 16)
++	    {
++	      /* The 8-bit immediate already found clears b2 (and maybe b3)
++		 and we don't get here unless b1 is alredy clear, but it will
++		 leave b4 unchanged.  */
++
++	      /* If we can clear b2 and b4 at once, then we win, since the
++		 8-bits couldn't possibly reach that far.  */
++	      if (b2 == b4)
++		{
++		  result = remainder & 0x00ff00ff;
++		  i = 16;
++		}
++	    }
++	}
++
++      return_sequence->i[insns++] = result;
++      remainder &= ~result;
++
++      if (code == SET || code == MINUS)
++	code = PLUS;
++    }
++  while (remainder);
++
++  return insns;
+ }
+ 
+ /* Emit an instruction with the indicated PATTERN.  If COND is
+@@ -2760,7 +2905,6 @@
+ 
+ /* As above, but extra parameter GENERATE which, if clear, suppresses
+    RTL generation.  */
+-/* ??? This needs more work for thumb2.  */
+ 
+ static int
+ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
+@@ -2772,15 +2916,15 @@
+   int final_invert = 0;
+   int can_negate_initial = 0;
+   int i;
+-  int num_bits_set = 0;
+   int set_sign_bit_copies = 0;
+   int clear_sign_bit_copies = 0;
+   int clear_zero_bit_copies = 0;
+   int set_zero_bit_copies = 0;
+-  int insns = 0;
++  int insns = 0, neg_insns, inv_insns;
+   unsigned HOST_WIDE_INT temp1, temp2;
+   unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
+-  int step_size = TARGET_ARM ? 2 : 1;
++  struct four_ints *immediates;
++  struct four_ints pos_immediates, neg_immediates, inv_immediates;
+ 
+   /* Find out which operations are safe for a given CODE.  Also do a quick
+      check for degenerate cases; these can occur when DImode operations
+@@ -2789,7 +2933,6 @@
+     {
+     case SET:
+       can_invert = 1;
+-      can_negate = 1;
+       break;
+ 
+     case PLUS:
+@@ -2817,9 +2960,6 @@
+ 				gen_rtx_SET (VOIDmode, target, source));
+ 	  return 1;
+ 	}
+-
+-      if (TARGET_THUMB2)
+-	can_invert = 1;
+       break;
+ 
+     case AND:
+@@ -2861,6 +3001,7 @@
+ 					     gen_rtx_NOT (mode, source)));
+ 	  return 1;
+ 	}
++      final_invert = 1;
+       break;
+ 
+     case MINUS:
+@@ -2883,7 +3024,6 @@
+ 							    source)));
+ 	  return 1;
+ 	}
+-      can_negate = 1;
+ 
+       break;
+ 
+@@ -2892,9 +3032,7 @@
+     }
+ 
+   /* If we can do it in one insn get out quickly.  */
+-  if (const_ok_for_arm (val)
+-      || (can_negate_initial && const_ok_for_arm (-val))
+-      || (can_invert && const_ok_for_arm (~val)))
++  if (const_ok_for_op (val, code))
+     {
+       if (generate)
+ 	emit_constant_insn (cond,
+@@ -2947,15 +3085,6 @@
+   switch (code)
+     {
+     case SET:
+-      /* See if we can use movw.  */
+-      if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
+-	{
+-	  if (generate)
+-	    emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
+-						   GEN_INT (val)));
+-	  return 1;
+-	}
+-
+       /* See if we can do this by sign_extending a constant that is known
+ 	 to be negative.  This is a good, way of doing it, since the shift
+ 	 may well merge into a subsequent insn.  */
+@@ -3306,121 +3435,97 @@
+       break;
+     }
+ 
+-  for (i = 0; i < 32; i++)
+-    if (remainder & (1 << i))
+-      num_bits_set++;
+-
+-  if ((code == AND)
+-      || (code != IOR && can_invert && num_bits_set > 16))
+-    remainder ^= 0xffffffff;
+-  else if (code == PLUS && num_bits_set > 16)
+-    remainder = (-remainder) & 0xffffffff;
+-
+-  /* For XOR, if more than half the bits are set and there's a sequence
+-     of more than 8 consecutive ones in the pattern then we can XOR by the
+-     inverted constant and then invert the final result; this may save an
+-     instruction and might also lead to the final mvn being merged with
+-     some other operation.  */
+-  else if (code == XOR && num_bits_set > 16
+-	   && (count_insns_for_constant (remainder ^ 0xffffffff,
+-					 find_best_start
+-					 (remainder ^ 0xffffffff))
+-	       < count_insns_for_constant (remainder,
+-					   find_best_start (remainder))))
+-    {
+-      remainder ^= 0xffffffff;
+-      final_invert = 1;
++  /* Calculate what the instruction sequences would be if we generated it
++     normally, negated, or inverted.  */
++  if (code == AND)
++    /* AND cannot be split into multiple insns, so invert and use BIC.  */
++    insns = 99;
++  else
++    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
++
++  if (can_negate)
++    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
++					    &neg_immediates);
++  else
++    neg_insns = 99;
++
++  if (can_invert || final_invert)
++    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
++					    &inv_immediates);
++  else
++    inv_insns = 99;
++
++  immediates = &pos_immediates;
++
++  /* Is the negated immediate sequence more efficient?  */
++  if (neg_insns < insns && neg_insns <= inv_insns)
++    {
++      insns = neg_insns;
++      immediates = &neg_immediates;
++    }
++  else
++    can_negate = 0;
++
++  /* Is the inverted immediate sequence more efficient?
++     We must allow for an extra NOT instruction for XOR operations, although
++     there is some chance that the final 'mvn' will get optimized later.  */
++  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
++    {
++      insns = inv_insns;
++      immediates = &inv_immediates;
+     }
+   else
+     {
+       can_invert = 0;
+-      can_negate = 0;
++      final_invert = 0;
+     }
+ 
+-  /* Now try and find a way of doing the job in either two or three
+-     instructions.
+-     We start by looking for the largest block of zeros that are aligned on
+-     a 2-bit boundary, we then fill up the temps, wrapping around to the
+-     top of the word when we drop off the bottom.
+-     In the worst case this code should produce no more than four insns.
+-     Thumb-2 constants are shifted, not rotated, so the MSB is always the
+-     best place to start.  */
+-
+-  /* ??? Use thumb2 replicated constants when the high and low halfwords are
+-     the same.  */
+-  {
+-    /* Now start emitting the insns.  */
+-    i = find_best_start (remainder);
+-    do
+-      {
+-	int end;
+-
+-	if (i <= 0)
+-	  i += 32;
+-	if (remainder & (3 << (i - 2)))
+-	  {
+-	    end = i - 8;
+-	    if (end < 0)
+-	      end += 32;
+-	    temp1 = remainder & ((0x0ff << end)
+-				 | ((i < end) ? (0xff >> (32 - end)) : 0));
+-	    remainder &= ~temp1;
+-
+-	    if (generate)
+-	      {
+-		rtx new_src, temp1_rtx;
+-
+-		if (code == SET || code == MINUS)
+-		  {
+-		    new_src = (subtargets ? gen_reg_rtx (mode) : target);
+-		    if (can_invert && code != MINUS)
+-		      temp1 = ~temp1;
+-		  }
+-		else
+-		  {
+-		    if ((final_invert || remainder) && subtargets)
+-		      new_src = gen_reg_rtx (mode);
+-		    else
+-		      new_src = target;
+-		    if (can_invert)
+-		      temp1 = ~temp1;
+-		    else if (can_negate)
+-		      temp1 = -temp1;
+-		  }
+-
+-		temp1 = trunc_int_for_mode (temp1, mode);
+-		temp1_rtx = GEN_INT (temp1);
+-
+-		if (code == SET)
+-		  ;
+-		else if (code == MINUS)
+-		  temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
+-		else
+-		  temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
+-
+-		emit_constant_insn (cond,
+-				    gen_rtx_SET (VOIDmode, new_src,
+-						 temp1_rtx));
+-		source = new_src;
+-	      }
+-
+-	    if (code == SET)
+-	      {
+-		can_invert = 0;
+-		code = PLUS;
+-	      }
+-	    else if (code == MINUS)
++  /* Now output the chosen sequence as instructions.  */
++  if (generate)
++    {
++      for (i = 0; i < insns; i++)
++	{
++	  rtx new_src, temp1_rtx;
++
++	  temp1 = immediates->i[i];
++
++	  if (code == SET || code == MINUS)
++	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
++	  else if ((final_invert || i < (insns - 1)) && subtargets)
++	    new_src = gen_reg_rtx (mode);
++	  else
++	    new_src = target;
++
++	  if (can_invert)
++	    temp1 = ~temp1;
++	  else if (can_negate)
++	    temp1 = -temp1;
++
++	  temp1 = trunc_int_for_mode (temp1, mode);
++	  temp1_rtx = GEN_INT (temp1);
++
++	  if (code == SET)
++	    ;
++	  else if (code == MINUS)
++	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
++	  else
++	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
++
++	  emit_constant_insn (cond,
++			      gen_rtx_SET (VOIDmode, new_src,
++					   temp1_rtx));
++	  source = new_src;
++
++	  if (code == SET)
++	    {
++	      can_negate = can_invert;
++	      can_invert = 0;
+ 	      code = PLUS;
+-
+-	    insns++;
+-	    i -= 8 - step_size;
+-	  }
+-	/* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
+-	   shifts.  */
+-	i -= step_size;
+-      }
+-    while (remainder);
+-  }
++	    }
++	  else if (code == MINUS)
++	    code = PLUS;
++	}
++    }
+ 
+   if (final_invert)
+     {
+
+=== modified file 'gcc/config/arm/arm.md'
+--- old/gcc/config/arm/arm.md	2011-08-25 11:42:09 +0000
++++ new/gcc/config/arm/arm.md	2011-08-25 13:26:58 +0000
+@@ -701,21 +701,24 @@
+ ;;  (plus (reg rN) (reg sp)) into (reg rN).  In this case reload will
+ ;; put the duplicated register first, and not try the commutative version.
+ (define_insn_and_split "*arm_addsi3"
+-  [(set (match_operand:SI          0 "s_register_operand" "=r, k,r,r, k,r")
+-	(plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k,rk")
+-		 (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,L, L,?n")))]
++  [(set (match_operand:SI          0 "s_register_operand" "=r, k,r,r, k, r, k,r, k, r")
++	(plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k, rk,k,rk,k, rk")
++		 (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,Pj,Pj,L, L,PJ,PJ,?n")))]
+   "TARGET_32BIT"
+   "@
+    add%?\\t%0, %1, %2
+    add%?\\t%0, %1, %2
+    add%?\\t%0, %2, %1
+-   sub%?\\t%0, %1, #%n2
+-   sub%?\\t%0, %1, #%n2
++   addw%?\\t%0, %1, %2
++   addw%?\\t%0, %1, %2
++   sub%?\\t%0, %1, #%n2
++   sub%?\\t%0, %1, #%n2
++   subw%?\\t%0, %1, #%n2
++   subw%?\\t%0, %1, #%n2
+    #"
+   "TARGET_32BIT
+    && GET_CODE (operands[2]) == CONST_INT
+-   && !(const_ok_for_arm (INTVAL (operands[2]))
+-        || const_ok_for_arm (-INTVAL (operands[2])))
++   && !const_ok_for_op (INTVAL (operands[2]), PLUS)
+    && (reload_completed || !arm_eliminable_register (operands[1]))"
+   [(clobber (const_int 0))]
+   "
+@@ -724,8 +727,9 @@
+ 		      operands[1], 0);
+   DONE;
+   "
+-  [(set_attr "length" "4,4,4,4,4,16")
+-   (set_attr "predicable" "yes")]
++  [(set_attr "length" "4,4,4,4,4,4,4,4,4,16")
++   (set_attr "predicable" "yes")
++   (set_attr "arch" "*,*,*,t2,t2,*,*,t2,t2,*")]
+ )
+ 
+ (define_insn_and_split "*thumb1_addsi3"
+
+=== modified file 'gcc/config/arm/constraints.md'
+--- old/gcc/config/arm/constraints.md	2011-01-03 20:52:22 +0000
++++ new/gcc/config/arm/constraints.md	2011-08-25 13:26:58 +0000
+@@ -31,7 +31,7 @@
+ ;; The following multi-letter normal constraints have been used:
+ ;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz
+ ;; in Thumb-1 state: Pa, Pb, Pc, Pd
+-;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px
++;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px
+ 
+ ;; The following memory constraints have been used:
+ ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
+@@ -74,6 +74,18 @@
+ 	   (and (match_code "const_int")
+                 (match_test "(ival & 0xffff0000) == 0")))))
+ 
++(define_constraint "Pj"
++ "@internal A 12-bit constant suitable for an ADDW or SUBW instruction. (Thumb-2)"
++ (and (match_code "const_int")
++      (and (match_test "TARGET_THUMB2")
++	   (match_test "(ival & 0xfffff000) == 0"))))
++
++(define_constraint "PJ"
++ "@internal A constant that satisfies the Pj constrant if negated."
++ (and (match_code "const_int")
++      (and (match_test "TARGET_THUMB2")
++	   (match_test "((-ival) & 0xfffff000) == 0"))))
++
+ (define_register_constraint "k" "STACK_REG"
+  "@internal The stack register.")
+ 
+
+=== added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c'
+--- old/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c	1970-01-01 00:00:00 +0000
++++ new/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c	2011-08-25 13:31:00 +0000
+@@ -0,0 +1,27 @@
++/* Ensure simple replicated constant immediates work.  */
++/* { dg-options "-mthumb -O2" } */
++/* { dg-require-effective-target arm_thumb2_ok } */
++
++int
++foo1 (int a)
++{
++  return a + 0xfefefefe;
++}
++
++/* { dg-final { scan-assembler "add.*#-16843010" } } */
++
++int
++foo2 (int a)
++{
++  return a - 0xab00ab00;
++}
++
++/* { dg-final { scan-assembler "sub.*#-1426019584" } } */
++
++int
++foo3 (int a)
++{
++  return a & 0x00cd00cd;
++}
++
++/* { dg-final { scan-assembler "and.*#13435085" } } */
+
+=== added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c'
+--- old/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c	1970-01-01 00:00:00 +0000
++++ new/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c	2011-08-25 13:31:00 +0000
+@@ -0,0 +1,75 @@
++/* Ensure split constants can use replicated patterns.  */
++/* { dg-options "-mthumb -O2" } */
++/* { dg-require-effective-target arm_thumb2_ok } */
++
++int
++foo1 (int a)
++{
++  return a + 0xfe00fe01;
++}
++
++/* { dg-final { scan-assembler "add.*#-33489408" } } */
++/* { dg-final { scan-assembler "add.*#1" } } */
++
++int
++foo2 (int a)
++{
++  return a + 0xdd01dd00;
++}
++
++/* { dg-final { scan-assembler "add.*#-587145984" } } */
++/* { dg-final { scan-assembler "add.*#65536" } } */
++
++int
++foo3 (int a)
++{
++  return a + 0x00443344;
++}
++
++/* { dg-final { scan-assembler "add.*#4456516" } } */
++/* { dg-final { scan-assembler "add.*#13056" } } */
++
++int
++foo4 (int a)
++{
++  return a + 0x77330033;
++}
++
++/* { dg-final { scan-assembler "add.*#1996488704" } } */
++/* { dg-final { scan-assembler "add.*#3342387" } } */
++
++int
++foo5 (int a)
++{
++  return a + 0x11221122;
++}
++
++/* { dg-final { scan-assembler "add.*#285217024" } } */
++/* { dg-final { scan-assembler "add.*#2228258" } } */
++
++int
++foo6 (int a)
++{
++  return a + 0x66666677;
++}
++
++/* { dg-final { scan-assembler "add.*#1717986918" } } */
++/* { dg-final { scan-assembler "add.*#17" } } */
++
++int
++foo7 (int a)
++{
++  return a + 0x99888888;
++}
++
++/* { dg-final { scan-assembler "add.*#-2004318072" } } */
++/* { dg-final { scan-assembler "add.*#285212672" } } */
++
++int
++foo8 (int a)
++{
++  return a + 0xdddddfff;
++}
++
++/* { dg-final { scan-assembler "add.*#-572662307" } } */
++/* { dg-final { scan-assembler "addw.*#546" } } */
+
+=== added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c'
+--- old/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c	1970-01-01 00:00:00 +0000
++++ new/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c	2011-08-25 13:31:00 +0000
+@@ -0,0 +1,28 @@
++/* Ensure negated/inverted replicated constant immediates work.  */
++/* { dg-options "-mthumb -O2" } */
++/* { dg-require-effective-target arm_thumb2_ok } */
++
++int
++foo1 (int a)
++{
++  return a | 0xffffff00;
++}
++
++/* { dg-final { scan-assembler "orn.*#255" } } */
++
++int
++foo2 (int a)
++{
++  return a & 0xffeeffee;
++}
++
++/* { dg-final { scan-assembler "bic.*#1114129" } } */
++
++int
++foo3 (int a)
++{
++  return a & 0xaaaaaa00;
++}
++
++/* { dg-final { scan-assembler "and.*#-1431655766" } } */
++/* { dg-final { scan-assembler "bic.*#170" } } */
+
+=== added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c'
+--- old/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c	1970-01-01 00:00:00 +0000
++++ new/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c	2011-08-25 13:31:00 +0000
+@@ -0,0 +1,22 @@
++/* Ensure replicated constants don't make things worse.  */
++/* { dg-options "-mthumb -O2" } */
++/* { dg-require-effective-target arm_thumb2_ok } */
++
++int
++foo1 (int a)
++{
++  /* It might be tempting to use 0x01000100, but it wouldn't help. */
++  return a + 0x01f001e0;
++}
++
++/* { dg-final { scan-assembler "add.*#32505856" } } */
++/* { dg-final { scan-assembler "add.*#480" } } */
++
++int
++foo2 (int a)
++{
++  return a + 0x0f100e10;
++}
++
++/* { dg-final { scan-assembler "add.*#252706816" } } */
++/* { dg-final { scan-assembler "add.*#3600" } } */
+