1 files changed, 688 insertions, 0 deletions
diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99323.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99323.patch
new file mode 100644
index 0000000000..38b6fa3f0e
--- /dev/null
+++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99323.patch
@@ -0,0 +1,688 @@
+	Vladimir Prus  <vladimir@codesourcery.com>
+	Julian Brown  <julian@codesourcery.com>
+
+	gcc/
+	* config/arm/arm.c (arm_override_options): Warn if mlow-irq-latency is
+	specified in Thumb mode.
+	(load_multiple_sequence): Return 0 if low irq latency is requested.
+	(store_multiple_sequence): Likewise.
+	(arm_gen_load_multiple): Load registers one-by-one if low irq latency
+	is requested.
+	(arm_gen_store_multiple): Likewise.
+	(vfp_output_fldmd): When low_irq_latency is non zero, pop each
+	register separately.
+	(vfp_emit_fstmd): When low_irq_latency is non zero, save each register
+	separately.
+	(arm_get_vfp_saved_size): Adjust saved register size calculation for
+	the above changes.
+	(print_pop_reg_by_ldr): New.
+	(arm_output_epilogue): Use print_pop_reg_by_ldr when low irq latency
+	is requested.
+	(emit_multi_reg_push): Push registers separately if low irq latency
+	is requested.
+	* config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Set __low_irq_latency__.
+	(low_irq_latency): Define.
+	(USE_RETURN_INSN): Don't use return insn when low irq latency is
+	requested.
+	* config/arm/lib1funcs.asm (do_pop, do_push): Define as variadic
+	macros. When __low_irq_latency__ is defined, push and pop registers
+	individually.
+	(div0): Use correct punctuation.
+	* config/arm/ieee754-df.S: Adjust syntax of using do_push.
+	* config/arm/ieee754-sf.S: Likewise.
+	* config/arm/bpabi.S: Likewise.
+	* config/arm/arm.opt (mlow-irq-latency): New option.
+	* config/arm/predicates.md (load_multiple_operation): Return false is
+	low irq latency is requested.
+	(store_multiple_operation): Likewise.
+	* config/arm/arm.md (movmemqi): Don't use it if low irq latency is
+	requested.
+	* doc/invoke.texi (-mlow-irq-latency): Add documentation.
+
+2010-07-26  Julian Brown  <julian@codesourcery.com>
+
+	Merge from Sourcery G++ 4.4:
+
+ 	2007-06-06  Joseph Myers  <joseph@codesourcery.com>
+ 
+ 	gcc/
+
+=== modified file 'gcc/config/arm/arm.c'
+--- old/gcc/config/arm/arm.c	2010-08-05 14:33:31 +0000
++++ new/gcc/config/arm/arm.c	2010-08-05 15:20:54 +0000
+@@ -1884,6 +1884,13 @@
+ 
+   /* Register global variables with the garbage collector.  */
+   arm_add_gc_roots ();
++
++  if (low_irq_latency && TARGET_THUMB)
++    {
++      warning (0, 
++	       "-mlow-irq-latency has no effect when compiling for Thumb");
++      low_irq_latency = 0;
++    }
+ }
+ 
+ static void
+@@ -9053,6 +9060,9 @@
+   int base_reg = -1;
+   int i;
+ 
++  if (low_irq_latency)
++    return 0;
++
+   /* Can only handle 2, 3, or 4 insns at present,
+      though could be easily extended if required.  */
+   gcc_assert (nops >= 2 && nops <= 4);
+@@ -9282,6 +9292,9 @@
+   int base_reg = -1;
+   int i;
+ 
++  if (low_irq_latency)
++    return 0;
++
+   /* Can only handle 2, 3, or 4 insns at present, though could be easily
+      extended if required.  */
+   gcc_assert (nops >= 2 && nops <= 4);
+@@ -9489,7 +9502,7 @@
+ 
+      As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
+      for counts of 3 or 4 regs.  */
+-  if (arm_tune_xscale && count <= 2 && ! optimize_size)
++  if (low_irq_latency || (arm_tune_xscale && count <= 2 && ! optimize_size))
+     {
+       rtx seq;
+ 
+@@ -9552,7 +9565,7 @@
+ 
+   /* See arm_gen_load_multiple for discussion of
+      the pros/cons of ldm/stm usage for XScale.  */
+-  if (arm_tune_xscale && count <= 2 && ! optimize_size)
++  if (low_irq_latency || (arm_tune_xscale && count <= 2 && ! optimize_size))
+     {
+       rtx seq;
+ 
+@@ -11795,6 +11808,21 @@
+ vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
+ {
+   int i;
++  int offset;
++
++  if (low_irq_latency)
++    {
++      /* Output a sequence of FLDD instructions.  */
++      offset = 0;
++      for (i = reg; i < reg + count; ++i, offset += 8)
++	{
++	  fputc ('\t', stream);
++	  asm_fprintf (stream, "fldd\td%d, [%r,#%d]\n", i, base, offset);
++	}
++      asm_fprintf (stream, "\tadd\tsp, sp, #%d\n", count * 8);
++      return;
++    }
++
+ 
+   /* Workaround ARM10 VFPr1 bug.  */
+   if (count == 2 && !arm_arch6)
+@@ -11865,6 +11893,56 @@
+   rtx tmp, reg;
+   int i;
+ 
++  if (low_irq_latency)
++    {
++      int saved_size;
++      rtx sp_insn;
++
++      if (!count)
++	return 0;
++
++      saved_size = count * GET_MODE_SIZE (DFmode);
++
++      /* Since fstd does not have postdecrement addressing mode,
++	 we first decrement stack pointer and then use base+offset
++	 stores for VFP registers. The ARM EABI unwind information 
++	 can't easily describe base+offset loads, so we attach
++	 a note for the effects of the whole block in the first insn, 
++	 and  avoid marking the subsequent instructions 
++	 with RTX_FRAME_RELATED_P.  */
++      sp_insn = gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
++			    GEN_INT (-saved_size));
++      sp_insn = emit_insn (sp_insn);
++      RTX_FRAME_RELATED_P (sp_insn) = 1;
++
++      dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
++      XVECEXP (dwarf, 0, 0) = 
++	gen_rtx_SET (VOIDmode, stack_pointer_rtx,
++		     plus_constant (stack_pointer_rtx, -saved_size));
++      
++      /* push double VFP registers to stack */
++      for (i = 0; i < count; ++i )
++	{
++	  rtx reg;
++	  rtx mem;
++	  rtx addr;
++	  rtx insn;
++	  reg = gen_rtx_REG (DFmode, base_reg + 2*i);
++	  addr = (i == 0) ? stack_pointer_rtx
++	    : gen_rtx_PLUS (SImode, stack_pointer_rtx,
++			    GEN_INT (i * GET_MODE_SIZE (DFmode)));
++	  mem = gen_frame_mem (DFmode, addr);
++	  insn = emit_move_insn (mem, reg);
++	  XVECEXP (dwarf, 0, i+1) = 
++	    gen_rtx_SET (VOIDmode, mem, reg);
++	}
++
++      REG_NOTES (sp_insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
++					       REG_NOTES (sp_insn));
++      
++      return saved_size;
++    }
++
+   /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
+      register pairs are stored by a store multiple insn.  We avoid this
+      by pushing an extra pair.  */
+@@ -13307,7 +13385,7 @@
+ 	      if (count > 0)
+ 		{
+ 		  /* Workaround ARM10 VFPr1 bug.  */
+-		  if (count == 2 && !arm_arch6)
++		  if (count == 2 && !arm_arch6 && !low_irq_latency)
+ 		    count++;
+ 		  saved += count * 8;
+ 		}
+@@ -13645,6 +13723,41 @@
+ 
+ }
+ 
++/* Generate to STREAM a code sequence that pops registers identified 
++   in REGS_MASK from SP. SP is incremented as the result.
++*/
++static void
++print_pop_reg_by_ldr (FILE *stream, int regs_mask, int rfe)
++{
++  int reg;
++
++  gcc_assert (! (regs_mask & (1 << SP_REGNUM)));
++  
++  for (reg = 0; reg < PC_REGNUM; ++reg)
++    if (regs_mask & (1 << reg))
++      asm_fprintf (stream, "\tldr\t%r, [%r], #4\n",
++		   reg, SP_REGNUM); 
++
++  if (regs_mask & (1 << PC_REGNUM))
++    {
++      if (rfe)
++	/* When returning from exception, we need to
++	   copy SPSR to CPSR.  There are two ways to do
++	   that: the ldm instruction with "^" suffix,
++	   and movs instruction.  The latter would
++	   require that we load from stack to some
++	   scratch register, and then move to PC.
++	   Therefore, we'd need extra instruction and
++	   have to make sure we actually have a spare
++	   register.  Using ldm with a single register
++	   is simler.  */
++	asm_fprintf (stream, "\tldm\tsp!, {pc}^\n");
++      else
++	asm_fprintf (stream, "\tldr\t%r, [%r], #4\n",
++		     PC_REGNUM, SP_REGNUM); 
++    }
++}
++
+ const char *
+ arm_output_epilogue (rtx sibling)
+ {
+@@ -14018,22 +14131,19 @@
+ 	 to load use the LDR instruction - it is faster.  For Thumb-2
+ 	 always use pop and the assembler will pick the best instruction.*/
+       if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
+-	  && !IS_INTERRUPT(func_type))
++	  && !IS_INTERRUPT (func_type))
+ 	{
+ 	  asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
+ 	}
+       else if (saved_regs_mask)
+ 	{
+-	  if (saved_regs_mask & (1 << SP_REGNUM))
+-	    /* Note - write back to the stack register is not enabled
+-	       (i.e. "ldmfd sp!...").  We know that the stack pointer is
+-	       in the list of registers and if we add writeback the
+-	       instruction becomes UNPREDICTABLE.  */
+-	    print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
+-			     rfe);
+-	  else if (TARGET_ARM)
+-	    print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
+-			     rfe);
++	  gcc_assert ( ! (saved_regs_mask & (1 << SP_REGNUM)));
++	  if (TARGET_ARM)
++	    if (low_irq_latency)
++	      print_pop_reg_by_ldr (f, saved_regs_mask, rfe);
++	    else
++	      print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
++			       rfe);
+ 	  else
+ 	    print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
+ 	}
+@@ -14154,6 +14264,32 @@
+ 
+   gcc_assert (num_regs && num_regs <= 16);
+ 
++  if (low_irq_latency)
++    {
++      rtx insn = 0;
++
++      /* Emit a series of ldr instructions rather rather than a single ldm.  */
++      /* TODO: Use ldrd where possible.  */
++      gcc_assert (! (mask & (1 << SP_REGNUM)));
++
++      for (i = LAST_ARM_REGNUM; i >= 0; --i)
++        {
++          if (mask & (1 << i))
++
++            {
++              rtx reg, where, mem;
++
++	      reg = gen_rtx_REG (SImode, i);
++	      where = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
++	      mem = gen_rtx_MEM (SImode, where);
++	      insn = emit_move_insn (mem, reg);
++	      RTX_FRAME_RELATED_P (insn) = 1;
++            }
++        }
++
++      return insn;
++    }
++
+   /* We don't record the PC in the dwarf frame information.  */
+   num_dwarf_regs = num_regs;
+   if (mask & (1 << PC_REGNUM))
+
+=== modified file 'gcc/config/arm/arm.h'
+--- old/gcc/config/arm/arm.h	2010-08-05 14:33:31 +0000
++++ new/gcc/config/arm/arm.h	2010-08-05 15:20:54 +0000
+@@ -101,6 +101,8 @@
+ 	      builtin_define ("__ARM_PCS");		\
+ 	    builtin_define ("__ARM_EABI__");		\
+ 	  }						\
++	if (low_irq_latency)				\
++	  builtin_define ("__low_irq_latency__");	\
+     } while (0)
+ 
+ /* The various ARM cores.  */
+@@ -449,6 +451,10 @@
+ /* Nonzero if chip supports integer division instruction.  */
+ extern int arm_arch_hwdiv;
+ 
++/* Nonzero if we should minimize interrupt latency of the
++   generated code.  */
++extern int low_irq_latency;
++
+ #ifndef TARGET_DEFAULT
+ #define TARGET_DEFAULT  (MASK_APCS_FRAME)
+ #endif
+@@ -1823,9 +1829,10 @@
+ /* Determine if the epilogue should be output as RTL.
+    You should override this if you define FUNCTION_EXTRA_EPILOGUE.  */
+ /* This is disabled for Thumb-2 because it will confuse the
+-   conditional insn counter.  */
++   conditional insn counter.
++   Do not use a return insn if we're avoiding ldm/stm instructions.  */
+ #define USE_RETURN_INSN(ISCOND)				\
+-  (TARGET_ARM ? use_return_insn (ISCOND, NULL) : 0)
++  ((TARGET_ARM && !low_irq_latency) ? use_return_insn (ISCOND, NULL) : 0)
+ 
+ /* Definitions for register eliminations.
+ 
+
+=== modified file 'gcc/config/arm/arm.md'
+--- old/gcc/config/arm/arm.md	2010-08-05 12:06:40 +0000
++++ new/gcc/config/arm/arm.md	2010-08-05 15:20:54 +0000
+@@ -6587,7 +6587,7 @@
+    (match_operand:BLK 1 "general_operand" "")
+    (match_operand:SI 2 "const_int_operand" "")
+    (match_operand:SI 3 "const_int_operand" "")]
+-  "TARGET_EITHER"
++  "TARGET_EITHER && !low_irq_latency"
+   "
+   if (TARGET_32BIT)
+     {
+
+=== modified file 'gcc/config/arm/arm.opt'
+--- old/gcc/config/arm/arm.opt	2009-06-18 11:24:10 +0000
++++ new/gcc/config/arm/arm.opt	2010-08-05 15:20:54 +0000
+@@ -161,6 +161,10 @@
+ Target Report Mask(NEON_VECTORIZE_QUAD)
+ Use Neon quad-word (rather than double-word) registers for vectorization
+ 
++mlow-irq-latency
++Target Report Var(low_irq_latency)
++Try to reduce interrupt latency of the generated code
++
+ mword-relocations
+ Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS)
+ Only generate absolute relocations on word sized values.
+
+=== modified file 'gcc/config/arm/bpabi.S'
+--- old/gcc/config/arm/bpabi.S	2009-12-17 15:37:23 +0000
++++ new/gcc/config/arm/bpabi.S	2010-08-05 15:20:54 +0000
+@@ -116,16 +116,17 @@
+ 	test_div_by_zero signed
+ 
+ 	sub sp, sp, #8
+-#if defined(__thumb2__)
++/* Low latency and Thumb-2 do_push implementations can't push sp directly.  */
++#if defined(__thumb2__) || defined(__irq_low_latency__)
+ 	mov ip, sp
+-	push {ip, lr}
++	do_push (ip, lr)
+ #else
+-	do_push {sp, lr}
++	stmfd sp!, {sp, lr}
+ #endif
+ 	bl SYM(__gnu_ldivmod_helper) __PLT__
+ 	ldr lr, [sp, #4]
+ 	add sp, sp, #8
+-	do_pop {r2, r3}
++	do_pop (r2, r3)
+ 	RET
+ 	
+ #endif /* L_aeabi_ldivmod */
+@@ -136,16 +137,17 @@
+ 	test_div_by_zero unsigned
+ 
+ 	sub sp, sp, #8
+-#if defined(__thumb2__)
++/* Low latency and Thumb-2 do_push implementations can't push sp directly.  */
++#if defined(__thumb2__) || defined(__irq_low_latency__)
+ 	mov ip, sp
+-	push {ip, lr}
++	do_push (ip, lr)
+ #else
+-	do_push {sp, lr}
++	stmfd sp!, {sp, lr}
+ #endif
+ 	bl SYM(__gnu_uldivmod_helper) __PLT__
+ 	ldr lr, [sp, #4]
+ 	add sp, sp, #8
+-	do_pop {r2, r3}
++	do_pop (r2, r3)
+ 	RET
+ 	
+ #endif /* L_aeabi_divmod */
+
+=== modified file 'gcc/config/arm/ieee754-df.S'
+--- old/gcc/config/arm/ieee754-df.S	2009-06-05 12:52:36 +0000
++++ new/gcc/config/arm/ieee754-df.S	2010-08-05 15:20:54 +0000
+@@ -83,7 +83,7 @@
+ ARM_FUNC_START adddf3
+ ARM_FUNC_ALIAS aeabi_dadd adddf3
+ 
+-1:	do_push	{r4, r5, lr}
++1:	do_push	(r4, r5, lr)
+ 
+ 	@ Look for zeroes, equal values, INF, or NAN.
+ 	shift1	lsl, r4, xh, #1
+@@ -427,7 +427,7 @@
+ 	do_it	eq, t
+ 	moveq	r1, #0
+ 	RETc(eq)
+-	do_push	{r4, r5, lr}
++	do_push	(r4, r5, lr)
+ 	mov	r4, #0x400		@ initial exponent
+ 	add	r4, r4, #(52-1 - 1)
+ 	mov	r5, #0			@ sign bit is 0
+@@ -447,7 +447,7 @@
+ 	do_it	eq, t
+ 	moveq	r1, #0
+ 	RETc(eq)
+-	do_push	{r4, r5, lr}
++	do_push	(r4, r5, lr)
+ 	mov	r4, #0x400		@ initial exponent
+ 	add	r4, r4, #(52-1 - 1)
+ 	ands	r5, r0, #0x80000000	@ sign bit in r5
+@@ -481,7 +481,7 @@
+ 	RETc(eq)			@ we are done already.
+ 
+ 	@ value was denormalized.  We can normalize it now.
+-	do_push	{r4, r5, lr}
++	do_push	(r4, r5, lr)
+ 	mov	r4, #0x380		@ setup corresponding exponent
+ 	and	r5, xh, #0x80000000	@ move sign bit in r5
+ 	bic	xh, xh, #0x80000000
+@@ -508,9 +508,9 @@
+ 	@ compatibility.
+ 	adr	ip, LSYM(f0_ret)
+ 	@ Push pc as well so that RETLDM works correctly.
+-	do_push	{r4, r5, ip, lr, pc}
++	do_push	(r4, r5, ip, lr, pc)
+ #else
+-	do_push	{r4, r5, lr}
++	do_push	(r4, r5, lr)
+ #endif
+ 
+ 	mov	r5, #0
+@@ -534,9 +534,9 @@
+ 	@ compatibility.
+ 	adr	ip, LSYM(f0_ret)
+ 	@ Push pc as well so that RETLDM works correctly.
+-	do_push	{r4, r5, ip, lr, pc}
++	do_push	(r4, r5, ip, lr, pc)
+ #else
+-	do_push	{r4, r5, lr}
++	do_push	(r4, r5, lr)
+ #endif
+ 
+ 	ands	r5, ah, #0x80000000	@ sign bit in r5
+@@ -585,7 +585,7 @@
+ 	@ Legacy code expects the result to be returned in f0.  Copy it
+ 	@ there as well.
+ LSYM(f0_ret):
+-	do_push	{r0, r1}
++	do_push	(r0, r1)
+ 	ldfd	f0, [sp], #8
+ 	RETLDM
+ 
+@@ -602,7 +602,7 @@
+ 
+ ARM_FUNC_START muldf3
+ ARM_FUNC_ALIAS aeabi_dmul muldf3
+-	do_push	{r4, r5, r6, lr}
++	do_push	(r4, r5, r6, lr)
+ 
+ 	@ Mask out exponents, trap any zero/denormal/INF/NAN.
+ 	mov	ip, #0xff
+@@ -910,7 +910,7 @@
+ ARM_FUNC_START divdf3
+ ARM_FUNC_ALIAS aeabi_ddiv divdf3
+ 	
+-	do_push	{r4, r5, r6, lr}
++	do_push	(r4, r5, r6, lr)
+ 
+ 	@ Mask out exponents, trap any zero/denormal/INF/NAN.
+ 	mov	ip, #0xff
+@@ -1195,7 +1195,7 @@
+ 
+ 	@ The status-returning routines are required to preserve all
+ 	@ registers except ip, lr, and cpsr.
+-6:	do_push	{r0, lr}
++6:	do_push	(r0, lr)
+ 	ARM_CALL cmpdf2
+ 	@ Set the Z flag correctly, and the C flag unconditionally.
+ 	cmp	r0, #0
+
+=== modified file 'gcc/config/arm/ieee754-sf.S'
+--- old/gcc/config/arm/ieee754-sf.S	2009-06-05 12:52:36 +0000
++++ new/gcc/config/arm/ieee754-sf.S	2010-08-05 15:20:54 +0000
+@@ -481,7 +481,7 @@
+ 	and	r3, ip, #0x80000000
+ 
+ 	@ Well, no way to make it shorter without the umull instruction.
+-	do_push	{r3, r4, r5}
++	do_push	(r3, r4, r5)
+ 	mov	r4, r0, lsr #16
+ 	mov	r5, r1, lsr #16
+ 	bic	r0, r0, r4, lsl #16
+@@ -492,7 +492,7 @@
+ 	mla	r0, r4, r1, r0
+ 	adds	r3, r3, r0, lsl #16
+ 	adc	r1, ip, r0, lsr #16
+-	do_pop	{r0, r4, r5}
++	do_pop	(r0, r4, r5)
+ 
+ #else
+ 
+@@ -882,7 +882,7 @@
+ 
+ 	@ The status-returning routines are required to preserve all
+ 	@ registers except ip, lr, and cpsr.
+-6:	do_push	{r0, r1, r2, r3, lr}
++6:	do_push	(r0, r1, r2, r3, lr)
+ 	ARM_CALL cmpsf2
+ 	@ Set the Z flag correctly, and the C flag unconditionally.
+ 	cmp	r0, #0
+
+=== modified file 'gcc/config/arm/lib1funcs.asm'
+--- old/gcc/config/arm/lib1funcs.asm	2010-04-02 18:54:46 +0000
++++ new/gcc/config/arm/lib1funcs.asm	2010-08-05 15:20:54 +0000
+@@ -254,8 +254,8 @@
+ .macro shift1 op, arg0, arg1, arg2
+ 	\op	\arg0, \arg1, \arg2
+ .endm
+-#define do_push	push
+-#define do_pop	pop
++#define do_push(...)	push {__VA_ARGS__}
++#define do_pop(...)	pop {__VA_ARGS__}
+ #define COND(op1, op2, cond) op1 ## op2 ## cond
+ /* Perform an arithmetic operation with a variable shift operand.  This
+    requires two instructions and a scratch register on Thumb-2.  */
+@@ -269,8 +269,42 @@
+ .macro shift1 op, arg0, arg1, arg2
+ 	mov	\arg0, \arg1, \op \arg2
+ .endm
+-#define do_push	stmfd sp!,
+-#define do_pop	ldmfd sp!,
++#if defined(__low_irq_latency__)        
++#define do_push(...) \
++  _buildN1(do_push, _buildC1(__VA_ARGS__))( __VA_ARGS__)
++#define _buildN1(BASE, X)	_buildN2(BASE, X)
++#define _buildN2(BASE, X)	BASE##X
++#define _buildC1(...)		_buildC2(__VA_ARGS__,9,8,7,6,5,4,3,2,1)
++#define _buildC2(a1,a2,a3,a4,a5,a6,a7,a8,a9,c,...) c
++        
++#define do_push1(r1) str r1, [sp, #-4]!
++#define do_push2(r1, r2) str r2, [sp, #-4]! ; str r1, [sp, #-4]!
++#define do_push3(r1, r2, r3) str r3, [sp, #-4]! ; str r2, [sp, #-4]!; str r1, [sp, #-4]!
++#define do_push4(r1, r2, r3, r4) \
++        do_push3 (r2, r3, r4);\
++        do_push1 (r1)
++#define do_push5(r1, r2, r3, r4, r5) \
++        do_push4 (r2, r3, r4, r5);\
++        do_push1 (r1)
++        
++#define do_pop(...) \
++_buildN1(do_pop, _buildC1(__VA_ARGS__))( __VA_ARGS__)
++        
++#define do_pop1(r1) ldr r1, [sp], #4
++#define do_pop2(r1, r2) ldr r1, [sp], #4 ; ldr r2, [sp], #4
++#define do_pop3(r1, r2, r3) ldr r1, [sp], #4 ; str r2, [sp], #4; str r3, [sp], #4
++#define do_pop4(r1, r2, r3, r4) \
++        do_pop1 (r1);\
++        do_pup3 (r2, r3, r4)
++#define do_pop5(r1, r2, r3, r4, r5) \
++        do_pop1 (r1);\
++        do_pop4 (r2, r3, r4, r5)
++#else
++#define do_push(...)    stmfd sp!, { __VA_ARGS__}
++#define do_pop(...)     ldmfd sp!, {__VA_ARGS__}
++#endif
++
++        
+ #define COND(op1, op2, cond) op1 ## cond ## op2
+ .macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
+ 	\name \dest, \src1, \src2, \shiftop \shiftreg
+@@ -1260,7 +1294,7 @@
+ 	ARM_FUNC_START div0
+ #endif
+ 
+-	do_push	{r1, lr}
++	do_push	(r1, lr)
+ 	mov	r0, #SIGFPE
+ 	bl	SYM(raise) __PLT__
+ 	RETLDM	r1
+@@ -1277,7 +1311,7 @@
+ #if defined __ARM_EABI__ && defined __linux__
+ @ EABI GNU/Linux call to cacheflush syscall.
+ 	ARM_FUNC_START clear_cache
+-	do_push	{r7}
++	do_push	(r7)
+ #if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__)
+ 	movw	r7, #2
+ 	movt	r7, #0xf
+@@ -1287,7 +1321,7 @@
+ #endif
+ 	mov	r2, #0
+ 	swi	0
+-	do_pop	{r7}
++	do_pop	(r7)
+ 	RET
+ 	FUNC_END clear_cache
+ #else
+@@ -1490,7 +1524,7 @@
+ 	push	{r4, lr}
+ # else
+ ARM_FUNC_START clzdi2
+-	do_push	{r4, lr}
++	do_push	(r4, lr)
+ # endif
+ 	cmp	xxh, #0
+ 	bne	1f
+
+=== modified file 'gcc/config/arm/predicates.md'
+--- old/gcc/config/arm/predicates.md	2010-07-30 14:17:05 +0000
++++ new/gcc/config/arm/predicates.md	2010-08-05 15:20:54 +0000
+@@ -328,6 +328,9 @@
+   HOST_WIDE_INT i = 1, base = 0;
+   rtx elt;
+ 
++  if (low_irq_latency)
++    return false;
++
+   if (count <= 1
+       || GET_CODE (XVECEXP (op, 0, 0)) != SET)
+     return false;
+@@ -385,6 +388,9 @@
+   HOST_WIDE_INT i = 1, base = 0;
+   rtx elt;
+ 
++  if (low_irq_latency)
++    return false;
++
+   if (count <= 1
+       || GET_CODE (XVECEXP (op, 0, 0)) != SET)
+     return false;
+
+=== modified file 'gcc/doc/invoke.texi'
+--- old/gcc/doc/invoke.texi	2010-07-29 17:03:20 +0000
++++ new/gcc/doc/invoke.texi	2010-08-05 15:20:54 +0000
+@@ -469,6 +469,7 @@
+ -mtpcs-frame  -mtpcs-leaf-frame @gol
+ -mcaller-super-interworking  -mcallee-super-interworking @gol
+ -mtp=@var{name} @gol
++-mlow-irq-latency @gol
+ -mword-relocations @gol
+ -mfix-cortex-m3-ldrd}
+ 
+@@ -9489,6 +9490,12 @@
+ @code{,}, @code{!}, @code{|}, and @code{*} as needed.
+ 
+ 
++@item -mlow-irq-latency
++@opindex mlow-irq-latency
++Avoid instructions with high interrupt latency when generating
++code.  This can increase code size and reduce performance.
++The option is off by default.
++
+ @end table
+ 
+ The conditional text @code{X} in a %@{@code{S}:@code{X}@} or similar
+