	Vladimir Prus
	Julian Brown

	gcc/
	* config/arm/arm.c (arm_override_options): Warn if mlow-irq-latency
	is specified in Thumb mode.
	(load_multiple_sequence): Return 0 if low irq latency is requested.
	(store_multiple_sequence): Likewise.
	(arm_gen_load_multiple): Load registers one-by-one if low irq
	latency is requested.
	(arm_gen_store_multiple): Likewise.
	(vfp_output_fldmd): When low_irq_latency is non-zero, pop each
	register separately.
	(vfp_emit_fstmd): When low_irq_latency is non-zero, save each
	register separately.
	(arm_get_vfp_saved_size): Adjust saved register size calculation
	for the above changes.
	(print_pop_reg_by_ldr): New.
	(arm_output_epilogue): Use print_pop_reg_by_ldr when low irq
	latency is requested.
	(emit_multi_reg_push): Push registers separately if low irq
	latency is requested.
	* config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Set
	__low_irq_latency__.
	(low_irq_latency): Define.
	(USE_RETURN_INSN): Don't use return insn when low irq latency
	is requested.
	* config/arm/lib1funcs.asm (do_pop, do_push): Define as variadic
	macros.  When __low_irq_latency__ is defined, push and pop
	registers individually.
	(div0): Use correct punctuation.
	* config/arm/ieee754-df.S: Adjust syntax of using do_push.
	* config/arm/ieee754-sf.S: Likewise.
	* config/arm/bpabi.S: Likewise.
	* config/arm/arm.opt (mlow-irq-latency): New option.
	* config/arm/predicates.md (load_multiple_operation): Return false
	if low irq latency is requested.
	(store_multiple_operation): Likewise.
	* config/arm/arm.md (movmemqi): Don't use it if low irq latency
	is requested.
	* doc/invoke.texi (-mlow-irq-latency): Add documentation.

2010-07-26  Julian Brown

	Merge from Sourcery G++ 4.4:

	2007-06-06  Joseph Myers

	gcc/

=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c	2010-08-05 14:33:31 +0000
+++ new/gcc/config/arm/arm.c	2010-08-05 15:20:54 +0000
@@ -1884,6 +1884,13 @@
 
   /* Register global variables with the garbage collector.  */
   arm_add_gc_roots ();
+
+  if (low_irq_latency && TARGET_THUMB)
+    {
+      warning (0,
+	       "-mlow-irq-latency has no effect when compiling for Thumb");
+      low_irq_latency = 0;
+    }
 }
 
 static void
@@ -9053,6 +9060,9 @@
   int base_reg = -1;
   int i;
 
+  if (low_irq_latency)
+    return 0;
+
   /* Can only handle 2, 3, or 4 insns at present,
      though could be easily extended if required.  */
   gcc_assert (nops >= 2 && nops <= 4);
@@ -9282,6 +9292,9 @@
   int base_reg = -1;
   int i;
 
+  if (low_irq_latency)
+    return 0;
+
   /* Can only handle 2, 3, or 4 insns at present,
      though could be easily extended if required.  */
   gcc_assert (nops >= 2 && nops <= 4);
@@ -9489,7 +9502,7 @@
      As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
      for counts of 3 or 4 regs.  */
-  if (arm_tune_xscale && count <= 2 && ! optimize_size)
+  if (low_irq_latency || (arm_tune_xscale && count <= 2 && ! optimize_size))
     {
       rtx seq;
 
@@ -9552,7 +9565,7 @@
 
   /* See arm_gen_load_multiple for discussion of
      the pros/cons of ldm/stm usage for XScale.  */
-  if (arm_tune_xscale && count <= 2 && ! optimize_size)
+  if (low_irq_latency || (arm_tune_xscale && count <= 2 && ! optimize_size))
     {
       rtx seq;
 
@@ -11795,6 +11808,21 @@
 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
 {
   int i;
+  int offset;
+
+  if (low_irq_latency)
+    {
+      /* Output a sequence of FLDD instructions.  */
+      offset = 0;
+      for (i = reg; i < reg + count; ++i, offset += 8)
+	{
+	  fputc ('\t', stream);
+	  asm_fprintf (stream, "fldd\td%d, [%r,#%d]\n", i, base, offset);
+	}
+      asm_fprintf (stream, "\tadd\tsp, sp, #%d\n", count * 8);
+      return;
+    }
+
   /* Workaround ARM10 VFPr1 bug.  */
   if (count == 2 && !arm_arch6)
@@ -11865,6 +11893,56 @@
   rtx tmp, reg;
   int i;
 
+  if (low_irq_latency)
+    {
+      int saved_size;
+      rtx sp_insn;
+
+      if (!count)
+	return 0;
+
+      saved_size = count * GET_MODE_SIZE (DFmode);
+
+      /* Since fstd does not have a post-decrement addressing mode, we
+	 first decrement the stack pointer and then use base+offset
+	 stores for the VFP registers.  The ARM EABI unwind information
+	 can't easily describe base+offset loads, so we attach a note
+	 for the effects of the whole block to the first insn, and
+	 avoid marking the subsequent instructions with
+	 RTX_FRAME_RELATED_P.  */
+      sp_insn = gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+			    GEN_INT (-saved_size));
+      sp_insn = emit_insn (sp_insn);
+      RTX_FRAME_RELATED_P (sp_insn) = 1;
+
+      dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
+      XVECEXP (dwarf, 0, 0)
+	= gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+		       plus_constant (stack_pointer_rtx, -saved_size));
+
+      /* Push the double VFP registers to the stack.  */
+      for (i = 0; i < count; ++i)
+	{
+	  rtx reg;
+	  rtx mem;
+	  rtx addr;
+	  rtx insn;
+	  reg = gen_rtx_REG (DFmode, base_reg + 2*i);
+	  addr = (i == 0) ? stack_pointer_rtx
+			  : gen_rtx_PLUS (SImode, stack_pointer_rtx,
					  GEN_INT (i * GET_MODE_SIZE (DFmode)));
+	  mem = gen_frame_mem (DFmode, addr);
+	  insn = emit_move_insn (mem, reg);
+	  XVECEXP (dwarf, 0, i+1) = gen_rtx_SET (VOIDmode, mem, reg);
+	}
+
+      REG_NOTES (sp_insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
+					       REG_NOTES (sp_insn));
+
+      return saved_size;
+    }
+
   /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly
      two register pairs are stored by a store multiple insn.  We avoid
     this by pushing an extra pair.  */
@@ -13307,7 +13385,7 @@
       if (count > 0)
 	{
 	  /* Workaround ARM10 VFPr1 bug.  */
-	  if (count == 2 && !arm_arch6)
+	  if (count == 2 && !arm_arch6 && !low_irq_latency)
 	    count++;
 	  saved += count * 8;
 	}
@@ -13645,6 +13723,41 @@
 }
 
+/* Generate to STREAM a sequence of ldr instructions that pops the
+   registers in REGS_MASK from the stack.  SP is incremented as a
+   result.  */
+static void
+print_pop_reg_by_ldr (FILE *stream, int regs_mask, int rfe)
+{
+  int reg;
+
+  gcc_assert (! (regs_mask & (1 << SP_REGNUM)));
+
+  for (reg = 0; reg < PC_REGNUM; ++reg)
+    if (regs_mask & (1 << reg))
+      asm_fprintf (stream, "\tldr\t%r, [%r], #4\n",
+		   reg, SP_REGNUM);
+
+  if (regs_mask & (1 << PC_REGNUM))
+    {
+      if (rfe)
+	/* When returning from an exception, we need to copy SPSR
+	   to CPSR.  There are two ways to do that: the ldm
+	   instruction with the "^" suffix, and the movs
+	   instruction.  The latter would require that we load from
+	   the stack into some scratch register, and then move to
+	   PC.  Therefore, we'd need an extra instruction and would
+	   have to make sure we actually have a spare register.
+	   Using ldm with a single register is simpler.  */
+	asm_fprintf (stream, "\tldm\tsp!, {pc}^\n");
+      else
+	asm_fprintf (stream, "\tldr\t%r, [%r], #4\n",
+		     PC_REGNUM, SP_REGNUM);
+    }
+}
+
 const char *
 arm_output_epilogue (rtx sibling)
 {
@@ -14018,22 +14131,19 @@
 	 to load use the LDR instruction - it is faster.  For Thumb-2
 	 always use pop and the assembler will pick the best instruction.  */
       if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
-	  && !IS_INTERRUPT(func_type))
+	  && !IS_INTERRUPT (func_type))
 	{
 	  asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
 	}
       else if (saved_regs_mask)
 	{
-	  if (saved_regs_mask & (1 << SP_REGNUM))
-	    /* Note - write back to the stack register is not enabled
-	       (i.e. "ldmfd sp!...").  We know that the stack pointer is
-	       in the list of registers and if we add writeback the
-	       instruction becomes UNPREDICTABLE.  */
-	    print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
-			     rfe);
-	  else if (TARGET_ARM)
-	    print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
-			     rfe);
+	  gcc_assert (! (saved_regs_mask & (1 << SP_REGNUM)));
+	  if (TARGET_ARM)
+	    if (low_irq_latency)
+	      print_pop_reg_by_ldr (f, saved_regs_mask, rfe);
+	    else
+	      print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
+			       rfe);
 	  else
 	    print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
 	}
@@ -14154,6 +14264,32 @@
 
   gcc_assert (num_regs && num_regs <= 16);
 
+  if (low_irq_latency)
+    {
+      rtx insn = 0;
+
+      /* Emit a series of str instructions rather than a single stm.  */
+      /* TODO: Use strd where possible.  */
+      gcc_assert (! (mask & (1 << SP_REGNUM)));
+
+      for (i = LAST_ARM_REGNUM; i >= 0; --i)
+	{
+	  if (mask & (1 << i))
+	    {
+	      rtx reg, where, mem;
+
+	      reg = gen_rtx_REG (SImode, i);
+	      where = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
+	      mem = gen_rtx_MEM (SImode, where);
+	      insn = emit_move_insn (mem, reg);
+	      RTX_FRAME_RELATED_P (insn) = 1;
+	    }
+	}
+
+      return insn;
+    }
+
   /* We don't record the PC in the dwarf frame information.  */
   num_dwarf_regs = num_regs;
   if (mask & (1 << PC_REGNUM))

=== modified file 'gcc/config/arm/arm.h'
--- old/gcc/config/arm/arm.h	2010-08-05 14:33:31 +0000
+++ new/gcc/config/arm/arm.h	2010-08-05 15:20:54 +0000
@@ -101,6 +101,8 @@
 	  builtin_define ("__ARM_PCS");		\
 	builtin_define ("__ARM_EABI__");	\
       }						\
+    if (low_irq_latency)			\
+      builtin_define ("__low_irq_latency__");	\
   } while (0)
 
 /* The various ARM cores.  */
@@ -449,6 +451,10 @@
 /* Nonzero if chip supports integer division instruction.  */
 extern int arm_arch_hwdiv;
 
+/* Nonzero if we should minimize interrupt latency of the
+   generated code.  */
+extern int low_irq_latency;
+
 #ifndef TARGET_DEFAULT
 #define TARGET_DEFAULT  (MASK_APCS_FRAME)
 #endif
 
@@ -1823,9 +1829,10 @@
 /* Determine if the epilogue should be output as RTL.
    You should override this if you define FUNCTION_EXTRA_EPILOGUE.  */
 /* This is disabled for Thumb-2 because it will confuse the
-   conditional insn counter.  */
+   conditional insn counter.
+   Do not use a return insn if we're avoiding ldm/stm instructions.  */
 #define USE_RETURN_INSN(ISCOND)				\
-  (TARGET_ARM ? use_return_insn (ISCOND, NULL) : 0)
+  ((TARGET_ARM && !low_irq_latency) ? use_return_insn (ISCOND, NULL) : 0)
 
 /* Definitions for register eliminations.

=== modified file 'gcc/config/arm/arm.md'
--- old/gcc/config/arm/arm.md	2010-08-05 12:06:40 +0000
+++ new/gcc/config/arm/arm.md	2010-08-05 15:20:54 +0000
@@ -6587,7 +6587,7 @@
 	   (match_operand:BLK 1 "general_operand" "")
 	   (match_operand:SI 2 "const_int_operand" "")
 	   (match_operand:SI 3 "const_int_operand" "")]
-  "TARGET_EITHER"
+  "TARGET_EITHER && !low_irq_latency"
   "
   if (TARGET_32BIT)
     {

=== modified file 'gcc/config/arm/arm.opt'
--- old/gcc/config/arm/arm.opt	2009-06-18 11:24:10 +0000
+++ new/gcc/config/arm/arm.opt	2010-08-05 15:20:54 +0000
@@ -161,6 +161,10 @@
 Target Report Mask(NEON_VECTORIZE_QUAD)
 Use Neon quad-word (rather than double-word) registers for vectorization
 
+mlow-irq-latency
+Target Report Var(low_irq_latency)
+Try to reduce interrupt latency of the generated code
+
 mword-relocations
 Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS)
 Only generate absolute relocations on word sized values.
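[Editorial illustration, not part of the patch: the effect of
-mlow-irq-latency on a function epilogue.  This is a hand-written sketch;
the register set is hypothetical and actual output depends on the function
being compiled.]

	@ Default epilogue: a single load-multiple.  On cores that do not
	@ interrupt a partially completed ldm, worst-case IRQ latency
	@ grows with the number of registers in the list.
	ldmfd	sp!, {r4, r5, r6, pc}

	@ With -mlow-irq-latency (output of print_pop_reg_by_ldr above):
	@ one word per instruction, interruptible between the loads.
	ldr	r4, [sp], #4
	ldr	r5, [sp], #4
	ldr	r6, [sp], #4
	ldr	pc, [sp], #4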
=== modified file 'gcc/config/arm/bpabi.S'
--- old/gcc/config/arm/bpabi.S	2009-12-17 15:37:23 +0000
+++ new/gcc/config/arm/bpabi.S	2010-08-05 15:20:54 +0000
@@ -116,16 +116,17 @@
 	test_div_by_zero	signed
 
 	sub sp, sp, #8
-#if defined(__thumb2__)
+/* Low latency and Thumb-2 do_push implementations can't push sp directly.  */
+#if defined(__thumb2__) || defined(__low_irq_latency__)
 	mov ip, sp
-	push {ip, lr}
+	do_push (ip, lr)
 #else
-	do_push {sp, lr}
+	stmfd sp!, {sp, lr}
 #endif
 	bl SYM(__gnu_ldivmod_helper) __PLT__
 	ldr lr, [sp, #4]
 	add sp, sp, #8
-	do_pop {r2, r3}
+	do_pop (r2, r3)
 	RET
 
#endif /* L_aeabi_ldivmod */
@@ -136,16 +137,17 @@
 	test_div_by_zero	unsigned
 
 	sub sp, sp, #8
-#if defined(__thumb2__)
+/* Low latency and Thumb-2 do_push implementations can't push sp directly.  */
+#if defined(__thumb2__) || defined(__low_irq_latency__)
 	mov ip, sp
-	push {ip, lr}
+	do_push (ip, lr)
 #else
-	do_push {sp, lr}
+	stmfd sp!, {sp, lr}
 #endif
 	bl SYM(__gnu_uldivmod_helper) __PLT__
 	ldr lr, [sp, #4]
 	add sp, sp, #8
-	do_pop {r2, r3}
+	do_pop (r2, r3)
 	RET
 
#endif /* L_aeabi_divmod */

=== modified file 'gcc/config/arm/ieee754-df.S'
--- old/gcc/config/arm/ieee754-df.S	2009-06-05 12:52:36 +0000
+++ new/gcc/config/arm/ieee754-df.S	2010-08-05 15:20:54 +0000
@@ -83,7 +83,7 @@
 ARM_FUNC_START adddf3
 ARM_FUNC_ALIAS aeabi_dadd adddf3
 
-1:	do_push	{r4, r5, lr}
+1:	do_push	(r4, r5, lr)
 
 	@ Look for zeroes, equal values, INF, or NAN.
 	shift1	lsl, r4, xh, #1
@@ -427,7 +427,7 @@
 	do_it	eq, t
 	moveq	r1, #0
 	RETc(eq)
-	do_push	{r4, r5, lr}
+	do_push	(r4, r5, lr)
 	mov	r4, #0x400		@ initial exponent
 	add	r4, r4, #(52-1 - 1)
 	mov	r5, #0			@ sign bit is 0
@@ -447,7 +447,7 @@
 	do_it	eq, t
 	moveq	r1, #0
 	RETc(eq)
-	do_push	{r4, r5, lr}
+	do_push	(r4, r5, lr)
 	mov	r4, #0x400		@ initial exponent
 	add	r4, r4, #(52-1 - 1)
 	ands	r5, r0, #0x80000000	@ sign bit in r5
@@ -481,7 +481,7 @@
 	RETc(eq)			@ we are done already.
 
 	@ value was denormalized.  We can normalize it now.
-	do_push	{r4, r5, lr}
+	do_push	(r4, r5, lr)
 	mov	r4, #0x380		@ setup corresponding exponent
 	and	r5, xh, #0x80000000	@ move sign bit in r5
 	bic	xh, xh, #0x80000000
@@ -508,9 +508,9 @@
 	@ compatibility.
 	adr	ip, LSYM(f0_ret)
 	@ Push pc as well so that RETLDM works correctly.
-	do_push	{r4, r5, ip, lr, pc}
+	do_push	(r4, r5, ip, lr, pc)
 #else
-	do_push	{r4, r5, lr}
+	do_push	(r4, r5, lr)
 #endif
 
 	mov	r5, #0
@@ -534,9 +534,9 @@
 	@ compatibility.
 	adr	ip, LSYM(f0_ret)
 	@ Push pc as well so that RETLDM works correctly.
-	do_push	{r4, r5, ip, lr, pc}
+	do_push	(r4, r5, ip, lr, pc)
 #else
-	do_push	{r4, r5, lr}
+	do_push	(r4, r5, lr)
 #endif
 
 	ands	r5, ah, #0x80000000	@ sign bit in r5
@@ -585,7 +585,7 @@
 	@ Legacy code expects the result to be returned in f0.  Copy it
 	@ there as well.
 LSYM(f0_ret):
-	do_push	{r0, r1}
+	do_push	(r0, r1)
 	ldfd	f0, [sp], #8
 	RETLDM
 
@@ -602,7 +602,7 @@
 ARM_FUNC_START muldf3
 ARM_FUNC_ALIAS aeabi_dmul muldf3
 
-	do_push	{r4, r5, r6, lr}
+	do_push	(r4, r5, r6, lr)
 
 	@ Mask out exponents, trap any zero/denormal/INF/NAN.
 	mov	ip, #0xff
@@ -910,7 +910,7 @@
 ARM_FUNC_START divdf3
 ARM_FUNC_ALIAS aeabi_ddiv divdf3
 
-	do_push	{r4, r5, r6, lr}
+	do_push	(r4, r5, r6, lr)
 
 	@ Mask out exponents, trap any zero/denormal/INF/NAN.
 	mov	ip, #0xff
@@ -1195,7 +1195,7 @@
 	@ The status-returning routines are required to preserve all
 	@ registers except ip, lr, and cpsr.
 
-6:	do_push	{r0, lr}
+6:	do_push	(r0, lr)
 	ARM_CALL cmpdf2
 	@ Set the Z flag correctly, and the C flag unconditionally.
 	cmp	r0, #0
 
=== modified file 'gcc/config/arm/ieee754-sf.S'
--- old/gcc/config/arm/ieee754-sf.S	2009-06-05 12:52:36 +0000
+++ new/gcc/config/arm/ieee754-sf.S	2010-08-05 15:20:54 +0000
@@ -481,7 +481,7 @@
 	and	r3, ip, #0x80000000
 
 	@ Well, no way to make it shorter without the umull instruction.
-	do_push	{r3, r4, r5}
+	do_push	(r3, r4, r5)
 	mov	r4, r0, lsr #16
 	mov	r5, r1, lsr #16
 	bic	r0, r0, r4, lsl #16
@@ -492,7 +492,7 @@
 	mla	r0, r4, r1, r0
 	adds	r3, r3, r0, lsl #16
 	adc	r1, ip, r0, lsr #16
-	do_pop	{r0, r4, r5}
+	do_pop	(r0, r4, r5)
 
 #else
 
@@ -882,7 +882,7 @@
 	@ The status-returning routines are required to preserve all
 	@ registers except ip, lr, and cpsr.
 
-6:	do_push	{r0, r1, r2, r3, lr}
+6:	do_push	(r0, r1, r2, r3, lr)
 	ARM_CALL cmpsf2
 	@ Set the Z flag correctly, and the C flag unconditionally.
 	cmp	r0, #0

=== modified file 'gcc/config/arm/lib1funcs.asm'
--- old/gcc/config/arm/lib1funcs.asm	2010-04-02 18:54:46 +0000
+++ new/gcc/config/arm/lib1funcs.asm	2010-08-05 15:20:54 +0000
@@ -254,8 +254,8 @@
 .macro	shift1 op, arg0, arg1, arg2
 	\op	\arg0, \arg1, \arg2
 .endm
-#define do_push	push
-#define do_pop	pop
+#define do_push(...)	push {__VA_ARGS__}
+#define do_pop(...)	pop {__VA_ARGS__}
 #define COND(op1, op2, cond) op1 ## op2 ## cond
 /* Perform an arithmetic operation with a variable shift operand.  This
    requires two instructions and a scratch register on Thumb-2.  */
@@ -269,8 +269,42 @@
 .macro	shift1 op, arg0, arg1, arg2
 	mov	\arg0, \arg1, \op \arg2
 .endm
-#define do_push	stmfd sp!,
-#define do_pop	ldmfd sp!,
+#if defined(__low_irq_latency__)
+/* Push and pop the registers one at a time, since ldm/stm cannot be
+   interrupted mid-sequence on some cores.  The _buildC1/_buildC2
+   macros count the variadic arguments, selecting one of the
+   do_pushN/do_popN expansions below.  */
+#define do_push(...) \
+  _buildN1(do_push, _buildC1(__VA_ARGS__))(__VA_ARGS__)
+#define _buildN1(BASE, X)	_buildN2(BASE, X)
+#define _buildN2(BASE, X)	BASE##X
+#define _buildC1(...)		_buildC2(__VA_ARGS__,9,8,7,6,5,4,3,2,1)
+#define _buildC2(a1,a2,a3,a4,a5,a6,a7,a8,a9,c,...) c
+
+#define do_push1(r1) str r1, [sp, #-4]!
+#define do_push2(r1, r2) str r2, [sp, #-4]! ; str r1, [sp, #-4]!
+#define do_push3(r1, r2, r3) str r3, [sp, #-4]! ; str r2, [sp, #-4]! ; str r1, [sp, #-4]!
+#define do_push4(r1, r2, r3, r4) \
+  do_push3 (r2, r3, r4) ; \
+  do_push1 (r1)
+#define do_push5(r1, r2, r3, r4, r5) \
+  do_push4 (r2, r3, r4, r5) ; \
+  do_push1 (r1)
+
+#define do_pop(...) \
+  _buildN1(do_pop, _buildC1(__VA_ARGS__))(__VA_ARGS__)
+
+#define do_pop1(r1) ldr r1, [sp], #4
+#define do_pop2(r1, r2) ldr r1, [sp], #4 ; ldr r2, [sp], #4
+#define do_pop3(r1, r2, r3) ldr r1, [sp], #4 ; ldr r2, [sp], #4 ; ldr r3, [sp], #4
+#define do_pop4(r1, r2, r3, r4) \
+  do_pop1 (r1) ; \
+  do_pop3 (r2, r3, r4)
+#define do_pop5(r1, r2, r3, r4, r5) \
+  do_pop1 (r1) ; \
+  do_pop4 (r2, r3, r4, r5)
+#else
+#define do_push(...)	stmfd sp!, {__VA_ARGS__}
+#define do_pop(...)	ldmfd sp!, {__VA_ARGS__}
+#endif
+
 #define COND(op1, op2, cond) op1 ## cond ## op2
 .macro	shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
 	\name \dest, \src1, \src2, \shiftop \shiftreg
@@ -1260,7 +1294,7 @@
 ARM_FUNC_START div0
 #endif
 
-	do_push	{r1, lr}
+	do_push	(r1, lr)
 	mov	r0, #SIGFPE
 	bl	SYM(raise) __PLT__
 	RETLDM	r1
@@ -1277,7 +1311,7 @@
 #if defined __ARM_EABI__ && defined __linux__
 @ EABI GNU/Linux call to cacheflush syscall.
 ARM_FUNC_START clear_cache
-	do_push	{r7}
+	do_push	(r7)
 #if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__)
 	movw	r7, #2
 	movt	r7, #0xf
@@ -1287,7 +1321,7 @@
 #endif
 	mov	r2, #0
 	swi	0
-	do_pop	{r7}
+	do_pop	(r7)
 	RET
 	FUNC_END clear_cache
 #else
@@ -1490,7 +1524,7 @@
 	push	{r4, lr}
 # else
 ARM_FUNC_START clzdi2
-	do_push	{r4, lr}
+	do_push	(r4, lr)
 # endif
 	cmp	xxh, #0
 	bne	1f

=== modified file 'gcc/config/arm/predicates.md'
--- old/gcc/config/arm/predicates.md	2010-07-30 14:17:05 +0000
+++ new/gcc/config/arm/predicates.md	2010-08-05 15:20:54 +0000
@@ -328,6 +328,9 @@
   HOST_WIDE_INT i = 1, base = 0;
   rtx elt;
 
+  if (low_irq_latency)
+    return false;
+
   if (count <= 1
       || GET_CODE (XVECEXP (op, 0, 0)) != SET)
     return false;
@@ -385,6 +388,9 @@
   HOST_WIDE_INT i = 1, base = 0;
   rtx elt;
 
+  if (low_irq_latency)
+    return false;
+
   if (count <= 1
       || GET_CODE (XVECEXP (op, 0, 0)) != SET)
     return false;

=== modified file 'gcc/doc/invoke.texi'
--- old/gcc/doc/invoke.texi	2010-07-29 17:03:20 +0000
+++ new/gcc/doc/invoke.texi	2010-08-05 15:20:54 +0000
@@ -469,6 +469,7 @@
 -mtpcs-frame  -mtpcs-leaf-frame @gol
 -mcaller-super-interworking  -mcallee-super-interworking @gol
 -mtp=@var{name} @gol
+-mlow-irq-latency @gol
 -mword-relocations @gol
 -mfix-cortex-m3-ldrd}
 
@@ -9489,6 +9490,12 @@
 @code{,}, @code{!}, @code{|}, and @code{*} as needed.
 
+@item -mlow-irq-latency
+@opindex mlow-irq-latency
+Avoid instructions with high interrupt latency when generating
+code.  This can increase code size and reduce performance.
+The option is off by default.
+
 @end table
 
 The conditional text @code{X} in a %@{@code{S}:@code{X}@} or similar
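[Editorial illustration, not part of the patch.  First, a sketch of how the
argument-counting macros in lib1funcs.asm select an expansion; the trace is
hand-derived from the definitions above.]

	do_push (r4, r5, lr)
	-> _buildN1(do_push, _buildC1(r4, r5, lr))(r4, r5, lr)
	-> _buildN1(do_push, 3)(r4, r5, lr)   @ _buildC2 yields the count, 3
	-> do_push3 (r4, r5, lr)
	-> str lr, [sp, #-4]! ; str r5, [sp, #-4]! ; str r4, [sp, #-4]!

[Second, a usage sketch.  The target triplet and optimization level are
hypothetical; the flag and the preprocessor macro are the ones added above.]

	arm-none-eabi-gcc -O2 -mlow-irq-latency -S foo.c

With the flag, ldm/stm (and VFP fldm/fstm) sequences in prologues,
epilogues, and block moves are replaced by single-register loads and
stores, and __low_irq_latency__ is predefined so that the lib1funcs.asm
do_push/do_pop macros expand to individual str/ldr instructions as well.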