1 files changed, 515 insertions, 0 deletions
diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106841.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106841.patch
new file mode 100644
index 0000000000..d72446919f
--- /dev/null
+++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106841.patch
@@ -0,0 +1,515 @@
+2011-11-21  Michael Hope  <michael.hope@linaro.org>
+
+	Backport from mainline r180131:
+
+	2011-10-18  Julian Brown  <julian@codesourcery.com>
+
+	gcc/
+	* config/arm/arm.c (arm_block_move_unaligned_straight)
+	(arm_adjust_block_mem, arm_block_move_unaligned_loop)
+	(arm_movmemqi_unaligned): New.
+	(arm_gen_movmemqi): Support unaligned block copies.
+
+	gcc/testsuite/
+	* lib/target-supports.exp (check_effective_target_arm_unaligned): New.
+	* gcc.target/arm/unaligned-memcpy-1.c: New.
+	* gcc.target/arm/unaligned-memcpy-2.c: New.
+	* gcc.target/arm/unaligned-memcpy-3.c: New.
+	* gcc.target/arm/unaligned-memcpy-4.c: New.
+
+	2011-09-15  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	gcc/
+	* config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): New builtin macro.
+
+=== modified file 'gcc/config/arm/arm.c'
+--- old/gcc/config/arm/arm.c	2011-10-26 11:38:30 +0000
++++ new/gcc/config/arm/arm.c	2011-11-21 01:45:54 +0000
+@@ -10803,6 +10803,335 @@
+   return true;
+ }
+ 
++/* Copy LENGTH bytes from SRCBASE to DSTBASE using plain ldr/str/ldrh/strh
++   instructions, to permit unaligned copies on processors which support
++   unaligned semantics for those instructions.  INTERLEAVE_FACTOR (1 to 4) can
++   be used to attempt to hide load latency (using more registers) by doing
++   e.g. load/load/store/store for a factor of 2 (1 means no interleaving).
++   Load/store multiple are used for aligned addresses where possible.  */
++
++static void
++arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
++				   HOST_WIDE_INT length,
++				   unsigned int interleave_factor)
++{
++  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
++  int *regnos = XALLOCAVEC (int, interleave_factor);
++  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
++  HOST_WIDE_INT i, j;
++  HOST_WIDE_INT remaining = length, words;
++  rtx halfword_tmp = NULL, byte_tmp = NULL;
++  rtx dst, src;
++  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
++  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
++  HOST_WIDE_INT srcoffset, dstoffset;
++  HOST_WIDE_INT src_autoinc, dst_autoinc;
++  rtx mem, addr;
++  
++  gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
++  
++  /* Use hard registers if we have aligned source or destination so we can use
++     load/store multiple with contiguous registers.  */
++  if (dst_aligned || src_aligned)
++    for (i = 0; i < interleave_factor; i++)
++      regs[i] = gen_rtx_REG (SImode, i);
++  else
++    for (i = 0; i < interleave_factor; i++)
++      regs[i] = gen_reg_rtx (SImode);
++
++  dst = copy_addr_to_reg (XEXP (dstbase, 0));
++  src = copy_addr_to_reg (XEXP (srcbase, 0));
++
++  srcoffset = dstoffset = 0;
++  
++  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
++     For copying the last bytes we want to subtract this offset again.  */
++  src_autoinc = dst_autoinc = 0;
++
++  for (i = 0; i < interleave_factor; i++)
++    regnos[i] = i;
++
++  /* Copy BLOCK_SIZE_BYTES chunks.  */
++
++  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
++    {
++      /* Load words.  */
++      if (src_aligned && interleave_factor > 1)
++	{
++	  emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
++					    TRUE, srcbase, &srcoffset));
++	  src_autoinc += UNITS_PER_WORD * interleave_factor;
++	}
++      else
++	{
++	  for (j = 0; j < interleave_factor; j++)
++	    {
++	      addr = plus_constant (src, srcoffset + j * UNITS_PER_WORD
++					 - src_autoinc);
++	      mem = adjust_automodify_address (srcbase, SImode, addr,
++					       srcoffset + j * UNITS_PER_WORD);
++	      emit_insn (gen_unaligned_loadsi (regs[j], mem));
++	    }
++	  srcoffset += block_size_bytes;
++	}
++
++      /* Store words.  */
++      if (dst_aligned && interleave_factor > 1)
++	{
++	  emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
++					     TRUE, dstbase, &dstoffset));
++	  dst_autoinc += UNITS_PER_WORD * interleave_factor;
++	}
++      else
++	{
++	  for (j = 0; j < interleave_factor; j++)
++	    {
++	      addr = plus_constant (dst, dstoffset + j * UNITS_PER_WORD
++					 - dst_autoinc);
++	      mem = adjust_automodify_address (dstbase, SImode, addr,
++					       dstoffset + j * UNITS_PER_WORD);
++	      emit_insn (gen_unaligned_storesi (mem, regs[j]));
++	    }
++	  dstoffset += block_size_bytes;
++	}
++
++      remaining -= block_size_bytes;
++    }
++  
++  /* Copy any whole words left (note these aren't interleaved with any
++     subsequent halfword/byte load/stores in the interests of simplicity).  */
++  
++  words = remaining / UNITS_PER_WORD;
++
++  gcc_assert (words < interleave_factor);
++  
++  if (src_aligned && words > 1)
++    {
++      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
++					&srcoffset));
++      src_autoinc += UNITS_PER_WORD * words;
++    }
++  else
++    {
++      for (j = 0; j < words; j++)
++	{
++	  addr = plus_constant (src,
++				srcoffset + j * UNITS_PER_WORD - src_autoinc);
++	  mem = adjust_automodify_address (srcbase, SImode, addr,
++					   srcoffset + j * UNITS_PER_WORD);
++	  emit_insn (gen_unaligned_loadsi (regs[j], mem));
++	}
++      srcoffset += words * UNITS_PER_WORD;
++    }
++
++  if (dst_aligned && words > 1)
++    {
++      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
++					 &dstoffset));
++      dst_autoinc += words * UNITS_PER_WORD;
++    }
++  else
++    {
++      for (j = 0; j < words; j++)
++	{
++	  addr = plus_constant (dst,
++				dstoffset + j * UNITS_PER_WORD - dst_autoinc);
++	  mem = adjust_automodify_address (dstbase, SImode, addr,
++					   dstoffset + j * UNITS_PER_WORD);
++	  emit_insn (gen_unaligned_storesi (mem, regs[j]));
++	}
++      dstoffset += words * UNITS_PER_WORD;
++    }
++
++  remaining -= words * UNITS_PER_WORD;
++  
++  gcc_assert (remaining < 4);
++  
++  /* Copy a halfword if necessary.  */
++  
++  if (remaining >= 2)
++    {
++      halfword_tmp = gen_reg_rtx (SImode);
++
++      addr = plus_constant (src, srcoffset - src_autoinc);
++      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
++      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
++
++      /* Either write out immediately, or delay until we've loaded the last
++	 byte, depending on interleave factor.  */
++      if (interleave_factor == 1)
++	{
++	  addr = plus_constant (dst, dstoffset - dst_autoinc);
++	  mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
++	  emit_insn (gen_unaligned_storehi (mem,
++		       gen_lowpart (HImode, halfword_tmp)));
++	  halfword_tmp = NULL;
++	  dstoffset += 2;
++	}
++
++      remaining -= 2;
++      srcoffset += 2;
++    }
++  
++  gcc_assert (remaining < 2);
++  
++  /* Copy last byte.  */
++  
++  if ((remaining & 1) != 0)
++    {
++      byte_tmp = gen_reg_rtx (SImode);
++
++      addr = plus_constant (src, srcoffset - src_autoinc);
++      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
++      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
++
++      if (interleave_factor == 1)
++	{
++	  addr = plus_constant (dst, dstoffset - dst_autoinc);
++	  mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
++	  emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
++	  byte_tmp = NULL;
++	  dstoffset++;
++	}
++
++      remaining--;
++      srcoffset++;
++    }
++  
++  /* Store last halfword if we haven't done so already.  */
++  
++  if (halfword_tmp)
++    {
++      addr = plus_constant (dst, dstoffset - dst_autoinc);
++      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
++      emit_insn (gen_unaligned_storehi (mem,
++		   gen_lowpart (HImode, halfword_tmp)));
++      dstoffset += 2;
++    }
++
++  /* Likewise for last byte.  */
++
++  if (byte_tmp)
++    {
++      addr = plus_constant (dst, dstoffset - dst_autoinc);
++      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
++      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
++      dstoffset++;
++    }
++  
++  gcc_assert (remaining == 0 && srcoffset == dstoffset);
++}
++
++/* From mips_adjust_block_mem:
++
++   Helper function for doing a loop-based block operation on memory
++   reference MEM.  Each iteration of the loop will operate on LENGTH
++   bytes of MEM.
++
++   Create a new base register for use within the loop and point it to
++   the start of MEM.  Create a new BLKmode memory reference that uses this
++   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */
++
++static void
++arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
++		      rtx *loop_mem)
++{
++  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
++  
++  /* Although the new mem does not refer to a known location,
++     it does keep up to LENGTH bytes of alignment.  */
++  *loop_mem = change_address (mem, BLKmode, *loop_reg);
++  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
++}
++
++/* From mips_block_move_loop.  Move LENGTH bytes from SRC to DEST using
++   a loop that copies BYTES_PER_ITER bytes per iteration, passing
++   INTERLEAVE_FACTOR through to arm_block_move_unaligned_straight; leftover
++   bytes are copied after the loop.  LENGTH must be at least BYTES_PER_ITER.
++   Assume that the memory regions do not overlap.  */
++
++static void
++arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
++			       unsigned int interleave_factor,
++			       HOST_WIDE_INT bytes_per_iter)
++{
++  rtx label, src_reg, dest_reg, final_src, test;
++  HOST_WIDE_INT leftover;
++  
++  leftover = length % bytes_per_iter;
++  length -= leftover;
++  
++  /* Create registers and memory references for use within the loop.  */
++  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
++  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
++  
++  /* Calculate the value that SRC_REG should have after the last iteration of
++     the loop.  */
++  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
++				   0, 0, OPTAB_WIDEN);
++
++  /* Emit the start of the loop.  */
++  label = gen_label_rtx ();
++  emit_label (label);
++  
++  /* Emit the loop body.  */
++  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
++				     interleave_factor);
++
++  /* Move on to the next block.  */
++  emit_move_insn (src_reg, plus_constant (src_reg, bytes_per_iter));
++  emit_move_insn (dest_reg, plus_constant (dest_reg, bytes_per_iter));
++  
++  /* Emit the loop condition.  */
++  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
++  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
++  
++  /* Mop up any left-over bytes.  */
++  if (leftover)
++    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
++}
++
++/* Emit a block move of OPERANDS[2] bytes from OPERANDS[1] to OPERANDS[0] when
++   either is not aligned to a four-byte boundary.  Always returns 1.  This
++   may need further tuning depending on core type, optimize_size, etc.  */
++
++static int
++arm_movmemqi_unaligned (rtx *operands)
++{
++  HOST_WIDE_INT length = INTVAL (operands[2]);
++  
++  if (optimize_size)
++    {
++      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
++      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
++      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
++	 size of code if optimizing for size.  We'll use ldm/stm if src_aligned
++	 or dst_aligned though: allow more interleaving in those cases since the
++	 resulting code can be smaller.  */
++      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
++      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
++      
++      if (length > 12)
++	arm_block_move_unaligned_loop (operands[0], operands[1], length,
++				       interleave_factor, bytes_per_iter);
++      else
++	arm_block_move_unaligned_straight (operands[0], operands[1], length,
++					   interleave_factor);
++    }
++  else
++    {
++      /* Note that the loop created by arm_block_move_unaligned_loop may be
++	 subject to loop unrolling, which makes tuning this condition a little
++	 redundant.  */
++      if (length > 32)
++	arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
++      else
++	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
++    }
++  
++  return 1;
++}
++
+ int
+ arm_gen_movmemqi (rtx *operands)
+ {
+@@ -10815,8 +11144,13 @@
+ 
+   if (GET_CODE (operands[2]) != CONST_INT
+       || GET_CODE (operands[3]) != CONST_INT
+-      || INTVAL (operands[2]) > 64
+-      || INTVAL (operands[3]) & 3)
++      || INTVAL (operands[2]) > 64)
++    return 0;
++
++  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
++    return arm_movmemqi_unaligned (operands);
++
++  if (INTVAL (operands[3]) & 3)
+     return 0;
+ 
+   dstbase = operands[0];
+
+=== modified file 'gcc/config/arm/arm.h'
+--- old/gcc/config/arm/arm.h	2011-10-19 17:01:50 +0000
++++ new/gcc/config/arm/arm.h	2011-11-21 01:45:54 +0000
+@@ -47,6 +47,8 @@
+     {							\
+ 	if (TARGET_DSP_MULTIPLY)			\
+ 	   builtin_define ("__ARM_FEATURE_DSP");	\
++	if (unaligned_access)				\
++	  builtin_define ("__ARM_FEATURE_UNALIGNED");	\
+ 	/* Define __arm__ even when in thumb mode, for	\
+ 	   consistency with armcc.  */			\
+ 	builtin_define ("__arm__");			\
+
+=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c'
+--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c	1970-01-01 00:00:00 +0000
++++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c	2011-10-19 22:56:19 +0000
+@@ -0,0 +1,19 @@
++/* { dg-do compile } */
++/* { dg-require-effective-target arm_unaligned } */
++/* { dg-options "-O2" } */
++
++#include <string.h>
++
++void unknown_alignment (char *dest, char *src)
++{
++  memcpy (dest, src, 15);
++}
++
++/* We should see three unaligned word load/store pairs, one unaligned
++   ldrh/strh pair, and an ldrb/strb pair.  Sanity check that.  */
++
++/* { dg-final { scan-assembler-times "@ unaligned" 8 } } */
++/* { dg-final { scan-assembler-times "ldrh" 1 } } */
++/* { dg-final { scan-assembler-times "strh" 1 } } */
++/* { dg-final { scan-assembler-times "ldrb" 1 } } */
++/* { dg-final { scan-assembler-times "strb" 1 } } */
+
+=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c'
+--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c	1970-01-01 00:00:00 +0000
++++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c	2011-10-19 22:56:19 +0000
+@@ -0,0 +1,21 @@
++/* { dg-do compile } */
++/* { dg-require-effective-target arm_unaligned } */
++/* { dg-options "-O2" } */
++
++#include <string.h>
++
++char dest[16];
++
++void aligned_dest (char *src)
++{
++  memcpy (dest, src, 15);
++}
++
++/* Expect a multi-word store for the main part of the copy, but subword
++   loads/stores for the remainder.  */
++
++/* { dg-final { scan-assembler-times "stmia" 1 } } */
++/* { dg-final { scan-assembler-times "ldrh" 1 } } */
++/* { dg-final { scan-assembler-times "strh" 1 } } */
++/* { dg-final { scan-assembler-times "ldrb" 1 } } */
++/* { dg-final { scan-assembler-times "strb" 1 } } */
+
+=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c'
+--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c	1970-01-01 00:00:00 +0000
++++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c	2011-10-19 22:56:19 +0000
+@@ -0,0 +1,21 @@
++/* { dg-do compile } */
++/* { dg-require-effective-target arm_unaligned } */
++/* { dg-options "-O2" } */
++
++#include <string.h>
++
++char src[16];
++
++void aligned_src (char *dest)
++{
++  memcpy (dest, src, 15);
++}
++
++/* Expect a multi-word load for the main part of the copy, but subword
++   loads/stores for the remainder.  */
++
++/* { dg-final { scan-assembler-times "ldmia" 1 } } */
++/* { dg-final { scan-assembler-times "ldrh" 1 } } */
++/* { dg-final { scan-assembler-times "strh" 1 } } */
++/* { dg-final { scan-assembler-times "ldrb" 1 } } */
++/* { dg-final { scan-assembler-times "strb" 1 } } */
+
+=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c'
+--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c	1970-01-01 00:00:00 +0000
++++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c	2011-10-19 22:56:19 +0000
+@@ -0,0 +1,18 @@
++/* { dg-do compile } */
++/* { dg-require-effective-target arm_unaligned } */
++/* { dg-options "-O2" } */
++
++#include <string.h>
++
++char src[16];
++char dest[16];
++
++void aligned_both (void)
++{
++  memcpy (dest, src, 15);
++}
++
++/* We know both src and dest to be aligned: expect multiword loads/stores.  */
++
++/* { dg-final { scan-assembler-times "ldmia" 1 } } */
++/* { dg-final { scan-assembler-times "stmia" 1 } } */
+
+=== modified file 'gcc/testsuite/lib/target-supports.exp'
+--- old/gcc/testsuite/lib/target-supports.exp	2011-10-23 13:33:07 +0000
++++ new/gcc/testsuite/lib/target-supports.exp	2011-11-21 01:45:54 +0000
+@@ -1894,6 +1894,18 @@
+     }]
+ }
+ 
++# Return 1 if this is an ARM target that supports unaligned word/halfword
++# load/store instructions (i.e. __ARM_FEATURE_UNALIGNED is defined).
++
++proc check_effective_target_arm_unaligned { } {
++    return [check_no_compiler_messages arm_unaligned assembly {
++	#ifndef __ARM_FEATURE_UNALIGNED
++	#error no unaligned support
++	#endif
++	int i;
++    }]
++}
++
+ # Add the options needed for NEON.  We need either -mfloat-abi=softfp
+ # or -mfloat-abi=hard, but if one is already specified by the
+ # multilib, use it.  Similarly, if a -mfpu option already enables
+