1 files changed, 0 insertions, 663 deletions
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99371.patch b/meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99371.patch
deleted file mode 100644
index be102160c5..0000000000
--- a/meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99371.patch
+++ /dev/null
@@ -1,663 +0,0 @@
-2010-08-24  Andrew Stubbs  <ams@codesourcery.com>
-
-	Backport from FSF:
-
-	2010-08-07  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
-
-	* config/arm/cortex-a9.md: Rewrite VFP Pipeline description.
-	* config/arm/arm.c (arm_xscale_tune): Initialize sched_adjust_cost.
-	 (arm_fastmul_tune,arm_slowmul_tune, arm_9e_tune): Likewise.
-	 (arm_adjust_cost): Split into xscale_sched_adjust_cost and a
-	 generic part.
-	 (cortex_a9_sched_adjust_cost): New function.
-	 (xscale_sched_adjust_cost): New function.
-	* config/arm/arm-protos.h (struct tune_params): New field
-	sched_adjust_cost.
-	* config/arm/arm-cores.def: Adjust costs for cortex-a9.
-
-	2010-04-17  Richard Earnshaw  <rearnsha@arm.com>
-
-	* arm-protos.h (tune_params): New structure.
-	* arm.c (current_tune): New variable.
-	(arm_constant_limit): Delete.
-	(struct processors): Add pointer to the tune parameters.
-	(arm_slowmul_tune): New tuning option.
-	(arm_fastmul_tune, arm_xscale_tune, arm_9e_tune): Likewise.
-	(all_cores): Adjust to pick up the tuning model.
-	(arm_constant_limit): New function.
-	(arm_override_options): Select the appropriate tuning model.  Delete
-	initialization of arm_const_limit.
-	(arm_split_constant): Use the new constant-limit model.
-	(arm_rtx_costs): Pick up the current tuning model.
-	* arm.md (is_strongarm, is_xscale): Delete.
-	* arm-generic.md (load_ldsched_x, load_ldsched): Test explicitly
-	for Xscale variant architectures.
-	(mult_ldsched_strongarm, mult_ldsched): Similarly for StrongARM.
-
- 2010-08-23  Andrew Stubbs  <ams@codesourcery.com>
- 
- 	Backport from FSF:
-
-=== modified file 'gcc/config/arm/arm-cores.def'
---- old/gcc/config/arm/arm-cores.def	2010-07-29 15:53:39 +0000
-+++ new/gcc/config/arm/arm-cores.def	2010-08-24 13:15:54 +0000
-@@ -120,7 +120,7 @@
- ARM_CORE("arm1156t2f-s",  arm1156t2fs,  6T2,				 FL_LDSCHED | FL_VFPV2, 9e)
- ARM_CORE("cortex-a5",	  cortexa5,	7A,				 FL_LDSCHED, 9e)
- ARM_CORE("cortex-a8",	  cortexa8,	7A,				 FL_LDSCHED, 9e)
--ARM_CORE("cortex-a9",	  cortexa9,	7A,				 FL_LDSCHED, 9e)
-+ARM_CORE("cortex-a9",	  cortexa9,	7A,				 FL_LDSCHED, cortex_a9)
- ARM_CORE("cortex-r4",	  cortexr4,	7R,				 FL_LDSCHED, 9e)
- ARM_CORE("cortex-r4f",	  cortexr4f,	7R,				 FL_LDSCHED, 9e)
- ARM_CORE("cortex-m4",	  cortexm4,	7EM,				 FL_LDSCHED, 9e)
-
-=== modified file 'gcc/config/arm/arm-generic.md'
---- old/gcc/config/arm/arm-generic.md	2007-08-02 09:49:31 +0000
-+++ new/gcc/config/arm/arm-generic.md	2010-08-24 13:15:54 +0000
-@@ -104,14 +104,14 @@
-   (and (eq_attr "generic_sched" "yes")
-        (and (eq_attr "ldsched" "yes") 
- 	    (and (eq_attr "type" "load_byte,load1")
--	         (eq_attr "is_xscale" "yes"))))
-+	         (eq_attr "tune" "xscale,iwmmxt,iwmmxt2"))))
-   "core")
- 
- (define_insn_reservation "load_ldsched" 2
-   (and (eq_attr "generic_sched" "yes")
-        (and (eq_attr "ldsched" "yes") 
- 	    (and (eq_attr "type" "load_byte,load1")
--	         (eq_attr "is_xscale" "no"))))
-+	         (eq_attr "tune" "!xscale,iwmmxt,iwmmxt2"))))
-   "core")
- 
- (define_insn_reservation "load_or_store" 2
-@@ -128,14 +128,16 @@
- (define_insn_reservation "mult_ldsched_strongarm" 3
-   (and (eq_attr "generic_sched" "yes")
-        (and (eq_attr "ldsched" "yes") 
--	    (and (eq_attr "is_strongarm" "yes")
-+	    (and (eq_attr "tune"
-+		  "strongarm,strongarm110,strongarm1100,strongarm1110")
- 	         (eq_attr "type" "mult"))))
-   "core*2")
- 
- (define_insn_reservation "mult_ldsched" 4
-   (and (eq_attr "generic_sched" "yes")
-        (and (eq_attr "ldsched" "yes") 
--	    (and (eq_attr "is_strongarm" "no")
-+	    (and (eq_attr "tune"
-+		  "!strongarm,strongarm110,strongarm1100,strongarm1110")
- 	         (eq_attr "type" "mult"))))
-   "core*4")
- 
-
-=== modified file 'gcc/config/arm/arm-protos.h'
---- old/gcc/config/arm/arm-protos.h	2010-08-10 13:31:21 +0000
-+++ new/gcc/config/arm/arm-protos.h	2010-08-24 13:15:54 +0000
-@@ -214,4 +214,17 @@
- 
- extern void arm_order_regs_for_local_alloc (void);
- 
-+#ifdef RTX_CODE
-+/* This needs to be here because we need RTX_CODE and similar.  */
-+
-+struct tune_params
-+{
-+  bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool);
-+  bool (*sched_adjust_cost) (rtx, rtx, rtx, int *);
-+  int constant_limit;
-+};
-+
-+extern const struct tune_params *current_tune;
-+#endif /* RTX_CODE */
-+
- #endif /* ! GCC_ARM_PROTOS_H */
-
-=== modified file 'gcc/config/arm/arm.c'
---- old/gcc/config/arm/arm.c	2010-08-20 16:21:01 +0000
-+++ new/gcc/config/arm/arm.c	2010-08-24 13:15:54 +0000
-@@ -228,6 +228,8 @@
- static void arm_trampoline_init (rtx, tree, rtx);
- static rtx arm_trampoline_adjust_address (rtx);
- static rtx arm_pic_static_addr (rtx orig, rtx reg);
-+static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
-+static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
- static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
- static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
- 						     const_tree type,
-@@ -545,6 +547,9 @@
- /* The processor for which instructions should be scheduled.  */
- enum processor_type arm_tune = arm_none;
- 
-+/* The current tuning set.  */
-+const struct tune_params *current_tune;
-+
- /* The default processor used if not overridden by commandline.  */
- static enum processor_type arm_default_cpu = arm_none;
- 
-@@ -720,9 +725,6 @@
-    the next function.  */
- static int after_arm_reorg = 0;
- 
--/* The maximum number of insns to be used when loading a constant.  */
--static int arm_constant_limit = 3;
--
- enum arm_pcs arm_pcs_default;
- 
- /* For an explanation of these variables, see final_prescan_insn below.  */
-@@ -761,8 +763,44 @@
-   enum processor_type core;
-   const char *arch;
-   const unsigned long flags;
--  bool (* rtx_costs) (rtx, enum rtx_code, enum rtx_code, int *, bool);
--};
-+  const struct tune_params *const tune;
-+};
-+
-+const struct tune_params arm_slowmul_tune =
-+{
-+  arm_slowmul_rtx_costs,
-+  NULL,
-+  3
-+};
-+
-+const struct tune_params arm_fastmul_tune =
-+{
-+  arm_fastmul_rtx_costs,
-+  NULL,
-+  1
-+};
-+
-+const struct tune_params arm_xscale_tune =
-+{
-+  arm_xscale_rtx_costs,
-+  xscale_sched_adjust_cost,
-+  2
-+};
-+
-+const struct tune_params arm_9e_tune =
-+{
-+  arm_9e_rtx_costs,
-+  NULL,
-+  1
-+};
-+
-+const struct tune_params arm_cortex_a9_tune =
-+{
-+  arm_9e_rtx_costs,
-+  cortex_a9_sched_adjust_cost,
-+  1
-+};
-+
- 
- /* Not all of these give usefully different compilation alternatives,
-    but there is no simple way of generalizing them.  */
-@@ -770,7 +808,7 @@
- {
-   /* ARM Cores */
- #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
--  {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
-+  {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
- #include "arm-cores.def"
- #undef ARM_CORE
-   {NULL, arm_none, NULL, 0, NULL}
-@@ -779,7 +817,7 @@
- static const struct processors all_architectures[] =
- {
-   /* ARM Architectures */
--  /* We don't specify rtx_costs here as it will be figured out
-+  /* We don't specify tuning costs here as it will be figured out
-      from the core.  */
- 
-   {"armv2",   arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
-@@ -928,6 +966,13 @@
-   TLS_LE32
- };
- 
-+/* The maximum number of insns to be used when loading a constant.  */
-+inline static int
-+arm_constant_limit (bool size_p)
-+{
-+  return size_p ? 1 : current_tune->constant_limit;
-+}
-+
- /* Emit an insn that's a simple single-set.  Both the operands must be known
-    to be valid.  */
- inline static rtx
-@@ -1478,6 +1523,7 @@
-     }
- 
-   tune_flags = all_cores[(int)arm_tune].flags;
-+  current_tune = all_cores[(int)arm_tune].tune;
- 
-   if (target_fp16_format_name)
-     {
-@@ -1875,26 +1921,12 @@
- 
-   if (optimize_size)
-     {
--      arm_constant_limit = 1;
--
-       /* If optimizing for size, bump the number of instructions that we
-          are prepared to conditionally execute (even on a StrongARM).  */
-       max_insns_skipped = 6;
-     }
-   else
-     {
--      /* For processors with load scheduling, it never costs more than
--         2 cycles to load a constant, and the load scheduler may well
--	 reduce that to 1.  */
--      if (arm_ld_sched)
--        arm_constant_limit = 1;
--
--      /* On XScale the longer latency of a load makes it more difficult
--         to achieve a good schedule, so it's faster to synthesize
--	 constants that can be done in two insns.  */
--      if (arm_tune_xscale)
--        arm_constant_limit = 2;
--
-       /* StrongARM has early execution of branches, so a sequence
-          that is worth skipping is shorter.  */
-       if (arm_tune_strongarm)
-@@ -2423,7 +2455,8 @@
- 	  && !cond
- 	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
- 				1, 0)
--	      > arm_constant_limit + (code != SET)))
-+	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
-+		 + (code != SET))))
- 	{
- 	  if (code == SET)
- 	    {
-@@ -7771,9 +7804,9 @@
- 				   (enum rtx_code) outer_code, total);
-     }
-   else
--    return all_cores[(int)arm_tune].rtx_costs (x, (enum rtx_code) code,
--					       (enum rtx_code) outer_code,
--					       total, speed);
-+    return current_tune->rtx_costs (x, (enum rtx_code) code,
-+				    (enum rtx_code) outer_code,
-+				    total, speed);
- }
- 
- /* RTX costs for cores with a slow MUL implementation.  Thumb-2 is not
-@@ -7918,7 +7951,8 @@
-    so it can be ignored.  */
- 
- static bool
--arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed)
-+arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
-+		      int *total, bool speed)
- {
-   enum machine_mode mode = GET_MODE (x);
- 
-@@ -8119,15 +8153,15 @@
-   return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
- }
- 
--static int
--arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
-+/* Adjust cost hook for XScale.  */
-+static bool
-+xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
- {
-   rtx i_pat, d_pat;
- 
-   /* Some true dependencies can have a higher cost depending
-      on precisely how certain input operands are used.  */
--  if (arm_tune_xscale
--      && REG_NOTE_KIND (link) == 0
-+  if (REG_NOTE_KIND (link) == 0
-       && recog_memoized (insn) >= 0
-       && recog_memoized (dep) >= 0)
-     {
-@@ -8161,10 +8195,106 @@
- 
- 	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
- 					   shifted_operand))
--		return 2;
-+		{
-+		  *cost = 2;
-+		  return false;
-+		}
- 	    }
- 	}
-     }
-+  return true;
-+}
-+
-+/* Adjust cost hook for Cortex A9.  */
-+static bool
-+cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
-+{
-+  switch (REG_NOTE_KIND (link))
-+    {
-+    case REG_DEP_ANTI:
-+      *cost = 0;
-+      return false;
-+
-+    case REG_DEP_TRUE:
-+    case REG_DEP_OUTPUT:
-+	if (recog_memoized (insn) >= 0
-+	    && recog_memoized (dep) >= 0)
-+	  {
-+	    if (GET_CODE (PATTERN (insn)) == SET)
-+	      {
-+		if (GET_MODE_CLASS 
-+		    (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
-+		  || GET_MODE_CLASS 
-+		    (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
-+		  {
-+		    enum attr_type attr_type_insn = get_attr_type (insn);
-+		    enum attr_type attr_type_dep = get_attr_type (dep);
-+
-+		    /* By default all dependencies of the form
-+		       s0 = s0 <op> s1
-+		       s0 = s0 <op> s2
-+		       have an extra latency of 1 cycle because
-+		       of the input and output dependency in this
-+		       case. However this gets modeled as an true
-+		       dependency and hence all these checks.  */
-+		    if (REG_P (SET_DEST (PATTERN (insn)))
-+			&& REG_P (SET_DEST (PATTERN (dep)))
-+			&& reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
-+						    SET_DEST (PATTERN (dep))))
-+		      {
-+			/* FMACS is a special case where the dependant
-+			   instruction can be issued 3 cycles before
-+			   the normal latency in case of an output 
-+			   dependency.  */
-+			if ((attr_type_insn == TYPE_FMACS
-+			     || attr_type_insn == TYPE_FMACD)
-+			    && (attr_type_dep == TYPE_FMACS
-+				|| attr_type_dep == TYPE_FMACD))
-+			  {
-+			    if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
-+			      *cost = insn_default_latency (dep) - 3;
-+			    else
-+			      *cost = insn_default_latency (dep);
-+			    return false;
-+			  }
-+			else
-+			  {
-+			    if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
-+			      *cost = insn_default_latency (dep) + 1;
-+			    else
-+			      *cost = insn_default_latency (dep);
-+			  }
-+			return false;
-+		      }
-+		  }
-+	      }
-+	  }
-+	break;
-+
-+    default:
-+      gcc_unreachable ();
-+    }
-+
-+  return true;
-+}
-+
-+/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
-+   It corrects the value of COST based on the relationship between
-+   INSN and DEP through the dependence LINK.  It returns the new
-+   value. There is a per-core adjust_cost hook to adjust scheduler costs
-+   and the per-core hook can choose to completely override the generic 
-+   adjust_cost function. Only put bits of code into arm_adjust_cost that 
-+   are common across all cores.  */
-+static int
-+arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
-+{
-+  rtx i_pat, d_pat;
-+
-+  if (current_tune->sched_adjust_cost != NULL)
-+    {
-+      if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
-+	return cost;
-+    }
- 
-   /* XXX This is not strictly true for the FPA.  */
-   if (REG_NOTE_KIND (link) == REG_DEP_ANTI
-@@ -8187,7 +8317,8 @@
- 	 constant pool are cached, and that others will miss.  This is a
- 	 hack.  */
- 
--      if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
-+      if ((GET_CODE (src_mem) == SYMBOL_REF 
-+	   && CONSTANT_POOL_ADDRESS_P (src_mem))
- 	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
- 	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
- 	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
-
-=== modified file 'gcc/config/arm/arm.md'
---- old/gcc/config/arm/arm.md	2010-08-23 14:39:12 +0000
-+++ new/gcc/config/arm/arm.md	2010-08-24 13:15:54 +0000
-@@ -150,13 +150,6 @@
- ; patterns that share the same RTL in both ARM and Thumb code.
- (define_attr "is_thumb" "no,yes" (const (symbol_ref "thumb_code")))
- 
--; IS_STRONGARM is set to 'yes' when compiling for StrongARM, it affects
--; scheduling decisions for the load unit and the multiplier.
--(define_attr "is_strongarm" "no,yes" (const (symbol_ref "arm_tune_strongarm")))
--
--; IS_XSCALE is set to 'yes' when compiling for XScale.
--(define_attr "is_xscale" "no,yes" (const (symbol_ref "arm_tune_xscale")))
--
- ;; Operand number of an input operand that is shifted.  Zero if the
- ;; given instruction does not shift one of its input operands.
- (define_attr "shift" "" (const_int 0))
-
-=== modified file 'gcc/config/arm/cortex-a9.md'
---- old/gcc/config/arm/cortex-a9.md	2009-10-31 16:40:03 +0000
-+++ new/gcc/config/arm/cortex-a9.md	2010-08-24 13:15:54 +0000
-@@ -2,8 +2,10 @@
- ;; Copyright (C) 2008, 2009 Free Software Foundation, Inc.
- ;; Originally written by CodeSourcery for VFP.
- ;;
--;; Integer core pipeline description contributed by ARM Ltd.
--;;
-+;; Rewritten by Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
-+;; Integer Pipeline description contributed by ARM Ltd.
-+;; VFP Pipeline description rewritten and contributed by ARM Ltd.
-+
- ;; This file is part of GCC.
- ;;
- ;; GCC is free software; you can redistribute it and/or modify it
-@@ -22,28 +24,27 @@
- 
- (define_automaton "cortex_a9")
- 
--;; The Cortex-A9 integer core is modelled as a dual issue pipeline that has
-+;; The Cortex-A9 core is modelled as a dual issue pipeline that has
- ;; the following components.
- ;; 1. 1 Load Store Pipeline.
- ;; 2. P0 / main pipeline for data processing instructions.
- ;; 3. P1 / Dual pipeline for Data processing instructions.
- ;; 4. MAC pipeline for multiply as well as multiply
- ;;    and accumulate instructions.
--;; 5. 1 VFP / Neon pipeline.
--;; The Load/Store and VFP/Neon pipeline are multiplexed.
-+;; 5. 1 VFP and an optional Neon unit.
-+;; The Load/Store, VFP and Neon issue pipeline are multiplexed.
- ;; The P0 / main pipeline and M1 stage of the MAC pipeline are
- ;;   multiplexed.
- ;; The P1 / dual pipeline and M2 stage of the MAC pipeline are
- ;;   multiplexed.
--;; There are only 4 register read ports and hence at any point of
-+;; There are only 4 integer register read ports and hence at any point of
- ;; time we can't have issue down the E1 and the E2 ports unless
- ;; of course there are bypass paths that get exercised.
- ;; Both P0 and P1 have 2 stages E1 and E2.
- ;; Data processing instructions issue to E1 or E2 depending on
- ;; whether they have an early shift or not.
- 
--
--(define_cpu_unit "cortex_a9_vfp, cortex_a9_ls" "cortex_a9")
-+(define_cpu_unit "ca9_issue_vfp_neon, cortex_a9_ls" "cortex_a9")
- (define_cpu_unit "cortex_a9_p0_e1, cortex_a9_p0_e2" "cortex_a9")
- (define_cpu_unit "cortex_a9_p1_e1, cortex_a9_p1_e2" "cortex_a9")
- (define_cpu_unit "cortex_a9_p0_wb, cortex_a9_p1_wb" "cortex_a9")
-@@ -71,11 +72,7 @@
- 
- ;; Issue at the same time along the load store pipeline and
- ;; the VFP / Neon pipeline is not possible.
--;; FIXME:: At some point we need to model the issue
--;; of the load store and the vfp being shared rather than anything else.
--
--(exclusion_set "cortex_a9_ls" "cortex_a9_vfp")
--
-+(exclusion_set "cortex_a9_ls" "ca9_issue_vfp_neon")
- 
- ;; Default data processing instruction without any shift
- ;; The only exception to this is the mov instruction
-@@ -101,18 +98,13 @@
- 
- (define_insn_reservation "cortex_a9_load1_2" 4
-   (and (eq_attr "tune" "cortexa9")
--       (eq_attr "type" "load1, load2, load_byte"))
-+       (eq_attr "type" "load1, load2, load_byte, f_loads, f_loadd"))
-   "cortex_a9_ls")
- 
- ;; Loads multiples and store multiples can't be issued for 2 cycles in a
- ;; row. The description below assumes that addresses are 64 bit aligned.
- ;; If not, there is an extra cycle latency which is not modelled.
- 
--;; FIXME:: This bit might need to be reworked when we get to
--;; tuning for the VFP because strictly speaking the ldm
--;; is sent to the LSU unit as is and there is only an
--;; issue restriction between the LSU and the VFP/ Neon unit.
--
- (define_insn_reservation "cortex_a9_load3_4" 5
-   (and (eq_attr "tune" "cortexa9")
-        (eq_attr "type" "load3, load4"))
-@@ -120,12 +112,13 @@
- 
- (define_insn_reservation "cortex_a9_store1_2" 0
-   (and (eq_attr "tune" "cortexa9")
--       (eq_attr "type" "store1, store2"))
-+       (eq_attr "type" "store1, store2, f_stores, f_stored"))
-   "cortex_a9_ls")
- 
- ;; Almost all our store multiples use an auto-increment
- ;; form. Don't issue back to back load and store multiples
- ;; because the load store unit will stall.
-+
- (define_insn_reservation "cortex_a9_store3_4" 0
-   (and (eq_attr "tune" "cortexa9")
-        (eq_attr "type" "store3, store4"))
-@@ -193,47 +186,79 @@
- (define_insn_reservation "cortex_a9_call"  0
-   (and (eq_attr "tune" "cortexa9")
-        (eq_attr "type" "call"))
--  "cortex_a9_issue_branch + cortex_a9_multcycle1 + cortex_a9_ls + cortex_a9_vfp")
-+  "cortex_a9_issue_branch + cortex_a9_multcycle1 + cortex_a9_ls + ca9_issue_vfp_neon")
- 
- 
- ;; Pipelining for VFP instructions.
--
--(define_insn_reservation "cortex_a9_ffarith" 1
-+;; Issue happens either along load store unit or the VFP / Neon unit.
-+;; Pipeline   Instruction Classification.
-+;; FPS - fcpys, ffariths, ffarithd,r_2_f,f_2_r
-+;; FP_ADD   - fadds, faddd, fcmps (1)
-+;; FPMUL   - fmul{s,d}, fmac{s,d}
-+;; FPDIV - fdiv{s,d}
-+(define_cpu_unit "ca9fps" "cortex_a9")
-+(define_cpu_unit "ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4" "cortex_a9")
-+(define_cpu_unit "ca9fp_mul1, ca9fp_mul2 , ca9fp_mul3, ca9fp_mul4" "cortex_a9")
-+(define_cpu_unit "ca9fp_ds1" "cortex_a9")
-+
-+
-+;; fmrs, fmrrd, fmstat and fmrx - The data is available after 1 cycle.
-+(define_insn_reservation "cortex_a9_fps" 2
-  (and (eq_attr "tune" "cortexa9")
--      (eq_attr "type" "fcpys,ffariths,ffarithd,fcmps,fcmpd,fconsts,fconstd"))
-- "cortex_a9_vfp")
-+      (eq_attr "type" "fcpys, fconsts, fconstd, ffariths, ffarithd, r_2_f, f_2_r, f_flag"))
-+ "ca9_issue_vfp_neon + ca9fps")
-+
-+(define_bypass 1
-+  "cortex_a9_fps"
-+  "cortex_a9_fadd, cortex_a9_fps, cortex_a9_fcmp, cortex_a9_dp, cortex_a9_dp_shift, cortex_a9_multiply")
-+
-+;; Scheduling on the FP_ADD pipeline.
-+(define_reservation "ca9fp_add" "ca9_issue_vfp_neon + ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4")
- 
- (define_insn_reservation "cortex_a9_fadd" 4
-- (and (eq_attr "tune" "cortexa9")
--      (eq_attr "type" "fadds,faddd,f_cvt"))
-- "cortex_a9_vfp")
--
--(define_insn_reservation "cortex_a9_fmuls" 5
-- (and (eq_attr "tune" "cortexa9")
--      (eq_attr "type" "fmuls"))
-- "cortex_a9_vfp")
--
--(define_insn_reservation "cortex_a9_fmuld" 6
-- (and (eq_attr "tune" "cortexa9")
--      (eq_attr "type" "fmuld"))
-- "cortex_a9_vfp*2")
-+  (and (eq_attr "tune" "cortexa9")
-+       (eq_attr "type" "fadds, faddd, f_cvt"))
-+  "ca9fp_add")
-+
-+(define_insn_reservation "cortex_a9_fcmp" 1
-+  (and (eq_attr "tune" "cortexa9")
-+      (eq_attr "type" "fcmps, fcmpd"))
-+ "ca9_issue_vfp_neon + ca9fp_add1")
-+
-+;; Scheduling for the Multiply and MAC instructions.
-+(define_reservation "ca9fmuls"
-+  "ca9fp_mul1 + ca9_issue_vfp_neon, ca9fp_mul2, ca9fp_mul3, ca9fp_mul4")
-+
-+(define_reservation "ca9fmuld"
-+  "ca9fp_mul1 + ca9_issue_vfp_neon, (ca9fp_mul1 + ca9fp_mul2), ca9fp_mul2, ca9fp_mul3, ca9fp_mul4")
-+
-+(define_insn_reservation "cortex_a9_fmuls" 4
-+  (and (eq_attr "tune" "cortexa9")
-+       (eq_attr "type" "fmuls"))
-+  "ca9fmuls")
-+
-+(define_insn_reservation "cortex_a9_fmuld" 5
-+  (and (eq_attr "tune" "cortexa9")
-+       (eq_attr "type" "fmuld"))
-+  "ca9fmuld")
- 
- (define_insn_reservation "cortex_a9_fmacs" 8
-- (and (eq_attr "tune" "cortexa9")
--      (eq_attr "type" "fmacs"))
-- "cortex_a9_vfp")
--
--(define_insn_reservation "cortex_a9_fmacd" 8
-- (and (eq_attr "tune" "cortexa9")
--      (eq_attr "type" "fmacd"))
-- "cortex_a9_vfp*2")
--
-+  (and (eq_attr "tune" "cortexa9")
-+       (eq_attr "type" "fmacs"))
-+  "ca9fmuls, ca9fp_add")
-+
-+(define_insn_reservation "cortex_a9_fmacd" 9
-+  (and (eq_attr "tune" "cortexa9")
-+       (eq_attr "type" "fmacd"))
-+  "ca9fmuld, ca9fp_add")
-+
-+;; Division pipeline description.
- (define_insn_reservation "cortex_a9_fdivs" 15
-- (and (eq_attr "tune" "cortexa9")
--      (eq_attr "type" "fdivs"))
-- "cortex_a9_vfp*10")
-+  (and (eq_attr "tune" "cortexa9")
-+       (eq_attr "type" "fdivs"))
-+  "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*14")
- 
- (define_insn_reservation "cortex_a9_fdivd" 25
-- (and (eq_attr "tune" "cortexa9")
--      (eq_attr "type" "fdivd"))
-- "cortex_a9_vfp*20")
-+  (and (eq_attr "tune" "cortexa9")
-+       (eq_attr "type" "fdivd"))
-+  "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*24")
-