diff options
Diffstat (limited to 'meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99371.patch')
-rw-r--r-- | meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99371.patch | 663 |
1 files changed, 0 insertions, 663 deletions
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99371.patch b/meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99371.patch deleted file mode 100644 index be102160c5..0000000000 --- a/meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99371.patch +++ /dev/null @@ -1,663 +0,0 @@ -2010-08-24 Andrew Stubbs <ams@codesourcery.com> - - Backport from FSF: - - 2010-08-07 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> - - * config/arm/cortex-a9.md: Rewrite VFP Pipeline description. - * config/arm/arm.c (arm_xscale_tune): Initialize sched_adjust_cost. - (arm_fastmul_tune,arm_slowmul_tune, arm_9e_tune): Likewise. - (arm_adjust_cost): Split into xscale_sched_adjust_cost and a - generic part. - (cortex_a9_sched_adjust_cost): New function. - (xscale_sched_adjust_cost): New function. - * config/arm/arm-protos.h (struct tune_params): New field - sched_adjust_cost. - * config/arm/arm-cores.def: Adjust costs for cortex-a9. - - 2010-04-17 Richard Earnshaw <rearnsha@arm.com> - - * arm-protos.h (tune_params): New structure. - * arm.c (current_tune): New variable. - (arm_constant_limit): Delete. - (struct processors): Add pointer to the tune parameters. - (arm_slowmul_tune): New tuning option. - (arm_fastmul_tune, arm_xscale_tune, arm_9e_tune): Likewise. - (all_cores): Adjust to pick up the tuning model. - (arm_constant_limit): New function. - (arm_override_options): Select the appropriate tuning model. Delete - initialization of arm_const_limit. - (arm_split_constant): Use the new constant-limit model. - (arm_rtx_costs): Pick up the current tuning model. - * arm.md (is_strongarm, is_xscale): Delete. - * arm-generic.md (load_ldsched_x, load_ldsched): Test explicitly - for Xscale variant architectures. - (mult_ldsched_strongarm, mult_ldsched): Similarly for StrongARM. - - 2010-08-23 Andrew Stubbs <ams@codesourcery.com> - - Backport from FSF: - -=== modified file 'gcc/config/arm/arm-cores.def' ---- old/gcc/config/arm/arm-cores.def 2010-07-29 15:53:39 +0000 -+++ new/gcc/config/arm/arm-cores.def 2010-08-24 13:15:54 +0000 -@@ -120,7 +120,7 @@ - ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, 9e) - ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, 9e) - ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, 9e) --ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, 9e) -+ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9) - ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, 9e) - ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, 9e) - ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, 9e) - -=== modified file 'gcc/config/arm/arm-generic.md' ---- old/gcc/config/arm/arm-generic.md 2007-08-02 09:49:31 +0000 -+++ new/gcc/config/arm/arm-generic.md 2010-08-24 13:15:54 +0000 -@@ -104,14 +104,14 @@ - (and (eq_attr "generic_sched" "yes") - (and (eq_attr "ldsched" "yes") - (and (eq_attr "type" "load_byte,load1") -- (eq_attr "is_xscale" "yes")))) -+ (eq_attr "tune" "xscale,iwmmxt,iwmmxt2")))) - "core") - - (define_insn_reservation "load_ldsched" 2 - (and (eq_attr "generic_sched" "yes") - (and (eq_attr "ldsched" "yes") - (and (eq_attr "type" "load_byte,load1") -- (eq_attr "is_xscale" "no")))) -+ (eq_attr "tune" "!xscale,iwmmxt,iwmmxt2")))) - "core") - - (define_insn_reservation "load_or_store" 2 -@@ -128,14 +128,16 @@ - (define_insn_reservation "mult_ldsched_strongarm" 3 - (and (eq_attr "generic_sched" "yes") - (and (eq_attr "ldsched" "yes") -- (and (eq_attr "is_strongarm" "yes") -+ (and (eq_attr "tune" -+ "strongarm,strongarm110,strongarm1100,strongarm1110") - (eq_attr "type" "mult")))) - "core*2") - - (define_insn_reservation "mult_ldsched" 4 - (and (eq_attr "generic_sched" "yes") - (and (eq_attr "ldsched" "yes") -- (and (eq_attr "is_strongarm" "no") -+ (and (eq_attr "tune" -+ "!strongarm,strongarm110,strongarm1100,strongarm1110") - (eq_attr "type" "mult")))) - "core*4") - - -=== modified file 'gcc/config/arm/arm-protos.h' ---- old/gcc/config/arm/arm-protos.h 2010-08-10 13:31:21 +0000 -+++ new/gcc/config/arm/arm-protos.h 2010-08-24 13:15:54 +0000 -@@ -214,4 +214,17 @@ - - extern void arm_order_regs_for_local_alloc (void); - -+#ifdef RTX_CODE -+/* This needs to be here because we need RTX_CODE and similar. */ -+ -+struct tune_params -+{ -+ bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool); -+ bool (*sched_adjust_cost) (rtx, rtx, rtx, int *); -+ int constant_limit; -+}; -+ -+extern const struct tune_params *current_tune; -+#endif /* RTX_CODE */ -+ - #endif /* ! GCC_ARM_PROTOS_H */ - -=== modified file 'gcc/config/arm/arm.c' ---- old/gcc/config/arm/arm.c 2010-08-20 16:21:01 +0000 -+++ new/gcc/config/arm/arm.c 2010-08-24 13:15:54 +0000 -@@ -228,6 +228,8 @@ - static void arm_trampoline_init (rtx, tree, rtx); - static rtx arm_trampoline_adjust_address (rtx); - static rtx arm_pic_static_addr (rtx orig, rtx reg); -+static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *); -+static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *); - static bool arm_vector_alignment_reachable (const_tree type, bool is_packed); - static bool arm_builtin_support_vector_misalignment (enum machine_mode mode, - const_tree type, -@@ -545,6 +547,9 @@ - /* The processor for which instructions should be scheduled. */ - enum processor_type arm_tune = arm_none; - -+/* The current tuning set. */ -+const struct tune_params *current_tune; -+ - /* The default processor used if not overridden by commandline. */ - static enum processor_type arm_default_cpu = arm_none; - -@@ -720,9 +725,6 @@ - the next function. */ - static int after_arm_reorg = 0; - --/* The maximum number of insns to be used when loading a constant. */ --static int arm_constant_limit = 3; -- - enum arm_pcs arm_pcs_default; - - /* For an explanation of these variables, see final_prescan_insn below. */ -@@ -761,8 +763,44 @@ - enum processor_type core; - const char *arch; - const unsigned long flags; -- bool (* rtx_costs) (rtx, enum rtx_code, enum rtx_code, int *, bool); --}; -+ const struct tune_params *const tune; -+}; -+ -+const struct tune_params arm_slowmul_tune = -+{ -+ arm_slowmul_rtx_costs, -+ NULL, -+ 3 -+}; -+ -+const struct tune_params arm_fastmul_tune = -+{ -+ arm_fastmul_rtx_costs, -+ NULL, -+ 1 -+}; -+ -+const struct tune_params arm_xscale_tune = -+{ -+ arm_xscale_rtx_costs, -+ xscale_sched_adjust_cost, -+ 2 -+}; -+ -+const struct tune_params arm_9e_tune = -+{ -+ arm_9e_rtx_costs, -+ NULL, -+ 1 -+}; -+ -+const struct tune_params arm_cortex_a9_tune = -+{ -+ arm_9e_rtx_costs, -+ cortex_a9_sched_adjust_cost, -+ 1 -+}; -+ - - /* Not all of these give usefully different compilation alternatives, - but there is no simple way of generalizing them. */ -@@ -770,7 +808,7 @@ - { - /* ARM Cores */ - #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \ -- {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs}, -+ {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune}, - #include "arm-cores.def" - #undef ARM_CORE - {NULL, arm_none, NULL, 0, NULL} -@@ -779,7 +817,7 @@ - static const struct processors all_architectures[] = - { - /* ARM Architectures */ -- /* We don't specify rtx_costs here as it will be figured out -+ /* We don't specify tuning costs here as it will be figured out - from the core. */ - - {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL}, -@@ -928,6 +966,13 @@ - TLS_LE32 - }; - -+/* The maximum number of insns to be used when loading a constant. */ -+inline static int -+arm_constant_limit (bool size_p) -+{ -+ return size_p ? 1 : current_tune->constant_limit; -+} -+ - /* Emit an insn that's a simple single-set. Both the operands must be known - to be valid. */ - inline static rtx -@@ -1478,6 +1523,7 @@ - } - - tune_flags = all_cores[(int)arm_tune].flags; -+ current_tune = all_cores[(int)arm_tune].tune; - - if (target_fp16_format_name) - { -@@ -1875,26 +1921,12 @@ - - if (optimize_size) - { -- arm_constant_limit = 1; -- - /* If optimizing for size, bump the number of instructions that we - are prepared to conditionally execute (even on a StrongARM). */ - max_insns_skipped = 6; - } - else - { -- /* For processors with load scheduling, it never costs more than -- 2 cycles to load a constant, and the load scheduler may well -- reduce that to 1. */ -- if (arm_ld_sched) -- arm_constant_limit = 1; -- -- /* On XScale the longer latency of a load makes it more difficult -- to achieve a good schedule, so it's faster to synthesize -- constants that can be done in two insns. */ -- if (arm_tune_xscale) -- arm_constant_limit = 2; -- - /* StrongARM has early execution of branches, so a sequence - that is worth skipping is shorter. */ - if (arm_tune_strongarm) -@@ -2423,7 +2455,8 @@ - && !cond - && (arm_gen_constant (code, mode, NULL_RTX, val, target, source, - 1, 0) -- > arm_constant_limit + (code != SET))) -+ > (arm_constant_limit (optimize_function_for_size_p (cfun)) -+ + (code != SET)))) - { - if (code == SET) - { -@@ -7771,9 +7804,9 @@ - (enum rtx_code) outer_code, total); - } - else -- return all_cores[(int)arm_tune].rtx_costs (x, (enum rtx_code) code, -- (enum rtx_code) outer_code, -- total, speed); -+ return current_tune->rtx_costs (x, (enum rtx_code) code, -+ (enum rtx_code) outer_code, -+ total, speed); - } - - /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not -@@ -7918,7 +7951,8 @@ - so it can be ignored. */ - - static bool --arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed) -+arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, -+ int *total, bool speed) - { - enum machine_mode mode = GET_MODE (x); - -@@ -8119,15 +8153,15 @@ - return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x); - } - --static int --arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost) -+/* Adjust cost hook for XScale. */ -+static bool -+xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost) - { - rtx i_pat, d_pat; - - /* Some true dependencies can have a higher cost depending - on precisely how certain input operands are used. */ -- if (arm_tune_xscale -- && REG_NOTE_KIND (link) == 0 -+ if (REG_NOTE_KIND (link) == 0 - && recog_memoized (insn) >= 0 - && recog_memoized (dep) >= 0) - { -@@ -8161,10 +8195,106 @@ - - if (reg_overlap_mentioned_p (recog_data.operand[opno], - shifted_operand)) -- return 2; -+ { -+ *cost = 2; -+ return false; -+ } - } - } - } -+ return true; -+} -+ -+/* Adjust cost hook for Cortex A9. */ -+static bool -+cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost) -+{ -+ switch (REG_NOTE_KIND (link)) -+ { -+ case REG_DEP_ANTI: -+ *cost = 0; -+ return false; -+ -+ case REG_DEP_TRUE: -+ case REG_DEP_OUTPUT: -+ if (recog_memoized (insn) >= 0 -+ && recog_memoized (dep) >= 0) -+ { -+ if (GET_CODE (PATTERN (insn)) == SET) -+ { -+ if (GET_MODE_CLASS -+ (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT -+ || GET_MODE_CLASS -+ (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT) -+ { -+ enum attr_type attr_type_insn = get_attr_type (insn); -+ enum attr_type attr_type_dep = get_attr_type (dep); -+ -+ /* By default all dependencies of the form -+ s0 = s0 <op> s1 -+ s0 = s0 <op> s2 -+ have an extra latency of 1 cycle because -+ of the input and output dependency in this -+ case. However this gets modeled as an true -+ dependency and hence all these checks. */ -+ if (REG_P (SET_DEST (PATTERN (insn))) -+ && REG_P (SET_DEST (PATTERN (dep))) -+ && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)), -+ SET_DEST (PATTERN (dep)))) -+ { -+ /* FMACS is a special case where the dependant -+ instruction can be issued 3 cycles before -+ the normal latency in case of an output -+ dependency. */ -+ if ((attr_type_insn == TYPE_FMACS -+ || attr_type_insn == TYPE_FMACD) -+ && (attr_type_dep == TYPE_FMACS -+ || attr_type_dep == TYPE_FMACD)) -+ { -+ if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT) -+ *cost = insn_default_latency (dep) - 3; -+ else -+ *cost = insn_default_latency (dep); -+ return false; -+ } -+ else -+ { -+ if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT) -+ *cost = insn_default_latency (dep) + 1; -+ else -+ *cost = insn_default_latency (dep); -+ } -+ return false; -+ } -+ } -+ } -+ } -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+ -+ return true; -+} -+ -+/* This function implements the target macro TARGET_SCHED_ADJUST_COST. -+ It corrects the value of COST based on the relationship between -+ INSN and DEP through the dependence LINK. It returns the new -+ value. There is a per-core adjust_cost hook to adjust scheduler costs -+ and the per-core hook can choose to completely override the generic -+ adjust_cost function. Only put bits of code into arm_adjust_cost that -+ are common across all cores. */ -+static int -+arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost) -+{ -+ rtx i_pat, d_pat; -+ -+ if (current_tune->sched_adjust_cost != NULL) -+ { -+ if (!current_tune->sched_adjust_cost (insn, link, dep, &cost)) -+ return cost; -+ } - - /* XXX This is not strictly true for the FPA. */ - if (REG_NOTE_KIND (link) == REG_DEP_ANTI -@@ -8187,7 +8317,8 @@ - constant pool are cached, and that others will miss. This is a - hack. */ - -- if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem)) -+ if ((GET_CODE (src_mem) == SYMBOL_REF -+ && CONSTANT_POOL_ADDRESS_P (src_mem)) - || reg_mentioned_p (stack_pointer_rtx, src_mem) - || reg_mentioned_p (frame_pointer_rtx, src_mem) - || reg_mentioned_p (hard_frame_pointer_rtx, src_mem)) - -=== modified file 'gcc/config/arm/arm.md' ---- old/gcc/config/arm/arm.md 2010-08-23 14:39:12 +0000 -+++ new/gcc/config/arm/arm.md 2010-08-24 13:15:54 +0000 -@@ -150,13 +150,6 @@ - ; patterns that share the same RTL in both ARM and Thumb code. - (define_attr "is_thumb" "no,yes" (const (symbol_ref "thumb_code"))) - --; IS_STRONGARM is set to 'yes' when compiling for StrongARM, it affects --; scheduling decisions for the load unit and the multiplier. --(define_attr "is_strongarm" "no,yes" (const (symbol_ref "arm_tune_strongarm"))) -- --; IS_XSCALE is set to 'yes' when compiling for XScale. --(define_attr "is_xscale" "no,yes" (const (symbol_ref "arm_tune_xscale"))) -- - ;; Operand number of an input operand that is shifted. Zero if the - ;; given instruction does not shift one of its input operands. - (define_attr "shift" "" (const_int 0)) - -=== modified file 'gcc/config/arm/cortex-a9.md' ---- old/gcc/config/arm/cortex-a9.md 2009-10-31 16:40:03 +0000 -+++ new/gcc/config/arm/cortex-a9.md 2010-08-24 13:15:54 +0000 -@@ -2,8 +2,10 @@ - ;; Copyright (C) 2008, 2009 Free Software Foundation, Inc. - ;; Originally written by CodeSourcery for VFP. - ;; --;; Integer core pipeline description contributed by ARM Ltd. --;; -+;; Rewritten by Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+;; Integer Pipeline description contributed by ARM Ltd. -+;; VFP Pipeline description rewritten and contributed by ARM Ltd. -+ - ;; This file is part of GCC. - ;; - ;; GCC is free software; you can redistribute it and/or modify it -@@ -22,28 +24,27 @@ - - (define_automaton "cortex_a9") - --;; The Cortex-A9 integer core is modelled as a dual issue pipeline that has -+;; The Cortex-A9 core is modelled as a dual issue pipeline that has - ;; the following components. - ;; 1. 1 Load Store Pipeline. - ;; 2. P0 / main pipeline for data processing instructions. - ;; 3. P1 / Dual pipeline for Data processing instructions. - ;; 4. MAC pipeline for multiply as well as multiply - ;; and accumulate instructions. --;; 5. 1 VFP / Neon pipeline. --;; The Load/Store and VFP/Neon pipeline are multiplexed. -+;; 5. 1 VFP and an optional Neon unit. -+;; The Load/Store, VFP and Neon issue pipeline are multiplexed. - ;; The P0 / main pipeline and M1 stage of the MAC pipeline are - ;; multiplexed. - ;; The P1 / dual pipeline and M2 stage of the MAC pipeline are - ;; multiplexed. --;; There are only 4 register read ports and hence at any point of -+;; There are only 4 integer register read ports and hence at any point of - ;; time we can't have issue down the E1 and the E2 ports unless - ;; of course there are bypass paths that get exercised. - ;; Both P0 and P1 have 2 stages E1 and E2. - ;; Data processing instructions issue to E1 or E2 depending on - ;; whether they have an early shift or not. - -- --(define_cpu_unit "cortex_a9_vfp, cortex_a9_ls" "cortex_a9") -+(define_cpu_unit "ca9_issue_vfp_neon, cortex_a9_ls" "cortex_a9") - (define_cpu_unit "cortex_a9_p0_e1, cortex_a9_p0_e2" "cortex_a9") - (define_cpu_unit "cortex_a9_p1_e1, cortex_a9_p1_e2" "cortex_a9") - (define_cpu_unit "cortex_a9_p0_wb, cortex_a9_p1_wb" "cortex_a9") -@@ -71,11 +72,7 @@ - - ;; Issue at the same time along the load store pipeline and - ;; the VFP / Neon pipeline is not possible. --;; FIXME:: At some point we need to model the issue --;; of the load store and the vfp being shared rather than anything else. -- --(exclusion_set "cortex_a9_ls" "cortex_a9_vfp") -- -+(exclusion_set "cortex_a9_ls" "ca9_issue_vfp_neon") - - ;; Default data processing instruction without any shift - ;; The only exception to this is the mov instruction -@@ -101,18 +98,13 @@ - - (define_insn_reservation "cortex_a9_load1_2" 4 - (and (eq_attr "tune" "cortexa9") -- (eq_attr "type" "load1, load2, load_byte")) -+ (eq_attr "type" "load1, load2, load_byte, f_loads, f_loadd")) - "cortex_a9_ls") - - ;; Loads multiples and store multiples can't be issued for 2 cycles in a - ;; row. The description below assumes that addresses are 64 bit aligned. - ;; If not, there is an extra cycle latency which is not modelled. - --;; FIXME:: This bit might need to be reworked when we get to --;; tuning for the VFP because strictly speaking the ldm --;; is sent to the LSU unit as is and there is only an --;; issue restriction between the LSU and the VFP/ Neon unit. -- - (define_insn_reservation "cortex_a9_load3_4" 5 - (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "load3, load4")) -@@ -120,12 +112,13 @@ - - (define_insn_reservation "cortex_a9_store1_2" 0 - (and (eq_attr "tune" "cortexa9") -- (eq_attr "type" "store1, store2")) -+ (eq_attr "type" "store1, store2, f_stores, f_stored")) - "cortex_a9_ls") - - ;; Almost all our store multiples use an auto-increment - ;; form. Don't issue back to back load and store multiples - ;; because the load store unit will stall. -+ - (define_insn_reservation "cortex_a9_store3_4" 0 - (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "store3, store4")) -@@ -193,47 +186,79 @@ - (define_insn_reservation "cortex_a9_call" 0 - (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "call")) -- "cortex_a9_issue_branch + cortex_a9_multcycle1 + cortex_a9_ls + cortex_a9_vfp") -+ "cortex_a9_issue_branch + cortex_a9_multcycle1 + cortex_a9_ls + ca9_issue_vfp_neon") - - - ;; Pipelining for VFP instructions. -- --(define_insn_reservation "cortex_a9_ffarith" 1 -+;; Issue happens either along load store unit or the VFP / Neon unit. -+;; Pipeline Instruction Classification. -+;; FPS - fcpys, ffariths, ffarithd,r_2_f,f_2_r -+;; FP_ADD - fadds, faddd, fcmps (1) -+;; FPMUL - fmul{s,d}, fmac{s,d} -+;; FPDIV - fdiv{s,d} -+(define_cpu_unit "ca9fps" "cortex_a9") -+(define_cpu_unit "ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4" "cortex_a9") -+(define_cpu_unit "ca9fp_mul1, ca9fp_mul2 , ca9fp_mul3, ca9fp_mul4" "cortex_a9") -+(define_cpu_unit "ca9fp_ds1" "cortex_a9") -+ -+ -+;; fmrs, fmrrd, fmstat and fmrx - The data is available after 1 cycle. -+(define_insn_reservation "cortex_a9_fps" 2 - (and (eq_attr "tune" "cortexa9") -- (eq_attr "type" "fcpys,ffariths,ffarithd,fcmps,fcmpd,fconsts,fconstd")) -- "cortex_a9_vfp") -+ (eq_attr "type" "fcpys, fconsts, fconstd, ffariths, ffarithd, r_2_f, f_2_r, f_flag")) -+ "ca9_issue_vfp_neon + ca9fps") -+ -+(define_bypass 1 -+ "cortex_a9_fps" -+ "cortex_a9_fadd, cortex_a9_fps, cortex_a9_fcmp, cortex_a9_dp, cortex_a9_dp_shift, cortex_a9_multiply") -+ -+;; Scheduling on the FP_ADD pipeline. -+(define_reservation "ca9fp_add" "ca9_issue_vfp_neon + ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4") - - (define_insn_reservation "cortex_a9_fadd" 4 -- (and (eq_attr "tune" "cortexa9") -- (eq_attr "type" "fadds,faddd,f_cvt")) -- "cortex_a9_vfp") -- --(define_insn_reservation "cortex_a9_fmuls" 5 -- (and (eq_attr "tune" "cortexa9") -- (eq_attr "type" "fmuls")) -- "cortex_a9_vfp") -- --(define_insn_reservation "cortex_a9_fmuld" 6 -- (and (eq_attr "tune" "cortexa9") -- (eq_attr "type" "fmuld")) -- "cortex_a9_vfp*2") -+ (and (eq_attr "tune" "cortexa9") -+ (eq_attr "type" "fadds, faddd, f_cvt")) -+ "ca9fp_add") -+ -+(define_insn_reservation "cortex_a9_fcmp" 1 -+ (and (eq_attr "tune" "cortexa9") -+ (eq_attr "type" "fcmps, fcmpd")) -+ "ca9_issue_vfp_neon + ca9fp_add1") -+ -+;; Scheduling for the Multiply and MAC instructions. -+(define_reservation "ca9fmuls" -+ "ca9fp_mul1 + ca9_issue_vfp_neon, ca9fp_mul2, ca9fp_mul3, ca9fp_mul4") -+ -+(define_reservation "ca9fmuld" -+ "ca9fp_mul1 + ca9_issue_vfp_neon, (ca9fp_mul1 + ca9fp_mul2), ca9fp_mul2, ca9fp_mul3, ca9fp_mul4") -+ -+(define_insn_reservation "cortex_a9_fmuls" 4 -+ (and (eq_attr "tune" "cortexa9") -+ (eq_attr "type" "fmuls")) -+ "ca9fmuls") -+ -+(define_insn_reservation "cortex_a9_fmuld" 5 -+ (and (eq_attr "tune" "cortexa9") -+ (eq_attr "type" "fmuld")) -+ "ca9fmuld") - - (define_insn_reservation "cortex_a9_fmacs" 8 -- (and (eq_attr "tune" "cortexa9") -- (eq_attr "type" "fmacs")) -- "cortex_a9_vfp") -- --(define_insn_reservation "cortex_a9_fmacd" 8 -- (and (eq_attr "tune" "cortexa9") -- (eq_attr "type" "fmacd")) -- "cortex_a9_vfp*2") -- -+ (and (eq_attr "tune" "cortexa9") -+ (eq_attr "type" "fmacs")) -+ "ca9fmuls, ca9fp_add") -+ -+(define_insn_reservation "cortex_a9_fmacd" 9 -+ (and (eq_attr "tune" "cortexa9") -+ (eq_attr "type" "fmacd")) -+ "ca9fmuld, ca9fp_add") -+ -+;; Division pipeline description. - (define_insn_reservation "cortex_a9_fdivs" 15 -- (and (eq_attr "tune" "cortexa9") -- (eq_attr "type" "fdivs")) -- "cortex_a9_vfp*10") -+ (and (eq_attr "tune" "cortexa9") -+ (eq_attr "type" "fdivs")) -+ "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*14") - - (define_insn_reservation "cortex_a9_fdivd" 25 -- (and (eq_attr "tune" "cortexa9") -- (eq_attr "type" "fdivd")) -- "cortex_a9_vfp*20") -+ (and (eq_attr "tune" "cortexa9") -+ (eq_attr "type" "fdivd")) -+ "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*24") - |