diff options
Diffstat (limited to 'toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99495.patch')
-rw-r--r-- | toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99495.patch | 784 |
1 files changed, 784 insertions, 0 deletions
diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99495.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99495.patch new file mode 100644 index 0000000000..bb866ce8d9 --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99495.patch @@ -0,0 +1,784 @@ +2011-03-24 Revital Eres <revital.eres@linaro.org> + + gcc/ + * loop-doloop.c (doloop_condition_get): Support new form of + doloop pattern and use prev_nondebug_insn instead of PREV_INSN. + * config/arm/thumb2.md (*thumb2_addsi3_compare0): Remove "*". + (doloop_end): New. + * config/arm/arm.md (*addsi3_compare0): Remove "*". + * ddg.c (check_closing_branch_deps, get_node_of_insn_uid): + New functions. + (create_ddg): Pass sbitmap containing do-loop related + instructions instead of closing_branch_deps parameter and call + check_closing_branch_deps function. + * ddg.h (create_ddg): Adjust the function declaration. + * modulo-sched.c (PS_STAGE_COUNT): Rename to CALC_STAGE_COUNT + and redefine. + (doloop_register_get): Handle NONDEBUG_INSN_P. + (stage_count): New field in struct partial_schedule. + (mark_doloop_insns, calculate_stage_count): New functions. + (normalize_sched_times): Rename to reset_sched_times and handle + incrementing the sched time of the nodes by a constant value + passed as parameter. + (duplicate_insns_of_cycles): Skip closing branch. + (sms_schedule_by_order): Schedule closing branch when + closing_branch_deps is true. + (ps_insn_find_column): Handle closing branch. + (sms_schedule): Call reset_sched_times and handle case where + do-loop pattern is not decoupled from the other loop instructions. + Support new form of doloop pattern. + (ps_insert_empty_row): Update calls to normalize_sched_times + and rotate_partial_schedule functions. + +=== modified file 'gcc/config/arm/arm.md' +--- old/gcc/config/arm/arm.md 2011-03-11 14:26:34 +0000 ++++ new/gcc/config/arm/arm.md 2011-03-24 07:45:38 +0000 +@@ -734,7 +734,7 @@ + "" + ) + +-(define_insn "*addsi3_compare0" ++(define_insn "addsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (match_operand:SI 1 "s_register_operand" "r, r") + +=== modified file 'gcc/config/arm/thumb2.md' +--- old/gcc/config/arm/thumb2.md 2011-02-08 10:51:58 +0000 ++++ new/gcc/config/arm/thumb2.md 2011-03-24 07:45:38 +0000 +@@ -1194,7 +1194,7 @@ + (set_attr "length" "2")] + ) + +-(define_insn "*thumb2_addsi3_compare0" ++(define_insn "thumb2_addsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (match_operand:SI 1 "s_register_operand" "l, 0, r") +@@ -1445,3 +1445,56 @@ + [(set_attr "length" "4,4,16") + (set_attr "predicable" "yes")] + ) ++ ++ ++;; Define the subtract-one-and-jump insns so loop.c ++;; knows what to generate. ++(define_expand "doloop_end" ++ [(use (match_operand 0 "" "")) ; loop pseudo ++ (use (match_operand 1 "" "")) ; iterations; zero if unknown ++ (use (match_operand 2 "" "")) ; max iterations ++ (use (match_operand 3 "" "")) ; loop level ++ (use (match_operand 4 "" ""))] ; label ++ "TARGET_32BIT" ++ " ++ { ++ /* Currently SMS relies on the do-loop pattern to recognize loops ++ where (1) the control part consists of all insns defining and/or ++ using a certain 'count' register and (2) the loop count can be ++ adjusted by modifying this register prior to the loop. ++ ??? The possible introduction of a new block to initialize the ++ new IV can potentially affect branch optimizations. */ ++ if (optimize > 0 && flag_modulo_sched) ++ { ++ rtx s0; ++ rtx bcomp; ++ rtx loc_ref; ++ rtx cc_reg; ++ rtx insn; ++ rtx cmp; ++ ++ /* Only use this on innermost loops. */ ++ if (INTVAL (operands[3]) > 1) ++ FAIL; ++ ++ if (GET_MODE (operands[0]) != SImode) ++ FAIL; ++ ++ s0 = operands [0]; ++ if (TARGET_THUMB2) ++ insn = emit_insn (gen_thumb2_addsi3_compare0 (s0, s0, GEN_INT (-1))); ++ else ++ insn = emit_insn (gen_addsi3_compare0 (s0, s0, GEN_INT (-1))); ++ ++ cmp = XVECEXP (PATTERN (insn), 0, 0); ++ cc_reg = SET_DEST (cmp); ++ bcomp = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx); ++ loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands [4]); ++ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, ++ gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp, ++ loc_ref, pc_rtx))); ++ DONE; ++ }else ++ FAIL; ++ }") ++ + +=== modified file 'gcc/ddg.c' +--- old/gcc/ddg.c 2010-07-19 08:58:53 +0000 ++++ new/gcc/ddg.c 2011-03-24 07:45:38 +0000 +@@ -60,6 +60,8 @@ + static ddg_edge_ptr create_ddg_edge (ddg_node_ptr, ddg_node_ptr, dep_type, + dep_data_type, int, int); + static void add_edge_to_ddg (ddg_ptr g, ddg_edge_ptr); ++static ddg_node_ptr get_node_of_insn_uid (ddg_ptr, int); ++ + + /* Auxiliary variable for mem_read_insn_p/mem_write_insn_p. */ + static bool mem_ref_p; +@@ -450,12 +452,65 @@ + sched_free_deps (head, tail, false); + } + ++/* Given DOLOOP_INSNS which holds the instructions that ++ belong to the do-loop part; mark closing_branch_deps field in ddg G ++ as TRUE if the do-loop part's instructions are dependent on the other ++ loop instructions. Otherwise mark it as FALSE. */ ++static void ++check_closing_branch_deps (ddg_ptr g, sbitmap doloop_insns) ++{ ++ sbitmap_iterator sbi; ++ unsigned int u = 0; ++ ++ EXECUTE_IF_SET_IN_SBITMAP (doloop_insns, 0, u, sbi) ++ { ++ ddg_edge_ptr e; ++ ddg_node_ptr u_node = get_node_of_insn_uid (g, u); ++ ++ gcc_assert (u_node); ++ ++ for (e = u_node->in; e != 0; e = e->next_in) ++ { ++ ddg_node_ptr v_node = e->src; ++ ++ if (((unsigned int) INSN_UID (v_node->insn) == u) ++ || DEBUG_INSN_P (v_node->insn)) ++ continue; ++ ++ /* Ignore dependencies between memory writes and the ++ jump. */ ++ if (JUMP_P (u_node->insn) ++ && e->type == OUTPUT_DEP ++ && mem_write_insn_p (v_node->insn)) ++ continue; ++ if (!TEST_BIT (doloop_insns, INSN_UID (v_node->insn))) ++ { ++ g->closing_branch_deps = 1; ++ return; ++ } ++ } ++ for (e = u_node->out; e != 0; e = e->next_out) ++ { ++ ddg_node_ptr v_node = e->dest; ++ ++ if (((unsigned int) INSN_UID (v_node->insn) == u) ++ || DEBUG_INSN_P (v_node->insn)) ++ continue; ++ if (!TEST_BIT (doloop_insns, INSN_UID (v_node->insn))) ++ { ++ g->closing_branch_deps = 1; ++ return; ++ } ++ } ++ } ++ g->closing_branch_deps = 0; ++} + + /* Given a basic block, create its DDG and return a pointer to a variable + of ddg type that represents it. + Initialize the ddg structure fields to the appropriate values. */ + ddg_ptr +-create_ddg (basic_block bb, int closing_branch_deps) ++create_ddg (basic_block bb, sbitmap doloop_insns) + { + ddg_ptr g; + rtx insn, first_note; +@@ -465,7 +520,6 @@ + g = (ddg_ptr) xcalloc (1, sizeof (struct ddg)); + + g->bb = bb; +- g->closing_branch_deps = closing_branch_deps; + + /* Count the number of insns in the BB. */ + for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); +@@ -538,6 +592,11 @@ + /* Build the data dependency graph. */ + build_intra_loop_deps (g); + build_inter_loop_deps (g); ++ ++ /* Check whether the do-loop part is decoupled from the other loop ++ instructions. */ ++ check_closing_branch_deps (g, doloop_insns); ++ + return g; + } + +@@ -831,6 +890,18 @@ + return NULL; + } + ++/* Given the uid of an instruction UID return the node that represents it. */ ++static ddg_node_ptr ++get_node_of_insn_uid (ddg_ptr g, int uid) ++{ ++ int i; ++ ++ for (i = 0; i < g->num_nodes; i++) ++ if (uid == INSN_UID (g->nodes[i].insn)) ++ return &g->nodes[i]; ++ return NULL; ++} ++ + /* Given a set OPS of nodes in the DDG, find the set of their successors + which are not in OPS, and set their bits in SUCC. Bits corresponding to + OPS are cleared from SUCC. Leaves the other bits in SUCC unchanged. */ + +=== modified file 'gcc/ddg.h' +--- old/gcc/ddg.h 2009-11-25 10:55:54 +0000 ++++ new/gcc/ddg.h 2011-03-24 07:45:38 +0000 +@@ -167,7 +167,7 @@ + }; + + +-ddg_ptr create_ddg (basic_block, int closing_branch_deps); ++ddg_ptr create_ddg (basic_block, sbitmap); + void free_ddg (ddg_ptr); + + void print_ddg (FILE *, ddg_ptr); + +=== modified file 'gcc/loop-doloop.c' +--- old/gcc/loop-doloop.c 2010-07-19 08:58:53 +0000 ++++ new/gcc/loop-doloop.c 2011-03-24 07:45:38 +0000 +@@ -78,6 +78,8 @@ + rtx inc_src; + rtx condition; + rtx pattern; ++ rtx cc_reg = NULL_RTX; ++ rtx reg_orig = NULL_RTX; + + /* The canonical doloop pattern we expect has one of the following + forms: +@@ -96,7 +98,16 @@ + 2) (set (reg) (plus (reg) (const_int -1)) + (set (pc) (if_then_else (reg != 0) + (label_ref (label)) +- (pc))). */ ++ (pc))). ++ ++ Some targets (ARM) do the comparison before the branch, as in the ++ following form: ++ ++ 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0))) ++ (set (reg) (plus (reg) (const_int -1)))]) ++ (set (pc) (if_then_else (cc == NE) ++ (label_ref (label)) ++ (pc))) */ + + pattern = PATTERN (doloop_pat); + +@@ -104,19 +115,47 @@ + { + rtx cond; + rtx prev_insn = prev_nondebug_insn (doloop_pat); ++ rtx cmp_arg1, cmp_arg2; ++ rtx cmp_orig; + +- /* We expect the decrement to immediately precede the branch. */ ++ /* In case the pattern is not PARALLEL we expect two forms ++ of doloop which are cases 2) and 3) above: in case 2) the ++ decrement immediately precedes the branch, while in case 3) ++ the compare and decrement instructions immediately precede ++ the branch. */ + + if (prev_insn == NULL_RTX || !INSN_P (prev_insn)) + return 0; + + cmp = pattern; +- inc = PATTERN (PREV_INSN (doloop_pat)); ++ if (GET_CODE (PATTERN (prev_insn)) == PARALLEL) ++ { ++ /* The third case: the compare and decrement instructions ++ immediately precede the branch. */ ++ cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0); ++ if (GET_CODE (cmp_orig) != SET) ++ return 0; ++ if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE) ++ return 0; ++ cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0); ++ cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1); ++ if (cmp_arg2 != const0_rtx ++ || GET_CODE (cmp_arg1) != PLUS) ++ return 0; ++ reg_orig = XEXP (cmp_arg1, 0); ++ if (XEXP (cmp_arg1, 1) != GEN_INT (-1) ++ || !REG_P (reg_orig)) ++ return 0; ++ cc_reg = SET_DEST (cmp_orig); ++ ++ inc = XVECEXP (PATTERN (prev_insn), 0, 1); ++ } ++ else ++ inc = PATTERN (prev_insn); + /* We expect the condition to be of the form (reg != 0) */ + cond = XEXP (SET_SRC (cmp), 0); + if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx) + return 0; +- + } + else + { +@@ -162,11 +201,15 @@ + return 0; + + if ((XEXP (condition, 0) == reg) ++ /* For the third case: */ ++ || ((cc_reg != NULL_RTX) ++ && (XEXP (condition, 0) == cc_reg) ++ && (reg_orig == reg)) + || (GET_CODE (XEXP (condition, 0)) == PLUS +- && XEXP (XEXP (condition, 0), 0) == reg)) ++ && XEXP (XEXP (condition, 0), 0) == reg)) + { + if (GET_CODE (pattern) != PARALLEL) +- /* The second form we expect: ++ /* For the second form we expect: + + (set (reg) (plus (reg) (const_int -1)) + (set (pc) (if_then_else (reg != 0) +@@ -181,7 +224,24 @@ + (set (reg) (plus (reg) (const_int -1))) + (additional clobbers and uses)]) + +- So we return that form instead. ++ For the third form we expect: ++ ++ (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0)) ++ (set (reg) (plus (reg) (const_int -1)))]) ++ (set (pc) (if_then_else (cc == NE) ++ (label_ref (label)) ++ (pc))) ++ ++ which is equivalent to the following: ++ ++ (parallel [(set (cc) (compare (reg, 1)) ++ (set (reg) (plus (reg) (const_int -1))) ++ (set (pc) (if_then_else (NE == cc) ++ (label_ref (label)) ++ (pc))))]) ++ ++ So we return the second form instead for the two cases. ++ + */ + condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx); + + +=== modified file 'gcc/modulo-sched.c' +--- old/gcc/modulo-sched.c 2009-11-25 10:55:54 +0000 ++++ new/gcc/modulo-sched.c 2011-03-24 07:45:38 +0000 +@@ -116,8 +116,10 @@ + + /* The number of different iterations the nodes in ps span, assuming + the stage boundaries are placed efficiently. */ +-#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \ +- + 1 + (ps)->ii - 1) / (ps)->ii) ++#define CALC_STAGE_COUNT(min_cycle,max_cycle,ii) ((max_cycle - min_cycle \ ++ + 1 + ii - 1) / ii) ++/* The stage count of ps. */ ++#define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count) + + /* A single instruction in the partial schedule. */ + struct ps_insn +@@ -155,6 +157,8 @@ + int max_cycle; + + ddg_ptr g; /* The DDG of the insns in the partial schedule. */ ++ ++ int stage_count; /* The stage count of the partial schedule. */ + }; + + /* We use this to record all the register replacements we do in +@@ -195,6 +199,7 @@ + rtx, rtx); + static void duplicate_insns_of_cycles (partial_schedule_ptr, + int, int, int, rtx); ++static int calculate_stage_count (partial_schedule_ptr ps); + + #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap) + #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time) +@@ -310,10 +315,10 @@ + either a single (parallel) branch-on-count or a (non-parallel) + branch immediately preceded by a single (decrement) insn. */ + first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail +- : PREV_INSN (tail)); ++ : prev_nondebug_insn (tail)); + + for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn)) +- if (reg_mentioned_p (reg, insn)) ++ if (reg_mentioned_p (reg, insn) && NONDEBUG_INSN_P (insn)) + { + if (dump_file) + { +@@ -332,6 +337,24 @@ + #endif + } + ++/* Mark in DOLOOP_INSNS the instructions that belong to the do-loop part. ++ Use TAIL to recognize that part. */ ++static void ++mark_doloop_insns (sbitmap doloop_insns, rtx tail) ++{ ++ rtx first_insn_not_to_check, insn; ++ ++ /* This is the first instruction which belongs the doloop part. */ ++ first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail ++ : prev_nondebug_insn (tail)); ++ ++ sbitmap_zero (doloop_insns); ++ for (insn = first_insn_not_to_check; insn != NEXT_INSN (tail); ++ insn = NEXT_INSN (insn)) ++ if (NONDEBUG_INSN_P (insn)) ++ SET_BIT (doloop_insns, INSN_UID (insn)); ++} ++ + /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so + that the number of iterations is a compile-time constant. If so, + return the rtx that sets COUNT_REG to a constant, and set COUNT to +@@ -569,13 +592,12 @@ + } + } + +-/* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values +- of SCHED_ROW and SCHED_STAGE. */ ++/* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of ++ SCHED_ROW and SCHED_STAGE. */ + static void +-normalize_sched_times (partial_schedule_ptr ps) ++reset_sched_times (partial_schedule_ptr ps, int amount) + { + int row; +- int amount = PS_MIN_CYCLE (ps); + int ii = ps->ii; + ps_insn_ptr crr_insn; + +@@ -584,6 +606,10 @@ + { + ddg_node_ptr u = crr_insn->node; + int normalized_time = SCHED_TIME (u) - amount; ++ int new_min_cycle = PS_MIN_CYCLE (ps) - amount; ++ /* The first cycle in row zero after the rotation. */ ++ int new_first_cycle_in_row_zero = ++ new_min_cycle + ii - SMODULO (new_min_cycle, ii); + + if (dump_file) + fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\ +@@ -592,8 +618,30 @@ + gcc_assert (SCHED_TIME (u) >= ps->min_cycle); + gcc_assert (SCHED_TIME (u) <= ps->max_cycle); + SCHED_TIME (u) = normalized_time; +- SCHED_ROW (u) = normalized_time % ii; +- SCHED_STAGE (u) = normalized_time / ii; ++ crr_insn->cycle = normalized_time; ++ SCHED_ROW (u) = SMODULO (normalized_time, ii); ++ ++ /* If min_cycle is in row zero after the rotation then ++ the stage count can be calculated by dividing the cycle ++ with ii. Otherwise, the calculation is done by dividing the ++ SMSed kernel into two intervals: ++ ++ 1) min_cycle <= interval 0 < first_cycle_in_row_zero ++ 2) first_cycle_in_row_zero <= interval 1 < max_cycle ++ ++ Cycles in interval 0 are in stage 0. The stage of cycles ++ in interval 1 should be added by 1 to take interval 0 into ++ account. */ ++ if (SMODULO (new_min_cycle, ii) == 0) ++ SCHED_STAGE (u) = normalized_time / ii; ++ else ++ { ++ if (crr_insn->cycle < new_first_cycle_in_row_zero) ++ SCHED_STAGE (u) = 0; ++ else ++ SCHED_STAGE (u) = ++ ((SCHED_TIME (u) - new_first_cycle_in_row_zero) / ii) + 1; ++ } + } + } + +@@ -646,9 +694,12 @@ + + /* Do not duplicate any insn which refers to count_reg as it + belongs to the control part. ++ If closing_branch_deps is true the closing branch is scheduled ++ as well and thus should be ignored. + TODO: This should be done by analyzing the control part of + the loop. */ +- if (reg_mentioned_p (count_reg, u_node->insn)) ++ if (reg_mentioned_p (count_reg, u_node->insn) ++ || JUMP_P (ps_ij->node->insn)) + continue; + + if (for_prolog) +@@ -894,7 +945,8 @@ + basic_block condition_bb = NULL; + edge latch_edge; + gcov_type trip_count = 0; +- ++ sbitmap doloop_insns; ++ + loop_optimizer_init (LOOPS_HAVE_PREHEADERS + | LOOPS_HAVE_RECORDED_EXITS); + if (number_of_loops () <= 1) +@@ -919,6 +971,7 @@ + setup_sched_infos (); + haifa_sched_init (); + ++ doloop_insns = sbitmap_alloc (get_max_uid () + 1); + /* Allocate memory to hold the DDG array one entry for each loop. + We use loop->num as index into this array. */ + g_arr = XCNEWVEC (ddg_ptr, number_of_loops ()); +@@ -1009,9 +1062,11 @@ + continue; + } + +- /* Don't handle BBs with calls or barriers, or !single_set insns, +- or auto-increment insns (to avoid creating invalid reg-moves +- for the auto-increment insns). ++ /* Don't handle BBs with calls or barriers or auto-increment insns ++ (to avoid creating invalid reg-moves for the auto-increment insns), ++ or !single_set with the exception of instructions that include ++ count_reg---these instructions are part of the control part ++ that do-loop recognizes. + ??? Should handle auto-increment insns. + ??? Should handle insns defining subregs. */ + for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn)) +@@ -1021,7 +1076,8 @@ + if (CALL_P (insn) + || BARRIER_P (insn) + || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn) +- && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE) ++ && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE ++ && !reg_mentioned_p (count_reg, insn)) + || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0) + || (INSN_P (insn) && (set = single_set (insn)) + && GET_CODE (SET_DEST (set)) == SUBREG)) +@@ -1048,14 +1104,16 @@ + + continue; + } +- +- if (! (g = create_ddg (bb, 0))) ++ mark_doloop_insns (doloop_insns, tail); ++ if (! (g = create_ddg (bb, doloop_insns))) + { + if (dump_file) + fprintf (dump_file, "SMS create_ddg failed\n"); + continue; + } +- ++ if (dump_file) ++ fprintf (dump_file, "SMS closing_branch_deps: %d\n", ++ g->closing_branch_deps); + g_arr[loop->num] = g; + if (dump_file) + fprintf (dump_file, "...OK\n"); +@@ -1157,11 +1215,13 @@ + + ps = sms_schedule_by_order (g, mii, maxii, node_order); + +- if (ps){ +- stage_count = PS_STAGE_COUNT (ps); +- gcc_assert(stage_count >= 1); +- } +- ++ if (ps) ++ { ++ stage_count = calculate_stage_count (ps); ++ gcc_assert(stage_count >= 1); ++ PS_STAGE_COUNT(ps) = stage_count; ++ } ++ + /* Stage count of 1 means that there is no interleaving between + iterations, let the scheduling passes do the job. */ + if (stage_count <= 1 +@@ -1182,17 +1242,7 @@ + else + { + struct undo_replace_buff_elem *reg_move_replaces; +- +- if (dump_file) +- { +- fprintf (dump_file, +- "SMS succeeded %d %d (with ii, sc)\n", ps->ii, +- stage_count); +- print_partial_schedule (ps, dump_file); +- fprintf (dump_file, +- "SMS Branch (%d) will later be scheduled at cycle %d.\n", +- g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1); +- } ++ int amount; + + /* Set the stage boundaries. If the DDG is built with closing_branch_deps, + the closing_branch was scheduled and should appear in the last (ii-1) +@@ -1202,12 +1252,28 @@ + TODO: Revisit the issue of scheduling the insns of the + control part relative to the branch when the control part + has more than one insn. */ +- normalize_sched_times (ps); +- rotate_partial_schedule (ps, PS_MIN_CYCLE (ps)); ++ amount = (g->closing_branch_deps)? SCHED_TIME (g->closing_branch) + 1: ++ PS_MIN_CYCLE (ps); ++ reset_sched_times (ps, amount); ++ rotate_partial_schedule (ps, amount); ++ + set_columns_for_ps (ps); + + canon_loop (loop); + ++ if (dump_file) ++ { ++ fprintf (dump_file, ++ "SMS succeeded %d %d (with ii, sc)\n", ps->ii, ++ stage_count); ++ print_partial_schedule (ps, dump_file); ++ if (!g->closing_branch_deps) ++ fprintf (dump_file, ++ "SMS Branch (%d) will later be scheduled at \ ++ cycle %d.\n", ++ g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1); ++ } ++ + /* case the BCT count is not known , Do loop-versioning */ + if (count_reg && ! count_init) + { +@@ -1252,6 +1318,7 @@ + } + + free (g_arr); ++ sbitmap_free (doloop_insns); + + /* Release scheduler data, needed until now because of DFA. */ + haifa_sched_finish (); +@@ -1759,8 +1826,9 @@ + RESET_BIT (tobe_scheduled, u); + continue; + } +- +- if (JUMP_P (insn)) /* Closing branch handled later. */ ++ /* Closing branch handled later unless closing_branch_deps ++ is true. */ ++ if (JUMP_P (insn) && !g->closing_branch_deps) + { + RESET_BIT (tobe_scheduled, u); + continue; +@@ -1893,8 +1961,8 @@ + if (dump_file) + fprintf (dump_file, "split_row=%d\n", split_row); + +- normalize_sched_times (ps); +- rotate_partial_schedule (ps, ps->min_cycle); ++ reset_sched_times (ps, PS_MIN_CYCLE (ps)); ++ rotate_partial_schedule (ps, PS_MIN_CYCLE (ps)); + + rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr)); + for (row = 0; row < split_row; row++) +@@ -2571,6 +2639,7 @@ + ps_insn_ptr next_ps_i; + ps_insn_ptr first_must_follow = NULL; + ps_insn_ptr last_must_precede = NULL; ++ ps_insn_ptr last_in_row = NULL; + int row; + + if (! ps_i) +@@ -2597,8 +2666,37 @@ + else + last_must_precede = next_ps_i; + } ++ /* The closing branch must be the last in the row. */ ++ if (must_precede ++ && TEST_BIT (must_precede, next_ps_i->node->cuid) ++ && JUMP_P (next_ps_i->node->insn)) ++ return false; ++ ++ last_in_row = next_ps_i; + } + ++ /* If closing_branch_deps is true we are scheduling the closing ++ branch as well. Make sure there is no dependent instruction after ++ it as the branch should be the last instruction. */ ++ if (JUMP_P (ps_i->node->insn)) ++ { ++ if (first_must_follow) ++ return false; ++ if (last_in_row) ++ { ++ /* Make the branch the last in the row. New instructions ++ will be inserted at the beginning of the row or after the ++ last must_precede instruction thus the branch is guaranteed ++ to remain the last instruction in the row. */ ++ last_in_row->next_in_row = ps_i; ++ ps_i->prev_in_row = last_in_row; ++ ps_i->next_in_row = NULL; ++ } ++ else ++ ps->rows[row] = ps_i; ++ return true; ++ } ++ + /* Now insert the node after INSERT_AFTER_PSI. */ + + if (! last_must_precede) +@@ -2820,6 +2918,54 @@ + return ps_i; + } + ++/* Calculate the stage count of the partial schedule PS. */ ++int ++calculate_stage_count (partial_schedule_ptr ps) ++{ ++ int stage_count; ++ ++ /* If closing_branch_deps is false then the stage ++ boundaries are placed efficiently, meaning that min_cycle will be ++ placed at row 0. Otherwise, the closing branch will be placed in ++ row ii-1. For the later case we assume the final SMSed kernel can ++ be divided into two intervals. This assumption is used for the ++ stage count calculation: ++ ++ 1) min_cycle <= interval 0 < first_cycle_in_row_zero ++ 2) first_cycle_in_row_zero <= interval 1 < max_cycle ++ */ ++ stage_count = ++ CALC_STAGE_COUNT (PS_MIN_CYCLE (ps), PS_MAX_CYCLE (ps), ps->ii); ++ if (ps->g->closing_branch_deps) ++ { ++ int new_min_cycle; ++ int new_min_cycle_row; ++ int rotation_amount = SCHED_TIME (ps->g->closing_branch) + 1; ++ ++ /* This is the new value of min_cycle after the final rotation to ++ bring closing branch into row ii-1. */ ++ new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount; ++ /* This is the row which the the new min_cycle will be placed in. */ ++ new_min_cycle_row = SMODULO (new_min_cycle, ps->ii); ++ /* If the row of min_cycle is zero then interval 0 is empty. ++ Otherwise, we need to calculate interval 1 and add it by one ++ to take interval 0 into account. */ ++ if (new_min_cycle_row != 0) ++ { ++ int new_max_cycle, first_cycle_in_row_zero; ++ ++ new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount; ++ first_cycle_in_row_zero = ++ new_min_cycle + ps->ii - new_min_cycle_row; ++ ++ stage_count = ++ CALC_STAGE_COUNT (first_cycle_in_row_zero, new_max_cycle, ++ ps->ii) + 1; ++ } ++ } ++ return stage_count; ++} ++ + /* Rotate the rows of PS such that insns scheduled at time + START_CYCLE will appear in row 0. Updates max/min_cycles. */ + void + |