diff options
Diffstat (limited to 'toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch')
-rw-r--r-- | toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch | 1759 |
1 files changed, 0 insertions, 1759 deletions
diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch deleted file mode 100644 index a58dd24416..0000000000 --- a/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch +++ /dev/null @@ -1,1759 +0,0 @@ -2010-07-28 Maxim Kuvyrkov <maxim@codesourcery.com> - - Backport code hoisting improvements from mainline: - - 2010-07-28 Jakub Jelinek <jakub@redhat.com> - PR debug/45105 - * gcc.dg/pr45105.c: New test. - - 2010-07-28 Jakub Jelinek <jakub@redhat.com> - PR debug/45105 - * gcse.c (hoist_code): Use FOR_BB_INSNS macro. - - 2010-07-28 Maxim Kuvyrkov <maxim@codesourcery.com> - PR rtl-optimization/45107 - * gcc.dg/pr45107.c: New test. - - 2010-07-28 Maxim Kuvyrkov <maxim@codesourcery.com> - PR rtl-optimization/45107 - * gcse.c (hash_scan_set): Use max_distance for gcse-las. - - 2010-07-28 Maxim Kuvyrkov <maxim@codesourcery.com> - PR rtl-optimization/45101 - * gcc.dg/pr45101.c: New test. - - 2010-07-28 Maxim Kuvyrkov <maxim@codesourcery.com> - PR rtl-optimization/45101 - * gcse.c (hash_scan_set): Fix argument ordering of insert_expr_in_table - for gcse-las. - - 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> - PR rtl-optimization/40956 - PR target/42495 - PR middle-end/42574 - * gcc.target/arm/pr40956.c, gcc.target/arm/pr42495.c, - * gcc.target/arm/pr42574.c: Add tests. - - 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> - * config/arm/arm.c (params.h): Include. - (arm_override_options): Tune gcse-unrestricted-cost. - * config/arm/t-arm (arm.o): Define dependencies. - - 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> - PR target/42495 - PR middle-end/42574 - * basic-block.h (get_dominated_to_depth): Declare. - * dominance.c (get_dominated_to_depth): New function, use - get_all_dominated_blocks as a base. - (get_all_dominated_blocks): Use get_dominated_to_depth. - * gcse.c (occr_t, VEC (occr_t, heap)): Define. - (hoist_exprs): Remove. - (alloc_code_hoist_mem, free_code_hoist_mem): Update. - (compute_code_hoist_vbeinout): Add debug print outs. - (hoist_code): Partially rewrite, simplify. Use get_dominated_to_depth. - * params.def (PARAM_MAX_HOIST_DEPTH): New parameter to avoid - quadratic behavior. - * params.h (MAX_HOIST_DEPTH): New macro. - * doc/invoke.texi (max-hoist-depth): Document. - - 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> - PR rtl-optimization/40956 - * config/arm/arm.c (thumb1_size_rtx_costs): Fix cost of simple - constants. - - 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> - PR target/42495 - PR middle-end/42574 - * config/arm/arm.c (legitimize_pic_address): Use - gen_calculate_pic_address pattern to emit calculation of PIC address. - (will_be_in_index_register): New function. - (arm_legitimate_address_outer_p, thumb2_legitimate_address_p,) - (thumb1_legitimate_address_p): Use it provided !strict_p. - * config/arm/arm.md (calculate_pic_address): New expand and split. - - 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> - PR target/42495 - PR middle-end/42574 - * config/arm/arm.c (thumb1_size_rtx_costs): Add cost for "J" constants. - * config/arm/arm.md (define_split "J", define_split "K"): Make - IRA/reload friendly. - - 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> - * gcse.c (insert_insn_end_basic_block): Update signature, remove - unused checks. - (pre_edge_insert, hoist_code): Update. - - 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> - PR target/42495 - PR middle-end/42574 - * gcse.c (hoist_expr_reaches_here_p): Remove excessive check. - - 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> - * gcse.c (hoist_code): Generate new pseudo for every new set insn. - - 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> - PR rtl-optimization/40956 - PR target/42495 - PR middle-end/42574 - * gcse.c (compute_code_hoist_vbeinout): Consider more expressions - for hoisting. - (hoist_code): Count occurences in current block too. - - 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> - * gcse.c (struct expr:max_distance): New field. - (doing_code_hoisting_p): New static variable. - (want_to_gcse_p): Change signature. Allow constrained hoisting of - simple expressions, don't change behavior for PRE. Set max_distance. - (insert_expr_in_table): Set new max_distance field. - (hash_scan_set): Update. - (hoist_expr_reaches_here_p): Stop search after max_distance - instructions. - (find_occr_in_bb): New static function. Use it in ... - (hoist_code): Calculate sizes of basic block before any changes are - done. Pass max_distance to hoist_expr_reaches_here_p. - (one_code_hoisting_pass): Set doing_code_hoisting_p. - * params.def (PARAM_GCSE_COST_DISTANCE_RATIO,) - (PARAM_GCSE_UNRESTRICTED_COST): New parameters. - * params.h (GCSE_COST_DISTANCE_RATIO, GCSE_UNRESTRICTED_COST): New - macros. - * doc/invoke.texi (gcse-cost-distance-ratio, gcse-unrestricted-cost): - Document. - - 2010-07-27 Jeff Law <law@redhat.com> - Maxim Kuvyrkov <maxim@codesourcery.com> - * gcse.c (compute_transpout, transpout): Remove, move logic - to prune_expressions. - (compute_pre_data): Move pruning of trapping expressions ... - (prune_expressions): ... here. New static function. - (compute_code_hoist_data): Use it. - (alloc_code_hoist_mem, free_code_hoist_mem, hoist_code): Update. - - 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> - * dbgcnt.def (hoist_insn): New debug counter. - * gcse.c (hoist_code): Use it. - - 2010-07-28 Julian Brown <julian@codesourcery.com> - - Backport from FSF mainline: - -=== modified file 'gcc/basic-block.h' ---- old/gcc/basic-block.h 2010-04-02 18:54:46 +0000 -+++ new/gcc/basic-block.h 2010-08-16 09:41:58 +0000 -@@ -932,6 +932,8 @@ - extern VEC (basic_block, heap) *get_dominated_by_region (enum cdi_direction, - basic_block *, - unsigned); -+extern VEC (basic_block, heap) *get_dominated_to_depth (enum cdi_direction, -+ basic_block, int); - extern VEC (basic_block, heap) *get_all_dominated_blocks (enum cdi_direction, - basic_block); - extern void add_to_dominance_info (enum cdi_direction, basic_block); - -=== modified file 'gcc/config/arm/arm.c' ---- old/gcc/config/arm/arm.c 2010-08-13 15:37:39 +0000 -+++ new/gcc/config/arm/arm.c 2010-08-16 09:41:58 +0000 -@@ -56,6 +56,7 @@ - #include "df.h" - #include "intl.h" - #include "libfuncs.h" -+#include "params.h" - - /* Forward definitions of types. */ - typedef struct minipool_node Mnode; -@@ -1902,6 +1903,14 @@ - flag_reorder_blocks = 1; - } - -+ if (!PARAM_SET_P (PARAM_GCSE_UNRESTRICTED_COST) -+ && flag_pic) -+ /* Hoisting PIC address calculations more aggressively provides a small, -+ but measurable, size reduction for PIC code. Therefore, we decrease -+ the bar for unrestricted expression hoisting to the cost of PIC address -+ calculation, which is 2 instructions. */ -+ set_param_value ("gcse-unrestricted-cost", 2); -+ - /* Register global variables with the garbage collector. */ - arm_add_gc_roots (); - -@@ -5070,17 +5079,13 @@ - if (GET_CODE (orig) == SYMBOL_REF - || GET_CODE (orig) == LABEL_REF) - { -- rtx pic_ref, address; - rtx insn; - - if (reg == 0) - { - gcc_assert (can_create_pseudo_p ()); - reg = gen_reg_rtx (Pmode); -- address = gen_reg_rtx (Pmode); - } -- else -- address = reg; - - /* VxWorks does not impose a fixed gap between segments; the run-time - gap can be different from the object-file gap. We therefore can't -@@ -5096,18 +5101,21 @@ - insn = arm_pic_static_addr (orig, reg); - else - { -+ rtx pat; -+ rtx mem; -+ - /* If this function doesn't have a pic register, create one now. */ - require_pic_register (); - -- if (TARGET_32BIT) -- emit_insn (gen_pic_load_addr_32bit (address, orig)); -- else /* TARGET_THUMB1 */ -- emit_insn (gen_pic_load_addr_thumb1 (address, orig)); -- -- pic_ref = gen_const_mem (Pmode, -- gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, -- address)); -- insn = emit_move_insn (reg, pic_ref); -+ pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig); -+ -+ /* Make the MEM as close to a constant as possible. */ -+ mem = SET_SRC (pat); -+ gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem)); -+ MEM_READONLY_P (mem) = 1; -+ MEM_NOTRAP_P (mem) = 1; -+ -+ insn = emit_insn (pat); - } - - /* Put a REG_EQUAL note on this insn, so that it can be optimized -@@ -5387,6 +5395,15 @@ - return FALSE; - } - -+/* Return true if X will surely end up in an index register after next -+ splitting pass. */ -+static bool -+will_be_in_index_register (const_rtx x) -+{ -+ /* arm.md: calculate_pic_address will split this into a register. */ -+ return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM; -+} -+ - /* Return nonzero if X is a valid ARM state address operand. */ - int - arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer, -@@ -5444,8 +5461,9 @@ - rtx xop1 = XEXP (x, 1); - - return ((arm_address_register_rtx_p (xop0, strict_p) -- && GET_CODE(xop1) == CONST_INT -- && arm_legitimate_index_p (mode, xop1, outer, strict_p)) -+ && ((GET_CODE(xop1) == CONST_INT -+ && arm_legitimate_index_p (mode, xop1, outer, strict_p)) -+ || (!strict_p && will_be_in_index_register (xop1)))) - || (arm_address_register_rtx_p (xop1, strict_p) - && arm_legitimate_index_p (mode, xop0, outer, strict_p))); - } -@@ -5531,7 +5549,8 @@ - rtx xop1 = XEXP (x, 1); - - return ((arm_address_register_rtx_p (xop0, strict_p) -- && thumb2_legitimate_index_p (mode, xop1, strict_p)) -+ && (thumb2_legitimate_index_p (mode, xop1, strict_p) -+ || (!strict_p && will_be_in_index_register (xop1)))) - || (arm_address_register_rtx_p (xop1, strict_p) - && thumb2_legitimate_index_p (mode, xop0, strict_p))); - } -@@ -5834,7 +5853,8 @@ - && XEXP (x, 0) != frame_pointer_rtx - && XEXP (x, 1) != frame_pointer_rtx - && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p) -- && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)) -+ && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p) -+ || (!strict_p && will_be_in_index_register (XEXP (x, 1))))) - return 1; - - /* REG+const has 5-7 bit offset for non-SP registers. */ -@@ -6413,12 +6433,16 @@ - - case CONST_INT: - if (outer == SET) -- { -- if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256) -- return 0; -- if (thumb_shiftable_const (INTVAL (x))) -- return COSTS_N_INSNS (2); -- return COSTS_N_INSNS (3); -+ { -+ if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256) -+ return COSTS_N_INSNS (1); -+ /* See split "TARGET_THUMB1 && satisfies_constraint_J". */ -+ if (INTVAL (x) >= -255 && INTVAL (x) <= -1) -+ return COSTS_N_INSNS (2); -+ /* See split "TARGET_THUMB1 && satisfies_constraint_K". */ -+ if (thumb_shiftable_const (INTVAL (x))) -+ return COSTS_N_INSNS (2); -+ return COSTS_N_INSNS (3); - } - else if ((outer == PLUS || outer == COMPARE) - && INTVAL (x) < 256 && INTVAL (x) > -256) -@@ -7110,6 +7134,12 @@ - a single register, otherwise it costs one insn per word. */ - if (REG_P (XEXP (x, 0))) - *total = COSTS_N_INSNS (1); -+ else if (flag_pic -+ && GET_CODE (XEXP (x, 0)) == PLUS -+ && will_be_in_index_register (XEXP (XEXP (x, 0), 1))) -+ /* This will be split into two instructions. -+ See arm.md:calculate_pic_address. */ -+ *total = COSTS_N_INSNS (2); - else - *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); - return true; - -=== modified file 'gcc/config/arm/arm.md' ---- old/gcc/config/arm/arm.md 2010-08-13 15:15:12 +0000 -+++ new/gcc/config/arm/arm.md 2010-08-16 09:41:58 +0000 -@@ -5290,17 +5290,21 @@ - [(set (match_operand:SI 0 "register_operand" "") - (match_operand:SI 1 "const_int_operand" ""))] - "TARGET_THUMB1 && satisfies_constraint_J (operands[1])" -- [(set (match_dup 0) (match_dup 1)) -- (set (match_dup 0) (neg:SI (match_dup 0)))] -- "operands[1] = GEN_INT (- INTVAL (operands[1]));" -+ [(set (match_dup 2) (match_dup 1)) -+ (set (match_dup 0) (neg:SI (match_dup 2)))] -+ " -+ { -+ operands[1] = GEN_INT (- INTVAL (operands[1])); -+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0]; -+ }" - ) - - (define_split - [(set (match_operand:SI 0 "register_operand" "") - (match_operand:SI 1 "const_int_operand" ""))] - "TARGET_THUMB1 && satisfies_constraint_K (operands[1])" -- [(set (match_dup 0) (match_dup 1)) -- (set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))] -+ [(set (match_dup 2) (match_dup 1)) -+ (set (match_dup 0) (ashift:SI (match_dup 2) (match_dup 3)))] - " - { - unsigned HOST_WIDE_INT val = INTVAL (operands[1]) & 0xffffffffu; -@@ -5311,12 +5315,13 @@ - if ((val & (mask << i)) == val) - break; - -- /* Shouldn't happen, but we don't want to split if the shift is zero. */ -+ /* Don't split if the shift is zero. */ - if (i == 0) - FAIL; - - operands[1] = GEN_INT (val >> i); -- operands[2] = GEN_INT (i); -+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0]; -+ operands[3] = GEN_INT (i); - }" - ) - -@@ -5325,6 +5330,34 @@ - ;; we use an unspec. The offset will be loaded from a constant pool entry, - ;; since that is the only type of relocation we can use. - -+;; Wrap calculation of the whole PIC address in a single pattern for the -+;; benefit of optimizers, particularly, PRE and HOIST. Calculation of -+;; a PIC address involves two loads from memory, so we want to CSE it -+;; as often as possible. -+;; This pattern will be split into one of the pic_load_addr_* patterns -+;; and a move after GCSE optimizations. -+;; -+;; Note: Update arm.c: legitimize_pic_address() when changing this pattern. -+(define_expand "calculate_pic_address" -+ [(set (match_operand:SI 0 "register_operand" "") -+ (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "") -+ (unspec:SI [(match_operand:SI 2 "" "")] -+ UNSPEC_PIC_SYM))))] -+ "flag_pic" -+) -+ -+;; Split calculate_pic_address into pic_load_addr_* and a move. -+(define_split -+ [(set (match_operand:SI 0 "register_operand" "") -+ (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "") -+ (unspec:SI [(match_operand:SI 2 "" "")] -+ UNSPEC_PIC_SYM))))] -+ "flag_pic" -+ [(set (match_dup 3) (unspec:SI [(match_dup 2)] UNSPEC_PIC_SYM)) -+ (set (match_dup 0) (mem:SI (plus:SI (match_dup 1) (match_dup 3))))] -+ "operands[3] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];" -+) -+ - ;; The rather odd constraints on the following are to force reload to leave - ;; the insn alone, and to force the minipool generation pass to then move - ;; the GOT symbol to memory. - -=== modified file 'gcc/config/arm/t-arm' ---- old/gcc/config/arm/t-arm 2009-06-21 19:48:15 +0000 -+++ new/gcc/config/arm/t-arm 2010-08-16 09:41:58 +0000 -@@ -45,6 +45,15 @@ - $(srcdir)/config/arm/arm-cores.def > \ - $(srcdir)/config/arm/arm-tune.md - -+arm.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ -+ $(RTL_H) $(TREE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ -+ insn-config.h conditions.h output.h \ -+ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ -+ $(EXPR_H) $(OPTABS_H) toplev.h $(RECOG_H) $(CGRAPH_H) \ -+ $(GGC_H) except.h $(C_PRAGMA_H) $(INTEGRATE_H) $(TM_P_H) \ -+ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ -+ intl.h libfuncs.h $(PARAMS_H) -+ - arm-c.o: $(srcdir)/config/arm/arm-c.c $(CONFIG_H) $(SYSTEM_H) \ - coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H) - $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ - -=== modified file 'gcc/dbgcnt.def' ---- old/gcc/dbgcnt.def 2009-11-25 10:55:54 +0000 -+++ new/gcc/dbgcnt.def 2010-08-16 09:41:58 +0000 -@@ -158,6 +158,7 @@ - DEBUG_COUNTER (global_alloc_at_func) - DEBUG_COUNTER (global_alloc_at_reg) - DEBUG_COUNTER (hoist) -+DEBUG_COUNTER (hoist_insn) - DEBUG_COUNTER (ia64_sched2) - DEBUG_COUNTER (if_conversion) - DEBUG_COUNTER (if_after_combine) - -=== modified file 'gcc/doc/invoke.texi' ---- old/gcc/doc/invoke.texi 2010-08-05 15:20:54 +0000 -+++ new/gcc/doc/invoke.texi 2010-08-16 09:41:58 +0000 -@@ -8086,6 +8086,29 @@ - vectorization needs to be greater than the value specified by this option - to allow vectorization. The default value is 0. - -+@item gcse-cost-distance-ratio -+Scaling factor in calculation of maximum distance an expression -+can be moved by GCSE optimizations. This is currently supported only in -+code hoisting pass. The bigger the ratio, the more agressive code hoisting -+will be with simple expressions, i.e., the expressions which have cost -+less than @option{gcse-unrestricted-cost}. Specifying 0 will disable -+hoisting of simple expressions. The default value is 10. -+ -+@item gcse-unrestricted-cost -+Cost, roughly measured as the cost of a single typical machine -+instruction, at which GCSE optimizations will not constrain -+the distance an expression can travel. This is currently -+supported only in code hoisting pass. The lesser the cost, -+the more aggressive code hoisting will be. Specifying 0 will -+allow all expressions to travel unrestricted distances. -+The default value is 3. -+ -+@item max-hoist-depth -+The depth of search in the dominator tree for expressions to hoist. -+This is used to avoid quadratic behavior in hoisting algorithm. -+The value of 0 will avoid limiting the search, but may slow down compilation -+of huge functions. The default value is 30. -+ - @item max-unrolled-insns - The maximum number of instructions that a loop should have if that loop - is unrolled, and if the loop is unrolled, it determines how many times - -=== modified file 'gcc/dominance.c' ---- old/gcc/dominance.c 2010-04-02 18:54:46 +0000 -+++ new/gcc/dominance.c 2010-08-16 09:41:58 +0000 -@@ -782,16 +782,20 @@ - } - - /* Returns the list of basic blocks including BB dominated by BB, in the -- direction DIR. The vector will be sorted in preorder. */ -+ direction DIR up to DEPTH in the dominator tree. The DEPTH of zero will -+ produce a vector containing all dominated blocks. The vector will be sorted -+ in preorder. */ - - VEC (basic_block, heap) * --get_all_dominated_blocks (enum cdi_direction dir, basic_block bb) -+get_dominated_to_depth (enum cdi_direction dir, basic_block bb, int depth) - { - VEC(basic_block, heap) *bbs = NULL; - unsigned i; -+ unsigned next_level_start; - - i = 0; - VEC_safe_push (basic_block, heap, bbs, bb); -+ next_level_start = 1; /* = VEC_length (basic_block, bbs); */ - - do - { -@@ -802,12 +806,24 @@ - son; - son = next_dom_son (dir, son)) - VEC_safe_push (basic_block, heap, bbs, son); -+ -+ if (i == next_level_start && --depth) -+ next_level_start = VEC_length (basic_block, bbs); - } -- while (i < VEC_length (basic_block, bbs)); -+ while (i < next_level_start); - - return bbs; - } - -+/* Returns the list of basic blocks including BB dominated by BB, in the -+ direction DIR. The vector will be sorted in preorder. */ -+ -+VEC (basic_block, heap) * -+get_all_dominated_blocks (enum cdi_direction dir, basic_block bb) -+{ -+ return get_dominated_to_depth (dir, bb, 0); -+} -+ - /* Redirect all edges pointing to BB to TO. */ - void - redirect_immediate_dominators (enum cdi_direction dir, basic_block bb, - -=== modified file 'gcc/gcse.c' ---- old/gcc/gcse.c 2010-03-16 10:50:42 +0000 -+++ new/gcc/gcse.c 2010-08-16 09:41:58 +0000 -@@ -296,6 +296,12 @@ - The value is the newly created pseudo-reg to record a copy of the - expression in all the places that reach the redundant copy. */ - rtx reaching_reg; -+ /* Maximum distance in instructions this expression can travel. -+ We avoid moving simple expressions for more than a few instructions -+ to keep register pressure under control. -+ A value of "0" removes restrictions on how far the expression can -+ travel. */ -+ int max_distance; - }; - - /* Occurrence of an expression. -@@ -317,6 +323,10 @@ - char copied_p; - }; - -+typedef struct occr *occr_t; -+DEF_VEC_P (occr_t); -+DEF_VEC_ALLOC_P (occr_t, heap); -+ - /* Expression and copy propagation hash tables. - Each hash table is an array of buckets. - ??? It is known that if it were an array of entries, structure elements -@@ -419,6 +429,9 @@ - /* Number of global copies propagated. */ - static int global_copy_prop_count; - -+/* Doing code hoisting. */ -+static bool doing_code_hoisting_p = false; -+ - /* For available exprs */ - static sbitmap *ae_kill; - -@@ -432,12 +445,12 @@ - static void hash_scan_set (rtx, rtx, struct hash_table_d *); - static void hash_scan_clobber (rtx, rtx, struct hash_table_d *); - static void hash_scan_call (rtx, rtx, struct hash_table_d *); --static int want_to_gcse_p (rtx); -+static int want_to_gcse_p (rtx, int *); - static bool gcse_constant_p (const_rtx); - static int oprs_unchanged_p (const_rtx, const_rtx, int); - static int oprs_anticipatable_p (const_rtx, const_rtx); - static int oprs_available_p (const_rtx, const_rtx); --static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, -+static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int, - struct hash_table_d *); - static void insert_set_in_table (rtx, rtx, struct hash_table_d *); - static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int); -@@ -462,7 +475,6 @@ - static void alloc_cprop_mem (int, int); - static void free_cprop_mem (void); - static void compute_transp (const_rtx, int, sbitmap *, int); --static void compute_transpout (void); - static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *, - struct hash_table_d *); - static void compute_cprop_data (void); -@@ -486,7 +498,7 @@ - static void compute_pre_data (void); - static int pre_expr_reaches_here_p (basic_block, struct expr *, - basic_block); --static void insert_insn_end_basic_block (struct expr *, basic_block, int); -+static void insert_insn_end_basic_block (struct expr *, basic_block); - static void pre_insert_copy_insn (struct expr *, rtx); - static void pre_insert_copies (void); - static int pre_delete (void); -@@ -497,7 +509,8 @@ - static void free_code_hoist_mem (void); - static void compute_code_hoist_vbeinout (void); - static void compute_code_hoist_data (void); --static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *); -+static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *, -+ int, int *); - static int hoist_code (void); - static int one_code_hoisting_pass (void); - static rtx process_insert_insn (struct expr *); -@@ -755,7 +768,7 @@ - GCSE. */ - - static int --want_to_gcse_p (rtx x) -+want_to_gcse_p (rtx x, int *max_distance_ptr) - { - #ifdef STACK_REGS - /* On register stack architectures, don't GCSE constants from the -@@ -765,18 +778,67 @@ - x = avoid_constant_pool_reference (x); - #endif - -+ /* GCSE'ing constants: -+ -+ We do not specifically distinguish between constant and non-constant -+ expressions in PRE and Hoist. We use rtx_cost below to limit -+ the maximum distance simple expressions can travel. -+ -+ Nevertheless, constants are much easier to GCSE, and, hence, -+ it is easy to overdo the optimizations. Usually, excessive PRE and -+ Hoisting of constant leads to increased register pressure. -+ -+ RA can deal with this by rematerialing some of the constants. -+ Therefore, it is important that the back-end generates sets of constants -+ in a way that allows reload rematerialize them under high register -+ pressure, i.e., a pseudo register with REG_EQUAL to constant -+ is set only once. Failing to do so will result in IRA/reload -+ spilling such constants under high register pressure instead of -+ rematerializing them. */ -+ - switch (GET_CODE (x)) - { - case REG: - case SUBREG: -- case CONST_INT: -- case CONST_DOUBLE: -- case CONST_FIXED: -- case CONST_VECTOR: - case CALL: - return 0; - -+ case CONST_INT: -+ case CONST_DOUBLE: -+ case CONST_FIXED: -+ case CONST_VECTOR: -+ if (!doing_code_hoisting_p) -+ /* Do not PRE constants. */ -+ return 0; -+ -+ /* FALLTHRU */ -+ - default: -+ if (doing_code_hoisting_p) -+ /* PRE doesn't implement max_distance restriction. */ -+ { -+ int cost; -+ int max_distance; -+ -+ gcc_assert (!optimize_function_for_speed_p (cfun) -+ && optimize_function_for_size_p (cfun)); -+ cost = rtx_cost (x, SET, 0); -+ -+ if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST)) -+ { -+ max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10; -+ if (max_distance == 0) -+ return 0; -+ -+ gcc_assert (max_distance > 0); -+ } -+ else -+ max_distance = 0; -+ -+ if (max_distance_ptr) -+ *max_distance_ptr = max_distance; -+ } -+ - return can_assign_to_reg_without_clobbers_p (x); - } - } -@@ -1090,11 +1152,14 @@ - It is only used if X is a CONST_INT. - - ANTIC_P is nonzero if X is an anticipatable expression. -- AVAIL_P is nonzero if X is an available expression. */ -+ AVAIL_P is nonzero if X is an available expression. -+ -+ MAX_DISTANCE is the maximum distance in instructions this expression can -+ be moved. */ - - static void - insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p, -- int avail_p, struct hash_table_d *table) -+ int avail_p, int max_distance, struct hash_table_d *table) - { - int found, do_not_record_p; - unsigned int hash; -@@ -1137,7 +1202,11 @@ - cur_expr->next_same_hash = NULL; - cur_expr->antic_occr = NULL; - cur_expr->avail_occr = NULL; -+ gcc_assert (max_distance >= 0); -+ cur_expr->max_distance = max_distance; - } -+ else -+ gcc_assert (cur_expr->max_distance == max_distance); - - /* Now record the occurrence(s). */ - if (antic_p) -@@ -1238,6 +1307,8 @@ - cur_expr->next_same_hash = NULL; - cur_expr->antic_occr = NULL; - cur_expr->avail_occr = NULL; -+ /* Not used for set_p tables. */ -+ cur_expr->max_distance = 0; - } - - /* Now record the occurrence. */ -@@ -1307,6 +1378,7 @@ - { - unsigned int regno = REGNO (dest); - rtx tmp; -+ int max_distance = 0; - - /* See if a REG_EQUAL note shows this equivalent to a simpler expression. - -@@ -1329,7 +1401,7 @@ - && !REG_P (src) - && (table->set_p - ? gcse_constant_p (XEXP (note, 0)) -- : want_to_gcse_p (XEXP (note, 0)))) -+ : want_to_gcse_p (XEXP (note, 0), NULL))) - src = XEXP (note, 0), pat = gen_rtx_SET (VOIDmode, dest, src); - - /* Only record sets of pseudo-regs in the hash table. */ -@@ -1344,7 +1416,7 @@ - can't do the same thing at the rtl level. */ - && !can_throw_internal (insn) - /* Is SET_SRC something we want to gcse? */ -- && want_to_gcse_p (src) -+ && want_to_gcse_p (src, &max_distance) - /* Don't CSE a nop. */ - && ! set_noop_p (pat) - /* Don't GCSE if it has attached REG_EQUIV note. -@@ -1368,7 +1440,8 @@ - int avail_p = (oprs_available_p (src, insn) - && ! JUMP_P (insn)); - -- insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p, table); -+ insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p, -+ max_distance, table); - } - - /* Record sets for constant/copy propagation. */ -@@ -1394,6 +1467,7 @@ - else if (flag_gcse_las && REG_P (src) && MEM_P (dest)) - { - unsigned int regno = REGNO (src); -+ int max_distance = 0; - - /* Do not do this for constant/copy propagation. */ - if (! table->set_p -@@ -1405,7 +1479,7 @@ - do that easily for EH edges so disable GCSE on these for now. */ - && !can_throw_internal (insn) - /* Is SET_DEST something we want to gcse? */ -- && want_to_gcse_p (dest) -+ && want_to_gcse_p (dest, &max_distance) - /* Don't CSE a nop. */ - && ! set_noop_p (pat) - /* Don't GCSE if it has attached REG_EQUIV note. -@@ -1427,7 +1501,7 @@ - - /* Record the memory expression (DEST) in the hash table. */ - insert_expr_in_table (dest, GET_MODE (dest), insn, -- antic_p, avail_p, table); -+ antic_p, avail_p, max_distance, table); - } - } - } -@@ -1513,8 +1587,8 @@ - if (flat_table[i] != 0) - { - expr = flat_table[i]; -- fprintf (file, "Index %d (hash value %d)\n ", -- expr->bitmap_index, hash_val[i]); -+ fprintf (file, "Index %d (hash value %d; max distance %d)\n ", -+ expr->bitmap_index, hash_val[i], expr->max_distance); - print_rtl (file, expr->expr); - fprintf (file, "\n"); - } -@@ -3168,11 +3242,6 @@ - /* Nonzero for expressions that are transparent in the block. */ - static sbitmap *transp; - --/* Nonzero for expressions that are transparent at the end of the block. -- This is only zero for expressions killed by abnormal critical edge -- created by a calls. */ --static sbitmap *transpout; -- - /* Nonzero for expressions that are computed (available) in the block. */ - static sbitmap *comp; - -@@ -3236,28 +3305,105 @@ - pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL; - } - --/* Top level routine to do the dataflow analysis needed by PRE. */ -+/* Remove certain expressions from anticipatable and transparent -+ sets of basic blocks that have incoming abnormal edge. -+ For PRE remove potentially trapping expressions to avoid placing -+ them on abnormal edges. For hoisting remove memory references that -+ can be clobbered by calls. */ - - static void --compute_pre_data (void) -+prune_expressions (bool pre_p) - { -- sbitmap trapping_expr; -- basic_block bb; -+ sbitmap prune_exprs; - unsigned int ui; -- -- compute_local_properties (transp, comp, antloc, &expr_hash_table); -- sbitmap_vector_zero (ae_kill, last_basic_block); -- -- /* Collect expressions which might trap. */ -- trapping_expr = sbitmap_alloc (expr_hash_table.n_elems); -- sbitmap_zero (trapping_expr); -+ basic_block bb; -+ -+ prune_exprs = sbitmap_alloc (expr_hash_table.n_elems); -+ sbitmap_zero (prune_exprs); - for (ui = 0; ui < expr_hash_table.size; ui++) - { - struct expr *e; - for (e = expr_hash_table.table[ui]; e != NULL; e = e->next_same_hash) -- if (may_trap_p (e->expr)) -- SET_BIT (trapping_expr, e->bitmap_index); -- } -+ { -+ /* Note potentially trapping expressions. */ -+ if (may_trap_p (e->expr)) -+ { -+ SET_BIT (prune_exprs, e->bitmap_index); -+ continue; -+ } -+ -+ if (!pre_p && MEM_P (e->expr)) -+ /* Note memory references that can be clobbered by a call. -+ We do not split abnormal edges in hoisting, so would -+ a memory reference get hoisted along an abnormal edge, -+ it would be placed /before/ the call. Therefore, only -+ constant memory references can be hoisted along abnormal -+ edges. */ -+ { -+ if (GET_CODE (XEXP (e->expr, 0)) == SYMBOL_REF -+ && CONSTANT_POOL_ADDRESS_P (XEXP (e->expr, 0))) -+ continue; -+ -+ if (MEM_READONLY_P (e->expr) -+ && !MEM_VOLATILE_P (e->expr) -+ && MEM_NOTRAP_P (e->expr)) -+ /* Constant memory reference, e.g., a PIC address. */ -+ continue; -+ -+ /* ??? Optimally, we would use interprocedural alias -+ analysis to determine if this mem is actually killed -+ by this call. */ -+ -+ SET_BIT (prune_exprs, e->bitmap_index); -+ } -+ } -+ } -+ -+ FOR_EACH_BB (bb) -+ { -+ edge e; -+ edge_iterator ei; -+ -+ /* If the current block is the destination of an abnormal edge, we -+ kill all trapping (for PRE) and memory (for hoist) expressions -+ because we won't be able to properly place the instruction on -+ the edge. So make them neither anticipatable nor transparent. -+ This is fairly conservative. -+ -+ ??? For hoisting it may be necessary to check for set-and-jump -+ instructions here, not just for abnormal edges. The general problem -+ is that when an expression cannot not be placed right at the end of -+ a basic block we should account for any side-effects of a subsequent -+ jump instructions that could clobber the expression. It would -+ be best to implement this check along the lines of -+ hoist_expr_reaches_here_p where the target block is already known -+ and, hence, there's no need to conservatively prune expressions on -+ "intermediate" set-and-jump instructions. */ -+ FOR_EACH_EDGE (e, ei, bb->preds) -+ if ((e->flags & EDGE_ABNORMAL) -+ && (pre_p || CALL_P (BB_END (e->src)))) -+ { -+ sbitmap_difference (antloc[bb->index], -+ antloc[bb->index], prune_exprs); -+ sbitmap_difference (transp[bb->index], -+ transp[bb->index], prune_exprs); -+ break; -+ } -+ } -+ -+ sbitmap_free (prune_exprs); -+} -+ -+/* Top level routine to do the dataflow analysis needed by PRE. */ -+ -+static void -+compute_pre_data (void) -+{ -+ basic_block bb; -+ -+ compute_local_properties (transp, comp, antloc, &expr_hash_table); -+ prune_expressions (true); -+ sbitmap_vector_zero (ae_kill, last_basic_block); - - /* Compute ae_kill for each basic block using: - -@@ -3266,21 +3412,6 @@ - - FOR_EACH_BB (bb) - { -- edge e; -- edge_iterator ei; -- -- /* If the current block is the destination of an abnormal edge, we -- kill all trapping expressions because we won't be able to properly -- place the instruction on the edge. So make them neither -- anticipatable nor transparent. This is fairly conservative. */ -- FOR_EACH_EDGE (e, ei, bb->preds) -- if (e->flags & EDGE_ABNORMAL) -- { -- sbitmap_difference (antloc[bb->index], antloc[bb->index], trapping_expr); -- sbitmap_difference (transp[bb->index], transp[bb->index], trapping_expr); -- break; -- } -- - sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]); - sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]); - } -@@ -3291,7 +3422,6 @@ - antloc = NULL; - sbitmap_vector_free (ae_kill); - ae_kill = NULL; -- sbitmap_free (trapping_expr); - } - - /* PRE utilities */ -@@ -3406,14 +3536,10 @@ - - /* Add EXPR to the end of basic block BB. - -- This is used by both the PRE and code hoisting. -- -- For PRE, we want to verify that the expr is either transparent -- or locally anticipatable in the target block. This check makes -- no sense for code hoisting. */ -+ This is used by both the PRE and code hoisting. */ - - static void --insert_insn_end_basic_block (struct expr *expr, basic_block bb, int pre) -+insert_insn_end_basic_block (struct expr *expr, basic_block bb) - { - rtx insn = BB_END (bb); - rtx new_insn; -@@ -3440,12 +3566,6 @@ - #ifdef HAVE_cc0 - rtx note; - #endif -- /* It should always be the case that we can put these instructions -- anywhere in the basic block with performing PRE optimizations. -- Check this. */ -- gcc_assert (!NONJUMP_INSN_P (insn) || !pre -- || TEST_BIT (antloc[bb->index], expr->bitmap_index) -- || TEST_BIT (transp[bb->index], expr->bitmap_index)); - - /* If this is a jump table, then we can't insert stuff here. Since - we know the previous real insn must be the tablejump, we insert -@@ -3482,15 +3602,7 @@ - /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers, - we search backward and place the instructions before the first - parameter is loaded. Do this for everyone for consistency and a -- presumption that we'll get better code elsewhere as well. -- -- It should always be the case that we can put these instructions -- anywhere in the basic block with performing PRE optimizations. -- Check this. */ -- -- gcc_assert (!pre -- || TEST_BIT (antloc[bb->index], expr->bitmap_index) -- || TEST_BIT (transp[bb->index], expr->bitmap_index)); -+ presumption that we'll get better code elsewhere as well. */ - - /* Since different machines initialize their parameter registers - in different orders, assume nothing. Collect the set of all -@@ -3587,7 +3699,7 @@ - now. */ - - if (eg->flags & EDGE_ABNORMAL) -- insert_insn_end_basic_block (index_map[j], bb, 0); -+ insert_insn_end_basic_block (index_map[j], bb); - else - { - insn = process_insert_insn (index_map[j]); -@@ -4046,61 +4158,12 @@ - } - } - --/* Compute transparent outgoing information for each block. -- -- An expression is transparent to an edge unless it is killed by -- the edge itself. This can only happen with abnormal control flow, -- when the edge is traversed through a call. This happens with -- non-local labels and exceptions. -- -- This would not be necessary if we split the edge. While this is -- normally impossible for abnormal critical edges, with some effort -- it should be possible with exception handling, since we still have -- control over which handler should be invoked. But due to increased -- EH table sizes, this may not be worthwhile. */ -- --static void --compute_transpout (void) --{ -- basic_block bb; -- unsigned int i; -- struct expr *expr; -- -- sbitmap_vector_ones (transpout, last_basic_block); -- -- FOR_EACH_BB (bb) -- { -- /* Note that flow inserted a nop at the end of basic blocks that -- end in call instructions for reasons other than abnormal -- control flow. */ -- if (! CALL_P (BB_END (bb))) -- continue; -- -- for (i = 0; i < expr_hash_table.size; i++) -- for (expr = expr_hash_table.table[i]; expr ; expr = expr->next_same_hash) -- if (MEM_P (expr->expr)) -- { -- if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF -- && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0))) -- continue; -- -- /* ??? Optimally, we would use interprocedural alias -- analysis to determine if this mem is actually killed -- by this call. */ -- RESET_BIT (transpout[bb->index], expr->bitmap_index); -- } -- } --} -- - /* Code Hoisting variables and subroutines. */ - - /* Very busy expressions. */ - static sbitmap *hoist_vbein; - static sbitmap *hoist_vbeout; - --/* Hoistable expressions. */ --static sbitmap *hoist_exprs; -- - /* ??? We could compute post dominators and run this algorithm in - reverse to perform tail merging, doing so would probably be - more effective than the tail merging code in jump.c. -@@ -4119,8 +4182,6 @@ - - hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs); - hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs); -- hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs); -- transpout = sbitmap_vector_alloc (n_blocks, n_exprs); - } - - /* Free vars used for code hoisting analysis. */ -@@ -4134,8 +4195,6 @@ - - sbitmap_vector_free (hoist_vbein); - sbitmap_vector_free (hoist_vbeout); -- sbitmap_vector_free (hoist_exprs); -- sbitmap_vector_free (transpout); - - free_dominance_info (CDI_DOMINATORS); - } -@@ -4166,8 +4225,15 @@ - FOR_EACH_BB_REVERSE (bb) - { - if (bb->next_bb != EXIT_BLOCK_PTR) -- sbitmap_intersection_of_succs (hoist_vbeout[bb->index], -- hoist_vbein, bb->index); -+ { -+ sbitmap_intersection_of_succs (hoist_vbeout[bb->index], -+ hoist_vbein, bb->index); -+ -+ /* Include expressions in VBEout that are calculated -+ in BB and available at its end. */ -+ sbitmap_a_or_b (hoist_vbeout[bb->index], -+ hoist_vbeout[bb->index], comp[bb->index]); -+ } - - changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index], - antloc[bb->index], -@@ -4179,7 +4245,17 @@ - } - - if (dump_file) -- fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes); -+ { -+ fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes); -+ -+ FOR_EACH_BB (bb) -+ { -+ fprintf (dump_file, "vbein (%d): ", bb->index); -+ dump_sbitmap_file (dump_file, hoist_vbein[bb->index]); -+ fprintf (dump_file, "vbeout(%d): ", bb->index); -+ dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]); -+ } -+ } - } - - /* Top level routine to do the dataflow analysis needed by code hoisting. */ -@@ -4188,7 +4264,7 @@ - compute_code_hoist_data (void) - { - compute_local_properties (transp, comp, antloc, &expr_hash_table); -- compute_transpout (); -+ prune_expressions (false); - compute_code_hoist_vbeinout (); - calculate_dominance_info (CDI_DOMINATORS); - if (dump_file) -@@ -4197,6 +4273,8 @@ - - /* Determine if the expression identified by EXPR_INDEX would - reach BB unimpared if it was placed at the end of EXPR_BB. -+ Stop the search if the expression would need to be moved more -+ than DISTANCE instructions. - - It's unclear exactly what Muchnick meant by "unimpared". It seems - to me that the expression must either be computed or transparent in -@@ -4209,12 +4287,24 @@ - paths. */ - - static int --hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb, char *visited) -+hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb, -+ char *visited, int distance, int *bb_size) - { - edge pred; - edge_iterator ei; - int visited_allocated_locally = 0; - -+ /* Terminate the search if distance, for which EXPR is allowed to move, -+ is exhausted. */ -+ if (distance > 0) -+ { -+ distance -= bb_size[bb->index]; -+ -+ if (distance <= 0) -+ return 0; -+ } -+ else -+ gcc_assert (distance == 0); - - if (visited == NULL) - { -@@ -4233,9 +4323,6 @@ - else if (visited[pred_bb->index]) - continue; - -- /* Does this predecessor generate this expression? */ -- else if (TEST_BIT (comp[pred_bb->index], expr_index)) -- break; - else if (! TEST_BIT (transp[pred_bb->index], expr_index)) - break; - -@@ -4243,8 +4330,8 @@ - else - { - visited[pred_bb->index] = 1; -- if (! hoist_expr_reaches_here_p (expr_bb, expr_index, -- pred_bb, visited)) -+ if (! hoist_expr_reaches_here_p (expr_bb, expr_index, pred_bb, -+ visited, distance, bb_size)) - break; - } - } -@@ -4254,20 +4341,33 @@ - return (pred == NULL); - } - -+/* Find occurence in BB. */ -+static struct occr * -+find_occr_in_bb (struct occr *occr, basic_block bb) -+{ -+ /* Find the right occurrence of this expression. */ -+ while (occr && BLOCK_FOR_INSN (occr->insn) != bb) -+ occr = occr->next; -+ -+ return occr; -+} -+ - /* Actually perform code hoisting. */ - - static int - hoist_code (void) - { - basic_block bb, dominated; -+ VEC (basic_block, heap) *dom_tree_walk; -+ unsigned int dom_tree_walk_index; - VEC (basic_block, heap) *domby; - unsigned int i,j; - struct expr **index_map; - struct expr *expr; -+ int *to_bb_head; -+ int *bb_size; - int changed = 0; - -- sbitmap_vector_zero (hoist_exprs, last_basic_block); -- - /* Compute a mapping from expression number (`bitmap_index') to - hash table entry. */ - -@@ -4276,28 +4376,98 @@ - for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash) - index_map[expr->bitmap_index] = expr; - -+ /* Calculate sizes of basic blocks and note how far -+ each instruction is from the start of its block. We then use this -+ data to restrict distance an expression can travel. */ -+ -+ to_bb_head = XCNEWVEC (int, get_max_uid ()); -+ bb_size = XCNEWVEC (int, last_basic_block); -+ -+ FOR_EACH_BB (bb) -+ { -+ rtx insn; -+ int to_head; -+ -+ to_head = 0; -+ FOR_BB_INSNS (bb, insn) -+ { -+ /* Don't count debug instructions to avoid them affecting -+ decision choices. */ -+ if (NONDEBUG_INSN_P (insn)) -+ to_bb_head[INSN_UID (insn)] = to_head++; -+ } -+ -+ bb_size[bb->index] = to_head; -+ } -+ -+ gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1 -+ && (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest -+ == ENTRY_BLOCK_PTR->next_bb)); -+ -+ dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS, -+ ENTRY_BLOCK_PTR->next_bb); -+ - /* Walk over each basic block looking for potentially hoistable - expressions, nothing gets hoisted from the entry block. */ -- FOR_EACH_BB (bb) -+ for (dom_tree_walk_index = 0; -+ VEC_iterate (basic_block, dom_tree_walk, dom_tree_walk_index, bb); -+ dom_tree_walk_index++) - { -- int found = 0; -- int insn_inserted_p; -- -- domby = get_dominated_by (CDI_DOMINATORS, bb); -+ domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH); -+ -+ if (VEC_length (basic_block, domby) == 0) -+ continue; -+ - /* Examine each expression that is very busy at the exit of this - block. These are the potentially hoistable expressions. */ - for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++) - { -- int hoistable = 0; -- -- if (TEST_BIT (hoist_vbeout[bb->index], i) -- && TEST_BIT (transpout[bb->index], i)) -+ if (TEST_BIT (hoist_vbeout[bb->index], i)) - { -+ /* Current expression. */ -+ struct expr *expr = index_map[i]; -+ /* Number of occurences of EXPR that can be hoisted to BB. */ -+ int hoistable = 0; -+ /* Basic blocks that have occurences reachable from BB. */ -+ bitmap_head _from_bbs, *from_bbs = &_from_bbs; -+ /* Occurences reachable from BB. */ -+ VEC (occr_t, heap) *occrs_to_hoist = NULL; -+ /* We want to insert the expression into BB only once, so -+ note when we've inserted it. */ -+ int insn_inserted_p; -+ occr_t occr; -+ -+ bitmap_initialize (from_bbs, 0); -+ -+ /* If an expression is computed in BB and is available at end of -+ BB, hoist all occurences dominated by BB to BB. */ -+ if (TEST_BIT (comp[bb->index], i)) -+ { -+ occr = find_occr_in_bb (expr->antic_occr, bb); -+ -+ if (occr) -+ { -+ /* An occurence might've been already deleted -+ while processing a dominator of BB. */ -+ if (occr->deleted_p) -+ gcc_assert (MAX_HOIST_DEPTH > 1); -+ else -+ { -+ gcc_assert (NONDEBUG_INSN_P (occr->insn)); -+ hoistable++; -+ } -+ } -+ else -+ hoistable++; -+ } -+ - /* We've found a potentially hoistable expression, now - we look at every block BB dominates to see if it - computes the expression. */ - for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++) - { -+ int max_distance; -+ - /* Ignore self dominance. */ - if (bb == dominated) - continue; -@@ -4307,17 +4477,43 @@ - if (!TEST_BIT (antloc[dominated->index], i)) - continue; - -+ occr = find_occr_in_bb (expr->antic_occr, dominated); -+ gcc_assert (occr); -+ -+ /* An occurence might've been already deleted -+ while processing a dominator of BB. */ -+ if (occr->deleted_p) -+ { -+ gcc_assert (MAX_HOIST_DEPTH > 1); -+ continue; -+ } -+ gcc_assert (NONDEBUG_INSN_P (occr->insn)); -+ -+ max_distance = expr->max_distance; -+ if (max_distance > 0) -+ /* Adjust MAX_DISTANCE to account for the fact that -+ OCCR won't have to travel all of DOMINATED, but -+ only part of it. */ -+ max_distance += (bb_size[dominated->index] -+ - to_bb_head[INSN_UID (occr->insn)]); -+ - /* Note if the expression would reach the dominated block - unimpared if it was placed at the end of BB. - - Keep track of how many times this expression is hoistable - from a dominated block into BB. */ -- if (hoist_expr_reaches_here_p (bb, i, dominated, NULL)) -- hoistable++; -+ if (hoist_expr_reaches_here_p (bb, i, dominated, NULL, -+ max_distance, bb_size)) -+ { -+ hoistable++; -+ VEC_safe_push (occr_t, heap, -+ occrs_to_hoist, occr); -+ bitmap_set_bit (from_bbs, dominated->index); -+ } - } - - /* If we found more than one hoistable occurrence of this -- expression, then note it in the bitmap of expressions to -+ expression, then note it in the vector of expressions to - hoist. It makes no sense to hoist things which are computed - in only one BB, and doing so tends to pessimize register - allocation. One could increase this value to try harder -@@ -4326,91 +4522,80 @@ - the vast majority of hoistable expressions are only movable - from two successors, so raising this threshold is likely - to nullify any benefit we get from code hoisting. */ -- if (hoistable > 1) -- { -- SET_BIT (hoist_exprs[bb->index], i); -- found = 1; -- } -- } -- } -- /* If we found nothing to hoist, then quit now. */ -- if (! found) -- { -- VEC_free (basic_block, heap, domby); -- continue; -- } -- -- /* Loop over all the hoistable expressions. */ -- for (i = 0; i < hoist_exprs[bb->index]->n_bits; i++) -- { -- /* We want to insert the expression into BB only once, so -- note when we've inserted it. */ -- insn_inserted_p = 0; -- -- /* These tests should be the same as the tests above. */ -- if (TEST_BIT (hoist_exprs[bb->index], i)) -- { -- /* We've found a potentially hoistable expression, now -- we look at every block BB dominates to see if it -- computes the expression. */ -- for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++) -- { -- /* Ignore self dominance. */ -- if (bb == dominated) -- continue; -- -- /* We've found a dominated block, now see if it computes -- the busy expression and whether or not moving that -- expression to the "beginning" of that block is safe. */ -- if (!TEST_BIT (antloc[dominated->index], i)) -- continue; -- -- /* The expression is computed in the dominated block and -- it would be safe to compute it at the start of the -- dominated block. Now we have to determine if the -- expression would reach the dominated block if it was -- placed at the end of BB. */ -- if (hoist_expr_reaches_here_p (bb, i, dominated, NULL)) -- { -- struct expr *expr = index_map[i]; -- struct occr *occr = expr->antic_occr; -- rtx insn; -- rtx set; -- -- /* Find the right occurrence of this expression. */ -- while (BLOCK_FOR_INSN (occr->insn) != dominated && occr) -- occr = occr->next; -- -- gcc_assert (occr); -- insn = occr->insn; -- set = single_set (insn); -- gcc_assert (set); -- -- /* Create a pseudo-reg to store the result of reaching -- expressions into. Get the mode for the new pseudo -- from the mode of the original destination pseudo. */ -- if (expr->reaching_reg == NULL) -- expr->reaching_reg -- = gen_reg_rtx_and_attrs (SET_DEST (set)); -- -- gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), insn); -- delete_insn (insn); -- occr->deleted_p = 1; -- changed = 1; -- gcse_subst_count++; -- -- if (!insn_inserted_p) -- { -- insert_insn_end_basic_block (index_map[i], bb, 0); -- insn_inserted_p = 1; -- } -- } -- } -+ if (hoistable > 1 && dbg_cnt (hoist_insn)) -+ { -+ /* If (hoistable != VEC_length), then there is -+ an occurence of EXPR in BB itself. Don't waste -+ time looking for LCA in this case. */ -+ if ((unsigned) hoistable -+ == VEC_length (occr_t, occrs_to_hoist)) -+ { -+ basic_block lca; -+ -+ lca = nearest_common_dominator_for_set (CDI_DOMINATORS, -+ from_bbs); -+ if (lca != bb) -+ /* Punt, it's better to hoist these occurences to -+ LCA. */ -+ VEC_free (occr_t, heap, occrs_to_hoist); -+ } -+ } -+ else -+ /* Punt, no point hoisting a single occurence. */ -+ VEC_free (occr_t, heap, occrs_to_hoist); -+ -+ insn_inserted_p = 0; -+ -+ /* Walk through occurences of I'th expressions we want -+ to hoist to BB and make the transformations. */ -+ for (j = 0; -+ VEC_iterate (occr_t, occrs_to_hoist, j, occr); -+ j++) -+ { -+ rtx insn; -+ rtx set; -+ -+ gcc_assert (!occr->deleted_p); -+ -+ insn = occr->insn; -+ set = single_set (insn); -+ gcc_assert (set); -+ -+ /* Create a pseudo-reg to store the result of reaching -+ expressions into. Get the mode for the new pseudo -+ from the mode of the original destination pseudo. -+ -+ It is important to use new pseudos whenever we -+ emit a set. This will allow reload to use -+ rematerialization for such registers. */ -+ if (!insn_inserted_p) -+ expr->reaching_reg -+ = gen_reg_rtx_and_attrs (SET_DEST (set)); -+ -+ gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), -+ insn); -+ delete_insn (insn); -+ occr->deleted_p = 1; -+ changed = 1; -+ gcse_subst_count++; -+ -+ if (!insn_inserted_p) -+ { -+ insert_insn_end_basic_block (expr, bb); -+ insn_inserted_p = 1; -+ } -+ } -+ -+ VEC_free (occr_t, heap, occrs_to_hoist); -+ bitmap_clear (from_bbs); - } - } - VEC_free (basic_block, heap, domby); - } - -+ VEC_free (basic_block, heap, dom_tree_walk); -+ free (bb_size); -+ free (to_bb_head); - free (index_map); - - return changed; -@@ -4433,6 +4618,8 @@ - || is_too_expensive (_("GCSE disabled"))) - return 0; - -+ doing_code_hoisting_p = true; -+ - /* We need alias. */ - init_alias_analysis (); - -@@ -4468,6 +4655,8 @@ - gcse_subst_count, gcse_create_count); - } - -+ doing_code_hoisting_p = false; -+ - return changed; - } - - -=== modified file 'gcc/params.def' ---- old/gcc/params.def 2010-04-02 18:54:46 +0000 -+++ new/gcc/params.def 2010-08-16 09:41:58 +0000 -@@ -219,6 +219,29 @@ - "gcse-after-reload-critical-fraction", - "The threshold ratio of critical edges execution count that permit performing redundancy elimination after reload", - 10, 0, 0) -+ -+/* GCSE will use GCSE_COST_DISTANCE_RATION as a scaling factor -+ to calculate maximum distance for which an expression is allowed to move -+ from its rtx_cost. */ -+DEFPARAM(PARAM_GCSE_COST_DISTANCE_RATIO, -+ "gcse-cost-distance-ratio", -+ "Scaling factor in calculation of maximum distance an expression can be moved by GCSE optimizations", -+ 10, 0, 0) -+/* GCSE won't restrict distance for which an expression with rtx_cost greater -+ than COSTS_N_INSN(GCSE_UNRESTRICTED_COST) is allowed to move. */ -+DEFPARAM(PARAM_GCSE_UNRESTRICTED_COST, -+ "gcse-unrestricted-cost", -+ "Cost at which GCSE optimizations will not constraint the distance an expression can travel", -+ 3, 0, 0) -+ -+/* How deep from a given basic block the dominator tree should be searched -+ for expressions to hoist to the block. The value of 0 will avoid limiting -+ the search. */ -+DEFPARAM(PARAM_MAX_HOIST_DEPTH, -+ "max-hoist-depth", -+ "Maximum depth of search in the dominator tree for expressions to hoist", -+ 30, 0, 0) -+ - /* This parameter limits the number of insns in a loop that will be unrolled, - and by how much the loop is unrolled. - - -=== modified file 'gcc/params.h' ---- old/gcc/params.h 2009-12-01 19:12:29 +0000 -+++ new/gcc/params.h 2010-08-16 09:41:58 +0000 -@@ -125,6 +125,12 @@ - PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_PARTIAL_FRACTION) - #define GCSE_AFTER_RELOAD_CRITICAL_FRACTION \ - PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION) -+#define GCSE_COST_DISTANCE_RATIO \ -+ PARAM_VALUE (PARAM_GCSE_COST_DISTANCE_RATIO) -+#define GCSE_UNRESTRICTED_COST \ -+ PARAM_VALUE (PARAM_GCSE_UNRESTRICTED_COST) -+#define MAX_HOIST_DEPTH \ -+ PARAM_VALUE (PARAM_MAX_HOIST_DEPTH) - #define MAX_UNROLLED_INSNS \ - PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) - #define MAX_SMS_LOOP_NUMBER \ - -=== added file 'gcc/testsuite/gcc.dg/pr45101.c' ---- old/gcc/testsuite/gcc.dg/pr45101.c 1970-01-01 00:00:00 +0000 -+++ new/gcc/testsuite/gcc.dg/pr45101.c 2010-08-16 09:41:58 +0000 -@@ -0,0 +1,15 @@ -+/* PR rtl-optimization/45101 */ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fgcse -fgcse-las" } */ -+ -+struct -+{ -+ int i; -+} *s; -+ -+extern void bar (void); -+ -+void foo () -+{ -+ !s ? s->i++ : bar (); -+} - -=== added file 'gcc/testsuite/gcc.dg/pr45105.c' ---- old/gcc/testsuite/gcc.dg/pr45105.c 1970-01-01 00:00:00 +0000 -+++ new/gcc/testsuite/gcc.dg/pr45105.c 2010-08-16 09:41:58 +0000 -@@ -0,0 +1,27 @@ -+/* PR debug/45105 */ -+/* { dg-do compile } */ -+/* { dg-options "-Os -fcompare-debug" } */ -+ -+extern int *baz (int *, int *); -+ -+void -+bar (int *p1, int *p2) -+{ -+ int n = *baz (0, 0); -+ p1[n] = p2[n]; -+} -+ -+void -+foo (int *p, int l) -+{ -+ int a1[32]; -+ int a2[32]; -+ baz (a1, a2); -+ while (l) -+ { -+ if (l & 1) -+ p = baz (a2, p); -+ l--; -+ bar (a1, a2); -+ } -+} - -=== added file 'gcc/testsuite/gcc.dg/pr45107.c' ---- old/gcc/testsuite/gcc.dg/pr45107.c 1970-01-01 00:00:00 +0000 -+++ new/gcc/testsuite/gcc.dg/pr45107.c 2010-08-16 09:41:58 +0000 -@@ -0,0 +1,13 @@ -+/* PR rtl-optimization/45107 */ -+/* { dg-do compile } */ -+/* { dg-options "-Os -fgcse-las" } */ -+ -+extern void bar(int *); -+ -+int foo (int *p) -+{ -+ int i = *p; -+ if (i != 1) -+ bar(&i); -+ *p = i; -+} - -=== added file 'gcc/testsuite/gcc.target/arm/pr40956.c' ---- old/gcc/testsuite/gcc.target/arm/pr40956.c 1970-01-01 00:00:00 +0000 -+++ new/gcc/testsuite/gcc.target/arm/pr40956.c 2010-08-16 09:41:58 +0000 -@@ -0,0 +1,14 @@ -+/* { dg-options "-mthumb -Os -fpic -march=armv5te" } */ -+/* { dg-require-effective-target arm_thumb1_ok } */ -+/* { dg-require-effective-target fpic } */ -+/* Make sure the constant "0" is loaded into register only once. */ -+/* { dg-final { scan-assembler-times "mov\[\\t \]*r., #0" 1 } } */ -+ -+int foo(int p, int* q) -+{ -+ if (p!=9) -+ *q = 0; -+ else -+ *(q+1) = 0; -+ return 3; -+} - -=== added file 'gcc/testsuite/gcc.target/arm/pr42495.c' ---- old/gcc/testsuite/gcc.target/arm/pr42495.c 1970-01-01 00:00:00 +0000 -+++ new/gcc/testsuite/gcc.target/arm/pr42495.c 2010-08-16 09:41:58 +0000 -@@ -0,0 +1,31 @@ -+/* { dg-options "-mthumb -Os -fpic -march=armv5te -fdump-rtl-hoist" } */ -+/* { dg-require-effective-target arm_thumb1_ok } */ -+/* { dg-require-effective-target fpic } */ -+/* Make sure all calculations of gObj's address get hoisted to one location. */ -+/* { dg-final { scan-rtl-dump "PRE/HOIST: end of bb .* copying expression" "hoist" } } */ -+ -+struct st_a { -+ int data; -+}; -+ -+struct st_b { -+ struct st_a *p_a; -+ struct st_b *next; -+}; -+ -+extern struct st_b gObj; -+extern void foo(int, struct st_b*); -+ -+int goo(struct st_b * obj) { -+ struct st_a *pa; -+ if (gObj.p_a->data != 0) { -+ foo(gObj.p_a->data, obj); -+ } -+ pa = obj->p_a; -+ if (pa == 0) { -+ return 0; -+ } else if (pa == gObj.p_a) { -+ return 0; -+ } -+ return pa->data; -+} - -=== added file 'gcc/testsuite/gcc.target/arm/pr42574.c' ---- old/gcc/testsuite/gcc.target/arm/pr42574.c 1970-01-01 00:00:00 +0000 -+++ new/gcc/testsuite/gcc.target/arm/pr42574.c 2010-08-16 09:41:58 +0000 -@@ -0,0 +1,24 @@ -+/* { dg-options "-mthumb -Os -fpic -march=armv5te" } */ -+/* { dg-require-effective-target arm_thumb1_ok } */ -+/* { dg-require-effective-target fpic } */ -+/* Make sure the address of glob.c is calculated only once and using -+ a logical shift for the offset (200<<1). */ -+/* { dg-final { scan-assembler-times "lsl" 1 } } */ -+ -+struct A { -+ char a[400]; -+ float* c; -+}; -+struct A glob; -+void func(); -+void func1(float*); -+int func2(float*, int*); -+void func3(float*); -+ -+void test(int *p) { -+ func1(glob.c); -+ if (func2(glob.c, p)) { -+ func(); -+ } -+ func3(glob.c); -+} - |