From 91aeba1aed9efd7e58f18e079ab1e7d622fa8f65 Mon Sep 17 00:00:00 2001 From: Khem Raj Date: Fri, 24 Sep 2010 13:49:18 -0700 Subject: gcc-4.5: Import Linaro patches * Tested gcc on efikamx. Signed-off-by: Khem Raj Marcin Juszkiewicz --- .../gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch | 1759 ++++++++++++++++++++ 1 file changed, 1759 insertions(+) create mode 100644 recipes/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch (limited to 'recipes/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch') diff --git a/recipes/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch b/recipes/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch new file mode 100644 index 0000000000..a58dd24416 --- /dev/null +++ b/recipes/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch @@ -0,0 +1,1759 @@ +2010-07-28 Maxim Kuvyrkov + + Backport code hoisting improvements from mainline: + + 2010-07-28 Jakub Jelinek + PR debug/45105 + * gcc.dg/pr45105.c: New test. + + 2010-07-28 Jakub Jelinek + PR debug/45105 + * gcse.c (hoist_code): Use FOR_BB_INSNS macro. + + 2010-07-28 Maxim Kuvyrkov + PR rtl-optimization/45107 + * gcc.dg/pr45107.c: New test. + + 2010-07-28 Maxim Kuvyrkov + PR rtl-optimization/45107 + * gcse.c (hash_scan_set): Use max_distance for gcse-las. + + 2010-07-28 Maxim Kuvyrkov + PR rtl-optimization/45101 + * gcc.dg/pr45101.c: New test. + + 2010-07-28 Maxim Kuvyrkov + PR rtl-optimization/45101 + * gcse.c (hash_scan_set): Fix argument ordering of insert_expr_in_table + for gcse-las. + + 2010-07-27 Maxim Kuvyrkov + PR rtl-optimization/40956 + PR target/42495 + PR middle-end/42574 + * gcc.target/arm/pr40956.c, gcc.target/arm/pr42495.c, + * gcc.target/arm/pr42574.c: Add tests. + + 2010-07-27 Maxim Kuvyrkov + * config/arm/arm.c (params.h): Include. + (arm_override_options): Tune gcse-unrestricted-cost. + * config/arm/t-arm (arm.o): Define dependencies. + + 2010-07-27 Maxim Kuvyrkov + PR target/42495 + PR middle-end/42574 + * basic-block.h (get_dominated_to_depth): Declare. 
+ * dominance.c (get_dominated_to_depth): New function, use + get_all_dominated_blocks as a base. + (get_all_dominated_blocks): Use get_dominated_to_depth. + * gcse.c (occr_t, VEC (occr_t, heap)): Define. + (hoist_exprs): Remove. + (alloc_code_hoist_mem, free_code_hoist_mem): Update. + (compute_code_hoist_vbeinout): Add debug print outs. + (hoist_code): Partially rewrite, simplify. Use get_dominated_to_depth. + * params.def (PARAM_MAX_HOIST_DEPTH): New parameter to avoid + quadratic behavior. + * params.h (MAX_HOIST_DEPTH): New macro. + * doc/invoke.texi (max-hoist-depth): Document. + + 2010-07-27 Maxim Kuvyrkov + PR rtl-optimization/40956 + * config/arm/arm.c (thumb1_size_rtx_costs): Fix cost of simple + constants. + + 2010-07-27 Maxim Kuvyrkov + PR target/42495 + PR middle-end/42574 + * config/arm/arm.c (legitimize_pic_address): Use + gen_calculate_pic_address pattern to emit calculation of PIC address. + (will_be_in_index_register): New function. + (arm_legitimate_address_outer_p, thumb2_legitimate_address_p,) + (thumb1_legitimate_address_p): Use it provided !strict_p. + * config/arm/arm.md (calculate_pic_address): New expand and split. + + 2010-07-27 Maxim Kuvyrkov + PR target/42495 + PR middle-end/42574 + * config/arm/arm.c (thumb1_size_rtx_costs): Add cost for "J" constants. + * config/arm/arm.md (define_split "J", define_split "K"): Make + IRA/reload friendly. + + 2010-07-27 Maxim Kuvyrkov + * gcse.c (insert_insn_end_basic_block): Update signature, remove + unused checks. + (pre_edge_insert, hoist_code): Update. + + 2010-07-27 Maxim Kuvyrkov + PR target/42495 + PR middle-end/42574 + * gcse.c (hoist_expr_reaches_here_p): Remove excessive check. + + 2010-07-27 Maxim Kuvyrkov + * gcse.c (hoist_code): Generate new pseudo for every new set insn. + + 2010-07-27 Maxim Kuvyrkov + PR rtl-optimization/40956 + PR target/42495 + PR middle-end/42574 + * gcse.c (compute_code_hoist_vbeinout): Consider more expressions + for hoisting. 
+ (hoist_code): Count occurrences in current block too. + + 2010-07-27 Maxim Kuvyrkov + * gcse.c (struct expr:max_distance): New field. + (doing_code_hoisting_p): New static variable. + (want_to_gcse_p): Change signature. Allow constrained hoisting of + simple expressions, don't change behavior for PRE. Set max_distance. + (insert_expr_in_table): Set new max_distance field. + (hash_scan_set): Update. + (hoist_expr_reaches_here_p): Stop search after max_distance + instructions. + (find_occr_in_bb): New static function. Use it in ... + (hoist_code): Calculate sizes of basic block before any changes are + done. Pass max_distance to hoist_expr_reaches_here_p. + (one_code_hoisting_pass): Set doing_code_hoisting_p. + * params.def (PARAM_GCSE_COST_DISTANCE_RATIO,) + (PARAM_GCSE_UNRESTRICTED_COST): New parameters. + * params.h (GCSE_COST_DISTANCE_RATIO, GCSE_UNRESTRICTED_COST): New + macros. + * doc/invoke.texi (gcse-cost-distance-ratio, gcse-unrestricted-cost): + Document. + + 2010-07-27 Jeff Law + Maxim Kuvyrkov + * gcse.c (compute_transpout, transpout): Remove, move logic + to prune_expressions. + (compute_pre_data): Move pruning of trapping expressions ... + (prune_expressions): ... here. New static function. + (compute_code_hoist_data): Use it. + (alloc_code_hoist_mem, free_code_hoist_mem, hoist_code): Update. + + 2010-07-27 Maxim Kuvyrkov + * dbgcnt.def (hoist_insn): New debug counter. + * gcse.c (hoist_code): Use it. 
+ + 2010-07-28 Julian Brown + + Backport from FSF mainline: + +=== modified file 'gcc/basic-block.h' +--- old/gcc/basic-block.h 2010-04-02 18:54:46 +0000 ++++ new/gcc/basic-block.h 2010-08-16 09:41:58 +0000 +@@ -932,6 +932,8 @@ + extern VEC (basic_block, heap) *get_dominated_by_region (enum cdi_direction, + basic_block *, + unsigned); ++extern VEC (basic_block, heap) *get_dominated_to_depth (enum cdi_direction, ++ basic_block, int); + extern VEC (basic_block, heap) *get_all_dominated_blocks (enum cdi_direction, + basic_block); + extern void add_to_dominance_info (enum cdi_direction, basic_block); + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2010-08-13 15:37:39 +0000 ++++ new/gcc/config/arm/arm.c 2010-08-16 09:41:58 +0000 +@@ -56,6 +56,7 @@ + #include "df.h" + #include "intl.h" + #include "libfuncs.h" ++#include "params.h" + + /* Forward definitions of types. */ + typedef struct minipool_node Mnode; +@@ -1902,6 +1903,14 @@ + flag_reorder_blocks = 1; + } + ++ if (!PARAM_SET_P (PARAM_GCSE_UNRESTRICTED_COST) ++ && flag_pic) ++ /* Hoisting PIC address calculations more aggressively provides a small, ++ but measurable, size reduction for PIC code. Therefore, we decrease ++ the bar for unrestricted expression hoisting to the cost of PIC address ++ calculation, which is 2 instructions. */ ++ set_param_value ("gcse-unrestricted-cost", 2); ++ + /* Register global variables with the garbage collector. */ + arm_add_gc_roots (); + +@@ -5070,17 +5079,13 @@ + if (GET_CODE (orig) == SYMBOL_REF + || GET_CODE (orig) == LABEL_REF) + { +- rtx pic_ref, address; + rtx insn; + + if (reg == 0) + { + gcc_assert (can_create_pseudo_p ()); + reg = gen_reg_rtx (Pmode); +- address = gen_reg_rtx (Pmode); + } +- else +- address = reg; + + /* VxWorks does not impose a fixed gap between segments; the run-time + gap can be different from the object-file gap. 
We therefore can't +@@ -5096,18 +5101,21 @@ + insn = arm_pic_static_addr (orig, reg); + else + { ++ rtx pat; ++ rtx mem; ++ + /* If this function doesn't have a pic register, create one now. */ + require_pic_register (); + +- if (TARGET_32BIT) +- emit_insn (gen_pic_load_addr_32bit (address, orig)); +- else /* TARGET_THUMB1 */ +- emit_insn (gen_pic_load_addr_thumb1 (address, orig)); +- +- pic_ref = gen_const_mem (Pmode, +- gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, +- address)); +- insn = emit_move_insn (reg, pic_ref); ++ pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig); ++ ++ /* Make the MEM as close to a constant as possible. */ ++ mem = SET_SRC (pat); ++ gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem)); ++ MEM_READONLY_P (mem) = 1; ++ MEM_NOTRAP_P (mem) = 1; ++ ++ insn = emit_insn (pat); + } + + /* Put a REG_EQUAL note on this insn, so that it can be optimized +@@ -5387,6 +5395,15 @@ + return FALSE; + } + ++/* Return true if X will surely end up in an index register after next ++ splitting pass. */ ++static bool ++will_be_in_index_register (const_rtx x) ++{ ++ /* arm.md: calculate_pic_address will split this into a register. */ ++ return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM; ++} ++ + /* Return nonzero if X is a valid ARM state address operand. 
*/ + int + arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer, +@@ -5444,8 +5461,9 @@ + rtx xop1 = XEXP (x, 1); + + return ((arm_address_register_rtx_p (xop0, strict_p) +- && GET_CODE(xop1) == CONST_INT +- && arm_legitimate_index_p (mode, xop1, outer, strict_p)) ++ && ((GET_CODE(xop1) == CONST_INT ++ && arm_legitimate_index_p (mode, xop1, outer, strict_p)) ++ || (!strict_p && will_be_in_index_register (xop1)))) + || (arm_address_register_rtx_p (xop1, strict_p) + && arm_legitimate_index_p (mode, xop0, outer, strict_p))); + } +@@ -5531,7 +5549,8 @@ + rtx xop1 = XEXP (x, 1); + + return ((arm_address_register_rtx_p (xop0, strict_p) +- && thumb2_legitimate_index_p (mode, xop1, strict_p)) ++ && (thumb2_legitimate_index_p (mode, xop1, strict_p) ++ || (!strict_p && will_be_in_index_register (xop1)))) + || (arm_address_register_rtx_p (xop1, strict_p) + && thumb2_legitimate_index_p (mode, xop0, strict_p))); + } +@@ -5834,7 +5853,8 @@ + && XEXP (x, 0) != frame_pointer_rtx + && XEXP (x, 1) != frame_pointer_rtx + && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p) +- && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)) ++ && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p) ++ || (!strict_p && will_be_in_index_register (XEXP (x, 1))))) + return 1; + + /* REG+const has 5-7 bit offset for non-SP registers. */ +@@ -6413,12 +6433,16 @@ + + case CONST_INT: + if (outer == SET) +- { +- if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256) +- return 0; +- if (thumb_shiftable_const (INTVAL (x))) +- return COSTS_N_INSNS (2); +- return COSTS_N_INSNS (3); ++ { ++ if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256) ++ return COSTS_N_INSNS (1); ++ /* See split "TARGET_THUMB1 && satisfies_constraint_J". */ ++ if (INTVAL (x) >= -255 && INTVAL (x) <= -1) ++ return COSTS_N_INSNS (2); ++ /* See split "TARGET_THUMB1 && satisfies_constraint_K". 
*/ ++ if (thumb_shiftable_const (INTVAL (x))) ++ return COSTS_N_INSNS (2); ++ return COSTS_N_INSNS (3); + } + else if ((outer == PLUS || outer == COMPARE) + && INTVAL (x) < 256 && INTVAL (x) > -256) +@@ -7110,6 +7134,12 @@ + a single register, otherwise it costs one insn per word. */ + if (REG_P (XEXP (x, 0))) + *total = COSTS_N_INSNS (1); ++ else if (flag_pic ++ && GET_CODE (XEXP (x, 0)) == PLUS ++ && will_be_in_index_register (XEXP (XEXP (x, 0), 1))) ++ /* This will be split into two instructions. ++ See arm.md:calculate_pic_address. */ ++ *total = COSTS_N_INSNS (2); + else + *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + return true; + +=== modified file 'gcc/config/arm/arm.md' +--- old/gcc/config/arm/arm.md 2010-08-13 15:15:12 +0000 ++++ new/gcc/config/arm/arm.md 2010-08-16 09:41:58 +0000 +@@ -5290,17 +5290,21 @@ + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" ""))] + "TARGET_THUMB1 && satisfies_constraint_J (operands[1])" +- [(set (match_dup 0) (match_dup 1)) +- (set (match_dup 0) (neg:SI (match_dup 0)))] +- "operands[1] = GEN_INT (- INTVAL (operands[1]));" ++ [(set (match_dup 2) (match_dup 1)) ++ (set (match_dup 0) (neg:SI (match_dup 2)))] ++ " ++ { ++ operands[1] = GEN_INT (- INTVAL (operands[1])); ++ operands[2] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0]; ++ }" + ) + + (define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" ""))] + "TARGET_THUMB1 && satisfies_constraint_K (operands[1])" +- [(set (match_dup 0) (match_dup 1)) +- (set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))] ++ [(set (match_dup 2) (match_dup 1)) ++ (set (match_dup 0) (ashift:SI (match_dup 2) (match_dup 3)))] + " + { + unsigned HOST_WIDE_INT val = INTVAL (operands[1]) & 0xffffffffu; +@@ -5311,12 +5315,13 @@ + if ((val & (mask << i)) == val) + break; + +- /* Shouldn't happen, but we don't want to split if the shift is zero. */ ++ /* Don't split if the shift is zero. 
*/ + if (i == 0) + FAIL; + + operands[1] = GEN_INT (val >> i); +- operands[2] = GEN_INT (i); ++ operands[2] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0]; ++ operands[3] = GEN_INT (i); + }" + ) + +@@ -5325,6 +5330,34 @@ + ;; we use an unspec. The offset will be loaded from a constant pool entry, + ;; since that is the only type of relocation we can use. + ++;; Wrap calculation of the whole PIC address in a single pattern for the ++;; benefit of optimizers, particularly, PRE and HOIST. Calculation of ++;; a PIC address involves two loads from memory, so we want to CSE it ++;; as often as possible. ++;; This pattern will be split into one of the pic_load_addr_* patterns ++;; and a move after GCSE optimizations. ++;; ++;; Note: Update arm.c: legitimize_pic_address() when changing this pattern. ++(define_expand "calculate_pic_address" ++ [(set (match_operand:SI 0 "register_operand" "") ++ (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "") ++ (unspec:SI [(match_operand:SI 2 "" "")] ++ UNSPEC_PIC_SYM))))] ++ "flag_pic" ++) ++ ++;; Split calculate_pic_address into pic_load_addr_* and a move. ++(define_split ++ [(set (match_operand:SI 0 "register_operand" "") ++ (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "") ++ (unspec:SI [(match_operand:SI 2 "" "")] ++ UNSPEC_PIC_SYM))))] ++ "flag_pic" ++ [(set (match_dup 3) (unspec:SI [(match_dup 2)] UNSPEC_PIC_SYM)) ++ (set (match_dup 0) (mem:SI (plus:SI (match_dup 1) (match_dup 3))))] ++ "operands[3] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];" ++) ++ + ;; The rather odd constraints on the following are to force reload to leave + ;; the insn alone, and to force the minipool generation pass to then move + ;; the GOT symbol to memory. 
+ +=== modified file 'gcc/config/arm/t-arm' +--- old/gcc/config/arm/t-arm 2009-06-21 19:48:15 +0000 ++++ new/gcc/config/arm/t-arm 2010-08-16 09:41:58 +0000 +@@ -45,6 +45,15 @@ + $(srcdir)/config/arm/arm-cores.def > \ + $(srcdir)/config/arm/arm-tune.md + ++arm.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ ++ $(RTL_H) $(TREE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ ++ insn-config.h conditions.h output.h \ ++ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ ++ $(EXPR_H) $(OPTABS_H) toplev.h $(RECOG_H) $(CGRAPH_H) \ ++ $(GGC_H) except.h $(C_PRAGMA_H) $(INTEGRATE_H) $(TM_P_H) \ ++ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ ++ intl.h libfuncs.h $(PARAMS_H) ++ + arm-c.o: $(srcdir)/config/arm/arm-c.c $(CONFIG_H) $(SYSTEM_H) \ + coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + +=== modified file 'gcc/dbgcnt.def' +--- old/gcc/dbgcnt.def 2009-11-25 10:55:54 +0000 ++++ new/gcc/dbgcnt.def 2010-08-16 09:41:58 +0000 +@@ -158,6 +158,7 @@ + DEBUG_COUNTER (global_alloc_at_func) + DEBUG_COUNTER (global_alloc_at_reg) + DEBUG_COUNTER (hoist) ++DEBUG_COUNTER (hoist_insn) + DEBUG_COUNTER (ia64_sched2) + DEBUG_COUNTER (if_conversion) + DEBUG_COUNTER (if_after_combine) + +=== modified file 'gcc/doc/invoke.texi' +--- old/gcc/doc/invoke.texi 2010-08-05 15:20:54 +0000 ++++ new/gcc/doc/invoke.texi 2010-08-16 09:41:58 +0000 +@@ -8086,6 +8086,29 @@ + vectorization needs to be greater than the value specified by this option + to allow vectorization. The default value is 0. + ++@item gcse-cost-distance-ratio ++Scaling factor in calculation of maximum distance an expression ++can be moved by GCSE optimizations. This is currently supported only in ++code hoisting pass. The bigger the ratio, the more agressive code hoisting ++will be with simple expressions, i.e., the expressions which have cost ++less than @option{gcse-unrestricted-cost}. Specifying 0 will disable ++hoisting of simple expressions. 
The default value is 10. ++ ++@item gcse-unrestricted-cost ++Cost, roughly measured as the cost of a single typical machine ++instruction, at which GCSE optimizations will not constrain ++the distance an expression can travel. This is currently ++supported only in code hoisting pass. The lesser the cost, ++the more aggressive code hoisting will be. Specifying 0 will ++allow all expressions to travel unrestricted distances. ++The default value is 3. ++ ++@item max-hoist-depth ++The depth of search in the dominator tree for expressions to hoist. ++This is used to avoid quadratic behavior in hoisting algorithm. ++The value of 0 will avoid limiting the search, but may slow down compilation ++of huge functions. The default value is 30. ++ + @item max-unrolled-insns + The maximum number of instructions that a loop should have if that loop + is unrolled, and if the loop is unrolled, it determines how many times + +=== modified file 'gcc/dominance.c' +--- old/gcc/dominance.c 2010-04-02 18:54:46 +0000 ++++ new/gcc/dominance.c 2010-08-16 09:41:58 +0000 +@@ -782,16 +782,20 @@ + } + + /* Returns the list of basic blocks including BB dominated by BB, in the +- direction DIR. The vector will be sorted in preorder. */ ++ direction DIR up to DEPTH in the dominator tree. The DEPTH of zero will ++ produce a vector containing all dominated blocks. The vector will be sorted ++ in preorder. 
*/ + + VEC (basic_block, heap) * +-get_all_dominated_blocks (enum cdi_direction dir, basic_block bb) ++get_dominated_to_depth (enum cdi_direction dir, basic_block bb, int depth) + { + VEC(basic_block, heap) *bbs = NULL; + unsigned i; ++ unsigned next_level_start; + + i = 0; + VEC_safe_push (basic_block, heap, bbs, bb); ++ next_level_start = 1; /* = VEC_length (basic_block, bbs); */ + + do + { +@@ -802,12 +806,24 @@ + son; + son = next_dom_son (dir, son)) + VEC_safe_push (basic_block, heap, bbs, son); ++ ++ if (i == next_level_start && --depth) ++ next_level_start = VEC_length (basic_block, bbs); + } +- while (i < VEC_length (basic_block, bbs)); ++ while (i < next_level_start); + + return bbs; + } + ++/* Returns the list of basic blocks including BB dominated by BB, in the ++ direction DIR. The vector will be sorted in preorder. */ ++ ++VEC (basic_block, heap) * ++get_all_dominated_blocks (enum cdi_direction dir, basic_block bb) ++{ ++ return get_dominated_to_depth (dir, bb, 0); ++} ++ + /* Redirect all edges pointing to BB to TO. */ + void + redirect_immediate_dominators (enum cdi_direction dir, basic_block bb, + +=== modified file 'gcc/gcse.c' +--- old/gcc/gcse.c 2010-03-16 10:50:42 +0000 ++++ new/gcc/gcse.c 2010-08-16 09:41:58 +0000 +@@ -296,6 +296,12 @@ + The value is the newly created pseudo-reg to record a copy of the + expression in all the places that reach the redundant copy. */ + rtx reaching_reg; ++ /* Maximum distance in instructions this expression can travel. ++ We avoid moving simple expressions for more than a few instructions ++ to keep register pressure under control. ++ A value of "0" removes restrictions on how far the expression can ++ travel. */ ++ int max_distance; + }; + + /* Occurrence of an expression. +@@ -317,6 +323,10 @@ + char copied_p; + }; + ++typedef struct occr *occr_t; ++DEF_VEC_P (occr_t); ++DEF_VEC_ALLOC_P (occr_t, heap); ++ + /* Expression and copy propagation hash tables. + Each hash table is an array of buckets. + ??? 
It is known that if it were an array of entries, structure elements +@@ -419,6 +429,9 @@ + /* Number of global copies propagated. */ + static int global_copy_prop_count; + ++/* Doing code hoisting. */ ++static bool doing_code_hoisting_p = false; ++ + /* For available exprs */ + static sbitmap *ae_kill; + +@@ -432,12 +445,12 @@ + static void hash_scan_set (rtx, rtx, struct hash_table_d *); + static void hash_scan_clobber (rtx, rtx, struct hash_table_d *); + static void hash_scan_call (rtx, rtx, struct hash_table_d *); +-static int want_to_gcse_p (rtx); ++static int want_to_gcse_p (rtx, int *); + static bool gcse_constant_p (const_rtx); + static int oprs_unchanged_p (const_rtx, const_rtx, int); + static int oprs_anticipatable_p (const_rtx, const_rtx); + static int oprs_available_p (const_rtx, const_rtx); +-static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, ++static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int, + struct hash_table_d *); + static void insert_set_in_table (rtx, rtx, struct hash_table_d *); + static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int); +@@ -462,7 +475,6 @@ + static void alloc_cprop_mem (int, int); + static void free_cprop_mem (void); + static void compute_transp (const_rtx, int, sbitmap *, int); +-static void compute_transpout (void); + static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *, + struct hash_table_d *); + static void compute_cprop_data (void); +@@ -486,7 +498,7 @@ + static void compute_pre_data (void); + static int pre_expr_reaches_here_p (basic_block, struct expr *, + basic_block); +-static void insert_insn_end_basic_block (struct expr *, basic_block, int); ++static void insert_insn_end_basic_block (struct expr *, basic_block); + static void pre_insert_copy_insn (struct expr *, rtx); + static void pre_insert_copies (void); + static int pre_delete (void); +@@ -497,7 +509,8 @@ + static void free_code_hoist_mem (void); + static void 
compute_code_hoist_vbeinout (void); + static void compute_code_hoist_data (void); +-static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *); ++static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *, ++ int, int *); + static int hoist_code (void); + static int one_code_hoisting_pass (void); + static rtx process_insert_insn (struct expr *); +@@ -755,7 +768,7 @@ + GCSE. */ + + static int +-want_to_gcse_p (rtx x) ++want_to_gcse_p (rtx x, int *max_distance_ptr) + { + #ifdef STACK_REGS + /* On register stack architectures, don't GCSE constants from the +@@ -765,18 +778,67 @@ + x = avoid_constant_pool_reference (x); + #endif + ++ /* GCSE'ing constants: ++ ++ We do not specifically distinguish between constant and non-constant ++ expressions in PRE and Hoist. We use rtx_cost below to limit ++ the maximum distance simple expressions can travel. ++ ++ Nevertheless, constants are much easier to GCSE, and, hence, ++ it is easy to overdo the optimizations. Usually, excessive PRE and ++ Hoisting of constant leads to increased register pressure. ++ ++ RA can deal with this by rematerialing some of the constants. ++ Therefore, it is important that the back-end generates sets of constants ++ in a way that allows reload rematerialize them under high register ++ pressure, i.e., a pseudo register with REG_EQUAL to constant ++ is set only once. Failing to do so will result in IRA/reload ++ spilling such constants under high register pressure instead of ++ rematerializing them. */ ++ + switch (GET_CODE (x)) + { + case REG: + case SUBREG: +- case CONST_INT: +- case CONST_DOUBLE: +- case CONST_FIXED: +- case CONST_VECTOR: + case CALL: + return 0; + ++ case CONST_INT: ++ case CONST_DOUBLE: ++ case CONST_FIXED: ++ case CONST_VECTOR: ++ if (!doing_code_hoisting_p) ++ /* Do not PRE constants. */ ++ return 0; ++ ++ /* FALLTHRU */ ++ + default: ++ if (doing_code_hoisting_p) ++ /* PRE doesn't implement max_distance restriction. 
*/ ++ { ++ int cost; ++ int max_distance; ++ ++ gcc_assert (!optimize_function_for_speed_p (cfun) ++ && optimize_function_for_size_p (cfun)); ++ cost = rtx_cost (x, SET, 0); ++ ++ if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST)) ++ { ++ max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10; ++ if (max_distance == 0) ++ return 0; ++ ++ gcc_assert (max_distance > 0); ++ } ++ else ++ max_distance = 0; ++ ++ if (max_distance_ptr) ++ *max_distance_ptr = max_distance; ++ } ++ + return can_assign_to_reg_without_clobbers_p (x); + } + } +@@ -1090,11 +1152,14 @@ + It is only used if X is a CONST_INT. + + ANTIC_P is nonzero if X is an anticipatable expression. +- AVAIL_P is nonzero if X is an available expression. */ ++ AVAIL_P is nonzero if X is an available expression. ++ ++ MAX_DISTANCE is the maximum distance in instructions this expression can ++ be moved. */ + + static void + insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p, +- int avail_p, struct hash_table_d *table) ++ int avail_p, int max_distance, struct hash_table_d *table) + { + int found, do_not_record_p; + unsigned int hash; +@@ -1137,7 +1202,11 @@ + cur_expr->next_same_hash = NULL; + cur_expr->antic_occr = NULL; + cur_expr->avail_occr = NULL; ++ gcc_assert (max_distance >= 0); ++ cur_expr->max_distance = max_distance; + } ++ else ++ gcc_assert (cur_expr->max_distance == max_distance); + + /* Now record the occurrence(s). */ + if (antic_p) +@@ -1238,6 +1307,8 @@ + cur_expr->next_same_hash = NULL; + cur_expr->antic_occr = NULL; + cur_expr->avail_occr = NULL; ++ /* Not used for set_p tables. */ ++ cur_expr->max_distance = 0; + } + + /* Now record the occurrence. */ +@@ -1307,6 +1378,7 @@ + { + unsigned int regno = REGNO (dest); + rtx tmp; ++ int max_distance = 0; + + /* See if a REG_EQUAL note shows this equivalent to a simpler expression. + +@@ -1329,7 +1401,7 @@ + && !REG_P (src) + && (table->set_p + ? 
gcse_constant_p (XEXP (note, 0)) +- : want_to_gcse_p (XEXP (note, 0)))) ++ : want_to_gcse_p (XEXP (note, 0), NULL))) + src = XEXP (note, 0), pat = gen_rtx_SET (VOIDmode, dest, src); + + /* Only record sets of pseudo-regs in the hash table. */ +@@ -1344,7 +1416,7 @@ + can't do the same thing at the rtl level. */ + && !can_throw_internal (insn) + /* Is SET_SRC something we want to gcse? */ +- && want_to_gcse_p (src) ++ && want_to_gcse_p (src, &max_distance) + /* Don't CSE a nop. */ + && ! set_noop_p (pat) + /* Don't GCSE if it has attached REG_EQUIV note. +@@ -1368,7 +1440,8 @@ + int avail_p = (oprs_available_p (src, insn) + && ! JUMP_P (insn)); + +- insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p, table); ++ insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p, ++ max_distance, table); + } + + /* Record sets for constant/copy propagation. */ +@@ -1394,6 +1467,7 @@ + else if (flag_gcse_las && REG_P (src) && MEM_P (dest)) + { + unsigned int regno = REGNO (src); ++ int max_distance = 0; + + /* Do not do this for constant/copy propagation. */ + if (! table->set_p +@@ -1405,7 +1479,7 @@ + do that easily for EH edges so disable GCSE on these for now. */ + && !can_throw_internal (insn) + /* Is SET_DEST something we want to gcse? */ +- && want_to_gcse_p (dest) ++ && want_to_gcse_p (dest, &max_distance) + /* Don't CSE a nop. */ + && ! set_noop_p (pat) + /* Don't GCSE if it has attached REG_EQUIV note. +@@ -1427,7 +1501,7 @@ + + /* Record the memory expression (DEST) in the hash table. 
*/ + insert_expr_in_table (dest, GET_MODE (dest), insn, +- antic_p, avail_p, table); ++ antic_p, avail_p, max_distance, table); + } + } + } +@@ -1513,8 +1587,8 @@ + if (flat_table[i] != 0) + { + expr = flat_table[i]; +- fprintf (file, "Index %d (hash value %d)\n ", +- expr->bitmap_index, hash_val[i]); ++ fprintf (file, "Index %d (hash value %d; max distance %d)\n ", ++ expr->bitmap_index, hash_val[i], expr->max_distance); + print_rtl (file, expr->expr); + fprintf (file, "\n"); + } +@@ -3168,11 +3242,6 @@ + /* Nonzero for expressions that are transparent in the block. */ + static sbitmap *transp; + +-/* Nonzero for expressions that are transparent at the end of the block. +- This is only zero for expressions killed by abnormal critical edge +- created by a calls. */ +-static sbitmap *transpout; +- + /* Nonzero for expressions that are computed (available) in the block. */ + static sbitmap *comp; + +@@ -3236,28 +3305,105 @@ + pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL; + } + +-/* Top level routine to do the dataflow analysis needed by PRE. */ ++/* Remove certain expressions from anticipatable and transparent ++ sets of basic blocks that have incoming abnormal edge. ++ For PRE remove potentially trapping expressions to avoid placing ++ them on abnormal edges. For hoisting remove memory references that ++ can be clobbered by calls. */ + + static void +-compute_pre_data (void) ++prune_expressions (bool pre_p) + { +- sbitmap trapping_expr; +- basic_block bb; ++ sbitmap prune_exprs; + unsigned int ui; +- +- compute_local_properties (transp, comp, antloc, &expr_hash_table); +- sbitmap_vector_zero (ae_kill, last_basic_block); +- +- /* Collect expressions which might trap. 
*/ +- trapping_expr = sbitmap_alloc (expr_hash_table.n_elems); +- sbitmap_zero (trapping_expr); ++ basic_block bb; ++ ++ prune_exprs = sbitmap_alloc (expr_hash_table.n_elems); ++ sbitmap_zero (prune_exprs); + for (ui = 0; ui < expr_hash_table.size; ui++) + { + struct expr *e; + for (e = expr_hash_table.table[ui]; e != NULL; e = e->next_same_hash) +- if (may_trap_p (e->expr)) +- SET_BIT (trapping_expr, e->bitmap_index); +- } ++ { ++ /* Note potentially trapping expressions. */ ++ if (may_trap_p (e->expr)) ++ { ++ SET_BIT (prune_exprs, e->bitmap_index); ++ continue; ++ } ++ ++ if (!pre_p && MEM_P (e->expr)) ++ /* Note memory references that can be clobbered by a call. ++ We do not split abnormal edges in hoisting, so would ++ a memory reference get hoisted along an abnormal edge, ++ it would be placed /before/ the call. Therefore, only ++ constant memory references can be hoisted along abnormal ++ edges. */ ++ { ++ if (GET_CODE (XEXP (e->expr, 0)) == SYMBOL_REF ++ && CONSTANT_POOL_ADDRESS_P (XEXP (e->expr, 0))) ++ continue; ++ ++ if (MEM_READONLY_P (e->expr) ++ && !MEM_VOLATILE_P (e->expr) ++ && MEM_NOTRAP_P (e->expr)) ++ /* Constant memory reference, e.g., a PIC address. */ ++ continue; ++ ++ /* ??? Optimally, we would use interprocedural alias ++ analysis to determine if this mem is actually killed ++ by this call. */ ++ ++ SET_BIT (prune_exprs, e->bitmap_index); ++ } ++ } ++ } ++ ++ FOR_EACH_BB (bb) ++ { ++ edge e; ++ edge_iterator ei; ++ ++ /* If the current block is the destination of an abnormal edge, we ++ kill all trapping (for PRE) and memory (for hoist) expressions ++ because we won't be able to properly place the instruction on ++ the edge. So make them neither anticipatable nor transparent. ++ This is fairly conservative. ++ ++ ??? For hoisting it may be necessary to check for set-and-jump ++ instructions here, not just for abnormal edges. 
The general problem ++ is that when an expression cannot not be placed right at the end of ++ a basic block we should account for any side-effects of a subsequent ++ jump instructions that could clobber the expression. It would ++ be best to implement this check along the lines of ++ hoist_expr_reaches_here_p where the target block is already known ++ and, hence, there's no need to conservatively prune expressions on ++ "intermediate" set-and-jump instructions. */ ++ FOR_EACH_EDGE (e, ei, bb->preds) ++ if ((e->flags & EDGE_ABNORMAL) ++ && (pre_p || CALL_P (BB_END (e->src)))) ++ { ++ sbitmap_difference (antloc[bb->index], ++ antloc[bb->index], prune_exprs); ++ sbitmap_difference (transp[bb->index], ++ transp[bb->index], prune_exprs); ++ break; ++ } ++ } ++ ++ sbitmap_free (prune_exprs); ++} ++ ++/* Top level routine to do the dataflow analysis needed by PRE. */ ++ ++static void ++compute_pre_data (void) ++{ ++ basic_block bb; ++ ++ compute_local_properties (transp, comp, antloc, &expr_hash_table); ++ prune_expressions (true); ++ sbitmap_vector_zero (ae_kill, last_basic_block); + + /* Compute ae_kill for each basic block using: + +@@ -3266,21 +3412,6 @@ + + FOR_EACH_BB (bb) + { +- edge e; +- edge_iterator ei; +- +- /* If the current block is the destination of an abnormal edge, we +- kill all trapping expressions because we won't be able to properly +- place the instruction on the edge. So make them neither +- anticipatable nor transparent. This is fairly conservative. 
*/ +- FOR_EACH_EDGE (e, ei, bb->preds) +- if (e->flags & EDGE_ABNORMAL) +- { +- sbitmap_difference (antloc[bb->index], antloc[bb->index], trapping_expr); +- sbitmap_difference (transp[bb->index], transp[bb->index], trapping_expr); +- break; +- } +- + sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]); + sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]); + } +@@ -3291,7 +3422,6 @@ + antloc = NULL; + sbitmap_vector_free (ae_kill); + ae_kill = NULL; +- sbitmap_free (trapping_expr); + } + + /* PRE utilities */ +@@ -3406,14 +3536,10 @@ + + /* Add EXPR to the end of basic block BB. + +- This is used by both the PRE and code hoisting. +- +- For PRE, we want to verify that the expr is either transparent +- or locally anticipatable in the target block. This check makes +- no sense for code hoisting. */ ++ This is used by both the PRE and code hoisting. */ + + static void +-insert_insn_end_basic_block (struct expr *expr, basic_block bb, int pre) ++insert_insn_end_basic_block (struct expr *expr, basic_block bb) + { + rtx insn = BB_END (bb); + rtx new_insn; +@@ -3440,12 +3566,6 @@ + #ifdef HAVE_cc0 + rtx note; + #endif +- /* It should always be the case that we can put these instructions +- anywhere in the basic block with performing PRE optimizations. +- Check this. */ +- gcc_assert (!NONJUMP_INSN_P (insn) || !pre +- || TEST_BIT (antloc[bb->index], expr->bitmap_index) +- || TEST_BIT (transp[bb->index], expr->bitmap_index)); + + /* If this is a jump table, then we can't insert stuff here. Since + we know the previous real insn must be the tablejump, we insert +@@ -3482,15 +3602,7 @@ + /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers, + we search backward and place the instructions before the first + parameter is loaded. Do this for everyone for consistency and a +- presumption that we'll get better code elsewhere as well. 
+- +- It should always be the case that we can put these instructions +- anywhere in the basic block with performing PRE optimizations. +- Check this. */ +- +- gcc_assert (!pre +- || TEST_BIT (antloc[bb->index], expr->bitmap_index) +- || TEST_BIT (transp[bb->index], expr->bitmap_index)); ++ presumption that we'll get better code elsewhere as well. */ + + /* Since different machines initialize their parameter registers + in different orders, assume nothing. Collect the set of all +@@ -3587,7 +3699,7 @@ + now. */ + + if (eg->flags & EDGE_ABNORMAL) +- insert_insn_end_basic_block (index_map[j], bb, 0); ++ insert_insn_end_basic_block (index_map[j], bb); + else + { + insn = process_insert_insn (index_map[j]); +@@ -4046,61 +4158,12 @@ + } + } + +-/* Compute transparent outgoing information for each block. +- +- An expression is transparent to an edge unless it is killed by +- the edge itself. This can only happen with abnormal control flow, +- when the edge is traversed through a call. This happens with +- non-local labels and exceptions. +- +- This would not be necessary if we split the edge. While this is +- normally impossible for abnormal critical edges, with some effort +- it should be possible with exception handling, since we still have +- control over which handler should be invoked. But due to increased +- EH table sizes, this may not be worthwhile. */ +- +-static void +-compute_transpout (void) +-{ +- basic_block bb; +- unsigned int i; +- struct expr *expr; +- +- sbitmap_vector_ones (transpout, last_basic_block); +- +- FOR_EACH_BB (bb) +- { +- /* Note that flow inserted a nop at the end of basic blocks that +- end in call instructions for reasons other than abnormal +- control flow. */ +- if (! 
CALL_P (BB_END (bb))) +- continue; +- +- for (i = 0; i < expr_hash_table.size; i++) +- for (expr = expr_hash_table.table[i]; expr ; expr = expr->next_same_hash) +- if (MEM_P (expr->expr)) +- { +- if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF +- && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0))) +- continue; +- +- /* ??? Optimally, we would use interprocedural alias +- analysis to determine if this mem is actually killed +- by this call. */ +- RESET_BIT (transpout[bb->index], expr->bitmap_index); +- } +- } +-} +- + /* Code Hoisting variables and subroutines. */ + + /* Very busy expressions. */ + static sbitmap *hoist_vbein; + static sbitmap *hoist_vbeout; + +-/* Hoistable expressions. */ +-static sbitmap *hoist_exprs; +- + /* ??? We could compute post dominators and run this algorithm in + reverse to perform tail merging, doing so would probably be + more effective than the tail merging code in jump.c. +@@ -4119,8 +4182,6 @@ + + hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs); + hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs); +- hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs); +- transpout = sbitmap_vector_alloc (n_blocks, n_exprs); + } + + /* Free vars used for code hoisting analysis. */ +@@ -4134,8 +4195,6 @@ + + sbitmap_vector_free (hoist_vbein); + sbitmap_vector_free (hoist_vbeout); +- sbitmap_vector_free (hoist_exprs); +- sbitmap_vector_free (transpout); + + free_dominance_info (CDI_DOMINATORS); + } +@@ -4166,8 +4225,15 @@ + FOR_EACH_BB_REVERSE (bb) + { + if (bb->next_bb != EXIT_BLOCK_PTR) +- sbitmap_intersection_of_succs (hoist_vbeout[bb->index], +- hoist_vbein, bb->index); ++ { ++ sbitmap_intersection_of_succs (hoist_vbeout[bb->index], ++ hoist_vbein, bb->index); ++ ++ /* Include expressions in VBEout that are calculated ++ in BB and available at its end. 
*/ ++ sbitmap_a_or_b (hoist_vbeout[bb->index], ++ hoist_vbeout[bb->index], comp[bb->index]); ++ } + + changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index], + antloc[bb->index], +@@ -4179,7 +4245,17 @@ + } + + if (dump_file) +- fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes); ++ { ++ fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes); ++ ++ FOR_EACH_BB (bb) ++ { ++ fprintf (dump_file, "vbein (%d): ", bb->index); ++ dump_sbitmap_file (dump_file, hoist_vbein[bb->index]); ++ fprintf (dump_file, "vbeout(%d): ", bb->index); ++ dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]); ++ } ++ } + } + + /* Top level routine to do the dataflow analysis needed by code hoisting. */ +@@ -4188,7 +4264,7 @@ + compute_code_hoist_data (void) + { + compute_local_properties (transp, comp, antloc, &expr_hash_table); +- compute_transpout (); ++ prune_expressions (false); + compute_code_hoist_vbeinout (); + calculate_dominance_info (CDI_DOMINATORS); + if (dump_file) +@@ -4197,6 +4273,8 @@ + + /* Determine if the expression identified by EXPR_INDEX would + reach BB unimpared if it was placed at the end of EXPR_BB. ++ Stop the search if the expression would need to be moved more ++ than DISTANCE instructions. + + It's unclear exactly what Muchnick meant by "unimpared". It seems + to me that the expression must either be computed or transparent in +@@ -4209,12 +4287,24 @@ + paths. */ + + static int +-hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb, char *visited) ++hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb, ++ char *visited, int distance, int *bb_size) + { + edge pred; + edge_iterator ei; + int visited_allocated_locally = 0; + ++ /* Terminate the search if distance, for which EXPR is allowed to move, ++ is exhausted. 
*/ ++ if (distance > 0) ++ { ++ distance -= bb_size[bb->index]; ++ ++ if (distance <= 0) ++ return 0; ++ } ++ else ++ gcc_assert (distance == 0); + + if (visited == NULL) + { +@@ -4233,9 +4323,6 @@ + else if (visited[pred_bb->index]) + continue; + +- /* Does this predecessor generate this expression? */ +- else if (TEST_BIT (comp[pred_bb->index], expr_index)) +- break; + else if (! TEST_BIT (transp[pred_bb->index], expr_index)) + break; + +@@ -4243,8 +4330,8 @@ + else + { + visited[pred_bb->index] = 1; +- if (! hoist_expr_reaches_here_p (expr_bb, expr_index, +- pred_bb, visited)) ++ if (! hoist_expr_reaches_here_p (expr_bb, expr_index, pred_bb, ++ visited, distance, bb_size)) + break; + } + } +@@ -4254,20 +4341,33 @@ + return (pred == NULL); + } + ++/* Find occurence in BB. */ ++static struct occr * ++find_occr_in_bb (struct occr *occr, basic_block bb) ++{ ++ /* Find the right occurrence of this expression. */ ++ while (occr && BLOCK_FOR_INSN (occr->insn) != bb) ++ occr = occr->next; ++ ++ return occr; ++} ++ + /* Actually perform code hoisting. */ + + static int + hoist_code (void) + { + basic_block bb, dominated; ++ VEC (basic_block, heap) *dom_tree_walk; ++ unsigned int dom_tree_walk_index; + VEC (basic_block, heap) *domby; + unsigned int i,j; + struct expr **index_map; + struct expr *expr; ++ int *to_bb_head; ++ int *bb_size; + int changed = 0; + +- sbitmap_vector_zero (hoist_exprs, last_basic_block); +- + /* Compute a mapping from expression number (`bitmap_index') to + hash table entry. */ + +@@ -4276,28 +4376,98 @@ + for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash) + index_map[expr->bitmap_index] = expr; + ++ /* Calculate sizes of basic blocks and note how far ++ each instruction is from the start of its block. We then use this ++ data to restrict distance an expression can travel. 
*/ ++ ++ to_bb_head = XCNEWVEC (int, get_max_uid ()); ++ bb_size = XCNEWVEC (int, last_basic_block); ++ ++ FOR_EACH_BB (bb) ++ { ++ rtx insn; ++ int to_head; ++ ++ to_head = 0; ++ FOR_BB_INSNS (bb, insn) ++ { ++ /* Don't count debug instructions to avoid them affecting ++ decision choices. */ ++ if (NONDEBUG_INSN_P (insn)) ++ to_bb_head[INSN_UID (insn)] = to_head++; ++ } ++ ++ bb_size[bb->index] = to_head; ++ } ++ ++ gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1 ++ && (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest ++ == ENTRY_BLOCK_PTR->next_bb)); ++ ++ dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS, ++ ENTRY_BLOCK_PTR->next_bb); ++ + /* Walk over each basic block looking for potentially hoistable + expressions, nothing gets hoisted from the entry block. */ +- FOR_EACH_BB (bb) ++ for (dom_tree_walk_index = 0; ++ VEC_iterate (basic_block, dom_tree_walk, dom_tree_walk_index, bb); ++ dom_tree_walk_index++) + { +- int found = 0; +- int insn_inserted_p; +- +- domby = get_dominated_by (CDI_DOMINATORS, bb); ++ domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH); ++ ++ if (VEC_length (basic_block, domby) == 0) ++ continue; ++ + /* Examine each expression that is very busy at the exit of this + block. These are the potentially hoistable expressions. */ + for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++) + { +- int hoistable = 0; +- +- if (TEST_BIT (hoist_vbeout[bb->index], i) +- && TEST_BIT (transpout[bb->index], i)) ++ if (TEST_BIT (hoist_vbeout[bb->index], i)) + { ++ /* Current expression. */ ++ struct expr *expr = index_map[i]; ++ /* Number of occurences of EXPR that can be hoisted to BB. */ ++ int hoistable = 0; ++ /* Basic blocks that have occurences reachable from BB. */ ++ bitmap_head _from_bbs, *from_bbs = &_from_bbs; ++ /* Occurences reachable from BB. */ ++ VEC (occr_t, heap) *occrs_to_hoist = NULL; ++ /* We want to insert the expression into BB only once, so ++ note when we've inserted it. 
*/ ++ int insn_inserted_p; ++ occr_t occr; ++ ++ bitmap_initialize (from_bbs, 0); ++ ++ /* If an expression is computed in BB and is available at end of ++ BB, hoist all occurences dominated by BB to BB. */ ++ if (TEST_BIT (comp[bb->index], i)) ++ { ++ occr = find_occr_in_bb (expr->antic_occr, bb); ++ ++ if (occr) ++ { ++ /* An occurence might've been already deleted ++ while processing a dominator of BB. */ ++ if (occr->deleted_p) ++ gcc_assert (MAX_HOIST_DEPTH > 1); ++ else ++ { ++ gcc_assert (NONDEBUG_INSN_P (occr->insn)); ++ hoistable++; ++ } ++ } ++ else ++ hoistable++; ++ } ++ + /* We've found a potentially hoistable expression, now + we look at every block BB dominates to see if it + computes the expression. */ + for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++) + { ++ int max_distance; ++ + /* Ignore self dominance. */ + if (bb == dominated) + continue; +@@ -4307,17 +4477,43 @@ + if (!TEST_BIT (antloc[dominated->index], i)) + continue; + ++ occr = find_occr_in_bb (expr->antic_occr, dominated); ++ gcc_assert (occr); ++ ++ /* An occurence might've been already deleted ++ while processing a dominator of BB. */ ++ if (occr->deleted_p) ++ { ++ gcc_assert (MAX_HOIST_DEPTH > 1); ++ continue; ++ } ++ gcc_assert (NONDEBUG_INSN_P (occr->insn)); ++ ++ max_distance = expr->max_distance; ++ if (max_distance > 0) ++ /* Adjust MAX_DISTANCE to account for the fact that ++ OCCR won't have to travel all of DOMINATED, but ++ only part of it. */ ++ max_distance += (bb_size[dominated->index] ++ - to_bb_head[INSN_UID (occr->insn)]); ++ + /* Note if the expression would reach the dominated block + unimpared if it was placed at the end of BB. + + Keep track of how many times this expression is hoistable + from a dominated block into BB. 
*/ +- if (hoist_expr_reaches_here_p (bb, i, dominated, NULL)) +- hoistable++; ++ if (hoist_expr_reaches_here_p (bb, i, dominated, NULL, ++ max_distance, bb_size)) ++ { ++ hoistable++; ++ VEC_safe_push (occr_t, heap, ++ occrs_to_hoist, occr); ++ bitmap_set_bit (from_bbs, dominated->index); ++ } + } + + /* If we found more than one hoistable occurrence of this +- expression, then note it in the bitmap of expressions to ++ expression, then note it in the vector of expressions to + hoist. It makes no sense to hoist things which are computed + in only one BB, and doing so tends to pessimize register + allocation. One could increase this value to try harder +@@ -4326,91 +4522,80 @@ + the vast majority of hoistable expressions are only movable + from two successors, so raising this threshold is likely + to nullify any benefit we get from code hoisting. */ +- if (hoistable > 1) +- { +- SET_BIT (hoist_exprs[bb->index], i); +- found = 1; +- } +- } +- } +- /* If we found nothing to hoist, then quit now. */ +- if (! found) +- { +- VEC_free (basic_block, heap, domby); +- continue; +- } +- +- /* Loop over all the hoistable expressions. */ +- for (i = 0; i < hoist_exprs[bb->index]->n_bits; i++) +- { +- /* We want to insert the expression into BB only once, so +- note when we've inserted it. */ +- insn_inserted_p = 0; +- +- /* These tests should be the same as the tests above. */ +- if (TEST_BIT (hoist_exprs[bb->index], i)) +- { +- /* We've found a potentially hoistable expression, now +- we look at every block BB dominates to see if it +- computes the expression. */ +- for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++) +- { +- /* Ignore self dominance. */ +- if (bb == dominated) +- continue; +- +- /* We've found a dominated block, now see if it computes +- the busy expression and whether or not moving that +- expression to the "beginning" of that block is safe. 
*/ +- if (!TEST_BIT (antloc[dominated->index], i)) +- continue; +- +- /* The expression is computed in the dominated block and +- it would be safe to compute it at the start of the +- dominated block. Now we have to determine if the +- expression would reach the dominated block if it was +- placed at the end of BB. */ +- if (hoist_expr_reaches_here_p (bb, i, dominated, NULL)) +- { +- struct expr *expr = index_map[i]; +- struct occr *occr = expr->antic_occr; +- rtx insn; +- rtx set; +- +- /* Find the right occurrence of this expression. */ +- while (BLOCK_FOR_INSN (occr->insn) != dominated && occr) +- occr = occr->next; +- +- gcc_assert (occr); +- insn = occr->insn; +- set = single_set (insn); +- gcc_assert (set); +- +- /* Create a pseudo-reg to store the result of reaching +- expressions into. Get the mode for the new pseudo +- from the mode of the original destination pseudo. */ +- if (expr->reaching_reg == NULL) +- expr->reaching_reg +- = gen_reg_rtx_and_attrs (SET_DEST (set)); +- +- gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), insn); +- delete_insn (insn); +- occr->deleted_p = 1; +- changed = 1; +- gcse_subst_count++; +- +- if (!insn_inserted_p) +- { +- insert_insn_end_basic_block (index_map[i], bb, 0); +- insn_inserted_p = 1; +- } +- } +- } ++ if (hoistable > 1 && dbg_cnt (hoist_insn)) ++ { ++ /* If (hoistable != VEC_length), then there is ++ an occurence of EXPR in BB itself. Don't waste ++ time looking for LCA in this case. */ ++ if ((unsigned) hoistable ++ == VEC_length (occr_t, occrs_to_hoist)) ++ { ++ basic_block lca; ++ ++ lca = nearest_common_dominator_for_set (CDI_DOMINATORS, ++ from_bbs); ++ if (lca != bb) ++ /* Punt, it's better to hoist these occurences to ++ LCA. */ ++ VEC_free (occr_t, heap, occrs_to_hoist); ++ } ++ } ++ else ++ /* Punt, no point hoisting a single occurence. 
*/ ++ VEC_free (occr_t, heap, occrs_to_hoist); ++ ++ insn_inserted_p = 0; ++ ++ /* Walk through occurences of I'th expressions we want ++ to hoist to BB and make the transformations. */ ++ for (j = 0; ++ VEC_iterate (occr_t, occrs_to_hoist, j, occr); ++ j++) ++ { ++ rtx insn; ++ rtx set; ++ ++ gcc_assert (!occr->deleted_p); ++ ++ insn = occr->insn; ++ set = single_set (insn); ++ gcc_assert (set); ++ ++ /* Create a pseudo-reg to store the result of reaching ++ expressions into. Get the mode for the new pseudo ++ from the mode of the original destination pseudo. ++ ++ It is important to use new pseudos whenever we ++ emit a set. This will allow reload to use ++ rematerialization for such registers. */ ++ if (!insn_inserted_p) ++ expr->reaching_reg ++ = gen_reg_rtx_and_attrs (SET_DEST (set)); ++ ++ gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), ++ insn); ++ delete_insn (insn); ++ occr->deleted_p = 1; ++ changed = 1; ++ gcse_subst_count++; ++ ++ if (!insn_inserted_p) ++ { ++ insert_insn_end_basic_block (expr, bb); ++ insn_inserted_p = 1; ++ } ++ } ++ ++ VEC_free (occr_t, heap, occrs_to_hoist); ++ bitmap_clear (from_bbs); + } + } + VEC_free (basic_block, heap, domby); + } + ++ VEC_free (basic_block, heap, dom_tree_walk); ++ free (bb_size); ++ free (to_bb_head); + free (index_map); + + return changed; +@@ -4433,6 +4618,8 @@ + || is_too_expensive (_("GCSE disabled"))) + return 0; + ++ doing_code_hoisting_p = true; ++ + /* We need alias. 
*/ + init_alias_analysis (); + +@@ -4468,6 +4655,8 @@ + gcse_subst_count, gcse_create_count); + } + ++ doing_code_hoisting_p = false; ++ + return changed; + } + + +=== modified file 'gcc/params.def' +--- old/gcc/params.def 2010-04-02 18:54:46 +0000 ++++ new/gcc/params.def 2010-08-16 09:41:58 +0000 +@@ -219,6 +219,29 @@ + "gcse-after-reload-critical-fraction", + "The threshold ratio of critical edges execution count that permit performing redundancy elimination after reload", + 10, 0, 0) ++ ++/* GCSE will use GCSE_COST_DISTANCE_RATIO as a scaling factor ++ to calculate maximum distance for which an expression is allowed to move ++ from its rtx_cost. */ ++DEFPARAM(PARAM_GCSE_COST_DISTANCE_RATIO, ++ "gcse-cost-distance-ratio", ++ "Scaling factor in calculation of maximum distance an expression can be moved by GCSE optimizations", ++ 10, 0, 0) ++/* GCSE won't restrict distance for which an expression with rtx_cost greater ++ than COSTS_N_INSNS(GCSE_UNRESTRICTED_COST) is allowed to move. */ ++DEFPARAM(PARAM_GCSE_UNRESTRICTED_COST, ++ "gcse-unrestricted-cost", ++ "Cost at which GCSE optimizations will not constraint the distance an expression can travel", ++ 3, 0, 0) ++ ++/* How deep from a given basic block the dominator tree should be searched ++ for expressions to hoist to the block. The value of 0 will avoid limiting ++ the search. */ ++DEFPARAM(PARAM_MAX_HOIST_DEPTH, ++ "max-hoist-depth", ++ "Maximum depth of search in the dominator tree for expressions to hoist", ++ 30, 0, 0) ++ + /* This parameter limits the number of insns in a loop that will be unrolled, + and by how much the loop is unrolled. 
+ + +=== modified file 'gcc/params.h' +--- old/gcc/params.h 2009-12-01 19:12:29 +0000 ++++ new/gcc/params.h 2010-08-16 09:41:58 +0000 +@@ -125,6 +125,12 @@ + PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_PARTIAL_FRACTION) + #define GCSE_AFTER_RELOAD_CRITICAL_FRACTION \ + PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION) ++#define GCSE_COST_DISTANCE_RATIO \ ++ PARAM_VALUE (PARAM_GCSE_COST_DISTANCE_RATIO) ++#define GCSE_UNRESTRICTED_COST \ ++ PARAM_VALUE (PARAM_GCSE_UNRESTRICTED_COST) ++#define MAX_HOIST_DEPTH \ ++ PARAM_VALUE (PARAM_MAX_HOIST_DEPTH) + #define MAX_UNROLLED_INSNS \ + PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) + #define MAX_SMS_LOOP_NUMBER \ + +=== added file 'gcc/testsuite/gcc.dg/pr45101.c' +--- old/gcc/testsuite/gcc.dg/pr45101.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/pr45101.c 2010-08-16 09:41:58 +0000 +@@ -0,0 +1,15 @@ ++/* PR rtl-optimization/45101 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fgcse -fgcse-las" } */ ++ ++struct ++{ ++ int i; ++} *s; ++ ++extern void bar (void); ++ ++void foo () ++{ ++ !s ? 
s->i++ : bar (); ++} + +=== added file 'gcc/testsuite/gcc.dg/pr45105.c' +--- old/gcc/testsuite/gcc.dg/pr45105.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/pr45105.c 2010-08-16 09:41:58 +0000 +@@ -0,0 +1,27 @@ ++/* PR debug/45105 */ ++/* { dg-do compile } */ ++/* { dg-options "-Os -fcompare-debug" } */ ++ ++extern int *baz (int *, int *); ++ ++void ++bar (int *p1, int *p2) ++{ ++ int n = *baz (0, 0); ++ p1[n] = p2[n]; ++} ++ ++void ++foo (int *p, int l) ++{ ++ int a1[32]; ++ int a2[32]; ++ baz (a1, a2); ++ while (l) ++ { ++ if (l & 1) ++ p = baz (a2, p); ++ l--; ++ bar (a1, a2); ++ } ++} + +=== added file 'gcc/testsuite/gcc.dg/pr45107.c' +--- old/gcc/testsuite/gcc.dg/pr45107.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/pr45107.c 2010-08-16 09:41:58 +0000 +@@ -0,0 +1,13 @@ ++/* PR rtl-optimization/45107 */ ++/* { dg-do compile } */ ++/* { dg-options "-Os -fgcse-las" } */ ++ ++extern void bar(int *); ++ ++int foo (int *p) ++{ ++ int i = *p; ++ if (i != 1) ++ bar(&i); ++ *p = i; ++} + +=== added file 'gcc/testsuite/gcc.target/arm/pr40956.c' +--- old/gcc/testsuite/gcc.target/arm/pr40956.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/pr40956.c 2010-08-16 09:41:58 +0000 +@@ -0,0 +1,14 @@ ++/* { dg-options "-mthumb -Os -fpic -march=armv5te" } */ ++/* { dg-require-effective-target arm_thumb1_ok } */ ++/* { dg-require-effective-target fpic } */ ++/* Make sure the constant "0" is loaded into register only once. 
*/ ++/* { dg-final { scan-assembler-times "mov\[\\t \]*r., #0" 1 } } */ ++ ++int foo(int p, int* q) ++{ ++ if (p!=9) ++ *q = 0; ++ else ++ *(q+1) = 0; ++ return 3; ++} + +=== added file 'gcc/testsuite/gcc.target/arm/pr42495.c' +--- old/gcc/testsuite/gcc.target/arm/pr42495.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/pr42495.c 2010-08-16 09:41:58 +0000 +@@ -0,0 +1,31 @@ ++/* { dg-options "-mthumb -Os -fpic -march=armv5te -fdump-rtl-hoist" } */ ++/* { dg-require-effective-target arm_thumb1_ok } */ ++/* { dg-require-effective-target fpic } */ ++/* Make sure all calculations of gObj's address get hoisted to one location. */ ++/* { dg-final { scan-rtl-dump "PRE/HOIST: end of bb .* copying expression" "hoist" } } */ ++ ++struct st_a { ++ int data; ++}; ++ ++struct st_b { ++ struct st_a *p_a; ++ struct st_b *next; ++}; ++ ++extern struct st_b gObj; ++extern void foo(int, struct st_b*); ++ ++int goo(struct st_b * obj) { ++ struct st_a *pa; ++ if (gObj.p_a->data != 0) { ++ foo(gObj.p_a->data, obj); ++ } ++ pa = obj->p_a; ++ if (pa == 0) { ++ return 0; ++ } else if (pa == gObj.p_a) { ++ return 0; ++ } ++ return pa->data; ++} + +=== added file 'gcc/testsuite/gcc.target/arm/pr42574.c' +--- old/gcc/testsuite/gcc.target/arm/pr42574.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/pr42574.c 2010-08-16 09:41:58 +0000 +@@ -0,0 +1,24 @@ ++/* { dg-options "-mthumb -Os -fpic -march=armv5te" } */ ++/* { dg-require-effective-target arm_thumb1_ok } */ ++/* { dg-require-effective-target fpic } */ ++/* Make sure the address of glob.c is calculated only once and using ++ a logical shift for the offset (200<<1). */ ++/* { dg-final { scan-assembler-times "lsl" 1 } } */ ++ ++struct A { ++ char a[400]; ++ float* c; ++}; ++struct A glob; ++void func(); ++void func1(float*); ++int func2(float*, int*); ++void func3(float*); ++ ++void test(int *p) { ++ func1(glob.c); ++ if (func2(glob.c, p)) { ++ func(); ++ } ++ func3(glob.c); ++} + -- cgit 1.2.3-korg