Diffstat (limited to 'meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch')
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch  1759
1 files changed, 1759 insertions, 0 deletions
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch b/meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch
new file mode 100644
index 0000000000..a58dd24416
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch
@@ -0,0 +1,1759 @@
+2010-07-28 Maxim Kuvyrkov <maxim@codesourcery.com>
+
+ Backport code hoisting improvements from mainline:
+
+ 2010-07-28 Jakub Jelinek <jakub@redhat.com>
+ PR debug/45105
+ * gcc.dg/pr45105.c: New test.
+
+ 2010-07-28 Jakub Jelinek <jakub@redhat.com>
+ PR debug/45105
+ * gcse.c (hoist_code): Use FOR_BB_INSNS macro.
+
+ 2010-07-28 Maxim Kuvyrkov <maxim@codesourcery.com>
+ PR rtl-optimization/45107
+ * gcc.dg/pr45107.c: New test.
+
+ 2010-07-28 Maxim Kuvyrkov <maxim@codesourcery.com>
+ PR rtl-optimization/45107
+ * gcse.c (hash_scan_set): Use max_distance for gcse-las.
+
+ 2010-07-28 Maxim Kuvyrkov <maxim@codesourcery.com>
+ PR rtl-optimization/45101
+ * gcc.dg/pr45101.c: New test.
+
+ 2010-07-28 Maxim Kuvyrkov <maxim@codesourcery.com>
+ PR rtl-optimization/45101
+ * gcse.c (hash_scan_set): Fix argument ordering of insert_expr_in_table
+ for gcse-las.
+
+ 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com>
+ PR rtl-optimization/40956
+ PR target/42495
+ PR middle-end/42574
+ * gcc.target/arm/pr40956.c, gcc.target/arm/pr42495.c,
+ * gcc.target/arm/pr42574.c: Add tests.
+
+ 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com>
+ * config/arm/arm.c (params.h): Include.
+ (arm_override_options): Tune gcse-unrestricted-cost.
+ * config/arm/t-arm (arm.o): Define dependencies.
+
+ 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com>
+ PR target/42495
+ PR middle-end/42574
+ * basic-block.h (get_dominated_to_depth): Declare.
+ * dominance.c (get_dominated_to_depth): New function, use
+ get_all_dominated_blocks as a base.
+ (get_all_dominated_blocks): Use get_dominated_to_depth.
+ * gcse.c (occr_t, VEC (occr_t, heap)): Define.
+ (hoist_exprs): Remove.
+ (alloc_code_hoist_mem, free_code_hoist_mem): Update.
+ (compute_code_hoist_vbeinout): Add debug print outs.
+ (hoist_code): Partially rewrite, simplify. Use get_dominated_to_depth.
+ * params.def (PARAM_MAX_HOIST_DEPTH): New parameter to avoid
+ quadratic behavior.
+ * params.h (MAX_HOIST_DEPTH): New macro.
+ * doc/invoke.texi (max-hoist-depth): Document.
+
+ 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com>
+ PR rtl-optimization/40956
+ * config/arm/arm.c (thumb1_size_rtx_costs): Fix cost of simple
+ constants.
+
+ 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com>
+ PR target/42495
+ PR middle-end/42574
+ * config/arm/arm.c (legitimize_pic_address): Use
+ gen_calculate_pic_address pattern to emit calculation of PIC address.
+ (will_be_in_index_register): New function.
+ (arm_legitimate_address_outer_p, thumb2_legitimate_address_p,)
+ (thumb1_legitimate_address_p): Use it provided !strict_p.
+ * config/arm/arm.md (calculate_pic_address): New expand and split.
+
+ 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com>
+ PR target/42495
+ PR middle-end/42574
+ * config/arm/arm.c (thumb1_size_rtx_costs): Add cost for "J" constants.
+ * config/arm/arm.md (define_split "J", define_split "K"): Make
+ IRA/reload friendly.
+
+ 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com>
+ * gcse.c (insert_insn_end_basic_block): Update signature, remove
+ unused checks.
+ (pre_edge_insert, hoist_code): Update.
+
+ 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com>
+ PR target/42495
+ PR middle-end/42574
+ * gcse.c (hoist_expr_reaches_here_p): Remove excessive check.
+
+ 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com>
+ * gcse.c (hoist_code): Generate new pseudo for every new set insn.
+
+ 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com>
+ PR rtl-optimization/40956
+ PR target/42495
+ PR middle-end/42574
+ * gcse.c (compute_code_hoist_vbeinout): Consider more expressions
+ for hoisting.
+ (hoist_code): Count occurences in current block too.
+
+ 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com>
+ * gcse.c (struct expr:max_distance): New field.
+ (doing_code_hoisting_p): New static variable.
+ (want_to_gcse_p): Change signature. Allow constrained hoisting of
+ simple expressions, don't change behavior for PRE. Set max_distance.
+ (insert_expr_in_table): Set new max_distance field.
+ (hash_scan_set): Update.
+ (hoist_expr_reaches_here_p): Stop search after max_distance
+ instructions.
+ (find_occr_in_bb): New static function. Use it in ...
+ (hoist_code): Calculate sizes of basic block before any changes are
+ done. Pass max_distance to hoist_expr_reaches_here_p.
+ (one_code_hoisting_pass): Set doing_code_hoisting_p.
+ * params.def (PARAM_GCSE_COST_DISTANCE_RATIO,)
+ (PARAM_GCSE_UNRESTRICTED_COST): New parameters.
+ * params.h (GCSE_COST_DISTANCE_RATIO, GCSE_UNRESTRICTED_COST): New
+ macros.
+ * doc/invoke.texi (gcse-cost-distance-ratio, gcse-unrestricted-cost):
+ Document.
+
+ 2010-07-27 Jeff Law <law@redhat.com>
+ Maxim Kuvyrkov <maxim@codesourcery.com>
+ * gcse.c (compute_transpout, transpout): Remove, move logic
+ to prune_expressions.
+ (compute_pre_data): Move pruning of trapping expressions ...
+ (prune_expressions): ... here. New static function.
+ (compute_code_hoist_data): Use it.
+ (alloc_code_hoist_mem, free_code_hoist_mem, hoist_code): Update.
+
+ 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com>
+ * dbgcnt.def (hoist_insn): New debug counter.
+ * gcse.c (hoist_code): Use it.
+
+ 2010-07-28 Julian Brown <julian@codesourcery.com>
+
+ Backport from FSF mainline:
+
+=== modified file 'gcc/basic-block.h'
+--- old/gcc/basic-block.h 2010-04-02 18:54:46 +0000
++++ new/gcc/basic-block.h 2010-08-16 09:41:58 +0000
+@@ -932,6 +932,8 @@
+ extern VEC (basic_block, heap) *get_dominated_by_region (enum cdi_direction,
+ basic_block *,
+ unsigned);
++extern VEC (basic_block, heap) *get_dominated_to_depth (enum cdi_direction,
++ basic_block, int);
+ extern VEC (basic_block, heap) *get_all_dominated_blocks (enum cdi_direction,
+ basic_block);
+ extern void add_to_dominance_info (enum cdi_direction, basic_block);
+
+=== modified file 'gcc/config/arm/arm.c'
+--- old/gcc/config/arm/arm.c 2010-08-13 15:37:39 +0000
++++ new/gcc/config/arm/arm.c 2010-08-16 09:41:58 +0000
+@@ -56,6 +56,7 @@
+ #include "df.h"
+ #include "intl.h"
+ #include "libfuncs.h"
++#include "params.h"
+
+ /* Forward definitions of types. */
+ typedef struct minipool_node Mnode;
+@@ -1902,6 +1903,14 @@
+ flag_reorder_blocks = 1;
+ }
+
++ if (!PARAM_SET_P (PARAM_GCSE_UNRESTRICTED_COST)
++ && flag_pic)
++ /* Hoisting PIC address calculations more aggressively provides a small,
++ but measurable, size reduction for PIC code. Therefore, we decrease
++ the bar for unrestricted expression hoisting to the cost of PIC address
++ calculation, which is 2 instructions. */
++ set_param_value ("gcse-unrestricted-cost", 2);
++
+ /* Register global variables with the garbage collector. */
+ arm_add_gc_roots ();
+
+@@ -5070,17 +5079,13 @@
+ if (GET_CODE (orig) == SYMBOL_REF
+ || GET_CODE (orig) == LABEL_REF)
+ {
+- rtx pic_ref, address;
+ rtx insn;
+
+ if (reg == 0)
+ {
+ gcc_assert (can_create_pseudo_p ());
+ reg = gen_reg_rtx (Pmode);
+- address = gen_reg_rtx (Pmode);
+ }
+- else
+- address = reg;
+
+ /* VxWorks does not impose a fixed gap between segments; the run-time
+ gap can be different from the object-file gap. We therefore can't
+@@ -5096,18 +5101,21 @@
+ insn = arm_pic_static_addr (orig, reg);
+ else
+ {
++ rtx pat;
++ rtx mem;
++
+ /* If this function doesn't have a pic register, create one now. */
+ require_pic_register ();
+
+- if (TARGET_32BIT)
+- emit_insn (gen_pic_load_addr_32bit (address, orig));
+- else /* TARGET_THUMB1 */
+- emit_insn (gen_pic_load_addr_thumb1 (address, orig));
+-
+- pic_ref = gen_const_mem (Pmode,
+- gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
+- address));
+- insn = emit_move_insn (reg, pic_ref);
++ pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
++
++ /* Make the MEM as close to a constant as possible. */
++ mem = SET_SRC (pat);
++ gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
++ MEM_READONLY_P (mem) = 1;
++ MEM_NOTRAP_P (mem) = 1;
++
++ insn = emit_insn (pat);
+ }
+
+ /* Put a REG_EQUAL note on this insn, so that it can be optimized
+@@ -5387,6 +5395,15 @@
+ return FALSE;
+ }
+
++/* Return true if X will surely end up in an index register after next
++ splitting pass. */
++static bool
++will_be_in_index_register (const_rtx x)
++{
++ /* arm.md: calculate_pic_address will split this into a register. */
++ return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
++}
++
+ /* Return nonzero if X is a valid ARM state address operand. */
+ int
+ arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
+@@ -5444,8 +5461,9 @@
+ rtx xop1 = XEXP (x, 1);
+
+ return ((arm_address_register_rtx_p (xop0, strict_p)
+- && GET_CODE(xop1) == CONST_INT
+- && arm_legitimate_index_p (mode, xop1, outer, strict_p))
++ && ((GET_CODE(xop1) == CONST_INT
++ && arm_legitimate_index_p (mode, xop1, outer, strict_p))
++ || (!strict_p && will_be_in_index_register (xop1))))
+ || (arm_address_register_rtx_p (xop1, strict_p)
+ && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
+ }
+@@ -5531,7 +5549,8 @@
+ rtx xop1 = XEXP (x, 1);
+
+ return ((arm_address_register_rtx_p (xop0, strict_p)
+- && thumb2_legitimate_index_p (mode, xop1, strict_p))
++ && (thumb2_legitimate_index_p (mode, xop1, strict_p)
++ || (!strict_p && will_be_in_index_register (xop1))))
+ || (arm_address_register_rtx_p (xop1, strict_p)
+ && thumb2_legitimate_index_p (mode, xop0, strict_p)));
+ }
+@@ -5834,7 +5853,8 @@
+ && XEXP (x, 0) != frame_pointer_rtx
+ && XEXP (x, 1) != frame_pointer_rtx
+ && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
+- && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
++ && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
++ || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
+ return 1;
+
+ /* REG+const has 5-7 bit offset for non-SP registers. */
+@@ -6413,12 +6433,16 @@
+
+ case CONST_INT:
+ if (outer == SET)
+- {
+- if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
+- return 0;
+- if (thumb_shiftable_const (INTVAL (x)))
+- return COSTS_N_INSNS (2);
+- return COSTS_N_INSNS (3);
++ {
++ if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
++ return COSTS_N_INSNS (1);
++ /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
++ if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
++ return COSTS_N_INSNS (2);
++ /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
++ if (thumb_shiftable_const (INTVAL (x)))
++ return COSTS_N_INSNS (2);
++ return COSTS_N_INSNS (3);
+ }
+ else if ((outer == PLUS || outer == COMPARE)
+ && INTVAL (x) < 256 && INTVAL (x) > -256)
+@@ -7110,6 +7134,12 @@
+ a single register, otherwise it costs one insn per word. */
+ if (REG_P (XEXP (x, 0)))
+ *total = COSTS_N_INSNS (1);
++ else if (flag_pic
++ && GET_CODE (XEXP (x, 0)) == PLUS
++ && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
++ /* This will be split into two instructions.
++ See arm.md:calculate_pic_address. */
++ *total = COSTS_N_INSNS (2);
+ else
+ *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
+ return true;
+
+=== modified file 'gcc/config/arm/arm.md'
+--- old/gcc/config/arm/arm.md 2010-08-13 15:15:12 +0000
++++ new/gcc/config/arm/arm.md 2010-08-16 09:41:58 +0000
+@@ -5290,17 +5290,21 @@
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))]
+ "TARGET_THUMB1 && satisfies_constraint_J (operands[1])"
+- [(set (match_dup 0) (match_dup 1))
+- (set (match_dup 0) (neg:SI (match_dup 0)))]
+- "operands[1] = GEN_INT (- INTVAL (operands[1]));"
++ [(set (match_dup 2) (match_dup 1))
++ (set (match_dup 0) (neg:SI (match_dup 2)))]
++ "
++ {
++ operands[1] = GEN_INT (- INTVAL (operands[1]));
++ operands[2] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];
++ }"
+ )
+
+ (define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))]
+ "TARGET_THUMB1 && satisfies_constraint_K (operands[1])"
+- [(set (match_dup 0) (match_dup 1))
+- (set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))]
++ [(set (match_dup 2) (match_dup 1))
++ (set (match_dup 0) (ashift:SI (match_dup 2) (match_dup 3)))]
+ "
+ {
+ unsigned HOST_WIDE_INT val = INTVAL (operands[1]) & 0xffffffffu;
+@@ -5311,12 +5315,13 @@
+ if ((val & (mask << i)) == val)
+ break;
+
+- /* Shouldn't happen, but we don't want to split if the shift is zero. */
++ /* Don't split if the shift is zero. */
+ if (i == 0)
+ FAIL;
+
+ operands[1] = GEN_INT (val >> i);
+- operands[2] = GEN_INT (i);
++ operands[2] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];
++ operands[3] = GEN_INT (i);
+ }"
+ )
+
+@@ -5325,6 +5330,34 @@
+ ;; we use an unspec. The offset will be loaded from a constant pool entry,
+ ;; since that is the only type of relocation we can use.
+
++;; Wrap calculation of the whole PIC address in a single pattern for the
++;; benefit of optimizers, particularly, PRE and HOIST. Calculation of
++;; a PIC address involves two loads from memory, so we want to CSE it
++;; as often as possible.
++;; This pattern will be split into one of the pic_load_addr_* patterns
++;; and a move after GCSE optimizations.
++;;
++;; Note: Update arm.c: legitimize_pic_address() when changing this pattern.
++(define_expand "calculate_pic_address"
++ [(set (match_operand:SI 0 "register_operand" "")
++ (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "")
++ (unspec:SI [(match_operand:SI 2 "" "")]
++ UNSPEC_PIC_SYM))))]
++ "flag_pic"
++)
++
++;; Split calculate_pic_address into pic_load_addr_* and a move.
++(define_split
++ [(set (match_operand:SI 0 "register_operand" "")
++ (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "")
++ (unspec:SI [(match_operand:SI 2 "" "")]
++ UNSPEC_PIC_SYM))))]
++ "flag_pic"
++ [(set (match_dup 3) (unspec:SI [(match_dup 2)] UNSPEC_PIC_SYM))
++ (set (match_dup 0) (mem:SI (plus:SI (match_dup 1) (match_dup 3))))]
++ "operands[3] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];"
++)
++
+ ;; The rather odd constraints on the following are to force reload to leave
+ ;; the insn alone, and to force the minipool generation pass to then move
+ ;; the GOT symbol to memory.
+
+=== modified file 'gcc/config/arm/t-arm'
+--- old/gcc/config/arm/t-arm 2009-06-21 19:48:15 +0000
++++ new/gcc/config/arm/t-arm 2010-08-16 09:41:58 +0000
+@@ -45,6 +45,15 @@
+ $(srcdir)/config/arm/arm-cores.def > \
+ $(srcdir)/config/arm/arm-tune.md
+
++arm.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
++ $(RTL_H) $(TREE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \
++ insn-config.h conditions.h output.h \
++ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \
++ $(EXPR_H) $(OPTABS_H) toplev.h $(RECOG_H) $(CGRAPH_H) \
++ $(GGC_H) except.h $(C_PRAGMA_H) $(INTEGRATE_H) $(TM_P_H) \
++ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
++ intl.h libfuncs.h $(PARAMS_H)
++
+ arm-c.o: $(srcdir)/config/arm/arm-c.c $(CONFIG_H) $(SYSTEM_H) \
+ coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+
+=== modified file 'gcc/dbgcnt.def'
+--- old/gcc/dbgcnt.def 2009-11-25 10:55:54 +0000
++++ new/gcc/dbgcnt.def 2010-08-16 09:41:58 +0000
+@@ -158,6 +158,7 @@
+ DEBUG_COUNTER (global_alloc_at_func)
+ DEBUG_COUNTER (global_alloc_at_reg)
+ DEBUG_COUNTER (hoist)
++DEBUG_COUNTER (hoist_insn)
+ DEBUG_COUNTER (ia64_sched2)
+ DEBUG_COUNTER (if_conversion)
+ DEBUG_COUNTER (if_after_combine)
+
+=== modified file 'gcc/doc/invoke.texi'
+--- old/gcc/doc/invoke.texi 2010-08-05 15:20:54 +0000
++++ new/gcc/doc/invoke.texi 2010-08-16 09:41:58 +0000
+@@ -8086,6 +8086,29 @@
+ vectorization needs to be greater than the value specified by this option
+ to allow vectorization. The default value is 0.
+
++@item gcse-cost-distance-ratio
++Scaling factor in calculation of maximum distance an expression
++can be moved by GCSE optimizations. This is currently supported only in
++code hoisting pass. The bigger the ratio, the more agressive code hoisting
++will be with simple expressions, i.e., the expressions which have cost
++less than @option{gcse-unrestricted-cost}. Specifying 0 will disable
++hoisting of simple expressions. The default value is 10.
++
++@item gcse-unrestricted-cost
++Cost, roughly measured as the cost of a single typical machine
++instruction, at which GCSE optimizations will not constrain
++the distance an expression can travel. This is currently
++supported only in code hoisting pass. The lesser the cost,
++the more aggressive code hoisting will be. Specifying 0 will
++allow all expressions to travel unrestricted distances.
++The default value is 3.
++
++@item max-hoist-depth
++The depth of search in the dominator tree for expressions to hoist.
++This is used to avoid quadratic behavior in hoisting algorithm.
++The value of 0 will avoid limiting the search, but may slow down compilation
++of huge functions. The default value is 30.
++
+ @item max-unrolled-insns
+ The maximum number of instructions that a loop should have if that loop
+ is unrolled, and if the loop is unrolled, it determines how many times
+
+=== modified file 'gcc/dominance.c'
+--- old/gcc/dominance.c 2010-04-02 18:54:46 +0000
++++ new/gcc/dominance.c 2010-08-16 09:41:58 +0000
+@@ -782,16 +782,20 @@
+ }
+
+ /* Returns the list of basic blocks including BB dominated by BB, in the
+- direction DIR. The vector will be sorted in preorder. */
++ direction DIR up to DEPTH in the dominator tree. The DEPTH of zero will
++ produce a vector containing all dominated blocks. The vector will be sorted
++ in preorder. */
+
+ VEC (basic_block, heap) *
+-get_all_dominated_blocks (enum cdi_direction dir, basic_block bb)
++get_dominated_to_depth (enum cdi_direction dir, basic_block bb, int depth)
+ {
+ VEC(basic_block, heap) *bbs = NULL;
+ unsigned i;
++ unsigned next_level_start;
+
+ i = 0;
+ VEC_safe_push (basic_block, heap, bbs, bb);
++ next_level_start = 1; /* = VEC_length (basic_block, bbs); */
+
+ do
+ {
+@@ -802,12 +806,24 @@
+ son;
+ son = next_dom_son (dir, son))
+ VEC_safe_push (basic_block, heap, bbs, son);
++
++ if (i == next_level_start && --depth)
++ next_level_start = VEC_length (basic_block, bbs);
+ }
+- while (i < VEC_length (basic_block, bbs));
++ while (i < next_level_start);
+
+ return bbs;
+ }
+
++/* Returns the list of basic blocks including BB dominated by BB, in the
++ direction DIR. The vector will be sorted in preorder. */
++
++VEC (basic_block, heap) *
++get_all_dominated_blocks (enum cdi_direction dir, basic_block bb)
++{
++ return get_dominated_to_depth (dir, bb, 0);
++}
++
+ /* Redirect all edges pointing to BB to TO. */
+ void
+ redirect_immediate_dominators (enum cdi_direction dir, basic_block bb,
+
+=== modified file 'gcc/gcse.c'
+--- old/gcc/gcse.c 2010-03-16 10:50:42 +0000
++++ new/gcc/gcse.c 2010-08-16 09:41:58 +0000
+@@ -296,6 +296,12 @@
+ The value is the newly created pseudo-reg to record a copy of the
+ expression in all the places that reach the redundant copy. */
+ rtx reaching_reg;
++ /* Maximum distance in instructions this expression can travel.
++ We avoid moving simple expressions for more than a few instructions
++ to keep register pressure under control.
++ A value of "0" removes restrictions on how far the expression can
++ travel. */
++ int max_distance;
+ };
+
+ /* Occurrence of an expression.
+@@ -317,6 +323,10 @@
+ char copied_p;
+ };
+
++typedef struct occr *occr_t;
++DEF_VEC_P (occr_t);
++DEF_VEC_ALLOC_P (occr_t, heap);
++
+ /* Expression and copy propagation hash tables.
+ Each hash table is an array of buckets.
+ ??? It is known that if it were an array of entries, structure elements
+@@ -419,6 +429,9 @@
+ /* Number of global copies propagated. */
+ static int global_copy_prop_count;
+
++/* Doing code hoisting. */
++static bool doing_code_hoisting_p = false;
++
+ /* For available exprs */
+ static sbitmap *ae_kill;
+
+@@ -432,12 +445,12 @@
+ static void hash_scan_set (rtx, rtx, struct hash_table_d *);
+ static void hash_scan_clobber (rtx, rtx, struct hash_table_d *);
+ static void hash_scan_call (rtx, rtx, struct hash_table_d *);
+-static int want_to_gcse_p (rtx);
++static int want_to_gcse_p (rtx, int *);
+ static bool gcse_constant_p (const_rtx);
+ static int oprs_unchanged_p (const_rtx, const_rtx, int);
+ static int oprs_anticipatable_p (const_rtx, const_rtx);
+ static int oprs_available_p (const_rtx, const_rtx);
+-static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int,
++static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int,
+ struct hash_table_d *);
+ static void insert_set_in_table (rtx, rtx, struct hash_table_d *);
+ static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int);
+@@ -462,7 +475,6 @@
+ static void alloc_cprop_mem (int, int);
+ static void free_cprop_mem (void);
+ static void compute_transp (const_rtx, int, sbitmap *, int);
+-static void compute_transpout (void);
+ static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
+ struct hash_table_d *);
+ static void compute_cprop_data (void);
+@@ -486,7 +498,7 @@
+ static void compute_pre_data (void);
+ static int pre_expr_reaches_here_p (basic_block, struct expr *,
+ basic_block);
+-static void insert_insn_end_basic_block (struct expr *, basic_block, int);
++static void insert_insn_end_basic_block (struct expr *, basic_block);
+ static void pre_insert_copy_insn (struct expr *, rtx);
+ static void pre_insert_copies (void);
+ static int pre_delete (void);
+@@ -497,7 +509,8 @@
+ static void free_code_hoist_mem (void);
+ static void compute_code_hoist_vbeinout (void);
+ static void compute_code_hoist_data (void);
+-static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *);
++static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *,
++ int, int *);
+ static int hoist_code (void);
+ static int one_code_hoisting_pass (void);
+ static rtx process_insert_insn (struct expr *);
+@@ -755,7 +768,7 @@
+ GCSE. */
+
+ static int
+-want_to_gcse_p (rtx x)
++want_to_gcse_p (rtx x, int *max_distance_ptr)
+ {
+ #ifdef STACK_REGS
+ /* On register stack architectures, don't GCSE constants from the
+@@ -765,18 +778,67 @@
+ x = avoid_constant_pool_reference (x);
+ #endif
+
++ /* GCSE'ing constants:
++
++ We do not specifically distinguish between constant and non-constant
++ expressions in PRE and Hoist. We use rtx_cost below to limit
++ the maximum distance simple expressions can travel.
++
++ Nevertheless, constants are much easier to GCSE, and, hence,
++ it is easy to overdo the optimizations. Usually, excessive PRE and
++ Hoisting of constant leads to increased register pressure.
++
++ RA can deal with this by rematerialing some of the constants.
++ Therefore, it is important that the back-end generates sets of constants
++ in a way that allows reload rematerialize them under high register
++ pressure, i.e., a pseudo register with REG_EQUAL to constant
++ is set only once. Failing to do so will result in IRA/reload
++ spilling such constants under high register pressure instead of
++ rematerializing them. */
++
+ switch (GET_CODE (x))
+ {
+ case REG:
+ case SUBREG:
+- case CONST_INT:
+- case CONST_DOUBLE:
+- case CONST_FIXED:
+- case CONST_VECTOR:
+ case CALL:
+ return 0;
+
++ case CONST_INT:
++ case CONST_DOUBLE:
++ case CONST_FIXED:
++ case CONST_VECTOR:
++ if (!doing_code_hoisting_p)
++ /* Do not PRE constants. */
++ return 0;
++
++ /* FALLTHRU */
++
+ default:
++ if (doing_code_hoisting_p)
++ /* PRE doesn't implement max_distance restriction. */
++ {
++ int cost;
++ int max_distance;
++
++ gcc_assert (!optimize_function_for_speed_p (cfun)
++ && optimize_function_for_size_p (cfun));
++ cost = rtx_cost (x, SET, 0);
++
++ if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST))
++ {
++ max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10;
++ if (max_distance == 0)
++ return 0;
++
++ gcc_assert (max_distance > 0);
++ }
++ else
++ max_distance = 0;
++
++ if (max_distance_ptr)
++ *max_distance_ptr = max_distance;
++ }
++
+ return can_assign_to_reg_without_clobbers_p (x);
+ }
+ }
+@@ -1090,11 +1152,14 @@
+ It is only used if X is a CONST_INT.
+
+ ANTIC_P is nonzero if X is an anticipatable expression.
+- AVAIL_P is nonzero if X is an available expression. */
++ AVAIL_P is nonzero if X is an available expression.
++
++ MAX_DISTANCE is the maximum distance in instructions this expression can
++ be moved. */
+
+ static void
+ insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
+- int avail_p, struct hash_table_d *table)
++ int avail_p, int max_distance, struct hash_table_d *table)
+ {
+ int found, do_not_record_p;
+ unsigned int hash;
+@@ -1137,7 +1202,11 @@
+ cur_expr->next_same_hash = NULL;
+ cur_expr->antic_occr = NULL;
+ cur_expr->avail_occr = NULL;
++ gcc_assert (max_distance >= 0);
++ cur_expr->max_distance = max_distance;
+ }
++ else
++ gcc_assert (cur_expr->max_distance == max_distance);
+
+ /* Now record the occurrence(s). */
+ if (antic_p)
+@@ -1238,6 +1307,8 @@
+ cur_expr->next_same_hash = NULL;
+ cur_expr->antic_occr = NULL;
+ cur_expr->avail_occr = NULL;
++ /* Not used for set_p tables. */
++ cur_expr->max_distance = 0;
+ }
+
+ /* Now record the occurrence. */
+@@ -1307,6 +1378,7 @@
+ {
+ unsigned int regno = REGNO (dest);
+ rtx tmp;
++ int max_distance = 0;
+
+ /* See if a REG_EQUAL note shows this equivalent to a simpler expression.
+
+@@ -1329,7 +1401,7 @@
+ && !REG_P (src)
+ && (table->set_p
+ ? gcse_constant_p (XEXP (note, 0))
+- : want_to_gcse_p (XEXP (note, 0))))
++ : want_to_gcse_p (XEXP (note, 0), NULL)))
+ src = XEXP (note, 0), pat = gen_rtx_SET (VOIDmode, dest, src);
+
+ /* Only record sets of pseudo-regs in the hash table. */
+@@ -1344,7 +1416,7 @@
+ can't do the same thing at the rtl level. */
+ && !can_throw_internal (insn)
+ /* Is SET_SRC something we want to gcse? */
+- && want_to_gcse_p (src)
++ && want_to_gcse_p (src, &max_distance)
+ /* Don't CSE a nop. */
+ && ! set_noop_p (pat)
+ /* Don't GCSE if it has attached REG_EQUIV note.
+@@ -1368,7 +1440,8 @@
+ int avail_p = (oprs_available_p (src, insn)
+ && ! JUMP_P (insn));
+
+- insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p, table);
++ insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
++ max_distance, table);
+ }
+
+ /* Record sets for constant/copy propagation. */
+@@ -1394,6 +1467,7 @@
+ else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
+ {
+ unsigned int regno = REGNO (src);
++ int max_distance = 0;
+
+ /* Do not do this for constant/copy propagation. */
+ if (! table->set_p
+@@ -1405,7 +1479,7 @@
+ do that easily for EH edges so disable GCSE on these for now. */
+ && !can_throw_internal (insn)
+ /* Is SET_DEST something we want to gcse? */
+- && want_to_gcse_p (dest)
++ && want_to_gcse_p (dest, &max_distance)
+ /* Don't CSE a nop. */
+ && ! set_noop_p (pat)
+ /* Don't GCSE if it has attached REG_EQUIV note.
+@@ -1427,7 +1501,7 @@
+
+ /* Record the memory expression (DEST) in the hash table. */
+ insert_expr_in_table (dest, GET_MODE (dest), insn,
+- antic_p, avail_p, table);
++ antic_p, avail_p, max_distance, table);
+ }
+ }
+ }
+@@ -1513,8 +1587,8 @@
+ if (flat_table[i] != 0)
+ {
+ expr = flat_table[i];
+- fprintf (file, "Index %d (hash value %d)\n ",
+- expr->bitmap_index, hash_val[i]);
++ fprintf (file, "Index %d (hash value %d; max distance %d)\n ",
++ expr->bitmap_index, hash_val[i], expr->max_distance);
+ print_rtl (file, expr->expr);
+ fprintf (file, "\n");
+ }
+@@ -3168,11 +3242,6 @@
+ /* Nonzero for expressions that are transparent in the block. */
+ static sbitmap *transp;
+
+-/* Nonzero for expressions that are transparent at the end of the block.
+- This is only zero for expressions killed by abnormal critical edge
+- created by a calls. */
+-static sbitmap *transpout;
+-
+ /* Nonzero for expressions that are computed (available) in the block. */
+ static sbitmap *comp;
+
+@@ -3236,28 +3305,105 @@
+ pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
+ }
+
+-/* Top level routine to do the dataflow analysis needed by PRE. */
++/* Remove certain expressions from anticipatable and transparent
++ sets of basic blocks that have incoming abnormal edge.
++ For PRE remove potentially trapping expressions to avoid placing
++ them on abnormal edges. For hoisting remove memory references that
++ can be clobbered by calls. */
+
+ static void
+-compute_pre_data (void)
++prune_expressions (bool pre_p)
+ {
+- sbitmap trapping_expr;
+- basic_block bb;
++ sbitmap prune_exprs;
+ unsigned int ui;
+-
+- compute_local_properties (transp, comp, antloc, &expr_hash_table);
+- sbitmap_vector_zero (ae_kill, last_basic_block);
+-
+- /* Collect expressions which might trap. */
+- trapping_expr = sbitmap_alloc (expr_hash_table.n_elems);
+- sbitmap_zero (trapping_expr);
++ basic_block bb;
++
++ prune_exprs = sbitmap_alloc (expr_hash_table.n_elems);
++ sbitmap_zero (prune_exprs);
+ for (ui = 0; ui < expr_hash_table.size; ui++)
+ {
+ struct expr *e;
+ for (e = expr_hash_table.table[ui]; e != NULL; e = e->next_same_hash)
+- if (may_trap_p (e->expr))
+- SET_BIT (trapping_expr, e->bitmap_index);
+- }
++ {
++ /* Note potentially trapping expressions. */
++ if (may_trap_p (e->expr))
++ {
++ SET_BIT (prune_exprs, e->bitmap_index);
++ continue;
++ }
++
++ if (!pre_p && MEM_P (e->expr))
++ /* Note memory references that can be clobbered by a call.
++ We do not split abnormal edges in hoisting, so would
++ a memory reference get hoisted along an abnormal edge,
++ it would be placed /before/ the call. Therefore, only
++ constant memory references can be hoisted along abnormal
++ edges. */
++ {
++ if (GET_CODE (XEXP (e->expr, 0)) == SYMBOL_REF
++ && CONSTANT_POOL_ADDRESS_P (XEXP (e->expr, 0)))
++ continue;
++
++ if (MEM_READONLY_P (e->expr)
++ && !MEM_VOLATILE_P (e->expr)
++ && MEM_NOTRAP_P (e->expr))
++ /* Constant memory reference, e.g., a PIC address. */
++ continue;
++
++ /* ??? Optimally, we would use interprocedural alias
++ analysis to determine if this mem is actually killed
++ by this call. */
++
++ SET_BIT (prune_exprs, e->bitmap_index);
++ }
++ }
++ }
++
++ FOR_EACH_BB (bb)
++ {
++ edge e;
++ edge_iterator ei;
++
++ /* If the current block is the destination of an abnormal edge, we
++ kill all trapping (for PRE) and memory (for hoist) expressions
++ because we won't be able to properly place the instruction on
++ the edge. So make them neither anticipatable nor transparent.
++ This is fairly conservative.
++
++ ??? For hoisting it may be necessary to check for set-and-jump
++ instructions here, not just for abnormal edges. The general problem
++ is that when an expression cannot not be placed right at the end of
++ a basic block we should account for any side-effects of a subsequent
++ jump instructions that could clobber the expression. It would
++ be best to implement this check along the lines of
++ hoist_expr_reaches_here_p where the target block is already known
++ and, hence, there's no need to conservatively prune expressions on
++ "intermediate" set-and-jump instructions. */
++ FOR_EACH_EDGE (e, ei, bb->preds)
++ if ((e->flags & EDGE_ABNORMAL)
++ && (pre_p || CALL_P (BB_END (e->src))))
++ {
++ sbitmap_difference (antloc[bb->index],
++ antloc[bb->index], prune_exprs);
++ sbitmap_difference (transp[bb->index],
++ transp[bb->index], prune_exprs);
++ break;
++ }
++ }
++
++ sbitmap_free (prune_exprs);
++}
++
++/* Top level routine to do the dataflow analysis needed by PRE. */
++
++static void
++compute_pre_data (void)
++{
++ basic_block bb;
++
++ compute_local_properties (transp, comp, antloc, &expr_hash_table);
++ prune_expressions (true);
++ sbitmap_vector_zero (ae_kill, last_basic_block);
+
+ /* Compute ae_kill for each basic block using:
+
+@@ -3266,21 +3412,6 @@
+
+ FOR_EACH_BB (bb)
+ {
+- edge e;
+- edge_iterator ei;
+-
+- /* If the current block is the destination of an abnormal edge, we
+- kill all trapping expressions because we won't be able to properly
+- place the instruction on the edge. So make them neither
+- anticipatable nor transparent. This is fairly conservative. */
+- FOR_EACH_EDGE (e, ei, bb->preds)
+- if (e->flags & EDGE_ABNORMAL)
+- {
+- sbitmap_difference (antloc[bb->index], antloc[bb->index], trapping_expr);
+- sbitmap_difference (transp[bb->index], transp[bb->index], trapping_expr);
+- break;
+- }
+-
+ sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
+ sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
+ }
+@@ -3291,7 +3422,6 @@
+ antloc = NULL;
+ sbitmap_vector_free (ae_kill);
+ ae_kill = NULL;
+- sbitmap_free (trapping_expr);
+ }
+
+ /* PRE utilities */
+@@ -3406,14 +3536,10 @@
+
+ /* Add EXPR to the end of basic block BB.
+
+- This is used by both the PRE and code hoisting.
+-
+- For PRE, we want to verify that the expr is either transparent
+- or locally anticipatable in the target block. This check makes
+- no sense for code hoisting. */
++ This is used by both the PRE and code hoisting. */
+
+ static void
+-insert_insn_end_basic_block (struct expr *expr, basic_block bb, int pre)
++insert_insn_end_basic_block (struct expr *expr, basic_block bb)
+ {
+ rtx insn = BB_END (bb);
+ rtx new_insn;
+@@ -3440,12 +3566,6 @@
+ #ifdef HAVE_cc0
+ rtx note;
+ #endif
+- /* It should always be the case that we can put these instructions
+- anywhere in the basic block with performing PRE optimizations.
+- Check this. */
+- gcc_assert (!NONJUMP_INSN_P (insn) || !pre
+- || TEST_BIT (antloc[bb->index], expr->bitmap_index)
+- || TEST_BIT (transp[bb->index], expr->bitmap_index));
+
+ /* If this is a jump table, then we can't insert stuff here. Since
+ we know the previous real insn must be the tablejump, we insert
+@@ -3482,15 +3602,7 @@
+ /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
+ we search backward and place the instructions before the first
+ parameter is loaded. Do this for everyone for consistency and a
+- presumption that we'll get better code elsewhere as well.
+-
+- It should always be the case that we can put these instructions
+- anywhere in the basic block with performing PRE optimizations.
+- Check this. */
+-
+- gcc_assert (!pre
+- || TEST_BIT (antloc[bb->index], expr->bitmap_index)
+- || TEST_BIT (transp[bb->index], expr->bitmap_index));
++ presumption that we'll get better code elsewhere as well. */
+
+ /* Since different machines initialize their parameter registers
+ in different orders, assume nothing. Collect the set of all
+@@ -3587,7 +3699,7 @@
+ now. */
+
+ if (eg->flags & EDGE_ABNORMAL)
+- insert_insn_end_basic_block (index_map[j], bb, 0);
++ insert_insn_end_basic_block (index_map[j], bb);
+ else
+ {
+ insn = process_insert_insn (index_map[j]);
+@@ -4046,61 +4158,12 @@
+ }
+ }
+
+-/* Compute transparent outgoing information for each block.
+-
+- An expression is transparent to an edge unless it is killed by
+- the edge itself. This can only happen with abnormal control flow,
+- when the edge is traversed through a call. This happens with
+- non-local labels and exceptions.
+-
+- This would not be necessary if we split the edge. While this is
+- normally impossible for abnormal critical edges, with some effort
+- it should be possible with exception handling, since we still have
+- control over which handler should be invoked. But due to increased
+- EH table sizes, this may not be worthwhile. */
+-
+-static void
+-compute_transpout (void)
+-{
+- basic_block bb;
+- unsigned int i;
+- struct expr *expr;
+-
+- sbitmap_vector_ones (transpout, last_basic_block);
+-
+- FOR_EACH_BB (bb)
+- {
+- /* Note that flow inserted a nop at the end of basic blocks that
+- end in call instructions for reasons other than abnormal
+- control flow. */
+- if (! CALL_P (BB_END (bb)))
+- continue;
+-
+- for (i = 0; i < expr_hash_table.size; i++)
+- for (expr = expr_hash_table.table[i]; expr ; expr = expr->next_same_hash)
+- if (MEM_P (expr->expr))
+- {
+- if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
+- && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
+- continue;
+-
+- /* ??? Optimally, we would use interprocedural alias
+- analysis to determine if this mem is actually killed
+- by this call. */
+- RESET_BIT (transpout[bb->index], expr->bitmap_index);
+- }
+- }
+-}
+-
+ /* Code Hoisting variables and subroutines. */
+
+ /* Very busy expressions. */
+ static sbitmap *hoist_vbein;
+ static sbitmap *hoist_vbeout;
+
+-/* Hoistable expressions. */
+-static sbitmap *hoist_exprs;
+-
+ /* ??? We could compute post dominators and run this algorithm in
+ reverse to perform tail merging, doing so would probably be
+ more effective than the tail merging code in jump.c.
+@@ -4119,8 +4182,6 @@
+
+ hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
+ hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
+- hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
+- transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
+ }
+
+ /* Free vars used for code hoisting analysis. */
+@@ -4134,8 +4195,6 @@
+
+ sbitmap_vector_free (hoist_vbein);
+ sbitmap_vector_free (hoist_vbeout);
+- sbitmap_vector_free (hoist_exprs);
+- sbitmap_vector_free (transpout);
+
+ free_dominance_info (CDI_DOMINATORS);
+ }
+@@ -4166,8 +4225,15 @@
+ FOR_EACH_BB_REVERSE (bb)
+ {
+ if (bb->next_bb != EXIT_BLOCK_PTR)
+- sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
+- hoist_vbein, bb->index);
++ {
++ sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
++ hoist_vbein, bb->index);
++
++ /* Include expressions in VBEout that are calculated
++ in BB and available at its end. */
++ sbitmap_a_or_b (hoist_vbeout[bb->index],
++ hoist_vbeout[bb->index], comp[bb->index]);
++ }
+
+ changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index],
+ antloc[bb->index],
+@@ -4179,7 +4245,17 @@
+ }
+
+ if (dump_file)
+- fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
++ {
++ fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
++
++ FOR_EACH_BB (bb)
++ {
++ fprintf (dump_file, "vbein (%d): ", bb->index);
++ dump_sbitmap_file (dump_file, hoist_vbein[bb->index]);
++ fprintf (dump_file, "vbeout(%d): ", bb->index);
++ dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]);
++ }
++ }
+ }
+
+ /* Top level routine to do the dataflow analysis needed by code hoisting. */
+@@ -4188,7 +4264,7 @@
+ compute_code_hoist_data (void)
+ {
+ compute_local_properties (transp, comp, antloc, &expr_hash_table);
+- compute_transpout ();
++ prune_expressions (false);
+ compute_code_hoist_vbeinout ();
+ calculate_dominance_info (CDI_DOMINATORS);
+ if (dump_file)
+@@ -4197,6 +4273,8 @@
+
+ /* Determine if the expression identified by EXPR_INDEX would
+ reach BB unimpared if it was placed at the end of EXPR_BB.
++ Stop the search if the expression would need to be moved more
++ than DISTANCE instructions.
+
+ It's unclear exactly what Muchnick meant by "unimpared". It seems
+ to me that the expression must either be computed or transparent in
+@@ -4209,12 +4287,24 @@
+ paths. */
+
+ static int
+-hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb, char *visited)
++hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb,
++ char *visited, int distance, int *bb_size)
+ {
+ edge pred;
+ edge_iterator ei;
+ int visited_allocated_locally = 0;
+
++ /* Terminate the search if distance, for which EXPR is allowed to move,
++ is exhausted. */
++ if (distance > 0)
++ {
++ distance -= bb_size[bb->index];
++
++ if (distance <= 0)
++ return 0;
++ }
++ else
++ gcc_assert (distance == 0);
+
+ if (visited == NULL)
+ {
+@@ -4233,9 +4323,6 @@
+ else if (visited[pred_bb->index])
+ continue;
+
+- /* Does this predecessor generate this expression? */
+- else if (TEST_BIT (comp[pred_bb->index], expr_index))
+- break;
+ else if (! TEST_BIT (transp[pred_bb->index], expr_index))
+ break;
+
+@@ -4243,8 +4330,8 @@
+ else
+ {
+ visited[pred_bb->index] = 1;
+- if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
+- pred_bb, visited))
++ if (! hoist_expr_reaches_here_p (expr_bb, expr_index, pred_bb,
++ visited, distance, bb_size))
+ break;
+ }
+ }
+@@ -4254,20 +4341,33 @@
+ return (pred == NULL);
+ }
+
++/* Find occurence in BB. */
++static struct occr *
++find_occr_in_bb (struct occr *occr, basic_block bb)
++{
++ /* Find the right occurrence of this expression. */
++ while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
++ occr = occr->next;
++
++ return occr;
++}
++
+ /* Actually perform code hoisting. */
+
+ static int
+ hoist_code (void)
+ {
+ basic_block bb, dominated;
++ VEC (basic_block, heap) *dom_tree_walk;
++ unsigned int dom_tree_walk_index;
+ VEC (basic_block, heap) *domby;
+ unsigned int i,j;
+ struct expr **index_map;
+ struct expr *expr;
++ int *to_bb_head;
++ int *bb_size;
+ int changed = 0;
+
+- sbitmap_vector_zero (hoist_exprs, last_basic_block);
+-
+ /* Compute a mapping from expression number (`bitmap_index') to
+ hash table entry. */
+
+@@ -4276,28 +4376,98 @@
+ for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
+ index_map[expr->bitmap_index] = expr;
+
++ /* Calculate sizes of basic blocks and note how far
++ each instruction is from the start of its block. We then use this
++ data to restrict distance an expression can travel. */
++
++ to_bb_head = XCNEWVEC (int, get_max_uid ());
++ bb_size = XCNEWVEC (int, last_basic_block);
++
++ FOR_EACH_BB (bb)
++ {
++ rtx insn;
++ int to_head;
++
++ to_head = 0;
++ FOR_BB_INSNS (bb, insn)
++ {
++ /* Don't count debug instructions to avoid them affecting
++ decision choices. */
++ if (NONDEBUG_INSN_P (insn))
++ to_bb_head[INSN_UID (insn)] = to_head++;
++ }
++
++ bb_size[bb->index] = to_head;
++ }
++
++ gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1
++ && (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest
++ == ENTRY_BLOCK_PTR->next_bb));
++
++ dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS,
++ ENTRY_BLOCK_PTR->next_bb);
++
+ /* Walk over each basic block looking for potentially hoistable
+ expressions, nothing gets hoisted from the entry block. */
+- FOR_EACH_BB (bb)
++ for (dom_tree_walk_index = 0;
++ VEC_iterate (basic_block, dom_tree_walk, dom_tree_walk_index, bb);
++ dom_tree_walk_index++)
+ {
+- int found = 0;
+- int insn_inserted_p;
+-
+- domby = get_dominated_by (CDI_DOMINATORS, bb);
++ domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH);
++
++ if (VEC_length (basic_block, domby) == 0)
++ continue;
++
+ /* Examine each expression that is very busy at the exit of this
+ block. These are the potentially hoistable expressions. */
+ for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++)
+ {
+- int hoistable = 0;
+-
+- if (TEST_BIT (hoist_vbeout[bb->index], i)
+- && TEST_BIT (transpout[bb->index], i))
++ if (TEST_BIT (hoist_vbeout[bb->index], i))
+ {
++ /* Current expression. */
++ struct expr *expr = index_map[i];
++ /* Number of occurences of EXPR that can be hoisted to BB. */
++ int hoistable = 0;
++ /* Basic blocks that have occurences reachable from BB. */
++ bitmap_head _from_bbs, *from_bbs = &_from_bbs;
++ /* Occurences reachable from BB. */
++ VEC (occr_t, heap) *occrs_to_hoist = NULL;
++ /* We want to insert the expression into BB only once, so
++ note when we've inserted it. */
++ int insn_inserted_p;
++ occr_t occr;
++
++ bitmap_initialize (from_bbs, 0);
++
++ /* If an expression is computed in BB and is available at end of
++ BB, hoist all occurences dominated by BB to BB. */
++ if (TEST_BIT (comp[bb->index], i))
++ {
++ occr = find_occr_in_bb (expr->antic_occr, bb);
++
++ if (occr)
++ {
++ /* An occurence might've been already deleted
++ while processing a dominator of BB. */
++ if (occr->deleted_p)
++ gcc_assert (MAX_HOIST_DEPTH > 1);
++ else
++ {
++ gcc_assert (NONDEBUG_INSN_P (occr->insn));
++ hoistable++;
++ }
++ }
++ else
++ hoistable++;
++ }
++
+ /* We've found a potentially hoistable expression, now
+ we look at every block BB dominates to see if it
+ computes the expression. */
+ for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++)
+ {
++ int max_distance;
++
+ /* Ignore self dominance. */
+ if (bb == dominated)
+ continue;
+@@ -4307,17 +4477,43 @@
+ if (!TEST_BIT (antloc[dominated->index], i))
+ continue;
+
++ occr = find_occr_in_bb (expr->antic_occr, dominated);
++ gcc_assert (occr);
++
++ /* An occurence might've been already deleted
++ while processing a dominator of BB. */
++ if (occr->deleted_p)
++ {
++ gcc_assert (MAX_HOIST_DEPTH > 1);
++ continue;
++ }
++ gcc_assert (NONDEBUG_INSN_P (occr->insn));
++
++ max_distance = expr->max_distance;
++ if (max_distance > 0)
++ /* Adjust MAX_DISTANCE to account for the fact that
++ OCCR won't have to travel all of DOMINATED, but
++ only part of it. */
++ max_distance += (bb_size[dominated->index]
++ - to_bb_head[INSN_UID (occr->insn)]);
++
+ /* Note if the expression would reach the dominated block
+ unimpared if it was placed at the end of BB.
+
+ Keep track of how many times this expression is hoistable
+ from a dominated block into BB. */
+- if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
+- hoistable++;
++ if (hoist_expr_reaches_here_p (bb, i, dominated, NULL,
++ max_distance, bb_size))
++ {
++ hoistable++;
++ VEC_safe_push (occr_t, heap,
++ occrs_to_hoist, occr);
++ bitmap_set_bit (from_bbs, dominated->index);
++ }
+ }
+
+ /* If we found more than one hoistable occurrence of this
+- expression, then note it in the bitmap of expressions to
++ expression, then note it in the vector of expressions to
+ hoist. It makes no sense to hoist things which are computed
+ in only one BB, and doing so tends to pessimize register
+ allocation. One could increase this value to try harder
+@@ -4326,91 +4522,80 @@
+ the vast majority of hoistable expressions are only movable
+ from two successors, so raising this threshold is likely
+ to nullify any benefit we get from code hoisting. */
+- if (hoistable > 1)
+- {
+- SET_BIT (hoist_exprs[bb->index], i);
+- found = 1;
+- }
+- }
+- }
+- /* If we found nothing to hoist, then quit now. */
+- if (! found)
+- {
+- VEC_free (basic_block, heap, domby);
+- continue;
+- }
+-
+- /* Loop over all the hoistable expressions. */
+- for (i = 0; i < hoist_exprs[bb->index]->n_bits; i++)
+- {
+- /* We want to insert the expression into BB only once, so
+- note when we've inserted it. */
+- insn_inserted_p = 0;
+-
+- /* These tests should be the same as the tests above. */
+- if (TEST_BIT (hoist_exprs[bb->index], i))
+- {
+- /* We've found a potentially hoistable expression, now
+- we look at every block BB dominates to see if it
+- computes the expression. */
+- for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++)
+- {
+- /* Ignore self dominance. */
+- if (bb == dominated)
+- continue;
+-
+- /* We've found a dominated block, now see if it computes
+- the busy expression and whether or not moving that
+- expression to the "beginning" of that block is safe. */
+- if (!TEST_BIT (antloc[dominated->index], i))
+- continue;
+-
+- /* The expression is computed in the dominated block and
+- it would be safe to compute it at the start of the
+- dominated block. Now we have to determine if the
+- expression would reach the dominated block if it was
+- placed at the end of BB. */
+- if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
+- {
+- struct expr *expr = index_map[i];
+- struct occr *occr = expr->antic_occr;
+- rtx insn;
+- rtx set;
+-
+- /* Find the right occurrence of this expression. */
+- while (BLOCK_FOR_INSN (occr->insn) != dominated && occr)
+- occr = occr->next;
+-
+- gcc_assert (occr);
+- insn = occr->insn;
+- set = single_set (insn);
+- gcc_assert (set);
+-
+- /* Create a pseudo-reg to store the result of reaching
+- expressions into. Get the mode for the new pseudo
+- from the mode of the original destination pseudo. */
+- if (expr->reaching_reg == NULL)
+- expr->reaching_reg
+- = gen_reg_rtx_and_attrs (SET_DEST (set));
+-
+- gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), insn);
+- delete_insn (insn);
+- occr->deleted_p = 1;
+- changed = 1;
+- gcse_subst_count++;
+-
+- if (!insn_inserted_p)
+- {
+- insert_insn_end_basic_block (index_map[i], bb, 0);
+- insn_inserted_p = 1;
+- }
+- }
+- }
++ if (hoistable > 1 && dbg_cnt (hoist_insn))
++ {
++ /* If (hoistable != VEC_length), then there is
++ an occurence of EXPR in BB itself. Don't waste
++ time looking for LCA in this case. */
++ if ((unsigned) hoistable
++ == VEC_length (occr_t, occrs_to_hoist))
++ {
++ basic_block lca;
++
++ lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
++ from_bbs);
++ if (lca != bb)
++ /* Punt, it's better to hoist these occurences to
++ LCA. */
++ VEC_free (occr_t, heap, occrs_to_hoist);
++ }
++ }
++ else
++ /* Punt, no point hoisting a single occurence. */
++ VEC_free (occr_t, heap, occrs_to_hoist);
++
++ insn_inserted_p = 0;
++
++ /* Walk through occurences of I'th expressions we want
++ to hoist to BB and make the transformations. */
++ for (j = 0;
++ VEC_iterate (occr_t, occrs_to_hoist, j, occr);
++ j++)
++ {
++ rtx insn;
++ rtx set;
++
++ gcc_assert (!occr->deleted_p);
++
++ insn = occr->insn;
++ set = single_set (insn);
++ gcc_assert (set);
++
++ /* Create a pseudo-reg to store the result of reaching
++ expressions into. Get the mode for the new pseudo
++ from the mode of the original destination pseudo.
++
++ It is important to use new pseudos whenever we
++ emit a set. This will allow reload to use
++ rematerialization for such registers. */
++ if (!insn_inserted_p)
++ expr->reaching_reg
++ = gen_reg_rtx_and_attrs (SET_DEST (set));
++
++ gcse_emit_move_after (expr->reaching_reg, SET_DEST (set),
++ insn);
++ delete_insn (insn);
++ occr->deleted_p = 1;
++ changed = 1;
++ gcse_subst_count++;
++
++ if (!insn_inserted_p)
++ {
++ insert_insn_end_basic_block (expr, bb);
++ insn_inserted_p = 1;
++ }
++ }
++
++ VEC_free (occr_t, heap, occrs_to_hoist);
++ bitmap_clear (from_bbs);
+ }
+ }
+ VEC_free (basic_block, heap, domby);
+ }
+
++ VEC_free (basic_block, heap, dom_tree_walk);
++ free (bb_size);
++ free (to_bb_head);
+ free (index_map);
+
+ return changed;
+@@ -4433,6 +4618,8 @@
+ || is_too_expensive (_("GCSE disabled")))
+ return 0;
+
++ doing_code_hoisting_p = true;
++
+ /* We need alias. */
+ init_alias_analysis ();
+
+@@ -4468,6 +4655,8 @@
+ gcse_subst_count, gcse_create_count);
+ }
+
++ doing_code_hoisting_p = false;
++
+ return changed;
+ }
+
+
+=== modified file 'gcc/params.def'
+--- old/gcc/params.def 2010-04-02 18:54:46 +0000
++++ new/gcc/params.def 2010-08-16 09:41:58 +0000
+@@ -219,6 +219,29 @@
+ "gcse-after-reload-critical-fraction",
+ "The threshold ratio of critical edges execution count that permit performing redundancy elimination after reload",
+ 10, 0, 0)
++
++/* GCSE will use GCSE_COST_DISTANCE_RATION as a scaling factor
++ to calculate maximum distance for which an expression is allowed to move
++ from its rtx_cost. */
++DEFPARAM(PARAM_GCSE_COST_DISTANCE_RATIO,
++ "gcse-cost-distance-ratio",
++ "Scaling factor in calculation of maximum distance an expression can be moved by GCSE optimizations",
++ 10, 0, 0)
++/* GCSE won't restrict distance for which an expression with rtx_cost greater
++ than COSTS_N_INSN(GCSE_UNRESTRICTED_COST) is allowed to move. */
++DEFPARAM(PARAM_GCSE_UNRESTRICTED_COST,
++ "gcse-unrestricted-cost",
++ "Cost at which GCSE optimizations will not constraint the distance an expression can travel",
++ 3, 0, 0)
++
++/* How deep from a given basic block the dominator tree should be searched
++ for expressions to hoist to the block. The value of 0 will avoid limiting
++ the search. */
++DEFPARAM(PARAM_MAX_HOIST_DEPTH,
++ "max-hoist-depth",
++ "Maximum depth of search in the dominator tree for expressions to hoist",
++ 30, 0, 0)
++
+ /* This parameter limits the number of insns in a loop that will be unrolled,
+ and by how much the loop is unrolled.
+
+
+=== modified file 'gcc/params.h'
+--- old/gcc/params.h 2009-12-01 19:12:29 +0000
++++ new/gcc/params.h 2010-08-16 09:41:58 +0000
+@@ -125,6 +125,12 @@
+ PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_PARTIAL_FRACTION)
+ #define GCSE_AFTER_RELOAD_CRITICAL_FRACTION \
+ PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION)
++#define GCSE_COST_DISTANCE_RATIO \
++ PARAM_VALUE (PARAM_GCSE_COST_DISTANCE_RATIO)
++#define GCSE_UNRESTRICTED_COST \
++ PARAM_VALUE (PARAM_GCSE_UNRESTRICTED_COST)
++#define MAX_HOIST_DEPTH \
++ PARAM_VALUE (PARAM_MAX_HOIST_DEPTH)
+ #define MAX_UNROLLED_INSNS \
+ PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS)
+ #define MAX_SMS_LOOP_NUMBER \
+
+=== added file 'gcc/testsuite/gcc.dg/pr45101.c'
+--- old/gcc/testsuite/gcc.dg/pr45101.c 1970-01-01 00:00:00 +0000
++++ new/gcc/testsuite/gcc.dg/pr45101.c 2010-08-16 09:41:58 +0000
+@@ -0,0 +1,15 @@
++/* PR rtl-optimization/45101 */
++/* { dg-do compile } */
++/* { dg-options "-O2 -fgcse -fgcse-las" } */
++
++struct
++{
++ int i;
++} *s;
++
++extern void bar (void);
++
++void foo ()
++{
++ !s ? s->i++ : bar ();
++}
+
+=== added file 'gcc/testsuite/gcc.dg/pr45105.c'
+--- old/gcc/testsuite/gcc.dg/pr45105.c 1970-01-01 00:00:00 +0000
++++ new/gcc/testsuite/gcc.dg/pr45105.c 2010-08-16 09:41:58 +0000
+@@ -0,0 +1,27 @@
++/* PR debug/45105 */
++/* { dg-do compile } */
++/* { dg-options "-Os -fcompare-debug" } */
++
++extern int *baz (int *, int *);
++
++void
++bar (int *p1, int *p2)
++{
++ int n = *baz (0, 0);
++ p1[n] = p2[n];
++}
++
++void
++foo (int *p, int l)
++{
++ int a1[32];
++ int a2[32];
++ baz (a1, a2);
++ while (l)
++ {
++ if (l & 1)
++ p = baz (a2, p);
++ l--;
++ bar (a1, a2);
++ }
++}
+
+=== added file 'gcc/testsuite/gcc.dg/pr45107.c'
+--- old/gcc/testsuite/gcc.dg/pr45107.c 1970-01-01 00:00:00 +0000
++++ new/gcc/testsuite/gcc.dg/pr45107.c 2010-08-16 09:41:58 +0000
+@@ -0,0 +1,13 @@
++/* PR rtl-optimization/45107 */
++/* { dg-do compile } */
++/* { dg-options "-Os -fgcse-las" } */
++
++extern void bar(int *);
++
++int foo (int *p)
++{
++ int i = *p;
++ if (i != 1)
++ bar(&i);
++ *p = i;
++}
+
+=== added file 'gcc/testsuite/gcc.target/arm/pr40956.c'
+--- old/gcc/testsuite/gcc.target/arm/pr40956.c 1970-01-01 00:00:00 +0000
++++ new/gcc/testsuite/gcc.target/arm/pr40956.c 2010-08-16 09:41:58 +0000
+@@ -0,0 +1,14 @@
++/* { dg-options "-mthumb -Os -fpic -march=armv5te" } */
++/* { dg-require-effective-target arm_thumb1_ok } */
++/* { dg-require-effective-target fpic } */
++/* Make sure the constant "0" is loaded into register only once. */
++/* { dg-final { scan-assembler-times "mov\[\\t \]*r., #0" 1 } } */
++
++int foo(int p, int* q)
++{
++ if (p!=9)
++ *q = 0;
++ else
++ *(q+1) = 0;
++ return 3;
++}
+
+=== added file 'gcc/testsuite/gcc.target/arm/pr42495.c'
+--- old/gcc/testsuite/gcc.target/arm/pr42495.c 1970-01-01 00:00:00 +0000
++++ new/gcc/testsuite/gcc.target/arm/pr42495.c 2010-08-16 09:41:58 +0000
+@@ -0,0 +1,31 @@
++/* { dg-options "-mthumb -Os -fpic -march=armv5te -fdump-rtl-hoist" } */
++/* { dg-require-effective-target arm_thumb1_ok } */
++/* { dg-require-effective-target fpic } */
++/* Make sure all calculations of gObj's address get hoisted to one location. */
++/* { dg-final { scan-rtl-dump "PRE/HOIST: end of bb .* copying expression" "hoist" } } */
++
++struct st_a {
++ int data;
++};
++
++struct st_b {
++ struct st_a *p_a;
++ struct st_b *next;
++};
++
++extern struct st_b gObj;
++extern void foo(int, struct st_b*);
++
++int goo(struct st_b * obj) {
++ struct st_a *pa;
++ if (gObj.p_a->data != 0) {
++ foo(gObj.p_a->data, obj);
++ }
++ pa = obj->p_a;
++ if (pa == 0) {
++ return 0;
++ } else if (pa == gObj.p_a) {
++ return 0;
++ }
++ return pa->data;
++}
+
+=== added file 'gcc/testsuite/gcc.target/arm/pr42574.c'
+--- old/gcc/testsuite/gcc.target/arm/pr42574.c 1970-01-01 00:00:00 +0000
++++ new/gcc/testsuite/gcc.target/arm/pr42574.c 2010-08-16 09:41:58 +0000
+@@ -0,0 +1,24 @@
++/* { dg-options "-mthumb -Os -fpic -march=armv5te" } */
++/* { dg-require-effective-target arm_thumb1_ok } */
++/* { dg-require-effective-target fpic } */
++/* Make sure the address of glob.c is calculated only once and using
++ a logical shift for the offset (200<<1). */
++/* { dg-final { scan-assembler-times "lsl" 1 } } */
++
++struct A {
++ char a[400];
++ float* c;
++};
++struct A glob;
++void func();
++void func1(float*);
++int func2(float*, int*);
++void func3(float*);
++
++void test(int *p) {
++ func1(glob.c);
++ if (func2(glob.c, p)) {
++ func();
++ }
++ func3(glob.c);
++}
+