2010-08-31  Chung-Lin Tang

	Backport from mainline:

	2010-04-14  Bernd Schmidt

	PR target/21803
	gcc/
	* ifcvt.c (cond_exec_process_if_block): Look for identical sequences
	at the start and end of the then/else blocks, and omit them from the
	conversion.
	* cfgcleanup.c (flow_find_cross_jump): No longer static.  Remove MODE
	argument; all callers changed.  Pass zero to old_insns_match_p
	instead.
	(flow_find_head_matching_sequence): New function.
	(old_insns_match_p): Check REG_EH_REGION notes for calls.
	* basic-block.h (flow_find_cross_jump,
	flow_find_head_matching_sequence): Declare functions.

	gcc/testsuite/
	* gcc.target/arm/pr42496.c: New test.

	2010-04-22  Bernd Schmidt

	PR middle-end/29274
	gcc/
	* tree-pass.h (pass_optimize_widening_mul): Declare.
	* tree-ssa-math-opts.c (execute_optimize_widening_mul,
	gate_optimize_widening_mul): New static functions.
	(pass_optimize_widening_mul): New.
	* expr.c (expand_expr_real_2) <case WIDEN_MULT_EXPR>: New case.
	<case MULT_EXPR>: Remove support for widening multiplies.
	* tree.def (WIDEN_MULT_EXPR): Tweak comment.
	* cfgexpand.c (expand_debug_expr) <case WIDEN_MULT_EXPR>: Use
	simplify_gen_unary rather than directly building extensions.
	* tree-cfg.c (verify_gimple_assign_binary): Add tests for
	WIDEN_MULT_EXPR.
	* expmed.c (expand_widening_mult): New function.
	* passes.c (init_optimization_passes): Add pass_optimize_widening_mul.
	* optabs.h (expand_widening_mult): Declare.

	gcc/testsuite/
	* gcc.target/i386/wmul-1.c: New test.
	* gcc.target/i386/wmul-2.c: New test.
	* gcc.target/bfin/wmul-1.c: New test.
	* gcc.target/bfin/wmul-2.c: New test.
	* gcc.target/arm/wmul-1.c: New test.
	* gcc.target/arm/wmul-2.c: New test.

	2010-04-24  Bernd Schmidt

	PR tree-optimization/41442
	gcc/
	* fold-const.c (merge_truthop_with_opposite_arm): New function.
	(fold_binary_loc): Call it.

	gcc/testsuite/
	* gcc.target/i386/pr41442.c: New test.

	2010-04-29  Bernd Schmidt

	PR target/42895
	gcc/
	* doc/tm.texi (ADJUST_REG_ALLOC_ORDER): Renamed from
	ORDER_REGS_FOR_LOCAL_ALLOC.  All instances of this macro changed.
	(HONOR_REG_ALLOC_ORDER): Describe new macro.
	* ira.c (setup_alloc_regs): Use ADJUST_REG_ALLOC_ORDER if defined.
	* ira-color.c (assign_hard_reg): Take prologue/epilogue costs into
	account only if HONOR_REG_ALLOC_ORDER is not defined.
	* config/arm/arm.h (HONOR_REG_ALLOC_ORDER): Define.
	* system.h (ORDER_REGS_FOR_LOCAL_ALLOC): Poison.

	2010-05-04  Mikael Pettersson

	PR bootstrap/43964
	gcc/
	* ira-color.c (assign_hard_reg): Declare rclass and add_cost
	only if HONOR_REG_ALLOC_ORDER is not defined.

	2010-06-04  Bernd Schmidt

	PR rtl-optimization/39871
	PR rtl-optimization/40615
	PR rtl-optimization/42500
	PR rtl-optimization/42502
	gcc/
	* ira.c (init_reg_equiv_memory_loc): New function.
	(ira): Call it twice.
	* reload.h (calculate_elim_costs_all_insns): Declare.
	* ira-costs.c: Include "reload.h".
	(regno_equiv_gains): New static variable.
	(init_costs): Allocate it.
	(finish_costs): Free it.
	(ira_costs): Call calculate_elim_costs_all_insns.
	(find_costs_and_classes): Take estimated elimination costs into
	account.
	(ira_adjust_equiv_reg_cost): New function.
	* ira.h (ira_adjust_equiv_reg_cost): Declare it.
	* reload1.c (init_eliminable_invariants, free_reg_equiv,
	elimination_costs_in_insn, note_reg_elim_costly): New static
	functions.
	(elim_bb): New static variable.
	(reload): Move code out of here into init_eliminable_invariants and
	free_reg_equiv.  Call them.
	(calculate_elim_costs_all_insns): New function.
	(eliminate_regs_1): Declare.  Add extra arg FOR_COSTS; all callers
	changed.  If FOR_COSTS is true, don't call alter_reg, but call
	note_reg_elim_costly if we turned a valid memory address into an
	invalid one.
	* Makefile.in (ira-costs.o): Depend on reload.h.

	gcc/testsuite/
	* gcc.target/arm/eliminate.c: New test.

	2010-06-09  Bernd Schmidt

	gcc/
	* config/arm/arm.c (thumb2_reorg): New function.
	(arm_reorg): Call it.
	* config/arm/thumb2.md (define_peephole2 for flag clobbering
	arithmetic operations): Delete.

	2010-06-12  Bernd Schmidt

	gcc/
	* config/arm/arm.c (thumb2_reorg): Fix errors in previous change.

	2010-06-17  Bernd Schmidt

	PR rtl-optimization/39871
	gcc/
	* reload1.c (init_eliminable_invariants): For flag_pic, disable
	equivalences only for constants that aren't
	LEGITIMATE_PIC_OPERAND_P.
	(function_invariant_p): Rule out a plus of frame or arg pointer with
	a SYMBOL_REF.
	* ira.c (find_reg_equiv_invariant_const): Likewise.

	2010-06-18  Eric Botcazou

	PR rtl-optimization/40900
	gcc/
	* expr.c (expand_expr_real_1) <SSA_NAME>: Fix long line.  Save the
	original expression for later reuse.
	<expand_decl_rtl>: Use promote_function_mode to compute the
	signedness of the promoted RTL for a SSA_NAME on the LHS of a call
	statement.

	2010-06-18  Bernd Schmidt

	gcc/testsuite/
	* gcc.target/arm/pr40900.c: New test.

	2010-06-30  Bernd Schmidt

	PR tree-optimization/39799
	gcc/
	* tree-inline.c (remap_ssa_name): Initialize variable only if
	SSA_NAME_OCCURS_IN_ABNORMAL_PHI.
	* tree-ssa.c (warn_uninit): Avoid emitting an unnecessary message.

	gcc/testsuite/
	* c-c++-common/uninit-17.c: New test.

	2010-07-25  Eric Botcazou

	PR target/44484
	gcc/
	* config/sparc/predicates.md (memory_reg_operand): Delete.
	* config/sparc/sync.md (sync_compare_and_swap): Minor tweaks.
	(*sync_compare_and_swap): Encode the address form in the pattern.
	(*sync_compare_and_swapdi_v8plus): Likewise.

	2010-08-29  Chung-Lin Tang

	Backport from mainline:
=== modified file 'gcc/Makefile.in'
--- old/gcc/Makefile.in	2010-08-10 13:31:21 +0000
+++ new/gcc/Makefile.in	2010-09-01 13:29:58 +0000
@@ -3193,7 +3193,7 @@
 ira-costs.o: ira-costs.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
   hard-reg-set.h $(RTL_H) $(EXPR_H) $(TM_P_H) $(FLAGS_H) $(BASIC_BLOCK_H) \
   $(REGS_H) addresses.h insn-config.h $(RECOG_H) $(TOPLEV_H) $(TARGET_H) \
-  $(PARAMS_H) $(IRA_INT_H)
+  $(PARAMS_H) $(IRA_INT_H) reload.h
 ira-conflicts.o: ira-conflicts.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
   $(TARGET_H) $(RTL_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) \
   insn-config.h $(RECOG_H) $(BASIC_BLOCK_H) $(TOPLEV_H) $(TM_P_H) $(PARAMS_H) \

=== modified file 'gcc/basic-block.h'
--- old/gcc/basic-block.h	2010-08-16 09:41:58 +0000
+++ new/gcc/basic-block.h	2010-09-01 13:29:58 +0000
@@ -894,6 +894,10 @@
 /* In cfgcleanup.c.  */
 extern bool cleanup_cfg (int);
+extern int flow_find_cross_jump (basic_block, basic_block, rtx *, rtx *);
+extern int flow_find_head_matching_sequence (basic_block, basic_block,
+                                             rtx *, rtx *, int);
+
 extern bool delete_unreachable_blocks (void);
 extern bool mark_dfs_back_edges (void);

=== modified file 'gcc/cfgcleanup.c'
--- old/gcc/cfgcleanup.c	2010-05-17 16:26:22 +0000
+++ new/gcc/cfgcleanup.c	2010-09-01 13:29:58 +0000
@@ -68,7 +68,6 @@
 static bool try_crossjump_to_edge (int, edge, edge);
 static bool try_crossjump_bb (int, basic_block);
 static bool outgoing_edges_match (int, basic_block, basic_block);
-static int flow_find_cross_jump (int, basic_block, basic_block, rtx *, rtx *);
 static bool old_insns_match_p (int, rtx, rtx);
 
 static void merge_blocks_move_predecessor_nojumps (basic_block, basic_block);
@@ -972,13 +971,27 @@
      be filled that clobbers a parameter expected by the subroutine.
 
      ??? We take the simple route for now and assume that if they're
-     equal, they were constructed identically.  */
-
-  if (CALL_P (i1)
-      && (!rtx_equal_p (CALL_INSN_FUNCTION_USAGE (i1),
+     equal, they were constructed identically.
+
+     Also check for identical exception regions.  */
+
+  if (CALL_P (i1))
+    {
+      /* Ensure the same EH region.  */
+      rtx n1 = find_reg_note (i1, REG_EH_REGION, 0);
+      rtx n2 = find_reg_note (i2, REG_EH_REGION, 0);
+
+      if (!n1 && n2)
+        return false;
+
+      if (n1 && (!n2 || XEXP (n1, 0) != XEXP (n2, 0)))
+        return false;
+
+      if (!rtx_equal_p (CALL_INSN_FUNCTION_USAGE (i1),
                         CALL_INSN_FUNCTION_USAGE (i2))
-          || SIBLING_CALL_P (i1) != SIBLING_CALL_P (i2)))
-    return false;
+          || SIBLING_CALL_P (i1) != SIBLING_CALL_P (i2))
+        return false;
+    }
 
 #ifdef STACK_REGS
   /* If cross_jump_death_matters is not 0, the insn's mode
@@ -1017,6 +1030,29 @@
   return false;
 }
 
+/* When comparing insns I1 and I2 in flow_find_cross_jump or
+   flow_find_head_matching_sequence, ensure the notes match.  */
+
+static void
+merge_notes (rtx i1, rtx i2)
+{
+  /* If the merged insns have different REG_EQUAL notes, then
+     remove them.  */
+  rtx equiv1 = find_reg_equal_equiv_note (i1);
+  rtx equiv2 = find_reg_equal_equiv_note (i2);
+
+  if (equiv1 && !equiv2)
+    remove_note (i1, equiv1);
+  else if (!equiv1 && equiv2)
+    remove_note (i2, equiv2);
+  else if (equiv1 && equiv2
+           && !rtx_equal_p (XEXP (equiv1, 0), XEXP (equiv2, 0)))
+    {
+      remove_note (i1, equiv1);
+      remove_note (i2, equiv2);
+    }
+}
+
 /* Look through the insns at the end of BB1 and BB2 and find the longest
    sequence that are equivalent.  Store the first insns for that sequence
    in *F1 and *F2 and return the sequence length.
@@ -1024,9 +1060,8 @@
    To simplify callers of this function, if the blocks match exactly,
    store the head of the blocks in *F1 and *F2.  */
 
-static int
-flow_find_cross_jump (int mode ATTRIBUTE_UNUSED, basic_block bb1,
-                      basic_block bb2, rtx *f1, rtx *f2)
+int
+flow_find_cross_jump (basic_block bb1, basic_block bb2, rtx *f1, rtx *f2)
 {
   rtx i1, i2, last1, last2, afterlast1, afterlast2;
   int ninsns = 0;
@@ -1066,7 +1101,7 @@
       if (i1 == BB_HEAD (bb1) || i2 == BB_HEAD (bb2))
        break;
 
-      if (!old_insns_match_p (mode, i1, i2))
+      if (!old_insns_match_p (0, i1, i2))
        break;
 
       merge_memattrs (i1, i2);
@@ -1074,21 +1109,7 @@
       /* Don't begin a cross-jump with a NOTE insn.  */
       if (INSN_P (i1))
        {
-         /* If the merged insns have different REG_EQUAL notes, then
-            remove them.  */
-         rtx equiv1 = find_reg_equal_equiv_note (i1);
-         rtx equiv2 = find_reg_equal_equiv_note (i2);
-
-         if (equiv1 && !equiv2)
-           remove_note (i1, equiv1);
-         else if (!equiv1 && equiv2)
-           remove_note (i2, equiv2);
-         else if (equiv1 && equiv2
-                  && !rtx_equal_p (XEXP (equiv1, 0), XEXP (equiv2, 0)))
-           {
-             remove_note (i1, equiv1);
-             remove_note (i2, equiv2);
-           }
+         merge_notes (i1, i2);
 
          afterlast1 = last1, afterlast2 = last2;
          last1 = i1, last2 = i2;
@@ -1130,6 +1151,97 @@
   return ninsns;
 }
 
+/* Like flow_find_cross_jump, except start looking for a matching sequence from
+   the head of the two blocks.  Do not include jumps at the end.
+   If STOP_AFTER is nonzero, stop after finding that many matching
+   instructions.  */
+
+int
+flow_find_head_matching_sequence (basic_block bb1, basic_block bb2, rtx *f1,
+                                  rtx *f2, int stop_after)
+{
+  rtx i1, i2, last1, last2, beforelast1, beforelast2;
+  int ninsns = 0;
+  edge e;
+  edge_iterator ei;
+  int nehedges1 = 0, nehedges2 = 0;
+
+  FOR_EACH_EDGE (e, ei, bb1->succs)
+    if (e->flags & EDGE_EH)
+      nehedges1++;
+  FOR_EACH_EDGE (e, ei, bb2->succs)
+    if (e->flags & EDGE_EH)
+      nehedges2++;
+
+  i1 = BB_HEAD (bb1);
+  i2 = BB_HEAD (bb2);
+  last1 = beforelast1 = last2 = beforelast2 = NULL_RTX;
+
+  while (true)
+    {
+
+      /* Ignore notes.  */
+      while (!NONDEBUG_INSN_P (i1) && i1 != BB_END (bb1))
+        i1 = NEXT_INSN (i1);
+
+      while (!NONDEBUG_INSN_P (i2) && i2 != BB_END (bb2))
+        i2 = NEXT_INSN (i2);
+
+      if (NOTE_P (i1) || NOTE_P (i2)
+          || JUMP_P (i1) || JUMP_P (i2))
+        break;
+
+      /* A sanity check to make sure we're not merging insns with different
+         effects on EH.  If only one of them ends a basic block, it shouldn't
+         have an EH edge; if both end a basic block, there should be the same
+         number of EH edges.  */
+      if ((i1 == BB_END (bb1) && i2 != BB_END (bb2)
+           && nehedges1 > 0)
+          || (i2 == BB_END (bb2) && i1 != BB_END (bb1)
+              && nehedges2 > 0)
+          || (i1 == BB_END (bb1) && i2 == BB_END (bb2)
+              && nehedges1 != nehedges2))
+        break;
+
+      if (!old_insns_match_p (0, i1, i2))
+        break;
+
+      merge_memattrs (i1, i2);
+
+      /* Don't begin a cross-jump with a NOTE insn.  */
+      if (INSN_P (i1))
+        {
+          merge_notes (i1, i2);
+
+          beforelast1 = last1, beforelast2 = last2;
+          last1 = i1, last2 = i2;
+          ninsns++;
+        }
+
+      if (i1 == BB_END (bb1) || i2 == BB_END (bb2)
+          || (stop_after > 0 && ninsns == stop_after))
+        break;
+
+      i1 = NEXT_INSN (i1);
+      i2 = NEXT_INSN (i2);
+    }
+
+#ifdef HAVE_cc0
+  /* Don't allow a compare to be shared by cross-jumping unless the insn
+     after the compare is also shared.  */
+  if (ninsns && reg_mentioned_p (cc0_rtx, last1) && sets_cc0_p (last1))
+    last1 = beforelast1, last2 = beforelast2, ninsns--;
+#endif
+
+  if (ninsns)
+    {
+      *f1 = last1;
+      *f2 = last2;
+    }
+
+  return ninsns;
+}
+
 /* Return true iff outgoing edges of BB1 and BB2 match, together with
    the branch instruction.  This means that if we commonize the control
    flow before end of the basic block, the semantic remains unchanged.
@@ -1498,7 +1610,7 @@
     return false;
 
   /* ... and part the second.  */
-  nmatch = flow_find_cross_jump (mode, src1, src2, &newpos1, &newpos2);
+  nmatch = flow_find_cross_jump (src1, src2, &newpos1, &newpos2);
 
   /* Don't proceed with the crossjump unless we found a sufficient number
      of matching instructions or the 'from' block was totally matched

=== modified file 'gcc/cfgexpand.c'
--- old/gcc/cfgexpand.c	2010-05-14 17:11:03 +0000
+++ new/gcc/cfgexpand.c	2010-09-01 13:29:58 +0000
@@ -3026,14 +3026,15 @@
          if (SCALAR_INT_MODE_P (GET_MODE (op0))
              && SCALAR_INT_MODE_P (mode))
            {
+             enum machine_mode inner_mode = GET_MODE (op0);
              if (TYPE_UNSIGNED (TREE_TYPE (TREE_OPERAND (exp, 0))))
-               op0 = gen_rtx_ZERO_EXTEND (mode, op0);
+               op0 = simplify_gen_unary (ZERO_EXTEND, mode, op0, inner_mode);
              else
-               op0 = gen_rtx_SIGN_EXTEND (mode, op0);
+               op0 = simplify_gen_unary (SIGN_EXTEND, mode, op0, inner_mode);
              if (TYPE_UNSIGNED (TREE_TYPE (TREE_OPERAND (exp, 1))))
-               op1 = gen_rtx_ZERO_EXTEND (mode, op1);
+               op1 = simplify_gen_unary (ZERO_EXTEND, mode, op1, inner_mode);
              else
-               op1 = gen_rtx_SIGN_EXTEND (mode, op1);
+               op1 = simplify_gen_unary (SIGN_EXTEND, mode, op1, inner_mode);
              return gen_rtx_MULT (mode, op0, op1);
            }
          return NULL;

=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c	2010-08-31 10:00:27 +0000
+++ new/gcc/config/arm/arm.c	2010-09-01 13:29:58 +0000
@@ -8116,8 +8116,6 @@
 static bool
 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
 {
-  rtx i_pat, d_pat;
-
   /* Some true dependencies can have a higher cost depending
      on precisely how certain input operands are used.  */
   if (REG_NOTE_KIND (link) == 0
@@ -12166,6 +12164,60 @@
   return result;
 }
 
+/* Convert instructions to their cc-clobbering variant if possible, since
+   that allows us to use smaller encodings.  */
+
+static void
+thumb2_reorg (void)
+{
+  basic_block bb;
+  regset_head live;
+
+  INIT_REG_SET (&live);
+
+  /* We are freeing block_for_insn in the toplev to keep compatibility
+     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
+  compute_bb_for_insn ();
+  df_analyze ();
+
+  FOR_EACH_BB (bb)
+    {
+      rtx insn;
+      COPY_REG_SET (&live, DF_LR_OUT (bb));
+      df_simulate_initialize_backwards (bb, &live);
+      FOR_BB_INSNS_REVERSE (bb, insn)
+        {
+          if (NONJUMP_INSN_P (insn)
+              && !REGNO_REG_SET_P (&live, CC_REGNUM))
+            {
+              rtx pat = PATTERN (insn);
+              if (GET_CODE (pat) == SET
+                  && low_register_operand (XEXP (pat, 0), SImode)
+                  && thumb_16bit_operator (XEXP (pat, 1), SImode)
+                  && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
+                  && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
+                {
+                  rtx dst = XEXP (pat, 0);
+                  rtx src = XEXP (pat, 1);
+                  rtx op0 = XEXP (src, 0);
+                  if (rtx_equal_p (dst, op0)
+                      || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
+                    {
+                      rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
+                      rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
+                      rtvec vec = gen_rtvec (2, pat, clobber);
+                      PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
+                      INSN_CODE (insn) = -1;
+                    }
+                }
+            }
+          if (NONDEBUG_INSN_P (insn))
+            df_simulate_one_insn_backwards (bb, insn, &live);
+        }
+    }
+  CLEAR_REG_SET (&live);
+}
+
 /* Gcc puts the pool in the wrong place for ARM, since we can only
    load addresses a limited distance around the pc.
   We do some special munging to move the constant pool values to the correct
@@ -12177,6 +12229,9 @@
   HOST_WIDE_INT address = 0;
   Mfix * fix;
 
+  if (TARGET_THUMB2)
+    thumb2_reorg ();
+
   minipool_fix_head = minipool_fix_tail = NULL;
 
   /* The first insn must always be a note, or the code below won't

=== modified file 'gcc/config/arm/arm.h'
--- old/gcc/config/arm/arm.h	2010-08-13 11:11:15 +0000
+++ new/gcc/config/arm/arm.h	2010-09-01 13:29:58 +0000
@@ -1133,7 +1133,11 @@
 }
 
 /* Use different register alloc ordering for Thumb.  */
-#define ORDER_REGS_FOR_LOCAL_ALLOC arm_order_regs_for_local_alloc ()
+#define ADJUST_REG_ALLOC_ORDER arm_order_regs_for_local_alloc ()
+
+/* Tell IRA to use the order we define rather than messing it up with its
+   own cost calculations.  */
+#define HONOR_REG_ALLOC_ORDER
 
 /* Interrupt functions can only use registers that have already been
    saved by the prologue, even if they would normally be

=== modified file 'gcc/config/arm/arm.md'
--- old/gcc/config/arm/arm.md	2010-08-31 10:00:27 +0000
+++ new/gcc/config/arm/arm.md	2010-09-01 13:29:58 +0000
@@ -4074,7 +4074,7 @@
 
 (define_split
   [(set (match_operand:SI 0 "register_operand" "")
-	(zero_extend:SI (match_operand:HI 1 "register_operand" "l,m")))]
+	(zero_extend:SI (match_operand:HI 1 "register_operand" "")))]
   "!TARGET_THUMB2 && !arm_arch6"
   [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 16)))
   (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 16)))]

=== modified file 'gcc/config/arm/thumb2.md'
--- old/gcc/config/arm/thumb2.md	2010-08-31 10:00:27 +0000
+++ new/gcc/config/arm/thumb2.md	2010-09-01 13:29:58 +0000
@@ -1046,29 +1046,6 @@
   }"
 )
 
-;; Peepholes and insns for 16-bit flag clobbering instructions.
-;; The conditional forms of these instructions do not clobber CC.
-;; However by the time peepholes are run it is probably too late to do
-;; anything useful with this information.
-(define_peephole2
-  [(set (match_operand:SI 0 "low_register_operand" "")
-	(match_operator:SI 3 "thumb_16bit_operator"
-	 [(match_operand:SI 1 "low_register_operand" "")
-	  (match_operand:SI 2 "low_register_operand" "")]))]
-  "TARGET_THUMB2
-   && (rtx_equal_p(operands[0], operands[1])
-       || GET_CODE(operands[3]) == PLUS
-       || GET_CODE(operands[3]) == MINUS)
-   && peep2_regno_dead_p(0, CC_REGNUM)"
-  [(parallel
-    [(set (match_dup 0)
-	  (match_op_dup 3
-	   [(match_dup 1)
-	    (match_dup 2)]))
-     (clobber (reg:CC CC_REGNUM))])]
-  ""
-)
-
 (define_insn "*thumb2_alusi3_short"
   [(set (match_operand:SI 0 "s_register_operand" "=l")
	(match_operator:SI 3 "thumb_16bit_operator"
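For illustration (not part of the patch): the kind of code the deleted
peephole used to catch and thumb2_reorg now handles after register
allocation.  A hedged sketch; the exact registers and encodings depend on
allocation, so the expected assembly is an assumption.

/* Illustrative only: compiled with -mthumb -O2 for a Thumb-2 core, and
   with the condition flags dead at this point, thumb2_reorg can add a
   CC clobber so the 16-bit flag-setting "adds" encoding is usable
   instead of the 32-bit "add".  */
int
add (int a, int b)
{
  return a + b;
}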
=== modified file 'gcc/config/avr/avr.h'
--- old/gcc/config/avr/avr.h	2010-01-11 23:12:14 +0000
+++ new/gcc/config/avr/avr.h	2010-09-01 13:29:58 +0000
@@ -232,7 +232,7 @@
     32,33,34,35 \
     }
 
-#define ORDER_REGS_FOR_LOCAL_ALLOC order_regs_for_local_alloc ()
+#define ADJUST_REG_ALLOC_ORDER order_regs_for_local_alloc ()
 
 #define HARD_REGNO_NREGS(REGNO, MODE) ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)

=== modified file 'gcc/config/i386/i386.h'
--- old/gcc/config/i386/i386.h	2010-04-27 19:14:19 +0000
+++ new/gcc/config/i386/i386.h	2010-09-01 13:29:58 +0000
@@ -955,7 +955,7 @@
    registers listed in CALL_USED_REGISTERS, keeping the others
    available for storage of persistent values.
 
-   The ORDER_REGS_FOR_LOCAL_ALLOC actually overwrite the order,
+   The ADJUST_REG_ALLOC_ORDER actually overwrite the order,
    so this is just empty initializer for array.  */
 
 #define REG_ALLOC_ORDER \
@@ -964,11 +964,11 @@
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \
    48, 49, 50, 51, 52 }
 
-/* ORDER_REGS_FOR_LOCAL_ALLOC is a macro which permits reg_alloc_order
+/* ADJUST_REG_ALLOC_ORDER is a macro which permits reg_alloc_order
    to be rearranged based on a particular function.  When using sse math,
    we want to allocate SSE before x87 registers and vice versa.  */
 
-#define ORDER_REGS_FOR_LOCAL_ALLOC x86_order_regs_for_local_alloc ()
+#define ADJUST_REG_ALLOC_ORDER x86_order_regs_for_local_alloc ()
 
 #define OVERRIDE_ABI_FORMAT(FNDECL) ix86_call_abi_override (FNDECL)

=== modified file 'gcc/config/mips/mips.h'
--- old/gcc/config/mips/mips.h	2009-10-29 17:39:52 +0000
+++ new/gcc/config/mips/mips.h	2010-09-01 13:29:58 +0000
@@ -2059,12 +2059,12 @@
   182,183,184,185,186,187 \
 }
 
-/* ORDER_REGS_FOR_LOCAL_ALLOC is a macro which permits reg_alloc_order
+/* ADJUST_REG_ALLOC_ORDER is a macro which permits reg_alloc_order
    to be rearranged based on a particular function.  On the mips16, we
    want to allocate $24 (T_REG) before other registers for
    instructions for which it is possible.  */
 
-#define ORDER_REGS_FOR_LOCAL_ALLOC mips_order_regs_for_local_alloc ()
+#define ADJUST_REG_ALLOC_ORDER mips_order_regs_for_local_alloc ()
 
 /* True if VALUE is an unsigned 6-bit number.  */

=== modified file 'gcc/config/picochip/picochip.h'
--- old/gcc/config/picochip/picochip.h	2009-11-04 11:06:36 +0000
+++ new/gcc/config/picochip/picochip.h	2010-09-01 13:29:58 +0000
@@ -261,7 +261,7 @@
 /* We can dynamically change the REG_ALLOC_ORDER using the following hook.
    It would be desirable to change it for leaf functions so we can put
    r12 at the end of this list.*/
-#define ORDER_REGS_FOR_LOCAL_ALLOC picochip_order_regs_for_local_alloc ()
+#define ADJUST_REG_ALLOC_ORDER picochip_order_regs_for_local_alloc ()
 
 /* How Values Fit in Registers  */

=== modified file 'gcc/config/sparc/predicates.md'
--- old/gcc/config/sparc/predicates.md	2009-02-20 15:20:38 +0000
+++ new/gcc/config/sparc/predicates.md	2010-09-01 13:29:58 +0000
@@ -1,5 +1,5 @@
 ;; Predicate definitions for SPARC.
-;; Copyright (C) 2005, 2007, 2008 Free Software Foundation, Inc.
+;; Copyright (C) 2005, 2007, 2008, 2010 Free Software Foundation, Inc.
 ;;
 ;; This file is part of GCC.
 ;;
@@ -473,9 +473,3 @@
 ;; and (xor ... (not ...)) to (not (xor ...)).  */
 (define_predicate "cc_arith_not_operator"
   (match_code "and,ior"))
-
-;; Return true if OP is memory operand with just [%reg] addressing mode.
-(define_predicate "memory_reg_operand"
-  (and (match_code "mem")
-       (and (match_operand 0 "memory_operand")
-	    (match_test "REG_P (XEXP (op, 0))"))))

=== modified file 'gcc/config/sparc/sparc.h'
--- old/gcc/config/sparc/sparc.h	2010-04-02 18:54:46 +0000
+++ new/gcc/config/sparc/sparc.h	2010-09-01 13:29:58 +0000
@@ -1181,7 +1181,7 @@
   96, 97, 98, 99,			/* %fcc0-3 */	\
   100, 0, 14, 30, 31, 101}		/* %icc, %g0, %o6, %i6, %i7, %sfp */
 
-#define ORDER_REGS_FOR_LOCAL_ALLOC order_regs_for_local_alloc ()
+#define ADJUST_REG_ALLOC_ORDER order_regs_for_local_alloc ()
 
 extern char sparc_leaf_regs[];
 #define LEAF_REGISTERS sparc_leaf_regs

=== modified file 'gcc/config/sparc/sync.md'
--- old/gcc/config/sparc/sync.md	2009-02-20 15:20:38 +0000
+++ new/gcc/config/sparc/sync.md	2010-09-01 13:29:58 +0000
@@ -1,5 +1,5 @@
 ;; GCC machine description for SPARC synchronization instructions.
-;; Copyright (C) 2005, 2007, 2009
+;; Copyright (C) 2005, 2007, 2009, 2010
 ;; Free Software Foundation, Inc.
 ;;
 ;; This file is part of GCC.
@@ -62,7 +62,7 @@
 
 (define_expand "sync_compare_and_swap<mode>"
   [(parallel
-     [(set (match_operand:I48MODE 0 "register_operand" "=r")
+     [(set (match_operand:I48MODE 0 "register_operand" "")
	   (match_operand:I48MODE 1 "memory_operand" ""))
       (set (match_dup 1)
	   (unspec_volatile:I48MODE
@@ -71,7 +71,7 @@
	     UNSPECV_CAS))])]
   "TARGET_V9"
 {
-  if (! REG_P (XEXP (operands[1], 0)))
+  if (!REG_P (XEXP (operands[1], 0)))
     {
       rtx addr = force_reg (Pmode, XEXP (operands[1], 0));
       operands[1] = replace_equiv_address (operands[1], addr);
@@ -81,20 +81,20 @@
 
 (define_insn "*sync_compare_and_swap<mode>"
   [(set (match_operand:I48MODE 0 "register_operand" "=r")
-	(match_operand:I48MODE 1 "memory_reg_operand" "+m"))
-   (set (match_dup 1)
+	(mem:I48MODE (match_operand 1 "register_operand" "r")))
+   (set (mem:I48MODE (match_dup 1))
	(unspec_volatile:I48MODE
	  [(match_operand:I48MODE 2 "register_operand" "r")
	   (match_operand:I48MODE 3 "register_operand" "0")]
	  UNSPECV_CAS))]
   "TARGET_V9 && (<MODE>mode == SImode || TARGET_ARCH64)"
-  "cas<modesuffix>\t%1, %2, %0"
+  "cas<modesuffix>\t[%1], %2, %0"
   [(set_attr "type" "multi")])
 
 (define_insn "*sync_compare_and_swapdi_v8plus"
   [(set (match_operand:DI 0 "register_operand" "=h")
-	(match_operand:DI 1 "memory_reg_operand" "+m"))
-   (set (match_dup 1)
+	(mem:DI (match_operand 1 "register_operand" "r")))
+   (set (mem:DI (match_dup 1))
	(unspec_volatile:DI
	  [(match_operand:DI 2 "register_operand" "h")
	   (match_operand:DI 3 "register_operand" "0")]
@@ -109,7 +109,7 @@
   output_asm_insn ("srl\t%L2, 0, %L2", operands);
   output_asm_insn ("sllx\t%H2, 32, %H3", operands);
   output_asm_insn ("or\t%L2, %H3, %H3", operands);
-  output_asm_insn ("casx\t%1, %H3, %L3", operands);
+  output_asm_insn ("casx\t[%1], %H3, %L3", operands);
 
   return "srlx\t%L3, 32, %H3";
 }
 [(set_attr "type" "multi")
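For illustration (not part of the patch): a compare-and-swap that expands
through the rewritten sparc sync patterns.  The builtin below is the
standard GCC __sync builtin; the point of the sync.md change is that the
memory operand's address is forced into a register and printed in the
[%reg] form that cas/casx require.  Hedged sketch only.

/* Illustrative only: compiled for sparc with -mcpu=v9, this should emit
   a "cas [%reg], %old, %new"-style instruction via the rewritten
   *sync_compare_and_swap<mode> pattern.  */
int
cas_int (int *p, int oldval, int newval)
{
  return __sync_val_compare_and_swap (p, oldval, newval);
}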
=== modified file 'gcc/config/xtensa/xtensa.h'
--- old/gcc/config/xtensa/xtensa.h	2009-09-23 21:24:42 +0000
+++ new/gcc/config/xtensa/xtensa.h	2010-09-01 13:29:58 +0000
@@ -286,7 +286,7 @@
    incoming argument in a2 is live throughout the function and
    local-alloc decides to use a2, then the incoming argument must
    either be spilled or copied to another register.  To get around
-   this, we define ORDER_REGS_FOR_LOCAL_ALLOC to redefine
+   this, we define ADJUST_REG_ALLOC_ORDER to redefine
    reg_alloc_order for leaf functions such that lowest numbered
    registers are used first with the exception that the incoming
    argument registers are not used until after other register choices
@@ -300,7 +300,7 @@
   35, \
 }
 
-#define ORDER_REGS_FOR_LOCAL_ALLOC order_regs_for_local_alloc ()
+#define ADJUST_REG_ALLOC_ORDER order_regs_for_local_alloc ()
 
 /* For Xtensa, the only point of this is to prevent GCC from otherwise
    giving preference to call-used registers.  To minimize window

=== modified file 'gcc/doc/tm.texi'
--- old/gcc/doc/tm.texi	2010-08-13 11:53:46 +0000
+++ new/gcc/doc/tm.texi	2010-09-01 13:29:58 +0000
@@ -2093,7 +2093,7 @@
 the highest numbered allocable register first.
 @end defmac
 
-@defmac ORDER_REGS_FOR_LOCAL_ALLOC
+@defmac ADJUST_REG_ALLOC_ORDER
 A C statement (sans semicolon) to choose the order in which to allocate
 hard registers for pseudo-registers local to a basic block.
@@ -2107,6 +2107,15 @@
 On most machines, it is not necessary to define this macro.
 @end defmac
 
+@defmac HONOR_REG_ALLOC_ORDER
+Normally, IRA tries to estimate the costs for saving a register in the
+prologue and restoring it in the epilogue.  This discourages it from
+using call-saved registers.  If a machine wants to ensure that IRA
+allocates registers in the order given by REG_ALLOC_ORDER even if some
+call-saved registers appear earlier than call-used ones, this macro
+should be defined.
+@end defmac
+
 @defmac IRA_HARD_REGNO_ADD_COST_MULTIPLIER (@var{regno})
 In some case register allocation order is not enough for the
 Integrated Register Allocator (@acronym{IRA}) to generate a good code.

=== modified file 'gcc/expmed.c'
--- old/gcc/expmed.c	2010-03-03 22:10:17 +0000
+++ new/gcc/expmed.c	2010-09-01 13:29:58 +0000
@@ -3253,6 +3253,55 @@
   gcc_assert (op0);
   return op0;
 }
+
+/* Perform a widening multiplication and return an rtx for the result.
+   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
+   TARGET is a suggestion for where to store the result (an rtx).
+   THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
+   or smul_widen_optab.
+
+   We check specially for a constant integer as OP1, comparing the
+   cost of a widening multiply against the cost of a sequence of shifts
+   and adds.  */
+
+rtx
+expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
+                      int unsignedp, optab this_optab)
+{
+  bool speed = optimize_insn_for_speed_p ();
+
+  if (CONST_INT_P (op1)
+      && (INTVAL (op1) >= 0
+          || GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT))
+    {
+      HOST_WIDE_INT coeff = INTVAL (op1);
+      int max_cost;
+      enum mult_variant variant;
+      struct algorithm algorithm;
+
+      /* Special case powers of two.  */
+      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
+        {
+          op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
+          return expand_shift (LSHIFT_EXPR, mode, op0,
+                               build_int_cst (NULL_TREE, floor_log2 (coeff)),
+                               target, unsignedp);
+        }
+
+      /* Exclude cost of op0 from max_cost to match the cost
+         calculation of the synth_mult.  */
+      max_cost = mul_widen_cost[speed][mode];
+      if (choose_mult_variant (mode, coeff, &algorithm, &variant,
+                               max_cost))
+        {
+          op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
+          return expand_mult_const (mode, op0, coeff, target,
+                                    &algorithm, variant);
+        }
+    }
+  return expand_binop (mode, this_optab, op0, op1, target,
+                       unsignedp, OPTAB_LIB_WIDEN);
+}
 
 /* Return the smallest n such that 2**n >= X.  */
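For illustration (not part of the patch): the canonical widening-multiply
shape that the new WIDEN_MULT_EXPR expansion path and expand_widening_mult
handle.  A sketch in the style of the new wmul-1.c tests, whose exact
contents are not reproduced here.

/* Illustrative only: a 32x32->64 bit multiply.  On targets with a
   widening multiply instruction (e.g. ARM smull), this should no longer
   expand to a full 64-bit multiplication.  */
long long
wmul (int a, int b)
{
  return (long long) a * (long long) b;
}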
=== modified file 'gcc/expr.c'
--- old/gcc/expr.c	2010-08-20 16:21:01 +0000
+++ new/gcc/expr.c	2010-09-01 13:29:58 +0000
@@ -7224,7 +7224,6 @@
   optab this_optab;
   rtx subtarget, original_target;
   int ignore;
-  tree subexp0, subexp1;
   bool reduce_bit_field;
   gimple subexp0_def, subexp1_def;
   tree top0, top1;
@@ -7679,13 +7678,7 @@
 
       goto binop2;
 
-    case MULT_EXPR:
-      /* If this is a fixed-point operation, then we cannot use the code
-	 below because "expand_mult" doesn't support sat/no-sat fixed-point
-	 multiplications.  */
-      if (ALL_FIXED_POINT_MODE_P (mode))
-	goto binop;
-
+    case WIDEN_MULT_EXPR:
       /* If first operand is constant, swap them.
	 Thus the following special case checks need only
	 check the second operand.  */
@@ -7696,96 +7689,35 @@
	  treeop1 = t1;
	}
 
-      /* Attempt to return something suitable for generating an
-	 indexed address, for machines that support that.  */
-
-      if (modifier == EXPAND_SUM && mode == ptr_mode
-	  && host_integerp (treeop1, 0))
-	{
-	  tree exp1 = treeop1;
-
-	  op0 = expand_expr (treeop0, subtarget, VOIDmode,
-			     EXPAND_SUM);
-
-	  if (!REG_P (op0))
-	    op0 = force_operand (op0, NULL_RTX);
-	  if (!REG_P (op0))
-	    op0 = copy_to_mode_reg (mode, op0);
-
-	  return REDUCE_BIT_FIELD (gen_rtx_MULT (mode, op0,
-			       gen_int_mode (tree_low_cst (exp1, 0),
-					     TYPE_MODE (TREE_TYPE (exp1)))));
-	}
-
-      if (modifier == EXPAND_STACK_PARM)
-	target = 0;
-
-      /* Check for multiplying things that have been extended
-	 from a narrower type.  If this machine supports multiplying
-	 in that narrower type with a result in the desired type,
-	 do it that way, and avoid the explicit type-conversion.  */
-
-      subexp0 = treeop0;
-      subexp1 = treeop1;
-      subexp0_def = get_def_for_expr (subexp0, NOP_EXPR);
-      subexp1_def = get_def_for_expr (subexp1, NOP_EXPR);
-      top0 = top1 = NULL_TREE;
-
       /* First, check if we have a multiplication of one signed and one
	 unsigned operand.  */
-      if (subexp0_def
-	  && (top0 = gimple_assign_rhs1 (subexp0_def))
-	  && subexp1_def
-	  && (top1 = gimple_assign_rhs1 (subexp1_def))
-	  && TREE_CODE (type) == INTEGER_TYPE
-	  && (TYPE_PRECISION (TREE_TYPE (top0))
-	      < TYPE_PRECISION (TREE_TYPE (subexp0)))
-	  && (TYPE_PRECISION (TREE_TYPE (top0))
-	      == TYPE_PRECISION (TREE_TYPE (top1)))
-	  && (TYPE_UNSIGNED (TREE_TYPE (top0))
-	      != TYPE_UNSIGNED (TREE_TYPE (top1))))
+      if (TREE_CODE (treeop1) != INTEGER_CST
+	  && (TYPE_UNSIGNED (TREE_TYPE (treeop0))
+	      != TYPE_UNSIGNED (TREE_TYPE (treeop1))))
	{
-	  enum machine_mode innermode
-	    = TYPE_MODE (TREE_TYPE (top0));
+	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (treeop0));
	  this_optab = usmul_widen_optab;
-	  if (mode == GET_MODE_WIDER_MODE (innermode))
+	  if (mode == GET_MODE_2XWIDER_MODE (innermode))
	    {
	      if (optab_handler (this_optab, mode)->insn_code != CODE_FOR_nothing)
		{
-		  if (TYPE_UNSIGNED (TREE_TYPE (top0)))
-		    expand_operands (top0, top1, NULL_RTX, &op0, &op1,
+		  if (TYPE_UNSIGNED (TREE_TYPE (treeop0)))
+		    expand_operands (treeop0, treeop1, subtarget, &op0, &op1,
				     EXPAND_NORMAL);
		  else
-		    expand_operands (top0, top1, NULL_RTX, &op1, &op0,
+		    expand_operands (treeop0, treeop1, subtarget, &op1, &op0,
				     EXPAND_NORMAL);
-		  goto binop3;
		}
	    }
	}
-      /* Check for a multiplication with matching signedness.  If
-	 valid, TOP0 and TOP1 were set in the previous if
-	 condition.  */
-      else if (top0
-	  && TREE_CODE (type) == INTEGER_TYPE
-	  && (TYPE_PRECISION (TREE_TYPE (top0))
-	      < TYPE_PRECISION (TREE_TYPE (subexp0)))
-	  && ((TREE_CODE (subexp1) == INTEGER_CST
-	       && int_fits_type_p (subexp1, TREE_TYPE (top0))
-	       /* Don't use a widening multiply if a shift will do.  */
-	       && ((GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (subexp1)))
-		    > HOST_BITS_PER_WIDE_INT)
-		   || exact_log2 (TREE_INT_CST_LOW (subexp1)) < 0))
-	      ||
-	      (top1
-	       && (TYPE_PRECISION (TREE_TYPE (top1))
-		   == TYPE_PRECISION (TREE_TYPE (top0))
-	       /* If both operands are extended, they must either both
-		  be zero-extended or both be sign-extended.  */
-	       && (TYPE_UNSIGNED (TREE_TYPE (top1))
-		   == TYPE_UNSIGNED (TREE_TYPE (top0)))))))
+      /* Check for a multiplication with matching signedness.  */
+      else if ((TREE_CODE (treeop1) == INTEGER_CST
		&& int_fits_type_p (treeop1, TREE_TYPE (treeop0)))
+	       || (TYPE_UNSIGNED (TREE_TYPE (treeop1))
		   == TYPE_UNSIGNED (TREE_TYPE (treeop0))))
	{
-	  tree op0type = TREE_TYPE (top0);
+	  tree op0type = TREE_TYPE (treeop0);
	  enum machine_mode innermode = TYPE_MODE (op0type);
	  bool zextend_p = TYPE_UNSIGNED (op0type);
	  optab other_optab = zextend_p ? smul_widen_optab : umul_widen_optab;
@@ -7795,24 +7727,22 @@
	    {
	      if (optab_handler (this_optab, mode)->insn_code != CODE_FOR_nothing)
		{
-		  if (TREE_CODE (subexp1) == INTEGER_CST)
-		    expand_operands (top0, subexp1, NULL_RTX, &op0, &op1,
-				     EXPAND_NORMAL);
-		  else
-		    expand_operands (top0, top1, NULL_RTX, &op0, &op1,
-				     EXPAND_NORMAL);
-		  goto binop3;
+		  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1,
+				   EXPAND_NORMAL);
+		  temp = expand_widening_mult (mode, op0, op1, target,
+					       unsignedp, this_optab);
+		  return REDUCE_BIT_FIELD (temp);
		}
-	      else if (optab_handler (other_optab, mode)->insn_code != CODE_FOR_nothing
-		       && innermode == word_mode)
+	      if (optab_handler (other_optab, mode)->insn_code != CODE_FOR_nothing
+		  && innermode == word_mode)
		{
		  rtx htem, hipart;
-		  op0 = expand_normal (top0);
-		  if (TREE_CODE (subexp1) == INTEGER_CST)
+		  op0 = expand_normal (treeop0);
+		  if (TREE_CODE (treeop1) == INTEGER_CST)
		    op1 = convert_modes (innermode, mode,
-					 expand_normal (subexp1), unsignedp);
+					 expand_normal (treeop1), unsignedp);
		  else
-		    op1 = expand_normal (top1);
+		    op1 = expand_normal (treeop1);
		  temp = expand_binop (mode, other_optab, op0, op1, target,
				       unsignedp, OPTAB_LIB_WIDEN);
		  hipart = gen_highpart (innermode, temp);
@@ -7825,7 +7755,53 @@
	    }
	}
-      expand_operands (subexp0, subexp1, subtarget, &op0, &op1, EXPAND_NORMAL);
+      treeop0 = fold_build1 (CONVERT_EXPR, type, treeop0);
+      treeop1 = fold_build1 (CONVERT_EXPR, type, treeop1);
+      expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
+      return REDUCE_BIT_FIELD (expand_mult (mode, op0, op1, target, unsignedp));
+
+    case MULT_EXPR:
+      /* If this is a fixed-point operation, then we cannot use the code
+	 below because "expand_mult" doesn't support sat/no-sat fixed-point
+	 multiplications.  */
+      if (ALL_FIXED_POINT_MODE_P (mode))
+	goto binop;
+
+      /* If first operand is constant, swap them.
+	 Thus the following special case checks need only
+	 check the second operand.  */
+      if (TREE_CODE (treeop0) == INTEGER_CST)
+	{
+	  tree t1 = treeop0;
+	  treeop0 = treeop1;
+	  treeop1 = t1;
+	}
+
+      /* Attempt to return something suitable for generating an
+	 indexed address, for machines that support that.  */
+
+      if (modifier == EXPAND_SUM && mode == ptr_mode
+	  && host_integerp (treeop1, 0))
+	{
+	  tree exp1 = treeop1;
+
+	  op0 = expand_expr (treeop0, subtarget, VOIDmode,
+			     EXPAND_SUM);
+
+	  if (!REG_P (op0))
+	    op0 = force_operand (op0, NULL_RTX);
+	  if (!REG_P (op0))
+	    op0 = copy_to_mode_reg (mode, op0);
+
+	  return REDUCE_BIT_FIELD (gen_rtx_MULT (mode, op0,
+			       gen_int_mode (tree_low_cst (exp1, 0),
+					     TYPE_MODE (TREE_TYPE (exp1)))));
+	}
+
+      if (modifier == EXPAND_STACK_PARM)
+	target = 0;
+
+      expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
       return REDUCE_BIT_FIELD (expand_mult (mode, op0, op1, target, unsignedp));
 
     case TRUNC_DIV_EXPR:
@@ -8311,6 +8287,8 @@
   location_t loc = EXPR_LOCATION (exp);
   struct separate_ops ops;
   tree treeop0, treeop1, treeop2;
+  tree ssa_name = NULL_TREE;
+  gimple g;
 
   type = TREE_TYPE (exp);
   mode = TYPE_MODE (type);
@@ -8423,15 +8401,17 @@
	 base variable.  This unnecessarily allocates a pseudo, see how we can
	 reuse it, if partition base vars have it set already.  */
       if (!currently_expanding_to_rtl)
-	return expand_expr_real_1 (SSA_NAME_VAR (exp), target, tmode, modifier, NULL);
-      {
-	gimple g = get_gimple_for_ssa_name (exp);
-	if (g)
-	  return expand_expr_real (gimple_assign_rhs_to_tree (g), target,
-				   tmode, modifier, NULL);
-      }
-      decl_rtl = get_rtx_for_ssa_name (exp);
-      exp = SSA_NAME_VAR (exp);
+	return expand_expr_real_1 (SSA_NAME_VAR (exp), target, tmode, modifier,
+				   NULL);
+
+      g = get_gimple_for_ssa_name (exp);
+      if (g)
+	return expand_expr_real (gimple_assign_rhs_to_tree (g), target, tmode,
+				 modifier, NULL);
+
+      ssa_name = exp;
+      decl_rtl = get_rtx_for_ssa_name (ssa_name);
+      exp = SSA_NAME_VAR (ssa_name);
       goto expand_decl_rtl;
 
     case PARM_DECL:
@@ -8533,15 +8513,21 @@
       /* If the mode of DECL_RTL does not match that of the decl, it
	 must be a promoted value.  We return a SUBREG of the wanted mode,
	 but mark it so that we know that it was already extended.  */
-
-      if (REG_P (decl_rtl)
-	  && GET_MODE (decl_rtl) != DECL_MODE (exp))
+      if (REG_P (decl_rtl) && GET_MODE (decl_rtl) != DECL_MODE (exp))
	{
	  enum machine_mode pmode;
 
-	  /* Get the signedness used for this variable.  Ensure we get the
-	     same mode we got when the variable was declared.  */
-	  pmode = promote_decl_mode (exp, &unsignedp);
+	  /* Get the signedness to be used for this variable.  Ensure we get
+	     the same mode we got when the variable was declared.  */
+	  if (code == SSA_NAME
+	      && (g = SSA_NAME_DEF_STMT (ssa_name))
+	      && gimple_code (g) == GIMPLE_CALL)
+	    pmode = promote_function_mode (type, mode, &unsignedp,
+					   TREE_TYPE
+					   (TREE_TYPE (gimple_call_fn (g))),
+					   2);
+	  else
+	    pmode = promote_decl_mode (exp, &unsignedp);
	  gcc_assert (GET_MODE (decl_rtl) == pmode);
 
	  temp = gen_lowpart_SUBREG (mode, decl_rtl);
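For illustration (not part of the patch): the situation PR
rtl-optimization/40900 covers.  When the SSA name on the LHS of a call is
expanded, its promoted mode must come from promote_function_mode, so that
an already-promoted return value is not redundantly re-extended.  A sketch
in the spirit of the new gcc.target/arm/pr40900.c test (exact contents
assumed).

/* Illustrative only: foo's return value is already sign-extended by the
   callee per the ABI; with the fix, bar should not re-extend it.  */
extern signed char foo (void);

int
bar (void)
{
  return foo ();
}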
=== modified file 'gcc/fold-const.c'
--- old/gcc/fold-const.c	2010-04-06 09:36:57 +0000
+++ new/gcc/fold-const.c	2010-09-01 13:29:58 +0000
@@ -5741,6 +5741,76 @@
				    const_binop (BIT_XOR_EXPR, c, temp, 0));
 }
 
+/* For an expression that has the form
+     (A && B) || ~B
+   or
+     (A || B) && ~B,
+   we can drop one of the inner expressions and simplify to
+     A || ~B
+   or
+     A && ~B
+   LOC is the location of the resulting expression.  OP is the inner
+   logical operation; the left-hand side in the examples above, while CMPOP
+   is the right-hand side.  RHS_ONLY is used to prevent us from accidentally
+   removing a condition that guards another, as in
+     (A != NULL && A->...) || A == NULL
+   which we must not transform.  If RHS_ONLY is true, only eliminate the
+   right-most operand of the inner logical operation.  */
+
+static tree
+merge_truthop_with_opposite_arm (location_t loc, tree op, tree cmpop,
+                                 bool rhs_only)
+{
+  tree type = TREE_TYPE (cmpop);
+  enum tree_code code = TREE_CODE (cmpop);
+  enum tree_code truthop_code = TREE_CODE (op);
+  tree lhs = TREE_OPERAND (op, 0);
+  tree rhs = TREE_OPERAND (op, 1);
+  tree orig_lhs = lhs, orig_rhs = rhs;
+  enum tree_code rhs_code = TREE_CODE (rhs);
+  enum tree_code lhs_code = TREE_CODE (lhs);
+  enum tree_code inv_code;
+
+  if (TREE_SIDE_EFFECTS (op) || TREE_SIDE_EFFECTS (cmpop))
+    return NULL_TREE;
+
+  if (TREE_CODE_CLASS (code) != tcc_comparison)
+    return NULL_TREE;
+
+  if (rhs_code == truthop_code)
+    {
+      tree newrhs = merge_truthop_with_opposite_arm (loc, rhs, cmpop, rhs_only);
+      if (newrhs != NULL_TREE)
+        {
+          rhs = newrhs;
+          rhs_code = TREE_CODE (rhs);
+        }
+    }
+  if (lhs_code == truthop_code && !rhs_only)
+    {
+      tree newlhs = merge_truthop_with_opposite_arm (loc, lhs, cmpop, false);
+      if (newlhs != NULL_TREE)
+        {
+          lhs = newlhs;
+          lhs_code = TREE_CODE (lhs);
+        }
+    }
+
+  inv_code = invert_tree_comparison (code, HONOR_NANS (TYPE_MODE (type)));
+  if (inv_code == rhs_code
+      && operand_equal_p (TREE_OPERAND (rhs, 0), TREE_OPERAND (cmpop, 0), 0)
+      && operand_equal_p (TREE_OPERAND (rhs, 1), TREE_OPERAND (cmpop, 1), 0))
+    return lhs;
+  if (!rhs_only && inv_code == lhs_code
+      && operand_equal_p (TREE_OPERAND (lhs, 0), TREE_OPERAND (cmpop, 0), 0)
+      && operand_equal_p (TREE_OPERAND (lhs, 1), TREE_OPERAND (cmpop, 1), 0))
+    return rhs;
+  if (rhs != orig_rhs || lhs != orig_lhs)
+    return fold_build2_loc (loc, truthop_code, TREE_TYPE (cmpop),
+                            lhs, rhs);
+  return NULL_TREE;
+}
+
 /* Find ways of folding logical expressions of LHS and RHS:
    Try to merge two comparisons to the same innermost item.
    Look for range tests like "ch >= '0' && ch <= '9'".
@@ -12539,6 +12609,22 @@
       if (0 != (tem = fold_range_test (loc, code, type, op0, op1)))
        return tem;
 
+      if ((code == TRUTH_ANDIF_EXPR && TREE_CODE (arg0) == TRUTH_ORIF_EXPR)
+          || (code == TRUTH_ORIF_EXPR && TREE_CODE (arg0) == TRUTH_ANDIF_EXPR))
+        {
+          tem = merge_truthop_with_opposite_arm (loc, arg0, arg1, true);
+          if (tem)
+            return fold_build2_loc (loc, code, type, tem, arg1);
+        }
+
+      if ((code == TRUTH_ANDIF_EXPR && TREE_CODE (arg1) == TRUTH_ORIF_EXPR)
+          || (code == TRUTH_ORIF_EXPR && TREE_CODE (arg1) == TRUTH_ANDIF_EXPR))
+        {
+          tem = merge_truthop_with_opposite_arm (loc, arg1, arg0, false);
+          if (tem)
+            return fold_build2_loc (loc, code, type, arg0, tem);
+        }
+
       /* Check for the possibility of merging component references.  If our
         lhs is another similar operation, try to merge its rhs with our
        rhs.  Then try to merge our lhs and rhs.  */
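For illustration (not part of the patch): a source-level view of the new
merge_truthop_with_opposite_arm folding.  Hedged sketch; the actual
gcc.target/i386/pr41442.c test is not reproduced here.

/* Illustrative only: y > 4 and y <= 4 are opposite comparisons, so
   (x != 0 && y > 4) || y <= 4 folds to x != 0 || y <= 4.  */
int
f (int x, int y)
{
  return (x != 0 && y > 4) || y <= 4;
}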
=== modified file 'gcc/ifcvt.c'
--- old/gcc/ifcvt.c	2010-04-02 18:54:46 +0000
+++ new/gcc/ifcvt.c	2010-09-01 13:29:58 +0000
@@ -385,7 +385,11 @@
   rtx false_expr;		/* test for then block insns */
   rtx true_prob_val;		/* probability of else block */
   rtx false_prob_val;		/* probability of then block */
-  int n_insns;
+  rtx then_last_head = NULL_RTX;	/* Last match at the head of THEN */
+  rtx else_last_head = NULL_RTX;	/* Last match at the head of ELSE */
+  rtx then_first_tail = NULL_RTX;	/* First match at the tail of THEN */
+  rtx else_first_tail = NULL_RTX;	/* First match at the tail of ELSE */
+  int then_n_insns, else_n_insns, n_insns;
   enum rtx_code false_code;
 
   /* If test is comprised of && or || elements, and we've failed at handling
@@ -418,15 +422,78 @@
      number of insns and see if it is small enough to convert.  */
   then_start = first_active_insn (then_bb);
   then_end = last_active_insn (then_bb, TRUE);
-  n_insns = ce_info->num_then_insns = count_bb_insns (then_bb);
+  then_n_insns = ce_info->num_then_insns = count_bb_insns (then_bb);
+  n_insns = then_n_insns;
   max = MAX_CONDITIONAL_EXECUTE;
 
   if (else_bb)
     {
+      int n_matching;
+
       max *= 2;
       else_start = first_active_insn (else_bb);
       else_end = last_active_insn (else_bb, TRUE);
-      n_insns += ce_info->num_else_insns = count_bb_insns (else_bb);
+      else_n_insns = ce_info->num_else_insns = count_bb_insns (else_bb);
+      n_insns += else_n_insns;
+
+      /* Look for matching sequences at the head and tail of the two blocks,
+         and limit the range of insns to be converted if possible.  */
+      n_matching = flow_find_cross_jump (then_bb, else_bb,
+                                         &then_first_tail, &else_first_tail);
+      if (then_first_tail == BB_HEAD (then_bb))
+        then_start = then_end = NULL_RTX;
+      if (else_first_tail == BB_HEAD (else_bb))
+        else_start = else_end = NULL_RTX;
+
+      if (n_matching > 0)
+        {
+          if (then_end)
+            then_end = prev_active_insn (then_first_tail);
+          if (else_end)
+            else_end = prev_active_insn (else_first_tail);
+          n_insns -= 2 * n_matching;
+        }
+
+      if (then_start && else_start)
+        {
+          int longest_match = MIN (then_n_insns - n_matching,
+                                   else_n_insns - n_matching);
+          n_matching
+            = flow_find_head_matching_sequence (then_bb, else_bb,
+                                                &then_last_head,
+                                                &else_last_head,
+                                                longest_match);
+
+          if (n_matching > 0)
+            {
+              rtx insn;
+
+              /* We won't pass the insns in the head sequence to
+                 cond_exec_process_insns, so we need to test them here
+                 to make sure that they don't clobber the condition.  */
+              for (insn = BB_HEAD (then_bb);
+                   insn != NEXT_INSN (then_last_head);
+                   insn = NEXT_INSN (insn))
+                if (!LABEL_P (insn) && !NOTE_P (insn)
+                    && !DEBUG_INSN_P (insn)
+                    && modified_in_p (test_expr, insn))
+                  return FALSE;
+            }
+
+          if (then_last_head == then_end)
+            then_start = then_end = NULL_RTX;
+          if (else_last_head == else_end)
+            else_start = else_end = NULL_RTX;
+
+          if (n_matching > 0)
+            {
+              if (then_start)
+                then_start = next_active_insn (then_last_head);
+              if (else_start)
+                else_start = next_active_insn (else_last_head);
+              n_insns -= 2 * n_matching;
+            }
+        }
     }
 
   if (n_insns > max)
@@ -570,7 +637,21 @@
     fprintf (dump_file, "%d insn%s converted to conditional execution.\n",
             n_insns, (n_insns == 1) ? " was" : "s were");
 
-  /* Merge the blocks! */
+  /* Merge the blocks!  If we had matching sequences, make sure to delete one
+     copy at the appropriate location first: delete the copy in the THEN branch
+     for a tail sequence so that the remaining one is executed last for both
+     branches, and delete the copy in the ELSE branch for a head sequence so
+     that the remaining one is executed first for both branches.  */
+  if (then_first_tail)
+    {
+      rtx from = then_first_tail;
+      if (!INSN_P (from))
+        from = next_active_insn (from);
+      delete_insn_chain (from, BB_END (then_bb), false);
+    }
+  if (else_last_head)
+    delete_insn_chain (first_active_insn (else_bb), else_last_head, false);
+
   merge_if_block (ce_info);
   cond_exec_changed_p = TRUE;
   return TRUE;
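For illustration (not part of the patch): the if/else shape PR target/21803
is about.  Both arms end with an identical store, which
cond_exec_process_if_block can now keep out of the conditionalised region,
as the new gcc.target/arm/pr42496.c test checks.  Sketch only; the test's
exact contents are assumed.

/* Illustrative only: the store to *p is identical in both arms, so it
   can be executed unconditionally instead of being predicated twice.  */
void
foo (int a, int *p)
{
  if (a)
    {
      a = a << 3;
      *p = a;
    }
  else
    {
      a = a >> 3;
      *p = a;
    }
}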
=== modified file 'gcc/ira-color.c'
--- old/gcc/ira-color.c	2010-04-02 18:54:46 +0000
+++ new/gcc/ira-color.c	2010-09-01 13:29:58 +0000
@@ -441,14 +441,18 @@
 {
   HARD_REG_SET conflicting_regs;
   int i, j, k, hard_regno, best_hard_regno, class_size;
-  int cost, mem_cost, min_cost, full_cost, min_full_cost, add_cost;
+  int cost, mem_cost, min_cost, full_cost, min_full_cost;
   int *a_costs;
   int *conflict_costs;
-  enum reg_class cover_class, rclass, conflict_cover_class;
+  enum reg_class cover_class, conflict_cover_class;
   enum machine_mode mode;
   ira_allocno_t a, conflict_allocno;
   ira_allocno_conflict_iterator aci;
   static int costs[FIRST_PSEUDO_REGISTER], full_costs[FIRST_PSEUDO_REGISTER];
+#ifndef HONOR_REG_ALLOC_ORDER
+  enum reg_class rclass;
+  int add_cost;
+#endif
 #ifdef STACK_REGS
   bool no_stack_reg_p;
 #endif
@@ -586,6 +590,7 @@
	continue;
       cost = costs[i];
       full_cost = full_costs[i];
+#ifndef HONOR_REG_ALLOC_ORDER
       if (! allocated_hardreg_p[hard_regno]
	  && ira_hard_reg_not_in_set_p (hard_regno, mode, call_used_reg_set))
	/* We need to save/restore the hard register in
@@ -598,6 +603,7 @@
	  cost += add_cost;
	  full_cost += add_cost;
	}
+#endif
       if (min_cost > cost)
	min_cost = cost;
       if (min_full_cost > full_cost)

=== modified file 'gcc/ira-costs.c'
--- old/gcc/ira-costs.c	2010-08-13 11:40:17 +0000
+++ new/gcc/ira-costs.c	2010-09-01 13:29:58 +0000
@@ -33,6 +33,7 @@
 #include "addresses.h"
 #include "insn-config.h"
 #include "recog.h"
+#include "reload.h"
 #include "toplev.h"
 #include "target.h"
 #include "params.h"
@@ -123,6 +124,10 @@
 /* Record cover register class of each allocno with the same regno.  */
 static enum reg_class *regno_cover_class;
 
+/* Record cost gains for not allocating a register with an invariant
+   equivalence.  */
+static int *regno_equiv_gains;
+
 /* Execution frequency of the current insn.  */
 static int frequency;
 
@@ -1263,6 +1268,7 @@
 #ifdef FORBIDDEN_INC_DEC_CLASSES
       int inc_dec_p = false;
 #endif
+      int equiv_savings = regno_equiv_gains[i];
 
       if (! allocno_p)
	{
@@ -1311,6 +1317,15 @@
 #endif
	    }
	}
+      if (equiv_savings < 0)
+	temp_costs->mem_cost = -equiv_savings;
+      else if (equiv_savings > 0)
+	{
+	  temp_costs->mem_cost = 0;
+	  for (k = 0; k < cost_classes_num; k++)
+	    temp_costs->cost[k] += equiv_savings;
+	}
+
       best_cost = (1 << (HOST_BITS_PER_INT - 2)) - 1;
       best = ALL_REGS;
       alt_class = NO_REGS;
@@ -1680,6 +1695,8 @@
   regno_cover_class
     = (enum reg_class *) ira_allocate (sizeof (enum reg_class)
				       * max_reg_num ());
+  regno_equiv_gains = (int *) ira_allocate (sizeof (int) * max_reg_num ());
+  memset (regno_equiv_gains, 0, sizeof (int) * max_reg_num ());
 }
 
 /* Common finalization function for ira_costs and
@@ -1687,6 +1704,7 @@
 static void
 finish_costs (void)
 {
+  ira_free (regno_equiv_gains);
   ira_free (regno_cover_class);
   ira_free (pref_buffer);
   ira_free (costs);
@@ -1702,6 +1720,7 @@
   init_costs ();
   total_allocno_costs = (struct costs *) ira_allocate (max_struct_costs_size
						       * ira_allocnos_num);
+  calculate_elim_costs_all_insns ();
   find_costs_and_classes (ira_dump_file);
   setup_allocno_cover_class_and_costs ();
   finish_costs ();
@@ -1775,3 +1794,16 @@
       ALLOCNO_COVER_CLASS_COST (a) = min_cost;
     }
 }
+
+/* Add COST to the estimated gain for eliminating REGNO with its
+   equivalence.  If COST is zero, record that no such elimination is
+   possible.  */
+
+void
+ira_adjust_equiv_reg_cost (unsigned regno, int cost)
+{
+  if (cost == 0)
+    regno_equiv_gains[regno] = 0;
+  else
+    regno_equiv_gains[regno] += cost;
+}

=== modified file 'gcc/ira.c'
--- old/gcc/ira.c	2010-08-12 13:51:16 +0000
+++ new/gcc/ira.c	2010-09-01 13:29:58 +0000
@@ -431,9 +431,6 @@
   HARD_REG_SET processed_hard_reg_set;
 
   ira_assert (SHRT_MAX >= FIRST_PSEUDO_REGISTER);
-  /* We could call ORDER_REGS_FOR_LOCAL_ALLOC here (it is usually
-     putting hard callee-used hard registers first).  But our
-     heuristics work better.  */
   for (cl = (int) N_REG_CLASSES - 1; cl >= 0; cl--)
     {
       COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]);
@@ -490,6 +487,9 @@
 static void
 setup_alloc_regs (bool use_hard_frame_p)
 {
+#ifdef ADJUST_REG_ALLOC_ORDER
+  ADJUST_REG_ALLOC_ORDER;
+#endif
   COPY_HARD_REG_SET (no_unit_alloc_regs, fixed_reg_set);
   if (! use_hard_frame_p)
     SET_HARD_REG_BIT (no_unit_alloc_regs, HARD_FRAME_POINTER_REGNUM);
@@ -1533,12 +1533,8 @@
 
	  x = XEXP (note, 0);
 
-	  if (! function_invariant_p (x)
-	      || ! flag_pic
-	      /* A function invariant is often CONSTANT_P but may
-		 include a register.  We promise to only pass
-		 CONSTANT_P objects to LEGITIMATE_PIC_OPERAND_P.  */
-	      || (CONSTANT_P (x) && LEGITIMATE_PIC_OPERAND_P (x)))
+	  if (! CONSTANT_P (x)
+	      || ! flag_pic || LEGITIMATE_PIC_OPERAND_P (x))
	    {
	      /* It can happen that a REG_EQUIV note contains a MEM
		 that is not a legitimate memory operand.  As later
@@ -3097,8 +3093,19 @@
       if (dump_file)
	print_insn_chains (dump_file);
     }
-
+/* Allocate memory for reg_equiv_memory_loc.  */
+static void
+init_reg_equiv_memory_loc (void)
+{
+  max_regno = max_reg_num ();
+
+  /* And the reg_equiv_memory_loc array.  */
+  VEC_safe_grow (rtx, gc, reg_equiv_memory_loc_vec, max_regno);
+  memset (VEC_address (rtx, reg_equiv_memory_loc_vec), 0,
+	  sizeof (rtx) * max_regno);
+  reg_equiv_memory_loc = VEC_address (rtx, reg_equiv_memory_loc_vec);
+}
 
 /* All natural loops.  */
 struct loops ira_loops;
@@ -3203,6 +3210,8 @@
   record_loop_exits ();
   current_loops = &ira_loops;
 
+  init_reg_equiv_memory_loc ();
+
   if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL)
     fprintf (ira_dump_file, "Building IRA IR\n");
   loops_p = ira_build (optimize
@@ -3263,13 +3272,8 @@
 #endif
 
   delete_trivially_dead_insns (get_insns (), max_reg_num ());
-  max_regno = max_reg_num ();
 
-  /* And the reg_equiv_memory_loc array.  */
-  VEC_safe_grow (rtx, gc, reg_equiv_memory_loc_vec, max_regno);
-  memset (VEC_address (rtx, reg_equiv_memory_loc_vec), 0,
-	  sizeof (rtx) * max_regno);
-  reg_equiv_memory_loc = VEC_address (rtx, reg_equiv_memory_loc_vec);
+  init_reg_equiv_memory_loc ();
 
   if (max_regno != max_regno_before_ira)
     {

=== modified file 'gcc/ira.h'
--- old/gcc/ira.h	2009-09-02 17:54:25 +0000
+++ new/gcc/ira.h	2010-09-01 13:29:58 +0000
@@ -87,3 +87,4 @@
 extern void ira_mark_new_stack_slot (rtx, int, unsigned int);
 extern bool ira_better_spill_reload_regno_p (int *, int *, rtx, rtx, rtx);
 
+extern void ira_adjust_equiv_reg_cost (unsigned, int);

=== modified file 'gcc/optabs.h'
--- old/gcc/optabs.h	2009-11-25 10:55:54 +0000
+++ new/gcc/optabs.h	2010-09-01 13:29:58 +0000
@@ -771,6 +771,9 @@
 /* Generate code for float to integral conversion.  */
 extern bool expand_sfix_optab (rtx, rtx, convert_optab);
 
+/* Generate code for a widening multiply.  */
+extern rtx expand_widening_mult (enum machine_mode, rtx, rtx, rtx, int, optab);
+
 /* Return tree if target supports vector operations for COND_EXPR.  */
 bool expand_vec_cond_expr_p (tree, enum machine_mode);

=== modified file 'gcc/passes.c'
--- old/gcc/passes.c	2010-05-19 12:14:37 +0000
+++ new/gcc/passes.c	2010-09-01 13:29:58 +0000
@@ -944,6 +944,7 @@
       NEXT_PASS (pass_forwprop);
       NEXT_PASS (pass_phiopt);
       NEXT_PASS (pass_fold_builtins);
+      NEXT_PASS (pass_optimize_widening_mul);
       NEXT_PASS (pass_tail_calls);
       NEXT_PASS (pass_rename_ssa_copies);
       NEXT_PASS (pass_uncprop);

=== modified file 'gcc/reload.h'
--- old/gcc/reload.h	2010-04-02 18:54:46 +0000
+++ new/gcc/reload.h	2010-09-01 13:29:58 +0000
@@ -347,6 +347,10 @@
 extern rtx eliminate_regs (rtx, enum machine_mode, rtx);
 extern bool elimination_target_reg_p (rtx);
 
+/* Called from the register allocator to estimate costs of eliminating
+   invariant registers.  */
+extern void calculate_elim_costs_all_insns (void);
+
 /* Deallocate the reload register used by reload number R.  */
 extern void deallocate_reload_reg (int r);
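For illustration (not part of the patch): the kind of function the new
elimination-cost estimate helps.  The address of a local is equivalent to
an eliminable frame-pointer expression, so IRA can now see that leaving the
pseudo unallocated and recomputing the sp-relative address is cheap.  A
sketch in the spirit of the new gcc.target/arm/eliminate.c test (exact
contents assumed).

/* Illustrative only: &v is a frame-pointer-relative invariant; with the
   new costing, repeated uses need not pin a hard register to it.  */
extern void use_address (int *);

void
test (void)
{
  int v;
  use_address (&v);
  use_address (&v);
  use_address (&v);
}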
Also find all paradoxical subregs and find largest such for - each pseudo. */ - - num_eliminable_invariants = 0; - for (insn = first; insn; insn = NEXT_INSN (insn)) - { - rtx set = single_set (insn); - - /* We may introduce USEs that we want to remove at the end, so - we'll mark them with QImode. Make sure there are no - previously-marked insns left by say regmove. */ - if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE - && GET_MODE (insn) != VOIDmode) - PUT_MODE (insn, VOIDmode); - - if (NONDEBUG_INSN_P (insn)) - scan_paradoxical_subregs (PATTERN (insn)); - - if (set != 0 && REG_P (SET_DEST (set))) - { - rtx note = find_reg_note (insn, REG_EQUIV, NULL_RTX); - rtx x; - - if (! note) - continue; - - i = REGNO (SET_DEST (set)); - x = XEXP (note, 0); - - if (i <= LAST_VIRTUAL_REGISTER) - continue; - - if (! function_invariant_p (x) - || ! flag_pic - /* A function invariant is often CONSTANT_P but may - include a register. We promise to only pass - CONSTANT_P objects to LEGITIMATE_PIC_OPERAND_P. */ - || (CONSTANT_P (x) - && LEGITIMATE_PIC_OPERAND_P (x))) - { - /* It can happen that a REG_EQUIV note contains a MEM - that is not a legitimate memory operand. As later - stages of reload assume that all addresses found - in the reg_equiv_* arrays were originally legitimate, - we ignore such REG_EQUIV notes. */ - if (memory_operand (x, VOIDmode)) - { - /* Always unshare the equivalence, so we can - substitute into this insn without touching the - equivalence. */ - reg_equiv_memory_loc[i] = copy_rtx (x); - } - else if (function_invariant_p (x)) - { - if (GET_CODE (x) == PLUS) - { - /* This is PLUS of frame pointer and a constant, - and might be shared. Unshare it. */ - reg_equiv_invariant[i] = copy_rtx (x); - num_eliminable_invariants++; - } - else if (x == frame_pointer_rtx || x == arg_pointer_rtx) - { - reg_equiv_invariant[i] = x; - num_eliminable_invariants++; - } - else if (LEGITIMATE_CONSTANT_P (x)) - reg_equiv_constant[i] = x; - else - { - reg_equiv_memory_loc[i] - = force_const_mem (GET_MODE (SET_DEST (set)), x); - if (! reg_equiv_memory_loc[i]) - reg_equiv_init[i] = NULL_RTX; - } - } - else - { - reg_equiv_init[i] = NULL_RTX; - continue; - } - } - else - reg_equiv_init[i] = NULL_RTX; - } - } - - if (dump_file) - for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++) - if (reg_equiv_init[i]) - { - fprintf (dump_file, "init_insns for %u: ", i); - print_inline_rtx (dump_file, reg_equiv_init[i], 20); - fprintf (dump_file, "\n"); - } - + init_eliminable_invariants (first, true); init_elim_table (); - first_label_num = get_first_label_num (); - num_labels = max_label_num () - first_label_num; - - /* Allocate the tables used to store offset information at labels. */ - /* We used to use alloca here, but the size of what it would try to - allocate would occasionally cause it to exceed the stack limit and - cause a core dump. */ - offsets_known_at = XNEWVEC (char, num_labels); - offsets_at = (HOST_WIDE_INT (*)[NUM_ELIMINABLE_REGS]) xmalloc (num_labels * NUM_ELIMINABLE_REGS * sizeof (HOST_WIDE_INT)); - /* Alter each pseudo-reg rtx to contain its hard reg number. Assign stack slots to the pseudos that lack hard regs or equivalents. Do not touch virtual registers. */ @@ -1411,31 +1296,11 @@ } } + free (temp_pseudo_reg_arr); + /* Indicate that we no longer have known memory locations or constants. 
*/ - if (reg_equiv_constant) - free (reg_equiv_constant); - if (reg_equiv_invariant) - free (reg_equiv_invariant); - reg_equiv_constant = 0; - reg_equiv_invariant = 0; - VEC_free (rtx, gc, reg_equiv_memory_loc_vec); - reg_equiv_memory_loc = 0; - - free (temp_pseudo_reg_arr); - - if (offsets_known_at) - free (offsets_known_at); - if (offsets_at) - free (offsets_at); - - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) - if (reg_equiv_alt_mem_list[i]) - free_EXPR_LIST_list (&reg_equiv_alt_mem_list[i]); - free (reg_equiv_alt_mem_list); - - free (reg_equiv_mem); + free_reg_equiv (); reg_equiv_init = 0; - free (reg_equiv_address); free (reg_max_ref_width); free (reg_old_renumber); free (pseudo_previous_regs); @@ -1728,6 +1593,100 @@ *pprev_reload = 0; } +/* This function is called from the register allocator to set up estimates + for the cost of eliminating pseudos which have REG_EQUIV equivalences to + an invariant. The structure is similar to calculate_needs_all_insns. */ + +void +calculate_elim_costs_all_insns (void) +{ + int *reg_equiv_init_cost; + basic_block bb; + int i; + + reg_equiv_init_cost = XCNEWVEC (int, max_regno); + init_elim_table (); + init_eliminable_invariants (get_insns (), false); + + set_initial_elim_offsets (); + set_initial_label_offsets (); + + FOR_EACH_BB (bb) + { + rtx insn; + elim_bb = bb; + + FOR_BB_INSNS (bb, insn) + { + /* If this is a label, a JUMP_INSN, or has REG_NOTES (which might + include REG_LABEL_OPERAND and REG_LABEL_TARGET), we need to see + what effects this has on the known offsets at labels. */ + + if (LABEL_P (insn) || JUMP_P (insn) + || (INSN_P (insn) && REG_NOTES (insn) != 0)) + set_label_offsets (insn, insn, 0); + + if (INSN_P (insn)) + { + rtx set = single_set (insn); + + /* Skip insns that only set an equivalence. */ + if (set && REG_P (SET_DEST (set)) + && reg_renumber[REGNO (SET_DEST (set))] < 0 + && (reg_equiv_constant[REGNO (SET_DEST (set))] + || (reg_equiv_invariant[REGNO (SET_DEST (set))]))) + { + unsigned regno = REGNO (SET_DEST (set)); + rtx init = reg_equiv_init[regno]; + if (init) + { + rtx t = eliminate_regs_1 (SET_SRC (set), VOIDmode, insn, + false, true); + int cost = rtx_cost (t, SET, + optimize_bb_for_speed_p (bb)); + int freq = REG_FREQ_FROM_BB (bb); + + reg_equiv_init_cost[regno] = cost * freq; + continue; + } + } + /* If needed, eliminate any eliminable registers. */ + if (num_eliminable || num_eliminable_invariants) + elimination_costs_in_insn (insn); + + if (num_eliminable) + update_eliminable_offsets (); + } + } + } + for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++) + { + if (reg_equiv_invariant[i]) + { + if (reg_equiv_init[i]) + { + int cost = reg_equiv_init_cost[i]; + if (dump_file) + fprintf (dump_file, + "Reg %d has equivalence, initial gains %d\n", i, cost); + if (cost != 0) + ira_adjust_equiv_reg_cost (i, cost); + } + else + { + if (dump_file) + fprintf (dump_file, + "Reg %d had equivalence, but can't be eliminated\n", + i); + ira_adjust_equiv_reg_cost (i, 0); + } + } + } + + free_reg_equiv (); + free (reg_equiv_init_cost); +} + /* Comparison function for qsort to decide which of two reloads should be handled first. *P1 and *P2 are the reload numbers. */ @@ -2514,6 +2473,36 @@ } } +/* Called through for_each_rtx, this function examines every reg that occurs + in PX and adjusts the costs for its elimination which are gathered by IRA. + DATA is the insn in which PX occurs. We do not recurse into MEM + expressions.
*/ + +static int +note_reg_elim_costly (rtx *px, void *data) +{ + rtx insn = (rtx)data; + rtx x = *px; + + if (MEM_P (x)) + return -1; + + if (REG_P (x) + && REGNO (x) >= FIRST_PSEUDO_REGISTER + && reg_equiv_init[REGNO (x)] + && reg_equiv_invariant[REGNO (x)]) + { + rtx t = reg_equiv_invariant[REGNO (x)]; + rtx new_rtx = eliminate_regs_1 (t, Pmode, insn, true, true); + int cost = rtx_cost (new_rtx, SET, optimize_bb_for_speed_p (elim_bb)); + int freq = REG_FREQ_FROM_BB (elim_bb); + + if (cost != 0) + ira_adjust_equiv_reg_cost (REGNO (x), -cost * freq); + } + return 0; +} + /* Scan X and replace any eliminable registers (such as fp) with a replacement (such as sp), plus an offset. @@ -2533,6 +2522,9 @@ This means, do not set ref_outside_mem even if the reference is outside of MEMs. + If FOR_COSTS is true, we are being called before reload in order to + estimate the costs of keeping registers with an equivalence unallocated. + REG_EQUIV_MEM and REG_EQUIV_ADDRESS contain address that have had replacements done assuming all offsets are at their initial values. If they are not, or if REG_EQUIV_ADDRESS is nonzero for a pseudo we @@ -2541,7 +2533,7 @@ static rtx eliminate_regs_1 (rtx x, enum machine_mode mem_mode, rtx insn, - bool may_use_invariant) + bool may_use_invariant, bool for_costs) { enum rtx_code code = GET_CODE (x); struct elim_table *ep; @@ -2589,11 +2581,12 @@ { if (may_use_invariant || (insn && DEBUG_INSN_P (insn))) return eliminate_regs_1 (copy_rtx (reg_equiv_invariant[regno]), - mem_mode, insn, true); + mem_mode, insn, true, for_costs); /* There exists at least one use of REGNO that cannot be eliminated. Prevent the defining insn from being deleted. */ reg_equiv_init[regno] = NULL_RTX; - alter_reg (regno, -1, true); + if (!for_costs) + alter_reg (regno, -1, true); } return x; @@ -2654,8 +2647,10 @@ operand of a load-address insn. */ { - rtx new0 = eliminate_regs_1 (XEXP (x, 0), mem_mode, insn, true); - rtx new1 = eliminate_regs_1 (XEXP (x, 1), mem_mode, insn, true); + rtx new0 = eliminate_regs_1 (XEXP (x, 0), mem_mode, insn, true, + for_costs); + rtx new1 = eliminate_regs_1 (XEXP (x, 1), mem_mode, insn, true, + for_costs); if (reg_renumber && (new0 != XEXP (x, 0) || new1 != XEXP (x, 1))) { @@ -2729,9 +2724,11 @@ case GE: case GT: case GEU: case GTU: case LE: case LT: case LEU: case LTU: { - rtx new0 = eliminate_regs_1 (XEXP (x, 0), mem_mode, insn, false); + rtx new0 = eliminate_regs_1 (XEXP (x, 0), mem_mode, insn, false, + for_costs); rtx new1 = XEXP (x, 1) - ? eliminate_regs_1 (XEXP (x, 1), mem_mode, insn, false) : 0; + ? eliminate_regs_1 (XEXP (x, 1), mem_mode, insn, false, + for_costs) : 0; if (new0 != XEXP (x, 0) || new1 != XEXP (x, 1)) return gen_rtx_fmt_ee (code, GET_MODE (x), new0, new1); @@ -2742,7 +2739,8 @@ /* If we have something in XEXP (x, 0), the usual case, eliminate it. */ if (XEXP (x, 0)) { - new_rtx = eliminate_regs_1 (XEXP (x, 0), mem_mode, insn, true); + new_rtx = eliminate_regs_1 (XEXP (x, 0), mem_mode, insn, true, + for_costs); if (new_rtx != XEXP (x, 0)) { /* If this is a REG_DEAD note, it is not valid anymore. @@ -2750,7 +2748,8 @@ REG_DEAD note for the stack or frame pointer. */ if (REG_NOTE_KIND (x) == REG_DEAD) return (XEXP (x, 1) - ? eliminate_regs_1 (XEXP (x, 1), mem_mode, insn, true) + ? eliminate_regs_1 (XEXP (x, 1), mem_mode, insn, true, + for_costs) : NULL_RTX); x = alloc_reg_note (REG_NOTE_KIND (x), new_rtx, XEXP (x, 1)); @@ -2765,7 +2764,8 @@ strictly needed, but it simplifies the code. 
*/ if (XEXP (x, 1)) { - new_rtx = eliminate_regs_1 (XEXP (x, 1), mem_mode, insn, true); + new_rtx = eliminate_regs_1 (XEXP (x, 1), mem_mode, insn, true, + for_costs); if (new_rtx != XEXP (x, 1)) return gen_rtx_fmt_ee (GET_CODE (x), GET_MODE (x), XEXP (x, 0), new_rtx); @@ -2791,7 +2791,7 @@ && XEXP (XEXP (x, 1), 0) == XEXP (x, 0)) { rtx new_rtx = eliminate_regs_1 (XEXP (XEXP (x, 1), 1), mem_mode, - insn, true); + insn, true, for_costs); if (new_rtx != XEXP (XEXP (x, 1), 1)) return gen_rtx_fmt_ee (code, GET_MODE (x), XEXP (x, 0), @@ -2814,7 +2814,8 @@ case POPCOUNT: case PARITY: case BSWAP: - new_rtx = eliminate_regs_1 (XEXP (x, 0), mem_mode, insn, false); + new_rtx = eliminate_regs_1 (XEXP (x, 0), mem_mode, insn, false, + for_costs); if (new_rtx != XEXP (x, 0)) return gen_rtx_fmt_e (code, GET_MODE (x), new_rtx); return x; @@ -2835,7 +2836,8 @@ new_rtx = SUBREG_REG (x); } else - new_rtx = eliminate_regs_1 (SUBREG_REG (x), mem_mode, insn, false); + new_rtx = eliminate_regs_1 (SUBREG_REG (x), mem_mode, insn, false, + for_costs); if (new_rtx != SUBREG_REG (x)) { @@ -2869,14 +2871,20 @@ /* Our only special processing is to pass the mode of the MEM to our recursive call and copy the flags. While we are here, handle this case more efficiently. */ - return - replace_equiv_address_nv (x, - eliminate_regs_1 (XEXP (x, 0), GET_MODE (x), - insn, true)); + + new_rtx = eliminate_regs_1 (XEXP (x, 0), GET_MODE (x), insn, true, + for_costs); + if (for_costs + && memory_address_p (GET_MODE (x), XEXP (x, 0)) + && !memory_address_p (GET_MODE (x), new_rtx)) + for_each_rtx (&XEXP (x, 0), note_reg_elim_costly, insn); + + return replace_equiv_address_nv (x, new_rtx); case USE: /* Handle insn_list USE that a call to a pure function may generate. */ - new_rtx = eliminate_regs_1 (XEXP (x, 0), VOIDmode, insn, false); + new_rtx = eliminate_regs_1 (XEXP (x, 0), VOIDmode, insn, false, + for_costs); if (new_rtx != XEXP (x, 0)) return gen_rtx_USE (GET_MODE (x), new_rtx); return x; @@ -2900,7 +2908,8 @@ { if (*fmt == 'e') { - new_rtx = eliminate_regs_1 (XEXP (x, i), mem_mode, insn, false); + new_rtx = eliminate_regs_1 (XEXP (x, i), mem_mode, insn, false, + for_costs); if (new_rtx != XEXP (x, i) && ! copied) { x = shallow_copy_rtx (x); @@ -2913,7 +2922,8 @@ int copied_vec = 0; for (j = 0; j < XVECLEN (x, i); j++) { - new_rtx = eliminate_regs_1 (XVECEXP (x, i, j), mem_mode, insn, false); + new_rtx = eliminate_regs_1 (XVECEXP (x, i, j), mem_mode, insn, false, + for_costs); if (new_rtx != XVECEXP (x, i, j) && ! copied_vec) { rtvec new_v = gen_rtvec_v (XVECLEN (x, i), @@ -2937,7 +2947,7 @@ rtx eliminate_regs (rtx x, enum machine_mode mem_mode, rtx insn) { - return eliminate_regs_1 (x, mem_mode, insn, false); + return eliminate_regs_1 (x, mem_mode, insn, false, false); } /* Scan rtx X for modifications of elimination target registers. Update @@ -3455,7 +3465,8 @@ /* Companion to the above plus substitution, we can allow invariants as the source of a plain move. */ is_set_src = false; - if (old_set && recog_data.operand_loc[i] == &SET_SRC (old_set)) + if (old_set + && recog_data.operand_loc[i] == &SET_SRC (old_set)) is_set_src = true; in_plus = false; if (plus_src @@ -3466,7 +3477,7 @@ substed_operand[i] = eliminate_regs_1 (recog_data.operand[i], VOIDmode, replace ? insn : NULL_RTX, - is_set_src || in_plus); + is_set_src || in_plus, false); if (substed_operand[i] != orig_operand[i]) val = 1; /* Terminate the search in check_eliminable_occurrences at @@ -3594,11 +3605,167 @@ the pre-passes. 
if (val && REG_NOTES (insn) != 0) REG_NOTES (insn) - = eliminate_regs_1 (REG_NOTES (insn), VOIDmode, REG_NOTES (insn), true); + = eliminate_regs_1 (REG_NOTES (insn), VOIDmode, REG_NOTES (insn), true, + false); return val; } +/* Like eliminate_regs_in_insn, but only estimate costs for the use of the + register allocator. INSN is the instruction we need to examine, we perform + eliminations in its operands and record cases where eliminating a reg with + an invariant equivalence would add extra cost. */ + +static void +elimination_costs_in_insn (rtx insn) +{ + int icode = recog_memoized (insn); + rtx old_body = PATTERN (insn); + int insn_is_asm = asm_noperands (old_body) >= 0; + rtx old_set = single_set (insn); + int i; + rtx orig_operand[MAX_RECOG_OPERANDS]; + rtx orig_dup[MAX_RECOG_OPERANDS]; + struct elim_table *ep; + rtx plus_src, plus_cst_src; + bool sets_reg_p; + + if (! insn_is_asm && icode < 0) + { + gcc_assert (GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER + || GET_CODE (PATTERN (insn)) == ADDR_VEC + || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC + || GET_CODE (PATTERN (insn)) == ASM_INPUT + || DEBUG_INSN_P (insn)); + return; + } + + if (old_set != 0 && REG_P (SET_DEST (old_set)) + && REGNO (SET_DEST (old_set)) < FIRST_PSEUDO_REGISTER) + { + /* Check for setting an eliminable register. */ + for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++) + if (ep->from_rtx == SET_DEST (old_set) && ep->can_eliminate) + return; + } + + /* We allow one special case which happens to work on all machines we + currently support: a single set with the source or a REG_EQUAL + note being a PLUS of an eliminable register and a constant. */ + plus_src = plus_cst_src = 0; + sets_reg_p = false; + if (old_set && REG_P (SET_DEST (old_set))) + { + sets_reg_p = true; + if (GET_CODE (SET_SRC (old_set)) == PLUS) + plus_src = SET_SRC (old_set); + /* First see if the source is of the form (plus (...) CST). */ + if (plus_src + && CONST_INT_P (XEXP (plus_src, 1))) + plus_cst_src = plus_src; + else if (REG_P (SET_SRC (old_set)) + || plus_src) + { + /* Otherwise, see if we have a REG_EQUAL note of the form + (plus (...) CST). */ + rtx links; + for (links = REG_NOTES (insn); links; links = XEXP (links, 1)) + { + if ((REG_NOTE_KIND (links) == REG_EQUAL + || REG_NOTE_KIND (links) == REG_EQUIV) + && GET_CODE (XEXP (links, 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (links, 0), 1))) + { + plus_cst_src = XEXP (links, 0); + break; + } + } + } + } + + /* Determine the effects of this insn on elimination offsets. */ + elimination_effects (old_body, VOIDmode); + + /* Eliminate all eliminable registers occurring in operands that + can be handled by reload. */ + extract_insn (insn); + for (i = 0; i < recog_data.n_dups; i++) + orig_dup[i] = *recog_data.dup_loc[i]; + + for (i = 0; i < recog_data.n_operands; i++) + { + orig_operand[i] = recog_data.operand[i]; + + /* For an asm statement, every operand is eliminable. */ + if (insn_is_asm || insn_data[icode].operand[i].eliminable) + { + bool is_set_src, in_plus; + + /* Check for setting a register that we know about. */ + if (recog_data.operand_type[i] != OP_IN + && REG_P (orig_operand[i])) + { + /* If we are assigning to a register that can be eliminated, it + must be as part of a PARALLEL, since the code above handles + single SETs. We must indicate that we can no longer + eliminate this reg.
*/ + for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; + ep++) + if (ep->from_rtx == orig_operand[i]) + ep->can_eliminate = 0; + } + + /* Companion to the above plus substitution, we can allow + invariants as the source of a plain move. */ + is_set_src = false; + if (old_set && recog_data.operand_loc[i] == &SET_SRC (old_set)) + is_set_src = true; + if (is_set_src && !sets_reg_p) + note_reg_elim_costly (&SET_SRC (old_set), insn); + in_plus = false; + if (plus_src && sets_reg_p + && (recog_data.operand_loc[i] == &XEXP (plus_src, 0) + || recog_data.operand_loc[i] == &XEXP (plus_src, 1))) + in_plus = true; + + eliminate_regs_1 (recog_data.operand[i], VOIDmode, + NULL_RTX, + is_set_src || in_plus, true); + /* Terminate the search in check_eliminable_occurrences at + this point. */ + *recog_data.operand_loc[i] = 0; + } + } + + for (i = 0; i < recog_data.n_dups; i++) + *recog_data.dup_loc[i] + = *recog_data.operand_loc[(int) recog_data.dup_num[i]]; + + /* If any eliminable remain, they aren't eliminable anymore. */ + check_eliminable_occurrences (old_body); + + /* Restore the old body. */ + for (i = 0; i < recog_data.n_operands; i++) + *recog_data.operand_loc[i] = orig_operand[i]; + for (i = 0; i < recog_data.n_dups; i++) + *recog_data.dup_loc[i] = orig_dup[i]; + + /* Update all elimination pairs to reflect the status after the current + insn. The changes we make were determined by the earlier call to + elimination_effects. */ + + for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++) + { + if (ep->previous_offset != ep->offset && ep->ref_outside_mem) + ep->can_eliminate = 0; + + ep->ref_outside_mem = 0; + } + + return; +} + /* Loop through all elimination pairs. Recalculate the number not at initial offset. @@ -3908,6 +4075,168 @@ ep->to_rtx = gen_rtx_REG (Pmode, ep->to); } } + +/* Find all the pseudo registers that didn't get hard regs + but do have known equivalent constants or memory slots. + These include parameters (known equivalent to parameter slots) + and cse'd or loop-moved constant memory addresses. + + Record constant equivalents in reg_equiv_constant + so they will be substituted by find_reloads. + Record memory equivalents in reg_mem_equiv so they can + be substituted eventually by altering the REG-rtx's. */ + +static void +init_eliminable_invariants (rtx first, bool do_subregs) +{ + int i; + rtx insn; + + reg_equiv_constant = XCNEWVEC (rtx, max_regno); + reg_equiv_invariant = XCNEWVEC (rtx, max_regno); + reg_equiv_mem = XCNEWVEC (rtx, max_regno); + reg_equiv_alt_mem_list = XCNEWVEC (rtx, max_regno); + reg_equiv_address = XCNEWVEC (rtx, max_regno); + if (do_subregs) + reg_max_ref_width = XCNEWVEC (unsigned int, max_regno); + else + reg_max_ref_width = NULL; + + num_eliminable_invariants = 0; + + first_label_num = get_first_label_num (); + num_labels = max_label_num () - first_label_num; + + /* Allocate the tables used to store offset information at labels. */ + offsets_known_at = XNEWVEC (char, num_labels); + offsets_at = (HOST_WIDE_INT (*)[NUM_ELIMINABLE_REGS]) xmalloc (num_labels * NUM_ELIMINABLE_REGS * sizeof (HOST_WIDE_INT)); + +/* Look for REG_EQUIV notes; record what each pseudo is equivalent + to. If DO_SUBREGS is true, also find all paradoxical subregs and + find largest such for each pseudo. FIRST is the head of the insn + list. */ + + for (insn = first; insn; insn = NEXT_INSN (insn)) + { + rtx set = single_set (insn); + + /* We may introduce USEs that we want to remove at the end, so + we'll mark them with QImode.
Make sure there are no + previously-marked insns left by say regmove. */ + if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE + && GET_MODE (insn) != VOIDmode) + PUT_MODE (insn, VOIDmode); + + if (do_subregs && NONDEBUG_INSN_P (insn)) + scan_paradoxical_subregs (PATTERN (insn)); + + if (set != 0 && REG_P (SET_DEST (set))) + { + rtx note = find_reg_note (insn, REG_EQUIV, NULL_RTX); + rtx x; + + if (! note) + continue; + + i = REGNO (SET_DEST (set)); + x = XEXP (note, 0); + + if (i <= LAST_VIRTUAL_REGISTER) + continue; + + /* If flag_pic and we have constant, verify it's legitimate. */ + if (!CONSTANT_P (x) + || !flag_pic || LEGITIMATE_PIC_OPERAND_P (x)) + { + /* It can happen that a REG_EQUIV note contains a MEM + that is not a legitimate memory operand. As later + stages of reload assume that all addresses found + in the reg_equiv_* arrays were originally legitimate, + we ignore such REG_EQUIV notes. */ + if (memory_operand (x, VOIDmode)) + { + /* Always unshare the equivalence, so we can + substitute into this insn without touching the + equivalence. */ + reg_equiv_memory_loc[i] = copy_rtx (x); + } + else if (function_invariant_p (x)) + { + if (GET_CODE (x) == PLUS) + { + /* This is PLUS of frame pointer and a constant, + and might be shared. Unshare it. */ + reg_equiv_invariant[i] = copy_rtx (x); + num_eliminable_invariants++; + } + else if (x == frame_pointer_rtx || x == arg_pointer_rtx) + { + reg_equiv_invariant[i] = x; + num_eliminable_invariants++; + } + else if (LEGITIMATE_CONSTANT_P (x)) + reg_equiv_constant[i] = x; + else + { + reg_equiv_memory_loc[i] + = force_const_mem (GET_MODE (SET_DEST (set)), x); + if (! reg_equiv_memory_loc[i]) + reg_equiv_init[i] = NULL_RTX; + } + } + else + { + reg_equiv_init[i] = NULL_RTX; + continue; + } + } + else + reg_equiv_init[i] = NULL_RTX; + } + } + + if (dump_file) + for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++) + if (reg_equiv_init[i]) + { + fprintf (dump_file, "init_insns for %u: ", i); + print_inline_rtx (dump_file, reg_equiv_init[i], 20); + fprintf (dump_file, "\n"); + } +} + +/* Indicate that we no longer have known memory locations or constants. + Free all data involved in tracking these. */ + +static void +free_reg_equiv (void) +{ + int i; + + if (reg_equiv_constant) + free (reg_equiv_constant); + if (reg_equiv_invariant) + free (reg_equiv_invariant); + reg_equiv_constant = 0; + reg_equiv_invariant = 0; + VEC_free (rtx, gc, reg_equiv_memory_loc_vec); + reg_equiv_memory_loc = 0; + + if (offsets_known_at) + free (offsets_known_at); + if (offsets_at) + free (offsets_at); + offsets_at = 0; + offsets_known_at = 0; + + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (reg_equiv_alt_mem_list[i]) + free_EXPR_LIST_list (&reg_equiv_alt_mem_list[i]); + free (reg_equiv_alt_mem_list); + + free (reg_equiv_mem); + free (reg_equiv_address); +} /* Kick all pseudos out of hard register REGNO.
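
To make the cost model in the reload1.c changes above concrete: for an insn that merely sets a pseudo to its invariant equivalence, calculate_elim_costs_all_insns records a gain of rtx_cost of the eliminated source times REG_FREQ_FROM_BB of the containing block, and note_reg_elim_costly later subtracts the same kind of product for every use whose memory address would become invalid after substitution. The standalone sketch below models only that arithmetic; the adjust_equiv_reg_cost helper and all register data are hypothetical, and for simplicity each pseudo uses a single block frequency (the patch uses the frequency of each referencing block).

#include <stdio.h>

#define NUM_PSEUDOS 3

/* Hypothetical stand-in for the per-register adjustment that the patch
   accumulates via ira_adjust_equiv_reg_cost.  */
static int equiv_gain[NUM_PSEUDOS];

static void
adjust_equiv_reg_cost (int regno, int delta)
{
  equiv_gain[regno] += delta;
}

int
main (void)
{
  /* Hypothetical inputs: rtx_cost of the equivalent expression after
     elimination, block execution frequency, and the number of uses whose
     address would turn invalid.  */
  int init_cost[NUM_PSEUDOS] = { 4, 2, 6 };
  int freq[NUM_PSEUDOS] = { 10, 100, 5 };
  int costly_uses[NUM_PSEUDOS] = { 0, 3, 1 };
  int r;

  for (r = 0; r < NUM_PSEUDOS; r++)
    {
      /* Gain recorded for the insn that sets the equivalence.  */
      adjust_equiv_reg_cost (r, init_cost[r] * freq[r]);
      /* Each costly use debits cost * freq, as note_reg_elim_costly does.  */
      adjust_equiv_reg_cost (r, -costly_uses[r] * init_cost[r] * freq[r]);
      printf ("pseudo %d: net gain %d from keeping its equivalence\n",
              r, equiv_gain[r]);
    }
  return 0;
}

A pseudo whose net gain stays positive is worth leaving unallocated, since substituting its equivalence is cheaper than occupying a hard register; a gain driven negative by costly uses tells IRA the opposite.
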
@@ -5664,7 +5993,7 @@ return 1; if (GET_CODE (x) == PLUS && (XEXP (x, 0) == frame_pointer_rtx || XEXP (x, 0) == arg_pointer_rtx) - && CONSTANT_P (XEXP (x, 1))) + && GET_CODE (XEXP (x, 1)) == CONST_INT) return 1; return 0; } === modified file 'gcc/system.h' --- old/gcc/system.h 2009-12-13 23:00:53 +0000 +++ new/gcc/system.h 2010-09-01 13:29:58 +0000 @@ -761,7 +761,8 @@ TARGET_ASM_EXCEPTION_SECTION TARGET_ASM_EH_FRAME_SECTION \ SMALL_ARG_MAX ASM_OUTPUT_SHARED_BSS ASM_OUTPUT_SHARED_COMMON \ ASM_OUTPUT_SHARED_LOCAL ASM_MAKE_LABEL_LINKONCE \ - STACK_CHECK_PROBE_INTERVAL STACK_CHECK_PROBE_LOAD + STACK_CHECK_PROBE_INTERVAL STACK_CHECK_PROBE_LOAD \ + ORDER_REGS_FOR_LOCAL_ALLOC /* Hooks that are no longer used. */ #pragma GCC poison LANG_HOOKS_FUNCTION_MARK LANG_HOOKS_FUNCTION_FREE \ === added file 'gcc/testsuite/c-c++-common/uninit-17.c' --- old/gcc/testsuite/c-c++-common/uninit-17.c 1970-01-01 00:00:00 +0000 +++ new/gcc/testsuite/c-c++-common/uninit-17.c 2010-09-01 13:29:58 +0000 @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -Wuninitialized" } */ + +inline int foo(int x) +{ + return x; +} +static void bar(int a, int *ptr) +{ + do + { + int b; /* { dg-warning "is used uninitialized" } */ + if (b < 40) { + ptr[0] = b; + } + b += 1; + ptr++; + } + while (--a != 0); +} +void foobar(int a, int *ptr) +{ + bar(foo(a), ptr); +} + === added file 'gcc/testsuite/gcc.target/arm/eliminate.c' --- old/gcc/testsuite/gcc.target/arm/eliminate.c 1970-01-01 00:00:00 +0000 +++ new/gcc/testsuite/gcc.target/arm/eliminate.c 2010-09-01 13:29:58 +0000 @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +struct X +{ + int c; +}; + +extern void bar(struct X *); + +void foo () +{ + struct X x; + bar (&x); + bar (&x); + bar (&x); +} + +/* { dg-final { scan-assembler-times "r0,\[\\t \]*sp" 3 } } */ === added file 'gcc/testsuite/gcc.target/arm/pr40900.c' --- old/gcc/testsuite/gcc.target/arm/pr40900.c 1970-01-01 00:00:00 +0000 +++ new/gcc/testsuite/gcc.target/arm/pr40900.c 2010-09-01 13:29:58 +0000 @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-optimize-sibling-calls" } */ + +extern short shortv2(); +short shortv1() +{ + return shortv2(); +} + +/* { dg-final { scan-assembler-not "lsl" } } */ +/* { dg-final { scan-assembler-not "asr" } } */ +/* { dg-final { scan-assembler-not "sxth" } } */ === added file 'gcc/testsuite/gcc.target/arm/pr42496.c' --- old/gcc/testsuite/gcc.target/arm/pr42496.c 1970-01-01 00:00:00 +0000 +++ new/gcc/testsuite/gcc.target/arm/pr42496.c 2010-09-01 13:29:58 +0000 @@ -0,0 +1,16 @@ +/* { dg-options "-O2" } */ + +void foo(int i) +{ + extern int j; + + if (i) { + j = 10; + } + else { + j = 20; + } +} + +/* { dg-final { scan-assembler-not "strne" } } */ +/* { dg-final { scan-assembler-not "streq" } } */ === added file 'gcc/testsuite/gcc.target/arm/wmul-1.c' --- old/gcc/testsuite/gcc.target/arm/wmul-1.c 1970-01-01 00:00:00 +0000 +++ new/gcc/testsuite/gcc.target/arm/wmul-1.c 2010-09-01 13:29:58 +0000 @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=armv6t2 -fno-unroll-loops" } */ + +int mac(const short *a, const short *b, int sqr, int *sum) +{ + int i; + int dotp = *sum; + + for (i = 0; i < 150; i++) { + dotp += b[i] * a[i]; + sqr += b[i] * b[i]; + } + + *sum = dotp; + return sqr; +} + +/* { dg-final { scan-assembler-times "smulbb" 2 } } */ === added file 'gcc/testsuite/gcc.target/arm/wmul-2.c' --- old/gcc/testsuite/gcc.target/arm/wmul-2.c 1970-01-01 00:00:00 +0000 +++ new/gcc/testsuite/gcc.target/arm/wmul-2.c 2010-09-01 13:29:58 
+0000 @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=armv6t2 -fno-unroll-loops" } */ + +void vec_mpy(int y[], const short x[], short scaler) +{ + int i; + + for (i = 0; i < 150; i++) + y[i] += ((scaler * x[i]) >> 31); +} + +/* { dg-final { scan-assembler-times "smulbb" 1 } } */ === added file 'gcc/testsuite/gcc.target/bfin/wmul-1.c' --- old/gcc/testsuite/gcc.target/bfin/wmul-1.c 1970-01-01 00:00:00 +0000 +++ new/gcc/testsuite/gcc.target/bfin/wmul-1.c 2010-09-01 13:29:58 +0000 @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +int mac(const short *a, const short *b, int sqr, int *sum) +{ + int i; + int dotp = *sum; + + for (i = 0; i < 150; i++) { + dotp += b[i] * a[i]; + sqr += b[i] * b[i]; + } + + *sum = dotp; + return sqr; +} + +/* { dg-final { scan-assembler-times "\\(IS\\)" 2 } } */ === added file 'gcc/testsuite/gcc.target/bfin/wmul-2.c' --- old/gcc/testsuite/gcc.target/bfin/wmul-2.c 1970-01-01 00:00:00 +0000 +++ new/gcc/testsuite/gcc.target/bfin/wmul-2.c 2010-09-01 13:29:58 +0000 @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +void vec_mpy(int y[], const short x[], short scaler) +{ + int i; + + for (i = 0; i < 150; i++) + y[i] += ((scaler * x[i]) >> 31); +} + +/* { dg-final { scan-assembler-times "\\(IS\\)" 1 } } */ === added file 'gcc/testsuite/gcc.target/i386/pr41442.c' --- old/gcc/testsuite/gcc.target/i386/pr41442.c 1970-01-01 00:00:00 +0000 +++ new/gcc/testsuite/gcc.target/i386/pr41442.c 2010-09-01 13:29:58 +0000 @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +typedef struct LINK link; +struct LINK +{ + link* next; +}; + +int haha(link* p1, link* p2) +{ + if ((p1->next && !p2->next) || p2->next) + return 0; + + return 1; +} + +/* { dg-final { scan-assembler-times "test|cmp" 2 } } */ === added file 'gcc/testsuite/gcc.target/i386/wmul-1.c' --- old/gcc/testsuite/gcc.target/i386/wmul-1.c 1970-01-01 00:00:00 +0000 +++ new/gcc/testsuite/gcc.target/i386/wmul-1.c 2010-09-01 13:29:58 +0000 @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +long long mac(const int *a, const int *b, long long sqr, long long *sum) +{ + int i; + long long dotp = *sum; + + for (i = 0; i < 150; i++) { + dotp += (long long)b[i] * a[i]; + sqr += (long long)b[i] * b[i]; + } + + *sum = dotp; + return sqr; +} + +/* { dg-final { scan-assembler-times "imull" 2 } } */ === added file 'gcc/testsuite/gcc.target/i386/wmul-2.c' --- old/gcc/testsuite/gcc.target/i386/wmul-2.c 1970-01-01 00:00:00 +0000 +++ new/gcc/testsuite/gcc.target/i386/wmul-2.c 2010-09-01 13:29:58 +0000 @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +void vec_mpy(int y[], const int x[], int scaler) +{ + int i; + + for (i = 0; i < 150; i++) + y[i] += (((long long)scaler * x[i]) >> 31); +} + +/* { dg-final { scan-assembler-times "imull" 1 } } */ === modified file 'gcc/tree-cfg.c' --- old/gcc/tree-cfg.c 2010-08-10 13:31:21 +0000 +++ new/gcc/tree-cfg.c 2010-09-01 13:29:58 +0000 @@ -3428,8 +3428,13 @@ connected to the operand types. 
*/ return verify_gimple_comparison (lhs_type, rhs1, rhs2); + case WIDEN_MULT_EXPR: + if (TREE_CODE (lhs_type) != INTEGER_TYPE) + return true; + return ((2 * TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (lhs_type)) + || (TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (rhs2_type))); + case WIDEN_SUM_EXPR: - case WIDEN_MULT_EXPR: case VEC_WIDEN_MULT_HI_EXPR: case VEC_WIDEN_MULT_LO_EXPR: case VEC_PACK_TRUNC_EXPR: === modified file 'gcc/tree-inline.c' --- old/gcc/tree-inline.c 2010-08-10 13:31:21 +0000 +++ new/gcc/tree-inline.c 2010-09-01 13:29:58 +0000 @@ -229,6 +229,7 @@ regions of the CFG, but this is expensive to test. */ if (id->entry_bb && is_gimple_reg (SSA_NAME_VAR (name)) + && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name) && TREE_CODE (SSA_NAME_VAR (name)) != PARM_DECL && (id->entry_bb != EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest || EDGE_COUNT (id->entry_bb->preds) != 1)) === modified file 'gcc/tree-pass.h' --- old/gcc/tree-pass.h 2010-04-02 18:54:46 +0000 +++ new/gcc/tree-pass.h 2010-09-01 13:29:58 +0000 @@ -407,6 +407,7 @@ extern struct gimple_opt_pass pass_cse_reciprocals; extern struct gimple_opt_pass pass_cse_sincos; extern struct gimple_opt_pass pass_optimize_bswap; +extern struct gimple_opt_pass pass_optimize_widening_mul; extern struct gimple_opt_pass pass_warn_function_return; extern struct gimple_opt_pass pass_warn_function_noreturn; extern struct gimple_opt_pass pass_cselim; === modified file 'gcc/tree-ssa-math-opts.c' --- old/gcc/tree-ssa-math-opts.c 2010-04-02 18:54:46 +0000 +++ new/gcc/tree-ssa-math-opts.c 2010-09-01 13:29:58 +0000 @@ -1260,3 +1260,137 @@ 0 /* todo_flags_finish */ } }; + +/* Find integer multiplications where the operands are extended from + smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR + where appropriate. */ + +static unsigned int +execute_optimize_widening_mul (void) +{ + bool changed = false; + basic_block bb; + + FOR_EACH_BB (bb) + { + gimple_stmt_iterator gsi; + + for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + gimple rhs1_stmt = NULL, rhs2_stmt = NULL; + tree type, type1 = NULL, type2 = NULL; + tree rhs1, rhs2, rhs1_convop = NULL, rhs2_convop = NULL; + enum tree_code rhs1_code, rhs2_code; + + if (!is_gimple_assign (stmt) + || gimple_assign_rhs_code (stmt) != MULT_EXPR) + continue; + + type = TREE_TYPE (gimple_assign_lhs (stmt)); + + if (TREE_CODE (type) != INTEGER_TYPE) + continue; + + rhs1 = gimple_assign_rhs1 (stmt); + rhs2 = gimple_assign_rhs2 (stmt); + + if (TREE_CODE (rhs1) == SSA_NAME) + { + rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); + if (!is_gimple_assign (rhs1_stmt)) + continue; + rhs1_code = gimple_assign_rhs_code (rhs1_stmt); + if (!CONVERT_EXPR_CODE_P (rhs1_code)) + continue; + rhs1_convop = gimple_assign_rhs1 (rhs1_stmt); + type1 = TREE_TYPE (rhs1_convop); + if (TYPE_PRECISION (type1) * 2 != TYPE_PRECISION (type)) + continue; + } + else if (TREE_CODE (rhs1) != INTEGER_CST) + continue; + + if (TREE_CODE (rhs2) == SSA_NAME) + { + rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); + if (!is_gimple_assign (rhs2_stmt)) + continue; + rhs2_code = gimple_assign_rhs_code (rhs2_stmt); + if (!CONVERT_EXPR_CODE_P (rhs2_code)) + continue; + rhs2_convop = gimple_assign_rhs1 (rhs2_stmt); + type2 = TREE_TYPE (rhs2_convop); + if (TYPE_PRECISION (type2) * 2 != TYPE_PRECISION (type)) + continue; + } + else if (TREE_CODE (rhs2) != INTEGER_CST) + continue; + + if (rhs1_stmt == NULL && rhs2_stmt == NULL) + continue; + + /* Verify that the machine can perform a widening multiply in this + mode/signedness 
combination, otherwise this transformation is + likely to pessimize code. */ + if ((rhs1_stmt == NULL || TYPE_UNSIGNED (type1)) + && (rhs2_stmt == NULL || TYPE_UNSIGNED (type2)) + && (optab_handler (umul_widen_optab, TYPE_MODE (type)) + ->insn_code == CODE_FOR_nothing)) + continue; + else if ((rhs1_stmt == NULL || !TYPE_UNSIGNED (type1)) + && (rhs2_stmt == NULL || !TYPE_UNSIGNED (type2)) + && (optab_handler (smul_widen_optab, TYPE_MODE (type)) + ->insn_code == CODE_FOR_nothing)) + continue; + else if (rhs1_stmt != NULL && rhs2_stmt != 0 + && (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2)) + && (optab_handler (usmul_widen_optab, TYPE_MODE (type)) + ->insn_code == CODE_FOR_nothing)) + continue; + + if ((rhs1_stmt == NULL && !int_fits_type_p (rhs1, type2)) + || (rhs2_stmt == NULL && !int_fits_type_p (rhs2, type1))) + continue; + + if (rhs1_stmt == NULL) + gimple_assign_set_rhs1 (stmt, fold_convert (type2, rhs1)); + else + gimple_assign_set_rhs1 (stmt, rhs1_convop); + if (rhs2_stmt == NULL) + gimple_assign_set_rhs2 (stmt, fold_convert (type1, rhs2)); + else + gimple_assign_set_rhs2 (stmt, rhs2_convop); + gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR); + update_stmt (stmt); + changed = true; + } + } + return (changed ? TODO_dump_func | TODO_update_ssa | TODO_verify_ssa + | TODO_verify_stmts : 0); +} + +static bool +gate_optimize_widening_mul (void) +{ + return flag_expensive_optimizations && optimize; +} + +struct gimple_opt_pass pass_optimize_widening_mul = +{ + { + GIMPLE_PASS, + "widening_mul", /* name */ + gate_optimize_widening_mul, /* gate */ + execute_optimize_widening_mul, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_NONE, /* tv_id */ + PROP_ssa, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0 /* todo_flags_finish */ + } +}; === modified file 'gcc/tree-ssa.c' --- old/gcc/tree-ssa.c 2009-12-07 22:42:10 +0000 +++ new/gcc/tree-ssa.c 2010-09-01 13:29:58 +0000 @@ -1671,6 +1671,8 @@ { TREE_NO_WARNING (var) = 1; + if (location == DECL_SOURCE_LOCATION (var)) + return; if (xloc.file != floc.file || xloc.line < floc.line || xloc.line > LOCATION_LINE (cfun->function_end_locus))
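
At the source level, the widening_mul pass added above rewrites exactly the kind of multiplications exercised by the wmul testcases: a MULT_EXPR whose operands are conversions from a type of half the result's precision, or integer constants that fit such a type. A minimal illustration of the two recognized shapes follows (the function names are invented for this example; with -O2 on armv6t2 each multiply should expand to a single widening multiply such as smulbb):

int
widen_both (short a, short b)
{
  /* GIMPLE sees (int) a * (int) b; both conversions are stripped and the
     MULT_EXPR is rewritten as WIDEN_MULT_EXPR.  */
  return a * b;
}

int
widen_const (short a)
{
  /* 1000 fits in short (the int_fits_type_p check), so the constant is
     narrowed with fold_convert and the same rewrite applies.  */
  return a * 1000;
}
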