2011-11-27 Ira Rosen gcc/ * tree-vectorizer.h (vect_pattern_recog): Add new argument. * tree-vect-loop.c (vect_analyze_loop_2): Update call to vect_pattern_recog. * tree-vect-patterns.c (widened_name_p): Pass basic block info to vect_is_simple_use. (vect_recog_dot_prod_pattern): Fail for basic blocks. (vect_recog_widen_sum_pattern): Likewise. (vect_handle_widen_op_by_const): Support basic blocks. (vect_operation_fits_smaller_type, vect_recog_over_widening_pattern): Likewise. (vect_recog_mixed_size_cond_pattern): Support basic blocks. Add printing. (vect_mark_pattern_stmts): Update calls to new_stmt_vec_info. (vect_pattern_recog_1): Check for reduction only in loops. (vect_pattern_recog): Add new argument. Support basic blocks. * tree-vect-stmts.c (vectorizable_conversion): Pass basic block info to vect_is_simple_use_1. * tree-vect-slp.c (vect_get_and_check_slp_defs): Support basic blocks. (vect_slp_analyze_bb_1): Call vect_pattern_recog. gcc/testsuite/ * gcc.dg/vect/bb-slp-pattern-1.c: New test. * gcc.dg/vect/bb-slp-pattern-2.c: New test. === added file 'gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c' --- old/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c 1970-01-01 00:00:00 +0000 +++ new/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c 2011-11-23 06:37:10 +0000 @@ -0,0 +1,55 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include "tree-vect.h" + +#define N 8 + +unsigned short X[N]; +unsigned short Y[N]; +unsigned int result[N]; + +/* unsigned short->unsigned int widening-mult. */ +__attribute__ ((noinline, noclone)) void +foo (void) +{ + result[0] = (unsigned int)(X[0] * Y[0]); + result[1] = (unsigned int)(X[1] * Y[1]); + result[2] = (unsigned int)(X[2] * Y[2]); + result[3] = (unsigned int)(X[3] * Y[3]); + result[4] = (unsigned int)(X[4] * Y[4]); + result[5] = (unsigned int)(X[5] * Y[5]); + result[6] = (unsigned int)(X[6] * Y[6]); + result[7] = (unsigned int)(X[7] * Y[7]); +} + +int main (void) +{ + int i, tmp; + + check_vect (); + + for (i = 0; i < N; i++) + { + X[i] = i; + Y[i] = 64-i; + } + + foo (); + + for (i = 0; i < N; i++) + { + __asm__ volatile (""); + tmp = X[i] * Y[i]; + if (result[i] != tmp) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "slp" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */ +/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 8 "slp" { target vect_widen_mult_hi_to_si_pattern } } } */ +/* { dg-final { scan-tree-dump-times "pattern recognized" 8 "slp" { target vect_widen_mult_hi_to_si_pattern } } } */ +/* { dg-final { cleanup-tree-dump "slp" } } */ + === added file 'gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c' --- old/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c 1970-01-01 00:00:00 +0000 +++ new/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c 2011-11-23 06:37:10 +0000 @@ -0,0 +1,53 @@ +/* { dg-require-effective-target vect_condition } */ + +#include "tree-vect.h" + +#define N 128 + +__attribute__((noinline, noclone)) void +foo (short * __restrict__ a, int * __restrict__ b, int stride) +{ + int i; + + for (i = 0; i < N/stride; i++, a += stride, b += stride) + { + a[0] = b[0] ? 1 : 7; + a[1] = b[1] ? 2 : 0; + a[2] = b[2] ? 3 : 0; + a[3] = b[3] ? 4 : 0; + a[4] = b[4] ? 5 : 0; + a[5] = b[5] ? 6 : 0; + a[6] = b[6] ? 7 : 0; + a[7] = b[7] ? 8 : 0; + } +} + +short a[N]; +int b[N]; +int main () +{ + int i; + + check_vect (); + + for (i = 0; i < N; i++) + { + a[i] = i; + b[i] = -i; + } + + foo (a, b, 8); + + for (i = 1; i < N; i++) + if (a[i] != i%8 + 1) + abort (); + + if (a[0] != 7) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_element_align && vect_pack_trunc } } } } */ +/* { dg-final { cleanup-tree-dump "slp" } } */ + === modified file 'gcc/tree-vect-loop.c' --- old/gcc/tree-vect-loop.c 2011-11-20 08:24:08 +0000 +++ new/gcc/tree-vect-loop.c 2011-11-23 06:47:35 +0000 @@ -1458,7 +1458,7 @@ vect_analyze_scalar_cycles (loop_vinfo); - vect_pattern_recog (loop_vinfo); + vect_pattern_recog (loop_vinfo, NULL); /* Data-flow analysis to detect stmts that do not need to be vectorized. */ === modified file 'gcc/tree-vect-patterns.c' --- old/gcc/tree-vect-patterns.c 2011-11-20 09:11:09 +0000 +++ new/gcc/tree-vect-patterns.c 2011-11-23 07:49:33 +0000 @@ -83,11 +83,13 @@ tree oprnd0; enum vect_def_type dt; tree def; + bb_vec_info bb_vinfo; stmt_vinfo = vinfo_for_stmt (use_stmt); loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); + bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); - if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt)) + if (!vect_is_simple_use (name, loop_vinfo, bb_vinfo, def_stmt, &def, &dt)) return false; if (dt != vect_internal_def @@ -111,7 +113,7 @@ || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2))) return false; - if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy, + if (!vect_is_simple_use (oprnd0, loop_vinfo, bb_vinfo, &dummy_gimple, &dummy, &dt)) return false; @@ -188,9 +190,14 @@ gimple pattern_stmt; tree prod_type; loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); - struct loop *loop = LOOP_VINFO_LOOP (loop_info); + struct loop *loop; tree var, rhs; + if (!loop_info) + return NULL; + + loop = LOOP_VINFO_LOOP (loop_info); + if (!is_gimple_assign (last_stmt)) return NULL; @@ -358,8 +365,16 @@ { tree new_type, new_oprnd, tmp; gimple new_stmt; - loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); - struct loop *loop = LOOP_VINFO_LOOP (loop_info); + loop_vec_info loop_vinfo; + struct loop *loop = NULL; + bb_vec_info bb_vinfo; + stmt_vec_info stmt_vinfo; + + stmt_vinfo = vinfo_for_stmt (stmt); + loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); + bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); + if (loop_vinfo) + loop = LOOP_VINFO_LOOP (loop_vinfo); if (code != MULT_EXPR && code != LSHIFT_EXPR) return false; @@ -377,7 +392,9 @@ if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4) || !gimple_bb (def_stmt) - || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) + || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))) + || (!loop && gimple_bb (def_stmt) != BB_VINFO_BB (bb_vinfo) + && gimple_code (def_stmt) != GIMPLE_PHI) || !vinfo_for_stmt (def_stmt)) return false; @@ -774,9 +791,14 @@ tree type, half_type; gimple pattern_stmt; loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); - struct loop *loop = LOOP_VINFO_LOOP (loop_info); + struct loop *loop; tree var; + if (!loop_info) + return NULL; + + loop = LOOP_VINFO_LOOP (loop_info); + if (!is_gimple_assign (last_stmt)) return NULL; @@ -877,7 +899,11 @@ gimple def_stmt, new_stmt; bool first = false; loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); - struct loop *loop = LOOP_VINFO_LOOP (loop_info); + bb_vec_info bb_info = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)); + struct loop *loop = NULL; + + if (loop_info) + loop = LOOP_VINFO_LOOP (loop_info); *new_def_stmt = NULL; @@ -909,7 +935,9 @@ first = true; if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false) || !gimple_bb (def_stmt) - || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) + || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))) + || (!loop && gimple_bb (def_stmt) != BB_VINFO_BB (bb_info) + && gimple_code (def_stmt) != GIMPLE_PHI) || !vinfo_for_stmt (def_stmt)) return false; } @@ -1087,7 +1115,16 @@ int nuses = 0; tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd; bool first; - struct loop *loop = (gimple_bb (stmt))->loop_father; + loop_vec_info loop_vinfo; + struct loop *loop = NULL; + bb_vec_info bb_vinfo; + stmt_vec_info stmt_vinfo; + + stmt_vinfo = vinfo_for_stmt (stmt); + loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); + bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); + if (loop_vinfo) + loop = LOOP_VINFO_LOOP (loop_vinfo); first = true; while (1) @@ -1120,7 +1157,8 @@ if (nuses != 1 || !is_gimple_assign (use_stmt) || !gimple_bb (use_stmt) - || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) + || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) + || (!loop && gimple_bb (use_stmt) != BB_VINFO_BB (bb_vinfo))) return NULL; /* Create pattern statement for STMT. */ @@ -1485,6 +1523,7 @@ enum machine_mode cmpmode; gimple pattern_stmt, def_stmt; loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); if (!is_gimple_assign (last_stmt) || gimple_assign_rhs_code (last_stmt) != COND_EXPR @@ -1538,7 +1577,8 @@ tmp = build3 (COND_EXPR, comp_type, unshare_expr (cond_expr), fold_convert (comp_type, then_clause), fold_convert (comp_type, else_clause)); - def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (comp_type, NULL), tmp); + def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (comp_type, NULL), + tmp); pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, @@ -1546,12 +1586,15 @@ gimple_assign_lhs (def_stmt), NULL_TREE); STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt; - def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); + def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); set_vinfo_for_stmt (def_stmt, def_stmt_info); STMT_VINFO_VECTYPE (def_stmt_info) = comp_vectype; *type_in = vectype; *type_out = vectype; + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "vect_recog_mixed_size_cond_pattern: detected: "); + return pattern_stmt; } @@ -1565,10 +1608,11 @@ stmt_vec_info pattern_stmt_info, def_stmt_info; stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info); + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info); gimple def_stmt; set_vinfo_for_stmt (pattern_stmt, - new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL)); + new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo)); gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt)); pattern_stmt_info = vinfo_for_stmt (pattern_stmt); @@ -1586,7 +1630,7 @@ def_stmt_info = vinfo_for_stmt (def_stmt); if (def_stmt_info == NULL) { - def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); + def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); set_vinfo_for_stmt (def_stmt, def_stmt_info); } gimple_set_bb (def_stmt, gimple_bb (orig_stmt)); @@ -1697,9 +1741,10 @@ /* Patterns cannot be vectorized using SLP, because they change the order of computation. */ - FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next) - if (next == stmt) - VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i); + if (loop_vinfo) + FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next) + if (next == stmt) + VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i); /* It is possible that additional pattern stmts are created and inserted in STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the @@ -1799,26 +1844,46 @@ be recorded in S3. */ void -vect_pattern_recog (loop_vec_info loop_vinfo) +vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) { - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); - unsigned int nbbs = loop->num_nodes; + struct loop *loop; + basic_block *bbs, bb; + unsigned int nbbs; gimple_stmt_iterator si; unsigned int i, j; gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1); + gimple stmt; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vect_pattern_recog ==="); - /* Scan through the loop stmts, applying the pattern recognition + if (loop_vinfo) + { + loop = LOOP_VINFO_LOOP (loop_vinfo); + bbs = LOOP_VINFO_BBS (loop_vinfo); + nbbs = loop->num_nodes; + } + else + { + bb = BB_VINFO_BB (bb_vinfo); + nbbs = 1; + bbs = XNEW (basic_block); + bbs[0] = bb; + } + + /* Scan through the stmts, applying the pattern recognition functions starting at each stmt visited: */ for (i = 0; i < nbbs; i++) { basic_block bb = bbs[i]; for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) { + if (bb_vinfo && (stmt = gsi_stmt (si)) + && vinfo_for_stmt (stmt) + && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt))) + continue; + /* Scan over all generic vect_recog_xxx_pattern functions. */ for (j = 0; j < NUM_PATTERNS; j++) { @@ -1830,4 +1895,6 @@ } VEC_free (gimple, heap, stmts_to_replace); + if (bb_vinfo) + free (bbs); } === modified file 'gcc/tree-vect-slp.c' --- old/gcc/tree-vect-slp.c 2011-11-21 06:58:40 +0000 +++ new/gcc/tree-vect-slp.c 2011-11-23 06:47:35 +0000 @@ -255,12 +255,14 @@ /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt from the pattern. Check that all the stmts of the node are in the pattern. */ - if (loop && def_stmt && gimple_bb (def_stmt) - && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) + if (def_stmt && gimple_bb (def_stmt) + && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))) + || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo) + && gimple_code (def_stmt) != GIMPLE_PHI)) && vinfo_for_stmt (def_stmt) && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)) - && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt)) - && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) + && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt)) + && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) { pattern = true; if (!first && !oprnd_info->first_pattern) @@ -1972,6 +1974,8 @@ return NULL; } + vect_pattern_recog (NULL, bb_vinfo); + if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf, &data_dependence_in_bb) || min_vf > max_vf === modified file 'gcc/tree-vect-stmts.c' --- old/gcc/tree-vect-stmts.c 2011-11-22 17:10:17 +0000 +++ new/gcc/tree-vect-stmts.c 2011-11-23 06:47:35 +0000 @@ -3451,11 +3451,11 @@ /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of OP1. */ if (CONSTANT_CLASS_P (op0)) - ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL, + ok = vect_is_simple_use_1 (op1, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[1], &vectype_in); else - ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, - &dt[1]); + ok = vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, + &def, &dt[1]); if (!ok) { === modified file 'gcc/tree-vectorizer.h' --- old/gcc/tree-vectorizer.h 2011-11-22 17:10:17 +0000 +++ new/gcc/tree-vectorizer.h 2011-11-23 06:47:35 +0000 @@ -918,7 +918,7 @@ in the future. */ typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); #define NUM_PATTERNS 7 -void vect_pattern_recog (loop_vec_info); +void vect_pattern_recog (loop_vec_info, bb_vec_info); /* In tree-vectorizer.c. */ unsigned vectorize_loops (void);