2011-07-11  Ira Rosen

	Backport from FSF:

	2011-06-16  Ira Rosen

	gcc/
	* tree-vectorizer.h (vect_recog_func_ptr): Change the first
	argument to be a VEC of statements.
	* tree-vect-loop.c (vect_determine_vectorization_factor): Remove
	the assert that pattern statements have to have their vector type
	set.
	* tree-vect-patterns.c (vect_recog_widen_sum_pattern): Change the
	first argument to be a VEC of statements. Update documentation.
	(vect_recog_dot_prod_pattern, vect_recog_pow_pattern): Likewise.
	(vect_handle_widen_mult_by_const): New function.
	(vect_recog_widen_mult_pattern): Change the first argument to be a
	VEC of statements. Update documentation. Check that the constant
	is INTEGER_CST. Support multiplication by a constant that fits an
	intermediate type - call vect_handle_widen_mult_by_const.
	(vect_pattern_recog_1): Update vect_recog_func_ptr and its call.
	Handle additional pattern statements if necessary.

	gcc/testsuite/
	* gcc.dg/vect/vect-widen-mult-half-u8.c: New test.

	and

	2011-06-30  Ira Rosen

	gcc/
	* tree-vect-loop.c (vect_determine_vectorization_factor): Handle
	both pattern and original statements if necessary.
	(vect_transform_loop): Likewise.
	* tree-vect-patterns.c (vect_pattern_recog): Update documentation.
	* tree-vect-stmts.c (vect_mark_relevant): Add new argument. Mark
	the pattern statement only if the original statement doesn't have
	its own uses.
	(process_use): Call vect_mark_relevant with additional parameter.
	(vect_mark_stmts_to_be_vectorized): Likewise.
	(vect_get_vec_def_for_operand): Use vectorized pattern statement.
	(vect_analyze_stmt): Handle both pattern and original statements
	if necessary.
	(vect_transform_stmt): Don't store vectorized pattern statement
	in the original statement.
	(vect_is_simple_use_1): Use related pattern statement only if the
	original statement is irrelevant.
	* tree-vect-slp.c (vect_get_and_check_slp_defs): Likewise.

	gcc/testsuite/
	* gcc.dg/vect/slp-widen-mult-half.c: New test.
	* gcc.dg/vect/vect-widen-mult-half.c: New test.
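For context, the functional change is support for widening multiplication by a
constant that does not fit the narrow input type but does fit an intermediate
type half the width of the result. A minimal sketch of such a loop, mirroring
the new tests (illustration only, not part of the patch; the function name is
made up):

/* in[i] is unsigned char; COEF does not fit unsigned char but fits a
   16-bit intermediate type, and the product is widened to int.  With this
   backport the pattern recognizer emits an intermediate-type conversion
   followed by a widening multiply instead of a full 32-bit multiply.  */
#define N 32
#define COEF 32470

unsigned char in[N];
int out[N];

void
widen_mult_by_const (void)
{
  int i;

  for (i = 0; i < N; i++)
    out[i] = in[i] * COEF;
}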
=== added file 'gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c'
Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c 2012-01-09 15:03:29.156918805 -0800
@@ -0,0 +1,52 @@
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+#include <stdlib.h>
+
+#define N 32
+#define COEF 32470
+#define COEF2 324700
+
+unsigned char in[N];
+int out[N];
+int out2[N];
+
+__attribute__ ((noinline)) void
+foo ()
+{
+  int i;
+
+  for (i = 0; i < N/2; i++)
+    {
+      out[2*i] = in[2*i] * COEF;
+      out2[2*i] = in[2*i] + COEF2;
+      out[2*i+1] = in[2*i+1] * COEF;
+      out2[2*i+1] = in[2*i+1] + COEF2;
+    }
+}
+
+int main (void)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    {
+      in[i] = i;
+      __asm__ volatile ("");
+    }
+
+  foo ();
+
+  for (i = 0; i < N; i++)
+    if (out[i] != in[i] * COEF || out2[i] != in[i] + COEF2)
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_widen_mult_hi_to_si } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c 2012-01-09 15:03:29.156918805 -0800
@@ -0,0 +1,59 @@
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+#include <stdlib.h>
+
+#define N 32
+#define COEF 32470
+
+unsigned char in[N];
+int out[N];
+
+__attribute__ ((noinline)) void
+foo ()
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    out[i] = in[i] * COEF;
+}
+
+__attribute__ ((noinline)) void
+bar ()
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    out[i] = COEF * in[i];
+}
+
+int main (void)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    {
+      in[i] = i;
+      __asm__ volatile ("");
+    }
+
+  foo ();
+
+  for (i = 0; i < N; i++)
+    if (out[i] != in[i] * COEF)
+      abort ();
+
+  bar ();
+
+  for (i = 0; i < N; i++)
+    if (out[i] != in[i] * COEF)
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_widen_mult_hi_to_si } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c 2012-01-09 15:03:29.160918806 -0800
@@ -0,0 +1,49 @@
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+#include <stdlib.h>
+
+#define N 32
+#define COEF 32470
+#define COEF2 324700
+
+unsigned char in[N];
+int out[N];
+int out2[N];
+
+__attribute__ ((noinline)) void
+foo (int a)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    {
+      out[i] = in[i] * COEF;
+      out2[i] = in[i] + a;
+    }
+}
+
+int main (void)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    {
+      in[i] = i;
+      __asm__ volatile ("");
+    }
+
+  foo (COEF2);
+
+  for (i = 0; i < N; i++)
+    if (out[i] != in[i] * COEF || out2[i] != in[i] + COEF2)
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
Index: gcc-4_6-branch/gcc/tree-vect-loop.c =================================================================== --- gcc-4_6-branch.orig/gcc/tree-vect-loop.c 2012-01-09 15:02:15.000000000 -0800 +++ gcc-4_6-branch/gcc/tree-vect-loop.c 2012-01-09 15:03:29.160918806 -0800 @@ -181,6 +181,8 @@ stmt_vec_info stmt_info; int i; HOST_WIDE_INT dummy; + gimple stmt, pattern_stmt = NULL; + bool analyze_pattern_stmt = false; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vect_determine_vectorization_factor ==="); @@ -241,12 +243,20 @@ } } - for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) + for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;) { - tree vf_vectype; - gimple stmt = gsi_stmt (si), pattern_stmt; - stmt_info = vinfo_for_stmt (stmt); + tree vf_vectype; + + if (analyze_pattern_stmt) + { + stmt = pattern_stmt; + analyze_pattern_stmt = false; + } + else + stmt = gsi_stmt (si); + stmt_info = vinfo_for_stmt (stmt); + if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "==> examining statement: "); @@ -276,10 +286,17 @@ { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "skip."); + gsi_next (&si); continue; } } + else if (STMT_VINFO_IN_PATTERN_P (stmt_info) + && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) + && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) + || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) + analyze_pattern_stmt = true; + if (gimple_get_lhs (stmt) == NULL_TREE) { if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) @@ -311,9 +328,7 @@ } else { - gcc_assert (!STMT_VINFO_DATA_REF (stmt_info) - && !is_pattern_stmt_p (stmt_info)); - + gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)); scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); if (vect_print_dump_info (REPORT_DETAILS)) { @@ -385,6 +400,9 @@ if (!vectorization_factor || (nunits > vectorization_factor)) vectorization_factor = nunits; + + if (!analyze_pattern_stmt) + gsi_next (&si); } } @@ -4740,6 +4758,8 @@ tree cond_expr = NULL_TREE; gimple_seq cond_expr_stmt_list = NULL; bool do_peeling_for_loop_bound; + gimple stmt, pattern_stmt; + bool transform_pattern_stmt = false; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vec_transform_loop ==="); @@ -4827,11 +4847,19 @@ } } - for (si = gsi_start_bb (bb); !gsi_end_p (si);) + pattern_stmt = NULL; + for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;) { - gimple stmt = gsi_stmt (si), pattern_stmt; bool is_store; + if (transform_pattern_stmt) + { + stmt = pattern_stmt; + transform_pattern_stmt = false; + } + else + stmt = gsi_stmt (si); + if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "------>vectorizing statement: "); @@ -4869,6 +4897,11 @@ continue; } } + else if (STMT_VINFO_IN_PATTERN_P (stmt_info) + && (pattern_stmt =
STMT_VINFO_RELATED_STMT (stmt_info)) + && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) + || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) + transform_pattern_stmt = true; gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); nunits = (unsigned int) TYPE_VECTOR_SUBPARTS ( @@ -4897,8 +4930,9 @@ /* Hybrid SLP stmts must be vectorized in addition to SLP. */ if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info)) { - gsi_next (&si); - continue; + if (!transform_pattern_stmt) + gsi_next (&si); + continue; } } @@ -4917,7 +4951,7 @@ the chain. */ vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info)); gsi_remove (&si, true); - continue; + continue; } else { @@ -4927,7 +4961,9 @@ continue; } } - gsi_next (&si); + + if (!transform_pattern_stmt) + gsi_next (&si); } /* stmts in BB */ } /* BBs in loop */ Index: gcc-4_6-branch/gcc/tree-vect-patterns.c =================================================================== --- gcc-4_6-branch.orig/gcc/tree-vect-patterns.c 2012-01-09 15:02:15.000000000 -0800 +++ gcc-4_6-branch/gcc/tree-vect-patterns.c 2012-01-09 15:03:29.160918806 -0800 @@ -39,10 +39,13 @@ #include "diagnostic-core.h" /* Pattern recognition functions */ -static gimple vect_recog_widen_sum_pattern (gimple *, tree *, tree *); -static gimple vect_recog_widen_mult_pattern (gimple *, tree *, tree *); -static gimple vect_recog_dot_prod_pattern (gimple *, tree *, tree *); -static gimple vect_recog_pow_pattern (gimple *, tree *, tree *); +static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *, + tree *); +static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *, + tree *); +static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *, + tree *); +static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *); static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { vect_recog_widen_mult_pattern, vect_recog_widen_sum_pattern, @@ -142,9 +145,9 @@ Input: - * LAST_STMT: A stmt from which the pattern search begins. In the example, - when this function is called with S7, the pattern {S3,S4,S5,S6,S7} will be - detected. + * STMTS: Contains a stmt from which the pattern search begins. In the + example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7} + will be detected. Output: @@ -165,12 +168,13 @@ inner-loop nested in an outer-loop that us being vectorized). */ static gimple -vect_recog_dot_prod_pattern (gimple *last_stmt, tree *type_in, tree *type_out) +vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in, + tree *type_out) { - gimple stmt; + gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0); tree oprnd0, oprnd1; tree oprnd00, oprnd01; - stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt); + stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); tree type, half_type; gimple pattern_stmt; tree prod_type; @@ -178,10 +182,10 @@ struct loop *loop = LOOP_VINFO_LOOP (loop_info); tree var, rhs; - if (!is_gimple_assign (*last_stmt)) + if (!is_gimple_assign (last_stmt)) return NULL; - type = gimple_expr_type (*last_stmt); + type = gimple_expr_type (last_stmt); /* Look for the following pattern DX = (TYPE1) X; @@ -207,7 +211,7 @@ /* Starting from LAST_STMT, follow the defs of its uses in search of the above pattern. 
*/ - if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR) + if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) return NULL; if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) @@ -228,12 +232,12 @@ if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) return NULL; - oprnd0 = gimple_assign_rhs1 (*last_stmt); - oprnd1 = gimple_assign_rhs2 (*last_stmt); + oprnd0 = gimple_assign_rhs1 (last_stmt); + oprnd1 = gimple_assign_rhs2 (last_stmt); if (!types_compatible_p (TREE_TYPE (oprnd0), type) || !types_compatible_p (TREE_TYPE (oprnd1), type)) return NULL; - stmt = *last_stmt; + stmt = last_stmt; if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true)) { @@ -319,11 +323,79 @@ /* We don't allow changing the order of the computation in the inner-loop when doing outer-loop vectorization. */ - gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt)); + gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); return pattern_stmt; } +/* Handle two cases of multiplication by a constant. The first one is when + the constant, CONST_OPRND, fits the type (HALF_TYPE) of the second + operand (OPRND). In that case, we can peform widen-mult from HALF_TYPE to + TYPE. + + Otherwise, if the type of the result (TYPE) is at least 4 times bigger than + HALF_TYPE, and CONST_OPRND fits an intermediate type (2 times smaller than + TYPE), we can perform widen-mult from the intermediate type to TYPE and + replace a_T = (TYPE) a_t; with a_it - (interm_type) a_t; */ + +static bool +vect_handle_widen_mult_by_const (tree const_oprnd, tree *oprnd, + VEC (gimple, heap) **stmts, tree type, + tree *half_type, gimple def_stmt) +{ + tree new_type, new_oprnd, tmp; + gimple new_stmt; + + if (int_fits_type_p (const_oprnd, *half_type)) + { + /* CONST_OPRND is a constant of HALF_TYPE. */ + *oprnd = gimple_assign_rhs1 (def_stmt); + return true; + } + + if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4) + || !vinfo_for_stmt (def_stmt)) + return false; + + /* TYPE is 4 times bigger than HALF_TYPE, try widen-mult for + a type 2 times bigger than HALF_TYPE. */ + new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2, + TYPE_UNSIGNED (type)); + if (!int_fits_type_p (const_oprnd, new_type)) + return false; + + /* Use NEW_TYPE for widen_mult. */ + if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt))) + { + new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); + /* Check if the already created pattern stmt is what we need. */ + if (!is_gimple_assign (new_stmt) + || gimple_assign_rhs_code (new_stmt) != NOP_EXPR + || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type) + return false; + + *oprnd = gimple_assign_lhs (new_stmt); + } + else + { + /* Create a_T = (NEW_TYPE) a_t; */ + *oprnd = gimple_assign_rhs1 (def_stmt); + tmp = create_tmp_var (new_type, NULL); + add_referenced_var (tmp); + new_oprnd = make_ssa_name (tmp, NULL); + new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd, + NULL_TREE); + SSA_NAME_DEF_STMT (new_oprnd) = new_stmt; + STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt; + VEC_safe_push (gimple, heap, *stmts, def_stmt); + *oprnd = new_oprnd; + } + + *half_type = new_type; + return true; +} + + /* Function vect_recog_widen_mult_pattern Try to find the following pattern: @@ -361,28 +433,47 @@ S3 a_T = (TYPE) a_t; S5 prod_T = a_T * CONST; - Input: + A special case of multiplication by constants is when 'TYPE' is 4 times + bigger than 'type', but CONST fits an intermediate type 2 times smaller + than 'TYPE'. 
In that case we create an additional pattern stmt for S3 + to create a variable of the intermediate type, and perform widen-mult + on the intermediate type as well: + + type a_t; + interm_type a_it; + TYPE a_T, prod_T, prod_T'; + + S1 a_t = ; + S3 a_T = (TYPE) a_t; + '--> a_it = (interm_type) a_t; + S5 prod_T = a_T * CONST; + '--> prod_T' = a_it w* CONST; + + Input/Output: - * LAST_STMT: A stmt from which the pattern search begins. In the example, - when this function is called with S5, the pattern {S3,S4,S5,(S6)} is - detected. + * STMTS: Contains a stmt from which the pattern search begins. In the + example, when this function is called with S5, the pattern {S3,S4,S5,(S6)} + is detected. In case of unsigned widen-mult, the original stmt (S5) is + replaced with S6 in STMTS. In case of multiplication by a constant + of an intermediate type (the last case above), STMTS also contains S3 + (inserted before S5). - Output: + Output: - * TYPE_IN: The type of the input arguments to the pattern. + * TYPE_IN: The type of the input arguments to the pattern. - * TYPE_OUT: The type of the output of this pattern. + * TYPE_OUT: The type of the output of this pattern. - * Return value: A new stmt that will be used to replace the sequence of - stmts that constitute the pattern. In this case it will be: - WIDEN_MULT - */ + * Return value: A new stmt that will be used to replace the sequence of + stmts that constitute the pattern. In this case it will be: + WIDEN_MULT +*/ static gimple -vect_recog_widen_mult_pattern (gimple *last_stmt, - tree *type_in, - tree *type_out) +vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts, + tree *type_in, tree *type_out) { + gimple last_stmt = VEC_pop (gimple, *stmts); gimple def_stmt0, def_stmt1; tree oprnd0, oprnd1; tree type, half_type0, half_type1; @@ -395,27 +486,27 @@ VEC (tree, heap) *dummy_vec; bool op0_ok, op1_ok; - if (!is_gimple_assign (*last_stmt)) + if (!is_gimple_assign (last_stmt)) return NULL; - type = gimple_expr_type (*last_stmt); + type = gimple_expr_type (last_stmt); /* Starting from LAST_STMT, follow the defs of its uses in search of the above pattern. */ - if (gimple_assign_rhs_code (*last_stmt) != MULT_EXPR) + if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR) return NULL; - oprnd0 = gimple_assign_rhs1 (*last_stmt); - oprnd1 = gimple_assign_rhs2 (*last_stmt); + oprnd0 = gimple_assign_rhs1 (last_stmt); + oprnd1 = gimple_assign_rhs2 (last_stmt); if (!types_compatible_p (TREE_TYPE (oprnd0), type) || !types_compatible_p (TREE_TYPE (oprnd1), type)) return NULL; /* Check argument 0. */ - op0_ok = widened_name_p (oprnd0, *last_stmt, &half_type0, &def_stmt0, false); + op0_ok = widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false); /* Check argument 1. */ - op1_ok = widened_name_p (oprnd1, *last_stmt, &half_type1, &def_stmt1, false); + op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false); /* In case of multiplication by a constant one of the operands may not match the pattern, but not both. */ @@ -429,29 +520,21 @@ } else if (!op0_ok) { - if (CONSTANT_CLASS_P (oprnd0) - && TREE_CODE (half_type1) == INTEGER_TYPE - && tree_int_cst_lt (oprnd0, TYPE_MAXVAL (half_type1)) - && tree_int_cst_lt (TYPE_MINVAL (half_type1), oprnd0)) - { - /* OPRND0 is a constant of HALF_TYPE1. 
*/ - half_type0 = half_type1; - oprnd1 = gimple_assign_rhs1 (def_stmt1); - } + if (TREE_CODE (oprnd0) == INTEGER_CST + && TREE_CODE (half_type1) == INTEGER_TYPE + && vect_handle_widen_mult_by_const (oprnd0, &oprnd1, stmts, type, + &half_type1, def_stmt1)) + half_type0 = half_type1; else return NULL; } else if (!op1_ok) { - if (CONSTANT_CLASS_P (oprnd1) + if (TREE_CODE (oprnd1) == INTEGER_CST && TREE_CODE (half_type0) == INTEGER_TYPE - && tree_int_cst_lt (oprnd1, TYPE_MAXVAL (half_type0)) - && tree_int_cst_lt (TYPE_MINVAL (half_type0), oprnd1)) - { - /* OPRND1 is a constant of HALF_TYPE0. */ - half_type1 = half_type0; - oprnd0 = gimple_assign_rhs1 (def_stmt0); - } + && vect_handle_widen_mult_by_const (oprnd1, &oprnd0, stmts, type, + &half_type0, def_stmt0)) + half_type1 = half_type0; else return NULL; } @@ -461,7 +544,7 @@ Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */ if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0)) { - tree lhs = gimple_assign_lhs (*last_stmt), use_lhs; + tree lhs = gimple_assign_lhs (last_stmt), use_lhs; imm_use_iterator imm_iter; use_operand_p use_p; int nuses = 0; @@ -491,7 +574,7 @@ return NULL; type = use_type; - *last_stmt = use_stmt; + last_stmt = use_stmt; } if (!types_compatible_p (half_type0, half_type1)) @@ -506,7 +589,7 @@ vectype_out = get_vectype_for_scalar_type (type); if (!vectype || !vectype_out - || !supportable_widening_operation (WIDEN_MULT_EXPR, *last_stmt, + || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt, vectype_out, vectype, &dummy, &dummy, &dummy_code, &dummy_code, &dummy_int, &dummy_vec)) @@ -524,6 +607,7 @@ if (vect_print_dump_info (REPORT_DETAILS)) print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); + VEC_safe_push (gimple, heap, *stmts, last_stmt); return pattern_stmt; } @@ -555,16 +639,17 @@ */ static gimple -vect_recog_pow_pattern (gimple *last_stmt, tree *type_in, tree *type_out) +vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in, tree *type_out) { + gimple last_stmt = VEC_index (gimple, *stmts, 0); tree fn, base, exp = NULL; gimple stmt; tree var; - if (!is_gimple_call (*last_stmt) || gimple_call_lhs (*last_stmt) == NULL) + if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL) return NULL; - fn = gimple_call_fndecl (*last_stmt); + fn = gimple_call_fndecl (last_stmt); if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL) return NULL; @@ -574,8 +659,8 @@ case BUILT_IN_POWI: case BUILT_IN_POWF: case BUILT_IN_POW: - base = gimple_call_arg (*last_stmt, 0); - exp = gimple_call_arg (*last_stmt, 1); + base = gimple_call_arg (last_stmt, 0); + exp = gimple_call_arg (last_stmt, 1); if (TREE_CODE (exp) != REAL_CST && TREE_CODE (exp) != INTEGER_CST) return NULL; @@ -667,21 +752,23 @@ inner-loop nested in an outer-loop that us being vectorized). 
*/ static gimple -vect_recog_widen_sum_pattern (gimple *last_stmt, tree *type_in, tree *type_out) +vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in, + tree *type_out) { + gimple last_stmt = VEC_index (gimple, *stmts, 0); gimple stmt; tree oprnd0, oprnd1; - stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt); + stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); tree type, half_type; gimple pattern_stmt; loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); struct loop *loop = LOOP_VINFO_LOOP (loop_info); tree var; - if (!is_gimple_assign (*last_stmt)) + if (!is_gimple_assign (last_stmt)) return NULL; - type = gimple_expr_type (*last_stmt); + type = gimple_expr_type (last_stmt); /* Look for the following pattern DX = (TYPE) X; @@ -693,25 +780,25 @@ /* Starting from LAST_STMT, follow the defs of its uses in search of the above pattern. */ - if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR) + if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) return NULL; if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) return NULL; - oprnd0 = gimple_assign_rhs1 (*last_stmt); - oprnd1 = gimple_assign_rhs2 (*last_stmt); + oprnd0 = gimple_assign_rhs1 (last_stmt); + oprnd1 = gimple_assign_rhs2 (last_stmt); if (!types_compatible_p (TREE_TYPE (oprnd0), type) || !types_compatible_p (TREE_TYPE (oprnd1), type)) return NULL; - /* So far so good. Since *last_stmt was detected as a (summation) reduction, + /* So far so good. Since last_stmt was detected as a (summation) reduction, we know that oprnd1 is the reduction variable (defined by a loop-header phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. Left to check that oprnd0 is defined by a cast from type 'type' to type 'TYPE'. */ - if (!widened_name_p (oprnd0, *last_stmt, &half_type, &stmt, true)) + if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true)) return NULL; oprnd0 = gimple_assign_rhs1 (stmt); @@ -732,8 +819,9 @@ /* We don't allow changing the order of the computation in the inner-loop when doing outer-loop vectorization. */ - gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt)); + gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); + VEC_safe_push (gimple, heap, *stmts, last_stmt); return pattern_stmt; } @@ -762,7 +850,7 @@ static void vect_pattern_recog_1 ( - gimple (* vect_recog_func) (gimple *, tree *, tree *), + gimple (* vect_recog_func) (VEC (gimple, heap) **, tree *, tree *), gimple_stmt_iterator si) { gimple stmt = gsi_stmt (si), pattern_stmt; @@ -774,12 +862,14 @@ enum tree_code code; int i; gimple next; + VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1); - pattern_stmt = (* vect_recog_func) (&stmt, &type_in, &type_out); + VEC_quick_push (gimple, stmts_to_replace, stmt); + pattern_stmt = (* vect_recog_func) (&stmts_to_replace, &type_in, &type_out); if (!pattern_stmt) return; - si = gsi_for_stmt (stmt); + stmt = VEC_last (gimple, stmts_to_replace); stmt_info = vinfo_for_stmt (stmt); loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); @@ -849,6 +939,35 @@ FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next) if (next == stmt) VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i); + + /* In case of widen-mult by a constant, it is possible that an additional + pattern stmt is created and inserted in STMTS_TO_REPLACE. We create a + stmt_info for it, and mark the relevant statements. 
*/ + for (i = 0; VEC_iterate (gimple, stmts_to_replace, i, stmt) + && (unsigned) i < (VEC_length (gimple, stmts_to_replace) - 1); + i++) + { + stmt_info = vinfo_for_stmt (stmt); + pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "additional pattern stmt: "); + print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); + } + + set_vinfo_for_stmt (pattern_stmt, + new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL)); + gimple_set_bb (pattern_stmt, gimple_bb (stmt)); + pattern_stmt_info = vinfo_for_stmt (pattern_stmt); + + STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt; + STMT_VINFO_DEF_TYPE (pattern_stmt_info) + = STMT_VINFO_DEF_TYPE (stmt_info); + STMT_VINFO_VECTYPE (pattern_stmt_info) = STMT_VINFO_VECTYPE (stmt_info); + STMT_VINFO_IN_PATTERN_P (stmt_info) = true; + } + + VEC_free (gimple, heap, stmts_to_replace); } @@ -896,10 +1015,8 @@ If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3} (because they are marked as irrelevant). It will vectorize S6, and record - a pointer to the new vector stmt VS6 both from S6 (as usual), and also - from S4. We do that so that when we get to vectorizing stmts that use the - def of S4 (like S5 that uses a_0), we'll know where to take the relevant - vector-def from. S4 will be skipped, and S5 will be vectorized as usual: + a pointer to the new vector stmt VS6 from S6 (as usual). + S4 will be skipped, and S5 will be vectorized as usual: in_pattern_p related_stmt vec_stmt S1: a_i = .... - - - @@ -915,7 +1032,21 @@ elsewhere), and we'll end up with: VS6: va_new = .... - VS5: ... = ..vuse(va_new).. */ + VS5: ... = ..vuse(va_new).. + + In case of more than one pattern statements, e.g., widen-mult with + intermediate type: + + S1 a_t = ; + S2 a_T = (TYPE) a_t; + '--> S3: a_it = (interm_type) a_t; + S4 prod_T = a_T * CONST; + '--> S5: prod_T' = a_it w* CONST; + + there may be other users of a_T outside the pattern. In that case S2 will + be marked as relevant (as well as S3), and both S2 and S3 will be analyzed + and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will + be recorded in S3. 
*/ void vect_pattern_recog (loop_vec_info loop_vinfo) @@ -925,7 +1056,7 @@ unsigned int nbbs = loop->num_nodes; gimple_stmt_iterator si; unsigned int i, j; - gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *); + gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vect_pattern_recog ==="); Index: gcc-4_6-branch/gcc/tree-vect-slp.c =================================================================== --- gcc-4_6-branch.orig/gcc/tree-vect-slp.c 2012-01-09 15:02:15.000000000 -0800 +++ gcc-4_6-branch/gcc/tree-vect-slp.c 2012-01-09 15:03:29.160918806 -0800 @@ -152,7 +152,9 @@ if (loop && def_stmt && gimple_bb (def_stmt) && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) && vinfo_for_stmt (def_stmt) - && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))) + && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)) + && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt)) + && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) { if (!*first_stmt_dt0) *pattern0 = true; Index: gcc-4_6-branch/gcc/tree-vect-stmts.c =================================================================== --- gcc-4_6-branch.orig/gcc/tree-vect-stmts.c 2012-01-09 15:02:15.000000000 -0800 +++ gcc-4_6-branch/gcc/tree-vect-stmts.c 2012-01-09 15:06:23.636927250 -0800 @@ -126,33 +126,72 @@ static void vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt, - enum vect_relevant relevant, bool live_p) + enum vect_relevant relevant, bool live_p, + bool used_in_pattern) { stmt_vec_info stmt_info = vinfo_for_stmt (stmt); enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info); bool save_live_p = STMT_VINFO_LIVE_P (stmt_info); + gimple pattern_stmt; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p); + /* If this stmt is an original stmt in a pattern, we might need to mark its + related pattern stmt instead of the original stmt. However, such stmts + may have their own uses that are not in any pattern, in such cases the + stmt itself should be marked. */ if (STMT_VINFO_IN_PATTERN_P (stmt_info)) { - gimple pattern_stmt; + bool found = false; + if (!used_in_pattern) + { + imm_use_iterator imm_iter; + use_operand_p use_p; + gimple use_stmt; + tree lhs; + + if (is_gimple_assign (stmt)) + lhs = gimple_assign_lhs (stmt); + else + lhs = gimple_call_lhs (stmt); + + /* This use is out of pattern use, if LHS has other uses that are + pattern uses, we should mark the stmt itself, and not the pattern + stmt. */ + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) + { + if (is_gimple_debug (USE_STMT (use_p))) + continue; + use_stmt = USE_STMT (use_p); + + if (vinfo_for_stmt (use_stmt) + && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt))) + { + found = true; + break; + } + } + } + + if (!found) + { + /* This is the last stmt in a sequence that was detected as a + pattern that can potentially be vectorized. Don't mark the stmt + as relevant/live because it's not going to be vectorized. + Instead mark the pattern-stmt that replaces it. */ + + pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); - /* This is the last stmt in a sequence that was detected as a - pattern that can potentially be vectorized. Don't mark the stmt - as relevant/live because it's not going to be vectorized. - Instead mark the pattern-stmt that replaces it. */ - - pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); - - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "last stmt in pattern. 
don't mark relevant/live."); - stmt_info = vinfo_for_stmt (pattern_stmt); - gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt); - save_relevant = STMT_VINFO_RELEVANT (stmt_info); - save_live_p = STMT_VINFO_LIVE_P (stmt_info); - stmt = pattern_stmt; + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "last stmt in pattern. don't mark" + " relevant/live."); + stmt_info = vinfo_for_stmt (pattern_stmt); + gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt); + save_relevant = STMT_VINFO_RELEVANT (stmt_info); + save_live_p = STMT_VINFO_LIVE_P (stmt_info); + stmt = pattern_stmt; + } } STMT_VINFO_LIVE_P (stmt_info) |= live_p; @@ -437,7 +476,8 @@ } } - vect_mark_relevant (worklist, def_stmt, relevant, live_p); + vect_mark_relevant (worklist, def_stmt, relevant, live_p, + is_pattern_stmt_p (stmt_vinfo)); return true; } @@ -494,7 +534,7 @@ } if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p)) - vect_mark_relevant (&worklist, phi, relevant, live_p); + vect_mark_relevant (&worklist, phi, relevant, live_p, false); } for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) { @@ -506,7 +546,7 @@ } if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p)) - vect_mark_relevant (&worklist, stmt, relevant, live_p); + vect_mark_relevant (&worklist, stmt, relevant, live_p, false); } } @@ -613,42 +653,55 @@ if (is_gimple_assign (stmt)) { tree rhs = gimple_assign_rhs1 (stmt); - if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) - == GIMPLE_SINGLE_RHS) + unsigned int op_num; + tree op; + switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))) { - unsigned int op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 - (stmt)); - for (i = 0; i < op_num; i++) - { - tree op = TREE_OPERAND (rhs, i); - if (!process_use (stmt, op, loop_vinfo, live_p, relevant, - &worklist)) + case GIMPLE_SINGLE_RHS: + op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)); + for (i = 0; i < op_num; i++) { - VEC_free (gimple, heap, worklist); - return false; + op = TREE_OPERAND (rhs, i); + if (!process_use (stmt, op, loop_vinfo, live_p, relevant, + &worklist)) + { + VEC_free (gimple, heap, worklist); + return false; + } } - } - } - else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) - == GIMPLE_BINARY_RHS) - { - tree op = gimple_assign_rhs1 (stmt); - if (!process_use (stmt, op, loop_vinfo, live_p, relevant, - &worklist)) - { - VEC_free (gimple, heap, worklist); - return false; - } - op = gimple_assign_rhs2 (stmt); - if (!process_use (stmt, op, loop_vinfo, live_p, relevant, - &worklist)) - { - VEC_free (gimple, heap, worklist); - return false; - } + break; + + case GIMPLE_BINARY_RHS: + op = gimple_assign_rhs1 (stmt); + if (!process_use (stmt, op, loop_vinfo, live_p, relevant, + &worklist)) + { + VEC_free (gimple, heap, worklist); + return false; + } + op = gimple_assign_rhs2 (stmt); + if (!process_use (stmt, op, loop_vinfo, live_p, relevant, + &worklist)) + { + VEC_free (gimple, heap, worklist); + return false; + } + break; + + case GIMPLE_UNARY_RHS: + op = gimple_assign_rhs1 (stmt); + if (!process_use (stmt, op, loop_vinfo, live_p, relevant, + &worklist)) + { + VEC_free (gimple, heap, worklist); + return false; + } + + break; + + default: + return false; } - else - return false; } else if (is_gimple_call (stmt)) { @@ -1210,7 +1263,14 @@ /* Get the def from the vectorized stmt. */ def_stmt_info = vinfo_for_stmt (def_stmt); + vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); + /* Get vectorized pattern statement. 
*/ + if (!vec_stmt + && STMT_VINFO_IN_PATTERN_P (def_stmt_info) + && !STMT_VINFO_RELEVANT (def_stmt_info)) + vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt ( + STMT_VINFO_RELATED_STMT (def_stmt_info))); gcc_assert (vec_stmt); if (gimple_code (vec_stmt) == GIMPLE_PHI) vec_oprnd = PHI_RESULT (vec_stmt); @@ -4894,6 +4954,7 @@ enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info); bool ok; tree scalar_type, vectype; + gimple pattern_stmt; if (vect_print_dump_info (REPORT_DETAILS)) { @@ -4915,16 +4976,22 @@ - any LABEL_EXPRs in the loop - computations that are used only for array indexing or loop control. In basic blocks we only analyze statements that are a part of some SLP - instance, therefore, all the statements are relevant. */ + instance, therefore, all the statements are relevant. + + Pattern statement need to be analyzed instead of the original statement + if the original statement is not relevant. Otherwise, we analyze both + statements. */ + pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); if (!STMT_VINFO_RELEVANT_P (stmt_info) && !STMT_VINFO_LIVE_P (stmt_info)) { - gimple pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); if (STMT_VINFO_IN_PATTERN_P (stmt_info) + && pattern_stmt && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) { + /* Analyze PATTERN_STMT instead of the original stmt. */ stmt = pattern_stmt; stmt_info = vinfo_for_stmt (pattern_stmt); if (vect_print_dump_info (REPORT_DETAILS)) @@ -4941,6 +5008,21 @@ return true; } } + else if (STMT_VINFO_IN_PATTERN_P (stmt_info) + && pattern_stmt + && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) + || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) + { + /* Analyze PATTERN_STMT too. */ + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "==> examining pattern statement: "); + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + + if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node)) + return false; + } switch (STMT_VINFO_DEF_TYPE (stmt_info)) { @@ -5074,7 +5156,6 @@ bool is_store = false; gimple vec_stmt = NULL; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - gimple orig_stmt_in_pattern; bool done; switch (STMT_VINFO_TYPE (stmt_info)) @@ -5213,21 +5294,7 @@ } if (vec_stmt) - { STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; - orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info); - if (orig_stmt_in_pattern) - { - stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern); - /* STMT was inserted by the vectorizer to replace a computation idiom. - ORIG_STMT_IN_PATTERN is a stmt in the original sequence that - computed this idiom. We need to record a pointer to VEC_STMT in - the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the - documentation of vect_pattern_recog. 
*/ - if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) - STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt; - } - } return is_store; } @@ -5605,8 +5672,12 @@ || *dt == vect_nested_cycle) { stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt); - if (STMT_VINFO_IN_PATTERN_P (stmt_info)) + + if (STMT_VINFO_IN_PATTERN_P (stmt_info) + && !STMT_VINFO_RELEVANT (stmt_info) + && !STMT_VINFO_LIVE_P (stmt_info)) stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); + *vectype = STMT_VINFO_VECTYPE (stmt_info); gcc_assert (*vectype != NULL_TREE); } Index: gcc-4_6-branch/gcc/tree-vectorizer.h =================================================================== --- gcc-4_6-branch.orig/gcc/tree-vectorizer.h 2012-01-09 15:02:15.000000000 -0800 +++ gcc-4_6-branch/gcc/tree-vectorizer.h 2012-01-09 15:03:29.164918806 -0800 @@ -890,7 +890,7 @@ /* Pattern recognition functions. Additional pattern recognition functions can (and will) be added in the future. */ -typedef gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *); +typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); #define NUM_PATTERNS 4 void vect_pattern_recog (loop_vec_info);