diff options
author | Khem Raj <raj.khem@gmail.com> | 2011-11-22 07:29:59 -0800 |
---|---|---|
committer | Khem Raj <raj.khem@gmail.com> | 2011-12-03 10:59:33 -0800 |
commit | 2378ee8f21741abd23e434260a88c62cb0e151f1 (patch) | |
tree | 036eef0d3a08a5cc79de7c4233bda78031b7560e /meta-oe/recipes-devtools | |
parent | a2fbb83f951a522f3a1eb2b627847f549a0c245a (diff) | |
download | meta-openembedded-contrib-2378ee8f21741abd23e434260a88c62cb0e151f1.tar.gz |
gcc-4.6: Bring in latest linaro patches
I have tested it on angstrom by successfully building console-image
and systemd-gnome-image for all supported qemu targets.
Signed-off-by: Khem Raj <raj.khem@gmail.com>
Diffstat (limited to 'meta-oe/recipes-devtools')
24 files changed, 10087 insertions, 1 deletions
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106814.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106814.patch new file mode 100644 index 0000000000..91b2191cb8 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106814.patch @@ -0,0 +1,80 @@ +2011-09-22 Revital Eres <revital.eres@linaro.org> + + gcc/ + Backport from trunk -r178804: + modulo-sched.c (remove_node_from_ps): Return void + instead of bool. + (optimize_sc): Adjust call to remove_node_from_ps. + (sms_schedule): Add print info. + +=== modified file 'gcc/modulo-sched.c' +--- old/gcc/modulo-sched.c 2011-08-09 04:51:48 +0000 ++++ new/gcc/modulo-sched.c 2011-09-14 11:06:06 +0000 +@@ -211,7 +211,7 @@ + static bool try_scheduling_node_in_cycle (partial_schedule_ptr, ddg_node_ptr, + int, int, sbitmap, int *, sbitmap, + sbitmap); +-static bool remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr); ++static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr); + + #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap) + #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time) +@@ -834,8 +834,7 @@ + if (next_ps_i->node->cuid == g->closing_branch->cuid) + break; + +- gcc_assert (next_ps_i); +- gcc_assert (remove_node_from_ps (ps, next_ps_i)); ++ remove_node_from_ps (ps, next_ps_i); + success = + try_scheduling_node_in_cycle (ps, g->closing_branch, + g->closing_branch->cuid, c, +@@ -1485,8 +1484,8 @@ + if (dump_file) + { + fprintf (dump_file, +- "SMS succeeded %d %d (with ii, sc)\n", ps->ii, +- stage_count); ++ "%s:%d SMS succeeded %d %d (with ii, sc)\n", ++ insn_file (tail), insn_line (tail), ps->ii, stage_count); + print_partial_schedule (ps, dump_file); + } + +@@ -2810,22 +2809,18 @@ + } + + +-/* Removes the given PS_INSN from the partial schedule. Returns false if the +- node is not found in the partial schedule, else returns true. 
*/ +-static bool ++/* Removes the given PS_INSN from the partial schedule. */ ++static void + remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i) + { + int row; + +- if (!ps || !ps_i) +- return false; +- ++ gcc_assert (ps && ps_i); ++ + row = SMODULO (ps_i->cycle, ps->ii); + if (! ps_i->prev_in_row) + { +- if (ps_i != ps->rows[row]) +- return false; +- ++ gcc_assert (ps_i == ps->rows[row]); + ps->rows[row] = ps_i->next_in_row; + if (ps->rows[row]) + ps->rows[row]->prev_in_row = NULL; +@@ -2839,7 +2834,7 @@ + + ps->rows_length[row] -= 1; + free (ps_i); +- return true; ++ return; + } + + /* Unlike what literature describes for modulo scheduling (which focuses + diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106815.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106815.patch new file mode 100644 index 0000000000..16779bbf1e --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106815.patch @@ -0,0 +1,528 @@ +2011-09-25 Ira Rosen <ira.rosen@linaro.org> + + gcc/testsuite/ + * lib/target-supports.exp (check_effective_target_vect_multiple_sizes): + Replace check_effective_target_arm_neon with + check_effective_target_arm_neon_ok. + + Backport from mainline: + + 2011-09-06 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * config/arm/arm.c (arm_preferred_simd_mode): Check + TARGET_NEON_VECTORIZE_DOUBLE instead of + TARGET_NEON_VECTORIZE_QUAD. + (arm_autovectorize_vector_sizes): Likewise. + * config/arm/arm.opt (mvectorize-with-neon-quad): Make inverse + mask of mvectorize-with-neon-double. Add RejectNegative. + (mvectorize-with-neon-double): New. + + gcc/testsuite/ + * lib/target-supports.exp (check_effective_target_vect_multiple_sizes): + New procedure. + (add_options_for_quad_vectors): Replace with ... + (add_options_for_double_vectors): ... this. 
+ * gfortran.dg/vect/pr19049.f90: Expect more printings on targets that + support multiple vector sizes since the vectorizer attempts to + vectorize with both vector sizes. + * gcc.dg/vect/no-vfa-vect-79.c, + gcc.dg/vect/no-vfa-vect-102a.c, gcc.dg/vect/vect-outer-1a.c, + gcc.dg/vect/vect-outer-1b.c, gcc.dg/vect/vect-outer-2b.c, + gcc.dg/vect/vect-outer-3a.c, gcc.dg/vect/no-vfa-vect-37.c, + gcc.dg/vect/vect-outer-3b.c, gcc.dg/vect/no-vfa-vect-101.c, + gcc.dg/vect/no-vfa-vect-102.c, gcc.dg/vect/vect-reduc-dot-s8b.c, + gcc.dg/vect/vect-outer-1.c, gcc.dg/vect/vect-104.c: Likewise. + * gcc.dg/vect/vect-42.c: Run with 64 bit vectors if applicable. + * gcc.dg/vect/vect-multitypes-6.c, gcc.dg/vect/vect-52.c, + gcc.dg/vect/vect-54.c, gcc.dg/vect/vect-46.c, gcc.dg/vect/vect-48.c, + gcc.dg/vect/vect-96.c, gcc.dg/vect/vect-multitypes-3.c, + gcc.dg/vect/vect-40.c: Likewise. + * gcc.dg/vect/vect-outer-5.c: Remove quad-vectors option as + redundant. + * gcc.dg/vect/vect-109.c, gcc.dg/vect/vect-peel-1.c, + gcc.dg/vect/vect-peel-2.c, gcc.dg/vect/slp-25.c, + gcc.dg/vect/vect-multitypes-1.c, gcc.dg/vect/slp-3.c, + gcc.dg/vect/no-vfa-pr29145.c, gcc.dg/vect/vect-multitypes-4.c: + Likewise. + * gcc.dg/vect/vect-peel-4.c: Make ia global. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-09-15 09:45:31 +0000 ++++ new/gcc/config/arm/arm.c 2011-09-19 07:44:24 +0000 +@@ -22974,7 +22974,7 @@ + return false; + } + +-/* Use the option -mvectorize-with-neon-quad to override the use of doubleword ++/* Use the option -mvectorize-with-neon-double to override the use of quadword + registers when autovectorizing for Neon, at least until multiple vector + widths are supported properly by the middle-end. */ + +@@ -22985,15 +22985,15 @@ + switch (mode) + { + case SFmode: +- return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode; ++ return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode; + case SImode: +- return TARGET_NEON_VECTORIZE_QUAD ?
V4SImode : V2SImode; ++ return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode; + case HImode: +- return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode; ++ return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode; + case QImode: +- return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode; ++ return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode; + case DImode: +- if (TARGET_NEON_VECTORIZE_QUAD) ++ if (!TARGET_NEON_VECTORIZE_DOUBLE) + return V2DImode; + break; + +@@ -24226,7 +24226,7 @@ + static unsigned int + arm_autovectorize_vector_sizes (void) + { +- return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0; ++ return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8); + } + + static bool + +=== modified file 'gcc/config/arm/arm.opt' +--- old/gcc/config/arm/arm.opt 2009-06-18 11:24:10 +0000 ++++ new/gcc/config/arm/arm.opt 2011-09-19 07:44:24 +0000 +@@ -158,9 +158,13 @@ + Assume big endian bytes, little endian words + + mvectorize-with-neon-quad +-Target Report Mask(NEON_VECTORIZE_QUAD) ++Target Report RejectNegative InverseMask(NEON_VECTORIZE_DOUBLE) + Use Neon quad-word (rather than double-word) registers for vectorization + ++mvectorize-with-neon-double ++Target Report RejectNegative Mask(NEON_VECTORIZE_DOUBLE) ++Use Neon double-word (rather than quad-word) registers for vectorization ++ + mword-relocations + Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS) + Only generate absolute relocations on word sized values. 
+ +=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c' +--- old/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2011-04-28 11:46:58 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c' +--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c 2007-09-04 12:05:19 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c 2011-09-19 07:44:24 +0000 +@@ -45,6 +45,7 @@ + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c' +--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c 2007-09-12 07:48:44 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c 2011-09-19 07:44:24 +0000 +@@ -53,6 +53,7 @@ + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c' +--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c 2007-09-12 07:48:44 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c 
2011-09-19 07:44:24 +0000 +@@ -53,6 +53,7 @@ + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c' +--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c 2009-05-08 12:39:01 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c 2011-09-19 07:44:24 +0000 +@@ -58,5 +58,6 @@ + If/when the aliasing problems are resolved, unalignment may + prevent vectorization on some targets. */ + /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { xfail *-*-* } } } */ +-/* { dg-final { scan-tree-dump-times "can't determine dependence between" 2 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "can't determine dependence" 4 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c' +--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c 2009-05-08 12:39:01 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c 2011-09-19 07:44:24 +0000 +@@ -46,5 +46,6 @@ + If/when the aliasing problems are resolved, unalignment may + prevent vectorization on some targets. 
*/ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ +-/* { dg-final { scan-tree-dump-times "can't determine dependence between" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/slp-25.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-25.c 2010-10-04 14:59:30 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-25.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/slp-3.c' +--- old/gcc/testsuite/gcc.dg/vect/slp-3.c 2011-04-28 11:46:58 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/slp-3.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-104.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-104.c 2007-09-12 07:48:44 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-104.c 2011-09-19 07:44:24 +0000 +@@ -64,6 +64,7 @@ + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-109.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-109.c 2010-10-04 14:59:30 
+0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-109.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-40.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-40.c 2009-05-25 14:18:21 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-40.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_float } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-42.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-42.c 2010-10-04 14:59:30 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-42.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_float } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-46.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-46.c 2009-05-25 14:18:21 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-46.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_float } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-48.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-48.c 2009-11-04 10:22:22 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-48.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_float } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-52.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-52.c 2009-11-04 10:22:22 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-52.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_float } */ ++/* { dg-add-options double_vectors } */ + + #include 
<stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-54.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-54.c 2009-10-27 11:46:07 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-54.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_float } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-96.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-96.c 2010-10-04 14:59:30 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-96.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_int } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c 2010-10-04 14:59:30 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c 2009-11-04 10:22:22 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_int } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c 2010-10-04 14:59:30 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c' +--- 
old/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c 2009-11-10 18:01:22 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c 2011-09-19 07:44:24 +0000 +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_int } */ ++/* { dg-add-options double_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-1.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-outer-1.c 2009-05-08 12:39:01 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-outer-1.c 2011-09-19 07:44:24 +0000 +@@ -22,5 +22,6 @@ + } + + /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ +-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-1a.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c 2009-06-16 06:21:12 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c 2011-09-19 07:44:24 +0000 +@@ -20,5 +20,6 @@ + } + + /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ +-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-1b.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-outer-1b.c 2007-08-19 11:02:48 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-outer-1b.c 2011-09-19 07:44:24 +0000 +@@ -22,5 +22,6 @@ + } + + /* { dg-final { 
scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ +-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-2b.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-outer-2b.c 2009-05-08 12:39:01 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-outer-2b.c 2011-09-19 07:44:24 +0000 +@@ -37,5 +37,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "strided access in outer loop." 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-3a.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-outer-3a.c 2009-05-08 12:39:01 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-outer-3a.c 2011-09-19 07:44:24 +0000 +@@ -49,5 +49,6 @@ + } + + /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */ +-/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 2 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 2 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 3 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-3b.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-outer-3b.c 2009-05-08 12:39:01 +0000 ++++ 
new/gcc/testsuite/gcc.dg/vect/vect-outer-3b.c 2011-09-19 07:44:24 +0000 +@@ -49,5 +49,6 @@ + } + + /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ +-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "strided access in outer loop" 4 "vect" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-5.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2011-04-28 11:46:58 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_float } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include <signal.h> + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-peel-1.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-peel-1.c 2011-01-10 12:41:40 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-peel-1.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-peel-2.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-peel-2.c 2011-01-10 12:41:40 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-peel-2.c 2011-09-19 07:44:24 +0000 +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include <stdarg.h> + #include "tree-vect.h" + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-peel-4.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-peel-4.c 2011-01-10 12:41:40 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-peel-4.c 2011-09-19 07:44:24 +0000 +@@ -6,12 +6,12 @@ + #define N 128 + + int ib[N+7]; ++int ia[N+1]; + + __attribute__ ((noinline)) + int main1 () + { + 
int i; +- int ia[N+1]; + + /* Don't peel keeping one load and the store aligned. */ + for (i = 0; i <= N; i++) + +=== modified file 'gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c 2010-05-27 12:23:45 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c 2011-09-19 07:44:24 +0000 +@@ -58,7 +58,8 @@ + } + + /* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" { xfail *-*-* } } } */ +-/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_multiple_sizes } } } */ + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ + + +=== modified file 'gcc/testsuite/gfortran.dg/vect/pr19049.f90' +--- old/gcc/testsuite/gfortran.dg/vect/pr19049.f90 2005-07-25 11:05:07 +0000 ++++ new/gcc/testsuite/gfortran.dg/vect/pr19049.f90 2011-09-19 07:44:24 +0000 +@@ -19,6 +19,7 @@ + end + + ! { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } +-! { dg-final { scan-tree-dump-times "complicated access pattern" 1 "vect" } } ++! { dg-final { scan-tree-dump-times "complicated access pattern" 1 "vect" { xfail vect_multiple_sizes } } } ++! { dg-final { scan-tree-dump-times "complicated access pattern" 2 "vect" { target vect_multiple_sizes } } } + ! 
{ dg-final { cleanup-tree-dump "vect" } } + + +=== modified file 'gcc/testsuite/lib/target-supports.exp' +--- old/gcc/testsuite/lib/target-supports.exp 2011-08-13 08:32:32 +0000 ++++ new/gcc/testsuite/lib/target-supports.exp 2011-09-20 07:54:28 +0000 +@@ -3265,6 +3265,24 @@ + }] + } + ++# Return 1 if the target supports multiple vector sizes ++ ++proc check_effective_target_vect_multiple_sizes { } { ++ global et_vect_multiple_sizes ++ ++ if [info exists et_vect_multiple_sizes_saved] { ++ verbose "check_effective_target_vect_multiple_sizes: using cached result" 2 ++ } else { ++ set et_vect_multiple_sizes_saved 0 ++ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } { ++ set et_vect_multiple_sizes_saved 1 ++ } ++ } ++ ++ verbose "check_effective_target_vect_multiple_sizes: returning $et_vect_multiple_sizes_saved" 2 ++ return $et_vect_multiple_sizes_saved ++} ++ + # Return 1 if the target supports section-anchors + + proc check_effective_target_section_anchors { } { +@@ -3648,11 +3666,11 @@ + return $flags + } + +-# Add to FLAGS the flags needed to enable 128-bit vectors. ++# Add to FLAGS the flags needed to enable 64-bit vectors. + +-proc add_options_for_quad_vectors { flags } { ++proc add_options_for_double_vectors { flags } { + if [is-effective-target arm_neon_ok] { +- return "$flags -mvectorize-with-neon-quad" ++ return "$flags -mvectorize-with-neon-double" + } + + return $flags + diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106816.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106816.patch new file mode 100644 index 0000000000..2f70b1b9c2 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106816.patch @@ -0,0 +1,387 @@ +2011-09-28 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-09-28 Richard Sandiford <richard.sandiford@linaro.org> + + * config/arm/neon.md (neon_move_lo_quad_<mode>): Delete. 
+ (neon_move_hi_quad_<mode>): Likewise. + (move_hi_quad_<mode>, move_lo_quad_<mode>): Use subreg moves. + +2011-09-28 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-09-27 Richard Sandiford <richard.sandiford@linaro.org> + + * config/arm/neon.md (neon_vget_highv16qi, neon_vget_highv8hi) + (neon_vget_highv4si, neon_vget_highv4sf, neon_vget_highv2di) + (neon_vget_lowv16qi, neon_vget_lowv8hi, neon_vget_lowv4si) + (neon_vget_lowv4sf, neon_vget_lowv2di): Turn into define_expands + that produce subreg moves. Define using VQX iterators. + +2011-09-28 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-09-14 Richard Sandiford <richard.sandiford@linaro.org> + + * simplify-rtx.c (simplify_subreg): Check that the inner mode is + a scalar integer before applying integer-only optimisations to + inner arithmetic. + +=== modified file 'gcc/config/arm/neon.md' +--- old/gcc/config/arm/neon.md 2011-07-04 14:03:49 +0000 ++++ new/gcc/config/arm/neon.md 2011-09-28 15:14:59 +0000 +@@ -1235,66 +1235,14 @@ + (const_string "neon_int_1") (const_string "neon_int_5")))] + ) + +-; FIXME: We wouldn't need the following insns if we could write subregs of +-; vector registers. Make an attempt at removing unnecessary moves, though +-; we're really at the mercy of the register allocator. 
+- +-(define_insn "neon_move_lo_quad_<mode>" +- [(set (match_operand:ANY128 0 "s_register_operand" "+w") +- (vec_concat:ANY128 +- (match_operand:<V_HALF> 1 "s_register_operand" "w") +- (vec_select:<V_HALF> +- (match_dup 0) +- (match_operand:ANY128 2 "vect_par_constant_high" ""))))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src) +- return "vmov\t%e0, %P1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_move_hi_quad_<mode>" +- [(set (match_operand:ANY128 0 "s_register_operand" "+w") +- (vec_concat:ANY128 +- (vec_select:<V_HALF> +- (match_dup 0) +- (match_operand:ANY128 2 "vect_par_constant_low" "")) +- (match_operand:<V_HALF> 1 "s_register_operand" "w")))] +- +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src) +- return "vmov\t%f0, %P1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- + (define_expand "move_hi_quad_<mode>" + [(match_operand:ANY128 0 "s_register_operand" "") + (match_operand:<V_HALF> 1 "s_register_operand" "")] + "TARGET_NEON" + { +- rtvec v = rtvec_alloc (<V_mode_nunits>/2); +- rtx t1; +- int i; +- +- for (i=0; i < (<V_mode_nunits>/2); i++) +- RTVEC_ELT (v, i) = GEN_INT (i); +- +- t1 = gen_rtx_PARALLEL (<MODE>mode, v); +- emit_insn (gen_neon_move_hi_quad_<mode> (operands[0], operands[1], t1)); +- ++ emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode, ++ GET_MODE_SIZE (<V_HALF>mode)), ++ operands[1]); + DONE; + }) + +@@ -1303,16 +1251,9 @@ + (match_operand:<V_HALF> 1 "s_register_operand" "")] + "TARGET_NEON" + { +- rtvec v = rtvec_alloc (<V_mode_nunits>/2); +- rtx t1; +- int i; +- +- for (i=0; i < (<V_mode_nunits>/2); i++) +- RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i); +- +- t1 = gen_rtx_PARALLEL (<MODE>mode, v); +- emit_insn (gen_neon_move_lo_quad_<mode> (operands[0], operands[1], t1)); +- ++ emit_move_insn 
(simplify_gen_subreg (<V_HALF>mode, operands[0], ++ <MODE>mode, 0), ++ operands[1]); + DONE; + }) + +@@ -2950,183 +2891,27 @@ + (set_attr "neon_type" "neon_bp_simple")] + ) + +-(define_insn "neon_vget_highv16qi" +- [(set (match_operand:V8QI 0 "s_register_operand" "=w") +- (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w") +- (parallel [(const_int 8) (const_int 9) +- (const_int 10) (const_int 11) +- (const_int 12) (const_int 13) +- (const_int 14) (const_int 15)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src + 2) +- return "vmov\t%P0, %f1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_highv8hi" +- [(set (match_operand:V4HI 0 "s_register_operand" "=w") +- (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w") +- (parallel [(const_int 4) (const_int 5) +- (const_int 6) (const_int 7)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src + 2) +- return "vmov\t%P0, %f1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_highv4si" +- [(set (match_operand:V2SI 0 "s_register_operand" "=w") +- (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w") +- (parallel [(const_int 2) (const_int 3)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src + 2) +- return "vmov\t%P0, %f1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_highv4sf" +- [(set (match_operand:V2SF 0 "s_register_operand" "=w") +- (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w") +- (parallel [(const_int 2) (const_int 3)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src + 2) +- return "vmov\t%P0, %f1"; +- else +- return ""; +-} +- [(set_attr 
"neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_highv2di" +- [(set (match_operand:DI 0 "s_register_operand" "=w") +- (vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w") +- (parallel [(const_int 1)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src + 2) +- return "vmov\t%P0, %f1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_lowv16qi" +- [(set (match_operand:V8QI 0 "s_register_operand" "=w") +- (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w") +- (parallel [(const_int 0) (const_int 1) +- (const_int 2) (const_int 3) +- (const_int 4) (const_int 5) +- (const_int 6) (const_int 7)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src) +- return "vmov\t%P0, %e1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_lowv8hi" +- [(set (match_operand:V4HI 0 "s_register_operand" "=w") +- (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w") +- (parallel [(const_int 0) (const_int 1) +- (const_int 2) (const_int 3)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src) +- return "vmov\t%P0, %e1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_lowv4si" +- [(set (match_operand:V2SI 0 "s_register_operand" "=w") +- (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w") +- (parallel [(const_int 0) (const_int 1)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src) +- return "vmov\t%P0, %e1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_lowv4sf" +- [(set (match_operand:V2SF 0 "s_register_operand" "=w") +- (vec_select:V2SF (match_operand:V4SF 1 
"s_register_operand" "w") +- (parallel [(const_int 0) (const_int 1)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src) +- return "vmov\t%P0, %e1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) +- +-(define_insn "neon_vget_lowv2di" +- [(set (match_operand:DI 0 "s_register_operand" "=w") +- (vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w") +- (parallel [(const_int 0)])))] +- "TARGET_NEON" +-{ +- int dest = REGNO (operands[0]); +- int src = REGNO (operands[1]); +- +- if (dest != src) +- return "vmov\t%P0, %e1"; +- else +- return ""; +-} +- [(set_attr "neon_type" "neon_bp_simple")] +-) ++(define_expand "neon_vget_high<mode>" ++ [(match_operand:<V_HALF> 0 "s_register_operand") ++ (match_operand:VQX 1 "s_register_operand")] ++ "TARGET_NEON" ++{ ++ emit_move_insn (operands[0], ++ simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, ++ GET_MODE_SIZE (<V_HALF>mode))); ++ DONE; ++}) ++ ++(define_expand "neon_vget_low<mode>" ++ [(match_operand:<V_HALF> 0 "s_register_operand") ++ (match_operand:VQX 1 "s_register_operand")] ++ "TARGET_NEON" ++{ ++ emit_move_insn (operands[0], ++ simplify_gen_subreg (<V_HALF>mode, operands[1], ++ <MODE>mode, 0)); ++ DONE; ++}) + + (define_insn "neon_vcvt<mode>" + [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") + +=== modified file 'gcc/simplify-rtx.c' +--- old/gcc/simplify-rtx.c 2011-08-13 08:32:32 +0000 ++++ new/gcc/simplify-rtx.c 2011-09-28 15:11:59 +0000 +@@ -5567,6 +5567,7 @@ + /* Optimize SUBREG truncations of zero and sign extended values. 
*/ + if ((GET_CODE (op) == ZERO_EXTEND + || GET_CODE (op) == SIGN_EXTEND) ++ && SCALAR_INT_MODE_P (innermode) + && GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode)) + { + unsigned int bitpos = subreg_lsb_1 (outermode, innermode, byte); +@@ -5605,6 +5606,7 @@ + if ((GET_CODE (op) == LSHIFTRT + || GET_CODE (op) == ASHIFTRT) + && SCALAR_INT_MODE_P (outermode) ++ && SCALAR_INT_MODE_P (innermode) + /* Ensure that OUTERMODE is at least twice as wide as the INNERMODE + to avoid the possibility that an outer LSHIFTRT shifts by more + than the sign extension's sign_bit_copies and introduces zeros +@@ -5624,6 +5626,7 @@ + if ((GET_CODE (op) == LSHIFTRT + || GET_CODE (op) == ASHIFTRT) + && SCALAR_INT_MODE_P (outermode) ++ && SCALAR_INT_MODE_P (innermode) + && GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode) + && CONST_INT_P (XEXP (op, 1)) + && GET_CODE (XEXP (op, 0)) == ZERO_EXTEND +@@ -5638,6 +5641,7 @@ + the outer subreg is effectively a truncation to the original mode. */ + if (GET_CODE (op) == ASHIFT + && SCALAR_INT_MODE_P (outermode) ++ && SCALAR_INT_MODE_P (innermode) + && GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode) + && CONST_INT_P (XEXP (op, 1)) + && (GET_CODE (XEXP (op, 0)) == ZERO_EXTEND +@@ -5651,7 +5655,7 @@ + /* Recognize a word extraction from a multi-word subreg. 
*/ + if ((GET_CODE (op) == LSHIFTRT + || GET_CODE (op) == ASHIFTRT) +- && SCALAR_INT_MODE_P (outermode) ++ && SCALAR_INT_MODE_P (innermode) + && GET_MODE_BITSIZE (outermode) >= BITS_PER_WORD + && GET_MODE_BITSIZE (innermode) >= (2 * GET_MODE_BITSIZE (outermode)) + && CONST_INT_P (XEXP (op, 1)) +@@ -5673,6 +5677,7 @@ + + if ((GET_CODE (op) == LSHIFTRT + || GET_CODE (op) == ASHIFTRT) ++ && SCALAR_INT_MODE_P (innermode) + && MEM_P (XEXP (op, 0)) + && CONST_INT_P (XEXP (op, 1)) + && GET_MODE_SIZE (outermode) < GET_MODE_SIZE (GET_MODE (op)) + diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106817.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106817.patch new file mode 100644 index 0000000000..d44f8cf1a5 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106817.patch @@ -0,0 +1,290 @@ +2011-10-01 Revital Eres <revital.eres@linaro.org> + + gcc/ + Backport from mainline -r179380 and -r179381 + + * ddg.c (autoinc_var_is_used_p): New function. + (create_ddg_dep_from_intra_loop_link, + add_cross_iteration_register_deps): Call it. + * ddg.h (autoinc_var_is_used_p): Declare. + * modulo-sched.c (sms_schedule): Handle instructions with REG_INC. + (generate_reg_moves): Call autoinc_var_is_used_p. Skip + instructions that do not set a register and verify no regmoves + are created for !single_set instructions. + + gcc/testsuite/ + + * gcc.dg/sms-10.c: New file + +=== modified file 'gcc/ddg.c' +--- old/gcc/ddg.c 2011-07-31 11:29:10 +0000 ++++ new/gcc/ddg.c 2011-10-02 06:56:53 +0000 +@@ -145,6 +145,27 @@ + return rtx_mem_access_p (PATTERN (insn)); + } + ++/* Return true if DEF_INSN contains address being auto-inc or auto-dec ++ which is used in USE_INSN. Otherwise return false. The result is ++ being used to decide whether to remove the edge between def_insn and ++ use_insn when -fmodulo-sched-allow-regmoves is set. 
This function ++ doesn't need to consider the specific address register; no reg_moves ++ will be allowed for any life range defined by def_insn and used ++ by use_insn, if use_insn uses an address register auto-inc'ed by ++ def_insn. */ ++bool ++autoinc_var_is_used_p (rtx def_insn, rtx use_insn) ++{ ++ rtx note; ++ ++ for (note = REG_NOTES (def_insn); note; note = XEXP (note, 1)) ++ if (REG_NOTE_KIND (note) == REG_INC ++ && reg_referenced_p (XEXP (note, 0), PATTERN (use_insn))) ++ return true; ++ ++ return false; ++} ++ + /* Computes the dependence parameters (latency, distance etc.), creates + a ddg_edge and adds it to the given DDG. */ + static void +@@ -173,10 +194,15 @@ + compensate for that by generating reg-moves based on the life-range + analysis. The anti-deps that will be deleted are the ones which + have true-deps edges in the opposite direction (in other words +- the kernel has only one def of the relevant register). TODO: +- support the removal of all anti-deps edges, i.e. including those ++ the kernel has only one def of the relevant register). ++ If the address that is being auto-inc or auto-dec in DEST_NODE ++ is used in SRC_NODE then do not remove the edge to make sure ++ reg-moves will not be created for this address. ++ TODO: support the removal of all anti-deps edges, i.e. including those + whose register has multiple defs in the loop. */ +- if (flag_modulo_sched_allow_regmoves && (t == ANTI_DEP && dt == REG_DEP)) ++ if (flag_modulo_sched_allow_regmoves ++ && (t == ANTI_DEP && dt == REG_DEP) ++ && !autoinc_var_is_used_p (dest_node->insn, src_node->insn)) + { + rtx set; + +@@ -302,10 +328,14 @@ + gcc_assert (first_def_node); + + /* Always create the edge if the use node is a branch in +- order to prevent the creation of reg-moves. */ ++ order to prevent the creation of reg-moves. 
++ If the address that is being auto-inc or auto-dec in LAST_DEF ++ is used in USE_INSN then do not remove the edge to make sure ++ reg-moves will not be created for that address. */ + if (DF_REF_ID (last_def) != DF_REF_ID (first_def) + || !flag_modulo_sched_allow_regmoves +- || JUMP_P (use_node->insn)) ++ || JUMP_P (use_node->insn) ++ || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn)) + create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP, + REG_DEP, 1); + + +=== modified file 'gcc/ddg.h' +--- old/gcc/ddg.h 2009-11-25 10:55:54 +0000 ++++ new/gcc/ddg.h 2011-10-02 06:56:53 +0000 +@@ -186,4 +186,6 @@ + int find_nodes_on_paths (sbitmap result, ddg_ptr, sbitmap from, sbitmap to); + int longest_simple_path (ddg_ptr, int from, int to, sbitmap via); + ++bool autoinc_var_is_used_p (rtx, rtx); ++ + #endif /* GCC_DDG_H */ + +=== modified file 'gcc/modulo-sched.c' +--- old/gcc/modulo-sched.c 2011-09-14 11:06:06 +0000 ++++ new/gcc/modulo-sched.c 2011-10-02 06:56:53 +0000 +@@ -477,7 +477,12 @@ + sbitmap *uses_of_defs; + rtx last_reg_move; + rtx prev_reg, old_reg; +- ++ rtx set = single_set (u->insn); ++ ++ /* Skip instructions that do not set a register. */ ++ if ((set && !REG_P (SET_DEST (set)))) ++ continue; ++ + /* Compute the number of reg_moves needed for u, by looking at life + ranges started at u (excluding self-loops). */ + for (e = u->out; e; e = e->next_out) +@@ -494,6 +499,20 @@ + && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src)) + nreg_moves4e--; + ++ if (nreg_moves4e >= 1) ++ { ++ /* !single_set instructions are not supported yet and ++ thus we do not except to encounter them in the loop ++ except from the doloop part. For the latter case ++ we assume no regmoves are generated as the doloop ++ instructions are tied to the branch with an edge. */ ++ gcc_assert (set); ++ /* If the instruction contains auto-inc register then ++ validate that the regmov is being generated for the ++ target regsiter rather then the inc'ed register. 
*/ ++ gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn)); ++ } ++ + nreg_moves = MAX (nreg_moves, nreg_moves4e); + } + +@@ -1266,12 +1285,10 @@ + continue; + } + +- /* Don't handle BBs with calls or barriers or auto-increment insns +- (to avoid creating invalid reg-moves for the auto-increment insns), ++ /* Don't handle BBs with calls or barriers + or !single_set with the exception of instructions that include + count_reg---these instructions are part of the control part + that do-loop recognizes. +- ??? Should handle auto-increment insns. + ??? Should handle insns defining subregs. */ + for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn)) + { +@@ -1282,7 +1299,6 @@ + || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn) + && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE + && !reg_mentioned_p (count_reg, insn)) +- || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0) + || (INSN_P (insn) && (set = single_set (insn)) + && GET_CODE (SET_DEST (set)) == SUBREG)) + break; +@@ -1296,8 +1312,6 @@ + fprintf (dump_file, "SMS loop-with-call\n"); + else if (BARRIER_P (insn)) + fprintf (dump_file, "SMS loop-with-barrier\n"); +- else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0) +- fprintf (dump_file, "SMS reg inc\n"); + else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn) + && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)) + fprintf (dump_file, "SMS loop-with-not-single-set\n"); + +=== added file 'gcc/testsuite/gcc.dg/sms-10.c' +--- old/gcc/testsuite/gcc.dg/sms-10.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/sms-10.c 2011-10-02 06:56:53 +0000 +@@ -0,0 +1,118 @@ ++ /* { dg-do run } */ ++ /* { dg-options "-O2 -fmodulo-sched -fmodulo-sched-allow-regmoves -fdump-rtl-sms" } */ ++ ++ ++typedef __SIZE_TYPE__ size_t; ++extern void *malloc (size_t); ++extern void free (void *); ++extern void abort (void); ++ ++struct regstat_n_sets_and_refs_t ++{ ++ int sets; ++ int refs; ++}; ++ ++struct regstat_n_sets_and_refs_t *regstat_n_sets_and_refs; ++ 
++struct df_reg_info ++{ ++ unsigned int n_refs; ++}; ++ ++struct df_d ++{ ++ struct df_reg_info **def_regs; ++ struct df_reg_info **use_regs; ++}; ++struct df_d *df; ++ ++static inline int ++REG_N_SETS (int regno) ++{ ++ return regstat_n_sets_and_refs[regno].sets; ++} ++ ++__attribute__ ((noinline)) ++ int max_reg_num (void) ++{ ++ return 100; ++} ++ ++__attribute__ ((noinline)) ++ void regstat_init_n_sets_and_refs (void) ++{ ++ unsigned int i; ++ unsigned int max_regno = max_reg_num (); ++ ++ for (i = 0; i < max_regno; i++) ++ { ++ (regstat_n_sets_and_refs[i].sets = (df->def_regs[(i)]->n_refs)); ++ (regstat_n_sets_and_refs[i].refs = ++ (df->use_regs[(i)]->n_refs) + REG_N_SETS (i)); ++ } ++} ++ ++int a_sets[100] = ++ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ++ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, ++ 40, 41, 42, ++ 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, ++ 62, 63, 64, ++ 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, ++ 84, 85, 86, ++ 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99 ++}; ++ ++int a_refs[100] = ++ { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, ++ 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, ++ 78, 80, 82, ++ 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, ++ 118, 120, ++ 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, ++ 152, 154, 156, ++ 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180, 182, 184, 186, ++ 188, 190, 192, ++ 194, 196, 198 ++}; ++ ++int ++main () ++{ ++ struct df_reg_info *b[100], *c[100]; ++ struct df_d df1; ++ size_t s = sizeof (struct df_reg_info); ++ struct regstat_n_sets_and_refs_t a[100]; ++ ++ df = &df1; ++ regstat_n_sets_and_refs = a; ++ int i; ++ ++ for (i = 0; i < 100; i++) ++ { ++ b[i] = (struct df_reg_info *) malloc (s); ++ b[i]->n_refs = i; ++ c[i] = (struct 
df_reg_info *) malloc (s); ++ c[i]->n_refs = i; ++ } ++ ++ df1.def_regs = b; ++ df1.use_regs = c; ++ regstat_init_n_sets_and_refs (); ++ ++ for (i = 0; i < 100; i++) ++ if ((a[i].sets != a_sets[i]) || (a[i].refs != a_refs[i])) ++ abort (); ++ ++ for (i = 0; i < 100; i++) ++ { ++ free (b[i]); ++ free (c[i]); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-rtl-dump-times "SMS succeeded" 1 "sms" { target powerpc*-*-* } } } */ ++/* { dg-final { cleanup-rtl-dump "sms" } } */ + diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106818.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106818.patch new file mode 100644 index 0000000000..ef98142bc4 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106818.patch @@ -0,0 +1,105 @@ +2011-10-03 Michael Hope <michael.hope@linaro.org> + + Backport from mainline: + + 2011-09-13 Sevak Sargsyan <sevak.sargsyan@ispras.ru> + + gcc/ + * config/arm/neon.md (neon_vabd<mode>_2, neon_vabd<mode>_3): New + define_insn patterns for combine. + + gcc/testsuite/ + * gcc.target/arm/neon-combine-sub-abs-into-vabd.c: New test. 
+ +=== modified file 'gcc/config/arm/neon.md' +--- old/gcc/config/arm/neon.md 2011-09-28 15:14:59 +0000 ++++ new/gcc/config/arm/neon.md 2011-10-03 01:32:17 +0000 +@@ -5428,3 +5428,32 @@ + emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg)); + DONE; + }) ++ ++(define_insn "neon_vabd<mode>_2" ++ [(set (match_operand:VDQ 0 "s_register_operand" "=w") ++ (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") ++ (match_operand:VDQ 2 "s_register_operand" "w"))))] ++ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" ++ "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" ++ [(set (attr "neon_type") ++ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0)) ++ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) ++ (const_string "neon_fp_vadd_ddd_vabs_dd") ++ (const_string "neon_fp_vadd_qqq_vabs_qq")) ++ (const_string "neon_int_5")))] ++) ++ ++(define_insn "neon_vabd<mode>_3" ++ [(set (match_operand:VDQ 0 "s_register_operand" "=w") ++ (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w") ++ (match_operand:VDQ 2 "s_register_operand" "w")] ++ UNSPEC_VSUB)))] ++ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" ++ "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" ++ [(set (attr "neon_type") ++ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0)) ++ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) ++ (const_string "neon_fp_vadd_ddd_vabs_dd") ++ (const_string "neon_fp_vadd_qqq_vabs_qq")) ++ (const_string "neon_int_5")))] ++) + +=== added file 'gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c' +--- old/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c 2011-10-03 01:32:17 +0000 +@@ -0,0 +1,50 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-O2 -funsafe-math-optimizations" } */ ++/* { 
dg-add-options arm_neon } */ ++ ++#include <arm_neon.h> ++float32x2_t f_sub_abs_to_vabd_32() ++{ ++ float32x2_t val1 = vdup_n_f32 (10); ++ float32x2_t val2 = vdup_n_f32 (30); ++ float32x2_t sres = vsub_f32(val1, val2); ++ float32x2_t res = vabs_f32 (sres); ++ ++ return res; ++} ++/* { dg-final { scan-assembler "vabd\.f32" } }*/ ++ ++#include <arm_neon.h> ++int8x8_t sub_abs_to_vabd_8() ++{ ++ int8x8_t val1 = vdup_n_s8 (10); ++ int8x8_t val2 = vdup_n_s8 (30); ++ int8x8_t sres = vsub_s8(val1, val2); ++ int8x8_t res = vabs_s8 (sres); ++ ++ return res; ++} ++/* { dg-final { scan-assembler "vabd\.s8" } }*/ ++ ++int16x4_t sub_abs_to_vabd_16() ++{ ++ int16x4_t val1 = vdup_n_s16 (10); ++ int16x4_t val2 = vdup_n_s16 (30); ++ int16x4_t sres = vsub_s16(val1, val2); ++ int16x4_t res = vabs_s16 (sres); ++ ++ return res; ++} ++/* { dg-final { scan-assembler "vabd\.s16" } }*/ ++ ++int32x2_t sub_abs_to_vabd_32() ++{ ++ int32x2_t val1 = vdup_n_s32 (10); ++ int32x2_t val2 = vdup_n_s32 (30); ++ int32x2_t sres = vsub_s32(val1, val2); ++ int32x2_t res = vabs_s32 (sres); ++ ++ return res; ++} ++/* { dg-final { scan-assembler "vabd\.s32" } }*/ + diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106819.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106819.patch new file mode 100644 index 0000000000..e097ec27fa --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106819.patch @@ -0,0 +1,436 @@ +2011-10-03 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-09-22 Richard Sandiford <richard.sandiford@linaro.org> + + * config/arm/predicates.md (expandable_comparison_operator): New + predicate, extracted from... + (arm_comparison_operator): ...here. + * config/arm/arm.md (cbranchsi4, cbranchsf4, cbranchdf4, cbranchdi4) + (cstoresi4, cstoresf4, cstoredf4, cstoredi4, movsicc, movsfcc) + (movdfcc): Use expandable_comparison_operator. 
+ + gcc/testsuite/ + Backport from mainline: + + 2011-09-22 Richard Sandiford <richard.sandiford@linaro.org> + + * gcc.target/arm/cmp-1.c: New test. + * gcc.target/arm/cmp-2.c: Likewise. + +2011-10-03 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-09-07 Richard Sandiford <richard.sandiford@linaro.org> + + PR target/49030 + * config/arm/arm-protos.h (maybe_get_arm_condition_code): Declare. + * config/arm/arm.c (maybe_get_arm_condition_code): New function, + reusing the old code from get_arm_condition_code. Return ARM_NV + for invalid comparison codes. + (get_arm_condition_code): Redefine in terms of + maybe_get_arm_condition_code. + * config/arm/predicates.md (arm_comparison_operator): Use + maybe_get_arm_condition_code. + + gcc/testsuite/ + Backport from mainline: + + 2011-09-07 Richard Sandiford <richard.sandiford@linaro.org> + + PR target/49030 + * gcc.dg/torture/pr49030.c: New test. + +=== modified file 'gcc/config/arm/arm-protos.h' +--- old/gcc/config/arm/arm-protos.h 2011-09-15 09:45:31 +0000 ++++ new/gcc/config/arm/arm-protos.h 2011-10-03 09:46:40 +0000 +@@ -180,6 +180,7 @@ + #endif + extern int thumb_shiftable_const (unsigned HOST_WIDE_INT); + #ifdef RTX_CODE ++extern enum arm_cond_code maybe_get_arm_condition_code (rtx); + extern void thumb1_final_prescan_insn (rtx); + extern void thumb2_final_prescan_insn (rtx); + extern const char *thumb_load_double_from_address (rtx *); + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-09-19 07:44:24 +0000 ++++ new/gcc/config/arm/arm.c 2011-10-03 09:46:40 +0000 +@@ -17494,10 +17494,10 @@ + decremented/zeroed by arm_asm_output_opcode as the insns are output. */ + + /* Returns the index of the ARM condition code string in +- `arm_condition_codes'. COMPARISON should be an rtx like +- `(eq (...) (...))'. */ +-static enum arm_cond_code +-get_arm_condition_code (rtx comparison) ++ `arm_condition_codes', or ARM_NV if the comparison is invalid. 
++ COMPARISON should be an rtx like `(eq (...) (...))'. */ ++enum arm_cond_code ++maybe_get_arm_condition_code (rtx comparison) + { + enum machine_mode mode = GET_MODE (XEXP (comparison, 0)); + enum arm_cond_code code; +@@ -17521,11 +17521,11 @@ + case CC_DLTUmode: code = ARM_CC; + + dominance: +- gcc_assert (comp_code == EQ || comp_code == NE); +- + if (comp_code == EQ) + return ARM_INVERSE_CONDITION_CODE (code); +- return code; ++ if (comp_code == NE) ++ return code; ++ return ARM_NV; + + case CC_NOOVmode: + switch (comp_code) +@@ -17534,7 +17534,7 @@ + case EQ: return ARM_EQ; + case GE: return ARM_PL; + case LT: return ARM_MI; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_Zmode: +@@ -17542,7 +17542,7 @@ + { + case NE: return ARM_NE; + case EQ: return ARM_EQ; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_Nmode: +@@ -17550,7 +17550,7 @@ + { + case NE: return ARM_MI; + case EQ: return ARM_PL; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CCFPEmode: +@@ -17575,7 +17575,7 @@ + /* UNEQ and LTGT do not have a representation. */ + case UNEQ: /* Fall through. */ + case LTGT: /* Fall through. 
*/ +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_SWPmode: +@@ -17591,7 +17591,7 @@ + case GTU: return ARM_CC; + case LEU: return ARM_CS; + case LTU: return ARM_HI; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_Cmode: +@@ -17599,7 +17599,7 @@ + { + case LTU: return ARM_CS; + case GEU: return ARM_CC; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_CZmode: +@@ -17611,7 +17611,7 @@ + case GTU: return ARM_HI; + case LEU: return ARM_LS; + case LTU: return ARM_CC; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CC_NCVmode: +@@ -17621,7 +17621,7 @@ + case LT: return ARM_LT; + case GEU: return ARM_CS; + case LTU: return ARM_CC; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + case CCmode: +@@ -17637,13 +17637,22 @@ + case GTU: return ARM_HI; + case LEU: return ARM_LS; + case LTU: return ARM_CC; +- default: gcc_unreachable (); ++ default: return ARM_NV; + } + + default: gcc_unreachable (); + } + } + ++/* Like maybe_get_arm_condition_code, but never return ARM_NV. */ ++static enum arm_cond_code ++get_arm_condition_code (rtx comparison) ++{ ++ enum arm_cond_code code = maybe_get_arm_condition_code (comparison); ++ gcc_assert (code != ARM_NV); ++ return code; ++} ++ + /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed + instructions. 
*/ + void + +=== modified file 'gcc/config/arm/arm.md' +--- old/gcc/config/arm/arm.md 2011-09-12 14:14:00 +0000 ++++ new/gcc/config/arm/arm.md 2011-10-03 09:47:33 +0000 +@@ -6543,7 +6543,7 @@ + + (define_expand "cbranchsi4" + [(set (pc) (if_then_else +- (match_operator 0 "arm_comparison_operator" ++ (match_operator 0 "expandable_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")]) + (label_ref (match_operand 3 "" "")) +@@ -6594,7 +6594,7 @@ + + (define_expand "cbranchsf4" + [(set (pc) (if_then_else +- (match_operator 0 "arm_comparison_operator" ++ (match_operator 0 "expandable_comparison_operator" + [(match_operand:SF 1 "s_register_operand" "") + (match_operand:SF 2 "arm_float_compare_operand" "")]) + (label_ref (match_operand 3 "" "")) +@@ -6606,7 +6606,7 @@ + + (define_expand "cbranchdf4" + [(set (pc) (if_then_else +- (match_operator 0 "arm_comparison_operator" ++ (match_operator 0 "expandable_comparison_operator" + [(match_operand:DF 1 "s_register_operand" "") + (match_operand:DF 2 "arm_float_compare_operand" "")]) + (label_ref (match_operand 3 "" "")) +@@ -6618,7 +6618,7 @@ + + (define_expand "cbranchdi4" + [(set (pc) (if_then_else +- (match_operator 0 "arm_comparison_operator" ++ (match_operator 0 "expandable_comparison_operator" + [(match_operand:DI 1 "cmpdi_operand" "") + (match_operand:DI 2 "cmpdi_operand" "")]) + (label_ref (match_operand 3 "" "")) +@@ -7473,7 +7473,7 @@ + + (define_expand "cstoresi4" + [(set (match_operand:SI 0 "s_register_operand" "") +- (match_operator:SI 1 "arm_comparison_operator" ++ (match_operator:SI 1 "expandable_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "reg_or_int_operand" "")]))] + "TARGET_32BIT || TARGET_THUMB1" +@@ -7609,7 +7609,7 @@ + + (define_expand "cstoresf4" + [(set (match_operand:SI 0 "s_register_operand" "") +- (match_operator:SI 1 "arm_comparison_operator" ++ (match_operator:SI 1 
"expandable_comparison_operator" + [(match_operand:SF 2 "s_register_operand" "") + (match_operand:SF 3 "arm_float_compare_operand" "")]))] + "TARGET_32BIT && TARGET_HARD_FLOAT" +@@ -7619,7 +7619,7 @@ + + (define_expand "cstoredf4" + [(set (match_operand:SI 0 "s_register_operand" "") +- (match_operator:SI 1 "arm_comparison_operator" ++ (match_operator:SI 1 "expandable_comparison_operator" + [(match_operand:DF 2 "s_register_operand" "") + (match_operand:DF 3 "arm_float_compare_operand" "")]))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" +@@ -7629,7 +7629,7 @@ + + (define_expand "cstoredi4" + [(set (match_operand:SI 0 "s_register_operand" "") +- (match_operator:SI 1 "arm_comparison_operator" ++ (match_operator:SI 1 "expandable_comparison_operator" + [(match_operand:DI 2 "cmpdi_operand" "") + (match_operand:DI 3 "cmpdi_operand" "")]))] + "TARGET_32BIT" +@@ -7749,7 +7749,7 @@ + + (define_expand "movsicc" + [(set (match_operand:SI 0 "s_register_operand" "") +- (if_then_else:SI (match_operand 1 "arm_comparison_operator" "") ++ (if_then_else:SI (match_operand 1 "expandable_comparison_operator" "") + (match_operand:SI 2 "arm_not_operand" "") + (match_operand:SI 3 "arm_not_operand" "")))] + "TARGET_32BIT" +@@ -7769,7 +7769,7 @@ + + (define_expand "movsfcc" + [(set (match_operand:SF 0 "s_register_operand" "") +- (if_then_else:SF (match_operand 1 "arm_comparison_operator" "") ++ (if_then_else:SF (match_operand 1 "expandable_comparison_operator" "") + (match_operand:SF 2 "s_register_operand" "") + (match_operand:SF 3 "nonmemory_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT" +@@ -7795,7 +7795,7 @@ + + (define_expand "movdfcc" + [(set (match_operand:DF 0 "s_register_operand" "") +- (if_then_else:DF (match_operand 1 "arm_comparison_operator" "") ++ (if_then_else:DF (match_operand 1 "expandable_comparison_operator" "") + (match_operand:DF 2 "s_register_operand" "") + (match_operand:DF 3 "arm_float_add_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && 
(TARGET_FPA || TARGET_VFP_DOUBLE)" + +=== modified file 'gcc/config/arm/predicates.md' +--- old/gcc/config/arm/predicates.md 2011-09-15 09:45:31 +0000 ++++ new/gcc/config/arm/predicates.md 2011-10-03 09:47:33 +0000 +@@ -242,11 +242,15 @@ + + ;; True for integer comparisons and, if FP is active, for comparisons + ;; other than LTGT or UNEQ. ++(define_special_predicate "expandable_comparison_operator" ++ (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu, ++ unordered,ordered,unlt,unle,unge,ungt")) ++ ++;; Likewise, but only accept comparisons that are directly supported ++;; by ARM condition codes. + (define_special_predicate "arm_comparison_operator" +- (ior (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu") +- (and (match_test "TARGET_32BIT && TARGET_HARD_FLOAT +- && (TARGET_FPA || TARGET_VFP)") +- (match_code "unordered,ordered,unlt,unle,unge,ungt")))) ++ (and (match_operand 0 "expandable_comparison_operator") ++ (match_test "maybe_get_arm_condition_code (op) != ARM_NV"))) + + (define_special_predicate "lt_ge_comparison_operator" + (match_code "lt,ge")) + +=== added file 'gcc/testsuite/gcc.dg/torture/pr49030.c' +--- old/gcc/testsuite/gcc.dg/torture/pr49030.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/torture/pr49030.c 2011-10-03 09:46:40 +0000 +@@ -0,0 +1,19 @@ ++void ++sample_move_d32u24_sS (char *dst, float *src, unsigned long nsamples, ++ unsigned long dst_skip) ++{ ++ long long y; ++ while (nsamples--) ++ { ++ y = (long long) (*src * 8388608.0f) << 8; ++ if (y > 2147483647) { ++ *(int *) dst = 2147483647; ++ } else if (y < -2147483647 - 1) { ++ *(int *) dst = -2147483647 - 1; ++ } else { ++ *(int *) dst = (int) y; ++ } ++ dst += dst_skip; ++ src++; ++ } ++} + +=== added file 'gcc/testsuite/gcc.target/arm/cmp-1.c' +--- old/gcc/testsuite/gcc.target/arm/cmp-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/cmp-1.c 2011-10-03 09:47:33 +0000 +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O" } */ ++/* { dg-final { 
scan-assembler-not "\tbl\t" } } */ ++/* { dg-final { scan-assembler-not "__aeabi" } } */ ++int x, y; ++ ++#define TEST_EXPR(NAME, ARGS, EXPR) \ ++ int NAME##1 ARGS { return (EXPR); } \ ++ int NAME##2 ARGS { return !(EXPR); } \ ++ int NAME##3 ARGS { return (EXPR) ? x : y; } \ ++ void NAME##4 ARGS { if (EXPR) x++; } \ ++ void NAME##5 ARGS { if (!(EXPR)) x++; } ++ ++#define TEST(NAME, TYPE, OPERATOR) \ ++ TEST_EXPR (NAME##_rr, (TYPE a1, TYPE a2), a1 OPERATOR a2) \ ++ TEST_EXPR (NAME##_rm, (TYPE a1, TYPE *a2), a1 OPERATOR *a2) \ ++ TEST_EXPR (NAME##_mr, (TYPE *a1, TYPE a2), *a1 OPERATOR a2) \ ++ TEST_EXPR (NAME##_mm, (TYPE *a1, TYPE *a2), *a1 OPERATOR *a2) \ ++ TEST_EXPR (NAME##_rc, (TYPE a1), a1 OPERATOR 100) \ ++ TEST_EXPR (NAME##_cr, (TYPE a1), 100 OPERATOR a1) ++ ++#define TEST_OP(NAME, OPERATOR) \ ++ TEST (sc_##NAME, signed char, OPERATOR) \ ++ TEST (uc_##NAME, unsigned char, OPERATOR) \ ++ TEST (ss_##NAME, short, OPERATOR) \ ++ TEST (us_##NAME, unsigned short, OPERATOR) \ ++ TEST (si_##NAME, int, OPERATOR) \ ++ TEST (ui_##NAME, unsigned int, OPERATOR) \ ++ TEST (sll_##NAME, long long, OPERATOR) \ ++ TEST (ull_##NAME, unsigned long long, OPERATOR) ++ ++TEST_OP (eq, ==) ++TEST_OP (ne, !=) ++TEST_OP (lt, <) ++TEST_OP (gt, >) ++TEST_OP (le, <=) ++TEST_OP (ge, >=) + +=== added file 'gcc/testsuite/gcc.target/arm/cmp-2.c' +--- old/gcc/testsuite/gcc.target/arm/cmp-2.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/cmp-2.c 2011-10-03 09:47:33 +0000 +@@ -0,0 +1,49 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_vfp_ok } */ ++/* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */ ++/* { dg-options "-O -mfpu=vfp -mfloat-abi=softfp" } */ ++/* { dg-final { scan-assembler-not "\tbl\t" } } */ ++/* { dg-final { scan-assembler-not "__aeabi" } } */ ++int x, y; ++ ++#define EQ(X, Y) ((X) == (Y)) ++#define NE(X, Y) ((X) != (Y)) ++#define LT(X, Y) ((X) < (Y)) ++#define GT(X, Y) ((X) > (Y)) ++#define LE(X, Y) ((X) <= 
(Y)) ++#define GE(X, Y) ((X) >= (Y)) ++ ++#define TEST_EXPR(NAME, ARGS, EXPR) \ ++ int NAME##1 ARGS { return (EXPR); } \ ++ int NAME##2 ARGS { return !(EXPR); } \ ++ int NAME##3 ARGS { return (EXPR) ? x : y; } \ ++ void NAME##4 ARGS { if (EXPR) x++; } \ ++ void NAME##5 ARGS { if (!(EXPR)) x++; } ++ ++#define TEST(NAME, TYPE, OPERATOR) \ ++ TEST_EXPR (NAME##_rr, (TYPE a1, TYPE a2), OPERATOR (a1, a2)) \ ++ TEST_EXPR (NAME##_rm, (TYPE a1, TYPE *a2), OPERATOR (a1, *a2)) \ ++ TEST_EXPR (NAME##_mr, (TYPE *a1, TYPE a2), OPERATOR (*a1, a2)) \ ++ TEST_EXPR (NAME##_mm, (TYPE *a1, TYPE *a2), OPERATOR (*a1, *a2)) \ ++ TEST_EXPR (NAME##_rc, (TYPE a1), OPERATOR (a1, 100)) \ ++ TEST_EXPR (NAME##_cr, (TYPE a1), OPERATOR (100, a1)) ++ ++#define TEST_OP(NAME, OPERATOR) \ ++ TEST (f_##NAME, float, OPERATOR) \ ++ TEST (d_##NAME, double, OPERATOR) \ ++ TEST (ld_##NAME, long double, OPERATOR) ++ ++TEST_OP (eq, EQ) ++TEST_OP (ne, NE) ++TEST_OP (lt, LT) ++TEST_OP (gt, GT) ++TEST_OP (le, LE) ++TEST_OP (ge, GE) ++TEST_OP (blt, __builtin_isless) ++TEST_OP (bgt, __builtin_isgreater) ++TEST_OP (ble, __builtin_islessequal) ++TEST_OP (bge, __builtin_isgreaterequal) ++/* This one should be expanded into separate ordered and equality ++ comparisons. */ ++TEST_OP (blg, __builtin_islessgreater) ++TEST_OP (bun, __builtin_isunordered) + diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106820.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106820.patch new file mode 100644 index 0000000000..4a886ce56d --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106820.patch @@ -0,0 +1,378 @@ +2011-10-06 Ira Rosen <ira.rosen@linaro.org> + + Backport from mainline: + + 2011-09-25 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-data-ref.c (dr_analyze_innermost): Add new argument. + Allow not simple iv if analyzing basic block. + (create_data_ref): Update call to dr_analyze_innermost. 
+ (stmt_with_adjacent_zero_store_dr_p, ref_base_address): Likewise. + * tree-loop-distribution.c (generate_memset_zero): Likewise. + * tree-predcom.c (find_looparound_phi): Likewise. + * tree-data-ref.h (dr_analyze_innermost): Add new argument. + + gcc/testsuite/ + * gcc.dg/vect/bb-slp-24.c: New. + + + 2011-09-15 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vect-data-refs.c (vect_analyze_data_ref_dependence): Allow + read-after-read dependencies in basic block SLP. + + gcc/testsuite/ + * gcc.dg/vect/bb-slp-25.c: New. + + + 2011-04-21 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + * tree-vect-data-refs.c (vect_drs_dependent_in_basic_block): Use + operand_equal_p to compare DR_BASE_ADDRESSes. + (vect_check_interleaving): Likewise. + + gcc/testsuite/ + * gcc.dg/vect/vect-119.c: New test. + +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-24.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-24.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-24.c 2011-10-02 08:43:10 +0000 +@@ -0,0 +1,59 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define A 3 ++#define N 256 ++ ++short src[N], dst[N]; ++ ++void foo (short * __restrict__ dst, short * __restrict__ src, int h, ++ int stride, int dummy) ++{ ++ int i; ++ h /= 8; ++ for (i = 0; i < h; i++) ++ { ++ dst[0] += A*src[0]; ++ dst[1] += A*src[1]; ++ dst[2] += A*src[2]; ++ dst[3] += A*src[3]; ++ dst[4] += A*src[4]; ++ dst[5] += A*src[5]; ++ dst[6] += A*src[6]; ++ dst[7] += A*src[7]; ++ dst += stride; ++ src += stride; ++ if (dummy == 32) ++ abort (); ++ } ++} ++ ++ ++int main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ dst[i] = 0; ++ src[i] = i; ++ } ++ ++ foo (dst, src, N, 8, 0); ++ ++ for (i = 0; i < N; i++) ++ { ++ if (dst[i] != A * i) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */ ++/* 
{ dg-final { cleanup-tree-dump "slp" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-25.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-25.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-25.c 2011-10-02 08:43:10 +0000 +@@ -0,0 +1,59 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define A 3 ++#define B 4 ++#define N 256 ++ ++short src[N], dst[N]; ++ ++void foo (short * __restrict__ dst, short * __restrict__ src, int h, int stride, int dummy) ++{ ++ int i; ++ h /= 16; ++ for (i = 0; i < h; i++) ++ { ++ dst[0] += A*src[0] + src[stride]; ++ dst[1] += A*src[1] + src[1+stride]; ++ dst[2] += A*src[2] + src[2+stride]; ++ dst[3] += A*src[3] + src[3+stride]; ++ dst[4] += A*src[4] + src[4+stride]; ++ dst[5] += A*src[5] + src[5+stride]; ++ dst[6] += A*src[6] + src[6+stride]; ++ dst[7] += A*src[7] + src[7+stride]; ++ dst += 8; ++ src += 8; ++ if (dummy == 32) ++ abort (); ++ } ++} ++ ++ ++int main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ dst[i] = 0; ++ src[i] = i; ++ } ++ ++ foo (dst, src, N, 8, 0); ++ ++ for (i = 0; i < N/2; i++) ++ { ++ if (dst[i] != A * i + i + 8) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */ ++/* { dg-final { cleanup-tree-dump "slp" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-119.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-119.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-119.c 2011-10-02 08:43:10 +0000 +@@ -0,0 +1,28 @@ ++/* { dg-do compile } */ ++ ++#define OUTER 32 ++#define INNER 40 ++ ++static unsigned int ++bar (const unsigned int x[INNER][2], unsigned int sum) ++{ ++ int i; ++ ++ for (i = 0; i < INNER; i++) ++ sum += x[i][0] * x[i][0] + x[i][1] * x[i][1]; ++ return sum; ++} ++ ++unsigned int foo (const unsigned int x[OUTER][INNER][2]) ++{ ++ int i; ++ unsigned int sum; ++ 
++ sum = 0.0f; ++ for (i = 0; i < OUTER; i++) ++ sum = bar (x[i], sum); ++ return sum; ++} ++ ++/* { dg-final { scan-tree-dump-times "Detected interleaving of size 2" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ + +=== modified file 'gcc/tree-data-ref.c' +--- old/gcc/tree-data-ref.c 2011-05-26 14:27:33 +0000 ++++ new/gcc/tree-data-ref.c 2011-10-02 08:43:10 +0000 +@@ -721,11 +721,11 @@ + } + + /* Analyzes the behavior of the memory reference DR in the innermost loop or +- basic block that contains it. Returns true if analysis succeed or false ++ basic block that contains it. Returns true if analysis succeed or false + otherwise. */ + + bool +-dr_analyze_innermost (struct data_reference *dr) ++dr_analyze_innermost (struct data_reference *dr, struct loop *nest) + { + gimple stmt = DR_STMT (dr); + struct loop *loop = loop_containing_stmt (stmt); +@@ -768,14 +768,25 @@ + } + else + base = build_fold_addr_expr (base); ++ + if (in_loop) + { + if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv, + false)) + { +- if (dump_file && (dump_flags & TDF_DETAILS)) +- fprintf (dump_file, "failed: evolution of base is not affine.\n"); +- return false; ++ if (nest) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "failed: evolution of base is not" ++ " affine.\n"); ++ return false; ++ } ++ else ++ { ++ base_iv.base = base; ++ base_iv.step = ssize_int (0); ++ base_iv.no_overflow = true; ++ } + } + } + else +@@ -800,10 +811,18 @@ + else if (!simple_iv (loop, loop_containing_stmt (stmt), + poffset, &offset_iv, false)) + { +- if (dump_file && (dump_flags & TDF_DETAILS)) +- fprintf (dump_file, "failed: evolution of offset is not" +- " affine.\n"); +- return false; ++ if (nest) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "failed: evolution of offset is not" ++ " affine.\n"); ++ return false; ++ } ++ else ++ { ++ offset_iv.base = poffset; ++ offset_iv.step = ssize_int (0); ++ } + } + } + +@@ -967,7 
+986,7 @@ + DR_REF (dr) = memref; + DR_IS_READ (dr) = is_read; + +- dr_analyze_innermost (dr); ++ dr_analyze_innermost (dr, nest); + dr_analyze_indices (dr, nest, loop); + dr_analyze_alias (dr); + +@@ -5185,7 +5204,7 @@ + DR_STMT (dr) = stmt; + DR_REF (dr) = op0; + +- res = dr_analyze_innermost (dr) ++ res = dr_analyze_innermost (dr, loop_containing_stmt (stmt)) + && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)); + + free_data_ref (dr); +@@ -5225,7 +5244,7 @@ + + DR_STMT (dr) = stmt; + DR_REF (dr) = *ref->pos; +- dr_analyze_innermost (dr); ++ dr_analyze_innermost (dr, loop_containing_stmt (stmt)); + base_address = DR_BASE_ADDRESS (dr); + + if (!base_address) + +=== modified file 'gcc/tree-data-ref.h' +--- old/gcc/tree-data-ref.h 2011-03-27 09:38:18 +0000 ++++ new/gcc/tree-data-ref.h 2011-10-02 08:43:10 +0000 +@@ -386,7 +386,7 @@ + DEF_VEC_ALLOC_O (data_ref_loc, heap); + + bool get_references_in_stmt (gimple, VEC (data_ref_loc, heap) **); +-bool dr_analyze_innermost (struct data_reference *); ++bool dr_analyze_innermost (struct data_reference *, struct loop *); + extern bool compute_data_dependences_for_loop (struct loop *, bool, + VEC (loop_p, heap) **, + VEC (data_reference_p, heap) **, + +=== modified file 'gcc/tree-loop-distribution.c' +--- old/gcc/tree-loop-distribution.c 2011-05-11 13:07:54 +0000 ++++ new/gcc/tree-loop-distribution.c 2011-10-02 08:43:10 +0000 +@@ -267,7 +267,7 @@ + + DR_STMT (dr) = stmt; + DR_REF (dr) = op0; +- res = dr_analyze_innermost (dr); ++ res = dr_analyze_innermost (dr, loop_containing_stmt (stmt)); + gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0))); + + nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); + +=== modified file 'gcc/tree-predcom.c' +--- old/gcc/tree-predcom.c 2011-02-11 14:19:44 +0000 ++++ new/gcc/tree-predcom.c 2011-10-02 08:43:10 +0000 +@@ -1114,7 +1114,7 @@ + memset (&init_dr, 0, sizeof (struct data_reference)); + DR_REF (&init_dr) = init_ref; + DR_STMT (&init_dr) = phi; +- 
if (!dr_analyze_innermost (&init_dr)) ++ if (!dr_analyze_innermost (&init_dr, loop)) + return NULL; + + if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref)) + +=== modified file 'gcc/tree-vect-data-refs.c' +--- old/gcc/tree-vect-data-refs.c 2011-07-04 11:13:51 +0000 ++++ new/gcc/tree-vect-data-refs.c 2011-10-02 08:43:10 +0000 +@@ -353,11 +353,7 @@ + + /* Check that the data-refs have same bases and offsets. If not, we can't + determine if they are dependent. */ +- if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb) +- && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR +- || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR +- || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0) +- != TREE_OPERAND (DR_BASE_ADDRESS (drb),0))) ++ if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0) + || !dr_equal_offsets_p (dra, drb)) + return true; + +@@ -403,11 +399,7 @@ + + /* Check that the data-refs have same first location (except init) and they + are both either store or load (not load and store). */ +- if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb) +- && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR +- || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR +- || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0) +- != TREE_OPERAND (DR_BASE_ADDRESS (drb),0))) ++ if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0) + || !dr_equal_offsets_p (dra, drb) + || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) + || DR_IS_READ (dra) != DR_IS_READ (drb)) +@@ -615,6 +607,11 @@ + if (vect_check_interleaving (dra, drb)) + return false; + ++ /* Read-read is OK (we need this check here, after checking for ++ interleaving). 
*/ ++ if (DR_IS_READ (dra) && DR_IS_READ (drb)) ++ return false; ++ + if (vect_print_dump_info (REPORT_DR_DETAILS)) + { + fprintf (vect_dump, "can't determine dependence between "); + diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106821.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106821.patch new file mode 100644 index 0000000000..f25a37858d --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106821.patch @@ -0,0 +1,240 @@ +2011-10-06 Ira Rosen <ira.rosen@linaro.org> + + gcc/testsuite/ + * gcc.dg/vect/bb-slp-26.c: Simplify to make the basic block + vectorizable. + + Backport from mainline: + + 2011-09-25 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vect-slp.c (vect_slp_analyze_bb_1): Split out core part + of vect_analyze_bb here. + (vect_analyze_bb): Loop over vector sizes calling vect_analyze_bb_1. + + gcc/testsuite/ + * lib/target-supports.exp (check_effective_target_vect64): New. + * gcc.dg/vect/bb-slp-11.c: Expect the error message twice in case + of multiple vector sizes. + * gcc.dg/vect/bb-slp-26.c: New. 
+ +=== modified file 'gcc/testsuite/gcc.dg/vect/bb-slp-11.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2010-11-22 12:16:52 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2011-10-02 10:40:34 +0000 +@@ -49,6 +49,7 @@ + } + + /* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */ +-/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" } } */ ++/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" { xfail vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2 "slp" { target vect_multiple_sizes } } } */ + /* { dg-final { cleanup-tree-dump "slp" } } */ + + +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-26.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-26.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-26.c 2011-10-02 10:40:34 +0000 +@@ -0,0 +1,60 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define A 3 ++#define B 4 ++#define N 256 ++ ++char src[N], dst[N]; ++ ++void foo (char * __restrict__ dst, char * __restrict__ src, int h, ++ int stride, int dummy) ++{ ++ int i; ++ h /= 16; ++ for (i = 0; i < h; i++) ++ { ++ dst[0] += A*src[0]; ++ dst[1] += A*src[1]; ++ dst[2] += A*src[2]; ++ dst[3] += A*src[3]; ++ dst[4] += A*src[4]; ++ dst[5] += A*src[5]; ++ dst[6] += A*src[6]; ++ dst[7] += A*src[7]; ++ dst += 8; ++ src += 8; ++ if (dummy == 32) ++ abort (); ++ } ++} ++ ++ ++int main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ dst[i] = 0; ++ src[i] = i/8; ++ } ++ ++ foo (dst, src, N, 8, 0); ++ ++ for (i = 0; i < N/2; i++) ++ { ++ if (dst[i] != A * src[i]) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect64 } } } */ ++/* { dg-final { cleanup-tree-dump "slp" } } */ ++ + +=== modified file 'gcc/testsuite/lib/target-supports.exp' +--- 
old/gcc/testsuite/lib/target-supports.exp 2011-09-20 07:54:28 +0000 ++++ new/gcc/testsuite/lib/target-supports.exp 2011-10-02 10:40:34 +0000 +@@ -3283,6 +3283,24 @@ + return $et_vect_multiple_sizes_saved + } + ++# Return 1 if the target supports vectors of 64 bits. ++ ++proc check_effective_target_vect64 { } { ++ global et_vect64 ++ ++ if [info exists et_vect64_saved] { ++ verbose "check_effective_target_vect64: using cached result" 2 ++ } else { ++ set et_vect64_saved 0 ++ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } { ++ set et_vect64_saved 1 ++ } ++ } ++ ++ verbose "check_effective_target_vect64: returning $et_vect64_saved" 2 ++ return $et_vect64_saved ++} ++ + # Return 1 if the target supports section-anchors + + proc check_effective_target_section_anchors { } { + +=== modified file 'gcc/tree-vect-slp.c' +--- old/gcc/tree-vect-slp.c 2011-07-06 12:04:10 +0000 ++++ new/gcc/tree-vect-slp.c 2011-10-02 10:40:34 +0000 +@@ -1664,42 +1664,18 @@ + + /* Check if the basic block can be vectorized. 
*/ + +-bb_vec_info +-vect_slp_analyze_bb (basic_block bb) ++static bb_vec_info ++vect_slp_analyze_bb_1 (basic_block bb) + { + bb_vec_info bb_vinfo; + VEC (ddr_p, heap) *ddrs; + VEC (slp_instance, heap) *slp_instances; + slp_instance instance; +- int i, insns = 0; +- gimple_stmt_iterator gsi; ++ int i; + int min_vf = 2; + int max_vf = MAX_VECTORIZATION_FACTOR; + bool data_dependence_in_bb = false; + +- current_vector_size = 0; +- +- if (vect_print_dump_info (REPORT_DETAILS)) +- fprintf (vect_dump, "===vect_slp_analyze_bb===\n"); +- +- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) +- { +- gimple stmt = gsi_stmt (gsi); +- if (!is_gimple_debug (stmt) +- && !gimple_nop_p (stmt) +- && gimple_code (stmt) != GIMPLE_LABEL) +- insns++; +- } +- +- if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) +- { +- if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) +- fprintf (vect_dump, "not vectorized: too many instructions in basic " +- "block.\n"); +- +- return NULL; +- } +- + bb_vinfo = new_bb_vec_info (bb); + if (!bb_vinfo) + return NULL; +@@ -1819,6 +1795,61 @@ + } + + ++bb_vec_info ++vect_slp_analyze_bb (basic_block bb) ++{ ++ bb_vec_info bb_vinfo; ++ int insns = 0; ++ gimple_stmt_iterator gsi; ++ unsigned int vector_sizes; ++ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "===vect_slp_analyze_bb===\n"); ++ ++ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ gimple stmt = gsi_stmt (gsi); ++ if (!is_gimple_debug (stmt) ++ && !gimple_nop_p (stmt) ++ && gimple_code (stmt) != GIMPLE_LABEL) ++ insns++; ++ } ++ ++ if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) ++ { ++ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) ++ fprintf (vect_dump, "not vectorized: too many instructions in basic " ++ "block.\n"); ++ ++ return NULL; ++ } ++ ++ /* Autodetect first vector size we try. 
*/ ++ current_vector_size = 0; ++ vector_sizes = targetm.vectorize.autovectorize_vector_sizes (); ++ ++ while (1) ++ { ++ bb_vinfo = vect_slp_analyze_bb_1 (bb); ++ if (bb_vinfo) ++ return bb_vinfo; ++ ++ destroy_bb_vec_info (bb_vinfo); ++ ++ vector_sizes &= ~current_vector_size; ++ if (vector_sizes == 0 ++ || current_vector_size == 0) ++ return NULL; ++ ++ /* Try the next biggest vector size. */ ++ current_vector_size = 1 << floor_log2 (vector_sizes); ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "***** Re-trying analysis with " ++ "vector size %d\n", current_vector_size); ++ } ++} ++ ++ + /* SLP costs are calculated according to SLP instance unrolling factor (i.e., + the number of created vector stmts depends on the unrolling factor). + However, the actual number of vector stmts for every SLP node depends on + diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106825.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106825.patch new file mode 100644 index 0000000000..13e6fd26e5 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106825.patch @@ -0,0 +1,124 @@ +2011-10-13 Andrew Stubbs <ams@codesourcery.com> + + Backport from mainline: + + 2011-10-07 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/predicates.md (shift_amount_operand): Remove constant + range check. + (shift_operator): Check range of constants for all shift operators. + + gcc/testsuite/ + * gcc.dg/pr50193-1.c: New file. + * gcc.target/arm/shiftable.c: New file. + +=== modified file 'gcc/config/arm/predicates.md' +--- old/gcc/config/arm/predicates.md 2011-10-03 09:47:33 +0000 ++++ new/gcc/config/arm/predicates.md 2011-10-10 11:43:28 +0000 +@@ -129,11 +129,12 @@ + (ior (match_operand 0 "arm_rhs_operand") + (match_operand 0 "memory_operand"))) + ++;; This doesn't have to do much because the constant is already checked ++;; in the shift_operator predicate. 
+ (define_predicate "shift_amount_operand" + (ior (and (match_test "TARGET_ARM") + (match_operand 0 "s_register_operand")) +- (and (match_operand 0 "const_int_operand") +- (match_test "INTVAL (op) > 0")))) ++ (match_operand 0 "const_int_operand"))) + + (define_predicate "arm_add_operand" + (ior (match_operand 0 "arm_rhs_operand") +@@ -219,13 +220,20 @@ + (match_test "mode == GET_MODE (op)"))) + + ;; True for shift operators. ++;; Notes: ++;; * mult is only permitted with a constant shift amount ++;; * patterns that permit register shift amounts only in ARM mode use ++;; shift_amount_operand, patterns that always allow registers do not, ++;; so we don't have to worry about that sort of thing here. + (define_special_predicate "shift_operator" + (and (ior (ior (and (match_code "mult") + (match_test "power_of_two_operand (XEXP (op, 1), mode)")) + (and (match_code "rotate") + (match_test "GET_CODE (XEXP (op, 1)) == CONST_INT + && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))) +- (match_code "ashift,ashiftrt,lshiftrt,rotatert")) ++ (and (match_code "ashift,ashiftrt,lshiftrt,rotatert") ++ (match_test "GET_CODE (XEXP (op, 1)) != CONST_INT ++ || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))) + (match_test "mode == GET_MODE (op)"))) + + ;; True for MULT, to identify which variant of shift_operator is in use. + +=== added file 'gcc/testsuite/gcc.target/arm/shiftable.c' +--- old/gcc/testsuite/gcc.target/arm/shiftable.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/shiftable.c 2011-10-10 11:43:28 +0000 +@@ -0,0 +1,63 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++/* { dg-require-effective-target arm32 } */ ++ ++/* ARM has shift-and-alu insns. Depending on the ALU op GCC represents some ++ of these as a left shift, others as a multiply. Check that we match the ++ right one. 
*/ ++ ++int ++plus (int a, int b) ++{ ++ return (a * 64) + b; ++} ++ ++/* { dg-final { scan-assembler "add.*\[al]sl #6" } } */ ++ ++int ++minus (int a, int b) ++{ ++ return a - (b * 64); ++} ++ ++/* { dg-final { scan-assembler "sub.*\[al]sl #6" } } */ ++ ++int ++ior (int a, int b) ++{ ++ return (a * 64) | b; ++} ++ ++/* { dg-final { scan-assembler "orr.*\[al]sl #6" } } */ ++ ++int ++xor (int a, int b) ++{ ++ return (a * 64) ^ b; ++} ++ ++/* { dg-final { scan-assembler "eor.*\[al]sl #6" } } */ ++ ++int ++and (int a, int b) ++{ ++ return (a * 64) & b; ++} ++ ++/* { dg-final { scan-assembler "and.*\[al]sl #6" } } */ ++ ++int ++rsb (int a, int b) ++{ ++ return (a * 64) - b; ++} ++ ++/* { dg-final { scan-assembler "rsb.*\[al]sl #6" } } */ ++ ++int ++mvn (int a, int b) ++{ ++ return ~(a * 64); ++} ++ ++/* { dg-final { scan-assembler "mvn.*\[al]sl #6" } } */ + diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106826.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106826.patch new file mode 100644 index 0000000000..6642f8a667 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106826.patch @@ -0,0 +1,362 @@ +2011-10-16 Ira Rosen <ira.rosen@linaro.org> + + Backport from mainline: + + 2011-09-27 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vect-stmts.c (vectorizable_type_demotion): Handle basic block + vectorization. + (vectorizable_type_promotion): Likewise. + (vect_analyze_stmt): Call vectorizable_type_demotion and + vectorizable_type_promotion for basic blocks. + (supportable_widening_operation): Don't assume loop vectorization. + * tree-vect-slp.c (vect_build_slp_tree): Allow multiple types for + basic blocks. Update vectorization factor for basic block + vectorization. + (vect_analyze_slp_instance): Allow multiple types for basic block + vectorization. Recheck unrolling factor after construction of SLP + instance. 
+ + gcc/testsuite/ + * gcc.dg/vect/bb-slp-11.c: Expect to get vectorized with 64-bit + vectors. + * gcc.dg/vect/bb-slp-27.c: New. + * gcc.dg/vect/bb-slp-28.c: New. + + + 2011-10-04 Ira Rosen <ira.rosen@linaro.org> + + gcc/testsuite/ + * lib/target-supports.exp (check_effective_target_vect_multiple_sizes): + Make et_vect_multiple_sizes_saved global. + (check_effective_target_vect64): Make et_vect64_saved global. + +=== modified file 'gcc/testsuite/gcc.dg/vect/bb-slp-11.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2011-10-02 10:40:34 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2011-10-06 11:08:08 +0000 +@@ -48,8 +48,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */ +-/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" { xfail vect_multiple_sizes } } } */ +-/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2 "slp" { target vect_multiple_sizes } } } */ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect64 } } } */ + /* { dg-final { cleanup-tree-dump "slp" } } */ + + +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-27.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-27.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-27.c 2011-10-06 11:08:08 +0000 +@@ -0,0 +1,49 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define A 3 ++#define N 16 ++ ++short src[N], dst[N]; ++ ++void foo (int a) ++{ ++ dst[0] += a*src[0]; ++ dst[1] += a*src[1]; ++ dst[2] += a*src[2]; ++ dst[3] += a*src[3]; ++ dst[4] += a*src[4]; ++ dst[5] += a*src[5]; ++ dst[6] += a*src[6]; ++ dst[7] += a*src[7]; ++} ++ ++ ++int main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ dst[i] = 0; ++ src[i] = i; ++ } ++ ++ foo (A); ++ ++ for (i = 0; i < 8; i++) ++ { ++ if (dst[i] != A * i) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { 
scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult && { vect_unpack && vect_pack_trunc } } } } } */ ++/* { dg-final { cleanup-tree-dump "slp" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-28.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-28.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-28.c 2011-10-06 11:08:08 +0000 +@@ -0,0 +1,71 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define A 300 ++#define N 16 ++ ++char src[N]; ++short dst[N]; ++short src1[N], dst1[N]; ++ ++void foo (int a) ++{ ++ dst[0] = (short) (a * (int) src[0]); ++ dst[1] = (short) (a * (int) src[1]); ++ dst[2] = (short) (a * (int) src[2]); ++ dst[3] = (short) (a * (int) src[3]); ++ dst[4] = (short) (a * (int) src[4]); ++ dst[5] = (short) (a * (int) src[5]); ++ dst[6] = (short) (a * (int) src[6]); ++ dst[7] = (short) (a * (int) src[7]); ++ dst[8] = (short) (a * (int) src[8]); ++ dst[9] = (short) (a * (int) src[9]); ++ dst[10] = (short) (a * (int) src[10]); ++ dst[11] = (short) (a * (int) src[11]); ++ dst[12] = (short) (a * (int) src[12]); ++ dst[13] = (short) (a * (int) src[13]); ++ dst[14] = (short) (a * (int) src[14]); ++ dst[15] = (short) (a * (int) src[15]); ++ ++ dst1[0] += src1[0]; ++ dst1[1] += src1[1]; ++ dst1[2] += src1[2]; ++ dst1[3] += src1[3]; ++ dst1[4] += src1[4]; ++ dst1[5] += src1[5]; ++ dst1[6] += src1[6]; ++ dst1[7] += src1[7]; ++} ++ ++ ++int main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ dst[i] = 2; ++ dst1[i] = 0; ++ src[i] = i; ++ src1[i] = i+2; ++ } ++ ++ foo (A); ++ ++ for (i = 0; i < N; i++) ++ { ++ if (dst[i] != A * i ++ || (i < N/2 && dst1[i] != i + 2)) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult && { vect_pack_trunc && vect_unpack } } } } } */ ++/* { dg-final { cleanup-tree-dump "slp" } } */ ++ + 
+=== modified file 'gcc/testsuite/lib/target-supports.exp' +--- old/gcc/testsuite/lib/target-supports.exp 2011-10-02 10:40:34 +0000 ++++ new/gcc/testsuite/lib/target-supports.exp 2011-10-06 11:08:08 +0000 +@@ -3268,7 +3268,7 @@ + # Return 1 if the target supports multiple vector sizes + + proc check_effective_target_vect_multiple_sizes { } { +- global et_vect_multiple_sizes ++ global et_vect_multiple_sizes_saved + + if [info exists et_vect_multiple_sizes_saved] { + verbose "check_effective_target_vect_multiple_sizes: using cached result" 2 +@@ -3286,7 +3286,7 @@ + # Return 1 if the target supports vectors of 64 bits. + + proc check_effective_target_vect64 { } { +- global et_vect64 ++ global et_vect64_saved + + if [info exists et_vect64_saved] { + verbose "check_effective_target_vect64: using cached result" 2 + +=== modified file 'gcc/tree-vect-slp.c' +--- old/gcc/tree-vect-slp.c 2011-10-02 10:40:34 +0000 ++++ new/gcc/tree-vect-slp.c 2011-10-06 11:08:08 +0000 +@@ -386,20 +386,15 @@ + return false; + } + +- ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype); +- if (ncopies != 1) +- { +- if (vect_print_dump_info (REPORT_SLP)) +- fprintf (vect_dump, "SLP with multiple types "); +- +- /* FORNOW: multiple types are unsupported in BB SLP. */ +- if (bb_vinfo) +- return false; +- } +- + /* In case of multiple types we need to detect the smallest type. */ + if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype)) +- *max_nunits = TYPE_VECTOR_SUBPARTS (vectype); ++ { ++ *max_nunits = TYPE_VECTOR_SUBPARTS (vectype); ++ if (bb_vinfo) ++ vectorization_factor = *max_nunits; ++ } ++ ++ ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype); + + if (is_gimple_call (stmt)) + rhs_code = CALL_EXPR; +@@ -1183,7 +1178,6 @@ + if (loop_vinfo) + vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + else +- /* No multitypes in BB SLP. */ + vectorization_factor = nunits; + + /* Calculate the unrolling factor. 
*/ +@@ -1246,16 +1240,23 @@ + &max_nunits, &load_permutation, &loads, + vectorization_factor)) + { ++ /* Calculate the unrolling factor based on the smallest type. */ ++ if (max_nunits > nunits) ++ unrolling_factor = least_common_multiple (max_nunits, group_size) ++ / group_size; ++ ++ if (unrolling_factor != 1 && !loop_vinfo) ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ fprintf (vect_dump, "Build SLP failed: unrolling required in basic" ++ " block SLP"); ++ return false; ++ } ++ + /* Create a new SLP instance. */ + new_instance = XNEW (struct _slp_instance); + SLP_INSTANCE_TREE (new_instance) = node; + SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size; +- /* Calculate the unrolling factor based on the smallest type in the +- loop. */ +- if (max_nunits > nunits) +- unrolling_factor = least_common_multiple (max_nunits, group_size) +- / group_size; +- + SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor; + SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost; + SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost; + +=== modified file 'gcc/tree-vect-stmts.c' +--- old/gcc/tree-vect-stmts.c 2011-10-04 08:57:25 +0000 ++++ new/gcc/tree-vect-stmts.c 2011-10-16 12:16:07 +0000 +@@ -3081,11 +3081,9 @@ + VEC (tree, heap) *vec_oprnds0 = NULL; + VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL; + tree last_oprnd, intermediate_type; +- +- /* FORNOW: not supported by basic block SLP vectorization. 
*/ +- gcc_assert (loop_vinfo); +- +- if (!STMT_VINFO_RELEVANT_P (stmt_info)) ++ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); ++ ++ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) + return false; + + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) +@@ -3113,7 +3111,7 @@ + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)) + && CONVERT_EXPR_CODE_P (code)))) + return false; +- if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL, ++ if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo, + &def_stmt, &def, &dt[0], &vectype_in)) + { + if (vect_print_dump_info (REPORT_DETAILS)) +@@ -3360,11 +3358,9 @@ + int multi_step_cvt = 0; + VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL; + VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL; +- +- /* FORNOW: not supported by basic block SLP vectorization. */ +- gcc_assert (loop_vinfo); +- +- if (!STMT_VINFO_RELEVANT_P (stmt_info)) ++ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); ++ ++ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) + return false; + + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) +@@ -3393,7 +3389,7 @@ + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)) + && CONVERT_EXPR_CODE_P (code)))) + return false; +- if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL, ++ if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo, + &def_stmt, &def, &dt[0], &vectype_in)) + { + if (vect_print_dump_info (REPORT_DETAILS)) +@@ -5153,7 +5149,9 @@ + else + { + if (bb_vinfo) +- ok = (vectorizable_shift (stmt, NULL, NULL, node) ++ ok = (vectorizable_type_promotion (stmt, NULL, NULL, node) ++ || vectorizable_type_demotion (stmt, NULL, NULL, node) ++ || vectorizable_shift (stmt, NULL, NULL, node) + || vectorizable_operation (stmt, NULL, NULL, node) + || vectorizable_assignment (stmt, NULL, NULL, node) + || vectorizable_load (stmt, NULL, NULL, node, NULL) +@@ -5780,7 +5778,7 @@ + { + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO 
(stmt_info); +- struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); ++ struct loop *vect_loop = NULL; + bool ordered_p; + enum machine_mode vec_mode; + enum insn_code icode1, icode2; +@@ -5789,6 +5787,9 @@ + tree wide_vectype = vectype_out; + enum tree_code c1, c2; + ++ if (loop_info) ++ vect_loop = LOOP_VINFO_LOOP (loop_info); ++ + /* The result of a vectorized widening operation usually requires two vectors + (because the widened results do not fit int one vector). The generated + vector results would normally be expected to be generated in the same +@@ -5809,7 +5810,8 @@ + iterations in parallel). We therefore don't allow to change the order + of the computation in the inner-loop during outer-loop vectorization. */ + +- if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction ++ if (vect_loop ++ && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction + && !nested_in_vect_loop_p (vect_loop, stmt)) + ordered_p = false; + else + diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106827.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106827.patch new file mode 100644 index 0000000000..d49ebab600 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106827.patch @@ -0,0 +1,628 @@ +2011-10-17 Michael Hope <michael.hope@linaro.org> + + Backport from mainline r178852: + + 2011-09-14 Julian Brown <julian@codesourcery.com> + + gcc/ + * config/arm/arm.c (arm_override_options): Add unaligned_access + support. + (arm_file_start): Emit attribute for unaligned access as appropriate. + * config/arm/arm.md (UNSPEC_UNALIGNED_LOAD) + (UNSPEC_UNALIGNED_STORE): Add constants for unspecs. + (insv, extzv): Add unaligned-access support. + (extv): Change to expander. Likewise. + (extzv_t1, extv_regsi): Add helpers. + (unaligned_loadsi, unaligned_loadhis, unaligned_loadhiu) + (unaligned_storesi, unaligned_storehi): New. + (*extv_reg): New (previous extv implementation). 
+ * config/arm/arm.opt (munaligned_access): Add option. + * config/arm/constraints.md (Uw): New constraint. + * expmed.c (store_bit_field_1): Adjust bitfield numbering according + to size of access, not size of unit, when BITS_BIG_ENDIAN != + BYTES_BIG_ENDIAN. Don't use bitfield accesses for + volatile accesses when -fstrict-volatile-bitfields is in effect. + (extract_bit_field_1): Likewise. + + Backport from mainline r172697: + + 2011-04-19 Wei Guozhi <carrot@google.com> + + PR target/47855 + gcc/ + * config/arm/arm-protos.h (thumb1_legitimate_address_p): New prototype. + * config/arm/arm.c (thumb1_legitimate_address_p): Remove the static + linkage. + * config/arm/constraints.md (Uu): New constraint. + * config/arm/arm.md (*arm_movqi_insn): Compute attr "length". + +=== modified file 'gcc/config/arm/arm-protos.h' +--- old/gcc/config/arm/arm-protos.h 2011-10-03 09:46:40 +0000 ++++ new/gcc/config/arm/arm-protos.h 2011-10-11 01:56:19 +0000 +@@ -59,6 +59,7 @@ + int); + extern rtx thumb_legitimize_reload_address (rtx *, enum machine_mode, int, int, + int); ++extern int thumb1_legitimate_address_p (enum machine_mode, rtx, int); + extern int arm_const_double_rtx (rtx); + extern int neg_const_double_rtx_ok_for_fpa (rtx); + extern int vfp3_const_double_rtx (rtx); + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-10-03 09:46:40 +0000 ++++ new/gcc/config/arm/arm.c 2011-10-11 02:31:01 +0000 +@@ -2065,6 +2065,28 @@ + fix_cm3_ldrd = 0; + } + ++ /* Enable -munaligned-access by default for ++ - all ARMv6 architecture-based processors ++ - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors. ++ ++ Disable -munaligned-access by default for ++ - all pre-ARMv6 architecture-based processors ++ - ARMv6-M architecture-based processors. 
*/ ++ ++ if (unaligned_access == 2) ++ { ++ if (arm_arch6 && (arm_arch_notm || arm_arch7)) ++ unaligned_access = 1; ++ else ++ unaligned_access = 0; ++ } ++ else if (unaligned_access == 1 ++ && !(arm_arch6 && (arm_arch_notm || arm_arch7))) ++ { ++ warning (0, "target CPU does not support unaligned accesses"); ++ unaligned_access = 0; ++ } ++ + if (TARGET_THUMB1 && flag_schedule_insns) + { + /* Don't warn since it's on by default in -O2. */ +@@ -6106,7 +6128,7 @@ + addresses based on the frame pointer or arg pointer until the + reload pass starts. This is so that eliminating such addresses + into stack based ones won't produce impossible code. */ +-static int ++int + thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p) + { + /* ??? Not clear if this is right. Experiment. */ +@@ -22226,6 +22248,10 @@ + val = 6; + asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val); + ++ /* Tag_CPU_unaligned_access. */ ++ asm_fprintf (asm_out_file, "\t.eabi_attribute 34, %d\n", ++ unaligned_access); ++ + /* Tag_ABI_FP_16bit_format. */ + if (arm_fp16_format) + asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n", + +=== modified file 'gcc/config/arm/arm.md' +--- old/gcc/config/arm/arm.md 2011-10-03 09:47:33 +0000 ++++ new/gcc/config/arm/arm.md 2011-10-11 02:31:01 +0000 +@@ -113,6 +113,10 @@ + (UNSPEC_SYMBOL_OFFSET 27) ; The offset of the start of the symbol from + ; another symbolic address. + (UNSPEC_MEMORY_BARRIER 28) ; Represent a memory barrier. ++ (UNSPEC_UNALIGNED_LOAD 29) ; Used to represent ldr/ldrh instructions that access ++ ; unaligned locations, on architectures which support ++ ; that. ++ (UNSPEC_UNALIGNED_STORE 30) ; Same for str/strh. + ] + ) + +@@ -2463,10 +2467,10 @@ + ;;; this insv pattern, so this pattern needs to be reevalutated. 
+ + (define_expand "insv" +- [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "") +- (match_operand:SI 1 "general_operand" "") +- (match_operand:SI 2 "general_operand" "")) +- (match_operand:SI 3 "reg_or_int_operand" ""))] ++ [(set (zero_extract (match_operand 0 "nonimmediate_operand" "") ++ (match_operand 1 "general_operand" "") ++ (match_operand 2 "general_operand" "")) ++ (match_operand 3 "reg_or_int_operand" ""))] + "TARGET_ARM || arm_arch_thumb2" + " + { +@@ -2477,35 +2481,70 @@ + + if (arm_arch_thumb2) + { +- bool use_bfi = TRUE; +- +- if (GET_CODE (operands[3]) == CONST_INT) +- { +- HOST_WIDE_INT val = INTVAL (operands[3]) & mask; +- +- if (val == 0) +- { +- emit_insn (gen_insv_zero (operands[0], operands[1], +- operands[2])); ++ if (unaligned_access && MEM_P (operands[0]) ++ && s_register_operand (operands[3], GET_MODE (operands[3])) ++ && (width == 16 || width == 32) && (start_bit % BITS_PER_UNIT) == 0) ++ { ++ rtx base_addr; ++ ++ if (BYTES_BIG_ENDIAN) ++ start_bit = GET_MODE_BITSIZE (GET_MODE (operands[3])) - width ++ - start_bit; ++ ++ if (width == 32) ++ { ++ base_addr = adjust_address (operands[0], SImode, ++ start_bit / BITS_PER_UNIT); ++ emit_insn (gen_unaligned_storesi (base_addr, operands[3])); ++ } ++ else ++ { ++ rtx tmp = gen_reg_rtx (HImode); ++ ++ base_addr = adjust_address (operands[0], HImode, ++ start_bit / BITS_PER_UNIT); ++ emit_move_insn (tmp, gen_lowpart (HImode, operands[3])); ++ emit_insn (gen_unaligned_storehi (base_addr, tmp)); ++ } ++ DONE; ++ } ++ else if (s_register_operand (operands[0], GET_MODE (operands[0]))) ++ { ++ bool use_bfi = TRUE; ++ ++ if (GET_CODE (operands[3]) == CONST_INT) ++ { ++ HOST_WIDE_INT val = INTVAL (operands[3]) & mask; ++ ++ if (val == 0) ++ { ++ emit_insn (gen_insv_zero (operands[0], operands[1], ++ operands[2])); ++ DONE; ++ } ++ ++ /* See if the set can be done with a single orr instruction. 
*/ ++ if (val == mask && const_ok_for_arm (val << start_bit)) ++ use_bfi = FALSE; ++ } ++ ++ if (use_bfi) ++ { ++ if (GET_CODE (operands[3]) != REG) ++ operands[3] = force_reg (SImode, operands[3]); ++ ++ emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2], ++ operands[3])); + DONE; + } +- +- /* See if the set can be done with a single orr instruction. */ +- if (val == mask && const_ok_for_arm (val << start_bit)) +- use_bfi = FALSE; +- } +- +- if (use_bfi) +- { +- if (GET_CODE (operands[3]) != REG) +- operands[3] = force_reg (SImode, operands[3]); +- +- emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2], +- operands[3])); +- DONE; +- } ++ } ++ else ++ FAIL; + } + ++ if (!s_register_operand (operands[0], GET_MODE (operands[0]))) ++ FAIL; ++ + target = copy_rtx (operands[0]); + /* Avoid using a subreg as a subtarget, and avoid writing a paradoxical + subreg as the final target. */ +@@ -3697,12 +3736,10 @@ + ;; to reduce register pressure later on. + + (define_expand "extzv" +- [(set (match_dup 4) +- (ashift:SI (match_operand:SI 1 "register_operand" "") +- (match_operand:SI 2 "const_int_operand" ""))) +- (set (match_operand:SI 0 "register_operand" "") +- (lshiftrt:SI (match_dup 4) +- (match_operand:SI 3 "const_int_operand" "")))] ++ [(set (match_operand 0 "s_register_operand" "") ++ (zero_extract (match_operand 1 "nonimmediate_operand" "") ++ (match_operand 2 "const_int_operand" "") ++ (match_operand 3 "const_int_operand" "")))] + "TARGET_THUMB1 || arm_arch_thumb2" + " + { +@@ -3711,10 +3748,57 @@ + + if (arm_arch_thumb2) + { +- emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2], +- operands[3])); +- DONE; ++ HOST_WIDE_INT width = INTVAL (operands[2]); ++ HOST_WIDE_INT bitpos = INTVAL (operands[3]); ++ ++ if (unaligned_access && MEM_P (operands[1]) ++ && (width == 16 || width == 32) && (bitpos % BITS_PER_UNIT) == 0) ++ { ++ rtx base_addr; ++ ++ if (BYTES_BIG_ENDIAN) ++ bitpos = GET_MODE_BITSIZE (GET_MODE (operands[0])) - width ++ - 
bitpos; ++ ++ if (width == 32) ++ { ++ base_addr = adjust_address (operands[1], SImode, ++ bitpos / BITS_PER_UNIT); ++ emit_insn (gen_unaligned_loadsi (operands[0], base_addr)); ++ } ++ else ++ { ++ rtx dest = operands[0]; ++ rtx tmp = gen_reg_rtx (SImode); ++ ++ /* We may get a paradoxical subreg here. Strip it off. */ ++ if (GET_CODE (dest) == SUBREG ++ && GET_MODE (dest) == SImode ++ && GET_MODE (SUBREG_REG (dest)) == HImode) ++ dest = SUBREG_REG (dest); ++ ++ if (GET_MODE_BITSIZE (GET_MODE (dest)) != width) ++ FAIL; ++ ++ base_addr = adjust_address (operands[1], HImode, ++ bitpos / BITS_PER_UNIT); ++ emit_insn (gen_unaligned_loadhiu (tmp, base_addr)); ++ emit_move_insn (gen_lowpart (SImode, dest), tmp); ++ } ++ DONE; ++ } ++ else if (s_register_operand (operands[1], GET_MODE (operands[1]))) ++ { ++ emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2], ++ operands[3])); ++ DONE; ++ } ++ else ++ FAIL; + } ++ ++ if (!s_register_operand (operands[1], GET_MODE (operands[1]))) ++ FAIL; + + operands[3] = GEN_INT (rshift); + +@@ -3724,12 +3808,154 @@ + DONE; + } + +- operands[2] = GEN_INT (lshift); +- operands[4] = gen_reg_rtx (SImode); ++ emit_insn (gen_extzv_t1 (operands[0], operands[1], GEN_INT (lshift), ++ operands[3], gen_reg_rtx (SImode))); ++ DONE; + }" + ) + +-(define_insn "extv" ++;; Helper for extzv, for the Thumb-1 register-shifts case. 
++ ++(define_expand "extzv_t1" ++ [(set (match_operand:SI 4 "s_register_operand" "") ++ (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "") ++ (match_operand:SI 2 "const_int_operand" ""))) ++ (set (match_operand:SI 0 "s_register_operand" "") ++ (lshiftrt:SI (match_dup 4) ++ (match_operand:SI 3 "const_int_operand" "")))] ++ "TARGET_THUMB1" ++ "") ++ ++(define_expand "extv" ++ [(set (match_operand 0 "s_register_operand" "") ++ (sign_extract (match_operand 1 "nonimmediate_operand" "") ++ (match_operand 2 "const_int_operand" "") ++ (match_operand 3 "const_int_operand" "")))] ++ "arm_arch_thumb2" ++{ ++ HOST_WIDE_INT width = INTVAL (operands[2]); ++ HOST_WIDE_INT bitpos = INTVAL (operands[3]); ++ ++ if (unaligned_access && MEM_P (operands[1]) && (width == 16 || width == 32) ++ && (bitpos % BITS_PER_UNIT) == 0) ++ { ++ rtx base_addr; ++ ++ if (BYTES_BIG_ENDIAN) ++ bitpos = GET_MODE_BITSIZE (GET_MODE (operands[0])) - width - bitpos; ++ ++ if (width == 32) ++ { ++ base_addr = adjust_address (operands[1], SImode, ++ bitpos / BITS_PER_UNIT); ++ emit_insn (gen_unaligned_loadsi (operands[0], base_addr)); ++ } ++ else ++ { ++ rtx dest = operands[0]; ++ rtx tmp = gen_reg_rtx (SImode); ++ ++ /* We may get a paradoxical subreg here. Strip it off. 
*/ ++ if (GET_CODE (dest) == SUBREG ++ && GET_MODE (dest) == SImode ++ && GET_MODE (SUBREG_REG (dest)) == HImode) ++ dest = SUBREG_REG (dest); ++ ++ if (GET_MODE_BITSIZE (GET_MODE (dest)) != width) ++ FAIL; ++ ++ base_addr = adjust_address (operands[1], HImode, ++ bitpos / BITS_PER_UNIT); ++ emit_insn (gen_unaligned_loadhis (tmp, base_addr)); ++ emit_move_insn (gen_lowpart (SImode, dest), tmp); ++ } ++ ++ DONE; ++ } ++ else if (!s_register_operand (operands[1], GET_MODE (operands[1]))) ++ FAIL; ++ else if (GET_MODE (operands[0]) == SImode ++ && GET_MODE (operands[1]) == SImode) ++ { ++ emit_insn (gen_extv_regsi (operands[0], operands[1], operands[2], ++ operands[3])); ++ DONE; ++ } ++ ++ FAIL; ++}) ++ ++; Helper to expand register forms of extv with the proper modes. ++ ++(define_expand "extv_regsi" ++ [(set (match_operand:SI 0 "s_register_operand" "") ++ (sign_extract:SI (match_operand:SI 1 "s_register_operand" "") ++ (match_operand 2 "const_int_operand" "") ++ (match_operand 3 "const_int_operand" "")))] ++ "" ++{ ++}) ++ ++; ARMv6+ unaligned load/store instructions (used for packed structure accesses). 
++ ++(define_insn "unaligned_loadsi" ++ [(set (match_operand:SI 0 "s_register_operand" "=l,r") ++ (unspec:SI [(match_operand:SI 1 "memory_operand" "Uw,m")] ++ UNSPEC_UNALIGNED_LOAD))] ++ "unaligned_access && TARGET_32BIT" ++ "ldr%?\t%0, %1\t@ unaligned" ++ [(set_attr "arch" "t2,any") ++ (set_attr "length" "2,4") ++ (set_attr "predicable" "yes") ++ (set_attr "type" "load1")]) ++ ++(define_insn "unaligned_loadhis" ++ [(set (match_operand:SI 0 "s_register_operand" "=l,r") ++ (sign_extend:SI ++ (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")] ++ UNSPEC_UNALIGNED_LOAD)))] ++ "unaligned_access && TARGET_32BIT" ++ "ldr%(sh%)\t%0, %1\t@ unaligned" ++ [(set_attr "arch" "t2,any") ++ (set_attr "length" "2,4") ++ (set_attr "predicable" "yes") ++ (set_attr "type" "load_byte")]) ++ ++(define_insn "unaligned_loadhiu" ++ [(set (match_operand:SI 0 "s_register_operand" "=l,r") ++ (zero_extend:SI ++ (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")] ++ UNSPEC_UNALIGNED_LOAD)))] ++ "unaligned_access && TARGET_32BIT" ++ "ldr%(h%)\t%0, %1\t@ unaligned" ++ [(set_attr "arch" "t2,any") ++ (set_attr "length" "2,4") ++ (set_attr "predicable" "yes") ++ (set_attr "type" "load_byte")]) ++ ++(define_insn "unaligned_storesi" ++ [(set (match_operand:SI 0 "memory_operand" "=Uw,m") ++ (unspec:SI [(match_operand:SI 1 "s_register_operand" "l,r")] ++ UNSPEC_UNALIGNED_STORE))] ++ "unaligned_access && TARGET_32BIT" ++ "str%?\t%1, %0\t@ unaligned" ++ [(set_attr "arch" "t2,any") ++ (set_attr "length" "2,4") ++ (set_attr "predicable" "yes") ++ (set_attr "type" "store1")]) ++ ++(define_insn "unaligned_storehi" ++ [(set (match_operand:HI 0 "memory_operand" "=Uw,m") ++ (unspec:HI [(match_operand:HI 1 "s_register_operand" "l,r")] ++ UNSPEC_UNALIGNED_STORE))] ++ "unaligned_access && TARGET_32BIT" ++ "str%(h%)\t%1, %0\t@ unaligned" ++ [(set_attr "arch" "t2,any") ++ (set_attr "length" "2,4") ++ (set_attr "predicable" "yes") ++ (set_attr "type" "store1")]) ++ ++(define_insn "*extv_reg" + [(set 
(match_operand:SI 0 "s_register_operand" "=r") + (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "const_int_operand" "M") +@@ -6038,8 +6264,8 @@ + + + (define_insn "*arm_movqi_insn" +- [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m") +- (match_operand:QI 1 "general_operand" "rI,K,m,r"))] ++ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,l,Uu,r,m") ++ (match_operand:QI 1 "general_operand" "rI,K,Uu,l,m,r"))] + "TARGET_32BIT + && ( register_operand (operands[0], QImode) + || register_operand (operands[1], QImode))" +@@ -6047,10 +6273,14 @@ + mov%?\\t%0, %1 + mvn%?\\t%0, #%B1 + ldr%(b%)\\t%0, %1 ++ str%(b%)\\t%1, %0 ++ ldr%(b%)\\t%0, %1 + str%(b%)\\t%1, %0" +- [(set_attr "type" "*,*,load1,store1") +- (set_attr "insn" "mov,mvn,*,*") +- (set_attr "predicable" "yes")] ++ [(set_attr "type" "*,*,load1,store1,load1,store1") ++ (set_attr "insn" "mov,mvn,*,*,*,*") ++ (set_attr "predicable" "yes") ++ (set_attr "arch" "any,any,t2,t2,any,any") ++ (set_attr "length" "4,4,2,2,4,4")] + ) + + (define_insn "*thumb1_movqi_insn" + +=== modified file 'gcc/config/arm/arm.opt' +--- old/gcc/config/arm/arm.opt 2011-09-19 07:44:24 +0000 ++++ new/gcc/config/arm/arm.opt 2011-10-11 02:31:01 +0000 +@@ -173,3 +173,7 @@ + Target Report Var(fix_cm3_ldrd) Init(2) + Avoid overlapping destination and address registers on LDRD instructions + that may trigger Cortex-M3 errata. ++ ++munaligned-access ++Target Report Var(unaligned_access) Init(2) ++Enable unaligned word and halfword accesses to packed data. + +=== modified file 'gcc/config/arm/constraints.md' +--- old/gcc/config/arm/constraints.md 2011-09-12 14:14:00 +0000 ++++ new/gcc/config/arm/constraints.md 2011-10-11 02:31:01 +0000 +@@ -36,6 +36,7 @@ + ;; The following memory constraints have been used: + ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us + ;; in ARM state: Uq ++;; in Thumb state: Uu, Uw + + + (define_register_constraint "f" "TARGET_ARM ? 
FPA_REGS : NO_REGS" +@@ -344,6 +345,27 @@ + (and (match_code "mem") + (match_test "REG_P (XEXP (op, 0))"))) + ++(define_memory_constraint "Uu" ++ "@internal ++ In Thumb state an address that is valid in 16bit encoding." ++ (and (match_code "mem") ++ (match_test "TARGET_THUMB ++ && thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0), ++ 0)"))) ++ ++; The 16-bit post-increment LDR/STR accepted by thumb1_legitimate_address_p ++; are actually LDM/STM instructions, so cannot be used to access unaligned ++; data. ++(define_memory_constraint "Uw" ++ "@internal ++ In Thumb state an address that is valid in 16bit encoding, and that can be ++ used for unaligned accesses." ++ (and (match_code "mem") ++ (match_test "TARGET_THUMB ++ && thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0), ++ 0) ++ && GET_CODE (XEXP (op, 0)) != POST_INC"))) ++ + ;; We used to have constraint letters for S and R in ARM state, but + ;; all uses of these now appear to have been removed. + + +=== modified file 'gcc/expmed.c' +--- old/gcc/expmed.c 2011-05-22 19:02:59 +0000 ++++ new/gcc/expmed.c 2011-10-11 02:31:01 +0000 +@@ -657,6 +657,10 @@ + && GET_MODE (value) != BLKmode + && bitsize > 0 + && GET_MODE_BITSIZE (op_mode) >= bitsize ++ /* Do not use insv for volatile bitfields when ++ -fstrict-volatile-bitfields is in effect. */ ++ && !(MEM_P (op0) && MEM_VOLATILE_P (op0) ++ && flag_strict_volatile_bitfields > 0) + && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG) + && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))) + && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize), +@@ -700,19 +704,21 @@ + copy_back = true; + } + +- /* On big-endian machines, we count bits from the most significant. +- If the bit field insn does not, we must invert. */ +- +- if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) +- xbitpos = unit - bitsize - xbitpos; +- + /* We have been counting XBITPOS within UNIT. + Count instead within the size of the register. 
*/ +- if (BITS_BIG_ENDIAN && !MEM_P (xop0)) ++ if (BYTES_BIG_ENDIAN && !MEM_P (xop0)) + xbitpos += GET_MODE_BITSIZE (op_mode) - unit; + + unit = GET_MODE_BITSIZE (op_mode); + ++ /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count ++ "backwards" from the size of the unit we are inserting into. ++ Otherwise, we count bits from the most significant on a ++ BYTES/BITS_BIG_ENDIAN machine. */ ++ ++ if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) ++ xbitpos = unit - bitsize - xbitpos; ++ + /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */ + value1 = value; + if (GET_MODE (value) != op_mode) +@@ -1528,6 +1534,10 @@ + if (ext_mode != MAX_MACHINE_MODE + && bitsize > 0 + && GET_MODE_BITSIZE (ext_mode) >= bitsize ++ /* Do not use extv/extzv for volatile bitfields when ++ -fstrict-volatile-bitfields is in effect. */ ++ && !(MEM_P (op0) && MEM_VOLATILE_P (op0) ++ && flag_strict_volatile_bitfields > 0) + /* If op0 is a register, we need it in EXT_MODE to make it + acceptable to the format of ext(z)v. */ + && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode) +@@ -1552,17 +1562,20 @@ + /* Get ref to first byte containing part of the field. */ + xop0 = adjust_address (xop0, byte_mode, xoffset); + +- /* On big-endian machines, we count bits from the most significant. +- If the bit field insn does not, we must invert. */ +- if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) +- xbitpos = unit - bitsize - xbitpos; +- + /* Now convert from counting within UNIT to counting in EXT_MODE. */ +- if (BITS_BIG_ENDIAN && !MEM_P (xop0)) ++ if (BYTES_BIG_ENDIAN && !MEM_P (xop0)) + xbitpos += GET_MODE_BITSIZE (ext_mode) - unit; + + unit = GET_MODE_BITSIZE (ext_mode); + ++ /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count ++ "backwards" from the size of the unit we are extracting from. ++ Otherwise, we count bits from the most significant on a ++ BYTES/BITS_BIG_ENDIAN machine. 
*/ ++ ++ if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) ++ xbitpos = unit - bitsize - xbitpos; ++ + if (xtarget == 0) + xtarget = xspec_target = gen_reg_rtx (tmode); + + diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106828.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106828.patch new file mode 100644 index 0000000000..3c0ff00856 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106828.patch @@ -0,0 +1,1951 @@ +2011-10-17 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-10-10 Richard Sandiford <richard.sandiford@linaro.org> + + * modulo-sched.c (ps_reg_move_info): Add num_consecutive_stages. + (SCHED_FIRST_REG_MOVE, SCHED_NREG_MOVES): Delete. + (node_sched_params): Remove first_reg_move and nreg_moves. + (ps_num_consecutive_stages, extend_node_sched_params): New functions. + (update_node_sched_params): Move up file. + (print_node_sched_params): Print the stage. Don't dump info related + to first_reg_move and nreg_moves. + (set_columns_for_row): New function. + (set_columns_for_ps): Move up file and use set_columns_for_row. + (schedule_reg_move): New function. + (schedule_reg_moves): Call extend_node_sched_params and + schedule_reg_move. Extend size of uses bitmap. Initialize + num_consecutive_stages. Return false if a move could not be + scheduled. + (apply_reg_moves): Don't emit moves here. + (permute_partial_schedule): Handle register moves. + (duplicate_insns_of_cycles): Remove for_prolog. Emit moves according + to the same stage-count test as ddg nodes. + (generate_prolog_epilog): Update calls accordingly. + (sms_schedule): Allow move-scheduling to add a new first stage. + +2011-10-17 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-10-10 Richard Sandiford <richard.sandiford@linaro.org> + + * modulo-sched.c (ps_insn): Adjust comment. + (ps_reg_move_info): New structure. 
+ (partial_schedule): Add reg_moves field. + (SCHED_PARAMS): Use node_sched_param_vec instead of node_sched_params. + (node_sched_params): Turn first_reg_move into an identifier. + (ps_reg_move): New function. + (ps_rtl_insn): Cope with register moves. + (ps_first_note): Adjust comment and assert that the instruction + isn't a register move. + (node_sched_params): Replace with... + (node_sched_param_vec): ...this vector. + (set_node_sched_params): Adjust accordingly. + (print_node_sched_params): Take a partial schedule instead of a ddg. + Use ps_rtl_insn and ps_reg_move. + (generate_reg_moves): Rename to... + (schedule_reg_moves): ...this. Remove rescan parameter. Record each + move in the partial schedule, but don't emit it here. Don't perform + register substitutions here either. + (apply_reg_moves): New function. + (duplicate_insns_of_cycles): Use register indices directly, + rather than finding instructions using PREV_INSN. Use ps_reg_move. + (sms_schedule): Call schedule_reg_moves before committing to + a partial schedule. Try the next ii if the schedule fails. + Use apply_reg_moves instead of generate_reg_moves. Adjust + call to print_node_sched_params. Free node_sched_param_vec + instead of node_sched_params. + (create_partial_schedule): Initialize reg_moves. + (free_partial_schedule): Free reg_moves. + +2011-10-17 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-10-10 Richard Sandiford <richard.sandiford@linaro.org> + + * modulo-sched.c (ps_insn): Replace node field with an identifier. + (SCHED_ASAP): Replace with.. + (NODE_ASAP): ...this macro. + (SCHED_PARAMS): New macro. + (SCHED_TIME, SCHED_FIRST_REG_MOVE, SCHED_NREG_MOVES, SCHED_ROW) + (SCHED_STAGE, SCHED_COLUMN): Redefine using SCHED_PARAMS. + (node_sched_params): Remove asap. + (ps_rtl_insn, ps_first_note): New functions. + (set_node_sched_params): Use XCNEWVEC. Don't copy across the + asap values. 
+ (print_node_sched_params): Use SCHED_PARAMS and NODE_ASAP. + (generate_reg_moves): Pass ids to the SCHED_* macros. + (update_node_sched_params): Take a ps insn identifier rather than + a node as parameter. Use ps_rtl_insn. + (set_columns_for_ps): Update for above field and SCHED_* macro changes. + (permute_partial_schedule): Use ps_rtl_insn and ps_first_note. + (optimize_sc): Update for above field and SCHED_* macro changes. + Update calls to try_scheduling_node_in_cycle and + update_node_sched_params. + (duplicate_insns_of_cycles): Adjust for above field and SCHED_* + macro changes. Use ps_rtl_insn and ps_first_note. + (sms_schedule): Pass ids to the SCHED_* macros. + (get_sched_window): Adjust for above field and SCHED_* macro changes. + Use NODE_ASAP instead of SCHED_ASAP. + (try_scheduling_node_in_cycle): Remove node parameter. Update + call to ps_add_node_check_conflicts. Pass ids to the SCHED_* + macros. + (sms_schedule_by_order): Update call to try_scheduling_node_in_cycle. + (ps_insert_empty_row): Adjust for above field changes. + (compute_split_row): Use ids rather than nodes. + (verify_partial_schedule): Adjust for above field changes. + (print_partial_schedule): Use ps_rtl_insn. + (create_ps_insn): Take an id rather than a node. + (ps_insn_find_column): Adjust for above field changes. + Use ps_rtl_insn. + (ps_insn_advance_column): Adjust for above field changes. + (add_node_to_ps): Remove node parameter. Update call to + create_ps_insn. + (ps_has_conflicts): Use ps_rtl_insn. + (ps_add_node_check_conflicts): Replace node parameter than an id. + +2011-10-17 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-10-10 Richard Sandiford <richard.sandiford@linaro.org> + + * modulo-sched.c (undo_replace_buff_elem): Delete. + (generate_reg_moves): Don't build and return an undo list. + (free_undo_replace_buff): Delete. + (sms_schedule): Adjust call to generate_reg_moves. + Don't call free_undo_replace_buff. 
+ +2011-10-17 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-08-08 Richard Sandiford <richard.sandiford@linaro.org> + + * modulo-sched.c (get_sched_window): Use a table for the debug output. + Print the current ii. + (sms_schedule_by_order): Reduce whitespace in dump line. + +2011-10-17 Richard Sandiford <richard.sandiford@linaro.org> + + gcc/ + Backport from mainline: + + 2011-08-08 Richard Sandiford <richard.sandiford@linaro.org> + + * modulo-sched.c (get_sched_window): Use just one loop for predecessors + and one loop for successors. Fix upper bound of memory range. + +=== modified file 'gcc/modulo-sched.c' +--- old/gcc/modulo-sched.c 2011-10-02 06:56:53 +0000 ++++ new/gcc/modulo-sched.c 2011-10-10 14:35:32 +0000 +@@ -124,8 +124,10 @@ + /* A single instruction in the partial schedule. */ + struct ps_insn + { +- /* The corresponding DDG_NODE. */ +- ddg_node_ptr node; ++ /* Identifies the instruction to be scheduled. Values smaller than ++ the ddg's num_nodes refer directly to ddg nodes. A value of ++ X - num_nodes refers to register move X. */ ++ int id; + + /* The (absolute) cycle in which the PS instruction is scheduled. + Same as SCHED_TIME (node). */ +@@ -137,6 +139,33 @@ + + }; + ++/* Information about a register move that has been added to a partial ++ schedule. */ ++struct ps_reg_move_info ++{ ++ /* The source of the move is defined by the ps_insn with id DEF. ++ The destination is used by the ps_insns with the ids in USES. */ ++ int def; ++ sbitmap uses; ++ ++ /* The original form of USES' instructions used OLD_REG, but they ++ should now use NEW_REG. */ ++ rtx old_reg; ++ rtx new_reg; ++ ++ /* The number of consecutive stages that the move occupies. */ ++ int num_consecutive_stages; ++ ++ /* An instruction that sets NEW_REG to the correct value. The first ++ move associated with DEF will have an rhs of OLD_REG; later moves ++ use the result of the previous move. 
*/ ++ rtx insn; ++}; ++ ++typedef struct ps_reg_move_info ps_reg_move_info; ++DEF_VEC_O (ps_reg_move_info); ++DEF_VEC_ALLOC_O (ps_reg_move_info, heap); ++ + /* Holds the partial schedule as an array of II rows. Each entry of the + array points to a linked list of PS_INSNs, which represents the + instructions that are scheduled for that row. */ +@@ -148,6 +177,10 @@ + /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */ + ps_insn_ptr *rows; + ++ /* All the moves added for this partial schedule. Index X has ++ a ps_insn id of X + g->num_nodes. */ ++ VEC (ps_reg_move_info, heap) *reg_moves; ++ + /* rows_length[i] holds the number of instructions in the row. + It is used only (as an optimization) to back off quickly from + trying to schedule a node in a full row; that is, to avoid running +@@ -165,17 +198,6 @@ + int stage_count; /* The stage count of the partial schedule. */ + }; + +-/* We use this to record all the register replacements we do in +- the kernel so we can undo SMS if it is not profitable. 
*/ +-struct undo_replace_buff_elem +-{ +- rtx insn; +- rtx orig_reg; +- rtx new_reg; +- struct undo_replace_buff_elem *next; +-}; +- +- + + static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history); + static void free_partial_schedule (partial_schedule_ptr); +@@ -183,9 +205,7 @@ + void print_partial_schedule (partial_schedule_ptr, FILE *); + static void verify_partial_schedule (partial_schedule_ptr, sbitmap); + static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr, +- ddg_node_ptr node, int cycle, +- sbitmap must_precede, +- sbitmap must_follow); ++ int, int, sbitmap, sbitmap); + static void rotate_partial_schedule (partial_schedule_ptr, int); + void set_row_column_for_ps (partial_schedule_ptr); + static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap); +@@ -201,43 +221,27 @@ + static void permute_partial_schedule (partial_schedule_ptr, rtx); + static void generate_prolog_epilog (partial_schedule_ptr, struct loop *, + rtx, rtx); +-static void duplicate_insns_of_cycles (partial_schedule_ptr, +- int, int, int, rtx); + static int calculate_stage_count (partial_schedule_ptr, int); + static void calculate_must_precede_follow (ddg_node_ptr, int, int, + int, int, sbitmap, sbitmap, sbitmap); + static int get_sched_window (partial_schedule_ptr, ddg_node_ptr, + sbitmap, int, int *, int *, int *); +-static bool try_scheduling_node_in_cycle (partial_schedule_ptr, ddg_node_ptr, +- int, int, sbitmap, int *, sbitmap, +- sbitmap); ++static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int, ++ sbitmap, int *, sbitmap, sbitmap); + static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr); + +-#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap) +-#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time) +-#define SCHED_FIRST_REG_MOVE(x) \ +- (((node_sched_params_ptr)(x)->aux.info)->first_reg_move) +-#define SCHED_NREG_MOVES(x) \ +- 
(((node_sched_params_ptr)(x)->aux.info)->nreg_moves) +-#define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row) +-#define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage) +-#define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column) ++#define NODE_ASAP(node) ((node)->aux.count) ++ ++#define SCHED_PARAMS(x) VEC_index (node_sched_params, node_sched_param_vec, x) ++#define SCHED_TIME(x) (SCHED_PARAMS (x)->time) ++#define SCHED_ROW(x) (SCHED_PARAMS (x)->row) ++#define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage) ++#define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column) + + /* The scheduling parameters held for each node. */ + typedef struct node_sched_params + { +- int asap; /* A lower-bound on the absolute scheduling cycle. */ +- int time; /* The absolute scheduling cycle (time >= asap). */ +- +- /* The following field (first_reg_move) is a pointer to the first +- register-move instruction added to handle the modulo-variable-expansion +- of the register defined by this node. This register-move copies the +- original register defined by the node. */ +- rtx first_reg_move; +- +- /* The number of register-move instructions added, immediately preceding +- first_reg_move. */ +- int nreg_moves; ++ int time; /* The absolute scheduling cycle. */ + + int row; /* Holds time % ii. */ + int stage; /* Holds time / ii. */ +@@ -247,6 +251,9 @@ + int column; + } *node_sched_params_ptr; + ++typedef struct node_sched_params node_sched_params; ++DEF_VEC_O (node_sched_params); ++DEF_VEC_ALLOC_O (node_sched_params, heap); + + /* The following three functions are copied from the current scheduler + code in order to use sched_analyze() for computing the dependencies. +@@ -296,6 +303,49 @@ + 0 + }; + ++/* Partial schedule instruction ID in PS is a register move. Return ++ information about it. 
*/ ++static struct ps_reg_move_info * ++ps_reg_move (partial_schedule_ptr ps, int id) ++{ ++ gcc_checking_assert (id >= ps->g->num_nodes); ++ return VEC_index (ps_reg_move_info, ps->reg_moves, id - ps->g->num_nodes); ++} ++ ++/* Return the rtl instruction that is being scheduled by partial schedule ++ instruction ID, which belongs to schedule PS. */ ++static rtx ++ps_rtl_insn (partial_schedule_ptr ps, int id) ++{ ++ if (id < ps->g->num_nodes) ++ return ps->g->nodes[id].insn; ++ else ++ return ps_reg_move (ps, id)->insn; ++} ++ ++/* Partial schedule instruction ID, which belongs to PS, occured in ++ the original (unscheduled) loop. Return the first instruction ++ in the loop that was associated with ps_rtl_insn (PS, ID). ++ If the instruction had some notes before it, this is the first ++ of those notes. */ ++static rtx ++ps_first_note (partial_schedule_ptr ps, int id) ++{ ++ gcc_assert (id < ps->g->num_nodes); ++ return ps->g->nodes[id].first_note; ++} ++ ++/* Return the number of consecutive stages that are occupied by ++ partial schedule instruction ID in PS. */ ++static int ++ps_num_consecutive_stages (partial_schedule_ptr ps, int id) ++{ ++ if (id < ps->g->num_nodes) ++ return 1; ++ else ++ return ps_reg_move (ps, id)->num_consecutive_stages; ++} ++ + /* Given HEAD and TAIL which are the first and last insns in a loop; + return the register which controls the loop. Return zero if it has + more than one occurrence in the loop besides the control part or the +@@ -396,35 +446,59 @@ + } + + +-/* Points to the array that contains the sched data for each node. */ +-static node_sched_params_ptr node_sched_params; ++/* A vector that contains the sched data for each ps_insn. */ ++static VEC (node_sched_params, heap) *node_sched_param_vec; + +-/* Allocate sched_params for each node and initialize it. Assumes that +- the aux field of each node contain the asap bound (computed earlier), +- and copies it into the sched_params field. 
*/ ++/* Allocate sched_params for each node and initialize it. */ + static void + set_node_sched_params (ddg_ptr g) + { +- int i; +- +- /* Allocate for each node in the DDG a place to hold the "sched_data". */ +- /* Initialize ASAP/ALAP/HIGHT to zero. */ +- node_sched_params = (node_sched_params_ptr) +- xcalloc (g->num_nodes, +- sizeof (struct node_sched_params)); +- +- /* Set the pointer of the general data of the node to point to the +- appropriate sched_params structure. */ +- for (i = 0; i < g->num_nodes; i++) +- { +- /* Watch out for aliasing problems? */ +- node_sched_params[i].asap = g->nodes[i].aux.count; +- g->nodes[i].aux.info = &node_sched_params[i]; +- } +-} +- +-static void +-print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g) ++ VEC_truncate (node_sched_params, node_sched_param_vec, 0); ++ VEC_safe_grow_cleared (node_sched_params, heap, ++ node_sched_param_vec, g->num_nodes); ++} ++ ++/* Make sure that node_sched_param_vec has an entry for every move in PS. */ ++static void ++extend_node_sched_params (partial_schedule_ptr ps) ++{ ++ VEC_safe_grow_cleared (node_sched_params, heap, node_sched_param_vec, ++ ps->g->num_nodes + VEC_length (ps_reg_move_info, ++ ps->reg_moves)); ++} ++ ++/* Update the sched_params (time, row and stage) for node U using the II, ++ the CYCLE of U and MIN_CYCLE. ++ We're not simply taking the following ++ SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii); ++ because the stages may not be aligned on cycle 0. */ ++static void ++update_node_sched_params (int u, int ii, int cycle, int min_cycle) ++{ ++ int sc_until_cycle_zero; ++ int stage; ++ ++ SCHED_TIME (u) = cycle; ++ SCHED_ROW (u) = SMODULO (cycle, ii); ++ ++ /* The calculation of stage count is done adding the number ++ of stages before cycle zero and after cycle zero. 
*/ ++ sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii); ++ ++ if (SCHED_TIME (u) < 0) ++ { ++ stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii); ++ SCHED_STAGE (u) = sc_until_cycle_zero - stage; ++ } ++ else ++ { ++ stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii); ++ SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1; ++ } ++} ++ ++static void ++print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps) + { + int i; + +@@ -432,22 +506,170 @@ + return; + for (i = 0; i < num_nodes; i++) + { +- node_sched_params_ptr nsp = &node_sched_params[i]; +- rtx reg_move = nsp->first_reg_move; +- int j; ++ node_sched_params_ptr nsp = SCHED_PARAMS (i); + + fprintf (file, "Node = %d; INSN = %d\n", i, +- (INSN_UID (g->nodes[i].insn))); +- fprintf (file, " asap = %d:\n", nsp->asap); ++ INSN_UID (ps_rtl_insn (ps, i))); ++ fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i])); + fprintf (file, " time = %d:\n", nsp->time); +- fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves); +- for (j = 0; j < nsp->nreg_moves; j++) ++ fprintf (file, " stage = %d:\n", nsp->stage); ++ } ++} ++ ++/* Set SCHED_COLUMN for each instruction in row ROW of PS. */ ++static void ++set_columns_for_row (partial_schedule_ptr ps, int row) ++{ ++ ps_insn_ptr cur_insn; ++ int column; ++ ++ column = 0; ++ for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row) ++ SCHED_COLUMN (cur_insn->id) = column++; ++} ++ ++/* Set SCHED_COLUMN for each instruction in PS. */ ++static void ++set_columns_for_ps (partial_schedule_ptr ps) ++{ ++ int row; ++ ++ for (row = 0; row < ps->ii; row++) ++ set_columns_for_row (ps, row); ++} ++ ++/* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS. ++ Its single predecessor has already been scheduled, as has its ++ ddg node successors. (The move may have also another move as its ++ successor, in which case that successor will be scheduled later.) 
++ ++ The move is part of a chain that satisfies register dependencies ++ between a producing ddg node and various consuming ddg nodes. ++ If some of these dependencies have a distance of 1 (meaning that ++ the use is upward-exposoed) then DISTANCE1_USES is nonnull and ++ contains the set of uses with distance-1 dependencies. ++ DISTANCE1_USES is null otherwise. ++ ++ MUST_FOLLOW is a scratch bitmap that is big enough to hold ++ all current ps_insn ids. ++ ++ Return true on success. */ ++static bool ++schedule_reg_move (partial_schedule_ptr ps, int i_reg_move, ++ sbitmap distance1_uses, sbitmap must_follow) ++{ ++ unsigned int u; ++ int this_time, this_distance, this_start, this_end, this_latency; ++ int start, end, c, ii; ++ sbitmap_iterator sbi; ++ ps_reg_move_info *move; ++ rtx this_insn; ++ ps_insn_ptr psi; ++ ++ move = ps_reg_move (ps, i_reg_move); ++ ii = ps->ii; ++ if (dump_file) ++ { ++ fprintf (dump_file, "Scheduling register move INSN %d; ii = %d" ++ ", min cycle = %d\n\n", INSN_UID (move->insn), ii, ++ PS_MIN_CYCLE (ps)); ++ print_rtl_single (dump_file, move->insn); ++ fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time"); ++ fprintf (dump_file, "=========== =========== =====\n"); ++ } ++ ++ start = INT_MIN; ++ end = INT_MAX; ++ ++ /* For dependencies of distance 1 between a producer ddg node A ++ and consumer ddg node B, we have a chain of dependencies: ++ ++ A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B ++ ++ where Mi is the ith move. For dependencies of distance 0 between ++ a producer ddg node A and consumer ddg node C, we have a chain of ++ dependencies: ++ ++ A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C ++ ++ where Mi' occupies the same position as Mi but occurs a stage later. ++ We can only schedule each move once, so if we have both types of ++ chain, we model the second as: ++ ++ A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... 
--(T,Ln',-1)--> C ++ ++ First handle the dependencies between the previously-scheduled ++ predecessor and the move. */ ++ this_insn = ps_rtl_insn (ps, move->def); ++ this_latency = insn_latency (this_insn, move->insn); ++ this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0; ++ this_time = SCHED_TIME (move->def) - this_distance * ii; ++ this_start = this_time + this_latency; ++ this_end = this_time + ii; ++ if (dump_file) ++ fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n", ++ this_start, this_end, SCHED_TIME (move->def), ++ INSN_UID (this_insn), this_latency, this_distance, ++ INSN_UID (move->insn)); ++ ++ if (start < this_start) ++ start = this_start; ++ if (end > this_end) ++ end = this_end; ++ ++ /* Handle the dependencies between the move and previously-scheduled ++ successors. */ ++ EXECUTE_IF_SET_IN_SBITMAP (move->uses, 0, u, sbi) ++ { ++ this_insn = ps_rtl_insn (ps, u); ++ this_latency = insn_latency (move->insn, this_insn); ++ if (distance1_uses && !TEST_BIT (distance1_uses, u)) ++ this_distance = -1; ++ else ++ this_distance = 0; ++ this_time = SCHED_TIME (u) + this_distance * ii; ++ this_start = this_time - ii; ++ this_end = this_time - this_latency; ++ if (dump_file) ++ fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n", ++ this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn), ++ this_latency, this_distance, INSN_UID (this_insn)); ++ ++ if (start < this_start) ++ start = this_start; ++ if (end > this_end) ++ end = this_end; ++ } ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "----------- ----------- -----\n"); ++ fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)"); ++ } ++ ++ sbitmap_zero (must_follow); ++ SET_BIT (must_follow, move->def); ++ ++ start = MAX (start, end - (ii - 1)); ++ for (c = end; c >= start; c--) ++ { ++ psi = ps_add_node_check_conflicts (ps, i_reg_move, c, ++ move->uses, must_follow); ++ if (psi) + { +- fprintf (file, " reg_move = "); +- print_rtl_single (file, 
reg_move); +- reg_move = PREV_INSN (reg_move); ++ update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps)); ++ if (dump_file) ++ fprintf (dump_file, "\nScheduled register move INSN %d at" ++ " time %d, row %d\n\n", INSN_UID (move->insn), c, ++ SCHED_ROW (i_reg_move)); ++ return true; + } + } ++ ++ if (dump_file) ++ fprintf (dump_file, "\nNo available slot\n\n"); ++ ++ return false; + } + + /* +@@ -461,22 +683,23 @@ + nreg_moves = ----------------------------------- + 1 - { dependence. + ii { 1 if not. + */ +-static struct undo_replace_buff_elem * +-generate_reg_moves (partial_schedule_ptr ps, bool rescan) ++static bool ++schedule_reg_moves (partial_schedule_ptr ps) + { + ddg_ptr g = ps->g; + int ii = ps->ii; + int i; +- struct undo_replace_buff_elem *reg_move_replaces = NULL; + + for (i = 0; i < g->num_nodes; i++) + { + ddg_node_ptr u = &g->nodes[i]; + ddg_edge_ptr e; + int nreg_moves = 0, i_reg_move; +- sbitmap *uses_of_defs; +- rtx last_reg_move; + rtx prev_reg, old_reg; ++ int first_move; ++ int distances[2]; ++ sbitmap must_follow; ++ sbitmap distance1_uses; + rtx set = single_set (u->insn); + + /* Skip instructions that do not set a register. */ +@@ -485,18 +708,21 @@ + + /* Compute the number of reg_moves needed for u, by looking at life + ranges started at u (excluding self-loops). */ ++ distances[0] = distances[1] = false; + for (e = u->out; e; e = e->next_out) + if (e->type == TRUE_DEP && e->dest != e->src) + { +- int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii; ++ int nreg_moves4e = (SCHED_TIME (e->dest->cuid) ++ - SCHED_TIME (e->src->cuid)) / ii; + + if (e->distance == 1) +- nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii; ++ nreg_moves4e = (SCHED_TIME (e->dest->cuid) ++ - SCHED_TIME (e->src->cuid) + ii) / ii; + + /* If dest precedes src in the schedule of the kernel, then dest + will read before src writes and we can save one reg_copy. 
*/ +- if (SCHED_ROW (e->dest) == SCHED_ROW (e->src) +- && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src)) ++ if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid) ++ && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid)) + nreg_moves4e--; + + if (nreg_moves4e >= 1) +@@ -513,125 +739,105 @@ + gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn)); + } + ++ if (nreg_moves4e) ++ { ++ gcc_assert (e->distance < 2); ++ distances[e->distance] = true; ++ } + nreg_moves = MAX (nreg_moves, nreg_moves4e); + } + + if (nreg_moves == 0) + continue; + ++ /* Create NREG_MOVES register moves. */ ++ first_move = VEC_length (ps_reg_move_info, ps->reg_moves); ++ VEC_safe_grow_cleared (ps_reg_move_info, heap, ps->reg_moves, ++ first_move + nreg_moves); ++ extend_node_sched_params (ps); ++ ++ /* Record the moves associated with this node. */ ++ first_move += ps->g->num_nodes; ++ ++ /* Generate each move. */ ++ old_reg = prev_reg = SET_DEST (single_set (u->insn)); ++ for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++) ++ { ++ ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move); ++ ++ move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i; ++ move->uses = sbitmap_alloc (first_move + nreg_moves); ++ move->old_reg = old_reg; ++ move->new_reg = gen_reg_rtx (GET_MODE (prev_reg)); ++ move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1; ++ move->insn = gen_move_insn (move->new_reg, copy_rtx (prev_reg)); ++ sbitmap_zero (move->uses); ++ ++ prev_reg = move->new_reg; ++ } ++ ++ distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL; ++ + /* Every use of the register defined by node may require a different + copy of this register, depending on the time the use is scheduled. +- Set a bitmap vector, telling which nodes use each copy of this +- register. */ +- uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes); +- sbitmap_vector_zero (uses_of_defs, nreg_moves); ++ Record which uses require which move results. 
*/ + for (e = u->out; e; e = e->next_out) + if (e->type == TRUE_DEP && e->dest != e->src) + { +- int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii; ++ int dest_copy = (SCHED_TIME (e->dest->cuid) ++ - SCHED_TIME (e->src->cuid)) / ii; + + if (e->distance == 1) +- dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii; ++ dest_copy = (SCHED_TIME (e->dest->cuid) ++ - SCHED_TIME (e->src->cuid) + ii) / ii; + +- if (SCHED_ROW (e->dest) == SCHED_ROW (e->src) +- && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src)) ++ if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid) ++ && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid)) + dest_copy--; + + if (dest_copy) +- SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid); ++ { ++ ps_reg_move_info *move; ++ ++ move = ps_reg_move (ps, first_move + dest_copy - 1); ++ SET_BIT (move->uses, e->dest->cuid); ++ if (e->distance == 1) ++ SET_BIT (distance1_uses, e->dest->cuid); ++ } + } + +- /* Now generate the reg_moves, attaching relevant uses to them. */ +- SCHED_NREG_MOVES (u) = nreg_moves; +- old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn))); +- /* Insert the reg-moves right before the notes which precede +- the insn they relates to. */ +- last_reg_move = u->first_note; +- ++ must_follow = sbitmap_alloc (first_move + nreg_moves); + for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++) ++ if (!schedule_reg_move (ps, first_move + i_reg_move, ++ distance1_uses, must_follow)) ++ break; ++ sbitmap_free (must_follow); ++ if (distance1_uses) ++ sbitmap_free (distance1_uses); ++ if (i_reg_move < nreg_moves) ++ return false; ++ } ++ return true; ++} ++ ++/* Emit the moves associatied with PS. Apply the substitutions ++ associated with them. 
*/ ++static void ++apply_reg_moves (partial_schedule_ptr ps) ++{ ++ ps_reg_move_info *move; ++ int i; ++ ++ FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move) ++ { ++ unsigned int i_use; ++ sbitmap_iterator sbi; ++ ++ EXECUTE_IF_SET_IN_SBITMAP (move->uses, 0, i_use, sbi) + { +- unsigned int i_use = 0; +- rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg)); +- rtx reg_move = gen_move_insn (new_reg, prev_reg); +- sbitmap_iterator sbi; +- +- add_insn_before (reg_move, last_reg_move, NULL); +- last_reg_move = reg_move; +- +- if (!SCHED_FIRST_REG_MOVE (u)) +- SCHED_FIRST_REG_MOVE (u) = reg_move; +- +- EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi) +- { +- struct undo_replace_buff_elem *rep; +- +- rep = (struct undo_replace_buff_elem *) +- xcalloc (1, sizeof (struct undo_replace_buff_elem)); +- rep->insn = g->nodes[i_use].insn; +- rep->orig_reg = old_reg; +- rep->new_reg = new_reg; +- +- if (! reg_move_replaces) +- reg_move_replaces = rep; +- else +- { +- rep->next = reg_move_replaces; +- reg_move_replaces = rep; +- } +- +- replace_rtx (g->nodes[i_use].insn, old_reg, new_reg); +- if (rescan) +- df_insn_rescan (g->nodes[i_use].insn); +- } +- +- prev_reg = new_reg; ++ replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg); ++ df_insn_rescan (ps->g->nodes[i_use].insn); + } +- sbitmap_vector_free (uses_of_defs); +- } +- return reg_move_replaces; +-} +- +-/* Free memory allocated for the undo buffer. */ +-static void +-free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces) +-{ +- +- while (reg_move_replaces) +- { +- struct undo_replace_buff_elem *rep = reg_move_replaces; +- +- reg_move_replaces = reg_move_replaces->next; +- free (rep); +- } +-} +- +-/* Update the sched_params (time, row and stage) for node U using the II, +- the CYCLE of U and MIN_CYCLE. +- We're not simply taking the following +- SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii); +- because the stages may not be aligned on cycle 0. 
*/ +-static void +-update_node_sched_params (ddg_node_ptr u, int ii, int cycle, int min_cycle) +-{ +- int sc_until_cycle_zero; +- int stage; +- +- SCHED_TIME (u) = cycle; +- SCHED_ROW (u) = SMODULO (cycle, ii); +- +- /* The calculation of stage count is done adding the number +- of stages before cycle zero and after cycle zero. */ +- sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii); +- +- if (SCHED_TIME (u) < 0) +- { +- stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii); +- SCHED_STAGE (u) = sc_until_cycle_zero - stage; +- } +- else +- { +- stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii); +- SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1; + } + } + +@@ -647,18 +853,19 @@ + for (row = 0; row < ii; row++) + for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) + { +- ddg_node_ptr u = crr_insn->node; ++ int u = crr_insn->id; + int normalized_time = SCHED_TIME (u) - amount; + int new_min_cycle = PS_MIN_CYCLE (ps) - amount; + + if (dump_file) + { + /* Print the scheduling times after the rotation. */ ++ rtx insn = ps_rtl_insn (ps, u); ++ + fprintf (dump_file, "crr_insn->node=%d (insn id %d), " +- "crr_insn->cycle=%d, min_cycle=%d", crr_insn->node->cuid, +- INSN_UID (crr_insn->node->insn), normalized_time, +- new_min_cycle); +- if (JUMP_P (crr_insn->node->insn)) ++ "crr_insn->cycle=%d, min_cycle=%d", u, ++ INSN_UID (insn), normalized_time, new_min_cycle); ++ if (JUMP_P (insn)) + fprintf (dump_file, " (branch)"); + fprintf (dump_file, "\n"); + } +@@ -671,22 +878,6 @@ + } + } + +-/* Set SCHED_COLUMN of each node according to its position in PS. 
*/ +-static void +-set_columns_for_ps (partial_schedule_ptr ps) +-{ +- int row; +- +- for (row = 0; row < ps->ii; row++) +- { +- ps_insn_ptr cur_insn = ps->rows[row]; +- int column = 0; +- +- for (; cur_insn; cur_insn = cur_insn->next_in_row) +- SCHED_COLUMN (cur_insn->node) = column++; +- } +-} +- + /* Permute the insns according to their order in PS, from row 0 to + row ii-1, and position them right before LAST. This schedules + the insns of the loop kernel. */ +@@ -699,9 +890,18 @@ + + for (row = 0; row < ii ; row++) + for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row) +- if (PREV_INSN (last) != ps_ij->node->insn) +- reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn, +- PREV_INSN (last)); ++ { ++ rtx insn = ps_rtl_insn (ps, ps_ij->id); ++ ++ if (PREV_INSN (last) != insn) ++ { ++ if (ps_ij->id < ps->g->num_nodes) ++ reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn, ++ PREV_INSN (last)); ++ else ++ add_insn_before (insn, last, NULL); ++ } ++ } + } + + /* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE +@@ -750,7 +950,7 @@ + to row ii-1. If they are equal just bail out. 
*/ + stage_count = calculate_stage_count (ps, amount); + stage_count_curr = +- calculate_stage_count (ps, SCHED_TIME (g->closing_branch) - (ii - 1)); ++ calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1)); + + if (stage_count == stage_count_curr) + { +@@ -779,7 +979,7 @@ + print_partial_schedule (ps, dump_file); + } + +- if (SMODULO (SCHED_TIME (g->closing_branch), ii) == ii - 1) ++ if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1) + { + ok = true; + goto clear; +@@ -794,7 +994,7 @@ + { + bool success; + ps_insn_ptr next_ps_i; +- int branch_cycle = SCHED_TIME (g->closing_branch); ++ int branch_cycle = SCHED_TIME (g->closing_branch->cuid); + int row = SMODULO (branch_cycle, ps->ii); + int num_splits = 0; + sbitmap must_precede, must_follow, tmp_precede, tmp_follow; +@@ -850,13 +1050,12 @@ + branch so we can remove it from it's current cycle. */ + for (next_ps_i = ps->rows[row]; + next_ps_i; next_ps_i = next_ps_i->next_in_row) +- if (next_ps_i->node->cuid == g->closing_branch->cuid) ++ if (next_ps_i->id == g->closing_branch->cuid) + break; + + remove_node_from_ps (ps, next_ps_i); + success = +- try_scheduling_node_in_cycle (ps, g->closing_branch, +- g->closing_branch->cuid, c, ++ try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c, + sched_nodes, &num_splits, + tmp_precede, tmp_follow); + gcc_assert (num_splits == 0); +@@ -874,8 +1073,7 @@ + must_precede, branch_cycle, start, end, + step); + success = +- try_scheduling_node_in_cycle (ps, g->closing_branch, +- g->closing_branch->cuid, ++ try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, + branch_cycle, sched_nodes, + &num_splits, tmp_precede, + tmp_follow); +@@ -889,7 +1087,7 @@ + fprintf (dump_file, + "SMS success in moving branch to cycle %d\n", c); + +- update_node_sched_params (g->closing_branch, ii, c, ++ update_node_sched_params (g->closing_branch->cuid, ii, c, + PS_MIN_CYCLE (ps)); + ok = true; + } +@@ -905,7 +1103,7 @@ + + static void + 
duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage, +- int to_stage, int for_prolog, rtx count_reg) ++ int to_stage, rtx count_reg) + { + int row; + ps_insn_ptr ps_ij; +@@ -913,9 +1111,9 @@ + for (row = 0; row < ps->ii; row++) + for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row) + { +- ddg_node_ptr u_node = ps_ij->node; +- int j, i_reg_moves; +- rtx reg_move = NULL_RTX; ++ int u = ps_ij->id; ++ int first_u, last_u; ++ rtx u_insn; + + /* Do not duplicate any insn which refers to count_reg as it + belongs to the control part. +@@ -923,52 +1121,20 @@ + be ignored. + TODO: This should be done by analyzing the control part of + the loop. */ +- if (reg_mentioned_p (count_reg, u_node->insn) +- || JUMP_P (ps_ij->node->insn)) ++ u_insn = ps_rtl_insn (ps, u); ++ if (reg_mentioned_p (count_reg, u_insn) ++ || JUMP_P (u_insn)) + continue; + +- if (for_prolog) +- { +- /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing +- number of reg_moves starting with the second occurrence of +- u_node, which is generated if its SCHED_STAGE <= to_stage. */ +- i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1; +- i_reg_moves = MAX (i_reg_moves, 0); +- i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node)); +- +- /* The reg_moves start from the *first* reg_move backwards. */ +- if (i_reg_moves) +- { +- reg_move = SCHED_FIRST_REG_MOVE (u_node); +- for (j = 1; j < i_reg_moves; j++) +- reg_move = PREV_INSN (reg_move); +- } +- } +- else /* It's for the epilog. */ +- { +- /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves, +- starting to decrease one stage after u_node no longer occurs; +- that is, generate all reg_moves until +- SCHED_STAGE (u_node) == from_stage - 1. */ +- i_reg_moves = SCHED_NREG_MOVES (u_node) +- - (from_stage - SCHED_STAGE (u_node) - 1); +- i_reg_moves = MAX (i_reg_moves, 0); +- i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node)); +- +- /* The reg_moves start from the *last* reg_move forwards. 
*/ +- if (i_reg_moves) +- { +- reg_move = SCHED_FIRST_REG_MOVE (u_node); +- for (j = 1; j < SCHED_NREG_MOVES (u_node); j++) +- reg_move = PREV_INSN (reg_move); +- } +- } +- +- for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move)) +- emit_insn (copy_rtx (PATTERN (reg_move))); +- if (SCHED_STAGE (u_node) >= from_stage +- && SCHED_STAGE (u_node) <= to_stage) +- duplicate_insn_chain (u_node->first_note, u_node->insn); ++ first_u = SCHED_STAGE (u); ++ last_u = first_u + ps_num_consecutive_stages (ps, u) - 1; ++ if (from_stage <= last_u && to_stage >= first_u) ++ { ++ if (u < ps->g->num_nodes) ++ duplicate_insn_chain (ps_first_note (ps, u), u_insn); ++ else ++ emit_insn (copy_rtx (PATTERN (u_insn))); ++ } + } + } + +@@ -1002,7 +1168,7 @@ + } + + for (i = 0; i < last_stage; i++) +- duplicate_insns_of_cycles (ps, 0, i, 1, count_reg); ++ duplicate_insns_of_cycles (ps, 0, i, count_reg); + + /* Put the prolog on the entry edge. */ + e = loop_preheader_edge (loop); +@@ -1014,7 +1180,7 @@ + start_sequence (); + + for (i = 0; i < last_stage; i++) +- duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg); ++ duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg); + + /* Put the epilogue on the exit edge. */ + gcc_assert (single_exit (loop)); +@@ -1350,10 +1516,9 @@ + { + rtx head, tail; + rtx count_reg, count_init; +- int mii, rec_mii; +- unsigned stage_count = 0; ++ int mii, rec_mii, stage_count, min_cycle; + HOST_WIDEST_INT loop_count = 0; +- bool opt_sc_p = false; ++ bool opt_sc_p; + + if (! (g = g_arr[loop->num])) + continue; +@@ -1430,62 +1595,63 @@ + fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n", + rec_mii, mii, maxii); + +- /* After sms_order_nodes and before sms_schedule_by_order, to copy over +- ASAP. 
*/ +- set_node_sched_params (g); +- +- ps = sms_schedule_by_order (g, mii, maxii, node_order); +- +- if (ps) ++ for (;;) + { +- /* Try to achieve optimized SC by normalizing the partial +- schedule (having the cycles start from cycle zero). +- The branch location must be placed in row ii-1 in the +- final scheduling. If failed, shift all instructions to +- position the branch in row ii-1. */ +- opt_sc_p = optimize_sc (ps, g); +- if (opt_sc_p) +- stage_count = calculate_stage_count (ps, 0); +- else ++ set_node_sched_params (g); ++ ++ stage_count = 0; ++ opt_sc_p = false; ++ ps = sms_schedule_by_order (g, mii, maxii, node_order); ++ ++ if (ps) + { +- /* Bring the branch to cycle ii-1. */ +- int amount = SCHED_TIME (g->closing_branch) - (ps->ii - 1); ++ /* Try to achieve optimized SC by normalizing the partial ++ schedule (having the cycles start from cycle zero). ++ The branch location must be placed in row ii-1 in the ++ final scheduling. If failed, shift all instructions to ++ position the branch in row ii-1. */ ++ opt_sc_p = optimize_sc (ps, g); ++ if (opt_sc_p) ++ stage_count = calculate_stage_count (ps, 0); ++ else ++ { ++ /* Bring the branch to cycle ii-1. */ ++ int amount = (SCHED_TIME (g->closing_branch->cuid) ++ - (ps->ii - 1)); + ++ if (dump_file) ++ fprintf (dump_file, "SMS schedule branch at cycle ii-1\n"); ++ ++ stage_count = calculate_stage_count (ps, amount); ++ } ++ ++ gcc_assert (stage_count >= 1); ++ } ++ ++ /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of ++ 1 means that there is no interleaving between iterations thus ++ we let the scheduling passes do the job in this case. 
*/ ++ if (stage_count < PARAM_VALUE (PARAM_SMS_MIN_SC) ++ || (count_init && (loop_count <= stage_count)) ++ || (flag_branch_probabilities && (trip_count <= stage_count))) ++ { + if (dump_file) +- fprintf (dump_file, "SMS schedule branch at cycle ii-1\n"); +- +- stage_count = calculate_stage_count (ps, amount); +- } +- +- gcc_assert (stage_count >= 1); +- PS_STAGE_COUNT (ps) = stage_count; +- } +- +- /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of +- 1 means that there is no interleaving between iterations thus +- we let the scheduling passes do the job in this case. */ +- if (stage_count < (unsigned) PARAM_VALUE (PARAM_SMS_MIN_SC) +- || (count_init && (loop_count <= stage_count)) +- || (flag_branch_probabilities && (trip_count <= stage_count))) +- { +- if (dump_file) +- { +- fprintf (dump_file, "SMS failed... \n"); +- fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count); +- fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count); +- fprintf (dump_file, ", trip-count="); +- fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count); +- fprintf (dump_file, ")\n"); +- } +- } +- else +- { +- struct undo_replace_buff_elem *reg_move_replaces; ++ { ++ fprintf (dump_file, "SMS failed... \n"); ++ fprintf (dump_file, "SMS sched-failed (stage-count=%d," ++ " loop-count=", stage_count); ++ fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count); ++ fprintf (dump_file, ", trip-count="); ++ fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count); ++ fprintf (dump_file, ")\n"); ++ } ++ break; ++ } + + if (!opt_sc_p) + { + /* Rotate the partial schedule to have the branch in row ii-1. 
*/ +- int amount = SCHED_TIME (g->closing_branch) - (ps->ii - 1); ++ int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1); + + reset_sched_times (ps, amount); + rotate_partial_schedule (ps, amount); +@@ -1493,6 +1659,29 @@ + + set_columns_for_ps (ps); + ++ min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii); ++ if (!schedule_reg_moves (ps)) ++ { ++ mii = ps->ii + 1; ++ free_partial_schedule (ps); ++ continue; ++ } ++ ++ /* Moves that handle incoming values might have been added ++ to a new first stage. Bump the stage count if so. ++ ++ ??? Perhaps we could consider rotating the schedule here ++ instead? */ ++ if (PS_MIN_CYCLE (ps) < min_cycle) ++ { ++ reset_sched_times (ps, 0); ++ stage_count++; ++ } ++ ++ /* The stage count should now be correct without rotation. */ ++ gcc_checking_assert (stage_count == calculate_stage_count (ps, 0)); ++ PS_STAGE_COUNT (ps) = stage_count; ++ + canon_loop (loop); + + if (dump_file) +@@ -1531,17 +1720,16 @@ + /* The life-info is not valid any more. */ + df_set_bb_dirty (g->bb); + +- reg_move_replaces = generate_reg_moves (ps, true); ++ apply_reg_moves (ps); + if (dump_file) +- print_node_sched_params (dump_file, g->num_nodes, g); ++ print_node_sched_params (dump_file, g->num_nodes, ps); + /* Generate prolog and epilog. 
*/ + generate_prolog_epilog (ps, loop, count_reg, count_init); +- +- free_undo_replace_buff (reg_move_replaces); ++ break; + } + + free_partial_schedule (ps); +- free (node_sched_params); ++ VEC_free (node_sched_params, heap, node_sched_param_vec); + free (node_order); + free_ddg (g); + } +@@ -1643,9 +1831,11 @@ + + static int + get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node, +- sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p) ++ sbitmap sched_nodes, int ii, int *start_p, int *step_p, ++ int *end_p) + { + int start, step, end; ++ int early_start, late_start; + ddg_edge_ptr e; + sbitmap psp = sbitmap_alloc (ps->g->num_nodes); + sbitmap pss = sbitmap_alloc (ps->g->num_nodes); +@@ -1653,6 +1843,8 @@ + sbitmap u_node_succs = NODE_SUCCESSORS (u_node); + int psp_not_empty; + int pss_not_empty; ++ int count_preds; ++ int count_succs; + + /* 1. compute sched window for u (start, end, step). */ + sbitmap_zero (psp); +@@ -1660,214 +1852,119 @@ + psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes); + pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes); + +- if (psp_not_empty && !pss_not_empty) +- { +- int early_start = INT_MIN; +- +- end = INT_MAX; +- for (e = u_node->in; e != 0; e = e->next_in) +- { +- ddg_node_ptr v_node = e->src; +- +- if (dump_file) +- { +- fprintf (dump_file, "\nProcessing edge: "); +- print_ddg_edge (dump_file, e); +- fprintf (dump_file, +- "\nScheduling %d (%d) in psp_not_empty," +- " checking p %d (%d): ", u_node->cuid, +- INSN_UID (u_node->insn), v_node->cuid, INSN_UID +- (v_node->insn)); +- } +- +- if (TEST_BIT (sched_nodes, v_node->cuid)) +- { +- int p_st = SCHED_TIME (v_node); +- +- early_start = +- MAX (early_start, p_st + e->latency - (e->distance * ii)); +- +- if (dump_file) +- fprintf (dump_file, +- "pred st = %d; early_start = %d; latency: %d", +- p_st, early_start, e->latency); +- +- if (e->data_type == MEM_DEP) +- end = MIN (end, SCHED_TIME (v_node) + ii - 1); +- } +- else 
if (dump_file) +- fprintf (dump_file, "the node is not scheduled\n"); +- } +- start = early_start; +- end = MIN (end, early_start + ii); +- /* Schedule the node close to it's predecessors. */ +- step = 1; +- +- if (dump_file) +- fprintf (dump_file, +- "\nScheduling %d (%d) in a window (%d..%d) with step %d\n", +- u_node->cuid, INSN_UID (u_node->insn), start, end, step); +- } +- +- else if (!psp_not_empty && pss_not_empty) +- { +- int late_start = INT_MAX; +- +- end = INT_MIN; +- for (e = u_node->out; e != 0; e = e->next_out) +- { +- ddg_node_ptr v_node = e->dest; +- +- if (dump_file) +- { +- fprintf (dump_file, "\nProcessing edge:"); +- print_ddg_edge (dump_file, e); +- fprintf (dump_file, +- "\nScheduling %d (%d) in pss_not_empty," +- " checking s %d (%d): ", u_node->cuid, +- INSN_UID (u_node->insn), v_node->cuid, INSN_UID +- (v_node->insn)); +- } +- +- if (TEST_BIT (sched_nodes, v_node->cuid)) +- { +- int s_st = SCHED_TIME (v_node); +- +- late_start = MIN (late_start, +- s_st - e->latency + (e->distance * ii)); +- +- if (dump_file) +- fprintf (dump_file, +- "succ st = %d; late_start = %d; latency = %d", +- s_st, late_start, e->latency); +- +- if (e->data_type == MEM_DEP) +- end = MAX (end, SCHED_TIME (v_node) - ii + 1); +- if (dump_file) +- fprintf (dump_file, "end = %d\n", end); +- +- } +- else if (dump_file) +- fprintf (dump_file, "the node is not scheduled\n"); +- +- } +- start = late_start; +- end = MAX (end, late_start - ii); +- /* Schedule the node close to it's successors. */ ++ /* We first compute a forward range (start <= end), then decide whether ++ to reverse it. 
*/ ++ early_start = INT_MIN; ++ late_start = INT_MAX; ++ start = INT_MIN; ++ end = INT_MAX; ++ step = 1; ++ ++ count_preds = 0; ++ count_succs = 0; ++ ++ if (dump_file && (psp_not_empty || pss_not_empty)) ++ { ++ fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)" ++ "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii); ++ fprintf (dump_file, "%11s %11s %11s %11s %5s\n", ++ "start", "early start", "late start", "end", "time"); ++ fprintf (dump_file, "=========== =========== =========== ===========" ++ " =====\n"); ++ } ++ /* Calculate early_start and limit end. Both bounds are inclusive. */ ++ if (psp_not_empty) ++ for (e = u_node->in; e != 0; e = e->next_in) ++ { ++ int v = e->src->cuid; ++ ++ if (TEST_BIT (sched_nodes, v)) ++ { ++ int p_st = SCHED_TIME (v); ++ int earliest = p_st + e->latency - (e->distance * ii); ++ int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX); ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "%11s %11d %11s %11d %5d", ++ "", earliest, "", latest, p_st); ++ print_ddg_edge (dump_file, e); ++ fprintf (dump_file, "\n"); ++ } ++ ++ early_start = MAX (early_start, earliest); ++ end = MIN (end, latest); ++ ++ if (e->type == TRUE_DEP && e->data_type == REG_DEP) ++ count_preds++; ++ } ++ } ++ ++ /* Calculate late_start and limit start. Both bounds are inclusive. */ ++ if (pss_not_empty) ++ for (e = u_node->out; e != 0; e = e->next_out) ++ { ++ int v = e->dest->cuid; ++ ++ if (TEST_BIT (sched_nodes, v)) ++ { ++ int s_st = SCHED_TIME (v); ++ int earliest = (e->data_type == MEM_DEP ? 
s_st - ii + 1 : INT_MIN); ++ int latest = s_st - e->latency + (e->distance * ii); ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "%11d %11s %11d %11s %5d", ++ earliest, "", latest, "", s_st); ++ print_ddg_edge (dump_file, e); ++ fprintf (dump_file, "\n"); ++ } ++ ++ start = MAX (start, earliest); ++ late_start = MIN (late_start, latest); ++ ++ if (e->type == TRUE_DEP && e->data_type == REG_DEP) ++ count_succs++; ++ } ++ } ++ ++ if (dump_file && (psp_not_empty || pss_not_empty)) ++ { ++ fprintf (dump_file, "----------- ----------- ----------- -----------" ++ " -----\n"); ++ fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n", ++ start, early_start, late_start, end, "", ++ "(max, max, min, min)"); ++ } ++ ++ /* Get a target scheduling window no bigger than ii. */ ++ if (early_start == INT_MIN && late_start == INT_MAX) ++ early_start = NODE_ASAP (u_node); ++ else if (early_start == INT_MIN) ++ early_start = late_start - (ii - 1); ++ late_start = MIN (late_start, early_start + (ii - 1)); ++ ++ /* Apply memory dependence limits. */ ++ start = MAX (start, early_start); ++ end = MIN (end, late_start); ++ ++ if (dump_file && (psp_not_empty || pss_not_empty)) ++ fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n", ++ "", start, end, "", ""); ++ ++ /* If there are at least as many successors as predecessors, schedule the ++ node close to its successors. 
*/ ++ if (pss_not_empty && count_succs >= count_preds) ++ { ++ int tmp = end; ++ end = start; ++ start = tmp; + step = -1; +- +- if (dump_file) +- fprintf (dump_file, +- "\nScheduling %d (%d) in a window (%d..%d) with step %d\n", +- u_node->cuid, INSN_UID (u_node->insn), start, end, step); +- +- } +- +- else if (psp_not_empty && pss_not_empty) +- { +- int early_start = INT_MIN; +- int late_start = INT_MAX; +- int count_preds = 0; +- int count_succs = 0; +- +- start = INT_MIN; +- end = INT_MAX; +- for (e = u_node->in; e != 0; e = e->next_in) +- { +- ddg_node_ptr v_node = e->src; +- +- if (dump_file) +- { +- fprintf (dump_file, "\nProcessing edge:"); +- print_ddg_edge (dump_file, e); +- fprintf (dump_file, +- "\nScheduling %d (%d) in psp_pss_not_empty," +- " checking p %d (%d): ", u_node->cuid, INSN_UID +- (u_node->insn), v_node->cuid, INSN_UID +- (v_node->insn)); +- } +- +- if (TEST_BIT (sched_nodes, v_node->cuid)) +- { +- int p_st = SCHED_TIME (v_node); +- +- early_start = MAX (early_start, +- p_st + e->latency +- - (e->distance * ii)); +- +- if (dump_file) +- fprintf (dump_file, +- "pred st = %d; early_start = %d; latency = %d", +- p_st, early_start, e->latency); +- +- if (e->type == TRUE_DEP && e->data_type == REG_DEP) +- count_preds++; +- +- if (e->data_type == MEM_DEP) +- end = MIN (end, SCHED_TIME (v_node) + ii - 1); +- } +- else if (dump_file) +- fprintf (dump_file, "the node is not scheduled\n"); +- +- } +- for (e = u_node->out; e != 0; e = e->next_out) +- { +- ddg_node_ptr v_node = e->dest; +- +- if (dump_file) +- { +- fprintf (dump_file, "\nProcessing edge:"); +- print_ddg_edge (dump_file, e); +- fprintf (dump_file, +- "\nScheduling %d (%d) in psp_pss_not_empty," +- " checking s %d (%d): ", u_node->cuid, INSN_UID +- (u_node->insn), v_node->cuid, INSN_UID +- (v_node->insn)); +- } +- +- if (TEST_BIT (sched_nodes, v_node->cuid)) +- { +- int s_st = SCHED_TIME (v_node); +- +- late_start = MIN (late_start, +- s_st - e->latency +- + (e->distance * ii)); +- +- if 
(dump_file) +- fprintf (dump_file, +- "succ st = %d; late_start = %d; latency = %d", +- s_st, late_start, e->latency); +- +- if (e->type == TRUE_DEP && e->data_type == REG_DEP) +- count_succs++; +- +- if (e->data_type == MEM_DEP) +- start = MAX (start, SCHED_TIME (v_node) - ii + 1); +- } +- else if (dump_file) +- fprintf (dump_file, "the node is not scheduled\n"); +- +- } +- start = MAX (start, early_start); +- end = MIN (end, MIN (early_start + ii, late_start + 1)); +- step = 1; +- /* If there are more successors than predecessors schedule the +- node close to it's successors. */ +- if (count_succs >= count_preds) +- { +- int old_start = start; +- +- start = end - 1; +- end = old_start - 1; +- step = -1; +- } +- } +- else /* psp is empty && pss is empty. */ +- { +- start = SCHED_ASAP (u_node); +- end = start + ii; +- step = 1; +- } ++ } ++ ++ /* Now that we've finalized the window, make END an exclusive rather ++ than an inclusive bound. */ ++ end += step; + + *start_p = start; + *step_p = step; +@@ -1880,10 +1977,10 @@ + if (dump_file) + fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n", + start, end, step); +- return -1; ++ return -1; + } + +- return 0; ++ return 0; + } + + /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the +@@ -1939,7 +2036,7 @@ + SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */ + for (e = u_node->in; e != 0; e = e->next_in) + if (TEST_BIT (sched_nodes, e->src->cuid) +- && ((SCHED_TIME (e->src) - (e->distance * ii)) == ++ && ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) == + first_cycle_in_window)) + { + if (dump_file) +@@ -1964,7 +2061,7 @@ + SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */ + for (e = u_node->out; e != 0; e = e->next_out) + if (TEST_BIT (sched_nodes, e->dest->cuid) +- && ((SCHED_TIME (e->dest) + (e->distance * ii)) == ++ && ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) == + last_cycle_in_window)) + { + if (dump_file) +@@ -1988,7 +2085,7 @@ + 
last row of the scheduling window) */ + + static bool +-try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node, ++try_scheduling_node_in_cycle (partial_schedule_ptr ps, + int u, int cycle, sbitmap sched_nodes, + int *num_splits, sbitmap must_precede, + sbitmap must_follow) +@@ -1997,11 +2094,10 @@ + bool success = 0; + + verify_partial_schedule (ps, sched_nodes); +- psi = ps_add_node_check_conflicts (ps, u_node, cycle, +- must_precede, must_follow); ++ psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow); + if (psi) + { +- SCHED_TIME (u_node) = cycle; ++ SCHED_TIME (u) = cycle; + SET_BIT (sched_nodes, u); + success = 1; + *num_splits = 0; +@@ -2062,8 +2158,8 @@ + &step, &end) == 0) + { + if (dump_file) +- fprintf (dump_file, "\nTrying to schedule node %d \ +- INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID ++ fprintf (dump_file, "\nTrying to schedule node %d " ++ "INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID + (g->nodes[u].insn)), start, end, step); + + gcc_assert ((step > 0 && start < end) +@@ -2081,7 +2177,7 @@ + &tmp_precede, must_precede, + c, start, end, step); + success = +- try_scheduling_node_in_cycle (ps, u_node, u, c, ++ try_scheduling_node_in_cycle (ps, u, c, + sched_nodes, + &num_splits, tmp_precede, + tmp_follow); +@@ -2181,7 +2277,7 @@ + for (crr_insn = rows_new[row]; + crr_insn; crr_insn = crr_insn->next_in_row) + { +- ddg_node_ptr u = crr_insn->node; ++ int u = crr_insn->id; + int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii); + + SCHED_TIME (u) = new_time; +@@ -2202,7 +2298,7 @@ + for (crr_insn = rows_new[row + 1]; + crr_insn; crr_insn = crr_insn->next_in_row) + { +- ddg_node_ptr u = crr_insn->node; ++ int u = crr_insn->id; + int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1; + + SCHED_TIME (u) = new_time; +@@ -2242,24 +2338,24 @@ + { + ddg_edge_ptr e; + int lower = INT_MIN, upper = INT_MAX; +- ddg_node_ptr crit_pred = NULL; +- ddg_node_ptr crit_succ = NULL; ++ int crit_pred = -1; ++ int 
crit_succ = -1; + int crit_cycle; + + for (e = u_node->in; e != 0; e = e->next_in) + { +- ddg_node_ptr v_node = e->src; ++ int v = e->src->cuid; + +- if (TEST_BIT (sched_nodes, v_node->cuid) +- && (low == SCHED_TIME (v_node) + e->latency - (e->distance * ii))) +- if (SCHED_TIME (v_node) > lower) ++ if (TEST_BIT (sched_nodes, v) ++ && (low == SCHED_TIME (v) + e->latency - (e->distance * ii))) ++ if (SCHED_TIME (v) > lower) + { +- crit_pred = v_node; +- lower = SCHED_TIME (v_node); ++ crit_pred = v; ++ lower = SCHED_TIME (v); + } + } + +- if (crit_pred != NULL) ++ if (crit_pred >= 0) + { + crit_cycle = SCHED_TIME (crit_pred) + 1; + return SMODULO (crit_cycle, ii); +@@ -2267,17 +2363,18 @@ + + for (e = u_node->out; e != 0; e = e->next_out) + { +- ddg_node_ptr v_node = e->dest; +- if (TEST_BIT (sched_nodes, v_node->cuid) +- && (up == SCHED_TIME (v_node) - e->latency + (e->distance * ii))) +- if (SCHED_TIME (v_node) < upper) ++ int v = e->dest->cuid; ++ ++ if (TEST_BIT (sched_nodes, v) ++ && (up == SCHED_TIME (v) - e->latency + (e->distance * ii))) ++ if (SCHED_TIME (v) < upper) + { +- crit_succ = v_node; +- upper = SCHED_TIME (v_node); ++ crit_succ = v; ++ upper = SCHED_TIME (v); + } + } + +- if (crit_succ != NULL) ++ if (crit_succ >= 0) + { + crit_cycle = SCHED_TIME (crit_succ); + return SMODULO (crit_cycle, ii); +@@ -2301,10 +2398,10 @@ + + for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) + { +- ddg_node_ptr u = crr_insn->node; ++ int u = crr_insn->id; + + length++; +- gcc_assert (TEST_BIT (sched_nodes, u->cuid)); ++ gcc_assert (TEST_BIT (sched_nodes, u)); + /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by + popcount (sched_nodes) == number of insns in ps. 
*/ + gcc_assert (SCHED_TIME (u) >= ps->min_cycle); +@@ -2719,6 +2816,7 @@ + partial_schedule_ptr ps = XNEW (struct partial_schedule); + ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr)); + ps->rows_length = (int *) xcalloc (ii, sizeof (int)); ++ ps->reg_moves = NULL; + ps->ii = ii; + ps->history = history; + ps->min_cycle = INT_MAX; +@@ -2753,8 +2851,16 @@ + static void + free_partial_schedule (partial_schedule_ptr ps) + { ++ ps_reg_move_info *move; ++ unsigned int i; ++ + if (!ps) + return; ++ ++ FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move) ++ sbitmap_free (move->uses); ++ VEC_free (ps_reg_move_info, heap, ps->reg_moves); ++ + free_ps_insns (ps); + free (ps->rows); + free (ps->rows_length); +@@ -2796,12 +2902,12 @@ + fprintf (dump, "\n[ROW %d ]: ", i); + while (ps_i) + { +- if (JUMP_P (ps_i->node->insn)) +- fprintf (dump, "%d (branch), ", +- INSN_UID (ps_i->node->insn)); ++ rtx insn = ps_rtl_insn (ps, ps_i->id); ++ ++ if (JUMP_P (insn)) ++ fprintf (dump, "%d (branch), ", INSN_UID (insn)); + else +- fprintf (dump, "%d, ", +- INSN_UID (ps_i->node->insn)); ++ fprintf (dump, "%d, ", INSN_UID (insn)); + + ps_i = ps_i->next_in_row; + } +@@ -2810,11 +2916,11 @@ + + /* Creates an object of PS_INSN and initializes it to the given parameters. */ + static ps_insn_ptr +-create_ps_insn (ddg_node_ptr node, int cycle) ++create_ps_insn (int id, int cycle) + { + ps_insn_ptr ps_i = XNEW (struct ps_insn); + +- ps_i->node = node; ++ ps_i->id = id; + ps_i->next_in_row = NULL; + ps_i->prev_in_row = NULL; + ps_i->cycle = cycle; +@@ -2879,10 +2985,11 @@ + next_ps_i; + next_ps_i = next_ps_i->next_in_row) + { +- if (must_follow && TEST_BIT (must_follow, next_ps_i->node->cuid) ++ if (must_follow ++ && TEST_BIT (must_follow, next_ps_i->id) + && ! 
first_must_follow) + first_must_follow = next_ps_i; +- if (must_precede && TEST_BIT (must_precede, next_ps_i->node->cuid)) ++ if (must_precede && TEST_BIT (must_precede, next_ps_i->id)) + { + /* If we have already met a node that must follow, then + there is no possible column. */ +@@ -2893,8 +3000,8 @@ + } + /* The closing branch must be the last in the row. */ + if (must_precede +- && TEST_BIT (must_precede, next_ps_i->node->cuid) +- && JUMP_P (next_ps_i->node->insn)) ++ && TEST_BIT (must_precede, next_ps_i->id) ++ && JUMP_P (ps_rtl_insn (ps, next_ps_i->id))) + return false; + + last_in_row = next_ps_i; +@@ -2903,7 +3010,7 @@ + /* The closing branch is scheduled as well. Make sure there is no + dependent instruction after it as the branch should be the last + instruction in the row. */ +- if (JUMP_P (ps_i->node->insn)) ++ if (JUMP_P (ps_rtl_insn (ps, ps_i->id))) + { + if (first_must_follow) + return false; +@@ -2954,7 +3061,6 @@ + { + ps_insn_ptr prev, next; + int row; +- ddg_node_ptr next_node; + + if (!ps || !ps_i) + return false; +@@ -2964,11 +3070,9 @@ + if (! ps_i->next_in_row) + return false; + +- next_node = ps_i->next_in_row->node; +- + /* Check if next_in_row is dependent on ps_i, both having same sched + times (typically ANTI_DEP). If so, ps_i cannot skip over it. */ +- if (must_follow && TEST_BIT (must_follow, next_node->cuid)) ++ if (must_follow && TEST_BIT (must_follow, ps_i->next_in_row->id)) + return false; + + /* Advance PS_I over its next_in_row in the doubly linked list. */ +@@ -2999,7 +3103,7 @@ + before/after (respectively) the node pointed to by PS_I when scheduled + in the same cycle. 
*/ + static ps_insn_ptr +-add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle, ++add_node_to_ps (partial_schedule_ptr ps, int id, int cycle, + sbitmap must_precede, sbitmap must_follow) + { + ps_insn_ptr ps_i; +@@ -3008,7 +3112,7 @@ + if (ps->rows_length[row] >= issue_rate) + return NULL; + +- ps_i = create_ps_insn (node, cycle); ++ ps_i = create_ps_insn (id, cycle); + + /* Finds and inserts PS_I according to MUST_FOLLOW and + MUST_PRECEDE. */ +@@ -3060,7 +3164,7 @@ + crr_insn; + crr_insn = crr_insn->next_in_row) + { +- rtx insn = crr_insn->node->insn; ++ rtx insn = ps_rtl_insn (ps, crr_insn->id); + + if (!NONDEBUG_INSN_P (insn)) + continue; +@@ -3097,7 +3201,7 @@ + cuid N must be come before/after (respectively) the node pointed to by + PS_I when scheduled in the same cycle. */ + ps_insn_ptr +-ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n, ++ps_add_node_check_conflicts (partial_schedule_ptr ps, int n, + int c, sbitmap must_precede, + sbitmap must_follow) + { + diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106829.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106829.patch new file mode 100644 index 0000000000..02f8e51779 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106829.patch @@ -0,0 +1,147 @@ +2011-10-19 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF: + + 2011-09-09 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/arm-cores.def (generic-armv7-a): New architecture. + * config/arm/arm-tables.opt: Regenerate. + * config/arm/arm-tune.md: Regenerate. + * config/arm/arm.c (arm_file_start): Output .arch directive when + user passes -mcpu=generic-*. + (arm_issue_rate): Add genericv7a support. + * config/arm/arm.h (EXTRA_SPECS): Add asm_cpu_spec. + (ASM_CPU_SPEC): New define. + * config/arm/elf.h (ASM_SPEC): Use %(asm_cpu_spec). + * config/arm/semi.h (ASM_SPEC): Likewise. 
+ * doc/invoke.texi (ARM Options): Document -mcpu=generic-* + and -mtune=generic-*. + +=== modified file 'gcc/config/arm/arm-cores.def' +--- old/gcc/config/arm/arm-cores.def 2011-06-14 16:00:30 +0000 ++++ new/gcc/config/arm/arm-cores.def 2011-10-19 16:46:51 +0000 +@@ -124,6 +124,7 @@ + ARM_CORE("mpcore", mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e) + ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, v6t2) + ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, v6t2) ++ARM_CORE("generic-armv7-a", genericv7a, 7A, FL_LDSCHED, cortex) + ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, cortex_a5) + ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex) + ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9) +@@ -135,3 +136,4 @@ + ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex) + ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, cortex) + ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, cortex) ++ + +=== modified file 'gcc/config/arm/arm-tune.md' +--- old/gcc/config/arm/arm-tune.md 2011-06-14 14:37:30 +0000 ++++ new/gcc/config/arm/arm-tune.md 2011-10-19 16:46:51 +0000 +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from arm-cores.def + (define_attr "tune" +- "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0" ++ 
"arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0" + (const (symbol_ref "((enum attr_tune) arm_tune)"))) + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-10-11 02:31:01 +0000 ++++ new/gcc/config/arm/arm.c 2011-10-19 16:46:51 +0000 +@@ -22185,6 +22185,8 @@ + const char *fpu_name; + if (arm_selected_arch) + asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name); ++ else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0) ++ asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8); + else + asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name); + +@@ -23717,6 +23719,7 @@ + case cortexr4: + case cortexr4f: + case cortexr5: ++ case genericv7a: + case cortexa5: + case cortexa8: + case cortexa9: + +=== modified file 'gcc/config/arm/arm.h' +--- old/gcc/config/arm/arm.h 2011-09-05 14:32:11 +0000 ++++ new/gcc/config/arm/arm.h 2011-10-19 16:46:51 +0000 +@@ -198,6 +198,7 @@ + Do not define this macro if it does not need to do anything. */ + #define EXTRA_SPECS \ + { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }, \ ++ { "asm_cpu_spec", ASM_CPU_SPEC }, \ + SUBTARGET_EXTRA_SPECS + + #ifndef SUBTARGET_EXTRA_SPECS +@@ -2278,4 +2279,8 @@ + instruction. 
*/ + #define MAX_LDM_STM_OPS 4 + ++#define ASM_CPU_SPEC \ ++ " %{mcpu=generic-*:-march=%*;" \ ++ " :%{mcpu=*:-mcpu=%*} %{march=*:-march=%*}}" ++ + #endif /* ! GCC_ARM_H */ + +=== modified file 'gcc/config/arm/elf.h' +--- old/gcc/config/arm/elf.h 2009-06-21 19:48:15 +0000 ++++ new/gcc/config/arm/elf.h 2011-10-19 16:46:51 +0000 +@@ -56,8 +56,7 @@ + #define ASM_SPEC "\ + %{mbig-endian:-EB} \ + %{mlittle-endian:-EL} \ +-%{mcpu=*:-mcpu=%*} \ +-%{march=*:-march=%*} \ ++%(asm_cpu_spec) \ + %{mapcs-*:-mapcs-%*} \ + %(subtarget_asm_float_spec) \ + %{mthumb-interwork:-mthumb-interwork} \ + +=== modified file 'gcc/config/arm/semi.h' +--- old/gcc/config/arm/semi.h 2007-08-02 09:49:31 +0000 ++++ new/gcc/config/arm/semi.h 2011-10-19 16:46:51 +0000 +@@ -65,8 +65,7 @@ + #define ASM_SPEC "\ + %{fpic|fpie: -k} %{fPIC|fPIE: -k} \ + %{mbig-endian:-EB} \ +-%{mcpu=*:-mcpu=%*} \ +-%{march=*:-march=%*} \ ++%(asm_cpu_spec) \ + %{mapcs-float:-mfloat} \ + %{msoft-float:-mfloat-abi=soft} %{mhard-float:-mfloat-abi=hard} \ + %{mfloat-abi=*} %{mfpu=*} \ + +=== modified file 'gcc/doc/invoke.texi' +--- old/gcc/doc/invoke.texi 2011-08-13 08:32:32 +0000 ++++ new/gcc/doc/invoke.texi 2011-10-19 16:46:51 +0000 +@@ -10215,6 +10215,10 @@ + @samp{cortex-m0}, + @samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}. + ++@option{-mcpu=generic-@var{arch}} is also permissible, and is ++equivalent to @option{-march=@var{arch} -mtune=generic-@var{arch}}. ++See @option{-mtune} for more information. ++ + @item -mtune=@var{name} + @opindex mtune + This option is very similar to the @option{-mcpu=} option, except that +@@ -10226,6 +10230,13 @@ + For some ARM implementations better performance can be obtained by using + this option. + ++@option{-mtune=generic-@var{arch}} specifies that GCC should tune the ++performance for a blend of processors within architecture @var{arch}. 
++The aim is to generate code that runs well on the current most popular ++processors, balancing between optimizations that benefit some CPUs in the ++range, and avoiding performance pitfalls of other CPUs. The effects of ++this option may change in future GCC versions as CPU models come and go. ++ + @item -march=@var{name} + @opindex march + This specifies the name of the target ARM architecture. GCC uses this + diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106830.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106830.patch new file mode 100644 index 0000000000..695aa85591 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106830.patch @@ -0,0 +1,304 @@ +2011-10-19 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF: + + 2011-10-18 Andrew Stubbs <ams@codesourcery.com> + + * config/arm/driver-arm.c (host_detect_local_cpu): Close the file + before exiting. + + 2011-10-18 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config.host (arm*-*-linux*): Add driver-arm.o and x-arm. + * config/arm/arm.opt: Add 'native' processor_type and + arm_arch enum values. + * config/arm/arm.h (host_detect_local_cpu): New prototype. + (EXTRA_SPEC_FUNCTIONS): New define. + (MCPU_MTUNE_NATIVE_SPECS): New define. + (DRIVER_SELF_SPECS): New define. + * config/arm/driver-arm.c: New file. + * config/arm/x-arm: New file. + * doc/invoke.texi (ARM Options): Document -mcpu=native, + -mtune=native and -march=native. 
+ +=== modified file 'gcc/config.host' +--- old/gcc/config.host 2011-02-15 09:49:14 +0000 ++++ new/gcc/config.host 2011-10-19 17:01:50 +0000 +@@ -100,6 +100,14 @@ + esac + + case ${host} in ++ arm*-*-linux*) ++ case ${target} in ++ arm*-*-*) ++ host_extra_gcc_objs="driver-arm.o" ++ host_xmake_file="${host_xmake_file} arm/x-arm" ++ ;; ++ esac ++ ;; + alpha*-*-linux*) + case ${target} in + alpha*-*-linux*) + +=== modified file 'gcc/config/arm/arm.h' +--- old/gcc/config/arm/arm.h 2011-10-19 16:46:51 +0000 ++++ new/gcc/config/arm/arm.h 2011-10-19 17:01:50 +0000 +@@ -2283,4 +2283,21 @@ + " %{mcpu=generic-*:-march=%*;" \ + " :%{mcpu=*:-mcpu=%*} %{march=*:-march=%*}}" + ++/* -mcpu=native handling only makes sense with compiler running on ++ an ARM chip. */ ++#if defined(__arm__) ++extern const char *host_detect_local_cpu (int argc, const char **argv); ++# define EXTRA_SPEC_FUNCTIONS \ ++ { "local_cpu_detect", host_detect_local_cpu }, ++ ++# define MCPU_MTUNE_NATIVE_SPECS \ ++ " %{march=native:%<march=native %:local_cpu_detect(arch)}" \ ++ " %{mcpu=native:%<mcpu=native %:local_cpu_detect(cpu)}" \ ++ " %{mtune=native:%<mtune=native %:local_cpu_detect(tune)}" ++#else ++# define MCPU_MTUNE_NATIVE_SPECS "" ++#endif ++ ++#define DRIVER_SELF_SPECS MCPU_MTUNE_NATIVE_SPECS ++ + #endif /* ! GCC_ARM_H */ + +=== modified file 'gcc/config/arm/arm.opt' +--- old/gcc/config/arm/arm.opt 2011-10-11 02:31:01 +0000 ++++ new/gcc/config/arm/arm.opt 2011-10-19 17:01:50 +0000 +@@ -48,6 +48,11 @@ + Target RejectNegative Joined + Specify the name of the target architecture + ++; Other arm_arch values are loaded from arm-tables.opt ++; but that is a generated file and this is an odd-one-out. 
++EnumValue ++Enum(arm_arch) String(native) Value(-1) DriverOnly ++ + marm + Target RejectNegative InverseMask(THUMB) Undocumented + +@@ -153,6 +158,11 @@ + Target RejectNegative Joined + Tune code for the given processor + ++; Other processor_type values are loaded from arm-tables.opt ++; but that is a generated file and this is an odd-one-out. ++EnumValue ++Enum(processor_type) String(native) Value(-1) DriverOnly ++ + mwords-little-endian + Target Report RejectNegative Mask(LITTLE_WORDS) + Assume big endian bytes, little endian words + +=== added file 'gcc/config/arm/driver-arm.c' +--- old/gcc/config/arm/driver-arm.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/config/arm/driver-arm.c 2011-10-19 17:07:55 +0000 +@@ -0,0 +1,149 @@ ++/* Subroutines for the gcc driver. ++ Copyright (C) 2011 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. 
*/ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "configargs.h" ++ ++struct vendor_cpu { ++ const char *part_no; ++ const char *arch_name; ++ const char *cpu_name; ++}; ++ ++static struct vendor_cpu arm_cpu_table[] = { ++ {"0x926", "armv5te", "arm926ej-s"}, ++ {"0xa26", "armv5te", "arm1026ej-s"}, ++ {"0xb02", "armv6k", "mpcore"}, ++ {"0xb36", "armv6j", "arm1136j-s"}, ++ {"0xb56", "armv6t2", "arm1156t2-s"}, ++ {"0xb76", "armv6zk", "arm1176jz-s"}, ++ {"0xc05", "armv7-a", "cortex-a5"}, ++ {"0xc08", "armv7-a", "cortex-a8"}, ++ {"0xc09", "armv7-a", "cortex-a9"}, ++ {"0xc0f", "armv7-a", "cortex-a15"}, ++ {"0xc14", "armv7-r", "cortex-r4"}, ++ {"0xc15", "armv7-r", "cortex-r5"}, ++ {"0xc20", "armv6-m", "cortex-m0"}, ++ {"0xc21", "armv6-m", "cortex-m1"}, ++ {"0xc23", "armv7-m", "cortex-m3"}, ++ {"0xc24", "armv7e-m", "cortex-m4"}, ++ {NULL, NULL, NULL} ++}; ++ ++struct { ++ const char *vendor_no; ++ const struct vendor_cpu *vendor_parts; ++} vendors[] = { ++ {"0x41", arm_cpu_table}, ++ {NULL, NULL} ++}; ++ ++/* This will be called by the spec parser in gcc.c when it sees ++ a %:local_cpu_detect(args) construct. Currently it will be called ++ with either "arch", "cpu" or "tune" as argument depending on whether ++ -march=native, -mcpu=native or -mtune=native is to be substituted. ++ ++ It returns a string containing new command line parameters to be ++ put in place of the matched option, depending on which CPU this ++ is executed on. E.g. "-march=armv7-a" on a Cortex-A8 for -march=native. ++ If no known processor is detected, the configure-time default for that ++ option (or, failing that, for -march) is used, if one was configured. ++ ++ ARGC and ARGV are set depending on the actual arguments given ++ in the spec. 
*/ ++const char * ++host_detect_local_cpu (int argc, const char **argv) ++{ ++ const char *val = NULL; ++ char buf[128]; ++ FILE *f = NULL; ++ bool arch; ++ const struct vendor_cpu *cpu_table = NULL; ++ ++ if (argc < 1) ++ goto not_found; ++ ++ arch = strcmp (argv[0], "arch") == 0; ++ if (!arch && strcmp (argv[0], "cpu") != 0 && strcmp (argv[0], "tune")) ++ goto not_found; ++ ++ f = fopen ("/proc/cpuinfo", "r"); ++ if (f == NULL) ++ goto not_found; ++ ++ while (fgets (buf, sizeof (buf), f) != NULL) ++ { ++ /* Ensure that CPU implementer is ARM (0x41). */ ++ if (strncmp (buf, "CPU implementer", sizeof ("CPU implementer") - 1) == 0) ++ { ++ int i; ++ for (i = 0; vendors[i].vendor_no != NULL; i++) ++ if (strstr (buf, vendors[i].vendor_no) != NULL) ++ { ++ cpu_table = vendors[i].vendor_parts; ++ break; ++ } ++ } ++ ++ /* Detect arch/cpu. */ ++ if (strncmp (buf, "CPU part", sizeof ("CPU part") - 1) == 0) ++ { ++ int i; ++ ++ if (cpu_table == NULL) ++ goto not_found; ++ ++ for (i = 0; cpu_table[i].part_no != NULL; i++) ++ if (strstr (buf, cpu_table[i].part_no) != NULL) ++ { ++ val = arch ? 
cpu_table[i].arch_name : cpu_table[i].cpu_name; ++ break; ++ } ++ break; ++ } ++ } ++ ++ fclose (f); ++ ++ if (val == NULL) ++ goto not_found; ++ ++ return concat ("-m", argv[0], "=", val, NULL); ++ ++not_found: ++ { ++ unsigned int i; ++ unsigned int opt; ++ const char *search[] = {NULL, "arch"}; ++ ++ if (f) ++ fclose (f); ++ ++ search[0] = argv[0]; ++ for (opt = 0; opt < ARRAY_SIZE (search); opt++) ++ for (i = 0; i < ARRAY_SIZE (configure_default_options); i++) ++ if (strcmp (configure_default_options[i].name, search[opt]) == 0) ++ return concat ("-m", search[opt], "=", ++ configure_default_options[i].value, NULL); ++ return NULL; ++ } ++} + +=== added file 'gcc/config/arm/x-arm' +--- old/gcc/config/arm/x-arm 1970-01-01 00:00:00 +0000 ++++ new/gcc/config/arm/x-arm 2011-10-19 17:01:50 +0000 +@@ -0,0 +1,3 @@ ++driver-arm.o: $(srcdir)/config/arm/driver-arm.c \ ++ $(CONFIG_H) $(SYSTEM_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + +=== modified file 'gcc/doc/invoke.texi' +--- old/gcc/doc/invoke.texi 2011-10-19 16:46:51 +0000 ++++ new/gcc/doc/invoke.texi 2011-10-19 17:01:50 +0000 +@@ -10215,10 +10215,16 @@ + @samp{cortex-m0}, + @samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}. + ++ + @option{-mcpu=generic-@var{arch}} is also permissible, and is + equivalent to @option{-march=@var{arch} -mtune=generic-@var{arch}}. + See @option{-mtune} for more information. + ++@option{-mcpu=native} causes the compiler to auto-detect the CPU ++of the build computer. At present, this feature is only supported on ++Linux, and not all architectures are recognised. If the auto-detect is ++unsuccessful the option has no effect. ++ + @item -mtune=@var{name} + @opindex mtune + This option is very similar to the @option{-mcpu=} option, except that +@@ -10237,6 +10243,11 @@ + range, and avoiding performance pitfalls of other CPUs. The effects of + this option may change in future GCC versions as CPU models come and go. 
+ ++@option{-mtune=native} causes the compiler to auto-detect the CPU ++of the build computer. At present, this feature is only supported on ++Linux, and not all architectures are recognised. If the auto-detect is ++unsuccessful the option has no effect. ++ + @item -march=@var{name} + @opindex march + This specifies the name of the target ARM architecture. GCC uses this +@@ -10250,6 +10261,11 @@ + @samp{armv7}, @samp{armv7-a}, @samp{armv7-r}, @samp{armv7-m}, + @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}. + ++@option{-march=native} causes the compiler to auto-detect the architecture ++of the build computer. At present, this feature is only supported on ++Linux, and not all architectures are recognised. If the auto-detect is ++unsuccessful the option has no effect. ++ + @item -mfpu=@var{name} + @itemx -mfpe=@var{number} + @itemx -mfp=@var{number} + diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106831.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106831.patch new file mode 100644 index 0000000000..ad91d77366 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106831.patch @@ -0,0 +1,123 @@ +2011-10-19 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF: + + 2011-10-18 Andrew Stubbs <ams@codesourcery.com> + + PR tree-optimization/50717 + + gcc/ + * tree-ssa-math-opts.c (is_widening_mult_p): Remove the 'type' + parameter. Calculate 'type' from stmt. + (convert_mult_to_widen): Update call the is_widening_mult_p. + (convert_plusminus_to_widen): Likewise. + + gcc/testsuite/ + * gcc.dg/pr50717-1.c: New file. + * gcc.target/arm/wmul-12.c: Correct types. + * gcc.target/arm/wmul-8.c: Correct types. 
+ +=== added file 'gcc/testsuite/gcc.dg/pr50717-1.c' +--- old/gcc/testsuite/gcc.dg/pr50717-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/pr50717-1.c 2011-10-19 14:42:50 +0000 +@@ -0,0 +1,26 @@ ++/* PR tree-optimization/50717 */ ++/* Ensure that widening multiply-and-accumulate is not used where integer ++ type promotion or users' casts should prevent it. */ ++ ++/* { dg-options "-O2 -fdump-tree-widening_mul" } */ ++ ++long long ++f (unsigned int a, char b, long long c) ++{ ++ return (a * b) + c; ++} ++ ++int ++g (short a, short b, int c) ++{ ++ return (short)(a * b) + c; ++} ++ ++int ++h (char a, char b, int c) ++{ ++ return (char)(a * b) + c; ++} ++ ++/* { dg-final { scan-tree-dump-times "WIDEN_MULT_PLUS_EXPR" 0 "widening_mul" } } */ ++/* { dg-final { cleanup-tree-dump "widening_mul" } } */ + +=== modified file 'gcc/testsuite/gcc.target/arm/wmul-12.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-12.c 2011-07-22 15:46:42 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-12.c 2011-10-19 14:42:50 +0000 +@@ -4,8 +4,8 @@ + long long + foo (int *b, int *c) + { +- int tmp = *b * *c; +- return 10 + (long long)tmp; ++ long long tmp = (long long)*b * *c; ++ return 10 + tmp; + } + + /* { dg-final { scan-assembler "smlal" } } */ + +=== modified file 'gcc/testsuite/gcc.target/arm/wmul-8.c' +--- old/gcc/testsuite/gcc.target/arm/wmul-8.c 2011-07-15 14:16:54 +0000 ++++ new/gcc/testsuite/gcc.target/arm/wmul-8.c 2011-10-19 14:42:50 +0000 +@@ -4,7 +4,7 @@ + long long + foo (long long a, int *b, int *c) + { +- return a + *b * *c; ++ return a + (long long)*b * *c; + } + + /* { dg-final { scan-assembler "smlal" } } */ + +=== modified file 'gcc/tree-ssa-math-opts.c' +--- old/gcc/tree-ssa-math-opts.c 2011-09-08 20:11:43 +0000 ++++ new/gcc/tree-ssa-math-opts.c 2011-10-19 14:42:50 +0000 +@@ -1351,10 +1351,12 @@ + and *TYPE2_OUT would give the operands of the multiplication. 
*/ + + static bool +-is_widening_mult_p (tree type, gimple stmt, ++is_widening_mult_p (gimple stmt, + tree *type1_out, tree *rhs1_out, + tree *type2_out, tree *rhs2_out) + { ++ tree type = TREE_TYPE (gimple_assign_lhs (stmt)); ++ + if (TREE_CODE (type) != INTEGER_TYPE + && TREE_CODE (type) != FIXED_POINT_TYPE) + return false; +@@ -1416,7 +1418,7 @@ + if (TREE_CODE (type) != INTEGER_TYPE) + return false; + +- if (!is_widening_mult_p (type, stmt, &type1, &rhs1, &type2, &rhs2)) ++ if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2)) + return false; + + to_mode = TYPE_MODE (type); +@@ -1592,7 +1594,7 @@ + if (code == PLUS_EXPR + && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR)) + { +- if (!is_widening_mult_p (type, rhs1_stmt, &type1, &mult_rhs1, ++ if (!is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1, + &type2, &mult_rhs2)) + return false; + add_rhs = rhs2; +@@ -1600,7 +1602,7 @@ + } + else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR) + { +- if (!is_widening_mult_p (type, rhs2_stmt, &type1, &mult_rhs1, ++ if (!is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1, + &type2, &mult_rhs2)) + return false; + add_rhs = rhs1; + diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106832.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106832.patch new file mode 100644 index 0000000000..843f1cff25 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106832.patch @@ -0,0 +1,24 @@ +2011-10-21 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF mainline: + + 2011-10-21 Andrew Stubbs <ams@codesourcery.com> + + PR target/50809 + + gcc/ + * config/arm/driver-arm.c (vendors): Make static. 
+ +=== modified file 'gcc/config/arm/driver-arm.c' +--- old/gcc/config/arm/driver-arm.c 2011-10-19 17:07:55 +0000 ++++ new/gcc/config/arm/driver-arm.c 2011-10-21 19:27:47 +0000 +@@ -49,7 +49,7 @@ + {NULL, NULL, NULL} + }; + +-struct { ++static struct { + const char *vendor_no; + const struct vendor_cpu *vendor_parts; + } vendors[] = { + diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106833.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106833.patch new file mode 100644 index 0000000000..1ad48e512e --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106833.patch @@ -0,0 +1,453 @@ +2011-10-27 Ira Rosen <ira.rosen@linaro.org> + + Backport from mainline: + + 2011-10-16 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vect-stmts.c (vectorizable_load): For SLP without permutation + treat the first load of the node as the first element in its + interleaving chain. + * tree-vect-slp.c (vect_get_and_check_slp_defs): Swap the operands if + necessary and possible. + (vect_build_slp_tree): Add new argument. Allow load groups of any size + in basic blocks. Keep all the loads for further permutation check. + Use the new argument to determine if there is a permutation. Update + the recursive calls. + (vect_supported_load_permutation_p): Allow subchains of interleaving + chains in basic block vectorization. + (vect_analyze_slp_instance): Update the call to vect_build_slp_tree. + Check load permutation based on the new parameter. + (vect_schedule_slp_instance): Don't start from the first element in + interleaving chain unless the loads are permuted. + + gcc/testsuite/ + * gcc.dg/vect/bb-slp-29.c: New test. 
+ +=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-29.c' +--- old/gcc/testsuite/gcc.dg/vect/bb-slp-29.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/bb-slp-29.c 2011-10-23 11:29:25 +0000 +@@ -0,0 +1,59 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define A 3 ++#define B 4 ++#define N 256 ++ ++short src[N], dst[N]; ++ ++void foo (short * __restrict__ dst, short * __restrict__ src, int h, int stride, int dummy) ++{ ++ int i; ++ h /= 16; ++ for (i = 0; i < h; i++) ++ { ++ dst[0] = A*src[0] + B*src[1]; ++ dst[1] = A*src[1] + B*src[2]; ++ dst[2] = A*src[2] + B*src[3]; ++ dst[3] = A*src[3] + B*src[4]; ++ dst[4] = A*src[4] + B*src[5]; ++ dst[5] = A*src[5] + B*src[6]; ++ dst[6] = A*src[6] + B*src[7]; ++ dst[7] = A*src[7] + B*src[8]; ++ dst += stride; ++ src += stride; ++ if (dummy == 32) ++ abort (); ++ } ++} ++ ++ ++int main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ dst[i] = 0; ++ src[i] = i; ++ } ++ ++ foo (dst, src, N, 8, 0); ++ ++ for (i = 0; i < N/2; i++) ++ { ++ if (dst[i] != A * src[i] + B * src[i+1]) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult && vect_element_align } } } } */ ++/* { dg-final { cleanup-tree-dump "slp" } } */ ++ + +=== modified file 'gcc/tree-vect-slp.c' +--- old/gcc/tree-vect-slp.c 2011-10-06 11:08:08 +0000 ++++ new/gcc/tree-vect-slp.c 2011-10-23 11:29:25 +0000 +@@ -115,13 +115,15 @@ + { + tree oprnd; + unsigned int i, number_of_oprnds; +- tree def; ++ tree def[2]; + gimple def_stmt; + enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; + stmt_vec_info stmt_info = + vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0)); + enum gimple_rhs_class rhs_class; + struct loop *loop = NULL; ++ enum tree_code rhs_code; ++ bool different_types = false; + + if (loop_vinfo) + loop = LOOP_VINFO_LOOP 
(loop_vinfo); +@@ -133,7 +135,7 @@ + { + oprnd = gimple_op (stmt, i + 1); + +- if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def, ++ if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def[i], + &dt[i]) + || (!def_stmt && dt[i] != vect_constant_def)) + { +@@ -188,11 +190,11 @@ + switch (gimple_code (def_stmt)) + { + case GIMPLE_PHI: +- def = gimple_phi_result (def_stmt); ++ def[i] = gimple_phi_result (def_stmt); + break; + + case GIMPLE_ASSIGN: +- def = gimple_assign_lhs (def_stmt); ++ def[i] = gimple_assign_lhs (def_stmt); + break; + + default: +@@ -206,8 +208,8 @@ + { + /* op0 of the first stmt of the group - store its info. */ + *first_stmt_dt0 = dt[i]; +- if (def) +- *first_stmt_def0_type = TREE_TYPE (def); ++ if (def[i]) ++ *first_stmt_def0_type = TREE_TYPE (def[i]); + else + *first_stmt_const_oprnd = oprnd; + +@@ -227,8 +229,8 @@ + { + /* op1 of the first stmt of the group - store its info. */ + *first_stmt_dt1 = dt[i]; +- if (def) +- *first_stmt_def1_type = TREE_TYPE (def); ++ if (def[i]) ++ *first_stmt_def1_type = TREE_TYPE (def[i]); + else + { + /* We assume that the stmt contains only one constant +@@ -249,22 +251,53 @@ + the def-stmt/s of the first stmt. 
*/ + if ((i == 0 + && (*first_stmt_dt0 != dt[i] +- || (*first_stmt_def0_type && def ++ || (*first_stmt_def0_type && def[0] + && !types_compatible_p (*first_stmt_def0_type, +- TREE_TYPE (def))))) ++ TREE_TYPE (def[0]))))) + || (i == 1 + && (*first_stmt_dt1 != dt[i] +- || (*first_stmt_def1_type && def ++ || (*first_stmt_def1_type && def[1] + && !types_compatible_p (*first_stmt_def1_type, +- TREE_TYPE (def))))) +- || (!def ++ TREE_TYPE (def[1]))))) ++ || (!def[i] + && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd), +- TREE_TYPE (oprnd)))) ++ TREE_TYPE (oprnd))) ++ || different_types) + { +- if (vect_print_dump_info (REPORT_SLP)) +- fprintf (vect_dump, "Build SLP failed: different types "); ++ if (i != number_of_oprnds - 1) ++ different_types = true; ++ else ++ { ++ if (is_gimple_assign (stmt) ++ && (rhs_code = gimple_assign_rhs_code (stmt)) ++ && TREE_CODE_CLASS (rhs_code) == tcc_binary ++ && commutative_tree_code (rhs_code) ++ && *first_stmt_dt0 == dt[1] ++ && *first_stmt_dt1 == dt[0] ++ && def[0] && def[1] ++ && !(*first_stmt_def0_type ++ && !types_compatible_p (*first_stmt_def0_type, ++ TREE_TYPE (def[1]))) ++ && !(*first_stmt_def1_type ++ && !types_compatible_p (*first_stmt_def1_type, ++ TREE_TYPE (def[0])))) ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ { ++ fprintf (vect_dump, "Swapping operands of "); ++ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); ++ } ++ swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt), ++ gimple_assign_rhs2_ptr (stmt)); ++ } ++ else ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ fprintf (vect_dump, "Build SLP failed: different types "); + +- return false; ++ return false; ++ } ++ } + } + } + } +@@ -278,10 +311,10 @@ + + case vect_internal_def: + case vect_reduction_def: +- if (i == 0) ++ if ((i == 0 && !different_types) || (i == 1 && different_types)) + VEC_safe_push (gimple, heap, *def_stmts0, def_stmt); + else +- VEC_safe_push (gimple, heap, *def_stmts1, def_stmt); ++ VEC_safe_push (gimple, heap, *def_stmts1, 
def_stmt); + break; + + default: +@@ -289,7 +322,7 @@ + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "Build SLP failed: illegal type of def "); +- print_generic_expr (vect_dump, def, TDF_SLIM); ++ print_generic_expr (vect_dump, def[i], TDF_SLIM); + } + + return false; +@@ -312,7 +345,7 @@ + int ncopies_for_cost, unsigned int *max_nunits, + VEC (int, heap) **load_permutation, + VEC (slp_tree, heap) **loads, +- unsigned int vectorization_factor) ++ unsigned int vectorization_factor, bool *loads_permuted) + { + VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size); + VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size); +@@ -523,7 +556,9 @@ + + /* Check that the size of interleaved loads group is not + greater than the SLP group size. */ +- if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size) ++ if (loop_vinfo ++ && DR_GROUP_SIZE (vinfo_for_stmt (stmt)) ++ > ncopies * group_size) + { + if (vect_print_dump_info (REPORT_SLP)) + { +@@ -644,19 +679,22 @@ + /* Strided loads were reached - stop the recursion. */ + if (stop_recursion) + { ++ VEC_safe_push (slp_tree, heap, *loads, *node); + if (permutation) + { +- VEC_safe_push (slp_tree, heap, *loads, *node); ++ ++ *loads_permuted = true; + *inside_cost + += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0) + * group_size; + } + else +- { +- /* We don't check here complex numbers chains, so we keep them in +- LOADS for further check in vect_supported_load_permutation_p. */ ++ { ++ /* We don't check here complex numbers chains, so we set ++ LOADS_PERMUTED for further check in ++ vect_supported_load_permutation_p. 
*/ + if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR) +- VEC_safe_push (slp_tree, heap, *loads, *node); ++ *loads_permuted = true; + } + + return true; +@@ -675,7 +713,7 @@ + if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size, + inside_cost, outside_cost, ncopies_for_cost, + max_nunits, load_permutation, loads, +- vectorization_factor)) ++ vectorization_factor, loads_permuted)) + return false; + + SLP_TREE_LEFT (*node) = left_node; +@@ -693,7 +731,7 @@ + if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size, + inside_cost, outside_cost, ncopies_for_cost, + max_nunits, load_permutation, loads, +- vectorization_factor)) ++ vectorization_factor, loads_permuted)) + return false; + + SLP_TREE_RIGHT (*node) = right_node; +@@ -879,8 +917,10 @@ + bool supported, bad_permutation = false; + sbitmap load_index; + slp_tree node, other_complex_node; +- gimple stmt, first = NULL, other_node_first; ++ gimple stmt, first = NULL, other_node_first, load, next_load, first_load; + unsigned complex_numbers = 0; ++ struct data_reference *dr; ++ bb_vec_info bb_vinfo; + + /* FORNOW: permutations are only supported in SLP. */ + if (!slp_instn) +@@ -1040,6 +1080,76 @@ + } + } + ++ /* In basic block vectorization we allow any subchain of an interleaving ++ chain. ++ FORNOW: not supported in loop SLP because of realignment complications. */ ++ bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)); ++ bad_permutation = false; ++ /* Check that for every node in the instance the loads form a subchain. 
*/ ++ if (bb_vinfo) ++ { ++ FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node) ++ { ++ next_load = NULL; ++ first_load = NULL; ++ FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load) ++ { ++ if (!first_load) ++ first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (load)); ++ else if (first_load ++ != DR_GROUP_FIRST_DR (vinfo_for_stmt (load))) ++ { ++ bad_permutation = true; ++ break; ++ } ++ ++ if (j != 0 && next_load != load) ++ { ++ bad_permutation = true; ++ break; ++ } ++ ++ next_load = DR_GROUP_NEXT_DR (vinfo_for_stmt (load)); ++ } ++ ++ if (bad_permutation) ++ break; ++ } ++ ++ /* Check that the alignment of the first load in every subchain, i.e., ++ the first statement in every load node, is supported. */ ++ if (!bad_permutation) ++ { ++ FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node) ++ { ++ first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0); ++ if (first_load ++ != DR_GROUP_FIRST_DR (vinfo_for_stmt (first_load))) ++ { ++ dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load)); ++ if (vect_supportable_dr_alignment (dr, false) ++ == dr_unaligned_unsupported) ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ { ++ fprintf (vect_dump, "unsupported unaligned load "); ++ print_gimple_stmt (vect_dump, first_load, 0, ++ TDF_SLIM); ++ } ++ bad_permutation = true; ++ break; ++ } ++ } ++ } ++ ++ if (!bad_permutation) ++ { ++ VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn)); ++ return true; ++ } ++ } ++ } ++ + /* FORNOW: the only supported permutation is 0..01..1.. of length equal to + GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as + well (unless it's reduction). 
*/ +@@ -1149,6 +1259,7 @@ + VEC (int, heap) *load_permutation; + VEC (slp_tree, heap) *loads; + struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); ++ bool loads_permuted = false; + + if (dr) + { +@@ -1238,7 +1349,7 @@ + if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size, + &inside_cost, &outside_cost, ncopies_for_cost, + &max_nunits, &load_permutation, &loads, +- vectorization_factor)) ++ vectorization_factor, &loads_permuted)) + { + /* Calculate the unrolling factor based on the smallest type. */ + if (max_nunits > nunits) +@@ -1263,7 +1374,8 @@ + SLP_INSTANCE_LOADS (new_instance) = loads; + SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL; + SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation; +- if (VEC_length (slp_tree, loads)) ++ ++ if (loads_permuted) + { + if (!vect_supported_load_permutation_p (new_instance, group_size, + load_permutation)) +@@ -2542,10 +2654,11 @@ + /* Loads should be inserted before the first load. */ + if (SLP_INSTANCE_FIRST_LOAD_STMT (instance) + && STMT_VINFO_STRIDED_ACCESS (stmt_info) +- && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))) ++ && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)) ++ && SLP_INSTANCE_LOAD_PERMUTATION (instance)) + si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance)); + else if (is_pattern_stmt_p (stmt_info)) +- si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); ++ si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); + else + si = gsi_for_stmt (stmt); + + +=== modified file 'gcc/tree-vect-stmts.c' +--- old/gcc/tree-vect-stmts.c 2011-10-16 12:16:07 +0000 ++++ new/gcc/tree-vect-stmts.c 2011-10-23 11:29:25 +0000 +@@ -4285,6 +4285,11 @@ + if (strided_load) + { + first_stmt = DR_GROUP_FIRST_DR (stmt_info); ++ if (slp ++ && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance) ++ && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0)) ++ first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0); ++ + /* Check if the chain of loads is 
already vectorized. */ + if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))) + { + diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106834.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106834.patch new file mode 100644 index 0000000000..421a8fe3a9 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106834.patch @@ -0,0 +1,1505 @@ + 2011-10-18 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * doc/md.texi (vec_widen_ushiftl_hi, vec_widen_ushiftl_lo, + vec_widen_sshiftl_hi, vec_widen_sshiftl_lo): Document. + * tree-pretty-print.c (dump_generic_node): Handle WIDEN_LSHIFT_EXPR, + VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR. + (op_code_prio): Likewise. + (op_symbol_code): Handle WIDEN_LSHIFT_EXPR. + * optabs.c (optab_for_tree_code): Handle + VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR. + (init-optabs): Initialize optab codes for vec_widen_u/sshiftl_hi/lo. + * optabs.h (enum optab_index): Add OTI_vec_widen_u/sshiftl_hi/lo. + * genopinit.c (optabs): Initialize the new optabs. + * expr.c (expand_expr_real_2): Handle + VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR. + * gimple-pretty-print.c (dump_binary_rhs): Likewise. + * tree-vectorizer.h (NUM_PATTERNS): Increase to 8. + * tree.def (WIDEN_LSHIFT_EXPR, VEC_WIDEN_LSHIFT_HI_EXPR, + VEC_WIDEN_LSHIFT_LO_EXPR): New. + * cfgexpand.c (expand_debug_expr): Handle new tree codes. + * tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add + vect_recog_widen_shift_pattern. + (vect_handle_widen_mult_by_const): Rename... + (vect_handle_widen_op_by_const): ...to this. Handle shifts. + Add a new argument, update documentation. + (vect_recog_widen_mult_pattern): Assume that only second + operand can be constant. Update call to + vect_handle_widen_op_by_const. + (vect_recog_over_widening_pattern): Fix typo. + (vect_recog_widen_shift_pattern): New. + * tree-vect-stmts.c (vectorizable_type_promotion): Handle + widening shifts. 
+ (supportable_widening_operation): Likewise. + * tree-inline.c (estimate_operator_cost): Handle new tree codes. + * tree-vect-generic.c (expand_vector_operations_1): Likewise. + * tree-cfg.c (verify_gimple_assign_binary): Likewise. + * config/arm/neon.md (neon_vec_<US>shiftl_<mode>): New. + (vec_widen_<US>shiftl_lo_<mode>, neon_vec_<US>shiftl_hi_<mode>, + vec_widen_<US>shiftl_hi_<mode>, neon_vec_<US>shift_left_<mode>): + Likewise. + * config/arm/predicates.md (const_neon_scalar_shift_amount_operand): + New. + * config/arm/iterators.md (V_innermode): New. + * tree-vect-slp.c (vect_build_slp_tree): Require same shift operand + for widening shift. + + gcc/testsuite + * testsuite/lib/target-supports.exp + (check_effective_target_vect_widen_shift): New. + * gcc.dg/vect/vect-widen-shift-s16.c: New. + * gcc.dg/vect/vect-widen-shift-s8.c: New. + * gcc.dg/vect/vect-widen-shift-u16.c: New. + * gcc.dg/vect/vect-widen-shift-u8.c: New. + + 2011-10-06 Jakub Jelinek <jakub@redhat.com> + + gcc/ + * tree-vect-patterns.c (vect_pattern_recog_1): Use + vect_recog_func_ptr typedef for the first argument. + (vect_pattern_recog): Rename vect_recog_func_ptr variable + to vect_recog_func, use vect_recog_func_ptr typedef for it. + + 2011-10-16 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + PR tree-optimization/50727 + * tree-vect-patterns.c (vect_operation_fits_smaller_type): Add + DEF_STMT to the list of statements to be replaced by the + pattern statements. + + 2011-10-09 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + PR tree-optimization/50635 + * tree-vect-patterns.c (vect_handle_widen_mult_by_const): Add + DEF_STMT to the list of statements to be replaced by the + pattern statements. + (vect_handle_widen_mult_by_const): Don't check TYPE_OUT. 
+ +=== modified file 'gcc/cfgexpand.c' +--- old/gcc/cfgexpand.c 2011-07-01 09:19:21 +0000 ++++ new/gcc/cfgexpand.c 2011-10-23 13:33:07 +0000 +@@ -3215,6 +3215,8 @@ + case VEC_UNPACK_LO_EXPR: + case VEC_WIDEN_MULT_HI_EXPR: + case VEC_WIDEN_MULT_LO_EXPR: ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ case VEC_WIDEN_LSHIFT_LO_EXPR: + return NULL; + + /* Misc codes. */ + +=== modified file 'gcc/config/arm/iterators.md' +--- old/gcc/config/arm/iterators.md 2011-09-06 14:29:24 +0000 ++++ new/gcc/config/arm/iterators.md 2011-10-23 13:33:07 +0000 +@@ -388,6 +388,9 @@ + (define_mode_attr qhs_extenddi_cstr [(SI "r") (HI "rm") (QI "rUq")]) + (define_mode_attr qhs_zextenddi_cstr [(SI "r") (HI "rm") (QI "rm")]) + ++;; Mode attribute for vshll. ++(define_mode_attr V_innermode [(V8QI "QI") (V4HI "HI") (V2SI "SI")]) ++ + ;;---------------------------------------------------------------------------- + ;; Code attributes + ;;---------------------------------------------------------------------------- + +=== modified file 'gcc/config/arm/neon.md' +--- old/gcc/config/arm/neon.md 2011-10-03 01:32:17 +0000 ++++ new/gcc/config/arm/neon.md 2011-10-23 13:33:07 +0000 +@@ -5316,6 +5316,44 @@ + } + ) + ++(define_insn "neon_vec_<US>shiftl_<mode>" ++ [(set (match_operand:<V_widen> 0 "register_operand" "=w") ++ (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w") ++ (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))] ++ "TARGET_NEON" ++{ ++ return "vshll.<US><V_sz_elem> %q0, %P1, %2"; ++} ++ [(set_attr "neon_type" "neon_shift_1")] ++) ++ ++(define_expand "vec_widen_<US>shiftl_lo_<mode>" ++ [(match_operand:<V_unpack> 0 "register_operand" "") ++ (SE:<V_unpack> (match_operand:VU 1 "register_operand" "")) ++ (match_operand:SI 2 "immediate_operand" "i")] ++ "TARGET_NEON && !BYTES_BIG_ENDIAN" ++ { ++ emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], ++ simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0), ++ operands[2])); ++ DONE; ++ } ++) ++ 
++(define_expand "vec_widen_<US>shiftl_hi_<mode>" ++ [(match_operand:<V_unpack> 0 "register_operand" "") ++ (SE:<V_unpack> (match_operand:VU 1 "register_operand" "")) ++ (match_operand:SI 2 "immediate_operand" "i")] ++ "TARGET_NEON && !BYTES_BIG_ENDIAN" ++ { ++ emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], ++ simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, ++ GET_MODE_SIZE (<V_HALF>mode)), ++ operands[2])); ++ DONE; ++ } ++) ++ + ;; Vectorize for non-neon-quad case + (define_insn "neon_unpack<US>_<mode>" + [(set (match_operand:<V_widen> 0 "register_operand" "=w") +@@ -5392,6 +5430,34 @@ + } + ) + ++(define_expand "vec_widen_<US>shiftl_hi_<mode>" ++ [(match_operand:<V_double_width> 0 "register_operand" "") ++ (SE:<V_double_width> (match_operand:VDI 1 "register_operand" "")) ++ (match_operand:SI 2 "immediate_operand" "i")] ++ "TARGET_NEON" ++ { ++ rtx tmpreg = gen_reg_rtx (<V_widen>mode); ++ emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2])); ++ emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg)); ++ ++ DONE; ++ } ++) ++ ++(define_expand "vec_widen_<US>shiftl_lo_<mode>" ++ [(match_operand:<V_double_width> 0 "register_operand" "") ++ (SE:<V_double_width> (match_operand:VDI 1 "register_operand" "")) ++ (match_operand:SI 2 "immediate_operand" "i")] ++ "TARGET_NEON" ++ { ++ rtx tmpreg = gen_reg_rtx (<V_widen>mode); ++ emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2])); ++ emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg)); ++ ++ DONE; ++ } ++) ++ + ;; The case when using all quad registers. 
+ (define_insn "vec_pack_trunc_<mode>" + [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w") + +=== modified file 'gcc/config/arm/predicates.md' +--- old/gcc/config/arm/predicates.md 2011-10-10 11:43:28 +0000 ++++ new/gcc/config/arm/predicates.md 2011-10-23 13:33:07 +0000 +@@ -136,6 +136,11 @@ + (match_operand 0 "s_register_operand")) + (match_operand 0 "const_int_operand"))) + ++(define_predicate "const_neon_scalar_shift_amount_operand" ++ (and (match_code "const_int") ++ (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)) <= GET_MODE_BITSIZE (mode) ++ && ((unsigned HOST_WIDE_INT) INTVAL (op)) > 0"))) ++ + (define_predicate "arm_add_operand" + (ior (match_operand 0 "arm_rhs_operand") + (match_operand 0 "arm_neg_immediate_operand"))) + +=== modified file 'gcc/doc/md.texi' +--- old/gcc/doc/md.texi 2011-08-13 08:32:32 +0000 ++++ new/gcc/doc/md.texi 2011-10-23 13:33:07 +0000 +@@ -4230,6 +4230,17 @@ + elements of the two vectors, and put the N/2 products of size 2*S in the + output vector (operand 0). + ++@cindex @code{vec_widen_ushiftl_hi_@var{m}} instruction pattern ++@cindex @code{vec_widen_ushiftl_lo_@var{m}} instruction pattern ++@cindex @code{vec_widen_sshiftl_hi_@var{m}} instruction pattern ++@cindex @code{vec_widen_sshiftl_lo_@var{m}} instruction pattern ++@item @samp{vec_widen_ushiftl_hi_@var{m}}, @samp{vec_widen_ushiftl_lo_@var{m}} ++@itemx @samp{vec_widen_sshiftl_hi_@var{m}}, @samp{vec_widen_sshiftl_lo_@var{m}} ++Signed/Unsigned widening shift left. The first input (operand 1) is a vector ++with N signed/unsigned elements of size S@. Operand 2 is a constant. Shift ++the high/low elements of operand 1, and put the N/2 results of size 2*S in the ++output vector (operand 0). 
++ + @cindex @code{mulhisi3} instruction pattern + @item @samp{mulhisi3} + Multiply operands 1 and 2, which have mode @code{HImode}, and store + +=== modified file 'gcc/expr.c' +--- old/gcc/expr.c 2011-08-25 11:42:09 +0000 ++++ new/gcc/expr.c 2011-10-23 13:33:07 +0000 +@@ -8290,6 +8290,19 @@ + return target; + } + ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ case VEC_WIDEN_LSHIFT_LO_EXPR: ++ { ++ tree oprnd0 = treeop0; ++ tree oprnd1 = treeop1; ++ ++ expand_operands (oprnd0, oprnd1, NULL_RTX, &op0, &op1, EXPAND_NORMAL); ++ target = expand_widen_pattern_expr (ops, op0, op1, NULL_RTX, ++ target, unsignedp); ++ gcc_assert (target); ++ return target; ++ } ++ + case VEC_PACK_TRUNC_EXPR: + case VEC_PACK_SAT_EXPR: + case VEC_PACK_FIX_TRUNC_EXPR: + +=== modified file 'gcc/genopinit.c' +--- old/gcc/genopinit.c 2011-07-15 13:06:31 +0000 ++++ new/gcc/genopinit.c 2011-10-23 13:33:07 +0000 +@@ -268,6 +268,10 @@ + "set_optab_handler (vec_widen_umult_lo_optab, $A, CODE_FOR_$(vec_widen_umult_lo_$a$))", + "set_optab_handler (vec_widen_smult_hi_optab, $A, CODE_FOR_$(vec_widen_smult_hi_$a$))", + "set_optab_handler (vec_widen_smult_lo_optab, $A, CODE_FOR_$(vec_widen_smult_lo_$a$))", ++ "set_optab_handler (vec_widen_ushiftl_hi_optab, $A, CODE_FOR_$(vec_widen_ushiftl_hi_$a$))", ++ "set_optab_handler (vec_widen_ushiftl_lo_optab, $A, CODE_FOR_$(vec_widen_ushiftl_lo_$a$))", ++ "set_optab_handler (vec_widen_sshiftl_hi_optab, $A, CODE_FOR_$(vec_widen_sshiftl_hi_$a$))", ++ "set_optab_handler (vec_widen_sshiftl_lo_optab, $A, CODE_FOR_$(vec_widen_sshiftl_lo_$a$))", + "set_optab_handler (vec_unpacks_hi_optab, $A, CODE_FOR_$(vec_unpacks_hi_$a$))", + "set_optab_handler (vec_unpacks_lo_optab, $A, CODE_FOR_$(vec_unpacks_lo_$a$))", + "set_optab_handler (vec_unpacku_hi_optab, $A, CODE_FOR_$(vec_unpacku_hi_$a$))", + +=== modified file 'gcc/gimple-pretty-print.c' +--- old/gcc/gimple-pretty-print.c 2011-05-05 15:42:22 +0000 ++++ new/gcc/gimple-pretty-print.c 2011-10-23 13:33:07 +0000 +@@ -343,6 +343,8 @@ + case 
VEC_EXTRACT_ODD_EXPR: + case VEC_INTERLEAVE_HIGH_EXPR: + case VEC_INTERLEAVE_LOW_EXPR: ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ case VEC_WIDEN_LSHIFT_LO_EXPR: + for (p = tree_code_name [(int) code]; *p; p++) + pp_character (buffer, TOUPPER (*p)); + pp_string (buffer, " <"); + +=== modified file 'gcc/optabs.c' +--- old/gcc/optabs.c 2011-08-11 15:46:01 +0000 ++++ new/gcc/optabs.c 2011-10-23 13:33:07 +0000 +@@ -454,6 +454,14 @@ + return TYPE_UNSIGNED (type) ? + vec_widen_umult_lo_optab : vec_widen_smult_lo_optab; + ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ return TYPE_UNSIGNED (type) ? ++ vec_widen_ushiftl_hi_optab : vec_widen_sshiftl_hi_optab; ++ ++ case VEC_WIDEN_LSHIFT_LO_EXPR: ++ return TYPE_UNSIGNED (type) ? ++ vec_widen_ushiftl_lo_optab : vec_widen_sshiftl_lo_optab; ++ + case VEC_UNPACK_HI_EXPR: + return TYPE_UNSIGNED (type) ? + vec_unpacku_hi_optab : vec_unpacks_hi_optab; +@@ -6351,6 +6359,10 @@ + init_optab (vec_widen_umult_lo_optab, UNKNOWN); + init_optab (vec_widen_smult_hi_optab, UNKNOWN); + init_optab (vec_widen_smult_lo_optab, UNKNOWN); ++ init_optab (vec_widen_ushiftl_hi_optab, UNKNOWN); ++ init_optab (vec_widen_ushiftl_lo_optab, UNKNOWN); ++ init_optab (vec_widen_sshiftl_hi_optab, UNKNOWN); ++ init_optab (vec_widen_sshiftl_lo_optab, UNKNOWN); + init_optab (vec_unpacks_hi_optab, UNKNOWN); + init_optab (vec_unpacks_lo_optab, UNKNOWN); + init_optab (vec_unpacku_hi_optab, UNKNOWN); + +=== modified file 'gcc/optabs.h' +--- old/gcc/optabs.h 2011-07-27 14:12:45 +0000 ++++ new/gcc/optabs.h 2011-10-23 13:33:07 +0000 +@@ -350,6 +350,12 @@ + OTI_vec_widen_umult_lo, + OTI_vec_widen_smult_hi, + OTI_vec_widen_smult_lo, ++ /* Widening shift left. ++ The high/low part of the resulting vector is returned. */ ++ OTI_vec_widen_ushiftl_hi, ++ OTI_vec_widen_ushiftl_lo, ++ OTI_vec_widen_sshiftl_hi, ++ OTI_vec_widen_sshiftl_lo, + /* Extract and widen the high/low part of a vector of signed or + floating point elements. 
*/ + OTI_vec_unpacks_hi, +@@ -542,6 +548,10 @@ + #define vec_widen_umult_lo_optab (&optab_table[OTI_vec_widen_umult_lo]) + #define vec_widen_smult_hi_optab (&optab_table[OTI_vec_widen_smult_hi]) + #define vec_widen_smult_lo_optab (&optab_table[OTI_vec_widen_smult_lo]) ++#define vec_widen_ushiftl_hi_optab (&optab_table[OTI_vec_widen_ushiftl_hi]) ++#define vec_widen_ushiftl_lo_optab (&optab_table[OTI_vec_widen_ushiftl_lo]) ++#define vec_widen_sshiftl_hi_optab (&optab_table[OTI_vec_widen_sshiftl_hi]) ++#define vec_widen_sshiftl_lo_optab (&optab_table[OTI_vec_widen_sshiftl_lo]) + #define vec_unpacks_hi_optab (&optab_table[OTI_vec_unpacks_hi]) + #define vec_unpacks_lo_optab (&optab_table[OTI_vec_unpacks_lo]) + #define vec_unpacku_hi_optab (&optab_table[OTI_vec_unpacku_hi]) + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-s16.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s16.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s16.c 2011-10-23 13:33:07 +0000 +@@ -0,0 +1,107 @@ ++/* { dg-require-effective-target vect_int } */ ++/* { dg-require-effective-target vect_shift } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 64 ++#define C 16 ++ ++__attribute__ ((noinline)) void ++foo (short *src, int *dst) ++{ ++ int i; ++ short b, b0, b1, b2, b3, *s = src; ++ int *d = dst; ++ ++ for (i = 0; i < N/4; i++) ++ { ++ b0 = *s++; ++ b1 = *s++; ++ b2 = *s++; ++ b3 = *s++; ++ *d = b0 << C; ++ d++; ++ *d = b1 << C; ++ d++; ++ *d = b2 << C; ++ d++; ++ *d = b3 << C; ++ d++; ++ } ++ ++ s = src; ++ d = dst; ++ for (i = 0; i < N; i++) ++ { ++ b = *s++; ++ if (*d != b << C) ++ abort (); ++ d++; ++ } ++ ++ s = src; ++ d = dst; ++ for (i = 0; i < N/4; i++) ++ { ++ b0 = *s++; ++ b1 = *s++; ++ b2 = *s++; ++ b3 = *s++; ++ *d = b0 << C; ++ d++; ++ *d = b1 << C; ++ d++; ++ *d = b2 << C; ++ d++; ++ *d = b3 << 6; ++ d++; ++ } ++ ++ s = src; ++ d = dst; ++ for (i = 0; i < N/4; i++) ++ { ++ b = *s++; ++ if (*d != b << C) ++ abort (); 
++ d++; ++ b = *s++; ++ if (*d != b << C) ++ abort (); ++ d++; ++ b = *s++; ++ if (*d != b << C) ++ abort (); ++ d++; ++ b = *s++; ++ if (*d != b << 6) ++ abort (); ++ d++; ++ } ++} ++ ++int main (void) ++{ ++ int i; ++ short in[N]; ++ int out[N]; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ out[i] = 255; ++ __asm__ volatile (""); ++ } ++ ++ foo (in, out); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 8 "vect" { target vect_widen_shift } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-s8.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s8.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s8.c 2011-10-23 13:33:07 +0000 +@@ -0,0 +1,58 @@ ++/* { dg-require-effective-target vect_int } */ ++/* { dg-require-effective-target vect_shift } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 64 ++#define C 12 ++ ++__attribute__ ((noinline)) void ++foo (char *src, int *dst) ++{ ++ int i; ++ char b, *s = src; ++ int *d = dst; ++ ++ for (i = 0; i < N; i++) ++ { ++ b = *s++; ++ *d = b << C; ++ d++; ++ } ++ ++ s = src; ++ d = dst; ++ for (i = 0; i < N; i++) ++ { ++ b = *s++; ++ if (*d != b << C) ++ abort (); ++ d++; ++ } ++} ++ ++int main (void) ++{ ++ int i; ++ char in[N]; ++ int out[N]; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ out[i] = 255; ++ __asm__ volatile (""); ++ } ++ ++ foo (in, out); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-u16.c' +--- 
old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u16.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u16.c 2011-10-23 13:33:07 +0000 +@@ -0,0 +1,58 @@ ++/* { dg-require-effective-target vect_int } */ ++/* { dg-require-effective-target vect_shift } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 64 ++#define C 7 ++ ++__attribute__ ((noinline)) void ++foo (unsigned short *src, unsigned int *dst) ++{ ++ int i; ++ unsigned short b, *s = src; ++ unsigned int *d = dst; ++ ++ for (i = 0; i < N; i++) ++ { ++ b = *s++; ++ *d = b << C; ++ d++; ++ } ++ ++ s = src; ++ d = dst; ++ for (i = 0; i < N; i++) ++ { ++ b = *s++; ++ if (*d != b << C) ++ abort (); ++ d++; ++ } ++} ++ ++int main (void) ++{ ++ int i; ++ unsigned short in[N]; ++ unsigned int out[N]; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ out[i] = 255; ++ __asm__ volatile (""); ++ } ++ ++ foo (in, out); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c' +--- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c 2011-10-23 13:33:07 +0000 +@@ -0,0 +1,65 @@ ++/* { dg-require-effective-target vect_int } */ ++/* { dg-require-effective-target vect_shift } */ ++ ++#include <stdarg.h> ++#include "tree-vect.h" ++ ++#define N 64 ++#define C1 10 ++#define C2 5 ++ ++__attribute__ ((noinline)) void ++foo (unsigned char *src, unsigned int *dst1, unsigned int *dst2) ++{ ++ int i; ++ unsigned char b, *s = src; ++ unsigned int *d1 = dst1, *d2 = dst2; ++ ++ for (i = 0; i < N; i++) ++ { ++ b = *s++; ++ *d1 = b << C1; ++ d1++; ++ *d2 = b << C2; ++ d2++; ++ } ++ ++ s = src; ++ d1 = dst1; ++ d2 
= dst2; ++ for (i = 0; i < N; i++) ++ { ++ b = *s++; ++ if (*d1 != b << C1 || *d2 != b << C2) ++ abort (); ++ d1++; ++ d2++; ++ } ++} ++ ++int main (void) ++{ ++ int i; ++ unsigned char in[N]; ++ unsigned int out1[N]; ++ unsigned int out2[N]; ++ ++ check_vect (); ++ ++ for (i = 0; i < N; i++) ++ { ++ in[i] = i; ++ out1[i] = 255; ++ out2[i] = 255; ++ __asm__ volatile (""); ++ } ++ ++ foo (in, out1, out2); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ ++ + +=== modified file 'gcc/testsuite/lib/target-supports.exp' +--- old/gcc/testsuite/lib/target-supports.exp 2011-10-06 11:08:08 +0000 ++++ new/gcc/testsuite/lib/target-supports.exp 2011-10-23 13:33:07 +0000 +@@ -2783,6 +2783,26 @@ + } + + # Return 1 if the target plus current options supports a vector ++# widening shift, 0 otherwise. ++# ++# This won't change for different subtargets so cache the result. ++ ++proc check_effective_target_vect_widen_shift { } { ++ global et_vect_widen_shift_saved ++ ++ if [info exists et_vect_widen_shift_saved] { ++ verbose "check_effective_target_vect_widen_shift: using cached result" 2 ++ } else { ++ set et_vect_widen_shift_saved 0 ++ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } { ++ set et_vect_widen_shift_saved 1 ++ } ++ } ++ verbose "check_effective_target_vect_widen_shift: returning $et_vect_widen_shift_saved" 2 ++ return $et_vect_widen_shift_saved ++} ++ ++# Return 1 if the target plus current options supports a vector + # dot-product of signed chars, 0 otherwise. + # + # This won't change for different subtargets so cache the result. 
+ +=== modified file 'gcc/tree-cfg.c' +--- old/gcc/tree-cfg.c 2011-07-15 13:44:50 +0000 ++++ new/gcc/tree-cfg.c 2011-10-23 13:33:07 +0000 +@@ -3473,6 +3473,44 @@ + return false; + } + ++ case WIDEN_LSHIFT_EXPR: ++ { ++ if (!INTEGRAL_TYPE_P (lhs_type) ++ || !INTEGRAL_TYPE_P (rhs1_type) ++ || TREE_CODE (rhs2) != INTEGER_CST ++ || (2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type))) ++ { ++ error ("type mismatch in widening vector shift expression"); ++ debug_generic_expr (lhs_type); ++ debug_generic_expr (rhs1_type); ++ debug_generic_expr (rhs2_type); ++ return true; ++ } ++ ++ return false; ++ } ++ ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ case VEC_WIDEN_LSHIFT_LO_EXPR: ++ { ++ if (TREE_CODE (rhs1_type) != VECTOR_TYPE ++ || TREE_CODE (lhs_type) != VECTOR_TYPE ++ || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1_type)) ++ || !INTEGRAL_TYPE_P (TREE_TYPE (lhs_type)) ++ || TREE_CODE (rhs2) != INTEGER_CST ++ || (2 * TYPE_PRECISION (TREE_TYPE (rhs1_type)) ++ > TYPE_PRECISION (TREE_TYPE (lhs_type)))) ++ { ++ error ("type mismatch in widening vector shift expression"); ++ debug_generic_expr (lhs_type); ++ debug_generic_expr (rhs1_type); ++ debug_generic_expr (rhs2_type); ++ return true; ++ } ++ ++ return false; ++ } ++ + case PLUS_EXPR: + case MINUS_EXPR: + { + +=== modified file 'gcc/tree-inline.c' +--- old/gcc/tree-inline.c 2011-08-13 08:32:32 +0000 ++++ new/gcc/tree-inline.c 2011-10-23 13:33:07 +0000 +@@ -3343,6 +3343,7 @@ + case DOT_PROD_EXPR: + case WIDEN_MULT_PLUS_EXPR: + case WIDEN_MULT_MINUS_EXPR: ++ case WIDEN_LSHIFT_EXPR: + + case VEC_WIDEN_MULT_HI_EXPR: + case VEC_WIDEN_MULT_LO_EXPR: +@@ -3357,6 +3358,8 @@ + case VEC_EXTRACT_ODD_EXPR: + case VEC_INTERLEAVE_HIGH_EXPR: + case VEC_INTERLEAVE_LOW_EXPR: ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ case VEC_WIDEN_LSHIFT_LO_EXPR: + + return 1; + + +=== modified file 'gcc/tree-pretty-print.c' +--- old/gcc/tree-pretty-print.c 2010-11-05 09:00:50 +0000 ++++ new/gcc/tree-pretty-print.c 2011-10-23 13:33:07 +0000 +@@ -1539,6 +1539,7 @@ + case 
RROTATE_EXPR: + case VEC_LSHIFT_EXPR: + case VEC_RSHIFT_EXPR: ++ case WIDEN_LSHIFT_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + case BIT_AND_EXPR: +@@ -2209,6 +2210,22 @@ + pp_string (buffer, " > "); + break; + ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ pp_string (buffer, " VEC_WIDEN_LSHIFT_HI_EXPR < "); ++ dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); ++ pp_string (buffer, ", "); ++ dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false); ++ pp_string (buffer, " > "); ++ break; ++ ++ case VEC_WIDEN_LSHIFT_LO_EXPR: ++ pp_string (buffer, " VEC_WIDEN_LSHIFT_LO_EXPR < "); ++ dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); ++ pp_string (buffer, ", "); ++ dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false); ++ pp_string (buffer, " > "); ++ break; ++ + case VEC_UNPACK_HI_EXPR: + pp_string (buffer, " VEC_UNPACK_HI_EXPR < "); + dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); +@@ -2531,6 +2548,9 @@ + case RSHIFT_EXPR: + case LROTATE_EXPR: + case RROTATE_EXPR: ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ case VEC_WIDEN_LSHIFT_LO_EXPR: ++ case WIDEN_LSHIFT_EXPR: + return 11; + + case WIDEN_SUM_EXPR: +@@ -2706,6 +2726,9 @@ + case VEC_RSHIFT_EXPR: + return "v>>"; + ++ case WIDEN_LSHIFT_EXPR: ++ return "w<<"; ++ + case POINTER_PLUS_EXPR: + return "+"; + + +=== modified file 'gcc/tree-vect-generic.c' +--- old/gcc/tree-vect-generic.c 2011-02-08 14:16:50 +0000 ++++ new/gcc/tree-vect-generic.c 2011-10-23 13:33:07 +0000 +@@ -552,7 +552,9 @@ + || code == VEC_UNPACK_LO_EXPR + || code == VEC_PACK_TRUNC_EXPR + || code == VEC_PACK_SAT_EXPR +- || code == VEC_PACK_FIX_TRUNC_EXPR) ++ || code == VEC_PACK_FIX_TRUNC_EXPR ++ || code == VEC_WIDEN_LSHIFT_HI_EXPR ++ || code == VEC_WIDEN_LSHIFT_LO_EXPR) + type = TREE_TYPE (rhs1); + + /* Optabs will try converting a negation into a subtraction, so + +=== modified file 'gcc/tree-vect-patterns.c' +--- old/gcc/tree-vect-patterns.c 2011-09-05 06:23:37 +0000 ++++ 
new/gcc/tree-vect-patterns.c 2011-10-23 13:33:07 +0000 +@@ -48,12 +48,15 @@ + static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *); + static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *, + tree *); ++static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **, ++ tree *, tree *); + static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { + vect_recog_widen_mult_pattern, + vect_recog_widen_sum_pattern, + vect_recog_dot_prod_pattern, + vect_recog_pow_pattern, +- vect_recog_over_widening_pattern}; ++ vect_recog_over_widening_pattern, ++ vect_recog_widen_shift_pattern}; + + + /* Function widened_name_p +@@ -331,27 +334,38 @@ + return pattern_stmt; + } + +-/* Handle two cases of multiplication by a constant. The first one is when +- the constant, CONST_OPRND, fits the type (HALF_TYPE) of the second +- operand (OPRND). In that case, we can peform widen-mult from HALF_TYPE to +- TYPE. ++ ++/* Handle widening operation by a constant. At the moment we support MULT_EXPR ++ and LSHIFT_EXPR. ++ ++ For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR ++ we check that CONST_OPRND is less or equal to the size of HALF_TYPE. 
+ + Otherwise, if the type of the result (TYPE) is at least 4 times bigger than +- HALF_TYPE, and CONST_OPRND fits an intermediate type (2 times smaller than +- TYPE), we can perform widen-mult from the intermediate type to TYPE and +- replace a_T = (TYPE) a_t; with a_it - (interm_type) a_t; */ ++ HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE) ++ that satisfies the above restrictions, we can perform a widening operation ++ from the intermediate type to TYPE and replace a_T = (TYPE) a_t; ++ with a_it = (interm_type) a_t; */ + + static bool +-vect_handle_widen_mult_by_const (gimple stmt, tree const_oprnd, tree *oprnd, +- VEC (gimple, heap) **stmts, tree type, +- tree *half_type, gimple def_stmt) ++vect_handle_widen_op_by_const (gimple stmt, enum tree_code code, ++ tree const_oprnd, tree *oprnd, ++ VEC (gimple, heap) **stmts, tree type, ++ tree *half_type, gimple def_stmt) + { + tree new_type, new_oprnd, tmp; + gimple new_stmt; + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); + struct loop *loop = LOOP_VINFO_LOOP (loop_info); + +- if (int_fits_type_p (const_oprnd, *half_type)) ++ if (code != MULT_EXPR && code != LSHIFT_EXPR) ++ return false; ++ ++ if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type)) ++ || (code == LSHIFT_EXPR ++ && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type)) ++ != 1)) ++ && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2)) + { + /* CONST_OPRND is a constant of HALF_TYPE. */ + *oprnd = gimple_assign_rhs1 (def_stmt); +@@ -364,14 +378,16 @@ + || !vinfo_for_stmt (def_stmt)) + return false; + +- /* TYPE is 4 times bigger than HALF_TYPE, try widen-mult for ++ /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for + a type 2 times bigger than HALF_TYPE. 
*/ + new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2, + TYPE_UNSIGNED (type)); +- if (!int_fits_type_p (const_oprnd, new_type)) ++ if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type)) ++ || (code == LSHIFT_EXPR ++ && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1)) + return false; + +- /* Use NEW_TYPE for widen_mult. */ ++ /* Use NEW_TYPE for widening operation. */ + if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt))) + { + new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); +@@ -381,6 +397,7 @@ + || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type) + return false; + ++ VEC_safe_push (gimple, heap, *stmts, def_stmt); + *oprnd = gimple_assign_lhs (new_stmt); + } + else +@@ -392,7 +409,6 @@ + new_oprnd = make_ssa_name (tmp, NULL); + new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd, + NULL_TREE); +- SSA_NAME_DEF_STMT (new_oprnd) = new_stmt; + STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt; + VEC_safe_push (gimple, heap, *stmts, def_stmt); + *oprnd = new_oprnd; +@@ -402,7 +418,6 @@ + return true; + } + +- + /* Function vect_recog_widen_mult_pattern + + Try to find the following pattern: +@@ -491,7 +506,7 @@ + enum tree_code dummy_code; + int dummy_int; + VEC (tree, heap) *dummy_vec; +- bool op0_ok, op1_ok; ++ bool op1_ok; + + if (!is_gimple_assign (last_stmt)) + return NULL; +@@ -511,38 +526,23 @@ + return NULL; + + /* Check argument 0. */ +- op0_ok = widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false); ++ if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false)) ++ return NULL; + /* Check argument 1. */ + op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false); + +- /* In case of multiplication by a constant one of the operands may not match +- the pattern, but not both. 
*/ +- if (!op0_ok && !op1_ok) +- return NULL; +- +- if (op0_ok && op1_ok) ++ if (op1_ok) + { + oprnd0 = gimple_assign_rhs1 (def_stmt0); + oprnd1 = gimple_assign_rhs1 (def_stmt1); + } +- else if (!op0_ok) +- { +- if (TREE_CODE (oprnd0) == INTEGER_CST +- && TREE_CODE (half_type1) == INTEGER_TYPE +- && vect_handle_widen_mult_by_const (last_stmt, oprnd0, &oprnd1, +- stmts, type, +- &half_type1, def_stmt1)) +- half_type0 = half_type1; +- else +- return NULL; +- } +- else if (!op1_ok) ++ else + { + if (TREE_CODE (oprnd1) == INTEGER_CST + && TREE_CODE (half_type0) == INTEGER_TYPE +- && vect_handle_widen_mult_by_const (last_stmt, oprnd1, &oprnd0, +- stmts, type, +- &half_type0, def_stmt0)) ++ && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1, ++ &oprnd0, stmts, type, ++ &half_type0, def_stmt0)) + half_type1 = half_type0; + else + return NULL; +@@ -998,6 +998,7 @@ + || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type) + return false; + ++ VEC_safe_push (gimple, heap, *stmts, def_stmt); + oprnd = gimple_assign_lhs (new_stmt); + } + else +@@ -1128,7 +1129,7 @@ + statetments, except for the case when the last statement in the + sequence doesn't have a corresponding pattern statement. In such + case we associate the last pattern statement with the last statement +- in the sequence. Therefore, we only add an original statetement to ++ in the sequence. Therefore, we only add the original statement to + the list if we know that it is not the last. */ + if (prev_stmt) + VEC_safe_push (gimple, heap, *stmts, prev_stmt); +@@ -1215,6 +1216,231 @@ + } + + ++/* Detect widening shift pattern: ++ ++ type a_t; ++ TYPE a_T, res_T; ++ ++ S1 a_t = ; ++ S2 a_T = (TYPE) a_t; ++ S3 res_T = a_T << CONST; ++ ++ where type 'TYPE' is at least double the size of type 'type'. 
++ ++ Also detect unsigned cases: ++ ++ unsigned type a_t; ++ unsigned TYPE u_res_T; ++ TYPE a_T, res_T; ++ ++ S1 a_t = ; ++ S2 a_T = (TYPE) a_t; ++ S3 res_T = a_T << CONST; ++ S4 u_res_T = (unsigned TYPE) res_T; ++ ++ And a case when 'TYPE' is 4 times bigger than 'type'. In that case we ++ create an additional pattern stmt for S2 to create a variable of an ++ intermediate type, and perform widen-shift on the intermediate type: ++ ++ type a_t; ++ interm_type a_it; ++ TYPE a_T, res_T, res_T'; ++ ++ S1 a_t = ; ++ S2 a_T = (TYPE) a_t; ++ '--> a_it = (interm_type) a_t; ++ S3 res_T = a_T << CONST; ++ '--> res_T' = a_it <<* CONST; ++ ++ Input/Output: ++ ++ * STMTS: Contains a stmt from which the pattern search begins. ++ In case of unsigned widen-shift, the original stmt (S3) is replaced with S4 ++ in STMTS. When an intermediate type is used and a pattern statement is ++ created for S2, we also put S2 here (before S3). ++ ++ Output: ++ ++ * TYPE_IN: The type of the input arguments to the pattern. ++ ++ * TYPE_OUT: The type of the output of this pattern. ++ ++ * Return value: A new stmt that will be used to replace the sequence of ++ stmts that constitute the pattern. In this case it will be: ++ WIDEN_LSHIFT_EXPR <a_t, CONST>. 
*/ ++ ++static gimple ++vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts, ++ tree *type_in, tree *type_out) ++{ ++ gimple last_stmt = VEC_pop (gimple, *stmts); ++ gimple def_stmt0; ++ tree oprnd0, oprnd1; ++ tree type, half_type0; ++ gimple pattern_stmt, orig_stmt = NULL; ++ tree vectype, vectype_out = NULL_TREE; ++ tree dummy; ++ tree var; ++ enum tree_code dummy_code; ++ int dummy_int; ++ VEC (tree, heap) * dummy_vec; ++ gimple use_stmt = NULL; ++ bool over_widen = false; ++ ++ if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt)) ++ return NULL; ++ ++ orig_stmt = last_stmt; ++ if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt))) ++ { ++ /* This statement was also detected as over-widening operation (it can't ++ be any other pattern, because only over-widening detects shifts). ++ LAST_STMT is the final type demotion statement, but its related ++ statement is shift. We analyze the related statement to catch cases: ++ ++ orig code: ++ type a_t; ++ itype res; ++ TYPE a_T, res_T; ++ ++ S1 a_T = (TYPE) a_t; ++ S2 res_T = a_T << CONST; ++ S3 res = (itype)res_T; ++ ++ (size of type * 2 <= size of itype ++ and size of itype * 2 <= size of TYPE) ++ ++ code after over-widening pattern detection: ++ ++ S1 a_T = (TYPE) a_t; ++ --> a_it = (itype) a_t; ++ S2 res_T = a_T << CONST; ++ S3 res = (itype)res_T; <--- LAST_STMT ++ --> res = a_it << CONST; ++ ++ after widen_shift: ++ ++ S1 a_T = (TYPE) a_t; ++ --> a_it = (itype) a_t; - redundant ++ S2 res_T = a_T << CONST; ++ S3 res = (itype)res_T; ++ --> res = a_t w<< CONST; ++ ++ i.e., we replace the three statements with res = a_t w<< CONST. 
*/ ++ last_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_stmt)); ++ over_widen = true; ++ } ++ ++ if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR) ++ return NULL; ++ ++ oprnd0 = gimple_assign_rhs1 (last_stmt); ++ oprnd1 = gimple_assign_rhs2 (last_stmt); ++ if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST) ++ return NULL; ++ ++ /* Check operand 0: it has to be defined by a type promotion. */ ++ if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false)) ++ return NULL; ++ ++ /* Check operand 1: has to be positive. We check that it fits the type ++ in vect_handle_widen_op_by_const (). */ ++ if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0) ++ return NULL; ++ ++ oprnd0 = gimple_assign_rhs1 (def_stmt0); ++ type = gimple_expr_type (last_stmt); ++ ++ /* Check if this is a widening operation. */ ++ if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1, ++ &oprnd0, stmts, ++ type, &half_type0, def_stmt0)) ++ return NULL; ++ ++ /* Handle unsigned case. Look for ++ S4 u_res_T = (unsigned TYPE) res_T; ++ Use unsigned TYPE as the type for WIDEN_LSHIFT_EXPR. */ ++ if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0)) ++ { ++ tree lhs = gimple_assign_lhs (last_stmt), use_lhs; ++ imm_use_iterator imm_iter; ++ use_operand_p use_p; ++ int nuses = 0; ++ tree use_type; ++ ++ if (over_widen) ++ { ++ /* In case of over-widening pattern, S4 should be ORIG_STMT itself. ++ We check here that TYPE is the correct type for the operation, ++ i.e., it's the type of the original result. 
*/ ++ tree orig_type = gimple_expr_type (orig_stmt); ++ if ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (orig_type)) ++ || (TYPE_PRECISION (type) != TYPE_PRECISION (orig_type))) ++ return NULL; ++ } ++ else ++ { ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) ++ { ++ if (is_gimple_debug (USE_STMT (use_p))) ++ continue; ++ use_stmt = USE_STMT (use_p); ++ nuses++; ++ } ++ ++ if (nuses != 1 || !is_gimple_assign (use_stmt) ++ || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))) ++ return NULL; ++ ++ use_lhs = gimple_assign_lhs (use_stmt); ++ use_type = TREE_TYPE (use_lhs); ++ ++ if (!INTEGRAL_TYPE_P (use_type) ++ || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type)) ++ || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type))) ++ return NULL; ++ ++ type = use_type; ++ } ++ } ++ ++ /* Pattern detected. */ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: "); ++ ++ /* Check target support. */ ++ vectype = get_vectype_for_scalar_type (half_type0); ++ vectype_out = get_vectype_for_scalar_type (type); ++ ++ if (!vectype ++ || !vectype_out ++ || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt, ++ vectype_out, vectype, ++ &dummy, &dummy, &dummy_code, ++ &dummy_code, &dummy_int, ++ &dummy_vec)) ++ return NULL; ++ ++ *type_in = vectype; ++ *type_out = vectype_out; ++ ++ /* Pattern supported. Create a stmt to be used to replace the pattern. */ ++ var = vect_recog_temp_ssa_var (type, NULL); ++ pattern_stmt = ++ gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1); ++ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); ++ ++ if (use_stmt) ++ last_stmt = use_stmt; ++ else ++ last_stmt = orig_stmt; ++ ++ VEC_safe_push (gimple, heap, *stmts, last_stmt); ++ return pattern_stmt; ++} ++ + /* Mark statements that are involved in a pattern. 
*/ + + static inline void +@@ -1278,7 +1504,8 @@ + static void + vect_pattern_recog_1 ( + gimple (* vect_recog_func) (VEC (gimple, heap) **, tree *, tree *), +- gimple_stmt_iterator si) ++ gimple_stmt_iterator si, ++ VEC (gimple, heap) **stmts_to_replace) + { + gimple stmt = gsi_stmt (si), pattern_stmt; + stmt_vec_info stmt_info; +@@ -1288,14 +1515,14 @@ + enum tree_code code; + int i; + gimple next; +- VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1); + +- VEC_quick_push (gimple, stmts_to_replace, stmt); +- pattern_stmt = (* vect_recog_func) (&stmts_to_replace, &type_in, &type_out); ++ VEC_truncate (gimple, *stmts_to_replace, 0); ++ VEC_quick_push (gimple, *stmts_to_replace, stmt); ++ pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out); + if (!pattern_stmt) + return; + +- stmt = VEC_last (gimple, stmts_to_replace); ++ stmt = VEC_last (gimple, *stmts_to_replace); + stmt_info = vinfo_for_stmt (stmt); + loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + +@@ -1303,8 +1530,6 @@ + { + /* No need to check target support (already checked by the pattern + recognition function). */ +- if (type_out) +- gcc_assert (VECTOR_MODE_P (TYPE_MODE (type_out))); + pattern_vectype = type_out ? type_out : type_in; + } + else +@@ -1360,8 +1585,8 @@ + /* It is possible that additional pattern stmts are created and inserted in + STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the + relevant statements. 
*/ +- for (i = 0; VEC_iterate (gimple, stmts_to_replace, i, stmt) +- && (unsigned) i < (VEC_length (gimple, stmts_to_replace) - 1); ++ for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt) ++ && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1); + i++) + { + stmt_info = vinfo_for_stmt (stmt); +@@ -1374,8 +1599,6 @@ + + vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE); + } +- +- VEC_free (gimple, heap, stmts_to_replace); + } + + +@@ -1465,6 +1688,7 @@ + gimple_stmt_iterator si; + unsigned int i, j; + gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); ++ VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1); + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "=== vect_pattern_recog ==="); +@@ -1480,8 +1704,11 @@ + for (j = 0; j < NUM_PATTERNS; j++) + { + vect_recog_func_ptr = vect_vect_recog_func_ptrs[j]; +- vect_pattern_recog_1 (vect_recog_func_ptr, si); ++ vect_pattern_recog_1 (vect_recog_func_ptr, si, ++ &stmts_to_replace); + } + } + } ++ ++ VEC_free (gimple, heap, stmts_to_replace); + } + +=== modified file 'gcc/tree-vect-slp.c' +--- old/gcc/tree-vect-slp.c 2011-10-23 11:29:25 +0000 ++++ new/gcc/tree-vect-slp.c 2011-10-27 11:27:59 +0000 +@@ -480,6 +480,11 @@ + } + } + } ++ else if (rhs_code == WIDEN_LSHIFT_EXPR) ++ { ++ need_same_oprnds = true; ++ first_op1 = gimple_assign_rhs2 (stmt); ++ } + } + else + { + +=== modified file 'gcc/tree-vect-stmts.c' +--- old/gcc/tree-vect-stmts.c 2011-10-23 11:29:25 +0000 ++++ new/gcc/tree-vect-stmts.c 2011-10-27 11:27:59 +0000 +@@ -3359,6 +3359,7 @@ + VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL; + VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL; + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); ++ unsigned int k; + + if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) + return false; +@@ -3375,7 +3376,8 @@ + + code = gimple_assign_rhs_code (stmt); + if (!CONVERT_EXPR_CODE_P (code) +- && code != 
WIDEN_MULT_EXPR) ++ && code != WIDEN_MULT_EXPR ++ && code != WIDEN_LSHIFT_EXPR) + return false; + + scalar_dest = gimple_assign_lhs (stmt); +@@ -3403,7 +3405,7 @@ + bool ok; + + op1 = gimple_assign_rhs2 (stmt); +- if (code == WIDEN_MULT_EXPR) ++ if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR) + { + /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of + OP1. */ +@@ -3480,7 +3482,7 @@ + fprintf (vect_dump, "transform type promotion operation. ncopies = %d.", + ncopies); + +- if (code == WIDEN_MULT_EXPR) ++ if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR) + { + if (CONSTANT_CLASS_P (op0)) + op0 = fold_convert (TREE_TYPE (op1), op0); +@@ -3521,6 +3523,8 @@ + if (op_type == binary_op) + vec_oprnds1 = VEC_alloc (tree, heap, 1); + } ++ else if (code == WIDEN_LSHIFT_EXPR) ++ vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size); + + /* In case the vectorization factor (VF) is bigger than the number + of elements that we can fit in a vectype (nunits), we have to generate +@@ -3534,15 +3538,33 @@ + if (j == 0) + { + if (slp_node) +- vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, +- &vec_oprnds1, -1); +- else ++ { ++ if (code == WIDEN_LSHIFT_EXPR) ++ { ++ vec_oprnd1 = op1; ++ /* Store vec_oprnd1 for every vector stmt to be created ++ for SLP_NODE. We check during the analysis that all ++ the shift arguments are the same. 
*/ ++ for (k = 0; k < slp_node->vec_stmts_size - 1; k++) ++ VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); ++ ++ vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, ++ -1); ++ } ++ else ++ vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, ++ &vec_oprnds1, -1); ++ } ++ else + { + vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); + VEC_quick_push (tree, vec_oprnds0, vec_oprnd0); + if (op_type == binary_op) + { +- vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL); ++ if (code == WIDEN_LSHIFT_EXPR) ++ vec_oprnd1 = op1; ++ else ++ vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL); + VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); + } + } +@@ -3553,7 +3575,10 @@ + VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0); + if (op_type == binary_op) + { +- vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1); ++ if (code == WIDEN_LSHIFT_EXPR) ++ vec_oprnd1 = op1; ++ else ++ vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1); + VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1); + } + } +@@ -5853,6 +5878,19 @@ + } + break; + ++ case WIDEN_LSHIFT_EXPR: ++ if (BYTES_BIG_ENDIAN) ++ { ++ c1 = VEC_WIDEN_LSHIFT_HI_EXPR; ++ c2 = VEC_WIDEN_LSHIFT_LO_EXPR; ++ } ++ else ++ { ++ c2 = VEC_WIDEN_LSHIFT_HI_EXPR; ++ c1 = VEC_WIDEN_LSHIFT_LO_EXPR; ++ } ++ break; ++ + CASE_CONVERT: + if (BYTES_BIG_ENDIAN) + { + +=== modified file 'gcc/tree-vectorizer.h' +--- old/gcc/tree-vectorizer.h 2011-09-05 06:23:37 +0000 ++++ new/gcc/tree-vectorizer.h 2011-10-23 13:33:07 +0000 +@@ -896,7 +896,7 @@ + Additional pattern recognition functions can (and will) be added + in the future. */ + typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); +-#define NUM_PATTERNS 5 ++#define NUM_PATTERNS 6 + void vect_pattern_recog (loop_vec_info); + + /* In tree-vectorizer.c. 
*/ + +=== modified file 'gcc/tree.def' +--- old/gcc/tree.def 2011-01-21 14:14:12 +0000 ++++ new/gcc/tree.def 2011-10-23 13:33:07 +0000 +@@ -1092,6 +1092,19 @@ + is subtracted from t3. */ + DEFTREECODE (WIDEN_MULT_MINUS_EXPR, "widen_mult_minus_expr", tcc_expression, 3) + ++/* Widening shift left. ++ The first operand is of type t1. ++ The second operand is the number of bits to shift by; it need not be the ++ same type as the first operand and result. ++ Note that the result is undefined if the second operand is larger ++ than or equal to the first operand's type size. ++ The type of the entire expression is t2, such that t2 is at least twice ++ the size of t1. ++ WIDEN_LSHIFT_EXPR is equivalent to first widening (promoting) ++ the first argument from type t1 to type t2, and then shifting it ++ by the second argument. */ ++DEFTREECODE (WIDEN_LSHIFT_EXPR, "widen_lshift_expr", tcc_binary, 2) ++ + /* Fused multiply-add. + All operands and the result are of the same type. No intermediate + rounding is performed after multiplying operand one with operand two +@@ -1147,6 +1160,16 @@ + DEFTREECODE (VEC_INTERLEAVE_HIGH_EXPR, "vec_interleavehigh_expr", tcc_binary, 2) + DEFTREECODE (VEC_INTERLEAVE_LOW_EXPR, "vec_interleavelow_expr", tcc_binary, 2) + ++/* Widening vector shift left in bits. ++ Operand 0 is a vector to be shifted with N elements of size S. ++ Operand 1 is an integer shift amount in bits. ++ The result of the operation is N elements of size 2*S. ++ VEC_WIDEN_LSHIFT_HI_EXPR computes the N/2 high results. ++ VEC_WIDEN_LSHIFT_LO_EXPR computes the N/2 low results. ++ */ ++DEFTREECODE (VEC_WIDEN_LSHIFT_HI_EXPR, "widen_lshift_hi_expr", tcc_binary, 2) ++DEFTREECODE (VEC_WIDEN_LSHIFT_LO_EXPR, "widen_lshift_lo_expr", tcc_binary, 2) ++ + /* PREDICT_EXPR. Specify hint for branch prediction. The + PREDICT_EXPR_PREDICTOR specify predictor and PREDICT_EXPR_OUTCOME the + outcome (0 for not taken and 1 for taken). 
Once the profile is guessed + diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106836.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106836.patch new file mode 100644 index 0000000000..95b9ea9b83 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106836.patch @@ -0,0 +1,61 @@ +2011-11-04 Revital Eres <revital.eres@linaro.org> + + Backport from mainline -r180673: + + gcc/ + * modulo-sched.c (generate_prolog_epilog): Mark prolog + and epilog as BB_DISABLE_SCHEDULE. + (mark_loop_unsched): New function. + (sms_schedule): Call it. + +=== modified file 'gcc/modulo-sched.c' +--- old/gcc/modulo-sched.c 2011-10-10 14:35:32 +0000 ++++ new/gcc/modulo-sched.c 2011-10-30 05:31:00 +0000 +@@ -1173,6 +1173,8 @@ + /* Put the prolog on the entry edge. */ + e = loop_preheader_edge (loop); + split_edge_and_insert (e, get_insns ()); ++ if (!flag_resched_modulo_sched) ++ e->dest->flags |= BB_DISABLE_SCHEDULE; + + end_sequence (); + +@@ -1186,9 +1188,24 @@ + gcc_assert (single_exit (loop)); + e = single_exit (loop); + split_edge_and_insert (e, get_insns ()); ++ if (!flag_resched_modulo_sched) ++ e->dest->flags |= BB_DISABLE_SCHEDULE; ++ + end_sequence (); + } + ++/* Mark LOOP as software pipelined so the later ++ scheduling passes don't touch it. */ ++static void ++mark_loop_unsched (struct loop *loop) ++{ ++ unsigned i; ++ basic_block *bbs = get_loop_body (loop); ++ ++ for (i = 0; i < loop->num_nodes; i++) ++ bbs[i]->flags |= BB_DISABLE_SCHEDULE; ++} ++ + /* Return true if all the BBs of the loop are empty except the + loop header. */ + static bool +@@ -1714,9 +1731,10 @@ + permute_partial_schedule (ps, g->closing_branch->first_note); + + /* Mark this loop as software pipelined so the later +- scheduling passes doesn't touch it. */ ++ scheduling passes don't touch it. */ + if (! 
flag_resched_modulo_sched) +- g->bb->flags |= BB_DISABLE_SCHEDULE; ++ mark_loop_unsched (loop); ++ + /* The life-info is not valid any more. */ + df_set_bb_dirty (g->bb); + + diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106839.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106839.patch new file mode 100644 index 0000000000..dcffe1945c --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106839.patch @@ -0,0 +1,23 @@ +2011-11-02 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF mainline: + + 2011-11-01 Andrew Stubbs <ams@codesourcery.com> + + gcc/ + * config/arm/bpabi.h (BE8_LINK_SPEC): Recognize generic-armv7 tuning. + +=== modified file 'gcc/config/arm/bpabi.h' +--- old/gcc/config/arm/bpabi.h 2010-12-20 17:48:51 +0000 ++++ new/gcc/config/arm/bpabi.h 2011-11-02 21:02:53 +0000 +@@ -56,7 +56,8 @@ + "|march=armv4|mcpu=fa526|mcpu=fa626:--fix-v4bx}" + + #define BE8_LINK_SPEC " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5"\ +- "|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15:%{!r:--be8}}}" ++ "|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15|mcpu=generic-armv7-a"\ ++ ":%{!r:--be8}}}" + + /* Tell the assembler to build BPABI binaries. */ + #undef SUBTARGET_EXTRA_ASM_SPEC + diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106840.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106840.patch new file mode 100644 index 0000000000..70a7bdfa2b --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106840.patch @@ -0,0 +1,1400 @@ +2011-11-17 Ira Rosen <ira.rosen@linaro.org> + + Backport from mainline: + + 2011-11-03 Ira Rosen <ira.rosen@linaro.org> + + gcc/ + * tree-vectorizer.h (slp_void_p): New. + (struct _slp_tree): Replace left and right with children. Update + documentation. + (struct _slp_oprnd_info): New. + (vect_get_vec_defs): Declare. + (vect_get_slp_defs): Update arguments. 
+ * tree-vect-loop.c (vect_create_epilog_for_reduction): Call + vect_get_vec_defs instead of vect_get_slp_defs. + (vectorizable_reduction): Likewise. + * tree-vect-stmts.c (vect_get_vec_defs): Remove static, add argument. + Update call to vect_get_slp_defs. + (vectorizable_conversion): Update call to vect_get_vec_defs. + (vectorizable_assignment, vectorizable_shift, + vectorizable_operation): Likewise. + (vectorizable_type_demotion): Call vect_get_vec_defs instead of + vect_get_slp_defs. + (vectorizable_type_promotion, vectorizable_store): Likewise. + (vect_analyze_stmt): Fix typo. + * tree-vect-slp.c (vect_free_slp_tree): Update SLP tree traversal. + (vect_print_slp_tree, vect_mark_slp_stmts, + vect_mark_slp_stmts_relevant, vect_slp_rearrange_stmts, + vect_detect_hybrid_slp_stmts, vect_slp_analyze_node_operations, + vect_schedule_slp_instance): Likewise. + (vect_create_new_slp_node): New. + (vect_create_oprnd_info, vect_free_oprnd_info): Likewise. + (vect_get_and_check_slp_defs): Pass information about defs using + oprnds_info, allow any number of operands. + (vect_build_slp_tree): Likewise. Update calls to + vect_get_and_check_slp_defs. Fix comments. + (vect_analyze_slp_instance): Move node creation to + vect_create_new_slp_node. + (vect_get_slp_defs): Allow any number of operands. + + 2011-11-11 Jakub Jelinek <jakub@redhat.com> + + gcc/ + * tree-vect-slp.c (vect_free_slp_tree): Also free SLP_TREE_CHILDREN + vector. + (vect_create_new_slp_node): Don't allocate node before checking stmt + type. + (vect_free_oprnd_info): Remove FREE_DEF_STMTS argument, always + free def_stmts vectors and additionally free oprnd_info. + (vect_build_slp_tree): Adjust callers. Call it even if + stop_recursion. If vect_create_new_slp_node or + vect_build_slp_tree fails, properly handle freeing memory. + If it succeeded, clear def_stmts in oprnd_info. 
+ +=== modified file 'gcc/tree-vect-loop.c' +--- old/gcc/tree-vect-loop.c 2011-09-05 06:23:37 +0000 ++++ new/gcc/tree-vect-loop.c 2011-11-14 11:38:08 +0000 +@@ -3282,8 +3282,8 @@ + + /* Get the loop-entry arguments. */ + if (slp_node) +- vect_get_slp_defs (reduction_op, NULL_TREE, slp_node, &vec_initial_defs, +- NULL, reduc_index); ++ vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs, ++ NULL, slp_node, reduc_index); + else + { + vec_initial_defs = VEC_alloc (tree, heap, 1); +@@ -4451,8 +4451,8 @@ + } + + if (slp_node) +- vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, &vec_oprnds1, +- -1); ++ vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, ++ slp_node, -1); + else + { + loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index], + +=== modified file 'gcc/tree-vect-slp.c' +--- old/gcc/tree-vect-slp.c 2011-10-27 11:27:59 +0000 ++++ new/gcc/tree-vect-slp.c 2011-11-14 11:38:08 +0000 +@@ -67,15 +67,16 @@ + static void + vect_free_slp_tree (slp_tree node) + { ++ int i; ++ slp_void_p child; ++ + if (!node) + return; + +- if (SLP_TREE_LEFT (node)) +- vect_free_slp_tree (SLP_TREE_LEFT (node)); +- +- if (SLP_TREE_RIGHT (node)) +- vect_free_slp_tree (SLP_TREE_RIGHT (node)); +- ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_free_slp_tree ((slp_tree) child); ++ ++ VEC_free (slp_void_p, heap, SLP_TREE_CHILDREN (node)); + VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node)); + + if (SLP_TREE_VEC_STMTS (node)) +@@ -96,48 +97,116 @@ + } + + +-/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that +- they are of a legal type and that they match the defs of the first stmt of +- the SLP group (stored in FIRST_STMT_...). */ ++/* Create an SLP node for SCALAR_STMTS. 
*/ ++ ++static slp_tree ++vect_create_new_slp_node (VEC (gimple, heap) *scalar_stmts) ++{ ++ slp_tree node; ++ gimple stmt = VEC_index (gimple, scalar_stmts, 0); ++ unsigned int nops; ++ ++ if (is_gimple_call (stmt)) ++ nops = gimple_call_num_args (stmt); ++ else if (is_gimple_assign (stmt)) ++ nops = gimple_num_ops (stmt) - 1; ++ else ++ return NULL; ++ ++ node = XNEW (struct _slp_tree); ++ SLP_TREE_SCALAR_STMTS (node) = scalar_stmts; ++ SLP_TREE_VEC_STMTS (node) = NULL; ++ SLP_TREE_CHILDREN (node) = VEC_alloc (slp_void_p, heap, nops); ++ SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0; ++ SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0; ++ ++ return node; ++} ++ ++ ++/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each ++ operand. */ ++static VEC (slp_oprnd_info, heap) * ++vect_create_oprnd_info (int nops, int group_size) ++{ ++ int i; ++ slp_oprnd_info oprnd_info; ++ VEC (slp_oprnd_info, heap) *oprnds_info; ++ ++ oprnds_info = VEC_alloc (slp_oprnd_info, heap, nops); ++ for (i = 0; i < nops; i++) ++ { ++ oprnd_info = XNEW (struct _slp_oprnd_info); ++ oprnd_info->def_stmts = VEC_alloc (gimple, heap, group_size); ++ oprnd_info->first_dt = vect_uninitialized_def; ++ oprnd_info->first_def_type = NULL_TREE; ++ oprnd_info->first_const_oprnd = NULL_TREE; ++ oprnd_info->first_pattern = false; ++ VEC_quick_push (slp_oprnd_info, oprnds_info, oprnd_info); ++ } ++ ++ return oprnds_info; ++} ++ ++ ++/* Free operands info. */ ++ ++static void ++vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info) ++{ ++ int i; ++ slp_oprnd_info oprnd_info; ++ ++ FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info) ++ { ++ VEC_free (gimple, heap, oprnd_info->def_stmts); ++ XDELETE (oprnd_info); ++ } ++ ++ VEC_free (slp_oprnd_info, heap, *oprnds_info); ++} ++ ++ ++/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that ++ they are of a valid type and that they match the defs of the first stmt of ++ the SLP group (stored in OPRNDS_INFO). 
*/ + + static bool + vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, + slp_tree slp_node, gimple stmt, +- VEC (gimple, heap) **def_stmts0, +- VEC (gimple, heap) **def_stmts1, +- enum vect_def_type *first_stmt_dt0, +- enum vect_def_type *first_stmt_dt1, +- tree *first_stmt_def0_type, +- tree *first_stmt_def1_type, +- tree *first_stmt_const_oprnd, +- int ncopies_for_cost, +- bool *pattern0, bool *pattern1) ++ int ncopies_for_cost, bool first, ++ VEC (slp_oprnd_info, heap) **oprnds_info) + { + tree oprnd; + unsigned int i, number_of_oprnds; +- tree def[2]; ++ tree def, def_op0 = NULL_TREE; + gimple def_stmt; +- enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; +- stmt_vec_info stmt_info = +- vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0)); +- enum gimple_rhs_class rhs_class; ++ enum vect_def_type dt = vect_uninitialized_def; ++ enum vect_def_type dt_op0 = vect_uninitialized_def; ++ stmt_vec_info stmt_info = vinfo_for_stmt (stmt); ++ tree lhs = gimple_get_lhs (stmt); + struct loop *loop = NULL; + enum tree_code rhs_code; + bool different_types = false; ++ bool pattern = false; ++ slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info; + + if (loop_vinfo) + loop = LOOP_VINFO_LOOP (loop_vinfo); + +- rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt)); +- number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */ ++ if (is_gimple_call (stmt)) ++ number_of_oprnds = gimple_call_num_args (stmt); ++ else ++ number_of_oprnds = gimple_num_ops (stmt) - 1; + + for (i = 0; i < number_of_oprnds; i++) + { + oprnd = gimple_op (stmt, i + 1); ++ oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i); + +- if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def[i], +- &dt[i]) +- || (!def_stmt && dt[i] != vect_constant_def)) ++ if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def, ++ &dt) ++ || (!def_stmt && dt != vect_constant_def)) + { + if (vect_print_dump_info 
(REPORT_SLP)) + { +@@ -158,29 +227,24 @@ + && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt)) + && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) + { +- if (!*first_stmt_dt0) +- *pattern0 = true; +- else +- { +- if (i == 1 && !*first_stmt_dt1) +- *pattern1 = true; +- else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1)) +- { +- if (vect_print_dump_info (REPORT_DETAILS)) +- { +- fprintf (vect_dump, "Build SLP failed: some of the stmts" +- " are in a pattern, and others are not "); +- print_generic_expr (vect_dump, oprnd, TDF_SLIM); +- } ++ pattern = true; ++ if (!first && !oprnd_info->first_pattern) ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "Build SLP failed: some of the stmts" ++ " are in a pattern, and others are not "); ++ print_generic_expr (vect_dump, oprnd, TDF_SLIM); ++ } + +- return false; +- } ++ return false; + } + + def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); +- dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt)); ++ dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt)); + +- if (*dt == vect_unknown_def_type) ++ if (dt == vect_unknown_def_type ++ || STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (def_stmt))) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "Unsupported pattern."); +@@ -190,11 +254,11 @@ + switch (gimple_code (def_stmt)) + { + case GIMPLE_PHI: +- def[i] = gimple_phi_result (def_stmt); ++ def = gimple_phi_result (def_stmt); + break; + + case GIMPLE_ASSIGN: +- def[i] = gimple_assign_lhs (def_stmt); ++ def = gimple_assign_lhs (def_stmt); + break; + + default: +@@ -204,117 +268,125 @@ + } + } + +- if (!*first_stmt_dt0) ++ if (first) + { +- /* op0 of the first stmt of the group - store its info. 
*/ +- *first_stmt_dt0 = dt[i]; +- if (def[i]) +- *first_stmt_def0_type = TREE_TYPE (def[i]); +- else +- *first_stmt_const_oprnd = oprnd; ++ oprnd_info->first_dt = dt; ++ oprnd_info->first_pattern = pattern; ++ if (def) ++ { ++ oprnd_info->first_def_type = TREE_TYPE (def); ++ oprnd_info->first_const_oprnd = NULL_TREE; ++ } ++ else ++ { ++ oprnd_info->first_def_type = NULL_TREE; ++ oprnd_info->first_const_oprnd = oprnd; ++ } + +- /* Analyze costs (for the first stmt of the group only). */ +- if (rhs_class != GIMPLE_SINGLE_RHS) +- /* Not memory operation (we don't call this functions for loads). */ +- vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node); +- else +- /* Store. */ +- vect_model_store_cost (stmt_info, ncopies_for_cost, false, +- dt[0], slp_node); ++ if (i == 0) ++ { ++ def_op0 = def; ++ dt_op0 = dt; ++ /* Analyze costs (for the first stmt of the group only). */ ++ if (REFERENCE_CLASS_P (lhs)) ++ /* Store. */ ++ vect_model_store_cost (stmt_info, ncopies_for_cost, false, ++ dt, slp_node); ++ else ++ /* Not memory operation (we don't call this function for ++ loads). */ ++ vect_model_simple_cost (stmt_info, ncopies_for_cost, &dt, ++ slp_node); ++ } + } + + else + { +- if (!*first_stmt_dt1 && i == 1) +- { +- /* op1 of the first stmt of the group - store its info. */ +- *first_stmt_dt1 = dt[i]; +- if (def[i]) +- *first_stmt_def1_type = TREE_TYPE (def[i]); +- else +- { +- /* We assume that the stmt contains only one constant +- operand. We fail otherwise, to be on the safe side. */ +- if (*first_stmt_const_oprnd) +- { +- if (vect_print_dump_info (REPORT_SLP)) +- fprintf (vect_dump, "Build SLP failed: two constant " +- "oprnds in stmt"); +- return false; +- } +- *first_stmt_const_oprnd = oprnd; +- } +- } +- else +- { +- /* Not first stmt of the group, check that the def-stmt/s match +- the def-stmt/s of the first stmt. 
*/ +- if ((i == 0 +- && (*first_stmt_dt0 != dt[i] +- || (*first_stmt_def0_type && def[0] +- && !types_compatible_p (*first_stmt_def0_type, +- TREE_TYPE (def[0]))))) +- || (i == 1 +- && (*first_stmt_dt1 != dt[i] +- || (*first_stmt_def1_type && def[1] +- && !types_compatible_p (*first_stmt_def1_type, +- TREE_TYPE (def[1]))))) +- || (!def[i] +- && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd), +- TREE_TYPE (oprnd))) +- || different_types) +- { +- if (i != number_of_oprnds - 1) +- different_types = true; ++ /* Not first stmt of the group, check that the def-stmt/s match ++ the def-stmt/s of the first stmt. Allow different definition ++ types for reduction chains: the first stmt must be a ++ vect_reduction_def (a phi node), and the rest ++ vect_internal_def. */ ++ if (((oprnd_info->first_dt != dt ++ && !(oprnd_info->first_dt == vect_reduction_def ++ && dt == vect_internal_def)) ++ || (oprnd_info->first_def_type != NULL_TREE ++ && def ++ && !types_compatible_p (oprnd_info->first_def_type, ++ TREE_TYPE (def)))) ++ || (!def ++ && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd), ++ TREE_TYPE (oprnd))) ++ || different_types) ++ { ++ if (number_of_oprnds != 2) ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ fprintf (vect_dump, "Build SLP failed: different types "); ++ ++ return false; ++ } ++ ++ /* Try to swap operands in case of binary operation. 
*/ ++ if (i == 0) ++ different_types = true; ++ else ++ { ++ oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0); ++ if (is_gimple_assign (stmt) ++ && (rhs_code = gimple_assign_rhs_code (stmt)) ++ && TREE_CODE_CLASS (rhs_code) == tcc_binary ++ && commutative_tree_code (rhs_code) ++ && oprnd0_info->first_dt == dt ++ && oprnd_info->first_dt == dt_op0 ++ && def_op0 && def ++ && !(oprnd0_info->first_def_type ++ && !types_compatible_p (oprnd0_info->first_def_type, ++ TREE_TYPE (def))) ++ && !(oprnd_info->first_def_type ++ && !types_compatible_p (oprnd_info->first_def_type, ++ TREE_TYPE (def_op0)))) ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ { ++ fprintf (vect_dump, "Swapping operands of "); ++ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); ++ } ++ ++ swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt), ++ gimple_assign_rhs2_ptr (stmt)); ++ } + else +- { +- if (is_gimple_assign (stmt) +- && (rhs_code = gimple_assign_rhs_code (stmt)) +- && TREE_CODE_CLASS (rhs_code) == tcc_binary +- && commutative_tree_code (rhs_code) +- && *first_stmt_dt0 == dt[1] +- && *first_stmt_dt1 == dt[0] +- && def[0] && def[1] +- && !(*first_stmt_def0_type +- && !types_compatible_p (*first_stmt_def0_type, +- TREE_TYPE (def[1]))) +- && !(*first_stmt_def1_type +- && !types_compatible_p (*first_stmt_def1_type, +- TREE_TYPE (def[0])))) +- { +- if (vect_print_dump_info (REPORT_SLP)) +- { +- fprintf (vect_dump, "Swapping operands of "); +- print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); +- } +- swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt), +- gimple_assign_rhs2_ptr (stmt)); +- } +- else +- { +- if (vect_print_dump_info (REPORT_SLP)) +- fprintf (vect_dump, "Build SLP failed: different types "); +- +- return false; +- } +- } ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ fprintf (vect_dump, "Build SLP failed: different types "); ++ ++ return false; ++ } + } + } + } + + /* Check the types of the definitions. 
*/ +- switch (dt[i]) ++ switch (dt) + { + case vect_constant_def: + case vect_external_def: ++ case vect_reduction_def: + break; + + case vect_internal_def: +- case vect_reduction_def: +- if ((i == 0 && !different_types) || (i == 1 && different_types)) +- VEC_safe_push (gimple, heap, *def_stmts0, def_stmt); ++ if (different_types) ++ { ++ oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0); ++ oprnd1_info = VEC_index (slp_oprnd_info, *oprnds_info, 0); ++ if (i == 0) ++ VEC_quick_push (gimple, oprnd1_info->def_stmts, def_stmt); ++ else ++ VEC_quick_push (gimple, oprnd0_info->def_stmts, def_stmt); ++ } + else +- VEC_safe_push (gimple, heap, *def_stmts1, def_stmt); ++ VEC_quick_push (gimple, oprnd_info->def_stmts, def_stmt); + break; + + default: +@@ -322,7 +394,7 @@ + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "Build SLP failed: illegal type of def "); +- print_generic_expr (vect_dump, def[i], TDF_SLIM); ++ print_generic_expr (vect_dump, def, TDF_SLIM); + } + + return false; +@@ -347,15 +419,10 @@ + VEC (slp_tree, heap) **loads, + unsigned int vectorization_factor, bool *loads_permuted) + { +- VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size); +- VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size); + unsigned int i; + VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node); + gimple stmt = VEC_index (gimple, stmts, 0); +- enum vect_def_type first_stmt_dt0 = vect_uninitialized_def; +- enum vect_def_type first_stmt_dt1 = vect_uninitialized_def; + enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK; +- tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE; + tree lhs; + bool stop_recursion = false, need_same_oprnds = false; + tree vectype, scalar_type, first_op1 = NULL_TREE; +@@ -364,13 +431,21 @@ + int icode; + enum machine_mode optab_op2_mode; + enum machine_mode vec_mode; +- tree first_stmt_const_oprnd = NULL_TREE; + struct data_reference *first_dr; +- bool pattern0 = 
false, pattern1 = false; + HOST_WIDE_INT dummy; + bool permutation = false; + unsigned int load_place; + gimple first_load, prev_first_load = NULL; ++ VEC (slp_oprnd_info, heap) *oprnds_info; ++ unsigned int nops; ++ slp_oprnd_info oprnd_info; ++ ++ if (is_gimple_call (stmt)) ++ nops = gimple_call_num_args (stmt); ++ else ++ nops = gimple_num_ops (stmt) - 1; ++ ++ oprnds_info = vect_create_oprnd_info (nops, group_size); + + /* For every stmt in NODE find its def stmt/s. */ + FOR_EACH_VEC_ELT (gimple, stmts, i, stmt) +@@ -391,6 +466,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + +@@ -400,10 +476,11 @@ + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, +- "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL"); ++ "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL "); + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + +@@ -416,6 +493,8 @@ + fprintf (vect_dump, "Build SLP failed: unsupported data-type "); + print_generic_expr (vect_dump, scalar_type, TDF_SLIM); + } ++ ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + +@@ -462,6 +541,7 @@ + { + if (vect_print_dump_info (REPORT_SLP)) + fprintf (vect_dump, "Build SLP failed: no optab."); ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + icode = (int) optab_handler (optab, vec_mode); +@@ -470,6 +550,7 @@ + if (vect_print_dump_info (REPORT_SLP)) + fprintf (vect_dump, "Build SLP failed: " + "op not supported by target."); ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + optab_op2_mode = insn_data[icode].operand[2].mode; +@@ -506,6 +587,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + +@@ -519,6 +601,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + } +@@ -530,15 +613,12 @@ + { + 
/* Store. */ + if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, +- stmt, &def_stmts0, &def_stmts1, +- &first_stmt_dt0, +- &first_stmt_dt1, +- &first_stmt_def0_type, +- &first_stmt_def1_type, +- &first_stmt_const_oprnd, +- ncopies_for_cost, +- &pattern0, &pattern1)) +- return false; ++ stmt, ncopies_for_cost, ++ (i == 0), &oprnds_info)) ++ { ++ vect_free_oprnd_info (&oprnds_info); ++ return false; ++ } + } + else + { +@@ -556,6 +636,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + +@@ -573,6 +654,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + +@@ -593,6 +675,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + } +@@ -612,6 +695,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + +@@ -639,7 +723,7 @@ + { + if (TREE_CODE_CLASS (rhs_code) == tcc_reference) + { +- /* Not strided load. */ ++ /* Not strided load. */ + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "Build SLP failed: not strided load "); +@@ -647,6 +731,7 @@ + } + + /* FORNOW: Not strided loads are not supported. */ ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + +@@ -661,19 +746,18 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + + /* Find the def-stmts. 
*/ + if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt, +- &def_stmts0, &def_stmts1, +- &first_stmt_dt0, &first_stmt_dt1, +- &first_stmt_def0_type, +- &first_stmt_def1_type, +- &first_stmt_const_oprnd, +- ncopies_for_cost, +- &pattern0, &pattern1)) +- return false; ++ ncopies_for_cost, (i == 0), ++ &oprnds_info)) ++ { ++ vect_free_oprnd_info (&oprnds_info); ++ return false; ++ } + } + } + +@@ -702,46 +786,37 @@ + *loads_permuted = true; + } + ++ vect_free_oprnd_info (&oprnds_info); + return true; + } + + /* Create SLP_TREE nodes for the definition node/s. */ +- if (first_stmt_dt0 == vect_internal_def) +- { +- slp_tree left_node = XNEW (struct _slp_tree); +- SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0; +- SLP_TREE_VEC_STMTS (left_node) = NULL; +- SLP_TREE_LEFT (left_node) = NULL; +- SLP_TREE_RIGHT (left_node) = NULL; +- SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0; +- SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0; +- if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size, +- inside_cost, outside_cost, ncopies_for_cost, +- max_nunits, load_permutation, loads, +- vectorization_factor, loads_permuted)) +- return false; +- +- SLP_TREE_LEFT (*node) = left_node; +- } +- +- if (first_stmt_dt1 == vect_internal_def) +- { +- slp_tree right_node = XNEW (struct _slp_tree); +- SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1; +- SLP_TREE_VEC_STMTS (right_node) = NULL; +- SLP_TREE_LEFT (right_node) = NULL; +- SLP_TREE_RIGHT (right_node) = NULL; +- SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0; +- SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0; +- if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size, +- inside_cost, outside_cost, ncopies_for_cost, +- max_nunits, load_permutation, loads, +- vectorization_factor, loads_permuted)) +- return false; +- +- SLP_TREE_RIGHT (*node) = right_node; +- } +- ++ FOR_EACH_VEC_ELT (slp_oprnd_info, oprnds_info, i, oprnd_info) ++ { ++ slp_tree child; ++ ++ if (oprnd_info->first_dt != 
vect_internal_def) ++ continue; ++ ++ child = vect_create_new_slp_node (oprnd_info->def_stmts); ++ if (!child ++ || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size, ++ inside_cost, outside_cost, ncopies_for_cost, ++ max_nunits, load_permutation, loads, ++ vectorization_factor, loads_permuted)) ++ { ++ if (child) ++ oprnd_info->def_stmts = NULL; ++ vect_free_slp_tree (child); ++ vect_free_oprnd_info (&oprnds_info); ++ return false; ++ } ++ ++ oprnd_info->def_stmts = NULL; ++ VEC_quick_push (slp_void_p, SLP_TREE_CHILDREN (*node), child); ++ } ++ ++ vect_free_oprnd_info (&oprnds_info); + return true; + } + +@@ -751,6 +826,7 @@ + { + int i; + gimple stmt; ++ slp_void_p child; + + if (!node) + return; +@@ -763,8 +839,8 @@ + } + fprintf (vect_dump, "\n"); + +- vect_print_slp_tree (SLP_TREE_LEFT (node)); +- vect_print_slp_tree (SLP_TREE_RIGHT (node)); ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_print_slp_tree ((slp_tree) child); + } + + +@@ -778,6 +854,7 @@ + { + int i; + gimple stmt; ++ slp_void_p child; + + if (!node) + return; +@@ -786,8 +863,8 @@ + if (j < 0 || i == j) + STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark; + +- vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j); +- vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j); ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_mark_slp_stmts ((slp_tree) child, mark, j); + } + + +@@ -799,6 +876,7 @@ + int i; + gimple stmt; + stmt_vec_info stmt_info; ++ slp_void_p child; + + if (!node) + return; +@@ -811,8 +889,8 @@ + STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope; + } + +- vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node)); +- vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node)); ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_mark_slp_stmts_relevant ((slp_tree) child); + } + + +@@ -885,12 +963,13 @@ + gimple stmt; + VEC (gimple, heap) *tmp_stmts; + unsigned int index, i; ++ slp_void_p child; + + if (!node) + 
return; + +- vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation); +- vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation); ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation); + + gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))); + tmp_stmts = VEC_alloc (gimple, heap, group_size); +@@ -1253,7 +1332,7 @@ + gimple stmt) + { + slp_instance new_instance; +- slp_tree node = XNEW (struct _slp_tree); ++ slp_tree node; + unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt)); + unsigned int unrolling_factor = 1, nunits; + tree vectype, scalar_type = NULL_TREE; +@@ -1265,6 +1344,7 @@ + VEC (slp_tree, heap) *loads; + struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); + bool loads_permuted = false; ++ VEC (gimple, heap) *scalar_stmts; + + if (dr) + { +@@ -1308,39 +1388,26 @@ + } + + /* Create a node (a root of the SLP tree) for the packed strided stores. */ +- SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size); ++ scalar_stmts = VEC_alloc (gimple, heap, group_size); + next = stmt; + if (dr) + { + /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */ + while (next) + { +- VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next); ++ VEC_safe_push (gimple, heap, scalar_stmts, next); + next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); + } + } + else + { + /* Collect reduction statements. 
*/ +- for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, +- next); +- i++) +- { +- VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next); +- if (vect_print_dump_info (REPORT_DETAILS)) +- { +- fprintf (vect_dump, "pushing reduction into node: "); +- print_gimple_stmt (vect_dump, next, 0, TDF_SLIM); +- } +- } ++ VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo); ++ for (i = 0; VEC_iterate (gimple, reductions, i, next); i++) ++ VEC_safe_push (gimple, heap, scalar_stmts, next); + } + +- SLP_TREE_VEC_STMTS (node) = NULL; +- SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0; +- SLP_TREE_LEFT (node) = NULL; +- SLP_TREE_RIGHT (node) = NULL; +- SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0; +- SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0; ++ node = vect_create_new_slp_node (scalar_stmts); + + /* Calculate the number of vector stmts to create based on the unrolling + factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is +@@ -1517,6 +1584,7 @@ + imm_use_iterator imm_iter; + gimple use_stmt; + stmt_vec_info stmt_vinfo; ++ slp_void_p child; + + if (!node) + return; +@@ -1534,8 +1602,8 @@ + == vect_reduction_def)) + vect_mark_slp_stmts (node, hybrid, i); + +- vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node)); +- vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node)); ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_detect_hybrid_slp_stmts ((slp_tree) child); + } + + +@@ -1625,13 +1693,14 @@ + bool dummy; + int i; + gimple stmt; ++ slp_void_p child; + + if (!node) + return true; + +- if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node)) +- || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node))) +- return false; ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child)) ++ return false; + + FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt) + { +@@ -2207,88 +2276,102 @@ + If the scalar 
definitions are loop invariants or constants, collect them and + call vect_get_constant_vectors() to create vector stmts. + Otherwise, the def-stmts must be already vectorized and the vectorized stmts +- must be stored in the LEFT/RIGHT node of SLP_NODE, and we call +- vect_get_slp_vect_defs() to retrieve them. +- If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from +- the right node. This is used when the second operand must remain scalar. */ ++ must be stored in the corresponding child of SLP_NODE, and we call ++ vect_get_slp_vect_defs () to retrieve them. */ + + void +-vect_get_slp_defs (tree op0, tree op1, slp_tree slp_node, +- VEC (tree,heap) **vec_oprnds0, +- VEC (tree,heap) **vec_oprnds1, int reduc_index) ++vect_get_slp_defs (VEC (tree, heap) *ops, slp_tree slp_node, ++ VEC (slp_void_p, heap) **vec_oprnds, int reduc_index) + { +- gimple first_stmt; +- enum tree_code code; +- int number_of_vects; ++ gimple first_stmt, first_def; ++ int number_of_vects = 0, i; ++ unsigned int child_index = 0; + HOST_WIDE_INT lhs_size_unit, rhs_size_unit; ++ slp_tree child = NULL; ++ VEC (tree, heap) *vec_defs; ++ tree oprnd, def_lhs; ++ bool vectorized_defs; + + first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0); +- /* The number of vector defs is determined by the number of vector statements +- in the node from which we get those statements. */ +- if (SLP_TREE_LEFT (slp_node)) +- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node)); +- else +- { +- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); +- /* Number of vector stmts was calculated according to LHS in +- vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if +- necessary. See vect_get_smallest_scalar_type () for details. 
*/ +- vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit, +- &rhs_size_unit); +- if (rhs_size_unit != lhs_size_unit) +- { +- number_of_vects *= rhs_size_unit; +- number_of_vects /= lhs_size_unit; +- } ++ FOR_EACH_VEC_ELT (tree, ops, i, oprnd) ++ { ++ /* For each operand we check if it has vectorized definitions in a child ++ node or we need to create them (for invariants and constants). We ++ check if the LHS of the first stmt of the next child matches OPRND. ++ If it does, we found the correct child. Otherwise, we call ++ vect_get_constant_vectors (), and not advance CHILD_INDEX in order ++ to check this child node for the next operand. */ ++ vectorized_defs = false; ++ if (VEC_length (slp_void_p, SLP_TREE_CHILDREN (slp_node)) > child_index) ++ { ++ child = (slp_tree) VEC_index (slp_void_p, ++ SLP_TREE_CHILDREN (slp_node), ++ child_index); ++ first_def = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (child), 0); ++ ++ /* In the end of a pattern sequence we have a use of the original stmt, ++ so we need to compare OPRND with the original def. */ ++ if (is_pattern_stmt_p (vinfo_for_stmt (first_def)) ++ && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt)) ++ && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt))) ++ first_def = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def)); ++ ++ if (is_gimple_call (first_def)) ++ def_lhs = gimple_call_lhs (first_def); ++ else ++ def_lhs = gimple_assign_lhs (first_def); ++ ++ if (operand_equal_p (oprnd, def_lhs, 0)) ++ { ++ /* The number of vector defs is determined by the number of ++ vector statements in the node from which we get those ++ statements. 
*/ ++ number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child); ++ vectorized_defs = true; ++ child_index++; ++ } ++ } ++ ++ if (!vectorized_defs) ++ { ++ if (i == 0) ++ { ++ number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++ /* Number of vector stmts was calculated according to LHS in ++ vect_schedule_slp_instance (), fix it by replacing LHS with ++ RHS, if necessary. See vect_get_smallest_scalar_type () for ++ details. */ ++ vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit, ++ &rhs_size_unit); ++ if (rhs_size_unit != lhs_size_unit) ++ { ++ number_of_vects *= rhs_size_unit; ++ number_of_vects /= lhs_size_unit; ++ } ++ } ++ } ++ ++ /* Allocate memory for vectorized defs. */ ++ vec_defs = VEC_alloc (tree, heap, number_of_vects); ++ ++ /* For reduction defs we call vect_get_constant_vectors (), since we are ++ looking for initial loop invariant values. */ ++ if (vectorized_defs && reduc_index == -1) ++ /* The defs are already vectorized. */ ++ vect_get_slp_vect_defs (child, &vec_defs); ++ else ++ /* Build vectors from scalar defs. */ ++ vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i, ++ number_of_vects, reduc_index); ++ ++ VEC_quick_push (slp_void_p, *vec_oprnds, (slp_void_p) vec_defs); ++ ++ /* For reductions, we only need initial values. */ ++ if (reduc_index != -1) ++ return; + } +- +- /* Allocate memory for vectorized defs. */ +- *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects); +- +- /* SLP_NODE corresponds either to a group of stores or to a group of +- unary/binary operations. We don't call this function for loads. +- For reduction defs we call vect_get_constant_vectors(), since we are +- looking for initial loop invariant values. */ +- if (SLP_TREE_LEFT (slp_node) && reduc_index == -1) +- /* The defs are already vectorized. */ +- vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0); +- else +- /* Build vectors from scalar defs. 
*/ +- vect_get_constant_vectors (op0, slp_node, vec_oprnds0, 0, number_of_vects, +- reduc_index); +- +- if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))) +- /* Since we don't call this function with loads, this is a group of +- stores. */ +- return; +- +- /* For reductions, we only need initial values. */ +- if (reduc_index != -1) +- return; +- +- code = gimple_assign_rhs_code (first_stmt); +- if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1) +- return; +- +- /* The number of vector defs is determined by the number of vector statements +- in the node from which we get those statements. */ +- if (SLP_TREE_RIGHT (slp_node)) +- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node)); +- else +- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); +- +- *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects); +- +- if (SLP_TREE_RIGHT (slp_node)) +- /* The defs are already vectorized. */ +- vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1); +- else +- /* Build vectors from scalar defs. 
*/ +- vect_get_constant_vectors (op1, slp_node, vec_oprnds1, 1, number_of_vects, +- -1); + } + +- + /* Create NCOPIES permutation statements using the mask MASK_BYTES (by + building a vector of type MASK_TYPE from it) and two input vectors placed in + DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and +@@ -2605,14 +2688,14 @@ + tree vectype; + int i; + slp_tree loads_node; ++ slp_void_p child; + + if (!node) + return false; + +- vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance, +- vectorization_factor); +- vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance, +- vectorization_factor); ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_schedule_slp_instance ((slp_tree) child, instance, ++ vectorization_factor); + + stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0); + stmt_info = vinfo_for_stmt (stmt); + +=== modified file 'gcc/tree-vect-stmts.c' +--- old/gcc/tree-vect-stmts.c 2011-10-27 11:27:59 +0000 ++++ new/gcc/tree-vect-stmts.c 2011-11-14 11:38:08 +0000 +@@ -1419,16 +1419,35 @@ + } + + +-/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not +- NULL. */ ++/* Get vectorized definitions for OP0 and OP1. ++ REDUC_INDEX is the index of reduction operand in case of reduction, ++ and -1 otherwise. */ + +-static void ++void + vect_get_vec_defs (tree op0, tree op1, gimple stmt, +- VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1, +- slp_tree slp_node) ++ VEC (tree, heap) **vec_oprnds0, ++ VEC (tree, heap) **vec_oprnds1, ++ slp_tree slp_node, int reduc_index) + { + if (slp_node) +- vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1, -1); ++ { ++ int nops = (op1 == NULL_TREE) ? 
1 : 2; ++ VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops); ++ VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops); ++ ++ VEC_quick_push (tree, ops, op0); ++ if (op1) ++ VEC_quick_push (tree, ops, op1); ++ ++ vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index); ++ ++ *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0); ++ if (op1) ++ *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1); ++ ++ VEC_free (tree, heap, ops); ++ VEC_free (slp_void_p, heap, vec_defs); ++ } + else + { + tree vec_oprnd; +@@ -2016,7 +2035,8 @@ + for (j = 0; j < ncopies; j++) + { + if (j == 0) +- vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node); ++ vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node, ++ -1); + else + vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL); + +@@ -2221,7 +2241,7 @@ + { + /* Handle uses. */ + if (j == 0) +- vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node); ++ vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1); + else + vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL); + +@@ -2576,10 +2596,10 @@ + operand 1 should be of a vector type (the usual case). */ + if (vec_oprnd1) + vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, +- slp_node); ++ slp_node, -1); + else + vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, +- slp_node); ++ slp_node, -1); + } + else + vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1); +@@ -2887,10 +2907,10 @@ + { + if (op_type == binary_op || op_type == ternary_op) + vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, +- slp_node); ++ slp_node, -1); + else + vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, +- slp_node); ++ slp_node, -1); + if (op_type == ternary_op) + { + vec_oprnds2 = VEC_alloc (tree, heap, 1); +@@ -3202,7 +3222,8 @@ + { + /* Handle uses. 
*/ + if (slp_node) +- vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, -1); ++ vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, ++ slp_node, -1); + else + { + VEC_free (tree, heap, vec_oprnds0); +@@ -3548,12 +3569,12 @@ + for (k = 0; k < slp_node->vec_stmts_size - 1; k++) + VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); + +- vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, +- -1); ++ vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, ++ slp_node, -1); + } + else +- vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, +- &vec_oprnds1, -1); ++ vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, ++ &vec_oprnds1, slp_node, -1); + } + else + { +@@ -3796,6 +3817,7 @@ + vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0); + first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); ++ op = gimple_assign_rhs1 (first_stmt); + } + else + /* VEC_NUM is the number of vect stmts to be created for this +@@ -3878,8 +3900,8 @@ + if (slp) + { + /* Get vectorized arguments for SLP_NODE. */ +- vect_get_slp_defs (NULL_TREE, NULL_TREE, slp_node, &vec_oprnds, +- NULL, -1); ++ vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, ++ NULL, slp_node, -1); + + vec_oprnd = VEC_index (tree, vec_oprnds, 0); + } +@@ -5040,7 +5062,7 @@ + In basic blocks we only analyze statements that are a part of some SLP + instance, therefore, all the statements are relevant. + +- Pattern statement need to be analyzed instead of the original statement ++ Pattern statement needs to be analyzed instead of the original statement + if the original statement is not relevant. Otherwise, we analyze both + statements. 
*/ + + +=== modified file 'gcc/tree-vectorizer.h' +--- old/gcc/tree-vectorizer.h 2011-10-23 13:33:07 +0000 ++++ new/gcc/tree-vectorizer.h 2011-11-14 11:38:08 +0000 +@@ -73,15 +73,15 @@ + /************************************************************************ + SLP + ************************************************************************/ ++typedef void *slp_void_p; ++DEF_VEC_P (slp_void_p); ++DEF_VEC_ALLOC_P (slp_void_p, heap); + +-/* A computation tree of an SLP instance. Each node corresponds to a group of ++/* A computation tree of an SLP instance. Each node corresponds to a group of + stmts to be packed in a SIMD stmt. */ + typedef struct _slp_tree { +- /* Only binary and unary operations are supported. LEFT child corresponds to +- the first operand and RIGHT child to the second if the operation is +- binary. */ +- struct _slp_tree *left; +- struct _slp_tree *right; ++ /* Nodes that contain def-stmts of this node statements operands. */ ++ VEC (slp_void_p, heap) *children; + /* A group of scalar stmts to be vectorized together. */ + VEC (gimple, heap) *stmts; + /* Vectorized stmt/s. */ +@@ -146,14 +146,32 @@ + #define SLP_INSTANCE_LOADS(S) (S)->loads + #define SLP_INSTANCE_FIRST_LOAD_STMT(S) (S)->first_load + +-#define SLP_TREE_LEFT(S) (S)->left +-#define SLP_TREE_RIGHT(S) (S)->right ++#define SLP_TREE_CHILDREN(S) (S)->children + #define SLP_TREE_SCALAR_STMTS(S) (S)->stmts + #define SLP_TREE_VEC_STMTS(S) (S)->vec_stmts + #define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size + #define SLP_TREE_OUTSIDE_OF_LOOP_COST(S) (S)->cost.outside_of_loop + #define SLP_TREE_INSIDE_OF_LOOP_COST(S) (S)->cost.inside_of_loop + ++/* This structure is used in creation of an SLP tree. Each instance ++ corresponds to the same operand in a group of scalar stmts in an SLP ++ node. */ ++typedef struct _slp_oprnd_info ++{ ++ /* Def-stmts for the operands. 
*/ ++ VEC (gimple, heap) *def_stmts; ++ /* Information about the first statement, its vector def-type, type, the ++ operand itself in case it's constant, and an indication if it's a pattern ++ stmt. */ ++ enum vect_def_type first_dt; ++ tree first_def_type; ++ tree first_const_oprnd; ++ bool first_pattern; ++} *slp_oprnd_info; ++ ++DEF_VEC_P(slp_oprnd_info); ++DEF_VEC_ALLOC_P(slp_oprnd_info, heap); ++ + + typedef struct _vect_peel_info + { +@@ -819,6 +837,8 @@ + unsigned int *, unsigned int *); + extern void vect_get_store_cost (struct data_reference *, int, unsigned int *); + extern bool vect_supportable_shift (enum tree_code, tree); ++extern void vect_get_vec_defs (tree, tree, gimple, VEC (tree, heap) **, ++ VEC (tree, heap) **, slp_tree, int); + + /* In tree-vect-data-refs.c. */ + extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int); +@@ -885,8 +905,9 @@ + extern bool vect_analyze_slp (loop_vec_info, bb_vec_info); + extern void vect_make_slp_decision (loop_vec_info); + extern void vect_detect_hybrid_slp (loop_vec_info); +-extern void vect_get_slp_defs (tree, tree, slp_tree, VEC (tree,heap) **, +- VEC (tree,heap) **, int); ++extern void vect_get_slp_defs (VEC (tree, heap) *, slp_tree, ++ VEC (slp_void_p, heap) **, int); ++ + extern LOC find_bb_location (basic_block); + extern bb_vec_info vect_slp_analyze_bb (basic_block); + extern void vect_slp_transform_bb (basic_block); + diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106841.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106841.patch new file mode 100644 index 0000000000..d72446919f --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106841.patch @@ -0,0 +1,515 @@ +2011-11-21 Michael Hope <michael.hope@linaro.org> + + Backport from mainline r180131: + + 2011-10-18 Julian Brown <julian@codesourcery.com> + + gcc/ + * config/arm/arm.c (arm_block_move_unaligned_straight) + (arm_adjust_block_mem, arm_block_move_unaligned_loop) 
+ (arm_movmemqi_unaligned): New. + (arm_gen_movmemqi): Support unaligned block copies. + + gcc/testsuite/ + * lib/target-supports.exp (check_effective_target_arm_unaligned): New. + * gcc.target/arm/unaligned-memcpy-1.c: New. + * gcc.target/arm/unaligned-memcpy-2.c: New. + * gcc.target/arm/unaligned-memcpy-3.c: New. + * gcc.target/arm/unaligned-memcpy-4.c: New. + + 2011-09-15 James Greenhalgh <james.greenhalgh@arm.com> + + gcc/ + * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): New builtin macro. + +=== modified file 'gcc/config/arm/arm.c' +--- old/gcc/config/arm/arm.c 2011-10-26 11:38:30 +0000 ++++ new/gcc/config/arm/arm.c 2011-11-21 01:45:54 +0000 +@@ -10803,6 +10803,335 @@ + return true; + } + ++/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit ++ unaligned copies on processors which support unaligned semantics for those ++ instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency ++ (using more registers) by doing e.g. load/load/store/store for a factor of 2. ++ An interleave factor of 1 (the minimum) will perform no interleaving. ++ Load/store multiple are used for aligned addresses where possible. 
*/ ++ ++static void ++arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, ++ HOST_WIDE_INT length, ++ unsigned int interleave_factor) ++{ ++ rtx *regs = XALLOCAVEC (rtx, interleave_factor); ++ int *regnos = XALLOCAVEC (int, interleave_factor); ++ HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD; ++ HOST_WIDE_INT i, j; ++ HOST_WIDE_INT remaining = length, words; ++ rtx halfword_tmp = NULL, byte_tmp = NULL; ++ rtx dst, src; ++ bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD; ++ bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD; ++ HOST_WIDE_INT srcoffset, dstoffset; ++ HOST_WIDE_INT src_autoinc, dst_autoinc; ++ rtx mem, addr; ++ ++ gcc_assert (1 <= interleave_factor && interleave_factor <= 4); ++ ++ /* Use hard registers if we have aligned source or destination so we can use ++ load/store multiple with contiguous registers. */ ++ if (dst_aligned || src_aligned) ++ for (i = 0; i < interleave_factor; i++) ++ regs[i] = gen_rtx_REG (SImode, i); ++ else ++ for (i = 0; i < interleave_factor; i++) ++ regs[i] = gen_reg_rtx (SImode); ++ ++ dst = copy_addr_to_reg (XEXP (dstbase, 0)); ++ src = copy_addr_to_reg (XEXP (srcbase, 0)); ++ ++ srcoffset = dstoffset = 0; ++ ++ /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST. ++ For copying the last bytes we want to subtract this offset again. */ ++ src_autoinc = dst_autoinc = 0; ++ ++ for (i = 0; i < interleave_factor; i++) ++ regnos[i] = i; ++ ++ /* Copy BLOCK_SIZE_BYTES chunks. */ ++ ++ for (i = 0; i + block_size_bytes <= length; i += block_size_bytes) ++ { ++ /* Load words. 
*/ ++ if (src_aligned && interleave_factor > 1) ++ { ++ emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src, ++ TRUE, srcbase, &srcoffset)); ++ src_autoinc += UNITS_PER_WORD * interleave_factor; ++ } ++ else ++ { ++ for (j = 0; j < interleave_factor; j++) ++ { ++ addr = plus_constant (src, srcoffset + j * UNITS_PER_WORD ++ - src_autoinc); ++ mem = adjust_automodify_address (srcbase, SImode, addr, ++ srcoffset + j * UNITS_PER_WORD); ++ emit_insn (gen_unaligned_loadsi (regs[j], mem)); ++ } ++ srcoffset += block_size_bytes; ++ } ++ ++ /* Store words. */ ++ if (dst_aligned && interleave_factor > 1) ++ { ++ emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst, ++ TRUE, dstbase, &dstoffset)); ++ dst_autoinc += UNITS_PER_WORD * interleave_factor; ++ } ++ else ++ { ++ for (j = 0; j < interleave_factor; j++) ++ { ++ addr = plus_constant (dst, dstoffset + j * UNITS_PER_WORD ++ - dst_autoinc); ++ mem = adjust_automodify_address (dstbase, SImode, addr, ++ dstoffset + j * UNITS_PER_WORD); ++ emit_insn (gen_unaligned_storesi (mem, regs[j])); ++ } ++ dstoffset += block_size_bytes; ++ } ++ ++ remaining -= block_size_bytes; ++ } ++ ++ /* Copy any whole words left (note these aren't interleaved with any ++ subsequent halfword/byte load/stores in the interests of simplicity). 
*/ ++ ++ words = remaining / UNITS_PER_WORD; ++ ++ gcc_assert (words < interleave_factor); ++ ++ if (src_aligned && words > 1) ++ { ++ emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase, ++ &srcoffset)); ++ src_autoinc += UNITS_PER_WORD * words; ++ } ++ else ++ { ++ for (j = 0; j < words; j++) ++ { ++ addr = plus_constant (src, ++ srcoffset + j * UNITS_PER_WORD - src_autoinc); ++ mem = adjust_automodify_address (srcbase, SImode, addr, ++ srcoffset + j * UNITS_PER_WORD); ++ emit_insn (gen_unaligned_loadsi (regs[j], mem)); ++ } ++ srcoffset += words * UNITS_PER_WORD; ++ } ++ ++ if (dst_aligned && words > 1) ++ { ++ emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase, ++ &dstoffset)); ++ dst_autoinc += words * UNITS_PER_WORD; ++ } ++ else ++ { ++ for (j = 0; j < words; j++) ++ { ++ addr = plus_constant (dst, ++ dstoffset + j * UNITS_PER_WORD - dst_autoinc); ++ mem = adjust_automodify_address (dstbase, SImode, addr, ++ dstoffset + j * UNITS_PER_WORD); ++ emit_insn (gen_unaligned_storesi (mem, regs[j])); ++ } ++ dstoffset += words * UNITS_PER_WORD; ++ } ++ ++ remaining -= words * UNITS_PER_WORD; ++ ++ gcc_assert (remaining < 4); ++ ++ /* Copy a halfword if necessary. */ ++ ++ if (remaining >= 2) ++ { ++ halfword_tmp = gen_reg_rtx (SImode); ++ ++ addr = plus_constant (src, srcoffset - src_autoinc); ++ mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset); ++ emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem)); ++ ++ /* Either write out immediately, or delay until we've loaded the last ++ byte, depending on interleave factor. */ ++ if (interleave_factor == 1) ++ { ++ addr = plus_constant (dst, dstoffset - dst_autoinc); ++ mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset); ++ emit_insn (gen_unaligned_storehi (mem, ++ gen_lowpart (HImode, halfword_tmp))); ++ halfword_tmp = NULL; ++ dstoffset += 2; ++ } ++ ++ remaining -= 2; ++ srcoffset += 2; ++ } ++ ++ gcc_assert (remaining < 2); ++ ++ /* Copy last byte. 
*/ ++ ++ if ((remaining & 1) != 0) ++ { ++ byte_tmp = gen_reg_rtx (SImode); ++ ++ addr = plus_constant (src, srcoffset - src_autoinc); ++ mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset); ++ emit_move_insn (gen_lowpart (QImode, byte_tmp), mem); ++ ++ if (interleave_factor == 1) ++ { ++ addr = plus_constant (dst, dstoffset - dst_autoinc); ++ mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset); ++ emit_move_insn (mem, gen_lowpart (QImode, byte_tmp)); ++ byte_tmp = NULL; ++ dstoffset++; ++ } ++ ++ remaining--; ++ srcoffset++; ++ } ++ ++ /* Store last halfword if we haven't done so already. */ ++ ++ if (halfword_tmp) ++ { ++ addr = plus_constant (dst, dstoffset - dst_autoinc); ++ mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset); ++ emit_insn (gen_unaligned_storehi (mem, ++ gen_lowpart (HImode, halfword_tmp))); ++ dstoffset += 2; ++ } ++ ++ /* Likewise for last byte. */ ++ ++ if (byte_tmp) ++ { ++ addr = plus_constant (dst, dstoffset - dst_autoinc); ++ mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset); ++ emit_move_insn (mem, gen_lowpart (QImode, byte_tmp)); ++ dstoffset++; ++ } ++ ++ gcc_assert (remaining == 0 && srcoffset == dstoffset); ++} ++ ++/* From mips_adjust_block_mem: ++ ++ Helper function for doing a loop-based block operation on memory ++ reference MEM. Each iteration of the loop will operate on LENGTH ++ bytes of MEM. ++ ++ Create a new base register for use within the loop and point it to ++ the start of MEM. Create a new memory reference that uses this ++ register. Store them in *LOOP_REG and *LOOP_MEM respectively. */ ++ ++static void ++arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg, ++ rtx *loop_mem) ++{ ++ *loop_reg = copy_addr_to_reg (XEXP (mem, 0)); ++ ++ /* Although the new mem does not refer to a known location, ++ it does keep up to LENGTH bytes of alignment. 
*/ ++ *loop_mem = change_address (mem, BLKmode, *loop_reg); ++ set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT)); ++} ++ ++/* From mips_block_move_loop: ++ ++ Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER ++ bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that ++ the memory regions do not overlap. */ ++ ++static void ++arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length, ++ unsigned int interleave_factor, ++ HOST_WIDE_INT bytes_per_iter) ++{ ++ rtx label, src_reg, dest_reg, final_src, test; ++ HOST_WIDE_INT leftover; ++ ++ leftover = length % bytes_per_iter; ++ length -= leftover; ++ ++ /* Create registers and memory references for use within the loop. */ ++ arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src); ++ arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest); ++ ++ /* Calculate the value that SRC_REG should have after the last iteration of ++ the loop. */ ++ final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length), ++ 0, 0, OPTAB_WIDEN); ++ ++ /* Emit the start of the loop. */ ++ label = gen_label_rtx (); ++ emit_label (label); ++ ++ /* Emit the loop body. */ ++ arm_block_move_unaligned_straight (dest, src, bytes_per_iter, ++ interleave_factor); ++ ++ /* Move on to the next block. */ ++ emit_move_insn (src_reg, plus_constant (src_reg, bytes_per_iter)); ++ emit_move_insn (dest_reg, plus_constant (dest_reg, bytes_per_iter)); ++ ++ /* Emit the loop condition. */ ++ test = gen_rtx_NE (VOIDmode, src_reg, final_src); ++ emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label)); ++ ++ /* Mop up any left-over bytes. */ ++ if (leftover) ++ arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor); ++} ++ ++/* Emit a block move when either the source or destination is unaligned (not ++ aligned to a four-byte boundary). This may need further tuning depending on ++ core type, optimize_size setting, etc. 
*/ ++ ++static int ++arm_movmemqi_unaligned (rtx *operands) ++{ ++ HOST_WIDE_INT length = INTVAL (operands[2]); ++ ++ if (optimize_size) ++ { ++ bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD; ++ bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD; ++ /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit ++ size of code if optimizing for size. We'll use ldm/stm if src_aligned ++ or dst_aligned though: allow more interleaving in those cases since the ++ resulting code can be smaller. */ ++ unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1; ++ HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4; ++ ++ if (length > 12) ++ arm_block_move_unaligned_loop (operands[0], operands[1], length, ++ interleave_factor, bytes_per_iter); ++ else ++ arm_block_move_unaligned_straight (operands[0], operands[1], length, ++ interleave_factor); ++ } ++ else ++ { ++ /* Note that the loop created by arm_block_move_unaligned_loop may be ++ subject to loop unrolling, which makes tuning this condition a little ++ redundant. 
*/ ++ if (length > 32) ++ arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16); ++ else ++ arm_block_move_unaligned_straight (operands[0], operands[1], length, 4); ++ } ++ ++ return 1; ++} ++ + int + arm_gen_movmemqi (rtx *operands) + { +@@ -10815,8 +11144,13 @@ + + if (GET_CODE (operands[2]) != CONST_INT + || GET_CODE (operands[3]) != CONST_INT +- || INTVAL (operands[2]) > 64 +- || INTVAL (operands[3]) & 3) ++ || INTVAL (operands[2]) > 64) ++ return 0; ++ ++ if (unaligned_access && (INTVAL (operands[3]) & 3) != 0) ++ return arm_movmemqi_unaligned (operands); ++ ++ if (INTVAL (operands[3]) & 3) + return 0; + + dstbase = operands[0]; + +=== modified file 'gcc/config/arm/arm.h' +--- old/gcc/config/arm/arm.h 2011-10-19 17:01:50 +0000 ++++ new/gcc/config/arm/arm.h 2011-11-21 01:45:54 +0000 +@@ -47,6 +47,8 @@ + { \ + if (TARGET_DSP_MULTIPLY) \ + builtin_define ("__ARM_FEATURE_DSP"); \ ++ if (unaligned_access) \ ++ builtin_define ("__ARM_FEATURE_UNALIGNED"); \ + /* Define __arm__ even when in thumb mode, for \ + consistency with armcc. */ \ + builtin_define ("__arm__"); \ + +=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c' +--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c 2011-10-19 22:56:19 +0000 +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_unaligned } */ ++/* { dg-options "-O2" } */ ++ ++#include <string.h> ++ ++void unknown_alignment (char *dest, char *src) ++{ ++ memcpy (dest, src, 15); ++} ++ ++/* We should see three unaligned word loads and store pairs, one unaligned ++ ldrh/strh pair, and an ldrb/strb pair. Sanity check that. 
*/ ++ ++/* { dg-final { scan-assembler-times "@ unaligned" 8 } } */ ++/* { dg-final { scan-assembler-times "ldrh" 1 } } */ ++/* { dg-final { scan-assembler-times "strh" 1 } } */ ++/* { dg-final { scan-assembler-times "ldrb" 1 } } */ ++/* { dg-final { scan-assembler-times "strb" 1 } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c' +--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c 2011-10-19 22:56:19 +0000 +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_unaligned } */ ++/* { dg-options "-O2" } */ ++ ++#include <string.h> ++ ++char dest[16]; ++ ++void aligned_dest (char *src) ++{ ++ memcpy (dest, src, 15); ++} ++ ++/* Expect a multi-word store for the main part of the copy, but subword ++ loads/stores for the remainder. */ ++ ++/* { dg-final { scan-assembler-times "stmia" 1 } } */ ++/* { dg-final { scan-assembler-times "ldrh" 1 } } */ ++/* { dg-final { scan-assembler-times "strh" 1 } } */ ++/* { dg-final { scan-assembler-times "ldrb" 1 } } */ ++/* { dg-final { scan-assembler-times "strb" 1 } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c' +--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c 2011-10-19 22:56:19 +0000 +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_unaligned } */ ++/* { dg-options "-O2" } */ ++ ++#include <string.h> ++ ++char src[16]; ++ ++void aligned_src (char *dest) ++{ ++ memcpy (dest, src, 15); ++} ++ ++/* Expect a multi-word load for the main part of the copy, but subword ++ loads/stores for the remainder. 
*/ ++ ++/* { dg-final { scan-assembler-times "ldmia" 1 } } */ ++/* { dg-final { scan-assembler-times "ldrh" 1 } } */ ++/* { dg-final { scan-assembler-times "strh" 1 } } */ ++/* { dg-final { scan-assembler-times "ldrb" 1 } } */ ++/* { dg-final { scan-assembler-times "strb" 1 } } */ + +=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c' +--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c 1970-01-01 00:00:00 +0000 ++++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c 2011-10-19 22:56:19 +0000 +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_unaligned } */ ++/* { dg-options "-O2" } */ ++ ++#include <string.h> ++ ++char src[16]; ++char dest[16]; ++ ++void aligned_both (void) ++{ ++ memcpy (dest, src, 15); ++} ++ ++/* We know both src and dest to be aligned: expect multiword loads/stores. */ ++ ++/* { dg-final { scan-assembler-times "ldmia" 1 } } */ ++/* { dg-final { scan-assembler-times "stmia" 1 } } */ + +=== modified file 'gcc/testsuite/lib/target-supports.exp' +--- old/gcc/testsuite/lib/target-supports.exp 2011-10-23 13:33:07 +0000 ++++ new/gcc/testsuite/lib/target-supports.exp 2011-11-21 01:45:54 +0000 +@@ -1894,6 +1894,18 @@ + }] + } + ++# Return 1 if this is an ARM target that supports unaligned word/halfword ++# load/store instructions. ++ ++proc check_effective_target_arm_unaligned { } { ++ return [check_no_compiler_messages arm_unaligned assembly { ++ #ifndef __ARM_FEATURE_UNALIGNED ++ #error no unaligned support ++ #endif ++ int i; ++ }] ++} ++ + # Add the options needed for NEON. We need either -mfloat-abi=softfp + # or -mfloat-abi=hard, but if one is already specified by the + # multilib, use it. 
Similarly, if a -mfpu option already enables + diff --git a/meta-oe/recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc b/meta-oe/recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc index 73fe5c8b2d..fcdccf5d2d 100644 --- a/meta-oe/recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc +++ b/meta-oe/recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc @@ -53,4 +53,26 @@ file://linaro/gcc-4.6-linaro-r106805.patch \ file://linaro/gcc-4.6-linaro-r106806.patch \ file://linaro/gcc-4.6-linaro-r106807.patch \ file://linaro/gcc-4.6-linaro-r106811.patch \ +file://linaro/gcc-4.6-linaro-r106814.patch \ +file://linaro/gcc-4.6-linaro-r106815.patch \ +file://linaro/gcc-4.6-linaro-r106816.patch \ +file://linaro/gcc-4.6-linaro-r106817.patch \ +file://linaro/gcc-4.6-linaro-r106818.patch \ +file://linaro/gcc-4.6-linaro-r106819.patch \ +file://linaro/gcc-4.6-linaro-r106820.patch \ +file://linaro/gcc-4.6-linaro-r106821.patch \ +file://linaro/gcc-4.6-linaro-r106825.patch \ +file://linaro/gcc-4.6-linaro-r106826.patch \ +file://linaro/gcc-4.6-linaro-r106827.patch \ +file://linaro/gcc-4.6-linaro-r106828.patch \ +file://linaro/gcc-4.6-linaro-r106829.patch \ +file://linaro/gcc-4.6-linaro-r106830.patch \ +file://linaro/gcc-4.6-linaro-r106831.patch \ +file://linaro/gcc-4.6-linaro-r106832.patch \ +file://linaro/gcc-4.6-linaro-r106833.patch \ +file://linaro/gcc-4.6-linaro-r106834.patch \ +file://linaro/gcc-4.6-linaro-r106836.patch \ +file://linaro/gcc-4.6-linaro-r106839.patch \ +file://linaro/gcc-4.6-linaro-r106840.patch \ +file://linaro/gcc-4.6-linaro-r106841.patch \ " diff --git a/meta-oe/recipes-devtools/gcc/gcc-common-4.6.inc b/meta-oe/recipes-devtools/gcc/gcc-common-4.6.inc index 0faf45e937..c12913d927 100644 --- a/meta-oe/recipes-devtools/gcc/gcc-common-4.6.inc +++ b/meta-oe/recipes-devtools/gcc/gcc-common-4.6.inc @@ -1,4 +1,4 @@ # this will prepend this layer to FILESPATH FILESEXTRAPATHS := "${THISDIR}/gcc-4.6" -PRINC = "2" +PRINC = "3" ARM_INSTRUCTION_SET = "arm" |