aboutsummaryrefslogtreecommitdiffstats
path: root/meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99312.patch
blob: 9f0c98e9c99e72fae33d2ded5cea500050a2593a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
2010-07-15  Sandra Loosemore  <sandra@codesourcery.com>

	Backport from mainline:

	2010-06-09  Sandra Loosemore  <sandra@codesourcery.com>

	gcc/	
	* tree-ssa-loop-ivopts.c (adjust_setup_cost): New function.
	(get_computation_cost_at): Use it.
	(determine_use_iv_cost_condition): Likewise.
	(determine_iv_cost): Likewise.

	2010-07-05  Sandra Loosemore  <sandra@codesourcery.com>

	PR middle-end/42505

	gcc/
	* tree-ssa-loop-ivopts.c (determine_set_costs): Delete obsolete
	comments about cost model.
	(try_add_cand_for):  Add second strategy for choosing initial set
	based on original IVs, controlled by ORIGINALP argument.
	(get_initial_solution): Add ORIGINALP argument.
	(find_optimal_iv_set_1): New function, split from find_optimal_iv_set.
	(find_optimal_iv_set): Try two different strategies for choosing
	the IV set, and return the one with lower cost.

	gcc/testsuite/
	* gcc.target/arm/pr42505.c: New test case.

	2010-07-10  Sandra Loosemore  <sandra@codesourcery.com>

	PR middle-end/42505

	gcc/
	* tree-inline.c (estimate_num_insns): Refactor builtin complexity
	lookup code into....
	* builtins.c (is_simple_builtin, is_inexpensive_builtin): ...these
	new functions.
	* tree.h (is_simple_builtin, is_inexpensive_builtin): Declare.
	* cfgloopanal.c (target_clobbered_regs): Define.
	(init_set_costs): Initialize target_clobbered_regs.
	(estimate_reg_pressure_cost): Add call_p argument.  When true,
	adjust the number of available registers to exclude the
	call-clobbered registers.
	* cfgloop.h (target_clobbered_regs): Declare.
	(estimate_reg_pressure_cost): Adjust declaration.
	* tree-ssa-loop-ivopts.c (struct ivopts_data): Add body_includes_call.
	(ivopts_global_cost_for_size): Pass it to estimate_reg_pressure_cost.
	(determine_set_costs): Dump target_clobbered_regs.
	(loop_body_includes_call): New function.
	(tree_ssa_iv_optimize_loop): Use it to initialize new field.
	* loop-invariant.c (gain_for_invariant): Adjust arguments to pass
	call_p flag through.
	(best_gain_for_invariant): Likewise.
	(find_invariants_to_move): Likewise.
	(move_single_loop_invariants): Likewise, using already-computed
	has_call field.

 2010-07-15  Jie Zhang  <jie@codesourcery.com>
 
 	Issue #8497, #8893

=== modified file 'gcc/builtins.c'
--- old/gcc/builtins.c	2010-04-13 12:47:11 +0000
+++ new/gcc/builtins.c	2010-08-02 13:51:23 +0000
@@ -13624,3 +13624,123 @@
       break;
     }
 }
+
+/* Return true if DECL is a builtin that expands to a constant or similarly
+   simple code.  */
+bool
+is_simple_builtin (tree decl)
+{
+  if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
+    switch (DECL_FUNCTION_CODE (decl))
+      {
+	/* Builtins that expand to constants.  */
+      case BUILT_IN_CONSTANT_P:
+      case BUILT_IN_EXPECT:
+      case BUILT_IN_OBJECT_SIZE:
+      case BUILT_IN_UNREACHABLE:
+	/* Simple register moves or loads from stack.  */
+      case BUILT_IN_RETURN_ADDRESS:
+      case BUILT_IN_EXTRACT_RETURN_ADDR:
+      case BUILT_IN_FROB_RETURN_ADDR:
+      case BUILT_IN_RETURN:
+      case BUILT_IN_AGGREGATE_INCOMING_ADDRESS:
+      case BUILT_IN_FRAME_ADDRESS:
+      case BUILT_IN_VA_END:
+      case BUILT_IN_STACK_SAVE:
+      case BUILT_IN_STACK_RESTORE:
+	/* Exception state returns or moves registers around.  */
+      case BUILT_IN_EH_FILTER:
+      case BUILT_IN_EH_POINTER:
+      case BUILT_IN_EH_COPY_VALUES:
+	return true;
+
+      default:
+	return false;
+      }
+
+  return false;
+}
+
+/* Return true if DECL is a builtin that is not expensive, i.e., they are
+   most probably expanded inline into reasonably simple code.  This is a
+   superset of is_simple_builtin.  */
+bool
+is_inexpensive_builtin (tree decl)
+{
+  if (!decl)
+    return false;
+  else if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_MD)
+    return true;
+  else if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
+    switch (DECL_FUNCTION_CODE (decl))
+      {
+      case BUILT_IN_ABS:
+      case BUILT_IN_ALLOCA:
+      case BUILT_IN_BSWAP32:
+      case BUILT_IN_BSWAP64:
+      case BUILT_IN_CLZ:
+      case BUILT_IN_CLZIMAX:
+      case BUILT_IN_CLZL:
+      case BUILT_IN_CLZLL:
+      case BUILT_IN_CTZ:
+      case BUILT_IN_CTZIMAX:
+      case BUILT_IN_CTZL:
+      case BUILT_IN_CTZLL:
+      case BUILT_IN_FFS:
+      case BUILT_IN_FFSIMAX:
+      case BUILT_IN_FFSL:
+      case BUILT_IN_FFSLL:
+      case BUILT_IN_IMAXABS:
+      case BUILT_IN_FINITE:
+      case BUILT_IN_FINITEF:
+      case BUILT_IN_FINITEL:
+      case BUILT_IN_FINITED32:
+      case BUILT_IN_FINITED64:
+      case BUILT_IN_FINITED128:
+      case BUILT_IN_FPCLASSIFY:
+      case BUILT_IN_ISFINITE:
+      case BUILT_IN_ISINF_SIGN:
+      case BUILT_IN_ISINF:
+      case BUILT_IN_ISINFF:
+      case BUILT_IN_ISINFL:
+      case BUILT_IN_ISINFD32:
+      case BUILT_IN_ISINFD64:
+      case BUILT_IN_ISINFD128:
+      case BUILT_IN_ISNAN:
+      case BUILT_IN_ISNANF:
+      case BUILT_IN_ISNANL:
+      case BUILT_IN_ISNAND32:
+      case BUILT_IN_ISNAND64:
+      case BUILT_IN_ISNAND128:
+      case BUILT_IN_ISNORMAL:
+      case BUILT_IN_ISGREATER:
+      case BUILT_IN_ISGREATEREQUAL:
+      case BUILT_IN_ISLESS:
+      case BUILT_IN_ISLESSEQUAL:
+      case BUILT_IN_ISLESSGREATER:
+      case BUILT_IN_ISUNORDERED:
+      case BUILT_IN_VA_ARG_PACK:
+      case BUILT_IN_VA_ARG_PACK_LEN:
+      case BUILT_IN_VA_COPY:
+      case BUILT_IN_TRAP:
+      case BUILT_IN_SAVEREGS:
+      case BUILT_IN_POPCOUNTL:
+      case BUILT_IN_POPCOUNTLL:
+      case BUILT_IN_POPCOUNTIMAX:
+      case BUILT_IN_POPCOUNT:
+      case BUILT_IN_PARITYL:
+      case BUILT_IN_PARITYLL:
+      case BUILT_IN_PARITYIMAX:
+      case BUILT_IN_PARITY:
+      case BUILT_IN_LABS:
+      case BUILT_IN_LLABS:
+      case BUILT_IN_PREFETCH:
+	return true;
+
+      default:
+	return is_simple_builtin (decl);
+      }
+
+  return false;
+}
+

=== modified file 'gcc/cfgloop.h'
--- old/gcc/cfgloop.h	2009-11-25 10:55:54 +0000
+++ new/gcc/cfgloop.h	2010-08-02 13:51:23 +0000
@@ -622,13 +622,14 @@
 /* The properties of the target.  */
 
 extern unsigned target_avail_regs;
+extern unsigned target_clobbered_regs;
 extern unsigned target_res_regs;
 extern unsigned target_reg_cost [2];
 extern unsigned target_spill_cost [2];
 
 /* Register pressure estimation for induction variable optimizations & loop
    invariant motion.  */
-extern unsigned estimate_reg_pressure_cost (unsigned, unsigned, bool);
+extern unsigned estimate_reg_pressure_cost (unsigned, unsigned, bool, bool);
 extern void init_set_costs (void);
 
 /* Loop optimizer initialization.  */

=== modified file 'gcc/cfgloopanal.c'
--- old/gcc/cfgloopanal.c	2009-09-30 08:57:56 +0000
+++ new/gcc/cfgloopanal.c	2010-08-02 13:51:23 +0000
@@ -320,6 +320,8 @@
 /* The properties of the target.  */
 
 unsigned target_avail_regs;	/* Number of available registers.  */
+unsigned target_clobbered_regs; /* Number of available registers that are
+				   call-clobbered.  */
 unsigned target_res_regs;	/* Number of registers reserved for temporary
 				   expressions.  */
 unsigned target_reg_cost[2];	/* The cost for register when there still
@@ -342,10 +344,15 @@
   unsigned i;
 
   target_avail_regs = 0;
+  target_clobbered_regs = 0;
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
 	&& !fixed_regs[i])
-      target_avail_regs++;
+      {
+	target_avail_regs++;
+	if (call_used_regs[i])
+	  target_clobbered_regs++;
+      }
 
   target_res_regs = 3;
 
@@ -379,20 +386,29 @@
 
 /* Estimates cost of increased register pressure caused by making N_NEW new
    registers live around the loop.  N_OLD is the number of registers live
-   around the loop.  */
+   around the loop.  If CALL_P is true, also take into account that
+   call-used registers may be clobbered in the loop body, reducing the
+   number of available registers before we spill.  */
 
 unsigned
-estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed)
+estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
+			    bool call_p)
 {
   unsigned cost;
   unsigned regs_needed = n_new + n_old;
+  unsigned available_regs = target_avail_regs;
+
+  /* If there is a call in the loop body, the call-clobbered registers
+     are not available for loop invariants.  */
+  if (call_p)
+    available_regs = available_regs - target_clobbered_regs;
 
   /* If we have enough registers, we should use them and not restrict
      the transformations unnecessarily.  */
-  if (regs_needed + target_res_regs <= target_avail_regs)
+  if (regs_needed + target_res_regs <= available_regs)
     return 0;
 
-  if (regs_needed <= target_avail_regs)
+  if (regs_needed <= available_regs)
     /* If we are close to running out of registers, try to preserve
        them.  */
     cost = target_reg_cost [speed] * n_new;

=== modified file 'gcc/loop-invariant.c'
--- old/gcc/loop-invariant.c	2010-04-02 18:54:46 +0000
+++ new/gcc/loop-invariant.c	2010-08-02 13:51:23 +0000
@@ -1173,11 +1173,13 @@
 /* Calculates gain for eliminating invariant INV.  REGS_USED is the number
    of registers used in the loop, NEW_REGS is the number of new variables
    already added due to the invariant motion.  The number of registers needed
-   for it is stored in *REGS_NEEDED.  */
+   for it is stored in *REGS_NEEDED.  SPEED and CALL_P are flags passed
+   through to estimate_reg_pressure_cost. */
 
 static int
 gain_for_invariant (struct invariant *inv, unsigned *regs_needed,
-		    unsigned *new_regs, unsigned regs_used, bool speed)
+		    unsigned *new_regs, unsigned regs_used,
+		    bool speed, bool call_p)
 {
   int comp_cost, size_cost;
 
@@ -1188,9 +1190,9 @@
   if (! flag_ira_loop_pressure)
     {
       size_cost = (estimate_reg_pressure_cost (new_regs[0] + regs_needed[0],
-					       regs_used, speed)
+					       regs_used, speed, call_p)
 		   - estimate_reg_pressure_cost (new_regs[0],
-						 regs_used, speed));
+						 regs_used, speed, call_p));
     }
   else
     {
@@ -1245,7 +1247,8 @@
 
 static int
 best_gain_for_invariant (struct invariant **best, unsigned *regs_needed,
-			 unsigned *new_regs, unsigned regs_used, bool speed)
+			 unsigned *new_regs, unsigned regs_used,
+			 bool speed, bool call_p)
 {
   struct invariant *inv;
   int i, gain = 0, again;
@@ -1261,7 +1264,7 @@
 	continue;
 
       again = gain_for_invariant (inv, aregs_needed, new_regs, regs_used,
-      				  speed);
+      				  speed, call_p);
       if (again > gain)
 	{
 	  gain = again;
@@ -1314,7 +1317,7 @@
 /* Determines which invariants to move.  */
 
 static void
-find_invariants_to_move (bool speed)
+find_invariants_to_move (bool speed, bool call_p)
 {
   int gain;
   unsigned i, regs_used, regs_needed[N_REG_CLASSES], new_regs[N_REG_CLASSES];
@@ -1353,7 +1356,8 @@
 	new_regs[ira_reg_class_cover[i]] = 0;
     }
   while ((gain = best_gain_for_invariant (&inv, regs_needed,
-					  new_regs, regs_used, speed)) > 0)
+					  new_regs, regs_used,
+					  speed, call_p)) > 0)
     {
       set_move_mark (inv->invno, gain);
       if (! flag_ira_loop_pressure)
@@ -1554,7 +1558,8 @@
   init_inv_motion_data ();
 
   find_invariants (loop);
-  find_invariants_to_move (optimize_loop_for_speed_p (loop));
+  find_invariants_to_move (optimize_loop_for_speed_p (loop),
+			   LOOP_DATA (loop)->has_call);
   move_invariants (loop);
 
   free_inv_motion_data ();

=== added file 'gcc/testsuite/gcc.target/arm/pr42505.c'
--- old/gcc/testsuite/gcc.target/arm/pr42505.c	1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/pr42505.c	2010-08-02 13:51:23 +0000
@@ -0,0 +1,23 @@
+/* { dg-options "-mthumb -Os -march=armv5te" }  */
+/* { dg-require-effective-target arm_thumb1_ok } */
+/* { dg-final { scan-assembler-not "str\[\\t \]*r.,\[\\t \]*.sp," } } */
+
+struct A {
+ int f1;
+ int f2;
+};
+
+int func(int c);
+
+/* This function should not need to spill anything to the stack.  */
+int test(struct A* src, struct A* dst, int count)
+{
+  while (count--) {
+    if (!func(src->f2)) {
+        return 0;
+      }
+      *dst++ = *src++;
+  }
+
+  return 1;
+}

=== modified file 'gcc/tree-inline.c'
--- old/gcc/tree-inline.c	2010-03-18 20:07:13 +0000
+++ new/gcc/tree-inline.c	2010-08-02 13:51:23 +0000
@@ -3246,34 +3246,13 @@
 	if (POINTER_TYPE_P (funtype))
 	  funtype = TREE_TYPE (funtype);
 
-	if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_MD)
+	if (is_simple_builtin (decl))
+	  return 0;
+	else if (is_inexpensive_builtin (decl))
 	  cost = weights->target_builtin_call_cost;
 	else
 	  cost = weights->call_cost;
 
-	if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
-	  switch (DECL_FUNCTION_CODE (decl))
-	    {
-	    case BUILT_IN_CONSTANT_P:
-	      return 0;
-	    case BUILT_IN_EXPECT:
-	      return 0;
-
-	    /* Prefetch instruction is not expensive.  */
-	    case BUILT_IN_PREFETCH:
-	      cost = weights->target_builtin_call_cost;
-	      break;
-
-	    /* Exception state returns or moves registers around.  */
-	    case BUILT_IN_EH_FILTER:
-	    case BUILT_IN_EH_POINTER:
-	    case BUILT_IN_EH_COPY_VALUES:
-	      return 0;
-
-	    default:
-	      break;
-	    }
-
 	if (decl)
 	  funtype = TREE_TYPE (decl);
 

=== modified file 'gcc/tree-ssa-loop-ivopts.c'
--- old/gcc/tree-ssa-loop-ivopts.c	2010-04-01 15:18:07 +0000
+++ new/gcc/tree-ssa-loop-ivopts.c	2010-08-02 13:51:23 +0000
@@ -257,6 +257,9 @@
 
   /* Are we optimizing for speed?  */
   bool speed;
+
+  /* Whether the loop body includes any function calls.  */
+  bool body_includes_call;
 };
 
 /* An assignment of iv candidates to uses.  */
@@ -2926,6 +2929,20 @@
   return get_computation_at (loop, use, cand, use->stmt);
 }
 
+/* Adjust the cost COST for being in loop setup rather than loop body.
+   If we're optimizing for space, the loop setup overhead is constant;
+   if we're optimizing for speed, amortize it over the per-iteration cost.  */
+static unsigned
+adjust_setup_cost (struct ivopts_data *data, unsigned cost)
+{
+  if (cost == INFTY)
+    return cost;
+  else if (optimize_loop_for_speed_p (data->current_loop))
+    return cost / AVG_LOOP_NITER (data->current_loop);
+  else
+    return cost;
+}
+
 /* Returns cost of addition in MODE.  */
 
 static unsigned
@@ -3838,8 +3855,8 @@
   /* Symbol + offset should be compile-time computable so consider that they
       are added once to the variable, if present.  */
   if (var_present && (symbol_present || offset))
-    cost.cost += add_cost (TYPE_MODE (ctype), speed)
-		 / AVG_LOOP_NITER (data->current_loop);
+    cost.cost += adjust_setup_cost (data,
+				    add_cost (TYPE_MODE (ctype), speed));
 
   /* Having offset does not affect runtime cost in case it is added to
      symbol, but it increases complexity.  */
@@ -4104,7 +4121,7 @@
       elim_cost = force_var_cost (data, bound, &depends_on_elim);
       /* The bound is a loop invariant, so it will be only computed
 	 once.  */
-      elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
+      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
     }
   else
     elim_cost = infinite_cost;
@@ -4351,7 +4368,7 @@
   cost_base = force_var_cost (data, base, NULL);
   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
 
-  cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
+  cost = cost_step + adjust_setup_cost (data, cost_base.cost);
 
   /* Prefer the original ivs unless we may gain something by replacing it.
      The reason is to make debugging simpler; so this is not relevant for
@@ -4404,7 +4421,8 @@
 {
   /* We add size to the cost, so that we prefer eliminating ivs
      if possible.  */
-  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
+  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
+					    data->body_includes_call);
 }
 
 /* For each size of the induction variable set determine the penalty.  */
@@ -4419,30 +4437,11 @@
   struct loop *loop = data->current_loop;
   bitmap_iterator bi;
 
-  /* We use the following model (definitely improvable, especially the
-     cost function -- TODO):
-
-     We estimate the number of registers available (using MD data), name it A.
-
-     We estimate the number of registers used by the loop, name it U.  This
-     number is obtained as the number of loop phi nodes (not counting virtual
-     registers and bivs) + the number of variables from outside of the loop.
-
-     We set a reserve R (free regs that are used for temporary computations,
-     etc.).  For now the reserve is a constant 3.
-
-     Let I be the number of induction variables.
-
-     -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
-	make a lot of ivs without a reason).
-     -- if A - R < U + I <= A, the cost is I * PRES_COST
-     -- if U + I > A, the cost is I * PRES_COST and
-        number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
-
   if (dump_file && (dump_flags & TDF_DETAILS))
     {
       fprintf (dump_file, "Global costs:\n");
       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
+      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
     }
@@ -5062,11 +5061,13 @@
 }
 
 /* Tries to extend the sets IVS in the best possible way in order
-   to express the USE.  */
+   to express the USE.  If ORIGINALP is true, prefer candidates from
+   the original set of IVs, otherwise favor important candidates not
+   based on any memory object.  */
 
 static bool
 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
-		  struct iv_use *use)
+		  struct iv_use *use, bool originalp)
 {
   comp_cost best_cost, act_cost;
   unsigned i;
@@ -5085,7 +5086,8 @@
       iv_ca_set_no_cp (data, ivs, use);
     }
 
-  /* First try important candidates not based on any memory object.  Only if
+  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
+     first try important candidates not based on any memory object.  Only if
      this fails, try the specific ones.  Rationale -- in loops with many
      variables the best choice often is to use just one generic biv.  If we
      added here many ivs specific to the uses, the optimization algorithm later
@@ -5097,7 +5099,10 @@
     {
       cand = iv_cand (data, i);
 
-      if (cand->iv->base_object != NULL_TREE)
+      if (originalp && cand->pos !=IP_ORIGINAL)
+	continue;
+
+      if (!originalp && cand->iv->base_object != NULL_TREE)
 	continue;
 
       if (iv_ca_cand_used_p (ivs, cand))
@@ -5133,8 +5138,13 @@
 	    continue;
 
 	  /* Already tried this.  */
-	  if (cand->important && cand->iv->base_object == NULL_TREE)
-	    continue;
+	  if (cand->important)
+	    {
+	      if (originalp && cand->pos == IP_ORIGINAL)
+		continue;
+	      if (!originalp && cand->iv->base_object == NULL_TREE)
+		continue;
+	    }
 
 	  if (iv_ca_cand_used_p (ivs, cand))
 	    continue;
@@ -5168,13 +5178,13 @@
 /* Finds an initial assignment of candidates to uses.  */
 
 static struct iv_ca *
-get_initial_solution (struct ivopts_data *data)
+get_initial_solution (struct ivopts_data *data, bool originalp)
 {
   struct iv_ca *ivs = iv_ca_new (data);
   unsigned i;
 
   for (i = 0; i < n_iv_uses (data); i++)
-    if (!try_add_cand_for (data, ivs, iv_use (data, i)))
+    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
       {
 	iv_ca_free (&ivs);
 	return NULL;
@@ -5246,14 +5256,12 @@
    solution and remove the unused ivs while this improves the cost.  */
 
 static struct iv_ca *
-find_optimal_iv_set (struct ivopts_data *data)
+find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
 {
-  unsigned i;
   struct iv_ca *set;
-  struct iv_use *use;
 
   /* Get the initial solution.  */
-  set = get_initial_solution (data);
+  set = get_initial_solution (data, originalp);
   if (!set)
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
@@ -5276,11 +5284,46 @@
 	}
     }
 
+  return set;
+}
+
+static struct iv_ca *
+find_optimal_iv_set (struct ivopts_data *data)
+{
+  unsigned i;
+  struct iv_ca *set, *origset;
+  struct iv_use *use;
+  comp_cost cost, origcost;
+
+  /* Determine the cost based on a strategy that starts with original IVs,
+     and try again using a strategy that prefers candidates not based
+     on any IVs.  */
+  origset = find_optimal_iv_set_1 (data, true);
+  set = find_optimal_iv_set_1 (data, false);
+
+  if (!origset && !set)
+    return NULL;
+
+  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
+  cost = set ? iv_ca_cost (set) : infinite_cost;
+
   if (dump_file && (dump_flags & TDF_DETAILS))
     {
-      comp_cost cost = iv_ca_cost (set);
-      fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
-    }
+      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
+	       origcost.cost, origcost.complexity);
+      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
+	       cost.cost, cost.complexity);
+    }
+
+  /* Choose the one with the best cost.  */
+  if (compare_costs (origcost, cost) <= 0)
+    {
+      if (set)
+	iv_ca_free (&set);
+      set = origset;
+    }
+  else if (origset)
+    iv_ca_free (&origset);
 
   for (i = 0; i < n_iv_uses (data); i++)
     {
@@ -5768,6 +5811,25 @@
   VEC_free (iv_cand_p, heap, data->iv_candidates);
 }
 
+/* Returns true if the loop body BODY includes any function calls.  */
+
+static bool
+loop_body_includes_call (basic_block *body, unsigned num_nodes)
+{
+  gimple_stmt_iterator gsi;
+  unsigned i;
+
+  for (i = 0; i < num_nodes; i++)
+    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
+      {
+	gimple stmt = gsi_stmt (gsi);
+	if (is_gimple_call (stmt)
+	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
+	  return true;
+      }
+  return false;
+}
+
 /* Optimizes the LOOP.  Returns true if anything changed.  */
 
 static bool
@@ -5799,6 +5861,7 @@
     }
 
   body = get_loop_body (loop);
+  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
   free (body);
 

=== modified file 'gcc/tree.h'
--- old/gcc/tree.h	2010-04-02 18:54:46 +0000
+++ new/gcc/tree.h	2010-08-02 13:51:23 +0000
@@ -4962,6 +4962,8 @@
 extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int,
 			  tree, tree);
 extern void set_builtin_user_assembler_name (tree decl, const char *asmspec);
+extern bool is_simple_builtin (tree);
+extern bool is_inexpensive_builtin (tree);
 
 /* In convert.c */
 extern tree strip_float_extensions (tree);