summaryrefslogtreecommitdiffstats
path: root/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99380.patch
diff options
context:
space:
mode:
Diffstat (limited to 'recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99380.patch')
-rw-r--r--recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99380.patch2997
1 files changed, 2997 insertions, 0 deletions
diff --git a/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99380.patch b/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99380.patch
new file mode 100644
index 0000000000..c66c11f82c
--- /dev/null
+++ b/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99380.patch
@@ -0,0 +1,2997 @@
12010-08-31 Chung-Lin Tang <cltang@codesourcery.com>
2
3 Backport from mainline:
4
5 2010-04-14 Bernd Schmidt <bernds@codesourcery.com>
6
7 PR target/21803
8 gcc/
9 * ifcvt.c (cond_exec_process_if_block): Look for identical sequences
10 at the start and end of the then/else blocks, and omit them from the
11 conversion.
12 * cfgcleanup.c (flow_find_cross_jump): No longer static. Remove MODE
13 argument; all callers changed. Pass zero to old_insns_match_p instead.
14 (flow_find_head_matching_sequence): New function.
15 (old_insns_match_p): Check REG_EH_REGION notes for calls.
16 * basic-block.h (flow_find_cross_jump,
17 flow_find_head_matching_sequence): Declare functions.
18
19 gcc/testsuite/
20 * gcc.target/arm/pr42496.c: New test.
21
22 2010-04-22 Bernd Schmidt <bernds@codesourcery.com>
23
24 PR middle-end/29274
25 gcc/
26 * tree-pass.h (pass_optimize_widening_mul): Declare.
27 * tree-ssa-math-opts.c (execute_optimize_widening_mul,
28 gate_optimize_widening_mul): New static functions.
29 (pass_optimize_widening_mul): New.
30 * expr.c (expand_expr_real_2) <case WIDEN_MULT_EXPR>: New case.
31 <case MULT_EXPR>: Remove support for widening multiplies.
32 * tree.def (WIDEN_MULT_EXPR): Tweak comment.
33 * cfgexpand.c (expand_debug_expr) <case WIDEN_MULT_EXPR>: Use
34 simplify_gen_unary rather than directly building extensions.
35 * tree-cfg.c (verify_gimple_assign_binary): Add tests for
36 WIDEN_MULT_EXPR.
37 * expmed.c (expand_widening_mult): New function.
38 * passes.c (init_optimization_passes): Add pass_optimize_widening_mul.
39 * optabs.h (expand_widening_mult): Declare.
40
41 gcc/testsuite/
42 * gcc.target/i386/wmul-1.c: New test.
43 * gcc.target/i386/wmul-2.c: New test.
44 * gcc.target/bfin/wmul-1.c: New test.
45 * gcc.target/bfin/wmul-2.c: New test.
46 * gcc.target/arm/wmul-1.c: New test.
47 * gcc.target/arm/wmul-2.c: New test.
48
49 2010-04-24 Bernd Schmidt <bernds@codesourcery.com>
50
51 PR tree-optimization/41442
52 gcc/
53 * fold-const.c (merge_truthop_with_opposite_arm): New function.
54 (fold_binary_loc): Call it.
55
56 gcc/testsuite/
57 * gcc.target/i386/pr41442.c: New test.
58
59 2010-04-29 Bernd Schmidt <bernds@codesourcery.com>
60
61 PR target/42895
62 gcc/
63 * doc/tm.texi (ADJUST_REG_ALLOC_ORDER): Renamed from
64 ORDER_REGS_FOR_LOCAL_ALLOC. All instances of this macro changed.
65 (HONOR_REG_ALLOC_ORDER): Describe new macro.
66 * ira.c (setup_alloc_regs): Use ADJUST_REG_ALLOC_ORDER if defined.
67 * ira-color.c (assign_hard_reg): Take prologue/epilogue costs into
68 account only if HONOR_REG_ALLOC_ORDER is not defined.
69 * config/arm/arm.h (HONOR_REG_ALLOC_ORDER): Define.
70 * system.h (ORDER_REGS_FOR_LOCAL_ALLOC): Poison.
71
72 2010-05-04 Mikael Pettersson <mikpe@it.uu.se>
73
74 PR bootstrap/43964
75 gcc/
76 * ira-color.c (assign_hard_reg): Declare rclass and add_cost
77 only if HONOR_REG_ALLOC_ORDER is not defined.
78
79 2010-06-04 Bernd Schmidt <bernds@codesourcery.com>
80
81 PR rtl-optimization/39871
82 PR rtl-optimization/40615
83 PR rtl-optimization/42500
84 PR rtl-optimization/42502
85 gcc/
86 * ira.c (init_reg_equiv_memory_loc): New function.
87 (ira): Call it twice.
88 * reload.h (calculate_elim_costs_all_insns): Declare.
89 * ira-costs.c: Include "reload.h".
90 (regno_equiv_gains): New static variable.
91 (init_costs): Allocate it.
92 (finish_costs): Free it.
93 (ira_costs): Call calculate_elim_costs_all_insns.
94 (find_costs_and_classes): Take estimated elimination costs
95 into account.
96 (ira_adjust_equiv_reg_cost): New function.
97 * ira.h (ira_adjust_equiv_reg_cost): Declare it.
98 * reload1.c (init_eliminable_invariants, free_reg_equiv,
99 elimination_costs_in_insn, note_reg_elim_costly): New static functions.
100 (elim_bb): New static variable.
101 (reload): Move code out of here into init_eliminable_invariants and
102 free_reg_equiv. Call them.
103 (calculate_elim_costs_all_insns): New function.
104 (eliminate_regs_1): Declare. Add extra arg FOR_COSTS;
105 all callers changed. If FOR_COSTS is true, don't call alter_reg,
106 but call note_reg_elim_costly if we turned a valid memory address
107 into an invalid one.
108 * Makefile.in (ira-costs.o): Depend on reload.h.
109
110 gcc/testsuite/
111 * gcc.target/arm/eliminate.c: New test.
112
113 2010-06-09 Bernd Schmidt <bernds@codesourcery.com>
114
115 gcc/
116 * config/arm/arm.c (thumb2_reorg): New function.
117 (arm_reorg): Call it.
118 * config/arm/thumb2.md (define_peephole2 for flag clobbering
119 arithmetic operations): Delete.
120
121 2010-06-12 Bernd Schmidt <bernds@codesourcery.com>
122
123 gcc/
124 * config/arm/arm.c (thumb2_reorg): Fix errors in previous change.
125
126 2010-06-17 Bernd Schmidt <bernds@codesourcery.com>
127
128 PR rtl-optimization/39871
129 gcc/
130 * reload1.c (init_eliminable_invariants): For flag_pic, disable
131 equivalences only for constants that aren't LEGITIMATE_PIC_OPERAND_P.
132 (function_invariant_p): Rule out a plus of frame or arg pointer with
133 a SYMBOL_REF.
134 * ira.c (find_reg_equiv_invariant_const): Likewise.
135
136 2010-06-18 Eric Botcazou <ebotcazou@adacore.com>
137
138 PR rtl-optimization/40900
139 gcc/
140 * expr.c (expand_expr_real_1) <SSA_NAME>: Fix long line. Save the
141 original expression for later reuse.
142 <expand_decl_rtl>: Use promote_function_mode to compute the signedness
143 of the promoted RTL for a SSA_NAME on the LHS of a call statement.
144
145 2010-06-18 Bernd Schmidt <bernds@codesourcery.com>
146 gcc/testsuite/
147 * gcc.target/arm/pr40900.c: New test.
148
149 2010-06-30 Bernd Schmidt <bernds@codesourcery.com>
150
151 PR tree-optimization/39799
152 gcc/
153 * tree-inline.c (remap_ssa_name): Initialize variable only if
154 SSA_NAME_OCCURS_IN_ABNORMAL_PHI.
155 * tree-ssa.c (warn_uninit): Avoid emitting an unnecessary message.
156
157 gcc/testsuite/
158 * c-c++-common/uninit-17.c: New test.
159
160 2010-07-25 Eric Botcazou <ebotcazou@adacore.com>
161
162 PR target/44484
163 gcc/
164 * config/sparc/predicates.md (memory_reg_operand): Delete.
165 * config/sparc/sync.md (sync_compare_and_swap): Minor tweaks.
166 (*sync_compare_and_swap): Encode the address form in the pattern.
167 (*sync_compare_and_swapdi_v8plus): Likewise.
168
169 2010-08-29 Chung-Lin Tang <cltang@codesourcery.com>
170
171 Backport from mainline:
172
173=== modified file 'gcc/Makefile.in'
174--- old/gcc/Makefile.in 2010-08-10 13:31:21 +0000
175+++ new/gcc/Makefile.in 2010-09-01 13:29:58 +0000
176@@ -3193,7 +3193,7 @@
177 ira-costs.o: ira-costs.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
178 hard-reg-set.h $(RTL_H) $(EXPR_H) $(TM_P_H) $(FLAGS_H) $(BASIC_BLOCK_H) \
179 $(REGS_H) addresses.h insn-config.h $(RECOG_H) $(TOPLEV_H) $(TARGET_H) \
180- $(PARAMS_H) $(IRA_INT_H)
181+ $(PARAMS_H) $(IRA_INT_H) reload.h
182 ira-conflicts.o: ira-conflicts.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
183 $(TARGET_H) $(RTL_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) \
184 insn-config.h $(RECOG_H) $(BASIC_BLOCK_H) $(TOPLEV_H) $(TM_P_H) $(PARAMS_H) \
185
186=== modified file 'gcc/basic-block.h'
187--- old/gcc/basic-block.h 2010-08-16 09:41:58 +0000
188+++ new/gcc/basic-block.h 2010-09-01 13:29:58 +0000
189@@ -894,6 +894,10 @@
190
191 /* In cfgcleanup.c. */
192 extern bool cleanup_cfg (int);
193+extern int flow_find_cross_jump (basic_block, basic_block, rtx *, rtx *);
194+extern int flow_find_head_matching_sequence (basic_block, basic_block,
195+ rtx *, rtx *, int);
196+
197 extern bool delete_unreachable_blocks (void);
198
199 extern bool mark_dfs_back_edges (void);
200
201=== modified file 'gcc/cfgcleanup.c'
202--- old/gcc/cfgcleanup.c 2010-05-17 16:26:22 +0000
203+++ new/gcc/cfgcleanup.c 2010-09-01 13:29:58 +0000
204@@ -68,7 +68,6 @@
205 static bool try_crossjump_to_edge (int, edge, edge);
206 static bool try_crossjump_bb (int, basic_block);
207 static bool outgoing_edges_match (int, basic_block, basic_block);
208-static int flow_find_cross_jump (int, basic_block, basic_block, rtx *, rtx *);
209 static bool old_insns_match_p (int, rtx, rtx);
210
211 static void merge_blocks_move_predecessor_nojumps (basic_block, basic_block);
212@@ -972,13 +971,27 @@
213 be filled that clobbers a parameter expected by the subroutine.
214
215 ??? We take the simple route for now and assume that if they're
216- equal, they were constructed identically. */
217-
218- if (CALL_P (i1)
219- && (!rtx_equal_p (CALL_INSN_FUNCTION_USAGE (i1),
220+ equal, they were constructed identically.
221+
222+ Also check for identical exception regions. */
223+
224+ if (CALL_P (i1))
225+ {
226+ /* Ensure the same EH region. */
227+ rtx n1 = find_reg_note (i1, REG_EH_REGION, 0);
228+ rtx n2 = find_reg_note (i2, REG_EH_REGION, 0);
229+
230+ if (!n1 && n2)
231+ return false;
232+
233+ if (n1 && (!n2 || XEXP (n1, 0) != XEXP (n2, 0)))
234+ return false;
235+
236+ if (!rtx_equal_p (CALL_INSN_FUNCTION_USAGE (i1),
237 CALL_INSN_FUNCTION_USAGE (i2))
238- || SIBLING_CALL_P (i1) != SIBLING_CALL_P (i2)))
239- return false;
240+ || SIBLING_CALL_P (i1) != SIBLING_CALL_P (i2))
241+ return false;
242+ }
243
244 #ifdef STACK_REGS
245 /* If cross_jump_death_matters is not 0, the insn's mode
246@@ -1017,6 +1030,29 @@
247 return false;
248 }
249
250+/* When comparing insns I1 and I2 in flow_find_cross_jump or
251+ flow_find_head_matching_sequence, ensure the notes match. */
252+
253+static void
254+merge_notes (rtx i1, rtx i2)
255+{
256+ /* If the merged insns have different REG_EQUAL notes, then
257+ remove them. */
258+ rtx equiv1 = find_reg_equal_equiv_note (i1);
259+ rtx equiv2 = find_reg_equal_equiv_note (i2);
260+
261+ if (equiv1 && !equiv2)
262+ remove_note (i1, equiv1);
263+ else if (!equiv1 && equiv2)
264+ remove_note (i2, equiv2);
265+ else if (equiv1 && equiv2
266+ && !rtx_equal_p (XEXP (equiv1, 0), XEXP (equiv2, 0)))
267+ {
268+ remove_note (i1, equiv1);
269+ remove_note (i2, equiv2);
270+ }
271+}
272+
273 /* Look through the insns at the end of BB1 and BB2 and find the longest
274 sequence that are equivalent. Store the first insns for that sequence
275 in *F1 and *F2 and return the sequence length.
276@@ -1024,9 +1060,8 @@
277 To simplify callers of this function, if the blocks match exactly,
278 store the head of the blocks in *F1 and *F2. */
279
280-static int
281-flow_find_cross_jump (int mode ATTRIBUTE_UNUSED, basic_block bb1,
282- basic_block bb2, rtx *f1, rtx *f2)
283+int
284+flow_find_cross_jump (basic_block bb1, basic_block bb2, rtx *f1, rtx *f2)
285 {
286 rtx i1, i2, last1, last2, afterlast1, afterlast2;
287 int ninsns = 0;
288@@ -1066,7 +1101,7 @@
289 if (i1 == BB_HEAD (bb1) || i2 == BB_HEAD (bb2))
290 break;
291
292- if (!old_insns_match_p (mode, i1, i2))
293+ if (!old_insns_match_p (0, i1, i2))
294 break;
295
296 merge_memattrs (i1, i2);
297@@ -1074,21 +1109,7 @@
298 /* Don't begin a cross-jump with a NOTE insn. */
299 if (INSN_P (i1))
300 {
301- /* If the merged insns have different REG_EQUAL notes, then
302- remove them. */
303- rtx equiv1 = find_reg_equal_equiv_note (i1);
304- rtx equiv2 = find_reg_equal_equiv_note (i2);
305-
306- if (equiv1 && !equiv2)
307- remove_note (i1, equiv1);
308- else if (!equiv1 && equiv2)
309- remove_note (i2, equiv2);
310- else if (equiv1 && equiv2
311- && !rtx_equal_p (XEXP (equiv1, 0), XEXP (equiv2, 0)))
312- {
313- remove_note (i1, equiv1);
314- remove_note (i2, equiv2);
315- }
316+ merge_notes (i1, i2);
317
318 afterlast1 = last1, afterlast2 = last2;
319 last1 = i1, last2 = i2;
320@@ -1130,6 +1151,97 @@
321 return ninsns;
322 }
323
324+/* Like flow_find_cross_jump, except start looking for a matching sequence from
325+ the head of the two blocks. Do not include jumps at the end.
326+ If STOP_AFTER is nonzero, stop after finding that many matching
327+ instructions. */
328+
329+int
330+flow_find_head_matching_sequence (basic_block bb1, basic_block bb2, rtx *f1,
331+ rtx *f2, int stop_after)
332+{
333+ rtx i1, i2, last1, last2, beforelast1, beforelast2;
334+ int ninsns = 0;
335+ edge e;
336+ edge_iterator ei;
337+ int nehedges1 = 0, nehedges2 = 0;
338+
339+ FOR_EACH_EDGE (e, ei, bb1->succs)
340+ if (e->flags & EDGE_EH)
341+ nehedges1++;
342+ FOR_EACH_EDGE (e, ei, bb2->succs)
343+ if (e->flags & EDGE_EH)
344+ nehedges2++;
345+
346+ i1 = BB_HEAD (bb1);
347+ i2 = BB_HEAD (bb2);
348+ last1 = beforelast1 = last2 = beforelast2 = NULL_RTX;
349+
350+ while (true)
351+ {
352+
353+ /* Ignore notes. */
354+ while (!NONDEBUG_INSN_P (i1) && i1 != BB_END (bb1))
355+ i1 = NEXT_INSN (i1);
356+
357+ while (!NONDEBUG_INSN_P (i2) && i2 != BB_END (bb2))
358+ i2 = NEXT_INSN (i2);
359+
360+ if (NOTE_P (i1) || NOTE_P (i2)
361+ || JUMP_P (i1) || JUMP_P (i2))
362+ break;
363+
364+ /* A sanity check to make sure we're not merging insns with different
365+ effects on EH. If only one of them ends a basic block, it shouldn't
366+ have an EH edge; if both end a basic block, there should be the same
367+ number of EH edges. */
368+ if ((i1 == BB_END (bb1) && i2 != BB_END (bb2)
369+ && nehedges1 > 0)
370+ || (i2 == BB_END (bb2) && i1 != BB_END (bb1)
371+ && nehedges2 > 0)
372+ || (i1 == BB_END (bb1) && i2 == BB_END (bb2)
373+ && nehedges1 != nehedges2))
374+ break;
375+
376+ if (!old_insns_match_p (0, i1, i2))
377+ break;
378+
379+ merge_memattrs (i1, i2);
380+
381+ /* Don't begin a cross-jump with a NOTE insn. */
382+ if (INSN_P (i1))
383+ {
384+ merge_notes (i1, i2);
385+
386+ beforelast1 = last1, beforelast2 = last2;
387+ last1 = i1, last2 = i2;
388+ ninsns++;
389+ }
390+
391+ if (i1 == BB_END (bb1) || i2 == BB_END (bb2)
392+ || (stop_after > 0 && ninsns == stop_after))
393+ break;
394+
395+ i1 = NEXT_INSN (i1);
396+ i2 = NEXT_INSN (i2);
397+ }
398+
399+#ifdef HAVE_cc0
400+ /* Don't allow a compare to be shared by cross-jumping unless the insn
401+ after the compare is also shared. */
402+ if (ninsns && reg_mentioned_p (cc0_rtx, last1) && sets_cc0_p (last1))
403+ last1 = beforelast1, last2 = beforelast2, ninsns--;
404+#endif
405+
406+ if (ninsns)
407+ {
408+ *f1 = last1;
409+ *f2 = last2;
410+ }
411+
412+ return ninsns;
413+}
414+
415 /* Return true iff outgoing edges of BB1 and BB2 match, together with
416 the branch instruction. This means that if we commonize the control
417 flow before end of the basic block, the semantic remains unchanged.
418@@ -1498,7 +1610,7 @@
419 return false;
420
421 /* ... and part the second. */
422- nmatch = flow_find_cross_jump (mode, src1, src2, &newpos1, &newpos2);
423+ nmatch = flow_find_cross_jump (src1, src2, &newpos1, &newpos2);
424
425 /* Don't proceed with the crossjump unless we found a sufficient number
426 of matching instructions or the 'from' block was totally matched
427
428=== modified file 'gcc/cfgexpand.c'
429--- old/gcc/cfgexpand.c 2010-05-14 17:11:03 +0000
430+++ new/gcc/cfgexpand.c 2010-09-01 13:29:58 +0000
431@@ -3026,14 +3026,15 @@
432 if (SCALAR_INT_MODE_P (GET_MODE (op0))
433 && SCALAR_INT_MODE_P (mode))
434 {
435+ enum machine_mode inner_mode = GET_MODE (op0);
436 if (TYPE_UNSIGNED (TREE_TYPE (TREE_OPERAND (exp, 0))))
437- op0 = gen_rtx_ZERO_EXTEND (mode, op0);
438+ op0 = simplify_gen_unary (ZERO_EXTEND, mode, op0, inner_mode);
439 else
440- op0 = gen_rtx_SIGN_EXTEND (mode, op0);
441+ op0 = simplify_gen_unary (SIGN_EXTEND, mode, op0, inner_mode);
442 if (TYPE_UNSIGNED (TREE_TYPE (TREE_OPERAND (exp, 1))))
443- op1 = gen_rtx_ZERO_EXTEND (mode, op1);
444+ op1 = simplify_gen_unary (ZERO_EXTEND, mode, op1, inner_mode);
445 else
446- op1 = gen_rtx_SIGN_EXTEND (mode, op1);
447+ op1 = simplify_gen_unary (SIGN_EXTEND, mode, op1, inner_mode);
448 return gen_rtx_MULT (mode, op0, op1);
449 }
450 return NULL;
451
452=== modified file 'gcc/config/arm/arm.c'
453--- old/gcc/config/arm/arm.c 2010-08-31 10:00:27 +0000
454+++ new/gcc/config/arm/arm.c 2010-09-01 13:29:58 +0000
455@@ -8116,8 +8116,6 @@
456 static bool
457 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
458 {
459- rtx i_pat, d_pat;
460-
461 /* Some true dependencies can have a higher cost depending
462 on precisely how certain input operands are used. */
463 if (REG_NOTE_KIND (link) == 0
464@@ -12166,6 +12164,60 @@
465 return result;
466 }
467
468+/* Convert instructions to their cc-clobbering variant if possible, since
469+ that allows us to use smaller encodings. */
470+
471+static void
472+thumb2_reorg (void)
473+{
474+ basic_block bb;
475+ regset_head live;
476+
477+ INIT_REG_SET (&live);
478+
479+ /* We are freeing block_for_insn in the toplev to keep compatibility
480+ with old MDEP_REORGS that are not CFG based. Recompute it now. */
481+ compute_bb_for_insn ();
482+ df_analyze ();
483+
484+ FOR_EACH_BB (bb)
485+ {
486+ rtx insn;
487+ COPY_REG_SET (&live, DF_LR_OUT (bb));
488+ df_simulate_initialize_backwards (bb, &live);
489+ FOR_BB_INSNS_REVERSE (bb, insn)
490+ {
491+ if (NONJUMP_INSN_P (insn)
492+ && !REGNO_REG_SET_P (&live, CC_REGNUM))
493+ {
494+ rtx pat = PATTERN (insn);
495+ if (GET_CODE (pat) == SET
496+ && low_register_operand (XEXP (pat, 0), SImode)
497+ && thumb_16bit_operator (XEXP (pat, 1), SImode)
498+ && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
499+ && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
500+ {
501+ rtx dst = XEXP (pat, 0);
502+ rtx src = XEXP (pat, 1);
503+ rtx op0 = XEXP (src, 0);
504+ if (rtx_equal_p (dst, op0)
505+ || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
506+ {
507+ rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
508+ rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
509+ rtvec vec = gen_rtvec (2, pat, clobber);
510+ PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
511+ INSN_CODE (insn) = -1;
512+ }
513+ }
514+ }
515+ if (NONDEBUG_INSN_P (insn))
516+ df_simulate_one_insn_backwards (bb, insn, &live);
517+ }
518+ }
519+ CLEAR_REG_SET (&live);
520+}
521+
522 /* Gcc puts the pool in the wrong place for ARM, since we can only
523 load addresses a limited distance around the pc. We do some
524 special munging to move the constant pool values to the correct
525@@ -12177,6 +12229,9 @@
526 HOST_WIDE_INT address = 0;
527 Mfix * fix;
528
529+ if (TARGET_THUMB2)
530+ thumb2_reorg ();
531+
532 minipool_fix_head = minipool_fix_tail = NULL;
533
534 /* The first insn must always be a note, or the code below won't
535
536=== modified file 'gcc/config/arm/arm.h'
537--- old/gcc/config/arm/arm.h 2010-08-13 11:11:15 +0000
538+++ new/gcc/config/arm/arm.h 2010-09-01 13:29:58 +0000
539@@ -1133,7 +1133,11 @@
540 }
541
542 /* Use different register alloc ordering for Thumb. */
543-#define ORDER_REGS_FOR_LOCAL_ALLOC arm_order_regs_for_local_alloc ()
544+#define ADJUST_REG_ALLOC_ORDER arm_order_regs_for_local_alloc ()
545+
546+/* Tell IRA to use the order we define rather than messing it up with its
547+ own cost calculations. */
548+#define HONOR_REG_ALLOC_ORDER
549
550 /* Interrupt functions can only use registers that have already been
551 saved by the prologue, even if they would normally be
552
553=== modified file 'gcc/config/arm/arm.md'
554--- old/gcc/config/arm/arm.md 2010-08-31 10:00:27 +0000
555+++ new/gcc/config/arm/arm.md 2010-09-01 13:29:58 +0000
556@@ -4074,7 +4074,7 @@
557
558 (define_split
559 [(set (match_operand:SI 0 "register_operand" "")
560- (zero_extend:SI (match_operand:HI 1 "register_operand" "l,m")))]
561+ (zero_extend:SI (match_operand:HI 1 "register_operand" "")))]
562 "!TARGET_THUMB2 && !arm_arch6"
563 [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 16)))
564 (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 16)))]
565
566=== modified file 'gcc/config/arm/thumb2.md'
567--- old/gcc/config/arm/thumb2.md 2010-08-31 10:00:27 +0000
568+++ new/gcc/config/arm/thumb2.md 2010-09-01 13:29:58 +0000
569@@ -1046,29 +1046,6 @@
570 }"
571 )
572
573-;; Peepholes and insns for 16-bit flag clobbering instructions.
574-;; The conditional forms of these instructions do not clobber CC.
575-;; However by the time peepholes are run it is probably too late to do
576-;; anything useful with this information.
577-(define_peephole2
578- [(set (match_operand:SI 0 "low_register_operand" "")
579- (match_operator:SI 3 "thumb_16bit_operator"
580- [(match_operand:SI 1 "low_register_operand" "")
581- (match_operand:SI 2 "low_register_operand" "")]))]
582- "TARGET_THUMB2
583- && (rtx_equal_p(operands[0], operands[1])
584- || GET_CODE(operands[3]) == PLUS
585- || GET_CODE(operands[3]) == MINUS)
586- && peep2_regno_dead_p(0, CC_REGNUM)"
587- [(parallel
588- [(set (match_dup 0)
589- (match_op_dup 3
590- [(match_dup 1)
591- (match_dup 2)]))
592- (clobber (reg:CC CC_REGNUM))])]
593- ""
594-)
595-
596 (define_insn "*thumb2_alusi3_short"
597 [(set (match_operand:SI 0 "s_register_operand" "=l")
598 (match_operator:SI 3 "thumb_16bit_operator"
599
600=== modified file 'gcc/config/avr/avr.h'
601--- old/gcc/config/avr/avr.h 2010-01-11 23:12:14 +0000
602+++ new/gcc/config/avr/avr.h 2010-09-01 13:29:58 +0000
603@@ -232,7 +232,7 @@
604 32,33,34,35 \
605 }
606
607-#define ORDER_REGS_FOR_LOCAL_ALLOC order_regs_for_local_alloc ()
608+#define ADJUST_REG_ALLOC_ORDER order_regs_for_local_alloc ()
609
610
611 #define HARD_REGNO_NREGS(REGNO, MODE) ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
612
613=== modified file 'gcc/config/i386/i386.h'
614--- old/gcc/config/i386/i386.h 2010-04-27 19:14:19 +0000
615+++ new/gcc/config/i386/i386.h 2010-09-01 13:29:58 +0000
616@@ -955,7 +955,7 @@
617 registers listed in CALL_USED_REGISTERS, keeping the others
618 available for storage of persistent values.
619
620- The ORDER_REGS_FOR_LOCAL_ALLOC actually overwrite the order,
621+ The ADJUST_REG_ALLOC_ORDER actually overwrite the order,
622 so this is just empty initializer for array. */
623
624 #define REG_ALLOC_ORDER \
625@@ -964,11 +964,11 @@
626 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \
627 48, 49, 50, 51, 52 }
628
629-/* ORDER_REGS_FOR_LOCAL_ALLOC is a macro which permits reg_alloc_order
630+/* ADJUST_REG_ALLOC_ORDER is a macro which permits reg_alloc_order
631 to be rearranged based on a particular function. When using sse math,
632 we want to allocate SSE before x87 registers and vice versa. */
633
634-#define ORDER_REGS_FOR_LOCAL_ALLOC x86_order_regs_for_local_alloc ()
635+#define ADJUST_REG_ALLOC_ORDER x86_order_regs_for_local_alloc ()
636
637
638 #define OVERRIDE_ABI_FORMAT(FNDECL) ix86_call_abi_override (FNDECL)
639
640=== modified file 'gcc/config/mips/mips.h'
641--- old/gcc/config/mips/mips.h 2009-10-29 17:39:52 +0000
642+++ new/gcc/config/mips/mips.h 2010-09-01 13:29:58 +0000
643@@ -2059,12 +2059,12 @@
644 182,183,184,185,186,187 \
645 }
646
647-/* ORDER_REGS_FOR_LOCAL_ALLOC is a macro which permits reg_alloc_order
648+/* ADJUST_REG_ALLOC_ORDER is a macro which permits reg_alloc_order
649 to be rearranged based on a particular function. On the mips16, we
650 want to allocate $24 (T_REG) before other registers for
651 instructions for which it is possible. */
652
653-#define ORDER_REGS_FOR_LOCAL_ALLOC mips_order_regs_for_local_alloc ()
654+#define ADJUST_REG_ALLOC_ORDER mips_order_regs_for_local_alloc ()
655
656 /* True if VALUE is an unsigned 6-bit number. */
657
658
659=== modified file 'gcc/config/picochip/picochip.h'
660--- old/gcc/config/picochip/picochip.h 2009-11-04 11:06:36 +0000
661+++ new/gcc/config/picochip/picochip.h 2010-09-01 13:29:58 +0000
662@@ -261,7 +261,7 @@
663 /* We can dynamically change the REG_ALLOC_ORDER using the following hook.
664 It would be desirable to change it for leaf functions so we can put
665 r12 at the end of this list.*/
666-#define ORDER_REGS_FOR_LOCAL_ALLOC picochip_order_regs_for_local_alloc ()
667+#define ADJUST_REG_ALLOC_ORDER picochip_order_regs_for_local_alloc ()
668
669 /* How Values Fit in Registers */
670
671
672=== modified file 'gcc/config/sparc/predicates.md'
673--- old/gcc/config/sparc/predicates.md 2009-02-20 15:20:38 +0000
674+++ new/gcc/config/sparc/predicates.md 2010-09-01 13:29:58 +0000
675@@ -1,5 +1,5 @@
676 ;; Predicate definitions for SPARC.
677-;; Copyright (C) 2005, 2007, 2008 Free Software Foundation, Inc.
678+;; Copyright (C) 2005, 2007, 2008, 2010 Free Software Foundation, Inc.
679 ;;
680 ;; This file is part of GCC.
681 ;;
682@@ -473,9 +473,3 @@
683 ;; and (xor ... (not ...)) to (not (xor ...)). */
684 (define_predicate "cc_arith_not_operator"
685 (match_code "and,ior"))
686-
687-;; Return true if OP is memory operand with just [%reg] addressing mode.
688-(define_predicate "memory_reg_operand"
689- (and (match_code "mem")
690- (and (match_operand 0 "memory_operand")
691- (match_test "REG_P (XEXP (op, 0))"))))
692
693=== modified file 'gcc/config/sparc/sparc.h'
694--- old/gcc/config/sparc/sparc.h 2010-04-02 18:54:46 +0000
695+++ new/gcc/config/sparc/sparc.h 2010-09-01 13:29:58 +0000
696@@ -1181,7 +1181,7 @@
697 96, 97, 98, 99, /* %fcc0-3 */ \
698 100, 0, 14, 30, 31, 101} /* %icc, %g0, %o6, %i6, %i7, %sfp */
699
700-#define ORDER_REGS_FOR_LOCAL_ALLOC order_regs_for_local_alloc ()
701+#define ADJUST_REG_ALLOC_ORDER order_regs_for_local_alloc ()
702
703 extern char sparc_leaf_regs[];
704 #define LEAF_REGISTERS sparc_leaf_regs
705
706=== modified file 'gcc/config/sparc/sync.md'
707--- old/gcc/config/sparc/sync.md 2009-02-20 15:20:38 +0000
708+++ new/gcc/config/sparc/sync.md 2010-09-01 13:29:58 +0000
709@@ -1,5 +1,5 @@
710 ;; GCC machine description for SPARC synchronization instructions.
711-;; Copyright (C) 2005, 2007, 2009
712+;; Copyright (C) 2005, 2007, 2009, 2010
713 ;; Free Software Foundation, Inc.
714 ;;
715 ;; This file is part of GCC.
716@@ -62,7 +62,7 @@
717
718 (define_expand "sync_compare_and_swap<mode>"
719 [(parallel
720- [(set (match_operand:I48MODE 0 "register_operand" "=r")
721+ [(set (match_operand:I48MODE 0 "register_operand" "")
722 (match_operand:I48MODE 1 "memory_operand" ""))
723 (set (match_dup 1)
724 (unspec_volatile:I48MODE
725@@ -71,7 +71,7 @@
726 UNSPECV_CAS))])]
727 "TARGET_V9"
728 {
729- if (! REG_P (XEXP (operands[1], 0)))
730+ if (!REG_P (XEXP (operands[1], 0)))
731 {
732 rtx addr = force_reg (Pmode, XEXP (operands[1], 0));
733 operands[1] = replace_equiv_address (operands[1], addr);
734@@ -81,20 +81,20 @@
735
736 (define_insn "*sync_compare_and_swap<mode>"
737 [(set (match_operand:I48MODE 0 "register_operand" "=r")
738- (match_operand:I48MODE 1 "memory_reg_operand" "+m"))
739- (set (match_dup 1)
740+ (mem:I48MODE (match_operand 1 "register_operand" "r")))
741+ (set (mem:I48MODE (match_dup 1))
742 (unspec_volatile:I48MODE
743 [(match_operand:I48MODE 2 "register_operand" "r")
744 (match_operand:I48MODE 3 "register_operand" "0")]
745 UNSPECV_CAS))]
746 "TARGET_V9 && (<MODE>mode == SImode || TARGET_ARCH64)"
747- "cas<modesuffix>\t%1, %2, %0"
748+ "cas<modesuffix>\t[%1], %2, %0"
749 [(set_attr "type" "multi")])
750
751 (define_insn "*sync_compare_and_swapdi_v8plus"
752 [(set (match_operand:DI 0 "register_operand" "=h")
753- (match_operand:DI 1 "memory_reg_operand" "+m"))
754- (set (match_dup 1)
755+ (mem:DI (match_operand 1 "register_operand" "r")))
756+ (set (mem:DI (match_dup 1))
757 (unspec_volatile:DI
758 [(match_operand:DI 2 "register_operand" "h")
759 (match_operand:DI 3 "register_operand" "0")]
760@@ -109,7 +109,7 @@
761 output_asm_insn ("srl\t%L2, 0, %L2", operands);
762 output_asm_insn ("sllx\t%H2, 32, %H3", operands);
763 output_asm_insn ("or\t%L2, %H3, %H3", operands);
764- output_asm_insn ("casx\t%1, %H3, %L3", operands);
765+ output_asm_insn ("casx\t[%1], %H3, %L3", operands);
766 return "srlx\t%L3, 32, %H3";
767 }
768 [(set_attr "type" "multi")
769
770=== modified file 'gcc/config/xtensa/xtensa.h'
771--- old/gcc/config/xtensa/xtensa.h 2009-09-23 21:24:42 +0000
772+++ new/gcc/config/xtensa/xtensa.h 2010-09-01 13:29:58 +0000
773@@ -286,7 +286,7 @@
774 incoming argument in a2 is live throughout the function and
775 local-alloc decides to use a2, then the incoming argument must
776 either be spilled or copied to another register. To get around
777- this, we define ORDER_REGS_FOR_LOCAL_ALLOC to redefine
778+ this, we define ADJUST_REG_ALLOC_ORDER to redefine
779 reg_alloc_order for leaf functions such that lowest numbered
780 registers are used first with the exception that the incoming
781 argument registers are not used until after other register choices
782@@ -300,7 +300,7 @@
783 35, \
784 }
785
786-#define ORDER_REGS_FOR_LOCAL_ALLOC order_regs_for_local_alloc ()
787+#define ADJUST_REG_ALLOC_ORDER order_regs_for_local_alloc ()
788
789 /* For Xtensa, the only point of this is to prevent GCC from otherwise
790 giving preference to call-used registers. To minimize window
791
792=== modified file 'gcc/doc/tm.texi'
793--- old/gcc/doc/tm.texi 2010-08-13 11:53:46 +0000
794+++ new/gcc/doc/tm.texi 2010-09-01 13:29:58 +0000
795@@ -2093,7 +2093,7 @@
796 the highest numbered allocable register first.
797 @end defmac
798
799-@defmac ORDER_REGS_FOR_LOCAL_ALLOC
800+@defmac ADJUST_REG_ALLOC_ORDER
801 A C statement (sans semicolon) to choose the order in which to allocate
802 hard registers for pseudo-registers local to a basic block.
803
804@@ -2107,6 +2107,15 @@
805 On most machines, it is not necessary to define this macro.
806 @end defmac
807
808+@defmac HONOR_REG_ALLOC_ORDER
809+Normally, IRA tries to estimate the costs for saving a register in the
810+prologue and restoring it in the epilogue. This discourages it from
811+using call-saved registers. If a machine wants to ensure that IRA
812+allocates registers in the order given by REG_ALLOC_ORDER even if some
813+call-saved registers appear earlier than call-used ones, this macro
814+should be defined.
815+@end defmac
816+
817 @defmac IRA_HARD_REGNO_ADD_COST_MULTIPLIER (@var{regno})
818 In some case register allocation order is not enough for the
819 Integrated Register Allocator (@acronym{IRA}) to generate a good code.
820
821=== modified file 'gcc/expmed.c'
822--- old/gcc/expmed.c 2010-03-03 22:10:17 +0000
823+++ new/gcc/expmed.c 2010-09-01 13:29:58 +0000
824@@ -3253,6 +3253,55 @@
825 gcc_assert (op0);
826 return op0;
827 }
828+
829+/* Perform a widening multiplication and return an rtx for the result.
830+ MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
831+ TARGET is a suggestion for where to store the result (an rtx).
832+ THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
833+ or smul_widen_optab.
834+
835+ We check specially for a constant integer as OP1, comparing the
836+ cost of a widening multiply against the cost of a sequence of shifts
837+ and adds. */
838+
839+rtx
840+expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
841+ int unsignedp, optab this_optab)
842+{
843+ bool speed = optimize_insn_for_speed_p ();
844+
845+ if (CONST_INT_P (op1)
846+ && (INTVAL (op1) >= 0
847+ || GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT))
848+ {
849+ HOST_WIDE_INT coeff = INTVAL (op1);
850+ int max_cost;
851+ enum mult_variant variant;
852+ struct algorithm algorithm;
853+
854+ /* Special case powers of two. */
855+ if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
856+ {
857+ op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
858+ return expand_shift (LSHIFT_EXPR, mode, op0,
859+ build_int_cst (NULL_TREE, floor_log2 (coeff)),
860+ target, unsignedp);
861+ }
862+
863+ /* Exclude cost of op0 from max_cost to match the cost
864+ calculation of the synth_mult. */
865+ max_cost = mul_widen_cost[speed][mode];
866+ if (choose_mult_variant (mode, coeff, &algorithm, &variant,
867+ max_cost))
868+ {
869+ op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
870+ return expand_mult_const (mode, op0, coeff, target,
871+ &algorithm, variant);
872+ }
873+ }
874+ return expand_binop (mode, this_optab, op0, op1, target,
875+ unsignedp, OPTAB_LIB_WIDEN);
876+}
877
878 /* Return the smallest n such that 2**n >= X. */
879
880
881=== modified file 'gcc/expr.c'
882--- old/gcc/expr.c 2010-08-20 16:21:01 +0000
883+++ new/gcc/expr.c 2010-09-01 13:29:58 +0000
884@@ -7224,7 +7224,6 @@
885 optab this_optab;
886 rtx subtarget, original_target;
887 int ignore;
888- tree subexp0, subexp1;
889 bool reduce_bit_field;
890 gimple subexp0_def, subexp1_def;
891 tree top0, top1;
892@@ -7679,13 +7678,7 @@
893
894 goto binop2;
895
896- case MULT_EXPR:
897- /* If this is a fixed-point operation, then we cannot use the code
898- below because "expand_mult" doesn't support sat/no-sat fixed-point
899- multiplications. */
900- if (ALL_FIXED_POINT_MODE_P (mode))
901- goto binop;
902-
903+ case WIDEN_MULT_EXPR:
904 /* If first operand is constant, swap them.
905 Thus the following special case checks need only
906 check the second operand. */
907@@ -7696,96 +7689,35 @@
908 treeop1 = t1;
909 }
910
911- /* Attempt to return something suitable for generating an
912- indexed address, for machines that support that. */
913-
914- if (modifier == EXPAND_SUM && mode == ptr_mode
915- && host_integerp (treeop1, 0))
916- {
917- tree exp1 = treeop1;
918-
919- op0 = expand_expr (treeop0, subtarget, VOIDmode,
920- EXPAND_SUM);
921-
922- if (!REG_P (op0))
923- op0 = force_operand (op0, NULL_RTX);
924- if (!REG_P (op0))
925- op0 = copy_to_mode_reg (mode, op0);
926-
927- return REDUCE_BIT_FIELD (gen_rtx_MULT (mode, op0,
928- gen_int_mode (tree_low_cst (exp1, 0),
929- TYPE_MODE (TREE_TYPE (exp1)))));
930- }
931-
932- if (modifier == EXPAND_STACK_PARM)
933- target = 0;
934-
935- /* Check for multiplying things that have been extended
936- from a narrower type. If this machine supports multiplying
937- in that narrower type with a result in the desired type,
938- do it that way, and avoid the explicit type-conversion. */
939-
940- subexp0 = treeop0;
941- subexp1 = treeop1;
942- subexp0_def = get_def_for_expr (subexp0, NOP_EXPR);
943- subexp1_def = get_def_for_expr (subexp1, NOP_EXPR);
944- top0 = top1 = NULL_TREE;
945-
946 /* First, check if we have a multiplication of one signed and one
947 unsigned operand. */
948- if (subexp0_def
949- && (top0 = gimple_assign_rhs1 (subexp0_def))
950- && subexp1_def
951- && (top1 = gimple_assign_rhs1 (subexp1_def))
952- && TREE_CODE (type) == INTEGER_TYPE
953- && (TYPE_PRECISION (TREE_TYPE (top0))
954- < TYPE_PRECISION (TREE_TYPE (subexp0)))
955- && (TYPE_PRECISION (TREE_TYPE (top0))
956- == TYPE_PRECISION (TREE_TYPE (top1)))
957- && (TYPE_UNSIGNED (TREE_TYPE (top0))
958- != TYPE_UNSIGNED (TREE_TYPE (top1))))
959+ if (TREE_CODE (treeop1) != INTEGER_CST
960+ && (TYPE_UNSIGNED (TREE_TYPE (treeop0))
961+ != TYPE_UNSIGNED (TREE_TYPE (treeop1))))
962 {
963- enum machine_mode innermode
964- = TYPE_MODE (TREE_TYPE (top0));
965+ enum machine_mode innermode = TYPE_MODE (TREE_TYPE (treeop0));
966 this_optab = usmul_widen_optab;
967- if (mode == GET_MODE_WIDER_MODE (innermode))
968+ if (mode == GET_MODE_2XWIDER_MODE (innermode))
969 {
970 if (optab_handler (this_optab, mode)->insn_code != CODE_FOR_nothing)
971 {
972- if (TYPE_UNSIGNED (TREE_TYPE (top0)))
973- expand_operands (top0, top1, NULL_RTX, &op0, &op1,
974+ if (TYPE_UNSIGNED (TREE_TYPE (treeop0)))
975+ expand_operands (treeop0, treeop1, subtarget, &op0, &op1,
976 EXPAND_NORMAL);
977 else
978- expand_operands (top0, top1, NULL_RTX, &op1, &op0,
979+ expand_operands (treeop0, treeop1, subtarget, &op1, &op0,
980 EXPAND_NORMAL);
981-
982 goto binop3;
983 }
984 }
985 }
986- /* Check for a multiplication with matching signedness. If
987- valid, TOP0 and TOP1 were set in the previous if
988- condition. */
989- else if (top0
990- && TREE_CODE (type) == INTEGER_TYPE
991- && (TYPE_PRECISION (TREE_TYPE (top0))
992- < TYPE_PRECISION (TREE_TYPE (subexp0)))
993- && ((TREE_CODE (subexp1) == INTEGER_CST
994- && int_fits_type_p (subexp1, TREE_TYPE (top0))
995- /* Don't use a widening multiply if a shift will do. */
996- && ((GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (subexp1)))
997- > HOST_BITS_PER_WIDE_INT)
998- || exact_log2 (TREE_INT_CST_LOW (subexp1)) < 0))
999- ||
1000- (top1
1001- && (TYPE_PRECISION (TREE_TYPE (top1))
1002- == TYPE_PRECISION (TREE_TYPE (top0))
1003- /* If both operands are extended, they must either both
1004- be zero-extended or both be sign-extended. */
1005- && (TYPE_UNSIGNED (TREE_TYPE (top1))
1006- == TYPE_UNSIGNED (TREE_TYPE (top0)))))))
1007+ /* Check for a multiplication with matching signedness. */
1008+ else if ((TREE_CODE (treeop1) == INTEGER_CST
1009+ && int_fits_type_p (treeop1, TREE_TYPE (treeop0)))
1010+ || (TYPE_UNSIGNED (TREE_TYPE (treeop1))
1011+ == TYPE_UNSIGNED (TREE_TYPE (treeop0))))
1012 {
1013- tree op0type = TREE_TYPE (top0);
1014+ tree op0type = TREE_TYPE (treeop0);
1015 enum machine_mode innermode = TYPE_MODE (op0type);
1016 bool zextend_p = TYPE_UNSIGNED (op0type);
1017 optab other_optab = zextend_p ? smul_widen_optab : umul_widen_optab;
1018@@ -7795,24 +7727,22 @@
1019 {
1020 if (optab_handler (this_optab, mode)->insn_code != CODE_FOR_nothing)
1021 {
1022- if (TREE_CODE (subexp1) == INTEGER_CST)
1023- expand_operands (top0, subexp1, NULL_RTX, &op0, &op1,
1024- EXPAND_NORMAL);
1025- else
1026- expand_operands (top0, top1, NULL_RTX, &op0, &op1,
1027- EXPAND_NORMAL);
1028- goto binop3;
1029+ expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1,
1030+ EXPAND_NORMAL);
1031+ temp = expand_widening_mult (mode, op0, op1, target,
1032+ unsignedp, this_optab);
1033+ return REDUCE_BIT_FIELD (temp);
1034 }
1035- else if (optab_handler (other_optab, mode)->insn_code != CODE_FOR_nothing
1036- && innermode == word_mode)
1037+ if (optab_handler (other_optab, mode)->insn_code != CODE_FOR_nothing
1038+ && innermode == word_mode)
1039 {
1040 rtx htem, hipart;
1041- op0 = expand_normal (top0);
1042- if (TREE_CODE (subexp1) == INTEGER_CST)
1043+ op0 = expand_normal (treeop0);
1044+ if (TREE_CODE (treeop1) == INTEGER_CST)
1045 op1 = convert_modes (innermode, mode,
1046- expand_normal (subexp1), unsignedp);
1047+ expand_normal (treeop1), unsignedp);
1048 else
1049- op1 = expand_normal (top1);
1050+ op1 = expand_normal (treeop1);
1051 temp = expand_binop (mode, other_optab, op0, op1, target,
1052 unsignedp, OPTAB_LIB_WIDEN);
1053 hipart = gen_highpart (innermode, temp);
1054@@ -7825,7 +7755,53 @@
1055 }
1056 }
1057 }
1058- expand_operands (subexp0, subexp1, subtarget, &op0, &op1, EXPAND_NORMAL);
1059+ treeop0 = fold_build1 (CONVERT_EXPR, type, treeop0);
1060+ treeop1 = fold_build1 (CONVERT_EXPR, type, treeop1);
1061+ expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
1062+ return REDUCE_BIT_FIELD (expand_mult (mode, op0, op1, target, unsignedp));
1063+
1064+ case MULT_EXPR:
1065+ /* If this is a fixed-point operation, then we cannot use the code
1066+ below because "expand_mult" doesn't support sat/no-sat fixed-point
1067+ multiplications. */
1068+ if (ALL_FIXED_POINT_MODE_P (mode))
1069+ goto binop;
1070+
1071+ /* If first operand is constant, swap them.
1072+ Thus the following special case checks need only
1073+ check the second operand. */
1074+ if (TREE_CODE (treeop0) == INTEGER_CST)
1075+ {
1076+ tree t1 = treeop0;
1077+ treeop0 = treeop1;
1078+ treeop1 = t1;
1079+ }
1080+
1081+ /* Attempt to return something suitable for generating an
1082+ indexed address, for machines that support that. */
1083+
1084+ if (modifier == EXPAND_SUM && mode == ptr_mode
1085+ && host_integerp (treeop1, 0))
1086+ {
1087+ tree exp1 = treeop1;
1088+
1089+ op0 = expand_expr (treeop0, subtarget, VOIDmode,
1090+ EXPAND_SUM);
1091+
1092+ if (!REG_P (op0))
1093+ op0 = force_operand (op0, NULL_RTX);
1094+ if (!REG_P (op0))
1095+ op0 = copy_to_mode_reg (mode, op0);
1096+
1097+ return REDUCE_BIT_FIELD (gen_rtx_MULT (mode, op0,
1098+ gen_int_mode (tree_low_cst (exp1, 0),
1099+ TYPE_MODE (TREE_TYPE (exp1)))));
1100+ }
1101+
1102+ if (modifier == EXPAND_STACK_PARM)
1103+ target = 0;
1104+
1105+ expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
1106 return REDUCE_BIT_FIELD (expand_mult (mode, op0, op1, target, unsignedp));
1107
1108 case TRUNC_DIV_EXPR:
1109@@ -8311,6 +8287,8 @@
1110 location_t loc = EXPR_LOCATION (exp);
1111 struct separate_ops ops;
1112 tree treeop0, treeop1, treeop2;
1113+ tree ssa_name = NULL_TREE;
1114+ gimple g;
1115
1116 type = TREE_TYPE (exp);
1117 mode = TYPE_MODE (type);
1118@@ -8423,15 +8401,17 @@
1119 base variable. This unnecessarily allocates a pseudo, see how we can
1120 reuse it, if partition base vars have it set already. */
1121 if (!currently_expanding_to_rtl)
1122- return expand_expr_real_1 (SSA_NAME_VAR (exp), target, tmode, modifier, NULL);
1123- {
1124- gimple g = get_gimple_for_ssa_name (exp);
1125- if (g)
1126- return expand_expr_real (gimple_assign_rhs_to_tree (g), target,
1127- tmode, modifier, NULL);
1128- }
1129- decl_rtl = get_rtx_for_ssa_name (exp);
1130- exp = SSA_NAME_VAR (exp);
1131+ return expand_expr_real_1 (SSA_NAME_VAR (exp), target, tmode, modifier,
1132+ NULL);
1133+
1134+ g = get_gimple_for_ssa_name (exp);
1135+ if (g)
1136+ return expand_expr_real (gimple_assign_rhs_to_tree (g), target, tmode,
1137+ modifier, NULL);
1138+
1139+ ssa_name = exp;
1140+ decl_rtl = get_rtx_for_ssa_name (ssa_name);
1141+ exp = SSA_NAME_VAR (ssa_name);
1142 goto expand_decl_rtl;
1143
1144 case PARM_DECL:
1145@@ -8533,15 +8513,21 @@
1146 /* If the mode of DECL_RTL does not match that of the decl, it
1147 must be a promoted value. We return a SUBREG of the wanted mode,
1148 but mark it so that we know that it was already extended. */
1149-
1150- if (REG_P (decl_rtl)
1151- && GET_MODE (decl_rtl) != DECL_MODE (exp))
1152+ if (REG_P (decl_rtl) && GET_MODE (decl_rtl) != DECL_MODE (exp))
1153 {
1154 enum machine_mode pmode;
1155
1156- /* Get the signedness used for this variable. Ensure we get the
1157- same mode we got when the variable was declared. */
1158- pmode = promote_decl_mode (exp, &unsignedp);
1159+ /* Get the signedness to be used for this variable. Ensure we get
1160+ the same mode we got when the variable was declared. */
1161+ if (code == SSA_NAME
1162+ && (g = SSA_NAME_DEF_STMT (ssa_name))
1163+ && gimple_code (g) == GIMPLE_CALL)
1164+ pmode = promote_function_mode (type, mode, &unsignedp,
1165+ TREE_TYPE
1166+ (TREE_TYPE (gimple_call_fn (g))),
1167+ 2);
1168+ else
1169+ pmode = promote_decl_mode (exp, &unsignedp);
1170 gcc_assert (GET_MODE (decl_rtl) == pmode);
1171
1172 temp = gen_lowpart_SUBREG (mode, decl_rtl);
1173
1174=== modified file 'gcc/fold-const.c'
1175--- old/gcc/fold-const.c 2010-04-06 09:36:57 +0000
1176+++ new/gcc/fold-const.c 2010-09-01 13:29:58 +0000
1177@@ -5741,6 +5741,76 @@
1178 const_binop (BIT_XOR_EXPR, c, temp, 0));
1179 }
1180
1181+/* For an expression that has the form
1182+ (A && B) || ~B
1183+ or
1184+ (A || B) && ~B,
1185+ we can drop one of the inner expressions and simplify to
1186+ A || ~B
1187+ or
1188+ A && ~B
1189+ LOC is the location of the resulting expression. OP is the inner
1190+ logical operation; the left-hand side in the examples above, while CMPOP
1191+ is the right-hand side. RHS_ONLY is used to prevent us from accidentally
1192+ removing a condition that guards another, as in
1193+ (A != NULL && A->...) || A == NULL
1194+ which we must not transform. If RHS_ONLY is true, only eliminate the
1195+ right-most operand of the inner logical operation. */
1196+
1197+static tree
1198+merge_truthop_with_opposite_arm (location_t loc, tree op, tree cmpop,
1199+ bool rhs_only)
1200+{
1201+ tree type = TREE_TYPE (cmpop);
1202+ enum tree_code code = TREE_CODE (cmpop);
1203+ enum tree_code truthop_code = TREE_CODE (op);
1204+ tree lhs = TREE_OPERAND (op, 0);
1205+ tree rhs = TREE_OPERAND (op, 1);
1206+ tree orig_lhs = lhs, orig_rhs = rhs;
1207+ enum tree_code rhs_code = TREE_CODE (rhs);
1208+ enum tree_code lhs_code = TREE_CODE (lhs);
1209+ enum tree_code inv_code;
1210+
1211+ if (TREE_SIDE_EFFECTS (op) || TREE_SIDE_EFFECTS (cmpop))
1212+ return NULL_TREE;
1213+
1214+ if (TREE_CODE_CLASS (code) != tcc_comparison)
1215+ return NULL_TREE;
1216+
1217+ if (rhs_code == truthop_code)
1218+ {
1219+ tree newrhs = merge_truthop_with_opposite_arm (loc, rhs, cmpop, rhs_only);
1220+ if (newrhs != NULL_TREE)
1221+ {
1222+ rhs = newrhs;
1223+ rhs_code = TREE_CODE (rhs);
1224+ }
1225+ }
1226+ if (lhs_code == truthop_code && !rhs_only)
1227+ {
1228+ tree newlhs = merge_truthop_with_opposite_arm (loc, lhs, cmpop, false);
1229+ if (newlhs != NULL_TREE)
1230+ {
1231+ lhs = newlhs;
1232+ lhs_code = TREE_CODE (lhs);
1233+ }
1234+ }
1235+
1236+ inv_code = invert_tree_comparison (code, HONOR_NANS (TYPE_MODE (type)));
1237+ if (inv_code == rhs_code
1238+ && operand_equal_p (TREE_OPERAND (rhs, 0), TREE_OPERAND (cmpop, 0), 0)
1239+ && operand_equal_p (TREE_OPERAND (rhs, 1), TREE_OPERAND (cmpop, 1), 0))
1240+ return lhs;
1241+ if (!rhs_only && inv_code == lhs_code
1242+ && operand_equal_p (TREE_OPERAND (lhs, 0), TREE_OPERAND (cmpop, 0), 0)
1243+ && operand_equal_p (TREE_OPERAND (lhs, 1), TREE_OPERAND (cmpop, 1), 0))
1244+ return rhs;
1245+ if (rhs != orig_rhs || lhs != orig_lhs)
1246+ return fold_build2_loc (loc, truthop_code, TREE_TYPE (cmpop),
1247+ lhs, rhs);
1248+ return NULL_TREE;
1249+}
1250+
1251 /* Find ways of folding logical expressions of LHS and RHS:
1252 Try to merge two comparisons to the same innermost item.
1253 Look for range tests like "ch >= '0' && ch <= '9'".
1254@@ -12539,6 +12609,22 @@
1255 if (0 != (tem = fold_range_test (loc, code, type, op0, op1)))
1256 return tem;
1257
1258+ if ((code == TRUTH_ANDIF_EXPR && TREE_CODE (arg0) == TRUTH_ORIF_EXPR)
1259+ || (code == TRUTH_ORIF_EXPR && TREE_CODE (arg0) == TRUTH_ANDIF_EXPR))
1260+ {
1261+ tem = merge_truthop_with_opposite_arm (loc, arg0, arg1, true);
1262+ if (tem)
1263+ return fold_build2_loc (loc, code, type, tem, arg1);
1264+ }
1265+
1266+ if ((code == TRUTH_ANDIF_EXPR && TREE_CODE (arg1) == TRUTH_ORIF_EXPR)
1267+ || (code == TRUTH_ORIF_EXPR && TREE_CODE (arg1) == TRUTH_ANDIF_EXPR))
1268+ {
1269+ tem = merge_truthop_with_opposite_arm (loc, arg1, arg0, false);
1270+ if (tem)
1271+ return fold_build2_loc (loc, code, type, arg0, tem);
1272+ }
1273+
1274 /* Check for the possibility of merging component references. If our
1275 lhs is another similar operation, try to merge its rhs with our
1276 rhs. Then try to merge our lhs and rhs. */
1277
1278=== modified file 'gcc/ifcvt.c'
1279--- old/gcc/ifcvt.c 2010-04-02 18:54:46 +0000
1280+++ new/gcc/ifcvt.c 2010-09-01 13:29:58 +0000
1281@@ -385,7 +385,11 @@
1282 rtx false_expr; /* test for then block insns */
1283 rtx true_prob_val; /* probability of else block */
1284 rtx false_prob_val; /* probability of then block */
1285- int n_insns;
1286+ rtx then_last_head = NULL_RTX; /* Last match at the head of THEN */
1287+ rtx else_last_head = NULL_RTX; /* Last match at the head of ELSE */
1288+ rtx then_first_tail = NULL_RTX; /* First match at the tail of THEN */
1289+ rtx else_first_tail = NULL_RTX; /* First match at the tail of ELSE */
1290+ int then_n_insns, else_n_insns, n_insns;
1291 enum rtx_code false_code;
1292
1293 /* If test is comprised of && or || elements, and we've failed at handling
1294@@ -418,15 +422,78 @@
1295 number of insns and see if it is small enough to convert. */
1296 then_start = first_active_insn (then_bb);
1297 then_end = last_active_insn (then_bb, TRUE);
1298- n_insns = ce_info->num_then_insns = count_bb_insns (then_bb);
1299+ then_n_insns = ce_info->num_then_insns = count_bb_insns (then_bb);
1300+ n_insns = then_n_insns;
1301 max = MAX_CONDITIONAL_EXECUTE;
1302
1303 if (else_bb)
1304 {
1305+ int n_matching;
1306+
1307 max *= 2;
1308 else_start = first_active_insn (else_bb);
1309 else_end = last_active_insn (else_bb, TRUE);
1310- n_insns += ce_info->num_else_insns = count_bb_insns (else_bb);
1311+ else_n_insns = ce_info->num_else_insns = count_bb_insns (else_bb);
1312+ n_insns += else_n_insns;
1313+
1314+ /* Look for matching sequences at the head and tail of the two blocks,
1315+ and limit the range of insns to be converted if possible. */
1316+ n_matching = flow_find_cross_jump (then_bb, else_bb,
1317+ &then_first_tail, &else_first_tail);
1318+ if (then_first_tail == BB_HEAD (then_bb))
1319+ then_start = then_end = NULL_RTX;
1320+ if (else_first_tail == BB_HEAD (else_bb))
1321+ else_start = else_end = NULL_RTX;
1322+
1323+ if (n_matching > 0)
1324+ {
1325+ if (then_end)
1326+ then_end = prev_active_insn (then_first_tail);
1327+ if (else_end)
1328+ else_end = prev_active_insn (else_first_tail);
1329+ n_insns -= 2 * n_matching;
1330+ }
1331+
1332+ if (then_start && else_start)
1333+ {
1334+ int longest_match = MIN (then_n_insns - n_matching,
1335+ else_n_insns - n_matching);
1336+ n_matching
1337+ = flow_find_head_matching_sequence (then_bb, else_bb,
1338+ &then_last_head,
1339+ &else_last_head,
1340+ longest_match);
1341+
1342+ if (n_matching > 0)
1343+ {
1344+ rtx insn;
1345+
1346+ /* We won't pass the insns in the head sequence to
1347+ cond_exec_process_insns, so we need to test them here
1348+ to make sure that they don't clobber the condition. */
1349+ for (insn = BB_HEAD (then_bb);
1350+ insn != NEXT_INSN (then_last_head);
1351+ insn = NEXT_INSN (insn))
1352+ if (!LABEL_P (insn) && !NOTE_P (insn)
1353+ && !DEBUG_INSN_P (insn)
1354+ && modified_in_p (test_expr, insn))
1355+ return FALSE;
1356+ }
1357+
1358+ if (then_last_head == then_end)
1359+ then_start = then_end = NULL_RTX;
1360+ if (else_last_head == else_end)
1361+ else_start = else_end = NULL_RTX;
1362+
1363+ if (n_matching > 0)
1364+ {
1365+ if (then_start)
1366+ then_start = next_active_insn (then_last_head);
1367+ if (else_start)
1368+ else_start = next_active_insn (else_last_head);
1369+ n_insns -= 2 * n_matching;
1370+ }
1371+ }
1372 }
1373
1374 if (n_insns > max)
1375@@ -570,7 +637,21 @@
1376 fprintf (dump_file, "%d insn%s converted to conditional execution.\n",
1377 n_insns, (n_insns == 1) ? " was" : "s were");
1378
1379- /* Merge the blocks! */
1380+ /* Merge the blocks! If we had matching sequences, make sure to delete one
1381+ copy at the appropriate location first: delete the copy in the THEN branch
1382+ for a tail sequence so that the remaining one is executed last for both
1383+ branches, and delete the copy in the ELSE branch for a head sequence so
1384+ that the remaining one is executed first for both branches. */
1385+ if (then_first_tail)
1386+ {
1387+ rtx from = then_first_tail;
1388+ if (!INSN_P (from))
1389+ from = next_active_insn (from);
1390+ delete_insn_chain (from, BB_END (then_bb), false);
1391+ }
1392+ if (else_last_head)
1393+ delete_insn_chain (first_active_insn (else_bb), else_last_head, false);
1394+
1395 merge_if_block (ce_info);
1396 cond_exec_changed_p = TRUE;
1397 return TRUE;
1398
1399=== modified file 'gcc/ira-color.c'
1400--- old/gcc/ira-color.c 2010-04-02 18:54:46 +0000
1401+++ new/gcc/ira-color.c 2010-09-01 13:29:58 +0000
1402@@ -441,14 +441,18 @@
1403 {
1404 HARD_REG_SET conflicting_regs;
1405 int i, j, k, hard_regno, best_hard_regno, class_size;
1406- int cost, mem_cost, min_cost, full_cost, min_full_cost, add_cost;
1407+ int cost, mem_cost, min_cost, full_cost, min_full_cost;
1408 int *a_costs;
1409 int *conflict_costs;
1410- enum reg_class cover_class, rclass, conflict_cover_class;
1411+ enum reg_class cover_class, conflict_cover_class;
1412 enum machine_mode mode;
1413 ira_allocno_t a, conflict_allocno;
1414 ira_allocno_conflict_iterator aci;
1415 static int costs[FIRST_PSEUDO_REGISTER], full_costs[FIRST_PSEUDO_REGISTER];
1416+#ifndef HONOR_REG_ALLOC_ORDER
1417+ enum reg_class rclass;
1418+ int add_cost;
1419+#endif
1420 #ifdef STACK_REGS
1421 bool no_stack_reg_p;
1422 #endif
1423@@ -586,6 +590,7 @@
1424 continue;
1425 cost = costs[i];
1426 full_cost = full_costs[i];
1427+#ifndef HONOR_REG_ALLOC_ORDER
1428 if (! allocated_hardreg_p[hard_regno]
1429 && ira_hard_reg_not_in_set_p (hard_regno, mode, call_used_reg_set))
1430 /* We need to save/restore the hard register in
1431@@ -598,6 +603,7 @@
1432 cost += add_cost;
1433 full_cost += add_cost;
1434 }
1435+#endif
1436 if (min_cost > cost)
1437 min_cost = cost;
1438 if (min_full_cost > full_cost)
1439
1440=== modified file 'gcc/ira-costs.c'
1441--- old/gcc/ira-costs.c 2010-08-13 11:40:17 +0000
1442+++ new/gcc/ira-costs.c 2010-09-01 13:29:58 +0000
1443@@ -33,6 +33,7 @@
1444 #include "addresses.h"
1445 #include "insn-config.h"
1446 #include "recog.h"
1447+#include "reload.h"
1448 #include "toplev.h"
1449 #include "target.h"
1450 #include "params.h"
1451@@ -123,6 +124,10 @@
1452 /* Record cover register class of each allocno with the same regno. */
1453 static enum reg_class *regno_cover_class;
1454
1455+/* Record cost gains for not allocating a register with an invariant
1456+ equivalence. */
1457+static int *regno_equiv_gains;
1458+
1459 /* Execution frequency of the current insn. */
1460 static int frequency;
1461
1462@@ -1263,6 +1268,7 @@
1463 #ifdef FORBIDDEN_INC_DEC_CLASSES
1464 int inc_dec_p = false;
1465 #endif
1466+ int equiv_savings = regno_equiv_gains[i];
1467
1468 if (! allocno_p)
1469 {
1470@@ -1311,6 +1317,15 @@
1471 #endif
1472 }
1473 }
1474+ if (equiv_savings < 0)
1475+ temp_costs->mem_cost = -equiv_savings;
1476+ else if (equiv_savings > 0)
1477+ {
1478+ temp_costs->mem_cost = 0;
1479+ for (k = 0; k < cost_classes_num; k++)
1480+ temp_costs->cost[k] += equiv_savings;
1481+ }
1482+
1483 best_cost = (1 << (HOST_BITS_PER_INT - 2)) - 1;
1484 best = ALL_REGS;
1485 alt_class = NO_REGS;
1486@@ -1680,6 +1695,8 @@
1487 regno_cover_class
1488 = (enum reg_class *) ira_allocate (sizeof (enum reg_class)
1489 * max_reg_num ());
1490+ regno_equiv_gains = (int *) ira_allocate (sizeof (int) * max_reg_num ());
1491+ memset (regno_equiv_gains, 0, sizeof (int) * max_reg_num ());
1492 }
1493
1494 /* Common finalization function for ira_costs and
1495@@ -1687,6 +1704,7 @@
1496 static void
1497 finish_costs (void)
1498 {
1499+ ira_free (regno_equiv_gains);
1500 ira_free (regno_cover_class);
1501 ira_free (pref_buffer);
1502 ira_free (costs);
1503@@ -1702,6 +1720,7 @@
1504 init_costs ();
1505 total_allocno_costs = (struct costs *) ira_allocate (max_struct_costs_size
1506 * ira_allocnos_num);
1507+ calculate_elim_costs_all_insns ();
1508 find_costs_and_classes (ira_dump_file);
1509 setup_allocno_cover_class_and_costs ();
1510 finish_costs ();
1511@@ -1775,3 +1794,16 @@
1512 ALLOCNO_COVER_CLASS_COST (a) = min_cost;
1513 }
1514 }
1515+
1516+/* Add COST to the estimated gain for eliminating REGNO with its
1517+ equivalence. If COST is zero, record that no such elimination is
1518+ possible. */
1519+
1520+void
1521+ira_adjust_equiv_reg_cost (unsigned regno, int cost)
1522+{
1523+ if (cost == 0)
1524+ regno_equiv_gains[regno] = 0;
1525+ else
1526+ regno_equiv_gains[regno] += cost;
1527+}
1528
1529=== modified file 'gcc/ira.c'
1530--- old/gcc/ira.c 2010-08-12 13:51:16 +0000
1531+++ new/gcc/ira.c 2010-09-01 13:29:58 +0000
1532@@ -431,9 +431,6 @@
1533 HARD_REG_SET processed_hard_reg_set;
1534
1535 ira_assert (SHRT_MAX >= FIRST_PSEUDO_REGISTER);
1536- /* We could call ORDER_REGS_FOR_LOCAL_ALLOC here (it is usually
1537- putting hard callee-used hard registers first). But our
1538- heuristics work better. */
1539 for (cl = (int) N_REG_CLASSES - 1; cl >= 0; cl--)
1540 {
1541 COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]);
1542@@ -490,6 +487,9 @@
1543 static void
1544 setup_alloc_regs (bool use_hard_frame_p)
1545 {
1546+#ifdef ADJUST_REG_ALLOC_ORDER
1547+ ADJUST_REG_ALLOC_ORDER;
1548+#endif
1549 COPY_HARD_REG_SET (no_unit_alloc_regs, fixed_reg_set);
1550 if (! use_hard_frame_p)
1551 SET_HARD_REG_BIT (no_unit_alloc_regs, HARD_FRAME_POINTER_REGNUM);
1552@@ -1533,12 +1533,8 @@
1553
1554 x = XEXP (note, 0);
1555
1556- if (! function_invariant_p (x)
1557- || ! flag_pic
1558- /* A function invariant is often CONSTANT_P but may
1559- include a register. We promise to only pass CONSTANT_P
1560- objects to LEGITIMATE_PIC_OPERAND_P. */
1561- || (CONSTANT_P (x) && LEGITIMATE_PIC_OPERAND_P (x)))
1562+ if (! CONSTANT_P (x)
1563+ || ! flag_pic || LEGITIMATE_PIC_OPERAND_P (x))
1564 {
1565 /* It can happen that a REG_EQUIV note contains a MEM
1566 that is not a legitimate memory operand. As later
1567@@ -3097,8 +3093,19 @@
1568 if (dump_file)
1569 print_insn_chains (dump_file);
1570 }
1571-
1572
1573+/* Allocate memory for reg_equiv_memory_loc. */
1574+static void
1575+init_reg_equiv_memory_loc (void)
1576+{
1577+ max_regno = max_reg_num ();
1578+
1579+ /* And the reg_equiv_memory_loc array. */
1580+ VEC_safe_grow (rtx, gc, reg_equiv_memory_loc_vec, max_regno);
1581+ memset (VEC_address (rtx, reg_equiv_memory_loc_vec), 0,
1582+ sizeof (rtx) * max_regno);
1583+ reg_equiv_memory_loc = VEC_address (rtx, reg_equiv_memory_loc_vec);
1584+}
1585
1586 /* All natural loops. */
1587 struct loops ira_loops;
1588@@ -3203,6 +3210,8 @@
1589 record_loop_exits ();
1590 current_loops = &ira_loops;
1591
1592+ init_reg_equiv_memory_loc ();
1593+
1594 if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL)
1595 fprintf (ira_dump_file, "Building IRA IR\n");
1596 loops_p = ira_build (optimize
1597@@ -3263,13 +3272,8 @@
1598 #endif
1599
1600 delete_trivially_dead_insns (get_insns (), max_reg_num ());
1601- max_regno = max_reg_num ();
1602
1603- /* And the reg_equiv_memory_loc array. */
1604- VEC_safe_grow (rtx, gc, reg_equiv_memory_loc_vec, max_regno);
1605- memset (VEC_address (rtx, reg_equiv_memory_loc_vec), 0,
1606- sizeof (rtx) * max_regno);
1607- reg_equiv_memory_loc = VEC_address (rtx, reg_equiv_memory_loc_vec);
1608+ init_reg_equiv_memory_loc ();
1609
1610 if (max_regno != max_regno_before_ira)
1611 {
1612
1613=== modified file 'gcc/ira.h'
1614--- old/gcc/ira.h 2009-09-02 17:54:25 +0000
1615+++ new/gcc/ira.h 2010-09-01 13:29:58 +0000
1616@@ -87,3 +87,4 @@
1617 extern void ira_mark_new_stack_slot (rtx, int, unsigned int);
1618 extern bool ira_better_spill_reload_regno_p (int *, int *, rtx, rtx, rtx);
1619
1620+extern void ira_adjust_equiv_reg_cost (unsigned, int);
1621
1622=== modified file 'gcc/optabs.h'
1623--- old/gcc/optabs.h 2009-11-25 10:55:54 +0000
1624+++ new/gcc/optabs.h 2010-09-01 13:29:58 +0000
1625@@ -771,6 +771,9 @@
1626 /* Generate code for float to integral conversion. */
1627 extern bool expand_sfix_optab (rtx, rtx, convert_optab);
1628
1629+/* Generate code for a widening multiply. */
1630+extern rtx expand_widening_mult (enum machine_mode, rtx, rtx, rtx, int, optab);
1631+
1632 /* Return tree if target supports vector operations for COND_EXPR. */
1633 bool expand_vec_cond_expr_p (tree, enum machine_mode);
1634
1635
1636=== modified file 'gcc/passes.c'
1637--- old/gcc/passes.c 2010-05-19 12:14:37 +0000
1638+++ new/gcc/passes.c 2010-09-01 13:29:58 +0000
1639@@ -944,6 +944,7 @@
1640 NEXT_PASS (pass_forwprop);
1641 NEXT_PASS (pass_phiopt);
1642 NEXT_PASS (pass_fold_builtins);
1643+ NEXT_PASS (pass_optimize_widening_mul);
1644 NEXT_PASS (pass_tail_calls);
1645 NEXT_PASS (pass_rename_ssa_copies);
1646 NEXT_PASS (pass_uncprop);
1647
1648=== modified file 'gcc/reload.h'
1649--- old/gcc/reload.h 2010-04-02 18:54:46 +0000
1650+++ new/gcc/reload.h 2010-09-01 13:29:58 +0000
1651@@ -347,6 +347,10 @@
1652 extern rtx eliminate_regs (rtx, enum machine_mode, rtx);
1653 extern bool elimination_target_reg_p (rtx);
1654
1655+/* Called from the register allocator to estimate costs of eliminating
1656+ invariant registers. */
1657+extern void calculate_elim_costs_all_insns (void);
1658+
1659 /* Deallocate the reload register used by reload number R. */
1660 extern void deallocate_reload_reg (int r);
1661
1662
1663=== modified file 'gcc/reload1.c'
1664--- old/gcc/reload1.c 2010-03-02 18:56:50 +0000
1665+++ new/gcc/reload1.c 2010-09-01 13:29:58 +0000
1666@@ -413,6 +413,7 @@
1667 static void set_label_offsets (rtx, rtx, int);
1668 static void check_eliminable_occurrences (rtx);
1669 static void elimination_effects (rtx, enum machine_mode);
1670+static rtx eliminate_regs_1 (rtx, enum machine_mode, rtx, bool, bool);
1671 static int eliminate_regs_in_insn (rtx, int);
1672 static void update_eliminable_offsets (void);
1673 static void mark_not_eliminable (rtx, const_rtx, void *);
1674@@ -420,8 +421,11 @@
1675 static bool verify_initial_elim_offsets (void);
1676 static void set_initial_label_offsets (void);
1677 static void set_offsets_for_label (rtx);
1678+static void init_eliminable_invariants (rtx, bool);
1679 static void init_elim_table (void);
1680+static void free_reg_equiv (void);
1681 static void update_eliminables (HARD_REG_SET *);
1682+static void elimination_costs_in_insn (rtx);
1683 static void spill_hard_reg (unsigned int, int);
1684 static int finish_spills (int);
1685 static void scan_paradoxical_subregs (rtx);
1686@@ -698,6 +702,9 @@
1687
1688 /* Global variables used by reload and its subroutines. */
1689
1690+/* The current basic block while in calculate_elim_costs_all_insns. */
1691+static basic_block elim_bb;
1692+
1693 /* Set during calculate_needs if an insn needs register elimination. */
1694 static int something_needs_elimination;
1695 /* Set during calculate_needs if an insn needs an operand changed. */
1696@@ -776,22 +783,6 @@
1697 if (! call_used_regs[i] && ! fixed_regs[i] && ! LOCAL_REGNO (i))
1698 df_set_regs_ever_live (i, true);
1699
1700- /* Find all the pseudo registers that didn't get hard regs
1701- but do have known equivalent constants or memory slots.
1702- These include parameters (known equivalent to parameter slots)
1703- and cse'd or loop-moved constant memory addresses.
1704-
1705- Record constant equivalents in reg_equiv_constant
1706- so they will be substituted by find_reloads.
1707- Record memory equivalents in reg_mem_equiv so they can
1708- be substituted eventually by altering the REG-rtx's. */
1709-
1710- reg_equiv_constant = XCNEWVEC (rtx, max_regno);
1711- reg_equiv_invariant = XCNEWVEC (rtx, max_regno);
1712- reg_equiv_mem = XCNEWVEC (rtx, max_regno);
1713- reg_equiv_alt_mem_list = XCNEWVEC (rtx, max_regno);
1714- reg_equiv_address = XCNEWVEC (rtx, max_regno);
1715- reg_max_ref_width = XCNEWVEC (unsigned int, max_regno);
1716 reg_old_renumber = XCNEWVEC (short, max_regno);
1717 memcpy (reg_old_renumber, reg_renumber, max_regno * sizeof (short));
1718 pseudo_forbidden_regs = XNEWVEC (HARD_REG_SET, max_regno);
1719@@ -799,115 +790,9 @@
1720
1721 CLEAR_HARD_REG_SET (bad_spill_regs_global);
1722
1723- /* Look for REG_EQUIV notes; record what each pseudo is equivalent
1724- to. Also find all paradoxical subregs and find largest such for
1725- each pseudo. */
1726-
1727- num_eliminable_invariants = 0;
1728- for (insn = first; insn; insn = NEXT_INSN (insn))
1729- {
1730- rtx set = single_set (insn);
1731-
1732- /* We may introduce USEs that we want to remove at the end, so
1733- we'll mark them with QImode. Make sure there are no
1734- previously-marked insns left by say regmove. */
1735- if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE
1736- && GET_MODE (insn) != VOIDmode)
1737- PUT_MODE (insn, VOIDmode);
1738-
1739- if (NONDEBUG_INSN_P (insn))
1740- scan_paradoxical_subregs (PATTERN (insn));
1741-
1742- if (set != 0 && REG_P (SET_DEST (set)))
1743- {
1744- rtx note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
1745- rtx x;
1746-
1747- if (! note)
1748- continue;
1749-
1750- i = REGNO (SET_DEST (set));
1751- x = XEXP (note, 0);
1752-
1753- if (i <= LAST_VIRTUAL_REGISTER)
1754- continue;
1755-
1756- if (! function_invariant_p (x)
1757- || ! flag_pic
1758- /* A function invariant is often CONSTANT_P but may
1759- include a register. We promise to only pass
1760- CONSTANT_P objects to LEGITIMATE_PIC_OPERAND_P. */
1761- || (CONSTANT_P (x)
1762- && LEGITIMATE_PIC_OPERAND_P (x)))
1763- {
1764- /* It can happen that a REG_EQUIV note contains a MEM
1765- that is not a legitimate memory operand. As later
1766- stages of reload assume that all addresses found
1767- in the reg_equiv_* arrays were originally legitimate,
1768- we ignore such REG_EQUIV notes. */
1769- if (memory_operand (x, VOIDmode))
1770- {
1771- /* Always unshare the equivalence, so we can
1772- substitute into this insn without touching the
1773- equivalence. */
1774- reg_equiv_memory_loc[i] = copy_rtx (x);
1775- }
1776- else if (function_invariant_p (x))
1777- {
1778- if (GET_CODE (x) == PLUS)
1779- {
1780- /* This is PLUS of frame pointer and a constant,
1781- and might be shared. Unshare it. */
1782- reg_equiv_invariant[i] = copy_rtx (x);
1783- num_eliminable_invariants++;
1784- }
1785- else if (x == frame_pointer_rtx || x == arg_pointer_rtx)
1786- {
1787- reg_equiv_invariant[i] = x;
1788- num_eliminable_invariants++;
1789- }
1790- else if (LEGITIMATE_CONSTANT_P (x))
1791- reg_equiv_constant[i] = x;
1792- else
1793- {
1794- reg_equiv_memory_loc[i]
1795- = force_const_mem (GET_MODE (SET_DEST (set)), x);
1796- if (! reg_equiv_memory_loc[i])
1797- reg_equiv_init[i] = NULL_RTX;
1798- }
1799- }
1800- else
1801- {
1802- reg_equiv_init[i] = NULL_RTX;
1803- continue;
1804- }
1805- }
1806- else
1807- reg_equiv_init[i] = NULL_RTX;
1808- }
1809- }
1810-
1811- if (dump_file)
1812- for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
1813- if (reg_equiv_init[i])
1814- {
1815- fprintf (dump_file, "init_insns for %u: ", i);
1816- print_inline_rtx (dump_file, reg_equiv_init[i], 20);
1817- fprintf (dump_file, "\n");
1818- }
1819-
1820+ init_eliminable_invariants (first, true);
1821 init_elim_table ();
1822
1823- first_label_num = get_first_label_num ();
1824- num_labels = max_label_num () - first_label_num;
1825-
1826- /* Allocate the tables used to store offset information at labels. */
1827- /* We used to use alloca here, but the size of what it would try to
1828- allocate would occasionally cause it to exceed the stack limit and
1829- cause a core dump. */
1830- offsets_known_at = XNEWVEC (char, num_labels);
1831- offsets_at = (HOST_WIDE_INT (*)[NUM_ELIMINABLE_REGS]) xmalloc (num_labels * NUM_ELIMINABLE_REGS * sizeof (HOST_WIDE_INT));
1832-
1833 /* Alter each pseudo-reg rtx to contain its hard reg number. Assign
1834 stack slots to the pseudos that lack hard regs or equivalents.
1835 Do not touch virtual registers. */
1836@@ -1411,31 +1296,11 @@
1837 }
1838 }
1839
1840+ free (temp_pseudo_reg_arr);
1841+
1842 /* Indicate that we no longer have known memory locations or constants. */
1843- if (reg_equiv_constant)
1844- free (reg_equiv_constant);
1845- if (reg_equiv_invariant)
1846- free (reg_equiv_invariant);
1847- reg_equiv_constant = 0;
1848- reg_equiv_invariant = 0;
1849- VEC_free (rtx, gc, reg_equiv_memory_loc_vec);
1850- reg_equiv_memory_loc = 0;
1851-
1852- free (temp_pseudo_reg_arr);
1853-
1854- if (offsets_known_at)
1855- free (offsets_known_at);
1856- if (offsets_at)
1857- free (offsets_at);
1858-
1859- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
1860- if (reg_equiv_alt_mem_list[i])
1861- free_EXPR_LIST_list (&reg_equiv_alt_mem_list[i]);
1862- free (reg_equiv_alt_mem_list);
1863-
1864- free (reg_equiv_mem);
1865+ free_reg_equiv ();
1866 reg_equiv_init = 0;
1867- free (reg_equiv_address);
1868 free (reg_max_ref_width);
1869 free (reg_old_renumber);
1870 free (pseudo_previous_regs);
1871@@ -1728,6 +1593,100 @@
1872 *pprev_reload = 0;
1873 }
1874
1875+/* This function is called from the register allocator to set up estimates
1876+ for the cost of eliminating pseudos which have REG_EQUIV equivalences to
1877+ an invariant. The structure is similar to calculate_needs_all_insns. */
1878+
1879+void
1880+calculate_elim_costs_all_insns (void)
1881+{
1882+ int *reg_equiv_init_cost;
1883+ basic_block bb;
1884+ int i;
1885+
1886+ reg_equiv_init_cost = XCNEWVEC (int, max_regno);
1887+ init_elim_table ();
1888+ init_eliminable_invariants (get_insns (), false);
1889+
1890+ set_initial_elim_offsets ();
1891+ set_initial_label_offsets ();
1892+
1893+ FOR_EACH_BB (bb)
1894+ {
1895+ rtx insn;
1896+ elim_bb = bb;
1897+
1898+ FOR_BB_INSNS (bb, insn)
1899+ {
1900+ /* If this is a label, a JUMP_INSN, or has REG_NOTES (which might
1901+ include REG_LABEL_OPERAND and REG_LABEL_TARGET), we need to see
1902+ what effects this has on the known offsets at labels. */
1903+
1904+ if (LABEL_P (insn) || JUMP_P (insn)
1905+ || (INSN_P (insn) && REG_NOTES (insn) != 0))
1906+ set_label_offsets (insn, insn, 0);
1907+
1908+ if (INSN_P (insn))
1909+ {
1910+ rtx set = single_set (insn);
1911+
1912+ /* Skip insns that only set an equivalence. */
1913+ if (set && REG_P (SET_DEST (set))
1914+ && reg_renumber[REGNO (SET_DEST (set))] < 0
1915+ && (reg_equiv_constant[REGNO (SET_DEST (set))]
1916+ || (reg_equiv_invariant[REGNO (SET_DEST (set))])))
1917+ {
1918+ unsigned regno = REGNO (SET_DEST (set));
1919+ rtx init = reg_equiv_init[regno];
1920+ if (init)
1921+ {
1922+ rtx t = eliminate_regs_1 (SET_SRC (set), VOIDmode, insn,
1923+ false, true);
1924+ int cost = rtx_cost (t, SET,
1925+ optimize_bb_for_speed_p (bb));
1926+ int freq = REG_FREQ_FROM_BB (bb);
1927+
1928+ reg_equiv_init_cost[regno] = cost * freq;
1929+ continue;
1930+ }
1931+ }
1932+ /* If needed, eliminate any eliminable registers. */
1933+ if (num_eliminable || num_eliminable_invariants)
1934+ elimination_costs_in_insn (insn);
1935+
1936+ if (num_eliminable)
1937+ update_eliminable_offsets ();
1938+ }
1939+ }
1940+ }
1941+ for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
1942+ {
1943+ if (reg_equiv_invariant[i])
1944+ {
1945+ if (reg_equiv_init[i])
1946+ {
1947+ int cost = reg_equiv_init_cost[i];
1948+ if (dump_file)
1949+ fprintf (dump_file,
1950+ "Reg %d has equivalence, initial gains %d\n", i, cost);
1951+ if (cost != 0)
1952+ ira_adjust_equiv_reg_cost (i, cost);
1953+ }
1954+ else
1955+ {
1956+ if (dump_file)
1957+ fprintf (dump_file,
1958+ "Reg %d had equivalence, but can't be eliminated\n",
1959+ i);
1960+ ira_adjust_equiv_reg_cost (i, 0);
1961+ }
1962+ }
1963+ }
1964+
1965+ free_reg_equiv ();
1966+ free (reg_equiv_init_cost);
1967+}
1968+
1969 /* Comparison function for qsort to decide which of two reloads
1970 should be handled first. *P1 and *P2 are the reload numbers. */
1971
1972@@ -2514,6 +2473,36 @@
1973 }
1974 }
1975
1976+/* Called through for_each_rtx, this function examines every reg that occurs
1977+ in PX and adjusts the costs for its elimination which are gathered by IRA.
1978+ DATA is the insn in which PX occurs. We do not recurse into MEM
1979+ expressions. */
1980+
1981+static int
1982+note_reg_elim_costly (rtx *px, void *data)
1983+{
1984+ rtx insn = (rtx)data;
1985+ rtx x = *px;
1986+
1987+ if (MEM_P (x))
1988+ return -1;
1989+
1990+ if (REG_P (x)
1991+ && REGNO (x) >= FIRST_PSEUDO_REGISTER
1992+ && reg_equiv_init[REGNO (x)]
1993+ && reg_equiv_invariant[REGNO (x)])
1994+ {
1995+ rtx t = reg_equiv_invariant[REGNO (x)];
1996+ rtx new_rtx = eliminate_regs_1 (t, Pmode, insn, true, true);
1997+ int cost = rtx_cost (new_rtx, SET, optimize_bb_for_speed_p (elim_bb));
1998+ int freq = REG_FREQ_FROM_BB (elim_bb);
1999+
2000+ if (cost != 0)
2001+ ira_adjust_equiv_reg_cost (REGNO (x), -cost * freq);
2002+ }
2003+ return 0;
2004+}
2005+
2006 /* Scan X and replace any eliminable registers (such as fp) with a
2007 replacement (such as sp), plus an offset.
2008
2009@@ -2533,6 +2522,9 @@
2010 This means, do not set ref_outside_mem even if the reference
2011 is outside of MEMs.
2012
2013+ If FOR_COSTS is true, we are being called before reload in order to
2014+ estimate the costs of keeping registers with an equivalence unallocated.
2015+
2016 REG_EQUIV_MEM and REG_EQUIV_ADDRESS contain address that have had
2017 replacements done assuming all offsets are at their initial values. If
2018 they are not, or if REG_EQUIV_ADDRESS is nonzero for a pseudo we
2019@@ -2541,7 +2533,7 @@
2020
2021 static rtx
2022 eliminate_regs_1 (rtx x, enum machine_mode mem_mode, rtx insn,
2023- bool may_use_invariant)
2024+ bool may_use_invariant, bool for_costs)
2025 {
2026 enum rtx_code code = GET_CODE (x);
2027 struct elim_table *ep;
2028@@ -2589,11 +2581,12 @@
2029 {
2030 if (may_use_invariant || (insn && DEBUG_INSN_P (insn)))
2031 return eliminate_regs_1 (copy_rtx (reg_equiv_invariant[regno]),
2032- mem_mode, insn, true);
2033+ mem_mode, insn, true, for_costs);
2034 /* There exists at least one use of REGNO that cannot be
2035 eliminated. Prevent the defining insn from being deleted. */
2036 reg_equiv_init[regno] = NULL_RTX;
2037- alter_reg (regno, -1, true);
2038+ if (!for_costs)
2039+ alter_reg (regno, -1, true);
2040 }
2041 return x;
2042
2043@@ -2654,8 +2647,10 @@
2044 operand of a load-address insn. */
2045
2046 {
2047- rtx new0 = eliminate_regs_1 (XEXP (x, 0), mem_mode, insn, true);
2048- rtx new1 = eliminate_regs_1 (XEXP (x, 1), mem_mode, insn, true);
2049+ rtx new0 = eliminate_regs_1 (XEXP (x, 0), mem_mode, insn, true,
2050+ for_costs);
2051+ rtx new1 = eliminate_regs_1 (XEXP (x, 1), mem_mode, insn, true,
2052+ for_costs);
2053
2054 if (reg_renumber && (new0 != XEXP (x, 0) || new1 != XEXP (x, 1)))
2055 {
2056@@ -2729,9 +2724,11 @@
2057 case GE: case GT: case GEU: case GTU:
2058 case LE: case LT: case LEU: case LTU:
2059 {
2060- rtx new0 = eliminate_regs_1 (XEXP (x, 0), mem_mode, insn, false);
2061+ rtx new0 = eliminate_regs_1 (XEXP (x, 0), mem_mode, insn, false,
2062+ for_costs);
2063 rtx new1 = XEXP (x, 1)
2064- ? eliminate_regs_1 (XEXP (x, 1), mem_mode, insn, false) : 0;
2065+ ? eliminate_regs_1 (XEXP (x, 1), mem_mode, insn, false,
2066+ for_costs) : 0;
2067
2068 if (new0 != XEXP (x, 0) || new1 != XEXP (x, 1))
2069 return gen_rtx_fmt_ee (code, GET_MODE (x), new0, new1);
2070@@ -2742,7 +2739,8 @@
2071 /* If we have something in XEXP (x, 0), the usual case, eliminate it. */
2072 if (XEXP (x, 0))
2073 {
2074- new_rtx = eliminate_regs_1 (XEXP (x, 0), mem_mode, insn, true);
2075+ new_rtx = eliminate_regs_1 (XEXP (x, 0), mem_mode, insn, true,
2076+ for_costs);
2077 if (new_rtx != XEXP (x, 0))
2078 {
2079 /* If this is a REG_DEAD note, it is not valid anymore.
2080@@ -2750,7 +2748,8 @@
2081 REG_DEAD note for the stack or frame pointer. */
2082 if (REG_NOTE_KIND (x) == REG_DEAD)
2083 return (XEXP (x, 1)
2084- ? eliminate_regs_1 (XEXP (x, 1), mem_mode, insn, true)
2085+ ? eliminate_regs_1 (XEXP (x, 1), mem_mode, insn, true,
2086+ for_costs)
2087 : NULL_RTX);
2088
2089 x = alloc_reg_note (REG_NOTE_KIND (x), new_rtx, XEXP (x, 1));
2090@@ -2765,7 +2764,8 @@
2091 strictly needed, but it simplifies the code. */
2092 if (XEXP (x, 1))
2093 {
2094- new_rtx = eliminate_regs_1 (XEXP (x, 1), mem_mode, insn, true);
2095+ new_rtx = eliminate_regs_1 (XEXP (x, 1), mem_mode, insn, true,
2096+ for_costs);
2097 if (new_rtx != XEXP (x, 1))
2098 return
2099 gen_rtx_fmt_ee (GET_CODE (x), GET_MODE (x), XEXP (x, 0), new_rtx);
2100@@ -2791,7 +2791,7 @@
2101 && XEXP (XEXP (x, 1), 0) == XEXP (x, 0))
2102 {
2103 rtx new_rtx = eliminate_regs_1 (XEXP (XEXP (x, 1), 1), mem_mode,
2104- insn, true);
2105+ insn, true, for_costs);
2106
2107 if (new_rtx != XEXP (XEXP (x, 1), 1))
2108 return gen_rtx_fmt_ee (code, GET_MODE (x), XEXP (x, 0),
2109@@ -2814,7 +2814,8 @@
2110 case POPCOUNT:
2111 case PARITY:
2112 case BSWAP:
2113- new_rtx = eliminate_regs_1 (XEXP (x, 0), mem_mode, insn, false);
2114+ new_rtx = eliminate_regs_1 (XEXP (x, 0), mem_mode, insn, false,
2115+ for_costs);
2116 if (new_rtx != XEXP (x, 0))
2117 return gen_rtx_fmt_e (code, GET_MODE (x), new_rtx);
2118 return x;
2119@@ -2835,7 +2836,8 @@
2120 new_rtx = SUBREG_REG (x);
2121 }
2122 else
2123- new_rtx = eliminate_regs_1 (SUBREG_REG (x), mem_mode, insn, false);
2124+ new_rtx = eliminate_regs_1 (SUBREG_REG (x), mem_mode, insn, false,
2125+ for_costs);
2126
2127 if (new_rtx != SUBREG_REG (x))
2128 {
2129@@ -2869,14 +2871,20 @@
2130 /* Our only special processing is to pass the mode of the MEM to our
2131 recursive call and copy the flags. While we are here, handle this
2132 case more efficiently. */
2133- return
2134- replace_equiv_address_nv (x,
2135- eliminate_regs_1 (XEXP (x, 0), GET_MODE (x),
2136- insn, true));
2137+
2138+ new_rtx = eliminate_regs_1 (XEXP (x, 0), GET_MODE (x), insn, true,
2139+ for_costs);
2140+ if (for_costs
2141+ && memory_address_p (GET_MODE (x), XEXP (x, 0))
2142+ && !memory_address_p (GET_MODE (x), new_rtx))
2143+ for_each_rtx (&XEXP (x, 0), note_reg_elim_costly, insn);
2144+
2145+ return replace_equiv_address_nv (x, new_rtx);
2146
2147 case USE:
2148 /* Handle insn_list USE that a call to a pure function may generate. */
2149- new_rtx = eliminate_regs_1 (XEXP (x, 0), VOIDmode, insn, false);
2150+ new_rtx = eliminate_regs_1 (XEXP (x, 0), VOIDmode, insn, false,
2151+ for_costs);
2152 if (new_rtx != XEXP (x, 0))
2153 return gen_rtx_USE (GET_MODE (x), new_rtx);
2154 return x;
2155@@ -2900,7 +2908,8 @@
2156 {
2157 if (*fmt == 'e')
2158 {
2159- new_rtx = eliminate_regs_1 (XEXP (x, i), mem_mode, insn, false);
2160+ new_rtx = eliminate_regs_1 (XEXP (x, i), mem_mode, insn, false,
2161+ for_costs);
2162 if (new_rtx != XEXP (x, i) && ! copied)
2163 {
2164 x = shallow_copy_rtx (x);
2165@@ -2913,7 +2922,8 @@
2166 int copied_vec = 0;
2167 for (j = 0; j < XVECLEN (x, i); j++)
2168 {
2169- new_rtx = eliminate_regs_1 (XVECEXP (x, i, j), mem_mode, insn, false);
2170+ new_rtx = eliminate_regs_1 (XVECEXP (x, i, j), mem_mode, insn, false,
2171+ for_costs);
2172 if (new_rtx != XVECEXP (x, i, j) && ! copied_vec)
2173 {
2174 rtvec new_v = gen_rtvec_v (XVECLEN (x, i),
2175@@ -2937,7 +2947,7 @@
2176 rtx
2177 eliminate_regs (rtx x, enum machine_mode mem_mode, rtx insn)
2178 {
2179- return eliminate_regs_1 (x, mem_mode, insn, false);
2180+ return eliminate_regs_1 (x, mem_mode, insn, false, false);
2181 }
2182
2183 /* Scan rtx X for modifications of elimination target registers. Update
2184@@ -3455,7 +3465,8 @@
2185 /* Companion to the above plus substitution, we can allow
2186 invariants as the source of a plain move. */
2187 is_set_src = false;
2188- if (old_set && recog_data.operand_loc[i] == &SET_SRC (old_set))
2189+ if (old_set
2190+ && recog_data.operand_loc[i] == &SET_SRC (old_set))
2191 is_set_src = true;
2192 in_plus = false;
2193 if (plus_src
2194@@ -3466,7 +3477,7 @@
2195 substed_operand[i]
2196 = eliminate_regs_1 (recog_data.operand[i], VOIDmode,
2197 replace ? insn : NULL_RTX,
2198- is_set_src || in_plus);
2199+ is_set_src || in_plus, false);
2200 if (substed_operand[i] != orig_operand[i])
2201 val = 1;
2202 /* Terminate the search in check_eliminable_occurrences at
2203@@ -3594,11 +3605,167 @@
2204 the pre-passes. */
2205 if (val && REG_NOTES (insn) != 0)
2206 REG_NOTES (insn)
2207- = eliminate_regs_1 (REG_NOTES (insn), VOIDmode, REG_NOTES (insn), true);
2208+ = eliminate_regs_1 (REG_NOTES (insn), VOIDmode, REG_NOTES (insn), true,
2209+ false);
2210
2211 return val;
2212 }
2213
2214+/* Like eliminate_regs_in_insn, but only estimate costs for the use of the
2215+ register allocator. INSN is the instruction we need to examine; we perform
2216+ eliminations in its operands and record cases where eliminating a reg with
2217+ an invariant equivalence would add extra cost. */
2218+
2219+static void
2220+elimination_costs_in_insn (rtx insn)
2221+{
2222+ int icode = recog_memoized (insn);
2223+ rtx old_body = PATTERN (insn);
2224+ int insn_is_asm = asm_noperands (old_body) >= 0;
2225+ rtx old_set = single_set (insn);
2226+ int i;
2227+ rtx orig_operand[MAX_RECOG_OPERANDS];
2228+ rtx orig_dup[MAX_RECOG_OPERANDS];
2229+ struct elim_table *ep;
2230+ rtx plus_src, plus_cst_src;
2231+ bool sets_reg_p;
2232+
2233+ if (! insn_is_asm && icode < 0)
2234+ {
2235+ gcc_assert (GET_CODE (PATTERN (insn)) == USE
2236+ || GET_CODE (PATTERN (insn)) == CLOBBER
2237+ || GET_CODE (PATTERN (insn)) == ADDR_VEC
2238+ || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
2239+ || GET_CODE (PATTERN (insn)) == ASM_INPUT
2240+ || DEBUG_INSN_P (insn));
2241+ return;
2242+ }
2243+
2244+ if (old_set != 0 && REG_P (SET_DEST (old_set))
2245+ && REGNO (SET_DEST (old_set)) < FIRST_PSEUDO_REGISTER)
2246+ {
2247+ /* Check for setting an eliminable register. */
2248+ for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++)
2249+ if (ep->from_rtx == SET_DEST (old_set) && ep->can_eliminate)
2250+ return;
2251+ }
2252+
2253+ /* We allow one special case which happens to work on all machines we
2254+ currently support: a single set with the source or a REG_EQUAL
2255+ note being a PLUS of an eliminable register and a constant. */
2256+ plus_src = plus_cst_src = 0;
2257+ sets_reg_p = false;
2258+ if (old_set && REG_P (SET_DEST (old_set)))
2259+ {
2260+ sets_reg_p = true;
2261+ if (GET_CODE (SET_SRC (old_set)) == PLUS)
2262+ plus_src = SET_SRC (old_set);
2263+ /* First see if the source is of the form (plus (...) CST). */
2264+ if (plus_src
2265+ && CONST_INT_P (XEXP (plus_src, 1)))
2266+ plus_cst_src = plus_src;
2267+ else if (REG_P (SET_SRC (old_set))
2268+ || plus_src)
2269+ {
2270+ /* Otherwise, see if we have a REG_EQUAL note of the form
2271+ (plus (...) CST). */
2272+ rtx links;
2273+ for (links = REG_NOTES (insn); links; links = XEXP (links, 1))
2274+ {
2275+ if ((REG_NOTE_KIND (links) == REG_EQUAL
2276+ || REG_NOTE_KIND (links) == REG_EQUIV)
2277+ && GET_CODE (XEXP (links, 0)) == PLUS
2278+ && CONST_INT_P (XEXP (XEXP (links, 0), 1)))
2279+ {
2280+ plus_cst_src = XEXP (links, 0);
2281+ break;
2282+ }
2283+ }
2284+ }
2285+ }
2286+
2287+ /* Determine the effects of this insn on elimination offsets. */
2288+ elimination_effects (old_body, VOIDmode);
2289+
2290+ /* Eliminate all eliminable registers occurring in operands that
2291+ can be handled by reload. */
2292+ extract_insn (insn);
2293+ for (i = 0; i < recog_data.n_dups; i++)
2294+ orig_dup[i] = *recog_data.dup_loc[i];
2295+
2296+ for (i = 0; i < recog_data.n_operands; i++)
2297+ {
2298+ orig_operand[i] = recog_data.operand[i];
2299+
2300+ /* For an asm statement, every operand is eliminable. */
2301+ if (insn_is_asm || insn_data[icode].operand[i].eliminable)
2302+ {
2303+ bool is_set_src, in_plus;
2304+
2305+ /* Check for setting a register that we know about. */
2306+ if (recog_data.operand_type[i] != OP_IN
2307+ && REG_P (orig_operand[i]))
2308+ {
2309+ /* If we are assigning to a register that can be eliminated, it
2310+ must be as part of a PARALLEL, since the code above handles
2311+ single SETs. We must indicate that we can no longer
2312+ eliminate this reg. */
2313+ for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS];
2314+ ep++)
2315+ if (ep->from_rtx == orig_operand[i])
2316+ ep->can_eliminate = 0;
2317+ }
2318+
2319+ /* Companion to the above plus substitution, we can allow
2320+ invariants as the source of a plain move. */
2321+ is_set_src = false;
2322+ if (old_set && recog_data.operand_loc[i] == &SET_SRC (old_set))
2323+ is_set_src = true;
2324+ if (is_set_src && !sets_reg_p)
2325+ note_reg_elim_costly (&SET_SRC (old_set), insn);
2326+ in_plus = false;
2327+ if (plus_src && sets_reg_p
2328+ && (recog_data.operand_loc[i] == &XEXP (plus_src, 0)
2329+ || recog_data.operand_loc[i] == &XEXP (plus_src, 1)))
2330+ in_plus = true;
2331+
2332+ eliminate_regs_1 (recog_data.operand[i], VOIDmode,
2333+ NULL_RTX,
2334+ is_set_src || in_plus, true);
2335+ /* Terminate the search in check_eliminable_occurrences at
2336+ this point. */
2337+ *recog_data.operand_loc[i] = 0;
2338+ }
2339+ }
2340+
2341+ for (i = 0; i < recog_data.n_dups; i++)
2342+ *recog_data.dup_loc[i]
2343+ = *recog_data.operand_loc[(int) recog_data.dup_num[i]];
2344+
2345+ /* If any eliminable remain, they aren't eliminable anymore. */
2346+ check_eliminable_occurrences (old_body);
2347+
2348+ /* Restore the old body. */
2349+ for (i = 0; i < recog_data.n_operands; i++)
2350+ *recog_data.operand_loc[i] = orig_operand[i];
2351+ for (i = 0; i < recog_data.n_dups; i++)
2352+ *recog_data.dup_loc[i] = orig_dup[i];
2353+
2354+ /* Update all elimination pairs to reflect the status after the current
2355+ insn. The changes we make were determined by the earlier call to
2356+ elimination_effects. */
2357+
2358+ for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++)
2359+ {
2360+ if (ep->previous_offset != ep->offset && ep->ref_outside_mem)
2361+ ep->can_eliminate = 0;
2362+
2363+ ep->ref_outside_mem = 0;
2364+ }
2365+
2366+ return;
2367+}
2368+
2369 /* Loop through all elimination pairs.
2370 Recalculate the number not at initial offset.
2371
2372@@ -3908,6 +4075,168 @@
2373 ep->to_rtx = gen_rtx_REG (Pmode, ep->to);
2374 }
2375 }
2376+
2377+/* Find all the pseudo registers that didn't get hard regs
2378+ but do have known equivalent constants or memory slots.
2379+ These include parameters (known equivalent to parameter slots)
2380+ and cse'd or loop-moved constant memory addresses.
2381+
2382+ Record constant equivalents in reg_equiv_constant
2383+ so they will be substituted by find_reloads.
2384+ Record memory equivalents in reg_equiv_memory_loc so they can
2385+ be substituted eventually by altering the REG-rtx's. */
2386+
2387+static void
2388+init_eliminable_invariants (rtx first, bool do_subregs)
2389+{
2390+ int i;
2391+ rtx insn;
2392+
2393+ reg_equiv_constant = XCNEWVEC (rtx, max_regno);
2394+ reg_equiv_invariant = XCNEWVEC (rtx, max_regno);
2395+ reg_equiv_mem = XCNEWVEC (rtx, max_regno);
2396+ reg_equiv_alt_mem_list = XCNEWVEC (rtx, max_regno);
2397+ reg_equiv_address = XCNEWVEC (rtx, max_regno);
2398+ if (do_subregs)
2399+ reg_max_ref_width = XCNEWVEC (unsigned int, max_regno);
2400+ else
2401+ reg_max_ref_width = NULL;
2402+
2403+ num_eliminable_invariants = 0;
2404+
2405+ first_label_num = get_first_label_num ();
2406+ num_labels = max_label_num () - first_label_num;
2407+
2408+ /* Allocate the tables used to store offset information at labels. */
2409+ offsets_known_at = XNEWVEC (char, num_labels);
2410+ offsets_at = (HOST_WIDE_INT (*)[NUM_ELIMINABLE_REGS]) xmalloc (num_labels * NUM_ELIMINABLE_REGS * sizeof (HOST_WIDE_INT));
2411+
2412+/* Look for REG_EQUIV notes; record what each pseudo is equivalent
2413+ to. If DO_SUBREGS is true, also find all paradoxical subregs and
2414+ find largest such for each pseudo. FIRST is the head of the insn
2415+ list. */
2416+
2417+ for (insn = first; insn; insn = NEXT_INSN (insn))
2418+ {
2419+ rtx set = single_set (insn);
2420+
2421+ /* We may introduce USEs that we want to remove at the end, so
2422+ we'll mark them with QImode. Make sure there are no
2423+ previously-marked insns left by say regmove. */
2424+ if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE
2425+ && GET_MODE (insn) != VOIDmode)
2426+ PUT_MODE (insn, VOIDmode);
2427+
2428+ if (do_subregs && NONDEBUG_INSN_P (insn))
2429+ scan_paradoxical_subregs (PATTERN (insn));
2430+
2431+ if (set != 0 && REG_P (SET_DEST (set)))
2432+ {
2433+ rtx note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
2434+ rtx x;
2435+
2436+ if (! note)
2437+ continue;
2438+
2439+ i = REGNO (SET_DEST (set));
2440+ x = XEXP (note, 0);
2441+
2442+ if (i <= LAST_VIRTUAL_REGISTER)
2443+ continue;
2444+
2445+ /* If flag_pic and we have a constant, verify it's legitimate. */
2446+ if (!CONSTANT_P (x)
2447+ || !flag_pic || LEGITIMATE_PIC_OPERAND_P (x))
2448+ {
2449+ /* It can happen that a REG_EQUIV note contains a MEM
2450+ that is not a legitimate memory operand. As later
2451+ stages of reload assume that all addresses found
2452+ in the reg_equiv_* arrays were originally legitimate,
2453+ we ignore such REG_EQUIV notes. */
2454+ if (memory_operand (x, VOIDmode))
2455+ {
2456+ /* Always unshare the equivalence, so we can
2457+ substitute into this insn without touching the
2458+ equivalence. */
2459+ reg_equiv_memory_loc[i] = copy_rtx (x);
2460+ }
2461+ else if (function_invariant_p (x))
2462+ {
2463+ if (GET_CODE (x) == PLUS)
2464+ {
2465+ /* This is PLUS of frame pointer and a constant,
2466+ and might be shared. Unshare it. */
2467+ reg_equiv_invariant[i] = copy_rtx (x);
2468+ num_eliminable_invariants++;
2469+ }
2470+ else if (x == frame_pointer_rtx || x == arg_pointer_rtx)
2471+ {
2472+ reg_equiv_invariant[i] = x;
2473+ num_eliminable_invariants++;
2474+ }
2475+ else if (LEGITIMATE_CONSTANT_P (x))
2476+ reg_equiv_constant[i] = x;
2477+ else
2478+ {
2479+ reg_equiv_memory_loc[i]
2480+ = force_const_mem (GET_MODE (SET_DEST (set)), x);
2481+ if (! reg_equiv_memory_loc[i])
2482+ reg_equiv_init[i] = NULL_RTX;
2483+ }
2484+ }
2485+ else
2486+ {
2487+ reg_equiv_init[i] = NULL_RTX;
2488+ continue;
2489+ }
2490+ }
2491+ else
2492+ reg_equiv_init[i] = NULL_RTX;
2493+ }
2494+ }
2495+
2496+ if (dump_file)
2497+ for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
2498+ if (reg_equiv_init[i])
2499+ {
2500+ fprintf (dump_file, "init_insns for %u: ", i);
2501+ print_inline_rtx (dump_file, reg_equiv_init[i], 20);
2502+ fprintf (dump_file, "\n");
2503+ }
2504+}
2505+
2506+/* Indicate that we no longer have known memory locations or constants.
2507+ Free all data involved in tracking these. */
2508+
2509+static void
2510+free_reg_equiv (void)
2511+{
2512+ int i;
2513+
2514+ if (reg_equiv_constant)
2515+ free (reg_equiv_constant);
2516+ if (reg_equiv_invariant)
2517+ free (reg_equiv_invariant);
2518+ reg_equiv_constant = 0;
2519+ reg_equiv_invariant = 0;
2520+ VEC_free (rtx, gc, reg_equiv_memory_loc_vec);
2521+ reg_equiv_memory_loc = 0;
2522+
2523+ if (offsets_known_at)
2524+ free (offsets_known_at);
2525+ if (offsets_at)
2526+ free (offsets_at);
2527+ offsets_at = 0;
2528+ offsets_known_at = 0;
2529+
2530+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
2531+ if (reg_equiv_alt_mem_list[i])
2532+ free_EXPR_LIST_list (&reg_equiv_alt_mem_list[i]);
2533+ free (reg_equiv_alt_mem_list);
2534+
2535+ free (reg_equiv_mem);
2536+ free (reg_equiv_address);
2537+}
2538
2539 /* Kick all pseudos out of hard register REGNO.
2540
2541@@ -5664,7 +5993,7 @@
2542 return 1;
2543 if (GET_CODE (x) == PLUS
2544 && (XEXP (x, 0) == frame_pointer_rtx || XEXP (x, 0) == arg_pointer_rtx)
2545- && CONSTANT_P (XEXP (x, 1)))
2546+ && GET_CODE (XEXP (x, 1)) == CONST_INT)
2547 return 1;
2548 return 0;
2549 }
2550
2551=== modified file 'gcc/system.h'
2552--- old/gcc/system.h 2009-12-13 23:00:53 +0000
2553+++ new/gcc/system.h 2010-09-01 13:29:58 +0000
2554@@ -761,7 +761,8 @@
2555 TARGET_ASM_EXCEPTION_SECTION TARGET_ASM_EH_FRAME_SECTION \
2556 SMALL_ARG_MAX ASM_OUTPUT_SHARED_BSS ASM_OUTPUT_SHARED_COMMON \
2557 ASM_OUTPUT_SHARED_LOCAL ASM_MAKE_LABEL_LINKONCE \
2558- STACK_CHECK_PROBE_INTERVAL STACK_CHECK_PROBE_LOAD
2559+ STACK_CHECK_PROBE_INTERVAL STACK_CHECK_PROBE_LOAD \
2560+ ORDER_REGS_FOR_LOCAL_ALLOC
2561
2562 /* Hooks that are no longer used. */
2563 #pragma GCC poison LANG_HOOKS_FUNCTION_MARK LANG_HOOKS_FUNCTION_FREE \
2564
2565=== added file 'gcc/testsuite/c-c++-common/uninit-17.c'
2566--- old/gcc/testsuite/c-c++-common/uninit-17.c 1970-01-01 00:00:00 +0000
2567+++ new/gcc/testsuite/c-c++-common/uninit-17.c 2010-09-01 13:29:58 +0000
2568@@ -0,0 +1,25 @@
2569+/* { dg-do compile } */
2570+/* { dg-options "-O2 -Wuninitialized" } */
2571+
2572+inline int foo(int x)
2573+{
2574+ return x;
2575+}
2576+static void bar(int a, int *ptr)
2577+{
2578+ do
2579+ {
2580+ int b; /* { dg-warning "is used uninitialized" } */
2581+ if (b < 40) {
2582+ ptr[0] = b;
2583+ }
2584+ b += 1;
2585+ ptr++;
2586+ }
2587+ while (--a != 0);
2588+}
2589+void foobar(int a, int *ptr)
2590+{
2591+ bar(foo(a), ptr);
2592+}
2593+
2594
2595=== added file 'gcc/testsuite/gcc.target/arm/eliminate.c'
2596--- old/gcc/testsuite/gcc.target/arm/eliminate.c 1970-01-01 00:00:00 +0000
2597+++ new/gcc/testsuite/gcc.target/arm/eliminate.c 2010-09-01 13:29:58 +0000
2598@@ -0,0 +1,19 @@
2599+/* { dg-do compile } */
2600+/* { dg-options "-O2" } */
2601+
2602+struct X
2603+{
2604+ int c;
2605+};
2606+
2607+extern void bar(struct X *);
2608+
2609+void foo ()
2610+{
2611+ struct X x;
2612+ bar (&x);
2613+ bar (&x);
2614+ bar (&x);
2615+}
2616+
2617+/* { dg-final { scan-assembler-times "r0,\[\\t \]*sp" 3 } } */
2618
2619=== added file 'gcc/testsuite/gcc.target/arm/pr40900.c'
2620--- old/gcc/testsuite/gcc.target/arm/pr40900.c 1970-01-01 00:00:00 +0000
2621+++ new/gcc/testsuite/gcc.target/arm/pr40900.c 2010-09-01 13:29:58 +0000
2622@@ -0,0 +1,12 @@
2623+/* { dg-do compile } */
2624+/* { dg-options "-O2 -fno-optimize-sibling-calls" } */
2625+
2626+extern short shortv2();
2627+short shortv1()
2628+{
2629+ return shortv2();
2630+}
2631+
2632+/* { dg-final { scan-assembler-not "lsl" } } */
2633+/* { dg-final { scan-assembler-not "asr" } } */
2634+/* { dg-final { scan-assembler-not "sxth" } } */
2635
2636=== added file 'gcc/testsuite/gcc.target/arm/pr42496.c'
2637--- old/gcc/testsuite/gcc.target/arm/pr42496.c 1970-01-01 00:00:00 +0000
2638+++ new/gcc/testsuite/gcc.target/arm/pr42496.c 2010-09-01 13:29:58 +0000
2639@@ -0,0 +1,16 @@
2640+/* { dg-options "-O2" } */
2641+
2642+void foo(int i)
2643+{
2644+ extern int j;
2645+
2646+ if (i) {
2647+ j = 10;
2648+ }
2649+ else {
2650+ j = 20;
2651+ }
2652+}
2653+
2654+/* { dg-final { scan-assembler-not "strne" } } */
2655+/* { dg-final { scan-assembler-not "streq" } } */
2656
2657=== added file 'gcc/testsuite/gcc.target/arm/wmul-1.c'
2658--- old/gcc/testsuite/gcc.target/arm/wmul-1.c 1970-01-01 00:00:00 +0000
2659+++ new/gcc/testsuite/gcc.target/arm/wmul-1.c 2010-09-01 13:29:58 +0000
2660@@ -0,0 +1,18 @@
2661+/* { dg-do compile } */
2662+/* { dg-options "-O2 -march=armv6t2 -fno-unroll-loops" } */
2663+
2664+int mac(const short *a, const short *b, int sqr, int *sum)
2665+{
2666+ int i;
2667+ int dotp = *sum;
2668+
2669+ for (i = 0; i < 150; i++) {
2670+ dotp += b[i] * a[i];
2671+ sqr += b[i] * b[i];
2672+ }
2673+
2674+ *sum = dotp;
2675+ return sqr;
2676+}
2677+
2678+/* { dg-final { scan-assembler-times "smulbb" 2 } } */
2679
2680=== added file 'gcc/testsuite/gcc.target/arm/wmul-2.c'
2681--- old/gcc/testsuite/gcc.target/arm/wmul-2.c 1970-01-01 00:00:00 +0000
2682+++ new/gcc/testsuite/gcc.target/arm/wmul-2.c 2010-09-01 13:29:58 +0000
2683@@ -0,0 +1,12 @@
2684+/* { dg-do compile } */
2685+/* { dg-options "-O2 -march=armv6t2 -fno-unroll-loops" } */
2686+
2687+void vec_mpy(int y[], const short x[], short scaler)
2688+{
2689+ int i;
2690+
2691+ for (i = 0; i < 150; i++)
2692+ y[i] += ((scaler * x[i]) >> 31);
2693+}
2694+
2695+/* { dg-final { scan-assembler-times "smulbb" 1 } } */
2696
2697=== added file 'gcc/testsuite/gcc.target/bfin/wmul-1.c'
2698--- old/gcc/testsuite/gcc.target/bfin/wmul-1.c 1970-01-01 00:00:00 +0000
2699+++ new/gcc/testsuite/gcc.target/bfin/wmul-1.c 2010-09-01 13:29:58 +0000
2700@@ -0,0 +1,18 @@
2701+/* { dg-do compile } */
2702+/* { dg-options "-O2" } */
2703+
2704+int mac(const short *a, const short *b, int sqr, int *sum)
2705+{
2706+ int i;
2707+ int dotp = *sum;
2708+
2709+ for (i = 0; i < 150; i++) {
2710+ dotp += b[i] * a[i];
2711+ sqr += b[i] * b[i];
2712+ }
2713+
2714+ *sum = dotp;
2715+ return sqr;
2716+}
2717+
2718+/* { dg-final { scan-assembler-times "\\(IS\\)" 2 } } */
2719
2720=== added file 'gcc/testsuite/gcc.target/bfin/wmul-2.c'
2721--- old/gcc/testsuite/gcc.target/bfin/wmul-2.c 1970-01-01 00:00:00 +0000
2722+++ new/gcc/testsuite/gcc.target/bfin/wmul-2.c 2010-09-01 13:29:58 +0000
2723@@ -0,0 +1,12 @@
2724+/* { dg-do compile } */
2725+/* { dg-options "-O2" } */
2726+
2727+void vec_mpy(int y[], const short x[], short scaler)
2728+{
2729+ int i;
2730+
2731+ for (i = 0; i < 150; i++)
2732+ y[i] += ((scaler * x[i]) >> 31);
2733+}
2734+
2735+/* { dg-final { scan-assembler-times "\\(IS\\)" 1 } } */
2736
2737=== added file 'gcc/testsuite/gcc.target/i386/pr41442.c'
2738--- old/gcc/testsuite/gcc.target/i386/pr41442.c 1970-01-01 00:00:00 +0000
2739+++ new/gcc/testsuite/gcc.target/i386/pr41442.c 2010-09-01 13:29:58 +0000
2740@@ -0,0 +1,18 @@
2741+/* { dg-do compile } */
2742+/* { dg-options "-O2" } */
2743+
2744+typedef struct LINK link;
2745+struct LINK
2746+{
2747+ link* next;
2748+};
2749+
2750+int haha(link* p1, link* p2)
2751+{
2752+ if ((p1->next && !p2->next) || p2->next)
2753+ return 0;
2754+
2755+ return 1;
2756+}
2757+
2758+/* { dg-final { scan-assembler-times "test|cmp" 2 } } */
2759
2760=== added file 'gcc/testsuite/gcc.target/i386/wmul-1.c'
2761--- old/gcc/testsuite/gcc.target/i386/wmul-1.c 1970-01-01 00:00:00 +0000
2762+++ new/gcc/testsuite/gcc.target/i386/wmul-1.c 2010-09-01 13:29:58 +0000
2763@@ -0,0 +1,18 @@
2764+/* { dg-do compile } */
2765+/* { dg-options "-O2" } */
2766+
2767+long long mac(const int *a, const int *b, long long sqr, long long *sum)
2768+{
2769+ int i;
2770+ long long dotp = *sum;
2771+
2772+ for (i = 0; i < 150; i++) {
2773+ dotp += (long long)b[i] * a[i];
2774+ sqr += (long long)b[i] * b[i];
2775+ }
2776+
2777+ *sum = dotp;
2778+ return sqr;
2779+}
2780+
2781+/* { dg-final { scan-assembler-times "imull" 2 } } */
2782
2783=== added file 'gcc/testsuite/gcc.target/i386/wmul-2.c'
2784--- old/gcc/testsuite/gcc.target/i386/wmul-2.c 1970-01-01 00:00:00 +0000
2785+++ new/gcc/testsuite/gcc.target/i386/wmul-2.c 2010-09-01 13:29:58 +0000
2786@@ -0,0 +1,12 @@
2787+/* { dg-do compile } */
2788+/* { dg-options "-O2" } */
2789+
2790+void vec_mpy(int y[], const int x[], int scaler)
2791+{
2792+ int i;
2793+
2794+ for (i = 0; i < 150; i++)
2795+ y[i] += (((long long)scaler * x[i]) >> 31);
2796+}
2797+
2798+/* { dg-final { scan-assembler-times "imull" 1 } } */
2799
2800=== modified file 'gcc/tree-cfg.c'
2801--- old/gcc/tree-cfg.c 2010-08-10 13:31:21 +0000
2802+++ new/gcc/tree-cfg.c 2010-09-01 13:29:58 +0000
2803@@ -3428,8 +3428,13 @@
2804 connected to the operand types. */
2805 return verify_gimple_comparison (lhs_type, rhs1, rhs2);
2806
2807+ case WIDEN_MULT_EXPR:
2808+ if (TREE_CODE (lhs_type) != INTEGER_TYPE)
2809+ return true;
2810+ return ((2 * TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (lhs_type))
2811+ || (TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (rhs2_type)));
2812+
2813 case WIDEN_SUM_EXPR:
2814- case WIDEN_MULT_EXPR:
2815 case VEC_WIDEN_MULT_HI_EXPR:
2816 case VEC_WIDEN_MULT_LO_EXPR:
2817 case VEC_PACK_TRUNC_EXPR:
2818
2819=== modified file 'gcc/tree-inline.c'
2820--- old/gcc/tree-inline.c 2010-08-10 13:31:21 +0000
2821+++ new/gcc/tree-inline.c 2010-09-01 13:29:58 +0000
2822@@ -229,6 +229,7 @@
2823 regions of the CFG, but this is expensive to test. */
2824 if (id->entry_bb
2825 && is_gimple_reg (SSA_NAME_VAR (name))
2826+ && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name)
2827 && TREE_CODE (SSA_NAME_VAR (name)) != PARM_DECL
2828 && (id->entry_bb != EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest
2829 || EDGE_COUNT (id->entry_bb->preds) != 1))
2830
2831=== modified file 'gcc/tree-pass.h'
2832--- old/gcc/tree-pass.h 2010-04-02 18:54:46 +0000
2833+++ new/gcc/tree-pass.h 2010-09-01 13:29:58 +0000
2834@@ -407,6 +407,7 @@
2835 extern struct gimple_opt_pass pass_cse_reciprocals;
2836 extern struct gimple_opt_pass pass_cse_sincos;
2837 extern struct gimple_opt_pass pass_optimize_bswap;
2838+extern struct gimple_opt_pass pass_optimize_widening_mul;
2839 extern struct gimple_opt_pass pass_warn_function_return;
2840 extern struct gimple_opt_pass pass_warn_function_noreturn;
2841 extern struct gimple_opt_pass pass_cselim;
2842
2843=== modified file 'gcc/tree-ssa-math-opts.c'
2844--- old/gcc/tree-ssa-math-opts.c 2010-04-02 18:54:46 +0000
2845+++ new/gcc/tree-ssa-math-opts.c 2010-09-01 13:29:58 +0000
2846@@ -1260,3 +1260,137 @@
2847 0 /* todo_flags_finish */
2848 }
2849 };
2850+
2851+/* Find integer multiplications where the operands are extended from
2852+ smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
2853+ where appropriate. */
2854+
2855+static unsigned int
2856+execute_optimize_widening_mul (void)
2857+{
2858+ bool changed = false;
2859+ basic_block bb;
2860+
2861+ FOR_EACH_BB (bb)
2862+ {
2863+ gimple_stmt_iterator gsi;
2864+
2865+ for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2866+ {
2867+ gimple stmt = gsi_stmt (gsi);
2868+ gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
2869+ tree type, type1 = NULL, type2 = NULL;
2870+ tree rhs1, rhs2, rhs1_convop = NULL, rhs2_convop = NULL;
2871+ enum tree_code rhs1_code, rhs2_code;
2872+
2873+ if (!is_gimple_assign (stmt)
2874+ || gimple_assign_rhs_code (stmt) != MULT_EXPR)
2875+ continue;
2876+
2877+ type = TREE_TYPE (gimple_assign_lhs (stmt));
2878+
2879+ if (TREE_CODE (type) != INTEGER_TYPE)
2880+ continue;
2881+
2882+ rhs1 = gimple_assign_rhs1 (stmt);
2883+ rhs2 = gimple_assign_rhs2 (stmt);
2884+
2885+ if (TREE_CODE (rhs1) == SSA_NAME)
2886+ {
2887+ rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2888+ if (!is_gimple_assign (rhs1_stmt))
2889+ continue;
2890+ rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2891+ if (!CONVERT_EXPR_CODE_P (rhs1_code))
2892+ continue;
2893+ rhs1_convop = gimple_assign_rhs1 (rhs1_stmt);
2894+ type1 = TREE_TYPE (rhs1_convop);
2895+ if (TYPE_PRECISION (type1) * 2 != TYPE_PRECISION (type))
2896+ continue;
2897+ }
2898+ else if (TREE_CODE (rhs1) != INTEGER_CST)
2899+ continue;
2900+
2901+ if (TREE_CODE (rhs2) == SSA_NAME)
2902+ {
2903+ rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2904+ if (!is_gimple_assign (rhs2_stmt))
2905+ continue;
2906+ rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2907+ if (!CONVERT_EXPR_CODE_P (rhs2_code))
2908+ continue;
2909+ rhs2_convop = gimple_assign_rhs1 (rhs2_stmt);
2910+ type2 = TREE_TYPE (rhs2_convop);
2911+ if (TYPE_PRECISION (type2) * 2 != TYPE_PRECISION (type))
2912+ continue;
2913+ }
2914+ else if (TREE_CODE (rhs2) != INTEGER_CST)
2915+ continue;
2916+
2917+ if (rhs1_stmt == NULL && rhs2_stmt == NULL)
2918+ continue;
2919+
2920+ /* Verify that the machine can perform a widening multiply in this
2921+ mode/signedness combination, otherwise this transformation is
2922+ likely to pessimize code. */
2923+ if ((rhs1_stmt == NULL || TYPE_UNSIGNED (type1))
2924+ && (rhs2_stmt == NULL || TYPE_UNSIGNED (type2))
2925+ && (optab_handler (umul_widen_optab, TYPE_MODE (type))
2926+ ->insn_code == CODE_FOR_nothing))
2927+ continue;
2928+ else if ((rhs1_stmt == NULL || !TYPE_UNSIGNED (type1))
2929+ && (rhs2_stmt == NULL || !TYPE_UNSIGNED (type2))
2930+ && (optab_handler (smul_widen_optab, TYPE_MODE (type))
2931+ ->insn_code == CODE_FOR_nothing))
2932+ continue;
2933+ else if (rhs1_stmt != NULL && rhs2_stmt != 0
2934+ && (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
2935+ && (optab_handler (usmul_widen_optab, TYPE_MODE (type))
2936+ ->insn_code == CODE_FOR_nothing))
2937+ continue;
2938+
2939+ if ((rhs1_stmt == NULL && !int_fits_type_p (rhs1, type2))
2940+ || (rhs2_stmt == NULL && !int_fits_type_p (rhs2, type1)))
2941+ continue;
2942+
2943+ if (rhs1_stmt == NULL)
2944+ gimple_assign_set_rhs1 (stmt, fold_convert (type2, rhs1));
2945+ else
2946+ gimple_assign_set_rhs1 (stmt, rhs1_convop);
2947+ if (rhs2_stmt == NULL)
2948+ gimple_assign_set_rhs2 (stmt, fold_convert (type1, rhs2));
2949+ else
2950+ gimple_assign_set_rhs2 (stmt, rhs2_convop);
2951+ gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2952+ update_stmt (stmt);
2953+ changed = true;
2954+ }
2955+ }
2956+ return (changed ? TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
2957+ | TODO_verify_stmts : 0);
2958+}
2959+
2960+static bool
2961+gate_optimize_widening_mul (void)
2962+{
2963+ return flag_expensive_optimizations && optimize;
2964+}
2965+
2966+struct gimple_opt_pass pass_optimize_widening_mul =
2967+{
2968+ {
2969+ GIMPLE_PASS,
2970+ "widening_mul", /* name */
2971+ gate_optimize_widening_mul, /* gate */
2972+ execute_optimize_widening_mul, /* execute */
2973+ NULL, /* sub */
2974+ NULL, /* next */
2975+ 0, /* static_pass_number */
2976+ TV_NONE, /* tv_id */
2977+ PROP_ssa, /* properties_required */
2978+ 0, /* properties_provided */
2979+ 0, /* properties_destroyed */
2980+ 0, /* todo_flags_start */
2981+ 0 /* todo_flags_finish */
2982+ }
2983+};
2984
2985=== modified file 'gcc/tree-ssa.c'
2986--- old/gcc/tree-ssa.c 2009-12-07 22:42:10 +0000
2987+++ new/gcc/tree-ssa.c 2010-09-01 13:29:58 +0000
2988@@ -1671,6 +1671,8 @@
2989 {
2990 TREE_NO_WARNING (var) = 1;
2991
2992+ if (location == DECL_SOURCE_LOCATION (var))
2993+ return;
2994 if (xloc.file != floc.file
2995 || xloc.line < floc.line
2996 || xloc.line > LOCATION_LINE (cfun->function_end_locus))
2997