summaryrefslogtreecommitdiffstats
path: root/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106828.patch
diff options
context:
space:
mode:
authorKhem Raj <raj.khem@gmail.com>2012-06-28 12:19:53 -0700
committerKoen Kooi <koen@dominion.thruhere.net>2012-07-09 18:40:21 +0200
commit6b278fbb02d818b54b5a9fa2716fc49e896b72a8 (patch)
tree833783fb738ff7abf3d0e3029c9a468e73b06e28 /toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106828.patch
parent680af24d1ff95533db610176e6b01fcc9dcf6699 (diff)
downloadmeta-openembedded-6b278fbb02d818b54b5a9fa2716fc49e896b72a8.tar.gz
gcc-4.6: Migrate recipes from OE-Core
Remove linaro patches. If one needs to use linaro modified gcc they should use meta-linaro Signed-off-by: Khem Raj <raj.khem@gmail.com>
Diffstat (limited to 'toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106828.patch')
-rw-r--r--toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106828.patch1951
1 files changed, 0 insertions, 1951 deletions
diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106828.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106828.patch
deleted file mode 100644
index 3c0ff00856..0000000000
--- a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106828.patch
+++ /dev/null
@@ -1,1951 +0,0 @@
12011-10-17 Richard Sandiford <richard.sandiford@linaro.org>
2
3 gcc/
4 Backport from mainline:
5
6 2011-10-10 Richard Sandiford <richard.sandiford@linaro.org>
7
8 * modulo-sched.c (ps_reg_move_info): Add num_consecutive_stages.
9 (SCHED_FIRST_REG_MOVE, SCHED_NREG_MOVES): Delete.
10 (node_sched_params): Remove first_reg_move and nreg_moves.
11 (ps_num_consecutive_stages, extend_node_sched_params): New functions.
12 (update_node_sched_params): Move up file.
13 (print_node_sched_params): Print the stage. Don't dump info related
14 to first_reg_move and nreg_moves.
15 (set_columns_for_row): New function.
16 (set_columns_for_ps): Move up file and use set_columns_for_row.
17 (schedule_reg_move): New function.
18 (schedule_reg_moves): Call extend_node_sched_params and
19 schedule_reg_move. Extend size of uses bitmap. Initialize
20 num_consecutive_stages. Return false if a move could not be
21 scheduled.
22 (apply_reg_moves): Don't emit moves here.
23 (permute_partial_schedule): Handle register moves.
24 (duplicate_insns_of_cycles): Remove for_prolog. Emit moves according
25 to the same stage-count test as ddg nodes.
26 (generate_prolog_epilog): Update calls accordingly.
27 (sms_schedule): Allow move-scheduling to add a new first stage.
28
292011-10-17 Richard Sandiford <richard.sandiford@linaro.org>
30
31 gcc/
32 Backport from mainline:
33
34 2011-10-10 Richard Sandiford <richard.sandiford@linaro.org>
35
36 * modulo-sched.c (ps_insn): Adjust comment.
37 (ps_reg_move_info): New structure.
38 (partial_schedule): Add reg_moves field.
39 (SCHED_PARAMS): Use node_sched_param_vec instead of node_sched_params.
40 (node_sched_params): Turn first_reg_move into an identifier.
41 (ps_reg_move): New function.
42 (ps_rtl_insn): Cope with register moves.
43 (ps_first_note): Adjust comment and assert that the instruction
44 isn't a register move.
45 (node_sched_params): Replace with...
46 (node_sched_param_vec): ...this vector.
47 (set_node_sched_params): Adjust accordingly.
48 (print_node_sched_params): Take a partial schedule instead of a ddg.
49 Use ps_rtl_insn and ps_reg_move.
50 (generate_reg_moves): Rename to...
51 (schedule_reg_moves): ...this. Remove rescan parameter. Record each
52 move in the partial schedule, but don't emit it here. Don't perform
53 register substitutions here either.
54 (apply_reg_moves): New function.
55 (duplicate_insns_of_cycles): Use register indices directly,
56 rather than finding instructions using PREV_INSN. Use ps_reg_move.
57 (sms_schedule): Call schedule_reg_moves before committing to
58 a partial schedule. Try the next ii if the schedule fails.
59 Use apply_reg_moves instead of generate_reg_moves. Adjust
60 call to print_node_sched_params. Free node_sched_param_vec
61 instead of node_sched_params.
62 (create_partial_schedule): Initialize reg_moves.
63 (free_partial_schedule): Free reg_moves.
64
652011-10-17 Richard Sandiford <richard.sandiford@linaro.org>
66
67 gcc/
68 Backport from mainline:
69
70 2011-10-10 Richard Sandiford <richard.sandiford@linaro.org>
71
72 * modulo-sched.c (ps_insn): Replace node field with an identifier.
73 (SCHED_ASAP): Replace with..
74 (NODE_ASAP): ...this macro.
75 (SCHED_PARAMS): New macro.
76 (SCHED_TIME, SCHED_FIRST_REG_MOVE, SCHED_NREG_MOVES, SCHED_ROW)
77 (SCHED_STAGE, SCHED_COLUMN): Redefine using SCHED_PARAMS.
78 (node_sched_params): Remove asap.
79 (ps_rtl_insn, ps_first_note): New functions.
80 (set_node_sched_params): Use XCNEWVEC. Don't copy across the
81 asap values.
82 (print_node_sched_params): Use SCHED_PARAMS and NODE_ASAP.
83 (generate_reg_moves): Pass ids to the SCHED_* macros.
84 (update_node_sched_params): Take a ps insn identifier rather than
85 a node as parameter. Use ps_rtl_insn.
86 (set_columns_for_ps): Update for above field and SCHED_* macro changes.
87 (permute_partial_schedule): Use ps_rtl_insn and ps_first_note.
88 (optimize_sc): Update for above field and SCHED_* macro changes.
89 Update calls to try_scheduling_node_in_cycle and
90 update_node_sched_params.
91 (duplicate_insns_of_cycles): Adjust for above field and SCHED_*
92 macro changes. Use ps_rtl_insn and ps_first_note.
93 (sms_schedule): Pass ids to the SCHED_* macros.
94 (get_sched_window): Adjust for above field and SCHED_* macro changes.
95 Use NODE_ASAP instead of SCHED_ASAP.
96 (try_scheduling_node_in_cycle): Remove node parameter. Update
97 call to ps_add_node_check_conflicts. Pass ids to the SCHED_*
98 macros.
99 (sms_schedule_by_order): Update call to try_scheduling_node_in_cycle.
100 (ps_insert_empty_row): Adjust for above field changes.
101 (compute_split_row): Use ids rather than nodes.
102 (verify_partial_schedule): Adjust for above field changes.
103 (print_partial_schedule): Use ps_rtl_insn.
104 (create_ps_insn): Take an id rather than a node.
105 (ps_insn_find_column): Adjust for above field changes.
106 Use ps_rtl_insn.
107 (ps_insn_advance_column): Adjust for above field changes.
108 (add_node_to_ps): Remove node parameter. Update call to
109 create_ps_insn.
110 (ps_has_conflicts): Use ps_rtl_insn.
111 (ps_add_node_check_conflicts): Replace node parameter with an id.
112
1132011-10-17 Richard Sandiford <richard.sandiford@linaro.org>
114
115 gcc/
116 Backport from mainline:
117
118 2011-10-10 Richard Sandiford <richard.sandiford@linaro.org>
119
120 * modulo-sched.c (undo_replace_buff_elem): Delete.
121 (generate_reg_moves): Don't build and return an undo list.
122 (free_undo_replace_buff): Delete.
123 (sms_schedule): Adjust call to generate_reg_moves.
124 Don't call free_undo_replace_buff.
125
1262011-10-17 Richard Sandiford <richard.sandiford@linaro.org>
127
128 gcc/
129 Backport from mainline:
130
131 2011-08-08 Richard Sandiford <richard.sandiford@linaro.org>
132
133 * modulo-sched.c (get_sched_window): Use a table for the debug output.
134 Print the current ii.
135 (sms_schedule_by_order): Reduce whitespace in dump line.
136
1372011-10-17 Richard Sandiford <richard.sandiford@linaro.org>
138
139 gcc/
140 Backport from mainline:
141
142 2011-08-08 Richard Sandiford <richard.sandiford@linaro.org>
143
144 * modulo-sched.c (get_sched_window): Use just one loop for predecessors
145 and one loop for successors. Fix upper bound of memory range.
146
147=== modified file 'gcc/modulo-sched.c'
148--- old/gcc/modulo-sched.c 2011-10-02 06:56:53 +0000
149+++ new/gcc/modulo-sched.c 2011-10-10 14:35:32 +0000
150@@ -124,8 +124,10 @@
151 /* A single instruction in the partial schedule. */
152 struct ps_insn
153 {
154- /* The corresponding DDG_NODE. */
155- ddg_node_ptr node;
156+ /* Identifies the instruction to be scheduled. Values smaller than
157+ the ddg's num_nodes refer directly to ddg nodes. A value of
158+ X - num_nodes refers to register move X. */
159+ int id;
160
161 /* The (absolute) cycle in which the PS instruction is scheduled.
162 Same as SCHED_TIME (node). */
163@@ -137,6 +139,33 @@
164
165 };
166
167+/* Information about a register move that has been added to a partial
168+ schedule. */
169+struct ps_reg_move_info
170+{
171+ /* The source of the move is defined by the ps_insn with id DEF.
172+ The destination is used by the ps_insns with the ids in USES. */
173+ int def;
174+ sbitmap uses;
175+
176+ /* The original form of USES' instructions used OLD_REG, but they
177+ should now use NEW_REG. */
178+ rtx old_reg;
179+ rtx new_reg;
180+
181+ /* The number of consecutive stages that the move occupies. */
182+ int num_consecutive_stages;
183+
184+ /* An instruction that sets NEW_REG to the correct value. The first
185+ move associated with DEF will have an rhs of OLD_REG; later moves
186+ use the result of the previous move. */
187+ rtx insn;
188+};
189+
190+typedef struct ps_reg_move_info ps_reg_move_info;
191+DEF_VEC_O (ps_reg_move_info);
192+DEF_VEC_ALLOC_O (ps_reg_move_info, heap);
193+
194 /* Holds the partial schedule as an array of II rows. Each entry of the
195 array points to a linked list of PS_INSNs, which represents the
196 instructions that are scheduled for that row. */
197@@ -148,6 +177,10 @@
198 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
199 ps_insn_ptr *rows;
200
201+ /* All the moves added for this partial schedule. Index X has
202+ a ps_insn id of X + g->num_nodes. */
203+ VEC (ps_reg_move_info, heap) *reg_moves;
204+
205 /* rows_length[i] holds the number of instructions in the row.
206 It is used only (as an optimization) to back off quickly from
207 trying to schedule a node in a full row; that is, to avoid running
208@@ -165,17 +198,6 @@
209 int stage_count; /* The stage count of the partial schedule. */
210 };
211
212-/* We use this to record all the register replacements we do in
213- the kernel so we can undo SMS if it is not profitable. */
214-struct undo_replace_buff_elem
215-{
216- rtx insn;
217- rtx orig_reg;
218- rtx new_reg;
219- struct undo_replace_buff_elem *next;
220-};
221-
222-
223
224 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
225 static void free_partial_schedule (partial_schedule_ptr);
226@@ -183,9 +205,7 @@
227 void print_partial_schedule (partial_schedule_ptr, FILE *);
228 static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
229 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
230- ddg_node_ptr node, int cycle,
231- sbitmap must_precede,
232- sbitmap must_follow);
233+ int, int, sbitmap, sbitmap);
234 static void rotate_partial_schedule (partial_schedule_ptr, int);
235 void set_row_column_for_ps (partial_schedule_ptr);
236 static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
237@@ -201,43 +221,27 @@
238 static void permute_partial_schedule (partial_schedule_ptr, rtx);
239 static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
240 rtx, rtx);
241-static void duplicate_insns_of_cycles (partial_schedule_ptr,
242- int, int, int, rtx);
243 static int calculate_stage_count (partial_schedule_ptr, int);
244 static void calculate_must_precede_follow (ddg_node_ptr, int, int,
245 int, int, sbitmap, sbitmap, sbitmap);
246 static int get_sched_window (partial_schedule_ptr, ddg_node_ptr,
247 sbitmap, int, int *, int *, int *);
248-static bool try_scheduling_node_in_cycle (partial_schedule_ptr, ddg_node_ptr,
249- int, int, sbitmap, int *, sbitmap,
250- sbitmap);
251+static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int,
252+ sbitmap, int *, sbitmap, sbitmap);
253 static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
254
255-#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
256-#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
257-#define SCHED_FIRST_REG_MOVE(x) \
258- (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
259-#define SCHED_NREG_MOVES(x) \
260- (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
261-#define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
262-#define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
263-#define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
264+#define NODE_ASAP(node) ((node)->aux.count)
265+
266+#define SCHED_PARAMS(x) VEC_index (node_sched_params, node_sched_param_vec, x)
267+#define SCHED_TIME(x) (SCHED_PARAMS (x)->time)
268+#define SCHED_ROW(x) (SCHED_PARAMS (x)->row)
269+#define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage)
270+#define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column)
271
272 /* The scheduling parameters held for each node. */
273 typedef struct node_sched_params
274 {
275- int asap; /* A lower-bound on the absolute scheduling cycle. */
276- int time; /* The absolute scheduling cycle (time >= asap). */
277-
278- /* The following field (first_reg_move) is a pointer to the first
279- register-move instruction added to handle the modulo-variable-expansion
280- of the register defined by this node. This register-move copies the
281- original register defined by the node. */
282- rtx first_reg_move;
283-
284- /* The number of register-move instructions added, immediately preceding
285- first_reg_move. */
286- int nreg_moves;
287+ int time; /* The absolute scheduling cycle. */
288
289 int row; /* Holds time % ii. */
290 int stage; /* Holds time / ii. */
291@@ -247,6 +251,9 @@
292 int column;
293 } *node_sched_params_ptr;
294
295+typedef struct node_sched_params node_sched_params;
296+DEF_VEC_O (node_sched_params);
297+DEF_VEC_ALLOC_O (node_sched_params, heap);
298
299 /* The following three functions are copied from the current scheduler
300 code in order to use sched_analyze() for computing the dependencies.
301@@ -296,6 +303,49 @@
302 0
303 };
304
305+/* Partial schedule instruction ID in PS is a register move. Return
306+ information about it. */
307+static struct ps_reg_move_info *
308+ps_reg_move (partial_schedule_ptr ps, int id)
309+{
310+ gcc_checking_assert (id >= ps->g->num_nodes);
311+ return VEC_index (ps_reg_move_info, ps->reg_moves, id - ps->g->num_nodes);
312+}
313+
314+/* Return the rtl instruction that is being scheduled by partial schedule
315+ instruction ID, which belongs to schedule PS. */
316+static rtx
317+ps_rtl_insn (partial_schedule_ptr ps, int id)
318+{
319+ if (id < ps->g->num_nodes)
320+ return ps->g->nodes[id].insn;
321+ else
322+ return ps_reg_move (ps, id)->insn;
323+}
324+
325+/* Partial schedule instruction ID, which belongs to PS, occurred in
326+ the original (unscheduled) loop. Return the first instruction
327+ in the loop that was associated with ps_rtl_insn (PS, ID).
328+ If the instruction had some notes before it, this is the first
329+ of those notes. */
330+static rtx
331+ps_first_note (partial_schedule_ptr ps, int id)
332+{
333+ gcc_assert (id < ps->g->num_nodes);
334+ return ps->g->nodes[id].first_note;
335+}
336+
337+/* Return the number of consecutive stages that are occupied by
338+ partial schedule instruction ID in PS. */
339+static int
340+ps_num_consecutive_stages (partial_schedule_ptr ps, int id)
341+{
342+ if (id < ps->g->num_nodes)
343+ return 1;
344+ else
345+ return ps_reg_move (ps, id)->num_consecutive_stages;
346+}
347+
348 /* Given HEAD and TAIL which are the first and last insns in a loop;
349 return the register which controls the loop. Return zero if it has
350 more than one occurrence in the loop besides the control part or the
351@@ -396,35 +446,59 @@
352 }
353
354
355-/* Points to the array that contains the sched data for each node. */
356-static node_sched_params_ptr node_sched_params;
357+/* A vector that contains the sched data for each ps_insn. */
358+static VEC (node_sched_params, heap) *node_sched_param_vec;
359
360-/* Allocate sched_params for each node and initialize it. Assumes that
361- the aux field of each node contain the asap bound (computed earlier),
362- and copies it into the sched_params field. */
363+/* Allocate sched_params for each node and initialize it. */
364 static void
365 set_node_sched_params (ddg_ptr g)
366 {
367- int i;
368-
369- /* Allocate for each node in the DDG a place to hold the "sched_data". */
370- /* Initialize ASAP/ALAP/HIGHT to zero. */
371- node_sched_params = (node_sched_params_ptr)
372- xcalloc (g->num_nodes,
373- sizeof (struct node_sched_params));
374-
375- /* Set the pointer of the general data of the node to point to the
376- appropriate sched_params structure. */
377- for (i = 0; i < g->num_nodes; i++)
378- {
379- /* Watch out for aliasing problems? */
380- node_sched_params[i].asap = g->nodes[i].aux.count;
381- g->nodes[i].aux.info = &node_sched_params[i];
382- }
383-}
384-
385-static void
386-print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g)
387+ VEC_truncate (node_sched_params, node_sched_param_vec, 0);
388+ VEC_safe_grow_cleared (node_sched_params, heap,
389+ node_sched_param_vec, g->num_nodes);
390+}
391+
392+/* Make sure that node_sched_param_vec has an entry for every move in PS. */
393+static void
394+extend_node_sched_params (partial_schedule_ptr ps)
395+{
396+ VEC_safe_grow_cleared (node_sched_params, heap, node_sched_param_vec,
397+ ps->g->num_nodes + VEC_length (ps_reg_move_info,
398+ ps->reg_moves));
399+}
400+
401+/* Update the sched_params (time, row and stage) for node U using the II,
402+ the CYCLE of U and MIN_CYCLE.
403+ We're not simply taking the following
404+ SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
405+ because the stages may not be aligned on cycle 0. */
406+static void
407+update_node_sched_params (int u, int ii, int cycle, int min_cycle)
408+{
409+ int sc_until_cycle_zero;
410+ int stage;
411+
412+ SCHED_TIME (u) = cycle;
413+ SCHED_ROW (u) = SMODULO (cycle, ii);
414+
415+ /* The calculation of stage count is done adding the number
416+ of stages before cycle zero and after cycle zero. */
417+ sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
418+
419+ if (SCHED_TIME (u) < 0)
420+ {
421+ stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
422+ SCHED_STAGE (u) = sc_until_cycle_zero - stage;
423+ }
424+ else
425+ {
426+ stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
427+ SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
428+ }
429+}
430+
431+static void
432+print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps)
433 {
434 int i;
435
436@@ -432,22 +506,170 @@
437 return;
438 for (i = 0; i < num_nodes; i++)
439 {
440- node_sched_params_ptr nsp = &node_sched_params[i];
441- rtx reg_move = nsp->first_reg_move;
442- int j;
443+ node_sched_params_ptr nsp = SCHED_PARAMS (i);
444
445 fprintf (file, "Node = %d; INSN = %d\n", i,
446- (INSN_UID (g->nodes[i].insn)));
447- fprintf (file, " asap = %d:\n", nsp->asap);
448+ INSN_UID (ps_rtl_insn (ps, i)));
449+ fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i]));
450 fprintf (file, " time = %d:\n", nsp->time);
451- fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
452- for (j = 0; j < nsp->nreg_moves; j++)
453+ fprintf (file, " stage = %d:\n", nsp->stage);
454+ }
455+}
456+
457+/* Set SCHED_COLUMN for each instruction in row ROW of PS. */
458+static void
459+set_columns_for_row (partial_schedule_ptr ps, int row)
460+{
461+ ps_insn_ptr cur_insn;
462+ int column;
463+
464+ column = 0;
465+ for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row)
466+ SCHED_COLUMN (cur_insn->id) = column++;
467+}
468+
469+/* Set SCHED_COLUMN for each instruction in PS. */
470+static void
471+set_columns_for_ps (partial_schedule_ptr ps)
472+{
473+ int row;
474+
475+ for (row = 0; row < ps->ii; row++)
476+ set_columns_for_row (ps, row);
477+}
478+
479+/* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS.
480+ Its single predecessor has already been scheduled, as has its
481+ ddg node successors. (The move may have also another move as its
482+ successor, in which case that successor will be scheduled later.)
483+
484+ The move is part of a chain that satisfies register dependencies
485+ between a producing ddg node and various consuming ddg nodes.
486+ If some of these dependencies have a distance of 1 (meaning that
487+ the use is upward-exposed) then DISTANCE1_USES is nonnull and
488+ contains the set of uses with distance-1 dependencies.
489+ DISTANCE1_USES is null otherwise.
490+
491+ MUST_FOLLOW is a scratch bitmap that is big enough to hold
492+ all current ps_insn ids.
493+
494+ Return true on success. */
495+static bool
496+schedule_reg_move (partial_schedule_ptr ps, int i_reg_move,
497+ sbitmap distance1_uses, sbitmap must_follow)
498+{
499+ unsigned int u;
500+ int this_time, this_distance, this_start, this_end, this_latency;
501+ int start, end, c, ii;
502+ sbitmap_iterator sbi;
503+ ps_reg_move_info *move;
504+ rtx this_insn;
505+ ps_insn_ptr psi;
506+
507+ move = ps_reg_move (ps, i_reg_move);
508+ ii = ps->ii;
509+ if (dump_file)
510+ {
511+ fprintf (dump_file, "Scheduling register move INSN %d; ii = %d"
512+ ", min cycle = %d\n\n", INSN_UID (move->insn), ii,
513+ PS_MIN_CYCLE (ps));
514+ print_rtl_single (dump_file, move->insn);
515+ fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time");
516+ fprintf (dump_file, "=========== =========== =====\n");
517+ }
518+
519+ start = INT_MIN;
520+ end = INT_MAX;
521+
522+ /* For dependencies of distance 1 between a producer ddg node A
523+ and consumer ddg node B, we have a chain of dependencies:
524+
525+ A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B
526+
527+ where Mi is the ith move. For dependencies of distance 0 between
528+ a producer ddg node A and consumer ddg node C, we have a chain of
529+ dependencies:
530+
531+ A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C
532+
533+ where Mi' occupies the same position as Mi but occurs a stage later.
534+ We can only schedule each move once, so if we have both types of
535+ chain, we model the second as:
536+
537+ A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C
538+
539+ First handle the dependencies between the previously-scheduled
540+ predecessor and the move. */
541+ this_insn = ps_rtl_insn (ps, move->def);
542+ this_latency = insn_latency (this_insn, move->insn);
543+ this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0;
544+ this_time = SCHED_TIME (move->def) - this_distance * ii;
545+ this_start = this_time + this_latency;
546+ this_end = this_time + ii;
547+ if (dump_file)
548+ fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
549+ this_start, this_end, SCHED_TIME (move->def),
550+ INSN_UID (this_insn), this_latency, this_distance,
551+ INSN_UID (move->insn));
552+
553+ if (start < this_start)
554+ start = this_start;
555+ if (end > this_end)
556+ end = this_end;
557+
558+ /* Handle the dependencies between the move and previously-scheduled
559+ successors. */
560+ EXECUTE_IF_SET_IN_SBITMAP (move->uses, 0, u, sbi)
561+ {
562+ this_insn = ps_rtl_insn (ps, u);
563+ this_latency = insn_latency (move->insn, this_insn);
564+ if (distance1_uses && !TEST_BIT (distance1_uses, u))
565+ this_distance = -1;
566+ else
567+ this_distance = 0;
568+ this_time = SCHED_TIME (u) + this_distance * ii;
569+ this_start = this_time - ii;
570+ this_end = this_time - this_latency;
571+ if (dump_file)
572+ fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
573+ this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn),
574+ this_latency, this_distance, INSN_UID (this_insn));
575+
576+ if (start < this_start)
577+ start = this_start;
578+ if (end > this_end)
579+ end = this_end;
580+ }
581+
582+ if (dump_file)
583+ {
584+ fprintf (dump_file, "----------- ----------- -----\n");
585+ fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)");
586+ }
587+
588+ sbitmap_zero (must_follow);
589+ SET_BIT (must_follow, move->def);
590+
591+ start = MAX (start, end - (ii - 1));
592+ for (c = end; c >= start; c--)
593+ {
594+ psi = ps_add_node_check_conflicts (ps, i_reg_move, c,
595+ move->uses, must_follow);
596+ if (psi)
597 {
598- fprintf (file, " reg_move = ");
599- print_rtl_single (file, reg_move);
600- reg_move = PREV_INSN (reg_move);
601+ update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps));
602+ if (dump_file)
603+ fprintf (dump_file, "\nScheduled register move INSN %d at"
604+ " time %d, row %d\n\n", INSN_UID (move->insn), c,
605+ SCHED_ROW (i_reg_move));
606+ return true;
607 }
608 }
609+
610+ if (dump_file)
611+ fprintf (dump_file, "\nNo available slot\n\n");
612+
613+ return false;
614 }
615
616 /*
617@@ -461,22 +683,23 @@
618 nreg_moves = ----------------------------------- + 1 - { dependence.
619 ii { 1 if not.
620 */
621-static struct undo_replace_buff_elem *
622-generate_reg_moves (partial_schedule_ptr ps, bool rescan)
623+static bool
624+schedule_reg_moves (partial_schedule_ptr ps)
625 {
626 ddg_ptr g = ps->g;
627 int ii = ps->ii;
628 int i;
629- struct undo_replace_buff_elem *reg_move_replaces = NULL;
630
631 for (i = 0; i < g->num_nodes; i++)
632 {
633 ddg_node_ptr u = &g->nodes[i];
634 ddg_edge_ptr e;
635 int nreg_moves = 0, i_reg_move;
636- sbitmap *uses_of_defs;
637- rtx last_reg_move;
638 rtx prev_reg, old_reg;
639+ int first_move;
640+ int distances[2];
641+ sbitmap must_follow;
642+ sbitmap distance1_uses;
643 rtx set = single_set (u->insn);
644
645 /* Skip instructions that do not set a register. */
646@@ -485,18 +708,21 @@
647
648 /* Compute the number of reg_moves needed for u, by looking at life
649 ranges started at u (excluding self-loops). */
650+ distances[0] = distances[1] = false;
651 for (e = u->out; e; e = e->next_out)
652 if (e->type == TRUE_DEP && e->dest != e->src)
653 {
654- int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
655+ int nreg_moves4e = (SCHED_TIME (e->dest->cuid)
656+ - SCHED_TIME (e->src->cuid)) / ii;
657
658 if (e->distance == 1)
659- nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
660+ nreg_moves4e = (SCHED_TIME (e->dest->cuid)
661+ - SCHED_TIME (e->src->cuid) + ii) / ii;
662
663 /* If dest precedes src in the schedule of the kernel, then dest
664 will read before src writes and we can save one reg_copy. */
665- if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
666- && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
667+ if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
668+ && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
669 nreg_moves4e--;
670
671 if (nreg_moves4e >= 1)
672@@ -513,125 +739,105 @@
673 gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn));
674 }
675
676+ if (nreg_moves4e)
677+ {
678+ gcc_assert (e->distance < 2);
679+ distances[e->distance] = true;
680+ }
681 nreg_moves = MAX (nreg_moves, nreg_moves4e);
682 }
683
684 if (nreg_moves == 0)
685 continue;
686
687+ /* Create NREG_MOVES register moves. */
688+ first_move = VEC_length (ps_reg_move_info, ps->reg_moves);
689+ VEC_safe_grow_cleared (ps_reg_move_info, heap, ps->reg_moves,
690+ first_move + nreg_moves);
691+ extend_node_sched_params (ps);
692+
693+ /* Record the moves associated with this node. */
694+ first_move += ps->g->num_nodes;
695+
696+ /* Generate each move. */
697+ old_reg = prev_reg = SET_DEST (single_set (u->insn));
698+ for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
699+ {
700+ ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move);
701+
702+ move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i;
703+ move->uses = sbitmap_alloc (first_move + nreg_moves);
704+ move->old_reg = old_reg;
705+ move->new_reg = gen_reg_rtx (GET_MODE (prev_reg));
706+ move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1;
707+ move->insn = gen_move_insn (move->new_reg, copy_rtx (prev_reg));
708+ sbitmap_zero (move->uses);
709+
710+ prev_reg = move->new_reg;
711+ }
712+
713+ distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL;
714+
715 /* Every use of the register defined by node may require a different
716 copy of this register, depending on the time the use is scheduled.
717- Set a bitmap vector, telling which nodes use each copy of this
718- register. */
719- uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
720- sbitmap_vector_zero (uses_of_defs, nreg_moves);
721+ Record which uses require which move results. */
722 for (e = u->out; e; e = e->next_out)
723 if (e->type == TRUE_DEP && e->dest != e->src)
724 {
725- int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
726+ int dest_copy = (SCHED_TIME (e->dest->cuid)
727+ - SCHED_TIME (e->src->cuid)) / ii;
728
729 if (e->distance == 1)
730- dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
731+ dest_copy = (SCHED_TIME (e->dest->cuid)
732+ - SCHED_TIME (e->src->cuid) + ii) / ii;
733
734- if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
735- && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
736+ if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
737+ && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
738 dest_copy--;
739
740 if (dest_copy)
741- SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
742+ {
743+ ps_reg_move_info *move;
744+
745+ move = ps_reg_move (ps, first_move + dest_copy - 1);
746+ SET_BIT (move->uses, e->dest->cuid);
747+ if (e->distance == 1)
748+ SET_BIT (distance1_uses, e->dest->cuid);
749+ }
750 }
751
752- /* Now generate the reg_moves, attaching relevant uses to them. */
753- SCHED_NREG_MOVES (u) = nreg_moves;
754- old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
755- /* Insert the reg-moves right before the notes which precede
756- the insn they relates to. */
757- last_reg_move = u->first_note;
758-
759+ must_follow = sbitmap_alloc (first_move + nreg_moves);
760 for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
761+ if (!schedule_reg_move (ps, first_move + i_reg_move,
762+ distance1_uses, must_follow))
763+ break;
764+ sbitmap_free (must_follow);
765+ if (distance1_uses)
766+ sbitmap_free (distance1_uses);
767+ if (i_reg_move < nreg_moves)
768+ return false;
769+ }
770+ return true;
771+}
772+
773+/* Emit the moves associated with PS. Apply the substitutions
774+ associated with them. */
775+static void
776+apply_reg_moves (partial_schedule_ptr ps)
777+{
778+ ps_reg_move_info *move;
779+ int i;
780+
781+ FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move)
782+ {
783+ unsigned int i_use;
784+ sbitmap_iterator sbi;
785+
786+ EXECUTE_IF_SET_IN_SBITMAP (move->uses, 0, i_use, sbi)
787 {
788- unsigned int i_use = 0;
789- rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
790- rtx reg_move = gen_move_insn (new_reg, prev_reg);
791- sbitmap_iterator sbi;
792-
793- add_insn_before (reg_move, last_reg_move, NULL);
794- last_reg_move = reg_move;
795-
796- if (!SCHED_FIRST_REG_MOVE (u))
797- SCHED_FIRST_REG_MOVE (u) = reg_move;
798-
799- EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
800- {
801- struct undo_replace_buff_elem *rep;
802-
803- rep = (struct undo_replace_buff_elem *)
804- xcalloc (1, sizeof (struct undo_replace_buff_elem));
805- rep->insn = g->nodes[i_use].insn;
806- rep->orig_reg = old_reg;
807- rep->new_reg = new_reg;
808-
809- if (! reg_move_replaces)
810- reg_move_replaces = rep;
811- else
812- {
813- rep->next = reg_move_replaces;
814- reg_move_replaces = rep;
815- }
816-
817- replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
818- if (rescan)
819- df_insn_rescan (g->nodes[i_use].insn);
820- }
821-
822- prev_reg = new_reg;
823+ replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg);
824+ df_insn_rescan (ps->g->nodes[i_use].insn);
825 }
826- sbitmap_vector_free (uses_of_defs);
827- }
828- return reg_move_replaces;
829-}
830-
831-/* Free memory allocated for the undo buffer. */
832-static void
833-free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
834-{
835-
836- while (reg_move_replaces)
837- {
838- struct undo_replace_buff_elem *rep = reg_move_replaces;
839-
840- reg_move_replaces = reg_move_replaces->next;
841- free (rep);
842- }
843-}
844-
845-/* Update the sched_params (time, row and stage) for node U using the II,
846- the CYCLE of U and MIN_CYCLE.
847- We're not simply taking the following
848- SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
849- because the stages may not be aligned on cycle 0. */
850-static void
851-update_node_sched_params (ddg_node_ptr u, int ii, int cycle, int min_cycle)
852-{
853- int sc_until_cycle_zero;
854- int stage;
855-
856- SCHED_TIME (u) = cycle;
857- SCHED_ROW (u) = SMODULO (cycle, ii);
858-
859- /* The calculation of stage count is done adding the number
860- of stages before cycle zero and after cycle zero. */
861- sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
862-
863- if (SCHED_TIME (u) < 0)
864- {
865- stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
866- SCHED_STAGE (u) = sc_until_cycle_zero - stage;
867- }
868- else
869- {
870- stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
871- SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
872 }
873 }
874
875@@ -647,18 +853,19 @@
876 for (row = 0; row < ii; row++)
877 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
878 {
879- ddg_node_ptr u = crr_insn->node;
880+ int u = crr_insn->id;
881 int normalized_time = SCHED_TIME (u) - amount;
882 int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
883
884 if (dump_file)
885 {
886 /* Print the scheduling times after the rotation. */
887+ rtx insn = ps_rtl_insn (ps, u);
888+
889 fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
890- "crr_insn->cycle=%d, min_cycle=%d", crr_insn->node->cuid,
891- INSN_UID (crr_insn->node->insn), normalized_time,
892- new_min_cycle);
893- if (JUMP_P (crr_insn->node->insn))
894+ "crr_insn->cycle=%d, min_cycle=%d", u,
895+ INSN_UID (insn), normalized_time, new_min_cycle);
896+ if (JUMP_P (insn))
897 fprintf (dump_file, " (branch)");
898 fprintf (dump_file, "\n");
899 }
900@@ -671,22 +878,6 @@
901 }
902 }
903
904-/* Set SCHED_COLUMN of each node according to its position in PS. */
905-static void
906-set_columns_for_ps (partial_schedule_ptr ps)
907-{
908- int row;
909-
910- for (row = 0; row < ps->ii; row++)
911- {
912- ps_insn_ptr cur_insn = ps->rows[row];
913- int column = 0;
914-
915- for (; cur_insn; cur_insn = cur_insn->next_in_row)
916- SCHED_COLUMN (cur_insn->node) = column++;
917- }
918-}
919-
920 /* Permute the insns according to their order in PS, from row 0 to
921 row ii-1, and position them right before LAST. This schedules
922 the insns of the loop kernel. */
923@@ -699,9 +890,18 @@
924
925 for (row = 0; row < ii ; row++)
926 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
927- if (PREV_INSN (last) != ps_ij->node->insn)
928- reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
929- PREV_INSN (last));
930+ {
931+ rtx insn = ps_rtl_insn (ps, ps_ij->id);
932+
933+ if (PREV_INSN (last) != insn)
934+ {
935+ if (ps_ij->id < ps->g->num_nodes)
936+ reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn,
937+ PREV_INSN (last));
938+ else
939+ add_insn_before (insn, last, NULL);
940+ }
941+ }
942 }
943
944 /* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE
945@@ -750,7 +950,7 @@
946 to row ii-1. If they are equal just bail out. */
947 stage_count = calculate_stage_count (ps, amount);
948 stage_count_curr =
949- calculate_stage_count (ps, SCHED_TIME (g->closing_branch) - (ii - 1));
950+ calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1));
951
952 if (stage_count == stage_count_curr)
953 {
954@@ -779,7 +979,7 @@
955 print_partial_schedule (ps, dump_file);
956 }
957
958- if (SMODULO (SCHED_TIME (g->closing_branch), ii) == ii - 1)
959+ if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1)
960 {
961 ok = true;
962 goto clear;
963@@ -794,7 +994,7 @@
964 {
965 bool success;
966 ps_insn_ptr next_ps_i;
967- int branch_cycle = SCHED_TIME (g->closing_branch);
968+ int branch_cycle = SCHED_TIME (g->closing_branch->cuid);
969 int row = SMODULO (branch_cycle, ps->ii);
970 int num_splits = 0;
971 sbitmap must_precede, must_follow, tmp_precede, tmp_follow;
972@@ -850,13 +1050,12 @@
973 branch so we can remove it from it's current cycle. */
974 for (next_ps_i = ps->rows[row];
975 next_ps_i; next_ps_i = next_ps_i->next_in_row)
976- if (next_ps_i->node->cuid == g->closing_branch->cuid)
977+ if (next_ps_i->id == g->closing_branch->cuid)
978 break;
979
980 remove_node_from_ps (ps, next_ps_i);
981 success =
982- try_scheduling_node_in_cycle (ps, g->closing_branch,
983- g->closing_branch->cuid, c,
984+ try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c,
985 sched_nodes, &num_splits,
986 tmp_precede, tmp_follow);
987 gcc_assert (num_splits == 0);
988@@ -874,8 +1073,7 @@
989 must_precede, branch_cycle, start, end,
990 step);
991 success =
992- try_scheduling_node_in_cycle (ps, g->closing_branch,
993- g->closing_branch->cuid,
994+ try_scheduling_node_in_cycle (ps, g->closing_branch->cuid,
995 branch_cycle, sched_nodes,
996 &num_splits, tmp_precede,
997 tmp_follow);
998@@ -889,7 +1087,7 @@
999 fprintf (dump_file,
1000 "SMS success in moving branch to cycle %d\n", c);
1001
1002- update_node_sched_params (g->closing_branch, ii, c,
1003+ update_node_sched_params (g->closing_branch->cuid, ii, c,
1004 PS_MIN_CYCLE (ps));
1005 ok = true;
1006 }
1007@@ -905,7 +1103,7 @@
1008
1009 static void
1010 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
1011- int to_stage, int for_prolog, rtx count_reg)
1012+ int to_stage, rtx count_reg)
1013 {
1014 int row;
1015 ps_insn_ptr ps_ij;
1016@@ -913,9 +1111,9 @@
1017 for (row = 0; row < ps->ii; row++)
1018 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
1019 {
1020- ddg_node_ptr u_node = ps_ij->node;
1021- int j, i_reg_moves;
1022- rtx reg_move = NULL_RTX;
1023+ int u = ps_ij->id;
1024+ int first_u, last_u;
1025+ rtx u_insn;
1026
1027 /* Do not duplicate any insn which refers to count_reg as it
1028 belongs to the control part.
1029@@ -923,52 +1121,20 @@
1030 be ignored.
1031 TODO: This should be done by analyzing the control part of
1032 the loop. */
1033- if (reg_mentioned_p (count_reg, u_node->insn)
1034- || JUMP_P (ps_ij->node->insn))
1035+ u_insn = ps_rtl_insn (ps, u);
1036+ if (reg_mentioned_p (count_reg, u_insn)
1037+ || JUMP_P (u_insn))
1038 continue;
1039
1040- if (for_prolog)
1041- {
1042- /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing
1043- number of reg_moves starting with the second occurrence of
1044- u_node, which is generated if its SCHED_STAGE <= to_stage. */
1045- i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
1046- i_reg_moves = MAX (i_reg_moves, 0);
1047- i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
1048-
1049- /* The reg_moves start from the *first* reg_move backwards. */
1050- if (i_reg_moves)
1051- {
1052- reg_move = SCHED_FIRST_REG_MOVE (u_node);
1053- for (j = 1; j < i_reg_moves; j++)
1054- reg_move = PREV_INSN (reg_move);
1055- }
1056- }
1057- else /* It's for the epilog. */
1058- {
1059- /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves,
1060- starting to decrease one stage after u_node no longer occurs;
1061- that is, generate all reg_moves until
1062- SCHED_STAGE (u_node) == from_stage - 1. */
1063- i_reg_moves = SCHED_NREG_MOVES (u_node)
1064- - (from_stage - SCHED_STAGE (u_node) - 1);
1065- i_reg_moves = MAX (i_reg_moves, 0);
1066- i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
1067-
1068- /* The reg_moves start from the *last* reg_move forwards. */
1069- if (i_reg_moves)
1070- {
1071- reg_move = SCHED_FIRST_REG_MOVE (u_node);
1072- for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
1073- reg_move = PREV_INSN (reg_move);
1074- }
1075- }
1076-
1077- for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
1078- emit_insn (copy_rtx (PATTERN (reg_move)));
1079- if (SCHED_STAGE (u_node) >= from_stage
1080- && SCHED_STAGE (u_node) <= to_stage)
1081- duplicate_insn_chain (u_node->first_note, u_node->insn);
1082+ first_u = SCHED_STAGE (u);
1083+ last_u = first_u + ps_num_consecutive_stages (ps, u) - 1;
1084+ if (from_stage <= last_u && to_stage >= first_u)
1085+ {
1086+ if (u < ps->g->num_nodes)
1087+ duplicate_insn_chain (ps_first_note (ps, u), u_insn);
1088+ else
1089+ emit_insn (copy_rtx (PATTERN (u_insn)));
1090+ }
1091 }
1092 }
1093
1094@@ -1002,7 +1168,7 @@
1095 }
1096
1097 for (i = 0; i < last_stage; i++)
1098- duplicate_insns_of_cycles (ps, 0, i, 1, count_reg);
1099+ duplicate_insns_of_cycles (ps, 0, i, count_reg);
1100
1101 /* Put the prolog on the entry edge. */
1102 e = loop_preheader_edge (loop);
1103@@ -1014,7 +1180,7 @@
1104 start_sequence ();
1105
1106 for (i = 0; i < last_stage; i++)
1107- duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg);
1108+ duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg);
1109
1110 /* Put the epilogue on the exit edge. */
1111 gcc_assert (single_exit (loop));
1112@@ -1350,10 +1516,9 @@
1113 {
1114 rtx head, tail;
1115 rtx count_reg, count_init;
1116- int mii, rec_mii;
1117- unsigned stage_count = 0;
1118+ int mii, rec_mii, stage_count, min_cycle;
1119 HOST_WIDEST_INT loop_count = 0;
1120- bool opt_sc_p = false;
1121+ bool opt_sc_p;
1122
1123 if (! (g = g_arr[loop->num]))
1124 continue;
1125@@ -1430,62 +1595,63 @@
1126 fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1127 rec_mii, mii, maxii);
1128
1129- /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1130- ASAP. */
1131- set_node_sched_params (g);
1132-
1133- ps = sms_schedule_by_order (g, mii, maxii, node_order);
1134-
1135- if (ps)
1136+ for (;;)
1137 {
1138- /* Try to achieve optimized SC by normalizing the partial
1139- schedule (having the cycles start from cycle zero).
1140- The branch location must be placed in row ii-1 in the
1141- final scheduling. If failed, shift all instructions to
1142- position the branch in row ii-1. */
1143- opt_sc_p = optimize_sc (ps, g);
1144- if (opt_sc_p)
1145- stage_count = calculate_stage_count (ps, 0);
1146- else
1147+ set_node_sched_params (g);
1148+
1149+ stage_count = 0;
1150+ opt_sc_p = false;
1151+ ps = sms_schedule_by_order (g, mii, maxii, node_order);
1152+
1153+ if (ps)
1154 {
1155- /* Bring the branch to cycle ii-1. */
1156- int amount = SCHED_TIME (g->closing_branch) - (ps->ii - 1);
1157+ /* Try to achieve optimized SC by normalizing the partial
1158+ schedule (having the cycles start from cycle zero).
1159+ The branch location must be placed in row ii-1 in the
1160+ final scheduling. If failed, shift all instructions to
1161+ position the branch in row ii-1. */
1162+ opt_sc_p = optimize_sc (ps, g);
1163+ if (opt_sc_p)
1164+ stage_count = calculate_stage_count (ps, 0);
1165+ else
1166+ {
1167+ /* Bring the branch to cycle ii-1. */
1168+ int amount = (SCHED_TIME (g->closing_branch->cuid)
1169+ - (ps->ii - 1));
1170
1171+ if (dump_file)
1172+ fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");
1173+
1174+ stage_count = calculate_stage_count (ps, amount);
1175+ }
1176+
1177+ gcc_assert (stage_count >= 1);
1178+ }
1179+
1180+ /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
1181+ 1 means that there is no interleaving between iterations thus
1182+ we let the scheduling passes do the job in this case. */
1183+ if (stage_count < PARAM_VALUE (PARAM_SMS_MIN_SC)
1184+ || (count_init && (loop_count <= stage_count))
1185+ || (flag_branch_probabilities && (trip_count <= stage_count)))
1186+ {
1187 if (dump_file)
1188- fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");
1189-
1190- stage_count = calculate_stage_count (ps, amount);
1191- }
1192-
1193- gcc_assert (stage_count >= 1);
1194- PS_STAGE_COUNT (ps) = stage_count;
1195- }
1196-
1197- /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
1198- 1 means that there is no interleaving between iterations thus
1199- we let the scheduling passes do the job in this case. */
1200- if (stage_count < (unsigned) PARAM_VALUE (PARAM_SMS_MIN_SC)
1201- || (count_init && (loop_count <= stage_count))
1202- || (flag_branch_probabilities && (trip_count <= stage_count)))
1203- {
1204- if (dump_file)
1205- {
1206- fprintf (dump_file, "SMS failed... \n");
1207- fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
1208- fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1209- fprintf (dump_file, ", trip-count=");
1210- fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1211- fprintf (dump_file, ")\n");
1212- }
1213- }
1214- else
1215- {
1216- struct undo_replace_buff_elem *reg_move_replaces;
1217+ {
1218+ fprintf (dump_file, "SMS failed... \n");
1219+ fprintf (dump_file, "SMS sched-failed (stage-count=%d,"
1220+ " loop-count=", stage_count);
1221+ fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1222+ fprintf (dump_file, ", trip-count=");
1223+ fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1224+ fprintf (dump_file, ")\n");
1225+ }
1226+ break;
1227+ }
1228
1229 if (!opt_sc_p)
1230 {
1231 /* Rotate the partial schedule to have the branch in row ii-1. */
1232- int amount = SCHED_TIME (g->closing_branch) - (ps->ii - 1);
1233+ int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1);
1234
1235 reset_sched_times (ps, amount);
1236 rotate_partial_schedule (ps, amount);
1237@@ -1493,6 +1659,29 @@
1238
1239 set_columns_for_ps (ps);
1240
1241+ min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
1242+ if (!schedule_reg_moves (ps))
1243+ {
1244+ mii = ps->ii + 1;
1245+ free_partial_schedule (ps);
1246+ continue;
1247+ }
1248+
1249+ /* Moves that handle incoming values might have been added
1250+ to a new first stage. Bump the stage count if so.
1251+
1252+ ??? Perhaps we could consider rotating the schedule here
1253+ instead? */
1254+ if (PS_MIN_CYCLE (ps) < min_cycle)
1255+ {
1256+ reset_sched_times (ps, 0);
1257+ stage_count++;
1258+ }
1259+
1260+ /* The stage count should now be correct without rotation. */
1261+ gcc_checking_assert (stage_count == calculate_stage_count (ps, 0));
1262+ PS_STAGE_COUNT (ps) = stage_count;
1263+
1264 canon_loop (loop);
1265
1266 if (dump_file)
1267@@ -1531,17 +1720,16 @@
1268 /* The life-info is not valid any more. */
1269 df_set_bb_dirty (g->bb);
1270
1271- reg_move_replaces = generate_reg_moves (ps, true);
1272+ apply_reg_moves (ps);
1273 if (dump_file)
1274- print_node_sched_params (dump_file, g->num_nodes, g);
1275+ print_node_sched_params (dump_file, g->num_nodes, ps);
1276 /* Generate prolog and epilog. */
1277 generate_prolog_epilog (ps, loop, count_reg, count_init);
1278-
1279- free_undo_replace_buff (reg_move_replaces);
1280+ break;
1281 }
1282
1283 free_partial_schedule (ps);
1284- free (node_sched_params);
1285+ VEC_free (node_sched_params, heap, node_sched_param_vec);
1286 free (node_order);
1287 free_ddg (g);
1288 }
1289@@ -1643,9 +1831,11 @@
1290
1291 static int
1292 get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
1293- sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
1294+ sbitmap sched_nodes, int ii, int *start_p, int *step_p,
1295+ int *end_p)
1296 {
1297 int start, step, end;
1298+ int early_start, late_start;
1299 ddg_edge_ptr e;
1300 sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
1301 sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
1302@@ -1653,6 +1843,8 @@
1303 sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1304 int psp_not_empty;
1305 int pss_not_empty;
1306+ int count_preds;
1307+ int count_succs;
1308
1309 /* 1. compute sched window for u (start, end, step). */
1310 sbitmap_zero (psp);
1311@@ -1660,214 +1852,119 @@
1312 psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
1313 pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);
1314
1315- if (psp_not_empty && !pss_not_empty)
1316- {
1317- int early_start = INT_MIN;
1318-
1319- end = INT_MAX;
1320- for (e = u_node->in; e != 0; e = e->next_in)
1321- {
1322- ddg_node_ptr v_node = e->src;
1323-
1324- if (dump_file)
1325- {
1326- fprintf (dump_file, "\nProcessing edge: ");
1327- print_ddg_edge (dump_file, e);
1328- fprintf (dump_file,
1329- "\nScheduling %d (%d) in psp_not_empty,"
1330- " checking p %d (%d): ", u_node->cuid,
1331- INSN_UID (u_node->insn), v_node->cuid, INSN_UID
1332- (v_node->insn));
1333- }
1334-
1335- if (TEST_BIT (sched_nodes, v_node->cuid))
1336- {
1337- int p_st = SCHED_TIME (v_node);
1338-
1339- early_start =
1340- MAX (early_start, p_st + e->latency - (e->distance * ii));
1341-
1342- if (dump_file)
1343- fprintf (dump_file,
1344- "pred st = %d; early_start = %d; latency: %d",
1345- p_st, early_start, e->latency);
1346-
1347- if (e->data_type == MEM_DEP)
1348- end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1349- }
1350- else if (dump_file)
1351- fprintf (dump_file, "the node is not scheduled\n");
1352- }
1353- start = early_start;
1354- end = MIN (end, early_start + ii);
1355- /* Schedule the node close to it's predecessors. */
1356- step = 1;
1357-
1358- if (dump_file)
1359- fprintf (dump_file,
1360- "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1361- u_node->cuid, INSN_UID (u_node->insn), start, end, step);
1362- }
1363-
1364- else if (!psp_not_empty && pss_not_empty)
1365- {
1366- int late_start = INT_MAX;
1367-
1368- end = INT_MIN;
1369- for (e = u_node->out; e != 0; e = e->next_out)
1370- {
1371- ddg_node_ptr v_node = e->dest;
1372-
1373- if (dump_file)
1374- {
1375- fprintf (dump_file, "\nProcessing edge:");
1376- print_ddg_edge (dump_file, e);
1377- fprintf (dump_file,
1378- "\nScheduling %d (%d) in pss_not_empty,"
1379- " checking s %d (%d): ", u_node->cuid,
1380- INSN_UID (u_node->insn), v_node->cuid, INSN_UID
1381- (v_node->insn));
1382- }
1383-
1384- if (TEST_BIT (sched_nodes, v_node->cuid))
1385- {
1386- int s_st = SCHED_TIME (v_node);
1387-
1388- late_start = MIN (late_start,
1389- s_st - e->latency + (e->distance * ii));
1390-
1391- if (dump_file)
1392- fprintf (dump_file,
1393- "succ st = %d; late_start = %d; latency = %d",
1394- s_st, late_start, e->latency);
1395-
1396- if (e->data_type == MEM_DEP)
1397- end = MAX (end, SCHED_TIME (v_node) - ii + 1);
1398- if (dump_file)
1399- fprintf (dump_file, "end = %d\n", end);
1400-
1401- }
1402- else if (dump_file)
1403- fprintf (dump_file, "the node is not scheduled\n");
1404-
1405- }
1406- start = late_start;
1407- end = MAX (end, late_start - ii);
1408- /* Schedule the node close to it's successors. */
1409+ /* We first compute a forward range (start <= end), then decide whether
1410+ to reverse it. */
1411+ early_start = INT_MIN;
1412+ late_start = INT_MAX;
1413+ start = INT_MIN;
1414+ end = INT_MAX;
1415+ step = 1;
1416+
1417+ count_preds = 0;
1418+ count_succs = 0;
1419+
1420+ if (dump_file && (psp_not_empty || pss_not_empty))
1421+ {
1422+ fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)"
1423+ "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii);
1424+ fprintf (dump_file, "%11s %11s %11s %11s %5s\n",
1425+ "start", "early start", "late start", "end", "time");
1426+ fprintf (dump_file, "=========== =========== =========== ==========="
1427+ " =====\n");
1428+ }
1429+ /* Calculate early_start and limit end. Both bounds are inclusive. */
1430+ if (psp_not_empty)
1431+ for (e = u_node->in; e != 0; e = e->next_in)
1432+ {
1433+ int v = e->src->cuid;
1434+
1435+ if (TEST_BIT (sched_nodes, v))
1436+ {
1437+ int p_st = SCHED_TIME (v);
1438+ int earliest = p_st + e->latency - (e->distance * ii);
1439+ int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX);
1440+
1441+ if (dump_file)
1442+ {
1443+ fprintf (dump_file, "%11s %11d %11s %11d %5d",
1444+ "", earliest, "", latest, p_st);
1445+ print_ddg_edge (dump_file, e);
1446+ fprintf (dump_file, "\n");
1447+ }
1448+
1449+ early_start = MAX (early_start, earliest);
1450+ end = MIN (end, latest);
1451+
1452+ if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1453+ count_preds++;
1454+ }
1455+ }
1456+
1457+ /* Calculate late_start and limit start. Both bounds are inclusive. */
1458+ if (pss_not_empty)
1459+ for (e = u_node->out; e != 0; e = e->next_out)
1460+ {
1461+ int v = e->dest->cuid;
1462+
1463+ if (TEST_BIT (sched_nodes, v))
1464+ {
1465+ int s_st = SCHED_TIME (v);
1466+ int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN);
1467+ int latest = s_st - e->latency + (e->distance * ii);
1468+
1469+ if (dump_file)
1470+ {
1471+ fprintf (dump_file, "%11d %11s %11d %11s %5d",
1472+ earliest, "", latest, "", s_st);
1473+ print_ddg_edge (dump_file, e);
1474+ fprintf (dump_file, "\n");
1475+ }
1476+
1477+ start = MAX (start, earliest);
1478+ late_start = MIN (late_start, latest);
1479+
1480+ if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1481+ count_succs++;
1482+ }
1483+ }
1484+
1485+ if (dump_file && (psp_not_empty || pss_not_empty))
1486+ {
1487+ fprintf (dump_file, "----------- ----------- ----------- -----------"
1488+ " -----\n");
1489+ fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n",
1490+ start, early_start, late_start, end, "",
1491+ "(max, max, min, min)");
1492+ }
1493+
1494+ /* Get a target scheduling window no bigger than ii. */
1495+ if (early_start == INT_MIN && late_start == INT_MAX)
1496+ early_start = NODE_ASAP (u_node);
1497+ else if (early_start == INT_MIN)
1498+ early_start = late_start - (ii - 1);
1499+ late_start = MIN (late_start, early_start + (ii - 1));
1500+
1501+ /* Apply memory dependence limits. */
1502+ start = MAX (start, early_start);
1503+ end = MIN (end, late_start);
1504+
1505+ if (dump_file && (psp_not_empty || pss_not_empty))
1506+ fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n",
1507+ "", start, end, "", "");
1508+
1509+ /* If there are at least as many successors as predecessors, schedule the
1510+ node close to its successors. */
1511+ if (pss_not_empty && count_succs >= count_preds)
1512+ {
1513+ int tmp = end;
1514+ end = start;
1515+ start = tmp;
1516 step = -1;
1517-
1518- if (dump_file)
1519- fprintf (dump_file,
1520- "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1521- u_node->cuid, INSN_UID (u_node->insn), start, end, step);
1522-
1523- }
1524-
1525- else if (psp_not_empty && pss_not_empty)
1526- {
1527- int early_start = INT_MIN;
1528- int late_start = INT_MAX;
1529- int count_preds = 0;
1530- int count_succs = 0;
1531-
1532- start = INT_MIN;
1533- end = INT_MAX;
1534- for (e = u_node->in; e != 0; e = e->next_in)
1535- {
1536- ddg_node_ptr v_node = e->src;
1537-
1538- if (dump_file)
1539- {
1540- fprintf (dump_file, "\nProcessing edge:");
1541- print_ddg_edge (dump_file, e);
1542- fprintf (dump_file,
1543- "\nScheduling %d (%d) in psp_pss_not_empty,"
1544- " checking p %d (%d): ", u_node->cuid, INSN_UID
1545- (u_node->insn), v_node->cuid, INSN_UID
1546- (v_node->insn));
1547- }
1548-
1549- if (TEST_BIT (sched_nodes, v_node->cuid))
1550- {
1551- int p_st = SCHED_TIME (v_node);
1552-
1553- early_start = MAX (early_start,
1554- p_st + e->latency
1555- - (e->distance * ii));
1556-
1557- if (dump_file)
1558- fprintf (dump_file,
1559- "pred st = %d; early_start = %d; latency = %d",
1560- p_st, early_start, e->latency);
1561-
1562- if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1563- count_preds++;
1564-
1565- if (e->data_type == MEM_DEP)
1566- end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1567- }
1568- else if (dump_file)
1569- fprintf (dump_file, "the node is not scheduled\n");
1570-
1571- }
1572- for (e = u_node->out; e != 0; e = e->next_out)
1573- {
1574- ddg_node_ptr v_node = e->dest;
1575-
1576- if (dump_file)
1577- {
1578- fprintf (dump_file, "\nProcessing edge:");
1579- print_ddg_edge (dump_file, e);
1580- fprintf (dump_file,
1581- "\nScheduling %d (%d) in psp_pss_not_empty,"
1582- " checking s %d (%d): ", u_node->cuid, INSN_UID
1583- (u_node->insn), v_node->cuid, INSN_UID
1584- (v_node->insn));
1585- }
1586-
1587- if (TEST_BIT (sched_nodes, v_node->cuid))
1588- {
1589- int s_st = SCHED_TIME (v_node);
1590-
1591- late_start = MIN (late_start,
1592- s_st - e->latency
1593- + (e->distance * ii));
1594-
1595- if (dump_file)
1596- fprintf (dump_file,
1597- "succ st = %d; late_start = %d; latency = %d",
1598- s_st, late_start, e->latency);
1599-
1600- if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1601- count_succs++;
1602-
1603- if (e->data_type == MEM_DEP)
1604- start = MAX (start, SCHED_TIME (v_node) - ii + 1);
1605- }
1606- else if (dump_file)
1607- fprintf (dump_file, "the node is not scheduled\n");
1608-
1609- }
1610- start = MAX (start, early_start);
1611- end = MIN (end, MIN (early_start + ii, late_start + 1));
1612- step = 1;
1613- /* If there are more successors than predecessors schedule the
1614- node close to it's successors. */
1615- if (count_succs >= count_preds)
1616- {
1617- int old_start = start;
1618-
1619- start = end - 1;
1620- end = old_start - 1;
1621- step = -1;
1622- }
1623- }
1624- else /* psp is empty && pss is empty. */
1625- {
1626- start = SCHED_ASAP (u_node);
1627- end = start + ii;
1628- step = 1;
1629- }
1630+ }
1631+
1632+ /* Now that we've finalized the window, make END an exclusive rather
1633+ than an inclusive bound. */
1634+ end += step;
1635
1636 *start_p = start;
1637 *step_p = step;
1638@@ -1880,10 +1977,10 @@
1639 if (dump_file)
1640 fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
1641 start, end, step);
1642- return -1;
1643+ return -1;
1644 }
1645
1646- return 0;
1647+ return 0;
1648 }
1649
1650 /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
1651@@ -1939,7 +2036,7 @@
1652 SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */
1653 for (e = u_node->in; e != 0; e = e->next_in)
1654 if (TEST_BIT (sched_nodes, e->src->cuid)
1655- && ((SCHED_TIME (e->src) - (e->distance * ii)) ==
1656+ && ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) ==
1657 first_cycle_in_window))
1658 {
1659 if (dump_file)
1660@@ -1964,7 +2061,7 @@
1661 SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */
1662 for (e = u_node->out; e != 0; e = e->next_out)
1663 if (TEST_BIT (sched_nodes, e->dest->cuid)
1664- && ((SCHED_TIME (e->dest) + (e->distance * ii)) ==
1665+ && ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) ==
1666 last_cycle_in_window))
1667 {
1668 if (dump_file)
1669@@ -1988,7 +2085,7 @@
1670 last row of the scheduling window) */
1671
1672 static bool
1673-try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node,
1674+try_scheduling_node_in_cycle (partial_schedule_ptr ps,
1675 int u, int cycle, sbitmap sched_nodes,
1676 int *num_splits, sbitmap must_precede,
1677 sbitmap must_follow)
1678@@ -1997,11 +2094,10 @@
1679 bool success = 0;
1680
1681 verify_partial_schedule (ps, sched_nodes);
1682- psi = ps_add_node_check_conflicts (ps, u_node, cycle,
1683- must_precede, must_follow);
1684+ psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow);
1685 if (psi)
1686 {
1687- SCHED_TIME (u_node) = cycle;
1688+ SCHED_TIME (u) = cycle;
1689 SET_BIT (sched_nodes, u);
1690 success = 1;
1691 *num_splits = 0;
1692@@ -2062,8 +2158,8 @@
1693 &step, &end) == 0)
1694 {
1695 if (dump_file)
1696- fprintf (dump_file, "\nTrying to schedule node %d \
1697- INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
1698+ fprintf (dump_file, "\nTrying to schedule node %d "
1699+ "INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
1700 (g->nodes[u].insn)), start, end, step);
1701
1702 gcc_assert ((step > 0 && start < end)
1703@@ -2081,7 +2177,7 @@
1704 &tmp_precede, must_precede,
1705 c, start, end, step);
1706 success =
1707- try_scheduling_node_in_cycle (ps, u_node, u, c,
1708+ try_scheduling_node_in_cycle (ps, u, c,
1709 sched_nodes,
1710 &num_splits, tmp_precede,
1711 tmp_follow);
1712@@ -2181,7 +2277,7 @@
1713 for (crr_insn = rows_new[row];
1714 crr_insn; crr_insn = crr_insn->next_in_row)
1715 {
1716- ddg_node_ptr u = crr_insn->node;
1717+ int u = crr_insn->id;
1718 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
1719
1720 SCHED_TIME (u) = new_time;
1721@@ -2202,7 +2298,7 @@
1722 for (crr_insn = rows_new[row + 1];
1723 crr_insn; crr_insn = crr_insn->next_in_row)
1724 {
1725- ddg_node_ptr u = crr_insn->node;
1726+ int u = crr_insn->id;
1727 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
1728
1729 SCHED_TIME (u) = new_time;
1730@@ -2242,24 +2338,24 @@
1731 {
1732 ddg_edge_ptr e;
1733 int lower = INT_MIN, upper = INT_MAX;
1734- ddg_node_ptr crit_pred = NULL;
1735- ddg_node_ptr crit_succ = NULL;
1736+ int crit_pred = -1;
1737+ int crit_succ = -1;
1738 int crit_cycle;
1739
1740 for (e = u_node->in; e != 0; e = e->next_in)
1741 {
1742- ddg_node_ptr v_node = e->src;
1743+ int v = e->src->cuid;
1744
1745- if (TEST_BIT (sched_nodes, v_node->cuid)
1746- && (low == SCHED_TIME (v_node) + e->latency - (e->distance * ii)))
1747- if (SCHED_TIME (v_node) > lower)
1748+ if (TEST_BIT (sched_nodes, v)
1749+ && (low == SCHED_TIME (v) + e->latency - (e->distance * ii)))
1750+ if (SCHED_TIME (v) > lower)
1751 {
1752- crit_pred = v_node;
1753- lower = SCHED_TIME (v_node);
1754+ crit_pred = v;
1755+ lower = SCHED_TIME (v);
1756 }
1757 }
1758
1759- if (crit_pred != NULL)
1760+ if (crit_pred >= 0)
1761 {
1762 crit_cycle = SCHED_TIME (crit_pred) + 1;
1763 return SMODULO (crit_cycle, ii);
1764@@ -2267,17 +2363,18 @@
1765
1766 for (e = u_node->out; e != 0; e = e->next_out)
1767 {
1768- ddg_node_ptr v_node = e->dest;
1769- if (TEST_BIT (sched_nodes, v_node->cuid)
1770- && (up == SCHED_TIME (v_node) - e->latency + (e->distance * ii)))
1771- if (SCHED_TIME (v_node) < upper)
1772+ int v = e->dest->cuid;
1773+
1774+ if (TEST_BIT (sched_nodes, v)
1775+ && (up == SCHED_TIME (v) - e->latency + (e->distance * ii)))
1776+ if (SCHED_TIME (v) < upper)
1777 {
1778- crit_succ = v_node;
1779- upper = SCHED_TIME (v_node);
1780+ crit_succ = v;
1781+ upper = SCHED_TIME (v);
1782 }
1783 }
1784
1785- if (crit_succ != NULL)
1786+ if (crit_succ >= 0)
1787 {
1788 crit_cycle = SCHED_TIME (crit_succ);
1789 return SMODULO (crit_cycle, ii);
1790@@ -2301,10 +2398,10 @@
1791
1792 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
1793 {
1794- ddg_node_ptr u = crr_insn->node;
1795+ int u = crr_insn->id;
1796
1797 length++;
1798- gcc_assert (TEST_BIT (sched_nodes, u->cuid));
1799+ gcc_assert (TEST_BIT (sched_nodes, u));
1800 /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
1801 popcount (sched_nodes) == number of insns in ps. */
1802 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
1803@@ -2719,6 +2816,7 @@
1804 partial_schedule_ptr ps = XNEW (struct partial_schedule);
1805 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
1806 ps->rows_length = (int *) xcalloc (ii, sizeof (int));
1807+ ps->reg_moves = NULL;
1808 ps->ii = ii;
1809 ps->history = history;
1810 ps->min_cycle = INT_MAX;
1811@@ -2753,8 +2851,16 @@
1812 static void
1813 free_partial_schedule (partial_schedule_ptr ps)
1814 {
1815+ ps_reg_move_info *move;
1816+ unsigned int i;
1817+
1818 if (!ps)
1819 return;
1820+
1821+ FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move)
1822+ sbitmap_free (move->uses);
1823+ VEC_free (ps_reg_move_info, heap, ps->reg_moves);
1824+
1825 free_ps_insns (ps);
1826 free (ps->rows);
1827 free (ps->rows_length);
1828@@ -2796,12 +2902,12 @@
1829 fprintf (dump, "\n[ROW %d ]: ", i);
1830 while (ps_i)
1831 {
1832- if (JUMP_P (ps_i->node->insn))
1833- fprintf (dump, "%d (branch), ",
1834- INSN_UID (ps_i->node->insn));
1835+ rtx insn = ps_rtl_insn (ps, ps_i->id);
1836+
1837+ if (JUMP_P (insn))
1838+ fprintf (dump, "%d (branch), ", INSN_UID (insn));
1839 else
1840- fprintf (dump, "%d, ",
1841- INSN_UID (ps_i->node->insn));
1842+ fprintf (dump, "%d, ", INSN_UID (insn));
1843
1844 ps_i = ps_i->next_in_row;
1845 }
1846@@ -2810,11 +2916,11 @@
1847
1848 /* Creates an object of PS_INSN and initializes it to the given parameters. */
1849 static ps_insn_ptr
1850-create_ps_insn (ddg_node_ptr node, int cycle)
1851+create_ps_insn (int id, int cycle)
1852 {
1853 ps_insn_ptr ps_i = XNEW (struct ps_insn);
1854
1855- ps_i->node = node;
1856+ ps_i->id = id;
1857 ps_i->next_in_row = NULL;
1858 ps_i->prev_in_row = NULL;
1859 ps_i->cycle = cycle;
1860@@ -2879,10 +2985,11 @@
1861 next_ps_i;
1862 next_ps_i = next_ps_i->next_in_row)
1863 {
1864- if (must_follow && TEST_BIT (must_follow, next_ps_i->node->cuid)
1865+ if (must_follow
1866+ && TEST_BIT (must_follow, next_ps_i->id)
1867 && ! first_must_follow)
1868 first_must_follow = next_ps_i;
1869- if (must_precede && TEST_BIT (must_precede, next_ps_i->node->cuid))
1870+ if (must_precede && TEST_BIT (must_precede, next_ps_i->id))
1871 {
1872 /* If we have already met a node that must follow, then
1873 there is no possible column. */
1874@@ -2893,8 +3000,8 @@
1875 }
1876 /* The closing branch must be the last in the row. */
1877 if (must_precede
1878- && TEST_BIT (must_precede, next_ps_i->node->cuid)
1879- && JUMP_P (next_ps_i->node->insn))
1880+ && TEST_BIT (must_precede, next_ps_i->id)
1881+ && JUMP_P (ps_rtl_insn (ps, next_ps_i->id)))
1882 return false;
1883
1884 last_in_row = next_ps_i;
1885@@ -2903,7 +3010,7 @@
1886 /* The closing branch is scheduled as well. Make sure there is no
1887 dependent instruction after it as the branch should be the last
1888 instruction in the row. */
1889- if (JUMP_P (ps_i->node->insn))
1890+ if (JUMP_P (ps_rtl_insn (ps, ps_i->id)))
1891 {
1892 if (first_must_follow)
1893 return false;
1894@@ -2954,7 +3061,6 @@
1895 {
1896 ps_insn_ptr prev, next;
1897 int row;
1898- ddg_node_ptr next_node;
1899
1900 if (!ps || !ps_i)
1901 return false;
1902@@ -2964,11 +3070,9 @@
1903 if (! ps_i->next_in_row)
1904 return false;
1905
1906- next_node = ps_i->next_in_row->node;
1907-
1908 /* Check if next_in_row is dependent on ps_i, both having same sched
1909 times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
1910- if (must_follow && TEST_BIT (must_follow, next_node->cuid))
1911+ if (must_follow && TEST_BIT (must_follow, ps_i->next_in_row->id))
1912 return false;
1913
1914 /* Advance PS_I over its next_in_row in the doubly linked list. */
1915@@ -2999,7 +3103,7 @@
1916 before/after (respectively) the node pointed to by PS_I when scheduled
1917 in the same cycle. */
1918 static ps_insn_ptr
1919-add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
1920+add_node_to_ps (partial_schedule_ptr ps, int id, int cycle,
1921 sbitmap must_precede, sbitmap must_follow)
1922 {
1923 ps_insn_ptr ps_i;
1924@@ -3008,7 +3112,7 @@
1925 if (ps->rows_length[row] >= issue_rate)
1926 return NULL;
1927
1928- ps_i = create_ps_insn (node, cycle);
1929+ ps_i = create_ps_insn (id, cycle);
1930
1931 /* Finds and inserts PS_I according to MUST_FOLLOW and
1932 MUST_PRECEDE. */
1933@@ -3060,7 +3164,7 @@
1934 crr_insn;
1935 crr_insn = crr_insn->next_in_row)
1936 {
1937- rtx insn = crr_insn->node->insn;
1938+ rtx insn = ps_rtl_insn (ps, crr_insn->id);
1939
1940 if (!NONDEBUG_INSN_P (insn))
1941 continue;
1942@@ -3097,7 +3201,7 @@
1943 cuid N must be come before/after (respectively) the node pointed to by
1944 PS_I when scheduled in the same cycle. */
1945 ps_insn_ptr
1946-ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
1947+ps_add_node_check_conflicts (partial_schedule_ptr ps, int n,
1948 int c, sbitmap must_precede,
1949 sbitmap must_follow)
1950 {
1951