summaryrefslogtreecommitdiffstats
path: root/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99356.patch
diff options
context:
space:
mode:
Diffstat (limited to 'recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99356.patch')
-rw-r--r--recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99356.patch376
1 files changed, 376 insertions, 0 deletions
diff --git a/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99356.patch b/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99356.patch
new file mode 100644
index 0000000000..64efbc759e
--- /dev/null
+++ b/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99356.patch
@@ -0,0 +1,376 @@
1
2 2010-04-11 Julian Brown <julian@codesourcery.com>
3
4 Issue #7326
5
6 gcc/
7 * config/arm/arm.c (arm_issue_rate): Return 2 for Cortex-A5.
8 * config/arm/arm.md (generic_sched): No for Cortex-A5.
9 (generic_vfp): Likewise.
10 (cortex-a5.md): Include.
11 * config/arm/cortex-a5.md: New.
12
132010-07-26 Julian Brown <julian@codesourcery.com>
14
15 Merge from Sourcery G++ 4.4:
16
17 2010-04-12 Andrew Stubbs <ams@codesourcery.com>
18
19
20=== modified file 'gcc/config/arm/arm.c'
21--- old/gcc/config/arm/arm.c 2010-08-13 14:08:20 +0000
22+++ new/gcc/config/arm/arm.c 2010-08-13 15:15:12 +0000
23@@ -22262,6 +22262,7 @@
24 {
25 case cortexr4:
26 case cortexr4f:
27+ case cortexa5:
28 case cortexa8:
29 case cortexa9:
30 return 2;
31
32=== modified file 'gcc/config/arm/arm.md'
33--- old/gcc/config/arm/arm.md 2010-08-13 11:40:17 +0000
34+++ new/gcc/config/arm/arm.md 2010-08-13 15:15:12 +0000
35@@ -419,7 +419,7 @@
36
37 (define_attr "generic_sched" "yes,no"
38 (const (if_then_else
39- (ior (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa8,cortexa9")
40+ (ior (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9")
41 (eq_attr "tune_cortexr4" "yes"))
42 (const_string "no")
43 (const_string "yes"))))
44@@ -427,7 +427,7 @@
45 (define_attr "generic_vfp" "yes,no"
46 (const (if_then_else
47 (and (eq_attr "fpu" "vfp")
48- (eq_attr "tune" "!arm1020e,arm1022e,cortexa8,cortexa9")
49+ (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa8,cortexa9")
50 (eq_attr "tune_cortexr4" "no"))
51 (const_string "yes")
52 (const_string "no"))))
53@@ -451,6 +451,7 @@
54 (include "arm1020e.md")
55 (include "arm1026ejs.md")
56 (include "arm1136jfs.md")
57+(include "cortex-a5.md")
58 (include "cortex-a8.md")
59 (include "cortex-a9.md")
60 (include "cortex-r4.md")
61
62=== added file 'gcc/config/arm/cortex-a5.md'
63--- old/gcc/config/arm/cortex-a5.md 1970-01-01 00:00:00 +0000
64+++ new/gcc/config/arm/cortex-a5.md 2010-08-13 15:15:12 +0000
65@@ -0,0 +1,310 @@
66+;; ARM Cortex-A5 pipeline description
67+;; Copyright (C) 2010 Free Software Foundation, Inc.
68+;; Contributed by CodeSourcery.
69+;;
70+;; This file is part of GCC.
71+;;
72+;; GCC is free software; you can redistribute it and/or modify it
73+;; under the terms of the GNU General Public License as published by
74+;; the Free Software Foundation; either version 3, or (at your option)
75+;; any later version.
76+;;
77+;; GCC is distributed in the hope that it will be useful, but
78+;; WITHOUT ANY WARRANTY; without even the implied warranty of
79+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
80+;; General Public License for more details.
81+;;
82+;; You should have received a copy of the GNU General Public License
83+;; along with GCC; see the file COPYING3. If not see
84+;; <http://www.gnu.org/licenses/>.
85+
86+(define_automaton "cortex_a5")
87+
88+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
89+;; Functional units.
90+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
91+
92+;; The integer (ALU) pipeline. There are five DPU pipeline stages. However the
93+;; decode/issue stages operate the same for all instructions, so do not model
94+;; them. We only need to model the first execute stage because instructions
95+;; always advance one stage per cycle in order. Only branch instructions may
96+;; dual-issue, so a single unit covers all of the LS, ALU, MAC and FPU
97+;; pipelines.
98+
99+(define_cpu_unit "cortex_a5_ex1" "cortex_a5")
100+
101+;; The branch pipeline. Branches can dual-issue with other instructions
102+;; (except when those instructions take multiple cycles to issue).
103+
104+(define_cpu_unit "cortex_a5_branch" "cortex_a5")
105+
106+;; Pseudo-unit for blocking the multiply pipeline when a double-precision
107+;; multiply is in progress.
108+
109+(define_cpu_unit "cortex_a5_fpmul_pipe" "cortex_a5")
110+
111+;; The floating-point add pipeline (ex1/f1 stage), used to model the usage
112+;; of the add pipeline by fmac instructions, etc.
113+
114+(define_cpu_unit "cortex_a5_fpadd_pipe" "cortex_a5")
115+
116+;; Floating-point div/sqrt (long latency, out-of-order completion).
117+
118+(define_cpu_unit "cortex_a5_fp_div_sqrt" "cortex_a5")
119+
120+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
121+;; ALU instructions.
122+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
123+
124+(define_insn_reservation "cortex_a5_alu" 2
125+ (and (eq_attr "tune" "cortexa5")
126+ (eq_attr "type" "alu"))
127+ "cortex_a5_ex1")
128+
129+(define_insn_reservation "cortex_a5_alu_shift" 2
130+ (and (eq_attr "tune" "cortexa5")
131+ (eq_attr "type" "alu_shift,alu_shift_reg"))
132+ "cortex_a5_ex1")
133+
134+;; Forwarding path for unshifted operands.
135+
136+(define_bypass 1 "cortex_a5_alu,cortex_a5_alu_shift"
137+ "cortex_a5_alu")
138+
139+(define_bypass 1 "cortex_a5_alu,cortex_a5_alu_shift"
140+ "cortex_a5_alu_shift"
141+ "arm_no_early_alu_shift_dep")
142+
143+;; The multiplier pipeline can forward results from wr stage only (so I don't
144+;; think there's any need to specify bypasses).
145+
146+(define_insn_reservation "cortex_a5_mul" 2
147+ (and (eq_attr "tune" "cortexa5")
148+ (eq_attr "type" "mult"))
149+ "cortex_a5_ex1")
150+
151+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
152+;; Load/store instructions.
153+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
154+
155+;; Address-generation happens in the issue stage, which is one stage behind
156+;; the ex1 stage (the first stage we care about for scheduling purposes). The
157+;; dc1 stage is parallel with ex1, dc2 with ex2 and rot with wr.
158+
159+;; FIXME: These might not be entirely accurate for load2, load3, load4. I think
160+;; they make sense since there's a 32-bit interface between the DPU and the DCU,
161+;; so we can't load more than that per cycle. The store2, store3, store4
162+;; reservations are similarly guessed.
163+
164+(define_insn_reservation "cortex_a5_load1" 2
165+ (and (eq_attr "tune" "cortexa5")
166+ (eq_attr "type" "load_byte,load1"))
167+ "cortex_a5_ex1")
168+
169+(define_insn_reservation "cortex_a5_store1" 0
170+ (and (eq_attr "tune" "cortexa5")
171+ (eq_attr "type" "store1"))
172+ "cortex_a5_ex1")
173+
174+(define_insn_reservation "cortex_a5_load2" 3
175+ (and (eq_attr "tune" "cortexa5")
176+ (eq_attr "type" "load2"))
177+ "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1")
178+
179+(define_insn_reservation "cortex_a5_store2" 0
180+ (and (eq_attr "tune" "cortexa5")
181+ (eq_attr "type" "store2"))
182+ "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1")
183+
184+(define_insn_reservation "cortex_a5_load3" 4
185+ (and (eq_attr "tune" "cortexa5")
186+ (eq_attr "type" "load3"))
187+ "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1+cortex_a5_branch,\
188+ cortex_a5_ex1")
189+
190+(define_insn_reservation "cortex_a5_store3" 0
191+ (and (eq_attr "tune" "cortexa5")
192+ (eq_attr "type" "store3"))
193+ "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1+cortex_a5_branch,\
194+ cortex_a5_ex1")
195+
196+(define_insn_reservation "cortex_a5_load4" 5
197+ (and (eq_attr "tune" "cortexa5")
198+ (eq_attr "type" "load4")) ; load4 here; load3 is covered by cortex_a5_load3
199+ "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1+cortex_a5_branch,\
200+ cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1")
201+
202+(define_insn_reservation "cortex_a5_store4" 0
203+ (and (eq_attr "tune" "cortexa5")
204+ (eq_attr "type" "store4")) ; store4 here; store3 is covered by cortex_a5_store3
205+ "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1+cortex_a5_branch,\
206+ cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1")
207+
208+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
209+;; Branches.
210+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
211+
212+;; Direct branches are the only instructions we can dual-issue (also IT and
213+;; nop, but those aren't very interesting for scheduling). (The latency here
214+;; is meant to represent when the branch actually takes place, but may not be
215+;; entirely correct.)
216+
217+(define_insn_reservation "cortex_a5_branch" 3
218+ (and (eq_attr "tune" "cortexa5")
219+ (eq_attr "type" "branch,call"))
220+ "cortex_a5_branch")
221+
222+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
223+;; Floating-point arithmetic.
224+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
225+
226+(define_insn_reservation "cortex_a5_fpalu" 4
227+ (and (eq_attr "tune" "cortexa5")
228+ (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fcpys, fmuls, f_cvt,\
229+ fcmps, fcmpd"))
230+ "cortex_a5_ex1+cortex_a5_fpadd_pipe")
231+
232+;; For fconsts and fconstd, 8-bit immediate data is passed directly from
233+;; f1 to f3 (which I think reduces the latency by one cycle).
234+
235+(define_insn_reservation "cortex_a5_fconst" 3
236+ (and (eq_attr "tune" "cortexa5")
237+ (eq_attr "type" "fconsts,fconstd"))
238+ "cortex_a5_ex1+cortex_a5_fpadd_pipe")
239+
240+;; We should try not to attempt to issue a single-precision multiplication in
241+;; the middle of a double-precision multiplication operation (the usage of
242+;; cortex_a5_fpmul_pipe).
243+
244+(define_insn_reservation "cortex_a5_fpmuls" 4
245+ (and (eq_attr "tune" "cortexa5")
246+ (eq_attr "type" "fmuls"))
247+ "cortex_a5_ex1+cortex_a5_fpmul_pipe")
248+
249+;; For single-precision multiply-accumulate, the add (accumulate) is issued
250+;; whilst the multiply is in F4. The multiply result can then be forwarded
251+;; from F5 to F1. The issue unit is only used once (when we first start
252+;; processing the instruction), but the usage of the FP add pipeline could
253+;; block other instructions attempting to use it simultaneously. We try to
254+;; avoid that using cortex_a5_fpadd_pipe.
255+
256+(define_insn_reservation "cortex_a5_fpmacs" 8
257+ (and (eq_attr "tune" "cortexa5")
258+ (eq_attr "type" "fmacs"))
259+ "cortex_a5_ex1+cortex_a5_fpmul_pipe, nothing*3, cortex_a5_fpadd_pipe")
260+
261+;; Non-multiply instructions can issue in the middle two cycles of a
262+;; double-precision multiply. Note that it isn't entirely clear when a branch
263+;; can dual-issue when a multi-cycle multiplication is in progress; we ignore
264+;; that for now though.
265+
266+(define_insn_reservation "cortex_a5_fpmuld" 7
267+ (and (eq_attr "tune" "cortexa5")
268+ (eq_attr "type" "fmuld"))
269+ "cortex_a5_ex1+cortex_a5_fpmul_pipe, cortex_a5_fpmul_pipe*2,\
270+ cortex_a5_ex1+cortex_a5_fpmul_pipe")
271+
272+(define_insn_reservation "cortex_a5_fpmacd" 11
273+ (and (eq_attr "tune" "cortexa5")
274+ (eq_attr "type" "fmacd"))
275+ "cortex_a5_ex1+cortex_a5_fpmul_pipe, cortex_a5_fpmul_pipe*2,\
276+ cortex_a5_ex1+cortex_a5_fpmul_pipe, nothing*3, cortex_a5_fpadd_pipe")
277+
278+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
279+;; Floating-point divide/square root instructions.
280+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
281+
282+;; ??? Not sure if the 14 cycles taken for single-precision divide to complete
283+;; includes the time taken for the special instruction used to collect the
284+;; result to travel down the multiply pipeline, or not. Assuming so. (If
285+;; that's wrong, the latency should be increased by a few cycles.)
286+
287+;; fsqrt takes one cycle less, but that is not modelled, nor is the use of the
288+;; multiply pipeline to collect the divide/square-root result.
289+
290+(define_insn_reservation "cortex_a5_fdivs" 14
291+ (and (eq_attr "tune" "cortexa5")
292+ (eq_attr "type" "fdivs"))
293+ "cortex_a5_ex1, cortex_a5_fp_div_sqrt * 13")
294+
295+;; ??? Similarly for fdivd.
296+
297+(define_insn_reservation "cortex_a5_fdivd" 29
298+ (and (eq_attr "tune" "cortexa5")
299+ (eq_attr "type" "fdivd"))
300+ "cortex_a5_ex1, cortex_a5_fp_div_sqrt * 28")
301+
302+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
303+;; VFP to/from core transfers.
304+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
305+
306+;; FP loads take data from wr/rot/f3. Might need to define bypasses to model
307+;; this?
308+
309+;; Core-to-VFP transfers use the multiply pipeline.
310+;; Not sure about this at all... I think we need some bypasses too.
311+
312+(define_insn_reservation "cortex_a5_r2f" 4
313+ (and (eq_attr "tune" "cortexa5")
314+ (eq_attr "type" "r_2_f"))
315+ "cortex_a5_ex1")
316+
317+;; Not sure about this either. 6.8.7 says "Additionally, the store pipe used
318+;; for store and FP->core register transfers can forward into the F2 and F3
319+;; stages."
320+;; This doesn't correspond to what we have though.
321+
322+(define_insn_reservation "cortex_a5_f2r" 2
323+ (and (eq_attr "tune" "cortexa5")
324+ (eq_attr "type" "f_2_r"))
325+ "cortex_a5_ex1")
326+
327+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
328+;; VFP flag transfer.
329+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
330+
331+;; ??? The flag forwarding described in section 6.8.11 of the Cortex-A5 DPU
332+;; specification (from fmstat to the ex2 stage of the second instruction) is
333+;; not modeled at present.
334+
335+(define_insn_reservation "cortex_a5_f_flags" 4
336+ (and (eq_attr "tune" "cortexa5")
337+ (eq_attr "type" "f_flag"))
338+ "cortex_a5_ex1")
339+
340+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
341+;; VFP load/store.
342+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
343+
344+(define_insn_reservation "cortex_a5_f_loads" 4
345+ (and (eq_attr "tune" "cortexa5")
346+ (eq_attr "type" "f_loads"))
347+ "cortex_a5_ex1")
348+
349+(define_insn_reservation "cortex_a5_f_loadd" 5
350+ (and (eq_attr "tune" "cortexa5")
351+ (eq_attr "type" "f_load,f_loadd"))
352+ "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1")
353+
354+(define_insn_reservation "cortex_a5_f_stores" 0
355+ (and (eq_attr "tune" "cortexa5")
356+ (eq_attr "type" "f_stores"))
357+ "cortex_a5_ex1")
358+
359+(define_insn_reservation "cortex_a5_f_stored" 0
360+ (and (eq_attr "tune" "cortexa5")
361+ (eq_attr "type" "f_store,f_stored"))
362+ "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1")
363+
364+;; Load-to-use for floating-point values has a penalty of one cycle, i.e. a
365+;; latency of two (6.8.3).
366+
367+(define_bypass 2 "cortex_a5_f_loads"
368+ "cortex_a5_fpalu, cortex_a5_fpmacs, cortex_a5_fpmuld,\
369+ cortex_a5_fpmacd, cortex_a5_fdivs, cortex_a5_fdivd,\
370+ cortex_a5_f2r")
371+
372+(define_bypass 3 "cortex_a5_f_loadd"
373+ "cortex_a5_fpalu, cortex_a5_fpmacs, cortex_a5_fpmuld,\
374+ cortex_a5_fpmacd, cortex_a5_fdivs, cortex_a5_fdivd,\
375+ cortex_a5_f2r")
376