Diffstat (limited to 'toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106841.patch')
-rw-r--r--  toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106841.patch | 515
1 file changed, 0 insertions(+), 515 deletions(-)
diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106841.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106841.patch
deleted file mode 100644
index d72446919f..0000000000
--- a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106841.patch
+++ /dev/null
@@ -1,515 +0,0 @@
2011-11-21  Michael Hope  <michael.hope@linaro.org>

        Backport from mainline r180131:

        2011-10-18  Julian Brown  <julian@codesourcery.com>

        gcc/
        * config/arm/arm.c (arm_block_move_unaligned_straight)
        (arm_adjust_block_mem, arm_block_move_unaligned_loop)
        (arm_movmemqi_unaligned): New.
        (arm_gen_movmemqi): Support unaligned block copies.

        gcc/testsuite/
        * lib/target-supports.exp (check_effective_target_arm_unaligned): New.
        * gcc.target/arm/unaligned-memcpy-1.c: New.
        * gcc.target/arm/unaligned-memcpy-2.c: New.
        * gcc.target/arm/unaligned-memcpy-3.c: New.
        * gcc.target/arm/unaligned-memcpy-4.c: New.

        2011-09-15  James Greenhalgh  <james.greenhalgh@arm.com>

        gcc/
        * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): New builtin macro.

=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c  2011-10-26 11:38:30 +0000
+++ new/gcc/config/arm/arm.c  2011-11-21 01:45:54 +0000
@@ -10803,6 +10803,335 @@
   return true;
 }
 
+/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
+   unaligned copies on processors which support unaligned semantics for those
+   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
+   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
+   An interleave factor of 1 (the minimum) will perform no interleaving.
+   Load/store multiple are used for aligned addresses where possible.  */
+
+static void
+arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
+                                   HOST_WIDE_INT length,
+                                   unsigned int interleave_factor)
+{
+  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
+  int *regnos = XALLOCAVEC (int, interleave_factor);
+  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
+  HOST_WIDE_INT i, j;
+  HOST_WIDE_INT remaining = length, words;
+  rtx halfword_tmp = NULL, byte_tmp = NULL;
+  rtx dst, src;
+  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
+  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
+  HOST_WIDE_INT srcoffset, dstoffset;
+  HOST_WIDE_INT src_autoinc, dst_autoinc;
+  rtx mem, addr;
+
+  gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
+
+  /* Use hard registers if we have aligned source or destination so we can use
+     load/store multiple with contiguous registers.  */
+  if (dst_aligned || src_aligned)
+    for (i = 0; i < interleave_factor; i++)
+      regs[i] = gen_rtx_REG (SImode, i);
+  else
+    for (i = 0; i < interleave_factor; i++)
+      regs[i] = gen_reg_rtx (SImode);
+
+  dst = copy_addr_to_reg (XEXP (dstbase, 0));
+  src = copy_addr_to_reg (XEXP (srcbase, 0));
+
+  srcoffset = dstoffset = 0;
+
+  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
+     For copying the last bytes we want to subtract this offset again.  */
+  src_autoinc = dst_autoinc = 0;
+
+  for (i = 0; i < interleave_factor; i++)
+    regnos[i] = i;
+
+  /* Copy BLOCK_SIZE_BYTES chunks.  */
+
+  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
+    {
+      /* Load words.  */
+      if (src_aligned && interleave_factor > 1)
+        {
+          emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
+                                            TRUE, srcbase, &srcoffset));
+          src_autoinc += UNITS_PER_WORD * interleave_factor;
+        }
+      else
+        {
+          for (j = 0; j < interleave_factor; j++)
+            {
+              addr = plus_constant (src, srcoffset + j * UNITS_PER_WORD
+                                         - src_autoinc);
+              mem = adjust_automodify_address (srcbase, SImode, addr,
+                                               srcoffset + j * UNITS_PER_WORD);
+              emit_insn (gen_unaligned_loadsi (regs[j], mem));
+            }
+          srcoffset += block_size_bytes;
+        }
+
+      /* Store words.  */
+      if (dst_aligned && interleave_factor > 1)
+        {
+          emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
+                                             TRUE, dstbase, &dstoffset));
+          dst_autoinc += UNITS_PER_WORD * interleave_factor;
+        }
+      else
+        {
+          for (j = 0; j < interleave_factor; j++)
+            {
+              addr = plus_constant (dst, dstoffset + j * UNITS_PER_WORD
+                                         - dst_autoinc);
+              mem = adjust_automodify_address (dstbase, SImode, addr,
+                                               dstoffset + j * UNITS_PER_WORD);
+              emit_insn (gen_unaligned_storesi (mem, regs[j]));
+            }
+          dstoffset += block_size_bytes;
+        }
+
+      remaining -= block_size_bytes;
+    }
+
+  /* Copy any whole words left (note these aren't interleaved with any
+     subsequent halfword/byte load/stores in the interests of simplicity).  */
+
+  words = remaining / UNITS_PER_WORD;
+
+  gcc_assert (words < interleave_factor);
+
+  if (src_aligned && words > 1)
+    {
+      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
+                                        &srcoffset));
+      src_autoinc += UNITS_PER_WORD * words;
+    }
+  else
+    {
+      for (j = 0; j < words; j++)
+        {
+          addr = plus_constant (src,
+                                srcoffset + j * UNITS_PER_WORD - src_autoinc);
+          mem = adjust_automodify_address (srcbase, SImode, addr,
+                                           srcoffset + j * UNITS_PER_WORD);
+          emit_insn (gen_unaligned_loadsi (regs[j], mem));
+        }
+      srcoffset += words * UNITS_PER_WORD;
+    }
+
+  if (dst_aligned && words > 1)
+    {
+      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
+                                         &dstoffset));
+      dst_autoinc += words * UNITS_PER_WORD;
+    }
+  else
+    {
+      for (j = 0; j < words; j++)
+        {
+          addr = plus_constant (dst,
+                                dstoffset + j * UNITS_PER_WORD - dst_autoinc);
+          mem = adjust_automodify_address (dstbase, SImode, addr,
+                                           dstoffset + j * UNITS_PER_WORD);
+          emit_insn (gen_unaligned_storesi (mem, regs[j]));
+        }
+      dstoffset += words * UNITS_PER_WORD;
+    }
+
+  remaining -= words * UNITS_PER_WORD;
+
+  gcc_assert (remaining < 4);
+
+  /* Copy a halfword if necessary.  */
+
+  if (remaining >= 2)
+    {
+      halfword_tmp = gen_reg_rtx (SImode);
+
+      addr = plus_constant (src, srcoffset - src_autoinc);
+      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
+      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
+
+      /* Either write out immediately, or delay until we've loaded the last
+         byte, depending on interleave factor.  */
+      if (interleave_factor == 1)
+        {
+          addr = plus_constant (dst, dstoffset - dst_autoinc);
+          mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
+          emit_insn (gen_unaligned_storehi (mem,
+                                            gen_lowpart (HImode, halfword_tmp)));
+          halfword_tmp = NULL;
+          dstoffset += 2;
+        }
+
+      remaining -= 2;
+      srcoffset += 2;
+    }
+
+  gcc_assert (remaining < 2);
+
+  /* Copy last byte.  */
+
+  if ((remaining & 1) != 0)
+    {
+      byte_tmp = gen_reg_rtx (SImode);
+
+      addr = plus_constant (src, srcoffset - src_autoinc);
+      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
+      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
+
+      if (interleave_factor == 1)
+        {
+          addr = plus_constant (dst, dstoffset - dst_autoinc);
+          mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
+          emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
+          byte_tmp = NULL;
+          dstoffset++;
+        }
+
+      remaining--;
+      srcoffset++;
+    }
+
+  /* Store last halfword if we haven't done so already.  */
+
+  if (halfword_tmp)
+    {
+      addr = plus_constant (dst, dstoffset - dst_autoinc);
+      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
+      emit_insn (gen_unaligned_storehi (mem,
+                                        gen_lowpart (HImode, halfword_tmp)));
+      dstoffset += 2;
+    }
+
+  /* Likewise for last byte.  */
+
+  if (byte_tmp)
+    {
+      addr = plus_constant (dst, dstoffset - dst_autoinc);
+      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
+      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
+      dstoffset++;
+    }
+
+  gcc_assert (remaining == 0 && srcoffset == dstoffset);
+}
+
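For orientation: the function above decomposes a LENGTH-byte copy into interleave-factor-sized word chunks, then leftover whole words, then an optional trailing halfword and byte. Below is a minimal C model of that schedule (an editorial sketch, not part of the patch; the name copy_schedule and the hard-coded word size of 4 are ours). For length 15 and factor 1 it yields three word copies, one halfword and one byte, which is exactly the shape checked by unaligned-memcpy-1.c further down.

#include <string.h>
#include <stddef.h>

/* Byte-level model of arm_block_move_unaligned_straight: chunks of
   FACTOR words, then leftover whole words, then halfword, then byte.  */
static void
copy_schedule (unsigned char *dst, const unsigned char *src,
               size_t length, unsigned int factor)
{
  const size_t word = 4;                        /* UNITS_PER_WORD on ARM.  */
  size_t block = factor * word;
  size_t off = 0;

  for (; off + block <= length; off += block)   /* interleaved ldr/str */
    memcpy (dst + off, src + off, block);
  for (; off + word <= length; off += word)     /* leftover whole words */
    memcpy (dst + off, src + off, word);
  if (length - off >= 2)                        /* trailing ldrh/strh */
    {
      memcpy (dst + off, src + off, 2);
      off += 2;
    }
  if (length - off)                             /* trailing ldrb/strb */
    dst[off] = src[off];
}
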
+/* From mips_adjust_block_mem:
+
+   Helper function for doing a loop-based block operation on memory
+   reference MEM.  Each iteration of the loop will operate on LENGTH
+   bytes of MEM.
+
+   Create a new base register for use within the loop and point it to
+   the start of MEM.  Create a new memory reference that uses this
+   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */
+
+static void
+arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
+                      rtx *loop_mem)
+{
+  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
+
+  /* Although the new mem does not refer to a known location,
+     it does keep up to LENGTH bytes of alignment.  */
+  *loop_mem = change_address (mem, BLKmode, *loop_reg);
+  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
+}
+
+/* From mips_block_move_loop:
+
+   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
+   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
+   the memory regions do not overlap.  */
+
+static void
+arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
+                               unsigned int interleave_factor,
+                               HOST_WIDE_INT bytes_per_iter)
+{
+  rtx label, src_reg, dest_reg, final_src, test;
+  HOST_WIDE_INT leftover;
+
+  leftover = length % bytes_per_iter;
+  length -= leftover;
+
+  /* Create registers and memory references for use within the loop.  */
+  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
+  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
+
+  /* Calculate the value that SRC_REG should have after the last iteration of
+     the loop.  */
+  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
+                                   0, 0, OPTAB_WIDEN);
+
+  /* Emit the start of the loop.  */
+  label = gen_label_rtx ();
+  emit_label (label);
+
+  /* Emit the loop body.  */
+  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
+                                     interleave_factor);
+
+  /* Move on to the next block.  */
+  emit_move_insn (src_reg, plus_constant (src_reg, bytes_per_iter));
+  emit_move_insn (dest_reg, plus_constant (dest_reg, bytes_per_iter));
+
+  /* Emit the loop condition.  */
+  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
+  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
+
+  /* Mop up any left-over bytes.  */
+  if (leftover)
+    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
+}
+
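The loop expander splits LENGTH into whole BYTES_PER_ITER iterations, emitted as a bottom-tested loop whose exit compares the incremented source pointer against its precomputed final value, plus a straight-line tail. A C model of that control flow (editorial sketch, reusing copy_schedule from the sketch above; all names are ours):

/* Model of arm_block_move_unaligned_loop.  LENGTH must be at least
   BYTES_PER_ITER, matching the function's documented precondition.  */
static void
block_move_loop (unsigned char *dst, const unsigned char *src,
                 long length, unsigned int factor, long bytes_per_iter)
{
  long leftover = length % bytes_per_iter;
  const unsigned char *final_src = src + (length - leftover);

  do                                   /* emit_label: top of loop */
    {
      copy_schedule (dst, src, bytes_per_iter, factor);  /* loop body */
      src += bytes_per_iter;           /* move on to the next block */
      dst += bytes_per_iter;
    }
  while (src != final_src);            /* gen_cbranchsi4 back to the label */

  if (leftover)                        /* mop up any left-over bytes */
    copy_schedule (dst, src, leftover, factor);
}
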
+/* Emit a block move when either the source or destination is unaligned (not
+   aligned to a four-byte boundary).  This may need further tuning depending on
+   core type, optimize_size setting, etc.  */
+
+static int
+arm_movmemqi_unaligned (rtx *operands)
+{
+  HOST_WIDE_INT length = INTVAL (operands[2]);
+
+  if (optimize_size)
+    {
+      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
+      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
+      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
+         size of code if optimizing for size.  We'll use ldm/stm if src_aligned
+         or dst_aligned though: allow more interleaving in those cases since the
+         resulting code can be smaller.  */
+      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
+      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
+
+      if (length > 12)
+        arm_block_move_unaligned_loop (operands[0], operands[1], length,
+                                       interleave_factor, bytes_per_iter);
+      else
+        arm_block_move_unaligned_straight (operands[0], operands[1], length,
+                                           interleave_factor);
+    }
+  else
+    {
+      /* Note that the loop created by arm_block_move_unaligned_loop may be
+         subject to loop unrolling, which makes tuning this condition a little
+         redundant.  */
+      if (length > 32)
+        arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
+      else
+        arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
+    }
+
+  return 1;
+}
+
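In terms of the two helpers, the policy above reduces to a pair of size thresholds. A condensed C view (editorial sketch built on the sketches above; the constants 12, 32 and the interleave factors are the ones hard-coded in the function):

/* Model of arm_movmemqi_unaligned's tuning choices.  */
static void
movmemqi_unaligned (unsigned char *dst, const unsigned char *src,
                    long length, int optimize_size, int either_aligned)
{
  if (optimize_size)
    {
      unsigned int factor = either_aligned ? 2 : 1;
      long bytes_per_iter = either_aligned ? 8 : 4;

      if (length > 12)
        block_move_loop (dst, src, length, factor, bytes_per_iter);
      else
        copy_schedule (dst, src, length, factor);
    }
  else
    {
      if (length > 32)
        block_move_loop (dst, src, length, 4, 16);  /* factor 4, 16 B/iter */
      else
        copy_schedule (dst, src, length, 4);
    }
}
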
 int
 arm_gen_movmemqi (rtx *operands)
 {
@@ -10815,8 +11144,13 @@
 
   if (GET_CODE (operands[2]) != CONST_INT
       || GET_CODE (operands[3]) != CONST_INT
-      || INTVAL (operands[2]) > 64
-      || INTVAL (operands[3]) & 3)
+      || INTVAL (operands[2]) > 64)
+    return 0;
+
+  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
+    return arm_movmemqi_unaligned (operands);
+
+  if (INTVAL (operands[3]) & 3)
     return 0;
 
   dstbase = operands[0];

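The rewritten guard gives a three-way dispatch: oversized or non-constant copies still fall back to the memcpy libcall, misaligned copies use the new expander only when the target has unaligned_access, and aligned copies keep the existing ldm/stm path. Condensed (editorial sketch; operands[3] is the alignment in bytes):

/* Model of the new dispatch in arm_gen_movmemqi.  Returns nonzero if the
   copy is expanded inline, zero to fall back to a memcpy call.  */
static int
gen_movmemqi_dispatch (long size, long align, int unaligned_access)
{
  if (size > 64)
    return 0;                          /* libcall */
  if (unaligned_access && (align & 3) != 0)
    return 1;                          /* new unaligned inline expansion */
  if (align & 3)
    return 0;                          /* misaligned, no hardware support */
  return 1;                            /* existing aligned ldm/stm path */
}
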
=== modified file 'gcc/config/arm/arm.h'
--- old/gcc/config/arm/arm.h  2011-10-19 17:01:50 +0000
+++ new/gcc/config/arm/arm.h  2011-11-21 01:45:54 +0000
@@ -47,6 +47,8 @@
     {                                                   \
       if (TARGET_DSP_MULTIPLY)                          \
         builtin_define ("__ARM_FEATURE_DSP");           \
+      if (unaligned_access)                             \
+        builtin_define ("__ARM_FEATURE_UNALIGNED");     \
       /* Define __arm__ even when in thumb mode, for    \
          consistency with armcc.  */                    \
       builtin_define ("__arm__");                       \

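__ARM_FEATURE_UNALIGNED gives source code a preprocessor-time signal that unaligned word/halfword loads and stores are available. A small illustrative use (editorial sketch; the helper name is ours). Note the memcpy branch stays fully portable, and it is exactly the kind of fixed-size copy that the patched expander can now turn into an inline unaligned load instead of a libcall:

#include <string.h>
#include <stdint.h>

/* Read a 32-bit value from a possibly misaligned address.  */
static inline uint32_t
load_u32 (const void *p)
{
#ifdef __ARM_FEATURE_UNALIGNED
  /* Unaligned accesses are cheap: the fixed-size memcpy is expanded
     inline to an unaligned ldr by the patched backend.  */
  uint32_t v;
  memcpy (&v, p, sizeof v);
  return v;
#else
  /* Strict-alignment fallback: assemble the word byte by byte.  */
  const unsigned char *q = (const unsigned char *) p;
  return q[0] | (q[1] << 8) | ((uint32_t) q[2] << 16) | ((uint32_t) q[3] << 24);
#endif
}
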
=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c'
--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c  1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c  2011-10-19 22:56:19 +0000
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_unaligned } */
+/* { dg-options "-O2" } */
+
+#include <string.h>
+
+void unknown_alignment (char *dest, char *src)
+{
+  memcpy (dest, src, 15);
+}
+
+/* We should see three unaligned word loads and store pairs, one unaligned
+   ldrh/strh pair, and an ldrb/strb pair.  Sanity check that.  */
+
+/* { dg-final { scan-assembler-times "@ unaligned" 8 } } */
+/* { dg-final { scan-assembler-times "ldrh" 1 } } */
+/* { dg-final { scan-assembler-times "strh" 1 } } */
+/* { dg-final { scan-assembler-times "ldrb" 1 } } */
+/* { dg-final { scan-assembler-times "strb" 1 } } */

=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c'
--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c  1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c  2011-10-19 22:56:19 +0000
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_unaligned } */
+/* { dg-options "-O2" } */
+
+#include <string.h>
+
+char dest[16];
+
+void aligned_dest (char *src)
+{
+  memcpy (dest, src, 15);
+}
+
+/* Expect a multi-word store for the main part of the copy, but subword
+   loads/stores for the remainder.  */
+
+/* { dg-final { scan-assembler-times "stmia" 1 } } */
+/* { dg-final { scan-assembler-times "ldrh" 1 } } */
+/* { dg-final { scan-assembler-times "strh" 1 } } */
+/* { dg-final { scan-assembler-times "ldrb" 1 } } */
+/* { dg-final { scan-assembler-times "strb" 1 } } */

=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c'
--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c  1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c  2011-10-19 22:56:19 +0000
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_unaligned } */
+/* { dg-options "-O2" } */
+
+#include <string.h>
+
+char src[16];
+
+void aligned_src (char *dest)
+{
+  memcpy (dest, src, 15);
+}
+
+/* Expect a multi-word load for the main part of the copy, but subword
+   loads/stores for the remainder.  */
+
+/* { dg-final { scan-assembler-times "ldmia" 1 } } */
+/* { dg-final { scan-assembler-times "ldrh" 1 } } */
+/* { dg-final { scan-assembler-times "strh" 1 } } */
+/* { dg-final { scan-assembler-times "ldrb" 1 } } */
+/* { dg-final { scan-assembler-times "strb" 1 } } */

=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c'
--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c  1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c  2011-10-19 22:56:19 +0000
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_unaligned } */
+/* { dg-options "-O2" } */
+
+#include <string.h>
+
+char src[16];
+char dest[16];
+
+void aligned_both (void)
+{
+  memcpy (dest, src, 15);
+}
+
+/* We know both src and dest to be aligned: expect multiword loads/stores.  */
+
+/* { dg-final { scan-assembler-times "ldmia" 1 } } */
+/* { dg-final { scan-assembler-times "stmia" 1 } } */

=== modified file 'gcc/testsuite/lib/target-supports.exp'
--- old/gcc/testsuite/lib/target-supports.exp  2011-10-23 13:33:07 +0000
+++ new/gcc/testsuite/lib/target-supports.exp  2011-11-21 01:45:54 +0000
@@ -1894,6 +1894,18 @@
     }]
 }
 
+# Return 1 if this is an ARM target that supports unaligned word/halfword
+# load/store instructions.
+
+proc check_effective_target_arm_unaligned { } {
+    return [check_no_compiler_messages arm_unaligned assembly {
+        #ifndef __ARM_FEATURE_UNALIGNED
+        #error no unaligned support
+        #endif
+        int i;
+    }]
+}
+
 # Add the options needed for NEON.  We need either -mfloat-abi=softfp
 # or -mfloat-abi=hard, but if one is already specified by the
 # multilib, use it.  Similarly, if a -mfpu option already enables