summaryrefslogtreecommitdiffstats
path: root/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106834.patch
diff options
context:
space:
mode:
Diffstat (limited to 'toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106834.patch')
-rw-r--r--toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106834.patch1505
1 files changed, 0 insertions, 1505 deletions
diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106834.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106834.patch
deleted file mode 100644
index 421a8fe3a9..0000000000
--- a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106834.patch
+++ /dev/null
@@ -1,1505 +0,0 @@
1 2011-10-18 Ira Rosen <ira.rosen@linaro.org>
2
3 gcc/
4 * doc/md.texi (vec_widen_ushiftl_hi, vec_widen_ushiftl_lo,
5 vec_widen_sshiftl_hi, vec_widen_sshiftl_lo): Document.
6 * tree-pretty-print.c (dump_generic_node): Handle WIDEN_LSHIFT_EXPR,
7 VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR.
8 (op_code_prio): Likewise.
9 (op_symbol_code): Handle WIDEN_LSHIFT_EXPR.
10 * optabs.c (optab_for_tree_code): Handle
11 VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR.
12 (init_optabs): Initialize optab codes for vec_widen_u/sshiftl_hi/lo.
13 * optabs.h (enum optab_index): Add OTI_vec_widen_u/sshiftl_hi/lo.
14 * genopinit.c (optabs): Initialize the new optabs.
15 * expr.c (expand_expr_real_2): Handle
16 VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR.
17 * gimple-pretty-print.c (dump_binary_rhs): Likewise.
18 * tree-vectorizer.h (NUM_PATTERNS): Increase to 8.
19 * tree.def (WIDEN_LSHIFT_EXPR, VEC_WIDEN_LSHIFT_HI_EXPR,
20 VEC_WIDEN_LSHIFT_LO_EXPR): New.
21 * cfgexpand.c (expand_debug_expr): Handle new tree codes.
22 * tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add
23 vect_recog_widen_shift_pattern.
24 (vect_handle_widen_mult_by_const): Rename...
25 (vect_handle_widen_op_by_const): ...to this. Handle shifts.
26 Add a new argument, update documentation.
27 (vect_recog_widen_mult_pattern): Assume that only second
28 operand can be constant. Update call to
29 vect_handle_widen_op_by_const.
30 (vect_recog_over_widening_pattern): Fix typo.
31 (vect_recog_widen_shift_pattern): New.
32 * tree-vect-stmts.c (vectorizable_type_promotion): Handle
33 widening shifts.
34 (supportable_widening_operation): Likewise.
35 * tree-inline.c (estimate_operator_cost): Handle new tree codes.
36 * tree-vect-generic.c (expand_vector_operations_1): Likewise.
37 * tree-cfg.c (verify_gimple_assign_binary): Likewise.
38 * config/arm/neon.md (neon_vec_<US>shiftl_<mode>): New.
39 (vec_widen_<US>shiftl_lo_<mode>, neon_vec_<US>shiftl_hi_<mode>,
40 vec_widen_<US>shiftl_hi_<mode>, neon_vec_<US>shift_left_<mode>):
41 Likewise.
42 * config/arm/predicates.md (const_neon_scalar_shift_amount_operand):
43 New.
44 * config/arm/iterators.md (V_innermode): New.
45 * tree-vect-slp.c (vect_build_slp_tree): Require same shift operand
46 for widening shift.
47
48 gcc/testsuite
49 * testsuite/lib/target-supports.exp
50 (check_effective_target_vect_widen_shift): New.
51 * gcc.dg/vect/vect-widen-shift-s16.c: New.
52 * gcc.dg/vect/vect-widen-shift-s8.c: New.
53 * gcc.dg/vect/vect-widen-shift-u16.c: New.
54 * gcc.dg/vect/vect-widen-shift-u8.c: New.
55
56 2011-10-06 Jakub Jelinek <jakub@redhat.com>
57
58 gcc/
59 * tree-vect-patterns.c (vect_pattern_recog_1): Use
60 vect_recog_func_ptr typedef for the first argument.
61 (vect_pattern_recog): Rename vect_recog_func_ptr variable
62 to vect_recog_func, use vect_recog_func_ptr typedef for it.
63
64 2011-10-16 Ira Rosen <ira.rosen@linaro.org>
65
66 gcc/
67 PR tree-optimization/50727
68 * tree-vect-patterns.c (vect_operation_fits_smaller_type): Add
69 DEF_STMT to the list of statements to be replaced by the
70 pattern statements.
71
72 2011-10-09 Ira Rosen <ira.rosen@linaro.org>
73
74 gcc/
75 PR tree-optimization/50635
76 * tree-vect-patterns.c (vect_handle_widen_mult_by_const): Add
77 DEF_STMT to the list of statements to be replaced by the
78 pattern statements.
79 (vect_handle_widen_mult_by_const): Don't check TYPE_OUT.
80
81=== modified file 'gcc/cfgexpand.c'
82--- old/gcc/cfgexpand.c 2011-07-01 09:19:21 +0000
83+++ new/gcc/cfgexpand.c 2011-10-23 13:33:07 +0000
84@@ -3215,6 +3215,8 @@
85 case VEC_UNPACK_LO_EXPR:
86 case VEC_WIDEN_MULT_HI_EXPR:
87 case VEC_WIDEN_MULT_LO_EXPR:
88+ case VEC_WIDEN_LSHIFT_HI_EXPR:
89+ case VEC_WIDEN_LSHIFT_LO_EXPR:
90 return NULL;
91
92 /* Misc codes. */
93
94=== modified file 'gcc/config/arm/iterators.md'
95--- old/gcc/config/arm/iterators.md 2011-09-06 14:29:24 +0000
96+++ new/gcc/config/arm/iterators.md 2011-10-23 13:33:07 +0000
97@@ -388,6 +388,9 @@
98 (define_mode_attr qhs_extenddi_cstr [(SI "r") (HI "rm") (QI "rUq")])
99 (define_mode_attr qhs_zextenddi_cstr [(SI "r") (HI "rm") (QI "rm")])
100
101+;; Mode attribute for vshll.
102+(define_mode_attr V_innermode [(V8QI "QI") (V4HI "HI") (V2SI "SI")])
103+
104 ;;----------------------------------------------------------------------------
105 ;; Code attributes
106 ;;----------------------------------------------------------------------------
107
108=== modified file 'gcc/config/arm/neon.md'
109--- old/gcc/config/arm/neon.md 2011-10-03 01:32:17 +0000
110+++ new/gcc/config/arm/neon.md 2011-10-23 13:33:07 +0000
111@@ -5316,6 +5316,44 @@
112 }
113 )
114
115+(define_insn "neon_vec_<US>shiftl_<mode>"
116+ [(set (match_operand:<V_widen> 0 "register_operand" "=w")
117+ (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
118+ (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
119+ "TARGET_NEON"
120+{
121+ return "vshll.<US><V_sz_elem> %q0, %P1, %2";
122+}
123+ [(set_attr "neon_type" "neon_shift_1")]
124+)
125+
126+(define_expand "vec_widen_<US>shiftl_lo_<mode>"
127+ [(match_operand:<V_unpack> 0 "register_operand" "")
128+ (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
129+ (match_operand:SI 2 "immediate_operand" "i")]
130+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
131+ {
132+ emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
133+ simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
134+ operands[2]));
135+ DONE;
136+ }
137+)
138+
139+(define_expand "vec_widen_<US>shiftl_hi_<mode>"
140+ [(match_operand:<V_unpack> 0 "register_operand" "")
141+ (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
142+ (match_operand:SI 2 "immediate_operand" "i")]
143+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
144+ {
145+ emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
146+ simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
147+ GET_MODE_SIZE (<V_HALF>mode)),
148+ operands[2]));
149+ DONE;
150+ }
151+)
152+
153 ;; Vectorize for non-neon-quad case
154 (define_insn "neon_unpack<US>_<mode>"
155 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
156@@ -5392,6 +5430,34 @@
157 }
158 )
159
160+(define_expand "vec_widen_<US>shiftl_hi_<mode>"
161+ [(match_operand:<V_double_width> 0 "register_operand" "")
162+ (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
163+ (match_operand:SI 2 "immediate_operand" "i")]
164+ "TARGET_NEON"
165+ {
166+ rtx tmpreg = gen_reg_rtx (<V_widen>mode);
167+ emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
168+ emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
169+
170+ DONE;
171+ }
172+)
173+
174+(define_expand "vec_widen_<US>shiftl_lo_<mode>"
175+ [(match_operand:<V_double_width> 0 "register_operand" "")
176+ (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
177+ (match_operand:SI 2 "immediate_operand" "i")]
178+ "TARGET_NEON"
179+ {
180+ rtx tmpreg = gen_reg_rtx (<V_widen>mode);
181+ emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
182+ emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
183+
184+ DONE;
185+ }
186+)
187+
188 ;; The case when using all quad registers.
189 (define_insn "vec_pack_trunc_<mode>"
190 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
191
192=== modified file 'gcc/config/arm/predicates.md'
193--- old/gcc/config/arm/predicates.md 2011-10-10 11:43:28 +0000
194+++ new/gcc/config/arm/predicates.md 2011-10-23 13:33:07 +0000
195@@ -136,6 +136,11 @@
196 (match_operand 0 "s_register_operand"))
197 (match_operand 0 "const_int_operand")))
198
199+(define_predicate "const_neon_scalar_shift_amount_operand"
200+ (and (match_code "const_int")
201+ (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)) <= GET_MODE_BITSIZE (mode)
202+ && ((unsigned HOST_WIDE_INT) INTVAL (op)) > 0")))
203+
204 (define_predicate "arm_add_operand"
205 (ior (match_operand 0 "arm_rhs_operand")
206 (match_operand 0 "arm_neg_immediate_operand")))
207
208=== modified file 'gcc/doc/md.texi'
209--- old/gcc/doc/md.texi 2011-08-13 08:32:32 +0000
210+++ new/gcc/doc/md.texi 2011-10-23 13:33:07 +0000
211@@ -4230,6 +4230,17 @@
212 elements of the two vectors, and put the N/2 products of size 2*S in the
213 output vector (operand 0).
214
215+@cindex @code{vec_widen_ushiftl_hi_@var{m}} instruction pattern
216+@cindex @code{vec_widen_ushiftl_lo_@var{m}} instruction pattern
217+@cindex @code{vec_widen_sshiftl_hi_@var{m}} instruction pattern
218+@cindex @code{vec_widen_sshiftl_lo_@var{m}} instruction pattern
219+@item @samp{vec_widen_ushiftl_hi_@var{m}}, @samp{vec_widen_ushiftl_lo_@var{m}}
220+@itemx @samp{vec_widen_sshiftl_hi_@var{m}}, @samp{vec_widen_sshiftl_lo_@var{m}}
221+Signed/Unsigned widening shift left. The first input (operand 1) is a vector
222+with N signed/unsigned elements of size S@. Operand 2 is a constant shift
223+amount. Shift the high/low N/2 elements of operand 1 left by operand 2, and
224+put the N/2 results of size 2*S in the output vector (operand 0).
225+
226 @cindex @code{mulhisi3} instruction pattern
227 @item @samp{mulhisi3}
228 Multiply operands 1 and 2, which have mode @code{HImode}, and store
229
230=== modified file 'gcc/expr.c'
231--- old/gcc/expr.c 2011-08-25 11:42:09 +0000
232+++ new/gcc/expr.c 2011-10-23 13:33:07 +0000
233@@ -8290,6 +8290,19 @@
234 return target;
235 }
236
237+ case VEC_WIDEN_LSHIFT_HI_EXPR:
238+ case VEC_WIDEN_LSHIFT_LO_EXPR:
239+ {
240+ tree oprnd0 = treeop0;
241+ tree oprnd1 = treeop1;
242+
243+ expand_operands (oprnd0, oprnd1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
244+ target = expand_widen_pattern_expr (ops, op0, op1, NULL_RTX,
245+ target, unsignedp);
246+ gcc_assert (target);
247+ return target;
248+ }
249+
250 case VEC_PACK_TRUNC_EXPR:
251 case VEC_PACK_SAT_EXPR:
252 case VEC_PACK_FIX_TRUNC_EXPR:
253
254=== modified file 'gcc/genopinit.c'
255--- old/gcc/genopinit.c 2011-07-15 13:06:31 +0000
256+++ new/gcc/genopinit.c 2011-10-23 13:33:07 +0000
257@@ -268,6 +268,10 @@
258 "set_optab_handler (vec_widen_umult_lo_optab, $A, CODE_FOR_$(vec_widen_umult_lo_$a$))",
259 "set_optab_handler (vec_widen_smult_hi_optab, $A, CODE_FOR_$(vec_widen_smult_hi_$a$))",
260 "set_optab_handler (vec_widen_smult_lo_optab, $A, CODE_FOR_$(vec_widen_smult_lo_$a$))",
261+ "set_optab_handler (vec_widen_ushiftl_hi_optab, $A, CODE_FOR_$(vec_widen_ushiftl_hi_$a$))",
262+ "set_optab_handler (vec_widen_ushiftl_lo_optab, $A, CODE_FOR_$(vec_widen_ushiftl_lo_$a$))",
263+ "set_optab_handler (vec_widen_sshiftl_hi_optab, $A, CODE_FOR_$(vec_widen_sshiftl_hi_$a$))",
264+ "set_optab_handler (vec_widen_sshiftl_lo_optab, $A, CODE_FOR_$(vec_widen_sshiftl_lo_$a$))",
265 "set_optab_handler (vec_unpacks_hi_optab, $A, CODE_FOR_$(vec_unpacks_hi_$a$))",
266 "set_optab_handler (vec_unpacks_lo_optab, $A, CODE_FOR_$(vec_unpacks_lo_$a$))",
267 "set_optab_handler (vec_unpacku_hi_optab, $A, CODE_FOR_$(vec_unpacku_hi_$a$))",
268
269=== modified file 'gcc/gimple-pretty-print.c'
270--- old/gcc/gimple-pretty-print.c 2011-05-05 15:42:22 +0000
271+++ new/gcc/gimple-pretty-print.c 2011-10-23 13:33:07 +0000
272@@ -343,6 +343,8 @@
273 case VEC_EXTRACT_ODD_EXPR:
274 case VEC_INTERLEAVE_HIGH_EXPR:
275 case VEC_INTERLEAVE_LOW_EXPR:
276+ case VEC_WIDEN_LSHIFT_HI_EXPR:
277+ case VEC_WIDEN_LSHIFT_LO_EXPR:
278 for (p = tree_code_name [(int) code]; *p; p++)
279 pp_character (buffer, TOUPPER (*p));
280 pp_string (buffer, " <");
281
282=== modified file 'gcc/optabs.c'
283--- old/gcc/optabs.c 2011-08-11 15:46:01 +0000
284+++ new/gcc/optabs.c 2011-10-23 13:33:07 +0000
285@@ -454,6 +454,14 @@
286 return TYPE_UNSIGNED (type) ?
287 vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
288
289+ case VEC_WIDEN_LSHIFT_HI_EXPR:
290+ return TYPE_UNSIGNED (type) ?
291+ vec_widen_ushiftl_hi_optab : vec_widen_sshiftl_hi_optab;
292+
293+ case VEC_WIDEN_LSHIFT_LO_EXPR:
294+ return TYPE_UNSIGNED (type) ?
295+ vec_widen_ushiftl_lo_optab : vec_widen_sshiftl_lo_optab;
296+
297 case VEC_UNPACK_HI_EXPR:
298 return TYPE_UNSIGNED (type) ?
299 vec_unpacku_hi_optab : vec_unpacks_hi_optab;
300@@ -6351,6 +6359,10 @@
301 init_optab (vec_widen_umult_lo_optab, UNKNOWN);
302 init_optab (vec_widen_smult_hi_optab, UNKNOWN);
303 init_optab (vec_widen_smult_lo_optab, UNKNOWN);
304+ init_optab (vec_widen_ushiftl_hi_optab, UNKNOWN);
305+ init_optab (vec_widen_ushiftl_lo_optab, UNKNOWN);
306+ init_optab (vec_widen_sshiftl_hi_optab, UNKNOWN);
307+ init_optab (vec_widen_sshiftl_lo_optab, UNKNOWN);
308 init_optab (vec_unpacks_hi_optab, UNKNOWN);
309 init_optab (vec_unpacks_lo_optab, UNKNOWN);
310 init_optab (vec_unpacku_hi_optab, UNKNOWN);
311
312=== modified file 'gcc/optabs.h'
313--- old/gcc/optabs.h 2011-07-27 14:12:45 +0000
314+++ new/gcc/optabs.h 2011-10-23 13:33:07 +0000
315@@ -350,6 +350,12 @@
316 OTI_vec_widen_umult_lo,
317 OTI_vec_widen_smult_hi,
318 OTI_vec_widen_smult_lo,
319+ /* Widening shift left.
320+ The high/low part of the resulting vector is returned. */
321+ OTI_vec_widen_ushiftl_hi,
322+ OTI_vec_widen_ushiftl_lo,
323+ OTI_vec_widen_sshiftl_hi,
324+ OTI_vec_widen_sshiftl_lo,
325 /* Extract and widen the high/low part of a vector of signed or
326 floating point elements. */
327 OTI_vec_unpacks_hi,
328@@ -542,6 +548,10 @@
329 #define vec_widen_umult_lo_optab (&optab_table[OTI_vec_widen_umult_lo])
330 #define vec_widen_smult_hi_optab (&optab_table[OTI_vec_widen_smult_hi])
331 #define vec_widen_smult_lo_optab (&optab_table[OTI_vec_widen_smult_lo])
332+#define vec_widen_ushiftl_hi_optab (&optab_table[OTI_vec_widen_ushiftl_hi])
333+#define vec_widen_ushiftl_lo_optab (&optab_table[OTI_vec_widen_ushiftl_lo])
334+#define vec_widen_sshiftl_hi_optab (&optab_table[OTI_vec_widen_sshiftl_hi])
335+#define vec_widen_sshiftl_lo_optab (&optab_table[OTI_vec_widen_sshiftl_lo])
336 #define vec_unpacks_hi_optab (&optab_table[OTI_vec_unpacks_hi])
337 #define vec_unpacks_lo_optab (&optab_table[OTI_vec_unpacks_lo])
338 #define vec_unpacku_hi_optab (&optab_table[OTI_vec_unpacku_hi])
339
340=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-s16.c'
341--- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s16.c 1970-01-01 00:00:00 +0000
342+++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s16.c 2011-10-23 13:33:07 +0000
343@@ -0,0 +1,107 @@
344+/* { dg-require-effective-target vect_int } */
345+/* { dg-require-effective-target vect_shift } */
346+
347+#include <stdarg.h>
348+#include "tree-vect.h"
349+
350+#define N 64
351+#define C 16
352+
353+__attribute__ ((noinline)) void
354+foo (short *src, int *dst)
355+{
356+ int i;
357+ short b, b0, b1, b2, b3, *s = src;
358+ int *d = dst;
359+
360+ for (i = 0; i < N/4; i++)
361+ {
362+ b0 = *s++;
363+ b1 = *s++;
364+ b2 = *s++;
365+ b3 = *s++;
366+ *d = b0 << C;
367+ d++;
368+ *d = b1 << C;
369+ d++;
370+ *d = b2 << C;
371+ d++;
372+ *d = b3 << C;
373+ d++;
374+ }
375+
376+ s = src;
377+ d = dst;
378+ for (i = 0; i < N; i++)
379+ {
380+ b = *s++;
381+ if (*d != b << C)
382+ abort ();
383+ d++;
384+ }
385+
386+ s = src;
387+ d = dst;
388+ for (i = 0; i < N/4; i++)
389+ {
390+ b0 = *s++;
391+ b1 = *s++;
392+ b2 = *s++;
393+ b3 = *s++;
394+ *d = b0 << C;
395+ d++;
396+ *d = b1 << C;
397+ d++;
398+ *d = b2 << C;
399+ d++;
400+ *d = b3 << 6;
401+ d++;
402+ }
403+
404+ s = src;
405+ d = dst;
406+ for (i = 0; i < N/4; i++)
407+ {
408+ b = *s++;
409+ if (*d != b << C)
410+ abort ();
411+ d++;
412+ b = *s++;
413+ if (*d != b << C)
414+ abort ();
415+ d++;
416+ b = *s++;
417+ if (*d != b << C)
418+ abort ();
419+ d++;
420+ b = *s++;
421+ if (*d != b << 6)
422+ abort ();
423+ d++;
424+ }
425+}
426+
427+int main (void)
428+{
429+ int i;
430+ short in[N];
431+ int out[N];
432+
433+ check_vect ();
434+
435+ for (i = 0; i < N; i++)
436+ {
437+ in[i] = i;
438+ out[i] = 255;
439+ __asm__ volatile ("");
440+ }
441+
442+ foo (in, out);
443+
444+ return 0;
445+}
446+
447+/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 8 "vect" { target vect_widen_shift } } } */
448+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
449+/* { dg-final { cleanup-tree-dump "vect" } } */
450+
451
452=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-s8.c'
453--- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s8.c 1970-01-01 00:00:00 +0000
454+++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s8.c 2011-10-23 13:33:07 +0000
455@@ -0,0 +1,58 @@
456+/* { dg-require-effective-target vect_int } */
457+/* { dg-require-effective-target vect_shift } */
458+
459+#include <stdarg.h>
460+#include "tree-vect.h"
461+
462+#define N 64
463+#define C 12
464+
465+__attribute__ ((noinline)) void
466+foo (char *src, int *dst)
467+{
468+ int i;
469+ char b, *s = src;
470+ int *d = dst;
471+
472+ for (i = 0; i < N; i++)
473+ {
474+ b = *s++;
475+ *d = b << C;
476+ d++;
477+ }
478+
479+ s = src;
480+ d = dst;
481+ for (i = 0; i < N; i++)
482+ {
483+ b = *s++;
484+ if (*d != b << C)
485+ abort ();
486+ d++;
487+ }
488+}
489+
490+int main (void)
491+{
492+ int i;
493+ char in[N];
494+ int out[N];
495+
496+ check_vect ();
497+
498+ for (i = 0; i < N; i++)
499+ {
500+ in[i] = i;
501+ out[i] = 255;
502+ __asm__ volatile ("");
503+ }
504+
505+ foo (in, out);
506+
507+ return 0;
508+}
509+
510+/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */
511+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
512+/* { dg-final { cleanup-tree-dump "vect" } } */
513+
514
515=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-u16.c'
516--- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u16.c 1970-01-01 00:00:00 +0000
517+++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u16.c 2011-10-23 13:33:07 +0000
518@@ -0,0 +1,58 @@
519+/* { dg-require-effective-target vect_int } */
520+/* { dg-require-effective-target vect_shift } */
521+
522+#include <stdarg.h>
523+#include "tree-vect.h"
524+
525+#define N 64
526+#define C 7
527+
528+__attribute__ ((noinline)) void
529+foo (unsigned short *src, unsigned int *dst)
530+{
531+ int i;
532+ unsigned short b, *s = src;
533+ unsigned int *d = dst;
534+
535+ for (i = 0; i < N; i++)
536+ {
537+ b = *s++;
538+ *d = b << C;
539+ d++;
540+ }
541+
542+ s = src;
543+ d = dst;
544+ for (i = 0; i < N; i++)
545+ {
546+ b = *s++;
547+ if (*d != b << C)
548+ abort ();
549+ d++;
550+ }
551+}
552+
553+int main (void)
554+{
555+ int i;
556+ unsigned short in[N];
557+ unsigned int out[N];
558+
559+ check_vect ();
560+
561+ for (i = 0; i < N; i++)
562+ {
563+ in[i] = i;
564+ out[i] = 255;
565+ __asm__ volatile ("");
566+ }
567+
568+ foo (in, out);
569+
570+ return 0;
571+}
572+
573+/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */
574+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
575+/* { dg-final { cleanup-tree-dump "vect" } } */
576+
577
578=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c'
579--- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c 1970-01-01 00:00:00 +0000
580+++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c 2011-10-23 13:33:07 +0000
581@@ -0,0 +1,65 @@
582+/* { dg-require-effective-target vect_int } */
583+/* { dg-require-effective-target vect_shift } */
584+
585+#include <stdarg.h>
586+#include "tree-vect.h"
587+
588+#define N 64
589+#define C1 10
590+#define C2 5
591+
592+__attribute__ ((noinline)) void
593+foo (unsigned char *src, unsigned int *dst1, unsigned int *dst2)
594+{
595+ int i;
596+ unsigned char b, *s = src;
597+ unsigned int *d1 = dst1, *d2 = dst2;
598+
599+ for (i = 0; i < N; i++)
600+ {
601+ b = *s++;
602+ *d1 = b << C1;
603+ d1++;
604+ *d2 = b << C2;
605+ d2++;
606+ }
607+
608+ s = src;
609+ d1 = dst1;
610+ d2 = dst2;
611+ for (i = 0; i < N; i++)
612+ {
613+ b = *s++;
614+ if (*d1 != b << C1 || *d2 != b << C2)
615+ abort ();
616+ d1++;
617+ d2++;
618+ }
619+}
620+
621+int main (void)
622+{
623+ int i;
624+ unsigned char in[N];
625+ unsigned int out1[N];
626+ unsigned int out2[N];
627+
628+ check_vect ();
629+
630+ for (i = 0; i < N; i++)
631+ {
632+ in[i] = i;
633+ out1[i] = 255;
634+ out2[i] = 255;
635+ __asm__ volatile ("");
636+ }
637+
638+ foo (in, out1, out2);
639+
640+ return 0;
641+}
642+
643+/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */
644+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
645+/* { dg-final { cleanup-tree-dump "vect" } } */
646+
647
648=== modified file 'gcc/testsuite/lib/target-supports.exp'
649--- old/gcc/testsuite/lib/target-supports.exp 2011-10-06 11:08:08 +0000
650+++ new/gcc/testsuite/lib/target-supports.exp 2011-10-23 13:33:07 +0000
651@@ -2783,6 +2783,26 @@
652 }
653
654 # Return 1 if the target plus current options supports a vector
655+# widening shift, 0 otherwise.
656+#
657+# This won't change for different subtargets so cache the result.
658+
659+proc check_effective_target_vect_widen_shift { } {
660+ global et_vect_widen_shift_saved
661+
662+ if [info exists et_vect_widen_shift_saved] {
663+ verbose "check_effective_target_vect_widen_shift: using cached result" 2
664+ } else {
665+ set et_vect_widen_shift_saved 0
666+ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
667+ set et_vect_widen_shift_saved 1
668+ }
669+ }
670+ verbose "check_effective_target_vect_widen_shift: returning $et_vect_widen_shift_saved" 2
671+ return $et_vect_widen_shift_saved
672+}
673+
674+# Return 1 if the target plus current options supports a vector
675 # dot-product of signed chars, 0 otherwise.
676 #
677 # This won't change for different subtargets so cache the result.
678
679=== modified file 'gcc/tree-cfg.c'
680--- old/gcc/tree-cfg.c 2011-07-15 13:44:50 +0000
681+++ new/gcc/tree-cfg.c 2011-10-23 13:33:07 +0000
682@@ -3473,6 +3473,44 @@
683 return false;
684 }
685
686+ case WIDEN_LSHIFT_EXPR:
687+ {
688+ if (!INTEGRAL_TYPE_P (lhs_type)
689+ || !INTEGRAL_TYPE_P (rhs1_type)
690+ || TREE_CODE (rhs2) != INTEGER_CST
691+ || (2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type)))
692+ {
693+ error ("type mismatch in widening vector shift expression");
694+ debug_generic_expr (lhs_type);
695+ debug_generic_expr (rhs1_type);
696+ debug_generic_expr (rhs2_type);
697+ return true;
698+ }
699+
700+ return false;
701+ }
702+
703+ case VEC_WIDEN_LSHIFT_HI_EXPR:
704+ case VEC_WIDEN_LSHIFT_LO_EXPR:
705+ {
706+ if (TREE_CODE (rhs1_type) != VECTOR_TYPE
707+ || TREE_CODE (lhs_type) != VECTOR_TYPE
708+ || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1_type))
709+ || !INTEGRAL_TYPE_P (TREE_TYPE (lhs_type))
710+ || TREE_CODE (rhs2) != INTEGER_CST
711+ || (2 * TYPE_PRECISION (TREE_TYPE (rhs1_type))
712+ > TYPE_PRECISION (TREE_TYPE (lhs_type))))
713+ {
714+ error ("type mismatch in widening vector shift expression");
715+ debug_generic_expr (lhs_type);
716+ debug_generic_expr (rhs1_type);
717+ debug_generic_expr (rhs2_type);
718+ return true;
719+ }
720+
721+ return false;
722+ }
723+
724 case PLUS_EXPR:
725 case MINUS_EXPR:
726 {
727
728=== modified file 'gcc/tree-inline.c'
729--- old/gcc/tree-inline.c 2011-08-13 08:32:32 +0000
730+++ new/gcc/tree-inline.c 2011-10-23 13:33:07 +0000
731@@ -3343,6 +3343,7 @@
732 case DOT_PROD_EXPR:
733 case WIDEN_MULT_PLUS_EXPR:
734 case WIDEN_MULT_MINUS_EXPR:
735+ case WIDEN_LSHIFT_EXPR:
736
737 case VEC_WIDEN_MULT_HI_EXPR:
738 case VEC_WIDEN_MULT_LO_EXPR:
739@@ -3357,6 +3358,8 @@
740 case VEC_EXTRACT_ODD_EXPR:
741 case VEC_INTERLEAVE_HIGH_EXPR:
742 case VEC_INTERLEAVE_LOW_EXPR:
743+ case VEC_WIDEN_LSHIFT_HI_EXPR:
744+ case VEC_WIDEN_LSHIFT_LO_EXPR:
745
746 return 1;
747
748
749=== modified file 'gcc/tree-pretty-print.c'
750--- old/gcc/tree-pretty-print.c 2010-11-05 09:00:50 +0000
751+++ new/gcc/tree-pretty-print.c 2011-10-23 13:33:07 +0000
752@@ -1539,6 +1539,7 @@
753 case RROTATE_EXPR:
754 case VEC_LSHIFT_EXPR:
755 case VEC_RSHIFT_EXPR:
756+ case WIDEN_LSHIFT_EXPR:
757 case BIT_IOR_EXPR:
758 case BIT_XOR_EXPR:
759 case BIT_AND_EXPR:
760@@ -2209,6 +2210,22 @@
761 pp_string (buffer, " > ");
762 break;
763
764+ case VEC_WIDEN_LSHIFT_HI_EXPR:
765+ pp_string (buffer, " VEC_WIDEN_LSHIFT_HI_EXPR < ");
766+ dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
767+ pp_string (buffer, ", ");
768+ dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
769+ pp_string (buffer, " > ");
770+ break;
771+
772+ case VEC_WIDEN_LSHIFT_LO_EXPR:
773+ pp_string (buffer, " VEC_WIDEN_LSHIFT_LO_EXPR < ");
774+ dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
775+ pp_string (buffer, ", ");
776+ dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
777+ pp_string (buffer, " > ");
778+ break;
779+
780 case VEC_UNPACK_HI_EXPR:
781 pp_string (buffer, " VEC_UNPACK_HI_EXPR < ");
782 dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
783@@ -2531,6 +2548,9 @@
784 case RSHIFT_EXPR:
785 case LROTATE_EXPR:
786 case RROTATE_EXPR:
787+ case VEC_WIDEN_LSHIFT_HI_EXPR:
788+ case VEC_WIDEN_LSHIFT_LO_EXPR:
789+ case WIDEN_LSHIFT_EXPR:
790 return 11;
791
792 case WIDEN_SUM_EXPR:
793@@ -2706,6 +2726,9 @@
794 case VEC_RSHIFT_EXPR:
795 return "v>>";
796
797+ case WIDEN_LSHIFT_EXPR:
798+ return "w<<";
799+
800 case POINTER_PLUS_EXPR:
801 return "+";
802
803
804=== modified file 'gcc/tree-vect-generic.c'
805--- old/gcc/tree-vect-generic.c 2011-02-08 14:16:50 +0000
806+++ new/gcc/tree-vect-generic.c 2011-10-23 13:33:07 +0000
807@@ -552,7 +552,9 @@
808 || code == VEC_UNPACK_LO_EXPR
809 || code == VEC_PACK_TRUNC_EXPR
810 || code == VEC_PACK_SAT_EXPR
811- || code == VEC_PACK_FIX_TRUNC_EXPR)
812+ || code == VEC_PACK_FIX_TRUNC_EXPR
813+ || code == VEC_WIDEN_LSHIFT_HI_EXPR
814+ || code == VEC_WIDEN_LSHIFT_LO_EXPR)
815 type = TREE_TYPE (rhs1);
816
817 /* Optabs will try converting a negation into a subtraction, so
818
819=== modified file 'gcc/tree-vect-patterns.c'
820--- old/gcc/tree-vect-patterns.c 2011-09-05 06:23:37 +0000
821+++ new/gcc/tree-vect-patterns.c 2011-10-23 13:33:07 +0000
822@@ -48,12 +48,15 @@
823 static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
824 static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
825 tree *);
826+static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
827+ tree *, tree *);
828 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
829 vect_recog_widen_mult_pattern,
830 vect_recog_widen_sum_pattern,
831 vect_recog_dot_prod_pattern,
832 vect_recog_pow_pattern,
833- vect_recog_over_widening_pattern};
834+ vect_recog_over_widening_pattern,
835+ vect_recog_widen_shift_pattern};
836
837
838 /* Function widened_name_p
839@@ -331,27 +334,38 @@
840 return pattern_stmt;
841 }
842
843-/* Handle two cases of multiplication by a constant. The first one is when
844- the constant, CONST_OPRND, fits the type (HALF_TYPE) of the second
845- operand (OPRND). In that case, we can peform widen-mult from HALF_TYPE to
846- TYPE.
847+
848+/* Handle widening operation by a constant. At the moment we support MULT_EXPR
849+ and LSHIFT_EXPR.
850+
851+ For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
852+ we check that CONST_OPRND is at most the precision of HALF_TYPE.
853
854 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
855- HALF_TYPE, and CONST_OPRND fits an intermediate type (2 times smaller than
856- TYPE), we can perform widen-mult from the intermediate type to TYPE and
857- replace a_T = (TYPE) a_t; with a_it - (interm_type) a_t; */
858+ HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
859+ that satisfies the above restrictions, we can perform a widening operation
860+ from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
861+ with a_it = (interm_type) a_t; */
862
863 static bool
864-vect_handle_widen_mult_by_const (gimple stmt, tree const_oprnd, tree *oprnd,
865- VEC (gimple, heap) **stmts, tree type,
866- tree *half_type, gimple def_stmt)
867+vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
868+ tree const_oprnd, tree *oprnd,
869+ VEC (gimple, heap) **stmts, tree type,
870+ tree *half_type, gimple def_stmt)
871 {
872 tree new_type, new_oprnd, tmp;
873 gimple new_stmt;
874 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
875 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
876
877- if (int_fits_type_p (const_oprnd, *half_type))
878+ if (code != MULT_EXPR && code != LSHIFT_EXPR)
879+ return false;
880+
881+ if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
882+ || (code == LSHIFT_EXPR
883+ && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
884+ != 1))
885+ && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
886 {
887 /* CONST_OPRND is a constant of HALF_TYPE. */
888 *oprnd = gimple_assign_rhs1 (def_stmt);
889@@ -364,14 +378,16 @@
890 || !vinfo_for_stmt (def_stmt))
891 return false;
892
893- /* TYPE is 4 times bigger than HALF_TYPE, try widen-mult for
894+ /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
895 a type 2 times bigger than HALF_TYPE. */
896 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
897 TYPE_UNSIGNED (type));
898- if (!int_fits_type_p (const_oprnd, new_type))
899+ if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
900+ || (code == LSHIFT_EXPR
901+ && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
902 return false;
903
904- /* Use NEW_TYPE for widen_mult. */
905+ /* Use NEW_TYPE for widening operation. */
906 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
907 {
908 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
909@@ -381,6 +397,7 @@
910 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
911 return false;
912
913+ VEC_safe_push (gimple, heap, *stmts, def_stmt);
914 *oprnd = gimple_assign_lhs (new_stmt);
915 }
916 else
917@@ -392,7 +409,6 @@
918 new_oprnd = make_ssa_name (tmp, NULL);
919 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
920 NULL_TREE);
921- SSA_NAME_DEF_STMT (new_oprnd) = new_stmt;
922 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
923 VEC_safe_push (gimple, heap, *stmts, def_stmt);
924 *oprnd = new_oprnd;
925@@ -402,7 +418,6 @@
926 return true;
927 }
928
929-
930 /* Function vect_recog_widen_mult_pattern
931
932 Try to find the following pattern:
933@@ -491,7 +506,7 @@
934 enum tree_code dummy_code;
935 int dummy_int;
936 VEC (tree, heap) *dummy_vec;
937- bool op0_ok, op1_ok;
938+ bool op1_ok;
939
940 if (!is_gimple_assign (last_stmt))
941 return NULL;
942@@ -511,38 +526,23 @@
943 return NULL;
944
945 /* Check argument 0. */
946- op0_ok = widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false);
947+ if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
948+ return NULL;
949 /* Check argument 1. */
950 op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);
951
952- /* In case of multiplication by a constant one of the operands may not match
953- the pattern, but not both. */
954- if (!op0_ok && !op1_ok)
955- return NULL;
956-
957- if (op0_ok && op1_ok)
958+ if (op1_ok)
959 {
960 oprnd0 = gimple_assign_rhs1 (def_stmt0);
961 oprnd1 = gimple_assign_rhs1 (def_stmt1);
962 }
963- else if (!op0_ok)
964- {
965- if (TREE_CODE (oprnd0) == INTEGER_CST
966- && TREE_CODE (half_type1) == INTEGER_TYPE
967- && vect_handle_widen_mult_by_const (last_stmt, oprnd0, &oprnd1,
968- stmts, type,
969- &half_type1, def_stmt1))
970- half_type0 = half_type1;
971- else
972- return NULL;
973- }
974- else if (!op1_ok)
975+ else
976 {
977 if (TREE_CODE (oprnd1) == INTEGER_CST
978 && TREE_CODE (half_type0) == INTEGER_TYPE
979- && vect_handle_widen_mult_by_const (last_stmt, oprnd1, &oprnd0,
980- stmts, type,
981- &half_type0, def_stmt0))
982+ && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
983+ &oprnd0, stmts, type,
984+ &half_type0, def_stmt0))
985 half_type1 = half_type0;
986 else
987 return NULL;
988@@ -998,6 +998,7 @@
989 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
990 return false;
991
992+ VEC_safe_push (gimple, heap, *stmts, def_stmt);
993 oprnd = gimple_assign_lhs (new_stmt);
994 }
995 else
996@@ -1128,7 +1129,7 @@
997 statetments, except for the case when the last statement in the
998 sequence doesn't have a corresponding pattern statement. In such
999 case we associate the last pattern statement with the last statement
1000- in the sequence. Therefore, we only add an original statetement to
1001+ in the sequence. Therefore, we only add the original statement to
1002 the list if we know that it is not the last. */
1003 if (prev_stmt)
1004 VEC_safe_push (gimple, heap, *stmts, prev_stmt);
1005@@ -1215,6 +1216,231 @@
1006 }
1007
1008
1009+/* Detect widening shift pattern:
1010+
1011+ type a_t;
1012+ TYPE a_T, res_T;
1013+
1014+ S1 a_t = ;
1015+ S2 a_T = (TYPE) a_t;
1016+ S3 res_T = a_T << CONST;
1017+
1018+ where type 'TYPE' is at least double the size of type 'type'.
1019+
1020+ Also detect unsigned cases:
1021+
1022+ unsigned type a_t;
1023+ unsigned TYPE u_res_T;
1024+ TYPE a_T, res_T;
1025+
1026+ S1 a_t = ;
1027+ S2 a_T = (TYPE) a_t;
1028+ S3 res_T = a_T << CONST;
1029+ S4 u_res_T = (unsigned TYPE) res_T;
1030+
1031+ And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1032+ create an additional pattern stmt for S2 to create a variable of an
1033+ intermediate type, and perform widen-shift on the intermediate type:
1034+
1035+ type a_t;
1036+ interm_type a_it;
1037+ TYPE a_T, res_T, res_T';
1038+
1039+ S1 a_t = ;
1040+ S2 a_T = (TYPE) a_t;
1041+ '--> a_it = (interm_type) a_t;
1042+ S3 res_T = a_T << CONST;
1043+ '--> res_T' = a_it <<* CONST;
1044+
1045+ Input/Output:
1046+
1047+ * STMTS: Contains a stmt from which the pattern search begins.
1048+ In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1049+ in STMTS. When an intermediate type is used and a pattern statement is
1050+ created for S2, we also put S2 here (before S3).
1051+
1052+ Output:
1053+
1054+ * TYPE_IN: The type of the input arguments to the pattern.
1055+
1056+ * TYPE_OUT: The type of the output of this pattern.
1057+
1058+ * Return value: A new stmt that will be used to replace the sequence of
1059+ stmts that constitute the pattern. In this case it will be:
1060+ WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1061+
1062+static gimple
1063+vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
1064+ tree *type_in, tree *type_out)
1065+{
1066+ gimple last_stmt = VEC_pop (gimple, *stmts);
1067+ gimple def_stmt0;
1068+ tree oprnd0, oprnd1;
1069+ tree type, half_type0;
1070+ gimple pattern_stmt, orig_stmt = NULL;
1071+ tree vectype, vectype_out = NULL_TREE;
1072+ tree dummy;
1073+ tree var;
1074+ enum tree_code dummy_code;
1075+ int dummy_int;
1076+ VEC (tree, heap) * dummy_vec;
1077+ gimple use_stmt = NULL;
1078+ bool over_widen = false;
1079+
1080+ if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1081+ return NULL;
1082+
1083+ orig_stmt = last_stmt;
1084+ if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1085+ {
1086+ /* This statement was also detected as over-widening operation (it can't
1087+ be any other pattern, because only over-widening detects shifts).
1088+ LAST_STMT is the final type demotion statement, but its related
1089+ statement is shift. We analyze the related statement to catch cases:
1090+
1091+ orig code:
1092+ type a_t;
1093+ itype res;
1094+ TYPE a_T, res_T;
1095+
1096+ S1 a_T = (TYPE) a_t;
1097+ S2 res_T = a_T << CONST;
1098+ S3 res = (itype)res_T;
1099+
1100+ (size of type * 2 <= size of itype
1101+ and size of itype * 2 <= size of TYPE)
1102+
1103+ code after over-widening pattern detection:
1104+
1105+ S1 a_T = (TYPE) a_t;
1106+ --> a_it = (itype) a_t;
1107+ S2 res_T = a_T << CONST;
1108+ S3 res = (itype)res_T; <--- LAST_STMT
1109+ --> res = a_it << CONST;
1110+
1111+ after widen_shift:
1112+
1113+ S1 a_T = (TYPE) a_t;
1114+ --> a_it = (itype) a_t; - redundant
1115+ S2 res_T = a_T << CONST;
1116+ S3 res = (itype)res_T;
1117+ --> res = a_t w<< CONST;
1118+
1119+ i.e., we replace the three statements with res = a_t w<< CONST. */
1120+ last_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_stmt));
1121+ over_widen = true;
1122+ }
1123+
1124+ if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1125+ return NULL;
1126+
1127+ oprnd0 = gimple_assign_rhs1 (last_stmt);
1128+ oprnd1 = gimple_assign_rhs2 (last_stmt);
1129+ if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1130+ return NULL;
1131+
1132+ /* Check operand 0: it has to be defined by a type promotion. */
1133+ if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
1134+ return NULL;
1135+
1136+ /* Check operand 1: has to be positive. We check that it fits the type
1137+ in vect_handle_widen_op_by_const (). */
1138+ if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1139+ return NULL;
1140+
1141+ oprnd0 = gimple_assign_rhs1 (def_stmt0);
1142+ type = gimple_expr_type (last_stmt);
1143+
1144+ /* Check if this is a widening operation. */
1145+ if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1146+ &oprnd0, stmts,
1147+ type, &half_type0, def_stmt0))
1148+ return NULL;
1149+
1150+ /* Handle unsigned case. Look for
1151+ S4 u_res_T = (unsigned TYPE) res_T;
1152+ Use unsigned TYPE as the type for WIDEN_LSHIFT_EXPR. */
1153+ if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
1154+ {
1155+ tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
1156+ imm_use_iterator imm_iter;
1157+ use_operand_p use_p;
1158+ int nuses = 0;
1159+ tree use_type;
1160+
1161+ if (over_widen)
1162+ {
1163+ /* In case of over-widening pattern, S4 should be ORIG_STMT itself.
1164+ We check here that TYPE is the correct type for the operation,
1165+ i.e., it's the type of the original result. */
1166+ tree orig_type = gimple_expr_type (orig_stmt);
1167+ if ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (orig_type))
1168+ || (TYPE_PRECISION (type) != TYPE_PRECISION (orig_type)))
1169+ return NULL;
1170+ }
1171+ else
1172+ {
1173+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1174+ {
1175+ if (is_gimple_debug (USE_STMT (use_p)))
1176+ continue;
1177+ use_stmt = USE_STMT (use_p);
1178+ nuses++;
1179+ }
1180+
1181+ if (nuses != 1 || !is_gimple_assign (use_stmt)
1182+ || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1183+ return NULL;
1184+
1185+ use_lhs = gimple_assign_lhs (use_stmt);
1186+ use_type = TREE_TYPE (use_lhs);
1187+
1188+ if (!INTEGRAL_TYPE_P (use_type)
1189+ || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
1190+ || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
1191+ return NULL;
1192+
1193+ type = use_type;
1194+ }
1195+ }
1196+
1197+ /* Pattern detected. */
1198+ if (vect_print_dump_info (REPORT_DETAILS))
1199+ fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");
1200+
1201+ /* Check target support. */
1202+ vectype = get_vectype_for_scalar_type (half_type0);
1203+ vectype_out = get_vectype_for_scalar_type (type);
1204+
1205+ if (!vectype
1206+ || !vectype_out
1207+ || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1208+ vectype_out, vectype,
1209+ &dummy, &dummy, &dummy_code,
1210+ &dummy_code, &dummy_int,
1211+ &dummy_vec))
1212+ return NULL;
1213+
1214+ *type_in = vectype;
1215+ *type_out = vectype_out;
1216+
1217+ /* Pattern supported. Create a stmt to be used to replace the pattern. */
1218+ var = vect_recog_temp_ssa_var (type, NULL);
1219+ pattern_stmt =
1220+ gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);
1221+
1222+ if (vect_print_dump_info (REPORT_DETAILS))
1223+ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1224+
1225+ if (use_stmt)
1226+ last_stmt = use_stmt;
1227+ else
1228+ last_stmt = orig_stmt;
1229+
1230+ VEC_safe_push (gimple, heap, *stmts, last_stmt);
1231+ return pattern_stmt;
1232+}
1233+
1234 /* Mark statements that are involved in a pattern. */
1235
1236 static inline void
1237@@ -1278,7 +1504,8 @@
1238 static void
1239 vect_pattern_recog_1 (
1240 gimple (* vect_recog_func) (VEC (gimple, heap) **, tree *, tree *),
1241- gimple_stmt_iterator si)
1242+ gimple_stmt_iterator si,
1243+ VEC (gimple, heap) **stmts_to_replace)
1244 {
1245 gimple stmt = gsi_stmt (si), pattern_stmt;
1246 stmt_vec_info stmt_info;
1247@@ -1288,14 +1515,14 @@
1248 enum tree_code code;
1249 int i;
1250 gimple next;
1251- VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
1252
1253- VEC_quick_push (gimple, stmts_to_replace, stmt);
1254- pattern_stmt = (* vect_recog_func) (&stmts_to_replace, &type_in, &type_out);
1255+ VEC_truncate (gimple, *stmts_to_replace, 0);
1256+ VEC_quick_push (gimple, *stmts_to_replace, stmt);
1257+ pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
1258 if (!pattern_stmt)
1259 return;
1260
1261- stmt = VEC_last (gimple, stmts_to_replace);
1262+ stmt = VEC_last (gimple, *stmts_to_replace);
1263 stmt_info = vinfo_for_stmt (stmt);
1264 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1265
1266@@ -1303,8 +1530,6 @@
1267 {
1268 /* No need to check target support (already checked by the pattern
1269 recognition function). */
1270- if (type_out)
1271- gcc_assert (VECTOR_MODE_P (TYPE_MODE (type_out)));
1272 pattern_vectype = type_out ? type_out : type_in;
1273 }
1274 else
1275@@ -1360,8 +1585,8 @@
1276 /* It is possible that additional pattern stmts are created and inserted in
1277 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
1278 relevant statements. */
1279- for (i = 0; VEC_iterate (gimple, stmts_to_replace, i, stmt)
1280- && (unsigned) i < (VEC_length (gimple, stmts_to_replace) - 1);
1281+ for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
1282+ && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
1283 i++)
1284 {
1285 stmt_info = vinfo_for_stmt (stmt);
1286@@ -1374,8 +1599,6 @@
1287
1288 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
1289 }
1290-
1291- VEC_free (gimple, heap, stmts_to_replace);
1292 }
1293
1294
1295@@ -1465,6 +1688,7 @@
1296 gimple_stmt_iterator si;
1297 unsigned int i, j;
1298 gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
1299+ VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
1300
1301 if (vect_print_dump_info (REPORT_DETAILS))
1302 fprintf (vect_dump, "=== vect_pattern_recog ===");
1303@@ -1480,8 +1704,11 @@
1304 for (j = 0; j < NUM_PATTERNS; j++)
1305 {
1306 vect_recog_func_ptr = vect_vect_recog_func_ptrs[j];
1307- vect_pattern_recog_1 (vect_recog_func_ptr, si);
1308+ vect_pattern_recog_1 (vect_recog_func_ptr, si,
1309+ &stmts_to_replace);
1310 }
1311 }
1312 }
1313+
1314+ VEC_free (gimple, heap, stmts_to_replace);
1315 }
1316
1317=== modified file 'gcc/tree-vect-slp.c'
1318--- old/gcc/tree-vect-slp.c 2011-10-23 11:29:25 +0000
1319+++ new/gcc/tree-vect-slp.c 2011-10-27 11:27:59 +0000
1320@@ -480,6 +480,11 @@
1321 }
1322 }
1323 }
1324+ else if (rhs_code == WIDEN_LSHIFT_EXPR)
1325+ {
1326+ need_same_oprnds = true;
1327+ first_op1 = gimple_assign_rhs2 (stmt);
1328+ }
1329 }
1330 else
1331 {
1332
1333=== modified file 'gcc/tree-vect-stmts.c'
1334--- old/gcc/tree-vect-stmts.c 2011-10-23 11:29:25 +0000
1335+++ new/gcc/tree-vect-stmts.c 2011-10-27 11:27:59 +0000
1336@@ -3359,6 +3359,7 @@
1337 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
1338 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
1339 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1340+ unsigned int k;
1341
1342 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1343 return false;
1344@@ -3375,7 +3376,8 @@
1345
1346 code = gimple_assign_rhs_code (stmt);
1347 if (!CONVERT_EXPR_CODE_P (code)
1348- && code != WIDEN_MULT_EXPR)
1349+ && code != WIDEN_MULT_EXPR
1350+ && code != WIDEN_LSHIFT_EXPR)
1351 return false;
1352
1353 scalar_dest = gimple_assign_lhs (stmt);
1354@@ -3403,7 +3405,7 @@
1355 bool ok;
1356
1357 op1 = gimple_assign_rhs2 (stmt);
1358- if (code == WIDEN_MULT_EXPR)
1359+ if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR)
1360 {
1361 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
1362 OP1. */
1363@@ -3480,7 +3482,7 @@
1364 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
1365 ncopies);
1366
1367- if (code == WIDEN_MULT_EXPR)
1368+ if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR)
1369 {
1370 if (CONSTANT_CLASS_P (op0))
1371 op0 = fold_convert (TREE_TYPE (op1), op0);
1372@@ -3521,6 +3523,8 @@
1373 if (op_type == binary_op)
1374 vec_oprnds1 = VEC_alloc (tree, heap, 1);
1375 }
1376+ else if (code == WIDEN_LSHIFT_EXPR)
1377+ vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
1378
1379 /* In case the vectorization factor (VF) is bigger than the number
1380 of elements that we can fit in a vectype (nunits), we have to generate
1381@@ -3534,15 +3538,33 @@
1382 if (j == 0)
1383 {
1384 if (slp_node)
1385- vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0,
1386- &vec_oprnds1, -1);
1387- else
1388+ {
1389+ if (code == WIDEN_LSHIFT_EXPR)
1390+ {
1391+ vec_oprnd1 = op1;
1392+ /* Store vec_oprnd1 for every vector stmt to be created
1393+ for SLP_NODE. We check during the analysis that all
1394+ the shift arguments are the same. */
1395+ for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
1396+ VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
1397+
1398+ vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL,
1399+ -1);
1400+ }
1401+ else
1402+ vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0,
1403+ &vec_oprnds1, -1);
1404+ }
1405+ else
1406 {
1407 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1408 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
1409 if (op_type == binary_op)
1410 {
1411- vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
1412+ if (code == WIDEN_LSHIFT_EXPR)
1413+ vec_oprnd1 = op1;
1414+ else
1415+ vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
1416 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
1417 }
1418 }
1419@@ -3553,7 +3575,10 @@
1420 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
1421 if (op_type == binary_op)
1422 {
1423- vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
1424+ if (code == WIDEN_LSHIFT_EXPR)
1425+ vec_oprnd1 = op1;
1426+ else
1427+ vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
1428 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
1429 }
1430 }
1431@@ -5853,6 +5878,19 @@
1432 }
1433 break;
1434
1435+ case WIDEN_LSHIFT_EXPR:
1436+ if (BYTES_BIG_ENDIAN)
1437+ {
1438+ c1 = VEC_WIDEN_LSHIFT_HI_EXPR;
1439+ c2 = VEC_WIDEN_LSHIFT_LO_EXPR;
1440+ }
1441+ else
1442+ {
1443+ c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
1444+ c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
1445+ }
1446+ break;
1447+
1448 CASE_CONVERT:
1449 if (BYTES_BIG_ENDIAN)
1450 {
1451
1452=== modified file 'gcc/tree-vectorizer.h'
1453--- old/gcc/tree-vectorizer.h 2011-09-05 06:23:37 +0000
1454+++ new/gcc/tree-vectorizer.h 2011-10-23 13:33:07 +0000
1455@@ -896,7 +896,7 @@
1456 Additional pattern recognition functions can (and will) be added
1457 in the future. */
1458 typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
1459-#define NUM_PATTERNS 5
1460+#define NUM_PATTERNS 6
1461 void vect_pattern_recog (loop_vec_info);
1462
1463 /* In tree-vectorizer.c. */
1464
1465=== modified file 'gcc/tree.def'
1466--- old/gcc/tree.def 2011-01-21 14:14:12 +0000
1467+++ new/gcc/tree.def 2011-10-23 13:33:07 +0000
1468@@ -1092,6 +1092,19 @@
1469 is subtracted from t3. */
1470 DEFTREECODE (WIDEN_MULT_MINUS_EXPR, "widen_mult_minus_expr", tcc_expression, 3)
1471
1472+/* Widening shift left.
1473+ The first operand is of type t1.
1474+ The second operand is the number of bits to shift by; it need not be the
1475+ same type as the first operand and result.
1476+ Note that the result is undefined if the second operand is larger
1477+ than or equal to the first operand's type size.
1478+ The type of the entire expression is t2, such that t2 is at least twice
1479+ the size of t1.
1480+ WIDEN_LSHIFT_EXPR is equivalent to first widening (promoting)
1481+ the first argument from type t1 to type t2, and then shifting it
1482+ by the second argument. */
1483+DEFTREECODE (WIDEN_LSHIFT_EXPR, "widen_lshift_expr", tcc_binary, 2)
1484+
1485 /* Fused multiply-add.
1486 All operands and the result are of the same type. No intermediate
1487 rounding is performed after multiplying operand one with operand two
1488@@ -1147,6 +1160,16 @@
1489 DEFTREECODE (VEC_INTERLEAVE_HIGH_EXPR, "vec_interleavehigh_expr", tcc_binary, 2)
1490 DEFTREECODE (VEC_INTERLEAVE_LOW_EXPR, "vec_interleavelow_expr", tcc_binary, 2)
1491
1492+/* Widening vector shift left in bits.
1493+ Operand 0 is a vector to be shifted with N elements of size S.
1494+ Operand 1 is an integer shift amount in bits.
1495+ The result of the operation is N elements of size 2*S.
1496+ VEC_WIDEN_LSHIFT_HI_EXPR computes the N/2 high results.
1497+ VEC_WIDEN_LSHIFT_LO_EXPR computes the N/2 low results.
1498+ */
1499+DEFTREECODE (VEC_WIDEN_LSHIFT_HI_EXPR, "widen_lshift_hi_expr", tcc_binary, 2)
1500+DEFTREECODE (VEC_WIDEN_LSHIFT_LO_EXPR, "widen_lshift_lo_expr", tcc_binary, 2)
1501+
1502 /* PREDICT_EXPR. Specify hint for branch prediction. The
1503 PREDICT_EXPR_PREDICTOR specify predictor and PREDICT_EXPR_OUTCOME the
1504 outcome (0 for not taken and 1 for taken). Once the profile is guessed
1505