diff options
author | Koen Kooi <koen@dominion.thruhere.net> | 2010-12-04 21:40:49 +0100 |
---|---|---|
committer | Koen Kooi <koen@dominion.thruhere.net> | 2010-12-04 21:40:49 +0100 |
commit | 39fb00c188032075a0a8298e333e6914bd88e53a (patch) | |
tree | ccb10a0b7064862bcce144316b9392614d355b2d | |
parent | 1857df74ac9bfec16d0274fe972fda7bb1f99e6b (diff) | |
download | meta-openembedded-39fb00c188032075a0a8298e333e6914bd88e53a.tar.gz |
meta-openembedded: import pixman 0.21.2 from OE
Signed-off-by: Koen Kooi <koen@dominion.thruhere.net>
24 files changed, 2361 insertions, 0 deletions
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0002-Fix-argument-quoting-for-AC_INIT.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0002-Fix-argument-quoting-for-AC_INIT.patch new file mode 100644 index 0000000000..ebf6eafb0d --- /dev/null +++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0002-Fix-argument-quoting-for-AC_INIT.patch | |||
@@ -0,0 +1,35 @@ | |||
1 | From e7ee43c39d2370716a4d011afa8f5067eced9899 Mon Sep 17 00:00:00 2001 | ||
2 | From: Cyril Brulebois <kibi@debian.org> | ||
3 | Date: Wed, 17 Nov 2010 16:16:56 +0100 | ||
4 | Subject: [PATCH 02/24] Fix argument quoting for AC_INIT. | ||
5 | |||
6 | This gets rid of the following autoreconf warnings: | ||
7 | | autoreconf -vfi | ||
8 | | autoreconf: Entering directory `.' | ||
9 | | autoreconf: configure.ac: not using Gettext | ||
10 | | autoreconf: running: aclocal --force | ||
11 | | configure.ac:61: warning: AC_INIT: not a literal: "pixman@lists.freedesktop.org" | ||
12 | | autoreconf: configure.ac: tracing | ||
13 | | configure.ac:61: warning: AC_INIT: not a literal: "pixman@lists.freedesktop.org" | ||
14 | |||
15 | Signed-off-by: Cyril Brulebois <kibi@debian.org> | ||
16 | --- | ||
17 | configure.ac | 2 +- | ||
18 | 1 files changed, 1 insertions(+), 1 deletions(-) | ||
19 | |||
20 | diff --git a/configure.ac b/configure.ac | ||
21 | index db1da21..147e1bf 100644 | ||
22 | --- a/configure.ac | ||
23 | +++ b/configure.ac | ||
24 | @@ -58,7 +58,7 @@ m4_define([pixman_micro], 3) | ||
25 | |||
26 | m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro]) | ||
27 | |||
28 | -AC_INIT(pixman, pixman_version, "pixman@lists.freedesktop.org", pixman) | ||
29 | +AC_INIT(pixman, pixman_version, [pixman@lists.freedesktop.org], pixman) | ||
30 | AM_INIT_AUTOMAKE([foreign dist-bzip2]) | ||
31 | |||
32 | # Suppress verbose compile lines | ||
33 | -- | ||
34 | 1.6.6.1 | ||
35 | |||
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0003-Sun-s-copyrights-belong-to-Oracle-now.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0003-Sun-s-copyrights-belong-to-Oracle-now.patch new file mode 100644 index 0000000000..e48a2b37dc --- /dev/null +++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0003-Sun-s-copyrights-belong-to-Oracle-now.patch | |||
@@ -0,0 +1,39 @@ | |||
1 | From 654961efe405ad1a7e54a77548ca8af322ecc1f8 Mon Sep 17 00:00:00 2001 | ||
2 | From: Alan Coopersmith <alan.coopersmith@oracle.com> | ||
3 | Date: Sun, 21 Nov 2010 11:42:22 -0800 | ||
4 | Subject: [PATCH 03/24] Sun's copyrights belong to Oracle now | ||
5 | |||
6 | Signed-off-by: Alan Coopersmith <alan.coopersmith@oracle.com> | ||
7 | --- | ||
8 | COPYING | 2 +- | ||
9 | pixman/solaris-hwcap.mapfile | 2 +- | ||
10 | 2 files changed, 2 insertions(+), 2 deletions(-) | ||
11 | |||
12 | diff --git a/COPYING b/COPYING | ||
13 | index 3092a34..15f9517 100644 | ||
14 | --- a/COPYING | ||
15 | +++ b/COPYING | ||
16 | @@ -18,7 +18,7 @@ possible. They may also add themselves to the list below. | ||
17 | * Copyright 2008 André Tupinambá | ||
18 | * Copyright 2008 Mozilla Corporation | ||
19 | * Copyright 2008 Frederic Plourde | ||
20 | - * Copyright 2009 Sun Microsystems, Inc. | ||
21 | + * Copyright 2009, Oracle and/or its affiliates. All rights reserved. | ||
22 | * | ||
23 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
24 | * copy of this software and associated documentation files (the "Software"), | ||
25 | diff --git a/pixman/solaris-hwcap.mapfile b/pixman/solaris-hwcap.mapfile | ||
26 | index 3605ca7..87efce1 100644 | ||
27 | --- a/pixman/solaris-hwcap.mapfile | ||
28 | +++ b/pixman/solaris-hwcap.mapfile | ||
29 | @@ -1,6 +1,6 @@ | ||
30 | ############################################################################### | ||
31 | # | ||
32 | -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. | ||
33 | +# Copyright 2009, Oracle and/or its affiliates. All rights reserved. | ||
34 | # | ||
35 | # Permission is hereby granted, free of charge, to any person obtaining a | ||
36 | # copy of this software and associated documentation files (the "Software"), | ||
37 | -- | ||
38 | 1.6.6.1 | ||
39 | |||
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0004-C-fast-path-for-a1-fill-operation.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0004-C-fast-path-for-a1-fill-operation.patch new file mode 100644 index 0000000000..75eaac7bf2 --- /dev/null +++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0004-C-fast-path-for-a1-fill-operation.patch | |||
@@ -0,0 +1,159 @@ | |||
1 | From 4b5b5a2a832cd67f2a0ec231f75a2825b45571fa Mon Sep 17 00:00:00 2001 | ||
2 | From: Siarhei Siamashka <siarhei.siamashka@nokia.com> | ||
3 | Date: Mon, 15 Nov 2010 18:26:43 +0200 | ||
4 | Subject: [PATCH 04/24] C fast path for a1 fill operation | ||
5 | |||
6 | Can be used as one of the solutions to fix bug | ||
7 | https://bugs.freedesktop.org/show_bug.cgi?id=31604 | ||
8 | --- | ||
9 | pixman/pixman-fast-path.c | 87 ++++++++++++++++++++++++++++++++++++++++++++- | ||
10 | pixman/pixman.c | 7 +++- | ||
11 | 2 files changed, 91 insertions(+), 3 deletions(-) | ||
12 | |||
13 | diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c | ||
14 | index 5d5fa95..37dfbae 100644 | ||
15 | --- a/pixman/pixman-fast-path.c | ||
16 | +++ b/pixman/pixman-fast-path.c | ||
17 | @@ -1334,7 +1334,11 @@ fast_composite_solid_fill (pixman_implementation_t *imp, | ||
18 | |||
19 | src = _pixman_image_get_solid (src_image, dst_image->bits.format); | ||
20 | |||
21 | - if (dst_image->bits.format == PIXMAN_a8) | ||
22 | + if (dst_image->bits.format == PIXMAN_a1) | ||
23 | + { | ||
24 | + src = src >> 31; | ||
25 | + } | ||
26 | + else if (dst_image->bits.format == PIXMAN_a8) | ||
27 | { | ||
28 | src = src >> 24; | ||
29 | } | ||
30 | @@ -1655,6 +1659,7 @@ static const pixman_fast_path_t c_fast_paths[] = | ||
31 | PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill), | ||
32 | PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill), | ||
33 | PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill), | ||
34 | + PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill), | ||
35 | PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill), | ||
36 | PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill), | ||
37 | PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888), | ||
38 | @@ -1733,6 +1738,82 @@ static const pixman_fast_path_t c_fast_paths[] = | ||
39 | { PIXMAN_OP_NONE }, | ||
40 | }; | ||
41 | |||
42 | +#ifdef WORDS_BIGENDIAN | ||
43 | +#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (32 - (offs) - (n))) | ||
44 | +#else | ||
45 | +#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (offs)) | ||
46 | +#endif | ||
47 | + | ||
48 | +static force_inline void | ||
49 | +pixman_fill1_line (uint32_t *dst, int offs, int width, int v) | ||
50 | +{ | ||
51 | + if (offs) | ||
52 | + { | ||
53 | + int leading_pixels = 32 - offs; | ||
54 | + if (leading_pixels >= width) | ||
55 | + { | ||
56 | + if (v) | ||
57 | + *dst |= A1_FILL_MASK (width, offs); | ||
58 | + else | ||
59 | + *dst &= ~A1_FILL_MASK (width, offs); | ||
60 | + return; | ||
61 | + } | ||
62 | + else | ||
63 | + { | ||
64 | + if (v) | ||
65 | + *dst++ |= A1_FILL_MASK (leading_pixels, offs); | ||
66 | + else | ||
67 | + *dst++ &= ~A1_FILL_MASK (leading_pixels, offs); | ||
68 | + width -= leading_pixels; | ||
69 | + } | ||
70 | + } | ||
71 | + while (width >= 32) | ||
72 | + { | ||
73 | + if (v) | ||
74 | + *dst++ = 0xFFFFFFFF; | ||
75 | + else | ||
76 | + *dst++ = 0; | ||
77 | + width -= 32; | ||
78 | + } | ||
79 | + if (width > 0) | ||
80 | + { | ||
81 | + if (v) | ||
82 | + *dst |= A1_FILL_MASK (width, 0); | ||
83 | + else | ||
84 | + *dst &= ~A1_FILL_MASK (width, 0); | ||
85 | + } | ||
86 | +} | ||
87 | + | ||
88 | +static void | ||
89 | +pixman_fill1 (uint32_t *bits, | ||
90 | + int stride, | ||
91 | + int x, | ||
92 | + int y, | ||
93 | + int width, | ||
94 | + int height, | ||
95 | + uint32_t xor) | ||
96 | +{ | ||
97 | + uint32_t *dst = bits + y * stride + (x >> 5); | ||
98 | + int offs = x & 31; | ||
99 | + | ||
100 | + if (xor & 1) | ||
101 | + { | ||
102 | + while (height--) | ||
103 | + { | ||
104 | + pixman_fill1_line (dst, offs, width, 1); | ||
105 | + dst += stride; | ||
106 | + } | ||
107 | + } | ||
108 | + else | ||
109 | + { | ||
110 | + while (height--) | ||
111 | + { | ||
112 | + pixman_fill1_line (dst, offs, width, 0); | ||
113 | + dst += stride; | ||
114 | + } | ||
115 | + } | ||
116 | +} | ||
117 | + | ||
118 | static void | ||
119 | pixman_fill8 (uint32_t *bits, | ||
120 | int stride, | ||
121 | @@ -1819,6 +1900,10 @@ fast_path_fill (pixman_implementation_t *imp, | ||
122 | { | ||
123 | switch (bpp) | ||
124 | { | ||
125 | + case 1: | ||
126 | + pixman_fill1 (bits, stride, x, y, width, height, xor); | ||
127 | + break; | ||
128 | + | ||
129 | case 8: | ||
130 | pixman_fill8 (bits, stride, x, y, width, height, xor); | ||
131 | break; | ||
132 | diff --git a/pixman/pixman.c b/pixman/pixman.c | ||
133 | index 045c556..ec565f9 100644 | ||
134 | --- a/pixman/pixman.c | ||
135 | +++ b/pixman/pixman.c | ||
136 | @@ -875,7 +875,8 @@ color_to_pixel (pixman_color_t * color, | ||
137 | format == PIXMAN_b8g8r8x8 || | ||
138 | format == PIXMAN_r5g6b5 || | ||
139 | format == PIXMAN_b5g6r5 || | ||
140 | - format == PIXMAN_a8)) | ||
141 | + format == PIXMAN_a8 || | ||
142 | + format == PIXMAN_a1)) | ||
143 | { | ||
144 | return FALSE; | ||
145 | } | ||
146 | @@ -895,7 +896,9 @@ color_to_pixel (pixman_color_t * color, | ||
147 | ((c & 0x000000ff) << 24); | ||
148 | } | ||
149 | |||
150 | - if (format == PIXMAN_a8) | ||
151 | + if (format == PIXMAN_a1) | ||
152 | + c = c >> 31; | ||
153 | + else if (format == PIXMAN_a8) | ||
154 | c = c >> 24; | ||
155 | else if (format == PIXMAN_r5g6b5 || | ||
156 | format == PIXMAN_b5g6r5) | ||
157 | -- | ||
158 | 1.6.6.1 | ||
159 | |||
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0005-ARM-added-neon_composite_over_n_8_8-fast-path.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0005-ARM-added-neon_composite_over_n_8_8-fast-path.patch new file mode 100644 index 0000000000..a7a9b11a87 --- /dev/null +++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0005-ARM-added-neon_composite_over_n_8_8-fast-path.patch | |||
@@ -0,0 +1,113 @@ | |||
1 | From 98d08b37f17a3379d0ceff8bb7de8f943873fbd8 Mon Sep 17 00:00:00 2001 | ||
2 | From: Siarhei Siamashka <siarhei.siamashka@nokia.com> | ||
3 | Date: Fri, 26 Nov 2010 08:55:49 +0200 | ||
4 | Subject: [PATCH 05/24] ARM: added 'neon_composite_over_n_8_8' fast path | ||
5 | |||
6 | --- | ||
7 | pixman/pixman-arm-neon-asm.S | 68 ++++++++++++++++++++++++++++++++++++++++++ | ||
8 | pixman/pixman-arm-neon.c | 3 ++ | ||
9 | 2 files changed, 71 insertions(+), 0 deletions(-) | ||
10 | |||
11 | diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S | ||
12 | index 91ec27d..a3875ee 100644 | ||
13 | --- a/pixman/pixman-arm-neon-asm.S | ||
14 | +++ b/pixman/pixman-arm-neon-asm.S | ||
15 | @@ -1203,6 +1203,74 @@ generate_composite_function \ | ||
16 | |||
17 | /******************************************************************************/ | ||
18 | |||
19 | +.macro pixman_composite_over_n_8_8_process_pixblock_head | ||
20 | + vmull.u8 q0, d24, d8 | ||
21 | + vmull.u8 q1, d25, d8 | ||
22 | + vmull.u8 q6, d26, d8 | ||
23 | + vmull.u8 q7, d27, d8 | ||
24 | + vrshr.u16 q10, q0, #8 | ||
25 | + vrshr.u16 q11, q1, #8 | ||
26 | + vrshr.u16 q12, q6, #8 | ||
27 | + vrshr.u16 q13, q7, #8 | ||
28 | + vraddhn.u16 d0, q0, q10 | ||
29 | + vraddhn.u16 d1, q1, q11 | ||
30 | + vraddhn.u16 d2, q6, q12 | ||
31 | + vraddhn.u16 d3, q7, q13 | ||
32 | + vmvn.8 q12, q0 | ||
33 | + vmvn.8 q13, q1 | ||
34 | + vmull.u8 q8, d24, d4 | ||
35 | + vmull.u8 q9, d25, d5 | ||
36 | + vmull.u8 q10, d26, d6 | ||
37 | + vmull.u8 q11, d27, d7 | ||
38 | +.endm | ||
39 | + | ||
40 | +.macro pixman_composite_over_n_8_8_process_pixblock_tail | ||
41 | + vrshr.u16 q14, q8, #8 | ||
42 | + vrshr.u16 q15, q9, #8 | ||
43 | + vrshr.u16 q12, q10, #8 | ||
44 | + vrshr.u16 q13, q11, #8 | ||
45 | + vraddhn.u16 d28, q14, q8 | ||
46 | + vraddhn.u16 d29, q15, q9 | ||
47 | + vraddhn.u16 d30, q12, q10 | ||
48 | + vraddhn.u16 d31, q13, q11 | ||
49 | + vqadd.u8 q14, q0, q14 | ||
50 | + vqadd.u8 q15, q1, q15 | ||
51 | +.endm | ||
52 | + | ||
53 | +/* TODO: expand macros and do better instructions scheduling */ | ||
54 | +.macro pixman_composite_over_n_8_8_process_pixblock_tail_head | ||
55 | + vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! | ||
56 | + pixman_composite_over_n_8_8_process_pixblock_tail | ||
57 | + vld1.8 {d24, d25, d26, d27}, [MASK]! | ||
58 | + cache_preload 32, 32 | ||
59 | + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! | ||
60 | + pixman_composite_over_n_8_8_process_pixblock_head | ||
61 | +.endm | ||
62 | + | ||
63 | +.macro pixman_composite_over_n_8_8_init | ||
64 | + add DUMMY, sp, #ARGS_STACK_OFFSET | ||
65 | + vpush {d8-d15} | ||
66 | + vld1.32 {d8[0]}, [DUMMY] | ||
67 | + vdup.8 d8, d8[3] | ||
68 | +.endm | ||
69 | + | ||
70 | +.macro pixman_composite_over_n_8_8_cleanup | ||
71 | + vpop {d8-d15} | ||
72 | +.endm | ||
73 | + | ||
74 | +generate_composite_function \ | ||
75 | + pixman_composite_over_n_8_8_asm_neon, 0, 8, 8, \ | ||
76 | + FLAG_DST_READWRITE, \ | ||
77 | + 32, /* number of pixels, processed in a single block */ \ | ||
78 | + 5, /* prefetch distance */ \ | ||
79 | + pixman_composite_over_n_8_8_init, \ | ||
80 | + pixman_composite_over_n_8_8_cleanup, \ | ||
81 | + pixman_composite_over_n_8_8_process_pixblock_head, \ | ||
82 | + pixman_composite_over_n_8_8_process_pixblock_tail, \ | ||
83 | + pixman_composite_over_n_8_8_process_pixblock_tail_head | ||
84 | + | ||
85 | +/******************************************************************************/ | ||
86 | + | ||
87 | .macro pixman_composite_over_n_8888_8888_ca_process_pixblock_head | ||
88 | /* | ||
89 | * 'combine_mask_ca' replacement | ||
90 | diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c | ||
91 | index 2f82069..72ef75e 100644 | ||
92 | --- a/pixman/pixman-arm-neon.c | ||
93 | +++ b/pixman/pixman-arm-neon.c | ||
94 | @@ -76,6 +76,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8888, | ||
95 | uint8_t, 1, uint32_t, 1) | ||
96 | PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8888_8888_ca, | ||
97 | uint32_t, 1, uint32_t, 1) | ||
98 | +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8, | ||
99 | + uint8_t, 1, uint8_t, 1) | ||
100 | PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8, | ||
101 | uint8_t, 1, uint8_t, 1) | ||
102 | |||
103 | @@ -235,6 +237,7 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = | ||
104 | PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, neon_composite_src_0888_8888_rev), | ||
105 | PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, neon_composite_src_0888_0565_rev), | ||
106 | PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, neon_composite_src_pixbuf_8888), | ||
107 | + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, neon_composite_over_n_8_8), | ||
108 | PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, neon_composite_over_n_8_0565), | ||
109 | PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, neon_composite_over_n_8_0565), | ||
110 | PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, neon_composite_over_n_8_8888), | ||
111 | -- | ||
112 | 1.6.6.1 | ||
113 | |||
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0006-ARM-introduced-fetch_mask_pixblock-macro-to-simplify.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0006-ARM-introduced-fetch_mask_pixblock-macro-to-simplify.patch new file mode 100644 index 0000000000..71a41a7a59 --- /dev/null +++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0006-ARM-introduced-fetch_mask_pixblock-macro-to-simplify.patch | |||
@@ -0,0 +1,157 @@ | |||
1 | From 3be86a92ccab240859062a541cdb871d81c9501a Mon Sep 17 00:00:00 2001 | ||
2 | From: Siarhei Siamashka <siarhei.siamashka@nokia.com> | ||
3 | Date: Sun, 28 Nov 2010 21:45:06 +0200 | ||
4 | Subject: [PATCH 06/24] ARM: introduced 'fetch_mask_pixblock' macro to simplify code | ||
5 | |||
6 | This macro hides the implementation details of pixel fetching | ||
7 | for the mask image just like 'fetch_src_pixblock' does for the | ||
8 | source image. This provides more possibilities for reusing the | ||
9 | same code blocks in different compositing functions. | ||
10 | |||
11 | This patch does not introduce any functional changes and the | ||
12 | resulting code in the compiled object file is exactly the same. | ||
13 | --- | ||
14 | pixman/pixman-arm-neon-asm.S | 26 +++++++++++++------------- | ||
15 | pixman/pixman-arm-neon-asm.h | 5 +++++ | ||
16 | 2 files changed, 18 insertions(+), 13 deletions(-) | ||
17 | |||
18 | diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S | ||
19 | index a3875ee..155a236 100644 | ||
20 | --- a/pixman/pixman-arm-neon-asm.S | ||
21 | +++ b/pixman/pixman-arm-neon-asm.S | ||
22 | @@ -841,7 +841,7 @@ generate_composite_function \ | ||
23 | pixman_composite_over_n_8_0565_process_pixblock_tail | ||
24 | vst1.16 {d28, d29}, [DST_W, :128]! | ||
25 | vld1.16 {d4, d5}, [DST_R, :128]! | ||
26 | - vld1.8 {d24}, [MASK]! | ||
27 | + fetch_mask_pixblock | ||
28 | cache_preload 8, 8 | ||
29 | pixman_composite_over_n_8_0565_process_pixblock_head | ||
30 | .endm | ||
31 | @@ -889,7 +889,7 @@ generate_composite_function \ | ||
32 | pixman_composite_over_n_8_0565_process_pixblock_tail | ||
33 | fetch_src_pixblock | ||
34 | cache_preload 8, 8 | ||
35 | - vld1.8 {d24}, [MASK]! | ||
36 | + fetch_mask_pixblock | ||
37 | pixman_composite_over_n_8_0565_process_pixblock_head | ||
38 | vst1.16 {d28, d29}, [DST_W, :128]! | ||
39 | .endm | ||
40 | @@ -1171,7 +1171,7 @@ generate_composite_function \ | ||
41 | pixman_composite_over_n_8_8888_process_pixblock_tail | ||
42 | vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! | ||
43 | vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! | ||
44 | - vld1.8 {d24}, [MASK]! | ||
45 | + fetch_mask_pixblock | ||
46 | cache_preload 8, 8 | ||
47 | pixman_composite_over_n_8_8888_process_pixblock_head | ||
48 | .endm | ||
49 | @@ -1241,7 +1241,7 @@ generate_composite_function \ | ||
50 | .macro pixman_composite_over_n_8_8_process_pixblock_tail_head | ||
51 | vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! | ||
52 | pixman_composite_over_n_8_8_process_pixblock_tail | ||
53 | - vld1.8 {d24, d25, d26, d27}, [MASK]! | ||
54 | + fetch_mask_pixblock | ||
55 | cache_preload 32, 32 | ||
56 | vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! | ||
57 | pixman_composite_over_n_8_8_process_pixblock_head | ||
58 | @@ -1341,7 +1341,7 @@ generate_composite_function \ | ||
59 | vraddhn.u16 d29, q15, q9 | ||
60 | vraddhn.u16 d30, q6, q10 | ||
61 | vraddhn.u16 d31, q7, q11 | ||
62 | - vld4.8 {d24, d25, d26, d27}, [MASK]! | ||
63 | + fetch_mask_pixblock | ||
64 | vqadd.u8 q14, q0, q14 | ||
65 | vqadd.u8 q15, q1, q15 | ||
66 | cache_preload 8, 8 | ||
67 | @@ -1405,7 +1405,7 @@ generate_composite_function \ | ||
68 | pixman_composite_add_n_8_8_process_pixblock_tail | ||
69 | vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! | ||
70 | vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! | ||
71 | - vld1.8 {d24, d25, d26, d27}, [MASK]! | ||
72 | + fetch_mask_pixblock | ||
73 | cache_preload 32, 32 | ||
74 | pixman_composite_add_n_8_8_process_pixblock_head | ||
75 | .endm | ||
76 | @@ -1462,7 +1462,7 @@ generate_composite_function \ | ||
77 | pixman_composite_add_8_8_8_process_pixblock_tail | ||
78 | vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! | ||
79 | vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! | ||
80 | - vld1.8 {d24, d25, d26, d27}, [MASK]! | ||
81 | + fetch_mask_pixblock | ||
82 | fetch_src_pixblock | ||
83 | cache_preload 32, 32 | ||
84 | pixman_composite_add_8_8_8_process_pixblock_head | ||
85 | @@ -1515,7 +1515,7 @@ generate_composite_function \ | ||
86 | pixman_composite_add_8888_8888_8888_process_pixblock_tail | ||
87 | vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! | ||
88 | vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! | ||
89 | - vld4.8 {d24, d25, d26, d27}, [MASK]! | ||
90 | + fetch_mask_pixblock | ||
91 | fetch_src_pixblock | ||
92 | cache_preload 8, 8 | ||
93 | pixman_composite_add_8888_8888_8888_process_pixblock_head | ||
94 | @@ -1587,7 +1587,7 @@ generate_composite_function_single_scanline \ | ||
95 | pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail | ||
96 | fetch_src_pixblock | ||
97 | cache_preload 8, 8 | ||
98 | - vld4.8 {d12, d13, d14, d15}, [MASK]! | ||
99 | + fetch_mask_pixblock | ||
100 | pixman_composite_out_reverse_8888_n_8888_process_pixblock_head | ||
101 | vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! | ||
102 | .endm | ||
103 | @@ -1658,7 +1658,7 @@ generate_composite_function \ | ||
104 | pixman_composite_over_8888_n_8888_process_pixblock_tail | ||
105 | fetch_src_pixblock | ||
106 | cache_preload 8, 8 | ||
107 | - vld4.8 {d12, d13, d14, d15}, [MASK]! | ||
108 | + fetch_mask_pixblock | ||
109 | pixman_composite_over_8888_n_8888_process_pixblock_head | ||
110 | vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! | ||
111 | .endm | ||
112 | @@ -1700,7 +1700,7 @@ generate_composite_function_single_scanline \ | ||
113 | pixman_composite_over_8888_n_8888_process_pixblock_tail | ||
114 | fetch_src_pixblock | ||
115 | cache_preload 8, 8 | ||
116 | - vld1.8 {d15}, [MASK]! | ||
117 | + fetch_mask_pixblock | ||
118 | pixman_composite_over_8888_n_8888_process_pixblock_head | ||
119 | vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! | ||
120 | .endm | ||
121 | @@ -1917,7 +1917,7 @@ generate_composite_function \ | ||
122 | |||
123 | /* TODO: expand macros and do better instructions scheduling */ | ||
124 | .macro pixman_composite_over_0565_8_0565_process_pixblock_tail_head | ||
125 | - vld1.8 {d15}, [MASK]! | ||
126 | + fetch_mask_pixblock | ||
127 | pixman_composite_over_0565_8_0565_process_pixblock_tail | ||
128 | fetch_src_pixblock | ||
129 | vld1.16 {d10, d11}, [DST_R, :128]! | ||
130 | @@ -1969,7 +1969,7 @@ generate_composite_function \ | ||
131 | |||
132 | /* TODO: expand macros and do better instructions scheduling */ | ||
133 | .macro pixman_composite_add_0565_8_0565_process_pixblock_tail_head | ||
134 | - vld1.8 {d15}, [MASK]! | ||
135 | + fetch_mask_pixblock | ||
136 | pixman_composite_add_0565_8_0565_process_pixblock_tail | ||
137 | fetch_src_pixblock | ||
138 | vld1.16 {d10, d11}, [DST_R, :128]! | ||
139 | diff --git a/pixman/pixman-arm-neon-asm.h b/pixman/pixman-arm-neon-asm.h | ||
140 | index c75bdc3..24fa361 100644 | ||
141 | --- a/pixman/pixman-arm-neon-asm.h | ||
142 | +++ b/pixman/pixman-arm-neon-asm.h | ||
143 | @@ -431,6 +431,11 @@ | ||
144 | .endif | ||
145 | .endm | ||
146 | |||
147 | +.macro fetch_mask_pixblock | ||
148 | + pixld pixblock_size, mask_bpp, \ | ||
149 | + (mask_basereg - pixblock_size * mask_bpp / 64), MASK | ||
150 | +.endm | ||
151 | + | ||
152 | /* | ||
153 | * Macro which is used to process leading pixels until destination | ||
154 | * pointer is properly aligned (at 16 bytes boundary). When destination | ||
155 | -- | ||
156 | 1.6.6.1 | ||
157 | |||
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0007-ARM-better-NEON-instructions-scheduling-for-over_n_8.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0007-ARM-better-NEON-instructions-scheduling-for-over_n_8.patch new file mode 100644 index 0000000000..acdfdf873d --- /dev/null +++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0007-ARM-better-NEON-instructions-scheduling-for-over_n_8.patch | |||
@@ -0,0 +1,170 @@ | |||
1 | From e6814837a6ccd3e4db329e0131eaf2055d2c864b Mon Sep 17 00:00:00 2001 | ||
2 | From: Siarhei Siamashka <siarhei.siamashka@nokia.com> | ||
3 | Date: Fri, 26 Nov 2010 17:06:58 +0200 | ||
4 | Subject: [PATCH 07/24] ARM: better NEON instructions scheduling for over_n_8_0565 | ||
5 | |||
6 | Code rearranged to get better instructions scheduling for ARM Cortex-A8/A9. | ||
7 | Now it is ~30% faster for the pixel data in L1 cache and makes better use | ||
8 | of memory bandwidth when running at lower clock frequencies (e.g. 500MHz). | ||
9 | Also, register d24 (pixels from the mask image) is no longer clobbered by | ||
10 | supplementary macros, which allows them to be reused for the other variants | ||
11 | of compositing operations later. | ||
12 | |||
13 | Benchmark from ARM Cortex-A8 @500MHz: | ||
14 | |||
15 | == before == | ||
16 | |||
17 | over_n_8_0565 = L1: 63.90 L2: 63.15 M: 60.97 ( 73.53%) | ||
18 | HT: 28.89 VT: 24.14 R: 21.33 RT: 6.78 ( 67Kops/s) | ||
19 | |||
20 | == after == | ||
21 | |||
22 | over_n_8_0565 = L1: 82.64 L2: 75.19 M: 71.52 ( 84.14%) | ||
23 | HT: 30.49 VT: 25.56 R: 22.36 RT: 6.89 ( 68Kops/s) | ||
24 | --- | ||
25 | pixman/pixman-arm-neon-asm.S | 120 +++++++++++++++++++++++++++--------------- | ||
26 | 1 files changed, 77 insertions(+), 43 deletions(-) | ||
27 | |||
28 | diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S | ||
29 | index 155a236..ffffc1c 100644 | ||
30 | --- a/pixman/pixman-arm-neon-asm.S | ||
31 | +++ b/pixman/pixman-arm-neon-asm.S | ||
32 | @@ -792,58 +792,92 @@ generate_composite_function \ | ||
33 | /******************************************************************************/ | ||
34 | |||
35 | .macro pixman_composite_over_n_8_0565_process_pixblock_head | ||
36 | - /* in */ | ||
37 | - vmull.u8 q0, d24, d8 | ||
38 | - vmull.u8 q1, d24, d9 | ||
39 | - vmull.u8 q6, d24, d10 | ||
40 | - vmull.u8 q7, d24, d11 | ||
41 | - vrshr.u16 q10, q0, #8 | ||
42 | - vrshr.u16 q11, q1, #8 | ||
43 | - vrshr.u16 q12, q6, #8 | ||
44 | - vrshr.u16 q13, q7, #8 | ||
45 | - vraddhn.u16 d0, q0, q10 | ||
46 | - vraddhn.u16 d1, q1, q11 | ||
47 | - vraddhn.u16 d2, q6, q12 | ||
48 | - vraddhn.u16 d3, q7, q13 | ||
49 | - | ||
50 | - vshrn.u16 d6, q2, #8 | ||
51 | - vshrn.u16 d7, q2, #3 | ||
52 | - vsli.u16 q2, q2, #5 | ||
53 | - vsri.u8 d6, d6, #5 | ||
54 | - vmvn.8 d3, d3 | ||
55 | - vsri.u8 d7, d7, #6 | ||
56 | - vshrn.u16 d30, q2, #2 | ||
57 | - /* now do alpha blending */ | ||
58 | - vmull.u8 q10, d3, d6 | ||
59 | - vmull.u8 q11, d3, d7 | ||
60 | - vmull.u8 q12, d3, d30 | ||
61 | - vrshr.u16 q13, q10, #8 | ||
62 | - vrshr.u16 q3, q11, #8 | ||
63 | - vrshr.u16 q15, q12, #8 | ||
64 | - vraddhn.u16 d20, q10, q13 | ||
65 | - vraddhn.u16 d23, q11, q3 | ||
66 | - vraddhn.u16 d22, q12, q15 | ||
67 | + vmull.u8 q0, d24, d8 /* IN for SRC pixels (part1) */ | ||
68 | + vmull.u8 q1, d24, d9 | ||
69 | + vmull.u8 q6, d24, d10 | ||
70 | + vmull.u8 q7, d24, d11 | ||
71 | + vshrn.u16 d6, q2, #8 /* convert DST_R data to 32-bpp (part1) */ | ||
72 | + vshrn.u16 d7, q2, #3 | ||
73 | + vsli.u16 q2, q2, #5 | ||
74 | + vrshr.u16 q8, q0, #8 /* IN for SRC pixels (part2) */ | ||
75 | + vrshr.u16 q9, q1, #8 | ||
76 | + vrshr.u16 q10, q6, #8 | ||
77 | + vrshr.u16 q11, q7, #8 | ||
78 | + vraddhn.u16 d0, q0, q8 | ||
79 | + vraddhn.u16 d1, q1, q9 | ||
80 | + vraddhn.u16 d2, q6, q10 | ||
81 | + vraddhn.u16 d3, q7, q11 | ||
82 | + vsri.u8 d6, d6, #5 /* convert DST_R data to 32-bpp (part2) */ | ||
83 | + vsri.u8 d7, d7, #6 | ||
84 | + vmvn.8 d3, d3 | ||
85 | + vshrn.u16 d30, q2, #2 | ||
86 | + vmull.u8 q8, d3, d6 /* now do alpha blending */ | ||
87 | + vmull.u8 q9, d3, d7 | ||
88 | + vmull.u8 q10, d3, d30 | ||
89 | .endm | ||
90 | |||
91 | .macro pixman_composite_over_n_8_0565_process_pixblock_tail | ||
92 | - vqadd.u8 d16, d2, d20 | ||
93 | - vqadd.u8 q9, q0, q11 | ||
94 | - /* convert to r5g6b5 */ | ||
95 | - vshll.u8 q14, d16, #8 | ||
96 | - vshll.u8 q8, d19, #8 | ||
97 | - vshll.u8 q9, d18, #8 | ||
98 | - vsri.u16 q14, q8, #5 | ||
99 | - vsri.u16 q14, q9, #11 | ||
100 | + /* 3 cycle bubble (after vmull.u8) */ | ||
101 | + vrshr.u16 q13, q8, #8 | ||
102 | + vrshr.u16 q11, q9, #8 | ||
103 | + vrshr.u16 q15, q10, #8 | ||
104 | + vraddhn.u16 d16, q8, q13 | ||
105 | + vraddhn.u16 d27, q9, q11 | ||
106 | + vraddhn.u16 d26, q10, q15 | ||
107 | + vqadd.u8 d16, d2, d16 | ||
108 | + /* 1 cycle bubble */ | ||
109 | + vqadd.u8 q9, q0, q13 | ||
110 | + vshll.u8 q14, d16, #8 /* convert to 16bpp */ | ||
111 | + vshll.u8 q8, d19, #8 | ||
112 | + vshll.u8 q9, d18, #8 | ||
113 | + vsri.u16 q14, q8, #5 | ||
114 | + /* 1 cycle bubble */ | ||
115 | + vsri.u16 q14, q9, #11 | ||
116 | .endm | ||
117 | |||
118 | -/* TODO: expand macros and do better instructions scheduling */ | ||
119 | .macro pixman_composite_over_n_8_0565_process_pixblock_tail_head | ||
120 | - pixman_composite_over_n_8_0565_process_pixblock_tail | ||
121 | - vst1.16 {d28, d29}, [DST_W, :128]! | ||
122 | vld1.16 {d4, d5}, [DST_R, :128]! | ||
123 | + vshrn.u16 d6, q2, #8 | ||
124 | fetch_mask_pixblock | ||
125 | + vshrn.u16 d7, q2, #3 | ||
126 | + fetch_src_pixblock | ||
127 | + vmull.u8 q6, d24, d10 | ||
128 | + vrshr.u16 q13, q8, #8 | ||
129 | + vrshr.u16 q11, q9, #8 | ||
130 | + vrshr.u16 q15, q10, #8 | ||
131 | + vraddhn.u16 d16, q8, q13 | ||
132 | + vraddhn.u16 d27, q9, q11 | ||
133 | + vraddhn.u16 d26, q10, q15 | ||
134 | + vqadd.u8 d16, d2, d16 | ||
135 | + vmull.u8 q1, d24, d9 | ||
136 | + vqadd.u8 q9, q0, q13 | ||
137 | + vshll.u8 q14, d16, #8 | ||
138 | + vmull.u8 q0, d24, d8 | ||
139 | + vshll.u8 q8, d19, #8 | ||
140 | + vshll.u8 q9, d18, #8 | ||
141 | + vsri.u16 q14, q8, #5 | ||
142 | + vmull.u8 q7, d24, d11 | ||
143 | + vsri.u16 q14, q9, #11 | ||
144 | + | ||
145 | cache_preload 8, 8 | ||
146 | - pixman_composite_over_n_8_0565_process_pixblock_head | ||
147 | + | ||
148 | + vsli.u16 q2, q2, #5 | ||
149 | + vrshr.u16 q8, q0, #8 | ||
150 | + vrshr.u16 q9, q1, #8 | ||
151 | + vrshr.u16 q10, q6, #8 | ||
152 | + vrshr.u16 q11, q7, #8 | ||
153 | + vraddhn.u16 d0, q0, q8 | ||
154 | + vraddhn.u16 d1, q1, q9 | ||
155 | + vraddhn.u16 d2, q6, q10 | ||
156 | + vraddhn.u16 d3, q7, q11 | ||
157 | + vsri.u8 d6, d6, #5 | ||
158 | + vsri.u8 d7, d7, #6 | ||
159 | + vmvn.8 d3, d3 | ||
160 | + vshrn.u16 d30, q2, #2 | ||
161 | + vst1.16 {d28, d29}, [DST_W, :128]! | ||
162 | + vmull.u8 q8, d3, d6 | ||
163 | + vmull.u8 q9, d3, d7 | ||
164 | + vmull.u8 q10, d3, d30 | ||
165 | .endm | ||
166 | |||
167 | /* | ||
168 | -- | ||
169 | 1.6.6.1 | ||
170 | |||
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0008-ARM-added-neon_composite_over_8888_n_0565-fast-path.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0008-ARM-added-neon_composite_over_8888_n_0565-fast-path.patch new file mode 100644 index 0000000000..4c5bf8d916 --- /dev/null +++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0008-ARM-added-neon_composite_over_8888_n_0565-fast-path.patch | |||
@@ -0,0 +1,74 @@ | |||
1 | From a7c36681c0c1955ff9110b81f1789e56abb10a95 Mon Sep 17 00:00:00 2001 | ||
2 | From: Siarhei Siamashka <siarhei.siamashka@nokia.com> | ||
3 | Date: Sat, 27 Nov 2010 03:53:12 +0200 | ||
4 | Subject: [PATCH 08/24] ARM: added 'neon_composite_over_8888_n_0565' fast path | ||
5 | |||
6 | --- | ||
7 | pixman/pixman-arm-neon-asm.S | 28 ++++++++++++++++++++++++++++ | ||
8 | pixman/pixman-arm-neon.c | 4 ++++ | ||
9 | 2 files changed, 32 insertions(+), 0 deletions(-) | ||
10 | |||
11 | diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S | ||
12 | index ffffc1c..3e52a49 100644 | ||
13 | --- a/pixman/pixman-arm-neon-asm.S | ||
14 | +++ b/pixman/pixman-arm-neon-asm.S | ||
15 | @@ -917,6 +917,34 @@ generate_composite_function \ | ||
16 | |||
17 | /******************************************************************************/ | ||
18 | |||
19 | +.macro pixman_composite_over_8888_n_0565_init | ||
20 | + add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) | ||
21 | + vpush {d8-d15} | ||
22 | + vld1.32 {d24[0]}, [DUMMY] | ||
23 | + vdup.8 d24, d24[3] | ||
24 | +.endm | ||
25 | + | ||
26 | +.macro pixman_composite_over_8888_n_0565_cleanup | ||
27 | + vpop {d8-d15} | ||
28 | +.endm | ||
29 | + | ||
30 | +generate_composite_function \ | ||
31 | + pixman_composite_over_8888_n_0565_asm_neon, 32, 0, 16, \ | ||
32 | + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ | ||
33 | + 8, /* number of pixels, processed in a single block */ \ | ||
34 | + 5, /* prefetch distance */ \ | ||
35 | + pixman_composite_over_8888_n_0565_init, \ | ||
36 | + pixman_composite_over_8888_n_0565_cleanup, \ | ||
37 | + pixman_composite_over_n_8_0565_process_pixblock_head, \ | ||
38 | + pixman_composite_over_n_8_0565_process_pixblock_tail, \ | ||
39 | + pixman_composite_over_n_8_0565_process_pixblock_tail_head, \ | ||
40 | + 28, /* dst_w_basereg */ \ | ||
41 | + 4, /* dst_r_basereg */ \ | ||
42 | + 8, /* src_basereg */ \ | ||
43 | + 24 /* mask_basereg */ | ||
44 | + | ||
45 | +/******************************************************************************/ | ||
46 | + | ||
47 | /* TODO: expand macros and do better instructions scheduling */ | ||
48 | .macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head | ||
49 | vld1.16 {d4, d5}, [DST_R, :128]! | ||
50 | diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c | ||
51 | index 72ef75e..8156bbb 100644 | ||
52 | --- a/pixman/pixman-arm-neon.c | ||
53 | +++ b/pixman/pixman-arm-neon.c | ||
54 | @@ -83,6 +83,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8, | ||
55 | |||
56 | PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888, | ||
57 | uint32_t, 1, uint32_t, 1) | ||
58 | +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_0565, | ||
59 | + uint32_t, 1, uint16_t, 1) | ||
60 | |||
61 | PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8, | ||
62 | uint8_t, 1, uint8_t, 1, uint8_t, 1) | ||
63 | @@ -253,6 +255,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = | ||
64 | PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca), | ||
65 | PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, neon_composite_over_8888_n_8888), | ||
66 | PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, neon_composite_over_8888_n_8888), | ||
67 | + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, neon_composite_over_8888_n_0565), | ||
68 | + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, b5g6r5, neon_composite_over_8888_n_0565), | ||
69 | PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, neon_composite_over_8888_8_8888), | ||
70 | PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, neon_composite_over_8888_8_8888), | ||
71 | PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, neon_composite_over_8888_8_8888), | ||
72 | -- | ||
73 | 1.6.6.1 | ||
74 | |||
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0009-ARM-reuse-common-NEON-code-for-over_-n_8-8888_n-8888.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0009-ARM-reuse-common-NEON-code-for-over_-n_8-8888_n-8888.patch new file mode 100644 index 0000000000..b45671e98e --- /dev/null +++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0009-ARM-reuse-common-NEON-code-for-over_-n_8-8888_n-8888.patch | |||
@@ -0,0 +1,139 @@ | |||
1 | From 3990931bf6197eff1cec06cf24bce53ddf9a539a Mon Sep 17 00:00:00 2001 | ||
2 | From: Siarhei Siamashka <siarhei.siamashka@nokia.com> | ||
3 | Date: Sat, 27 Nov 2010 04:47:39 +0200 | ||
4 | Subject: [PATCH 09/24] ARM: reuse common NEON code for over_{n_8|8888_n|8888_8}_0565 | ||
5 | |||
6 | Renamed supplementary macros from 'over_n_8_0565' to 'over_8888_8_0565', | ||
7 | because they can actually support all variants of this operation: | ||
8 | over_8888_8_0565/over_n_8_0565/over_8888_n_0565. | ||
9 | |||
10 | Also 'over_8888_8_0565' now uses more optimized common code instead of its | ||
11 | own variant, improving performance a bit. Even though this operation is | ||
12 | still memory bandwidth limited, scaled variants of these fast paths may | ||
13 | put more stress on CPU later. | ||
14 | |||
15 | Benchmarked on ARM Cortex-A8 @500MHz: | ||
16 | |||
17 | == before == | ||
18 | |||
19 | over_8888_8_0565 = L1: 67.10 L2: 53.82 M: 44.70 (105.17%) | ||
20 | HT: 18.73 VT: 16.91 R: 14.25 RT: 4.80 (52Kops/s) | ||
21 | |||
22 | == after == | ||
23 | |||
24 | over_8888_8_0565 = L1: 77.83 L2: 58.14 M: 44.82 (105.52%) | ||
25 | HT: 20.58 VT: 17.44 R: 15.05 RT: 4.88 (52Kops/s) | ||
26 | --- | ||
27 | pixman/pixman-arm-neon-asm.S | 61 +++++++++++++++++------------------------ | ||
28 | 1 files changed, 25 insertions(+), 36 deletions(-) | ||
29 | |||
30 | diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S | ||
31 | index 3e52a49..4175144 100644 | ||
32 | --- a/pixman/pixman-arm-neon-asm.S | ||
33 | +++ b/pixman/pixman-arm-neon-asm.S | ||
34 | @@ -791,7 +791,7 @@ generate_composite_function \ | ||
35 | |||
36 | /******************************************************************************/ | ||
37 | |||
38 | -.macro pixman_composite_over_n_8_0565_process_pixblock_head | ||
39 | +.macro pixman_composite_over_8888_8_0565_process_pixblock_head | ||
40 | vmull.u8 q0, d24, d8 /* IN for SRC pixels (part1) */ | ||
41 | vmull.u8 q1, d24, d9 | ||
42 | vmull.u8 q6, d24, d10 | ||
43 | @@ -816,7 +816,7 @@ generate_composite_function \ | ||
44 | vmull.u8 q10, d3, d30 | ||
45 | .endm | ||
46 | |||
47 | -.macro pixman_composite_over_n_8_0565_process_pixblock_tail | ||
48 | +.macro pixman_composite_over_8888_8_0565_process_pixblock_tail | ||
49 | /* 3 cycle bubble (after vmull.u8) */ | ||
50 | vrshr.u16 q13, q8, #8 | ||
51 | vrshr.u16 q11, q9, #8 | ||
52 | @@ -835,7 +835,7 @@ generate_composite_function \ | ||
53 | vsri.u16 q14, q9, #11 | ||
54 | .endm | ||
55 | |||
56 | -.macro pixman_composite_over_n_8_0565_process_pixblock_tail_head | ||
57 | +.macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head | ||
58 | vld1.16 {d4, d5}, [DST_R, :128]! | ||
59 | vshrn.u16 d6, q2, #8 | ||
60 | fetch_mask_pixblock | ||
61 | @@ -880,6 +880,23 @@ generate_composite_function \ | ||
62 | vmull.u8 q10, d3, d30 | ||
63 | .endm | ||
64 | |||
65 | +generate_composite_function \ | ||
66 | + pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, \ | ||
67 | + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ | ||
68 | + 8, /* number of pixels, processed in a single block */ \ | ||
69 | + 5, /* prefetch distance */ \ | ||
70 | + default_init_need_all_regs, \ | ||
71 | + default_cleanup_need_all_regs, \ | ||
72 | + pixman_composite_over_8888_8_0565_process_pixblock_head, \ | ||
73 | + pixman_composite_over_8888_8_0565_process_pixblock_tail, \ | ||
74 | + pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ | ||
75 | + 28, /* dst_w_basereg */ \ | ||
76 | + 4, /* dst_r_basereg */ \ | ||
77 | + 8, /* src_basereg */ \ | ||
78 | + 24 /* mask_basereg */ | ||
79 | + | ||
80 | +/******************************************************************************/ | ||
81 | + | ||
82 | /* | ||
83 | * This function needs a special initialization of solid mask. | ||
84 | * Solid source pixel data is fetched from stack at ARGS_STACK_OFFSET | ||
85 | @@ -911,9 +928,9 @@ generate_composite_function \ | ||
86 | 5, /* prefetch distance */ \ | ||
87 | pixman_composite_over_n_8_0565_init, \ | ||
88 | pixman_composite_over_n_8_0565_cleanup, \ | ||
89 | - pixman_composite_over_n_8_0565_process_pixblock_head, \ | ||
90 | - pixman_composite_over_n_8_0565_process_pixblock_tail, \ | ||
91 | - pixman_composite_over_n_8_0565_process_pixblock_tail_head | ||
92 | + pixman_composite_over_8888_8_0565_process_pixblock_head, \ | ||
93 | + pixman_composite_over_8888_8_0565_process_pixblock_tail, \ | ||
94 | + pixman_composite_over_8888_8_0565_process_pixblock_tail_head | ||
95 | |||
96 | /******************************************************************************/ | ||
97 | |||
98 | @@ -935,36 +952,8 @@ generate_composite_function \ | ||
99 | 5, /* prefetch distance */ \ | ||
100 | pixman_composite_over_8888_n_0565_init, \ | ||
101 | pixman_composite_over_8888_n_0565_cleanup, \ | ||
102 | - pixman_composite_over_n_8_0565_process_pixblock_head, \ | ||
103 | - pixman_composite_over_n_8_0565_process_pixblock_tail, \ | ||
104 | - pixman_composite_over_n_8_0565_process_pixblock_tail_head, \ | ||
105 | - 28, /* dst_w_basereg */ \ | ||
106 | - 4, /* dst_r_basereg */ \ | ||
107 | - 8, /* src_basereg */ \ | ||
108 | - 24 /* mask_basereg */ | ||
109 | - | ||
110 | -/******************************************************************************/ | ||
111 | - | ||
112 | -/* TODO: expand macros and do better instructions scheduling */ | ||
113 | -.macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head | ||
114 | - vld1.16 {d4, d5}, [DST_R, :128]! | ||
115 | - pixman_composite_over_n_8_0565_process_pixblock_tail | ||
116 | - fetch_src_pixblock | ||
117 | - cache_preload 8, 8 | ||
118 | - fetch_mask_pixblock | ||
119 | - pixman_composite_over_n_8_0565_process_pixblock_head | ||
120 | - vst1.16 {d28, d29}, [DST_W, :128]! | ||
121 | -.endm | ||
122 | - | ||
123 | -generate_composite_function \ | ||
124 | - pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, \ | ||
125 | - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ | ||
126 | - 8, /* number of pixels, processed in a single block */ \ | ||
127 | - 5, /* prefetch distance */ \ | ||
128 | - default_init_need_all_regs, \ | ||
129 | - default_cleanup_need_all_regs, \ | ||
130 | - pixman_composite_over_n_8_0565_process_pixblock_head, \ | ||
131 | - pixman_composite_over_n_8_0565_process_pixblock_tail, \ | ||
132 | + pixman_composite_over_8888_8_0565_process_pixblock_head, \ | ||
133 | + pixman_composite_over_8888_8_0565_process_pixblock_tail, \ | ||
134 | pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ | ||
135 | 28, /* dst_w_basereg */ \ | ||
136 | 4, /* dst_r_basereg */ \ | ||
137 | -- | ||
138 | 1.6.6.1 | ||
139 | |||
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0010-ARM-added-neon_composite_over_0565_n_0565-fast-path.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0010-ARM-added-neon_composite_over_0565_n_0565-fast-path.patch new file mode 100644 index 0000000000..376631a50b --- /dev/null +++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0010-ARM-added-neon_composite_over_0565_n_0565-fast-path.patch | |||
@@ -0,0 +1,74 @@ | |||
1 | From 6d2f7f981b52b41f4321071c325babcf792bd666 Mon Sep 17 00:00:00 2001 | ||
2 | From: Siarhei Siamashka <siarhei.siamashka@nokia.com> | ||
3 | Date: Sat, 27 Nov 2010 15:53:54 +0200 | ||
4 | Subject: [PATCH 10/24] ARM: added 'neon_composite_over_0565_n_0565' fast path | ||
5 | |||
6 | --- | ||
7 | pixman/pixman-arm-neon-asm.S | 28 ++++++++++++++++++++++++++++ | ||
8 | pixman/pixman-arm-neon.c | 4 ++++ | ||
9 | 2 files changed, 32 insertions(+), 0 deletions(-) | ||
10 | |||
11 | diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S | ||
12 | index 4175144..81c0a34 100644 | ||
13 | --- a/pixman/pixman-arm-neon-asm.S | ||
14 | +++ b/pixman/pixman-arm-neon-asm.S | ||
15 | @@ -1994,6 +1994,34 @@ generate_composite_function \ | ||
16 | |||
17 | /******************************************************************************/ | ||
18 | |||
19 | +.macro pixman_composite_over_0565_n_0565_init | ||
20 | + add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) | ||
21 | + vpush {d8-d15} | ||
22 | + vld1.32 {d15[0]}, [DUMMY] | ||
23 | + vdup.8 d15, d15[3] | ||
24 | +.endm | ||
25 | + | ||
26 | +.macro pixman_composite_over_0565_n_0565_cleanup | ||
27 | + vpop {d8-d15} | ||
28 | +.endm | ||
29 | + | ||
30 | +generate_composite_function \ | ||
31 | + pixman_composite_over_0565_n_0565_asm_neon, 16, 0, 16, \ | ||
32 | + FLAG_DST_READWRITE, \ | ||
33 | + 8, /* number of pixels, processed in a single block */ \ | ||
34 | + 5, /* prefetch distance */ \ | ||
35 | + pixman_composite_over_0565_n_0565_init, \ | ||
36 | + pixman_composite_over_0565_n_0565_cleanup, \ | ||
37 | + pixman_composite_over_0565_8_0565_process_pixblock_head, \ | ||
38 | + pixman_composite_over_0565_8_0565_process_pixblock_tail, \ | ||
39 | + pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \ | ||
40 | + 28, /* dst_w_basereg */ \ | ||
41 | + 10, /* dst_r_basereg */ \ | ||
42 | + 8, /* src_basereg */ \ | ||
43 | + 15 /* mask_basereg */ | ||
44 | + | ||
45 | +/******************************************************************************/ | ||
46 | + | ||
47 | .macro pixman_composite_add_0565_8_0565_process_pixblock_head | ||
48 | /* mask is in d15 */ | ||
49 | convert_0565_to_x888 q4, d2, d1, d0 | ||
50 | diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c | ||
51 | index 8156bbb..b01c3e0 100644 | ||
52 | --- a/pixman/pixman-arm-neon.c | ||
53 | +++ b/pixman/pixman-arm-neon.c | ||
54 | @@ -85,6 +85,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888, | ||
55 | uint32_t, 1, uint32_t, 1) | ||
56 | PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_0565, | ||
57 | uint32_t, 1, uint16_t, 1) | ||
58 | +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_0565_n_0565, | ||
59 | + uint16_t, 1, uint16_t, 1) | ||
60 | |||
61 | PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8, | ||
62 | uint8_t, 1, uint8_t, 1, uint8_t, 1) | ||
63 | @@ -257,6 +259,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = | ||
64 | PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, neon_composite_over_8888_n_8888), | ||
65 | PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, neon_composite_over_8888_n_0565), | ||
66 | PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, b5g6r5, neon_composite_over_8888_n_0565), | ||
67 | + PIXMAN_STD_FAST_PATH (OVER, r5g6b5, solid, r5g6b5, neon_composite_over_0565_n_0565), | ||
68 | + PIXMAN_STD_FAST_PATH (OVER, b5g6r5, solid, b5g6r5, neon_composite_over_0565_n_0565), | ||
69 | PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, neon_composite_over_8888_8_8888), | ||
70 | PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, neon_composite_over_8888_8_8888), | ||
71 | PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, neon_composite_over_8888_8_8888), | ||
72 | -- | ||
73 | 1.6.6.1 | ||
74 | |||
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0011-ARM-added-neon_composite_add_8888_8_8888-fast-path.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0011-ARM-added-neon_composite_add_8888_8_8888-fast-path.patch new file mode 100644 index 0000000000..19f429bbf7 --- /dev/null +++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0011-ARM-added-neon_composite_add_8888_8_8888-fast-path.patch | |||
@@ -0,0 +1,63 @@ | |||
1 | From c3f48b6aa2f9354af02ffc8c938ec6753fdcbde3 Mon Sep 17 00:00:00 2001 | ||
2 | From: Siarhei Siamashka <siarhei.siamashka@nokia.com> | ||
3 | Date: Sun, 28 Nov 2010 22:05:53 +0200 | ||
4 | Subject: [PATCH 11/24] ARM: added 'neon_composite_add_8888_8_8888' fast path | ||
5 | |||
6 | --- | ||
7 | pixman/pixman-arm-neon-asm.S | 17 +++++++++++++++++ | ||
8 | pixman/pixman-arm-neon.c | 4 ++++ | ||
9 | 2 files changed, 21 insertions(+), 0 deletions(-) | ||
10 | |||
11 | diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S | ||
12 | index 81c0a34..11ef166 100644 | ||
13 | --- a/pixman/pixman-arm-neon-asm.S | ||
14 | +++ b/pixman/pixman-arm-neon-asm.S | ||
15 | @@ -1595,6 +1595,23 @@ generate_composite_function_single_scanline \ | ||
16 | |||
17 | /******************************************************************************/ | ||
18 | |||
19 | +generate_composite_function \ | ||
20 | + pixman_composite_add_8888_8_8888_asm_neon, 32, 8, 32, \ | ||
21 | + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ | ||
22 | + 8, /* number of pixels, processed in a single block */ \ | ||
23 | + 5, /* prefetch distance */ \ | ||
24 | + default_init, \ | ||
25 | + default_cleanup, \ | ||
26 | + pixman_composite_add_8888_8888_8888_process_pixblock_head, \ | ||
27 | + pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ | ||
28 | + pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ | ||
29 | + 28, /* dst_w_basereg */ \ | ||
30 | + 4, /* dst_r_basereg */ \ | ||
31 | + 0, /* src_basereg */ \ | ||
32 | + 27 /* mask_basereg */ | ||
33 | + | ||
34 | +/******************************************************************************/ | ||
35 | + | ||
36 | .macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head | ||
37 | /* expecting source data in {d0, d1, d2, d3} */ | ||
38 | /* destination data in {d4, d5, d6, d7} */ | ||
39 | diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c | ||
40 | index b01c3e0..eaf9787 100644 | ||
41 | --- a/pixman/pixman-arm-neon.c | ||
42 | +++ b/pixman/pixman-arm-neon.c | ||
43 | @@ -92,6 +92,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8, | ||
44 | uint8_t, 1, uint8_t, 1, uint8_t, 1) | ||
45 | PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565, | ||
46 | uint16_t, 1, uint8_t, 1, uint16_t, 1) | ||
47 | +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8_8888, | ||
48 | + uint32_t, 1, uint8_t, 1, uint32_t, 1) | ||
49 | PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888, | ||
50 | uint32_t, 1, uint32_t, 1, uint32_t, 1) | ||
51 | PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888, | ||
52 | @@ -282,6 +284,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = | ||
53 | PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8), | ||
54 | PIXMAN_STD_FAST_PATH (ADD, r5g6b5, a8, r5g6b5, neon_composite_add_0565_8_0565), | ||
55 | PIXMAN_STD_FAST_PATH (ADD, b5g6r5, a8, b5g6r5, neon_composite_add_0565_8_0565), | ||
56 | + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8, a8r8g8b8, neon_composite_add_8888_8_8888), | ||
57 | + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, a8, a8b8g8r8, neon_composite_add_8888_8_8888), | ||
58 | PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888), | ||
59 | PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8_8), | ||
60 | PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888), | ||
61 | -- | ||
62 | 1.6.6.1 | ||
63 | |||
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0012-ARM-better-NEON-instructions-scheduling-for-add_8888.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0012-ARM-better-NEON-instructions-scheduling-for-add_8888.patch new file mode 100644 index 0000000000..28dd8b6051 --- /dev/null +++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0012-ARM-better-NEON-instructions-scheduling-for-add_8888.patch | |||
@@ -0,0 +1,105 @@ | |||
1 | From 1fba7790367d7b726d05a33bbbcebe10b9280a31 Mon Sep 17 00:00:00 2001 | ||
2 | From: Siarhei Siamashka <siarhei.siamashka@nokia.com> | ||
3 | Date: Mon, 29 Nov 2010 02:10:22 +0200 | ||
4 | Subject: [PATCH 12/24] ARM: better NEON instructions scheduling for add_8888_8888_8888 | ||
5 | |||
6 | Provides a minor performance improvement by using pipelining and hiding | ||
7 | instructions latencies. Also do not clobber d0-d3 registers (source | ||
8 | image pixels) while doing calculations in order to allow the use of | ||
9 | the same macro for add_n_8_8888 fast path later. | ||
10 | |||
11 | Benchmark from ARM Cortex-A8 @500MHz: | ||
12 | |||
13 | == before == | ||
14 | |||
15 | add_8888_8888_8888 = L1: 95.94 L2: 42.27 M: 25.60 (121.09%) | ||
16 | HT: 14.54 VT: 13.13 R: 12.77 RT: 4.49 (48Kops/s) | ||
17 | add_8888_8_8888 = L1: 104.51 L2: 57.81 M: 36.06 (106.62%) | ||
18 | HT: 19.24 VT: 16.45 R: 14.71 RT: 4.80 (51Kops/s) | ||
19 | |||
20 | == after == | ||
21 | |||
22 | add_8888_8888_8888 = L1: 106.66 L2: 47.82 M: 27.32 (129.30%) | ||
23 | HT: 15.44 VT: 13.96 R: 12.86 RT: 4.48 (48Kops/s) | ||
24 | add_8888_8_8888 = L1: 107.72 L2: 61.02 M: 38.26 (113.16%) | ||
25 | HT: 19.48 VT: 16.72 R: 14.82 RT: 4.80 (51Kops/s) | ||
26 | --- | ||
27 | pixman/pixman-arm-neon-asm.S | 52 +++++++++++++++++++++++++++-------------- | ||
28 | 1 files changed, 34 insertions(+), 18 deletions(-) | ||
29 | |||
30 | diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S | ||
31 | index 11ef166..829ef84 100644 | ||
32 | --- a/pixman/pixman-arm-neon-asm.S | ||
33 | +++ b/pixman/pixman-arm-neon-asm.S | ||
34 | @@ -1542,34 +1542,50 @@ generate_composite_function \ | ||
35 | /* expecting source data in {d0, d1, d2, d3} */ | ||
36 | /* destination data in {d4, d5, d6, d7} */ | ||
37 | /* mask in {d24, d25, d26, d27} */ | ||
38 | - vmull.u8 q8, d27, d0 | ||
39 | - vmull.u8 q9, d27, d1 | ||
40 | + vmull.u8 q8, d27, d0 | ||
41 | + vmull.u8 q9, d27, d1 | ||
42 | vmull.u8 q10, d27, d2 | ||
43 | vmull.u8 q11, d27, d3 | ||
44 | - vrshr.u16 q0, q8, #8 | ||
45 | - vrshr.u16 q1, q9, #8 | ||
46 | - vrshr.u16 q12, q10, #8 | ||
47 | - vrshr.u16 q13, q11, #8 | ||
48 | - vraddhn.u16 d0, q0, q8 | ||
49 | - vraddhn.u16 d1, q1, q9 | ||
50 | - vraddhn.u16 d2, q12, q10 | ||
51 | - vraddhn.u16 d3, q13, q11 | ||
52 | - vqadd.u8 q14, q0, q2 | ||
53 | - vqadd.u8 q15, q1, q3 | ||
54 | + /* 1 cycle bubble */ | ||
55 | + vrsra.u16 q8, q8, #8 | ||
56 | + vrsra.u16 q9, q9, #8 | ||
57 | + vrsra.u16 q10, q10, #8 | ||
58 | + vrsra.u16 q11, q11, #8 | ||
59 | .endm | ||
60 | |||
61 | .macro pixman_composite_add_8888_8888_8888_process_pixblock_tail | ||
62 | + /* 2 cycle bubble */ | ||
63 | + vrshrn.u16 d28, q8, #8 | ||
64 | + vrshrn.u16 d29, q9, #8 | ||
65 | + vrshrn.u16 d30, q10, #8 | ||
66 | + vrshrn.u16 d31, q11, #8 | ||
67 | + vqadd.u8 q14, q2, q14 | ||
68 | + /* 1 cycle bubble */ | ||
69 | + vqadd.u8 q15, q3, q15 | ||
70 | .endm | ||
71 | |||
72 | -/* TODO: expand macros and do better instructions scheduling */ | ||
73 | .macro pixman_composite_add_8888_8888_8888_process_pixblock_tail_head | ||
74 | - pixman_composite_add_8888_8888_8888_process_pixblock_tail | ||
75 | - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! | ||
76 | - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! | ||
77 | - fetch_mask_pixblock | ||
78 | fetch_src_pixblock | ||
79 | + vrshrn.u16 d28, q8, #8 | ||
80 | + fetch_mask_pixblock | ||
81 | + vrshrn.u16 d29, q9, #8 | ||
82 | + vmull.u8 q8, d27, d0 | ||
83 | + vrshrn.u16 d30, q10, #8 | ||
84 | + vmull.u8 q9, d27, d1 | ||
85 | + vrshrn.u16 d31, q11, #8 | ||
86 | + vmull.u8 q10, d27, d2 | ||
87 | + vqadd.u8 q14, q2, q14 | ||
88 | + vmull.u8 q11, d27, d3 | ||
89 | + vqadd.u8 q15, q3, q15 | ||
90 | + vrsra.u16 q8, q8, #8 | ||
91 | + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! | ||
92 | + vrsra.u16 q9, q9, #8 | ||
93 | + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! | ||
94 | + vrsra.u16 q10, q10, #8 | ||
95 | + | ||
96 | cache_preload 8, 8 | ||
97 | - pixman_composite_add_8888_8888_8888_process_pixblock_head | ||
98 | + | ||
99 | + vrsra.u16 q11, q11, #8 | ||
100 | .endm | ||
101 | |||
102 | generate_composite_function \ | ||
103 | -- | ||
104 | 1.6.6.1 | ||
105 | |||
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0013-ARM-added-neon_composite_add_n_8_8888-fast-path.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0013-ARM-added-neon_composite_add_n_8_8888-fast-path.patch new file mode 100644 index 0000000000..a1da09f9bd --- /dev/null +++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0013-ARM-added-neon_composite_add_n_8_8888-fast-path.patch | |||
@@ -0,0 +1,75 @@ | |||
1 | From b066b520dfaf0a9f4d1bc9a73c789091e9ce7cc8 Mon Sep 17 00:00:00 2001 | ||
2 | From: Siarhei Siamashka <siarhei.siamashka@nokia.com> | ||
3 | Date: Mon, 29 Nov 2010 02:38:52 +0200 | ||
4 | Subject: [PATCH 13/24] ARM: added 'neon_composite_add_n_8_8888' fast path | ||
5 | |||
6 | --- | ||
7 | pixman/pixman-arm-neon-asm.S | 29 +++++++++++++++++++++++++++++ | ||
8 | pixman/pixman-arm-neon.c | 4 ++++ | ||
9 | 2 files changed, 33 insertions(+), 0 deletions(-) | ||
10 | |||
11 | diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S | ||
12 | index 829ef84..dd6f2c5 100644 | ||
13 | --- a/pixman/pixman-arm-neon-asm.S | ||
14 | +++ b/pixman/pixman-arm-neon-asm.S | ||
15 | @@ -1628,6 +1628,35 @@ generate_composite_function \ | ||
16 | |||
17 | /******************************************************************************/ | ||
18 | |||
19 | +.macro pixman_composite_add_n_8_8888_init | ||
20 | + add DUMMY, sp, #ARGS_STACK_OFFSET | ||
21 | + vld1.32 {d3[0]}, [DUMMY] | ||
22 | + vdup.8 d0, d3[0] | ||
23 | + vdup.8 d1, d3[1] | ||
24 | + vdup.8 d2, d3[2] | ||
25 | + vdup.8 d3, d3[3] | ||
26 | +.endm | ||
27 | + | ||
28 | +.macro pixman_composite_add_n_8_8888_cleanup | ||
29 | +.endm | ||
30 | + | ||
31 | +generate_composite_function \ | ||
32 | + pixman_composite_add_n_8_8888_asm_neon, 0, 8, 32, \ | ||
33 | + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ | ||
34 | + 8, /* number of pixels, processed in a single block */ \ | ||
35 | + 5, /* prefetch distance */ \ | ||
36 | + pixman_composite_add_n_8_8888_init, \ | ||
37 | + pixman_composite_add_n_8_8888_cleanup, \ | ||
38 | + pixman_composite_add_8888_8888_8888_process_pixblock_head, \ | ||
39 | + pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ | ||
40 | + pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ | ||
41 | + 28, /* dst_w_basereg */ \ | ||
42 | + 4, /* dst_r_basereg */ \ | ||
43 | + 0, /* src_basereg */ \ | ||
44 | + 27 /* mask_basereg */ | ||
45 | + | ||
46 | +/******************************************************************************/ | ||
47 | + | ||
48 | .macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head | ||
49 | /* expecting source data in {d0, d1, d2, d3} */ | ||
50 | /* destination data in {d4, d5, d6, d7} */ | ||
51 | diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c | ||
52 | index eaf9787..5ad58bd 100644 | ||
53 | --- a/pixman/pixman-arm-neon.c | ||
54 | +++ b/pixman/pixman-arm-neon.c | ||
55 | @@ -80,6 +80,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8, | ||
56 | uint8_t, 1, uint8_t, 1) | ||
57 | PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8, | ||
58 | uint8_t, 1, uint8_t, 1) | ||
59 | +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8888, | ||
60 | + uint8_t, 1, uint32_t, 1) | ||
61 | |||
62 | PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888, | ||
63 | uint32_t, 1, uint32_t, 1) | ||
64 | @@ -281,6 +283,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = | ||
65 | PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888), | ||
66 | PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888), | ||
67 | PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, neon_composite_add_n_8_8), | ||
68 | + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, neon_composite_add_n_8_8888), | ||
69 | + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, neon_composite_add_n_8_8888), | ||
70 | PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8), | ||
71 | PIXMAN_STD_FAST_PATH (ADD, r5g6b5, a8, r5g6b5, neon_composite_add_0565_8_0565), | ||
72 | PIXMAN_STD_FAST_PATH (ADD, b5g6r5, a8, b5g6r5, neon_composite_add_0565_8_0565), | ||
73 | -- | ||
74 | 1.6.6.1 | ||
75 | |||
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0014-ARM-added-neon_composite_add_8888_n_8888-fast-path.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0014-ARM-added-neon_composite_add_8888_n_8888-fast-path.patch new file mode 100644 index 0000000000..0caa29d266 --- /dev/null +++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0014-ARM-added-neon_composite_add_8888_n_8888-fast-path.patch | |||
@@ -0,0 +1,72 @@ | |||
1 | From f6843e3797eea7e4aed7614b1086f5cefc06c0f9 Mon Sep 17 00:00:00 2001 | ||
2 | From: Siarhei Siamashka <siarhei.siamashka@nokia.com> | ||
3 | Date: Mon, 29 Nov 2010 03:31:32 +0200 | ||
4 | Subject: [PATCH 14/24] ARM: added 'neon_composite_add_8888_n_8888' fast path | ||
5 | |||
6 | --- | ||
7 | pixman/pixman-arm-neon-asm.S | 26 ++++++++++++++++++++++++++ | ||
8 | pixman/pixman-arm-neon.c | 4 ++++ | ||
9 | 2 files changed, 30 insertions(+), 0 deletions(-) | ||
10 | |||
11 | diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S | ||
12 | index dd6f2c5..2c0fd37 100644 | ||
13 | --- a/pixman/pixman-arm-neon-asm.S | ||
14 | +++ b/pixman/pixman-arm-neon-asm.S | ||
15 | @@ -1657,6 +1657,32 @@ generate_composite_function \ | ||
16 | |||
17 | /******************************************************************************/ | ||
18 | |||
19 | +.macro pixman_composite_add_8888_n_8888_init | ||
20 | + add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) | ||
21 | + vld1.32 {d27[0]}, [DUMMY] | ||
22 | + vdup.8 d27, d27[3] | ||
23 | +.endm | ||
24 | + | ||
25 | +.macro pixman_composite_add_8888_n_8888_cleanup | ||
26 | +.endm | ||
27 | + | ||
28 | +generate_composite_function \ | ||
29 | + pixman_composite_add_8888_n_8888_asm_neon, 32, 0, 32, \ | ||
30 | + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ | ||
31 | + 8, /* number of pixels, processed in a single block */ \ | ||
32 | + 5, /* prefetch distance */ \ | ||
33 | + pixman_composite_add_8888_n_8888_init, \ | ||
34 | + pixman_composite_add_8888_n_8888_cleanup, \ | ||
35 | + pixman_composite_add_8888_8888_8888_process_pixblock_head, \ | ||
36 | + pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ | ||
37 | + pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ | ||
38 | + 28, /* dst_w_basereg */ \ | ||
39 | + 4, /* dst_r_basereg */ \ | ||
40 | + 0, /* src_basereg */ \ | ||
41 | + 27 /* mask_basereg */ | ||
42 | + | ||
43 | +/******************************************************************************/ | ||
44 | + | ||
45 | .macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head | ||
46 | /* expecting source data in {d0, d1, d2, d3} */ | ||
47 | /* destination data in {d4, d5, d6, d7} */ | ||
48 | diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c | ||
49 | index 5ad58bd..f0dc111 100644 | ||
50 | --- a/pixman/pixman-arm-neon.c | ||
51 | +++ b/pixman/pixman-arm-neon.c | ||
52 | @@ -89,6 +89,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_0565, | ||
53 | uint32_t, 1, uint16_t, 1) | ||
54 | PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_0565_n_0565, | ||
55 | uint16_t, 1, uint16_t, 1) | ||
56 | +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, add_8888_n_8888, | ||
57 | + uint32_t, 1, uint32_t, 1) | ||
58 | |||
59 | PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8, | ||
60 | uint8_t, 1, uint8_t, 1, uint8_t, 1) | ||
61 | @@ -291,6 +293,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = | ||
62 | PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8, a8r8g8b8, neon_composite_add_8888_8_8888), | ||
63 | PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, a8, a8b8g8r8, neon_composite_add_8888_8_8888), | ||
64 | PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888), | ||
65 | + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, solid, a8r8g8b8, neon_composite_add_8888_n_8888), | ||
66 | + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, solid, a8b8g8r8, neon_composite_add_8888_n_8888), | ||
67 | PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8_8), | ||
68 | PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888), | ||
69 | PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888), | ||
70 | -- | ||
71 | 1.6.6.1 | ||
72 | |||
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0015-ARM-added-flags-parameter-to-some-asm-fast-path-wrap.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0015-ARM-added-flags-parameter-to-some-asm-fast-path-wrap.patch new file mode 100644 index 0000000000..5f2448191d --- /dev/null +++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0015-ARM-added-flags-parameter-to-some-asm-fast-path-wrap.patch | |||
@@ -0,0 +1,153 @@ | |||
1 | From af7a69d90ea2b43a4e850870727723d719f09a1c Mon Sep 17 00:00:00 2001 | ||
2 | From: Siarhei Siamashka <siarhei.siamashka@nokia.com> | ||
3 | Date: Mon, 29 Nov 2010 09:00:46 +0200 | ||
4 | Subject: [PATCH 15/24] ARM: added flags parameter to some asm fast path wrapper macros | ||
5 | |||
6 | Not all types of operations can be skipped when the solid | ||
7 | source or the solid mask is transparent. Add an extra flags | ||
8 | parameter for providing this information to the wrappers. | ||
9 | --- | ||
10 | pixman/pixman-arm-common.h | 15 +++++++++------ | ||
11 | pixman/pixman-arm-neon.c | 26 +++++++++++++------------- | ||
12 | pixman/pixman-arm-simd.c | 4 ++-- | ||
13 | 3 files changed, 24 insertions(+), 21 deletions(-) | ||
14 | |||
15 | diff --git a/pixman/pixman-arm-common.h b/pixman/pixman-arm-common.h | ||
16 | index 2cff6c8..66f448d 100644 | ||
17 | --- a/pixman/pixman-arm-common.h | ||
18 | +++ b/pixman/pixman-arm-common.h | ||
19 | @@ -47,6 +47,9 @@ | ||
20 | * or mask), the corresponding stride argument is unused. | ||
21 | */ | ||
22 | |||
23 | +#define SKIP_ZERO_SRC 1 | ||
24 | +#define SKIP_ZERO_MASK 2 | ||
25 | + | ||
26 | #define PIXMAN_ARM_BIND_FAST_PATH_SRC_DST(cputype, name, \ | ||
27 | src_type, src_cnt, \ | ||
28 | dst_type, dst_cnt) \ | ||
29 | @@ -87,7 +90,7 @@ cputype##_composite_##name (pixman_implementation_t *imp, \ | ||
30 | src_line, src_stride); \ | ||
31 | } | ||
32 | |||
33 | -#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(cputype, name, \ | ||
34 | +#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(flags, cputype, name, \ | ||
35 | dst_type, dst_cnt) \ | ||
36 | void \ | ||
37 | pixman_composite_##name##_asm_##cputype (int32_t w, \ | ||
38 | @@ -117,7 +120,7 @@ cputype##_composite_##name (pixman_implementation_t *imp, \ | ||
39 | \ | ||
40 | src = _pixman_image_get_solid (src_image, dst_image->bits.format); \ | ||
41 | \ | ||
42 | - if (src == 0) \ | ||
43 | + if ((flags & SKIP_ZERO_SRC) && src == 0) \ | ||
44 | return; \ | ||
45 | \ | ||
46 | PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ | ||
47 | @@ -128,7 +131,7 @@ cputype##_composite_##name (pixman_implementation_t *imp, \ | ||
48 | src); \ | ||
49 | } | ||
50 | |||
51 | -#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(cputype, name, \ | ||
52 | +#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(flags, cputype, name, \ | ||
53 | mask_type, mask_cnt, \ | ||
54 | dst_type, dst_cnt) \ | ||
55 | void \ | ||
56 | @@ -163,7 +166,7 @@ cputype##_composite_##name (pixman_implementation_t *imp, \ | ||
57 | \ | ||
58 | src = _pixman_image_get_solid (src_image, dst_image->bits.format); \ | ||
59 | \ | ||
60 | - if (src == 0) \ | ||
61 | + if ((flags & SKIP_ZERO_SRC) && src == 0) \ | ||
62 | return; \ | ||
63 | \ | ||
64 | PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ | ||
65 | @@ -177,7 +180,7 @@ cputype##_composite_##name (pixman_implementation_t *imp, \ | ||
66 | mask_line, mask_stride); \ | ||
67 | } | ||
68 | |||
69 | -#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(cputype, name, \ | ||
70 | +#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(flags, cputype, name, \ | ||
71 | src_type, src_cnt, \ | ||
72 | dst_type, dst_cnt) \ | ||
73 | void \ | ||
74 | @@ -211,7 +214,7 @@ cputype##_composite_##name (pixman_implementation_t *imp, \ | ||
75 | \ | ||
76 | mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);\ | ||
77 | \ | ||
78 | - if (mask == 0) \ | ||
79 | + if ((flags & SKIP_ZERO_MASK) && mask == 0) \ | ||
80 | return; \ | ||
81 | \ | ||
82 | PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ | ||
83 | diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c | ||
84 | index f0dc111..1a3741c 100644 | ||
85 | --- a/pixman/pixman-arm-neon.c | ||
86 | +++ b/pixman/pixman-arm-neon.c | ||
87 | @@ -63,33 +63,33 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888, | ||
88 | PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565, | ||
89 | uint8_t, 1, uint16_t, 1) | ||
90 | |||
91 | -PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_0565, | ||
92 | +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_0565, | ||
93 | uint16_t, 1) | ||
94 | -PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_8888, | ||
95 | +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888, | ||
96 | uint32_t, 1) | ||
97 | -PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_reverse_n_8888, | ||
98 | +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888, | ||
99 | uint32_t, 1) | ||
100 | |||
101 | -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_0565, | ||
102 | +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565, | ||
103 | uint8_t, 1, uint16_t, 1) | ||
104 | -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8888, | ||
105 | +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8888, | ||
106 | uint8_t, 1, uint32_t, 1) | ||
107 | -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8888_8888_ca, | ||
108 | +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_8888_ca, | ||
109 | uint32_t, 1, uint32_t, 1) | ||
110 | -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8, | ||
111 | +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8, | ||
112 | uint8_t, 1, uint8_t, 1) | ||
113 | -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8, | ||
114 | +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8, | ||
115 | uint8_t, 1, uint8_t, 1) | ||
116 | -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8888, | ||
117 | +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888, | ||
118 | uint8_t, 1, uint32_t, 1) | ||
119 | |||
120 | -PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888, | ||
121 | +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888, | ||
122 | uint32_t, 1, uint32_t, 1) | ||
123 | -PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_0565, | ||
124 | +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_0565, | ||
125 | uint32_t, 1, uint16_t, 1) | ||
126 | -PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_0565_n_0565, | ||
127 | +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_0565_n_0565, | ||
128 | uint16_t, 1, uint16_t, 1) | ||
129 | -PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, add_8888_n_8888, | ||
130 | +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, add_8888_n_8888, | ||
131 | uint32_t, 1, uint32_t, 1) | ||
132 | |||
133 | PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8, | ||
134 | diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c | ||
135 | index 3b05007..dc2f471 100644 | ||
136 | --- a/pixman/pixman-arm-simd.c | ||
137 | +++ b/pixman/pixman-arm-simd.c | ||
138 | @@ -381,10 +381,10 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8, | ||
139 | PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888, | ||
140 | uint32_t, 1, uint32_t, 1) | ||
141 | |||
142 | -PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (armv6, over_8888_n_8888, | ||
143 | +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888, | ||
144 | uint32_t, 1, uint32_t, 1) | ||
145 | |||
146 | -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (armv6, over_n_8_8888, | ||
147 | +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888, | ||
148 | uint8_t, 1, uint32_t, 1) | ||
149 | |||
150 | PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC, | ||
151 | -- | ||
152 | 1.6.6.1 | ||
153 | |||
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0016-ARM-added-neon_composite_in_n_8-fast-path.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0016-ARM-added-neon_composite_in_n_8-fast-path.patch new file mode 100644 index 0000000000..8a22f54451 --- /dev/null +++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0016-ARM-added-neon_composite_in_n_8-fast-path.patch | |||
@@ -0,0 +1,97 @@ | |||
1 | From 733f68912f4a44c24ad3973049a7e1d98f4c6ea8 Mon Sep 17 00:00:00 2001 | ||
2 | From: Siarhei Siamashka <siarhei.siamashka@nokia.com> | ||
3 | Date: Mon, 29 Nov 2010 09:11:29 +0200 | ||
4 | Subject: [PATCH 16/24] ARM: added 'neon_composite_in_n_8' fast path | ||
5 | |||
6 | --- | ||
7 | pixman/pixman-arm-neon-asm.S | 52 ++++++++++++++++++++++++++++++++++++++++++ | ||
8 | pixman/pixman-arm-neon.c | 3 ++ | ||
9 | 2 files changed, 55 insertions(+), 0 deletions(-) | ||
10 | |||
11 | diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S | ||
12 | index 2c0fd37..cf014fa 100644 | ||
13 | --- a/pixman/pixman-arm-neon-asm.S | ||
14 | +++ b/pixman/pixman-arm-neon-asm.S | ||
15 | @@ -1427,6 +1427,58 @@ generate_composite_function \ | ||
16 | |||
17 | /******************************************************************************/ | ||
18 | |||
19 | +.macro pixman_composite_in_n_8_process_pixblock_head | ||
20 | + /* expecting source data in {d0, d1, d2, d3} */ | ||
21 | + /* and destination data in {d4, d5, d6, d7} */ | ||
22 | + vmull.u8 q8, d4, d3 | ||
23 | + vmull.u8 q9, d5, d3 | ||
24 | + vmull.u8 q10, d6, d3 | ||
25 | + vmull.u8 q11, d7, d3 | ||
26 | +.endm | ||
27 | + | ||
28 | +.macro pixman_composite_in_n_8_process_pixblock_tail | ||
29 | + vrshr.u16 q14, q8, #8 | ||
30 | + vrshr.u16 q15, q9, #8 | ||
31 | + vrshr.u16 q12, q10, #8 | ||
32 | + vrshr.u16 q13, q11, #8 | ||
33 | + vraddhn.u16 d28, q8, q14 | ||
34 | + vraddhn.u16 d29, q9, q15 | ||
35 | + vraddhn.u16 d30, q10, q12 | ||
36 | + vraddhn.u16 d31, q11, q13 | ||
37 | +.endm | ||
38 | + | ||
39 | +.macro pixman_composite_in_n_8_process_pixblock_tail_head | ||
40 | + pixman_composite_in_n_8_process_pixblock_tail | ||
41 | + vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! | ||
42 | + cache_preload 32, 32 | ||
43 | + pixman_composite_in_n_8_process_pixblock_head | ||
44 | + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! | ||
45 | +.endm | ||
46 | + | ||
47 | +.macro pixman_composite_in_n_8_init | ||
48 | + add DUMMY, sp, #ARGS_STACK_OFFSET | ||
49 | + vld1.32 {d3[0]}, [DUMMY] | ||
50 | + vdup.8 d3, d3[3] | ||
51 | +.endm | ||
52 | + | ||
53 | +.macro pixman_composite_in_n_8_cleanup | ||
54 | +.endm | ||
55 | + | ||
56 | +generate_composite_function \ | ||
57 | + pixman_composite_in_n_8_asm_neon, 0, 0, 8, \ | ||
58 | + FLAG_DST_READWRITE, \ | ||
59 | + 32, /* number of pixels, processed in a single block */ \ | ||
60 | + 5, /* prefetch distance */ \ | ||
61 | + pixman_composite_in_n_8_init, \ | ||
62 | + pixman_composite_in_n_8_cleanup, \ | ||
63 | + pixman_composite_in_n_8_process_pixblock_head, \ | ||
64 | + pixman_composite_in_n_8_process_pixblock_tail, \ | ||
65 | + pixman_composite_in_n_8_process_pixblock_tail_head, \ | ||
66 | + 28, /* dst_w_basereg */ \ | ||
67 | + 4, /* dst_r_basereg */ \ | ||
68 | + 0, /* src_basereg */ \ | ||
69 | + 24 /* mask_basereg */ | ||
70 | + | ||
71 | .macro pixman_composite_add_n_8_8_process_pixblock_head | ||
72 | /* expecting source data in {d8, d9, d10, d11} */ | ||
73 | /* d8 - blue, d9 - green, d10 - red, d11 - alpha */ | ||
74 | diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c | ||
75 | index 1a3741c..e3eca2b 100644 | ||
76 | --- a/pixman/pixman-arm-neon.c | ||
77 | +++ b/pixman/pixman-arm-neon.c | ||
78 | @@ -69,6 +69,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888, | ||
79 | uint32_t, 1) | ||
80 | PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888, | ||
81 | uint32_t, 1) | ||
82 | +PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, neon, in_n_8, | ||
83 | + uint8_t, 1) | ||
84 | |||
85 | PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565, | ||
86 | uint8_t, 1, uint16_t, 1) | ||
87 | @@ -298,6 +300,7 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = | ||
88 | PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8_8), | ||
89 | PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888), | ||
90 | PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888), | ||
91 | + PIXMAN_STD_FAST_PATH (IN, solid, null, a8, neon_composite_in_n_8), | ||
92 | PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888), | ||
93 | PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888), | ||
94 | PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, r5g6b5, neon_composite_out_reverse_8_0565), | ||
95 | -- | ||
96 | 1.6.6.1 | ||
97 | |||
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0017-add-_pixman_bits_override_accessors.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0017-add-_pixman_bits_override_accessors.patch new file mode 100644 index 0000000000..a8148d9542 --- /dev/null +++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0017-add-_pixman_bits_override_accessors.patch | |||
@@ -0,0 +1,75 @@ | |||
1 | From 6593d86679fde724e49efa96b16ca22d9521b288 Mon Sep 17 00:00:00 2001 | ||
2 | From: Siarhei Siamashka <siarhei.siamashka@nokia.com> | ||
3 | Date: Thu, 10 Dec 2009 00:51:50 +0200 | ||
4 | Subject: [PATCH 17/24] add _pixman_bits_override_accessors | ||
5 | |||
6 | * from patch ARM: HACK: added NEON optimizations for fetch/store r5g6b5 scanline | ||
7 | * used in | ||
8 | 0005-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch | ||
9 | 0006-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch | ||
10 | 0007-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch | ||
11 | --- | ||
12 | pixman/pixman-access.c | 23 ++++++++++++++++++++++- | ||
13 | pixman/pixman-private.h | 5 +++++ | ||
14 | 2 files changed, 27 insertions(+), 1 deletions(-) | ||
15 | |||
16 | diff --git a/pixman/pixman-access.c b/pixman/pixman-access.c | ||
17 | index f1ce0ba..b33da29 100644 | ||
18 | --- a/pixman/pixman-access.c | ||
19 | +++ b/pixman/pixman-access.c | ||
20 | @@ -2836,7 +2836,7 @@ typedef struct | ||
21 | store_scanline_ ## format, store_scanline_generic_64 \ | ||
22 | } | ||
23 | |||
24 | -static const format_info_t accessors[] = | ||
25 | +static format_info_t accessors[] = | ||
26 | { | ||
27 | /* 32 bpp formats */ | ||
28 | FORMAT_INFO (a8r8g8b8), | ||
29 | @@ -2978,6 +2978,27 @@ _pixman_bits_image_setup_accessors (bits_image_t *image) | ||
30 | setup_accessors (image); | ||
31 | } | ||
32 | |||
33 | +void | ||
34 | +_pixman_bits_override_accessors (pixman_format_code_t format, | ||
35 | + fetch_scanline_t fetch_func, | ||
36 | + store_scanline_t store_func) | ||
37 | +{ | ||
38 | + format_info_t *info = accessors; | ||
39 | + | ||
40 | + while (info->format != PIXMAN_null) | ||
41 | + { | ||
42 | + if (info->format == format) | ||
43 | + { | ||
44 | + if (fetch_func) | ||
45 | + info->fetch_scanline_32 = fetch_func; | ||
46 | + if (store_func) | ||
47 | + info->store_scanline_32 = store_func; | ||
48 | + return; | ||
49 | + } | ||
50 | + info++; | ||
51 | + } | ||
52 | +} | ||
53 | + | ||
54 | #else | ||
55 | |||
56 | void | ||
57 | diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h | ||
58 | index 383748a..969dfab 100644 | ||
59 | --- a/pixman/pixman-private.h | ||
60 | +++ b/pixman/pixman-private.h | ||
61 | @@ -197,6 +197,11 @@ void | ||
62 | _pixman_bits_image_setup_accessors (bits_image_t *image); | ||
63 | |||
64 | void | ||
65 | +_pixman_bits_override_accessors (pixman_format_code_t format, | ||
66 | + fetch_scanline_t fetch_func, | ||
67 | + store_scanline_t store_func); | ||
68 | + | ||
69 | +void | ||
70 | _pixman_image_get_scanline_generic_64 (pixman_image_t *image, | ||
71 | int x, | ||
72 | int y, | ||
73 | -- | ||
74 | 1.6.6.1 | ||
75 | |||
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0018-Generic-C-implementation-of-pixman_blt-with-overlapp.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0018-Generic-C-implementation-of-pixman_blt-with-overlapp.patch new file mode 100644 index 0000000000..5b1c1089ed --- /dev/null +++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0018-Generic-C-implementation-of-pixman_blt-with-overlapp.patch | |||
@@ -0,0 +1,114 @@ | |||
1 | From 8e8b2809b505486001dc213becab0d50bfd96c1b Mon Sep 17 00:00:00 2001 | ||
2 | From: Siarhei Siamashka <siarhei.siamashka@nokia.com> | ||
3 | Date: Tue, 16 Mar 2010 16:55:28 +0100 | ||
4 | Subject: [PATCH 18/24] Generic C implementation of pixman_blt with overlapping support | ||
5 | |||
6 | Uses memcpy/memmove functions to copy pixels, can handle the | ||
7 | case when both source and destination areas are in the same | ||
8 | image (this is useful for scrolling). | ||
9 | |||
10 | It is assumed that the copying direction only matters when | ||
11 | the same image is used for both source and destination (and | ||
12 | src_stride == dst_stride). The copying direction is undefined | ||
13 | when the source and destination areas overlap but have | ||
14 | different strides (but this is an unrealistic case | ||
15 | anyway). | ||
16 | --- | ||
17 | pixman/pixman-general.c | 21 ++++++++++++++++++--- | ||
18 | pixman/pixman-private.h | 43 +++++++++++++++++++++++++++++++++++++++++++ | ||
19 | 2 files changed, 61 insertions(+), 3 deletions(-) | ||
20 | |||
21 | diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c | ||
22 | index 4d234a0..c4d2c14 100644 | ||
23 | --- a/pixman/pixman-general.c | ||
24 | +++ b/pixman/pixman-general.c | ||
25 | @@ -280,9 +280,24 @@ general_blt (pixman_implementation_t *imp, | ||
26 | int width, | ||
27 | int height) | ||
28 | { | ||
29 | - /* We can't blit unless we have sse2 or mmx */ | ||
30 | - | ||
31 | - return FALSE; | ||
32 | + uint8_t *dst_bytes = (uint8_t *)dst_bits; | ||
33 | + uint8_t *src_bytes = (uint8_t *)src_bits; | ||
34 | + int bpp; | ||
35 | + | ||
36 | + if (src_bpp != dst_bpp || src_bpp & 7) | ||
37 | + return FALSE; | ||
38 | + | ||
39 | + bpp = src_bpp >> 3; | ||
40 | + width *= bpp; | ||
41 | + src_stride *= 4; | ||
42 | + dst_stride *= 4; | ||
43 | + pixman_blt_helper (src_bytes + src_y * src_stride + src_x * bpp, | ||
44 | + dst_bytes + dst_y * dst_stride + dst_x * bpp, | ||
45 | + src_stride, | ||
46 | + dst_stride, | ||
47 | + width, | ||
48 | + height); | ||
49 | + return TRUE; | ||
50 | } | ||
51 | |||
52 | static pixman_bool_t | ||
53 | diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h | ||
54 | index 969dfab..352bceb 100644 | ||
55 | --- a/pixman/pixman-private.h | ||
56 | +++ b/pixman/pixman-private.h | ||
57 | @@ -10,6 +10,7 @@ | ||
58 | |||
59 | #include "pixman.h" | ||
60 | #include <time.h> | ||
61 | +#include <string.h> | ||
62 | #include <assert.h> | ||
63 | #include <stdio.h> | ||
64 | #include <string.h> | ||
65 | @@ -869,4 +870,46 @@ void pixman_timer_register (pixman_timer_t *timer); | ||
66 | |||
67 | #endif /* PIXMAN_TIMERS */ | ||
68 | |||
69 | +/* a helper function, blits rows of 'width' bytes with src/dst overlapping support */ | ||
70 | +static inline void | ||
71 | +pixman_blt_helper (uint8_t *src_bytes, | ||
72 | + uint8_t *dst_bytes, | ||
73 | + int src_stride, | ||
74 | + int dst_stride, | ||
75 | + int width, | ||
76 | + int height) | ||
77 | +{ | ||
78 | + /* | ||
79 | + * The second part of this check is not strictly needed, but it prevents | ||
80 | + * unnecessary upside-down processing of areas which belong to different | ||
81 | + * images. Upside-down processing can be slower with fixed-distance-ahead | ||
82 | + * prefetch and perceived as having more tearing. | ||
83 | + */ | ||
84 | + if (src_bytes < dst_bytes + width && | ||
85 | + src_bytes + src_stride * height > dst_bytes) | ||
86 | + { | ||
87 | + src_bytes += src_stride * height - src_stride; | ||
88 | + dst_bytes += dst_stride * height - dst_stride; | ||
89 | + dst_stride = -dst_stride; | ||
90 | + src_stride = -src_stride; | ||
91 | + /* Horizontal scrolling to the left needs memmove */ | ||
92 | + if (src_bytes + width > dst_bytes) | ||
93 | + { | ||
94 | + while (--height >= 0) | ||
95 | + { | ||
96 | + memmove (dst_bytes, src_bytes, width); | ||
97 | + dst_bytes += dst_stride; | ||
98 | + src_bytes += src_stride; | ||
99 | + } | ||
100 | + return; | ||
101 | + } | ||
102 | + } | ||
103 | + while (--height >= 0) | ||
104 | + { | ||
105 | + memcpy (dst_bytes, src_bytes, width); | ||
106 | + dst_bytes += dst_stride; | ||
107 | + src_bytes += src_stride; | ||
108 | + } | ||
109 | +} | ||
110 | + | ||
111 | #endif /* PIXMAN_PRIVATE_H */ | ||
112 | -- | ||
113 | 1.6.6.1 | ||
114 | |||
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0019-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0019-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch new file mode 100644 index 0000000000..5193d38f74 --- /dev/null +++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0019-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch | |||
@@ -0,0 +1,91 @@ | |||
1 | From f5a54f7d5eb1169bc79f0e445e2998e98080ef13 Mon Sep 17 00:00:00 2001 | ||
2 | From: Siarhei Siamashka <siarhei.siamashka@nokia.com> | ||
3 | Date: Thu, 22 Oct 2009 05:45:47 +0300 | ||
4 | Subject: [PATCH 19/24] Support of overlapping src/dst for pixman_blt_mmx | ||
5 | |||
6 | --- | ||
7 | pixman/pixman-mmx.c | 55 +++++++++++++++++++++++++++++--------------------- | ||
8 | 1 files changed, 32 insertions(+), 23 deletions(-) | ||
9 | |||
10 | diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c | ||
11 | index 34637a4..f9dd473 100644 | ||
12 | --- a/pixman/pixman-mmx.c | ||
13 | +++ b/pixman/pixman-mmx.c | ||
14 | @@ -2996,34 +2996,43 @@ pixman_blt_mmx (uint32_t *src_bits, | ||
15 | { | ||
16 | uint8_t * src_bytes; | ||
17 | uint8_t * dst_bytes; | ||
18 | - int byte_width; | ||
19 | + int bpp; | ||
20 | |||
21 | - if (src_bpp != dst_bpp) | ||
22 | + if (src_bpp != dst_bpp || src_bpp & 7) | ||
23 | return FALSE; | ||
24 | |||
25 | - if (src_bpp == 16) | ||
26 | - { | ||
27 | - src_stride = src_stride * (int) sizeof (uint32_t) / 2; | ||
28 | - dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; | ||
29 | - src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x)); | ||
30 | - dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x)); | ||
31 | - byte_width = 2 * width; | ||
32 | - src_stride *= 2; | ||
33 | - dst_stride *= 2; | ||
34 | - } | ||
35 | - else if (src_bpp == 32) | ||
36 | + bpp = src_bpp >> 3; | ||
37 | + width *= bpp; | ||
38 | + src_stride *= 4; | ||
39 | + dst_stride *= 4; | ||
40 | + src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp; | ||
41 | + dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp; | ||
42 | + | ||
43 | + if (src_bpp != 16 && src_bpp != 32) | ||
44 | { | ||
45 | - src_stride = src_stride * (int) sizeof (uint32_t) / 4; | ||
46 | - dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; | ||
47 | - src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x)); | ||
48 | - dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x)); | ||
49 | - byte_width = 4 * width; | ||
50 | - src_stride *= 4; | ||
51 | - dst_stride *= 4; | ||
52 | + pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride, | ||
53 | + width, height); | ||
54 | + return TRUE; | ||
55 | } | ||
56 | - else | ||
57 | + | ||
58 | + if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes) | ||
59 | { | ||
60 | - return FALSE; | ||
61 | + src_bytes += src_stride * height - src_stride; | ||
62 | + dst_bytes += dst_stride * height - dst_stride; | ||
63 | + dst_stride = -dst_stride; | ||
64 | + src_stride = -src_stride; | ||
65 | + | ||
66 | + if (src_bytes + width > dst_bytes) | ||
67 | + { | ||
68 | + /* TODO: reverse scanline copy using MMX */ | ||
69 | + while (--height >= 0) | ||
70 | + { | ||
71 | + memmove (dst_bytes, src_bytes, width); | ||
72 | + dst_bytes += dst_stride; | ||
73 | + src_bytes += src_stride; | ||
74 | + } | ||
75 | + return TRUE; | ||
76 | + } | ||
77 | } | ||
78 | |||
79 | while (height--) | ||
80 | @@ -3033,7 +3042,7 @@ pixman_blt_mmx (uint32_t *src_bits, | ||
81 | uint8_t *d = dst_bytes; | ||
82 | src_bytes += src_stride; | ||
83 | dst_bytes += dst_stride; | ||
84 | - w = byte_width; | ||
85 | + w = width; | ||
86 | |||
87 | while (w >= 2 && ((unsigned long)d & 3)) | ||
88 | { | ||
89 | -- | ||
90 | 1.6.6.1 | ||
91 | |||
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0020-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0020-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch new file mode 100644 index 0000000000..f5c0e12f24 --- /dev/null +++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0020-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch | |||
@@ -0,0 +1,91 @@ | |||
1 | From c8755294fa9ea396f7113370230b17c424a93be1 Mon Sep 17 00:00:00 2001 | ||
2 | From: Siarhei Siamashka <siarhei.siamashka@nokia.com> | ||
3 | Date: Thu, 22 Oct 2009 05:45:54 +0300 | ||
4 | Subject: [PATCH 20/24] Support of overlapping src/dst for pixman_blt_sse2 | ||
5 | |||
6 | --- | ||
7 | pixman/pixman-sse2.c | 55 +++++++++++++++++++++++++++++-------------------- | ||
8 | 1 files changed, 32 insertions(+), 23 deletions(-) | ||
9 | |||
10 | diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c | ||
11 | index 5907de0..25015ae 100644 | ||
12 | --- a/pixman/pixman-sse2.c | ||
13 | +++ b/pixman/pixman-sse2.c | ||
14 | @@ -5027,34 +5027,43 @@ pixman_blt_sse2 (uint32_t *src_bits, | ||
15 | { | ||
16 | uint8_t * src_bytes; | ||
17 | uint8_t * dst_bytes; | ||
18 | - int byte_width; | ||
19 | + int bpp; | ||
20 | |||
21 | - if (src_bpp != dst_bpp) | ||
22 | + if (src_bpp != dst_bpp || src_bpp & 7) | ||
23 | return FALSE; | ||
24 | |||
25 | - if (src_bpp == 16) | ||
26 | - { | ||
27 | - src_stride = src_stride * (int) sizeof (uint32_t) / 2; | ||
28 | - dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; | ||
29 | - src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x)); | ||
30 | - dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x)); | ||
31 | - byte_width = 2 * width; | ||
32 | - src_stride *= 2; | ||
33 | - dst_stride *= 2; | ||
34 | - } | ||
35 | - else if (src_bpp == 32) | ||
36 | + bpp = src_bpp >> 3; | ||
37 | + width *= bpp; | ||
38 | + src_stride *= 4; | ||
39 | + dst_stride *= 4; | ||
40 | + src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp; | ||
41 | + dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp; | ||
42 | + | ||
43 | + if (src_bpp != 16 && src_bpp != 32) | ||
44 | { | ||
45 | - src_stride = src_stride * (int) sizeof (uint32_t) / 4; | ||
46 | - dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; | ||
47 | - src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x)); | ||
48 | - dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x)); | ||
49 | - byte_width = 4 * width; | ||
50 | - src_stride *= 4; | ||
51 | - dst_stride *= 4; | ||
52 | + pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride, | ||
53 | + width, height); | ||
54 | + return TRUE; | ||
55 | } | ||
56 | - else | ||
57 | + | ||
58 | + if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes) | ||
59 | { | ||
60 | - return FALSE; | ||
61 | + src_bytes += src_stride * height - src_stride; | ||
62 | + dst_bytes += dst_stride * height - dst_stride; | ||
63 | + dst_stride = -dst_stride; | ||
64 | + src_stride = -src_stride; | ||
65 | + | ||
66 | + if (src_bytes + width > dst_bytes) | ||
67 | + { | ||
68 | + /* TODO: reverse scanline copy using SSE2 */ | ||
69 | + while (--height >= 0) | ||
70 | + { | ||
71 | + memmove (dst_bytes, src_bytes, width); | ||
72 | + dst_bytes += dst_stride; | ||
73 | + src_bytes += src_stride; | ||
74 | + } | ||
75 | + return TRUE; | ||
76 | + } | ||
77 | } | ||
78 | |||
79 | while (height--) | ||
80 | @@ -5064,7 +5073,7 @@ pixman_blt_sse2 (uint32_t *src_bits, | ||
81 | uint8_t *d = dst_bytes; | ||
82 | src_bytes += src_stride; | ||
83 | dst_bytes += dst_stride; | ||
84 | - w = byte_width; | ||
85 | + w = width; | ||
86 | |||
87 | while (w >= 2 && ((unsigned long)d & 3)) | ||
88 | { | ||
89 | -- | ||
90 | 1.6.6.1 | ||
91 | |||
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0021-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0021-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch new file mode 100644 index 0000000000..0eb9d88eba --- /dev/null +++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0021-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch | |||
@@ -0,0 +1,94 @@ | |||
1 | From 86c8198598ef6d639e656c04644015795cc249aa Mon Sep 17 00:00:00 2001 | ||
2 | From: Siarhei Siamashka <siarhei.siamashka@nokia.com> | ||
3 | Date: Wed, 18 Nov 2009 06:08:48 +0200 | ||
4 | Subject: [PATCH 21/24] Support of overlapping src/dst for pixman_blt_neon | ||
5 | |||
6 | --- | ||
7 | pixman/pixman-arm-neon.c | 62 +++++++++++++++++++++++++++++++++++++-------- | ||
8 | 1 files changed, 51 insertions(+), 11 deletions(-) | ||
9 | |||
10 | diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c | ||
11 | index e3eca2b..74316a8 100644 | ||
12 | --- a/pixman/pixman-arm-neon.c | ||
13 | +++ b/pixman/pixman-arm-neon.c | ||
14 | @@ -199,26 +199,66 @@ pixman_blt_neon (uint32_t *src_bits, | ||
15 | int width, | ||
16 | int height) | ||
17 | { | ||
18 | - if (src_bpp != dst_bpp) | ||
19 | + uint8_t * src_bytes; | ||
20 | + uint8_t * dst_bytes; | ||
21 | + int bpp; | ||
22 | + | ||
23 | + if (src_bpp != dst_bpp || src_bpp & 7) | ||
24 | return FALSE; | ||
25 | |||
26 | + bpp = src_bpp >> 3; | ||
27 | + width *= bpp; | ||
28 | + src_stride *= 4; | ||
29 | + dst_stride *= 4; | ||
30 | + src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp; | ||
31 | + dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp; | ||
32 | + | ||
33 | + if (src_bpp != 16 && src_bpp != 32) | ||
34 | + { | ||
35 | + pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride, | ||
36 | + width, height); | ||
37 | + return TRUE; | ||
38 | + } | ||
39 | + | ||
40 | + if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes) | ||
41 | + { | ||
42 | + src_bytes += src_stride * height - src_stride; | ||
43 | + dst_bytes += dst_stride * height - dst_stride; | ||
44 | + dst_stride = -dst_stride; | ||
45 | + src_stride = -src_stride; | ||
46 | + | ||
47 | + if (src_bytes + width > dst_bytes) | ||
48 | + { | ||
49 | + /* TODO: reverse scanline copy using NEON */ | ||
50 | + while (--height >= 0) | ||
51 | + { | ||
52 | + memmove (dst_bytes, src_bytes, width); | ||
53 | + dst_bytes += dst_stride; | ||
54 | + src_bytes += src_stride; | ||
55 | + } | ||
56 | + return TRUE; | ||
57 | + } | ||
58 | + } | ||
59 | + | ||
60 | switch (src_bpp) | ||
61 | { | ||
62 | case 16: | ||
63 | pixman_composite_src_0565_0565_asm_neon ( | ||
64 | - width, height, | ||
65 | - (uint16_t *)(((char *) dst_bits) + | ||
66 | - dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2, | ||
67 | - (uint16_t *)(((char *) src_bits) + | ||
68 | - src_y * src_stride * 4 + src_x * 2), src_stride * 2); | ||
69 | + width >> 1, | ||
70 | + height, | ||
71 | + (uint16_t *) dst_bytes, | ||
72 | + dst_stride >> 1, | ||
73 | + (uint16_t *) src_bytes, | ||
74 | + src_stride >> 1); | ||
75 | return TRUE; | ||
76 | case 32: | ||
77 | pixman_composite_src_8888_8888_asm_neon ( | ||
78 | - width, height, | ||
79 | - (uint32_t *)(((char *) dst_bits) + | ||
80 | - dst_y * dst_stride * 4 + dst_x * 4), dst_stride, | ||
81 | - (uint32_t *)(((char *) src_bits) + | ||
82 | - src_y * src_stride * 4 + src_x * 4), src_stride); | ||
83 | + width >> 2, | ||
84 | + height, | ||
85 | + (uint32_t *) dst_bytes, | ||
86 | + dst_stride >> 2, | ||
87 | + (uint32_t *) src_bytes, | ||
88 | + src_stride >> 2); | ||
89 | return TRUE; | ||
90 | default: | ||
91 | return FALSE; | ||
92 | -- | ||
93 | 1.6.6.1 | ||
94 | |||
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0022-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0022-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch new file mode 100644 index 0000000000..129c1f1bb6 --- /dev/null +++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0022-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch | |||
@@ -0,0 +1,109 @@ | |||
1 | From 60d972afbae8613d700d3a6b3cb107429d7e11c6 Mon Sep 17 00:00:00 2001 | ||
2 | From: Siarhei Siamashka <siarhei.siamashka@nokia.com> | ||
3 | Date: Thu, 10 Dec 2009 00:51:50 +0200 | ||
4 | Subject: [PATCH 22/24] ARM: added NEON optimizations for fetch/store r5g6b5 scanline | ||
5 | |||
6 | --- | ||
7 | pixman/pixman-arm-neon-asm.S | 20 ++++++++++++++++++++ | ||
8 | pixman/pixman-arm-neon.c | 40 ++++++++++++++++++++++++++++++++++++++++ | ||
9 | 2 files changed, 60 insertions(+), 0 deletions(-) | ||
10 | |||
11 | diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S | ||
12 | index cf014fa..25f7bf0 100644 | ||
13 | --- a/pixman/pixman-arm-neon-asm.S | ||
14 | +++ b/pixman/pixman-arm-neon-asm.S | ||
15 | @@ -459,6 +459,16 @@ generate_composite_function \ | ||
16 | pixman_composite_src_8888_0565_process_pixblock_tail, \ | ||
17 | pixman_composite_src_8888_0565_process_pixblock_tail_head | ||
18 | |||
19 | +generate_composite_function_single_scanline \ | ||
20 | + pixman_store_scanline_r5g6b5_asm_neon, 32, 0, 16, \ | ||
21 | + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ | ||
22 | + 8, /* number of pixels, processed in a single block */ \ | ||
23 | + default_init, \ | ||
24 | + default_cleanup, \ | ||
25 | + pixman_composite_src_8888_0565_process_pixblock_head, \ | ||
26 | + pixman_composite_src_8888_0565_process_pixblock_tail, \ | ||
27 | + pixman_composite_src_8888_0565_process_pixblock_tail_head | ||
28 | + | ||
29 | /******************************************************************************/ | ||
30 | |||
31 | .macro pixman_composite_src_0565_8888_process_pixblock_head | ||
32 | @@ -494,6 +504,16 @@ generate_composite_function \ | ||
33 | pixman_composite_src_0565_8888_process_pixblock_tail, \ | ||
34 | pixman_composite_src_0565_8888_process_pixblock_tail_head | ||
35 | |||
36 | +generate_composite_function_single_scanline \ | ||
37 | + pixman_fetch_scanline_r5g6b5_asm_neon, 16, 0, 32, \ | ||
38 | + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ | ||
39 | + 8, /* number of pixels, processed in a single block */ \ | ||
40 | + default_init, \ | ||
41 | + default_cleanup, \ | ||
42 | + pixman_composite_src_0565_8888_process_pixblock_head, \ | ||
43 | + pixman_composite_src_0565_8888_process_pixblock_tail, \ | ||
44 | + pixman_composite_src_0565_8888_process_pixblock_tail_head | ||
45 | + | ||
46 | /******************************************************************************/ | ||
47 | |||
48 | .macro pixman_composite_add_8_8_process_pixblock_head | ||
49 | diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c | ||
50 | index 74316a8..f773e92 100644 | ||
51 | --- a/pixman/pixman-arm-neon.c | ||
52 | +++ b/pixman/pixman-arm-neon.c | ||
53 | @@ -448,6 +448,42 @@ BIND_COMBINE_U (over) | ||
54 | BIND_COMBINE_U (add) | ||
55 | BIND_COMBINE_U (out_reverse) | ||
56 | |||
57 | +void | ||
58 | +pixman_fetch_scanline_r5g6b5_asm_neon (int width, | ||
59 | + uint32_t *buffer, | ||
60 | + const uint16_t *pixel); | ||
61 | +void | ||
62 | +pixman_store_scanline_r5g6b5_asm_neon (int width, | ||
63 | + uint16_t *pixel, | ||
64 | + const uint32_t *values); | ||
65 | + | ||
66 | +static void | ||
67 | +neon_fetch_scanline_r5g6b5 (pixman_image_t *image, | ||
68 | + int x, | ||
69 | + int y, | ||
70 | + int width, | ||
71 | + uint32_t * buffer, | ||
72 | + const uint32_t *mask) | ||
73 | +{ | ||
74 | + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; | ||
75 | + const uint16_t *pixel = (const uint16_t *)bits + x; | ||
76 | + | ||
77 | + pixman_fetch_scanline_r5g6b5_asm_neon (width, buffer, pixel); | ||
78 | +} | ||
79 | + | ||
80 | +static void | ||
81 | +neon_store_scanline_r5g6b5 (bits_image_t * image, | ||
82 | + int x, | ||
83 | + int y, | ||
84 | + int width, | ||
85 | + const uint32_t *values) | ||
86 | +{ | ||
87 | + uint32_t *bits = image->bits + image->rowstride * y; | ||
88 | + uint16_t *pixel = ((uint16_t *) bits) + x; | ||
89 | + | ||
90 | + pixman_store_scanline_r5g6b5_asm_neon (width, pixel, values); | ||
91 | +} | ||
92 | + | ||
93 | pixman_implementation_t * | ||
94 | _pixman_implementation_create_arm_neon (void) | ||
95 | { | ||
96 | @@ -463,6 +499,10 @@ _pixman_implementation_create_arm_neon (void) | ||
97 | imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u; | ||
98 | imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u; | ||
99 | |||
100 | + _pixman_bits_override_accessors (PIXMAN_r5g6b5, | ||
101 | + neon_fetch_scanline_r5g6b5, | ||
102 | + neon_store_scanline_r5g6b5); | ||
103 | + | ||
104 | imp->blt = arm_neon_blt; | ||
105 | imp->fill = arm_neon_fill; | ||
106 | |||
107 | -- | ||
108 | 1.6.6.1 | ||
109 | |||
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch new file mode 100644 index 0000000000..7724f5433e --- /dev/null +++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch | |||
@@ -0,0 +1,148 @@ | |||
1 | From cc99d8d6fcbabd7f9f3ed99e65c78a2fb71792fa Mon Sep 17 00:00:00 2001 | ||
2 | From: Siarhei Siamashka <siarhei.siamashka@nokia.com> | ||
3 | Date: Thu, 23 Sep 2010 21:10:56 +0300 | ||
4 | Subject: [PATCH 23/24] ARM: added NEON optimizations for fetch/store a8 scanline | ||
5 | |||
6 | --- | ||
7 | pixman/pixman-arm-neon-asm.S | 64 ++++++++++++++++++++++++++++++++++++++++++ | ||
8 | pixman/pixman-arm-neon.c | 42 +++++++++++++++++++++++++++ | ||
9 | 2 files changed, 106 insertions(+), 0 deletions(-) | ||
10 | |||
11 | diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S | ||
12 | index 25f7bf0..439b06b 100644 | ||
13 | --- a/pixman/pixman-arm-neon-asm.S | ||
14 | +++ b/pixman/pixman-arm-neon-asm.S | ||
15 | @@ -418,6 +418,70 @@ generate_composite_function \ | ||
16 | |||
17 | /******************************************************************************/ | ||
18 | |||
19 | +.macro pixman_composite_src_8_8888_process_pixblock_head | ||
20 | + /* This is tricky part: we can't set these values just once in 'init' macro | ||
21 | + * because leading/trailing pixels handling part uses VZIP.8 instructions, | ||
22 | + * and they operate on values in-place and destroy original registers | ||
23 | + * content. Think about it like VST4.8 instruction corrupting NEON | ||
24 | + * registers after write in 'tail_head' macro. Except that 'tail_head' | ||
25 | + * macro itself actually does not need these extra VMOVs because it uses | ||
26 | + * real VST4.8 instruction. | ||
27 | + */ | ||
28 | + vmov.u8 q0, #0 | ||
29 | + vmov.u8 d2, #0 | ||
30 | +.endm | ||
31 | + | ||
32 | +.macro pixman_composite_src_8_8888_process_pixblock_tail | ||
33 | +.endm | ||
34 | + | ||
35 | +.macro pixman_composite_src_8_8888_process_pixblock_tail_head | ||
36 | + vst4.8 {d0, d1, d2, d3}, [DST_W, :128]! | ||
37 | + vld1.8 {d3}, [SRC]! | ||
38 | +.endm | ||
39 | + | ||
40 | +generate_composite_function_single_scanline \ | ||
41 | + pixman_fetch_scanline_a8_asm_neon, 8, 0, 32, \ | ||
42 | + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ | ||
43 | + 8, /* number of pixels, processed in a single block */ \ | ||
44 | + default_init, \ | ||
45 | + default_cleanup, \ | ||
46 | + pixman_composite_src_8_8888_process_pixblock_head, \ | ||
47 | + pixman_composite_src_8_8888_process_pixblock_tail, \ | ||
48 | + pixman_composite_src_8_8888_process_pixblock_tail_head, \ | ||
49 | + 0, /* dst_w_basereg */ \ | ||
50 | + 0, /* dst_r_basereg */ \ | ||
51 | + 3, /* src_basereg */ \ | ||
52 | + 0 /* mask_basereg */ | ||
53 | + | ||
54 | +/******************************************************************************/ | ||
55 | + | ||
56 | +.macro pixman_composite_src_8888_8_process_pixblock_head | ||
57 | +.endm | ||
58 | + | ||
59 | +.macro pixman_composite_src_8888_8_process_pixblock_tail | ||
60 | +.endm | ||
61 | + | ||
62 | +.macro pixman_composite_src_8888_8_process_pixblock_tail_head | ||
63 | + vst1.8 {d3}, [DST_W, :64]! | ||
64 | + vld4.8 {d0, d1, d2, d3}, [SRC]! | ||
65 | +.endm | ||
66 | + | ||
67 | +generate_composite_function_single_scanline \ | ||
68 | + pixman_store_scanline_a8_asm_neon, 32, 0, 8, \ | ||
69 | + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ | ||
70 | + 8, /* number of pixels, processed in a single block */ \ | ||
71 | + default_init, \ | ||
72 | + default_cleanup, \ | ||
73 | + pixman_composite_src_8888_8_process_pixblock_head, \ | ||
74 | + pixman_composite_src_8888_8_process_pixblock_tail, \ | ||
75 | + pixman_composite_src_8888_8_process_pixblock_tail_head, \ | ||
76 | + 3, /* dst_w_basereg */ \ | ||
77 | + 0, /* dst_r_basereg */ \ | ||
78 | + 0, /* src_basereg */ \ | ||
79 | + 0 /* mask_basereg */ | ||
80 | + | ||
81 | +/******************************************************************************/ | ||
82 | + | ||
83 | .macro pixman_composite_src_8888_0565_process_pixblock_head | ||
84 | vshll.u8 q8, d1, #8 | ||
85 | vshll.u8 q14, d2, #8 | ||
86 | diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c | ||
87 | index f773e92..55219b3 100644 | ||
88 | --- a/pixman/pixman-arm-neon.c | ||
89 | +++ b/pixman/pixman-arm-neon.c | ||
90 | @@ -484,6 +484,45 @@ neon_store_scanline_r5g6b5 (bits_image_t * image, | ||
91 | pixman_store_scanline_r5g6b5_asm_neon (width, pixel, values); | ||
92 | } | ||
93 | |||
94 | +void | ||
95 | +pixman_fetch_scanline_a8_asm_neon (int width, | ||
96 | + uint32_t *buffer, | ||
97 | + const uint8_t *pixel); | ||
98 | + | ||
99 | + | ||
100 | +void | ||
101 | +pixman_store_scanline_a8_asm_neon (int width, | ||
102 | + uint8_t *pixel, | ||
103 | + const uint32_t *values); | ||
104 | + | ||
105 | +static void | ||
106 | +neon_fetch_scanline_a8 (pixman_image_t *image, | ||
107 | + int x, | ||
108 | + int y, | ||
109 | + int width, | ||
110 | + uint32_t * buffer, | ||
111 | + const uint32_t *mask) | ||
112 | +{ | ||
113 | + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; | ||
114 | + const uint8_t *pixel = (const uint8_t *) bits + x; | ||
115 | + | ||
116 | + pixman_fetch_scanline_a8_asm_neon (width, buffer, pixel); | ||
117 | +} | ||
118 | + | ||
119 | +static void | ||
120 | +neon_store_scanline_a8 (bits_image_t * image, | ||
121 | + int x, | ||
122 | + int y, | ||
123 | + int width, | ||
124 | + const uint32_t *values) | ||
125 | +{ | ||
126 | + uint32_t *bits = image->bits + image->rowstride * y; | ||
127 | + uint8_t *pixel = (uint8_t *) bits + x; | ||
128 | + | ||
129 | + pixman_store_scanline_a8_asm_neon (width, pixel, values); | ||
130 | +} | ||
131 | + | ||
132 | + | ||
133 | pixman_implementation_t * | ||
134 | _pixman_implementation_create_arm_neon (void) | ||
135 | { | ||
136 | @@ -502,6 +541,9 @@ _pixman_implementation_create_arm_neon (void) | ||
137 | _pixman_bits_override_accessors (PIXMAN_r5g6b5, | ||
138 | neon_fetch_scanline_r5g6b5, | ||
139 | neon_store_scanline_r5g6b5); | ||
140 | + _pixman_bits_override_accessors (PIXMAN_a8, | ||
141 | + neon_fetch_scanline_a8, | ||
142 | + neon_store_scanline_a8); | ||
143 | |||
144 | imp->blt = arm_neon_blt; | ||
145 | imp->fill = arm_neon_fill; | ||
146 | -- | ||
147 | 1.6.6.1 | ||
148 | |||
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0024-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0024-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch new file mode 100644 index 0000000000..8253f41b8f --- /dev/null +++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0024-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch | |||
@@ -0,0 +1,77 @@ | |||
1 | From cf3b8fdc53144ff62c4054996559d3a1a4d62b75 Mon Sep 17 00:00:00 2001 | ||
2 | From: Siarhei Siamashka <siarhei.siamashka@nokia.com> | ||
3 | Date: Fri, 24 Sep 2010 18:22:44 +0300 | ||
4 | Subject: [PATCH 24/24] ARM: added NEON optimizations for fetching x8r8g8b8 scanline | ||
5 | |||
6 | --- | ||
7 | pixman/pixman-arm-neon-asm.S | 14 ++++++++++++++ | ||
8 | pixman/pixman-arm-neon.c | 21 +++++++++++++++++++++ | ||
9 | 2 files changed, 35 insertions(+), 0 deletions(-) | ||
10 | |||
11 | diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S | ||
12 | index 439b06b..3e0dcfe 100644 | ||
13 | --- a/pixman/pixman-arm-neon-asm.S | ||
14 | +++ b/pixman/pixman-arm-neon-asm.S | ||
15 | @@ -1257,6 +1257,20 @@ generate_composite_function \ | ||
16 | 0, /* src_basereg */ \ | ||
17 | 0 /* mask_basereg */ | ||
18 | |||
19 | +generate_composite_function_single_scanline \ | ||
20 | + pixman_fetch_scanline_x888_asm_neon, 32, 0, 32, \ | ||
21 | + FLAG_DST_WRITEONLY, \ | ||
22 | + 8, /* number of pixels, processed in a single block */ \ | ||
23 | + pixman_composite_src_x888_8888_init, \ | ||
24 | + default_cleanup, \ | ||
25 | + pixman_composite_src_x888_8888_process_pixblock_head, \ | ||
26 | + pixman_composite_src_x888_8888_process_pixblock_tail, \ | ||
27 | + pixman_composite_src_x888_8888_process_pixblock_tail_head, \ | ||
28 | + 0, /* dst_w_basereg */ \ | ||
29 | + 0, /* dst_r_basereg */ \ | ||
30 | + 0, /* src_basereg */ \ | ||
31 | + 0 /* mask_basereg */ | ||
32 | + | ||
33 | /******************************************************************************/ | ||
34 | |||
35 | .macro pixman_composite_over_n_8_8888_process_pixblock_head | ||
36 | diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c | ||
37 | index 55219b3..8cef414 100644 | ||
38 | --- a/pixman/pixman-arm-neon.c | ||
39 | +++ b/pixman/pixman-arm-neon.c | ||
40 | @@ -522,6 +522,24 @@ neon_store_scanline_a8 (bits_image_t * image, | ||
41 | pixman_store_scanline_a8_asm_neon (width, pixel, values); | ||
42 | } | ||
43 | |||
44 | +void | ||
45 | +pixman_fetch_scanline_x888_asm_neon (int width, | ||
46 | + uint32_t *buffer, | ||
47 | + const uint32_t *pixel); | ||
48 | + | ||
49 | +static void | ||
50 | +neon_fetch_scanline_x888 (pixman_image_t *image, | ||
51 | + int x, | ||
52 | + int y, | ||
53 | + int width, | ||
54 | + uint32_t * buffer, | ||
55 | + const uint32_t *mask) | ||
56 | +{ | ||
57 | + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; | ||
58 | + const uint32_t *pixel = (const uint32_t *) bits + x; | ||
59 | + | ||
60 | + pixman_fetch_scanline_x888_asm_neon (width, buffer, pixel); | ||
61 | +} | ||
62 | |||
63 | pixman_implementation_t * | ||
64 | _pixman_implementation_create_arm_neon (void) | ||
65 | @@ -544,6 +562,9 @@ _pixman_implementation_create_arm_neon (void) | ||
66 | _pixman_bits_override_accessors (PIXMAN_a8, | ||
67 | neon_fetch_scanline_a8, | ||
68 | neon_store_scanline_a8); | ||
69 | + _pixman_bits_override_accessors (PIXMAN_x8r8g8b8, | ||
70 | + neon_fetch_scanline_x888, | ||
71 | + NULL); | ||
72 | |||
73 | imp->blt = arm_neon_blt; | ||
74 | imp->fill = arm_neon_fill; | ||
75 | -- | ||
76 | 1.6.6.1 | ||
77 | |||
diff --git a/recipes-graphics/xorg-lib/pixman_0.21.2.bb b/recipes-graphics/xorg-lib/pixman_0.21.2.bb new file mode 100644 index 0000000000..19394d635b --- /dev/null +++ b/recipes-graphics/xorg-lib/pixman_0.21.2.bb | |||
@@ -0,0 +1,37 @@ | |||
1 | require pixman.inc | ||
2 | |||
3 | SRC_URI[archive.md5sum] = "9e09fd6e58cbf9717140891e0b7d4a7a" | ||
4 | SRC_URI[archive.sha256sum] = "295f51416caf307ff7caf1153ee9b1d86b9f7f02a7876d12db6538d80451c5de" | ||
5 | |||
6 | PR = "${INC_PR}.1" | ||
7 | |||
8 | SRC_URI += "\ | ||
9 | file://0002-Fix-argument-quoting-for-AC_INIT.patch \ | ||
10 | file://0003-Sun-s-copyrights-belong-to-Oracle-now.patch \ | ||
11 | file://0004-C-fast-path-for-a1-fill-operation.patch \ | ||
12 | file://0005-ARM-added-neon_composite_over_n_8_8-fast-path.patch \ | ||
13 | file://0006-ARM-introduced-fetch_mask_pixblock-macro-to-simplify.patch \ | ||
14 | file://0007-ARM-better-NEON-instructions-scheduling-for-over_n_8.patch \ | ||
15 | file://0008-ARM-added-neon_composite_over_8888_n_0565-fast-path.patch \ | ||
16 | file://0009-ARM-reuse-common-NEON-code-for-over_-n_8-8888_n-8888.patch \ | ||
17 | file://0010-ARM-added-neon_composite_over_0565_n_0565-fast-path.patch \ | ||
18 | file://0011-ARM-added-neon_composite_add_8888_8_8888-fast-path.patch \ | ||
19 | file://0012-ARM-better-NEON-instructions-scheduling-for-add_8888.patch \ | ||
20 | file://0013-ARM-added-neon_composite_add_n_8_8888-fast-path.patch \ | ||
21 | file://0014-ARM-added-neon_composite_add_8888_n_8888-fast-path.patch \ | ||
22 | file://0015-ARM-added-flags-parameter-to-some-asm-fast-path-wrap.patch \ | ||
23 | file://0016-ARM-added-neon_composite_in_n_8-fast-path.patch \ | ||
24 | file://0017-add-_pixman_bits_override_accessors.patch \ | ||
25 | file://0018-Generic-C-implementation-of-pixman_blt-with-overlapp.patch \ | ||
26 | file://0019-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch \ | ||
27 | file://0020-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch \ | ||
28 | file://0021-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch \ | ||
29 | file://0022-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch \ | ||
30 | file://0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch \ | ||
31 | file://0024-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch \ | ||
32 | " | ||
33 | |||
34 | NEON = " --disable-arm-neon " | ||
35 | NEON_armv7a = " " | ||
36 | |||
37 | EXTRA_OECONF = "${NEON} --disable-gtk" | ||