summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--recipes-graphics/xorg-lib/pixman-0.21.2/0002-Fix-argument-quoting-for-AC_INIT.patch35
-rw-r--r--recipes-graphics/xorg-lib/pixman-0.21.2/0003-Sun-s-copyrights-belong-to-Oracle-now.patch39
-rw-r--r--recipes-graphics/xorg-lib/pixman-0.21.2/0004-C-fast-path-for-a1-fill-operation.patch159
-rw-r--r--recipes-graphics/xorg-lib/pixman-0.21.2/0005-ARM-added-neon_composite_over_n_8_8-fast-path.patch113
-rw-r--r--recipes-graphics/xorg-lib/pixman-0.21.2/0006-ARM-introduced-fetch_mask_pixblock-macro-to-simplify.patch157
-rw-r--r--recipes-graphics/xorg-lib/pixman-0.21.2/0007-ARM-better-NEON-instructions-scheduling-for-over_n_8.patch170
-rw-r--r--recipes-graphics/xorg-lib/pixman-0.21.2/0008-ARM-added-neon_composite_over_8888_n_0565-fast-path.patch74
-rw-r--r--recipes-graphics/xorg-lib/pixman-0.21.2/0009-ARM-reuse-common-NEON-code-for-over_-n_8-8888_n-8888.patch139
-rw-r--r--recipes-graphics/xorg-lib/pixman-0.21.2/0010-ARM-added-neon_composite_over_0565_n_0565-fast-path.patch74
-rw-r--r--recipes-graphics/xorg-lib/pixman-0.21.2/0011-ARM-added-neon_composite_add_8888_8_8888-fast-path.patch63
-rw-r--r--recipes-graphics/xorg-lib/pixman-0.21.2/0012-ARM-better-NEON-instructions-scheduling-for-add_8888.patch105
-rw-r--r--recipes-graphics/xorg-lib/pixman-0.21.2/0013-ARM-added-neon_composite_add_n_8_8888-fast-path.patch75
-rw-r--r--recipes-graphics/xorg-lib/pixman-0.21.2/0014-ARM-added-neon_composite_add_8888_n_8888-fast-path.patch72
-rw-r--r--recipes-graphics/xorg-lib/pixman-0.21.2/0015-ARM-added-flags-parameter-to-some-asm-fast-path-wrap.patch153
-rw-r--r--recipes-graphics/xorg-lib/pixman-0.21.2/0016-ARM-added-neon_composite_in_n_8-fast-path.patch97
-rw-r--r--recipes-graphics/xorg-lib/pixman-0.21.2/0017-add-_pixman_bits_override_accessors.patch75
-rw-r--r--recipes-graphics/xorg-lib/pixman-0.21.2/0018-Generic-C-implementation-of-pixman_blt-with-overlapp.patch114
-rw-r--r--recipes-graphics/xorg-lib/pixman-0.21.2/0019-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch91
-rw-r--r--recipes-graphics/xorg-lib/pixman-0.21.2/0020-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch91
-rw-r--r--recipes-graphics/xorg-lib/pixman-0.21.2/0021-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch94
-rw-r--r--recipes-graphics/xorg-lib/pixman-0.21.2/0022-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch109
-rw-r--r--recipes-graphics/xorg-lib/pixman-0.21.2/0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch148
-rw-r--r--recipes-graphics/xorg-lib/pixman-0.21.2/0024-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch77
-rw-r--r--recipes-graphics/xorg-lib/pixman_0.21.2.bb37
24 files changed, 2361 insertions, 0 deletions
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0002-Fix-argument-quoting-for-AC_INIT.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0002-Fix-argument-quoting-for-AC_INIT.patch
new file mode 100644
index 0000000000..ebf6eafb0d
--- /dev/null
+++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0002-Fix-argument-quoting-for-AC_INIT.patch
@@ -0,0 +1,35 @@
1From e7ee43c39d2370716a4d011afa8f5067eced9899 Mon Sep 17 00:00:00 2001
2From: Cyril Brulebois <kibi@debian.org>
3Date: Wed, 17 Nov 2010 16:16:56 +0100
4Subject: [PATCH 02/24] Fix argument quoting for AC_INIT.
5
6One gets rid of this accordingly:
7| autoreconf -vfi
8| autoreconf: Entering directory `.'
9| autoreconf: configure.ac: not using Gettext
10| autoreconf: running: aclocal --force
11| configure.ac:61: warning: AC_INIT: not a literal: "pixman@lists.freedesktop.org"
12| autoreconf: configure.ac: tracing
13| configure.ac:61: warning: AC_INIT: not a literal: "pixman@lists.freedesktop.org"
14
15Signed-off-by: Cyril Brulebois <kibi@debian.org>
16---
17 configure.ac | 2 +-
18 1 files changed, 1 insertions(+), 1 deletions(-)
19
20diff --git a/configure.ac b/configure.ac
21index db1da21..147e1bf 100644
22--- a/configure.ac
23+++ b/configure.ac
24@@ -58,7 +58,7 @@ m4_define([pixman_micro], 3)
25
26 m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
27
28-AC_INIT(pixman, pixman_version, "pixman@lists.freedesktop.org", pixman)
29+AC_INIT(pixman, pixman_version, [pixman@lists.freedesktop.org], pixman)
30 AM_INIT_AUTOMAKE([foreign dist-bzip2])
31
32 # Suppress verbose compile lines
33--
341.6.6.1
35
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0003-Sun-s-copyrights-belong-to-Oracle-now.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0003-Sun-s-copyrights-belong-to-Oracle-now.patch
new file mode 100644
index 0000000000..e48a2b37dc
--- /dev/null
+++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0003-Sun-s-copyrights-belong-to-Oracle-now.patch
@@ -0,0 +1,39 @@
1From 654961efe405ad1a7e54a77548ca8af322ecc1f8 Mon Sep 17 00:00:00 2001
2From: Alan Coopersmith <alan.coopersmith@oracle.com>
3Date: Sun, 21 Nov 2010 11:42:22 -0800
4Subject: [PATCH 03/24] Sun's copyrights belong to Oracle now
5
6Signed-off-by: Alan Coopersmith <alan.coopersmith@oracle.com>
7---
8 COPYING | 2 +-
9 pixman/solaris-hwcap.mapfile | 2 +-
10 2 files changed, 2 insertions(+), 2 deletions(-)
11
12diff --git a/COPYING b/COPYING
13index 3092a34..15f9517 100644
14--- a/COPYING
15+++ b/COPYING
16@@ -18,7 +18,7 @@ possible. They may also add themselves to the list below.
17 * Copyright 2008 André Tupinambá
18 * Copyright 2008 Mozilla Corporation
19 * Copyright 2008 Frederic Plourde
20- * Copyright 2009 Sun Microsystems, Inc.
21+ * Copyright 2009, Oracle and/or its affiliates. All rights reserved.
22 *
23 * Permission is hereby granted, free of charge, to any person obtaining a
24 * copy of this software and associated documentation files (the "Software"),
25diff --git a/pixman/solaris-hwcap.mapfile b/pixman/solaris-hwcap.mapfile
26index 3605ca7..87efce1 100644
27--- a/pixman/solaris-hwcap.mapfile
28+++ b/pixman/solaris-hwcap.mapfile
29@@ -1,6 +1,6 @@
30 ###############################################################################
31 #
32-# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
33+# Copyright 2009, Oracle and/or its affiliates. All rights reserved.
34 #
35 # Permission is hereby granted, free of charge, to any person obtaining a
36 # copy of this software and associated documentation files (the "Software"),
37--
381.6.6.1
39
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0004-C-fast-path-for-a1-fill-operation.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0004-C-fast-path-for-a1-fill-operation.patch
new file mode 100644
index 0000000000..75eaac7bf2
--- /dev/null
+++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0004-C-fast-path-for-a1-fill-operation.patch
@@ -0,0 +1,159 @@
1From 4b5b5a2a832cd67f2a0ec231f75a2825b45571fa Mon Sep 17 00:00:00 2001
2From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
3Date: Mon, 15 Nov 2010 18:26:43 +0200
4Subject: [PATCH 04/24] C fast path for a1 fill operation
5
6Can be used as one of the solutions to fix bug
7https://bugs.freedesktop.org/show_bug.cgi?id=31604
8---
9 pixman/pixman-fast-path.c | 87 ++++++++++++++++++++++++++++++++++++++++++++-
10 pixman/pixman.c | 7 +++-
11 2 files changed, 91 insertions(+), 3 deletions(-)
12
13diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
14index 5d5fa95..37dfbae 100644
15--- a/pixman/pixman-fast-path.c
16+++ b/pixman/pixman-fast-path.c
17@@ -1334,7 +1334,11 @@ fast_composite_solid_fill (pixman_implementation_t *imp,
18
19 src = _pixman_image_get_solid (src_image, dst_image->bits.format);
20
21- if (dst_image->bits.format == PIXMAN_a8)
22+ if (dst_image->bits.format == PIXMAN_a1)
23+ {
24+ src = src >> 31;
25+ }
26+ else if (dst_image->bits.format == PIXMAN_a8)
27 {
28 src = src >> 24;
29 }
30@@ -1655,6 +1659,7 @@ static const pixman_fast_path_t c_fast_paths[] =
31 PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
32 PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
33 PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
34+ PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill),
35 PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
36 PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
37 PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
38@@ -1733,6 +1738,82 @@ static const pixman_fast_path_t c_fast_paths[] =
39 { PIXMAN_OP_NONE },
40 };
41
42+#ifdef WORDS_BIGENDIAN
43+#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (32 - (offs) - (n)))
44+#else
45+#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (offs))
46+#endif
47+
48+static force_inline void
49+pixman_fill1_line (uint32_t *dst, int offs, int width, int v)
50+{
51+ if (offs)
52+ {
53+ int leading_pixels = 32 - offs;
54+ if (leading_pixels >= width)
55+ {
56+ if (v)
57+ *dst |= A1_FILL_MASK (width, offs);
58+ else
59+ *dst &= ~A1_FILL_MASK (width, offs);
60+ return;
61+ }
62+ else
63+ {
64+ if (v)
65+ *dst++ |= A1_FILL_MASK (leading_pixels, offs);
66+ else
67+ *dst++ &= ~A1_FILL_MASK (leading_pixels, offs);
68+ width -= leading_pixels;
69+ }
70+ }
71+ while (width >= 32)
72+ {
73+ if (v)
74+ *dst++ = 0xFFFFFFFF;
75+ else
76+ *dst++ = 0;
77+ width -= 32;
78+ }
79+ if (width > 0)
80+ {
81+ if (v)
82+ *dst |= A1_FILL_MASK (width, 0);
83+ else
84+ *dst &= ~A1_FILL_MASK (width, 0);
85+ }
86+}
87+
88+static void
89+pixman_fill1 (uint32_t *bits,
90+ int stride,
91+ int x,
92+ int y,
93+ int width,
94+ int height,
95+ uint32_t xor)
96+{
97+ uint32_t *dst = bits + y * stride + (x >> 5);
98+ int offs = x & 31;
99+
100+ if (xor & 1)
101+ {
102+ while (height--)
103+ {
104+ pixman_fill1_line (dst, offs, width, 1);
105+ dst += stride;
106+ }
107+ }
108+ else
109+ {
110+ while (height--)
111+ {
112+ pixman_fill1_line (dst, offs, width, 0);
113+ dst += stride;
114+ }
115+ }
116+}
117+
118 static void
119 pixman_fill8 (uint32_t *bits,
120 int stride,
121@@ -1819,6 +1900,10 @@ fast_path_fill (pixman_implementation_t *imp,
122 {
123 switch (bpp)
124 {
125+ case 1:
126+ pixman_fill1 (bits, stride, x, y, width, height, xor);
127+ break;
128+
129 case 8:
130 pixman_fill8 (bits, stride, x, y, width, height, xor);
131 break;
132diff --git a/pixman/pixman.c b/pixman/pixman.c
133index 045c556..ec565f9 100644
134--- a/pixman/pixman.c
135+++ b/pixman/pixman.c
136@@ -875,7 +875,8 @@ color_to_pixel (pixman_color_t * color,
137 format == PIXMAN_b8g8r8x8 ||
138 format == PIXMAN_r5g6b5 ||
139 format == PIXMAN_b5g6r5 ||
140- format == PIXMAN_a8))
141+ format == PIXMAN_a8 ||
142+ format == PIXMAN_a1))
143 {
144 return FALSE;
145 }
146@@ -895,7 +896,9 @@ color_to_pixel (pixman_color_t * color,
147 ((c & 0x000000ff) << 24);
148 }
149
150- if (format == PIXMAN_a8)
151+ if (format == PIXMAN_a1)
152+ c = c >> 31;
153+ else if (format == PIXMAN_a8)
154 c = c >> 24;
155 else if (format == PIXMAN_r5g6b5 ||
156 format == PIXMAN_b5g6r5)
157--
1581.6.6.1
159
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0005-ARM-added-neon_composite_over_n_8_8-fast-path.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0005-ARM-added-neon_composite_over_n_8_8-fast-path.patch
new file mode 100644
index 0000000000..a7a9b11a87
--- /dev/null
+++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0005-ARM-added-neon_composite_over_n_8_8-fast-path.patch
@@ -0,0 +1,113 @@
1From 98d08b37f17a3379d0ceff8bb7de8f943873fbd8 Mon Sep 17 00:00:00 2001
2From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
3Date: Fri, 26 Nov 2010 08:55:49 +0200
4Subject: [PATCH 05/24] ARM: added 'neon_composite_over_n_8_8' fast path
5
6---
7 pixman/pixman-arm-neon-asm.S | 68 ++++++++++++++++++++++++++++++++++++++++++
8 pixman/pixman-arm-neon.c | 3 ++
9 2 files changed, 71 insertions(+), 0 deletions(-)
10
11diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
12index 91ec27d..a3875ee 100644
13--- a/pixman/pixman-arm-neon-asm.S
14+++ b/pixman/pixman-arm-neon-asm.S
15@@ -1203,6 +1203,74 @@ generate_composite_function \
16
17 /******************************************************************************/
18
19+.macro pixman_composite_over_n_8_8_process_pixblock_head
20+ vmull.u8 q0, d24, d8
21+ vmull.u8 q1, d25, d8
22+ vmull.u8 q6, d26, d8
23+ vmull.u8 q7, d27, d8
24+ vrshr.u16 q10, q0, #8
25+ vrshr.u16 q11, q1, #8
26+ vrshr.u16 q12, q6, #8
27+ vrshr.u16 q13, q7, #8
28+ vraddhn.u16 d0, q0, q10
29+ vraddhn.u16 d1, q1, q11
30+ vraddhn.u16 d2, q6, q12
31+ vraddhn.u16 d3, q7, q13
32+ vmvn.8 q12, q0
33+ vmvn.8 q13, q1
34+ vmull.u8 q8, d24, d4
35+ vmull.u8 q9, d25, d5
36+ vmull.u8 q10, d26, d6
37+ vmull.u8 q11, d27, d7
38+.endm
39+
40+.macro pixman_composite_over_n_8_8_process_pixblock_tail
41+ vrshr.u16 q14, q8, #8
42+ vrshr.u16 q15, q9, #8
43+ vrshr.u16 q12, q10, #8
44+ vrshr.u16 q13, q11, #8
45+ vraddhn.u16 d28, q14, q8
46+ vraddhn.u16 d29, q15, q9
47+ vraddhn.u16 d30, q12, q10
48+ vraddhn.u16 d31, q13, q11
49+ vqadd.u8 q14, q0, q14
50+ vqadd.u8 q15, q1, q15
51+.endm
52+
53+/* TODO: expand macros and do better instructions scheduling */
54+.macro pixman_composite_over_n_8_8_process_pixblock_tail_head
55+ vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
56+ pixman_composite_over_n_8_8_process_pixblock_tail
57+ vld1.8 {d24, d25, d26, d27}, [MASK]!
58+ cache_preload 32, 32
59+ vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
60+ pixman_composite_over_n_8_8_process_pixblock_head
61+.endm
62+
63+.macro pixman_composite_over_n_8_8_init
64+ add DUMMY, sp, #ARGS_STACK_OFFSET
65+ vpush {d8-d15}
66+ vld1.32 {d8[0]}, [DUMMY]
67+ vdup.8 d8, d8[3]
68+.endm
69+
70+.macro pixman_composite_over_n_8_8_cleanup
71+ vpop {d8-d15}
72+.endm
73+
74+generate_composite_function \
75+ pixman_composite_over_n_8_8_asm_neon, 0, 8, 8, \
76+ FLAG_DST_READWRITE, \
77+ 32, /* number of pixels, processed in a single block */ \
78+ 5, /* prefetch distance */ \
79+ pixman_composite_over_n_8_8_init, \
80+ pixman_composite_over_n_8_8_cleanup, \
81+ pixman_composite_over_n_8_8_process_pixblock_head, \
82+ pixman_composite_over_n_8_8_process_pixblock_tail, \
83+ pixman_composite_over_n_8_8_process_pixblock_tail_head
84+
85+/******************************************************************************/
86+
87 .macro pixman_composite_over_n_8888_8888_ca_process_pixblock_head
88 /*
89 * 'combine_mask_ca' replacement
90diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
91index 2f82069..72ef75e 100644
92--- a/pixman/pixman-arm-neon.c
93+++ b/pixman/pixman-arm-neon.c
94@@ -76,6 +76,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8888,
95 uint8_t, 1, uint32_t, 1)
96 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8888_8888_ca,
97 uint32_t, 1, uint32_t, 1)
98+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8,
99+ uint8_t, 1, uint8_t, 1)
100 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8,
101 uint8_t, 1, uint8_t, 1)
102
103@@ -235,6 +237,7 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
104 PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, neon_composite_src_0888_8888_rev),
105 PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, neon_composite_src_0888_0565_rev),
106 PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, neon_composite_src_pixbuf_8888),
107+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, neon_composite_over_n_8_8),
108 PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, neon_composite_over_n_8_0565),
109 PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, neon_composite_over_n_8_0565),
110 PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, neon_composite_over_n_8_8888),
111--
1121.6.6.1
113
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0006-ARM-introduced-fetch_mask_pixblock-macro-to-simplify.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0006-ARM-introduced-fetch_mask_pixblock-macro-to-simplify.patch
new file mode 100644
index 0000000000..71a41a7a59
--- /dev/null
+++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0006-ARM-introduced-fetch_mask_pixblock-macro-to-simplify.patch
@@ -0,0 +1,157 @@
1From 3be86a92ccab240859062a541cdb871d81c9501a Mon Sep 17 00:00:00 2001
2From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
3Date: Sun, 28 Nov 2010 21:45:06 +0200
4Subject: [PATCH 06/24] ARM: introduced 'fetch_mask_pixblock' macro to simplify code
5
6This macro hides the implementation details of pixels fetching
7for the mask image just like 'fetch_src_pixblock' does for the
8source image. This provides more possibilities for reusing the
9same code blocks in different compositing functions.
10
11This patch does not introduce any functional changes and the
12resulting code in the compiled object file is exactly the same.
13---
14 pixman/pixman-arm-neon-asm.S | 26 +++++++++++++-------------
15 pixman/pixman-arm-neon-asm.h | 5 +++++
16 2 files changed, 18 insertions(+), 13 deletions(-)
17
18diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
19index a3875ee..155a236 100644
20--- a/pixman/pixman-arm-neon-asm.S
21+++ b/pixman/pixman-arm-neon-asm.S
22@@ -841,7 +841,7 @@ generate_composite_function \
23 pixman_composite_over_n_8_0565_process_pixblock_tail
24 vst1.16 {d28, d29}, [DST_W, :128]!
25 vld1.16 {d4, d5}, [DST_R, :128]!
26- vld1.8 {d24}, [MASK]!
27+ fetch_mask_pixblock
28 cache_preload 8, 8
29 pixman_composite_over_n_8_0565_process_pixblock_head
30 .endm
31@@ -889,7 +889,7 @@ generate_composite_function \
32 pixman_composite_over_n_8_0565_process_pixblock_tail
33 fetch_src_pixblock
34 cache_preload 8, 8
35- vld1.8 {d24}, [MASK]!
36+ fetch_mask_pixblock
37 pixman_composite_over_n_8_0565_process_pixblock_head
38 vst1.16 {d28, d29}, [DST_W, :128]!
39 .endm
40@@ -1171,7 +1171,7 @@ generate_composite_function \
41 pixman_composite_over_n_8_8888_process_pixblock_tail
42 vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
43 vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
44- vld1.8 {d24}, [MASK]!
45+ fetch_mask_pixblock
46 cache_preload 8, 8
47 pixman_composite_over_n_8_8888_process_pixblock_head
48 .endm
49@@ -1241,7 +1241,7 @@ generate_composite_function \
50 .macro pixman_composite_over_n_8_8_process_pixblock_tail_head
51 vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
52 pixman_composite_over_n_8_8_process_pixblock_tail
53- vld1.8 {d24, d25, d26, d27}, [MASK]!
54+ fetch_mask_pixblock
55 cache_preload 32, 32
56 vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
57 pixman_composite_over_n_8_8_process_pixblock_head
58@@ -1341,7 +1341,7 @@ generate_composite_function \
59 vraddhn.u16 d29, q15, q9
60 vraddhn.u16 d30, q6, q10
61 vraddhn.u16 d31, q7, q11
62- vld4.8 {d24, d25, d26, d27}, [MASK]!
63+ fetch_mask_pixblock
64 vqadd.u8 q14, q0, q14
65 vqadd.u8 q15, q1, q15
66 cache_preload 8, 8
67@@ -1405,7 +1405,7 @@ generate_composite_function \
68 pixman_composite_add_n_8_8_process_pixblock_tail
69 vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
70 vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
71- vld1.8 {d24, d25, d26, d27}, [MASK]!
72+ fetch_mask_pixblock
73 cache_preload 32, 32
74 pixman_composite_add_n_8_8_process_pixblock_head
75 .endm
76@@ -1462,7 +1462,7 @@ generate_composite_function \
77 pixman_composite_add_8_8_8_process_pixblock_tail
78 vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
79 vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
80- vld1.8 {d24, d25, d26, d27}, [MASK]!
81+ fetch_mask_pixblock
82 fetch_src_pixblock
83 cache_preload 32, 32
84 pixman_composite_add_8_8_8_process_pixblock_head
85@@ -1515,7 +1515,7 @@ generate_composite_function \
86 pixman_composite_add_8888_8888_8888_process_pixblock_tail
87 vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
88 vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
89- vld4.8 {d24, d25, d26, d27}, [MASK]!
90+ fetch_mask_pixblock
91 fetch_src_pixblock
92 cache_preload 8, 8
93 pixman_composite_add_8888_8888_8888_process_pixblock_head
94@@ -1587,7 +1587,7 @@ generate_composite_function_single_scanline \
95 pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail
96 fetch_src_pixblock
97 cache_preload 8, 8
98- vld4.8 {d12, d13, d14, d15}, [MASK]!
99+ fetch_mask_pixblock
100 pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
101 vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
102 .endm
103@@ -1658,7 +1658,7 @@ generate_composite_function \
104 pixman_composite_over_8888_n_8888_process_pixblock_tail
105 fetch_src_pixblock
106 cache_preload 8, 8
107- vld4.8 {d12, d13, d14, d15}, [MASK]!
108+ fetch_mask_pixblock
109 pixman_composite_over_8888_n_8888_process_pixblock_head
110 vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
111 .endm
112@@ -1700,7 +1700,7 @@ generate_composite_function_single_scanline \
113 pixman_composite_over_8888_n_8888_process_pixblock_tail
114 fetch_src_pixblock
115 cache_preload 8, 8
116- vld1.8 {d15}, [MASK]!
117+ fetch_mask_pixblock
118 pixman_composite_over_8888_n_8888_process_pixblock_head
119 vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
120 .endm
121@@ -1917,7 +1917,7 @@ generate_composite_function \
122
123 /* TODO: expand macros and do better instructions scheduling */
124 .macro pixman_composite_over_0565_8_0565_process_pixblock_tail_head
125- vld1.8 {d15}, [MASK]!
126+ fetch_mask_pixblock
127 pixman_composite_over_0565_8_0565_process_pixblock_tail
128 fetch_src_pixblock
129 vld1.16 {d10, d11}, [DST_R, :128]!
130@@ -1969,7 +1969,7 @@ generate_composite_function \
131
132 /* TODO: expand macros and do better instructions scheduling */
133 .macro pixman_composite_add_0565_8_0565_process_pixblock_tail_head
134- vld1.8 {d15}, [MASK]!
135+ fetch_mask_pixblock
136 pixman_composite_add_0565_8_0565_process_pixblock_tail
137 fetch_src_pixblock
138 vld1.16 {d10, d11}, [DST_R, :128]!
139diff --git a/pixman/pixman-arm-neon-asm.h b/pixman/pixman-arm-neon-asm.h
140index c75bdc3..24fa361 100644
141--- a/pixman/pixman-arm-neon-asm.h
142+++ b/pixman/pixman-arm-neon-asm.h
143@@ -431,6 +431,11 @@
144 .endif
145 .endm
146
147+.macro fetch_mask_pixblock
148+ pixld pixblock_size, mask_bpp, \
149+ (mask_basereg - pixblock_size * mask_bpp / 64), MASK
150+.endm
151+
152 /*
153 * Macro which is used to process leading pixels until destination
154 * pointer is properly aligned (at 16 bytes boundary). When destination
155--
1561.6.6.1
157
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0007-ARM-better-NEON-instructions-scheduling-for-over_n_8.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0007-ARM-better-NEON-instructions-scheduling-for-over_n_8.patch
new file mode 100644
index 0000000000..acdfdf873d
--- /dev/null
+++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0007-ARM-better-NEON-instructions-scheduling-for-over_n_8.patch
@@ -0,0 +1,170 @@
1From e6814837a6ccd3e4db329e0131eaf2055d2c864b Mon Sep 17 00:00:00 2001
2From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
3Date: Fri, 26 Nov 2010 17:06:58 +0200
4Subject: [PATCH 07/24] ARM: better NEON instructions scheduling for over_n_8_0565
5
6Code rearranged to get better instructions scheduling for ARM Cortex-A8/A9.
7Now it is ~30% faster for the pixel data in L1 cache and makes better use
8of memory bandwidth when running at lower clock frequencies (ex. 500MHz).
9Also register d24 (pixels from the mask image) is now not clobbered by
10supplementary macros, which allows to reuse them for the other variants
11of compositing operations later.
12
13Benchmark from ARM Cortex-A8 @500MHz:
14
15== before ==
16
17 over_n_8_0565 = L1: 63.90 L2: 63.15 M: 60.97 ( 73.53%)
18 HT: 28.89 VT: 24.14 R: 21.33 RT: 6.78 ( 67Kops/s)
19
20== after ==
21
22 over_n_8_0565 = L1: 82.64 L2: 75.19 M: 71.52 ( 84.14%)
23 HT: 30.49 VT: 25.56 R: 22.36 RT: 6.89 ( 68Kops/s)
24---
25 pixman/pixman-arm-neon-asm.S | 120 +++++++++++++++++++++++++++---------------
26 1 files changed, 77 insertions(+), 43 deletions(-)
27
28diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
29index 155a236..ffffc1c 100644
30--- a/pixman/pixman-arm-neon-asm.S
31+++ b/pixman/pixman-arm-neon-asm.S
32@@ -792,58 +792,92 @@ generate_composite_function \
33 /******************************************************************************/
34
35 .macro pixman_composite_over_n_8_0565_process_pixblock_head
36- /* in */
37- vmull.u8 q0, d24, d8
38- vmull.u8 q1, d24, d9
39- vmull.u8 q6, d24, d10
40- vmull.u8 q7, d24, d11
41- vrshr.u16 q10, q0, #8
42- vrshr.u16 q11, q1, #8
43- vrshr.u16 q12, q6, #8
44- vrshr.u16 q13, q7, #8
45- vraddhn.u16 d0, q0, q10
46- vraddhn.u16 d1, q1, q11
47- vraddhn.u16 d2, q6, q12
48- vraddhn.u16 d3, q7, q13
49-
50- vshrn.u16 d6, q2, #8
51- vshrn.u16 d7, q2, #3
52- vsli.u16 q2, q2, #5
53- vsri.u8 d6, d6, #5
54- vmvn.8 d3, d3
55- vsri.u8 d7, d7, #6
56- vshrn.u16 d30, q2, #2
57- /* now do alpha blending */
58- vmull.u8 q10, d3, d6
59- vmull.u8 q11, d3, d7
60- vmull.u8 q12, d3, d30
61- vrshr.u16 q13, q10, #8
62- vrshr.u16 q3, q11, #8
63- vrshr.u16 q15, q12, #8
64- vraddhn.u16 d20, q10, q13
65- vraddhn.u16 d23, q11, q3
66- vraddhn.u16 d22, q12, q15
67+ vmull.u8 q0, d24, d8 /* IN for SRC pixels (part1) */
68+ vmull.u8 q1, d24, d9
69+ vmull.u8 q6, d24, d10
70+ vmull.u8 q7, d24, d11
71+ vshrn.u16 d6, q2, #8 /* convert DST_R data to 32-bpp (part1) */
72+ vshrn.u16 d7, q2, #3
73+ vsli.u16 q2, q2, #5
74+ vrshr.u16 q8, q0, #8 /* IN for SRC pixels (part2) */
75+ vrshr.u16 q9, q1, #8
76+ vrshr.u16 q10, q6, #8
77+ vrshr.u16 q11, q7, #8
78+ vraddhn.u16 d0, q0, q8
79+ vraddhn.u16 d1, q1, q9
80+ vraddhn.u16 d2, q6, q10
81+ vraddhn.u16 d3, q7, q11
82+ vsri.u8 d6, d6, #5 /* convert DST_R data to 32-bpp (part2) */
83+ vsri.u8 d7, d7, #6
84+ vmvn.8 d3, d3
85+ vshrn.u16 d30, q2, #2
86+ vmull.u8 q8, d3, d6 /* now do alpha blending */
87+ vmull.u8 q9, d3, d7
88+ vmull.u8 q10, d3, d30
89 .endm
90
91 .macro pixman_composite_over_n_8_0565_process_pixblock_tail
92- vqadd.u8 d16, d2, d20
93- vqadd.u8 q9, q0, q11
94- /* convert to r5g6b5 */
95- vshll.u8 q14, d16, #8
96- vshll.u8 q8, d19, #8
97- vshll.u8 q9, d18, #8
98- vsri.u16 q14, q8, #5
99- vsri.u16 q14, q9, #11
100+ /* 3 cycle bubble (after vmull.u8) */
101+ vrshr.u16 q13, q8, #8
102+ vrshr.u16 q11, q9, #8
103+ vrshr.u16 q15, q10, #8
104+ vraddhn.u16 d16, q8, q13
105+ vraddhn.u16 d27, q9, q11
106+ vraddhn.u16 d26, q10, q15
107+ vqadd.u8 d16, d2, d16
108+ /* 1 cycle bubble */
109+ vqadd.u8 q9, q0, q13
110+ vshll.u8 q14, d16, #8 /* convert to 16bpp */
111+ vshll.u8 q8, d19, #8
112+ vshll.u8 q9, d18, #8
113+ vsri.u16 q14, q8, #5
114+ /* 1 cycle bubble */
115+ vsri.u16 q14, q9, #11
116 .endm
117
118-/* TODO: expand macros and do better instructions scheduling */
119 .macro pixman_composite_over_n_8_0565_process_pixblock_tail_head
120- pixman_composite_over_n_8_0565_process_pixblock_tail
121- vst1.16 {d28, d29}, [DST_W, :128]!
122 vld1.16 {d4, d5}, [DST_R, :128]!
123+ vshrn.u16 d6, q2, #8
124 fetch_mask_pixblock
125+ vshrn.u16 d7, q2, #3
126+ fetch_src_pixblock
127+ vmull.u8 q6, d24, d10
128+ vrshr.u16 q13, q8, #8
129+ vrshr.u16 q11, q9, #8
130+ vrshr.u16 q15, q10, #8
131+ vraddhn.u16 d16, q8, q13
132+ vraddhn.u16 d27, q9, q11
133+ vraddhn.u16 d26, q10, q15
134+ vqadd.u8 d16, d2, d16
135+ vmull.u8 q1, d24, d9
136+ vqadd.u8 q9, q0, q13
137+ vshll.u8 q14, d16, #8
138+ vmull.u8 q0, d24, d8
139+ vshll.u8 q8, d19, #8
140+ vshll.u8 q9, d18, #8
141+ vsri.u16 q14, q8, #5
142+ vmull.u8 q7, d24, d11
143+ vsri.u16 q14, q9, #11
144+
145 cache_preload 8, 8
146- pixman_composite_over_n_8_0565_process_pixblock_head
147+
148+ vsli.u16 q2, q2, #5
149+ vrshr.u16 q8, q0, #8
150+ vrshr.u16 q9, q1, #8
151+ vrshr.u16 q10, q6, #8
152+ vrshr.u16 q11, q7, #8
153+ vraddhn.u16 d0, q0, q8
154+ vraddhn.u16 d1, q1, q9
155+ vraddhn.u16 d2, q6, q10
156+ vraddhn.u16 d3, q7, q11
157+ vsri.u8 d6, d6, #5
158+ vsri.u8 d7, d7, #6
159+ vmvn.8 d3, d3
160+ vshrn.u16 d30, q2, #2
161+ vst1.16 {d28, d29}, [DST_W, :128]!
162+ vmull.u8 q8, d3, d6
163+ vmull.u8 q9, d3, d7
164+ vmull.u8 q10, d3, d30
165 .endm
166
167 /*
168--
1691.6.6.1
170
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0008-ARM-added-neon_composite_over_8888_n_0565-fast-path.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0008-ARM-added-neon_composite_over_8888_n_0565-fast-path.patch
new file mode 100644
index 0000000000..4c5bf8d916
--- /dev/null
+++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0008-ARM-added-neon_composite_over_8888_n_0565-fast-path.patch
@@ -0,0 +1,74 @@
1From a7c36681c0c1955ff9110b81f1789e56abb10a95 Mon Sep 17 00:00:00 2001
2From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
3Date: Sat, 27 Nov 2010 03:53:12 +0200
4Subject: [PATCH 08/24] ARM: added 'neon_composite_over_8888_n_0565' fast path
5
6---
7 pixman/pixman-arm-neon-asm.S | 28 ++++++++++++++++++++++++++++
8 pixman/pixman-arm-neon.c | 4 ++++
9 2 files changed, 32 insertions(+), 0 deletions(-)
10
11diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
12index ffffc1c..3e52a49 100644
13--- a/pixman/pixman-arm-neon-asm.S
14+++ b/pixman/pixman-arm-neon-asm.S
15@@ -917,6 +917,34 @@ generate_composite_function \
16
17 /******************************************************************************/
18
19+.macro pixman_composite_over_8888_n_0565_init
20+ add DUMMY, sp, #(ARGS_STACK_OFFSET + 8)
21+ vpush {d8-d15}
22+ vld1.32 {d24[0]}, [DUMMY]
23+ vdup.8 d24, d24[3]
24+.endm
25+
26+.macro pixman_composite_over_8888_n_0565_cleanup
27+ vpop {d8-d15}
28+.endm
29+
30+generate_composite_function \
31+ pixman_composite_over_8888_n_0565_asm_neon, 32, 0, 16, \
32+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
33+ 8, /* number of pixels, processed in a single block */ \
34+ 5, /* prefetch distance */ \
35+ pixman_composite_over_8888_n_0565_init, \
36+ pixman_composite_over_8888_n_0565_cleanup, \
37+ pixman_composite_over_n_8_0565_process_pixblock_head, \
38+ pixman_composite_over_n_8_0565_process_pixblock_tail, \
39+ pixman_composite_over_n_8_0565_process_pixblock_tail_head, \
40+ 28, /* dst_w_basereg */ \
41+ 4, /* dst_r_basereg */ \
42+ 8, /* src_basereg */ \
43+ 24 /* mask_basereg */
44+
45+/******************************************************************************/
46+
47 /* TODO: expand macros and do better instructions scheduling */
48 .macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head
49 vld1.16 {d4, d5}, [DST_R, :128]!
50diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
51index 72ef75e..8156bbb 100644
52--- a/pixman/pixman-arm-neon.c
53+++ b/pixman/pixman-arm-neon.c
54@@ -83,6 +83,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8,
55
56 PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888,
57 uint32_t, 1, uint32_t, 1)
58+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_0565,
59+ uint32_t, 1, uint16_t, 1)
60
61 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
62 uint8_t, 1, uint8_t, 1, uint8_t, 1)
63@@ -253,6 +255,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
64 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca),
65 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, neon_composite_over_8888_n_8888),
66 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, neon_composite_over_8888_n_8888),
67+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, neon_composite_over_8888_n_0565),
68+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, b5g6r5, neon_composite_over_8888_n_0565),
69 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, neon_composite_over_8888_8_8888),
70 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, neon_composite_over_8888_8_8888),
71 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, neon_composite_over_8888_8_8888),
72--
731.6.6.1
74
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0009-ARM-reuse-common-NEON-code-for-over_-n_8-8888_n-8888.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0009-ARM-reuse-common-NEON-code-for-over_-n_8-8888_n-8888.patch
new file mode 100644
index 0000000000..b45671e98e
--- /dev/null
+++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0009-ARM-reuse-common-NEON-code-for-over_-n_8-8888_n-8888.patch
@@ -0,0 +1,139 @@
1From 3990931bf6197eff1cec06cf24bce53ddf9a539a Mon Sep 17 00:00:00 2001
2From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
3Date: Sat, 27 Nov 2010 04:47:39 +0200
4Subject: [PATCH 09/24] ARM: reuse common NEON code for over_{n_8|8888_n|8888_8}_0565
5
6Renamed supplementary macros from 'over_n_8_0565' to 'over_8888_8_0565',
7because they can actually support all variants of this operation:
8over_8888_8_0565/over_n_8_0565/over_8888_n_0565.
9
10Also 'over_8888_8_0565' now uses more optimized common code instead of its
11own variant, improving performance a bit. Even though this operation is
12still memory bandwidth limited, scaled variants of these fast paths may
13put more stress on CPU later.
14
15Benchmarked on ARM Cortex-A8 @500MHz:
16
17== before ==
18
19 over_8888_8_0565 = L1: 67.10 L2: 53.82 M: 44.70 (105.17%)
20 HT: 18.73 VT: 16.91 R: 14.25 RT: 4.80 (52Kops/s)
21
22== after ==
23
24 over_8888_8_0565 = L1: 77.83 L2: 58.14 M: 44.82 (105.52%)
25 HT: 20.58 VT: 17.44 R: 15.05 RT: 4.88 (52Kops/s)
26---
27 pixman/pixman-arm-neon-asm.S | 61 +++++++++++++++++------------------------
28 1 files changed, 25 insertions(+), 36 deletions(-)
29
30diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
31index 3e52a49..4175144 100644
32--- a/pixman/pixman-arm-neon-asm.S
33+++ b/pixman/pixman-arm-neon-asm.S
34@@ -791,7 +791,7 @@ generate_composite_function \
35
36 /******************************************************************************/
37
38-.macro pixman_composite_over_n_8_0565_process_pixblock_head
39+.macro pixman_composite_over_8888_8_0565_process_pixblock_head
40 vmull.u8 q0, d24, d8 /* IN for SRC pixels (part1) */
41 vmull.u8 q1, d24, d9
42 vmull.u8 q6, d24, d10
43@@ -816,7 +816,7 @@ generate_composite_function \
44 vmull.u8 q10, d3, d30
45 .endm
46
47-.macro pixman_composite_over_n_8_0565_process_pixblock_tail
48+.macro pixman_composite_over_8888_8_0565_process_pixblock_tail
49 /* 3 cycle bubble (after vmull.u8) */
50 vrshr.u16 q13, q8, #8
51 vrshr.u16 q11, q9, #8
52@@ -835,7 +835,7 @@ generate_composite_function \
53 vsri.u16 q14, q9, #11
54 .endm
55
56-.macro pixman_composite_over_n_8_0565_process_pixblock_tail_head
57+.macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head
58 vld1.16 {d4, d5}, [DST_R, :128]!
59 vshrn.u16 d6, q2, #8
60 fetch_mask_pixblock
61@@ -880,6 +880,23 @@ generate_composite_function \
62 vmull.u8 q10, d3, d30
63 .endm
64
65+generate_composite_function \
66+ pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, \
67+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
68+ 8, /* number of pixels, processed in a single block */ \
69+ 5, /* prefetch distance */ \
70+ default_init_need_all_regs, \
71+ default_cleanup_need_all_regs, \
72+ pixman_composite_over_8888_8_0565_process_pixblock_head, \
73+ pixman_composite_over_8888_8_0565_process_pixblock_tail, \
74+ pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \
75+ 28, /* dst_w_basereg */ \
76+ 4, /* dst_r_basereg */ \
77+ 8, /* src_basereg */ \
78+ 24 /* mask_basereg */
79+
80+/******************************************************************************/
81+
82 /*
83 * This function needs a special initialization of solid mask.
84 * Solid source pixel data is fetched from stack at ARGS_STACK_OFFSET
85@@ -911,9 +928,9 @@ generate_composite_function \
86 5, /* prefetch distance */ \
87 pixman_composite_over_n_8_0565_init, \
88 pixman_composite_over_n_8_0565_cleanup, \
89- pixman_composite_over_n_8_0565_process_pixblock_head, \
90- pixman_composite_over_n_8_0565_process_pixblock_tail, \
91- pixman_composite_over_n_8_0565_process_pixblock_tail_head
92+ pixman_composite_over_8888_8_0565_process_pixblock_head, \
93+ pixman_composite_over_8888_8_0565_process_pixblock_tail, \
94+ pixman_composite_over_8888_8_0565_process_pixblock_tail_head
95
96 /******************************************************************************/
97
98@@ -935,36 +952,8 @@ generate_composite_function \
99 5, /* prefetch distance */ \
100 pixman_composite_over_8888_n_0565_init, \
101 pixman_composite_over_8888_n_0565_cleanup, \
102- pixman_composite_over_n_8_0565_process_pixblock_head, \
103- pixman_composite_over_n_8_0565_process_pixblock_tail, \
104- pixman_composite_over_n_8_0565_process_pixblock_tail_head, \
105- 28, /* dst_w_basereg */ \
106- 4, /* dst_r_basereg */ \
107- 8, /* src_basereg */ \
108- 24 /* mask_basereg */
109-
110-/******************************************************************************/
111-
112-/* TODO: expand macros and do better instructions scheduling */
113-.macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head
114- vld1.16 {d4, d5}, [DST_R, :128]!
115- pixman_composite_over_n_8_0565_process_pixblock_tail
116- fetch_src_pixblock
117- cache_preload 8, 8
118- fetch_mask_pixblock
119- pixman_composite_over_n_8_0565_process_pixblock_head
120- vst1.16 {d28, d29}, [DST_W, :128]!
121-.endm
122-
123-generate_composite_function \
124- pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, \
125- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
126- 8, /* number of pixels, processed in a single block */ \
127- 5, /* prefetch distance */ \
128- default_init_need_all_regs, \
129- default_cleanup_need_all_regs, \
130- pixman_composite_over_n_8_0565_process_pixblock_head, \
131- pixman_composite_over_n_8_0565_process_pixblock_tail, \
132+ pixman_composite_over_8888_8_0565_process_pixblock_head, \
133+ pixman_composite_over_8888_8_0565_process_pixblock_tail, \
134 pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \
135 28, /* dst_w_basereg */ \
136 4, /* dst_r_basereg */ \
137--
1381.6.6.1
139
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0010-ARM-added-neon_composite_over_0565_n_0565-fast-path.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0010-ARM-added-neon_composite_over_0565_n_0565-fast-path.patch
new file mode 100644
index 0000000000..376631a50b
--- /dev/null
+++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0010-ARM-added-neon_composite_over_0565_n_0565-fast-path.patch
@@ -0,0 +1,74 @@
1From 6d2f7f981b52b41f4321071c325babcf792bd666 Mon Sep 17 00:00:00 2001
2From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
3Date: Sat, 27 Nov 2010 15:53:54 +0200
4Subject: [PATCH 10/24] ARM: added 'neon_composite_over_0565_n_0565' fast path
5
6---
7 pixman/pixman-arm-neon-asm.S | 28 ++++++++++++++++++++++++++++
8 pixman/pixman-arm-neon.c | 4 ++++
9 2 files changed, 32 insertions(+), 0 deletions(-)
10
11diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
12index 4175144..81c0a34 100644
13--- a/pixman/pixman-arm-neon-asm.S
14+++ b/pixman/pixman-arm-neon-asm.S
15@@ -1994,6 +1994,34 @@ generate_composite_function \
16
17 /******************************************************************************/
18
19+.macro pixman_composite_over_0565_n_0565_init
20+ add DUMMY, sp, #(ARGS_STACK_OFFSET + 8)
21+ vpush {d8-d15}
22+ vld1.32 {d15[0]}, [DUMMY]
23+ vdup.8 d15, d15[3]
24+.endm
25+
26+.macro pixman_composite_over_0565_n_0565_cleanup
27+ vpop {d8-d15}
28+.endm
29+
30+generate_composite_function \
31+ pixman_composite_over_0565_n_0565_asm_neon, 16, 0, 16, \
32+ FLAG_DST_READWRITE, \
33+ 8, /* number of pixels, processed in a single block */ \
34+ 5, /* prefetch distance */ \
35+ pixman_composite_over_0565_n_0565_init, \
36+ pixman_composite_over_0565_n_0565_cleanup, \
37+ pixman_composite_over_0565_8_0565_process_pixblock_head, \
38+ pixman_composite_over_0565_8_0565_process_pixblock_tail, \
39+ pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \
40+ 28, /* dst_w_basereg */ \
41+ 10, /* dst_r_basereg */ \
42+ 8, /* src_basereg */ \
43+ 15 /* mask_basereg */
44+
45+/******************************************************************************/
46+
47 .macro pixman_composite_add_0565_8_0565_process_pixblock_head
48 /* mask is in d15 */
49 convert_0565_to_x888 q4, d2, d1, d0
50diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
51index 8156bbb..b01c3e0 100644
52--- a/pixman/pixman-arm-neon.c
53+++ b/pixman/pixman-arm-neon.c
54@@ -85,6 +85,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888,
55 uint32_t, 1, uint32_t, 1)
56 PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_0565,
57 uint32_t, 1, uint16_t, 1)
58+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_0565_n_0565,
59+ uint16_t, 1, uint16_t, 1)
60
61 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
62 uint8_t, 1, uint8_t, 1, uint8_t, 1)
63@@ -257,6 +259,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
64 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, neon_composite_over_8888_n_8888),
65 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, neon_composite_over_8888_n_0565),
66 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, b5g6r5, neon_composite_over_8888_n_0565),
67+ PIXMAN_STD_FAST_PATH (OVER, r5g6b5, solid, r5g6b5, neon_composite_over_0565_n_0565),
68+ PIXMAN_STD_FAST_PATH (OVER, b5g6r5, solid, b5g6r5, neon_composite_over_0565_n_0565),
69 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, neon_composite_over_8888_8_8888),
70 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, neon_composite_over_8888_8_8888),
71 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, neon_composite_over_8888_8_8888),
72--
731.6.6.1
74
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0011-ARM-added-neon_composite_add_8888_8_8888-fast-path.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0011-ARM-added-neon_composite_add_8888_8_8888-fast-path.patch
new file mode 100644
index 0000000000..19f429bbf7
--- /dev/null
+++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0011-ARM-added-neon_composite_add_8888_8_8888-fast-path.patch
@@ -0,0 +1,63 @@
1From c3f48b6aa2f9354af02ffc8c938ec6753fdcbde3 Mon Sep 17 00:00:00 2001
2From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
3Date: Sun, 28 Nov 2010 22:05:53 +0200
4Subject: [PATCH 11/24] ARM: added 'neon_composite_add_8888_8_8888' fast path
5
6---
7 pixman/pixman-arm-neon-asm.S | 17 +++++++++++++++++
8 pixman/pixman-arm-neon.c | 4 ++++
9 2 files changed, 21 insertions(+), 0 deletions(-)
10
11diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
12index 81c0a34..11ef166 100644
13--- a/pixman/pixman-arm-neon-asm.S
14+++ b/pixman/pixman-arm-neon-asm.S
15@@ -1595,6 +1595,23 @@ generate_composite_function_single_scanline \
16
17 /******************************************************************************/
18
19+generate_composite_function \
20+ pixman_composite_add_8888_8_8888_asm_neon, 32, 8, 32, \
21+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
22+ 8, /* number of pixels, processed in a single block */ \
23+ 5, /* prefetch distance */ \
24+ default_init, \
25+ default_cleanup, \
26+ pixman_composite_add_8888_8888_8888_process_pixblock_head, \
27+ pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
28+ pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
29+ 28, /* dst_w_basereg */ \
30+ 4, /* dst_r_basereg */ \
31+ 0, /* src_basereg */ \
32+ 27 /* mask_basereg */
33+
34+/******************************************************************************/
35+
36 .macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
37 /* expecting source data in {d0, d1, d2, d3} */
38 /* destination data in {d4, d5, d6, d7} */
39diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
40index b01c3e0..eaf9787 100644
41--- a/pixman/pixman-arm-neon.c
42+++ b/pixman/pixman-arm-neon.c
43@@ -92,6 +92,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
44 uint8_t, 1, uint8_t, 1, uint8_t, 1)
45 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565,
46 uint16_t, 1, uint8_t, 1, uint16_t, 1)
47+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8_8888,
48+ uint32_t, 1, uint8_t, 1, uint32_t, 1)
49 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888,
50 uint32_t, 1, uint32_t, 1, uint32_t, 1)
51 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888,
52@@ -282,6 +284,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
53 PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8),
54 PIXMAN_STD_FAST_PATH (ADD, r5g6b5, a8, r5g6b5, neon_composite_add_0565_8_0565),
55 PIXMAN_STD_FAST_PATH (ADD, b5g6r5, a8, b5g6r5, neon_composite_add_0565_8_0565),
56+ PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8, a8r8g8b8, neon_composite_add_8888_8_8888),
57+ PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, a8, a8b8g8r8, neon_composite_add_8888_8_8888),
58 PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888),
59 PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8_8),
60 PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888),
61--
621.6.6.1
63
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0012-ARM-better-NEON-instructions-scheduling-for-add_8888.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0012-ARM-better-NEON-instructions-scheduling-for-add_8888.patch
new file mode 100644
index 0000000000..28dd8b6051
--- /dev/null
+++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0012-ARM-better-NEON-instructions-scheduling-for-add_8888.patch
@@ -0,0 +1,105 @@
1From 1fba7790367d7b726d05a33bbbcebe10b9280a31 Mon Sep 17 00:00:00 2001
2From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
3Date: Mon, 29 Nov 2010 02:10:22 +0200
4Subject: [PATCH 12/24] ARM: better NEON instructions scheduling for add_8888_8888_8888
5
6Provides a minor performance improvement by using pipelining and hiding
7instruction latencies. Also do not clobber d0-d3 registers (source
8image pixels) while doing calculations in order to allow the use of
9the same macro for add_n_8_8888 fast path later.
10
11Benchmark from ARM Cortex-A8 @500MHz:
12
13== before ==
14
15 add_8888_8888_8888 = L1: 95.94 L2: 42.27 M: 25.60 (121.09%)
16 HT: 14.54 VT: 13.13 R: 12.77 RT: 4.49 (48Kops/s)
17 add_8888_8_8888 = L1: 104.51 L2: 57.81 M: 36.06 (106.62%)
18 HT: 19.24 VT: 16.45 R: 14.71 RT: 4.80 (51Kops/s)
19
20== after ==
21
22 add_8888_8888_8888 = L1: 106.66 L2: 47.82 M: 27.32 (129.30%)
23 HT: 15.44 VT: 13.96 R: 12.86 RT: 4.48 (48Kops/s)
24 add_8888_8_8888 = L1: 107.72 L2: 61.02 M: 38.26 (113.16%)
25 HT: 19.48 VT: 16.72 R: 14.82 RT: 4.80 (51Kops/s)
26---
27 pixman/pixman-arm-neon-asm.S | 52 +++++++++++++++++++++++++++--------------
28 1 files changed, 34 insertions(+), 18 deletions(-)
29
30diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
31index 11ef166..829ef84 100644
32--- a/pixman/pixman-arm-neon-asm.S
33+++ b/pixman/pixman-arm-neon-asm.S
34@@ -1542,34 +1542,50 @@ generate_composite_function \
35 /* expecting source data in {d0, d1, d2, d3} */
36 /* destination data in {d4, d5, d6, d7} */
37 /* mask in {d24, d25, d26, d27} */
38- vmull.u8 q8, d27, d0
39- vmull.u8 q9, d27, d1
40+ vmull.u8 q8, d27, d0
41+ vmull.u8 q9, d27, d1
42 vmull.u8 q10, d27, d2
43 vmull.u8 q11, d27, d3
44- vrshr.u16 q0, q8, #8
45- vrshr.u16 q1, q9, #8
46- vrshr.u16 q12, q10, #8
47- vrshr.u16 q13, q11, #8
48- vraddhn.u16 d0, q0, q8
49- vraddhn.u16 d1, q1, q9
50- vraddhn.u16 d2, q12, q10
51- vraddhn.u16 d3, q13, q11
52- vqadd.u8 q14, q0, q2
53- vqadd.u8 q15, q1, q3
54+ /* 1 cycle bubble */
55+ vrsra.u16 q8, q8, #8
56+ vrsra.u16 q9, q9, #8
57+ vrsra.u16 q10, q10, #8
58+ vrsra.u16 q11, q11, #8
59 .endm
60
61 .macro pixman_composite_add_8888_8888_8888_process_pixblock_tail
62+ /* 2 cycle bubble */
63+ vrshrn.u16 d28, q8, #8
64+ vrshrn.u16 d29, q9, #8
65+ vrshrn.u16 d30, q10, #8
66+ vrshrn.u16 d31, q11, #8
67+ vqadd.u8 q14, q2, q14
68+ /* 1 cycle bubble */
69+ vqadd.u8 q15, q3, q15
70 .endm
71
72-/* TODO: expand macros and do better instructions scheduling */
73 .macro pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
74- pixman_composite_add_8888_8888_8888_process_pixblock_tail
75- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
76- vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
77- fetch_mask_pixblock
78 fetch_src_pixblock
79+ vrshrn.u16 d28, q8, #8
80+ fetch_mask_pixblock
81+ vrshrn.u16 d29, q9, #8
82+ vmull.u8 q8, d27, d0
83+ vrshrn.u16 d30, q10, #8
84+ vmull.u8 q9, d27, d1
85+ vrshrn.u16 d31, q11, #8
86+ vmull.u8 q10, d27, d2
87+ vqadd.u8 q14, q2, q14
88+ vmull.u8 q11, d27, d3
89+ vqadd.u8 q15, q3, q15
90+ vrsra.u16 q8, q8, #8
91+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
92+ vrsra.u16 q9, q9, #8
93+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
94+ vrsra.u16 q10, q10, #8
95+
96 cache_preload 8, 8
97- pixman_composite_add_8888_8888_8888_process_pixblock_head
98+
99+ vrsra.u16 q11, q11, #8
100 .endm
101
102 generate_composite_function \
103--
1041.6.6.1
105
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0013-ARM-added-neon_composite_add_n_8_8888-fast-path.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0013-ARM-added-neon_composite_add_n_8_8888-fast-path.patch
new file mode 100644
index 0000000000..a1da09f9bd
--- /dev/null
+++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0013-ARM-added-neon_composite_add_n_8_8888-fast-path.patch
@@ -0,0 +1,75 @@
1From b066b520dfaf0a9f4d1bc9a73c789091e9ce7cc8 Mon Sep 17 00:00:00 2001
2From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
3Date: Mon, 29 Nov 2010 02:38:52 +0200
4Subject: [PATCH 13/24] ARM: added 'neon_composite_add_n_8_8888' fast path
5
6---
7 pixman/pixman-arm-neon-asm.S | 29 +++++++++++++++++++++++++++++
8 pixman/pixman-arm-neon.c | 4 ++++
9 2 files changed, 33 insertions(+), 0 deletions(-)
10
11diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
12index 829ef84..dd6f2c5 100644
13--- a/pixman/pixman-arm-neon-asm.S
14+++ b/pixman/pixman-arm-neon-asm.S
15@@ -1628,6 +1628,35 @@ generate_composite_function \
16
17 /******************************************************************************/
18
19+.macro pixman_composite_add_n_8_8888_init
20+ add DUMMY, sp, #ARGS_STACK_OFFSET
21+ vld1.32 {d3[0]}, [DUMMY]
22+ vdup.8 d0, d3[0]
23+ vdup.8 d1, d3[1]
24+ vdup.8 d2, d3[2]
25+ vdup.8 d3, d3[3]
26+.endm
27+
28+.macro pixman_composite_add_n_8_8888_cleanup
29+.endm
30+
31+generate_composite_function \
32+ pixman_composite_add_n_8_8888_asm_neon, 0, 8, 32, \
33+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
34+ 8, /* number of pixels, processed in a single block */ \
35+ 5, /* prefetch distance */ \
36+ pixman_composite_add_n_8_8888_init, \
37+ pixman_composite_add_n_8_8888_cleanup, \
38+ pixman_composite_add_8888_8888_8888_process_pixblock_head, \
39+ pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
40+ pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
41+ 28, /* dst_w_basereg */ \
42+ 4, /* dst_r_basereg */ \
43+ 0, /* src_basereg */ \
44+ 27 /* mask_basereg */
45+
46+/******************************************************************************/
47+
48 .macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
49 /* expecting source data in {d0, d1, d2, d3} */
50 /* destination data in {d4, d5, d6, d7} */
51diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
52index eaf9787..5ad58bd 100644
53--- a/pixman/pixman-arm-neon.c
54+++ b/pixman/pixman-arm-neon.c
55@@ -80,6 +80,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8,
56 uint8_t, 1, uint8_t, 1)
57 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8,
58 uint8_t, 1, uint8_t, 1)
59+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8888,
60+ uint8_t, 1, uint32_t, 1)
61
62 PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888,
63 uint32_t, 1, uint32_t, 1)
64@@ -281,6 +283,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
65 PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888),
66 PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888),
67 PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, neon_composite_add_n_8_8),
68+ PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, neon_composite_add_n_8_8888),
69+ PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, neon_composite_add_n_8_8888),
70 PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8),
71 PIXMAN_STD_FAST_PATH (ADD, r5g6b5, a8, r5g6b5, neon_composite_add_0565_8_0565),
72 PIXMAN_STD_FAST_PATH (ADD, b5g6r5, a8, b5g6r5, neon_composite_add_0565_8_0565),
73--
741.6.6.1
75
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0014-ARM-added-neon_composite_add_8888_n_8888-fast-path.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0014-ARM-added-neon_composite_add_8888_n_8888-fast-path.patch
new file mode 100644
index 0000000000..0caa29d266
--- /dev/null
+++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0014-ARM-added-neon_composite_add_8888_n_8888-fast-path.patch
@@ -0,0 +1,72 @@
1From f6843e3797eea7e4aed7614b1086f5cefc06c0f9 Mon Sep 17 00:00:00 2001
2From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
3Date: Mon, 29 Nov 2010 03:31:32 +0200
4Subject: [PATCH 14/24] ARM: added 'neon_composite_add_8888_n_8888' fast path
5
6---
7 pixman/pixman-arm-neon-asm.S | 26 ++++++++++++++++++++++++++
8 pixman/pixman-arm-neon.c | 4 ++++
9 2 files changed, 30 insertions(+), 0 deletions(-)
10
11diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
12index dd6f2c5..2c0fd37 100644
13--- a/pixman/pixman-arm-neon-asm.S
14+++ b/pixman/pixman-arm-neon-asm.S
15@@ -1657,6 +1657,32 @@ generate_composite_function \
16
17 /******************************************************************************/
18
19+.macro pixman_composite_add_8888_n_8888_init
20+ add DUMMY, sp, #(ARGS_STACK_OFFSET + 8)
21+ vld1.32 {d27[0]}, [DUMMY]
22+ vdup.8 d27, d27[3]
23+.endm
24+
25+.macro pixman_composite_add_8888_n_8888_cleanup
26+.endm
27+
28+generate_composite_function \
29+ pixman_composite_add_8888_n_8888_asm_neon, 32, 0, 32, \
30+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
31+ 8, /* number of pixels, processed in a single block */ \
32+ 5, /* prefetch distance */ \
33+ pixman_composite_add_8888_n_8888_init, \
34+ pixman_composite_add_8888_n_8888_cleanup, \
35+ pixman_composite_add_8888_8888_8888_process_pixblock_head, \
36+ pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
37+ pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
38+ 28, /* dst_w_basereg */ \
39+ 4, /* dst_r_basereg */ \
40+ 0, /* src_basereg */ \
41+ 27 /* mask_basereg */
42+
43+/******************************************************************************/
44+
45 .macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
46 /* expecting source data in {d0, d1, d2, d3} */
47 /* destination data in {d4, d5, d6, d7} */
48diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
49index 5ad58bd..f0dc111 100644
50--- a/pixman/pixman-arm-neon.c
51+++ b/pixman/pixman-arm-neon.c
52@@ -89,6 +89,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_0565,
53 uint32_t, 1, uint16_t, 1)
54 PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_0565_n_0565,
55 uint16_t, 1, uint16_t, 1)
56+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, add_8888_n_8888,
57+ uint32_t, 1, uint32_t, 1)
58
59 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
60 uint8_t, 1, uint8_t, 1, uint8_t, 1)
61@@ -291,6 +293,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
62 PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8, a8r8g8b8, neon_composite_add_8888_8_8888),
63 PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, a8, a8b8g8r8, neon_composite_add_8888_8_8888),
64 PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888),
65+ PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, solid, a8r8g8b8, neon_composite_add_8888_n_8888),
66+ PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, solid, a8b8g8r8, neon_composite_add_8888_n_8888),
67 PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8_8),
68 PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888),
69 PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888),
70--
711.6.6.1
72
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0015-ARM-added-flags-parameter-to-some-asm-fast-path-wrap.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0015-ARM-added-flags-parameter-to-some-asm-fast-path-wrap.patch
new file mode 100644
index 0000000000..5f2448191d
--- /dev/null
+++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0015-ARM-added-flags-parameter-to-some-asm-fast-path-wrap.patch
@@ -0,0 +1,153 @@
1From af7a69d90ea2b43a4e850870727723d719f09a1c Mon Sep 17 00:00:00 2001
2From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
3Date: Mon, 29 Nov 2010 09:00:46 +0200
4Subject: [PATCH 15/24] ARM: added flags parameter to some asm fast path wrapper macros
5
6Not all types of operations can be skipped when having transparent
7solid source or transparent solid mask. Add an extra flags parameter
8for providing this information to the wrappers.
9---
10 pixman/pixman-arm-common.h | 15 +++++++++------
11 pixman/pixman-arm-neon.c | 26 +++++++++++++-------------
12 pixman/pixman-arm-simd.c | 4 ++--
13 3 files changed, 24 insertions(+), 21 deletions(-)
14
15diff --git a/pixman/pixman-arm-common.h b/pixman/pixman-arm-common.h
16index 2cff6c8..66f448d 100644
17--- a/pixman/pixman-arm-common.h
18+++ b/pixman/pixman-arm-common.h
19@@ -47,6 +47,9 @@
20 * or mask), the corresponding stride argument is unused.
21 */
22
23+#define SKIP_ZERO_SRC 1
24+#define SKIP_ZERO_MASK 2
25+
26 #define PIXMAN_ARM_BIND_FAST_PATH_SRC_DST(cputype, name, \
27 src_type, src_cnt, \
28 dst_type, dst_cnt) \
29@@ -87,7 +90,7 @@ cputype##_composite_##name (pixman_implementation_t *imp, \
30 src_line, src_stride); \
31 }
32
33-#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(cputype, name, \
34+#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(flags, cputype, name, \
35 dst_type, dst_cnt) \
36 void \
37 pixman_composite_##name##_asm_##cputype (int32_t w, \
38@@ -117,7 +120,7 @@ cputype##_composite_##name (pixman_implementation_t *imp, \
39 \
40 src = _pixman_image_get_solid (src_image, dst_image->bits.format); \
41 \
42- if (src == 0) \
43+ if ((flags & SKIP_ZERO_SRC) && src == 0) \
44 return; \
45 \
46 PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
47@@ -128,7 +131,7 @@ cputype##_composite_##name (pixman_implementation_t *imp, \
48 src); \
49 }
50
51-#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(cputype, name, \
52+#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(flags, cputype, name, \
53 mask_type, mask_cnt, \
54 dst_type, dst_cnt) \
55 void \
56@@ -163,7 +166,7 @@ cputype##_composite_##name (pixman_implementation_t *imp, \
57 \
58 src = _pixman_image_get_solid (src_image, dst_image->bits.format); \
59 \
60- if (src == 0) \
61+ if ((flags & SKIP_ZERO_SRC) && src == 0) \
62 return; \
63 \
64 PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
65@@ -177,7 +180,7 @@ cputype##_composite_##name (pixman_implementation_t *imp, \
66 mask_line, mask_stride); \
67 }
68
69-#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(cputype, name, \
70+#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(flags, cputype, name, \
71 src_type, src_cnt, \
72 dst_type, dst_cnt) \
73 void \
74@@ -211,7 +214,7 @@ cputype##_composite_##name (pixman_implementation_t *imp, \
75 \
76 mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);\
77 \
78- if (mask == 0) \
79+ if ((flags & SKIP_ZERO_MASK) && mask == 0) \
80 return; \
81 \
82 PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
83diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
84index f0dc111..1a3741c 100644
85--- a/pixman/pixman-arm-neon.c
86+++ b/pixman/pixman-arm-neon.c
87@@ -63,33 +63,33 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888,
88 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565,
89 uint8_t, 1, uint16_t, 1)
90
91-PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_0565,
92+PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_0565,
93 uint16_t, 1)
94-PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_8888,
95+PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888,
96 uint32_t, 1)
97-PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_reverse_n_8888,
98+PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888,
99 uint32_t, 1)
100
101-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_0565,
102+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565,
103 uint8_t, 1, uint16_t, 1)
104-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8888,
105+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8888,
106 uint8_t, 1, uint32_t, 1)
107-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8888_8888_ca,
108+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_8888_ca,
109 uint32_t, 1, uint32_t, 1)
110-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8,
111+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8,
112 uint8_t, 1, uint8_t, 1)
113-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8,
114+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8,
115 uint8_t, 1, uint8_t, 1)
116-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8888,
117+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888,
118 uint8_t, 1, uint32_t, 1)
119
120-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888,
121+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888,
122 uint32_t, 1, uint32_t, 1)
123-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_0565,
124+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_0565,
125 uint32_t, 1, uint16_t, 1)
126-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_0565_n_0565,
127+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_0565_n_0565,
128 uint16_t, 1, uint16_t, 1)
129-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, add_8888_n_8888,
130+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, add_8888_n_8888,
131 uint32_t, 1, uint32_t, 1)
132
133 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
134diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
135index 3b05007..dc2f471 100644
136--- a/pixman/pixman-arm-simd.c
137+++ b/pixman/pixman-arm-simd.c
138@@ -381,10 +381,10 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8,
139 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
140 uint32_t, 1, uint32_t, 1)
141
142-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (armv6, over_8888_n_8888,
143+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888,
144 uint32_t, 1, uint32_t, 1)
145
146-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (armv6, over_n_8_8888,
147+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888,
148 uint8_t, 1, uint32_t, 1)
149
150 PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC,
151--
1521.6.6.1
153
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0016-ARM-added-neon_composite_in_n_8-fast-path.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0016-ARM-added-neon_composite_in_n_8-fast-path.patch
new file mode 100644
index 0000000000..8a22f54451
--- /dev/null
+++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0016-ARM-added-neon_composite_in_n_8-fast-path.patch
@@ -0,0 +1,97 @@
1From 733f68912f4a44c24ad3973049a7e1d98f4c6ea8 Mon Sep 17 00:00:00 2001
2From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
3Date: Mon, 29 Nov 2010 09:11:29 +0200
4Subject: [PATCH 16/24] ARM: added 'neon_composite_in_n_8' fast path
5
6---
7 pixman/pixman-arm-neon-asm.S | 52 ++++++++++++++++++++++++++++++++++++++++++
8 pixman/pixman-arm-neon.c | 3 ++
9 2 files changed, 55 insertions(+), 0 deletions(-)
10
11diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
12index 2c0fd37..cf014fa 100644
13--- a/pixman/pixman-arm-neon-asm.S
14+++ b/pixman/pixman-arm-neon-asm.S
15@@ -1427,6 +1427,58 @@ generate_composite_function \
16
17 /******************************************************************************/
18
19+.macro pixman_composite_in_n_8_process_pixblock_head
20+ /* expecting byte 3 of the solid source replicated across d3 (set up by the init macro) */
21+ /* and destination data in {d4, d5, d6, d7} */
22+ vmull.u8 q8, d4, d3
23+ vmull.u8 q9, d5, d3
24+ vmull.u8 q10, d6, d3
25+ vmull.u8 q11, d7, d3
26+.endm
27+
28+.macro pixman_composite_in_n_8_process_pixblock_tail
29+ vrshr.u16 q14, q8, #8
30+ vrshr.u16 q15, q9, #8
31+ vrshr.u16 q12, q10, #8
32+ vrshr.u16 q13, q11, #8
33+ vraddhn.u16 d28, q8, q14
34+ vraddhn.u16 d29, q9, q15
35+ vraddhn.u16 d30, q10, q12
36+ vraddhn.u16 d31, q11, q13
37+.endm
38+
39+.macro pixman_composite_in_n_8_process_pixblock_tail_head
40+ pixman_composite_in_n_8_process_pixblock_tail
41+ vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
42+ cache_preload 32, 32
43+ pixman_composite_in_n_8_process_pixblock_head
44+ vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
45+.endm
46+
47+.macro pixman_composite_in_n_8_init
48+ add DUMMY, sp, #ARGS_STACK_OFFSET
49+ vld1.32 {d3[0]}, [DUMMY]
50+ vdup.8 d3, d3[3]
51+.endm
52+
53+.macro pixman_composite_in_n_8_cleanup
54+.endm
55+
56+generate_composite_function \
57+ pixman_composite_in_n_8_asm_neon, 0, 0, 8, \
58+ FLAG_DST_READWRITE, \
59+ 32, /* number of pixels, processed in a single block */ \
60+ 5, /* prefetch distance */ \
61+ pixman_composite_in_n_8_init, \
62+ pixman_composite_in_n_8_cleanup, \
63+ pixman_composite_in_n_8_process_pixblock_head, \
64+ pixman_composite_in_n_8_process_pixblock_tail, \
65+ pixman_composite_in_n_8_process_pixblock_tail_head, \
66+ 28, /* dst_w_basereg */ \
67+ 4, /* dst_r_basereg */ \
68+ 0, /* src_basereg */ \
69+ 24 /* mask_basereg */
70+
71 .macro pixman_composite_add_n_8_8_process_pixblock_head
72 /* expecting source data in {d8, d9, d10, d11} */
73 /* d8 - blue, d9 - green, d10 - red, d11 - alpha */
74diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
75index 1a3741c..e3eca2b 100644
76--- a/pixman/pixman-arm-neon.c
77+++ b/pixman/pixman-arm-neon.c
78@@ -69,6 +69,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888,
79 uint32_t, 1)
80 PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888,
81 uint32_t, 1)
82+PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, neon, in_n_8,
83+ uint8_t, 1)
84
85 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565,
86 uint8_t, 1, uint16_t, 1)
87@@ -298,6 +300,7 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
88 PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8_8),
89 PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888),
90 PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888),
91+ PIXMAN_STD_FAST_PATH (IN, solid, null, a8, neon_composite_in_n_8),
92 PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888),
93 PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888),
94 PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, r5g6b5, neon_composite_out_reverse_8_0565),
95--
961.6.6.1
97
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0017-add-_pixman_bits_override_accessors.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0017-add-_pixman_bits_override_accessors.patch
new file mode 100644
index 0000000000..a8148d9542
--- /dev/null
+++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0017-add-_pixman_bits_override_accessors.patch
@@ -0,0 +1,75 @@
1From 6593d86679fde724e49efa96b16ca22d9521b288 Mon Sep 17 00:00:00 2001
2From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
3Date: Thu, 10 Dec 2009 00:51:50 +0200
4Subject: [PATCH 17/24] add _pixman_bits_override_accessors
5
6* from patch ARM: HACK: added NEON optimizations for fetch/store r5g6b5 scanline
7* used in
8 0022-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch
9 0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch
10 0024-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch
11---
12 pixman/pixman-access.c | 23 ++++++++++++++++++++++-
13 pixman/pixman-private.h | 5 +++++
14 2 files changed, 27 insertions(+), 1 deletions(-)
15
16diff --git a/pixman/pixman-access.c b/pixman/pixman-access.c
17index f1ce0ba..b33da29 100644
18--- a/pixman/pixman-access.c
19+++ b/pixman/pixman-access.c
20@@ -2836,7 +2836,7 @@ typedef struct
21 store_scanline_ ## format, store_scanline_generic_64 \
22 }
23
24-static const format_info_t accessors[] =
25+static format_info_t accessors[] =
26 {
27 /* 32 bpp formats */
28 FORMAT_INFO (a8r8g8b8),
29@@ -2978,6 +2978,27 @@ _pixman_bits_image_setup_accessors (bits_image_t *image)
30 setup_accessors (image);
31 }
32
33+void
34+_pixman_bits_override_accessors (pixman_format_code_t format,
35+ fetch_scanline_t fetch_func,
36+ store_scanline_t store_func)
37+{
38+ format_info_t *info = accessors;
39+
40+ while (info->format != PIXMAN_null)
41+ {
42+ if (info->format == format)
43+ {
44+ if (fetch_func)
45+ info->fetch_scanline_32 = fetch_func;
46+ if (store_func)
47+ info->store_scanline_32 = store_func;
48+ return;
49+ }
50+ info++;
51+ }
52+}
53+
54 #else
55
56 void
57diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
58index 383748a..969dfab 100644
59--- a/pixman/pixman-private.h
60+++ b/pixman/pixman-private.h
61@@ -197,6 +197,11 @@ void
62 _pixman_bits_image_setup_accessors (bits_image_t *image);
63
64 void
65+_pixman_bits_override_accessors (pixman_format_code_t format,
66+ fetch_scanline_t fetch_func,
67+ store_scanline_t store_func);
68+
69+void
70 _pixman_image_get_scanline_generic_64 (pixman_image_t *image,
71 int x,
72 int y,
73--
741.6.6.1
75
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0018-Generic-C-implementation-of-pixman_blt-with-overlapp.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0018-Generic-C-implementation-of-pixman_blt-with-overlapp.patch
new file mode 100644
index 0000000000..5b1c1089ed
--- /dev/null
+++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0018-Generic-C-implementation-of-pixman_blt-with-overlapp.patch
@@ -0,0 +1,114 @@
1From 8e8b2809b505486001dc213becab0d50bfd96c1b Mon Sep 17 00:00:00 2001
2From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
3Date: Tue, 16 Mar 2010 16:55:28 +0100
4Subject: [PATCH 18/24] Generic C implementation of pixman_blt with overlapping support
5
6Uses memcpy/memmove to copy pixels and can handle the case
7where the source and destination areas are in the same image
8(this is useful for scrolling).
9
10It is assumed that the copying direction only matters when
11the same image is used for both source and destination (and
12src_stride == dst_stride). The copying direction is undefined
13for overlapping areas that belong to images with different
14source and destination strides (but this is an unrealistic
15case anyway).
16---
17 pixman/pixman-general.c | 21 ++++++++++++++++++---
18 pixman/pixman-private.h | 43 +++++++++++++++++++++++++++++++++++++++++++
19 2 files changed, 61 insertions(+), 3 deletions(-)
20
21diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c
22index 4d234a0..c4d2c14 100644
23--- a/pixman/pixman-general.c
24+++ b/pixman/pixman-general.c
25@@ -280,9 +280,24 @@ general_blt (pixman_implementation_t *imp,
26 int width,
27 int height)
28 {
29- /* We can't blit unless we have sse2 or mmx */
30-
31- return FALSE;
32+ uint8_t *dst_bytes = (uint8_t *)dst_bits;
33+ uint8_t *src_bytes = (uint8_t *)src_bits;
34+ int bpp;
35+
36+ if (src_bpp != dst_bpp || src_bpp & 7)
37+ return FALSE;
38+
39+ bpp = src_bpp >> 3;
40+ width *= bpp;
41+ src_stride *= 4;
42+ dst_stride *= 4;
43+ pixman_blt_helper (src_bytes + src_y * src_stride + src_x * bpp,
44+ dst_bytes + dst_y * dst_stride + dst_x * bpp,
45+ src_stride,
46+ dst_stride,
47+ width,
48+ height);
49+ return TRUE;
50 }
51
52 static pixman_bool_t
53diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
54index 969dfab..352bceb 100644
55--- a/pixman/pixman-private.h
56+++ b/pixman/pixman-private.h
57@@ -10,6 +10,7 @@
58
59 #include "pixman.h"
60 #include <time.h>
61+#include <string.h>
62 #include <assert.h>
63 #include <stdio.h>
64 #include <string.h>
65@@ -869,4 +870,46 @@ void pixman_timer_register (pixman_timer_t *timer);
66
67 #endif /* PIXMAN_TIMERS */
68
69+/* a helper function, blits 'height' rows of 'width' bytes with src/dst overlapping support */
70+static inline void
71+pixman_blt_helper (uint8_t *src_bytes,
72+ uint8_t *dst_bytes,
73+ int src_stride,
74+ int dst_stride,
75+ int width,
76+ int height)
77+{
78+ /*
79+ * The second part of this check is not strictly needed, but it prevents
80+ * unnecessary upside-down processing of areas which belong to different
81+ * images. Upside-down processing can be slower with fixed-distance-ahead
82+ * prefetch and perceived as having more tearing.
83+ */
84+ if (src_bytes < dst_bytes + width &&
85+ src_bytes + src_stride * height > dst_bytes)
86+ {
87+ src_bytes += src_stride * height - src_stride;
88+ dst_bytes += dst_stride * height - dst_stride;
89+ dst_stride = -dst_stride;
90+ src_stride = -src_stride;
91+ /* Horizontal scrolling to the left needs memmove */
92+ if (src_bytes + width > dst_bytes)
93+ {
94+ while (--height >= 0)
95+ {
96+ memmove (dst_bytes, src_bytes, width);
97+ dst_bytes += dst_stride;
98+ src_bytes += src_stride;
99+ }
100+ return;
101+ }
102+ }
103+ while (--height >= 0)
104+ {
105+ memcpy (dst_bytes, src_bytes, width);
106+ dst_bytes += dst_stride;
107+ src_bytes += src_stride;
108+ }
109+}
110+
111 #endif /* PIXMAN_PRIVATE_H */
112--
1131.6.6.1
114
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0019-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0019-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch
new file mode 100644
index 0000000000..5193d38f74
--- /dev/null
+++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0019-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch
@@ -0,0 +1,91 @@
1From f5a54f7d5eb1169bc79f0e445e2998e98080ef13 Mon Sep 17 00:00:00 2001
2From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
3Date: Thu, 22 Oct 2009 05:45:47 +0300
4Subject: [PATCH 19/24] Support of overlapping src/dst for pixman_blt_mmx
5
6---
7 pixman/pixman-mmx.c | 55 +++++++++++++++++++++++++++++---------------------
8 1 files changed, 32 insertions(+), 23 deletions(-)
9
10diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
11index 34637a4..f9dd473 100644
12--- a/pixman/pixman-mmx.c
13+++ b/pixman/pixman-mmx.c
14@@ -2996,34 +2996,43 @@ pixman_blt_mmx (uint32_t *src_bits,
15 {
16 uint8_t * src_bytes;
17 uint8_t * dst_bytes;
18- int byte_width;
19+ int bpp;
20
21- if (src_bpp != dst_bpp)
22+ if (src_bpp != dst_bpp || src_bpp & 7)
23 return FALSE;
24
25- if (src_bpp == 16)
26- {
27- src_stride = src_stride * (int) sizeof (uint32_t) / 2;
28- dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
29- src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
30- dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
31- byte_width = 2 * width;
32- src_stride *= 2;
33- dst_stride *= 2;
34- }
35- else if (src_bpp == 32)
36+ bpp = src_bpp >> 3;
37+ width *= bpp;
38+ src_stride *= 4;
39+ dst_stride *= 4;
40+ src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp;
41+ dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp;
42+
43+ if (src_bpp != 16 && src_bpp != 32)
44 {
45- src_stride = src_stride * (int) sizeof (uint32_t) / 4;
46- dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
47- src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
48- dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
49- byte_width = 4 * width;
50- src_stride *= 4;
51- dst_stride *= 4;
52+ pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride,
53+ width, height);
54+ return TRUE;
55 }
56- else
57+
58+ if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes)
59 {
60- return FALSE;
61+ src_bytes += src_stride * height - src_stride;
62+ dst_bytes += dst_stride * height - dst_stride;
63+ dst_stride = -dst_stride;
64+ src_stride = -src_stride;
65+
66+ if (src_bytes + width > dst_bytes)
67+ {
68+ /* TODO: reverse scanline copy using MMX */
69+ while (--height >= 0)
70+ {
71+ memmove (dst_bytes, src_bytes, width);
72+ dst_bytes += dst_stride;
73+ src_bytes += src_stride;
74+ }
75+ return TRUE;
76+ }
77 }
78
79 while (height--)
80@@ -3033,7 +3042,7 @@ pixman_blt_mmx (uint32_t *src_bits,
81 uint8_t *d = dst_bytes;
82 src_bytes += src_stride;
83 dst_bytes += dst_stride;
84- w = byte_width;
85+ w = width;
86
87 while (w >= 2 && ((unsigned long)d & 3))
88 {
89--
901.6.6.1
91
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0020-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0020-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch
new file mode 100644
index 0000000000..f5c0e12f24
--- /dev/null
+++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0020-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch
@@ -0,0 +1,91 @@
1From c8755294fa9ea396f7113370230b17c424a93be1 Mon Sep 17 00:00:00 2001
2From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
3Date: Thu, 22 Oct 2009 05:45:54 +0300
4Subject: [PATCH 20/24] Support of overlapping src/dst for pixman_blt_sse2
5
6---
7 pixman/pixman-sse2.c | 55 +++++++++++++++++++++++++++++--------------------
8 1 files changed, 32 insertions(+), 23 deletions(-)
9
10diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
11index 5907de0..25015ae 100644
12--- a/pixman/pixman-sse2.c
13+++ b/pixman/pixman-sse2.c
14@@ -5027,34 +5027,43 @@ pixman_blt_sse2 (uint32_t *src_bits,
15 {
16 uint8_t * src_bytes;
17 uint8_t * dst_bytes;
18- int byte_width;
19+ int bpp;
20
21- if (src_bpp != dst_bpp)
22+ if (src_bpp != dst_bpp || src_bpp & 7)
23 return FALSE;
24
25- if (src_bpp == 16)
26- {
27- src_stride = src_stride * (int) sizeof (uint32_t) / 2;
28- dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
29- src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
30- dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
31- byte_width = 2 * width;
32- src_stride *= 2;
33- dst_stride *= 2;
34- }
35- else if (src_bpp == 32)
36+ bpp = src_bpp >> 3;
37+ width *= bpp;
38+ src_stride *= 4;
39+ dst_stride *= 4;
40+ src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp;
41+ dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp;
42+
43+ if (src_bpp != 16 && src_bpp != 32)
44 {
45- src_stride = src_stride * (int) sizeof (uint32_t) / 4;
46- dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
47- src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
48- dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
49- byte_width = 4 * width;
50- src_stride *= 4;
51- dst_stride *= 4;
52+ pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride,
53+ width, height);
54+ return TRUE;
55 }
56- else
57+
58+ if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes)
59 {
60- return FALSE;
61+ src_bytes += src_stride * height - src_stride;
62+ dst_bytes += dst_stride * height - dst_stride;
63+ dst_stride = -dst_stride;
64+ src_stride = -src_stride;
65+
66+ if (src_bytes + width > dst_bytes)
67+ {
68+ /* TODO: reverse scanline copy using SSE2 */
69+ while (--height >= 0)
70+ {
71+ memmove (dst_bytes, src_bytes, width);
72+ dst_bytes += dst_stride;
73+ src_bytes += src_stride;
74+ }
75+ return TRUE;
76+ }
77 }
78
79 while (height--)
80@@ -5064,7 +5073,7 @@ pixman_blt_sse2 (uint32_t *src_bits,
81 uint8_t *d = dst_bytes;
82 src_bytes += src_stride;
83 dst_bytes += dst_stride;
84- w = byte_width;
85+ w = width;
86
87 while (w >= 2 && ((unsigned long)d & 3))
88 {
89--
901.6.6.1
91
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0021-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0021-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch
new file mode 100644
index 0000000000..0eb9d88eba
--- /dev/null
+++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0021-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch
@@ -0,0 +1,94 @@
1From 86c8198598ef6d639e656c04644015795cc249aa Mon Sep 17 00:00:00 2001
2From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
3Date: Wed, 18 Nov 2009 06:08:48 +0200
4Subject: [PATCH 21/24] Support of overlapping src/dst for pixman_blt_neon
5
6---
7 pixman/pixman-arm-neon.c | 62 +++++++++++++++++++++++++++++++++++++--------
8 1 files changed, 51 insertions(+), 11 deletions(-)
9
10diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
11index e3eca2b..74316a8 100644
12--- a/pixman/pixman-arm-neon.c
13+++ b/pixman/pixman-arm-neon.c
14@@ -199,26 +199,66 @@ pixman_blt_neon (uint32_t *src_bits,
15 int width,
16 int height)
17 {
18- if (src_bpp != dst_bpp)
19+ uint8_t * src_bytes;
20+ uint8_t * dst_bytes;
21+ int bpp;
22+
23+ if (src_bpp != dst_bpp || src_bpp & 7)
24 return FALSE;
25
26+ bpp = src_bpp >> 3;
27+ width *= bpp;
28+ src_stride *= 4;
29+ dst_stride *= 4;
30+ src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp;
31+ dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp;
32+
33+ if (src_bpp != 16 && src_bpp != 32)
34+ {
35+ pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride,
36+ width, height);
37+ return TRUE;
38+ }
39+
40+ if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes)
41+ {
42+ src_bytes += src_stride * height - src_stride;
43+ dst_bytes += dst_stride * height - dst_stride;
44+ dst_stride = -dst_stride;
45+ src_stride = -src_stride;
46+
47+ if (src_bytes + width > dst_bytes)
48+ {
49+ /* TODO: reverse scanline copy using NEON */
50+ while (--height >= 0)
51+ {
52+ memmove (dst_bytes, src_bytes, width);
53+ dst_bytes += dst_stride;
54+ src_bytes += src_stride;
55+ }
56+ return TRUE;
57+ }
58+ }
59+
60 switch (src_bpp)
61 {
62 case 16:
63 pixman_composite_src_0565_0565_asm_neon (
64- width, height,
65- (uint16_t *)(((char *) dst_bits) +
66- dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2,
67- (uint16_t *)(((char *) src_bits) +
68- src_y * src_stride * 4 + src_x * 2), src_stride * 2);
69+ width >> 1,
70+ height,
71+ (uint16_t *) dst_bytes,
72+ dst_stride >> 1,
73+ (uint16_t *) src_bytes,
74+ src_stride >> 1);
75 return TRUE;
76 case 32:
77 pixman_composite_src_8888_8888_asm_neon (
78- width, height,
79- (uint32_t *)(((char *) dst_bits) +
80- dst_y * dst_stride * 4 + dst_x * 4), dst_stride,
81- (uint32_t *)(((char *) src_bits) +
82- src_y * src_stride * 4 + src_x * 4), src_stride);
83+ width >> 2,
84+ height,
85+ (uint32_t *) dst_bytes,
86+ dst_stride >> 2,
87+ (uint32_t *) src_bytes,
88+ src_stride >> 2);
89 return TRUE;
90 default:
91 return FALSE;
92--
931.6.6.1
94
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0022-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0022-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch
new file mode 100644
index 0000000000..129c1f1bb6
--- /dev/null
+++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0022-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch
@@ -0,0 +1,109 @@
1From 60d972afbae8613d700d3a6b3cb107429d7e11c6 Mon Sep 17 00:00:00 2001
2From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
3Date: Thu, 10 Dec 2009 00:51:50 +0200
4Subject: [PATCH 22/24] ARM: added NEON optimizations for fetch/store r5g6b5 scanline
5
6---
7 pixman/pixman-arm-neon-asm.S | 20 ++++++++++++++++++++
8 pixman/pixman-arm-neon.c | 40 ++++++++++++++++++++++++++++++++++++++++
9 2 files changed, 60 insertions(+), 0 deletions(-)
10
11diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
12index cf014fa..25f7bf0 100644
13--- a/pixman/pixman-arm-neon-asm.S
14+++ b/pixman/pixman-arm-neon-asm.S
15@@ -459,6 +459,16 @@ generate_composite_function \
16 pixman_composite_src_8888_0565_process_pixblock_tail, \
17 pixman_composite_src_8888_0565_process_pixblock_tail_head
18
19+generate_composite_function_single_scanline \
20+ pixman_store_scanline_r5g6b5_asm_neon, 32, 0, 16, \
21+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
22+ 8, /* number of pixels, processed in a single block */ \
23+ default_init, \
24+ default_cleanup, \
25+ pixman_composite_src_8888_0565_process_pixblock_head, \
26+ pixman_composite_src_8888_0565_process_pixblock_tail, \
27+ pixman_composite_src_8888_0565_process_pixblock_tail_head
28+
29 /******************************************************************************/
30
31 .macro pixman_composite_src_0565_8888_process_pixblock_head
32@@ -494,6 +504,16 @@ generate_composite_function \
33 pixman_composite_src_0565_8888_process_pixblock_tail, \
34 pixman_composite_src_0565_8888_process_pixblock_tail_head
35
36+generate_composite_function_single_scanline \
37+ pixman_fetch_scanline_r5g6b5_asm_neon, 16, 0, 32, \
38+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
39+ 8, /* number of pixels, processed in a single block */ \
40+ default_init, \
41+ default_cleanup, \
42+ pixman_composite_src_0565_8888_process_pixblock_head, \
43+ pixman_composite_src_0565_8888_process_pixblock_tail, \
44+ pixman_composite_src_0565_8888_process_pixblock_tail_head
45+
46 /******************************************************************************/
47
48 .macro pixman_composite_add_8_8_process_pixblock_head
49diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
50index 74316a8..f773e92 100644
51--- a/pixman/pixman-arm-neon.c
52+++ b/pixman/pixman-arm-neon.c
53@@ -448,6 +448,42 @@ BIND_COMBINE_U (over)
54 BIND_COMBINE_U (add)
55 BIND_COMBINE_U (out_reverse)
56
57+void
58+pixman_fetch_scanline_r5g6b5_asm_neon (int width,
59+ uint32_t *buffer,
60+ const uint16_t *pixel);
61+void
62+pixman_store_scanline_r5g6b5_asm_neon (int width,
63+ uint16_t *pixel,
64+ const uint32_t *values);
65+
66+static void
67+neon_fetch_scanline_r5g6b5 (pixman_image_t *image,
68+ int x,
69+ int y,
70+ int width,
71+ uint32_t * buffer,
72+ const uint32_t *mask)
73+{
74+ const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
75+ const uint16_t *pixel = (const uint16_t *)bits + x;
76+
77+ pixman_fetch_scanline_r5g6b5_asm_neon (width, buffer, pixel);
78+}
79+
80+static void
81+neon_store_scanline_r5g6b5 (bits_image_t * image,
82+ int x,
83+ int y,
84+ int width,
85+ const uint32_t *values)
86+{
87+ uint32_t *bits = image->bits + image->rowstride * y;
88+ uint16_t *pixel = ((uint16_t *) bits) + x;
89+
90+ pixman_store_scanline_r5g6b5_asm_neon (width, pixel, values);
91+}
92+
93 pixman_implementation_t *
94 _pixman_implementation_create_arm_neon (void)
95 {
96@@ -463,6 +499,10 @@ _pixman_implementation_create_arm_neon (void)
97 imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u;
98 imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u;
99
100+ _pixman_bits_override_accessors (PIXMAN_r5g6b5,
101+ neon_fetch_scanline_r5g6b5,
102+ neon_store_scanline_r5g6b5);
103+
104 imp->blt = arm_neon_blt;
105 imp->fill = arm_neon_fill;
106
107--
1081.6.6.1
109
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch
new file mode 100644
index 0000000000..7724f5433e
--- /dev/null
+++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch
@@ -0,0 +1,148 @@
1From cc99d8d6fcbabd7f9f3ed99e65c78a2fb71792fa Mon Sep 17 00:00:00 2001
2From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
3Date: Thu, 23 Sep 2010 21:10:56 +0300
4Subject: [PATCH 23/24] ARM: added NEON optimizations for fetch/store a8 scanline
5
6---
7 pixman/pixman-arm-neon-asm.S | 64 ++++++++++++++++++++++++++++++++++++++++++
8 pixman/pixman-arm-neon.c | 42 +++++++++++++++++++++++++++
9 2 files changed, 106 insertions(+), 0 deletions(-)
10
11diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
12index 25f7bf0..439b06b 100644
13--- a/pixman/pixman-arm-neon-asm.S
14+++ b/pixman/pixman-arm-neon-asm.S
15@@ -418,6 +418,70 @@ generate_composite_function \
16
17 /******************************************************************************/
18
19+.macro pixman_composite_src_8_8888_process_pixblock_head
20+ /* This is tricky part: we can't set these values just once in 'init' macro
21+ * because leading/trailing pixels handling part uses VZIP.8 instructions,
22+ * and they operate on values in-place and destroy original registers
23+ * content. Think about it like VST4.8 instruction corrupting NEON
24+ * registers after write in 'tail_head' macro. Except that 'tail_head'
25+ * macro itself actually does not need these extra VMOVs because it uses
26+ * real VST4.8 instruction.
27+ */
28+ vmov.u8 q0, #0
29+ vmov.u8 d2, #0
30+.endm
31+
32+.macro pixman_composite_src_8_8888_process_pixblock_tail
33+.endm
34+
35+.macro pixman_composite_src_8_8888_process_pixblock_tail_head
36+ vst4.8 {d0, d1, d2, d3}, [DST_W, :128]!
37+ vld1.8 {d3}, [SRC]!
38+.endm
39+
40+generate_composite_function_single_scanline \
41+ pixman_fetch_scanline_a8_asm_neon, 8, 0, 32, \
42+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
43+ 8, /* number of pixels, processed in a single block */ \
44+ default_init, \
45+ default_cleanup, \
46+ pixman_composite_src_8_8888_process_pixblock_head, \
47+ pixman_composite_src_8_8888_process_pixblock_tail, \
48+ pixman_composite_src_8_8888_process_pixblock_tail_head, \
49+ 0, /* dst_w_basereg */ \
50+ 0, /* dst_r_basereg */ \
51+ 3, /* src_basereg */ \
52+ 0 /* mask_basereg */
53+
54+/******************************************************************************/
55+
56+.macro pixman_composite_src_8888_8_process_pixblock_head
57+.endm
58+
59+.macro pixman_composite_src_8888_8_process_pixblock_tail
60+.endm
61+
62+.macro pixman_composite_src_8888_8_process_pixblock_tail_head
63+ vst1.8 {d3}, [DST_W, :64]!
64+ vld4.8 {d0, d1, d2, d3}, [SRC]!
65+.endm
66+
67+generate_composite_function_single_scanline \
68+ pixman_store_scanline_a8_asm_neon, 32, 0, 8, \
69+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
70+ 8, /* number of pixels, processed in a single block */ \
71+ default_init, \
72+ default_cleanup, \
73+ pixman_composite_src_8888_8_process_pixblock_head, \
74+ pixman_composite_src_8888_8_process_pixblock_tail, \
75+ pixman_composite_src_8888_8_process_pixblock_tail_head, \
76+ 3, /* dst_w_basereg */ \
77+ 0, /* dst_r_basereg */ \
78+ 0, /* src_basereg */ \
79+ 0 /* mask_basereg */
80+
81+/******************************************************************************/
82+
83 .macro pixman_composite_src_8888_0565_process_pixblock_head
84 vshll.u8 q8, d1, #8
85 vshll.u8 q14, d2, #8
86diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
87index f773e92..55219b3 100644
88--- a/pixman/pixman-arm-neon.c
89+++ b/pixman/pixman-arm-neon.c
90@@ -484,6 +484,45 @@ neon_store_scanline_r5g6b5 (bits_image_t * image,
91 pixman_store_scanline_r5g6b5_asm_neon (width, pixel, values);
92 }
93
94+void
95+pixman_fetch_scanline_a8_asm_neon (int width,
96+ uint32_t *buffer,
97+ const uint8_t *pixel);
98+
99+
100+void
101+pixman_store_scanline_a8_asm_neon (int width,
102+ uint8_t *pixel,
103+ const uint32_t *values);
104+
105+static void
106+neon_fetch_scanline_a8 (pixman_image_t *image,
107+ int x,
108+ int y,
109+ int width,
110+ uint32_t * buffer,
111+ const uint32_t *mask)
112+{
113+ const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
114+ const uint8_t *pixel = (const uint8_t *) bits + x;
115+
116+ pixman_fetch_scanline_a8_asm_neon (width, buffer, pixel);
117+}
118+
119+static void
120+neon_store_scanline_a8 (bits_image_t * image,
121+ int x,
122+ int y,
123+ int width,
124+ const uint32_t *values)
125+{
126+ uint32_t *bits = image->bits + image->rowstride * y;
127+ uint8_t *pixel = (uint8_t *) bits + x;
128+
129+ pixman_store_scanline_a8_asm_neon (width, pixel, values);
130+}
131+
132+
133 pixman_implementation_t *
134 _pixman_implementation_create_arm_neon (void)
135 {
136@@ -502,6 +541,9 @@ _pixman_implementation_create_arm_neon (void)
137 _pixman_bits_override_accessors (PIXMAN_r5g6b5,
138 neon_fetch_scanline_r5g6b5,
139 neon_store_scanline_r5g6b5);
140+ _pixman_bits_override_accessors (PIXMAN_a8,
141+ neon_fetch_scanline_a8,
142+ neon_store_scanline_a8);
143
144 imp->blt = arm_neon_blt;
145 imp->fill = arm_neon_fill;
146--
1471.6.6.1
148
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0024-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0024-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch
new file mode 100644
index 0000000000..8253f41b8f
--- /dev/null
+++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0024-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch
@@ -0,0 +1,77 @@
1From cf3b8fdc53144ff62c4054996559d3a1a4d62b75 Mon Sep 17 00:00:00 2001
2From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
3Date: Fri, 24 Sep 2010 18:22:44 +0300
4Subject: [PATCH 24/24] ARM: added NEON optimizations for fetching x8r8g8b8 scanline
5
6---
7 pixman/pixman-arm-neon-asm.S | 14 ++++++++++++++
8 pixman/pixman-arm-neon.c | 21 +++++++++++++++++++++
9 2 files changed, 35 insertions(+), 0 deletions(-)
10
11diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
12index 439b06b..3e0dcfe 100644
13--- a/pixman/pixman-arm-neon-asm.S
14+++ b/pixman/pixman-arm-neon-asm.S
15@@ -1257,6 +1257,20 @@ generate_composite_function \
16 0, /* src_basereg */ \
17 0 /* mask_basereg */
18
19+generate_composite_function_single_scanline \
20+ pixman_fetch_scanline_x888_asm_neon, 32, 0, 32, \
21+ FLAG_DST_WRITEONLY, \
22+ 8, /* number of pixels, processed in a single block */ \
23+ pixman_composite_src_x888_8888_init, \
24+ default_cleanup, \
25+ pixman_composite_src_x888_8888_process_pixblock_head, \
26+ pixman_composite_src_x888_8888_process_pixblock_tail, \
27+ pixman_composite_src_x888_8888_process_pixblock_tail_head, \
28+ 0, /* dst_w_basereg */ \
29+ 0, /* dst_r_basereg */ \
30+ 0, /* src_basereg */ \
31+ 0 /* mask_basereg */
32+
33 /******************************************************************************/
34
35 .macro pixman_composite_over_n_8_8888_process_pixblock_head
36diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
37index 55219b3..8cef414 100644
38--- a/pixman/pixman-arm-neon.c
39+++ b/pixman/pixman-arm-neon.c
40@@ -522,6 +522,24 @@ neon_store_scanline_a8 (bits_image_t * image,
41 pixman_store_scanline_a8_asm_neon (width, pixel, values);
42 }
43
44+void
45+pixman_fetch_scanline_x888_asm_neon (int width,
46+ uint32_t *buffer,
47+ const uint32_t *pixel);
48+
49+static void
50+neon_fetch_scanline_x888 (pixman_image_t *image,
51+ int x,
52+ int y,
53+ int width,
54+ uint32_t * buffer,
55+ const uint32_t *mask)
56+{
57+ const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
58+ const uint32_t *pixel = (const uint32_t *) bits + x;
59+
60+ pixman_fetch_scanline_x888_asm_neon (width, buffer, pixel);
61+}
62
63 pixman_implementation_t *
64 _pixman_implementation_create_arm_neon (void)
65@@ -544,6 +562,9 @@ _pixman_implementation_create_arm_neon (void)
66 _pixman_bits_override_accessors (PIXMAN_a8,
67 neon_fetch_scanline_a8,
68 neon_store_scanline_a8);
69+ _pixman_bits_override_accessors (PIXMAN_x8r8g8b8,
70+ neon_fetch_scanline_x888,
71+ NULL);
72
73 imp->blt = arm_neon_blt;
74 imp->fill = arm_neon_fill;
75--
761.6.6.1
77
diff --git a/recipes-graphics/xorg-lib/pixman_0.21.2.bb b/recipes-graphics/xorg-lib/pixman_0.21.2.bb
new file mode 100644
index 0000000000..19394d635b
--- /dev/null
+++ b/recipes-graphics/xorg-lib/pixman_0.21.2.bb
@@ -0,0 +1,37 @@
1require pixman.inc
2
3SRC_URI[archive.md5sum] = "9e09fd6e58cbf9717140891e0b7d4a7a"
4SRC_URI[archive.sha256sum] = "295f51416caf307ff7caf1153ee9b1d86b9f7f02a7876d12db6538d80451c5de"
5
6PR = "${INC_PR}.1"
7
8SRC_URI += "\
9 file://0002-Fix-argument-quoting-for-AC_INIT.patch \
10 file://0003-Sun-s-copyrights-belong-to-Oracle-now.patch \
11 file://0004-C-fast-path-for-a1-fill-operation.patch \
12 file://0005-ARM-added-neon_composite_over_n_8_8-fast-path.patch \
13 file://0006-ARM-introduced-fetch_mask_pixblock-macro-to-simplify.patch \
14 file://0007-ARM-better-NEON-instructions-scheduling-for-over_n_8.patch \
15 file://0008-ARM-added-neon_composite_over_8888_n_0565-fast-path.patch \
16 file://0009-ARM-reuse-common-NEON-code-for-over_-n_8-8888_n-8888.patch \
17 file://0010-ARM-added-neon_composite_over_0565_n_0565-fast-path.patch \
18 file://0011-ARM-added-neon_composite_add_8888_8_8888-fast-path.patch \
19 file://0012-ARM-better-NEON-instructions-scheduling-for-add_8888.patch \
20 file://0013-ARM-added-neon_composite_add_n_8_8888-fast-path.patch \
21 file://0014-ARM-added-neon_composite_add_8888_n_8888-fast-path.patch \
22 file://0015-ARM-added-flags-parameter-to-some-asm-fast-path-wrap.patch \
23 file://0016-ARM-added-neon_composite_in_n_8-fast-path.patch \
24 file://0017-add-_pixman_bits_override_accessors.patch \
25 file://0018-Generic-C-implementation-of-pixman_blt-with-overlapp.patch \
26 file://0019-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch \
27 file://0020-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch \
28 file://0021-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch \
29 file://0022-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch \
30 file://0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch \
31 file://0024-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch \
32"
33
34NEON = " --disable-arm-neon "
35NEON_armv7a = " "
36
37EXTRA_OECONF = "${NEON} --disable-gtk"