summaryrefslogtreecommitdiffstats
path: root/recipes-graphics/xorg-lib/pixman-0.21.2/0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch
diff options
context:
space:
mode:
author Koen Kooi <koen@dominion.thruhere.net> 2010-12-04 21:40:49 +0100
committer Koen Kooi <koen@dominion.thruhere.net> 2010-12-04 21:40:49 +0100
commit 39fb00c188032075a0a8298e333e6914bd88e53a (patch)
tree ccb10a0b7064862bcce144316b9392614d355b2d /recipes-graphics/xorg-lib/pixman-0.21.2/0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch
parent 1857df74ac9bfec16d0274fe972fda7bb1f99e6b (diff)
download meta-openembedded-39fb00c188032075a0a8298e333e6914bd88e53a.tar.gz
meta-openembedded: import pixman 0.21.2 from OE
Signed-off-by: Koen Kooi <koen@dominion.thruhere.net>
Diffstat (limited to 'recipes-graphics/xorg-lib/pixman-0.21.2/0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch')
-rw-r--r-- recipes-graphics/xorg-lib/pixman-0.21.2/0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch 148
1 files changed, 148 insertions, 0 deletions
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch
new file mode 100644
index 0000000000..7724f5433e
--- /dev/null
+++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch
@@ -0,0 +1,148 @@
1From cc99d8d6fcbabd7f9f3ed99e65c78a2fb71792fa Mon Sep 17 00:00:00 2001
2From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
3Date: Thu, 23 Sep 2010 21:10:56 +0300
4Subject: [PATCH 23/24] ARM: added NEON optimizations for fetch/store a8 scanline
5
6---
7 pixman/pixman-arm-neon-asm.S | 64 ++++++++++++++++++++++++++++++++++++++++++
8 pixman/pixman-arm-neon.c | 42 +++++++++++++++++++++++++++
9 2 files changed, 106 insertions(+), 0 deletions(-)
10
11diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
12index 25f7bf0..439b06b 100644
13--- a/pixman/pixman-arm-neon-asm.S
14+++ b/pixman/pixman-arm-neon-asm.S
15@@ -418,6 +418,70 @@ generate_composite_function \
16
17 /******************************************************************************/
18
19+.macro pixman_composite_src_8_8888_process_pixblock_head
20+ /* This is the tricky part: these values can't be set just once in the
21+ * 'init' macro, because the code handling leading/trailing pixels uses
22+ * VZIP.8 instructions, which operate on values in place and destroy the
23+ * original register contents. Think of it as if the VST4.8 instruction
24+ * in the 'tail_head' macro corrupted the NEON registers after the write.
25+ * The 'tail_head' macro itself does not actually need these extra VMOVs,
26+ * because it uses a real VST4.8 instruction.
27+ */
28+ vmov.u8 q0, #0
29+ vmov.u8 d2, #0
30+.endm
31+
32+.macro pixman_composite_src_8_8888_process_pixblock_tail
33+.endm
34+
35+.macro pixman_composite_src_8_8888_process_pixblock_tail_head
36+ vst4.8 {d0, d1, d2, d3}, [DST_W, :128]!
37+ vld1.8 {d3}, [SRC]!
38+.endm
39+
40+generate_composite_function_single_scanline \
41+ pixman_fetch_scanline_a8_asm_neon, 8, 0, 32, \
42+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
43+ 8, /* number of pixels, processed in a single block */ \
44+ default_init, \
45+ default_cleanup, \
46+ pixman_composite_src_8_8888_process_pixblock_head, \
47+ pixman_composite_src_8_8888_process_pixblock_tail, \
48+ pixman_composite_src_8_8888_process_pixblock_tail_head, \
49+ 0, /* dst_w_basereg */ \
50+ 0, /* dst_r_basereg */ \
51+ 3, /* src_basereg */ \
52+ 0 /* mask_basereg */
53+
54+/******************************************************************************/
55+
56+.macro pixman_composite_src_8888_8_process_pixblock_head
57+.endm
58+
59+.macro pixman_composite_src_8888_8_process_pixblock_tail
60+.endm
61+
62+.macro pixman_composite_src_8888_8_process_pixblock_tail_head
63+ vst1.8 {d3}, [DST_W, :64]!
64+ vld4.8 {d0, d1, d2, d3}, [SRC]!
65+.endm
66+
67+generate_composite_function_single_scanline \
68+ pixman_store_scanline_a8_asm_neon, 32, 0, 8, \
69+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
70+ 8, /* number of pixels, processed in a single block */ \
71+ default_init, \
72+ default_cleanup, \
73+ pixman_composite_src_8888_8_process_pixblock_head, \
74+ pixman_composite_src_8888_8_process_pixblock_tail, \
75+ pixman_composite_src_8888_8_process_pixblock_tail_head, \
76+ 3, /* dst_w_basereg */ \
77+ 0, /* dst_r_basereg */ \
78+ 0, /* src_basereg */ \
79+ 0 /* mask_basereg */
80+
81+/******************************************************************************/
82+
83 .macro pixman_composite_src_8888_0565_process_pixblock_head
84 vshll.u8 q8, d1, #8
85 vshll.u8 q14, d2, #8
86diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
87index f773e92..55219b3 100644
88--- a/pixman/pixman-arm-neon.c
89+++ b/pixman/pixman-arm-neon.c
90@@ -484,6 +484,45 @@ neon_store_scanline_r5g6b5 (bits_image_t * image,
91 pixman_store_scanline_r5g6b5_asm_neon (width, pixel, values);
92 }
93
94+void
95+pixman_fetch_scanline_a8_asm_neon (int width,
96+ uint32_t *buffer,
97+ const uint8_t *pixel);
98+
99+
100+void
101+pixman_store_scanline_a8_asm_neon (int width,
102+ uint8_t *pixel,
103+ const uint32_t *values);
104+
105+static void
106+neon_fetch_scanline_a8 (pixman_image_t *image,
107+ int x,
108+ int y,
109+ int width,
110+ uint32_t * buffer,
111+ const uint32_t *mask)
112+{
113+ const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
114+ const uint8_t *pixel = (const uint8_t *) bits + x;
115+
116+ pixman_fetch_scanline_a8_asm_neon (width, buffer, pixel);
117+}
118+
119+static void
120+neon_store_scanline_a8 (bits_image_t * image,
121+ int x,
122+ int y,
123+ int width,
124+ const uint32_t *values)
125+{
126+ uint32_t *bits = image->bits + image->rowstride * y;
127+ uint8_t *pixel = (uint8_t *) bits + x;
128+
129+ pixman_store_scanline_a8_asm_neon (width, pixel, values);
130+}
131+
132+
133 pixman_implementation_t *
134 _pixman_implementation_create_arm_neon (void)
135 {
136@@ -502,6 +541,9 @@ _pixman_implementation_create_arm_neon (void)
137 _pixman_bits_override_accessors (PIXMAN_r5g6b5,
138 neon_fetch_scanline_r5g6b5,
139 neon_store_scanline_r5g6b5);
140+ _pixman_bits_override_accessors (PIXMAN_a8,
141+ neon_fetch_scanline_a8,
142+ neon_store_scanline_a8);
143
144 imp->blt = arm_neon_blt;
145 imp->fill = arm_neon_fill;
146--
1471.6.6.1
148