diff options
author | Koen Kooi <koen@dominion.thruhere.net> | 2010-12-04 21:40:49 +0100 |
---|---|---|
committer | Koen Kooi <koen@dominion.thruhere.net> | 2010-12-04 21:40:49 +0100 |
commit | 39fb00c188032075a0a8298e333e6914bd88e53a (patch) | |
tree | ccb10a0b7064862bcce144316b9392614d355b2d /recipes-graphics/xorg-lib/pixman-0.21.2/0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch | |
parent | 1857df74ac9bfec16d0274fe972fda7bb1f99e6b (diff) | |
download | meta-openembedded-39fb00c188032075a0a8298e333e6914bd88e53a.tar.gz |
meta-openembedded: import pixman 0.21.2 from OE
Signed-off-by: Koen Kooi <koen@dominion.thruhere.net>
Diffstat (limited to 'recipes-graphics/xorg-lib/pixman-0.21.2/0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch')
-rw-r--r-- | recipes-graphics/xorg-lib/pixman-0.21.2/0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch | 148 |
1 files changed, 148 insertions, 0 deletions
diff --git a/recipes-graphics/xorg-lib/pixman-0.21.2/0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch b/recipes-graphics/xorg-lib/pixman-0.21.2/0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch new file mode 100644 index 0000000000..7724f5433e --- /dev/null +++ b/recipes-graphics/xorg-lib/pixman-0.21.2/0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch | |||
@@ -0,0 +1,148 @@ | |||
1 | From cc99d8d6fcbabd7f9f3ed99e65c78a2fb71792fa Mon Sep 17 00:00:00 2001 | ||
2 | From: Siarhei Siamashka <siarhei.siamashka@nokia.com> | ||
3 | Date: Thu, 23 Sep 2010 21:10:56 +0300 | ||
4 | Subject: [PATCH 23/24] ARM: added NEON optimizations for fetch/store a8 scanline | ||
5 | |||
6 | --- | ||
7 | pixman/pixman-arm-neon-asm.S | 64 ++++++++++++++++++++++++++++++++++++++++++ | ||
8 | pixman/pixman-arm-neon.c | 42 +++++++++++++++++++++++++++ | ||
9 | 2 files changed, 106 insertions(+), 0 deletions(-) | ||
10 | |||
11 | diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S | ||
12 | index 25f7bf0..439b06b 100644 | ||
13 | --- a/pixman/pixman-arm-neon-asm.S | ||
14 | +++ b/pixman/pixman-arm-neon-asm.S | ||
15 | @@ -418,6 +418,70 @@ generate_composite_function \ | ||
16 | |||
17 | /******************************************************************************/ | ||
18 | |||
19 | +.macro pixman_composite_src_8_8888_process_pixblock_head | |
20 | + /* This is the tricky part: we can't set these values just once in the | |
21 | + * 'init' macro, because the code handling leading/trailing pixels uses | |
22 | + * VZIP.8 instructions, which operate in-place and destroy the original | |
23 | + * register contents. Think of it as a VST4.8 instruction that corrupts | |
24 | + * the NEON registers after the write in the 'tail_head' macro. The | |
25 | + * 'tail_head' macro itself does not need these extra VMOVs, because it | |
26 | + * uses a real VST4.8 instruction. | |
27 | + */ | |
28 | + vmov.u8 q0, #0 /* re-zeroed for every block, see the comment above */ | |
29 | + vmov.u8 d2, #0 /* likewise re-zeroed for every block */ | |
30 | +.endm | |
31 | + | |
32 | +.macro pixman_composite_src_8_8888_process_pixblock_tail /* emits no instructions */ | |
33 | +.endm | |
34 | + | |
35 | +.macro pixman_composite_src_8_8888_process_pixblock_tail_head | |
36 | + vst4.8 {d0, d1, d2, d3}, [DST_W, :128]! /* write out d0-d3 through DST_W, then advance DST_W */ | |
37 | + vld1.8 {d3}, [SRC]! /* load the next source bytes into d3 (src_basereg is 3 below) */ | |
38 | +.endm | |
39 | + | |
40 | +generate_composite_function_single_scanline \ | |
41 | + pixman_fetch_scanline_a8_asm_neon, 8, 0, 32, /* presumably src, mask and dst bits per pixel - confirm against the generator macro */ \ | |
42 | + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ | |
43 | + 8, /* number of pixels, processed in a single block */ \ | |
44 | + default_init, \ | |
45 | + default_cleanup, \ | |
46 | + pixman_composite_src_8_8888_process_pixblock_head, \ | |
47 | + pixman_composite_src_8_8888_process_pixblock_tail, \ | |
48 | + pixman_composite_src_8_8888_process_pixblock_tail_head, \ | |
49 | + 0, /* dst_w_basereg */ \ | |
50 | + 0, /* dst_r_basereg */ \ | |
51 | + 3, /* src_basereg */ \ | |
52 | + 0 /* mask_basereg */ | |
53 | + | ||
54 | +/******************************************************************************/ | ||
55 | + | ||
56 | +.macro pixman_composite_src_8888_8_process_pixblock_head /* emits no instructions */ | |
57 | +.endm | |
58 | + | |
59 | +.macro pixman_composite_src_8888_8_process_pixblock_tail /* emits no instructions */ | |
60 | +.endm | |
61 | + | |
62 | +.macro pixman_composite_src_8888_8_process_pixblock_tail_head | |
63 | + vst1.8 {d3}, [DST_W, :64]! /* write out d3 through DST_W (dst_w_basereg is 3 below), then advance DST_W */ | |
64 | + vld4.8 {d0, d1, d2, d3}, [SRC]! /* load the next source block into d0-d3, then advance SRC */ | |
65 | +.endm | |
66 | + | |
67 | +generate_composite_function_single_scanline \ | |
68 | + pixman_store_scanline_a8_asm_neon, 32, 0, 8, /* presumably src, mask and dst bits per pixel - confirm against the generator macro */ \ | |
69 | + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ | |
70 | + 8, /* number of pixels, processed in a single block */ \ | |
71 | + default_init, \ | |
72 | + default_cleanup, \ | |
73 | + pixman_composite_src_8888_8_process_pixblock_head, \ | |
74 | + pixman_composite_src_8888_8_process_pixblock_tail, \ | |
75 | + pixman_composite_src_8888_8_process_pixblock_tail_head, \ | |
76 | + 3, /* dst_w_basereg */ \ | |
77 | + 0, /* dst_r_basereg */ \ | |
78 | + 0, /* src_basereg */ \ | |
79 | + 0 /* mask_basereg */ | |
80 | + | ||
81 | +/******************************************************************************/ | ||
82 | + | ||
83 | .macro pixman_composite_src_8888_0565_process_pixblock_head | ||
84 | vshll.u8 q8, d1, #8 | ||
85 | vshll.u8 q14, d2, #8 | ||
86 | diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c | ||
87 | index f773e92..55219b3 100644 | ||
88 | --- a/pixman/pixman-arm-neon.c | ||
89 | +++ b/pixman/pixman-arm-neon.c | ||
90 | @@ -484,6 +484,45 @@ neon_store_scanline_r5g6b5 (bits_image_t * image, | ||
91 | pixman_store_scanline_r5g6b5_asm_neon (width, pixel, values); | ||
92 | } | ||
93 | |||
94 | +void /* this name is passed to generate_composite_function_single_scanline in pixman-arm-neon-asm.S */ | |
95 | +pixman_fetch_scanline_a8_asm_neon (int width, | |
96 | + uint32_t *buffer, /* written: 32-bit output values */ | |
97 | + const uint8_t *pixel); /* read only: source bytes */ | |
98 | + | ||
99 | + | ||
100 | +void /* this name is passed to generate_composite_function_single_scanline in pixman-arm-neon-asm.S */ | |
101 | +pixman_store_scanline_a8_asm_neon (int width, | |
102 | + uint8_t *pixel, /* written: destination bytes */ | |
103 | + const uint32_t *values); /* read only: 32-bit input values */ | |
104 | + | ||
105 | +static void /* fetch accessor installed for PIXMAN_a8 via _pixman_bits_override_accessors */ | |
106 | +neon_fetch_scanline_a8 (pixman_image_t *image, | |
107 | + int x, | |
108 | + int y, | |
109 | + int width, | |
110 | + uint32_t * buffer, | |
111 | + const uint32_t *mask) /* mask is not referenced in this function */ | |
112 | +{ | |
113 | + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; /* start of row y; the stride is added to a uint32_t pointer */ | |
114 | + const uint8_t *pixel = (const uint8_t *) bits + x; /* step x bytes into that row */ | |
115 | + | |
116 | + pixman_fetch_scanline_a8_asm_neon (width, buffer, pixel); /* the NEON routine reads from pixel and fills buffer */ | |
117 | +} | |
118 | + | ||
119 | +static void /* store accessor installed for PIXMAN_a8 via _pixman_bits_override_accessors */ | |
120 | +neon_store_scanline_a8 (bits_image_t * image, | |
121 | + int x, | |
122 | + int y, | |
123 | + int width, | |
124 | + const uint32_t *values) | |
125 | +{ | |
126 | + uint32_t *bits = image->bits + image->rowstride * y; /* start of row y; the stride is added to a uint32_t pointer */ | |
127 | + uint8_t *pixel = (uint8_t *) bits + x; /* step x bytes into that row */ | |
128 | + | |
129 | + pixman_store_scanline_a8_asm_neon (width, pixel, values); /* the NEON routine reads from values and writes at pixel */ | |
130 | +} | |
131 | + | ||
132 | + | ||
133 | pixman_implementation_t * | ||
134 | _pixman_implementation_create_arm_neon (void) | ||
135 | { | ||
136 | @@ -502,6 +541,9 @@ _pixman_implementation_create_arm_neon (void) | ||
137 | _pixman_bits_override_accessors (PIXMAN_r5g6b5, | ||
138 | neon_fetch_scanline_r5g6b5, | ||
139 | neon_store_scanline_r5g6b5); | ||
140 | + _pixman_bits_override_accessors (PIXMAN_a8, | ||
141 | + neon_fetch_scanline_a8, | ||
142 | + neon_store_scanline_a8); | ||
143 | |||
144 | imp->blt = arm_neon_blt; | ||
145 | imp->fill = arm_neon_fill; | ||
146 | -- | ||
147 | 1.6.6.1 | ||
148 | |||