1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
|
From 3be86a92ccab240859062a541cdb871d81c9501a Mon Sep 17 00:00:00 2001
From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
Date: Sun, 28 Nov 2010 21:45:06 +0200
Subject: [PATCH 06/24] ARM: introduced 'fetch_mask_pixblock' macro to simplify code
This macro hides the implementation details of pixels fetching
for the mask image just like 'fetch_src_pixblock' does for the
source image. This provides more possibilities for reusing the
same code blocks in different compositing functions.
This patch does not introduce any functional changes and the
resulting code in the compiled object file is exactly the same.
---
pixman/pixman-arm-neon-asm.S | 26 +++++++++++++-------------
pixman/pixman-arm-neon-asm.h | 5 +++++
2 files changed, 18 insertions(+), 13 deletions(-)
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index a3875ee..155a236 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -841,7 +841,7 @@ generate_composite_function \
pixman_composite_over_n_8_0565_process_pixblock_tail
vst1.16 {d28, d29}, [DST_W, :128]!
vld1.16 {d4, d5}, [DST_R, :128]!
- vld1.8 {d24}, [MASK]!
+ fetch_mask_pixblock
cache_preload 8, 8
pixman_composite_over_n_8_0565_process_pixblock_head
.endm
@@ -889,7 +889,7 @@ generate_composite_function \
pixman_composite_over_n_8_0565_process_pixblock_tail
fetch_src_pixblock
cache_preload 8, 8
- vld1.8 {d24}, [MASK]!
+ fetch_mask_pixblock
pixman_composite_over_n_8_0565_process_pixblock_head
vst1.16 {d28, d29}, [DST_W, :128]!
.endm
@@ -1171,7 +1171,7 @@ generate_composite_function \
pixman_composite_over_n_8_8888_process_pixblock_tail
vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
- vld1.8 {d24}, [MASK]!
+ fetch_mask_pixblock
cache_preload 8, 8
pixman_composite_over_n_8_8888_process_pixblock_head
.endm
@@ -1241,7 +1241,7 @@ generate_composite_function \
.macro pixman_composite_over_n_8_8_process_pixblock_tail_head
vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
pixman_composite_over_n_8_8_process_pixblock_tail
- vld1.8 {d24, d25, d26, d27}, [MASK]!
+ fetch_mask_pixblock
cache_preload 32, 32
vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
pixman_composite_over_n_8_8_process_pixblock_head
@@ -1341,7 +1341,7 @@ generate_composite_function \
vraddhn.u16 d29, q15, q9
vraddhn.u16 d30, q6, q10
vraddhn.u16 d31, q7, q11
- vld4.8 {d24, d25, d26, d27}, [MASK]!
+ fetch_mask_pixblock
vqadd.u8 q14, q0, q14
vqadd.u8 q15, q1, q15
cache_preload 8, 8
@@ -1405,7 +1405,7 @@ generate_composite_function \
pixman_composite_add_n_8_8_process_pixblock_tail
vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
- vld1.8 {d24, d25, d26, d27}, [MASK]!
+ fetch_mask_pixblock
cache_preload 32, 32
pixman_composite_add_n_8_8_process_pixblock_head
.endm
@@ -1462,7 +1462,7 @@ generate_composite_function \
pixman_composite_add_8_8_8_process_pixblock_tail
vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
- vld1.8 {d24, d25, d26, d27}, [MASK]!
+ fetch_mask_pixblock
fetch_src_pixblock
cache_preload 32, 32
pixman_composite_add_8_8_8_process_pixblock_head
@@ -1515,7 +1515,7 @@ generate_composite_function \
pixman_composite_add_8888_8888_8888_process_pixblock_tail
vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
- vld4.8 {d24, d25, d26, d27}, [MASK]!
+ fetch_mask_pixblock
fetch_src_pixblock
cache_preload 8, 8
pixman_composite_add_8888_8888_8888_process_pixblock_head
@@ -1587,7 +1587,7 @@ generate_composite_function_single_scanline \
pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail
fetch_src_pixblock
cache_preload 8, 8
- vld4.8 {d12, d13, d14, d15}, [MASK]!
+ fetch_mask_pixblock
pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
.endm
@@ -1658,7 +1658,7 @@ generate_composite_function \
pixman_composite_over_8888_n_8888_process_pixblock_tail
fetch_src_pixblock
cache_preload 8, 8
- vld4.8 {d12, d13, d14, d15}, [MASK]!
+ fetch_mask_pixblock
pixman_composite_over_8888_n_8888_process_pixblock_head
vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
.endm
@@ -1700,7 +1700,7 @@ generate_composite_function_single_scanline \
pixman_composite_over_8888_n_8888_process_pixblock_tail
fetch_src_pixblock
cache_preload 8, 8
- vld1.8 {d15}, [MASK]!
+ fetch_mask_pixblock
pixman_composite_over_8888_n_8888_process_pixblock_head
vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
.endm
@@ -1917,7 +1917,7 @@ generate_composite_function \
/* TODO: expand macros and do better instructions scheduling */
.macro pixman_composite_over_0565_8_0565_process_pixblock_tail_head
- vld1.8 {d15}, [MASK]!
+ fetch_mask_pixblock
pixman_composite_over_0565_8_0565_process_pixblock_tail
fetch_src_pixblock
vld1.16 {d10, d11}, [DST_R, :128]!
@@ -1969,7 +1969,7 @@ generate_composite_function \
/* TODO: expand macros and do better instructions scheduling */
.macro pixman_composite_add_0565_8_0565_process_pixblock_tail_head
- vld1.8 {d15}, [MASK]!
+ fetch_mask_pixblock
pixman_composite_add_0565_8_0565_process_pixblock_tail
fetch_src_pixblock
vld1.16 {d10, d11}, [DST_R, :128]!
diff --git a/pixman/pixman-arm-neon-asm.h b/pixman/pixman-arm-neon-asm.h
index c75bdc3..24fa361 100644
--- a/pixman/pixman-arm-neon-asm.h
+++ b/pixman/pixman-arm-neon-asm.h
@@ -431,6 +431,11 @@
.endif
.endm
+.macro fetch_mask_pixblock
+ pixld pixblock_size, mask_bpp, \
+ (mask_basereg - pixblock_size * mask_bpp / 64), MASK
+.endm
+
/*
* Macro which is used to process leading pixels until destination
* pointer is properly aligned (at 16 bytes boundary). When destination
--
1.6.6.1
|