URL https://opencores.org/ocsvn/test_project/test_project/trunk

Subversion Repositories test_project

[/] [test_project/] [trunk/] [linux_sd_driver/] [drivers/] [video/] [atafb_utils.h] - Blame information for rev 62

Details | Compare with Previous | View Log

Line No.	Rev	Author	Line
1	62	marcus.erl	`#ifndef _VIDEO_ATAFB_UTILS_H`
2			`#define _VIDEO_ATAFB_UTILS_H`
3
4			`/* ================================================================= */`
5			`/* Utility Assembler Functions */`
6			`/* ================================================================= */`
7
8			`/* ====================================================================== */`
9
10			`/* Those of a delicate disposition might like to skip the next couple of`
11			`* pages.`
12			`*`
13			`* These functions are drop in replacements for memmove and`
14			`* memset(_, 0, _). However their five instances add at least a kilobyte`
15			`* to the object file. You have been warned.`
16			`*`
17			`* Not a great fan of assembler for the sake of it, but I think`
18			`* that these routines are at least 10 times faster than their C`
19			`* equivalents for large blits, and that's important to the lowest level of`
20			`* a graphics driver. Question is whether some scheme with the blitter`
21			`* would be faster. I suspect not for simple text system - not much`
22			`* asynchrony.`
23			`*`
24			`* Code is very simple, just gruesome expansion. Basic strategy is to`
25			`* increase data moved/cleared at each step to 16 bytes to reduce`
26			`* instruction per data move overhead. movem might be faster still.`
27			`* For more than 15 bytes, we try to align the write direction on a`
28			`* longword boundary to get maximum speed. This is even more gruesome.`
29			`* Unaligned read/write used requires 68020+ - think this is a problem?`
30			`*`
31			`* Sorry!`
32			`*/`
33
34
35			`/* ++roman: I've optimized Robert's original versions in some minor`
36			`* aspects, e.g. moveq instead of movel, let gcc choose the registers,`
37			`* use movem in some places...`
38			`* For modes other than 1 plane, many more such assembler functions`
39			`* were needed (e.g. the ones using movep or expanding color values).`
40			`*/`
41
42			`/* ++andreas: more optimizations:`
43			`subl #65536,d0 replaced by clrw d0; subql #1,d0 for dbcc`
44			`addal is faster than addaw`
45			`movep is rather expensive compared to ordinary move's`
46			`some functions rewritten in C for clarity, no speed loss */`
47
48			`static inline void fb_memclear_small(void s, size_t count)`
49			`{`
50			`if (!count)`
51			`return 0;`
52
53			`asm volatile ("\n"`
54			`" lsr.l #1,%1 ; jcc 1f ; move.b %2,-(%0)\n"`
55			`"1: lsr.l #1,%1 ; jcc 1f ; move.w %2,-(%0)\n"`
56			`"1: lsr.l #1,%1 ; jcc 1f ; move.l %2,-(%0)\n"`
57			`"1: lsr.l #1,%1 ; jcc 1f ; move.l %2,-(%0) ; move.l %2,-(%0)\n"`
58			`"1:"`
59			`: "=a" (s), "=d" (count)`
60			`: "d" (0), "0" ((char *)s + count), "1" (count));`
61			`asm volatile ("\n"`
62			`" subq.l #1,%1\n"`
63			`" jcs 3f\n"`
64			`" move.l %2,%%d4; move.l %2,%%d5; move.l %2,%%d6\n"`
65			`"2: movem.l %2/%%d4/%%d5/%%d6,-(%0)\n"`
66			`" dbra %1,2b\n"`
67			`"3:"`
68			`: "=a" (s), "=d" (count)`
69			`: "d" (0), "0" (s), "1" (count)`
70			`: "d4", "d5", "d6"`
71			`);`
72
73			`return 0;`
74			`}`
75
76
77			`static inline void fb_memclear(void s, size_t count)`
78			`{`
79			`if (!count)`
80			`return 0;`
81
82			`if (count < 16) {`
83			`asm volatile ("\n"`
84			`" lsr.l #1,%1 ; jcc 1f ; clr.b (%0)+\n"`
85			`"1: lsr.l #1,%1 ; jcc 1f ; clr.w (%0)+\n"`
86			`"1: lsr.l #1,%1 ; jcc 1f ; clr.l (%0)+\n"`
87			`"1: lsr.l #1,%1 ; jcc 1f ; clr.l (%0)+ ; clr.l (%0)+\n"`
88			`"1:"`
89			`: "=a" (s), "=d" (count)`
90			`: "0" (s), "1" (count));`
91			`} else {`
92			`long tmp;`
93			`asm volatile ("\n"`
94			`" move.l %1,%2\n"`
95			`" lsr.l #1,%2 ; jcc 1f ; clr.b (%0)+ ; subq.w #1,%1\n"`
96			`" lsr.l #1,%2 ; jcs 2f\n" /* %0 increased=>bit 2 switched*/`
97			`" clr.w (%0)+ ; subq.w #2,%1 ; jra 2f\n"`
98			`"1: lsr.l #1,%2 ; jcc 2f\n"`
99			`" clr.w (%0)+ ; subq.w #2,%1\n"`
100			`"2: move.w %1,%2; lsr.l #2,%1 ; jeq 6f\n"`
101			`" lsr.l #1,%1 ; jcc 3f ; clr.l (%0)+\n"`
102			`"3: lsr.l #1,%1 ; jcc 4f ; clr.l (%0)+ ; clr.l (%0)+\n"`
103			`"4: subq.l #1,%1 ; jcs 6f\n"`
104			`"5: clr.l (%0)+; clr.l (%0)+ ; clr.l (%0)+ ; clr.l (%0)+\n"`
105			`" dbra %1,5b ; clr.w %1; subq.l #1,%1; jcc 5b\n"`
106			`"6: move.w %2,%1; btst #1,%1 ; jeq 7f ; clr.w (%0)+\n"`
107			`"7: btst #0,%1 ; jeq 8f ; clr.b (%0)+\n"`
108			`"8:"`
109			`: "=a" (s), "=d" (count), "=d" (tmp)`
110			`: "0" (s), "1" (count));`
111			`}`
112
113			`return 0;`
114			`}`
115
116
117			`static inline void fb_memset255(void s, size_t count)`
118			`{`
119			`if (!count)`
120			`return 0;`
121
122			`asm volatile ("\n"`
123			`" lsr.l #1,%1 ; jcc 1f ; move.b %2,-(%0)\n"`
124			`"1: lsr.l #1,%1 ; jcc 1f ; move.w %2,-(%0)\n"`
125			`"1: lsr.l #1,%1 ; jcc 1f ; move.l %2,-(%0)\n"`
126			`"1: lsr.l #1,%1 ; jcc 1f ; move.l %2,-(%0) ; move.l %2,-(%0)\n"`
127			`"1:"`
128			`: "=a" (s), "=d" (count)`
129			`: "d" (-1), "0" ((char *)s+count), "1" (count));`
130			`asm volatile ("\n"`
131			`" subq.l #1,%1 ; jcs 3f\n"`
132			`" move.l %2,%%d4; move.l %2,%%d5; move.l %2,%%d6\n"`
133			`"2: movem.l %2/%%d4/%%d5/%%d6,-(%0)\n"`
134			`" dbra %1,2b\n"`
135			`"3:"`
136			`: "=a" (s), "=d" (count)`
137			`: "d" (-1), "0" (s), "1" (count)`
138			`: "d4", "d5", "d6");`
139
140			`return 0;`
141			`}`
142
143
144			`static inline void fb_memmove(void d, const void *s, size_t count)`
145			`{`
146			`if (d < s) {`
147			`if (count < 16) {`
148			`asm volatile ("\n"`
149			`" lsr.l #1,%2 ; jcc 1f ; move.b (%1)+,(%0)+\n"`
150			`"1: lsr.l #1,%2 ; jcc 1f ; move.w (%1)+,(%0)+\n"`
151			`"1: lsr.l #1,%2 ; jcc 1f ; move.l (%1)+,(%0)+\n"`
152			`"1: lsr.l #1,%2 ; jcc 1f ; move.l (%1)+,(%0)+ ; move.l (%1)+,(%0)+\n"`
153			`"1:"`
154			`: "=a" (d), "=a" (s), "=d" (count)`
155			`: "0" (d), "1" (s), "2" (count));`
156			`} else {`
157			`long tmp;`
158			`asm volatile ("\n"`
159			`" move.l %0,%3\n"`
160			`" lsr.l #1,%3 ; jcc 1f ; move.b (%1)+,(%0)+ ; subqw #1,%2\n"`
161			`" lsr.l #1,%3 ; jcs 2f\n" /* %0 increased=>bit 2 switched*/`
162			`" move.w (%1)+,(%0)+ ; subqw #2,%2 ; jra 2f\n"`
163			`"1: lsr.l #1,%3 ; jcc 2f\n"`
164			`" move.w (%1)+,(%0)+ ; subqw #2,%2\n"`
165			`"2: move.w %2,%-; lsr.l #2,%2 ; jeq 6f\n"`
166			`" lsr.l #1,%2 ; jcc 3f ; move.l (%1)+,(%0)+\n"`
167			`"3: lsr.l #1,%2 ; jcc 4f ; move.l (%1)+,(%0)+ ; move.l (%1)+,(%0)+\n"`
168			`"4: subq.l #1,%2 ; jcs 6f\n"`
169			`"5: move.l (%1)+,(%0)+; move.l (%1)+,(%0)+\n"`
170			`" move.l (%1)+,(%0)+; move.l (%1)+,(%0)+\n"`
171			`" dbra %2,5b ; clr.w %2; subq.l #1,%2; jcc 5b\n"`
172			`"6: move.w %+,%2; btst #1,%2 ; jeq 7f ; move.w (%1)+,(%0)+\n"`
173			`"7: btst #0,%2 ; jeq 8f ; move.b (%1)+,(%0)+\n"`
174			`"8:"`
175			`: "=a" (d), "=a" (s), "=d" (count), "=d" (tmp)`
176			`: "0" (d), "1" (s), "2" (count));`
177			`}`
178			`} else {`
179			`if (count < 16) {`
180			`asm volatile ("\n"`
181			`" lsr.l #1,%2 ; jcc 1f ; move.b -(%1),-(%0)\n"`
182			`"1: lsr.l #1,%2 ; jcc 1f ; move.w -(%1),-(%0)\n"`
183			`"1: lsr.l #1,%2 ; jcc 1f ; move.l -(%1),-(%0)\n"`
184			`"1: lsr.l #1,%2 ; jcc 1f ; move.l -(%1),-(%0) ; move.l -(%1),-(%0)\n"`
185			`"1:"`
186			`: "=a" (d), "=a" (s), "=d" (count)`
187			`: "0" ((char ) d + count), "1" ((char ) s + count), "2" (count));`
188			`} else {`
189			`long tmp;`
190
191			`asm volatile ("\n"`
192			`" move.l %0,%3\n"`
193			`" lsr.l #1,%3 ; jcc 1f ; move.b -(%1),-(%0) ; subqw #1,%2\n"`
194			`" lsr.l #1,%3 ; jcs 2f\n" /* %0 increased=>bit 2 switched*/`
195			`" move.w -(%1),-(%0) ; subqw #2,%2 ; jra 2f\n"`
196			`"1: lsr.l #1,%3 ; jcc 2f\n"`
197			`" move.w -(%1),-(%0) ; subqw #2,%2\n"`
198			`"2: move.w %2,%-; lsr.l #2,%2 ; jeq 6f\n"`
199			`" lsr.l #1,%2 ; jcc 3f ; move.l -(%1),-(%0)\n"`
200			`"3: lsr.l #1,%2 ; jcc 4f ; move.l -(%1),-(%0) ; move.l -(%1),-(%0)\n"`
201			`"4: subq.l #1,%2 ; jcs 6f\n"`
202			`"5: move.l -(%1),-(%0); move.l -(%1),-(%0)\n"`
203			`" move.l -(%1),-(%0); move.l -(%1),-(%0)\n"`
204			`" dbra %2,5b ; clr.w %2; subq.l #1,%2; jcc 5b\n"`
205			`"6: move.w %+,%2; btst #1,%2 ; jeq 7f ; move.w -(%1),-(%0)\n"`
206			`"7: btst #0,%2 ; jeq 8f ; move.b -(%1),-(%0)\n"`
207			`"8:"`
208			`: "=a" (d), "=a" (s), "=d" (count), "=d" (tmp)`
209			`: "0" ((char ) d + count), "1" ((char ) s + count), "2" (count));`
210			`}`
211			`}`
212
213			`return 0;`
214			`}`
215
216
217			`/* ++andreas: Simple and fast version of memmove, assumes size is`
218			`divisible by 16, suitable for moving the whole screen bitplane */`
219			`static inline void fast_memmove(char dst, const char src, size_t size)`
220			`{`
221			`if (!size)`
222			`return;`
223			`if (dst < src)`
224			`asm volatile ("\n"`
225			`"1: movem.l (%0)+,%%d0/%%d1/%%a0/%%a1\n"`
226			`" movem.l %%d0/%%d1/%%a0/%%a1,%1@\n"`
227			`" addq.l #8,%1; addq.l #8,%1\n"`
228			`" dbra %2,1b\n"`
229			`" clr.w %2; subq.l #1,%2\n"`
230			`" jcc 1b"`
231			`: "=a" (src), "=a" (dst), "=d" (size)`
232			`: "0" (src), "1" (dst), "2" (size / 16 - 1)`
233			`: "d0", "d1", "a0", "a1", "memory");`
234			`else`
235			`asm volatile ("\n"`
236			`"1: subq.l #8,%0; subq.l #8,%0\n"`
237			`" movem.l %0@,%%d0/%%d1/%%a0/%%a1\n"`
238			`" movem.l %%d0/%%d1/%%a0/%%a1,-(%1)\n"`
239			`" dbra %2,1b\n"`
240			`" clr.w %2; subq.l #1,%2\n"`
241			`" jcc 1b"`
242			`: "=a" (src), "=a" (dst), "=d" (size)`
243			`: "0" (src + size), "1" (dst + size), "2" (size / 16 - 1)`
244			`: "d0", "d1", "a0", "a1", "memory");`
245			`}`
246
247			`#ifdef BPL`
248
249			`/*`
250			`* This expands a up to 8 bit color into two longs`
251			`* for movel operations.`
252			`*/`
253			`static const u32 four2long[] = {`
254			`0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff,`
255			`0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff,`
256			`0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff,`
257			`0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff,`
258			`};`
259
260			`static inline void expand8_col2mask(u8 c, u32 m[])`
261			`{`
262			`m[0] = four2long[c & 15];`
263			`#if BPL > 4`
264			`m[1] = four2long[c >> 4];`
265			`#endif`
266			`}`
267
268			`static inline void expand8_2col2mask(u8 fg, u8 bg, u32 fgm[], u32 bgm[])`
269			`{`
270			`fgm[0] = four2long[fg & 15] ^ (bgm[0] = four2long[bg & 15]);`
271			`#if BPL > 4`
272			`fgm[1] = four2long[fg >> 4] ^ (bgm[1] = four2long[bg >> 4]);`
273			`#endif`
274			`}`
275
276			`/*`
277			`* set an 8bit value to a color`
278			`*/`
279			`static inline void fill8_col(u8 *dst, u32 m[])`
280			`{`
281			`u32 tmp = m[0];`
282			`dst[0] = tmp;`
283			`dst[2] = (tmp >>= 8);`
284			`#if BPL > 2`
285			`dst[4] = (tmp >>= 8);`
286			`dst[6] = tmp >> 8;`
287			`#endif`
288			`#if BPL > 4`
289			`tmp = m[1];`
290			`dst[8] = tmp;`
291			`dst[10] = (tmp >>= 8);`
292			`dst[12] = (tmp >>= 8);`
293			`dst[14] = tmp >> 8;`
294			`#endif`
295			`}`
296
297			`/*`
298			`* set an 8bit value according to foreground/background color`
299			`*/`
300			`static inline void fill8_2col(u8 *dst, u8 fg, u8 bg, u32 mask)`
301			`{`
302			`u32 fgm[2], bgm[2], tmp;`
303
304			`expand8_2col2mask(fg, bg, fgm, bgm);`
305
306			`mask \|= mask << 8;`
307			`#if BPL > 2`
308			`mask \|= mask << 16;`
309			`#endif`
310			`tmp = (mask & fgm[0]) ^ bgm[0];`
311			`dst[0] = tmp;`
312			`dst[2] = (tmp >>= 8);`
313			`#if BPL > 2`
314			`dst[4] = (tmp >>= 8);`
315			`dst[6] = tmp >> 8;`
316			`#endif`
317			`#if BPL > 4`
318			`tmp = (mask & fgm[1]) ^ bgm[1];`
319			`dst[8] = tmp;`
320			`dst[10] = (tmp >>= 8);`
321			`dst[12] = (tmp >>= 8);`
322			`dst[14] = tmp >> 8;`
323			`#endif`
324			`}`
325
326			`static const u32 two2word[] = {`
327			`0x00000000, 0xffff0000, 0x0000ffff, 0xffffffff`
328			`};`
329
330			`static inline void expand16_col2mask(u8 c, u32 m[])`
331			`{`
332			`m[0] = two2word[c & 3];`
333			`#if BPL > 2`
334			`m[1] = two2word[(c >> 2) & 3];`
335			`#endif`
336			`#if BPL > 4`
337			`m[2] = two2word[(c >> 4) & 3];`
338			`m[3] = two2word[c >> 6];`
339			`#endif`
340			`}`
341
342			`static inline void expand16_2col2mask(u8 fg, u8 bg, u32 fgm[], u32 bgm[])`
343			`{`
344			`bgm[0] = two2word[bg & 3];`
345			`fgm[0] = two2word[fg & 3] ^ bgm[0];`
346			`#if BPL > 2`
347			`bgm[1] = two2word[(bg >> 2) & 3];`
348			`fgm[1] = two2word[(fg >> 2) & 3] ^ bgm[1];`
349			`#endif`
350			`#if BPL > 4`
351			`bgm[2] = two2word[(bg >> 4) & 3];`
352			`fgm[2] = two2word[(fg >> 4) & 3] ^ bgm[2];`
353			`bgm[3] = two2word[bg >> 6];`
354			`fgm[3] = two2word[fg >> 6] ^ bgm[3];`
355			`#endif`
356			`}`
357
358			`static inline u32 fill16_col(u32 dst, int rows, u32 m[])`
359			`{`
360			`while (rows) {`
361			`*dst++ = m[0];`
362			`#if BPL > 2`
363			`*dst++ = m[1];`
364			`#endif`
365			`#if BPL > 4`
366			`*dst++ = m[2];`
367			`*dst++ = m[3];`
368			`#endif`
369			`rows--;`
370			`}`
371			`return dst;`
372			`}`
373
374			`static inline void memmove32_col(void dst, void src, u32 mask, u32 h, u32 bytes)`
375			`{`
376			`u32 s, d, v;`
377
378			`s = src;`
379			`d = dst;`
380			`do {`
381			`v = (s++ & mask) \| (d & ~mask);`
382			`*d++ = v;`
383			`#if BPL > 2`
384			`v = (s++ & mask) \| (d & ~mask);`
385			`*d++ = v;`
386			`#endif`
387			`#if BPL > 4`
388			`v = (s++ & mask) \| (d & ~mask);`
389			`*d++ = v;`
390			`v = (s++ & mask) \| (d & ~mask);`
391			`*d++ = v;`
392			`#endif`
393			`d = (u32 )((u8 )d + bytes);`
394			`s = (u32 )((u8 )s + bytes);`
395			`} while (--h);`
396			`}`
397
398			`#endif`
399
400			`#endif /* _VIDEO_ATAFB_UTILS_H */`

Browse

Tools

Subversion Repositories test_project

[/] [test_project/] [trunk/] [linux_sd_driver/] [drivers/] [video/] [atafb_utils.h] - Blame information for rev 62