// Blackfin assembler source: video pixel operation syntax cases.
// The statements in this file read as independent assembler input vectors
// (presumably a parser/encoder test -- TODO confirm), so register choice,
// letter case and spacing of each instruction are kept verbatim.

// NOTE(review): MY_LABEL2 is declared external but is not referenced in the
// visible part of this file.
.EXTERN MY_LABEL2;
.section .text;

//
//13 VIDEO PIXEL OPERATIONS
//

// ALIGN8 syntax cases.
// Operand matrix: R0 as destination with every same/different mix of R0 and
// R1 as sources, then (R1, R2), then a sweep that walks consecutive register
// triples through R0-R7.
//Dreg = ALIGN8 ( Dreg, Dreg ) ; /* overlay 1 byte (b) */
R0 = ALIGN8(R0, R0);
R0 = ALIGN8(R0, R1);
R0 = ALIGN8(R1, R0);
R0 = ALIGN8(R1, R1);
R0 = ALIGN8(R1, R2);
R3 = ALIGN8(R4, R5);
R6 = ALIGN8(R7, R0);
R1 = ALIGN8(R2, R3);
R4 = ALIGN8(R5, R6);
R7 = ALIGN8(R0, R1);
R2 = ALIGN8(R3, R4);
R5 = ALIGN8(R6, R7);

// ALIGN16 syntax cases.
// Operand matrix: R0 as destination with every same/different mix of R0 and
// R1 as sources, then (R1, R2), then a sweep that walks consecutive register
// triples through R0-R7.
//Dreg = ALIGN16 ( Dreg, Dreg ) ; /* overlay 2 bytes (b) */
R0 = ALIGN16(R0, R0);
R0 = ALIGN16(R0, R1);
R0 = ALIGN16(R1, R0);
R0 = ALIGN16(R1, R1);
R0 = ALIGN16(R1, R2);
R3 = ALIGN16(R4, R5);
R6 = ALIGN16(R7, R0);
R1 = ALIGN16(R2, R3);
R4 = ALIGN16(R5, R6);
R7 = ALIGN16(R0, R1);
R2 = ALIGN16(R3, R4);
R5 = ALIGN16(R6, R7);

// ALIGN24 syntax cases.
// Operand matrix: R0 as destination with every same/different mix of R0 and
// R1 as sources, then (R1, R2), then a sweep that walks consecutive register
// triples through R0-R7.
//Dreg = ALIGN24 ( Dreg, Dreg ) ; /* overlay 3 bytes (b) */
R0 = ALIGN24(R0, R0);
R0 = ALIGN24(R0, R1);
R0 = ALIGN24(R1, R0);
R0 = ALIGN24(R1, R1);
R0 = ALIGN24(R1, R2);
R3 = ALIGN24(R4, R5);
R6 = ALIGN24(R7, R0);
R1 = ALIGN24(R2, R3);
R4 = ALIGN24(R5, R6);
R7 = ALIGN24(R0, R1);
R2 = ALIGN24(R3, R4);
R5 = ALIGN24(R6, R7);

// DISALGNEXCPT syntax case: the statement is written without operands, so a
// single line covers it.
DISALGNEXCPT ; /* (b) */

// BYTEOP3P syntax cases.
// The four option forms (lo / hi / lo,r / hi,r) are written once with sources
// (r1:0, r3:2) and once with the source pairs swapped; the destination walks
// r0 through r7.
/* forward byte order operands */
//Dreg = BYTEOP3P (Dreg_pair, Dreg_pair) (LO) ; /* sum into low bytes (b) */
//Dreg = BYTEOP3P (Dreg_pair, Dreg_pair) (HI) ; /* sum into high bytes (b) */
/* reverse byte order operands */
//Dreg = BYTEOP3P (Dreg_pair, Dreg_pair) (LO, R) ; /* sum into low bytes (b) */
//Dreg = BYTEOP3P (Dreg_pair, Dreg_pair) (HI, R) ; /* sum into high bytes (b) */

r0 = byteop3p (r1:0, r3:2) (lo) ;
r1 = byteop3p (r1:0, r3:2) (hi) ;
r2 = byteop3p (r1:0, r3:2) (lo, r) ;
r3 = byteop3p (r1:0, r3:2) (hi, r) ;
r4 = byteop3p (r3:2, r1:0) (lo) ;
r5 = byteop3p (r3:2, r1:0) (hi) ;
r6 = byteop3p (r3:2, r1:0) (lo, r) ;
r7 = byteop3p (r3:2, r1:0) (hi, r) ;

// Dual accumulator half-sum syntax cases (A1.L + A1.H paired with A0.L + A0.H).
// NOTE(review): the first case names R0 as the destination of both halves;
// the remaining cases use two different registers. Confirm that the
// identical-destination form is intentional.
//Dreg = A1.L + A1.H, Dreg = A0.L + A0.H ; /* (b) */

R0 = A1.L + A1.H, R0= A0.L + A0.H ;
R0 = A1.L + A1.H, R1= A0.L + A0.H ;
R2 = A1.L + A1.H, R3= A0.L + A0.H ;
R4 = A1.L + A1.H, R5= A0.L + A0.H ;
R6 = A1.L + A1.H, R7= A0.L + A0.H ;

// BYTEOP16P syntax cases.
// Each group (forward, then reverse with the (r) option) runs four
// destination pairs against both source-pair orders. Upper- and lower-case
// mnemonic spellings and the spacing inside the parentheses alternate from
// line to line; keep them as written.
/* forward byte order operands */
//( Dreg, Dreg ) = BYTEOP16P ( Dreg_pair, Dreg_pair ) ; /* (b) */
(r7,r0) = BYTEOP16P ( r3:2,r1:0 ) ;
(r1,r2) = byteop16p (r3:2,r1:0) ;
(r0,r1) = BYTEOP16P ( r3:2,r1:0 ) ;
(r2,r3) = byteop16p (r3:2,r1:0) ;
(r7,r0) = BYTEOP16P (r1:0, r3:2) ;
(r1,r2) = byteop16p (r1:0,r3:2) ;
(r0,r1) = BYTEOP16P (r1:0, r3:2) ;
(r2,r3) = byteop16p (r1:0,r3:2) ;

/* reverse byte order operands */
//( Dreg, Dreg ) = BYTEOP16P ( Dreg_pair, Dreg_pair ) (R); /* (b) */
(r7,r0) = BYTEOP16P ( r3:2,r1:0 )(r) ;
(r1,r2) = byteop16p (r3:2,r1:0)(r) ;
(r0,r1) = BYTEOP16P ( r3:2,r1:0 )(r) ;
(r2,r3) = byteop16p (r3:2,r1:0)(r) ;
(r7,r0) = BYTEOP16P (r1:0, r3:2)(r) ;
(r1,r2) = byteop16p (r1:0,r3:2)(r) ;
(r0,r1) = BYTEOP16P (r1:0, r3:2)(r) ;
(r2,r3) = byteop16p (r1:0,r3:2)(r) ;

// BYTEOP1P syntax cases.
// First group: destination fixed at r3, sources (r1:0, r3:2), with no option,
// (r), (t) and (t,r). Second group: the same four forms with the source
// pairs swapped and the destination walking r0 through r3.
/* forward byte order operands */
//Dreg = BYTEOP1P (Dreg_pair, Dreg_pair) ; /* (b) */
//Dreg = BYTEOP1P (Dreg_pair, Dreg_pair) (T) ; /* truncated (b) */
/* reverse byte order operands */
//Dreg = BYTEOP1P (Dreg_pair, Dreg_pair) (R) ; /* (b) */
//Dreg = BYTEOP1P (Dreg_pair, Dreg_pair) (T, R) ; /* truncated (b) */

r3 = byteop1p (r1:0, r3:2) ;
r3 = byteop1p (r1:0, r3:2) (r) ;
r3 = byteop1p (r1:0, r3:2) (t) ;
r3 = byteop1p (r1:0, r3:2) (t,r) ;

r0 = byteop1p (r3:2,r1:0);
r1 = byteop1p (r3:2,r1:0)(r) ;
r2 = byteop1p (r3:2,r1:0)(t) ;
r3 = byteop1p (r3:2,r1:0)(t,r) ;

// BYTEOP2P syntax cases.
// All eight option forms (rndl, rndh, tl, th, each with and without ", r")
// are written three times:
//   1. destination fixed at r3, sources (r1:0, r3:2)
//   2. destination walking r0-r7, sources (r1:0, r3:2)
//   3. destination walking r0-r7, the same pair r3:2 used for both sources
/* forward byte order operands */
//Dreg = BYTEOP2P (Dreg_pair, Dreg_pair) (RNDL) ;
/* round into low bytes (b) */
//Dreg = BYTEOP2P (Dreg_pair, Dreg_pair) (RNDH) ;
/* round into high bytes (b) */
//Dreg = BYTEOP2P (Dreg_pair, Dreg_pair) (TL) ;
/* truncate into low bytes (b) */
//Dreg = BYTEOP2P (Dreg_pair, Dreg_pair) (TH) ;
/* truncate into high bytes (b) */
/* reverse byte order operands */
//Dreg = BYTEOP2P (Dreg_pair, Dreg_pair) (RNDL, R) ;
/* round into low bytes (b) */
//Dreg = BYTEOP2P (Dreg_pair, Dreg_pair) (RNDH, R) ;
/* round into high bytes (b) */
//Dreg = BYTEOP2P (Dreg_pair, Dreg_pair) (TL, R) ;
/* truncate into low bytes (b) */
//Dreg = BYTEOP2P (Dreg_pair, Dreg_pair) (TH, R) ;
/* truncate into high bytes (b) */

r3 = byteop2p (r1:0, r3:2) (rndl) ;
r3 = byteop2p (r1:0, r3:2) (rndh) ;
r3 = byteop2p (r1:0, r3:2) (tl) ;
r3 = byteop2p (r1:0, r3:2) (th) ;
r3 = byteop2p (r1:0, r3:2) (rndl, r) ;
r3 = byteop2p (r1:0, r3:2) (rndh, r) ;
r3 = byteop2p (r1:0, r3:2) (tl, r) ;
r3 = byteop2p (r1:0, r3:2) (th, r) ;

r0 = byteop2p (r1:0, r3:2) (rndl) ;
r1 = byteop2p (r1:0, r3:2) (rndh) ;
r2 = byteop2p (r1:0, r3:2) (tl) ;
r3 = byteop2p (r1:0, r3:2) (th) ;
r4 = byteop2p (r1:0, r3:2) (rndl, r) ;
r5 = byteop2p (r1:0, r3:2) (rndh, r) ;
r6 = byteop2p (r1:0, r3:2) (tl, r) ;
r7 = byteop2p (r1:0, r3:2) (th, r) ;

r0 = byteop2p (r3:2, r3:2) (rndl) ;
r1 = byteop2p (r3:2, r3:2) (rndh) ;
r2 = byteop2p (r3:2, r3:2) (tl) ;
r3 = byteop2p (r3:2, r3:2) (th) ;
r4 = byteop2p (r3:2, r3:2) (rndl, r) ;
r5 = byteop2p (r3:2, r3:2) (rndh, r) ;
r6 = byteop2p (r3:2, r3:2) (tl, r) ;
r7 = byteop2p (r3:2, r3:2) (th, r) ;

// BYTEPACK syntax cases.
// One case with the same register in all three positions, then a sweep that
// walks consecutive register triples through r0-r7.
//Dreg = BYTEPACK ( Dreg, Dreg ) ; /* (b) */
r0 = bytepack (r0,r0) ;
r1 = bytepack (r2,r3) ;
r4 = bytepack (r5,r6) ;
r7 = bytepack (r0,r1) ;
r2 = bytepack (r3,r4) ;
r5 = bytepack (r6,r7) ;

// BYTEOP16M syntax cases.
// Four groups, one per source-pair combination:
//   (r3:2,r1:0), (r1:0,r1:0), (r1:0,r3:2), (r3:2,r3:2)
// Within a group the lines alternate between the plain form and the (r)
// form over the same list of destination pairs.
// NOTE(review): the fifth destination pair in every group is (r3,r5), which
// breaks the otherwise consecutive pairing -- confirm it is intentional.
/* forward byte order operands */
//(Dreg, Dreg) = BYTEOP16M (Dreg_pair, Dreg_pair) ; /* (b) */
/* reverse byte order operands */
//(Dreg, Dreg) = BYTEOP16M (Dreg_pair, Dreg_pair) (R) ; /* (b) */

(r1,r2)= byteop16m (r3:2,r1:0) ;
(r1,r2)= byteop16m (r3:2,r1:0) (r) ;
(r0,r1)= byteop16m (r3:2,r1:0) ;
(r2,r3)= byteop16m (r3:2,r1:0) (r) ;
(r3,r5)= byteop16m (r3:2,r1:0) ;
(r6,r7)= byteop16m (r3:2,r1:0) (r) ;

(r1,r2)= byteop16m (r1:0,r1:0) ;
(r1,r2)= byteop16m (r1:0,r1:0) (r) ;
(r0,r1)= byteop16m (r1:0,r1:0) ;
(r2,r3)= byteop16m (r1:0,r1:0) (r) ;
(r3,r5)= byteop16m (r1:0,r1:0) ;
(r6,r7)= byteop16m (r1:0,r1:0) (r) ;

(r1,r2)= byteop16m (r1:0,r3:2) ;
(r1,r2)= byteop16m (r1:0,r3:2) (r) ;
(r0,r1)= byteop16m (r1:0,r3:2) ;
(r2,r3)= byteop16m (r1:0,r3:2) (r) ;
(r3,r5)= byteop16m (r1:0,r3:2) ;
(r6,r7)= byteop16m (r1:0,r3:2) (r) ;

(r1,r2)= byteop16m (r3:2,r3:2) ;
(r1,r2)= byteop16m (r3:2,r3:2) (r) ;
(r0,r1)= byteop16m (r3:2,r3:2) ;
(r2,r3)= byteop16m (r3:2,r3:2) (r) ;
(r3,r5)= byteop16m (r3:2,r3:2) ;
(r6,r7)= byteop16m (r3:2,r3:2) (r) ;

// SAA syntax cases.
// The first two statements combine SAA with two `Dreg = [Ireg++]` slots via
// `||` (forward form, then the (R) form); the third is SAA on its own.
//SAA (Dreg_pair, Dreg_pair) ; /* forward byte order operands (b) */
//SAA (Dreg_pair, Dreg_pair) (R) ; /* reverse byte order operands (b) */

saa(r1:0, r3:2) || r0 = [i0++] || r2 = [i1++] ; /* parallel fill instructions */
saa (r1:0, r3:2) (R) || r1 = [i0++] || r3 = [i1++] ; /* reverse, parallel fill instructions */
saa (r1:0, r3:2) ; /* last SAA in a loop, no more fill required */

// BYTEUNPACK syntax cases.
// First four lines: destination pair fixed at (r6,r5), source pair r1:0 then
// r3:2, each without and with (R). Last four lines: the same source/option
// sequence with the destination pair walking (r0,r1) through (r6,r7).
//( Dreg , Dreg ) = BYTEUNPACK Dreg_pair ; /* (b) */
//( Dreg , Dreg ) = BYTEUNPACK Dreg_pair (R) ; /* reverse source order (b) */

(r6,r5) = byteunpack r1:0 ; /* non-reversing sources */
(r6,r5) = byteunpack r1:0 (R) ; /* reversing sources case */
(r6,r5) = byteunpack r3:2 ; /* non-reversing sources */
(r6,r5) = byteunpack r3:2 (R) ; /* reversing sources case */
(r0,r1) = byteunpack r1:0 ; /* non-reversing sources */
(r2,r3) = byteunpack r1:0 (R) ; /* reversing sources case */
(r4,r5) = byteunpack r3:2 ; /* non-reversing sources */
(r6,r7) = byteunpack r3:2 (R) ; /* reversing sources case */