Mercurial > vba-linux
comparison src/filters/hq_shared32.cpp @ 1:f9f4f1b99eed
importing src directory
author | Robert McIntyre <rlm@mit.edu> |
---|---|
date | Sat, 03 Mar 2012 10:31:27 -0600 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:8ced16adf2e1 | 1:f9f4f1b99eed |
---|---|
1 #include "../Port.h" | |
2 #include "hq_shared32.h" | |
3 #include "interp.h" | |
4 | |
5 const unsigned __int64 reg_blank = 0x0000000000000000; | |
6 const unsigned __int64 const7 = 0x0000000700070007; | |
7 const unsigned __int64 treshold = 0x0000000000300706; | |
8 | |
9 void Interp1(unsigned char *pc, unsigned int c1, unsigned int c2) | |
10 { | |
11 //*((int*)pc) = (c1*3+c2)/4; | |
12 | |
13 #ifdef MMX | |
14 __asm | |
15 { | |
16 mov eax, pc | |
17 movd mm1, c1 | |
18 movd mm2, c2 | |
19 movq mm0, mm1 | |
20 pslld mm0, 2 | |
21 psubd mm0, mm1 | |
22 paddd mm0, mm2 | |
23 psrld mm0, 2 | |
24 movd [eax], mm0 | |
25 EMMS | |
26 } | |
27 #else | |
28 __asm | |
29 { | |
30 mov eax, pc | |
31 mov edx, c1 | |
32 shl edx, 2 | |
33 add edx, c2 | |
34 sub edx, c1 | |
35 shr edx, 2 | |
36 mov [eax], edx | |
37 } | |
38 #endif | |
39 } | |
40 | |
41 void Interp2(unsigned char *pc, unsigned int c1, unsigned int c2, unsigned int c3) | |
42 { | |
43 //*((int*)pc) = (c1*2+c2+c3)/4; | |
44 | |
45 #ifdef MMX | |
46 __asm | |
47 { | |
48 mov eax, pc | |
49 movd mm0, c1 | |
50 movd mm1, c2 | |
51 movd mm2, c3 | |
52 pslld mm0, 1 | |
53 paddd mm0, mm1 | |
54 paddd mm0, mm2 | |
55 psrad mm0, 2 | |
56 movd [eax], mm0 | |
57 EMMS | |
58 } | |
59 #else | |
60 __asm | |
61 { | |
62 mov eax, pc | |
63 mov edx, c1 | |
64 shl edx, 1 | |
65 add edx, c2 | |
66 add edx, c3 | |
67 shr edx, 2 | |
68 mov [eax], edx | |
69 } | |
70 #endif | |
71 } | |
72 | |
73 void Interp3(unsigned char *pc, unsigned int c1, unsigned int c2) | |
74 { | |
75 //*((int*)pc) = (c1*7+c2)/8; | |
76 //*((int*)pc) = ((((c1 & 0x00FF00)*7 + (c2 & 0x00FF00) ) & 0x0007F800) + | |
77 // (((c1 & 0xFF00FF)*7 + (c2 & 0xFF00FF) ) & 0x07F807F8)) >> 3; | |
78 | |
79 #ifdef MMX | |
80 __asm | |
81 { | |
82 mov eax, pc | |
83 movd mm1, c1 | |
84 movd mm2, c2 | |
85 punpcklbw mm1, reg_blank | |
86 punpcklbw mm2, reg_blank | |
87 pmullw mm1, const7 | |
88 paddw mm1, mm2 | |
89 psrlw mm1, 3 | |
90 packuswb mm1, reg_blank | |
91 movd [eax], mm1 | |
92 EMMS | |
93 } | |
94 #else | |
95 __asm | |
96 { | |
97 mov eax, c1 | |
98 mov ebx, c2 | |
99 mov ecx, eax | |
100 shl ecx, 3 | |
101 sub ecx, eax | |
102 add ecx, ebx | |
103 shr ecx, 3 | |
104 mov eax, pc | |
105 mov [eax], ecx | |
106 } | |
107 #endif | |
108 } | |
109 | |
110 void Interp4(unsigned char *pc, unsigned int c1, unsigned int c2, unsigned int c3) | |
111 { | |
112 //*((int*)pc) = (c1*2+(c2+c3)*7)/16; | |
113 //*((int*)pc) = ((((c1 & 0x00FF00)*2 + ((c2 & 0x00FF00) + (c3 & 0x00FF00))*7 ) & 0x000FF000) + | |
114 // (((c1 & 0xFF00FF)*2 + ((c2 & 0xFF00FF) + (c3 & 0xFF00FF))*7 ) & 0x0FF00FF0)) >> 4; | |
115 | |
116 #ifdef MMX | |
117 __asm | |
118 { | |
119 mov eax, pc | |
120 movd mm1, c1 | |
121 movd mm2, c2 | |
122 movd mm3, c3 | |
123 punpcklbw mm1, reg_blank | |
124 punpcklbw mm2, reg_blank | |
125 punpcklbw mm3, reg_blank | |
126 psllw mm1, 1 | |
127 paddw mm2, mm3 | |
128 pmullw mm2, const7 | |
129 paddw mm1, mm2 | |
130 psrlw mm1, 4 | |
131 packuswb mm1, reg_blank | |
132 movd [eax], mm1 | |
133 EMMS | |
134 } | |
135 #else | |
136 | |
137 __asm | |
138 { | |
139 mov eax, [c1] | |
140 and eax, 0FF00h | |
141 shl eax, 1 | |
142 mov ecx, [c2] | |
143 and ecx, 0FF00h | |
144 mov edx, [c3] | |
145 and edx, 0FF00h | |
146 add ecx, edx | |
147 imul ecx, ecx, 7 | |
148 add eax, ecx | |
149 and eax, 0FF000h | |
150 | |
151 mov ebx, [c1] | |
152 and ebx, 0FF00FFh | |
153 shl ebx, 1 | |
154 mov ecx, [c2] | |
155 and ecx, 0FF00FFh | |
156 mov edx, [c3] | |
157 and edx, 0FF00FFh | |
158 add ecx, edx | |
159 imul ecx, ecx, 7 | |
160 add ebx, ecx | |
161 and ebx, 0FF00FF0h | |
162 | |
163 add eax, ebx | |
164 shr eax, 4 | |
165 | |
166 mov ebx, pc | |
167 mov [ebx], eax | |
168 } | |
169 #endif | |
170 } | |
171 | |
172 void Interp5(unsigned char *pc, unsigned int c1, unsigned int c2) | |
173 { | |
174 //*((int*)pc) = (c1+c2)/2; | |
175 | |
176 #ifdef MMX | |
177 __asm | |
178 { | |
179 mov eax, pc | |
180 movd mm0, c1 | |
181 movd mm1, c2 | |
182 paddd mm0, mm1 | |
183 psrad mm0, 1 | |
184 movd [eax], mm0 | |
185 EMMS | |
186 } | |
187 #else | |
188 __asm | |
189 { | |
190 mov eax, pc | |
191 mov edx, c1 | |
192 add edx, c2 | |
193 shr edx, 1 | |
194 mov [eax], edx | |
195 } | |
196 #endif | |
197 } | |
198 | |
199 void Interp1_16(unsigned char *pc, unsigned short c1, unsigned short c2) | |
200 { | |
201 *((unsigned short *)pc) = interp_16_31(c1, c2); | |
202 //*((int*)pc) = (c1*3+c2)/4; | |
203 } | |
204 | |
205 void Interp2_16(unsigned char *pc, unsigned short c1, unsigned short c2, unsigned short c3) | |
206 { | |
207 *((unsigned short *)pc) = interp_16_211(c1, c2, c3); | |
208 //*((int*)pc) = (c1*2+c2+c3)/4; | |
209 } | |
210 | |
211 void Interp3_16(unsigned char *pc, unsigned short c1, unsigned short c2) | |
212 { | |
213 *((unsigned short *)pc) = interp_16_71(c1, c2); | |
214 // *((unsigned short*)pc) = (c1*7+c2)/8; | |
215 // *((unsigned short*)pc) = ((((c1 & 0x00FF00)*7 + (c2 & 0x00FF00) ) & 0x0007F800) + | |
216 // (((c1 & 0xFF00FF)*7 + (c2 & 0xFF00FF) ) & 0x07F807F8)) >> 3; | |
217 } | |
218 | |
219 void Interp4_16(unsigned char *pc, unsigned short c1, unsigned short c2, unsigned short c3) | |
220 { | |
221 *((unsigned short *)pc) = interp_16_772(c2, c3, c1); | |
222 // *((unsigned short*)pc) = (c1*2+(c2+c3)*7)/16; | |
223 // *((unsigned short*)pc) = ((((c1 & 0x00FF00)*2 + ((c2 & 0x00FF00) + (c3 & 0x00FF00))*7 ) & 0x000FF000) + | |
224 // (((c1 & 0xFF00FF)*2 + ((c2 & 0xFF00FF) + (c3 & 0xFF00FF))*7 ) & 0x0FF00FF0)) >> 4; | |
225 } | |
226 | |
227 void Interp5_16(unsigned char *pc, unsigned short c1, unsigned short c2) | |
228 { | |
229 *((unsigned short *)pc) = interp_16_11(c1, c2); | |
230 } | |
231 | |
232 bool Diff(unsigned int c1, unsigned int c2) | |
233 { | |
234 unsigned int | |
235 YUV1 = RGBtoYUV(c1), | |
236 YUV2 = RGBtoYUV(c2); | |
237 | |
238 if (YUV1 == YUV2) return false; // Save some processing power | |
239 | |
240 #ifdef MMX | |
241 unsigned int retval; | |
242 __asm | |
243 { | |
244 mov eax, 0x7FFFFFFF | |
245 movd mm7, eax; mm7 = ABS_MASK = 0x7FFFFFFF | |
246 | |
247 ; Copy source colors in first reg | |
248 movd mm0, YUV1 | |
249 movd mm1, YUV2 | |
250 | |
251 mov eax, 0x00FF0000 | |
252 movd mm6, eax; mm6 = Ymask = 0x00FF0000 | |
253 | |
254 ; Calculate color Y difference | |
255 movq mm2, mm0 | |
256 movq mm3, mm1 | |
257 pand mm2, mm6 | |
258 pand mm3, mm6 | |
259 psubd mm2, mm3 | |
260 pand mm2, mm7 | |
261 | |
262 mov eax, 0x0000FF00 | |
263 movd mm6, eax; mm6 = Umask = 0x0000FF00 | |
264 | |
265 ; Calculate color U difference | |
266 movq mm3, mm0 | |
267 movq mm4, mm1 | |
268 pand mm3, mm6 | |
269 pand mm4, mm6 | |
270 psubd mm3, mm4 | |
271 pand mm3, mm7 | |
272 | |
273 mov eax, 0x000000FF | |
274 movd mm6, eax; mm6 = Vmask = 0x000000FF | |
275 | |
276 ; Calculate color V difference | |
277 movq mm4, mm0 | |
278 movq mm5, mm1 | |
279 pand mm4, mm6 | |
280 pand mm5, mm6 | |
281 psubd mm4, mm5 | |
282 pand mm4, mm7 | |
283 | |
284 mov eax, 0x00300000 | |
285 movd mm5, eax; mm5 = trY = 0x00300000 | |
286 mov eax, 0x00000700 | |
287 movd mm6, eax; mm6 = trU = 0x00000700 | |
288 mov eax, 0x00000006 | |
289 movd mm7, eax; mm7 = trV = 0x00000006 | |
290 | |
291 ; Compare the results | |
292 pcmpgtd mm2, trY | |
293 pcmpgtd mm3, trU | |
294 pcmpgtd mm4, trV | |
295 por mm2, mm3 | |
296 por mm2, mm4 | |
297 | |
298 movd retval, mm2 | |
299 | |
300 EMMS | |
301 } | |
302 return (retval != 0); | |
303 #else | |
304 return | |
305 (abs32((YUV1 & Ymask) - (YUV2 & Ymask)) > trY) || | |
306 (abs32((YUV1 & Umask) - (YUV2 & Umask)) > trU) || | |
307 (abs32((YUV1 & Vmask) - (YUV2 & Vmask)) > trV); | |
308 #endif | |
309 } | |
310 | |
311 unsigned int RGBtoYUV(unsigned int c) | |
312 { // Division through 3 slows down the emulation about 10% !!! | |
313 #ifdef MMX | |
314 unsigned int retval; | |
315 __asm | |
316 { | |
317 movd mm0, c | |
318 movq mm1, mm0 | |
319 movq mm2, mm0; mm0 = mm1 = mm2 = c | |
320 | |
321 mov eax, 0x000000FF | |
322 movd mm5, eax; mm5 = REDMASK = 0x000000FF | |
323 mov eax, 0x0000FF00 | |
324 movd mm6, eax; mm6 = GREENMASK = 0x0000FF00 | |
325 mov eax, 0x00FF0000 | |
326 movd mm7, eax; mm7 = BLUEMASK = 0x00FF0000 | |
327 | |
328 pand mm0, mm5 | |
329 pand mm1, mm6 | |
330 pand mm2, mm7; mm0 = R mm1 = G mm2 = B | |
331 | |
332 movq mm3, mm0 | |
333 paddd mm3, mm1 | |
334 paddd mm3, mm2 | |
335 ; psrld mm3, 2; mm3 = Y | |
336 ; pslld mm3, 16 | |
337 pslld mm3, 14; mm3 = Y << 16 | |
338 | |
339 mov eax, 512 | |
340 movd mm7, eax; mm7 = 128 << 2 = 512 | |
341 | |
342 movq mm4, mm0 | |
343 psubd mm4, mm2 | |
344 ; psrld mm4, 2 | |
345 ; paddd mm4, mm7; mm4 = U | |
346 ; pslld mm4, 8; mm4 = U << 8 | |
347 paddd mm4, mm7 | |
348 pslld mm4, 6 | |
349 | |
350 mov eax, 128 | |
351 movd mm7, eax; mm7 = 128 | |
352 | |
353 movq mm5, mm1 | |
354 pslld mm5, 1 | |
355 psubd mm5, mm0 | |
356 psubd mm5, mm2 | |
357 psrld mm5, 3 | |
358 paddd mm5, mm7; mm5 = V | |
359 | |
360 paddd mm5, mm4 | |
361 paddd mm5, mm3 | |
362 | |
363 movd retval, mm5 | |
364 | |
365 EMMS | |
366 } | |
367 return retval; | |
368 #else | |
369 unsigned char r, g, b, Y, u, v; | |
370 r = (c & 0x000000FF); | |
371 g = (c & 0x0000FF00) >> 8; | |
372 b = (c & 0x00FF0000) >> 16; | |
373 Y = (r + g + b) >> 2; | |
374 u = 128 + ((r - b) >> 2); | |
375 v = 128 + ((-r + 2 * g - b) >> 3); | |
376 return (Y << 16) + (u << 8) + v; | |
377 | |
378 // Extremely High Quality Code | |
379 //unsigned char r, g, b; | |
380 //r = c & 0xFF; | |
381 //g = (c >> 8) & 0xFF; | |
382 //b = (c >> 16) & 0xFF; | |
383 //unsigned char y, u, v; | |
384 //y = (0.256788 * r + 0.504129 * g + 0.097906 * b) + 16; | |
385 //u = (-0.148223 * r - 0.290993 * g + 0.439216 * b) + 128; | |
386 //v = (0.439216 * r - 0.367788 * g - 0.071427 * b) + 128; | |
387 //return (y << 16) + (u << 8) + v; | |
388 #endif | |
389 } |