Mercurial > vba-linux
comparison src/filters/2xSaImmx.asm @ 27:b970226568d2
brought in filters package
author | Robert McIntyre <rlm@mit.edu> |
---|---|
date | Sun, 04 Mar 2012 20:32:31 -0600 |
parents | f9f4f1b99eed |
children |
comparison
equal
deleted
inserted
replaced
26:18eaae41bde3 | 27:b970226568d2 |
---|---|
1 ;/*---------------------------------------------------------------------* | |
2 ; * The following (piece of) code, (part of) the 2xSaI engine, * | |
3 ; * copyright (c) 1999 - 2001 by Derek Liauw Kie Fa. * | |
4 ; * Non-Commercial use of this software is allowed and is encouraged, * | |
5 ; * provided that appropriate credit be given. * | |
6 ; * You may freely modify this code, but I request * | |
7 ; * that any improvements to the engine be submitted to me, so * | |
8 ; * that I can implement these improvements in newer versions of * | |
9 ; * the software. * | |
10 ; * If you need more information, have any comments or suggestions, * | |
11 ; * you can e-mail me. My e-mail: derek-liauw@usa.net. * | |
12 ; *---------------------------------------------------------------------*/ | |
13 | |
14 ;---------------------- | |
15 ; 2xSaI version 0.59 WIP, soon to become version 0.60 | |
16 ;---------------------- | |
17 | |
18 ;%define FAR_POINTER | |
19 | |
20 | |
21 | |
; 32-bit code. The exported routine names carry one extra leading
; underscore when __DJGPP__ is defined.
22 BITS 32 | |
23 %ifdef __DJGPP__ | |
24 GLOBAL __2xSaILine | |
25 GLOBAL __2xSaISuperEagleLine | |
26 GLOBAL __2xSaISuper2xSaILine | |
27 GLOBAL _Init_2xSaIMMX | |
28 %else | |
29 GLOBAL _2xSaILine | |
30 GLOBAL _2xSaISuperEagleLine | |
31 GLOBAL _2xSaISuper2xSaILine | |
32 GLOBAL Init_2xSaIMMX | |
33 %endif | |
34 SECTION .text ALIGN = 32 | |
35 | |
; NOTE(review): the prototypes quoted below list five (six with
; FAR_POINTER) arguments and no delta pointer, while the offset table
; that follows places deltaPtr at [ebp+12], between srcPtr and srcPitch.
; The comments look stale - confirm against the C-side declaration.
36 %ifdef FAR_POINTER | |
37 ;EXTERN_C void _2xSaILine (uint8 *srcPtr, uint32 srcPitch, uint32 width, | |
38 ; uint8 *dstPtr, uint32 dstPitch, uint16 dstSegment); | |
39 %else | |
40 ;EXTERN_C void _2xSaILine (uint8 *srcPtr, uint32 srcPitch, uint32 width, | |
41 ; uint8 *dstPtr, uint32 dstPitch); | |
42 %endif | |
43 | |
; Argument offsets relative to ebp once a routine has executed
; "push ebp / mov ebp, esp": [ebp+0] is the saved ebp, [ebp+4] the return
; address, so the first stack argument sits at [ebp+8] and each following
; one 4 bytes higher. dstSegment is only read when FAR_POINTER is defined.
44 srcPtr equ 8 | |
45 deltaPtr equ 12 | |
46 srcPitch equ 16 | |
47 width equ 20 | |
48 dstOffset equ 24 | |
49 dstPitch equ 28 | |
50 dstSegment equ 32 | |
51 | |
52 | |
53 | |
54 | |
; Byte offsets of neighbouring pixels inside one source row. Pixels are
; handled as 16-bit words, so adjacent columns are 2 bytes apart:
; -2 = column to the left, 0 = current column, 2 and 4 = the next two.
; Each group below names the same columns for a different row; the row is
; chosen by the addressing in the routines: the B row is read at [eax],
; the 4/5/6/S2 row at [eax+ebx], the 1/2/3/S1 row at [eax+ebx+ebx] and the
; A row one pitch further down (ebx = source pitch).
55 colorB0 equ -2 | |
56 colorB1 equ 0 | |
57 colorB2 equ 2 | |
58 colorB3 equ 4 | |
59 | |
; NOTE(review): color7/color8/color9 are not referenced by the code
; visible in this part of the file.
60 color7 equ -2 | |
61 color8 equ 0 | |
62 color9 equ 2 | |
63 | |
64 color4 equ -2 | |
65 color5 equ 0 | |
66 color6 equ 2 | |
67 colorS2 equ 4 | |
68 | |
69 color1 equ -2 | |
70 color2 equ 0 | |
71 color3 equ 2 | |
72 colorS1 equ 4 | |
73 | |
74 colorA0 equ -2 | |
75 colorA1 equ 0 | |
76 colorA2 equ 2 | |
77 colorA3 equ 4 | |
78 | |
79 | |
80 | |
81 | |
;-----------------------------------------------------------------------
; _2xSaISuper2xSaILine (__2xSaISuper2xSaILine when __DJGPP__ is defined)
; Scales one line of 16-bit pixels to double size with MMX. Every loop
; pass reads 4 source pixels and writes 8 pixels to each of two
; destination rows (at dstOffset and at dstOffset + dstPitch).
; Args:  taken from the stack frame through the srcPtr, deltaPtr,
;        srcPitch, width, dstOffset and dstPitch offsets from ebp
;        (plus dstSegment when FAR_POINTER is defined).
; Regs:  eax = source cursor, starting one pitch above srcPtr
;        ebx = source pitch
;        edx = destination cursor
;        ecx = scratch; the remaining pixel count lives on the stack
;              for the duration of each pass
; Saves: all general registers via pushad/popad. The deltaPtr argument
;        slot on the stack is advanced in place. emms is executed before
;        returning. With FAR_POINTER, fs is loaded and not restored.
; Data:  colorMask, lowPixelMask, ONE, Mask26, Mask35, the I*Pixel
;        temporaries and final1a/final1b/final2a/final2b are memory
;        operands defined outside this routine.
;-----------------------------------------------------------------------
82 %ifdef __DJGPP__ | |
83 __2xSaISuper2xSaILine: | |
84 %else | |
85 _2xSaISuper2xSaILine: | |
86 %endif | |
87 ; Store some stuff | |
88 push ebp | |
89 mov ebp, esp | |
90 pushad | |
91 | |
92 ; Prepare the destination | |
93 %ifdef FAR_POINTER | |
94 ; Set the selector | |
95 mov eax, [ebp+dstSegment] | |
96 mov fs, ax | |
97 %endif | |
98 mov edx, [ebp+dstOffset] ; edx points to the screen | |
99 ; Prepare the source | |
100 ; eax points to colorA | |
101 mov eax, [ebp+srcPtr] ;eax points to colorA | |
102 mov ebx, [ebp+srcPitch] ;ebx contains the source pitch | |
103 mov ecx, [ebp+width] ;ecx contains the number of pixels to process | |
104 ; eax now points to colorB1 | |
105 sub eax, ebx ;eax points to B1 which is the base | |
106 | |
107 ; Main Loop | |
; One pass covers 4 source pixels (8 bytes). The remaining pixel count is
; parked on the stack so that ecx can be reused as scratch until it is
; popped again after .SKIP_PROCESS.
108 .Loop: push ecx | |
109 | |
110 ;-----Check Delta------------------ | |
; ecx = cursor into the delta buffer. Its rows are stepped with the same
; pitch (ebx) as the source and it is addressed 2 bytes further on than
; the source ([ecx+2+offset] against [eax+offset]).
111 mov ecx, [ebp+deltaPtr] | |
112 | |
113 | |
114 ;load source img | |
; Two quadwords per row (offsets -2 and +4) from four consecutive rows:
; eax, eax+ebx, eax+2*ebx and eax+3*ebx. eax is bumped by one pitch for
; the fourth row and restored from the stack afterwards.
115 movq mm0, [eax+colorB0] | |
116 movq mm1, [eax+colorB3] | |
117 movq mm2, [eax+ebx+color4] | |
118 movq mm3, [eax+ebx+colorS2] | |
119 movq mm4, [eax+ebx+ebx+color1] | |
120 movq mm5, [eax+ebx+ebx+colorS1] | |
121 push eax | |
122 add eax, ebx | |
123 movq mm6, [eax+ebx+ebx+colorA0] | |
124 movq mm7, [eax+ebx+ebx+colorA3] | |
125 pop eax | |
126 | |
127 ;compare to delta | |
; Each register becomes a per-word equality mask (0FFFFh where the source
; word matches the stored delta word). ecx is bumped by one pitch for the
; fourth row in the same way as eax above.
128 pcmpeqw mm0, [ecx+2+colorB0] | |
129 pcmpeqw mm1, [ecx+2+colorB3] | |
130 pcmpeqw mm2, [ecx+ebx+2+color4] | |
131 pcmpeqw mm3, [ecx+ebx+2+colorS2] | |
132 pcmpeqw mm4, [ecx+ebx+ebx+2+color1] | |
133 pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1] | |
134 add ecx, ebx | |
135 pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0] | |
136 pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3] | |
137 sub ecx, ebx | |
138 | |
139 | |
140 ;compose results | |
; AND the eight masks together, then compare the result with zero: mm7
; ends up with 0FFFFh in every word lane where at least one compare
; failed. In between, the current B-row quadword is copied into the delta
; buffer; this is the only delta store in the routine.
141 pand mm0, mm1 | |
142 pand mm2, mm3 | |
143 pand mm4, mm5 | |
144 pand mm6, mm7 | |
145 pand mm0, mm2 | |
146 pand mm4, mm6 | |
147 pxor mm7, mm7 | |
148 pand mm0, mm4 | |
149 movq mm6, [eax+colorB0] | |
150 pcmpeqw mm7, mm0 ;did any compare give us a zero ? | |
151 | |
152 movq [ecx+2+colorB0], mm6 | |
153 | |
; Squeeze the four lane flags into the low 32 bits so that one test
; decides whether any lane differed from the delta buffer.
154 packsswb mm7, mm7 | |
155 movd ecx, mm7 | |
156 test ecx, ecx | |
157 jz near .SKIP_PROCESS ;no, so we can skip | |
158 | |
159 ;End Delta | |
160 | |
161 ;--------------------------------- | |
; Word-wise blend of color5 and color6, stored as I56Pixel:
;   ((a & colorMask) >> 1) + ((b & colorMask) >> 1) + (a & b & lowPixelMask)
; The same instruction pattern is reused for every blend below.
; mm4/mm5 keep unmodified copies of color5/color6 for the next two blends.
; NOTE(review): colorMask and lowPixelMask are defined outside this view;
; presumably they clear / select the lowest bit of each colour channel so
; the halves can be added without carrying between channels - confirm.
162 movq mm0, [eax+ebx+color5] | |
163 movq mm1, [eax+ebx+color6] | |
164 movq mm2, mm0 | |
165 movq mm3, mm1 | |
166 movq mm4, mm0 | |
167 movq mm5, mm1 | |
168 | |
169 pand mm0, [colorMask] | |
170 pand mm1, [colorMask] | |
171 | |
172 psrlw mm0, 1 | |
173 psrlw mm1, 1 | |
174 | |
175 pand mm3, [lowPixelMask] | |
176 paddw mm0, mm1 | |
177 | |
178 pand mm3, mm2 | |
179 paddw mm0, mm3 ;mm0 contains the interpolated values | |
180 movq [I56Pixel], mm0 | |
181 movq mm7, mm0 | |
182 | |
183 ;------------------- | |
; I5556Pixel = blend of I56Pixel (mm7) with color5 (mm4).
184 movq mm0, mm7 | |
185 movq mm1, mm4 ;5,5,5,6 | |
186 movq mm2, mm0 | |
187 movq mm3, mm1 | |
188 | |
189 pand mm0, [colorMask] | |
190 pand mm1, [colorMask] | |
191 | |
192 psrlw mm0, 1 | |
193 psrlw mm1, 1 | |
194 | |
195 pand mm3, [lowPixelMask] | |
196 paddw mm0, mm1 | |
197 | |
198 pand mm3, mm2 | |
199 paddw mm0, mm3 ;mm0 contains the interpolated values | |
200 movq [I5556Pixel], mm0 | |
201 ;-------------------- | |
202 | |
; I5666Pixel = blend of I56Pixel (mm7) with color6 (mm5).
203 movq mm0, mm7 | |
204 movq mm1, mm5 ;6,6,6,5 | |
205 movq mm2, mm0 | |
206 movq mm3, mm1 | |
207 | |
208 pand mm0, [colorMask] | |
209 pand mm1, [colorMask] | |
210 | |
211 psrlw mm0, 1 | |
212 psrlw mm1, 1 | |
213 | |
214 pand mm3, [lowPixelMask] | |
215 paddw mm0, mm1 | |
216 | |
217 pand mm3, mm2 | |
218 paddw mm0, mm3 | |
219 movq [I5666Pixel], mm0 | |
220 | |
221 ;------------------------- | |
222 ;------------------------- | |
; Same three blends for the next source row: I23Pixel = blend(color2,
; color3), with mm4/mm5 keeping color2/color3 for the two blends after it.
223 movq mm0, [eax+ebx+ebx+color2] | |
224 movq mm1, [eax+ebx+ebx+color3] | |
225 movq mm2, mm0 | |
226 movq mm3, mm1 | |
227 movq mm4, mm0 | |
228 movq mm5, mm1 | |
229 | |
230 pand mm0, [colorMask] | |
231 pand mm1, [colorMask] | |
232 | |
233 psrlw mm0, 1 | |
234 psrlw mm1, 1 | |
235 | |
236 pand mm3, [lowPixelMask] | |
237 paddw mm0, mm1 | |
238 | |
239 pand mm3, mm2 | |
240 paddw mm0, mm3 | |
241 movq [I23Pixel], mm0 | |
242 movq mm7, mm0 | |
243 | |
244 ;--------------------- | |
; I2223Pixel = blend of I23Pixel (mm7) with color2 (mm4).
245 movq mm0, mm7 | |
246 movq mm1, mm4 ;2,2,2,3 | |
247 movq mm2, mm0 | |
248 movq mm3, mm1 | |
249 | |
250 pand mm0, [colorMask] | |
251 pand mm1, [colorMask] | |
252 | |
253 psrlw mm0, 1 | |
254 psrlw mm1, 1 | |
255 | |
256 pand mm3, [lowPixelMask] | |
257 paddw mm0, mm1 | |
258 | |
259 pand mm3, mm2 | |
260 paddw mm0, mm3 | |
261 movq [I2223Pixel], mm0 | |
262 | |
263 ;---------------------- | |
; I2333Pixel = blend of I23Pixel (mm7) with color3 (mm5).
264 movq mm0, mm7 | |
265 movq mm1, mm5 ;3,3,3,2 | |
266 movq mm2, mm0 | |
267 movq mm3, mm1 | |
268 | |
269 pand mm0, [colorMask] | |
270 pand mm1, [colorMask] | |
271 | |
272 psrlw mm0, 1 | |
273 psrlw mm1, 1 | |
274 | |
275 pand mm3, [lowPixelMask] | |
276 paddw mm0, mm1 | |
277 | |
278 pand mm3, mm2 | |
279 paddw mm0, mm3 | |
280 movq [I2333Pixel], mm0 | |
281 | |
282 | |
283 ;-------------------- | |
284 ;//////////////////////////////// | |
285 ; Decide which "branch" to take | |
286 ;-------------------------------- | |
; Per word lane, from the two diagonals of the 2x2 core (5,6 / 2,3):
;   Mask26 = (color5 != color3) and (color6 == color2)
;   Mask35 = ((color5 == color3) and (color6 != color2))
;            or (all of color5, color6, color2, color3 equal)
;   mm0    = both diagonals match but color5 != color6; these lanes are
;            undecided and go through the scoring code below.
; The inline comments call the four pixels colorA..colorD.
287 movq mm0, [eax+ebx+color5] | |
288 movq mm1, [eax+ebx+color6] | |
289 movq mm6, mm0 | |
290 movq mm7, mm1 | |
291 pcmpeqw mm0, [eax+ebx+ebx+color3] | |
292 pcmpeqw mm1, [eax+ebx+ebx+color2] | |
293 pcmpeqw mm6, mm7 | |
294 | |
295 movq mm2, mm0 | |
296 movq mm3, mm0 | |
297 | |
298 pand mm0, mm1 ;colorA == colorD && colorB == colorC | |
299 pxor mm7, mm7 | |
300 | |
301 pcmpeqw mm2, mm7 | |
302 pand mm6, mm0 | |
303 pand mm2, mm1 ;colorA != colorD && colorB == colorC | |
304 | |
305 pcmpeqw mm1, mm7 | |
306 | |
307 pand mm1, mm3 ;colorA == colorD && colorB != colorC | |
308 pxor mm0, mm6 | |
309 por mm1, mm6 | |
310 movq mm7, mm0 | |
311 movq [Mask26], mm2 | |
312 packsswb mm7, mm7 | |
313 movq [Mask35], mm1 | |
314 | |
; Skip the scoring when no lane is undecided.
315 movd ecx, mm7 | |
316 test ecx, ecx | |
317 jz near .SKIP_GUESS | |
318 | |
319 ;--------------------------------------------- | |
; Scoring of the undecided lanes.
;   mm6 = undecided-lane mask ANDed with ONE
;   mm4, mm5 = the two reference pixels loaded through colorA / colorB
;   mm7 = running score, starts at zero
; The group below is repeated for four pixel pairs. For each pair it
; counts per lane how many of the two pixels equal mm4 (in mm0) and how
; many equal mm5 (in mm2); it then adds mm6 to the score where the mm4
; count is not greater than mm6 and subtracts mm6 where the mm5 count is
; not greater than mm6. Lanes with mm6 = 0 leave the score untouched.
; NOTE(review): colorA, colorB, colorE, colorF, colorG, colorH, colorK,
; colorL, colorN and colorO are offsets that are not defined in the
; visible part of the file, and ONE is presumably 1 in every word -
; confirm both where they are defined.
320 movq mm6, mm0 | |
321 movq mm4, [eax+ebx+colorA] | |
322 movq mm5, [eax+ebx+colorB] | |
323 pxor mm7, mm7 | |
324 pand mm6, [ONE] | |
325 | |
; Pair 1: colorE (row eax) and colorG (row eax+ebx).
326 movq mm0, [eax+colorE] | |
327 movq mm1, [eax+ebx+colorG] | |
328 movq mm2, mm0 | |
329 movq mm3, mm1 | |
330 pcmpeqw mm0, mm4 | |
331 pcmpeqw mm1, mm4 | |
332 pcmpeqw mm2, mm5 | |
333 pcmpeqw mm3, mm5 | |
334 pand mm0, mm6 | |
335 pand mm1, mm6 | |
336 pand mm2, mm6 | |
337 pand mm3, mm6 | |
338 paddw mm0, mm1 | |
339 paddw mm2, mm3 | |
340 | |
341 pxor mm3, mm3 | |
342 pcmpgtw mm0, mm6 | |
343 pcmpgtw mm2, mm6 | |
344 pcmpeqw mm0, mm3 | |
345 pcmpeqw mm2, mm3 | |
346 pand mm0, mm6 | |
347 pand mm2, mm6 | |
348 paddw mm7, mm0 | |
349 psubw mm7, mm2 | |
350 | |
; Pair 2: colorF (row eax) and colorK (row eax+ebx).
351 movq mm0, [eax+colorF] | |
352 movq mm1, [eax+ebx+colorK] | |
353 movq mm2, mm0 | |
354 movq mm3, mm1 | |
355 pcmpeqw mm0, mm4 | |
356 pcmpeqw mm1, mm4 | |
357 pcmpeqw mm2, mm5 | |
358 pcmpeqw mm3, mm5 | |
359 pand mm0, mm6 | |
360 pand mm1, mm6 | |
361 pand mm2, mm6 | |
362 pand mm3, mm6 | |
363 paddw mm0, mm1 | |
364 paddw mm2, mm3 | |
365 | |
366 pxor mm3, mm3 | |
367 pcmpgtw mm0, mm6 | |
368 pcmpgtw mm2, mm6 | |
369 pcmpeqw mm0, mm3 | |
370 pcmpeqw mm2, mm3 | |
371 pand mm0, mm6 | |
372 pand mm2, mm6 | |
373 paddw mm7, mm0 | |
374 psubw mm7, mm2 | |
375 | |
; eax is moved down one pitch for the last two pairs, so [eax+ebx] is two
; pitches and [eax+ebx+ebx] three pitches below the base row. The saved
; value is popped back after pair 4.
376 push eax | |
377 add eax, ebx | |
; Pair 3: colorH and colorN.
378 movq mm0, [eax+ebx+colorH] | |
379 movq mm1, [eax+ebx+ebx+colorN] | |
380 movq mm2, mm0 | |
381 movq mm3, mm1 | |
382 pcmpeqw mm0, mm4 | |
383 pcmpeqw mm1, mm4 | |
384 pcmpeqw mm2, mm5 | |
385 pcmpeqw mm3, mm5 | |
386 pand mm0, mm6 | |
387 pand mm1, mm6 | |
388 pand mm2, mm6 | |
389 pand mm3, mm6 | |
390 paddw mm0, mm1 | |
391 paddw mm2, mm3 | |
392 | |
393 pxor mm3, mm3 | |
394 pcmpgtw mm0, mm6 | |
395 pcmpgtw mm2, mm6 | |
396 pcmpeqw mm0, mm3 | |
397 pcmpeqw mm2, mm3 | |
398 pand mm0, mm6 | |
399 pand mm2, mm6 | |
400 paddw mm7, mm0 | |
401 psubw mm7, mm2 | |
402 | |
; Pair 4: colorL and colorO.
403 movq mm0, [eax+ebx+colorL] | |
404 movq mm1, [eax+ebx+ebx+colorO] | |
405 movq mm2, mm0 | |
406 movq mm3, mm1 | |
407 pcmpeqw mm0, mm4 | |
408 pcmpeqw mm1, mm4 | |
409 pcmpeqw mm2, mm5 | |
410 pcmpeqw mm3, mm5 | |
411 pand mm0, mm6 | |
412 pand mm1, mm6 | |
413 pand mm2, mm6 | |
414 pand mm3, mm6 | |
415 paddw mm0, mm1 | |
416 paddw mm2, mm3 | |
417 | |
418 pxor mm3, mm3 | |
419 pcmpgtw mm0, mm6 | |
420 pcmpgtw mm2, mm6 | |
421 pcmpeqw mm0, mm3 | |
422 pcmpeqw mm2, mm3 | |
423 pand mm0, mm6 | |
424 pand mm2, mm6 | |
425 paddw mm7, mm0 | |
426 psubw mm7, mm2 | |
427 | |
; Fold the score into the masks: lanes with a positive score are added to
; Mask35, lanes with a negative score are added to Mask26.
428 pop eax | |
429 movq mm1, mm7 | |
430 pxor mm0, mm0 | |
431 pcmpgtw mm7, mm0 | |
432 pcmpgtw mm0, mm1 | |
433 | |
434 por mm7, [Mask35] | |
435 por mm0, [Mask26] | |
436 movq [Mask35], mm7 | |
437 movq [Mask26], mm0 | |
438 | |
439 .SKIP_GUESS: | |
440 | |
441 ;Start the ASSEMBLY !!! eh... compose all the results together to form the final image... | |
442 | |
443 | |
; mm0 = blend of color5 and color2 (same formula as above). It stays live
; until final2a has been stored. The copies made into mm4/mm5 here are
; overwritten before they are read.
444 movq mm0, [eax+ebx+color5] | |
445 movq mm1, [eax+ebx+ebx+color2] | |
446 movq mm2, mm0 | |
447 movq mm3, mm1 | |
448 movq mm4, mm0 | |
449 movq mm5, mm1 | |
450 | |
451 pand mm0, [colorMask] | |
452 pand mm1, [colorMask] | |
453 | |
454 psrlw mm0, 1 | |
455 psrlw mm1, 1 | |
456 | |
457 pand mm3, [lowPixelMask] | |
458 paddw mm0, mm1 | |
459 | |
460 pand mm3, mm2 | |
461 paddw mm0, mm3 ;mm0 contains the interpolated values | |
462 ;--------------------------- | |
463 | |
464 | |
465 | |
; The %ifdef below keeps C reference code for product1a / product2a out
; of the build; it is only assembled if the nonsense symbol were defined.
; NOTE(review): presumably that symbol is never defined - confirm.
466 %ifdef dfhsdfhsdahdsfhdsfh | |
467 | |
468 if (color5 == color3 && color2 != color6 && color4 == color5 && color5 != colorA2) | |
469 product2a = INTERPOLATE (color2, color5); | |
470 else | |
471 if (color5 == color1 && color6 == color5 && color4 != color2 && color5 != colorA0) | |
472 product2a = INTERPOLATE(color2, color5); | |
473 else | |
474 product2a = color2; | |
475 | |
476 if (color2 == color6 && color5 != color3 && color1 == color2 && color2 != colorB2) | |
477 product1a = INTERPOLATE (color2, color5); | |
478 else | |
479 if (color4 == color2 && color3 == color2 && color1 != color5 && color2 != colorB0) | |
480 product1a = INTERPOLATE(color2, color5); | |
481 else | |
482 product1a = color5; | |
483 | |
484 %endif | |
485 | |
486 | |
; final1a, first condition:
;   mm7 = Mask26 and (color1 == color2) and (colorB2 != color2)
; "Not equal" is produced by comparing an equality mask with zero.
487 movq mm7, [Mask26] | |
488 movq mm6, [eax+colorB2] | |
489 movq mm5, [eax+ebx+ebx+color2] | |
490 movq mm4, [eax+ebx+ebx+color1] | |
491 pcmpeqw mm4, mm5 | |
492 pcmpeqw mm6, mm5 | |
493 pxor mm5, mm5 | |
494 pand mm7, mm4 | |
495 pcmpeqw mm6, mm5 | |
496 pand mm7, mm6 | |
497 | |
498 | |
499 | |
; final1a, second condition, ORed into mm7:
;   (color3 == color2) and (color4 == color2) and
;   (color5 != color1) and (colorB0 != color2)
500 movq mm6, [eax+ebx+ebx+color3] | |
501 movq mm5, [eax+ebx+ebx+color2] | |
502 movq mm4, [eax+ebx+ebx+color1] | |
503 movq mm2, [eax+ebx+color5] | |
504 movq mm1, [eax+ebx+color4] | |
505 movq mm3, [eax+colorB0] | |
506 | |
507 pcmpeqw mm2, mm4 | |
508 pcmpeqw mm6, mm5 | |
509 pcmpeqw mm1, mm5 | |
510 pcmpeqw mm3, mm5 | |
511 pxor mm5, mm5 | |
512 pcmpeqw mm2, mm5 | |
513 pcmpeqw mm3, mm5 | |
514 pand mm6, mm1 | |
515 pand mm2, mm3 | |
516 pand mm6, mm2 | |
517 por mm7, mm6 | |
518 | |
519 | |
; Select per lane: the color2/color5 blend (mm0) where mm7 is set,
; otherwise color5. mm6 = inverse of mm7 (mm5 is still zero here).
520 movq mm6, mm7 | |
521 pcmpeqw mm6, mm5 | |
522 pand mm7, mm0 | |
523 | |
524 movq mm1, [eax+ebx+color5] | |
525 pand mm6, mm1 | |
526 por mm7, mm6 | |
527 movq [final1a], mm7 ;finished 1a | |
528 | |
529 | |
530 | |
531 ;-------------------------------- | |
532 | |
; final2a, first condition:
;   mm7 = Mask35 and (color4 == color5) and (colorA2 != color5)
; The A row is three pitches below the base row, hence the temporary
; eax adjustment.
533 movq mm7, [Mask35] | |
534 push eax | |
535 add eax, ebx | |
536 movq mm6, [eax+ebx+ebx+colorA2] | |
537 pop eax | |
538 movq mm5, [eax+ebx+color5] | |
539 movq mm4, [eax+ebx+color4] | |
540 pcmpeqw mm4, mm5 | |
541 pcmpeqw mm6, mm5 | |
542 pxor mm5, mm5 | |
543 pand mm7, mm4 | |
544 pcmpeqw mm6, mm5 | |
545 pand mm7, mm6 | |
546 | |
547 | |
548 | |
; final2a, second condition, ORed into mm7:
;   (color6 == color5) and (color1 == color5) and
;   (color2 != color4) and (colorA0 != color5)
549 movq mm6, [eax+ebx+color6] | |
550 movq mm5, [eax+ebx+color5] | |
551 movq mm4, [eax+ebx+color4] | |
552 movq mm2, [eax+ebx+ebx+color2] | |
553 movq mm1, [eax+ebx+ebx+color1] | |
554 push eax | |
555 add eax, ebx | |
556 movq mm3, [eax+ebx+ebx+colorA0] | |
557 pop eax | |
558 | |
559 pcmpeqw mm2, mm4 | |
560 pcmpeqw mm6, mm5 | |
561 pcmpeqw mm1, mm5 | |
562 pcmpeqw mm3, mm5 | |
563 pxor mm5, mm5 | |
564 pcmpeqw mm2, mm5 | |
565 pcmpeqw mm3, mm5 | |
566 pand mm6, mm1 | |
567 pand mm2, mm3 | |
568 pand mm6, mm2 | |
569 por mm7, mm6 | |
570 | |
571 | |
; Select per lane: the color2/color5 blend (mm0) where mm7 is set,
; otherwise color2.
572 movq mm6, mm7 | |
573 pcmpeqw mm6, mm5 | |
574 pand mm7, mm0 | |
575 | |
576 movq mm1, [eax+ebx+ebx+color2] | |
577 pand mm6, mm1 | |
578 por mm7, mm6 | |
579 movq [final2a], mm7 ;finished 2a | |
580 | |
581 | |
582 ;-------------------------------------------- | |
583 | |
584 | |
; C reference code for product2b / product1b, excluded from the build by
; the same nonsense %ifdef symbol as above.
585 %ifdef dfhsdfhsdahdsfhdsfh | |
586 if (color6 == color3 && color3 == colorA1 && color2 != colorA2 && color3 != colorA0) | |
587 product2b = Q_INTERPOLATE (color3, color3, color3, color2); | |
588 else | |
589 if (color5 == color2 && color2 == colorA2 && colorA1 != color3 && color2 != colorA3) | |
590 product2b = Q_INTERPOLATE (color2, color2, color2, color3); | |
591 else | |
592 product2b = INTERPOLATE (color2, color3); | |
593 | |
594 if (color6 == color3 && color6 == colorB1 && color5 != colorB2 && color6 != colorB0) | |
595 product1b = Q_INTERPOLATE (color6, color6, color6, color5); | |
596 else | |
597 if (color5 == color2 && color5 == colorB2 && colorB1 != color6 && color5 != colorB3) | |
598 product1b = Q_INTERPOLATE (color6, color5, color5, color5); | |
599 else | |
600 product1b = INTERPOLATE (color5, color6); | |
601 %endif | |
602 | |
; final2b, condition 1 (result in mm0):
;   (color6 == color3) and (colorA1 == color3) and
;   (color2 != colorA2) and (colorA0 != color3)
; mm2 (colorA2) and mm3 (colorA3) stay loaded for condition 2; mm7 = 0.
603 push eax | |
604 add eax, ebx | |
605 pxor mm7, mm7 | |
606 movq mm0, [eax+ebx+ebx+colorA0] | |
607 movq mm1, [eax+ebx+ebx+colorA1] | |
608 movq mm2, [eax+ebx+ebx+colorA2] | |
609 movq mm3, [eax+ebx+ebx+colorA3] | |
610 pop eax | |
611 movq mm4, [eax+ebx+ebx+color2] | |
612 movq mm5, [eax+ebx+ebx+color3] | |
613 movq mm6, [eax+ebx+color6] | |
614 | |
615 pcmpeqw mm6, mm5 | |
616 pcmpeqw mm1, mm5 | |
617 pcmpeqw mm4, mm2 | |
618 pcmpeqw mm0, mm5 | |
619 pcmpeqw mm4, mm7 | |
620 pcmpeqw mm0, mm7 | |
621 pand mm0, mm4 | |
622 pand mm6, mm1 | |
623 pand mm0, mm6 | |
624 | |
625 | |
; final2b, condition 2 (result in mm1):
;   (color5 == color2) and (colorA2 == color2) and
;   (colorA1 != color3) and (colorA3 != color2)
626 push eax | |
627 add eax, ebx | |
628 movq mm1, [eax+ebx+ebx+colorA1] | |
629 pop eax | |
630 movq mm4, [eax+ebx+ebx+color2] | |
631 movq mm5, [eax+ebx+color5] | |
632 movq mm6, [eax+ebx+ebx+color3] | |
633 | |
634 pcmpeqw mm5, mm4 | |
635 pcmpeqw mm2, mm4 | |
636 pcmpeqw mm1, mm6 | |
637 pcmpeqw mm3, mm4 | |
638 pcmpeqw mm1, mm7 | |
639 pcmpeqw mm3, mm7 | |
640 pand mm2, mm5 | |
641 pand mm1, mm3 | |
642 pand mm1, mm2 | |
643 | |
644 | |
; Remove from Mask35 (mm4) and Mask26 (mm3) every lane already claimed by
; condition 1 or condition 2.
645 movq mm7, mm0 | |
646 por mm7, mm1 | |
647 | |
648 movq mm4, [Mask35] | |
649 movq mm3, [Mask26] | |
650 | |
651 movq mm6, mm4 | |
652 pand mm6, mm7 | |
653 pxor mm4, mm6 | |
654 | |
655 movq mm6, mm3 | |
656 pand mm6, mm7 | |
657 pxor mm3, mm6 | |
658 | |
659 movq mm2, mm0 | |
660 movq mm7, [I2333Pixel] | |
661 movq mm6, [I2223Pixel] | |
662 movq mm5, [I23Pixel] | |
663 | |
664 | |
; Build final2b by ORing the masked candidates together:
;   condition 1 lanes -> I2333Pixel, condition 2 lanes -> I2223Pixel,
;   remaining Mask35 lanes -> color3, remaining Mask26 lanes -> color2,
;   lanes selected by none of the four masks -> I23Pixel.
665 por mm2, mm4 | |
666 pand mm4, [eax+ebx+ebx+color3] | |
667 por mm2, mm3 | |
668 pand mm3, [eax+ebx+ebx+color2] | |
669 por mm2, mm1 | |
670 pand mm0, mm7 | |
671 pand mm1, mm6 | |
672 pxor mm7, mm7 | |
673 pcmpeqw mm2, mm7 | |
674 por mm0, mm1 | |
675 por mm3, mm4 | |
676 pand mm2, mm5 | |
677 por mm0, mm3 | |
678 por mm0, mm2 | |
679 movq [final2b], mm0 | |
680 | |
681 ;----------------------------------- | |
682 | |
683 | |
; final1b, condition 1 (result in mm0):
;   (color3 == color6) and (colorB1 == color6) and
;   (color5 != colorB2) and (colorB0 != color6)
; mm2 (colorB2) and mm3 (colorB3) stay loaded for condition 2; mm7 = 0.
684 pxor mm7, mm7 | |
685 movq mm0, [eax+colorB0] | |
686 movq mm1, [eax+colorB1] | |
687 movq mm2, [eax+colorB2] | |
688 movq mm3, [eax+colorB3] | |
689 movq mm4, [eax+ebx+color5] | |
690 movq mm5, [eax+ebx+color6] | |
691 movq mm6, [eax+ebx+ebx+color3] | |
692 | |
693 pcmpeqw mm6, mm5 | |
694 pcmpeqw mm1, mm5 | |
695 pcmpeqw mm4, mm2 | |
696 pcmpeqw mm0, mm5 | |
697 pcmpeqw mm4, mm7 | |
698 pcmpeqw mm0, mm7 | |
699 pand mm0, mm4 | |
700 pand mm6, mm1 | |
701 pand mm0, mm6 | |
702 | |
; final1b, condition 2 (result in mm1):
;   (color2 == color5) and (colorB2 == color5) and
;   (colorB1 != color6) and (colorB3 != color5)
703 movq mm1, [eax+colorB1] | |
704 movq mm4, [eax+ebx+color5] | |
705 movq mm5, [eax+ebx+ebx+color2] | |
706 movq mm6, [eax+ebx+color6] | |
707 | |
708 pcmpeqw mm5, mm4 | |
709 pcmpeqw mm2, mm4 | |
710 pcmpeqw mm1, mm6 | |
711 pcmpeqw mm3, mm4 | |
712 pcmpeqw mm1, mm7 | |
713 pcmpeqw mm3, mm7 | |
714 pand mm2, mm5 | |
715 pand mm1, mm3 | |
716 pand mm1, mm2 | |
717 | |
718 | |
; As for final2b: strip the condition 1 / condition 2 lanes from
; Mask35 (mm4) and Mask26 (mm3).
719 movq mm7, mm0 | |
720 por mm7, mm1 | |
721 | |
722 movq mm4, [Mask35] | |
723 movq mm3, [Mask26] | |
724 | |
725 movq mm6, mm4 | |
726 pand mm6, mm7 | |
727 pxor mm4, mm6 | |
728 | |
729 movq mm6, mm3 | |
730 pand mm6, mm7 | |
731 pxor mm3, mm6 | |
732 | |
733 movq mm2, mm0 | |
734 movq mm7, [I5666Pixel] | |
735 movq mm6, [I5556Pixel] | |
736 movq mm5, [I56Pixel] | |
737 | |
738 | |
; Build final1b by ORing the masked candidates together:
;   condition 1 lanes -> I5666Pixel, condition 2 lanes -> I5556Pixel,
;   remaining Mask35 lanes -> color5, remaining Mask26 lanes -> color6,
;   lanes selected by none of the four masks -> I56Pixel.
739 por mm2, mm4 | |
740 pand mm4, [eax+ebx+color5] | |
741 por mm2, mm3 | |
742 pand mm3, [eax+ebx+color6] | |
743 por mm2, mm1 | |
744 pand mm0, mm7 | |
745 pand mm1, mm6 | |
746 pxor mm7, mm7 | |
747 pcmpeqw mm2, mm7 | |
748 por mm0, mm1 | |
749 por mm3, mm4 | |
750 pand mm2, mm5 | |
751 por mm0, mm3 | |
752 por mm0, mm2 | |
753 movq [final1b], mm0 | |
754 | |
755 ;--------- | |
756 | |
; Interleave the "a" and "b" results word by word so that every source
; pixel yields two horizontally adjacent output pixels:
;   mm0:mm1 = final1a/final1b interleaved -> first destination row
;   mm4:mm5 = final2a/final2b interleaved -> second destination row
757 movq mm0, [final1a] | |
758 movq mm4, [final2a] | |
759 movq mm2, [final1b] | |
760 movq mm6, [final2b] | |
761 | |
762 | |
763 movq mm1, mm0 | |
764 movq mm5, mm4 | |
765 | |
766 | |
767 punpcklwd mm0, mm2 | |
768 punpckhwd mm1, mm2 | |
769 | |
770 punpcklwd mm4, mm6 | |
771 punpckhwd mm5, mm6 | |
772 | |
773 | |
; Store 16 bytes at edx and 16 bytes one destination pitch below it.
; edx is saved around the pitch adjustment. The FAR_POINTER variant does
; the same stores through the fs segment.
774 %ifdef FAR_POINTER | |
775 movq [fs:edx], mm0 | |
776 movq [fs:edx+8], mm1 | |
777 push edx | |
778 add edx, [ebp+dstPitch] | |
779 movq [fs:edx], mm4 | |
780 movq [fs:edx+8], mm5 | |
781 pop edx | |
782 %else | |
783 movq [edx], mm0 | |
784 movq [edx+8], mm1 | |
785 push edx | |
786 add edx, [ebp+dstPitch] | |
787 movq [edx], mm4 | |
788 movq [edx+8], mm5 | |
789 pop edx | |
790 %endif | |
; Advance all cursors by one group: the delta pointer kept in the argument
; slot by 8 bytes, the destination by 16 bytes and the source by 8 bytes.
; This is also the landing point when the delta check found no change.
791 .SKIP_PROCESS: | |
792 mov ecx, [ebp+deltaPtr] | |
793 add ecx, 8 | |
794 mov [ebp+deltaPtr], ecx | |
795 add edx, 16 | |
796 add eax, 8 | |
797 | |
; Recover the pixel count pushed at .Loop, subtract the 4 pixels handled
; in this pass and repeat while the signed remainder is above zero.
798 pop ecx | |
799 sub ecx, 4 | |
800 cmp ecx, 0 | |
801 jg near .Loop | |
802 | |
803 ; Restore some stuff | |
; emms clears the MMX state before control returns to the caller.
804 popad | |
805 mov esp, ebp | |
806 pop ebp | |
807 emms | |
808 ret | |
809 | |
810 | |
811 ;------------------------------------------------------------------------- | |
812 ;------------------------------------------------------------------------- | |
813 ;------------------------------------------------------------------------- | |
814 ;------------------------------------------------------------------------- | |
815 ;------------------------------------------------------------------------- | |
816 ;------------------------------------------------------------------------- | |
817 ;------------------------------------------------------------------------- | |
818 | |
819 | |
820 | |
821 %ifdef __DJGPP__ | |
822 __2xSaISuperEagleLine: | |
823 %else | |
824 _2xSaISuperEagleLine: | |
825 %endif | |
826 ; Store some stuff | |
827 push ebp | |
828 mov ebp, esp | |
829 pushad | |
830 | |
831 ; Prepare the destination | |
832 %ifdef FAR_POINTER | |
833 ; Set the selector | |
834 mov eax, [ebp+dstSegment] | |
835 mov fs, ax | |
836 %endif | |
837 mov edx, [ebp+dstOffset] ; edx points to the screen | |
838 ; Prepare the source | |
839 ; eax points to colorA | |
840 mov eax, [ebp+srcPtr] | |
841 mov ebx, [ebp+srcPitch] | |
842 mov ecx, [ebp+width] | |
843 ; eax now points to colorB1 | |
844 sub eax, ebx | |
845 | |
846 ; Main Loop | |
847 .Loop: push ecx | |
848 | |
849 ;-----Check Delta------------------ | |
850 mov ecx, [ebp+deltaPtr] | |
851 | |
852 movq mm0, [eax+colorB0] | |
853 movq mm1, [eax+colorB3] | |
854 movq mm2, [eax+ebx+color4] | |
855 movq mm3, [eax+ebx+colorS2] | |
856 movq mm4, [eax+ebx+ebx+color1] | |
857 movq mm5, [eax+ebx+ebx+colorS1] | |
858 push eax | |
859 add eax, ebx | |
860 movq mm6, [eax+ebx+ebx+colorA0] | |
861 movq mm7, [eax+ebx+ebx+colorA3] | |
862 pop eax | |
863 | |
864 pcmpeqw mm0, [ecx+2+colorB0] | |
865 pcmpeqw mm1, [ecx+2+colorB3] | |
866 pcmpeqw mm2, [ecx+ebx+2+color4] | |
867 pcmpeqw mm3, [ecx+ebx+2+colorS2] | |
868 pcmpeqw mm4, [ecx+ebx+ebx+2+color1] | |
869 pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1] | |
870 add ecx, ebx | |
871 pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0] | |
872 pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3] | |
873 sub ecx, ebx | |
874 | |
875 | |
876 pand mm0, mm1 | |
877 pand mm2, mm3 | |
878 pand mm4, mm5 | |
879 pand mm6, mm7 | |
880 pand mm0, mm2 | |
881 pand mm4, mm6 | |
882 pxor mm7, mm7 | |
883 pand mm0, mm4 | |
884 movq mm6, [eax+colorB0] | |
885 pcmpeqw mm7, mm0 | |
886 | |
887 movq [ecx+2+colorB0], mm6 | |
888 | |
889 packsswb mm7, mm7 | |
890 movd ecx, mm7 | |
891 test ecx, ecx | |
892 jz near .SKIP_PROCESS | |
893 | |
894 ;End Delta | |
895 | |
896 ;--------------------------------- | |
897 movq mm0, [eax+ebx+color5] | |
898 movq mm1, [eax+ebx+color6] | |
899 movq mm2, mm0 | |
900 movq mm3, mm1 | |
901 movq mm4, mm0 | |
902 movq mm5, mm1 | |
903 | |
904 pand mm0, [colorMask] | |
905 pand mm1, [colorMask] | |
906 | |
907 psrlw mm0, 1 | |
908 psrlw mm1, 1 | |
909 | |
910 pand mm3, [lowPixelMask] | |
911 paddw mm0, mm1 | |
912 | |
913 pand mm3, mm2 | |
914 paddw mm0, mm3 ;mm0 contains the interpolated values | |
915 movq [I56Pixel], mm0 | |
916 movq mm7, mm0 | |
917 | |
918 ;------------------- | |
919 movq mm0, mm7 | |
920 movq mm1, mm4 ;5,5,5,6 | |
921 movq mm2, mm0 | |
922 movq mm3, mm1 | |
923 | |
924 pand mm0, [colorMask] | |
925 pand mm1, [colorMask] | |
926 | |
927 psrlw mm0, 1 | |
928 psrlw mm1, 1 | |
929 | |
930 pand mm3, [lowPixelMask] | |
931 paddw mm0, mm1 | |
932 | |
933 pand mm3, mm2 | |
934 paddw mm0, mm3 ;mm0 contains the interpolated values | |
935 movq [product1a], mm0 | |
936 ;-------------------- | |
937 | |
938 movq mm0, mm7 | |
939 movq mm1, mm5 ;6,6,6,5 | |
940 movq mm2, mm0 | |
941 movq mm3, mm1 | |
942 | |
943 pand mm0, [colorMask] | |
944 pand mm1, [colorMask] | |
945 | |
946 psrlw mm0, 1 | |
947 psrlw mm1, 1 | |
948 | |
949 pand mm3, [lowPixelMask] | |
950 paddw mm0, mm1 | |
951 | |
952 pand mm3, mm2 | |
953 paddw mm0, mm3 | |
954 movq [product1b], mm0 | |
955 | |
956 ;------------------------- | |
957 ;------------------------- | |
958 movq mm0, [eax+ebx+ebx+color2] | |
959 movq mm1, [eax+ebx+ebx+color3] | |
960 movq mm2, mm0 | |
961 movq mm3, mm1 | |
962 movq mm4, mm0 | |
963 movq mm5, mm1 | |
964 | |
965 pand mm0, [colorMask] | |
966 pand mm1, [colorMask] | |
967 | |
968 psrlw mm0, 1 | |
969 psrlw mm1, 1 | |
970 | |
971 pand mm3, [lowPixelMask] | |
972 paddw mm0, mm1 | |
973 | |
974 pand mm3, mm2 | |
975 paddw mm0, mm3 | |
976 movq [I23Pixel], mm0 | |
977 movq mm7, mm0 | |
978 | |
979 ;--------------------- | |
980 movq mm0, mm7 | |
981 movq mm1, mm4 ;2,2,2,3 | |
982 movq mm2, mm0 | |
983 movq mm3, mm1 | |
984 | |
985 pand mm0, [colorMask] | |
986 pand mm1, [colorMask] | |
987 | |
988 psrlw mm0, 1 | |
989 psrlw mm1, 1 | |
990 | |
991 pand mm3, [lowPixelMask] | |
992 paddw mm0, mm1 | |
993 | |
994 pand mm3, mm2 | |
995 paddw mm0, mm3 | |
996 movq [product2a], mm0 | |
997 | |
998 ;---------------------- | |
999 movq mm0, mm7 | |
1000 movq mm1, mm5 ;3,3,3,2 | |
1001 movq mm2, mm0 | |
1002 movq mm3, mm1 | |
1003 | |
1004 pand mm0, [colorMask] | |
1005 pand mm1, [colorMask] | |
1006 | |
1007 psrlw mm0, 1 | |
1008 psrlw mm1, 1 | |
1009 | |
1010 pand mm3, [lowPixelMask] | |
1011 paddw mm0, mm1 | |
1012 | |
1013 pand mm3, mm2 | |
1014 paddw mm0, mm3 | |
1015 movq [product2b], mm0 | |
1016 | |
1017 | |
1018 ;//////////////////////////////// | |
1019 ; Decide which "branch" to take | |
1020 ;-------------------------------- | |
1021 movq mm4, [eax+ebx+color5] | |
1022 movq mm5, [eax+ebx+color6] | |
1023 movq mm6, [eax+ebx+ebx+color3] | |
1024 movq mm7, [eax+ebx+ebx+color2] | |
1025 | |
1026 pxor mm3, mm3 | |
1027 movq mm0, mm4 | |
1028 movq mm1, mm5 | |
1029 | |
1030 pcmpeqw mm0, mm6 | |
1031 pcmpeqw mm1, mm7 | |
1032 pcmpeqw mm1, mm3 | |
1033 pand mm0, mm1 | |
1034 movq [Mask35], mm0 | |
1035 | |
1036 movq mm0, [eax+ebx+ebx+colorS1] | |
1037 movq mm1, [eax+ebx+color4] | |
1038 push eax | |
1039 add eax, ebx | |
1040 movq mm2, [eax+ebx+ebx+colorA2] | |
1041 pop eax | |
1042 movq mm3, [eax+colorB1] | |
1043 pcmpeqw mm0, mm4 | |
1044 pcmpeqw mm1, mm4 | |
1045 pcmpeqw mm2, mm4 | |
1046 pcmpeqw mm3, mm4 | |
1047 pand mm0, mm1 | |
1048 pand mm2, mm3 | |
1049 por mm0, mm2 | |
1050 pand mm0, [Mask35] | |
1051 movq [Mask35b], mm0 | |
1052 | |
1053 ;----------- | |
1054 pxor mm3, mm3 | |
1055 movq mm0, mm4 | |
1056 movq mm1, mm5 | |
1057 | |
1058 pcmpeqw mm0, mm6 | |
1059 pcmpeqw mm1, mm7 | |
1060 pcmpeqw mm0, mm3 | |
1061 pand mm0, mm1 | |
1062 movq [Mask26], mm0 | |
1063 | |
1064 movq mm0, [eax+ebx+ebx+color1] | |
1065 movq mm1, [eax+ebx+colorS2] | |
1066 push eax | |
1067 add eax, ebx | |
1068 movq mm2, [eax+ebx+ebx+colorA1] | |
1069 pop eax | |
1070 movq mm3, [eax+colorB2] | |
1071 pcmpeqw mm0, mm5 | |
1072 pcmpeqw mm1, mm5 | |
1073 pcmpeqw mm2, mm5 | |
1074 pcmpeqw mm3, mm5 | |
1075 pand mm0, mm1 | |
1076 pand mm2, mm3 | |
1077 por mm0, mm2 | |
1078 pand mm0, [Mask26] | |
1079 movq [Mask26b], mm0 | |
1080 | |
1081 ;-------------------- | |
1082 movq mm0, mm4 | |
1083 movq mm1, mm5 | |
1084 movq mm2, mm0 | |
1085 | |
1086 pcmpeqw mm2, mm1 | |
1087 pcmpeqw mm0, mm6 | |
1088 pcmpeqw mm1, mm7 | |
1089 pand mm0, mm1 | |
1090 pand mm2, mm0 | |
1091 pxor mm0, mm2 | |
1092 movq mm7, mm0 | |
1093 | |
1094 ;------------------ | |
1095 packsswb mm7, mm7 | |
1096 movd ecx, mm7 | |
1097 test ecx, ecx | |
1098 jz near .SKIP_GUESS | |
1099 | |
1100 ;--------------------------------------------- | |
1101 ; Map of the pixels: I|E F|J | |
1102 ; G|A B|K | |
1103 ; H|C D|L | |
1104 ; M|N O|P | |
1105 movq mm6, mm0 | |
1106 movq mm4, [eax+ebx+color5] | |
1107 movq mm5, [eax+ebx+color6] | |
1108 pxor mm7, mm7 | |
1109 pand mm6, [ONE] | |
1110 | |
1111 movq mm0, [eax+colorB1] | |
1112 movq mm1, [eax+ebx+color4] | |
1113 movq mm2, mm0 | |
1114 movq mm3, mm1 | |
1115 pcmpeqw mm0, mm4 | |
1116 pcmpeqw mm1, mm4 | |
1117 pcmpeqw mm2, mm5 | |
1118 pcmpeqw mm3, mm5 | |
1119 pand mm0, mm6 | |
1120 pand mm1, mm6 | |
1121 pand mm2, mm6 | |
1122 pand mm3, mm6 | |
1123 paddw mm0, mm1 | |
1124 paddw mm2, mm3 | |
1125 | |
1126 pxor mm3, mm3 | |
1127 pcmpgtw mm0, mm6 | |
1128 pcmpgtw mm2, mm6 | |
1129 pcmpeqw mm0, mm3 | |
1130 pcmpeqw mm2, mm3 | |
1131 pand mm0, mm6 | |
1132 pand mm2, mm6 | |
1133 paddw mm7, mm0 | |
1134 psubw mm7, mm2 | |
1135 | |
1136 movq mm0, [eax+colorB2] | |
1137 movq mm1, [eax+ebx+colorS2] | |
1138 movq mm2, mm0 | |
1139 movq mm3, mm1 | |
1140 pcmpeqw mm0, mm4 | |
1141 pcmpeqw mm1, mm4 | |
1142 pcmpeqw mm2, mm5 | |
1143 pcmpeqw mm3, mm5 | |
1144 pand mm0, mm6 | |
1145 pand mm1, mm6 | |
1146 pand mm2, mm6 | |
1147 pand mm3, mm6 | |
1148 paddw mm0, mm1 | |
1149 paddw mm2, mm3 | |
1150 | |
1151 pxor mm3, mm3 | |
1152 pcmpgtw mm0, mm6 | |
1153 pcmpgtw mm2, mm6 | |
1154 pcmpeqw mm0, mm3 | |
1155 pcmpeqw mm2, mm3 | |
1156 pand mm0, mm6 | |
1157 pand mm2, mm6 | |
1158 paddw mm7, mm0 | |
1159 psubw mm7, mm2 | |
1160 | |
1161 push eax | |
1162 add eax, ebx | |
1163 movq mm0, [eax+ebx+color1] | |
1164 movq mm1, [eax+ebx+ebx+colorA1] | |
1165 movq mm2, mm0 | |
1166 movq mm3, mm1 | |
1167 pcmpeqw mm0, mm4 | |
1168 pcmpeqw mm1, mm4 | |
1169 pcmpeqw mm2, mm5 | |
1170 pcmpeqw mm3, mm5 | |
1171 pand mm0, mm6 | |
1172 pand mm1, mm6 | |
1173 pand mm2, mm6 | |
1174 pand mm3, mm6 | |
1175 paddw mm0, mm1 | |
1176 paddw mm2, mm3 | |
1177 | |
1178 pxor mm3, mm3 | |
1179 pcmpgtw mm0, mm6 | |
1180 pcmpgtw mm2, mm6 | |
1181 pcmpeqw mm0, mm3 | |
1182 pcmpeqw mm2, mm3 | |
1183 pand mm0, mm6 | |
1184 pand mm2, mm6 | |
1185 paddw mm7, mm0 | |
1186 psubw mm7, mm2 | |
1187 | |
1188 movq mm0, [eax+ebx+colorS1] | |
1189 movq mm1, [eax+ebx+ebx+colorA2] | |
1190 movq mm2, mm0 | |
1191 movq mm3, mm1 | |
1192 pcmpeqw mm0, mm4 | |
1193 pcmpeqw mm1, mm4 | |
1194 pcmpeqw mm2, mm5 | |
1195 pcmpeqw mm3, mm5 | |
1196 pand mm0, mm6 | |
1197 pand mm1, mm6 | |
1198 pand mm2, mm6 | |
1199 pand mm3, mm6 | |
1200 paddw mm0, mm1 | |
1201 paddw mm2, mm3 | |
1202 | |
1203 pxor mm3, mm3 | |
1204 pcmpgtw mm0, mm6 | |
1205 pcmpgtw mm2, mm6 | |
1206 pcmpeqw mm0, mm3 | |
1207 pcmpeqw mm2, mm3 | |
1208 pand mm0, mm6 | |
1209 pand mm2, mm6 | |
1210 paddw mm7, mm0 | |
1211 psubw mm7, mm2 | |
1212 | |
1213 pop eax | |
1214 movq mm1, mm7 | |
1215 pxor mm0, mm0 | |
1216 pcmpgtw mm7, mm0 | |
1217 pcmpgtw mm0, mm1 | |
1218 | |
1219 por mm7, [Mask35] | |
1220 por mm0, [Mask26] | |
1221 movq [Mask35], mm7 | |
1222 movq [Mask26], mm0 | |
1223 | |
1224 .SKIP_GUESS: | |
1225 ;Start the ASSEMBLY !!! | |
1226 | |
1227 movq mm4, [Mask35] | |
1228 movq mm5, [Mask26] | |
1229 movq mm6, [Mask35b] | |
1230 movq mm7, [Mask26b] | |
1231 | |
1232 movq mm0, [eax+ebx+color5] | |
1233 movq mm1, [eax+ebx+color6] | |
1234 movq mm2, [eax+ebx+ebx+color2] | |
1235 movq mm3, [eax+ebx+ebx+color3] | |
1236 pcmpeqw mm0, mm2 | |
1237 pcmpeqw mm1, mm3 | |
1238 movq mm2, mm4 | |
1239 movq mm3, mm5 | |
1240 por mm0, mm1 | |
1241 por mm2, mm3 | |
1242 pand mm2, mm0 | |
1243 pxor mm0, mm2 | |
1244 movq mm3, mm0 | |
1245 | |
1246 movq mm2, mm0 | |
1247 pxor mm0, mm0 | |
1248 por mm2, mm4 | |
1249 pxor mm4, mm6 | |
1250 por mm2, mm5 | |
1251 pxor mm5, mm7 | |
1252 pcmpeqw mm2, mm0 | |
1253 ;---------------- | |
1254 | |
1255 movq mm0, [eax+ebx+color5] | |
1256 movq mm1, mm3 | |
1257 por mm1, mm4 | |
1258 por mm1, mm6 | |
1259 pand mm0, mm1 | |
1260 movq mm1, mm5 | |
1261 pand mm1, [I56Pixel] | |
1262 por mm0, mm1 | |
1263 movq mm1, mm7 | |
1264 pand mm1, [product1b] | |
1265 por mm0, mm1 | |
1266 movq mm1, mm2 | |
1267 pand mm1, [product1a] | |
1268 por mm0, mm1 | |
1269 movq [final1a], mm0 | |
1270 | |
1271 movq mm0, [eax+ebx+color6] | |
1272 movq mm1, mm3 | |
1273 por mm1, mm5 | |
1274 por mm1, mm7 | |
1275 pand mm0, mm1 | |
1276 movq mm1, mm4 | |
1277 pand mm1, [I56Pixel] | |
1278 por mm0, mm1 | |
1279 movq mm1, mm6 | |
1280 pand mm1, [product1a] | |
1281 por mm0, mm1 | |
1282 movq mm1, mm2 | |
1283 pand mm1, [product1b] | |
1284 por mm0, mm1 | |
1285 movq [final1b], mm0 | |
1286 | |
1287 movq mm0, [eax+ebx+ebx+color2] | |
1288 movq mm1, mm3 | |
1289 por mm1, mm5 | |
1290 por mm1, mm7 | |
1291 pand mm0, mm1 | |
1292 movq mm1, mm4 | |
1293 pand mm1, [I23Pixel] | |
1294 por mm0, mm1 | |
1295 movq mm1, mm6 | |
1296 pand mm1, [product2b] | |
1297 por mm0, mm1 | |
1298 movq mm1, mm2 | |
1299 pand mm1, [product2a] | |
1300 por mm0, mm1 | |
1301 movq [final2a], mm0 | |
1302 | |
1303 movq mm0, [eax+ebx+ebx+color3] | |
1304 movq mm1, mm3 | |
1305 por mm1, mm4 | |
1306 por mm1, mm6 | |
1307 pand mm0, mm1 | |
1308 movq mm1, mm5 | |
1309 pand mm1, [I23Pixel] | |
1310 por mm0, mm1 | |
1311 movq mm1, mm7 | |
1312 pand mm1, [product2a] | |
1313 por mm0, mm1 | |
1314 movq mm1, mm2 | |
1315 pand mm1, [product2b] | |
1316 por mm0, mm1 | |
1317 movq [final2b], mm0 | |
1318 | |
1319 | |
1320 movq mm0, [final1a] | |
1321 movq mm2, [final1b] | |
1322 movq mm1, mm0 | |
1323 movq mm4, [final2a] | |
1324 movq mm6, [final2b] | |
1325 movq mm5, mm4 | |
1326 punpcklwd mm0, mm2 | |
1327 punpckhwd mm1, mm2 | |
1328 punpcklwd mm4, mm6 | |
1329 punpckhwd mm5, mm6 | |
1330 | |
1331 | |
1332 | |
1333 | |
1334 %ifdef FAR_POINTER | |
1335 movq [fs:edx], mm0 | |
1336 movq [fs:edx+8], mm1 | |
1337 push edx | |
1338 add edx, [ebp+dstPitch] | |
1339 movq [fs:edx], mm4 | |
1340 movq [fs:edx+8], mm5 | |
1341 pop edx | |
1342 %else | |
1343 movq [edx], mm0 | |
1344 movq [edx+8], mm1 | |
1345 push edx | |
1346 add edx, [ebp+dstPitch] | |
1347 movq [edx], mm4 | |
1348 movq [edx+8], mm5 | |
1349 pop edx | |
1350 %endif | |
1351 .SKIP_PROCESS: | |
1352 mov ecx, [ebp+deltaPtr] | |
1353 add ecx, 8 | |
1354 mov [ebp+deltaPtr], ecx | |
1355 add edx, 16 | |
1356 add eax, 8 | |
1357 | |
1358 pop ecx | |
1359 sub ecx, 4 | |
1360 cmp ecx, 0 | |
1361 jg near .Loop | |
1362 | |
1363 ; Restore some stuff | |
1364 popad | |
1365 mov esp, ebp | |
1366 pop ebp | |
1367 emms | |
1368 ret | |
1369 | |
1370 | |
1371 ;------------------------------------------------------------------------- | |
1372 ;------------------------------------------------------------------------- | |
1373 ;------------------------------------------------------------------------- | |
1374 ;------------------------------------------------------------------------- | |
1375 ;------------------------------------------------------------------------- | |
1376 ;------------------------------------------------------------------------- | |
1377 ;------------------------------------------------------------------------- | |
1378 | |
1379 | |
;This is version 0.50
;
; Byte offsets of the sixteen neighbourhood pixels within their source row.
; Pixels are 16 bits wide, so neighbouring columns are 2 bytes apart and the
; four columns sit at -2, 0, +2 and +4. The row itself is chosen by the
; addressing mode at each use (eax, eax+ebx, eax+ebx+ebx, ...).
;
;   Map of the pixels:   I|E F|J
;                        G|A B|K
;                        H|C D|L
;                        M|N O|P

; row I E F J
colorI          equ     -2
colorE          equ     0
colorF          equ     2
colorJ          equ     4

; row G A B K
colorG          equ     -2
colorA          equ     0
colorB          equ     2
colorK          equ     4

; row H C D L
colorH          equ     -2
colorC          equ     0
colorD          equ     2
colorL          equ     4

; row M N O P
colorM          equ     -2
colorN          equ     0
colorO          equ     2
colorP          equ     4

1400 | |
;-------------------------------------------------------------------------
; _2xSaILine / __2xSaILine -- expand one source line into two output lines
; using MMX, four 16-bit pixels per loop iteration.
;
; Stack arguments are read through ebp with the srcPtr, deltaPtr, srcPitch,
; width, dstOffset and dstPitch offsets (plus dstSegment when FAR_POINTER
; is defined).
; NOTE(review): the prototype comment near the top of the file does not list
; a delta pointer, yet this routine reads and updates [ebp+deltaPtr] --
; confirm the real argument order against the C caller.
;
; Register roles inside the loop:
;   eax = source pointer, one pitch above the colorA row (the colorE row)
;   ebx = source pitch in bytes
;   edx = destination pointer for the upper output row
;   ecx = scratch; the remaining width lives on the stack (push at .Loop,
;         pop at .SKIP_PROCESS)
;
; Per iteration: 16 bytes are written to [edx] and 16 bytes to
; [edx+dstPitch]; then the source and delta pointers advance by 8 bytes,
; the destination by 16 bytes and the width counter drops by 4.
;
; Scratch: Mask1, Mask2 and ACPixel are static qwords, so intermediate
; state is kept outside the stack frame while the routine runs.
; All general registers are restored by popad and emms is executed before
; ret, so no value is returned in eax.
;-------------------------------------------------------------------------
1401 %ifdef __DJGPP__ | |
1402 __2xSaILine: | |
1403 %else | |
1404 _2xSaILine: | |
1405 %endif | |
1406 ; Store some stuff | |
1407 push ebp | |
1408 mov ebp, esp | |
1409 pushad | |
1410 | |
1411 ; Prepare the destination | |
1412 %ifdef FAR_POINTER | |
1413 ; Set the selector | |
1414 mov eax, [ebp+dstSegment] | |
1415 mov fs, ax | |
1416 %endif | |
1417 mov edx, [ebp+dstOffset] ; edx points to the screen | |
1418 ; Prepare the source | |
1419 ; eax points to colorA | |
1420 mov eax, [ebp+srcPtr] | |
1421 mov ebx, [ebp+srcPitch] | |
1422 mov ecx, [ebp+width] | |
1423 ; eax now points to colorE | |
1424 sub eax, ebx | |
1425 | |
1426 | |
1427 ; Main Loop | |
; The remaining width is parked on the stack because ecx is reused as a
; scratch register throughout the loop body.
1428 .Loop: push ecx | |
1429 | |
1430 ;-----Check Delta------------------ | |
; Load two qwords per row (starting at the leftmost and the rightmost
; neighbourhood column) for all four rows, then compare them with the delta
; buffer. The delta buffer is addressed with an extra +2, so delta offset 0
; lines up with source offset colorI (-2).
1431 mov ecx, [ebp+deltaPtr] | |
1432 | |
1433 movq mm0, [eax+colorI] | |
1434 movq mm1, [eax+colorJ] | |
1435 movq mm2, [eax+ebx+colorG] | |
1436 movq mm3, [eax+ebx+colorK] | |
1437 movq mm4, [eax+ebx+ebx+colorH] | |
1438 movq mm5, [eax+ebx+ebx+colorL] | |
; The fourth row needs eax+3*ebx, which has no addressing mode, so eax is
; temporarily advanced by one pitch.
1439 push eax | |
1440 add eax, ebx | |
1441 movq mm6, [eax+ebx+ebx+colorM] | |
1442 movq mm7, [eax+ebx+ebx+colorP] | |
1443 pop eax | |
1444 | |
1445 pcmpeqw mm0, [ecx+2+colorI] | |
; colorK and colorJ are both 4, so this is the counterpart of the colorJ
; load into mm1 above.
1446 pcmpeqw mm1, [ecx+2+colorK] | |
1447 pcmpeqw mm2, [ecx+ebx+2+colorG] | |
1448 pcmpeqw mm3, [ecx+ebx+2+colorK] | |
1449 pcmpeqw mm4, [ecx+ebx+ebx+2+colorH] | |
1450 pcmpeqw mm5, [ecx+ebx+ebx+2+colorL] | |
1451 add ecx, ebx | |
1452 pcmpeqw mm6, [ecx+ebx+ebx+2+colorM] | |
1453 pcmpeqw mm7, [ecx+ebx+ebx+2+colorP] | |
1454 sub ecx, ebx | |
1455 | |
1456 | |
; AND the eight comparison results: a word lane of mm0 stays 0xFFFF only if
; that lane matched the delta buffer in every compared qword.
1457 pand mm0, mm1 | |
1458 pand mm2, mm3 | |
1459 pand mm4, mm5 | |
1460 pand mm6, mm7 | |
1461 pand mm0, mm2 | |
1462 pand mm4, mm6 | |
1463 pxor mm7, mm7 | |
1464 pand mm0, mm4 | |
1465 movq mm6, [eax+colorI] | |
; mm7 = lanes where mm0 is zero, i.e. lanes with at least one mismatch.
1466 pcmpeqw mm7, mm0 | |
1467 | |
; Refresh the delta buffer with the current top-row qword. This store is
; executed whether or not the pixels end up being processed.
1468 movq [ecx+2+colorI], mm6 | |
1469 | |
; Pack the four word lanes into the low dword so one test covers them all;
; ecx == 0 means nothing changed and the whole group is skipped.
1470 packsswb mm7, mm7 | |
1471 movd ecx, mm7 | |
1472 test ecx, ecx | |
1473 jz near .SKIP_PROCESS | |
1474 | |
1475 ;End Delta | |
1476 | |
1477 ;--------------------------------- | |
1478 | |
1479 | |
; Sections 1 and 2 build the masks for the upper-right output pixel:
; Mask1 selects colorA, Mask2 selects colorB. Each "!=" term is produced by
; comparing the equality mask with zero (pcmpeqw against a cleared register).
1480 ;1 | |
1481 ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorE) && (colorB == colorL) | |
1482 movq mm0, [eax+ebx+colorA] ;mm0 and mm1 contain colorA | |
1483 movq mm2, [eax+ebx+colorB] ;mm2 and mm3 contain colorB | |
1484 | |
1485 movq mm1, mm0 | |
1486 movq mm3, mm2 | |
1487 | |
1488 pcmpeqw mm0, [eax+ebx+ebx+colorD] | |
1489 pcmpeqw mm1, [eax+colorE] | |
1490 pcmpeqw mm2, [eax+ebx+ebx+colorL] | |
1491 pcmpeqw mm3, [eax+ebx+ebx+colorC] | |
1492 | |
1493 pand mm0, mm1 | |
1494 pxor mm1, mm1 | |
1495 pand mm0, mm2 | |
1496 pcmpeqw mm3, mm1 | |
1497 pand mm0, mm3 ;result in mm0 | |
1498 | |
1499 ;if ((colorA == colorC) && (colorB != colorE) && (colorA == colorF) && (colorB == colorJ) | |
1500 movq mm4, [eax+ebx+colorA] ;mm4 and mm5 contain colorA | |
1501 movq mm6, [eax+ebx+colorB] ;mm6 and mm7 contain colorB | |
1502 movq mm5, mm4 | |
1503 movq mm7, mm6 | |
1504 | |
1505 pcmpeqw mm4, [eax+ebx+ebx+colorC] | |
1506 pcmpeqw mm5, [eax+colorF] | |
1507 pcmpeqw mm6, [eax+colorJ] | |
1508 pcmpeqw mm7, [eax+colorE] | |
1509 | |
1510 pand mm4, mm5 | |
1511 pxor mm5, mm5 | |
1512 pand mm4, mm6 | |
1513 pcmpeqw mm7, mm5 | |
1514 pand mm4, mm7 ;result in mm4 | |
1515 | |
1516 por mm0, mm4 ;combine the masks | |
1517 movq [Mask1], mm0 | |
1518 | |
1519 ;-------------------------------------------- | |
1520 | |
1521 ;2 | |
1522 ;if ((colorB == colorC) && (colorA != colorD) && (colorB == colorF) && (colorA == colorH) | |
1523 movq mm0, [eax+ebx+colorB] ;mm0 and mm1 contain colorB | |
1524 movq mm2, [eax+ebx+colorA] ;mm2 and mm3 contain colorA | |
1525 movq mm1, mm0 | |
1526 movq mm3, mm2 | |
1527 | |
1528 pcmpeqw mm0, [eax+ebx+ebx+colorC] | |
1529 pcmpeqw mm1, [eax+colorF] | |
1530 pcmpeqw mm2, [eax+ebx+ebx+colorH] | |
1531 pcmpeqw mm3, [eax+ebx+ebx+colorD] | |
1532 | |
1533 pand mm0, mm1 | |
1534 pxor mm1, mm1 | |
1535 pand mm0, mm2 | |
1536 pcmpeqw mm3, mm1 | |
1537 pand mm0, mm3 ;result in mm0 | |
1538 | |
1539 ;if ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI) | |
1540 movq mm4, [eax+ebx+colorB] ;mm4 and mm5 contain colorB | |
1541 movq mm6, [eax+ebx+colorA] ;mm6 and mm7 contain colorA | |
1542 movq mm5, mm4 | |
1543 movq mm7, mm6 | |
1544 | |
1545 pcmpeqw mm4, [eax+ebx+ebx+colorD] | |
1546 pcmpeqw mm5, [eax+colorE] | |
1547 pcmpeqw mm6, [eax+colorI] | |
1548 pcmpeqw mm7, [eax+colorF] | |
1549 | |
1550 pand mm4, mm5 | |
1551 pxor mm5, mm5 | |
1552 pand mm4, mm6 | |
1553 pcmpeqw mm7, mm5 | |
1554 pand mm4, mm7 ;result in mm4 | |
1555 | |
1556 por mm0, mm4 ;combine the masks | |
1557 movq [Mask2], mm0 | |
1558 | |
1559 | |
1560 ;interpolate colorA and colorB | |
; mm0 = ((A & colorMask) >> 1) + ((B & colorMask) >> 1)
;       + (A & B & lowPixelMask)
1561 movq mm0, [eax+ebx+colorA] | |
1562 movq mm1, [eax+ebx+colorB] | |
1563 | |
1564 movq mm2, mm0 | |
1565 movq mm3, mm1 | |
1566 | |
1567 pand mm0, [colorMask] | |
1568 pand mm1, [colorMask] | |
1569 | |
1570 psrlw mm0, 1 | |
1571 psrlw mm1, 1 | |
1572 | |
1573 pand mm3, [lowPixelMask] | |
1574 paddw mm0, mm1 | |
1575 | |
1576 pand mm3, mm2 | |
1577 paddw mm0, mm3 ;mm0 contains the interpolated values | |
1578 | |
1579 ;assemble the pixels | |
; mm0 = (A & Mask1) | (B & Mask2) | (interpolated & ~(Mask1 | Mask2)),
; then colorA (mm5/mm6) is interleaved with mm0 to form the upper output row.
1580 movq mm1, [eax+ebx+colorA] | |
1581 movq mm2, [eax+ebx+colorB] | |
1582 | |
1583 movq mm3, [Mask1] | |
1584 movq mm5, mm1 | |
1585 movq mm4, [Mask2] | |
1586 movq mm6, mm1 | |
1587 | |
1588 pand mm1, mm3 | |
1589 por mm3, mm4 | |
1590 pxor mm7, mm7 | |
1591 pand mm2, mm4 | |
1592 | |
1593 pcmpeqw mm3, mm7 | |
1594 por mm1, mm2 | |
1595 pand mm0, mm3 | |
1596 | |
1597 por mm0, mm1 | |
1598 | |
1599 punpcklwd mm5, mm0 | |
1600 punpckhwd mm6, mm0 | |
1601 | |
1602 %ifdef FAR_POINTER | |
1603 movq [fs:edx], mm5 | |
1604 movq [fs:edx+8], mm6 | |
1605 %else | |
1606 movq [edx], mm5 | |
1607 movq [edx+8], mm6 | |
1608 %endif | |
1609 | |
1610 ;------------------------------------------------ | |
1611 ; Create the Nextline | |
1612 ;------------------------------------------------ | |
; Sections 3 and 4 repeat the scheme for the pixel below colorA:
; Mask1 selects colorA, Mask2 selects colorC. While eax is advanced by one
; pitch (push/add/pop), [eax+...] addresses are one row lower than usual.
1613 ;3 ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorG) && (colorC == colorO) | |
1614 movq mm0, [eax+ebx+colorA] ;mm0 and mm1 contain colorA | |
1615 movq mm2, [eax+ebx+ebx+colorC] ;mm2 and mm3 contain colorC | |
1616 movq mm1, mm0 | |
1617 movq mm3, mm2 | |
1618 | |
1619 push eax | |
1620 add eax, ebx | |
1621 pcmpeqw mm0, [eax+ebx+colorD] | |
1622 pcmpeqw mm1, [eax+colorG] | |
1623 pcmpeqw mm2, [eax+ebx+ebx+colorO] | |
1624 pcmpeqw mm3, [eax+colorB] | |
1625 pop eax | |
1626 | |
1627 pand mm0, mm1 | |
1628 pxor mm1, mm1 | |
1629 pand mm0, mm2 | |
1630 pcmpeqw mm3, mm1 | |
1631 pand mm0, mm3 ;result in mm0 | |
1632 | |
1633 ;if ((colorA == colorB) && (colorG != colorC) && (colorA == colorH) && (colorC == colorM) | |
1634 movq mm4, [eax+ebx+colorA] ;mm4 and mm5 contain colorA | |
1635 movq mm6, [eax+ebx+ebx+colorC] ;mm6 and mm7 contain colorC | |
1636 movq mm5, mm4 | |
1637 movq mm7, mm6 | |
1638 | |
1639 push eax | |
1640 add eax, ebx | |
1641 pcmpeqw mm4, [eax+ebx+colorH] | |
1642 pcmpeqw mm5, [eax+colorB] | |
1643 pcmpeqw mm6, [eax+ebx+ebx+colorM] | |
1644 pcmpeqw mm7, [eax+colorG] | |
1645 pop eax | |
1646 | |
1647 pand mm4, mm5 | |
1648 pxor mm5, mm5 | |
1649 pand mm4, mm6 | |
1650 pcmpeqw mm7, mm5 | |
1651 pand mm4, mm7 ;result in mm4 | |
1652 | |
1653 por mm0, mm4 ;combine the masks | |
1654 movq [Mask1], mm0 | |
1655 ;-------------------------------------------- | |
1656 | |
1657 ;4 | |
1658 ;if ((colorB == colorC) && (colorA != colorD) && (colorC == colorH) && (colorA == colorF) | |
1659 movq mm0, [eax+ebx+ebx+colorC] ;mm0 and mm1 contain colorC | |
1660 movq mm2, [eax+ebx+colorA] ;mm2 and mm3 contain colorA | |
1661 movq mm1, mm0 | |
1662 movq mm3, mm2 | |
1663 | |
1664 pcmpeqw mm0, [eax+ebx+colorB] | |
1665 pcmpeqw mm1, [eax+ebx+ebx+colorH] | |
1666 pcmpeqw mm2, [eax+colorF] | |
1667 pcmpeqw mm3, [eax+ebx+ebx+colorD] | |
1668 | |
1669 pand mm0, mm1 | |
1670 pxor mm1, mm1 | |
1671 pand mm0, mm2 | |
1672 pcmpeqw mm3, mm1 | |
1673 pand mm0, mm3 ;result in mm0 | |
1674 | |
1675 ;if ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI) | |
1676 movq mm4, [eax+ebx+ebx+colorC] ;mm4 and mm5 contain colorC | |
1677 movq mm6, [eax+ebx+colorA] ;mm6 and mm7 contain colorA | |
1678 movq mm5, mm4 | |
1679 movq mm7, mm6 | |
1680 | |
1681 pcmpeqw mm4, [eax+ebx+ebx+colorD] | |
1682 pcmpeqw mm5, [eax+ebx+colorG] | |
1683 pcmpeqw mm6, [eax+colorI] | |
1684 pcmpeqw mm7, [eax+ebx+ebx+colorH] | |
1685 | |
1686 pand mm4, mm5 | |
1687 pxor mm5, mm5 | |
1688 pand mm4, mm6 | |
1689 pcmpeqw mm7, mm5 | |
1690 pand mm4, mm7 ;result in mm4 | |
1691 | |
1692 por mm0, mm4 ;combine the masks | |
1693 movq [Mask2], mm0 | |
1694 ;---------------------------------------------- | |
1695 | |
1696 ;interpolate colorA and colorC | |
; Same two-pixel average as for A and B above, with colorC in place of colorB.
1697 movq mm0, [eax+ebx+colorA] | |
1698 movq mm1, [eax+ebx+ebx+colorC] | |
1699 | |
1700 movq mm2, mm0 | |
1701 movq mm3, mm1 | |
1702 | |
1703 pand mm0, [colorMask] | |
1704 pand mm1, [colorMask] | |
1705 | |
1706 psrlw mm0, 1 | |
1707 psrlw mm1, 1 | |
1708 | |
1709 pand mm3, [lowPixelMask] | |
1710 paddw mm0, mm1 | |
1711 | |
1712 pand mm3, mm2 | |
1713 paddw mm0, mm3 ;mm0 contains the interpolated values | |
1714 ;------------- | |
1715 | |
1716 ;assemble the pixels | |
; ACPixel = (A & Mask1) | (C & Mask2) | (interpolated & ~(Mask1 | Mask2));
; it is parked in memory until the lower output row is written.
1717 movq mm1, [eax+ebx+colorA] | |
1718 movq mm2, [eax+ebx+ebx+colorC] | |
1719 | |
1720 movq mm3, [Mask1] | |
1721 movq mm4, [Mask2] | |
1722 | |
1723 pand mm1, mm3 | |
1724 pand mm2, mm4 | |
1725 | |
1726 por mm3, mm4 | |
1727 pxor mm7, mm7 | |
1728 por mm1, mm2 | |
1729 | |
1730 pcmpeqw mm3, mm7 | |
1731 pand mm0, mm3 | |
1732 por mm0, mm1 | |
1733 movq [ACPixel], mm0 | |
1734 | |
1735 ;//////////////////////////////// | |
1736 ; Decide which "branch" to take | |
1737 ;-------------------------------- | |
; Classify each lane for the diagonal output pixel:
;   Mask1 = (A == D && B != C) or all of A, B, C, D equal   -> use colorA
;   Mask2 = (A != D && B == C)                              -> use colorB
;   mm0   = (A == D && B == C && A != B)                    -> needs the
;           neighbour vote below; if no lane needs it, skip to .SKIP_GUESS
1738 movq mm0, [eax+ebx+colorA] | |
1739 movq mm1, [eax+ebx+colorB] | |
1740 movq mm6, mm0 | |
1741 movq mm7, mm1 | |
1742 pcmpeqw mm0, [eax+ebx+ebx+colorD] | |
1743 pcmpeqw mm1, [eax+ebx+ebx+colorC] | |
1744 pcmpeqw mm6, mm7 | |
1745 | |
1746 movq mm2, mm0 | |
1747 movq mm3, mm0 | |
1748 | |
1749 pand mm0, mm1 ;colorA == colorD && colorB == colorC | |
1750 pxor mm7, mm7 | |
1751 | |
1752 pcmpeqw mm2, mm7 | |
1753 pand mm6, mm0 | |
1754 pand mm2, mm1 ;colorA != colorD && colorB == colorC | |
1755 | |
1756 pcmpeqw mm1, mm7 | |
1757 | |
1758 pand mm1, mm3 ;colorA == colorD && colorB != colorC | |
1759 pxor mm0, mm6 | |
1760 por mm1, mm6 | |
1761 movq mm7, mm0 | |
1762 movq [Mask2], mm2 | |
1763 packsswb mm7, mm7 | |
1764 movq [Mask1], mm1 | |
1765 | |
1766 movd ecx, mm7 | |
1767 test ecx, ecx | |
1768 jz near .SKIP_GUESS | |
1769 | |
1770 ;--------------------------------------------- | |
1771 ; Map of the pixels: I|E F|J | |
1772 ; G|A B|K | |
1773 ; H|C D|L | |
1774 ; M|N O|P | |
; Neighbour vote. mm4 = colorA, mm5 = colorB, mm6 = 1 in every lane that
; needs the vote (0 elsewhere), mm7 = running signed score per lane.
; For each pair of neighbours the score gets +1 unless both neighbours equal
; colorA, and -1 unless both neighbours equal colorB. Lanes that do not need
; the vote contribute nothing because every term is masked with mm6.
1775 movq mm6, mm0 | |
1776 movq mm4, [eax+ebx+colorA] | |
1777 movq mm5, [eax+ebx+colorB] | |
1778 pxor mm7, mm7 | |
1779 pand mm6, [ONE] | |
1780 | |
; Pair 1: colorE and colorG.
1781 movq mm0, [eax+colorE] | |
1782 movq mm1, [eax+ebx+colorG] | |
1783 movq mm2, mm0 | |
1784 movq mm3, mm1 | |
1785 pcmpeqw mm0, mm4 | |
1786 pcmpeqw mm1, mm4 | |
1787 pcmpeqw mm2, mm5 | |
1788 pcmpeqw mm3, mm5 | |
1789 pand mm0, mm6 | |
1790 pand mm1, mm6 | |
1791 pand mm2, mm6 | |
1792 pand mm3, mm6 | |
; mm0 = number of the two neighbours equal to colorA, mm2 = same for colorB.
1793 paddw mm0, mm1 | |
1794 paddw mm2, mm3 | |
1795 | |
; count > 1 marks "both equal"; comparing that with zero inverts it, and the
; final pand with mm6 turns the result back into 0 or 1.
1796 pxor mm3, mm3 | |
1797 pcmpgtw mm0, mm6 | |
1798 pcmpgtw mm2, mm6 | |
1799 pcmpeqw mm0, mm3 | |
1800 pcmpeqw mm2, mm3 | |
1801 pand mm0, mm6 | |
1802 pand mm2, mm6 | |
1803 paddw mm7, mm0 | |
1804 psubw mm7, mm2 | |
1805 | |
; Pair 2: colorF and colorK (same scoring as pair 1).
1806 movq mm0, [eax+colorF] | |
1807 movq mm1, [eax+ebx+colorK] | |
1808 movq mm2, mm0 | |
1809 movq mm3, mm1 | |
1810 pcmpeqw mm0, mm4 | |
1811 pcmpeqw mm1, mm4 | |
1812 pcmpeqw mm2, mm5 | |
1813 pcmpeqw mm3, mm5 | |
1814 pand mm0, mm6 | |
1815 pand mm1, mm6 | |
1816 pand mm2, mm6 | |
1817 pand mm3, mm6 | |
1818 paddw mm0, mm1 | |
1819 paddw mm2, mm3 | |
1820 | |
1821 pxor mm3, mm3 | |
1822 pcmpgtw mm0, mm6 | |
1823 pcmpgtw mm2, mm6 | |
1824 pcmpeqw mm0, mm3 | |
1825 pcmpeqw mm2, mm3 | |
1826 pand mm0, mm6 | |
1827 pand mm2, mm6 | |
1828 paddw mm7, mm0 | |
1829 psubw mm7, mm2 | |
1830 | |
; Pairs 3 and 4 need the bottom row, so eax is advanced by one pitch until
; the matching pop below.
; Pair 3: colorH and colorN.
1831 push eax | |
1832 add eax, ebx | |
1833 movq mm0, [eax+ebx+colorH] | |
1834 movq mm1, [eax+ebx+ebx+colorN] | |
1835 movq mm2, mm0 | |
1836 movq mm3, mm1 | |
1837 pcmpeqw mm0, mm4 | |
1838 pcmpeqw mm1, mm4 | |
1839 pcmpeqw mm2, mm5 | |
1840 pcmpeqw mm3, mm5 | |
1841 pand mm0, mm6 | |
1842 pand mm1, mm6 | |
1843 pand mm2, mm6 | |
1844 pand mm3, mm6 | |
1845 paddw mm0, mm1 | |
1846 paddw mm2, mm3 | |
1847 | |
1848 pxor mm3, mm3 | |
1849 pcmpgtw mm0, mm6 | |
1850 pcmpgtw mm2, mm6 | |
1851 pcmpeqw mm0, mm3 | |
1852 pcmpeqw mm2, mm3 | |
1853 pand mm0, mm6 | |
1854 pand mm2, mm6 | |
1855 paddw mm7, mm0 | |
1856 psubw mm7, mm2 | |
1857 | |
; Pair 4: colorL and colorO.
1858 movq mm0, [eax+ebx+colorL] | |
1859 movq mm1, [eax+ebx+ebx+colorO] | |
1860 movq mm2, mm0 | |
1861 movq mm3, mm1 | |
1862 pcmpeqw mm0, mm4 | |
1863 pcmpeqw mm1, mm4 | |
1864 pcmpeqw mm2, mm5 | |
1865 pcmpeqw mm3, mm5 | |
1866 pand mm0, mm6 | |
1867 pand mm1, mm6 | |
1868 pand mm2, mm6 | |
1869 pand mm3, mm6 | |
1870 paddw mm0, mm1 | |
1871 paddw mm2, mm3 | |
1872 | |
1873 pxor mm3, mm3 | |
1874 pcmpgtw mm0, mm6 | |
1875 pcmpgtw mm2, mm6 | |
1876 pcmpeqw mm0, mm3 | |
1877 pcmpeqw mm2, mm3 | |
1878 pand mm0, mm6 | |
1879 pand mm2, mm6 | |
1880 paddw mm7, mm0 | |
1881 psubw mm7, mm2 | |
1882 | |
; Turn the score into masks: a positive score adds the lane to Mask1,
; a negative score adds it to Mask2, a zero score leaves both untouched.
1883 pop eax | |
1884 movq mm1, mm7 | |
1885 pxor mm0, mm0 | |
1886 pcmpgtw mm7, mm0 | |
1887 pcmpgtw mm0, mm1 | |
1888 | |
1889 por mm7, [Mask1] | |
1890 por mm0, [Mask2] | |
1891 movq [Mask1], mm7 | |
1892 movq [Mask2], mm0 | |
1893 | |
1894 .SKIP_GUESS: | |
1895 ;---------------------------- | |
1896 ;interpolate A, B, C and D | |
; Each pixel is masked with qcolorMask and shifted right by 2; the bits kept
; by qlowpixelMask are summed separately, shifted right by 2, masked again
; and added back in.
1897 movq mm0, [eax+ebx+colorA] | |
1898 movq mm1, [eax+ebx+colorB] | |
1899 movq mm4, mm0 | |
1900 movq mm2, [eax+ebx+ebx+colorC] | |
1901 movq mm5, mm1 | |
1902 movq mm3, [qcolorMask] | |
1903 movq mm6, mm2 | |
1904 movq mm7, [qlowpixelMask] | |
1905 | |
1906 pand mm0, mm3 | |
1907 pand mm1, mm3 | |
1908 pand mm2, mm3 | |
1909 pand mm3, [eax+ebx+ebx+colorD] | |
1910 | |
1911 psrlw mm0, 2 | |
1912 pand mm4, mm7 | |
1913 psrlw mm1, 2 | |
1914 pand mm5, mm7 | |
1915 psrlw mm2, 2 | |
1916 pand mm6, mm7 | |
1917 psrlw mm3, 2 | |
1918 pand mm7, [eax+ebx+ebx+colorD] | |
1919 | |
1920 paddw mm0, mm1 | |
1921 paddw mm2, mm3 | |
1922 | |
1923 paddw mm4, mm5 | |
1924 paddw mm6, mm7 | |
1925 | |
1926 paddw mm4, mm6 | |
1927 paddw mm0, mm2 | |
1928 psrlw mm4, 2 | |
1929 pand mm4, [qlowpixelMask] | |
1930 paddw mm0, mm4 ;mm0 contains the interpolated value of A, B, C and D | |
1931 | |
1932 ;\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ | |
1933 ;assemble the pixels | |
; mm4 = (A & Mask1) | (B & Mask2) | (interpolated & ~(Mask1 | Mask2)).
1934 movq mm1, [Mask1] | |
1935 movq mm2, [Mask2] | |
1936 movq mm4, [eax+ebx+colorA] | |
1937 movq mm5, [eax+ebx+colorB] | |
1938 pand mm4, mm1 | |
1939 pand mm5, mm2 | |
1940 | |
1941 pxor mm7, mm7 | |
1942 por mm1, mm2 | |
1943 por mm4, mm5 | |
1944 pcmpeqw mm1, mm7 | |
1945 pand mm0, mm1 | |
1946 por mm4, mm0 ;mm4 contains the diagonal pixels | |
1947 | |
; Lower output row: ACPixel interleaved with the diagonal pixels, written one
; destination pitch below edx. edx itself is preserved by the push/pop pair.
1948 movq mm0, [ACPixel] | |
1949 movq mm1, mm0 | |
1950 punpcklwd mm0, mm4 | |
1951 punpckhwd mm1, mm4 | |
1952 | |
1953 push edx | |
1954 add edx, [ebp+dstPitch] | |
1955 | |
1956 %ifdef FAR_POINTER | |
1957 movq [fs:edx], mm0 | |
1958 movq [fs:edx+8], mm1 | |
1959 %else | |
1960 movq [edx], mm0 | |
1961 movq [edx+8], mm1 | |
1962 %endif | |
1963 pop edx | |
1964 | |
1965 .SKIP_PROCESS: | |
; Advance to the next group of four pixels: delta pointer (kept in the
; argument slot) and source by 8 bytes, destination by 16 bytes.
1966 mov ecx, [ebp+deltaPtr] | |
1967 add ecx, 8 | |
1968 mov [ebp+deltaPtr], ecx | |
1969 add edx, 16 | |
1970 add eax, 8 | |
1971 | |
; Recover the width counter pushed at .Loop and continue while it is still
; positive (signed compare).
1972 pop ecx | |
1973 sub ecx, 4 | |
1974 cmp ecx, 0 | |
1975 jg near .Loop | |
1976 | |
1977 ; Restore some stuff | |
1978 popad | |
1979 mov esp, ebp | |
1980 pop ebp | |
1981 emms | |
1982 ret | |
1983 | |
1984 ;------------------------------------------------------------------------- | |
1985 ;------------------------------------------------------------------------- | |
1986 ;------------------------------------------------------------------------- | |
1987 ;------------------------------------------------------------------------- | |
1988 ;------------------------------------------------------------------------- | |
1989 ;------------------------------------------------------------------------- | |
1990 ;------------------------------------------------------------------------- | |
1991 | |
;-------------------------------------------------------------------------
; Init_2xSaIMMX (DJGPP build: _Init_2xSaIMMX)
;
; Selects the interpolation masks for the requested 16-bit pixel layout.
; In:   [ebp+8] = pixel format, either 555 or 565
; Out:  eax = 0 when the masks were written, 1 for any other format
; Writes colorMask, lowPixelMask, qcolorMask and qlowpixelMask; both dwords
; of each qword receive the same pattern.
;-------------------------------------------------------------------------
%ifdef __DJGPP__
_Init_2xSaIMMX:
%else
Init_2xSaIMMX:
%endif
        push    ebp
        mov     ebp, esp

        ; Disabled MMX detection (the original note says it does not work):
        ;       mov     eax, 1
        ;       cpuid
        ;       test    edx, 0x00800000 ; test bit 23
        ;       jz      end2            ; bit not set => no MMX detected

        mov     eax, [ebp+8]            ; PixelFormat
        cmp     eax, 555
        je      Bits555
        cmp     eax, 565
        je      Bits565

end2:                                   ; unsupported format
        mov     eax, 1
end3:                                   ; shared exit, eax = status
        pop     ebp
        ret

Bits555:
        mov     dword [colorMask],       0x7BDE7BDE
        mov     dword [colorMask+4],     0x7BDE7BDE
        mov     dword [lowPixelMask],    0x04210421
        mov     dword [lowPixelMask+4],  0x04210421
        mov     dword [qcolorMask],      0x739C739C
        mov     dword [qcolorMask+4],    0x739C739C
        mov     dword [qlowpixelMask],   0x0C630C63
        mov     dword [qlowpixelMask+4], 0x0C630C63
        mov     eax, 0
        jmp     end3

Bits565:
        mov     dword [colorMask],       0xF7DEF7DE
        mov     dword [colorMask+4],     0xF7DEF7DE
        mov     dword [lowPixelMask],    0x08210821
        mov     dword [lowPixelMask+4],  0x08210821
        mov     dword [qcolorMask],      0xE79CE79C
        mov     dword [qcolorMask+4],    0xE79CE79C
        mov     dword [qlowpixelMask],   0x18631863
        mov     dword [qlowpixelMask+4], 0x18631863
        mov     eax, 0
        jmp     end3
2060 | |
2061 | |
2062 ;------------------------------------------------------------------------- | |
2063 ;------------------------------------------------------------------------- | |
2064 ;------------------------------------------------------------------------- | |
2065 ;------------------------------------------------------------------------- | |
2066 ;------------------------------------------------------------------------- | |
2067 ;------------------------------------------------------------------------- | |
2068 ;------------------------------------------------------------------------- | |
2069 | |
SECTION .data ALIGN = 32
; Constant qwords; every one holds the same 16-bit pattern in all four lanes.

; Interpolation masks. The initial values equal the ones the 565 branch of
; the init routine stores; the init routine overwrites all four at run time.
colorMask:      times 4 dw 0xF7DE
lowPixelMask:   times 4 dw 0x0821

qcolorMask:     times 4 dw 0xE79C
qlowpixelMask:  times 4 dw 0x1863

darkenMask:     times 4 dw 0xC718
GreenMask:      times 4 dw 0x07E0
RedBlueMask:    times 4 dw 0xF81F

; All-zero lanes, all-one lanes, and the value 1 in each lane.
FALSE:          times 4 dw 0x0000
TRUE:           times 4 dw 0xFFFF
ONE:            times 4 dw 0x0001
2085 | |
2086 | |
SECTION .bss ALIGN = 32
; Uninitialised scratch storage, one qword (four 16-bit lanes) per label.
ACPixel:        resq 1
Mask1:          resq 1
Mask2:          resq 1

I56Pixel:       resq 1
I23Pixel:       resq 1
I5556Pixel:     resq 1
I2223Pixel:     resq 1
I5666Pixel:     resq 1
I2333Pixel:     resq 1
Mask26:         resq 1
Mask35:         resq 1
Mask26b:        resq 1
Mask35b:        resq 1
product1a:      resq 1
product1b:      resq 1
product2a:      resq 1
product2b:      resq 1
final1a:        resq 1
final1b:        resq 1
final2a:        resq 1
final2b:        resq 1