view src/filters/2xSaImmx.asm @ 135:eb6ba88088d3

Wrote a more efficient input-number-assembly program; 91 oc -> 60 oc.
author Dylan Holmes <ocsenave@gmail.com>
date Sun, 18 Mar 2012 05:13:19 -0500
parents f9f4f1b99eed
children
line wrap: on
line source
1 ;/*---------------------------------------------------------------------*
2 ; * The following (piece of) code, (part of) the 2xSaI engine, *
3 ; * copyright (c) 1999 - 2001 by Derek Liauw Kie Fa. *
4 ; * Non-Commercial use of this software is allowed and is encouraged, *
5 ; * provided that appropriate credit be given. *
6 ; * You may freely modify this code, but I request *
7 ; * that any improvements to the engine be submitted to me, so *
8 ; * that I can implement these improvements in newer versions of *
9 ; * the software. *
10 ; * If you need more information, have any comments or suggestions, *
11 ; * you can e-mail me. My e-mail: derek-liauw@usa.net. *
12 ; *---------------------------------------------------------------------*/
14 ;----------------------
15 ; 2xSaI version 0.59 WIP, soon to become version 0.60
16 ;----------------------
18 ;%define FAR_POINTER
; Assemble as 32-bit code.
22 BITS 32
; Exported entry points. The __DJGPP__ build exports each symbol with one
; additional leading underscore compared with the default build.
; NOTE(review): Init_2xSaIMMX is declared global here but its definition is
; not in the visible part of this file.
23 %ifdef __DJGPP__
24 GLOBAL __2xSaILine
25 GLOBAL __2xSaISuperEagleLine
26 GLOBAL __2xSaISuper2xSaILine
27 GLOBAL _Init_2xSaIMMX
28 %else
29 GLOBAL _2xSaILine
30 GLOBAL _2xSaISuperEagleLine
31 GLOBAL _2xSaISuper2xSaILine
32 GLOBAL Init_2xSaIMMX
33 %endif
; Code section, requested with 32-byte alignment.
34 SECTION .text ALIGN = 32
; C-side prototype of the line routines (documentation only).
; NOTE(review): the stack offsets defined below include a deltaPtr argument
; at [ebp+12], between srcPtr and srcPitch, that these prototype comments do
; not list, and they call the destination pointer dstOffset rather than
; dstPtr. Confirm against the C declaration.
36 %ifdef FAR_POINTER
37 ;EXTERN_C void _2xSaILine (uint8 *srcPtr, uint32 srcPitch, uint32 width,
38 ; uint8 *dstPtr, uint32 dstPitch, uint16 dstSegment);
39 %else
40 ;EXTERN_C void _2xSaILine (uint8 *srcPtr, uint32 srcPitch, uint32 width,
41 ; uint8 *dstPtr, uint32 dstPitch);
42 %endif
; Stack-argument offsets relative to ebp once the routines have executed
; "push ebp / mov ebp, esp" ([ebp+0] = saved ebp, [ebp+4] = return address).
; dstSegment is only read when FAR_POINTER is defined.
44 srcPtr equ 8
45 deltaPtr equ 12
46 srcPitch equ 16
47 width equ 20
48 dstOffset equ 24
49 dstPitch equ 28
50 dstSegment equ 32
; Byte offsets of neighbouring pixels within one source row (2 bytes per
; pixel), used by the Super2xSaI and SuperEagle routines. With eax pointing
; one row above srcPtr, the routines address the rows as:
;   [eax]           row B : B0 B1 B2 B3
;   [eax+ebx]       row   : 4  5  6  S2   (the row srcPtr points into)
;   [eax+ebx+ebx]   row   : 1  2  3  S1
;   [eax+3*ebx]     row A : A0 A1 A2 A3
55 colorB0 equ -2
56 colorB1 equ 0
57 colorB2 equ 2
58 colorB3 equ 4
; color7..color9 are not referenced in the visible part of this file.
60 color7 equ -2
61 color8 equ 0
62 color9 equ 2
64 color4 equ -2
65 color5 equ 0
66 color6 equ 2
67 colorS2 equ 4
69 color1 equ -2
70 color2 equ 0
71 color3 equ 2
72 colorS1 equ 4
74 colorA0 equ -2
75 colorA1 equ 0
76 colorA2 equ 2
77 colorA3 equ 4
;-----------------------------------------------------------------------
; _2xSaISuper2xSaILine (exported as __2xSaISuper2xSaILine under __DJGPP__)
;
; Scales one line of 16-bit pixels with the Super2xSaI rules using MMX.
; Each loop iteration examines a 4-row neighbourhood around 4 source
; pixels and writes 8 pixels (16 bytes) to each of two destination rows.
;
; In (stack, relative to ebp after the prologue):
;   srcPtr, deltaPtr, srcPitch, width, dstOffset, dstPitch
;   (dstSegment is read only when FAR_POINTER is defined)
; Out:   no register result; pushad/popad restore all general registers.
; Side effects: the delta buffer is updated, the deltaPtr argument slot on
;   the stack is advanced by 8 per iteration, emms is executed on exit.
; Loop registers:
;   eax = srcPtr - srcPitch (row B)   ebx = srcPitch
;   edx = destination pointer         ecx = scratch
;   The remaining pixel count is kept on the stack.
; Uses colorMask, lowPixelMask, ONE and the scratch quadwords Mask26,
; Mask35, I56Pixel, I5556Pixel, I5666Pixel, I23Pixel, I2223Pixel,
; I2333Pixel and final1a/1b/2a/2b, none of which are defined in the
; visible part of this file.
;-----------------------------------------------------------------------
82 %ifdef __DJGPP__
83 __2xSaISuper2xSaILine:
84 %else
85 _2xSaISuper2xSaILine:
86 %endif
87 ; Prologue: use ebp as the argument frame and save all general registers
88 push ebp
89 mov ebp, esp
90 pushad
92 ; Prepare the destination
93 %ifdef FAR_POINTER
94 ; Set the selector: destination writes go through fs
95 mov eax, [ebp+dstSegment]
96 mov fs, ax
97 %endif
98 mov edx, [ebp+dstOffset] ; edx points to the screen
99 ; Prepare the source
100 ; eax points to the srcPtr row (color5 here, colorA in the 2xSaI naming)
101 mov eax, [ebp+srcPtr] ;eax points to color5 / colorA
102 mov ebx, [ebp+srcPitch] ;ebx contains the source pitch
103 mov ecx, [ebp+width] ;ecx contains the number of pixels to process
104 ; eax now points to colorB1
105 sub eax, ebx ;eax points to B1 (one row above srcPtr), the base for all row addressing
107 ; Main Loop: 4 source pixels per iteration; the pixel count is parked on the stack so ecx can be reused
108 .Loop: push ecx
110 ;-----Check Delta------------------
; Compare the 4x(4+4)-pixel source neighbourhood with the delta buffer and
; skip the whole iteration when every word is unchanged.
111 mov ecx, [ebp+deltaPtr]
114 ;load source img: two overlapping quadwords (offsets -2 and +4) from each of the four rows
115 movq mm0, [eax+colorB0]
116 movq mm1, [eax+colorB3]
117 movq mm2, [eax+ebx+color4]
118 movq mm3, [eax+ebx+colorS2]
119 movq mm4, [eax+ebx+ebx+color1]
120 movq mm5, [eax+ebx+ebx+colorS1]
; Step eax down one row temporarily so [eax+ebx+ebx] addresses the fourth row (A).
121 push eax
122 add eax, ebx
123 movq mm6, [eax+ebx+ebx+colorA0]
124 movq mm7, [eax+ebx+ebx+colorA3]
125 pop eax
127 ;compare to delta (delta addresses carry an extra +2 byte bias relative to the source addresses)
128 pcmpeqw mm0, [ecx+2+colorB0]
129 pcmpeqw mm1, [ecx+2+colorB3]
130 pcmpeqw mm2, [ecx+ebx+2+color4]
131 pcmpeqw mm3, [ecx+ebx+2+colorS2]
132 pcmpeqw mm4, [ecx+ebx+ebx+2+color1]
133 pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1]
134 add ecx, ebx ; same one-row step as above, for the delta pointer
135 pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0]
136 pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3]
137 sub ecx, ebx
140 ;compose results: AND all eight equality masks together into mm0
141 pand mm0, mm1
142 pand mm2, mm3
143 pand mm4, mm5
144 pand mm6, mm7
145 pand mm0, mm2
146 pand mm4, mm6
147 pxor mm7, mm7
148 pand mm0, mm4
149 movq mm6, [eax+colorB0]
150 pcmpeqw mm7, mm0 ;did any compare give us a zero ? (mm7 lane = FFFF where something differed)
152 movq [ecx+2+colorB0], mm6 ; store the current row-B quadword into the delta buffer
154 packsswb mm7, mm7 ; squeeze the four word flags into the low dword
155 movd ecx, mm7
156 test ecx, ecx
157 jz near .SKIP_PROCESS ;no, so we can skip
159 ;End Delta
161 ;---------------------------------
; Interpolation pattern used by every block below, for inputs a (mm0) and b (mm1):
;   ((a & colorMask) >> 1) + ((b & colorMask) >> 1) + (a & b & lowPixelMask)
; NOTE(review): colorMask / lowPixelMask are defined outside this view;
; presumably they select the non-low and low bit of each colour channel.
; I56Pixel = interpolate(color5, color6); mm4/mm5 keep color5/color6.
162 movq mm0, [eax+ebx+color5]
163 movq mm1, [eax+ebx+color6]
164 movq mm2, mm0
165 movq mm3, mm1
166 movq mm4, mm0
167 movq mm5, mm1
169 pand mm0, [colorMask]
170 pand mm1, [colorMask]
172 psrlw mm0, 1
173 psrlw mm1, 1
175 pand mm3, [lowPixelMask]
176 paddw mm0, mm1
178 pand mm3, mm2
179 paddw mm0, mm3 ;mm0 contains the interpolated values
180 movq [I56Pixel], mm0
181 movq mm7, mm0
183 ;------------------- I5556Pixel = interpolate(I56Pixel, color5)
184 movq mm0, mm7
185 movq mm1, mm4 ;5,5,5,6
186 movq mm2, mm0
187 movq mm3, mm1
189 pand mm0, [colorMask]
190 pand mm1, [colorMask]
192 psrlw mm0, 1
193 psrlw mm1, 1
195 pand mm3, [lowPixelMask]
196 paddw mm0, mm1
198 pand mm3, mm2
199 paddw mm0, mm3 ;mm0 contains the interpolated values
200 movq [I5556Pixel], mm0
201 ;-------------------- I5666Pixel = interpolate(I56Pixel, color6)
203 movq mm0, mm7
204 movq mm1, mm5 ;6,6,6,5
205 movq mm2, mm0
206 movq mm3, mm1
208 pand mm0, [colorMask]
209 pand mm1, [colorMask]
211 psrlw mm0, 1
212 psrlw mm1, 1
214 pand mm3, [lowPixelMask]
215 paddw mm0, mm1
217 pand mm3, mm2
218 paddw mm0, mm3
219 movq [I5666Pixel], mm0
221 ;-------------------------
222 ;------------------------- I23Pixel = interpolate(color2, color3); mm4/mm5 keep color2/color3
223 movq mm0, [eax+ebx+ebx+color2]
224 movq mm1, [eax+ebx+ebx+color3]
225 movq mm2, mm0
226 movq mm3, mm1
227 movq mm4, mm0
228 movq mm5, mm1
230 pand mm0, [colorMask]
231 pand mm1, [colorMask]
233 psrlw mm0, 1
234 psrlw mm1, 1
236 pand mm3, [lowPixelMask]
237 paddw mm0, mm1
239 pand mm3, mm2
240 paddw mm0, mm3
241 movq [I23Pixel], mm0
242 movq mm7, mm0
244 ;--------------------- I2223Pixel = interpolate(I23Pixel, color2)
245 movq mm0, mm7
246 movq mm1, mm4 ;2,2,2,3
247 movq mm2, mm0
248 movq mm3, mm1
250 pand mm0, [colorMask]
251 pand mm1, [colorMask]
253 psrlw mm0, 1
254 psrlw mm1, 1
256 pand mm3, [lowPixelMask]
257 paddw mm0, mm1
259 pand mm3, mm2
260 paddw mm0, mm3
261 movq [I2223Pixel], mm0
263 ;---------------------- I2333Pixel = interpolate(I23Pixel, color3)
264 movq mm0, mm7
265 movq mm1, mm5 ;3,3,3,2
266 movq mm2, mm0
267 movq mm3, mm1
269 pand mm0, [colorMask]
270 pand mm1, [colorMask]
272 psrlw mm0, 1
273 psrlw mm1, 1
275 pand mm3, [lowPixelMask]
276 paddw mm0, mm1
278 pand mm3, mm2
279 paddw mm0, mm3
280 movq [I2333Pixel], mm0
283 ;--------------------
284 ;////////////////////////////////
285 ; Decide which "branch" to take
; The inline comments use the 2xSaI names: A = color5, B = color6,
; C = color2, D = color3.
;   Mask26 = (color5 != color3) && (color6 == color2)
;   Mask35 = ((color5 == color3) && (color6 != color2)) || (all four equal)
;   mm0    = both diagonals match but color5 != color6 -> needs the guess below
286 ;--------------------------------
287 movq mm0, [eax+ebx+color5]
288 movq mm1, [eax+ebx+color6]
289 movq mm6, mm0
290 movq mm7, mm1
291 pcmpeqw mm0, [eax+ebx+ebx+color3]
292 pcmpeqw mm1, [eax+ebx+ebx+color2]
293 pcmpeqw mm6, mm7
295 movq mm2, mm0
296 movq mm3, mm0
298 pand mm0, mm1 ;colorA == colorD && colorB == colorC
299 pxor mm7, mm7
301 pcmpeqw mm2, mm7 ; invert: colorA != colorD
302 pand mm6, mm0 ; all four pixels equal
303 pand mm2, mm1 ;colorA != colorD && colorB == colorC
305 pcmpeqw mm1, mm7 ; invert: colorB != colorC
307 pand mm1, mm3 ;colorA == colorD && colorB != colorC
308 pxor mm0, mm6 ; drop the all-equal lanes from the "guess" mask
309 por mm1, mm6 ; all-equal lanes are treated as Mask35
310 movq mm7, mm0
311 movq [Mask26], mm2
312 packsswb mm7, mm7
313 movq [Mask35], mm1
315 movd ecx, mm7
316 test ecx, ecx
317 jz near .SKIP_GUESS ; no lane needs the neighbour vote
319 ;---------------------------------------------
; Neighbour vote for the ambiguous lanes (mm6 = guess mask & ONE).
; mm4 = color5 (colorA), mm5 = color6 (colorB), mm7 = running score.
; Each of the four rounds loads two neighbours, counts how many equal mm4
; and how many equal mm5, then adds 1 to the score when at most one equals
; mm4 and subtracts 1 when at most one equals mm5 (selected lanes only).
320 movq mm6, mm0
321 movq mm4, [eax+ebx+colorA]
322 movq mm5, [eax+ebx+colorB]
323 pxor mm7, mm7
324 pand mm6, [ONE]
; Round 1: neighbours E (row B, offset 0) and G (srcPtr row, offset -2)
326 movq mm0, [eax+colorE]
327 movq mm1, [eax+ebx+colorG]
328 movq mm2, mm0
329 movq mm3, mm1
330 pcmpeqw mm0, mm4
331 pcmpeqw mm1, mm4
332 pcmpeqw mm2, mm5
333 pcmpeqw mm3, mm5
334 pand mm0, mm6
335 pand mm1, mm6
336 pand mm2, mm6
337 pand mm3, mm6
338 paddw mm0, mm1
339 paddw mm2, mm3
341 pxor mm3, mm3
342 pcmpgtw mm0, mm6
343 pcmpgtw mm2, mm6
344 pcmpeqw mm0, mm3
345 pcmpeqw mm2, mm3
346 pand mm0, mm6
347 pand mm2, mm6
348 paddw mm7, mm0
349 psubw mm7, mm2
; Round 2: neighbours F (row B, offset +2) and K (srcPtr row, offset +4)
351 movq mm0, [eax+colorF]
352 movq mm1, [eax+ebx+colorK]
353 movq mm2, mm0
354 movq mm3, mm1
355 pcmpeqw mm0, mm4
356 pcmpeqw mm1, mm4
357 pcmpeqw mm2, mm5
358 pcmpeqw mm3, mm5
359 pand mm0, mm6
360 pand mm1, mm6
361 pand mm2, mm6
362 pand mm3, mm6
363 paddw mm0, mm1
364 paddw mm2, mm3
366 pxor mm3, mm3
367 pcmpgtw mm0, mm6
368 pcmpgtw mm2, mm6
369 pcmpeqw mm0, mm3
370 pcmpeqw mm2, mm3
371 pand mm0, mm6
372 pand mm2, mm6
373 paddw mm7, mm0
374 psubw mm7, mm2
; Rounds 3 and 4 run with eax stepped down one row, so [eax+ebx] is the
; third row and [eax+ebx+ebx] is the fourth row.
376 push eax
377 add eax, ebx
; Round 3: neighbours H (third row, offset -2) and N (fourth row, offset 0)
378 movq mm0, [eax+ebx+colorH]
379 movq mm1, [eax+ebx+ebx+colorN]
380 movq mm2, mm0
381 movq mm3, mm1
382 pcmpeqw mm0, mm4
383 pcmpeqw mm1, mm4
384 pcmpeqw mm2, mm5
385 pcmpeqw mm3, mm5
386 pand mm0, mm6
387 pand mm1, mm6
388 pand mm2, mm6
389 pand mm3, mm6
390 paddw mm0, mm1
391 paddw mm2, mm3
393 pxor mm3, mm3
394 pcmpgtw mm0, mm6
395 pcmpgtw mm2, mm6
396 pcmpeqw mm0, mm3
397 pcmpeqw mm2, mm3
398 pand mm0, mm6
399 pand mm2, mm6
400 paddw mm7, mm0
401 psubw mm7, mm2
; Round 4: neighbours L (third row, offset +4) and O (fourth row, offset +2)
403 movq mm0, [eax+ebx+colorL]
404 movq mm1, [eax+ebx+ebx+colorO]
405 movq mm2, mm0
406 movq mm3, mm1
407 pcmpeqw mm0, mm4
408 pcmpeqw mm1, mm4
409 pcmpeqw mm2, mm5
410 pcmpeqw mm3, mm5
411 pand mm0, mm6
412 pand mm1, mm6
413 pand mm2, mm6
414 pand mm3, mm6
415 paddw mm0, mm1
416 paddw mm2, mm3
418 pxor mm3, mm3
419 pcmpgtw mm0, mm6
420 pcmpgtw mm2, mm6
421 pcmpeqw mm0, mm3
422 pcmpeqw mm2, mm3
423 pand mm0, mm6
424 pand mm2, mm6
425 paddw mm7, mm0
426 psubw mm7, mm2
428 pop eax
; Fold the vote into the masks: score > 0 joins Mask35, score < 0 joins Mask26.
429 movq mm1, mm7
430 pxor mm0, mm0
431 pcmpgtw mm7, mm0
432 pcmpgtw mm0, mm1
434 por mm7, [Mask35]
435 por mm0, [Mask26]
436 movq [Mask35], mm7
437 movq [Mask26], mm0
439 .SKIP_GUESS:
441 ;Start the ASSEMBLY !!! eh... compose all the results together to form the final image...
; mm0 = interpolate(color5, color2); it stays live until final2a is stored.
444 movq mm0, [eax+ebx+color5]
445 movq mm1, [eax+ebx+ebx+color2]
446 movq mm2, mm0
447 movq mm3, mm1
448 movq mm4, mm0
449 movq mm5, mm1
451 pand mm0, [colorMask]
452 pand mm1, [colorMask]
454 psrlw mm0, 1
455 psrlw mm1, 1
457 pand mm3, [lowPixelMask]
458 paddw mm0, mm1
460 pand mm3, mm2
461 paddw mm0, mm3 ;mm0 contains the interpolated values
462 ;---------------------------
; The block below is never assembled (the symbol is not meant to be
; defined); it holds the C reference logic for product1a / product2a.
466 %ifdef dfhsdfhsdahdsfhdsfh
468 if (color5 == color3 && color2 != color6 && color4 == color5 && color5 != colorA2)
469 product2a = INTERPOLATE (color2, color5);
470 else
471 if (color5 == color1 && color6 == color5 && color4 != color2 && color5 != colorA0)
472 product2a = INTERPOLATE(color2, color5);
473 else
474 product2a = color2;
476 if (color2 == color6 && color5 != color3 && color1 == color2 && color2 != colorB2)
477 product1a = INTERPOLATE (color2, color5);
478 else
479 if (color4 == color2 && color3 == color2 && color1 != color5 && color2 != colorB0)
480 product1a = INTERPOLATE(color2, color5);
481 else
482 product1a = color5;
484 %endif
; product1a, first condition: Mask26 && color1 == color2 && color2 != colorB2
487 movq mm7, [Mask26]
488 movq mm6, [eax+colorB2]
489 movq mm5, [eax+ebx+ebx+color2]
490 movq mm4, [eax+ebx+ebx+color1]
491 pcmpeqw mm4, mm5
492 pcmpeqw mm6, mm5
493 pxor mm5, mm5
494 pand mm7, mm4
495 pcmpeqw mm6, mm5 ; invert: color2 != colorB2
496 pand mm7, mm6
; product1a, second condition:
;   color4 == color2 && color3 == color2 && color1 != color5 && color2 != colorB0
500 movq mm6, [eax+ebx+ebx+color3]
501 movq mm5, [eax+ebx+ebx+color2]
502 movq mm4, [eax+ebx+ebx+color1]
503 movq mm2, [eax+ebx+color5]
504 movq mm1, [eax+ebx+color4]
505 movq mm3, [eax+colorB0]
507 pcmpeqw mm2, mm4
508 pcmpeqw mm6, mm5
509 pcmpeqw mm1, mm5
510 pcmpeqw mm3, mm5
511 pxor mm5, mm5
512 pcmpeqw mm2, mm5 ; invert: color1 != color5
513 pcmpeqw mm3, mm5 ; invert: color2 != colorB0
514 pand mm6, mm1
515 pand mm2, mm3
516 pand mm6, mm2
517 por mm7, mm6
; Select: interpolated value where either condition holds, color5 elsewhere.
520 movq mm6, mm7
521 pcmpeqw mm6, mm5
522 pand mm7, mm0
524 movq mm1, [eax+ebx+color5]
525 pand mm6, mm1
526 por mm7, mm6
527 movq [final1a], mm7 ;finished 1a
531 ;--------------------------------
; product2a, first condition: Mask35 && color4 == color5 && color5 != colorA2
533 movq mm7, [Mask35]
534 push eax
535 add eax, ebx
536 movq mm6, [eax+ebx+ebx+colorA2]
537 pop eax
538 movq mm5, [eax+ebx+color5]
539 movq mm4, [eax+ebx+color4]
540 pcmpeqw mm4, mm5
541 pcmpeqw mm6, mm5
542 pxor mm5, mm5
543 pand mm7, mm4
544 pcmpeqw mm6, mm5 ; invert: color5 != colorA2
545 pand mm7, mm6
; product2a, second condition:
;   color6 == color5 && color1 == color5 && color4 != color2 && color5 != colorA0
549 movq mm6, [eax+ebx+color6]
550 movq mm5, [eax+ebx+color5]
551 movq mm4, [eax+ebx+color4]
552 movq mm2, [eax+ebx+ebx+color2]
553 movq mm1, [eax+ebx+ebx+color1]
554 push eax
555 add eax, ebx
556 movq mm3, [eax+ebx+ebx+colorA0]
557 pop eax
559 pcmpeqw mm2, mm4
560 pcmpeqw mm6, mm5
561 pcmpeqw mm1, mm5
562 pcmpeqw mm3, mm5
563 pxor mm5, mm5
564 pcmpeqw mm2, mm5 ; invert: color4 != color2
565 pcmpeqw mm3, mm5 ; invert: color5 != colorA0
566 pand mm6, mm1
567 pand mm2, mm3
568 pand mm6, mm2
569 por mm7, mm6
; Select: interpolated value where either condition holds, color2 elsewhere.
572 movq mm6, mm7
573 pcmpeqw mm6, mm5
574 pand mm7, mm0
576 movq mm1, [eax+ebx+ebx+color2]
577 pand mm6, mm1
578 por mm7, mm6
579 movq [final2a], mm7 ;finished 2a
582 ;--------------------------------------------
; Never assembled: C reference logic for product2b / product1b.
585 %ifdef dfhsdfhsdahdsfhdsfh
586 if (color6 == color3 && color3 == colorA1 && color2 != colorA2 && color3 != colorA0)
587 product2b = Q_INTERPOLATE (color3, color3, color3, color2);
588 else
589 if (color5 == color2 && color2 == colorA2 && colorA1 != color3 && color2 != colorA3)
590 product2b = Q_INTERPOLATE (color2, color2, color2, color3);
591 else
592 product2b = INTERPOLATE (color2, color3);
594 if (color6 == color3 && color6 == colorB1 && color5 != colorB2 && color6 != colorB0)
595 product1b = Q_INTERPOLATE (color6, color6, color6, color5);
596 else
597 if (color5 == color2 && color5 == colorB2 && colorB1 != color6 && color5 != colorB3)
598 product1b = Q_INTERPOLATE (color6, color5, color5, color5);
599 else
600 product1b = INTERPOLATE (color5, color6);
601 %endif
; product2b. mm0 = first condition:
;   color6 == color3 && colorA1 == color3 && color2 != colorA2 && color3 != colorA0
603 push eax
604 add eax, ebx
605 pxor mm7, mm7
606 movq mm0, [eax+ebx+ebx+colorA0]
607 movq mm1, [eax+ebx+ebx+colorA1]
608 movq mm2, [eax+ebx+ebx+colorA2]
609 movq mm3, [eax+ebx+ebx+colorA3]
610 pop eax
611 movq mm4, [eax+ebx+ebx+color2]
612 movq mm5, [eax+ebx+ebx+color3]
613 movq mm6, [eax+ebx+color6]
615 pcmpeqw mm6, mm5
616 pcmpeqw mm1, mm5
617 pcmpeqw mm4, mm2
618 pcmpeqw mm0, mm5
619 pcmpeqw mm4, mm7 ; invert: color2 != colorA2
620 pcmpeqw mm0, mm7 ; invert: color3 != colorA0
621 pand mm0, mm4
622 pand mm6, mm1
623 pand mm0, mm6
; mm1 = second condition:
;   color5 == color2 && colorA2 == color2 && colorA1 != color3 && color2 != colorA3
626 push eax
627 add eax, ebx
628 movq mm1, [eax+ebx+ebx+colorA1]
629 pop eax
630 movq mm4, [eax+ebx+ebx+color2]
631 movq mm5, [eax+ebx+color5]
632 movq mm6, [eax+ebx+ebx+color3]
634 pcmpeqw mm5, mm4
635 pcmpeqw mm2, mm4
636 pcmpeqw mm1, mm6
637 pcmpeqw mm3, mm4
638 pcmpeqw mm1, mm7 ; invert: colorA1 != color3
639 pcmpeqw mm3, mm7 ; invert: color2 != colorA3
640 pand mm2, mm5
641 pand mm1, mm3
642 pand mm1, mm2
; mm4 / mm3 = Mask35 / Mask26 restricted to lanes where neither condition fired.
645 movq mm7, mm0
646 por mm7, mm1
648 movq mm4, [Mask35]
649 movq mm3, [Mask26]
651 movq mm6, mm4
652 pand mm6, mm7
653 pxor mm4, mm6
655 movq mm6, mm3
656 pand mm6, mm7
657 pxor mm3, mm6
; Blend: I2333Pixel where mm0, I2223Pixel where mm1, color3 where mm4,
; color2 where mm3, I23Pixel where none of the four masks is set.
659 movq mm2, mm0
660 movq mm7, [I2333Pixel]
661 movq mm6, [I2223Pixel]
662 movq mm5, [I23Pixel]
665 por mm2, mm4
666 pand mm4, [eax+ebx+ebx+color3]
667 por mm2, mm3
668 pand mm3, [eax+ebx+ebx+color2]
669 por mm2, mm1
670 pand mm0, mm7
671 pand mm1, mm6
672 pxor mm7, mm7
673 pcmpeqw mm2, mm7 ; mm2 = "no mask set" lanes
674 por mm0, mm1
675 por mm3, mm4
676 pand mm2, mm5
677 por mm0, mm3
678 por mm0, mm2
679 movq [final2b], mm0
681 ;-----------------------------------
; product1b. mm0 = first condition:
;   color3 == color6 && colorB1 == color6 && color5 != colorB2 && color6 != colorB0
684 pxor mm7, mm7
685 movq mm0, [eax+colorB0]
686 movq mm1, [eax+colorB1]
687 movq mm2, [eax+colorB2]
688 movq mm3, [eax+colorB3]
689 movq mm4, [eax+ebx+color5]
690 movq mm5, [eax+ebx+color6]
691 movq mm6, [eax+ebx+ebx+color3]
693 pcmpeqw mm6, mm5
694 pcmpeqw mm1, mm5
695 pcmpeqw mm4, mm2
696 pcmpeqw mm0, mm5
697 pcmpeqw mm4, mm7 ; invert: color5 != colorB2
698 pcmpeqw mm0, mm7 ; invert: color6 != colorB0
699 pand mm0, mm4
700 pand mm6, mm1
701 pand mm0, mm6
; mm1 = second condition:
;   color2 == color5 && colorB2 == color5 && colorB1 != color6 && color5 != colorB3
703 movq mm1, [eax+colorB1]
704 movq mm4, [eax+ebx+color5]
705 movq mm5, [eax+ebx+ebx+color2]
706 movq mm6, [eax+ebx+color6]
708 pcmpeqw mm5, mm4
709 pcmpeqw mm2, mm4
710 pcmpeqw mm1, mm6
711 pcmpeqw mm3, mm4
712 pcmpeqw mm1, mm7 ; invert: colorB1 != color6
713 pcmpeqw mm3, mm7 ; invert: color5 != colorB3
714 pand mm2, mm5
715 pand mm1, mm3
716 pand mm1, mm2
; mm4 / mm3 = Mask35 / Mask26 restricted to lanes where neither condition fired.
719 movq mm7, mm0
720 por mm7, mm1
722 movq mm4, [Mask35]
723 movq mm3, [Mask26]
725 movq mm6, mm4
726 pand mm6, mm7
727 pxor mm4, mm6
729 movq mm6, mm3
730 pand mm6, mm7
731 pxor mm3, mm6
; Blend: I5666Pixel where mm0, I5556Pixel where mm1, color5 where mm4,
; color6 where mm3, I56Pixel where none of the four masks is set.
733 movq mm2, mm0
734 movq mm7, [I5666Pixel]
735 movq mm6, [I5556Pixel]
736 movq mm5, [I56Pixel]
739 por mm2, mm4
740 pand mm4, [eax+ebx+color5]
741 por mm2, mm3
742 pand mm3, [eax+ebx+color6]
743 por mm2, mm1
744 pand mm0, mm7
745 pand mm1, mm6
746 pxor mm7, mm7
747 pcmpeqw mm2, mm7 ; mm2 = "no mask set" lanes
748 por mm0, mm1
749 por mm3, mm4
750 pand mm2, mm5
751 por mm0, mm3
752 por mm0, mm2
753 movq [final1b], mm0
755 ;--------- Interleave a/b results: row 1 = 1a,1b pairs; row 2 = 2a,2b pairs
757 movq mm0, [final1a]
758 movq mm4, [final2a]
759 movq mm2, [final1b]
760 movq mm6, [final2b]
763 movq mm1, mm0
764 movq mm5, mm4
767 punpcklwd mm0, mm2
768 punpckhwd mm1, mm2
770 punpcklwd mm4, mm6
771 punpckhwd mm5, mm6
; Write 16 bytes to the current destination row and 16 bytes to the row
; dstPitch bytes below it; edx itself is preserved across the second write.
774 %ifdef FAR_POINTER
775 movq [fs:edx], mm0
776 movq [fs:edx+8], mm1
777 push edx
778 add edx, [ebp+dstPitch]
779 movq [fs:edx], mm4
780 movq [fs:edx+8], mm5
781 pop edx
782 %else
783 movq [edx], mm0
784 movq [edx+8], mm1
785 push edx
786 add edx, [ebp+dstPitch]
787 movq [edx], mm4
788 movq [edx+8], mm5
789 pop edx
790 %endif
791 .SKIP_PROCESS:
; Advance to the next group: delta and source by 8 bytes (4 pixels),
; destination by 16 bytes (8 pixels). The deltaPtr stack slot is updated in place.
792 mov ecx, [ebp+deltaPtr]
793 add ecx, 8
794 mov [ebp+deltaPtr], ecx
795 add edx, 16
796 add eax, 8
798 pop ecx ; recover the remaining pixel count
799 sub ecx, 4
800 cmp ecx, 0
801 jg near .Loop ; signed test: loop while pixels remain
803 ; Epilogue: restore registers and frame, then clear the MMX state
804 popad
805 mov esp, ebp
806 pop ebp
807 emms
808 ret
811 ;-------------------------------------------------------------------------
812 ;-------------------------------------------------------------------------
813 ;-------------------------------------------------------------------------
814 ;-------------------------------------------------------------------------
815 ;-------------------------------------------------------------------------
816 ;-------------------------------------------------------------------------
817 ;-------------------------------------------------------------------------
;-----------------------------------------------------------------------
; _2xSaISuperEagleLine (exported as __2xSaISuperEagleLine under __DJGPP__)
;
; Scales one line of 16-bit pixels with the SuperEagle rules using MMX.
; Each loop iteration examines a 4-row neighbourhood around 4 source
; pixels and writes 8 pixels (16 bytes) to each of two destination rows.
;
; In (stack, relative to ebp after the prologue):
;   srcPtr, deltaPtr, srcPitch, width, dstOffset, dstPitch
;   (dstSegment is read only when FAR_POINTER is defined)
; Out:   no register result; pushad/popad restore all general registers.
; Side effects: the delta buffer is updated, the deltaPtr argument slot on
;   the stack is advanced by 8 per iteration, emms is executed on exit.
; Loop registers:
;   eax = srcPtr - srcPitch (row B)   ebx = srcPitch
;   edx = destination pointer         ecx = scratch
;   The remaining pixel count is kept on the stack.
; Uses colorMask, lowPixelMask, ONE and the scratch quadwords Mask26,
; Mask35, Mask26b, Mask35b, I56Pixel, I23Pixel, product1a/1b/2a/2b and
; final1a/1b/2a/2b, none of which are defined in the visible part of
; this file.
;-----------------------------------------------------------------------
821 %ifdef __DJGPP__
822 __2xSaISuperEagleLine:
823 %else
824 _2xSaISuperEagleLine:
825 %endif
826 ; Prologue: use ebp as the argument frame and save all general registers
827 push ebp
828 mov ebp, esp
829 pushad
831 ; Prepare the destination
832 %ifdef FAR_POINTER
833 ; Set the selector: destination writes go through fs
834 mov eax, [ebp+dstSegment]
835 mov fs, ax
836 %endif
837 mov edx, [ebp+dstOffset] ; edx points to the screen
838 ; Prepare the source
839 ; eax points to the srcPtr row (color5 here, colorA in the 2xSaI naming)
840 mov eax, [ebp+srcPtr]
841 mov ebx, [ebp+srcPitch]
842 mov ecx, [ebp+width]
843 ; eax now points to colorB1 (one row above srcPtr)
844 sub eax, ebx
846 ; Main Loop: 4 source pixels per iteration; the pixel count is parked on the stack so ecx can be reused
847 .Loop: push ecx
849 ;-----Check Delta------------------
; Compare the source neighbourhood with the delta buffer and skip the
; iteration when every compared word is unchanged.
850 mov ecx, [ebp+deltaPtr]
; Two overlapping quadwords (offsets -2 and +4) from each of the four rows.
852 movq mm0, [eax+colorB0]
853 movq mm1, [eax+colorB3]
854 movq mm2, [eax+ebx+color4]
855 movq mm3, [eax+ebx+colorS2]
856 movq mm4, [eax+ebx+ebx+color1]
857 movq mm5, [eax+ebx+ebx+colorS1]
; Step eax down one row temporarily so [eax+ebx+ebx] addresses the fourth row (A).
858 push eax
859 add eax, ebx
860 movq mm6, [eax+ebx+ebx+colorA0]
861 movq mm7, [eax+ebx+ebx+colorA3]
862 pop eax
; Delta addresses carry an extra +2 byte bias relative to the source addresses.
864 pcmpeqw mm0, [ecx+2+colorB0]
865 pcmpeqw mm1, [ecx+2+colorB3]
866 pcmpeqw mm2, [ecx+ebx+2+color4]
867 pcmpeqw mm3, [ecx+ebx+2+colorS2]
868 pcmpeqw mm4, [ecx+ebx+ebx+2+color1]
869 pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1]
870 add ecx, ebx ; same one-row step, for the delta pointer
871 pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0]
872 pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3]
873 sub ecx, ebx
; AND all eight equality masks; mm7 lane becomes FFFF where anything differed.
876 pand mm0, mm1
877 pand mm2, mm3
878 pand mm4, mm5
879 pand mm6, mm7
880 pand mm0, mm2
881 pand mm4, mm6
882 pxor mm7, mm7
883 pand mm0, mm4
884 movq mm6, [eax+colorB0]
885 pcmpeqw mm7, mm0
887 movq [ecx+2+colorB0], mm6 ; store the current row-B quadword into the delta buffer
889 packsswb mm7, mm7 ; squeeze the four word flags into the low dword
890 movd ecx, mm7
891 test ecx, ecx
892 jz near .SKIP_PROCESS ; nothing changed: skip this group
894 ;End Delta
896 ;---------------------------------
; Interpolation pattern used by every block below, for inputs a (mm0) and b (mm1):
;   ((a & colorMask) >> 1) + ((b & colorMask) >> 1) + (a & b & lowPixelMask)
; NOTE(review): colorMask / lowPixelMask are defined outside this view;
; presumably they select the non-low and low bit of each colour channel.
; I56Pixel = interpolate(color5, color6); mm4/mm5 keep color5/color6.
897 movq mm0, [eax+ebx+color5]
898 movq mm1, [eax+ebx+color6]
899 movq mm2, mm0
900 movq mm3, mm1
901 movq mm4, mm0
902 movq mm5, mm1
904 pand mm0, [colorMask]
905 pand mm1, [colorMask]
907 psrlw mm0, 1
908 psrlw mm1, 1
910 pand mm3, [lowPixelMask]
911 paddw mm0, mm1
913 pand mm3, mm2
914 paddw mm0, mm3 ;mm0 contains the interpolated values
915 movq [I56Pixel], mm0
916 movq mm7, mm0
918 ;------------------- product1a = interpolate(I56Pixel, color5)
919 movq mm0, mm7
920 movq mm1, mm4 ;5,5,5,6
921 movq mm2, mm0
922 movq mm3, mm1
924 pand mm0, [colorMask]
925 pand mm1, [colorMask]
927 psrlw mm0, 1
928 psrlw mm1, 1
930 pand mm3, [lowPixelMask]
931 paddw mm0, mm1
933 pand mm3, mm2
934 paddw mm0, mm3 ;mm0 contains the interpolated values
935 movq [product1a], mm0
936 ;-------------------- product1b = interpolate(I56Pixel, color6)
938 movq mm0, mm7
939 movq mm1, mm5 ;6,6,6,5
940 movq mm2, mm0
941 movq mm3, mm1
943 pand mm0, [colorMask]
944 pand mm1, [colorMask]
946 psrlw mm0, 1
947 psrlw mm1, 1
949 pand mm3, [lowPixelMask]
950 paddw mm0, mm1
952 pand mm3, mm2
953 paddw mm0, mm3
954 movq [product1b], mm0
956 ;-------------------------
957 ;------------------------- I23Pixel = interpolate(color2, color3); mm4/mm5 keep color2/color3
958 movq mm0, [eax+ebx+ebx+color2]
959 movq mm1, [eax+ebx+ebx+color3]
960 movq mm2, mm0
961 movq mm3, mm1
962 movq mm4, mm0
963 movq mm5, mm1
965 pand mm0, [colorMask]
966 pand mm1, [colorMask]
968 psrlw mm0, 1
969 psrlw mm1, 1
971 pand mm3, [lowPixelMask]
972 paddw mm0, mm1
974 pand mm3, mm2
975 paddw mm0, mm3
976 movq [I23Pixel], mm0
977 movq mm7, mm0
979 ;--------------------- product2a = interpolate(I23Pixel, color2)
980 movq mm0, mm7
981 movq mm1, mm4 ;2,2,2,3
982 movq mm2, mm0
983 movq mm3, mm1
985 pand mm0, [colorMask]
986 pand mm1, [colorMask]
988 psrlw mm0, 1
989 psrlw mm1, 1
991 pand mm3, [lowPixelMask]
992 paddw mm0, mm1
994 pand mm3, mm2
995 paddw mm0, mm3
996 movq [product2a], mm0
998 ;---------------------- product2b = interpolate(I23Pixel, color3)
999 movq mm0, mm7
1000 movq mm1, mm5 ;3,3,3,2
1001 movq mm2, mm0
1002 movq mm3, mm1
1004 pand mm0, [colorMask]
1005 pand mm1, [colorMask]
1007 psrlw mm0, 1
1008 psrlw mm1, 1
1010 pand mm3, [lowPixelMask]
1011 paddw mm0, mm1
1013 pand mm3, mm2
1014 paddw mm0, mm3
1015 movq [product2b], mm0
1018 ;////////////////////////////////
1019 ; Decide which "branch" to take
; mm4..mm7 hold color5, color6, color3, color2 for the mask computations.
;   Mask35  = (color5 == color3) && (color6 != color2)
;   Mask35b = Mask35 && ((colorS1 == color5 && color4 == color5) ||
;                        (colorA2 == color5 && colorB1 == color5))
;   Mask26  = (color5 != color3) && (color6 == color2)
;   Mask26b = Mask26 && ((color1 == color6 && colorS2 == color6) ||
;                        (colorA1 == color6 && colorB2 == color6))
1020 ;--------------------------------
1021 movq mm4, [eax+ebx+color5]
1022 movq mm5, [eax+ebx+color6]
1023 movq mm6, [eax+ebx+ebx+color3]
1024 movq mm7, [eax+ebx+ebx+color2]
1026 pxor mm3, mm3
1027 movq mm0, mm4
1028 movq mm1, mm5
1030 pcmpeqw mm0, mm6
1031 pcmpeqw mm1, mm7
1032 pcmpeqw mm1, mm3 ; invert: color6 != color2
1033 pand mm0, mm1
1034 movq [Mask35], mm0
1036 movq mm0, [eax+ebx+ebx+colorS1]
1037 movq mm1, [eax+ebx+color4]
1038 push eax
1039 add eax, ebx
1040 movq mm2, [eax+ebx+ebx+colorA2]
1041 pop eax
1042 movq mm3, [eax+colorB1]
1043 pcmpeqw mm0, mm4
1044 pcmpeqw mm1, mm4
1045 pcmpeqw mm2, mm4
1046 pcmpeqw mm3, mm4
1047 pand mm0, mm1
1048 pand mm2, mm3
1049 por mm0, mm2
1050 pand mm0, [Mask35]
1051 movq [Mask35b], mm0
1053 ;-----------
1054 pxor mm3, mm3
1055 movq mm0, mm4
1056 movq mm1, mm5
1058 pcmpeqw mm0, mm6
1059 pcmpeqw mm1, mm7
1060 pcmpeqw mm0, mm3 ; invert: color5 != color3
1061 pand mm0, mm1
1062 movq [Mask26], mm0
1064 movq mm0, [eax+ebx+ebx+color1]
1065 movq mm1, [eax+ebx+colorS2]
1066 push eax
1067 add eax, ebx
1068 movq mm2, [eax+ebx+ebx+colorA1]
1069 pop eax
1070 movq mm3, [eax+colorB2]
1071 pcmpeqw mm0, mm5
1072 pcmpeqw mm1, mm5
1073 pcmpeqw mm2, mm5
1074 pcmpeqw mm3, mm5
1075 pand mm0, mm1
1076 pand mm2, mm3
1077 por mm0, mm2
1078 pand mm0, [Mask26]
1079 movq [Mask26b], mm0
1081 ;-------------------- guess mask: color5 == color3 && color6 == color2 && color5 != color6
1082 movq mm0, mm4
1083 movq mm1, mm5
1084 movq mm2, mm0
1086 pcmpeqw mm2, mm1
1087 pcmpeqw mm0, mm6
1088 pcmpeqw mm1, mm7
1089 pand mm0, mm1
1090 pand mm2, mm0 ; lanes where all four pixels are equal
1091 pxor mm0, mm2 ; remove them from the guess mask
1092 movq mm7, mm0
1094 ;------------------
1095 packsswb mm7, mm7
1096 movd ecx, mm7
1097 test ecx, ecx
1098 jz near .SKIP_GUESS ; no lane needs the neighbour vote
1100 ;---------------------------------------------
1101 ; Map of the pixels: I|E F|J
1102 ; G|A B|K
1103 ; H|C D|L
1104 ; M|N O|P
; Neighbour vote for the ambiguous lanes (mm6 = guess mask & ONE).
; mm4 = color5 (A in the map), mm5 = color6 (B), mm7 = running score.
; Each of the four rounds loads two neighbours, counts how many equal mm4
; and how many equal mm5, then adds 1 to the score when at most one equals
; mm4 and subtracts 1 when at most one equals mm5 (selected lanes only).
1105 movq mm6, mm0
1106 movq mm4, [eax+ebx+color5]
1107 movq mm5, [eax+ebx+color6]
1108 pxor mm7, mm7
1109 pand mm6, [ONE]
; Round 1: colorB1 and color4 (E and G in the map)
1111 movq mm0, [eax+colorB1]
1112 movq mm1, [eax+ebx+color4]
1113 movq mm2, mm0
1114 movq mm3, mm1
1115 pcmpeqw mm0, mm4
1116 pcmpeqw mm1, mm4
1117 pcmpeqw mm2, mm5
1118 pcmpeqw mm3, mm5
1119 pand mm0, mm6
1120 pand mm1, mm6
1121 pand mm2, mm6
1122 pand mm3, mm6
1123 paddw mm0, mm1
1124 paddw mm2, mm3
1126 pxor mm3, mm3
1127 pcmpgtw mm0, mm6
1128 pcmpgtw mm2, mm6
1129 pcmpeqw mm0, mm3
1130 pcmpeqw mm2, mm3
1131 pand mm0, mm6
1132 pand mm2, mm6
1133 paddw mm7, mm0
1134 psubw mm7, mm2
; Round 2: colorB2 and colorS2 (F and K in the map)
1136 movq mm0, [eax+colorB2]
1137 movq mm1, [eax+ebx+colorS2]
1138 movq mm2, mm0
1139 movq mm3, mm1
1140 pcmpeqw mm0, mm4
1141 pcmpeqw mm1, mm4
1142 pcmpeqw mm2, mm5
1143 pcmpeqw mm3, mm5
1144 pand mm0, mm6
1145 pand mm1, mm6
1146 pand mm2, mm6
1147 pand mm3, mm6
1148 paddw mm0, mm1
1149 paddw mm2, mm3
1151 pxor mm3, mm3
1152 pcmpgtw mm0, mm6
1153 pcmpgtw mm2, mm6
1154 pcmpeqw mm0, mm3
1155 pcmpeqw mm2, mm3
1156 pand mm0, mm6
1157 pand mm2, mm6
1158 paddw mm7, mm0
1159 psubw mm7, mm2
; Rounds 3 and 4 run with eax stepped down one row, so [eax+ebx] is the
; third row and [eax+ebx+ebx] is the fourth row.
1161 push eax
1162 add eax, ebx
; Round 3: color1 and colorA1 (H and N in the map)
1163 movq mm0, [eax+ebx+color1]
1164 movq mm1, [eax+ebx+ebx+colorA1]
1165 movq mm2, mm0
1166 movq mm3, mm1
1167 pcmpeqw mm0, mm4
1168 pcmpeqw mm1, mm4
1169 pcmpeqw mm2, mm5
1170 pcmpeqw mm3, mm5
1171 pand mm0, mm6
1172 pand mm1, mm6
1173 pand mm2, mm6
1174 pand mm3, mm6
1175 paddw mm0, mm1
1176 paddw mm2, mm3
1178 pxor mm3, mm3
1179 pcmpgtw mm0, mm6
1180 pcmpgtw mm2, mm6
1181 pcmpeqw mm0, mm3
1182 pcmpeqw mm2, mm3
1183 pand mm0, mm6
1184 pand mm2, mm6
1185 paddw mm7, mm0
1186 psubw mm7, mm2
; Round 4: colorS1 and colorA2 (L and O in the map)
1188 movq mm0, [eax+ebx+colorS1]
1189 movq mm1, [eax+ebx+ebx+colorA2]
1190 movq mm2, mm0
1191 movq mm3, mm1
1192 pcmpeqw mm0, mm4
1193 pcmpeqw mm1, mm4
1194 pcmpeqw mm2, mm5
1195 pcmpeqw mm3, mm5
1196 pand mm0, mm6
1197 pand mm1, mm6
1198 pand mm2, mm6
1199 pand mm3, mm6
1200 paddw mm0, mm1
1201 paddw mm2, mm3
1203 pxor mm3, mm3
1204 pcmpgtw mm0, mm6
1205 pcmpgtw mm2, mm6
1206 pcmpeqw mm0, mm3
1207 pcmpeqw mm2, mm3
1208 pand mm0, mm6
1209 pand mm2, mm6
1210 paddw mm7, mm0
1211 psubw mm7, mm2
1213 pop eax
; Fold the vote into the masks: score > 0 joins Mask35, score < 0 joins Mask26.
1214 movq mm1, mm7
1215 pxor mm0, mm0
1216 pcmpgtw mm7, mm0
1217 pcmpgtw mm0, mm1
1219 por mm7, [Mask35]
1220 por mm0, [Mask26]
1221 movq [Mask35], mm7
1222 movq [Mask26], mm0
1224 .SKIP_GUESS:
1225 ;Start the ASSEMBLY !!!
; Build the lane-selection masks used by the four blends below:
;   mm3 = (color5 == color2 || color6 == color3) && !(Mask35 | Mask26)
;   mm2 = lanes where none of mm3, Mask35, Mask26 is set
;   mm4 = Mask35 with the Mask35b lanes cleared, mm6 = Mask35b
;   mm5 = Mask26 with the Mask26b lanes cleared, mm7 = Mask26b
; (Mask35b / Mask26b were built above as subsets of Mask35 / Mask26, so
;  the xor clears exactly those lanes.)
1227 movq mm4, [Mask35]
1228 movq mm5, [Mask26]
1229 movq mm6, [Mask35b]
1230 movq mm7, [Mask26b]
1232 movq mm0, [eax+ebx+color5]
1233 movq mm1, [eax+ebx+color6]
1234 movq mm2, [eax+ebx+ebx+color2]
1235 movq mm3, [eax+ebx+ebx+color3]
1236 pcmpeqw mm0, mm2
1237 pcmpeqw mm1, mm3
1238 movq mm2, mm4
1239 movq mm3, mm5
1240 por mm0, mm1
1241 por mm2, mm3
1242 pand mm2, mm0
1243 pxor mm0, mm2
1244 movq mm3, mm0
1246 movq mm2, mm0
1247 pxor mm0, mm0
1248 por mm2, mm4
1249 pxor mm4, mm6
1250 por mm2, mm5
1251 pxor mm5, mm7
1252 pcmpeqw mm2, mm0
1253 ;----------------
; final1a: color5 where (mm3|mm4|mm6), I56Pixel where mm5,
;          product1b where mm7, product1a where mm2
1255 movq mm0, [eax+ebx+color5]
1256 movq mm1, mm3
1257 por mm1, mm4
1258 por mm1, mm6
1259 pand mm0, mm1
1260 movq mm1, mm5
1261 pand mm1, [I56Pixel]
1262 por mm0, mm1
1263 movq mm1, mm7
1264 pand mm1, [product1b]
1265 por mm0, mm1
1266 movq mm1, mm2
1267 pand mm1, [product1a]
1268 por mm0, mm1
1269 movq [final1a], mm0
; final1b: color6 where (mm3|mm5|mm7), I56Pixel where mm4,
;          product1a where mm6, product1b where mm2
1271 movq mm0, [eax+ebx+color6]
1272 movq mm1, mm3
1273 por mm1, mm5
1274 por mm1, mm7
1275 pand mm0, mm1
1276 movq mm1, mm4
1277 pand mm1, [I56Pixel]
1278 por mm0, mm1
1279 movq mm1, mm6
1280 pand mm1, [product1a]
1281 por mm0, mm1
1282 movq mm1, mm2
1283 pand mm1, [product1b]
1284 por mm0, mm1
1285 movq [final1b], mm0
; final2a: color2 where (mm3|mm5|mm7), I23Pixel where mm4,
;          product2b where mm6, product2a where mm2
1287 movq mm0, [eax+ebx+ebx+color2]
1288 movq mm1, mm3
1289 por mm1, mm5
1290 por mm1, mm7
1291 pand mm0, mm1
1292 movq mm1, mm4
1293 pand mm1, [I23Pixel]
1294 por mm0, mm1
1295 movq mm1, mm6
1296 pand mm1, [product2b]
1297 por mm0, mm1
1298 movq mm1, mm2
1299 pand mm1, [product2a]
1300 por mm0, mm1
1301 movq [final2a], mm0
; final2b: color3 where (mm3|mm4|mm6), I23Pixel where mm5,
;          product2a where mm7, product2b where mm2
1303 movq mm0, [eax+ebx+ebx+color3]
1304 movq mm1, mm3
1305 por mm1, mm4
1306 por mm1, mm6
1307 pand mm0, mm1
1308 movq mm1, mm5
1309 pand mm1, [I23Pixel]
1310 por mm0, mm1
1311 movq mm1, mm7
1312 pand mm1, [product2a]
1313 por mm0, mm1
1314 movq mm1, mm2
1315 pand mm1, [product2b]
1316 por mm0, mm1
1317 movq [final2b], mm0
; Interleave a/b results: row 1 = 1a,1b pairs; row 2 = 2a,2b pairs
1320 movq mm0, [final1a]
1321 movq mm2, [final1b]
1322 movq mm1, mm0
1323 movq mm4, [final2a]
1324 movq mm6, [final2b]
1325 movq mm5, mm4
1326 punpcklwd mm0, mm2
1327 punpckhwd mm1, mm2
1328 punpcklwd mm4, mm6
1329 punpckhwd mm5, mm6
; Write 16 bytes to the current destination row and 16 bytes to the row
; dstPitch bytes below it; edx itself is preserved across the second write.
1334 %ifdef FAR_POINTER
1335 movq [fs:edx], mm0
1336 movq [fs:edx+8], mm1
1337 push edx
1338 add edx, [ebp+dstPitch]
1339 movq [fs:edx], mm4
1340 movq [fs:edx+8], mm5
1341 pop edx
1342 %else
1343 movq [edx], mm0
1344 movq [edx+8], mm1
1345 push edx
1346 add edx, [ebp+dstPitch]
1347 movq [edx], mm4
1348 movq [edx+8], mm5
1349 pop edx
1350 %endif
1351 .SKIP_PROCESS:
; Advance to the next group: delta and source by 8 bytes (4 pixels),
; destination by 16 bytes (8 pixels). The deltaPtr stack slot is updated in place.
1352 mov ecx, [ebp+deltaPtr]
1353 add ecx, 8
1354 mov [ebp+deltaPtr], ecx
1355 add edx, 16
1356 add eax, 8
1358 pop ecx ; recover the remaining pixel count
1359 sub ecx, 4
1360 cmp ecx, 0
1361 jg near .Loop ; signed test: loop while pixels remain
1363 ; Epilogue: restore registers and frame, then clear the MMX state
1364 popad
1365 mov esp, ebp
1366 pop ebp
1367 emms
1368 ret
1371 ;-------------------------------------------------------------------------
1372 ;-------------------------------------------------------------------------
1373 ;-------------------------------------------------------------------------
1374 ;-------------------------------------------------------------------------
1375 ;-------------------------------------------------------------------------
1376 ;-------------------------------------------------------------------------
1377 ;-------------------------------------------------------------------------
1380 ;This is version 0.50
; Byte offsets of neighbouring pixels within one source row (2 bytes per
; pixel) in the 2xSaI naming. With eax pointing one row above srcPtr the
; rows are addressed as:
;   [eax]           : I E F J
;   [eax+ebx]       : G A B K   (the row srcPtr points into)
;   [eax+ebx+ebx]   : H C D L
;   [eax+3*ebx]     : M N O P
; These names are also used by the neighbour-vote code of
; _2xSaISuper2xSaILine earlier in the file.
1381 colorI equ -2
1382 colorE equ 0
1383 colorF equ 2
1384 colorJ equ 4
1386 colorG equ -2
1387 colorA equ 0
1388 colorB equ 2
1389 colorK equ 4
1391 colorH equ -2
1392 colorC equ 0
1393 colorD equ 2
1394 colorL equ 4
1396 colorM equ -2
1397 colorN equ 0
1398 colorO equ 2
1399 colorP equ 4
1401 %ifdef __DJGPP__
1402 __2xSaILine:
1403 %else
1404 _2xSaILine:
1405 %endif
1406 ; Store some stuff
1407 push ebp
1408 mov ebp, esp
1409 pushad
1411 ; Prepare the destination
1412 %ifdef FAR_POINTER
1413 ; Set the selector
1414 mov eax, [ebp+dstSegment]
1415 mov fs, ax
1416 %endif
1417 mov edx, [ebp+dstOffset] ; edx points to the screen
1418 ; Prepare the source
1419 ; eax points to colorA
1420 mov eax, [ebp+srcPtr]
1421 mov ebx, [ebp+srcPitch]
1422 mov ecx, [ebp+width]
1423 ; eax now points to colorE
1424 sub eax, ebx
1427 ; Main Loop
1428 .Loop: push ecx
1430 ;-----Check Delta------------------
1431 mov ecx, [ebp+deltaPtr]
1433 movq mm0, [eax+colorI]
1434 movq mm1, [eax+colorJ]
1435 movq mm2, [eax+ebx+colorG]
1436 movq mm3, [eax+ebx+colorK]
1437 movq mm4, [eax+ebx+ebx+colorH]
1438 movq mm5, [eax+ebx+ebx+colorL]
1439 push eax
1440 add eax, ebx
1441 movq mm6, [eax+ebx+ebx+colorM]
1442 movq mm7, [eax+ebx+ebx+colorP]
1443 pop eax
1445 pcmpeqw mm0, [ecx+2+colorI]
1446 pcmpeqw mm1, [ecx+2+colorK]
1447 pcmpeqw mm2, [ecx+ebx+2+colorG]
1448 pcmpeqw mm3, [ecx+ebx+2+colorK]
1449 pcmpeqw mm4, [ecx+ebx+ebx+2+colorH]
1450 pcmpeqw mm5, [ecx+ebx+ebx+2+colorL]
1451 add ecx, ebx
1452 pcmpeqw mm6, [ecx+ebx+ebx+2+colorM]
1453 pcmpeqw mm7, [ecx+ebx+ebx+2+colorP]
1454 sub ecx, ebx
1457 pand mm0, mm1
1458 pand mm2, mm3
1459 pand mm4, mm5
1460 pand mm6, mm7
1461 pand mm0, mm2
1462 pand mm4, mm6
1463 pxor mm7, mm7
1464 pand mm0, mm4
1465 movq mm6, [eax+colorI]
1466 pcmpeqw mm7, mm0
1468 movq [ecx+2+colorI], mm6
1470 packsswb mm7, mm7
1471 movd ecx, mm7
1472 test ecx, ecx
1473 jz near .SKIP_PROCESS
1475 ;End Delta
1477 ;---------------------------------
1480 ;1
1481 ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorE) && (colorB == colorL)
1482 movq mm0, [eax+ebx+colorA] ;mm0 and mm1 contain colorA
1483 movq mm2, [eax+ebx+colorB] ;mm2 and mm3 contain colorB
1485 movq mm1, mm0
1486 movq mm3, mm2
1488 pcmpeqw mm0, [eax+ebx+ebx+colorD]
1489 pcmpeqw mm1, [eax+colorE]
1490 pcmpeqw mm2, [eax+ebx+ebx+colorL]
1491 pcmpeqw mm3, [eax+ebx+ebx+colorC]
1493 pand mm0, mm1
1494 pxor mm1, mm1
1495 pand mm0, mm2
1496 pcmpeqw mm3, mm1
1497 pand mm0, mm3 ;result in mm0
1499 ;if ((colorA == colorC) && (colorB != colorE) && (colorA == colorF) && (colorB == colorJ)
1500 movq mm4, [eax+ebx+colorA] ;mm4 and mm5 contain colorA
1501 movq mm6, [eax+ebx+colorB] ;mm6 and mm7 contain colorB
1502 movq mm5, mm4
1503 movq mm7, mm6
1505 pcmpeqw mm4, [eax+ebx+ebx+colorC]
1506 pcmpeqw mm5, [eax+colorF]
1507 pcmpeqw mm6, [eax+colorJ]
1508 pcmpeqw mm7, [eax+colorE]
1510 pand mm4, mm5
1511 pxor mm5, mm5
1512 pand mm4, mm6
1513 pcmpeqw mm7, mm5
1514 pand mm4, mm7 ;result in mm4
1516 por mm0, mm4 ;combine the masks
1517 movq [Mask1], mm0
1519 ;--------------------------------------------
1521 ;2
1522 ;if ((colorB == colorC) && (colorA != colorD) && (colorB == colorF) && (colorA == colorH)
1523 movq mm0, [eax+ebx+colorB] ;mm0 and mm1 contain colorB
1524 movq mm2, [eax+ebx+colorA] ;mm2 and mm3 contain colorA
1525 movq mm1, mm0
1526 movq mm3, mm2
1528 pcmpeqw mm0, [eax+ebx+ebx+colorC]
1529 pcmpeqw mm1, [eax+colorF]
1530 pcmpeqw mm2, [eax+ebx+ebx+colorH]
1531 pcmpeqw mm3, [eax+ebx+ebx+colorD]
1533 pand mm0, mm1
1534 pxor mm1, mm1
1535 pand mm0, mm2
1536 pcmpeqw mm3, mm1
1537 pand mm0, mm3 ;result in mm0
1539 ;if ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI)
1540 movq mm4, [eax+ebx+colorB] ;mm4 and mm5 contain colorB
1541 movq mm6, [eax+ebx+colorA] ;mm6 and mm7 contain colorA
1542 movq mm5, mm4
1543 movq mm7, mm6
1545 pcmpeqw mm4, [eax+ebx+ebx+colorD]
1546 pcmpeqw mm5, [eax+colorE]
1547 pcmpeqw mm6, [eax+colorI]
1548 pcmpeqw mm7, [eax+colorF]
1550 pand mm4, mm5
1551 pxor mm5, mm5
1552 pand mm4, mm6
1553 pcmpeqw mm7, mm5
1554 pand mm4, mm7 ;result in mm4
1556 por mm0, mm4 ;combine the masks
1557 movq [Mask2], mm0
1560 ;interpolate colorA and colorB
1561 movq mm0, [eax+ebx+colorA]
1562 movq mm1, [eax+ebx+colorB]
1564 movq mm2, mm0
1565 movq mm3, mm1
1567 pand mm0, [colorMask]
1568 pand mm1, [colorMask]
1570 psrlw mm0, 1
1571 psrlw mm1, 1
1573 pand mm3, [lowPixelMask]
1574 paddw mm0, mm1
1576 pand mm3, mm2
1577 paddw mm0, mm3 ;mm0 contains the interpolated values
1579 ;assemble the pixels
1580 movq mm1, [eax+ebx+colorA]
1581 movq mm2, [eax+ebx+colorB]
1583 movq mm3, [Mask1]
1584 movq mm5, mm1
1585 movq mm4, [Mask2]
1586 movq mm6, mm1
1588 pand mm1, mm3
1589 por mm3, mm4
1590 pxor mm7, mm7
1591 pand mm2, mm4
1593 pcmpeqw mm3, mm7
1594 por mm1, mm2
1595 pand mm0, mm3
1597 por mm0, mm1
1599 punpcklwd mm5, mm0
1600 punpckhwd mm6, mm0
1602 %ifdef FAR_POINTER
1603 movq [fs:edx], mm5
1604 movq [fs:edx+8], mm6
1605 %else
1606 movq [edx], mm5
1607 movq [edx+8], mm6
1608 %endif
1610 ;------------------------------------------------
1611 ; Create the Nextline
1612 ;------------------------------------------------
1613 ;3 ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorG) && (colorC == colorO)
1614 movq mm0, [eax+ebx+colorA] ;mm0 and mm1 contain colorA
1615 movq mm2, [eax+ebx+ebx+colorC] ;mm2 and mm3 contain colorC
1616 movq mm1, mm0
1617 movq mm3, mm2
1619 push eax
1620 add eax, ebx
1621 pcmpeqw mm0, [eax+ebx+colorD]
1622 pcmpeqw mm1, [eax+colorG]
1623 pcmpeqw mm2, [eax+ebx+ebx+colorO]
1624 pcmpeqw mm3, [eax+colorB]
1625 pop eax
1627 pand mm0, mm1
1628 pxor mm1, mm1
1629 pand mm0, mm2
1630 pcmpeqw mm3, mm1
1631 pand mm0, mm3 ;result in mm0
1633 ;if ((colorA == colorB) && (colorG != colorC) && (colorA == colorH) && (colorC == colorM)
1634 movq mm4, [eax+ebx+colorA] ;mm4 and mm5 contain colorA
1635 movq mm6, [eax+ebx+ebx+colorC] ;mm6 and mm7 contain colorC
1636 movq mm5, mm4
1637 movq mm7, mm6
1639 push eax
1640 add eax, ebx
1641 pcmpeqw mm4, [eax+ebx+colorH]
1642 pcmpeqw mm5, [eax+colorB]
1643 pcmpeqw mm6, [eax+ebx+ebx+colorM]
1644 pcmpeqw mm7, [eax+colorG]
1645 pop eax
1647 pand mm4, mm5
1648 pxor mm5, mm5
1649 pand mm4, mm6
1650 pcmpeqw mm7, mm5
1651 pand mm4, mm7 ;result in mm4
1653 por mm0, mm4 ;combine the masks
1654 movq [Mask1], mm0
1655 ;--------------------------------------------
1657 ;4
1658 ;if ((colorB == colorC) && (colorA != colorD) && (colorC == colorH) && (colorA == colorF)
1659 movq mm0, [eax+ebx+ebx+colorC] ;mm0 and mm1 contain colorC
1660 movq mm2, [eax+ebx+colorA] ;mm2 and mm3 contain colorA
1661 movq mm1, mm0
1662 movq mm3, mm2
1664 pcmpeqw mm0, [eax+ebx+colorB]
1665 pcmpeqw mm1, [eax+ebx+ebx+colorH]
1666 pcmpeqw mm2, [eax+colorF]
1667 pcmpeqw mm3, [eax+ebx+ebx+colorD]
1669 pand mm0, mm1
1670 pxor mm1, mm1
1671 pand mm0, mm2
1672 pcmpeqw mm3, mm1
1673 pand mm0, mm3 ;result in mm0
1675 ;if ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI)
1676 movq mm4, [eax+ebx+ebx+colorC] ;mm4 and mm5 contain colorC
1677 movq mm6, [eax+ebx+colorA] ;mm6 and mm7 contain colorA
1678 movq mm5, mm4
1679 movq mm7, mm6
1681 pcmpeqw mm4, [eax+ebx+ebx+colorD]
1682 pcmpeqw mm5, [eax+ebx+colorG]
1683 pcmpeqw mm6, [eax+colorI]
1684 pcmpeqw mm7, [eax+ebx+ebx+colorH]
1686 pand mm4, mm5
1687 pxor mm5, mm5
1688 pand mm4, mm6
1689 pcmpeqw mm7, mm5
1690 pand mm4, mm7 ;result in mm4
1692 por mm0, mm4 ;combine the masks
1693 movq [Mask2], mm0
1694 ;----------------------------------------------
1696 ;interpolate colorA and colorC
1697 movq mm0, [eax+ebx+colorA]
1698 movq mm1, [eax+ebx+ebx+colorC]
1700 movq mm2, mm0
1701 movq mm3, mm1
1703 pand mm0, [colorMask]
1704 pand mm1, [colorMask]
1706 psrlw mm0, 1
1707 psrlw mm1, 1
1709 pand mm3, [lowPixelMask]
1710 paddw mm0, mm1
1712 pand mm3, mm2
1713 paddw mm0, mm3 ;mm0 contains the interpolated values
1714 ;-------------
1716 ;assemble the pixels
1717 movq mm1, [eax+ebx+colorA]
1718 movq mm2, [eax+ebx+ebx+colorC]
1720 movq mm3, [Mask1]
1721 movq mm4, [Mask2]
1723 pand mm1, mm3
1724 pand mm2, mm4
1726 por mm3, mm4
1727 pxor mm7, mm7
1728 por mm1, mm2
1730 pcmpeqw mm3, mm7
1731 pand mm0, mm3
1732 por mm0, mm1
1733 movq [ACPixel], mm0
1735 ;////////////////////////////////
1736 ; Decide which "branch" to take
1737 ;--------------------------------
1738 movq mm0, [eax+ebx+colorA]
1739 movq mm1, [eax+ebx+colorB]
1740 movq mm6, mm0
1741 movq mm7, mm1
1742 pcmpeqw mm0, [eax+ebx+ebx+colorD]
1743 pcmpeqw mm1, [eax+ebx+ebx+colorC]
1744 pcmpeqw mm6, mm7
1746 movq mm2, mm0
1747 movq mm3, mm0
1749 pand mm0, mm1 ;colorA == colorD && colorB == colorC
1750 pxor mm7, mm7
1752 pcmpeqw mm2, mm7
1753 pand mm6, mm0
1754 pand mm2, mm1 ;colorA != colorD && colorB == colorC
1756 pcmpeqw mm1, mm7
1758 pand mm1, mm3 ;colorA == colorD && colorB != colorC
1759 pxor mm0, mm6
1760 por mm1, mm6
1761 movq mm7, mm0
1762 movq [Mask2], mm2
1763 packsswb mm7, mm7
1764 movq [Mask1], mm1
1766 movd ecx, mm7
1767 test ecx, ecx
1768 jz near .SKIP_GUESS
1770 ;---------------------------------------------
1771 ; Map of the pixels: I|E F|J
1772 ; G|A B|K
1773 ; H|C D|L
1774 ; M|N O|P
1775 movq mm6, mm0
1776 movq mm4, [eax+ebx+colorA]
1777 movq mm5, [eax+ebx+colorB]
1778 pxor mm7, mm7
1779 pand mm6, [ONE]
1781 movq mm0, [eax+colorE]
1782 movq mm1, [eax+ebx+colorG]
1783 movq mm2, mm0
1784 movq mm3, mm1
1785 pcmpeqw mm0, mm4
1786 pcmpeqw mm1, mm4
1787 pcmpeqw mm2, mm5
1788 pcmpeqw mm3, mm5
1789 pand mm0, mm6
1790 pand mm1, mm6
1791 pand mm2, mm6
1792 pand mm3, mm6
1793 paddw mm0, mm1
1794 paddw mm2, mm3
1796 pxor mm3, mm3
1797 pcmpgtw mm0, mm6
1798 pcmpgtw mm2, mm6
1799 pcmpeqw mm0, mm3
1800 pcmpeqw mm2, mm3
1801 pand mm0, mm6
1802 pand mm2, mm6
1803 paddw mm7, mm0
1804 psubw mm7, mm2
1806 movq mm0, [eax+colorF]
1807 movq mm1, [eax+ebx+colorK]
1808 movq mm2, mm0
1809 movq mm3, mm1
1810 pcmpeqw mm0, mm4
1811 pcmpeqw mm1, mm4
1812 pcmpeqw mm2, mm5
1813 pcmpeqw mm3, mm5
1814 pand mm0, mm6
1815 pand mm1, mm6
1816 pand mm2, mm6
1817 pand mm3, mm6
1818 paddw mm0, mm1
1819 paddw mm2, mm3
1821 pxor mm3, mm3
1822 pcmpgtw mm0, mm6
1823 pcmpgtw mm2, mm6
1824 pcmpeqw mm0, mm3
1825 pcmpeqw mm2, mm3
1826 pand mm0, mm6
1827 pand mm2, mm6
1828 paddw mm7, mm0
1829 psubw mm7, mm2
1831 push eax
1832 add eax, ebx
1833 movq mm0, [eax+ebx+colorH]
1834 movq mm1, [eax+ebx+ebx+colorN]
1835 movq mm2, mm0
1836 movq mm3, mm1
1837 pcmpeqw mm0, mm4
1838 pcmpeqw mm1, mm4
1839 pcmpeqw mm2, mm5
1840 pcmpeqw mm3, mm5
1841 pand mm0, mm6
1842 pand mm1, mm6
1843 pand mm2, mm6
1844 pand mm3, mm6
1845 paddw mm0, mm1
1846 paddw mm2, mm3
1848 pxor mm3, mm3
1849 pcmpgtw mm0, mm6
1850 pcmpgtw mm2, mm6
1851 pcmpeqw mm0, mm3
1852 pcmpeqw mm2, mm3
1853 pand mm0, mm6
1854 pand mm2, mm6
1855 paddw mm7, mm0
1856 psubw mm7, mm2
1858 movq mm0, [eax+ebx+colorL]
1859 movq mm1, [eax+ebx+ebx+colorO]
1860 movq mm2, mm0
1861 movq mm3, mm1
1862 pcmpeqw mm0, mm4
1863 pcmpeqw mm1, mm4
1864 pcmpeqw mm2, mm5
1865 pcmpeqw mm3, mm5
1866 pand mm0, mm6
1867 pand mm1, mm6
1868 pand mm2, mm6
1869 pand mm3, mm6
1870 paddw mm0, mm1
1871 paddw mm2, mm3
1873 pxor mm3, mm3
1874 pcmpgtw mm0, mm6
1875 pcmpgtw mm2, mm6
1876 pcmpeqw mm0, mm3
1877 pcmpeqw mm2, mm3
1878 pand mm0, mm6
1879 pand mm2, mm6
1880 paddw mm7, mm0
1881 psubw mm7, mm2
1883 pop eax
1884 movq mm1, mm7
1885 pxor mm0, mm0
1886 pcmpgtw mm7, mm0
1887 pcmpgtw mm0, mm1
1889 por mm7, [Mask1]
1890 por mm0, [Mask2]
1891 movq [Mask1], mm7
1892 movq [Mask2], mm0
1894 .SKIP_GUESS:
1895 ;----------------------------
1896 ;interpolate A, B, C and D
1897 movq mm0, [eax+ebx+colorA]
1898 movq mm1, [eax+ebx+colorB]
1899 movq mm4, mm0
1900 movq mm2, [eax+ebx+ebx+colorC]
1901 movq mm5, mm1
1902 movq mm3, [qcolorMask]
1903 movq mm6, mm2
1904 movq mm7, [qlowpixelMask]
1906 pand mm0, mm3
1907 pand mm1, mm3
1908 pand mm2, mm3
1909 pand mm3, [eax+ebx+ebx+colorD]
1911 psrlw mm0, 2
1912 pand mm4, mm7
1913 psrlw mm1, 2
1914 pand mm5, mm7
1915 psrlw mm2, 2
1916 pand mm6, mm7
1917 psrlw mm3, 2
1918 pand mm7, [eax+ebx+ebx+colorD]
1920 paddw mm0, mm1
1921 paddw mm2, mm3
1923 paddw mm4, mm5
1924 paddw mm6, mm7
1926 paddw mm4, mm6
1927 paddw mm0, mm2
1928 psrlw mm4, 2
1929 pand mm4, [qlowpixelMask]
1930 paddw mm0, mm4 ;mm0 contains the interpolated value of A, B, C and D
1932 ;\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
1933 ;assemble the pixels
1934 movq mm1, [Mask1]
1935 movq mm2, [Mask2]
1936 movq mm4, [eax+ebx+colorA]
1937 movq mm5, [eax+ebx+colorB]
1938 pand mm4, mm1
1939 pand mm5, mm2
1941 pxor mm7, mm7
1942 por mm1, mm2
1943 por mm4, mm5
1944 pcmpeqw mm1, mm7
1945 pand mm0, mm1
1946 por mm4, mm0 ;mm4 contains the diagonal pixels
1948 movq mm0, [ACPixel]
1949 movq mm1, mm0
1950 punpcklwd mm0, mm4
1951 punpckhwd mm1, mm4
1953 push edx
1954 add edx, [ebp+dstPitch]
1956 %ifdef FAR_POINTER
1957 movq [fs:edx], mm0
1958 movq [fs:edx+8], mm1
1959 %else
1960 movq [edx], mm0
1961 movq [edx+8], mm1
1962 %endif
1963 pop edx
1965 .SKIP_PROCESS:
1966 mov ecx, [ebp+deltaPtr]
1967 add ecx, 8
1968 mov [ebp+deltaPtr], ecx
1969 add edx, 16
1970 add eax, 8
1972 pop ecx
1973 sub ecx, 4
1974 cmp ecx, 0
1975 jg near .Loop
1977 ; Restore some stuff
1978 popad
1979 mov esp, ebp
1980 pop ebp
1981 emms
1982 ret
1984 ;-------------------------------------------------------------------------
1985 ;-------------------------------------------------------------------------
1986 ;-------------------------------------------------------------------------
1987 ;-------------------------------------------------------------------------
1988 ;-------------------------------------------------------------------------
1989 ;-------------------------------------------------------------------------
1990 ;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
; Init_2xSaIMMX (exported as _Init_2xSaIMMX when __DJGPP__ is defined)
;
; Chooses the per-pixel bit masks for a 16-bit pixel layout by overwriting
; colorMask, lowPixelMask, qcolorMask and qlowpixelMask. Each of those is
; 8 bytes: one 16-bit pattern repeated in all four words, so it can be used
; directly as an MMX memory operand.
;
; In:    [ebp+8] = PixelFormat, the literal number 555 or 565
; Out:   eax = 0  masks were written
;        eax = 1  PixelFormat is neither 555 nor 565; masks left untouched
; Clobb: eax and flags only (ebp is saved and restored; no other register
;        is written)
;-------------------------------------------------------------------------
%ifdef __DJGPP__
_Init_2xSaIMMX:
%else
Init_2xSaIMMX:
%endif
        push    ebp
        mov     ebp, esp

        ; MMX detection through CPUID was disabled by the original author
        ; ("doesn't work") and is kept only for reference. CPUID overwrites
        ; eax, ebx, ecx and edx, so those would have to be saved around it
        ; if it is ever re-enabled.
        ;       mov     eax, 1
        ;       cpuid
        ;       test    edx, 0x00800000         ; test bit 23
        ;       jz      .unsupported            ; bit not set => no MMX detected

        mov     eax, [ebp+8]                    ; eax = PixelFormat
        cmp     eax, 555
        je      .bits555
        cmp     eax, 565
        je      .bits565

.unsupported:
        mov     eax, 1                          ; unknown format: report failure
        jmp     .done

        ; 5-5-5 layout (colour fields start at bits 0, 5 and 10)
.bits555:
        ; 0x7BDE: every bit except 0, 5, 10 and 15
        mov     dword [colorMask],         0x7BDE7BDE
        mov     dword [colorMask + 4],     0x7BDE7BDE
        ; 0x0421: bits 0, 5, 10 (lowest bit of each field)
        mov     dword [lowPixelMask],      0x04210421
        mov     dword [lowPixelMask + 4],  0x04210421
        ; 0x739C: as colorMask, additionally clearing bits 1, 6 and 11
        mov     dword [qcolorMask],        0x739C739C
        mov     dword [qcolorMask + 4],    0x739C739C
        ; 0x0C63: bits 0-1, 5-6, 10-11 (lowest two bits of each field)
        mov     dword [qlowpixelMask],     0x0C630C63
        mov     dword [qlowpixelMask + 4], 0x0C630C63
        xor     eax, eax                        ; success
        jmp     .done

        ; 5-6-5 layout (colour fields start at bits 0, 5 and 11)
.bits565:
        ; 0xF7DE: every bit except 0, 5 and 11
        mov     dword [colorMask],         0xF7DEF7DE
        mov     dword [colorMask + 4],     0xF7DEF7DE
        ; 0x0821: bits 0, 5, 11 (lowest bit of each field)
        mov     dword [lowPixelMask],      0x08210821
        mov     dword [lowPixelMask + 4],  0x08210821
        ; 0xE79C: as colorMask, additionally clearing bits 1, 6 and 12
        mov     dword [qcolorMask],        0xE79CE79C
        mov     dword [qcolorMask + 4],    0xE79CE79C
        ; 0x1863: bits 0-1, 5-6, 11-12 (lowest two bits of each field)
        mov     dword [qlowpixelMask],     0x18631863
        mov     dword [qlowpixelMask + 4], 0x18631863
        xor     eax, eax                        ; success

.done:
        pop     ebp                             ; stack is balanced, esp == ebp here
        ret
2062 ;-------------------------------------------------------------------------
2063 ;-------------------------------------------------------------------------
2064 ;-------------------------------------------------------------------------
2065 ;-------------------------------------------------------------------------
2066 ;-------------------------------------------------------------------------
2067 ;-------------------------------------------------------------------------
2068 ;-------------------------------------------------------------------------
SECTION .data ALIGN = 32

; Every label below names 8 bytes: a 16-bit pattern repeated in all four
; words, so each can be used directly as a 64-bit MMX memory operand.

; Masks that Init_2xSaIMMX overwrites at run time. The initial contents are
; the same values that routine stores for PixelFormat 565.
colorMask:      times 2 dd 0xF7DEF7DE
lowPixelMask:   times 2 dd 0x08210821

qcolorMask:     times 2 dd 0xE79CE79C
qlowpixelMask:  times 2 dd 0x18631863

; Additional fixed bit masks.
darkenMask:     times 2 dd 0xC718C718
GreenMask:      times 2 dd 0x07E007E0
RedBlueMask:    times 2 dd 0xF81FF81F

; All-zero, all-one and "1 in every word" quadwords.
FALSE:          times 2 dd 0x00000000
TRUE:           times 2 dd 0xffffffff
ONE:            times 2 dd 0x00010001
SECTION .bss ALIGN = 32

; Uninitialised scratch storage, one quadword (8 bytes) per label.

; Spill slots written with movq and reloaded later by the scaler loop.
ACPixel:        resq 1
Mask1:          resq 1
Mask2:          resq 1

; Remaining quadword slots (layout and order unchanged).
I56Pixel:       resq 1
I23Pixel:       resq 1
I5556Pixel:     resq 1
I2223Pixel:     resq 1
I5666Pixel:     resq 1
I2333Pixel:     resq 1
Mask26:         resq 1
Mask35:         resq 1
Mask26b:        resq 1
Mask35b:        resq 1
product1a:      resq 1
product1b:      resq 1
product2a:      resq 1
product2b:      resq 1
final1a:        resq 1
final1b:        resq 1
final2a:        resq 1
final2b:        resq 1