Mercurial > vba-clojure
view src/filters/2xSaImmx.asm @ 135:eb6ba88088d3
Wrote a more efficient input-number-assembly program; 91 oc -> 60 oc.
author | Dylan Holmes <ocsenave@gmail.com> |
---|---|
date | Sun, 18 Mar 2012 05:13:19 -0500 |
parents | f9f4f1b99eed |
children |
line wrap: on
line source
;/*---------------------------------------------------------------------*
; * The following (piece of) code, (part of) the 2xSaI engine,          *
; * copyright (c) 1999 - 2001 by Derek Liauw Kie Fa.                    *
; * Non-Commercial use of this software is allowed and is encouraged,   *
; * provided that appropriate credit be given.                          *
; * You may freely modify this code, but I request                      *
; * that any improvements to the engine be submitted to me, so          *
; * that I can implement these improvements in newer versions of        *
; * the software.                                                       *
; * If you need more information, have any comments or suggestions,     *
; * you can e-mail me. My e-mail: derek-liauw@usa.net.                  *
; *---------------------------------------------------------------------*/

;----------------------
; 2xSaI version 0.59 WIP, soon to become version 0.60
;----------------------

;%define FAR_POINTER

BITS 32
%ifdef __DJGPP__
GLOBAL __2xSaILine
GLOBAL __2xSaISuperEagleLine
GLOBAL __2xSaISuper2xSaILine
GLOBAL _Init_2xSaIMMX
%else
GLOBAL _2xSaILine
GLOBAL _2xSaISuperEagleLine
GLOBAL _2xSaISuper2xSaILine
GLOBAL Init_2xSaIMMX
%endif
SECTION .text ALIGN = 32

%ifdef FAR_POINTER
;EXTERN_C void _2xSaILine (uint8 *srcPtr, uint32 srcPitch, uint32 width,
; uint8 *dstPtr, uint32 dstPitch, uint16 dstSegment);
%else
;EXTERN_C void _2xSaILine (uint8 *srcPtr, uint32 srcPitch, uint32 width,
; uint8 *dstPtr, uint32 dstPitch);
%endif
; NOTE(review): the prototype comments above list no deltaPtr argument, yet
; the frame offsets below place deltaPtr at [ebp+12], between srcPtr and
; srcPitch. Confirm the real prototype against the C caller.

; Argument offsets from ebp (after "push ebp / mov ebp, esp").
srcPtr          equ 8
deltaPtr        equ 12
srcPitch        equ 16
width           equ 20
dstOffset       equ 24
dstPitch        equ 28
dstSegment      equ 32

; Byte displacements of the pixels inside each source row. Neighbouring
; names are 2 bytes apart, i.e. one 16-bit pixel.

; row B: addressed as [eax+...]
colorB0         equ -2
colorB1         equ 0
colorB2         equ 2
colorB3         equ 4

color7          equ -2
color8          equ 0
color9          equ 2

; row 4/5/6: addressed as [eax+ebx+...]
color4          equ -2
color5          equ 0
color6          equ 2
colorS2         equ 4

; row 1/2/3: addressed as [eax+ebx+ebx+...]
color1          equ -2
color2          equ 0
color3          equ 2
colorS1         equ 4

; row A: one further pitch down
colorA0         equ -2
colorA1         equ 0
colorA2         equ 2
colorA3         equ 4

;-----------------------------------------------------------------------
; S2X_BLEND_HALF
;   In:   mm0, mm1 = the two quadwords to blend
;         mm2, mm3 = untouched copies of mm0, mm1
;   Out:  mm0 = ((mm0 & [colorMask]) >> 1) + ((mm1 & [colorMask]) >> 1)
;               + (mm2 & mm3 & [lowPixelMask]), computed per 16-bit lane
;   Clobbers: mm1, mm3
;   colorMask / lowPixelMask are defined elsewhere in the file.
;-----------------------------------------------------------------------
%macro S2X_BLEND_HALF 0
        pand      mm0, [colorMask]
        pand      mm1, [colorMask]

        psrlw     mm0, 1
        psrlw     mm1, 1

        pand      mm3, [lowPixelMask]
        paddw     mm0, mm1

        pand      mm3, mm2
        paddw     mm0, mm3
%endmacro

;-----------------------------------------------------------------------
; S2X_VOTE  first, second
;   first, second = memory operands of the two neighbour quadwords
;   In:   mm4, mm5 = the two candidate colours
;         mm6      = per-lane weight (undecided-lane mask ANDed with [ONE])
;         mm7      = running tally
;   Out:  per lane, mm7 += mm6 unless (matches of mm4 among the two
;         neighbours, each counted as mm6) exceeds mm6; and mm7 -= mm6
;         unless the same count for mm5 exceeds mm6.
;         NOTE(review): presumably [ONE] holds 1 in every lane, which makes
;         this "+1 if fewer than two neighbours equal mm4, -1 if fewer than
;         two equal mm5" -- confirm against the data section.
;   Clobbers: mm0-mm3
;-----------------------------------------------------------------------
%macro S2X_VOTE 2
        movq      mm0, %1
        movq      mm1, %2
        movq      mm2, mm0
        movq      mm3, mm1
        pcmpeqw   mm0, mm4
        pcmpeqw   mm1, mm4
        pcmpeqw   mm2, mm5
        pcmpeqw   mm3, mm5
        pand      mm0, mm6
        pand      mm1, mm6
        pand      mm2, mm6
        pand      mm3, mm6
        paddw     mm0, mm1
        paddw     mm2, mm3

        pxor      mm3, mm3
        pcmpgtw   mm0, mm6
        pcmpgtw   mm2, mm6
        pcmpeqw   mm0, mm3
        pcmpeqw   mm2, mm3
        pand      mm0, mm6
        pand      mm2, mm6
        paddw     mm7, mm0
        psubw     mm7, mm2
%endmacro

;-----------------------------------------------------------------------
; _2xSaISuper2xSaILine  (__2xSaISuper2xSaILine under __DJGPP__)
;
; Stack arguments, read relative to ebp:
;   [ebp+srcPtr]     source line pointer
;   [ebp+deltaPtr]   delta buffer pointer (advanced in place, 8 per pass)
;   [ebp+srcPitch]   source pitch in bytes
;   [ebp+width]      pixel count; 4 are consumed per loop pass
;   [ebp+dstOffset]  destination pointer
;   [ebp+dstPitch]   destination pitch in bytes
;   [ebp+dstSegment] selector loaded into fs (FAR_POINTER builds only)
;
; Register roles inside the loop:
;   eax = source pointer minus one pitch (row B), +8 per pass
;   ebx = source pitch
;   edx = destination pointer, +16 per pass
;   ecx = scratch; the remaining width lives on the stack
;
; Every pass reads 4 source pixels (16-bit lanes) and writes 16 bytes to
; [edx] and 16 bytes to [edx+dstPitch]. The loop body runs before the
; width test, so at least one pass always executes.
; All general registers are preserved (pushad/popad); MMX state is
; released with emms before returning.
;-----------------------------------------------------------------------
%ifdef __DJGPP__
__2xSaISuper2xSaILine:
%else
_2xSaISuper2xSaILine:
%endif
        ; --- prologue ---
        push      ebp
        mov       ebp, esp
        pushad

        ; --- destination ---
%ifdef FAR_POINTER
        ; far destination: selector goes into fs
        mov       eax, [ebp+dstSegment]
        mov       fs, ax
%endif
        mov       edx, [ebp+dstOffset]          ; edx -> output

        ; --- source ---
        mov       eax, [ebp+srcPtr]             ; eax -> current row
        mov       ebx, [ebp+srcPitch]           ; ebx = source pitch
        mov       ecx, [ebp+width]              ; ecx = pixels to process
        sub       eax, ebx                      ; eax -> row B (one row up), the base row

        ; --- main loop ---
.Loop:
        push      ecx                           ; remaining width is kept on the stack

        ;----- delta check -----------------------------------------------
        mov       ecx, [ebp+deltaPtr]

        ; first and last quadword of each of the four source rows
        movq      mm0, [eax+colorB0]
        movq      mm1, [eax+colorB3]
        movq      mm2, [eax+ebx+color4]
        movq      mm3, [eax+ebx+colorS2]
        movq      mm4, [eax+ebx+ebx+color1]
        movq      mm5, [eax+ebx+ebx+colorS1]
        push      eax
        add       eax, ebx                      ; one pitch down so row A is reachable
        movq      mm6, [eax+ebx+ebx+colorA0]
        movq      mm7, [eax+ebx+ebx+colorA3]
        pop       eax

        ; compare with what the delta buffer holds (delta is addressed +2)
        pcmpeqw   mm0, [ecx+2+colorB0]
        pcmpeqw   mm1, [ecx+2+colorB3]
        pcmpeqw   mm2, [ecx+ebx+2+color4]
        pcmpeqw   mm3, [ecx+ebx+2+colorS2]
        pcmpeqw   mm4, [ecx+ebx+ebx+2+color1]
        pcmpeqw   mm5, [ecx+ebx+ebx+2+colorS1]
        add       ecx, ebx
        pcmpeqw   mm6, [ecx+ebx+ebx+2+colorA0]
        pcmpeqw   mm7, [ecx+ebx+ebx+2+colorA3]
        sub       ecx, ebx

        ; AND all eight equality masks together
        pand      mm0, mm1
        pand      mm2, mm3
        pand      mm4, mm5
        pand      mm6, mm7
        pand      mm0, mm2
        pand      mm4, mm6
        pxor      mm7, mm7
        pand      mm0, mm4
        movq      mm6, [eax+colorB0]
        pcmpeqw   mm7, mm0                      ; mm7 lane set <=> some compare failed there

        movq      [ecx+2+colorB0], mm6          ; store the current row-B quadword into delta

        packsswb  mm7, mm7
        movd      ecx, mm7
        test      ecx, ecx
        jz near   .SKIP_PROCESS                 ; nothing differed: leave the output alone

        ;----- end of delta check ----------------------------------------

        ;----- blend of 5 and 6 ------------------------------------------
        movq      mm0, [eax+ebx+color5]
        movq      mm1, [eax+ebx+color6]
        movq      mm2, mm0
        movq      mm3, mm1
        movq      mm4, mm0                      ; mm4 keeps color5
        movq      mm5, mm1                      ; mm5 keeps color6

        S2X_BLEND_HALF                          ; mm0 = blend(5, 6)
        movq      [I56Pixel], mm0
        movq      mm7, mm0

        ; blend(blend(5,6), 5) -> "5,5,5,6"
        movq      mm0, mm7
        movq      mm1, mm4
        movq      mm2, mm0
        movq      mm3, mm1

        S2X_BLEND_HALF
        movq      [I5556Pixel], mm0

        ; blend(blend(5,6), 6) -> "6,6,6,5"
        movq      mm0, mm7
        movq      mm1, mm5
        movq      mm2, mm0
        movq      mm3, mm1

        S2X_BLEND_HALF
        movq      [I5666Pixel], mm0

        ;----- blend of 2 and 3 ------------------------------------------
        movq      mm0, [eax+ebx+ebx+color2]
        movq      mm1, [eax+ebx+ebx+color3]
        movq      mm2, mm0
        movq      mm3, mm1
        movq      mm4, mm0                      ; mm4 keeps color2
        movq      mm5, mm1                      ; mm5 keeps color3

        S2X_BLEND_HALF                          ; mm0 = blend(2, 3)
        movq      [I23Pixel], mm0
        movq      mm7, mm0

        ; blend(blend(2,3), 2) -> "2,2,2,3"
        movq      mm0, mm7
        movq      mm1, mm4
        movq      mm2, mm0
        movq      mm3, mm1

        S2X_BLEND_HALF
        movq      [I2223Pixel], mm0

        ; blend(blend(2,3), 3) -> "3,3,3,2"
        movq      mm0, mm7
        movq      mm1, mm5
        movq      mm2, mm0
        movq      mm3, mm1

        S2X_BLEND_HALF
        movq      [I2333Pixel], mm0

        ;----- choose the "branch" per lane ------------------------------
        movq      mm0, [eax+ebx+color5]
        movq      mm1, [eax+ebx+color6]
        movq      mm6, mm0
        movq      mm7, mm1
        pcmpeqw   mm0, [eax+ebx+ebx+color3]     ; mm0 = (5 == 3)
        pcmpeqw   mm1, [eax+ebx+ebx+color2]     ; mm1 = (6 == 2)
        pcmpeqw   mm6, mm7                      ; mm6 = (5 == 6)

        movq      mm2, mm0
        movq      mm3, mm0

        pand      mm0, mm1                      ; both diagonals match
        pxor      mm7, mm7

        pcmpeqw   mm2, mm7                      ; mm2 = (5 != 3)
        pand      mm6, mm0                      ; both diagonals match and 5 == 6
        pand      mm2, mm1                      ; (5 != 3) && (6 == 2)

        pcmpeqw   mm1, mm7                      ; mm1 = (6 != 2)

        pand      mm1, mm3                      ; (5 == 3) && (6 != 2)
        pxor      mm0, mm6                      ; both match but 5 != 6: undecided lanes
        por       mm1, mm6
        movq      mm7, mm0
        movq      [Mask26], mm2
        packsswb  mm7, mm7
        movq      [Mask35], mm1

        movd      ecx, mm7
        test      ecx, ecx
        jz near   .SKIP_GUESS                   ; no undecided lane

        ;----- settle undecided lanes by neighbour vote ------------------
        movq      mm6, mm0
        movq      mm4, [eax+ebx+colorA]
        movq      mm5, [eax+ebx+colorB]
        pxor      mm7, mm7                      ; tally starts at zero
        pand      mm6, [ONE]                    ; weight only in undecided lanes

        S2X_VOTE  [eax+colorE], [eax+ebx+colorG]
        S2X_VOTE  [eax+colorF], [eax+ebx+colorK]

        push      eax
        add       eax, ebx                      ; one pitch down for the lower rows
        S2X_VOTE  [eax+ebx+colorH], [eax+ebx+ebx+colorN]
        S2X_VOTE  [eax+ebx+colorL], [eax+ebx+ebx+colorO]
        pop       eax

        movq      mm1, mm7
        pxor      mm0, mm0
        pcmpgtw   mm7, mm0                      ; tally > 0
        pcmpgtw   mm0, mm1                      ; tally < 0

        por       mm7, [Mask35]
        por       mm0, [Mask26]
        movq      [Mask35], mm7
        movq      [Mask26], mm0

.SKIP_GUESS:
        ;----- compose the output pixels ---------------------------------
        movq      mm0, [eax+ebx+color5]
        movq      mm1, [eax+ebx+ebx+color2]
        movq      mm2, mm0
        movq      mm3, mm1
        movq      mm4, mm0
        movq      mm5, mm1

        S2X_BLEND_HALF                          ; mm0 = blend(5, 2), used by 1a and 2a

%ifdef dfhsdfhsdahdsfhdsfh
if (color5 == color3 && color2 != color6 && color4 == color5 && color5 != colorA2)
product2a = INTERPOLATE (color2, color5);
else
if (color5 == color1 && color6 == color5 && color4 != color2 && color5 != colorA0)
product2a = INTERPOLATE(color2, color5);
else
product2a = color2;

if (color2 == color6 && color5 != color3 && color1 == color2 && color2 != colorB2)
product1a = INTERPOLATE (color2, color5);
else
if (color4 == color2 && color3 == color2 && color1 != color5 && color2 != colorB0)
product1a = INTERPOLATE(color2, color5);
else
product1a = color5;

%endif

        ; ---- 1a, first condition: Mask26 && (1 == 2) && (B2 != 2)
        movq      mm7, [Mask26]
        movq      mm6, [eax+colorB2]
        movq      mm5, [eax+ebx+ebx+color2]
        movq      mm4, [eax+ebx+ebx+color1]
        pcmpeqw   mm4, mm5
        pcmpeqw   mm6, mm5
        pxor      mm5, mm5
        pand      mm7, mm4
        pcmpeqw   mm6, mm5                      ; invert: B2 != 2
        pand      mm7, mm6

        ; ---- 1a, second condition: (3 == 2) && (4 == 2) && (5 != 1) && (B0 != 2)
        movq      mm6, [eax+ebx+ebx+color3]
        movq      mm5, [eax+ebx+ebx+color2]
        movq      mm4, [eax+ebx+ebx+color1]
        movq      mm2, [eax+ebx+color5]
        movq      mm1, [eax+ebx+color4]
        movq      mm3, [eax+colorB0]

        pcmpeqw   mm2, mm4
        pcmpeqw   mm6, mm5
        pcmpeqw   mm1, mm5
        pcmpeqw   mm3, mm5
        pxor      mm5, mm5
        pcmpeqw   mm2, mm5                      ; invert: 5 != 1
        pcmpeqw   mm3, mm5                      ; invert: B0 != 2
        pand      mm6, mm1
        pand      mm2, mm3
        pand      mm6, mm2
        por       mm7, mm6                      ; either condition

        ; select blend(5,2) where the mask is set, color5 elsewhere
        movq      mm6, mm7
        pcmpeqw   mm6, mm5                      ; mm6 = inverse mask (mm5 is zero)
        pand      mm7, mm0

        movq      mm1, [eax+ebx+color5]
        pand      mm6, mm1
        por       mm7, mm6
        movq      [final1a], mm7                ; 1a done

        ; ---- 2a, first condition: Mask35 && (4 == 5) && (A2 != 5)
        movq      mm7, [Mask35]
        push      eax
        add       eax, ebx
        movq      mm6, [eax+ebx+ebx+colorA2]
        pop       eax
        movq      mm5, [eax+ebx+color5]
        movq      mm4, [eax+ebx+color4]
        pcmpeqw   mm4, mm5
        pcmpeqw   mm6, mm5
        pxor      mm5, mm5
        pand      mm7, mm4
        pcmpeqw   mm6, mm5                      ; invert: A2 != 5
        pand      mm7, mm6

        ; ---- 2a, second condition: (6 == 5) && (1 == 5) && (2 != 4) && (A0 != 5)
        movq      mm6, [eax+ebx+color6]
        movq      mm5, [eax+ebx+color5]
        movq      mm4, [eax+ebx+color4]
        movq      mm2, [eax+ebx+ebx+color2]
        movq      mm1, [eax+ebx+ebx+color1]
        push      eax
        add       eax, ebx
        movq      mm3, [eax+ebx+ebx+colorA0]
        pop       eax

        pcmpeqw   mm2, mm4
        pcmpeqw   mm6, mm5
        pcmpeqw   mm1, mm5
        pcmpeqw   mm3, mm5
        pxor      mm5, mm5
        pcmpeqw   mm2, mm5                      ; invert: 2 != 4
        pcmpeqw   mm3, mm5                      ; invert: A0 != 5
        pand      mm6, mm1
        pand      mm2, mm3
        pand      mm6, mm2
        por       mm7, mm6                      ; either condition

        ; select blend(5,2) where the mask is set, color2 elsewhere
        movq      mm6, mm7
        pcmpeqw   mm6, mm5
        pand      mm7, mm0

        movq      mm1, [eax+ebx+ebx+color2]
        pand      mm6, mm1
        por       mm7, mm6
        movq      [final2a], mm7                ; 2a done

%ifdef dfhsdfhsdahdsfhdsfh
if (color6 == color3 && color3 == colorA1 && color2 != colorA2 && color3 != colorA0)
product2b = Q_INTERPOLATE (color3, color3, color3, color2);
else
if (color5 == color2 && color2 == colorA2 && colorA1 != color3 && color2 != colorA3)
product2b = Q_INTERPOLATE (color2, color2, color2, color3);
else
product2b = INTERPOLATE (color2, color3);

if (color6 == color3 && color6 == colorB1 && color5 != colorB2 && color6 != colorB0)
product1b = Q_INTERPOLATE (color6, color6, color6, color5);
else
if (color5 == color2 && color5 == colorB2 && colorB1 != color6 && color5 != colorB3)
product1b = Q_INTERPOLATE (color6, color5, color5, color5);
else
product1b = INTERPOLATE (color5, color6);
%endif

        ; ---- 2b, first condition -> mm0
        push      eax
        add       eax, ebx
        pxor      mm7, mm7
        movq      mm0, [eax+ebx+ebx+colorA0]
        movq      mm1, [eax+ebx+ebx+colorA1]
        movq      mm2, [eax+ebx+ebx+colorA2]
        movq      mm3, [eax+ebx+ebx+colorA3]
        pop       eax
        movq      mm4, [eax+ebx+ebx+color2]
        movq      mm5, [eax+ebx+ebx+color3]
        movq      mm6, [eax+ebx+color6]

        pcmpeqw   mm6, mm5                      ; 6 == 3
        pcmpeqw   mm1, mm5                      ; A1 == 3
        pcmpeqw   mm4, mm2                      ; 2 == A2
        pcmpeqw   mm0, mm5                      ; A0 == 3
        pcmpeqw   mm4, mm7                      ; invert: 2 != A2
        pcmpeqw   mm0, mm7                      ; invert: A0 != 3
        pand      mm0, mm4
        pand      mm6, mm1
        pand      mm0, mm6

        ; ---- 2b, second condition -> mm1 (mm2 = A2, mm3 = A3 still live)
        push      eax
        add       eax, ebx
        movq      mm1, [eax+ebx+ebx+colorA1]
        pop       eax
        movq      mm4, [eax+ebx+ebx+color2]
        movq      mm5, [eax+ebx+color5]
        movq      mm6, [eax+ebx+ebx+color3]

        pcmpeqw   mm5, mm4                      ; 5 == 2
        pcmpeqw   mm2, mm4                      ; A2 == 2
        pcmpeqw   mm1, mm6                      ; A1 == 3
        pcmpeqw   mm3, mm4                      ; A3 == 2
        pcmpeqw   mm1, mm7                      ; invert: A1 != 3
        pcmpeqw   mm3, mm7                      ; invert: A3 != 2
        pand      mm2, mm5
        pand      mm1, mm3
        pand      mm1, mm2

        movq      mm7, mm0
        por       mm7, mm1                      ; lanes taken by either condition

        movq      mm4, [Mask35]
        movq      mm3, [Mask26]

        movq      mm6, mm4
        pand      mm6, mm7
        pxor      mm4, mm6                      ; Mask35 minus the lanes already taken

        movq      mm6, mm3
        pand      mm6, mm7
        pxor      mm3, mm6                      ; Mask26 minus the lanes already taken

        movq      mm2, mm0
        movq      mm7, [I2333Pixel]
        movq      mm6, [I2223Pixel]
        movq      mm5, [I23Pixel]

        por       mm2, mm4
        pand      mm4, [eax+ebx+ebx+color3]
        por       mm2, mm3
        pand      mm3, [eax+ebx+ebx+color2]
        por       mm2, mm1
        pand      mm0, mm7
        pand      mm1, mm6
        pxor      mm7, mm7
        pcmpeqw   mm2, mm7                      ; lanes no mask claimed
        por       mm0, mm1
        por       mm3, mm4
        pand      mm2, mm5                      ; those lanes get blend(2, 3)
        por       mm0, mm3
        por       mm0, mm2
        movq      [final2b], mm0

        ; ---- 1b, first condition -> mm0
        pxor      mm7, mm7
        movq      mm0, [eax+colorB0]
        movq      mm1, [eax+colorB1]
        movq      mm2, [eax+colorB2]
        movq      mm3, [eax+colorB3]
        movq      mm4, [eax+ebx+color5]
        movq      mm5, [eax+ebx+color6]
        movq      mm6, [eax+ebx+ebx+color3]

        pcmpeqw   mm6, mm5                      ; 3 == 6
        pcmpeqw   mm1, mm5                      ; B1 == 6
        pcmpeqw   mm4, mm2                      ; 5 == B2
        pcmpeqw   mm0, mm5                      ; B0 == 6
        pcmpeqw   mm4, mm7                      ; invert: 5 != B2
        pcmpeqw   mm0, mm7                      ; invert: B0 != 6
        pand      mm0, mm4
        pand      mm6, mm1
        pand      mm0, mm6

        ; ---- 1b, second condition -> mm1 (mm2 = B2, mm3 = B3 still live)
        movq      mm1, [eax+colorB1]
        movq      mm4, [eax+ebx+color5]
        movq      mm5, [eax+ebx+ebx+color2]
        movq      mm6, [eax+ebx+color6]

        pcmpeqw   mm5, mm4                      ; 2 == 5
        pcmpeqw   mm2, mm4                      ; B2 == 5
        pcmpeqw   mm1, mm6                      ; B1 == 6
        pcmpeqw   mm3, mm4                      ; B3 == 5
        pcmpeqw   mm1, mm7                      ; invert: B1 != 6
        pcmpeqw   mm3, mm7                      ; invert: B3 != 5
        pand      mm2, mm5
        pand      mm1, mm3
        pand      mm1, mm2

        movq      mm7, mm0
        por       mm7, mm1                      ; lanes taken by either condition

        movq      mm4, [Mask35]
        movq      mm3, [Mask26]

        movq      mm6, mm4
        pand      mm6, mm7
        pxor      mm4, mm6                      ; Mask35 minus the lanes already taken

        movq      mm6, mm3
        pand      mm6, mm7
        pxor      mm3, mm6                      ; Mask26 minus the lanes already taken

        movq      mm2, mm0
        movq      mm7, [I5666Pixel]
        movq      mm6, [I5556Pixel]
        movq      mm5, [I56Pixel]

        por       mm2, mm4
        pand      mm4, [eax+ebx+color5]
        por       mm2, mm3
        pand      mm3, [eax+ebx+color6]
        por       mm2, mm1
        pand      mm0, mm7
        pand      mm1, mm6
        pxor      mm7, mm7
        pcmpeqw   mm2, mm7                      ; lanes no mask claimed
        por       mm0, mm1
        por       mm3, mm4
        pand      mm2, mm5                      ; those lanes get blend(5, 6)
        por       mm0, mm3
        por       mm0, mm2
        movq      [final1b], mm0

        ;----- interleave a/b columns and store both output rows ---------
        movq      mm0, [final1a]
        movq      mm4, [final2a]
        movq      mm2, [final1b]
        movq      mm6, [final2b]

        movq      mm1, mm0
        movq      mm5, mm4

        punpcklwd mm0, mm2
        punpckhwd mm1, mm2

        punpcklwd mm4, mm6
        punpckhwd mm5, mm6

%ifdef FAR_POINTER
        movq      [fs:edx], mm0
        movq      [fs:edx+8], mm1
        push      edx
        add       edx, [ebp+dstPitch]           ; second output row
        movq      [fs:edx], mm4
        movq      [fs:edx+8], mm5
        pop       edx
%else
        movq      [edx], mm0
        movq      [edx+8], mm1
        push      edx
        add       edx, [ebp+dstPitch]           ; second output row
        movq      [edx], mm4
        movq      [edx+8], mm5
        pop       edx
%endif
.SKIP_PROCESS:
        ; advance delta (+8), destination (+16) and source (+8)
        mov       ecx, [ebp+deltaPtr]
        add       ecx, 8
        mov       [ebp+deltaPtr], ecx
        add       edx, 16
        add       eax, 8

        pop       ecx                           ; remaining width
        sub       ecx, 4
        cmp       ecx, 0
        jg near   .Loop

        ; --- epilogue ---
        popad
        mov       esp, ebp
        pop       ebp
        emms
        ret


;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------

;-----------------------------------------------------------------------
; EAGLE_BLEND_HALF
;   In:   mm0, mm1 = the two quadwords to blend
;         mm2, mm3 = untouched copies of mm0, mm1
;   Out:  mm0 = ((mm0 & [colorMask]) >> 1) + ((mm1 & [colorMask]) >> 1)
;               + (mm2 & mm3 & [lowPixelMask]), computed per 16-bit lane
;   Clobbers: mm1, mm3
;   colorMask / lowPixelMask are defined elsewhere in the file.
;-----------------------------------------------------------------------
%macro EAGLE_BLEND_HALF 0
        pand      mm0, [colorMask]
        pand      mm1, [colorMask]

        psrlw     mm0, 1
        psrlw     mm1, 1

        pand      mm3, [lowPixelMask]
        paddw     mm0, mm1

        pand      mm3, mm2
        paddw     mm0, mm3
%endmacro

;-----------------------------------------------------------------------
; EAGLE_VOTE  first, second
;   first, second = memory operands of the two neighbour quadwords
;   In:   mm4, mm5 = the two candidate colours
;         mm6      = per-lane weight (undecided-lane mask ANDed with [ONE])
;         mm7      = running tally
;   Out:  per lane, mm7 += mm6 unless (matches of mm4 among the two
;         neighbours, each counted as mm6) exceeds mm6; and mm7 -= mm6
;         unless the same count for mm5 exceeds mm6.
;         NOTE(review): presumably [ONE] holds 1 in every lane -- confirm
;         against the data section.
;   Clobbers: mm0-mm3
;-----------------------------------------------------------------------
%macro EAGLE_VOTE 2
        movq      mm0, %1
        movq      mm1, %2
        movq      mm2, mm0
        movq      mm3, mm1
        pcmpeqw   mm0, mm4
        pcmpeqw   mm1, mm4
        pcmpeqw   mm2, mm5
        pcmpeqw   mm3, mm5
        pand      mm0, mm6
        pand      mm1, mm6
        pand      mm2, mm6
        pand      mm3, mm6
        paddw     mm0, mm1
        paddw     mm2, mm3

        pxor      mm3, mm3
        pcmpgtw   mm0, mm6
        pcmpgtw   mm2, mm6
        pcmpeqw   mm0, mm3
        pcmpeqw   mm2, mm3
        pand      mm0, mm6
        pand      mm2, mm6
        paddw     mm7, mm0
        psubw     mm7, mm2
%endmacro

;-----------------------------------------------------------------------
; _2xSaISuperEagleLine  (__2xSaISuperEagleLine under __DJGPP__)
;
; Stack arguments, read relative to ebp: srcPtr, deltaPtr, srcPitch,
; width, dstOffset, dstPitch and, in FAR_POINTER builds, dstSegment.
;
; Register roles inside the loop:
;   eax = source pointer minus one pitch (row B), +8 per pass
;   ebx = source pitch
;   edx = destination pointer, +16 per pass
;   ecx = scratch; the remaining width lives on the stack
;
; Every pass reads 4 source pixels (16-bit lanes) and writes 16 bytes to
; [edx] and 16 bytes to [edx+dstPitch]. The delta pointer stored at
; [ebp+deltaPtr] is advanced by 8 per pass. The loop body runs before the
; width test, so at least one pass always executes.
; All general registers are preserved (pushad/popad); MMX state is
; released with emms before returning.
;-----------------------------------------------------------------------
%ifdef __DJGPP__
__2xSaISuperEagleLine:
%else
_2xSaISuperEagleLine:
%endif
        ; --- prologue ---
        push      ebp
        mov       ebp, esp
        pushad

        ; --- destination ---
%ifdef FAR_POINTER
        ; far destination: selector goes into fs
        mov       eax, [ebp+dstSegment]
        mov       fs, ax
%endif
        mov       edx, [ebp+dstOffset]          ; edx -> output

        ; --- source ---
        mov       eax, [ebp+srcPtr]             ; eax -> current row
        mov       ebx, [ebp+srcPitch]           ; ebx = source pitch
        mov       ecx, [ebp+width]              ; ecx = pixels to process
        sub       eax, ebx                      ; eax -> row B (one row up)

        ; --- main loop ---
.Loop:
        push      ecx                           ; remaining width is kept on the stack

        ;----- delta check -----------------------------------------------
        mov       ecx, [ebp+deltaPtr]

        ; first and last quadword of each of the four source rows
        movq      mm0, [eax+colorB0]
        movq      mm1, [eax+colorB3]
        movq      mm2, [eax+ebx+color4]
        movq      mm3, [eax+ebx+colorS2]
        movq      mm4, [eax+ebx+ebx+color1]
        movq      mm5, [eax+ebx+ebx+colorS1]
        push      eax
        add       eax, ebx                      ; one pitch down so row A is reachable
        movq      mm6, [eax+ebx+ebx+colorA0]
        movq      mm7, [eax+ebx+ebx+colorA3]
        pop       eax

        ; compare with what the delta buffer holds (delta is addressed +2)
        pcmpeqw   mm0, [ecx+2+colorB0]
        pcmpeqw   mm1, [ecx+2+colorB3]
        pcmpeqw   mm2, [ecx+ebx+2+color4]
        pcmpeqw   mm3, [ecx+ebx+2+colorS2]
        pcmpeqw   mm4, [ecx+ebx+ebx+2+color1]
        pcmpeqw   mm5, [ecx+ebx+ebx+2+colorS1]
        add       ecx, ebx
        pcmpeqw   mm6, [ecx+ebx+ebx+2+colorA0]
        pcmpeqw   mm7, [ecx+ebx+ebx+2+colorA3]
        sub       ecx, ebx

        ; AND all eight equality masks together
        pand      mm0, mm1
        pand      mm2, mm3
        pand      mm4, mm5
        pand      mm6, mm7
        pand      mm0, mm2
        pand      mm4, mm6
        pxor      mm7, mm7
        pand      mm0, mm4
        movq      mm6, [eax+colorB0]
        pcmpeqw   mm7, mm0                      ; mm7 lane set <=> some compare failed there

        movq      [ecx+2+colorB0], mm6          ; store the current row-B quadword into delta

        packsswb  mm7, mm7
        movd      ecx, mm7
        test      ecx, ecx
        jz near   .SKIP_PROCESS                 ; nothing differed: leave the output alone

        ;----- end of delta check ----------------------------------------

        ;----- blend of 5 and 6 ------------------------------------------
        movq      mm0, [eax+ebx+color5]
        movq      mm1, [eax+ebx+color6]
        movq      mm2, mm0
        movq      mm3, mm1
        movq      mm4, mm0                      ; mm4 keeps color5
        movq      mm5, mm1                      ; mm5 keeps color6

        EAGLE_BLEND_HALF                        ; mm0 = blend(5, 6)
        movq      [I56Pixel], mm0
        movq      mm7, mm0

        ; blend(blend(5,6), 5) -> "5,5,5,6"
        movq      mm0, mm7
        movq      mm1, mm4
        movq      mm2, mm0
        movq      mm3, mm1

        EAGLE_BLEND_HALF
        movq      [product1a], mm0

        ; blend(blend(5,6), 6) -> "6,6,6,5"
        movq      mm0, mm7
        movq      mm1, mm5
        movq      mm2, mm0
        movq      mm3, mm1

        EAGLE_BLEND_HALF
        movq      [product1b], mm0

        ;----- blend of 2 and 3 ------------------------------------------
        movq      mm0, [eax+ebx+ebx+color2]
        movq      mm1, [eax+ebx+ebx+color3]
        movq      mm2, mm0
        movq      mm3, mm1
        movq      mm4, mm0                      ; mm4 keeps color2
        movq      mm5, mm1                      ; mm5 keeps color3

        EAGLE_BLEND_HALF                        ; mm0 = blend(2, 3)
        movq      [I23Pixel], mm0
        movq      mm7, mm0

        ; blend(blend(2,3), 2) -> "2,2,2,3"
        movq      mm0, mm7
        movq      mm1, mm4
        movq      mm2, mm0
        movq      mm3, mm1

        EAGLE_BLEND_HALF
        movq      [product2a], mm0

        ; blend(blend(2,3), 3) -> "3,3,3,2"
        movq      mm0, mm7
        movq      mm1, mm5
        movq      mm2, mm0
        movq      mm3, mm1

        EAGLE_BLEND_HALF
        movq      [product2b], mm0

        ;----- choose the "branch" per lane ------------------------------
        movq      mm4, [eax+ebx+color5]
        movq      mm5, [eax+ebx+color6]
        movq      mm6, [eax+ebx+ebx+color3]
        movq      mm7, [eax+ebx+ebx+color2]

        ; Mask35 = (5 == 3) && (6 != 2)
        pxor      mm3, mm3
        movq      mm0, mm4
        movq      mm1, mm5

        pcmpeqw   mm0, mm6
        pcmpeqw   mm1, mm7
        pcmpeqw   mm1, mm3                      ; invert: 6 != 2
        pand      mm0, mm1
        movq      [Mask35], mm0

        ; Mask35b = Mask35 && ((S1 == 5 && 4 == 5) || (A2 == 5 && B1 == 5))
        movq      mm0, [eax+ebx+ebx+colorS1]
        movq      mm1, [eax+ebx+color4]
        push      eax
        add       eax, ebx
        movq      mm2, [eax+ebx+ebx+colorA2]
        pop       eax
        movq      mm3, [eax+colorB1]
        pcmpeqw   mm0, mm4
        pcmpeqw   mm1, mm4
        pcmpeqw   mm2, mm4
        pcmpeqw   mm3, mm4
        pand      mm0, mm1
        pand      mm2, mm3
        por       mm0, mm2
        pand      mm0, [Mask35]
        movq      [Mask35b], mm0

        ; Mask26 = (5 != 3) && (6 == 2)
        pxor      mm3, mm3
        movq      mm0, mm4
        movq      mm1, mm5

        pcmpeqw   mm0, mm6
        pcmpeqw   mm1, mm7
        pcmpeqw   mm0, mm3                      ; invert: 5 != 3
        pand      mm0, mm1
        movq      [Mask26], mm0

        ; Mask26b = Mask26 && ((1 == 6 && S2 == 6) || (A1 == 6 && B2 == 6))
        movq      mm0, [eax+ebx+ebx+color1]
        movq      mm1, [eax+ebx+colorS2]
        push      eax
        add       eax, ebx
        movq      mm2, [eax+ebx+ebx+colorA1]
        pop       eax
        movq      mm3, [eax+colorB2]
        pcmpeqw   mm0, mm5
        pcmpeqw   mm1, mm5
        pcmpeqw   mm2, mm5
        pcmpeqw   mm3, mm5
        pand      mm0, mm1
        pand      mm2, mm3
        por       mm0, mm2
        pand      mm0, [Mask26]
        movq      [Mask26b], mm0

        ; undecided lanes: (5 == 3) && (6 == 2) && (5 != 6)
        movq      mm0, mm4
        movq      mm1, mm5
        movq      mm2, mm0

        pcmpeqw   mm2, mm1
        pcmpeqw   mm0, mm6
        pcmpeqw   mm1, mm7
        pand      mm0, mm1
        pand      mm2, mm0
        pxor      mm0, mm2
        movq      mm7, mm0

        packsswb  mm7, mm7
        movd      ecx, mm7
        test      ecx, ecx
        jz near   .SKIP_GUESS                   ; no undecided lane

        ;----- settle undecided lanes by neighbour vote ------------------
        ; Map of the pixels: I|E F|J
        ;                    G|A B|K
        ;                    H|C D|L
        ;                    M|N O|P
        movq      mm6, mm0
        movq      mm4, [eax+ebx+color5]
        movq      mm5, [eax+ebx+color6]
        pxor      mm7, mm7                      ; tally starts at zero
        pand      mm6, [ONE]                    ; weight only in undecided lanes

        EAGLE_VOTE [eax+colorB1], [eax+ebx+color4]
        EAGLE_VOTE [eax+colorB2], [eax+ebx+colorS2]

        push      eax
        add       eax, ebx                      ; one pitch down for the lower rows
        EAGLE_VOTE [eax+ebx+color1], [eax+ebx+ebx+colorA1]
        EAGLE_VOTE [eax+ebx+colorS1], [eax+ebx+ebx+colorA2]
        pop       eax

        movq      mm1, mm7
        pxor      mm0, mm0
        pcmpgtw   mm7, mm0                      ; tally > 0
        pcmpgtw   mm0, mm1                      ; tally < 0

        por       mm7, [Mask35]
        por       mm0, [Mask26]
        movq      [Mask35], mm7
        movq      [Mask26], mm0

.SKIP_GUESS:
        ;----- compose the output pixels ---------------------------------
        movq      mm4, [Mask35]
        movq      mm5, [Mask26]
        movq      mm6, [Mask35b]
        movq      mm7, [Mask26b]

        movq      mm0, [eax+ebx+color5]
        movq      mm1, [eax+ebx+color6]
        movq      mm2, [eax+ebx+ebx+color2]
        movq      mm3, [eax+ebx+ebx+color3]
        pcmpeqw   mm0, mm2                      ; 5 == 2
        pcmpeqw   mm1, mm3                      ; 6 == 3
        movq      mm2, mm4
        movq      mm3, mm5
        por       mm0, mm1
        por       mm2, mm3                      ; Mask35 | Mask26
        pand      mm2, mm0
        pxor      mm0, mm2                      ; (5 == 2 || 6 == 3) outside both masks
        movq      mm3, mm0                      ; mm3 = lanes that keep the source pixel

        movq      mm2, mm0
        pxor      mm0, mm0
        por       mm2, mm4
        pxor      mm4, mm6                      ; mm4 = Mask35 without its "b" lanes
        por       mm2, mm5
        pxor      mm5, mm7                      ; mm5 = Mask26 without its "b" lanes
        pcmpeqw   mm2, mm0                      ; mm2 = lanes claimed by no mask

        ; ---- final1a
        movq      mm0, [eax+ebx+color5]
        movq      mm1, mm3
        por       mm1, mm4
        por       mm1, mm6
        pand      mm0, mm1
        movq      mm1, mm5
        pand      mm1, [I56Pixel]
        por       mm0, mm1
        movq      mm1, mm7
        pand      mm1, [product1b]
        por       mm0, mm1
        movq      mm1, mm2
        pand      mm1, [product1a]
        por       mm0, mm1
        movq      [final1a], mm0

        ; ---- final1b
        movq      mm0, [eax+ebx+color6]
        movq      mm1, mm3
        por       mm1, mm5
        por       mm1, mm7
        pand      mm0, mm1
        movq      mm1, mm4
        pand      mm1, [I56Pixel]
        por       mm0, mm1
        movq      mm1, mm6
        pand      mm1, [product1a]
        por       mm0, mm1
        movq      mm1, mm2
        pand      mm1, [product1b]
        por       mm0, mm1
        movq      [final1b], mm0

        ; ---- final2a
        movq      mm0, [eax+ebx+ebx+color2]
        movq      mm1, mm3
        por       mm1, mm5
        por       mm1, mm7
        pand      mm0, mm1
        movq      mm1, mm4
        pand      mm1, [I23Pixel]
        por       mm0, mm1
        movq      mm1, mm6
        pand      mm1, [product2b]
        por       mm0, mm1
        movq      mm1, mm2
        pand      mm1, [product2a]
        por       mm0, mm1
        movq      [final2a], mm0

        ; ---- final2b
        movq      mm0, [eax+ebx+ebx+color3]
        movq      mm1, mm3
        por       mm1, mm4
        por       mm1, mm6
        pand      mm0, mm1
        movq      mm1, mm5
        pand      mm1, [I23Pixel]
        por       mm0, mm1
        movq      mm1, mm7
        pand      mm1, [product2a]
        por       mm0, mm1
        movq      mm1, mm2
        pand      mm1, [product2b]
        por       mm0, mm1
        movq      [final2b], mm0

        ;----- interleave a/b columns and store both output rows ---------
        movq      mm0, [final1a]
        movq      mm2, [final1b]
        movq      mm1, mm0
        movq      mm4, [final2a]
        movq      mm6, [final2b]
        movq      mm5, mm4
        punpcklwd mm0, mm2
        punpckhwd mm1, mm2
        punpcklwd mm4, mm6
        punpckhwd mm5, mm6

%ifdef FAR_POINTER
        movq      [fs:edx], mm0
        movq      [fs:edx+8], mm1
        push      edx
        add       edx, [ebp+dstPitch]           ; second output row
        movq      [fs:edx], mm4
        movq      [fs:edx+8], mm5
        pop       edx
%else
        movq      [edx], mm0
        movq      [edx+8], mm1
        push      edx
        add       edx, [ebp+dstPitch]           ; second output row
        movq      [edx], mm4
        movq      [edx+8], mm5
        pop       edx
%endif
.SKIP_PROCESS:
        ; advance delta (+8), destination (+16) and source (+8)
        mov       ecx, [ebp+deltaPtr]
        add       ecx, 8
        mov       [ebp+deltaPtr], ecx
        add       edx, 16
        add       eax, 8

        pop       ecx                           ; remaining width
        sub       ecx, 4
        cmp       ecx, 0
        jg near   .Loop

        ; --- epilogue ---
        popad
        mov       esp, ebp
        pop       ebp
        emms
        ret


;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------


;This is version 0.50
; Byte displacements of the pixels in the I..P map, one row per group.
colorI          equ -2
colorE          equ 0
colorF          equ 2
colorJ          equ 4

colorG          equ -2
colorA          equ 0
colorB          equ 2
colorK          equ 4

colorH          equ -2
colorC          equ 0
colorD          equ 2
colorL          equ 4

colorM          equ -2
colorN          equ 0
colorO          equ 2
colorP          equ 4

1401 %ifdef __DJGPP__1402 __2xSaILine:1403 %else1404 _2xSaILine:1405 %endif1406 ; Store some stuff1407 push ebp1408 mov ebp, esp1409 pushad1411 ; Prepare the destination1412 %ifdef FAR_POINTER1413 ; Set the selector1414 mov eax, [ebp+dstSegment]1415 mov fs, ax1416 %endif1417 mov edx, [ebp+dstOffset] ; edx points to the screen1418 ; Prepare the source1419 ; eax points to colorA1420 mov eax, [ebp+srcPtr]1421 mov ebx, [ebp+srcPitch]1422 mov ecx, [ebp+width]1423 ; eax now points to colorE1424 sub eax, ebx1427 ; Main Loop1428 .Loop: push ecx1430 ;-----Check Delta------------------1431 mov ecx, [ebp+deltaPtr]1433 movq mm0, [eax+colorI]1434 movq mm1, [eax+colorJ]1435 movq mm2, [eax+ebx+colorG]1436 movq mm3, [eax+ebx+colorK]1437 movq mm4, [eax+ebx+ebx+colorH]1438 movq mm5, [eax+ebx+ebx+colorL]1439 push eax1440 add eax, ebx1441 movq mm6, 
[eax+ebx+ebx+colorM]1442 movq mm7, [eax+ebx+ebx+colorP]1443 pop eax1445 pcmpeqw mm0, [ecx+2+colorI]1446 pcmpeqw mm1, [ecx+2+colorK]1447 pcmpeqw mm2, [ecx+ebx+2+colorG]1448 pcmpeqw mm3, [ecx+ebx+2+colorK]1449 pcmpeqw mm4, [ecx+ebx+ebx+2+colorH]1450 pcmpeqw mm5, [ecx+ebx+ebx+2+colorL]1451 add ecx, ebx1452 pcmpeqw mm6, [ecx+ebx+ebx+2+colorM]1453 pcmpeqw mm7, [ecx+ebx+ebx+2+colorP]1454 sub ecx, ebx1457 pand mm0, mm11458 pand mm2, mm31459 pand mm4, mm51460 pand mm6, mm71461 pand mm0, mm21462 pand mm4, mm61463 pxor mm7, mm71464 pand mm0, mm41465 movq mm6, [eax+colorI]1466 pcmpeqw mm7, mm01468 movq [ecx+2+colorI], mm61470 packsswb mm7, mm71471 movd ecx, mm71472 test ecx, ecx1473 jz near .SKIP_PROCESS1475 ;End Delta1477 ;---------------------------------1480 ;11481 ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorE) && (colorB == colorL)1482 movq mm0, [eax+ebx+colorA] ;mm0 and mm1 contain colorA1483 movq mm2, [eax+ebx+colorB] ;mm2 and mm3 contain colorB1485 movq mm1, mm01486 movq mm3, mm21488 pcmpeqw mm0, [eax+ebx+ebx+colorD]1489 pcmpeqw mm1, [eax+colorE]1490 pcmpeqw mm2, [eax+ebx+ebx+colorL]1491 pcmpeqw mm3, [eax+ebx+ebx+colorC]1493 pand mm0, mm11494 pxor mm1, mm11495 pand mm0, mm21496 pcmpeqw mm3, mm11497 pand mm0, mm3 ;result in mm01499 ;if ((colorA == colorC) && (colorB != colorE) && (colorA == colorF) && (colorB == colorJ)1500 movq mm4, [eax+ebx+colorA] ;mm4 and mm5 contain colorA1501 movq mm6, [eax+ebx+colorB] ;mm6 and mm7 contain colorB1502 movq mm5, mm41503 movq mm7, mm61505 pcmpeqw mm4, [eax+ebx+ebx+colorC]1506 pcmpeqw mm5, [eax+colorF]1507 pcmpeqw mm6, [eax+colorJ]1508 pcmpeqw mm7, [eax+colorE]1510 pand mm4, mm51511 pxor mm5, mm51512 pand mm4, mm61513 pcmpeqw mm7, mm51514 pand mm4, mm7 ;result in mm41516 por mm0, mm4 ;combine the masks1517 movq [Mask1], mm01519 ;--------------------------------------------1521 ;21522 ;if ((colorB == colorC) && (colorA != colorD) && (colorB == colorF) && (colorA == colorH)1523 movq mm0, [eax+ebx+colorB] ;mm0 and 
mm1 contain colorB1524 movq mm2, [eax+ebx+colorA] ;mm2 and mm3 contain colorA1525 movq mm1, mm01526 movq mm3, mm21528 pcmpeqw mm0, [eax+ebx+ebx+colorC]1529 pcmpeqw mm1, [eax+colorF]1530 pcmpeqw mm2, [eax+ebx+ebx+colorH]1531 pcmpeqw mm3, [eax+ebx+ebx+colorD]1533 pand mm0, mm11534 pxor mm1, mm11535 pand mm0, mm21536 pcmpeqw mm3, mm11537 pand mm0, mm3 ;result in mm01539 ;if ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI)1540 movq mm4, [eax+ebx+colorB] ;mm4 and mm5 contain colorB1541 movq mm6, [eax+ebx+colorA] ;mm6 and mm7 contain colorA1542 movq mm5, mm41543 movq mm7, mm61545 pcmpeqw mm4, [eax+ebx+ebx+colorD]1546 pcmpeqw mm5, [eax+colorE]1547 pcmpeqw mm6, [eax+colorI]1548 pcmpeqw mm7, [eax+colorF]1550 pand mm4, mm51551 pxor mm5, mm51552 pand mm4, mm61553 pcmpeqw mm7, mm51554 pand mm4, mm7 ;result in mm41556 por mm0, mm4 ;combine the masks1557 movq [Mask2], mm01560 ;interpolate colorA and colorB1561 movq mm0, [eax+ebx+colorA]1562 movq mm1, [eax+ebx+colorB]1564 movq mm2, mm01565 movq mm3, mm11567 pand mm0, [colorMask]1568 pand mm1, [colorMask]1570 psrlw mm0, 11571 psrlw mm1, 11573 pand mm3, [lowPixelMask]1574 paddw mm0, mm11576 pand mm3, mm21577 paddw mm0, mm3 ;mm0 contains the interpolated values1579 ;assemble the pixels1580 movq mm1, [eax+ebx+colorA]1581 movq mm2, [eax+ebx+colorB]1583 movq mm3, [Mask1]1584 movq mm5, mm11585 movq mm4, [Mask2]1586 movq mm6, mm11588 pand mm1, mm31589 por mm3, mm41590 pxor mm7, mm71591 pand mm2, mm41593 pcmpeqw mm3, mm71594 por mm1, mm21595 pand mm0, mm31597 por mm0, mm11599 punpcklwd mm5, mm01600 punpckhwd mm6, mm01602 %ifdef FAR_POINTER1603 movq [fs:edx], mm51604 movq [fs:edx+8], mm61605 %else1606 movq [edx], mm51607 movq [edx+8], mm61608 %endif1610 ;------------------------------------------------1611 ; Create the Nextline1612 ;------------------------------------------------1613 ;3 ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorG) && (colorC == colorO)1614 movq mm0, 
[eax+ebx+colorA] ;mm0 and mm1 contain colorA1615 movq mm2, [eax+ebx+ebx+colorC] ;mm2 and mm3 contain colorC1616 movq mm1, mm01617 movq mm3, mm21619 push eax1620 add eax, ebx1621 pcmpeqw mm0, [eax+ebx+colorD]1622 pcmpeqw mm1, [eax+colorG]1623 pcmpeqw mm2, [eax+ebx+ebx+colorO]1624 pcmpeqw mm3, [eax+colorB]1625 pop eax1627 pand mm0, mm11628 pxor mm1, mm11629 pand mm0, mm21630 pcmpeqw mm3, mm11631 pand mm0, mm3 ;result in mm01633 ;if ((colorA == colorB) && (colorG != colorC) && (colorA == colorH) && (colorC == colorM)1634 movq mm4, [eax+ebx+colorA] ;mm4 and mm5 contain colorA1635 movq mm6, [eax+ebx+ebx+colorC] ;mm6 and mm7 contain colorC1636 movq mm5, mm41637 movq mm7, mm61639 push eax1640 add eax, ebx1641 pcmpeqw mm4, [eax+ebx+colorH]1642 pcmpeqw mm5, [eax+colorB]1643 pcmpeqw mm6, [eax+ebx+ebx+colorM]1644 pcmpeqw mm7, [eax+colorG]1645 pop eax1647 pand mm4, mm51648 pxor mm5, mm51649 pand mm4, mm61650 pcmpeqw mm7, mm51651 pand mm4, mm7 ;result in mm41653 por mm0, mm4 ;combine the masks1654 movq [Mask1], mm01655 ;--------------------------------------------1657 ;41658 ;if ((colorB == colorC) && (colorA != colorD) && (colorC == colorH) && (colorA == colorF)1659 movq mm0, [eax+ebx+ebx+colorC] ;mm0 and mm1 contain colorC1660 movq mm2, [eax+ebx+colorA] ;mm2 and mm3 contain colorA1661 movq mm1, mm01662 movq mm3, mm21664 pcmpeqw mm0, [eax+ebx+colorB]1665 pcmpeqw mm1, [eax+ebx+ebx+colorH]1666 pcmpeqw mm2, [eax+colorF]1667 pcmpeqw mm3, [eax+ebx+ebx+colorD]1669 pand mm0, mm11670 pxor mm1, mm11671 pand mm0, mm21672 pcmpeqw mm3, mm11673 pand mm0, mm3 ;result in mm01675 ;if ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI)1676 movq mm4, [eax+ebx+ebx+colorC] ;mm4 and mm5 contain colorC1677 movq mm6, [eax+ebx+colorA] ;mm6 and mm7 contain colorA1678 movq mm5, mm41679 movq mm7, mm61681 pcmpeqw mm4, [eax+ebx+ebx+colorD]1682 pcmpeqw mm5, [eax+ebx+colorG]1683 pcmpeqw mm6, [eax+colorI]1684 pcmpeqw mm7, [eax+ebx+ebx+colorH]1686 pand mm4, mm51687 pxor mm5, 
mm51688 pand mm4, mm61689 pcmpeqw mm7, mm51690 pand mm4, mm7 ;result in mm41692 por mm0, mm4 ;combine the masks1693 movq [Mask2], mm01694 ;----------------------------------------------1696 ;interpolate colorA and colorC1697 movq mm0, [eax+ebx+colorA]1698 movq mm1, [eax+ebx+ebx+colorC]1700 movq mm2, mm01701 movq mm3, mm11703 pand mm0, [colorMask]1704 pand mm1, [colorMask]1706 psrlw mm0, 11707 psrlw mm1, 11709 pand mm3, [lowPixelMask]1710 paddw mm0, mm11712 pand mm3, mm21713 paddw mm0, mm3 ;mm0 contains the interpolated values1714 ;-------------1716 ;assemble the pixels1717 movq mm1, [eax+ebx+colorA]1718 movq mm2, [eax+ebx+ebx+colorC]1720 movq mm3, [Mask1]1721 movq mm4, [Mask2]1723 pand mm1, mm31724 pand mm2, mm41726 por mm3, mm41727 pxor mm7, mm71728 por mm1, mm21730 pcmpeqw mm3, mm71731 pand mm0, mm31732 por mm0, mm11733 movq [ACPixel], mm01735 ;////////////////////////////////1736 ; Decide which "branch" to take1737 ;--------------------------------1738 movq mm0, [eax+ebx+colorA]1739 movq mm1, [eax+ebx+colorB]1740 movq mm6, mm01741 movq mm7, mm11742 pcmpeqw mm0, [eax+ebx+ebx+colorD]1743 pcmpeqw mm1, [eax+ebx+ebx+colorC]1744 pcmpeqw mm6, mm71746 movq mm2, mm01747 movq mm3, mm01749 pand mm0, mm1 ;colorA == colorD && colorB == colorC1750 pxor mm7, mm71752 pcmpeqw mm2, mm71753 pand mm6, mm01754 pand mm2, mm1 ;colorA != colorD && colorB == colorC1756 pcmpeqw mm1, mm71758 pand mm1, mm3 ;colorA == colorD && colorB != colorC1759 pxor mm0, mm61760 por mm1, mm61761 movq mm7, mm01762 movq [Mask2], mm21763 packsswb mm7, mm71764 movq [Mask1], mm11766 movd ecx, mm71767 test ecx, ecx1768 jz near .SKIP_GUESS1770 ;---------------------------------------------1771 ; Map of the pixels: I|E F|J1772 ; G|A B|K1773 ; H|C D|L1774 ; M|N O|P1775 movq mm6, mm01776 movq mm4, [eax+ebx+colorA]1777 movq mm5, [eax+ebx+colorB]1778 pxor mm7, mm71779 pand mm6, [ONE]1781 movq mm0, [eax+colorE]1782 movq mm1, [eax+ebx+colorG]1783 movq mm2, mm01784 movq mm3, mm11785 pcmpeqw mm0, mm41786 pcmpeqw mm1, 
mm41787 pcmpeqw mm2, mm51788 pcmpeqw mm3, mm51789 pand mm0, mm61790 pand mm1, mm61791 pand mm2, mm61792 pand mm3, mm61793 paddw mm0, mm11794 paddw mm2, mm31796 pxor mm3, mm31797 pcmpgtw mm0, mm61798 pcmpgtw mm2, mm61799 pcmpeqw mm0, mm31800 pcmpeqw mm2, mm31801 pand mm0, mm61802 pand mm2, mm61803 paddw mm7, mm01804 psubw mm7, mm21806 movq mm0, [eax+colorF]1807 movq mm1, [eax+ebx+colorK]1808 movq mm2, mm01809 movq mm3, mm11810 pcmpeqw mm0, mm41811 pcmpeqw mm1, mm41812 pcmpeqw mm2, mm51813 pcmpeqw mm3, mm51814 pand mm0, mm61815 pand mm1, mm61816 pand mm2, mm61817 pand mm3, mm61818 paddw mm0, mm11819 paddw mm2, mm31821 pxor mm3, mm31822 pcmpgtw mm0, mm61823 pcmpgtw mm2, mm61824 pcmpeqw mm0, mm31825 pcmpeqw mm2, mm31826 pand mm0, mm61827 pand mm2, mm61828 paddw mm7, mm01829 psubw mm7, mm21831 push eax1832 add eax, ebx1833 movq mm0, [eax+ebx+colorH]1834 movq mm1, [eax+ebx+ebx+colorN]1835 movq mm2, mm01836 movq mm3, mm11837 pcmpeqw mm0, mm41838 pcmpeqw mm1, mm41839 pcmpeqw mm2, mm51840 pcmpeqw mm3, mm51841 pand mm0, mm61842 pand mm1, mm61843 pand mm2, mm61844 pand mm3, mm61845 paddw mm0, mm11846 paddw mm2, mm31848 pxor mm3, mm31849 pcmpgtw mm0, mm61850 pcmpgtw mm2, mm61851 pcmpeqw mm0, mm31852 pcmpeqw mm2, mm31853 pand mm0, mm61854 pand mm2, mm61855 paddw mm7, mm01856 psubw mm7, mm21858 movq mm0, [eax+ebx+colorL]1859 movq mm1, [eax+ebx+ebx+colorO]1860 movq mm2, mm01861 movq mm3, mm11862 pcmpeqw mm0, mm41863 pcmpeqw mm1, mm41864 pcmpeqw mm2, mm51865 pcmpeqw mm3, mm51866 pand mm0, mm61867 pand mm1, mm61868 pand mm2, mm61869 pand mm3, mm61870 paddw mm0, mm11871 paddw mm2, mm31873 pxor mm3, mm31874 pcmpgtw mm0, mm61875 pcmpgtw mm2, mm61876 pcmpeqw mm0, mm31877 pcmpeqw mm2, mm31878 pand mm0, mm61879 pand mm2, mm61880 paddw mm7, mm01881 psubw mm7, mm21883 pop eax1884 movq mm1, mm71885 pxor mm0, mm01886 pcmpgtw mm7, mm01887 pcmpgtw mm0, mm11889 por mm7, [Mask1]1890 por mm0, [Mask2]1891 movq [Mask1], mm71892 movq [Mask2], mm01894 .SKIP_GUESS:1895 
;----------------------------1896 ;interpolate A, B, C and D1897 movq mm0, [eax+ebx+colorA]1898 movq mm1, [eax+ebx+colorB]1899 movq mm4, mm01900 movq mm2, [eax+ebx+ebx+colorC]1901 movq mm5, mm11902 movq mm3, [qcolorMask]1903 movq mm6, mm21904 movq mm7, [qlowpixelMask]1906 pand mm0, mm31907 pand mm1, mm31908 pand mm2, mm31909 pand mm3, [eax+ebx+ebx+colorD]1911 psrlw mm0, 21912 pand mm4, mm71913 psrlw mm1, 21914 pand mm5, mm71915 psrlw mm2, 21916 pand mm6, mm71917 psrlw mm3, 21918 pand mm7, [eax+ebx+ebx+colorD]1920 paddw mm0, mm11921 paddw mm2, mm31923 paddw mm4, mm51924 paddw mm6, mm71926 paddw mm4, mm61927 paddw mm0, mm21928 psrlw mm4, 21929 pand mm4, [qlowpixelMask]1930 paddw mm0, mm4 ;mm0 contains the interpolated value of A, B, C and D1932 ;\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\1933 ;assemble the pixels1934 movq mm1, [Mask1]1935 movq mm2, [Mask2]1936 movq mm4, [eax+ebx+colorA]1937 movq mm5, [eax+ebx+colorB]1938 pand mm4, mm11939 pand mm5, mm21941 pxor mm7, mm71942 por mm1, mm21943 por mm4, mm51944 pcmpeqw mm1, mm71945 pand mm0, mm11946 por mm4, mm0 ;mm4 contains the diagonal pixels1948 movq mm0, [ACPixel]1949 movq mm1, mm01950 punpcklwd mm0, mm41951 punpckhwd mm1, mm41953 push edx1954 add edx, [ebp+dstPitch]1956 %ifdef FAR_POINTER1957 movq [fs:edx], mm01958 movq [fs:edx+8], mm11959 %else1960 movq [edx], mm01961 movq [edx+8], mm11962 %endif1963 pop edx1965 .SKIP_PROCESS:1966 mov ecx, [ebp+deltaPtr]1967 add ecx, 81968 mov [ebp+deltaPtr], ecx1969 add edx, 161970 add eax, 81972 pop ecx1973 sub ecx, 41974 cmp ecx, 01975 jg near .Loop1977 ; Restore some stuff1978 popad1979 mov esp, ebp1980 pop ebp1981 emms1982 ret1984 ;-------------------------------------------------------------------------1985 ;-------------------------------------------------------------------------1986 ;-------------------------------------------------------------------------1987 ;-------------------------------------------------------------------------1988 
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------

;-------------------------------------------------------------------------
; Init_2xSaIMMX (PixelFormat)
;
; Selects the pixel-format dependent bit masks that the MMX interpolation
; code in this file reads from memory.
;
; In:     [ebp+8] = PixelFormat; the decimal values 555 and 565 are
;                   recognised.  (Single stack argument; presumably called
;                   from C with the cdecl convention -- confirm against the
;                   C prototype.)
; Out:    eax = 0  masks were written for the requested format
;         eax = 1  PixelFormat not recognised, masks left untouched
; Writes: colorMask, lowPixelMask, qcolorMask, qlowpixelMask (8 bytes each)
; Clobb:  eax, flags
;-------------------------------------------------------------------------

; SET_MASK64 dest, pattern32
; Fills the 8-byte variable `dest` with the 32-bit `pattern32` repeated
; twice, so every 16-bit lane of the quadword carries the same mask.
; Uses immediate stores only; no register is modified.
%macro SET_MASK64 2
        mov     dword [%1], %2
        mov     dword [%1+4], %2
%endmacro

%ifdef __DJGPP__
_Init_2xSaIMMX:
%else
Init_2xSaIMMX:
%endif
        push    ebp
        mov     ebp, esp

        ; MMX detection was disabled by the original author because it
        ; did not work; kept here for reference:
        ;       mov     eax, 1
        ;       cpuid
        ;       test    edx, 0x00800000         ; test bit 23
        ;       jz      .unsupported            ; bit not set => no MMX detected

        mov     eax, [ebp+8]                    ; eax = PixelFormat
        cmp     eax, 555
        je      .bits555
        cmp     eax, 565
        je      .bits565

.unsupported:
        mov     eax, 1                          ; unknown format: report failure
        jmp     .done

.bits555:
        ; 15-bit layout: three 5-bit fields, top bit of each word unused.
        SET_MASK64 colorMask,     0x7BDE7BDE    ; every field minus its lowest bit
        SET_MASK64 lowPixelMask,  0x04210421    ; lowest bit of every field
        SET_MASK64 qcolorMask,    0x739C739C    ; every field minus its two lowest bits
        SET_MASK64 qlowpixelMask, 0x0C630C63    ; two lowest bits of every field
        jmp     .success

.bits565:
        ; 16-bit layout: 5-, 6- and 5-bit fields.
        SET_MASK64 colorMask,     0xF7DEF7DE    ; every field minus its lowest bit
        SET_MASK64 lowPixelMask,  0x08210821    ; lowest bit of every field
        SET_MASK64 qcolorMask,    0xE79CE79C    ; every field minus its two lowest bits
        SET_MASK64 qlowpixelMask, 0x18631863    ; two lowest bits of every field

.success:
        xor     eax, eax                        ; return 0
.done:
        pop     ebp                             ; esp was never moved, no need to reload it
        ret


;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------

SECTION .data ALIGN = 32

; Every constant below is one quadword: a 32-bit pattern stored twice so
; that all four 16-bit lanes of an MMX register see the same mask.

; Pixel-format dependent masks.  The initial values are the same ones
; Init_2xSaIMMX stores for format 565; that routine overwrites all four.
colorMask:      times 2 dd 0xF7DEF7DE   ; applied before the psrlw-by-1 averaging
lowPixelMask:   times 2 dd 0x08210821   ; low-bit correction for that averaging

qcolorMask:     times 2 dd 0xE79CE79C   ; applied before the psrlw-by-2 averaging
qlowpixelMask:  times 2 dd 0x18631863   ; low-bits correction for that averaging

; Further masks; Init_2xSaIMMX does not modify these.
darkenMask:     times 2 dd 0xC718C718
GreenMask:      times 2 dd 0x07E007E0
RedBlueMask:    times 2 dd 0xF81FF81F

; Generic lane constants.
FALSE:          times 2 dd 0x00000000   ; all bits clear
TRUE:           times 2 dd 0xffffffff   ; all bits set
ONE:            times 2 dd 0x00010001   ; the value 1 in each 16-bit lane


SECTION .bss ALIGN = 32

; Uninitialised scratch storage, one quadword per name.
ACPixel:        resq 1
Mask1:          resq 1
Mask2:          resq 1

I56Pixel:       resq 1
I23Pixel:       resq 1
I5556Pixel:     resq 1
I2223Pixel:     resq 1
I5666Pixel:     resq 1
I2333Pixel:     resq 1
Mask26:         resq 1
Mask35:         resq 1
Mask26b:        resq 1
Mask35b:        resq 1
product1a:      resq 1
product1b:      resq 1
product2a:      resq 1
product2b:      resq 1
final1a:        resq 1
final1b:        resq 1
final2a:        resq 1
final2b:        resq 1