;/*---------------------------------------------------------------------*
; * The following (piece of) code, (part of) the 2xSaI engine,          *
; * copyright (c) 1999 - 2001 by Derek Liauw Kie Fa.                    *
; * Non-Commercial use of this software is allowed and is encouraged,   *
; * provided that appropriate credit be given.                          *
; * You may freely modify this code, but I request                      *
; * that any improvements to the engine be submitted to me, so          *
; * that I can implement these improvements in newer versions of        *
; * the software.                                                       *
; * If you need more information, have any comments or suggestions,     *
; * you can e-mail me. My e-mail: derek-liauw@usa.net.                  *
; *---------------------------------------------------------------------*/

;----------------------
; 2xSaI version 0.59 WIP, soon to become version 0.60
;----------------------

; Syntax: NASM, 32-bit code.  Arguments are read from the stack through ebp.
; Uncomment the next line to build the variants that store through fs:.
;%define FAR_POINTER

BITS 32

; Exported entry points.  The DJGPP build uses one extra leading underscore.
%ifdef __DJGPP__
GLOBAL __2xSaILine
GLOBAL __2xSaISuperEagleLine
GLOBAL __2xSaISuper2xSaILine
GLOBAL _Init_2xSaIMMX
%else
GLOBAL _2xSaILine
GLOBAL _2xSaISuperEagleLine
GLOBAL _2xSaISuper2xSaILine
GLOBAL Init_2xSaIMMX
%endif

SECTION .text ALIGN = 32

; C prototype of the line routines.  The parameter order follows the stack
; offset table below (deltaPtr sits between srcPtr and srcPitch).
; NOTE(review): the pointer type of deltaPtr is assumed - confirm against the
; C header that declares these routines.
%ifdef FAR_POINTER
;EXTERN_C void _2xSaILine (uint8 *srcPtr, uint8 *deltaPtr, uint32 srcPitch,
;                          uint32 width, uint8 *dstPtr, uint32 dstPitch,
;                          uint16 dstSegment);
%else
;EXTERN_C void _2xSaILine (uint8 *srcPtr, uint8 *deltaPtr, uint32 srcPitch,
;                          uint32 width, uint8 *dstPtr, uint32 dstPitch);
%endif

; Byte offsets of the stack arguments from ebp, valid once a routine has
; executed "push ebp / mov ebp, esp" (saved ebp at +0, return address at +4).
srcPtr          equ 8
deltaPtr        equ 12
srcPitch        equ 16
width           equ 20
dstOffset       equ 24
dstPitch        equ 28
dstSegment      equ 32          ; only read when FAR_POINTER is defined

; Byte displacements of neighbouring pixels inside one row.  Adjacent names
; are 2 bytes apart; displacement 0 is the column the row pointer addresses.
colorB0         equ -2
colorB1         equ 0
colorB2         equ 2
colorB3         equ 4

color7          equ -2
color8          equ 0
color9          equ 2

; Column displacements (bytes) for the rows addressed in the routines below:
;   [eax        + colorB0..colorB3]
;   [eax +   ebx + color4, color5, color6, colorS2]
;   [eax + 2*ebx + color1, color2, color3, colorS1]
;   [eax + 3*ebx + colorA0..colorA3]
color4          equ -2
color5          equ 0
color6          equ 2
colorS2         equ 4

color1          equ -2
color2          equ 0
color3          equ 2
colorS1         equ 4

colorA0         equ -2
colorA1         equ 0
colorA2         equ 2
colorA3         equ 4

;-----------------------------------------------------------------------------
; Helper macros for _2xSaISuper2xSaILine.  Each expands to exactly the
; instruction run that the routine originally repeated inline.
;-----------------------------------------------------------------------------

; Per-word blend of mm0 and mm1:
;   mm0 = ((mm0 & colorMask) >> 1) + ((mm1 & colorMask) >> 1)
;         + (mm0 & mm1 & lowPixelMask)
; In:  mm2 = copy of mm0, mm3 = copy of mm1.
; Out: mm0 = blend.  Clobbers mm1 and mm3.
; colorMask and lowPixelMask are defined outside this part of the file.
%macro SUPER2XSAI_BLEND_HALF 0
        pand    mm0, [colorMask]
        pand    mm1, [colorMask]
        psrlw   mm0, 1
        psrlw   mm1, 1
        pand    mm3, [lowPixelMask]
        paddw   mm0, mm1
        pand    mm3, mm2
        paddw   mm0, mm3
%endmacro

; One round of the neighbour vote used when the diagonals tie.
; In:  mm0, mm1 = the two probe pixels of this round
;      mm4, mm5 = the two candidate pixels
;      mm6      = per-lane weight (guess mask AND [ONE]; presumably 0 or 1,
;                 ONE is defined outside this part of the file)
;      mm7      = running tally
; Assuming mm6 lanes are 0 or 1: in every lane where mm6 is 1 the tally gains
; 1 unless both probes equal mm4, and loses 1 unless both probes equal mm5.
; Clobbers mm0-mm3.
%macro SUPER2XSAI_VOTE 0
        movq    mm2, mm0
        movq    mm3, mm1
        pcmpeqw mm0, mm4
        pcmpeqw mm1, mm4
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand    mm0, mm6
        pand    mm1, mm6
        pand    mm2, mm6
        pand    mm3, mm6
        paddw   mm0, mm1                ; matches against mm4 in this round
        paddw   mm2, mm3                ; matches against mm5 in this round

        pxor    mm3, mm3
        pcmpgtw mm0, mm6                ; set where the match count exceeds mm6
        pcmpgtw mm2, mm6
        pcmpeqw mm0, mm3                ; invert
        pcmpeqw mm2, mm3
        pand    mm0, mm6
        pand    mm2, mm6
        paddw   mm7, mm0
        psubw   mm7, mm2
%endmacro

; Select the right-hand output pixel of one output row.
; In:  mm0 = lanes where the first special case holds
;      mm1 = lanes where the second special case holds
; Result, OR of the masked terms, stored to [%6]:
;   mm0 lanes                             -> [%1]
;   mm1 lanes                             -> [%2]
;   Mask35 lanes not in mm0|mm1           -> [%4]
;   Mask26 lanes not in mm0|mm1           -> [%5]
;   lanes in none of the above            -> [%3]
; Clobbers mm0-mm7.
%macro SUPER2XSAI_PICK_B 6
        movq    mm7, mm0
        por     mm7, mm1                ; mm7 = either special case

        movq    mm4, [Mask35]
        movq    mm3, [Mask26]

        movq    mm6, mm4
        pand    mm6, mm7
        pxor    mm4, mm6                ; mm4 = Mask35 outside the special lanes

        movq    mm6, mm3
        pand    mm6, mm7
        pxor    mm3, mm6                ; mm3 = Mask26 outside the special lanes

        movq    mm2, mm0
        movq    mm7, [%1]
        movq    mm6, [%2]
        movq    mm5, [%3]

        por     mm2, mm4
        pand    mm4, [%4]
        por     mm2, mm3
        pand    mm3, [%5]
        por     mm2, mm1
        pand    mm0, mm7
        pand    mm1, mm6
        pxor    mm7, mm7
        pcmpeqw mm2, mm7                ; mm2 = lanes selected by nothing so far
        por     mm0, mm1
        por     mm3, mm4
        pand    mm2, mm5
        por     mm0, mm3
        por     mm0, mm2
        movq    [%6], mm0
%endmacro

;-----------------------------------------------------------------------------
; _2xSaISuper2xSaILine  (__2xSaISuper2xSaILine when __DJGPP__ is defined)
;
; Scales one line of 16-bit pixels with MMX.  Every loop iteration consumes
; four source pixels and stores eight pixels to each of two destination rows
; (dstOffset and dstOffset + dstPitch).
;
; Stack arguments (dwords, read via ebp):
;   srcPtr      source line; the row one srcPitch above and the two rows
;               below are read too, starting 2 bytes before the current
;               position
;   deltaPtr    buffer compared against the source with srcPitch as its row
;               stride; a group whose compares all match is skipped.  This
;               argument slot is advanced in place by 8 per iteration.
;   srcPitch    byte distance between source rows
;   width       pixel count; the loop subtracts 4 per iteration and runs
;               while the remainder is greater than zero
;   dstOffset   destination address
;   dstPitch    byte distance between the two destination rows
;   dstSegment  selector loaded into fs (FAR_POINTER builds only)
;
; Registers inside the loop:
;   eax = source position one row above srcPtr      ebx = srcPitch
;   edx = destination position                      ecx = scratch
;   The remaining width is kept on the stack across the loop body.
;
; All general registers are restored by popad; emms is executed before ret.
; Intermediate results are kept in fixed memory locations defined outside
; this part of the file (I56Pixel, I5556Pixel, I5666Pixel, I23Pixel,
; I2223Pixel, I2333Pixel, Mask26, Mask35, final1a, final1b, final2a,
; final2b), so calls share that scratch storage.
;-----------------------------------------------------------------------------
%ifdef __DJGPP__
__2xSaISuper2xSaILine:
%else
_2xSaISuper2xSaILine:
%endif
        ; Store some stuff
        push    ebp
        mov     ebp, esp
        pushad

        ; Prepare the destination
%ifdef FAR_POINTER
        ; Set the selector
        mov     eax, [ebp+dstSegment]
        mov     fs, ax
%endif
        mov     edx, [ebp+dstOffset]    ; edx points to the screen

        ; Prepare the source
        mov     eax, [ebp+srcPtr]       ; eax = address of the current line
        mov     ebx, [ebp+srcPitch]     ; ebx contains the source pitch
        mov     ecx, [ebp+width]        ; ecx contains the number of pixels to process
        sub     eax, ebx                ; eax now points to colorB1, the base row

        ; Main Loop
.Loop:
        push    ecx                     ; keep the remaining width on the stack

        ;-----Check Delta------------------
        mov     ecx, [ebp+deltaPtr]

        ; load the leftmost and rightmost qword of each of the four rows
        movq    mm0, [eax+colorB0]
        movq    mm1, [eax+colorB3]
        movq    mm2, [eax+ebx+color4]
        movq    mm3, [eax+ebx+colorS2]
        movq    mm4, [eax+ebx+ebx+color1]
        movq    mm5, [eax+ebx+ebx+colorS1]
        push    eax
        add     eax, ebx                ; reach the fourth row (base + 3*pitch)
        movq    mm6, [eax+ebx+ebx+colorA0]
        movq    mm7, [eax+ebx+ebx+colorA3]
        pop     eax

        ; compare to delta (delta addresses carry an extra +2 displacement)
        pcmpeqw mm0, [ecx+2+colorB0]
        pcmpeqw mm1, [ecx+2+colorB3]
        pcmpeqw mm2, [ecx+ebx+2+color4]
        pcmpeqw mm3, [ecx+ebx+2+colorS2]
        pcmpeqw mm4, [ecx+ebx+ebx+2+color1]
        pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1]
        add     ecx, ebx
        pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0]
        pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3]
        sub     ecx, ebx

        ; compose results
        pand    mm0, mm1
        pand    mm2, mm3
        pand    mm4, mm5
        pand    mm6, mm7
        pand    mm0, mm2
        pand    mm4, mm6
        pxor    mm7, mm7
        pand    mm0, mm4
        movq    mm6, [eax+colorB0]
        pcmpeqw mm7, mm0                ; did any compare give us a zero ?

        movq    [ecx+2+colorB0], mm6    ; record the top-row qword in the delta buffer

        packsswb mm7, mm7
        movd    ecx, mm7
        test    ecx, ecx
        jz      near .SKIP_PROCESS      ; no, so we can skip

        ;End Delta

        ;---------------------------------
        ; Blends of color5 and color6
        movq    mm0, [eax+ebx+color5]
        movq    mm1, [eax+ebx+color6]
        movq    mm2, mm0
        movq    mm3, mm1
        movq    mm4, mm0                ; mm4 keeps color5
        movq    mm5, mm1                ; mm5 keeps color6
        SUPER2XSAI_BLEND_HALF           ; mm0 contains the interpolated values
        movq    [I56Pixel], mm0
        movq    mm7, mm0

        ;-------------------
        movq    mm0, mm7
        movq    mm1, mm4                ; 5,5,5,6
        movq    mm2, mm0
        movq    mm3, mm1
        SUPER2XSAI_BLEND_HALF           ; mm0 contains the interpolated values
        movq    [I5556Pixel], mm0

        ;--------------------
        movq    mm0, mm7
        movq    mm1, mm5                ; 6,6,6,5
        movq    mm2, mm0
        movq    mm3, mm1
        SUPER2XSAI_BLEND_HALF
        movq    [I5666Pixel], mm0

        ;-------------------------
        ; Blends of color2 and color3
        movq    mm0, [eax+ebx+ebx+color2]
        movq    mm1, [eax+ebx+ebx+color3]
        movq    mm2, mm0
        movq    mm3, mm1
        movq    mm4, mm0                ; mm4 keeps color2
        movq    mm5, mm1                ; mm5 keeps color3
        SUPER2XSAI_BLEND_HALF
        movq    [I23Pixel], mm0
        movq    mm7, mm0

        ;---------------------
        movq    mm0, mm7
        movq    mm1, mm4                ; 2,2,2,3
        movq    mm2, mm0
        movq    mm3, mm1
        SUPER2XSAI_BLEND_HALF
        movq    [I2223Pixel], mm0

        ;----------------------
        movq    mm0, mm7
        movq    mm1, mm5                ; 3,3,3,2
        movq    mm2, mm0
        movq    mm3, mm1
        SUPER2XSAI_BLEND_HALF
        movq    [I2333Pixel], mm0

        ;--------------------
        ;////////////////////////////////
        ; Decide which "branch" to take
        ;--------------------------------
        movq    mm0, [eax+ebx+color5]
        movq    mm1, [eax+ebx+color6]
        movq    mm6, mm0
        movq    mm7, mm1
        pcmpeqw mm0, [eax+ebx+ebx+color3]       ; mm0 = (color5 == color3)
        pcmpeqw mm1, [eax+ebx+ebx+color2]       ; mm1 = (color6 == color2)
        pcmpeqw mm6, mm7                        ; mm6 = (color5 == color6)

        movq    mm2, mm0
        movq    mm3, mm0

        pand    mm0, mm1                ; color5 == color3 && color6 == color2
        pxor    mm7, mm7

        pcmpeqw mm2, mm7
        pand    mm6, mm0                ; both diagonals equal and color5 == color6
        pand    mm2, mm1                ; color5 != color3 && color6 == color2

        pcmpeqw mm1, mm7

        pand    mm1, mm3                ; color5 == color3 && color6 != color2
        pxor    mm0, mm6                ; mm0 = both diagonals equal, color5 != color6
        por     mm1, mm6
        movq    mm7, mm0
        movq    [Mask26], mm2
        packsswb mm7, mm7
        movq    [Mask35], mm1

        movd    ecx, mm7
        test    ecx, ecx
        jz      near .SKIP_GUESS        ; no lane needs the neighbour vote

        ;---------------------------------------------
        ; Neighbour vote.  This section uses the lettered displacement names
        ; (colorA, colorB, colorE, ...), which are defined further down the file.
        movq    mm6, mm0
        movq    mm4, [eax+ebx+colorA]
        movq    mm5, [eax+ebx+colorB]
        pxor    mm7, mm7                ; tally = 0
        pand    mm6, [ONE]

        movq    mm0, [eax+colorE]
        movq    mm1, [eax+ebx+colorG]
        SUPER2XSAI_VOTE

        movq    mm0, [eax+colorF]
        movq    mm1, [eax+ebx+colorK]
        SUPER2XSAI_VOTE

        push    eax
        add     eax, ebx                ; shift the base one row down for the lower probes
        movq    mm0, [eax+ebx+colorH]
        movq    mm1, [eax+ebx+ebx+colorN]
        SUPER2XSAI_VOTE

        movq    mm0, [eax+ebx+colorL]
        movq    mm1, [eax+ebx+ebx+colorO]
        SUPER2XSAI_VOTE

        pop     eax
        movq    mm1, mm7
        pxor    mm0, mm0
        pcmpgtw mm7, mm0                ; mm7 = lanes with a positive tally
        pcmpgtw mm0, mm1                ; mm0 = lanes with a negative tally

        por     mm7, [Mask35]
        por     mm0, [Mask26]
        movq    [Mask35], mm7
        movq    [Mask26], mm0

.SKIP_GUESS:

        ;Start the ASSEMBLY !!! eh... compose all the results together to form the final image...

        ; mm0 = blend of color5 and color2, used for both left-hand output pixels
        movq    mm0, [eax+ebx+color5]
        movq    mm1, [eax+ebx+ebx+color2]
        movq    mm2, mm0
        movq    mm3, mm1
        movq    mm4, mm0
        movq    mm5, mm1
        SUPER2XSAI_BLEND_HALF           ; mm0 contains the interpolated values
        ;---------------------------

; Reference C for the two sections that follow.  The guard symbol is not
; defined anywhere in the visible source, so the text is skipped.
%ifdef dfhsdfhsdahdsfhdsfh

if (color5 == color3 && color2 != color6 && color4 == color5 && color5 != colorA2)
product2a = INTERPOLATE (color2, color5);
else
if (color5 == color1 && color6 == color5 && color4 != color2 && color5 != colorA0)
product2a = INTERPOLATE(color2, color5);
else
product2a = color2;

if (color2 == color6 && color5 != color3 && color1 == color2 && color2 != colorB2)
product1a = INTERPOLATE (color2, color5);
else
if (color4 == color2 && color3 == color2 && color1 != color5 && color2 != colorB0)
product1a = INTERPOLATE(color2, color5);
else
product1a = color5;

%endif

        ; ---- final1a ----
        ; mm7 = Mask26 && color1 == color2 && colorB2 != color2
        movq    mm7, [Mask26]
        movq    mm6, [eax+colorB2]
        movq    mm5, [eax+ebx+ebx+color2]
        movq    mm4, [eax+ebx+ebx+color1]
        pcmpeqw mm4, mm5
        pcmpeqw mm6, mm5
        pxor    mm5, mm5
        pand    mm7, mm4
        pcmpeqw mm6, mm5
        pand    mm7, mm6

        ; or: color3 == color2 && color4 == color2 && color5 != color1
        ;     && colorB0 != color2
        movq    mm6, [eax+ebx+ebx+color3]
        movq    mm5, [eax+ebx+ebx+color2]
        movq    mm4, [eax+ebx+ebx+color1]
        movq    mm2, [eax+ebx+color5]
        movq    mm1, [eax+ebx+color4]
        movq    mm3, [eax+colorB0]

        pcmpeqw mm2, mm4
        pcmpeqw mm6, mm5
        pcmpeqw mm1, mm5
        pcmpeqw mm3, mm5
        pxor    mm5, mm5
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand    mm6, mm1
        pand    mm2, mm3
        pand    mm6, mm2
        por     mm7, mm6

        ; selected lanes take the blend in mm0, the rest take color5
        movq    mm6, mm7
        pcmpeqw mm6, mm5
        pand    mm7, mm0

        movq    mm1, [eax+ebx+color5]
        pand    mm6, mm1
        por     mm7, mm6
        movq    [final1a], mm7          ; finished 1a

        ;--------------------------------
        ; ---- final2a ----
        ; mm7 = Mask35 && color4 == color5 && colorA2 != color5
        movq    mm7, [Mask35]
        push    eax
        add     eax, ebx
        movq    mm6, [eax+ebx+ebx+colorA2]
        pop     eax
        movq    mm5, [eax+ebx+color5]
        movq    mm4, [eax+ebx+color4]
        pcmpeqw mm4, mm5
        pcmpeqw mm6, mm5
        pxor    mm5, mm5
        pand    mm7, mm4
        pcmpeqw mm6, mm5
        pand    mm7, mm6

        ; or: color6 == color5 && color1 == color5 && color2 != color4
        ;     && colorA0 != color5
        movq    mm6, [eax+ebx+color6]
        movq    mm5, [eax+ebx+color5]
        movq    mm4, [eax+ebx+color4]
        movq    mm2, [eax+ebx+ebx+color2]
        movq    mm1, [eax+ebx+ebx+color1]
        push    eax
        add     eax, ebx
        movq    mm3, [eax+ebx+ebx+colorA0]
        pop     eax

        pcmpeqw mm2, mm4
        pcmpeqw mm6, mm5
        pcmpeqw mm1, mm5
        pcmpeqw mm3, mm5
        pxor    mm5, mm5
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand    mm6, mm1
        pand    mm2, mm3
        pand    mm6, mm2
        por     mm7, mm6

        ; selected lanes take the blend in mm0, the rest take color2
        movq    mm6, mm7
        pcmpeqw mm6, mm5
        pand    mm7, mm0

        movq    mm1, [eax+ebx+ebx+color2]
        pand    mm6, mm1
        por     mm7, mm6
        movq    [final2a], mm7          ; finished 2a

        ;--------------------------------------------

; Reference C for the two sections that follow (skipped, see above).
%ifdef dfhsdfhsdahdsfhdsfh
if (color6 == color3 && color3 == colorA1 && color2 != colorA2 && color3 != colorA0)
product2b = Q_INTERPOLATE (color3, color3, color3, color2);
else
if (color5 == color2 && color2 == colorA2 && colorA1 != color3 && color2 != colorA3)
product2b = Q_INTERPOLATE (color2, color2, color2, color3);
else
product2b = INTERPOLATE (color2, color3);

if (color6 == color3 && color6 == colorB1 && color5 != colorB2 && color6 != colorB0)
product1b = Q_INTERPOLATE (color6, color6, color6, color5);
else
if (color5 == color2 && color5 == colorB2 && colorB1 != color6 && color5 != colorB3)
product1b = Q_INTERPOLATE (color6, color5, color5, color5);
else
product1b = INTERPOLATE (color5, color6);
%endif

        ; ---- final2b ----
        push    eax
        add     eax, ebx
        pxor    mm7, mm7
        movq    mm0, [eax+ebx+ebx+colorA0]
        movq    mm1, [eax+ebx+ebx+colorA1]
        movq    mm2, [eax+ebx+ebx+colorA2]
        movq    mm3, [eax+ebx+ebx+colorA3]
        pop     eax
        movq    mm4, [eax+ebx+ebx+color2]
        movq    mm5, [eax+ebx+ebx+color3]
        movq    mm6, [eax+ebx+color6]

        ; mm0 = color6 == color3 && colorA1 == color3 && color2 != colorA2
        ;       && colorA0 != color3
        pcmpeqw mm6, mm5
        pcmpeqw mm1, mm5
        pcmpeqw mm4, mm2
        pcmpeqw mm0, mm5
        pcmpeqw mm4, mm7
        pcmpeqw mm0, mm7
        pand    mm0, mm4
        pand    mm6, mm1
        pand    mm0, mm6

        push    eax
        add     eax, ebx
        movq    mm1, [eax+ebx+ebx+colorA1]
        pop     eax
        movq    mm4, [eax+ebx+ebx+color2]
        movq    mm5, [eax+ebx+color5]
        movq    mm6, [eax+ebx+ebx+color3]

        ; mm1 = color5 == color2 && colorA2 == color2 && colorA1 != color3
        ;       && colorA3 != color2
        pcmpeqw mm5, mm4
        pcmpeqw mm2, mm4
        pcmpeqw mm1, mm6
        pcmpeqw mm3, mm4
        pcmpeqw mm1, mm7
        pcmpeqw mm3, mm7
        pand    mm2, mm5
        pand    mm1, mm3
        pand    mm1, mm2

        SUPER2XSAI_PICK_B I2333Pixel, I2223Pixel, I23Pixel, eax+ebx+ebx+color3, eax+ebx+ebx+color2, final2b

        ;-----------------------------------
        ; ---- final1b ----
        pxor    mm7, mm7
        movq    mm0, [eax+colorB0]
        movq    mm1, [eax+colorB1]
        movq    mm2, [eax+colorB2]
        movq    mm3, [eax+colorB3]
        movq    mm4, [eax+ebx+color5]
        movq    mm5, [eax+ebx+color6]
        movq    mm6, [eax+ebx+ebx+color3]

        ; mm0 = color3 == color6 && colorB1 == color6 && color5 != colorB2
        ;       && colorB0 != color6
        pcmpeqw mm6, mm5
        pcmpeqw mm1, mm5
        pcmpeqw mm4, mm2
        pcmpeqw mm0, mm5
        pcmpeqw mm4, mm7
        pcmpeqw mm0, mm7
        pand    mm0, mm4
        pand    mm6, mm1
        pand    mm0, mm6

        movq    mm1, [eax+colorB1]
        movq    mm4, [eax+ebx+color5]
        movq    mm5, [eax+ebx+ebx+color2]
        movq    mm6, [eax+ebx+color6]

        ; mm1 = color2 == color5 && colorB2 == color5 && colorB1 != color6
        ;       && colorB3 != color5
        pcmpeqw mm5, mm4
        pcmpeqw mm2, mm4
        pcmpeqw mm1, mm6
        pcmpeqw mm3, mm4
        pcmpeqw mm1, mm7
        pcmpeqw mm3, mm7
        pand    mm2, mm5
        pand    mm1, mm3
        pand    mm1, mm2

        SUPER2XSAI_PICK_B I5666Pixel, I5556Pixel, I56Pixel, eax+ebx+color5, eax+ebx+color6, final1b

        ;---------
        ; Interleave the "a" and "b" words so each source pixel becomes two
        ; output pixels per row.
        movq    mm0, [final1a]
        movq    mm4, [final2a]
        movq    mm2, [final1b]
        movq    mm6, [final2b]

        movq    mm1, mm0
        movq    mm5, mm4

        punpcklwd mm0, mm2
        punpckhwd mm1, mm2

        punpcklwd mm4, mm6
        punpckhwd mm5, mm6

%ifdef FAR_POINTER
        movq    [fs:edx], mm0
        movq    [fs:edx+8], mm1
        push    edx
        add     edx, [ebp+dstPitch]     ; second destination row
        movq    [fs:edx], mm4
        movq    [fs:edx+8], mm5
        pop     edx
%else
        movq    [edx], mm0
        movq    [edx+8], mm1
        push    edx
        add     edx, [ebp+dstPitch]     ; second destination row
        movq    [edx], mm4
        movq    [edx+8], mm5
        pop     edx
%endif

.SKIP_PROCESS:
        ; advance delta and source by 8 bytes, destination by 16 bytes
        mov     ecx, [ebp+deltaPtr]
        add     ecx, 8
        mov     [ebp+deltaPtr], ecx
        add     edx, 16
        add     eax, 8

        pop     ecx                     ; remaining width
        sub     ecx, 4
        cmp     ecx, 0
        jg      near .Loop

        ; Restore some stuff
        popad
        mov     esp, ebp
        pop     ebp
        emms
        ret


;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-----------------------------------------------------------------------------
; Helper macros for _2xSaISuperEagleLine.  Each expands to exactly the
; instruction run that the routine originally repeated inline.
;-----------------------------------------------------------------------------

; Per-word blend of mm0 and mm1:
;   mm0 = ((mm0 & colorMask) >> 1) + ((mm1 & colorMask) >> 1)
;         + (mm0 & mm1 & lowPixelMask)
; In:  mm2 = copy of mm0, mm3 = copy of mm1.
; Out: mm0 = blend.  Clobbers mm1 and mm3.
; colorMask and lowPixelMask are defined outside this part of the file.
%macro SUPEREAGLE_BLEND_HALF 0
        pand    mm0, [colorMask]
        pand    mm1, [colorMask]
        psrlw   mm0, 1
        psrlw   mm1, 1
        pand    mm3, [lowPixelMask]
        paddw   mm0, mm1
        pand    mm3, mm2
        paddw   mm0, mm3
%endmacro

; One round of the neighbour vote used when the diagonals tie.
; In:  mm0, mm1 = the two probe pixels of this round
;      mm4, mm5 = the two candidate pixels (color5, color6)
;      mm6      = per-lane weight (guess mask AND [ONE]; presumably 0 or 1,
;                 ONE is defined outside this part of the file)
;      mm7      = running tally
; Assuming mm6 lanes are 0 or 1: in every lane where mm6 is 1 the tally gains
; 1 unless both probes equal mm4, and loses 1 unless both probes equal mm5.
; Clobbers mm0-mm3.
%macro SUPEREAGLE_VOTE 0
        movq    mm2, mm0
        movq    mm3, mm1
        pcmpeqw mm0, mm4
        pcmpeqw mm1, mm4
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand    mm0, mm6
        pand    mm1, mm6
        pand    mm2, mm6
        pand    mm3, mm6
        paddw   mm0, mm1                ; matches against mm4 in this round
        paddw   mm2, mm3                ; matches against mm5 in this round

        pxor    mm3, mm3
        pcmpgtw mm0, mm6                ; set where the match count exceeds mm6
        pcmpgtw mm2, mm6
        pcmpeqw mm0, mm3                ; invert
        pcmpeqw mm2, mm3
        pand    mm0, mm6
        pand    mm2, mm6
        paddw   mm7, mm0
        psubw   mm7, mm2
%endmacro

;-----------------------------------------------------------------------------
; _2xSaISuperEagleLine  (__2xSaISuperEagleLine when __DJGPP__ is defined)
;
; Scales one line of 16-bit pixels with MMX.  Every loop iteration consumes
; four source pixels and stores eight pixels to each of two destination rows
; (dstOffset and dstOffset + dstPitch).
;
; Stack arguments (dwords, read via ebp):
;   srcPtr      source line; the row one srcPitch above and the two rows
;               below are read too, starting 2 bytes before the current
;               position
;   deltaPtr    buffer compared against the source with srcPitch as its row
;               stride; a group whose compares all match is skipped.  This
;               argument slot is advanced in place by 8 per iteration.
;   srcPitch    byte distance between source rows
;   width       pixel count; the loop subtracts 4 per iteration and runs
;               while the remainder is greater than zero
;   dstOffset   destination address
;   dstPitch    byte distance between the two destination rows
;   dstSegment  selector loaded into fs (FAR_POINTER builds only)
;
; Registers inside the loop:
;   eax = source position one row above srcPtr      ebx = srcPitch
;   edx = destination position                      ecx = scratch
;   The remaining width is kept on the stack across the loop body.
;
; All general registers are restored by popad; emms is executed before ret.
; Intermediate results are kept in fixed memory locations defined outside
; this part of the file (I56Pixel, I23Pixel, product1a, product1b,
; product2a, product2b, Mask26, Mask26b, Mask35, Mask35b, final1a, final1b,
; final2a, final2b), so calls share that scratch storage.
;-----------------------------------------------------------------------------
%ifdef __DJGPP__
__2xSaISuperEagleLine:
%else
_2xSaISuperEagleLine:
%endif
        ; Store some stuff
        push    ebp
        mov     ebp, esp
        pushad

        ; Prepare the destination
%ifdef FAR_POINTER
        ; Set the selector
        mov     eax, [ebp+dstSegment]
        mov     fs, ax
%endif
        mov     edx, [ebp+dstOffset]    ; edx points to the screen

        ; Prepare the source
        mov     eax, [ebp+srcPtr]       ; eax = address of the current line
        mov     ebx, [ebp+srcPitch]     ; ebx = source pitch
        mov     ecx, [ebp+width]        ; ecx = number of pixels to process
        sub     eax, ebx                ; eax now points to colorB1

        ; Main Loop
.Loop:
        push    ecx                     ; keep the remaining width on the stack

        ;-----Check Delta------------------
        mov     ecx, [ebp+deltaPtr]

        ; load the leftmost and rightmost qword of each of the four rows
        movq    mm0, [eax+colorB0]
        movq    mm1, [eax+colorB3]
        movq    mm2, [eax+ebx+color4]
        movq    mm3, [eax+ebx+colorS2]
        movq    mm4, [eax+ebx+ebx+color1]
        movq    mm5, [eax+ebx+ebx+colorS1]
        push    eax
        add     eax, ebx                ; reach the fourth row (base + 3*pitch)
        movq    mm6, [eax+ebx+ebx+colorA0]
        movq    mm7, [eax+ebx+ebx+colorA3]
        pop     eax

        ; compare to delta (delta addresses carry an extra +2 displacement)
        pcmpeqw mm0, [ecx+2+colorB0]
        pcmpeqw mm1, [ecx+2+colorB3]
        pcmpeqw mm2, [ecx+ebx+2+color4]
        pcmpeqw mm3, [ecx+ebx+2+colorS2]
        pcmpeqw mm4, [ecx+ebx+ebx+2+color1]
        pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1]
        add     ecx, ebx
        pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0]
        pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3]
        sub     ecx, ebx

        ; combine the eight compare results lane by lane
        pand    mm0, mm1
        pand    mm2, mm3
        pand    mm4, mm5
        pand    mm6, mm7
        pand    mm0, mm2
        pand    mm4, mm6
        pxor    mm7, mm7
        pand    mm0, mm4
        movq    mm6, [eax+colorB0]
        pcmpeqw mm7, mm0                ; mm7 = lanes where some compare failed

        movq    [ecx+2+colorB0], mm6    ; record the top-row qword in the delta buffer

        packsswb mm7, mm7
        movd    ecx, mm7
        test    ecx, ecx
        jz      near .SKIP_PROCESS      ; nothing differs, skip this group

        ;End Delta

        ;---------------------------------
        ; Blends of color5 and color6
        movq    mm0, [eax+ebx+color5]
        movq    mm1, [eax+ebx+color6]
        movq    mm2, mm0
        movq    mm3, mm1
        movq    mm4, mm0                ; mm4 keeps color5
        movq    mm5, mm1                ; mm5 keeps color6
        SUPEREAGLE_BLEND_HALF           ; mm0 contains the interpolated values
        movq    [I56Pixel], mm0
        movq    mm7, mm0

        ;-------------------
        movq    mm0, mm7
        movq    mm1, mm4                ; 5,5,5,6
        movq    mm2, mm0
        movq    mm3, mm1
        SUPEREAGLE_BLEND_HALF           ; mm0 contains the interpolated values
        movq    [product1a], mm0

        ;--------------------
        movq    mm0, mm7
        movq    mm1, mm5                ; 6,6,6,5
        movq    mm2, mm0
        movq    mm3, mm1
        SUPEREAGLE_BLEND_HALF
        movq    [product1b], mm0

        ;-------------------------
        ; Blends of color2 and color3
        movq    mm0, [eax+ebx+ebx+color2]
        movq    mm1, [eax+ebx+ebx+color3]
        movq    mm2, mm0
        movq    mm3, mm1
        movq    mm4, mm0                ; mm4 keeps color2
        movq    mm5, mm1                ; mm5 keeps color3
        SUPEREAGLE_BLEND_HALF
        movq    [I23Pixel], mm0
        movq    mm7, mm0

        ;---------------------
        movq    mm0, mm7
        movq    mm1, mm4                ; 2,2,2,3
        movq    mm2, mm0
        movq    mm3, mm1
        SUPEREAGLE_BLEND_HALF
        movq    [product2a], mm0

        ;----------------------
        movq    mm0, mm7
        movq    mm1, mm5                ; 3,3,3,2
        movq    mm2, mm0
        movq    mm3, mm1
        SUPEREAGLE_BLEND_HALF
        movq    [product2b], mm0

        ;////////////////////////////////
        ; Decide which "branch" to take
        ;--------------------------------
        movq    mm4, [eax+ebx+color5]
        movq    mm5, [eax+ebx+color6]
        movq    mm6, [eax+ebx+ebx+color3]
        movq    mm7, [eax+ebx+ebx+color2]

        ; Mask35 = color5 == color3 && color6 != color2
        pxor    mm3, mm3
        movq    mm0, mm4
        movq    mm1, mm5

        pcmpeqw mm0, mm6
        pcmpeqw mm1, mm7
        pcmpeqw mm1, mm3
        pand    mm0, mm1
        movq    [Mask35], mm0

        ; Mask35b = Mask35 && ((colorS1 == color5 && color4 == color5)
        ;                   || (colorA2 == color5 && colorB1 == color5))
        movq    mm0, [eax+ebx+ebx+colorS1]
        movq    mm1, [eax+ebx+color4]
        push    eax
        add     eax, ebx
        movq    mm2, [eax+ebx+ebx+colorA2]
        pop     eax
        movq    mm3, [eax+colorB1]
        pcmpeqw mm0, mm4
        pcmpeqw mm1, mm4
        pcmpeqw mm2, mm4
        pcmpeqw mm3, mm4
        pand    mm0, mm1
        pand    mm2, mm3
        por     mm0, mm2
        pand    mm0, [Mask35]
        movq    [Mask35b], mm0

        ;-----------
        ; Mask26 = color5 != color3 && color6 == color2
        pxor    mm3, mm3
        movq    mm0, mm4
        movq    mm1, mm5

        pcmpeqw mm0, mm6
        pcmpeqw mm1, mm7
        pcmpeqw mm0, mm3
        pand    mm0, mm1
        movq    [Mask26], mm0

        ; Mask26b = Mask26 && ((color1 == color6 && colorS2 == color6)
        ;                   || (colorA1 == color6 && colorB2 == color6))
        movq    mm0, [eax+ebx+ebx+color1]
        movq    mm1, [eax+ebx+colorS2]
        push    eax
        add     eax, ebx
        movq    mm2, [eax+ebx+ebx+colorA1]
        pop     eax
        movq    mm3, [eax+colorB2]
        pcmpeqw mm0, mm5
        pcmpeqw mm1, mm5
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand    mm0, mm1
        pand    mm2, mm3
        por     mm0, mm2
        pand    mm0, [Mask26]
        movq    [Mask26b], mm0

        ;--------------------
        ; mm0 = color5 == color3 && color6 == color2 && color5 != color6
        movq    mm0, mm4
        movq    mm1, mm5
        movq    mm2, mm0

        pcmpeqw mm2, mm1
        pcmpeqw mm0, mm6
        pcmpeqw mm1, mm7
        pand    mm0, mm1
        pand    mm2, mm0
        pxor    mm0, mm2
        movq    mm7, mm0

        ;------------------
        packsswb mm7, mm7
        movd    ecx, mm7
        test    ecx, ecx
        jz      near .SKIP_GUESS        ; no lane needs the neighbour vote

        ;---------------------------------------------
        ; Map of the pixels:           I|E F|J
        ;                              G|A B|K
        ;                              H|C D|L
        ;                              M|N O|P
        movq    mm6, mm0
        movq    mm4, [eax+ebx+color5]
        movq    mm5, [eax+ebx+color6]
        pxor    mm7, mm7                ; tally = 0
        pand    mm6, [ONE]

        movq    mm0, [eax+colorB1]
        movq    mm1, [eax+ebx+color4]
        SUPEREAGLE_VOTE

        movq    mm0, [eax+colorB2]
        movq    mm1, [eax+ebx+colorS2]
        SUPEREAGLE_VOTE

        push    eax
        add     eax, ebx                ; shift the base one row down for the lower probes
        movq    mm0, [eax+ebx+color1]
        movq    mm1, [eax+ebx+ebx+colorA1]
        SUPEREAGLE_VOTE

        movq    mm0, [eax+ebx+colorS1]
        movq    mm1, [eax+ebx+ebx+colorA2]
        SUPEREAGLE_VOTE

        pop     eax
        movq    mm1, mm7
        pxor    mm0, mm0
        pcmpgtw mm7, mm0                ; mm7 = lanes with a positive tally
        pcmpgtw mm0, mm1                ; mm0 = lanes with a negative tally

        por     mm7, [Mask35]
        por     mm0, [Mask26]
        movq    [Mask35], mm7
        movq    [Mask26], mm0

.SKIP_GUESS:
        ;Start the ASSEMBLY !!!

        movq    mm4, [Mask35]
        movq    mm5, [Mask26]
        movq    mm6, [Mask35b]
        movq    mm7, [Mask26b]

        ; mm3 = (color5 == color2 || color6 == color3) outside Mask35|Mask26
        movq    mm0, [eax+ebx+color5]
        movq    mm1, [eax+ebx+color6]
        movq    mm2, [eax+ebx+ebx+color2]
        movq    mm3, [eax+ebx+ebx+color3]
        pcmpeqw mm0, mm2
        pcmpeqw mm1, mm3
        movq    mm2, mm4
        movq    mm3, mm5
        por     mm0, mm1
        por     mm2, mm3
        pand    mm2, mm0
        pxor    mm0, mm2
        movq    mm3, mm0

        ; mm2 = lanes in none of mm3, Mask35, Mask26
        ; mm4 = Mask35 xor Mask35b, mm5 = Mask26 xor Mask26b
        movq    mm2, mm0
        pxor    mm0, mm0
        por     mm2, mm4
        pxor    mm4, mm6
        por     mm2, mm5
        pxor    mm5, mm7
        pcmpeqw mm2, mm0
        ;----------------

        ; final1a = color5 & (mm3|mm4|mm6) | I56Pixel & mm5
        ;         | product1b & mm7 | product1a & mm2
        movq    mm0, [eax+ebx+color5]
        movq    mm1, mm3
        por     mm1, mm4
        por     mm1, mm6
        pand    mm0, mm1
        movq    mm1, mm5
        pand    mm1, [I56Pixel]
        por     mm0, mm1
        movq    mm1, mm7
        pand    mm1, [product1b]
        por     mm0, mm1
        movq    mm1, mm2
        pand    mm1, [product1a]
        por     mm0, mm1
        movq    [final1a], mm0

        ; final1b = color6 & (mm3|mm5|mm7) | I56Pixel & mm4
        ;         | product1a & mm6 | product1b & mm2
        movq    mm0, [eax+ebx+color6]
        movq    mm1, mm3
        por     mm1, mm5
        por     mm1, mm7
        pand    mm0, mm1
        movq    mm1, mm4
        pand    mm1, [I56Pixel]
        por     mm0, mm1
        movq    mm1, mm6
        pand    mm1, [product1a]
        por     mm0, mm1
        movq    mm1, mm2
        pand    mm1, [product1b]
        por     mm0, mm1
        movq    [final1b], mm0

        ; final2a = color2 & (mm3|mm5|mm7) | I23Pixel & mm4
        ;         | product2b & mm6 | product2a & mm2
        movq    mm0, [eax+ebx+ebx+color2]
        movq    mm1, mm3
        por     mm1, mm5
        por     mm1, mm7
        pand    mm0, mm1
        movq    mm1, mm4
        pand    mm1, [I23Pixel]
        por     mm0, mm1
        movq    mm1, mm6
        pand    mm1, [product2b]
        por     mm0, mm1
        movq    mm1, mm2
        pand    mm1, [product2a]
        por     mm0, mm1
        movq    [final2a], mm0

        ; final2b = color3 & (mm3|mm4|mm6) | I23Pixel & mm5
        ;         | product2a & mm7 | product2b & mm2
        movq    mm0, [eax+ebx+ebx+color3]
        movq    mm1, mm3
        por     mm1, mm4
        por     mm1, mm6
        pand    mm0, mm1
        movq    mm1, mm5
        pand    mm1, [I23Pixel]
        por     mm0, mm1
        movq    mm1, mm7
        pand    mm1, [product2a]
        por     mm0, mm1
        movq    mm1, mm2
        pand    mm1, [product2b]
        por     mm0, mm1
        movq    [final2b], mm0

        ; Interleave the "a" and "b" words so each source pixel becomes two
        ; output pixels per row.
        movq    mm0, [final1a]
        movq    mm2, [final1b]
        movq    mm1, mm0
        movq    mm4, [final2a]
        movq    mm6, [final2b]
        movq    mm5, mm4
        punpcklwd mm0, mm2
        punpckhwd mm1, mm2
        punpcklwd mm4, mm6
        punpckhwd mm5, mm6

%ifdef FAR_POINTER
        movq    [fs:edx], mm0
        movq    [fs:edx+8], mm1
        push    edx
        add     edx, [ebp+dstPitch]     ; second destination row
        movq    [fs:edx], mm4
        movq    [fs:edx+8], mm5
        pop     edx
%else
        movq    [edx], mm0
        movq    [edx+8], mm1
        push    edx
        add     edx, [ebp+dstPitch]     ; second destination row
        movq    [edx], mm4
        movq    [edx+8], mm5
        pop     edx
%endif

.SKIP_PROCESS:
        ; advance delta and source by 8 bytes, destination by 16 bytes
        mov     ecx, [ebp+deltaPtr]
        add     ecx, 8
        mov     [ebp+deltaPtr], ecx
        add     edx, 16
        add     eax, 8

        pop     ecx                     ; remaining width
        sub     ecx, 4
        cmp     ecx, 0
        jg      near .Loop

        ; Restore some stuff
        popad
        mov     esp, ebp
        pop     ebp
        emms
        ret


;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------


;This is version 0.50
; Lettered column displacements (bytes), one group per row of the pixel map
;   I|E F|J
;   G|A B|K
;   H|C D|L
;   M|N O|P
colorI          equ -2
colorE          equ 0
colorF          equ 2
colorJ          equ 4

colorG          equ -2
colorA          equ 0
colorB          equ 2
colorK          equ 4

colorH          equ -2
colorC          equ 0
colorD          equ 2
colorL          equ 4

; The operand of the colorM definition starts the following source line,
; so the statement is joined to it with a NASM line continuation.
colorM          equ \
-2                                      ; operand of the "colorM equ" statement that ends the previous line

colorN          equ     0
colorO          equ     2
colorP          equ     4

;-----------------------------------------------------------------------
; Byte offsets of the 4x4 neighbourhood used by _2xSaILine (16-bit pixels).
; Row 0 is addressed as [eax], row 1 as [eax+ebx], row 2 as [eax+2*ebx]
; and row 3 as [eax+3*ebx], where eax = srcPtr - srcPitch, ebx = srcPitch:
;
;               -2   0  +2  +4
;       row 0    I | E   F | J
;       row 1    G | A   B | K          <- A is the pixel at srcPtr
;       row 2    H | C   D | L
;       row 3    M | N   O | P
;-----------------------------------------------------------------------

;-----------------------------------------------------------------------
; SAI_GUESS_TALLY - one vote of the "which diagonal wins" heuristic.
; In:    mm0, mm1 = two neighbour qwords (4 pixels each)
;        mm4 = colorA, mm5 = colorB
;        mm6 = 0x0001 in every lane that is still undecided, else 0
;        mm7 = running signed tally per lane
; Out:   mm7 += 1 where at most one of the two neighbours equals colorA
;        mm7 -= 1 where at most one of the two neighbours equals colorB
;        (only in undecided lanes)
; Clobb: mm0, mm1, mm2, mm3
;-----------------------------------------------------------------------
%macro SAI_GUESS_TALLY 0
        movq    mm2, mm0
        movq    mm3, mm1
        pcmpeqw mm0, mm4                        ; neighbour 1 == A
        pcmpeqw mm1, mm4                        ; neighbour 2 == A
        pcmpeqw mm2, mm5                        ; neighbour 1 == B
        pcmpeqw mm3, mm5                        ; neighbour 2 == B
        pand    mm0, mm6                        ; turn the 0xFFFF hits into 0/1
        pand    mm1, mm6
        pand    mm2, mm6
        pand    mm3, mm6
        paddw   mm0, mm1                        ; mm0 = number of neighbours equal to A
        paddw   mm2, mm3                        ; mm2 = number of neighbours equal to B

        pxor    mm3, mm3
        pcmpgtw mm0, mm6                        ; count > 1 ?
        pcmpgtw mm2, mm6
        pcmpeqw mm0, mm3                        ; invert: count <= 1
        pcmpeqw mm2, mm3
        pand    mm0, mm6                        ; back to 0/1, undecided lanes only
        pand    mm2, mm6
        paddw   mm7, mm0
        psubw   mm7, mm2
%endmacro

;-----------------------------------------------------------------------
; _2xSaILine   (assembled as __2xSaILine when __DJGPP__ is defined)
;
; Scales one row of 16-bit pixels to twice its width and height: every
; group of 4 source pixels (8 bytes) produces 16 bytes at dstOffset and
; 16 bytes at dstOffset + dstPitch.
;
; Stack arguments (offsets come from the equates at the top of the file):
;   [ebp+srcPtr]     source row to scale (pixel A of the first group)
;   [ebp+deltaPtr]   copy of the source that is compared against it; a group
;                    is skipped when nothing in its neighbourhood changed.
;                    Delta row k is compared with source row
;                    (srcPtr - srcPitch) + k*srcPitch, shifted by +2 bytes.
;                    This stack slot is advanced by 8 on every iteration.
;   [ebp+srcPitch]   byte distance between rows (used for source AND delta)
;   [ebp+width]      pixels to process; consumed 4 at a time
;   [ebp+dstOffset]  first output row
;   [ebp+dstPitch]   byte distance between output rows
;   [ebp+dstSegment] selector loaded into fs (FAR_POINTER builds only)
;
; Memory read per group: bytes -2 .. +11 of the four rows srcPtr - srcPitch
; to srcPtr + 2*srcPitch, and the matching delta bytes.
;
; Registers: all general registers are preserved (pushad/popad); the MMX
; state is released with emms before returning.
; Scratch:   the static qwords Mask1, Mask2 and ACPixel in .bss.
; Notes:     the loop body runs before the count is tested, so one group
;            is always processed, and width is effectively rounded up to a
;            multiple of 4.
;-----------------------------------------------------------------------
%ifdef __DJGPP__
__2xSaILine:
%else
_2xSaILine:
%endif
        push    ebp
        mov     ebp, esp
        pushad

        ; --- destination ---------------------------------------------
%ifdef FAR_POINTER
        mov     eax, [ebp+dstSegment]           ; selector for the far destination
        mov     fs, ax
%endif
        mov     edx, [ebp+dstOffset]            ; edx = output position

        ; --- source --------------------------------------------------
        mov     eax, [ebp+srcPtr]               ; eax -> colorA
        mov     ebx, [ebp+srcPitch]             ; ebx = row pitch
        mov     ecx, [ebp+width]                ; ecx = pixels left
        sub     eax, ebx                        ; eax -> colorE (row 0 of the neighbourhood)

        ; ==============================================================
        ; Main loop: eax = row 0, ebx = pitch, edx = output,
        ;            remaining pixel count is kept on the stack.
        ; ==============================================================
.Loop:  push    ecx

        ; --- delta check: has anything in the 4-row window changed? ---
        mov     ecx, [ebp+deltaPtr]

        movq    mm0, [eax+colorI]
        movq    mm1, [eax+colorJ]
        movq    mm2, [eax+ebx+colorG]
        movq    mm3, [eax+ebx+colorK]
        movq    mm4, [eax+ebx+ebx+colorH]
        movq    mm5, [eax+ebx+ebx+colorL]
        push    eax
        add     eax, ebx                        ; temporarily step one row to reach row 3
        movq    mm6, [eax+ebx+ebx+colorM]
        movq    mm7, [eax+ebx+ebx+colorP]
        pop     eax

        pcmpeqw mm0, [ecx+2+colorI]
        pcmpeqw mm1, [ecx+2+colorJ]             ; was written as colorK (same offset, 4); J is the row-0 pixel loaded above
        pcmpeqw mm2, [ecx+ebx+2+colorG]
        pcmpeqw mm3, [ecx+ebx+2+colorK]
        pcmpeqw mm4, [ecx+ebx+ebx+2+colorH]
        pcmpeqw mm5, [ecx+ebx+ebx+2+colorL]
        add     ecx, ebx
        pcmpeqw mm6, [ecx+ebx+ebx+2+colorM]
        pcmpeqw mm7, [ecx+ebx+ebx+2+colorP]
        sub     ecx, ebx

        pand    mm0, mm1                        ; fold the eight comparisons together
        pand    mm2, mm3
        pand    mm4, mm5
        pand    mm6, mm7
        pand    mm0, mm2
        pand    mm4, mm6
        pxor    mm7, mm7
        pand    mm0, mm4                        ; lane = 0xFFFF where every comparison matched
        movq    mm6, [eax+colorI]
        pcmpeqw mm7, mm0                        ; lane = 0xFFFF where something differed

        movq    [ecx+2+colorI], mm6             ; refresh delta row 0 (done whether or not we skip)

        packsswb mm7, mm7                       ; squeeze the four lanes into 32 bits
        movd    ecx, mm7
        test    ecx, ecx
        jz      near .SKIP_PROCESS              ; nothing changed -> leave the old output alone

        ; ==============================================================
        ; First output row: pixel A and the pixel between A and B
        ; ==============================================================

        ; --- Mask1: lanes where the new pixel takes colorA ------------
        ; if ((A == D) && (B != C) && (A == E) && (B == L))
        movq    mm0, [eax+ebx+colorA]           ; mm0, mm1 = colorA
        movq    mm2, [eax+ebx+colorB]           ; mm2, mm3 = colorB

        movq    mm1, mm0
        movq    mm3, mm2

        pcmpeqw mm0, [eax+ebx+ebx+colorD]
        pcmpeqw mm1, [eax+colorE]
        pcmpeqw mm2, [eax+ebx+ebx+colorL]
        pcmpeqw mm3, [eax+ebx+ebx+colorC]

        pand    mm0, mm1
        pxor    mm1, mm1
        pand    mm0, mm2
        pcmpeqw mm3, mm1                        ; negate: B != C
        pand    mm0, mm3                        ; result in mm0

        ; if ((A == C) && (B != E) && (A == F) && (B == J))
        movq    mm4, [eax+ebx+colorA]           ; mm4, mm5 = colorA
        movq    mm6, [eax+ebx+colorB]           ; mm6, mm7 = colorB
        movq    mm5, mm4
        movq    mm7, mm6

        pcmpeqw mm4, [eax+ebx+ebx+colorC]
        pcmpeqw mm5, [eax+colorF]
        pcmpeqw mm6, [eax+colorJ]
        pcmpeqw mm7, [eax+colorE]

        pand    mm4, mm5
        pxor    mm5, mm5
        pand    mm4, mm6
        pcmpeqw mm7, mm5                        ; negate: B != E
        pand    mm4, mm7                        ; result in mm4

        por     mm0, mm4                        ; either rule selects A
        movq    [Mask1], mm0

        ; --- Mask2: lanes where the new pixel takes colorB ------------
        ; if ((B == C) && (A != D) && (B == F) && (A == H))
        movq    mm0, [eax+ebx+colorB]           ; mm0, mm1 = colorB
        movq    mm2, [eax+ebx+colorA]           ; mm2, mm3 = colorA
        movq    mm1, mm0
        movq    mm3, mm2

        pcmpeqw mm0, [eax+ebx+ebx+colorC]
        pcmpeqw mm1, [eax+colorF]
        pcmpeqw mm2, [eax+ebx+ebx+colorH]
        pcmpeqw mm3, [eax+ebx+ebx+colorD]

        pand    mm0, mm1
        pxor    mm1, mm1
        pand    mm0, mm2
        pcmpeqw mm3, mm1                        ; negate: A != D
        pand    mm0, mm3                        ; result in mm0

        ; if ((B == E) && (B == D) && (A != F) && (A == I))
        movq    mm4, [eax+ebx+colorB]           ; mm4, mm5 = colorB
        movq    mm6, [eax+ebx+colorA]           ; mm6, mm7 = colorA
        movq    mm5, mm4
        movq    mm7, mm6

        pcmpeqw mm4, [eax+ebx+ebx+colorD]
        pcmpeqw mm5, [eax+colorE]
        pcmpeqw mm6, [eax+colorI]
        pcmpeqw mm7, [eax+colorF]

        pand    mm4, mm5
        pxor    mm5, mm5
        pand    mm4, mm6
        pcmpeqw mm7, mm5                        ; negate: A != F
        pand    mm4, mm7                        ; result in mm4

        por     mm0, mm4                        ; either rule selects B
        movq    [Mask2], mm0

        ; --- average of A and B --------------------------------------
        ; (A & colorMask)/2 + (B & colorMask)/2 + (A & B & lowPixelMask)
        movq    mm0, [eax+ebx+colorA]
        movq    mm1, [eax+ebx+colorB]

        movq    mm2, mm0
        movq    mm3, mm1

        pand    mm0, [colorMask]
        pand    mm1, [colorMask]

        psrlw   mm0, 1
        psrlw   mm1, 1

        pand    mm3, [lowPixelMask]
        paddw   mm0, mm1

        pand    mm3, mm2
        paddw   mm0, mm3                        ; mm0 = interpolated values

        ; --- assemble: A where Mask1, B where Mask2, else the average --
        movq    mm1, [eax+ebx+colorA]
        movq    mm2, [eax+ebx+colorB]

        movq    mm3, [Mask1]
        movq    mm5, mm1                        ; mm5, mm6 keep untouched copies of A
        movq    mm4, [Mask2]
        movq    mm6, mm1

        pand    mm1, mm3
        por     mm3, mm4
        pxor    mm7, mm7
        pand    mm2, mm4

        pcmpeqw mm3, mm7                        ; lanes where neither mask is set
        por     mm1, mm2
        pand    mm0, mm3

        por     mm0, mm1

        punpcklwd mm5, mm0                      ; A0,new0,A1,new1
        punpckhwd mm6, mm0                      ; A2,new2,A3,new3

%ifdef FAR_POINTER
        movq    [fs:edx], mm5
        movq    [fs:edx+8], mm6
%else
        movq    [edx], mm5
        movq    [edx+8], mm6
%endif

        ; ==============================================================
        ; Second output row: pixel between A and C, and the centre pixel
        ; ==============================================================

        ; --- Mask1: lanes where the A/C pixel takes colorA ------------
        ; if ((A == D) && (B != C) && (A == G) && (C == O))
        movq    mm0, [eax+ebx+colorA]           ; mm0, mm1 = colorA
        movq    mm2, [eax+ebx+ebx+colorC]       ; mm2, mm3 = colorC
        movq    mm1, mm0
        movq    mm3, mm2

        push    eax
        add     eax, ebx                        ; eax -> row 1 so that row 3 is reachable
        pcmpeqw mm0, [eax+ebx+colorD]
        pcmpeqw mm1, [eax+colorG]
        pcmpeqw mm2, [eax+ebx+ebx+colorO]
        pcmpeqw mm3, [eax+colorB]
        pop     eax

        pand    mm0, mm1
        pxor    mm1, mm1
        pand    mm0, mm2
        pcmpeqw mm3, mm1                        ; negate: B != C
        pand    mm0, mm3                        ; result in mm0

        ; if ((A == B) && (G != C) && (A == H) && (C == M))
        movq    mm4, [eax+ebx+colorA]           ; mm4, mm5 = colorA
        movq    mm6, [eax+ebx+ebx+colorC]       ; mm6, mm7 = colorC
        movq    mm5, mm4
        movq    mm7, mm6

        push    eax
        add     eax, ebx                        ; eax -> row 1 again
        pcmpeqw mm4, [eax+ebx+colorH]
        pcmpeqw mm5, [eax+colorB]
        pcmpeqw mm6, [eax+ebx+ebx+colorM]
        pcmpeqw mm7, [eax+colorG]
        pop     eax

        pand    mm4, mm5
        pxor    mm5, mm5
        pand    mm4, mm6
        pcmpeqw mm7, mm5                        ; negate: G != C
        pand    mm4, mm7                        ; result in mm4

        por     mm0, mm4                        ; either rule selects A
        movq    [Mask1], mm0

        ; --- Mask2: lanes where the A/C pixel takes colorC ------------
        ; if ((B == C) && (A != D) && (C == H) && (A == F))
        movq    mm0, [eax+ebx+ebx+colorC]       ; mm0, mm1 = colorC
        movq    mm2, [eax+ebx+colorA]           ; mm2, mm3 = colorA
        movq    mm1, mm0
        movq    mm3, mm2

        pcmpeqw mm0, [eax+ebx+colorB]
        pcmpeqw mm1, [eax+ebx+ebx+colorH]
        pcmpeqw mm2, [eax+colorF]
        pcmpeqw mm3, [eax+ebx+ebx+colorD]

        pand    mm0, mm1
        pxor    mm1, mm1
        pand    mm0, mm2
        pcmpeqw mm3, mm1                        ; negate: A != D
        pand    mm0, mm3                        ; result in mm0

        ; if ((C == G) && (C == D) && (A != H) && (A == I))
        movq    mm4, [eax+ebx+ebx+colorC]       ; mm4, mm5 = colorC
        movq    mm6, [eax+ebx+colorA]           ; mm6, mm7 = colorA
        movq    mm5, mm4
        movq    mm7, mm6

        pcmpeqw mm4, [eax+ebx+ebx+colorD]
        pcmpeqw mm5, [eax+ebx+colorG]
        pcmpeqw mm6, [eax+colorI]
        pcmpeqw mm7, [eax+ebx+ebx+colorH]

        pand    mm4, mm5
        pxor    mm5, mm5
        pand    mm4, mm6
        pcmpeqw mm7, mm5                        ; negate: A != H
        pand    mm4, mm7                        ; result in mm4

        por     mm0, mm4                        ; either rule selects C
        movq    [Mask2], mm0

        ; --- average of A and C --------------------------------------
        movq    mm0, [eax+ebx+colorA]
        movq    mm1, [eax+ebx+ebx+colorC]

        movq    mm2, mm0
        movq    mm3, mm1

        pand    mm0, [colorMask]
        pand    mm1, [colorMask]

        psrlw   mm0, 1
        psrlw   mm1, 1

        pand    mm3, [lowPixelMask]
        paddw   mm0, mm1

        pand    mm3, mm2
        paddw   mm0, mm3                        ; mm0 = interpolated values

        ; --- assemble: A where Mask1, C where Mask2, else the average --
        movq    mm1, [eax+ebx+colorA]
        movq    mm2, [eax+ebx+ebx+colorC]

        movq    mm3, [Mask1]
        movq    mm4, [Mask2]

        pand    mm1, mm3
        pand    mm2, mm4

        por     mm3, mm4
        pxor    mm7, mm7
        por     mm1, mm2

        pcmpeqw mm3, mm7                        ; lanes where neither mask is set
        pand    mm0, mm3
        por     mm0, mm1
        movq    [ACPixel], mm0                  ; kept until the second row is written

        ; --- centre pixel: classify each lane -------------------------
        ;   Mask1 (take A): (A == D && B != C) or all four equal
        ;   Mask2 (take B): (A != D && B == C)
        ;   mm0   (guess) : A == D && B == C but A != B
        movq    mm0, [eax+ebx+colorA]
        movq    mm1, [eax+ebx+colorB]
        movq    mm6, mm0
        movq    mm7, mm1
        pcmpeqw mm0, [eax+ebx+ebx+colorD]       ; A == D
        pcmpeqw mm1, [eax+ebx+ebx+colorC]       ; B == C
        pcmpeqw mm6, mm7                        ; A == B

        movq    mm2, mm0
        movq    mm3, mm0

        pand    mm0, mm1                        ; A == D && B == C
        pxor    mm7, mm7

        pcmpeqw mm2, mm7                        ; A != D
        pand    mm6, mm0                        ; A == B == C == D
        pand    mm2, mm1                        ; A != D && B == C

        pcmpeqw mm1, mm7                        ; B != C

        pand    mm1, mm3                        ; A == D && B != C
        pxor    mm0, mm6                        ; drop the all-equal lanes from the guess set
        por     mm1, mm6                        ; all-equal lanes simply take A
        movq    mm7, mm0
        movq    [Mask2], mm2
        packsswb mm7, mm7
        movq    [Mask1], mm1

        movd    ecx, mm7
        test    ecx, ecx
        jz      near .SKIP_GUESS                ; no lane needs the vote

        ; --- vote between the two diagonals ---------------------------
        ; Map of the pixels:    I|E F|J
        ;                       G|A B|K
        ;                       H|C D|L
        ;                       M|N O|P
        movq    mm6, mm0
        movq    mm4, [eax+ebx+colorA]
        movq    mm5, [eax+ebx+colorB]
        pxor    mm7, mm7                        ; tally starts at zero
        pand    mm6, [ONE]                      ; 1 in the lanes that must be guessed

        movq    mm0, [eax+colorE]
        movq    mm1, [eax+ebx+colorG]
        SAI_GUESS_TALLY

        movq    mm0, [eax+colorF]
        movq    mm1, [eax+ebx+colorK]
        SAI_GUESS_TALLY

        push    eax
        add     eax, ebx                        ; eax -> row 1 so that row 3 is reachable
        movq    mm0, [eax+ebx+colorH]
        movq    mm1, [eax+ebx+ebx+colorN]
        SAI_GUESS_TALLY

        movq    mm0, [eax+ebx+colorL]
        movq    mm1, [eax+ebx+ebx+colorO]
        SAI_GUESS_TALLY

        pop     eax
        movq    mm1, mm7
        pxor    mm0, mm0
        pcmpgtw mm7, mm0                        ; tally > 0 -> take A
        pcmpgtw mm0, mm1                        ; tally < 0 -> take B

        por     mm7, [Mask1]
        por     mm0, [Mask2]
        movq    [Mask1], mm7
        movq    [Mask2], mm0

.SKIP_GUESS:
        ; --- average of A, B, C and D --------------------------------
        ; high bits: sum of (x & qcolorMask) >> 2
        ; low bits : ((sum of x & qlowpixelMask) >> 2) & qlowpixelMask
        movq    mm0, [eax+ebx+colorA]
        movq    mm1, [eax+ebx+colorB]
        movq    mm4, mm0
        movq    mm2, [eax+ebx+ebx+colorC]
        movq    mm5, mm1
        movq    mm3, [qcolorMask]
        movq    mm6, mm2
        movq    mm7, [qlowpixelMask]

        pand    mm0, mm3
        pand    mm1, mm3
        pand    mm2, mm3
        pand    mm3, [eax+ebx+ebx+colorD]       ; mm3 = D & qcolorMask

        psrlw   mm0, 2
        pand    mm4, mm7
        psrlw   mm1, 2
        pand    mm5, mm7
        psrlw   mm2, 2
        pand    mm6, mm7
        psrlw   mm3, 2
        pand    mm7, [eax+ebx+ebx+colorD]       ; mm7 = D & qlowpixelMask

        paddw   mm0, mm1
        paddw   mm2, mm3

        paddw   mm4, mm5
        paddw   mm6, mm7

        paddw   mm4, mm6
        paddw   mm0, mm2
        psrlw   mm4, 2
        pand    mm4, [qlowpixelMask]
        paddw   mm0, mm4                        ; mm0 = interpolated value of A, B, C and D

        ; --- assemble: A where Mask1, B where Mask2, else the average --
        movq    mm1, [Mask1]
        movq    mm2, [Mask2]
        movq    mm4, [eax+ebx+colorA]
        movq    mm5, [eax+ebx+colorB]
        pand    mm4, mm1
        pand    mm5, mm2

        pxor    mm7, mm7
        por     mm1, mm2
        por     mm4, mm5
        pcmpeqw mm1, mm7                        ; lanes where neither mask is set
        pand    mm0, mm1
        por     mm4, mm0                        ; mm4 = the diagonal (centre) pixels

        movq    mm0, [ACPixel]
        movq    mm1, mm0
        punpcklwd mm0, mm4                      ; AC0,ctr0,AC1,ctr1
        punpckhwd mm1, mm4                      ; AC2,ctr2,AC3,ctr3

        push    edx
        add     edx, [ebp+dstPitch]             ; second output row

%ifdef FAR_POINTER
        movq    [fs:edx], mm0
        movq    [fs:edx+8], mm1
%else
        movq    [edx], mm0
        movq    [edx+8], mm1
%endif
        pop     edx

.SKIP_PROCESS:
        mov     ecx, [ebp+deltaPtr]             ; advance the delta pointer in its stack slot
        add     ecx, 8
        mov     [ebp+deltaPtr], ecx
        add     edx, 16                         ; 8 output pixels
        add     eax, 8                          ; 4 source pixels

        pop     ecx                             ; remaining pixel count
        sub     ecx, 4
        test    ecx, ecx                        ; same flags as "cmp ecx, 0" for the signed test below
        jg      near .Loop

        ; --- epilogue --------------------------------------------------
        popad
        mov     esp, ebp
        pop     ebp
        emms
        ret

;-------------------------------------------------------------------------
;-------------------------------------------------------------------------

;-----------------------------------------------------------------------
; Init_2xSaIMMX   (assembled as _Init_2xSaIMMX when __DJGPP__ is defined)
;
; Selects the interpolation masks for the given 16-bit pixel format.
;
; In:    [ebp+8] = PixelFormat, 555 or 565
; Out:   eax = 0 when the masks were set, 1 for any other PixelFormat
;        (the masks are then left unchanged)
; Writes colorMask, lowPixelMask, qcolorMask, qlowpixelMask (both dwords).
; Clobb: eax, flags; edx is saved and restored.
;-----------------------------------------------------------------------
%ifdef __DJGPP__
_Init_2xSaIMMX:
%else
Init_2xSaIMMX:
%endif
        push    ebp
        mov     ebp, esp
        push    edx

        ; MMX detection, disabled by the original author ("doesn't work").
        ; NOTE(review): cpuid also overwrites ebx and ecx, which this routine
        ; does not save - possibly the reason; confirm before re-enabling.
        ;       mov     eax, 1
        ;       cpuid
        ;       test    edx, 0x00800000         ; test bit 23
        ;       jz      end2                    ; bit not set => no MMX detected

        mov     eax, [ebp+8]                    ; PixelFormat
        cmp     eax, 555
        jz      Bits555
        cmp     eax, 565
        jz      Bits565
end2:
        mov     eax, 1                          ; unsupported format
        jmp     end3

Bits555:
        mov     edx, 0x7BDE7BDE                 ; all bits except the lowest of each 5-bit field
        mov     [colorMask], edx
        mov     [colorMask+4], edx
        mov     edx, 0x04210421                 ; lowest bit of each field
        mov     [lowPixelMask], edx
        mov     [lowPixelMask+4], edx
        mov     edx, 0x739C739C                 ; all bits except the lowest two of each field
        mov     [qcolorMask], edx
        mov     [qcolorMask+4], edx
        mov     edx, 0x0C630C63                 ; lowest two bits of each field
        mov     [qlowpixelMask], edx
        mov     [qlowpixelMask+4], edx
        xor     eax, eax                        ; success
        jmp     end3

Bits565:
        mov     edx, 0xF7DEF7DE                 ; same four masks for the 5-6-5 layout
        mov     [colorMask], edx
        mov     [colorMask+4], edx
        mov     edx, 0x08210821
        mov     [lowPixelMask], edx
        mov     [lowPixelMask+4], edx
        mov     edx, 0xE79CE79C
        mov     [qcolorMask], edx
        mov     [qcolorMask+4], edx
        mov     edx, 0x18631863
        mov     [qlowpixelMask], edx
        mov     [qlowpixelMask+4], edx
        xor     eax, eax                        ; success; falls through to the exit

end3:
        pop     edx
        mov     esp, ebp
        pop     ebp
        ret

;-------------------------------------------------------------------------
;-------------------------------------------------------------------------

SECTION .data ALIGN = 32
; Interpolation masks. The initial values are the 565 set (identical to what
; Init_2xSaIMMX stores for PixelFormat 565); Init_2xSaIMMX overwrites them.
colorMask       dd      0xF7DEF7DE,0xF7DEF7DE
lowPixelMask    dd      0x08210821,0x08210821

qcolorMask      dd      0xE79CE79C,0xE79CE79C
qlowpixelMask   dd      0x18631863,0x18631863

; The next five constants are not referenced by _2xSaILine or Init_2xSaIMMX.
darkenMask      dd      0xC718C718,0xC718C718
GreenMask       dd      0x07E007E0,0x07E007E0
RedBlueMask     dd      0xF81FF81F,0xF81FF81F

FALSE           dd      0x00000000,0x00000000
TRUE            dd      0xffffffff,0xffffffff
ONE             dd      0x00010001,0x00010001   ; 1 per 16-bit lane; used by the diagonal vote


SECTION .bss ALIGN = 32
; Scratch qwords used by _2xSaILine.
ACPixel         resb    8
Mask1           resb    8
Mask2           resb    8

; Scratch qwords not used by _2xSaILine or Init_2xSaIMMX
; (product2b and final1a..final2b are used by _2xSaISuper2xSaILine).
I56Pixel        resb    8
I23Pixel        resb    8
I5556Pixel      resb    8
I2223Pixel      resb    8
I5666Pixel      resb    8
I2333Pixel      resb    8
Mask26          resb    8
Mask35          resb    8
Mask26b         resb    8
Mask35b         resb    8
product1a       resb    8
product1b       resb    8
product2a       resb    8
product2b       resb    8
final1a         resb    8
final1b         resb    8
final2a         resb    8
final2b         resb    8