rlm@1: ;/*---------------------------------------------------------------------*
rlm@1: ; * The following (piece of) code, (part of) the 2xSaI engine,          *
rlm@1: ; * copyright (c) 1999 - 2001 by Derek Liauw Kie Fa.                    *
rlm@1: ; * Non-Commercial use of this software is allowed and is encouraged,   *
rlm@1: ; * provided that appropriate credit be given.                          *
rlm@1: ; * You may freely modify this code, but I request                      *
rlm@1: ; * that any improvements to the engine be submitted to me, so          *
rlm@1: ; * that I can implement these improvements in newer versions of        *
rlm@1: ; * the software.                                                       *
rlm@1: ; * If you need more information, have any comments or suggestions,     *
rlm@1: ; * you can e-mail me. My e-mail: derek-liauw@usa.net.                  *
rlm@1: ; *---------------------------------------------------------------------*/
rlm@1: 
rlm@1: ;----------------------
rlm@1: ; 2xSaI version 0.59 WIP, soon to become version 0.60
rlm@1: ;----------------------
rlm@1: 
rlm@1: ;%define FAR_POINTER
rlm@1: 
rlm@1: 
rlm@1: 
rlm@1: ; ---- assembler setup: 32-bit code, exported entry points, code section ----
rlm@1:         BITS 32
rlm@1: 
rlm@1: ; The DJGPP build exports every entry point with one more leading underscore
rlm@1: ; than the default build does.
rlm@1: %ifdef __DJGPP__
rlm@1:         GLOBAL __2xSaILine
rlm@1:         GLOBAL __2xSaISuperEagleLine
rlm@1:         GLOBAL __2xSaISuper2xSaILine
rlm@1:         GLOBAL _Init_2xSaIMMX
rlm@1: %else
rlm@1:         GLOBAL _2xSaILine
rlm@1:         GLOBAL _2xSaISuperEagleLine
rlm@1:         GLOBAL _2xSaISuper2xSaILine
rlm@1:         GLOBAL Init_2xSaIMMX
rlm@1: %endif
rlm@1: 
rlm@1: ; NOTE(review): the NASM manual writes this qualifier as "align=N" without
rlm@1: ; spaces -- confirm the spaced spelling below is honoured by the output format in use.
rlm@1:         SECTION .text ALIGN = 32
rlm@1: 
rlm@1: %ifdef FAR_POINTER
rlm@1: ;EXTERN_C void _2xSaILine (uint8 *srcPtr, uint8 *deltaPtr, uint32 srcPitch, uint32 width,
rlm@1: ;                        uint8 *dstPtr, uint32 dstPitch, uint16 dstSegment);
rlm@1: %else
rlm@1: ;EXTERN_C void _2xSaILine (uint8 *srcPtr, uint8 *deltaPtr, uint32 srcPitch, uint32 width,
rlm@1: ;                        uint8 *dstPtr, uint32 dstPitch);
rlm@1: %endif
rlm@1: 
rlm@1: ; Offsets of the stack arguments relative to ebp, valid once the
rlm@1: ; "push ebp / mov ebp, esp" prologue has run. Each argument occupies one
rlm@1: ; 4-byte slot, so every offset is the previous one plus 4.
rlm@1: srcPtr      equ 8               ; source pointer (loaded into eax)
rlm@1: deltaPtr    equ srcPtr + 4      ; delta-buffer pointer (compared against the source)
rlm@1: srcPitch    equ deltaPtr + 4    ; byte distance between source rows (loaded into ebx)
rlm@1: width       equ srcPitch + 4    ; pixel count (loop counter, reduced by 4 per pass)
rlm@1: dstOffset   equ width + 4       ; destination pointer (loaded into edx)
rlm@1: dstPitch    equ dstOffset + 4   ; added to edx to reach the second output row
rlm@1: dstSegment  equ dstPitch + 4    ; selector loaded into fs; read only when FAR_POINTER is defined
rlm@1: 
rlm@1: 
rlm@1: 
rlm@1: 
rlm@1: ; Byte offsets of neighbouring pixels inside one source row. The offsets step
rlm@1: ; by 2, matching the word-sized pixel compares used by the line routines;
rlm@1: ; offset 0 is the column currently being processed.
rlm@1: 
rlm@1: ; Row addressed as [eax + ...] (eax = srcPtr - srcPitch)
rlm@1: colorB0     equ -2
rlm@1: colorB1     equ  0
rlm@1: colorB2     equ  2
rlm@1: colorB3     equ  4
rlm@1: 
rlm@1: ; color7..color9: same -2 / 0 / +2 column pattern
rlm@1: color7      equ -2
rlm@1: color8      equ  0
rlm@1: color9      equ  2
rlm@1: 
rlm@1: ; Row addressed as [eax + ebx + ...]
rlm@1: color4      equ -2
rlm@1: color5      equ  0
rlm@1: color6      equ  2
rlm@1: colorS2     equ  4
rlm@1: 
rlm@1: ; Row addressed as [eax + ebx + ebx + ...]
rlm@1: color1      equ -2
rlm@1: color2      equ  0
rlm@1: color3      equ  2
rlm@1: colorS1     equ  4
rlm@1: 
rlm@1: ; Row addressed three pitches below eax
rlm@1: colorA0     equ -2
rlm@1: colorA1     equ  0
rlm@1: colorA2     equ  2
rlm@1: colorA3     equ  4
rlm@1: 
rlm@1: ; _2xSaISuper2xSaILine (__2xSaISuper2xSaILine under DJGPP): scales one source row to two destination rows, four word-sized pixels per loop pass.
rlm@1: ; Stack arguments are read via [ebp+srcPtr], [ebp+deltaPtr], [ebp+srcPitch], [ebp+width], [ebp+dstOffset], [ebp+dstPitch] (plus [ebp+dstSegment] when FAR_POINTER is defined).
rlm@1: ; General registers are preserved by pushad/popad; mm0-mm7 are clobbered and emms runs before ret; the deltaPtr stack slot is advanced in place; scratch results go through memory variables defined outside this chunk.
rlm@1: %ifdef __DJGPP__
rlm@1: __2xSaISuper2xSaILine:
rlm@1: %else
rlm@1: _2xSaISuper2xSaILine:
rlm@1: %endif
rlm@1: ; Build the ebp frame and save all general registers (restored by popad before ret)
rlm@1:          push ebp
rlm@1:          mov ebp, esp
rlm@1:          pushad
rlm@1: 
rlm@1: ; Prepare the destination
rlm@1: %ifdef FAR_POINTER
rlm@1:          ; Load the destination selector into fs (fs is not saved or restored by this routine)
rlm@1:          mov eax, [ebp+dstSegment]
rlm@1:          mov fs, ax
rlm@1: %endif
rlm@1:          mov edx, [ebp+dstOffset]         ; edx = destination write pointer
rlm@1: ; Prepare the source
rlm@1:          ; eax = srcPtr argument
rlm@1:          mov eax, [ebp+srcPtr]                          ;eax = source row pointer
rlm@1:          mov ebx, [ebp+srcPitch]                        ;ebx contains the source pitch
rlm@1:          mov ecx, [ebp+width]                           ;ecx contains the number of pixels to process
rlm@1:          ; step back one row: eax, eax+ebx, eax+2*ebx and eax+3*ebx then address the B, 4/5/6, 1/2/3 and A rows
rlm@1:          sub eax, ebx                                           ;eax points to B1 which is the base 
rlm@1: 
rlm@1: ; Main Loop
rlm@1: .Loop:   push ecx                 ; save the remaining pixel count; ecx is scratch inside the body
rlm@1: 
rlm@1:          ;-----Check Delta------------------
rlm@1:          mov ecx, [ebp+deltaPtr]  ; ecx = current position in the delta buffer
rlm@1: 
rlm@1: 
rlm@1:                 ;load source img: two qwords (byte offsets -2 and +4) from each of the four rows
rlm@1:          movq mm0, [eax+colorB0]
rlm@1:          movq mm1, [eax+colorB3]
rlm@1:          movq mm2, [eax+ebx+color4]
rlm@1:          movq mm3, [eax+ebx+colorS2]
rlm@1:          movq mm4, [eax+ebx+ebx+color1]
rlm@1:          movq mm5, [eax+ebx+ebx+colorS1]
rlm@1:          push eax                 ; eax+3*ebx is not encodable as one address, so bump eax temporarily
rlm@1:          add eax, ebx
rlm@1:          movq mm6, [eax+ebx+ebx+colorA0]
rlm@1:          movq mm7, [eax+ebx+ebx+colorA3]
rlm@1:          pop eax
rlm@1: 
rlm@1:                 ;compare to delta: same row/column layout, read 2 bytes further into the delta buffer
rlm@1:          pcmpeqw mm0, [ecx+2+colorB0]
rlm@1:          pcmpeqw mm1, [ecx+2+colorB3]
rlm@1:          pcmpeqw mm2, [ecx+ebx+2+color4]
rlm@1:          pcmpeqw mm3, [ecx+ebx+2+colorS2]
rlm@1:          pcmpeqw mm4, [ecx+ebx+ebx+2+color1]
rlm@1:          pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1]
rlm@1:          add ecx, ebx             ; temporary bump to reach the fourth delta row
rlm@1:          pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0]
rlm@1:          pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3]
rlm@1:          sub ecx, ebx
rlm@1: 
rlm@1: 
rlm@1:                 ;compose results: AND the eight equality masks lane by lane
rlm@1:          pand mm0, mm1
rlm@1:          pand mm2, mm3
rlm@1:          pand mm4, mm5
rlm@1:          pand mm6, mm7
rlm@1:          pand mm0, mm2
rlm@1:          pand mm4, mm6
rlm@1:          pxor mm7, mm7
rlm@1:          pand mm0, mm4
rlm@1:          movq mm6, [eax+colorB0]                ; reload the first B-row qword for the delta update
rlm@1:          pcmpeqw mm7, mm0                       ;did any compare give us a zero ?
rlm@1: 
rlm@1:          movq [ecx+2+colorB0], mm6              ; copy that source qword into the delta buffer
rlm@1: 
rlm@1:          packsswb mm7, mm7                      ; squeeze the four word flags into the low dword
rlm@1:          movd ecx, mm7
rlm@1:          test ecx, ecx                          
rlm@1:          jz near .SKIP_PROCESS          ;no, so we can skip
rlm@1: 
rlm@1:          ;End Delta
rlm@1: 
rlm@1:          ;--------------------------------- I56Pixel = INTERPOLATE(color5, color6)
rlm@1:          movq mm0, [eax+ebx+color5]
rlm@1:          movq mm1, [eax+ebx+color6]
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1:          movq mm4, mm0            ; mm4 keeps the raw color5 words
rlm@1:          movq mm5, mm1            ; mm5 keeps the raw color6 words
rlm@1: 
rlm@1:          pand mm0, [colorMask]    ; colorMask / lowPixelMask are defined outside this chunk
rlm@1:          pand mm1, [colorMask]
rlm@1: 
rlm@1:          psrlw mm0, 1             ; halve both masked operands
rlm@1:          psrlw mm1, 1
rlm@1: 
rlm@1:          pand mm3, [lowPixelMask]
rlm@1:          paddw mm0, mm1
rlm@1: 
rlm@1:          pand mm3, mm2            ; mm3 = a & b & lowPixelMask (correction term)
rlm@1:          paddw mm0, mm3                ;mm0 contains the interpolated values
rlm@1:          movq [I56Pixel], mm0
rlm@1:          movq mm7, mm0            ; mm7 = I56, reused by the next two blends
rlm@1: 
rlm@1:          ;------------------- I5556Pixel = INTERPOLATE(I56, color5)
rlm@1:          movq mm0, mm7
rlm@1:          movq mm1, mm4  ;5,5,5,6
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1: 
rlm@1:          pand mm0, [colorMask]
rlm@1:          pand mm1, [colorMask]
rlm@1: 
rlm@1:          psrlw mm0, 1
rlm@1:          psrlw mm1, 1
rlm@1: 
rlm@1:          pand mm3, [lowPixelMask]
rlm@1:          paddw mm0, mm1
rlm@1: 
rlm@1:          pand mm3, mm2
rlm@1:          paddw mm0, mm3                ;mm0 contains the interpolated values
rlm@1:          movq [I5556Pixel], mm0
rlm@1:          ;-------------------- I5666Pixel = INTERPOLATE(I56, color6)
rlm@1: 
rlm@1:          movq mm0, mm7
rlm@1:          movq mm1, mm5  ;6,6,6,5
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1: 
rlm@1:          pand mm0, [colorMask]
rlm@1:          pand mm1, [colorMask]
rlm@1: 
rlm@1:          psrlw mm0, 1
rlm@1:          psrlw mm1, 1
rlm@1: 
rlm@1:          pand mm3, [lowPixelMask]
rlm@1:          paddw mm0, mm1
rlm@1: 
rlm@1:          pand mm3, mm2
rlm@1:          paddw mm0, mm3
rlm@1:          movq [I5666Pixel], mm0
rlm@1: 
rlm@1:          ;-------------------------
rlm@1:          ;------------------------- I23Pixel = INTERPOLATE(color2, color3)
rlm@1:          movq mm0, [eax+ebx+ebx+color2]
rlm@1:          movq mm1, [eax+ebx+ebx+color3]
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1:          movq mm4, mm0            ; mm4 keeps the raw color2 words
rlm@1:          movq mm5, mm1            ; mm5 keeps the raw color3 words
rlm@1: 
rlm@1:          pand mm0, [colorMask]
rlm@1:          pand mm1, [colorMask]
rlm@1: 
rlm@1:          psrlw mm0, 1
rlm@1:          psrlw mm1, 1
rlm@1: 
rlm@1:          pand mm3, [lowPixelMask]
rlm@1:          paddw mm0, mm1
rlm@1: 
rlm@1:          pand mm3, mm2
rlm@1:          paddw mm0, mm3
rlm@1:          movq [I23Pixel], mm0
rlm@1:          movq mm7, mm0            ; mm7 = I23, reused by the next two blends
rlm@1: 
rlm@1:          ;--------------------- I2223Pixel = INTERPOLATE(I23, color2)
rlm@1:          movq mm0, mm7
rlm@1:          movq mm1, mm4  ;2,2,2,3
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1: 
rlm@1:          pand mm0, [colorMask]
rlm@1:          pand mm1, [colorMask]
rlm@1: 
rlm@1:          psrlw mm0, 1
rlm@1:          psrlw mm1, 1
rlm@1: 
rlm@1:          pand mm3, [lowPixelMask]
rlm@1:          paddw mm0, mm1
rlm@1: 
rlm@1:          pand mm3, mm2
rlm@1:          paddw mm0, mm3
rlm@1:          movq [I2223Pixel], mm0
rlm@1: 
rlm@1:          ;---------------------- I2333Pixel = INTERPOLATE(I23, color3)
rlm@1:          movq mm0, mm7
rlm@1:          movq mm1, mm5  ;3,3,3,2
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1: 
rlm@1:          pand mm0, [colorMask]
rlm@1:          pand mm1, [colorMask]
rlm@1: 
rlm@1:          psrlw mm0, 1
rlm@1:          psrlw mm1, 1
rlm@1: 
rlm@1:          pand mm3, [lowPixelMask]
rlm@1:          paddw mm0, mm1
rlm@1: 
rlm@1:          pand mm3, mm2
rlm@1:          paddw mm0, mm3
rlm@1:          movq [I2333Pixel], mm0
rlm@1: 
rlm@1: 
rlm@1:          ;--------------------
rlm@1: ;////////////////////////////////
rlm@1: ; Decide which "branch" to take: build Mask26 / Mask35 from the color5, color6, color2, color3 square
rlm@1: ;--------------------------------
rlm@1:          movq mm0, [eax+ebx+color5]
rlm@1:          movq mm1, [eax+ebx+color6]
rlm@1:          movq mm6, mm0
rlm@1:          movq mm7, mm1
rlm@1:          pcmpeqw mm0, [eax+ebx+ebx+color3]      ; mm0 = (color5 == color3)
rlm@1:          pcmpeqw mm1, [eax+ebx+ebx+color2]      ; mm1 = (color6 == color2)
rlm@1:          pcmpeqw mm6, mm7                       ; mm6 = (color5 == color6)
rlm@1: 
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm0
rlm@1: 
rlm@1:          pand mm0, mm1       ;color5 == color3 && color6 == color2
rlm@1:          pxor mm7, mm7
rlm@1: 
rlm@1:          pcmpeqw mm2, mm7    ; mm2 = (color5 != color3)
rlm@1:          pand mm6, mm0       ; mm6 = all four pixels equal
rlm@1:          pand mm2, mm1       ;color5 != color3 && color6 == color2
rlm@1: 
rlm@1:          pcmpeqw mm1, mm7    ; mm1 = (color6 != color2)
rlm@1: 
rlm@1:          pand mm1, mm3       ;color5 == color3 && color6 != color2
rlm@1:          pxor mm0, mm6       ; mm0 = both diagonals match but the two colors differ: lanes that need the guess below
rlm@1:          por mm1, mm6        ; all-equal lanes are folded into the Mask35 case
rlm@1:          movq mm7, mm0
rlm@1:          movq [Mask26], mm2
rlm@1:          packsswb mm7, mm7
rlm@1:          movq [Mask35], mm1
rlm@1: 
rlm@1:          movd ecx, mm7
rlm@1:          test ecx, ecx
rlm@1:          jz near .SKIP_GUESS ; no lane is ambiguous
rlm@1: 
rlm@1: ;--------------------------------------------- guess: colorA, colorB, colorE ... colorO offsets are defined outside this chunk
rlm@1:          movq mm6, mm0
rlm@1:          movq mm4, [eax+ebx+colorA]             ; mm4 = first reference color
rlm@1:          movq mm5, [eax+ebx+colorB]             ; mm5 = second reference color
rlm@1:          pxor mm7, mm7                          ; mm7 = per-lane vote accumulator
rlm@1:          pand mm6, [ONE]                        ; mm6 = [ONE] in ambiguous lanes, 0 elsewhere (presumably 1 per word - confirm against the definition of ONE)
rlm@1: 
rlm@1:          movq mm0, [eax+colorE]                 ; neighbour pair 1
rlm@1:          movq mm1, [eax+ebx+colorG]
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1:          pcmpeqw mm0, mm4
rlm@1:          pcmpeqw mm1, mm4
rlm@1:          pcmpeqw mm2, mm5
rlm@1:          pcmpeqw mm3, mm5
rlm@1:          pand mm0, mm6
rlm@1:          pand mm1, mm6
rlm@1:          pand mm2, mm6
rlm@1:          pand mm3, mm6
rlm@1:          paddw mm0, mm1                         ; mm0 = matches with mm4 among the pair
rlm@1:          paddw mm2, mm3                         ; mm2 = matches with mm5 among the pair
rlm@1: 
rlm@1:          pxor mm3, mm3
rlm@1:          pcmpgtw mm0, mm6                       ; set where the match count exceeds mm6
rlm@1:          pcmpgtw mm2, mm6
rlm@1:          pcmpeqw mm0, mm3                       ; invert: set where the count does not exceed mm6
rlm@1:          pcmpeqw mm2, mm3
rlm@1:          pand mm0, mm6
rlm@1:          pand mm2, mm6
rlm@1:          paddw mm7, mm0                         ; vote up when the pair does not fully match mm4
rlm@1:          psubw mm7, mm2                         ; vote down when the pair does not fully match mm5
rlm@1: 
rlm@1:          movq mm0, [eax+colorF]                 ; neighbour pair 2 (same voting scheme)
rlm@1:          movq mm1, [eax+ebx+colorK]
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1:          pcmpeqw mm0, mm4
rlm@1:          pcmpeqw mm1, mm4
rlm@1:          pcmpeqw mm2, mm5
rlm@1:          pcmpeqw mm3, mm5
rlm@1:          pand mm0, mm6
rlm@1:          pand mm1, mm6
rlm@1:          pand mm2, mm6
rlm@1:          pand mm3, mm6
rlm@1:          paddw mm0, mm1
rlm@1:          paddw mm2, mm3
rlm@1: 
rlm@1:          pxor mm3, mm3
rlm@1:          pcmpgtw mm0, mm6
rlm@1:          pcmpgtw mm2, mm6
rlm@1:          pcmpeqw mm0, mm3
rlm@1:          pcmpeqw mm2, mm3
rlm@1:          pand mm0, mm6
rlm@1:          pand mm2, mm6
rlm@1:          paddw mm7, mm0
rlm@1:          psubw mm7, mm2
rlm@1: 
rlm@1:          push eax                               ; bump eax by one pitch for the lower rows (undone by the pop below)
rlm@1:          add eax, ebx
rlm@1:          movq mm0, [eax+ebx+colorH]             ; neighbour pair 3
rlm@1:          movq mm1, [eax+ebx+ebx+colorN]
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1:          pcmpeqw mm0, mm4
rlm@1:          pcmpeqw mm1, mm4
rlm@1:          pcmpeqw mm2, mm5
rlm@1:          pcmpeqw mm3, mm5
rlm@1:          pand mm0, mm6
rlm@1:          pand mm1, mm6
rlm@1:          pand mm2, mm6
rlm@1:          pand mm3, mm6
rlm@1:          paddw mm0, mm1
rlm@1:          paddw mm2, mm3
rlm@1: 
rlm@1:          pxor mm3, mm3
rlm@1:          pcmpgtw mm0, mm6
rlm@1:          pcmpgtw mm2, mm6
rlm@1:          pcmpeqw mm0, mm3
rlm@1:          pcmpeqw mm2, mm3
rlm@1:          pand mm0, mm6
rlm@1:          pand mm2, mm6
rlm@1:          paddw mm7, mm0
rlm@1:          psubw mm7, mm2
rlm@1: 
rlm@1:          movq mm0, [eax+ebx+colorL]             ; neighbour pair 4
rlm@1:          movq mm1, [eax+ebx+ebx+colorO]
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1:          pcmpeqw mm0, mm4
rlm@1:          pcmpeqw mm1, mm4
rlm@1:          pcmpeqw mm2, mm5
rlm@1:          pcmpeqw mm3, mm5
rlm@1:          pand mm0, mm6
rlm@1:          pand mm1, mm6
rlm@1:          pand mm2, mm6
rlm@1:          pand mm3, mm6
rlm@1:          paddw mm0, mm1
rlm@1:          paddw mm2, mm3
rlm@1: 
rlm@1:          pxor mm3, mm3
rlm@1:          pcmpgtw mm0, mm6
rlm@1:          pcmpgtw mm2, mm6
rlm@1:          pcmpeqw mm0, mm3
rlm@1:          pcmpeqw mm2, mm3
rlm@1:          pand mm0, mm6
rlm@1:          pand mm2, mm6
rlm@1:          paddw mm7, mm0
rlm@1:          psubw mm7, mm2
rlm@1: 
rlm@1:          pop eax
rlm@1:          movq mm1, mm7
rlm@1:          pxor mm0, mm0
rlm@1:          pcmpgtw mm7, mm0                       ; mm7 = lanes whose vote total is positive
rlm@1:          pcmpgtw mm0, mm1                       ; mm0 = lanes whose vote total is negative
rlm@1: 
rlm@1:          por mm7, [Mask35]                      ; positive lanes join Mask35
rlm@1:          por mm0, [Mask26]                      ; negative lanes join Mask26
rlm@1:          movq [Mask35], mm7
rlm@1:          movq [Mask26], mm0
rlm@1: 
rlm@1: .SKIP_GUESS:
rlm@1: 
rlm@1:          ;Start the ASSEMBLY !!!        eh... compose all the results together to form the final image...
rlm@1: 
rlm@1:          ; mm0 = INTERPOLATE(color5, color2); it stays live until final2a has been stored
rlm@1:          movq mm0, [eax+ebx+color5]
rlm@1:          movq mm1, [eax+ebx+ebx+color2]
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1:          movq mm4, mm0
rlm@1:          movq mm5, mm1
rlm@1: 
rlm@1:          pand mm0, [colorMask]
rlm@1:          pand mm1, [colorMask]
rlm@1: 
rlm@1:          psrlw mm0, 1
rlm@1:          psrlw mm1, 1
rlm@1: 
rlm@1:          pand mm3, [lowPixelMask]
rlm@1:          paddw mm0, mm1
rlm@1: 
rlm@1:          pand mm3, mm2
rlm@1:          paddw mm0, mm3                ;mm0 contains the interpolated values
rlm@1:                  ;---------------------------
rlm@1: 
rlm@1: ; The %ifdef below is keyed on a macro that is presumably never defined, so it works as a block comment.
rlm@1: ; It holds the C reference logic for product1a / product2a that the MMX code after it implements.
rlm@1: %ifdef dfhsdfhsdahdsfhdsfh
rlm@1: 
rlm@1:                 if (color5 == color3 && color2 != color6 && color4 == color5 && color5 != colorA2)
rlm@1:                    product2a = INTERPOLATE (color2, color5);
rlm@1:                 else
rlm@1:                 if (color5 == color1 && color6 == color5 && color4 != color2 && color5 != colorA0)
rlm@1:                    product2a = INTERPOLATE(color2, color5);
rlm@1:                 else
rlm@1:                    product2a = color2;
rlm@1: 
rlm@1:                 if (color2 == color6 && color5 != color3 && color1 == color2 && color2 != colorB2)
rlm@1:                    product1a = INTERPOLATE (color2, color5);
rlm@1:                 else
rlm@1:                 if (color4 == color2 && color3 == color2 && color1 != color5 && color2 != colorB0)
rlm@1:                    product1a = INTERPOLATE(color2, color5);
rlm@1:                 else
rlm@1:                    product1a = color5;
rlm@1: 
rlm@1: %endif
rlm@1: 
rlm@1: ; final1a, first condition: Mask26 && color1 == color2 && color2 != colorB2
rlm@1:                  movq mm7, [Mask26]
rlm@1:                  movq mm6, [eax+colorB2]
rlm@1:                  movq mm5, [eax+ebx+ebx+color2]
rlm@1:                  movq mm4, [eax+ebx+ebx+color1]
rlm@1:                  pcmpeqw mm4, mm5
rlm@1:                  pcmpeqw mm6, mm5
rlm@1:                  pxor mm5, mm5
rlm@1:                  pand mm7, mm4
rlm@1:                  pcmpeqw mm6, mm5               ; invert: colorB2 != color2
rlm@1:                  pand mm7, mm6
rlm@1: 
rlm@1: 
rlm@1: ; final1a, second condition: color4 == color2 && color3 == color2 && color1 != color5 && color2 != colorB0
rlm@1:                  movq mm6, [eax+ebx+ebx+color3]
rlm@1:                  movq mm5, [eax+ebx+ebx+color2]
rlm@1:                  movq mm4, [eax+ebx+ebx+color1]
rlm@1:                  movq mm2, [eax+ebx+color5]
rlm@1:                  movq mm1, [eax+ebx+color4]
rlm@1:                  movq mm3, [eax+colorB0]
rlm@1: 
rlm@1:                  pcmpeqw mm2, mm4
rlm@1:                  pcmpeqw mm6, mm5
rlm@1:                  pcmpeqw mm1, mm5
rlm@1:                  pcmpeqw mm3, mm5
rlm@1:                  pxor mm5, mm5
rlm@1:                  pcmpeqw mm2, mm5               ; invert: color5 != color1
rlm@1:                  pcmpeqw mm3, mm5               ; invert: colorB0 != color2
rlm@1:                  pand mm6, mm1
rlm@1:                  pand mm2, mm3
rlm@1:                  pand mm6, mm2
rlm@1:                  por mm7, mm6                   ; mm7 = lanes where either condition holds
rlm@1: 
rlm@1:                  ; select: blend (mm0) where mm7 is set, plain color5 elsewhere
rlm@1:                  movq mm6, mm7
rlm@1:                  pcmpeqw mm6, mm5               ; mm6 = inverse of mm7 (mm5 is zero here)
rlm@1:                  pand mm7, mm0
rlm@1: 
rlm@1:                  movq mm1, [eax+ebx+color5]
rlm@1:                  pand mm6, mm1
rlm@1:                  por mm7, mm6
rlm@1:                  movq [final1a], mm7                    ;finished  1a
rlm@1: 
rlm@1: 
rlm@1:          
rlm@1:              ;-------------------------------- final2a, first condition: Mask35 && color4 == color5 && color5 != colorA2
rlm@1: 
rlm@1:                  movq mm7, [Mask35]
rlm@1:                  push eax
rlm@1:                  add eax, ebx
rlm@1:                  movq mm6, [eax+ebx+ebx+colorA2]
rlm@1:                  pop eax
rlm@1:                  movq mm5, [eax+ebx+color5]
rlm@1:                  movq mm4, [eax+ebx+color4]
rlm@1:                  pcmpeqw mm4, mm5
rlm@1:                  pcmpeqw mm6, mm5
rlm@1:                  pxor mm5, mm5
rlm@1:                  pand mm7, mm4
rlm@1:                  pcmpeqw mm6, mm5               ; invert: colorA2 != color5
rlm@1:                  pand mm7, mm6
rlm@1: 
rlm@1: 
rlm@1: ; final2a, second condition: color5 == color1 && color6 == color5 && color4 != color2 && color5 != colorA0
rlm@1:                  movq mm6, [eax+ebx+color6]
rlm@1:                  movq mm5, [eax+ebx+color5]
rlm@1:                  movq mm4, [eax+ebx+color4]
rlm@1:                  movq mm2, [eax+ebx+ebx+color2]
rlm@1:                  movq mm1, [eax+ebx+ebx+color1]
rlm@1:                  push eax
rlm@1:                  add eax, ebx
rlm@1:                  movq mm3, [eax+ebx+ebx+colorA0]
rlm@1:                  pop eax
rlm@1: 
rlm@1:                  pcmpeqw mm2, mm4
rlm@1:                  pcmpeqw mm6, mm5
rlm@1:                  pcmpeqw mm1, mm5
rlm@1:                  pcmpeqw mm3, mm5
rlm@1:                  pxor mm5, mm5
rlm@1:                  pcmpeqw mm2, mm5               ; invert: color2 != color4
rlm@1:                  pcmpeqw mm3, mm5               ; invert: colorA0 != color5
rlm@1:                  pand mm6, mm1
rlm@1:                  pand mm2, mm3
rlm@1:                  pand mm6, mm2
rlm@1:                  por mm7, mm6
rlm@1: 
rlm@1:                  ; select: blend (mm0) where mm7 is set, plain color2 elsewhere
rlm@1:                  movq mm6, mm7
rlm@1:                  pcmpeqw mm6, mm5
rlm@1:                  pand mm7, mm0
rlm@1: 
rlm@1:                  movq mm1, [eax+ebx+ebx+color2]
rlm@1:                  pand mm6, mm1
rlm@1:                  por mm7, mm6
rlm@1:                  movq [final2a], mm7                    ;finished  2a
rlm@1: 
rlm@1: 
rlm@1:                  ;--------------------------------------------
rlm@1:  ; C reference logic for product2b / product1b (disabled text, same never-defined macro as above)
rlm@1: 
rlm@1: %ifdef dfhsdfhsdahdsfhdsfh
rlm@1:                    if (color6 == color3 && color3 == colorA1 && color2 != colorA2 && color3 != colorA0)
rlm@1:                       product2b = Q_INTERPOLATE (color3, color3, color3, color2);
rlm@1:                    else
rlm@1:                    if (color5 == color2 && color2 == colorA2 && colorA1 != color3 && color2 != colorA3)
rlm@1:                       product2b = Q_INTERPOLATE (color2, color2, color2, color3);
rlm@1:                    else
rlm@1:                       product2b = INTERPOLATE (color2, color3);
rlm@1: 
rlm@1:                    if (color6 == color3 && color6 == colorB1 && color5 != colorB2 && color6 != colorB0)
rlm@1:                       product1b = Q_INTERPOLATE (color6, color6, color6, color5);
rlm@1:                    else
rlm@1:                    if (color5 == color2 && color5 == colorB2 && colorB1 != color6 && color5 != colorB3)
rlm@1:                       product1b = Q_INTERPOLATE (color6, color5, color5, color5);
rlm@1:                    else
rlm@1:                       product1b = INTERPOLATE (color5, color6);
rlm@1: %endif
rlm@1: ; final2b: mm0 = lanes matching the first product2b condition, mm1 = lanes matching the second
rlm@1:                  push eax
rlm@1:                  add eax, ebx
rlm@1:                  pxor mm7, mm7
rlm@1:                  movq mm0, [eax+ebx+ebx+colorA0]
rlm@1:                  movq mm1, [eax+ebx+ebx+colorA1]
rlm@1:                  movq mm2, [eax+ebx+ebx+colorA2]
rlm@1:                  movq mm3, [eax+ebx+ebx+colorA3]
rlm@1:                  pop eax
rlm@1:                  movq mm4, [eax+ebx+ebx+color2]
rlm@1:                  movq mm5, [eax+ebx+ebx+color3]
rlm@1:                  movq mm6, [eax+ebx+color6]
rlm@1: 
rlm@1:                  pcmpeqw mm6, mm5               ; color6 == color3
rlm@1:                  pcmpeqw mm1, mm5               ; colorA1 == color3
rlm@1:                  pcmpeqw mm4, mm2
rlm@1:                  pcmpeqw mm0, mm5
rlm@1:                  pcmpeqw mm4, mm7               ; invert: color2 != colorA2
rlm@1:                  pcmpeqw mm0, mm7               ; invert: colorA0 != color3
rlm@1:                  pand mm0, mm4
rlm@1:                  pand mm6, mm1
rlm@1:                  pand mm0, mm6
rlm@1: 
rlm@1: 
rlm@1:                  push eax
rlm@1:                  add eax, ebx
rlm@1:                  movq mm1, [eax+ebx+ebx+colorA1]
rlm@1:                  pop eax
rlm@1:                  movq mm4, [eax+ebx+ebx+color2]
rlm@1:                  movq mm5, [eax+ebx+color5]
rlm@1:                  movq mm6, [eax+ebx+ebx+color3]
rlm@1: 
rlm@1:                  pcmpeqw mm5, mm4               ; color5 == color2
rlm@1:                  pcmpeqw mm2, mm4               ; colorA2 == color2 (mm2 still holds colorA2)
rlm@1:                  pcmpeqw mm1, mm6
rlm@1:                  pcmpeqw mm3, mm4
rlm@1:                  pcmpeqw mm1, mm7               ; invert: colorA1 != color3
rlm@1:                  pcmpeqw mm3, mm7               ; invert: colorA3 != color2
rlm@1:                  pand mm2, mm5
rlm@1:                  pand mm1, mm3
rlm@1:                  pand mm1, mm2
rlm@1: 
rlm@1: 
rlm@1:                  movq mm7, mm0
rlm@1:                  por mm7, mm1                   ; mm7 = lanes already decided by either condition
rlm@1: 
rlm@1:                  movq mm4, [Mask35]
rlm@1:                  movq mm3, [Mask26]
rlm@1:                  
rlm@1:                  movq mm6, mm4
rlm@1:                  pand mm6, mm7
rlm@1:                  pxor mm4, mm6                  ; mm4 = Mask35 without the decided lanes
rlm@1: 
rlm@1:                  movq mm6, mm3
rlm@1:                  pand mm6, mm7
rlm@1:                  pxor mm3, mm6                  ; mm3 = Mask26 without the decided lanes
rlm@1: 
rlm@1:                  movq mm2, mm0
rlm@1:                  movq mm7, [I2333Pixel]
rlm@1:                  movq mm6, [I2223Pixel]
rlm@1:                  movq mm5, [I23Pixel]
rlm@1: 
rlm@1: 
rlm@1:                  por mm2, mm4
rlm@1:                  pand mm4, [eax+ebx+ebx+color3] ; Mask35 lanes take color3
rlm@1:                  por mm2, mm3
rlm@1:                  pand mm3, [eax+ebx+ebx+color2] ; Mask26 lanes take color2
rlm@1:                  por mm2, mm1                   ; mm2 = every lane covered by some case
rlm@1:                  pand mm0, mm7                  ; first-condition lanes take I2333
rlm@1:                  pand mm1, mm6                  ; second-condition lanes take I2223
rlm@1:                  pxor mm7, mm7
rlm@1:                  pcmpeqw mm2, mm7               ; invert: lanes covered by no case
rlm@1:                  por mm0, mm1
rlm@1:                  por mm3, mm4
rlm@1:                  pand mm2, mm5                  ; uncovered lanes take I23
rlm@1:                  por mm0, mm3
rlm@1:                  por mm0, mm2
rlm@1:                  movq [final2b], mm0
rlm@1: 
rlm@1:                  ;----------------------------------- final1b: same scheme using the B row and the I56 / I5556 / I5666 blends
rlm@1:                  
rlm@1: 
rlm@1:                  pxor mm7, mm7
rlm@1:                  movq mm0, [eax+colorB0]
rlm@1:                  movq mm1, [eax+colorB1]
rlm@1:                  movq mm2, [eax+colorB2]
rlm@1:                  movq mm3, [eax+colorB3]
rlm@1:                  movq mm4, [eax+ebx+color5]
rlm@1:                  movq mm5, [eax+ebx+color6]
rlm@1:                  movq mm6, [eax+ebx+ebx+color3]
rlm@1: 
rlm@1:                  pcmpeqw mm6, mm5               ; color3 == color6
rlm@1:                  pcmpeqw mm1, mm5               ; colorB1 == color6
rlm@1:                  pcmpeqw mm4, mm2
rlm@1:                  pcmpeqw mm0, mm5
rlm@1:                  pcmpeqw mm4, mm7               ; invert: color5 != colorB2
rlm@1:                  pcmpeqw mm0, mm7               ; invert: colorB0 != color6
rlm@1:                  pand mm0, mm4
rlm@1:                  pand mm6, mm1
rlm@1:                  pand mm0, mm6
rlm@1: 
rlm@1:                  movq mm1, [eax+colorB1]
rlm@1:                  movq mm4, [eax+ebx+color5]
rlm@1:                  movq mm5, [eax+ebx+ebx+color2]
rlm@1:                  movq mm6, [eax+ebx+color6]
rlm@1: 
rlm@1:                  pcmpeqw mm5, mm4               ; color2 == color5
rlm@1:                  pcmpeqw mm2, mm4               ; colorB2 == color5 (mm2 still holds colorB2)
rlm@1:                  pcmpeqw mm1, mm6
rlm@1:                  pcmpeqw mm3, mm4
rlm@1:                  pcmpeqw mm1, mm7               ; invert: colorB1 != color6
rlm@1:                  pcmpeqw mm3, mm7               ; invert: colorB3 != color5
rlm@1:                  pand mm2, mm5
rlm@1:                  pand mm1, mm3
rlm@1:                  pand mm1, mm2
rlm@1: 
rlm@1: 
rlm@1:                  movq mm7, mm0
rlm@1:                  por mm7, mm1
rlm@1: 
rlm@1:                  movq mm4, [Mask35]
rlm@1:                  movq mm3, [Mask26]
rlm@1:                  
rlm@1:                  movq mm6, mm4
rlm@1:                  pand mm6, mm7
rlm@1:                  pxor mm4, mm6
rlm@1: 
rlm@1:                  movq mm6, mm3
rlm@1:                  pand mm6, mm7
rlm@1:                  pxor mm3, mm6
rlm@1: 
rlm@1:                  movq mm2, mm0
rlm@1:                  movq mm7, [I5666Pixel]
rlm@1:                  movq mm6, [I5556Pixel]
rlm@1:                  movq mm5, [I56Pixel]
rlm@1: 
rlm@1: 
rlm@1:                  por mm2, mm4
rlm@1:                  pand mm4, [eax+ebx+color5]     ; Mask35 lanes take color5
rlm@1:                  por mm2, mm3
rlm@1:                  pand mm3, [eax+ebx+color6]     ; Mask26 lanes take color6
rlm@1:                  por mm2, mm1
rlm@1:                  pand mm0, mm7                  ; first-condition lanes take I5666
rlm@1:                  pand mm1, mm6                  ; second-condition lanes take I5556
rlm@1:                  pxor mm7, mm7
rlm@1:                  pcmpeqw mm2, mm7
rlm@1:                  por mm0, mm1
rlm@1:                  por mm3, mm4
rlm@1:                  pand mm2, mm5                  ; uncovered lanes take I56
rlm@1:                  por mm0, mm3
rlm@1:                  por mm0, mm2
rlm@1:                  movq [final1b], mm0
rlm@1:                  
rlm@1:           ;--------- write out: interleave the a/b words so each source pixel yields two output pixels per row
rlm@1: 
rlm@1:                  movq mm0, [final1a]
rlm@1:                  movq mm4, [final2a]
rlm@1:                  movq mm2, [final1b]
rlm@1:                  movq mm6, [final2b]
rlm@1: 
rlm@1: 
rlm@1:                  movq mm1, mm0
rlm@1:                  movq mm5, mm4
rlm@1: 
rlm@1: 
rlm@1:          punpcklwd mm0, mm2       ; first output row, words 0-3: 1a,1b,1a,1b
rlm@1:          punpckhwd mm1, mm2       ; first output row, words 4-7
rlm@1: 
rlm@1:          punpcklwd mm4, mm6       ; second output row, words 0-3: 2a,2b,2a,2b
rlm@1:          punpckhwd mm5, mm6       ; second output row, words 4-7
rlm@1: 
rlm@1: 
rlm@1: %ifdef FAR_POINTER
rlm@1:          movq [fs:edx], mm0
rlm@1:          movq [fs:edx+8], mm1
rlm@1:          push edx
rlm@1:          add edx, [ebp+dstPitch]  ; second output row lies one dstPitch further
rlm@1:          movq [fs:edx], mm4
rlm@1:          movq [fs:edx+8], mm5
rlm@1:          pop edx
rlm@1: %else
rlm@1:          movq [edx], mm0
rlm@1:          movq [edx+8], mm1
rlm@1:          push edx
rlm@1:          add edx, [ebp+dstPitch]  ; second output row lies one dstPitch further
rlm@1:          movq [edx], mm4
rlm@1:          movq [edx+8], mm5
rlm@1:          pop edx
rlm@1: %endif
rlm@1: .SKIP_PROCESS:
rlm@1:          mov ecx, [ebp+deltaPtr]  ; advance the delta pointer kept in the argument slot by 8 bytes
rlm@1:          add ecx, 8
rlm@1:          mov [ebp+deltaPtr], ecx
rlm@1:          add edx, 16              ; destination advances 16 bytes per pass
rlm@1:          add eax, 8               ; source advances 8 bytes per pass
rlm@1: 
rlm@1:          pop ecx                  ; restore the remaining pixel count
rlm@1:          sub ecx, 4
rlm@1:          cmp ecx, 0
rlm@1:          jg  near .Loop           ; signed test: loop while pixels remain
rlm@1: 
rlm@1: ; Restore some stuff
rlm@1:          popad
rlm@1:          mov esp, ebp
rlm@1:          pop ebp
rlm@1:          emms                     ; leave MMX state so the caller can use the x87 FPU again
rlm@1:          ret
rlm@1: 
rlm@1: 
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ;-------------------------------------------------------------------------
rlm@1: 
rlm@1: 
rlm@1: 
rlm@1: %ifdef __DJGPP__
rlm@1: __2xSaISuperEagleLine:
rlm@1: %else
rlm@1: _2xSaISuperEagleLine:
rlm@1: %endif
rlm@1: ; Store some stuff
rlm@1:          push ebp
rlm@1:          mov ebp, esp
rlm@1:          pushad
rlm@1: 
rlm@1: ; Prepare the destination
rlm@1: %ifdef FAR_POINTER
rlm@1:          ; Set the selector
rlm@1:          mov eax, [ebp+dstSegment]
rlm@1:          mov fs, ax
rlm@1: %endif
rlm@1:          mov edx, [ebp+dstOffset]         ; edx points to the screen
rlm@1: ; Prepare the source
rlm@1:          ; eax points to colorA
rlm@1:          mov eax, [ebp+srcPtr]
rlm@1:          mov ebx, [ebp+srcPitch]
rlm@1:          mov ecx, [ebp+width]
rlm@1:          ; eax now points to colorB1
rlm@1:          sub eax, ebx
rlm@1: 
rlm@1: ; Main Loop
rlm@1: .Loop:   push ecx
rlm@1: 
rlm@1:          ;-----Check Delta------------------
rlm@1:          mov ecx, [ebp+deltaPtr]
rlm@1: 
rlm@1:          movq mm0, [eax+colorB0]
rlm@1:          movq mm1, [eax+colorB3]
rlm@1:          movq mm2, [eax+ebx+color4]
rlm@1:          movq mm3, [eax+ebx+colorS2]
rlm@1:          movq mm4, [eax+ebx+ebx+color1]
rlm@1:          movq mm5, [eax+ebx+ebx+colorS1]
rlm@1:          push eax
rlm@1:          add eax, ebx
rlm@1:          movq mm6, [eax+ebx+ebx+colorA0]
rlm@1:          movq mm7, [eax+ebx+ebx+colorA3]
rlm@1:          pop eax
rlm@1: 
rlm@1:          pcmpeqw mm0, [ecx+2+colorB0]
rlm@1:          pcmpeqw mm1, [ecx+2+colorB3]
rlm@1:          pcmpeqw mm2, [ecx+ebx+2+color4]
rlm@1:          pcmpeqw mm3, [ecx+ebx+2+colorS2]
rlm@1:          pcmpeqw mm4, [ecx+ebx+ebx+2+color1]
rlm@1:          pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1]
rlm@1:          add ecx, ebx
rlm@1:          pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0]
rlm@1:          pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3]
rlm@1:          sub ecx, ebx
rlm@1: 
rlm@1: 
rlm@1:          pand mm0, mm1
rlm@1:          pand mm2, mm3
rlm@1:          pand mm4, mm5
rlm@1:          pand mm6, mm7
rlm@1:          pand mm0, mm2
rlm@1:          pand mm4, mm6
rlm@1:          pxor mm7, mm7
rlm@1:          pand mm0, mm4
rlm@1:          movq mm6, [eax+colorB0]
rlm@1:          pcmpeqw mm7, mm0
rlm@1: 
rlm@1:          movq [ecx+2+colorB0], mm6
rlm@1: 
rlm@1:          packsswb mm7, mm7
rlm@1:          movd ecx, mm7
rlm@1:          test ecx, ecx
rlm@1:          jz near .SKIP_PROCESS
rlm@1: 
rlm@1:          ;End Delta
rlm@1: 
rlm@1:          ;---------------------------------
rlm@1:          movq mm0, [eax+ebx+color5]
rlm@1:          movq mm1, [eax+ebx+color6]
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1:          movq mm4, mm0
rlm@1:          movq mm5, mm1
rlm@1: 
rlm@1:          pand mm0, [colorMask]
rlm@1:          pand mm1, [colorMask]
rlm@1: 
rlm@1:          psrlw mm0, 1
rlm@1:          psrlw mm1, 1
rlm@1: 
rlm@1:          pand mm3, [lowPixelMask]
rlm@1:          paddw mm0, mm1
rlm@1: 
rlm@1:          pand mm3, mm2
rlm@1:          paddw mm0, mm3                ;mm0 contains the interpolated values
rlm@1:          movq [I56Pixel], mm0
rlm@1:          movq mm7, mm0
rlm@1: 
rlm@1:          ;-------------------
rlm@1:          movq mm0, mm7
rlm@1:          movq mm1, mm4  ;5,5,5,6
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1: 
rlm@1:          pand mm0, [colorMask]
rlm@1:          pand mm1, [colorMask]
rlm@1: 
rlm@1:          psrlw mm0, 1
rlm@1:          psrlw mm1, 1
rlm@1: 
rlm@1:          pand mm3, [lowPixelMask]
rlm@1:          paddw mm0, mm1
rlm@1: 
rlm@1:          pand mm3, mm2
rlm@1:          paddw mm0, mm3                ;mm0 contains the interpolated values
rlm@1:          movq [product1a], mm0
rlm@1:          ;--------------------
rlm@1: 
rlm@1:          movq mm0, mm7
rlm@1:          movq mm1, mm5  ;6,6,6,5
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1: 
rlm@1:          pand mm0, [colorMask]
rlm@1:          pand mm1, [colorMask]
rlm@1: 
rlm@1:          psrlw mm0, 1
rlm@1:          psrlw mm1, 1
rlm@1: 
rlm@1:          pand mm3, [lowPixelMask]
rlm@1:          paddw mm0, mm1
rlm@1: 
rlm@1:          pand mm3, mm2
rlm@1:          paddw mm0, mm3
rlm@1:          movq [product1b], mm0
rlm@1: 
rlm@1:          ;-------------------------
rlm@1:          ;-------------------------
rlm@1:          movq mm0, [eax+ebx+ebx+color2]
rlm@1:          movq mm1, [eax+ebx+ebx+color3]
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1:          movq mm4, mm0
rlm@1:          movq mm5, mm1
rlm@1: 
rlm@1:          pand mm0, [colorMask]
rlm@1:          pand mm1, [colorMask]
rlm@1: 
rlm@1:          psrlw mm0, 1
rlm@1:          psrlw mm1, 1
rlm@1: 
rlm@1:          pand mm3, [lowPixelMask]
rlm@1:          paddw mm0, mm1
rlm@1: 
rlm@1:          pand mm3, mm2
rlm@1:          paddw mm0, mm3
rlm@1:          movq [I23Pixel], mm0
rlm@1:          movq mm7, mm0
rlm@1: 
rlm@1:          ;---------------------
rlm@1:          movq mm0, mm7
rlm@1:          movq mm1, mm4  ;2,2,2,3
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1: 
rlm@1:          pand mm0, [colorMask]
rlm@1:          pand mm1, [colorMask]
rlm@1: 
rlm@1:          psrlw mm0, 1
rlm@1:          psrlw mm1, 1
rlm@1: 
rlm@1:          pand mm3, [lowPixelMask]
rlm@1:          paddw mm0, mm1
rlm@1: 
rlm@1:          pand mm3, mm2
rlm@1:          paddw mm0, mm3
rlm@1:          movq [product2a], mm0
rlm@1: 
rlm@1:          ;----------------------
rlm@1:          movq mm0, mm7
rlm@1:          movq mm1, mm5  ;3,3,3,2
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1: 
rlm@1:          pand mm0, [colorMask]
rlm@1:          pand mm1, [colorMask]
rlm@1: 
rlm@1:          psrlw mm0, 1
rlm@1:          psrlw mm1, 1
rlm@1: 
rlm@1:          pand mm3, [lowPixelMask]
rlm@1:          paddw mm0, mm1
rlm@1: 
rlm@1:          pand mm3, mm2
rlm@1:          paddw mm0, mm3
rlm@1:          movq [product2b], mm0
rlm@1: 
rlm@1: 
rlm@1:          ;////////////////////////////////
rlm@1:          ; Decide which "branch" to take
rlm@1:          ;--------------------------------
rlm@1:          movq mm4, [eax+ebx+color5]
rlm@1:          movq mm5, [eax+ebx+color6]
rlm@1:          movq mm6, [eax+ebx+ebx+color3]
rlm@1:          movq mm7, [eax+ebx+ebx+color2]
rlm@1: 
rlm@1:          pxor mm3, mm3
rlm@1:          movq mm0, mm4
rlm@1:          movq mm1, mm5
rlm@1: 
rlm@1:          pcmpeqw mm0, mm6
rlm@1:          pcmpeqw mm1, mm7
rlm@1:          pcmpeqw mm1, mm3
rlm@1:          pand mm0, mm1
rlm@1:          movq [Mask35], mm0
rlm@1: 
rlm@1:          movq mm0, [eax+ebx+ebx+colorS1]
rlm@1:          movq mm1, [eax+ebx+color4]
rlm@1:          push eax
rlm@1:          add eax, ebx
rlm@1:          movq mm2, [eax+ebx+ebx+colorA2]
rlm@1:          pop eax
rlm@1:          movq mm3, [eax+colorB1]
rlm@1:          pcmpeqw mm0, mm4
rlm@1:          pcmpeqw mm1, mm4
rlm@1:          pcmpeqw mm2, mm4
rlm@1:          pcmpeqw mm3, mm4
rlm@1:          pand mm0, mm1
rlm@1:          pand mm2, mm3
rlm@1:          por mm0, mm2
rlm@1:          pand mm0, [Mask35]
rlm@1:          movq [Mask35b], mm0
rlm@1: 
rlm@1:          ;-----------
rlm@1:          pxor mm3, mm3
rlm@1:          movq mm0, mm4
rlm@1:          movq mm1, mm5
rlm@1: 
rlm@1:          pcmpeqw mm0, mm6
rlm@1:          pcmpeqw mm1, mm7
rlm@1:          pcmpeqw mm0, mm3
rlm@1:          pand mm0, mm1
rlm@1:          movq [Mask26], mm0
rlm@1: 
rlm@1:          movq mm0, [eax+ebx+ebx+color1]
rlm@1:          movq mm1, [eax+ebx+colorS2]
rlm@1:          push eax
rlm@1:          add eax, ebx
rlm@1:          movq mm2, [eax+ebx+ebx+colorA1]
rlm@1:          pop eax
rlm@1:          movq mm3, [eax+colorB2]
rlm@1:          pcmpeqw mm0, mm5
rlm@1:          pcmpeqw mm1, mm5
rlm@1:          pcmpeqw mm2, mm5
rlm@1:          pcmpeqw mm3, mm5
rlm@1:          pand mm0, mm1
rlm@1:          pand mm2, mm3
rlm@1:          por mm0, mm2
rlm@1:          pand mm0, [Mask26]
rlm@1:          movq [Mask26b], mm0
rlm@1: 
rlm@1:          ;--------------------
rlm@1:          movq mm0, mm4
rlm@1:          movq mm1, mm5
rlm@1:          movq mm2, mm0
rlm@1: 
rlm@1:          pcmpeqw mm2, mm1
rlm@1:          pcmpeqw mm0, mm6
rlm@1:          pcmpeqw mm1, mm7
rlm@1:          pand mm0, mm1
rlm@1:          pand mm2, mm0
rlm@1:          pxor mm0, mm2
rlm@1:          movq mm7, mm0
rlm@1: 
rlm@1:          ;------------------
rlm@1:          packsswb mm7, mm7
rlm@1:          movd ecx, mm7
rlm@1:          test ecx, ecx
rlm@1:          jz near .SKIP_GUESS
rlm@1: 
rlm@1: ;---------------------------------------------
rlm@1: ; Map of the pixels:                    I|E F|J
rlm@1: ;                                       G|A B|K
rlm@1: ;                                       H|C D|L
rlm@1: ;                                       M|N O|P
rlm@1:          movq mm6, mm0
rlm@1:          movq mm4, [eax+ebx+color5]
rlm@1:          movq mm5, [eax+ebx+color6]
rlm@1:          pxor mm7, mm7
rlm@1:          pand mm6, [ONE]
rlm@1: 
rlm@1:          movq mm0, [eax+colorB1]
rlm@1:          movq mm1, [eax+ebx+color4]
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1:          pcmpeqw mm0, mm4
rlm@1:          pcmpeqw mm1, mm4
rlm@1:          pcmpeqw mm2, mm5
rlm@1:          pcmpeqw mm3, mm5
rlm@1:          pand mm0, mm6
rlm@1:          pand mm1, mm6
rlm@1:          pand mm2, mm6
rlm@1:          pand mm3, mm6
rlm@1:          paddw mm0, mm1
rlm@1:          paddw mm2, mm3
rlm@1: 
rlm@1:          pxor mm3, mm3
rlm@1:          pcmpgtw mm0, mm6
rlm@1:          pcmpgtw mm2, mm6
rlm@1:          pcmpeqw mm0, mm3
rlm@1:          pcmpeqw mm2, mm3
rlm@1:          pand mm0, mm6
rlm@1:          pand mm2, mm6
rlm@1:          paddw mm7, mm0
rlm@1:          psubw mm7, mm2
rlm@1: 
rlm@1:          movq mm0, [eax+colorB2]
rlm@1:          movq mm1, [eax+ebx+colorS2]
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1:          pcmpeqw mm0, mm4
rlm@1:          pcmpeqw mm1, mm4
rlm@1:          pcmpeqw mm2, mm5
rlm@1:          pcmpeqw mm3, mm5
rlm@1:          pand mm0, mm6
rlm@1:          pand mm1, mm6
rlm@1:          pand mm2, mm6
rlm@1:          pand mm3, mm6
rlm@1:          paddw mm0, mm1
rlm@1:          paddw mm2, mm3
rlm@1: 
rlm@1:          pxor mm3, mm3
rlm@1:          pcmpgtw mm0, mm6
rlm@1:          pcmpgtw mm2, mm6
rlm@1:          pcmpeqw mm0, mm3
rlm@1:          pcmpeqw mm2, mm3
rlm@1:          pand mm0, mm6
rlm@1:          pand mm2, mm6
rlm@1:          paddw mm7, mm0
rlm@1:          psubw mm7, mm2
rlm@1: 
rlm@1:          push eax
rlm@1:          add eax, ebx
rlm@1:          movq mm0, [eax+ebx+color1]
rlm@1:          movq mm1, [eax+ebx+ebx+colorA1]
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1:          pcmpeqw mm0, mm4
rlm@1:          pcmpeqw mm1, mm4
rlm@1:          pcmpeqw mm2, mm5
rlm@1:          pcmpeqw mm3, mm5
rlm@1:          pand mm0, mm6
rlm@1:          pand mm1, mm6
rlm@1:          pand mm2, mm6
rlm@1:          pand mm3, mm6
rlm@1:          paddw mm0, mm1
rlm@1:          paddw mm2, mm3
rlm@1: 
rlm@1:          pxor mm3, mm3
rlm@1:          pcmpgtw mm0, mm6
rlm@1:          pcmpgtw mm2, mm6
rlm@1:          pcmpeqw mm0, mm3
rlm@1:          pcmpeqw mm2, mm3
rlm@1:          pand mm0, mm6
rlm@1:          pand mm2, mm6
rlm@1:          paddw mm7, mm0
rlm@1:          psubw mm7, mm2
rlm@1: 
rlm@1:          movq mm0, [eax+ebx+colorS1]
rlm@1:          movq mm1, [eax+ebx+ebx+colorA2]
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1:          pcmpeqw mm0, mm4
rlm@1:          pcmpeqw mm1, mm4
rlm@1:          pcmpeqw mm2, mm5
rlm@1:          pcmpeqw mm3, mm5
rlm@1:          pand mm0, mm6
rlm@1:          pand mm1, mm6
rlm@1:          pand mm2, mm6
rlm@1:          pand mm3, mm6
rlm@1:          paddw mm0, mm1
rlm@1:          paddw mm2, mm3
rlm@1: 
rlm@1:          pxor mm3, mm3
rlm@1:          pcmpgtw mm0, mm6
rlm@1:          pcmpgtw mm2, mm6
rlm@1:          pcmpeqw mm0, mm3
rlm@1:          pcmpeqw mm2, mm3
rlm@1:          pand mm0, mm6
rlm@1:          pand mm2, mm6
rlm@1:          paddw mm7, mm0
rlm@1:          psubw mm7, mm2
rlm@1: 
rlm@1:          pop eax
rlm@1:          movq mm1, mm7
rlm@1:          pxor mm0, mm0
rlm@1:          pcmpgtw mm7, mm0
rlm@1:          pcmpgtw mm0, mm1
rlm@1: 
rlm@1:          por mm7, [Mask35]
rlm@1:          por mm0, [Mask26]
rlm@1:          movq [Mask35], mm7
rlm@1:          movq [Mask26], mm0
rlm@1: 
rlm@1: .SKIP_GUESS:
rlm@1:          ;Start the ASSEMBLY !!!
rlm@1: 
rlm@1:          movq mm4, [Mask35]
rlm@1:          movq mm5, [Mask26]
rlm@1:          movq mm6, [Mask35b]
rlm@1:          movq mm7, [Mask26b]
rlm@1: 
rlm@1:          movq mm0, [eax+ebx+color5]
rlm@1:          movq mm1, [eax+ebx+color6]
rlm@1:          movq mm2, [eax+ebx+ebx+color2]
rlm@1:          movq mm3, [eax+ebx+ebx+color3]
rlm@1:          pcmpeqw mm0, mm2
rlm@1:          pcmpeqw mm1, mm3
rlm@1:          movq mm2, mm4
rlm@1:          movq mm3, mm5
rlm@1:          por mm0, mm1
rlm@1:          por mm2, mm3
rlm@1:          pand mm2, mm0
rlm@1:          pxor mm0, mm2
rlm@1:          movq mm3, mm0
rlm@1: 
rlm@1:          movq mm2, mm0
rlm@1:          pxor mm0, mm0
rlm@1:          por mm2, mm4
rlm@1:          pxor mm4, mm6
rlm@1:          por mm2, mm5
rlm@1:          pxor mm5, mm7
rlm@1:          pcmpeqw mm2, mm0
rlm@1:          ;----------------
rlm@1: 
rlm@1:          movq mm0, [eax+ebx+color5]
rlm@1:          movq mm1, mm3
rlm@1:          por mm1, mm4
rlm@1:          por mm1, mm6
rlm@1:          pand mm0, mm1
rlm@1:          movq mm1, mm5
rlm@1:          pand mm1, [I56Pixel]
rlm@1:          por mm0, mm1
rlm@1:          movq mm1, mm7
rlm@1:          pand mm1, [product1b]
rlm@1:          por mm0, mm1
rlm@1:          movq mm1, mm2
rlm@1:          pand mm1, [product1a]
rlm@1:          por mm0, mm1
rlm@1:          movq [final1a], mm0
rlm@1: 
rlm@1:          movq mm0, [eax+ebx+color6]
rlm@1:          movq mm1, mm3
rlm@1:          por mm1, mm5
rlm@1:          por mm1, mm7
rlm@1:          pand mm0, mm1
rlm@1:          movq mm1, mm4
rlm@1:          pand mm1, [I56Pixel]
rlm@1:          por mm0, mm1
rlm@1:          movq mm1, mm6
rlm@1:          pand mm1, [product1a]
rlm@1:          por mm0, mm1
rlm@1:          movq mm1, mm2
rlm@1:          pand mm1, [product1b]
rlm@1:          por mm0, mm1
rlm@1:          movq [final1b], mm0
rlm@1: 
rlm@1:          movq mm0, [eax+ebx+ebx+color2]
rlm@1:          movq mm1, mm3
rlm@1:          por mm1, mm5
rlm@1:          por mm1, mm7
rlm@1:          pand mm0, mm1
rlm@1:          movq mm1, mm4
rlm@1:          pand mm1, [I23Pixel]
rlm@1:          por mm0, mm1
rlm@1:          movq mm1, mm6
rlm@1:          pand mm1, [product2b]
rlm@1:          por mm0, mm1
rlm@1:          movq mm1, mm2
rlm@1:          pand mm1, [product2a]
rlm@1:          por mm0, mm1
rlm@1:          movq [final2a], mm0
rlm@1: 
rlm@1:          movq mm0, [eax+ebx+ebx+color3]
rlm@1:          movq mm1, mm3
rlm@1:          por mm1, mm4
rlm@1:          por mm1, mm6
rlm@1:          pand mm0, mm1
rlm@1:          movq mm1, mm5
rlm@1:          pand mm1, [I23Pixel]
rlm@1:          por mm0, mm1
rlm@1:          movq mm1, mm7
rlm@1:          pand mm1, [product2a]
rlm@1:          por mm0, mm1
rlm@1:          movq mm1, mm2
rlm@1:          pand mm1, [product2b]
rlm@1:          por mm0, mm1
rlm@1:          movq [final2b], mm0
rlm@1: 
rlm@1: 
rlm@1:          movq mm0, [final1a]
rlm@1:          movq mm2, [final1b]
rlm@1:          movq mm1, mm0
rlm@1:          movq mm4, [final2a]
rlm@1:          movq mm6, [final2b]
rlm@1:          movq mm5, mm4
rlm@1:          punpcklwd mm0, mm2
rlm@1:          punpckhwd mm1, mm2
rlm@1:          punpcklwd mm4, mm6
rlm@1:          punpckhwd mm5, mm6
rlm@1: 
rlm@1: 
rlm@1: 
rlm@1: 
rlm@1: %ifdef FAR_POINTER
rlm@1:          movq [fs:edx], mm0
rlm@1:          movq [fs:edx+8], mm1
rlm@1:          push edx
rlm@1:          add edx, [ebp+dstPitch]
rlm@1:          movq [fs:edx], mm4
rlm@1:          movq [fs:edx+8], mm5
rlm@1:          pop edx
rlm@1: %else
rlm@1:          movq [edx], mm0
rlm@1:          movq [edx+8], mm1
rlm@1:          push edx
rlm@1:          add edx, [ebp+dstPitch]
rlm@1:          movq [edx], mm4
rlm@1:          movq [edx+8], mm5
rlm@1:          pop edx
rlm@1: %endif
rlm@1: .SKIP_PROCESS:
rlm@1:          mov ecx, [ebp+deltaPtr]
rlm@1:          add ecx, 8
rlm@1:          mov [ebp+deltaPtr], ecx
rlm@1:          add edx, 16
rlm@1:          add eax, 8
rlm@1: 
rlm@1:          pop ecx
rlm@1:          sub ecx, 4
rlm@1:          cmp ecx, 0
rlm@1:          jg  near .Loop
rlm@1: 
rlm@1: ; Restore some stuff
rlm@1:          popad
rlm@1:          mov esp, ebp
rlm@1:          pop ebp
rlm@1:          emms
rlm@1:          ret
rlm@1: 
rlm@1: 
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ;-------------------------------------------------------------------------
rlm@1: 
rlm@1: 
rlm@1: ;This is version 0.50
rlm@1: ; Horizontal byte offsets of the 16 pixels of the 4x4 neighbourhood used
rlm@1: ; by the 2xSaI line routine below.  Only the column is encoded here; the
rlm@1: ; row is chosen at each use site by adding multiples of the pitch to the
rlm@1: ; base pointer.  Offsets advance in steps of 2, matching the word-sized
rlm@1: ; (pcmpeqw / punpck?wd) pixel operations in that routine.
rlm@1: ;
rlm@1: ;            offset:  -2    0    2    4
rlm@1: ;                      I |  E    F |  J
rlm@1: ;                      G |  A    B |  K
rlm@1: ;                      H |  C    D |  L
rlm@1: ;                      M |  N    O |  P
rlm@1: 
rlm@1: ; left outer column
rlm@1: colorI          equ     -2
rlm@1: colorG          equ     -2
rlm@1: colorH          equ     -2
rlm@1: colorM          equ     -2
rlm@1: 
rlm@1: ; left inner column (the column of the pixel being scaled, colorA)
rlm@1: colorE          equ     0
rlm@1: colorA          equ     0
rlm@1: colorC          equ     0
rlm@1: colorN          equ     0
rlm@1: 
rlm@1: ; right inner column
rlm@1: colorF          equ     2
rlm@1: colorB          equ     2
rlm@1: colorD          equ     2
rlm@1: colorO          equ     2
rlm@1: 
rlm@1: ; right outer column
rlm@1: colorJ          equ     4
rlm@1: colorK          equ     4
rlm@1: colorL          equ     4
rlm@1: colorP          equ     4
rlm@1: 
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ; _2xSaILine  (exported as __2xSaILine when __DJGPP__ is defined)
rlm@1: ;
rlm@1: ; Scales one source line to two destination lines, four source pixels
rlm@1: ; (one quadword) per loop pass.
rlm@1: ;
rlm@1: ; Arguments are read from the caller's stack frame through the ebp-relative
rlm@1: ; names srcPtr, srcPitch, width, dstOffset, dstPitch and deltaPtr (plus
rlm@1: ; dstSegment when FAR_POINTER is defined); those offsets are defined
rlm@1: ; elsewhere in this file.
rlm@1: ; NOTE(review): the prototype comment near the top of the file does not
rlm@1: ; list deltaPtr although this routine reads and updates it -- confirm the
rlm@1: ; real argument order against the callers.
rlm@1: ;
rlm@1: ; Registers while the loop runs:
rlm@1: ;   eax -> source, one row above the line being scaled (row E)
rlm@1: ;   ebx =  source pitch
rlm@1: ;   ecx =  remaining width
rlm@1: ;   edx -> destination
rlm@1: ; All general registers are saved with pushad here and restored with popad
rlm@1: ; before returning; the MMX state is released with emms on exit.
rlm@1: ;-------------------------------------------------------------------------
rlm@1: %ifdef __DJGPP__
rlm@1: __2xSaILine:
rlm@1: %else
rlm@1: _2xSaILine:
rlm@1: %endif
rlm@1:          ; Build the frame and save every general register
rlm@1:          push ebp
rlm@1:          mov ebp, esp
rlm@1:          pushad
rlm@1: 
rlm@1: %ifdef FAR_POINTER
rlm@1:          ; Destination lives in another segment: load its selector into fs
rlm@1:          mov edx, [ebp+dstSegment]
rlm@1:          mov fs, dx
rlm@1: %endif
rlm@1: 
rlm@1:          ; Source side: fetch the pitch first so the pointer can be moved
rlm@1:          ; up one row, from colorA (the line to scale) to colorE
rlm@1:          mov ebx, [ebp+srcPitch]
rlm@1:          mov eax, [ebp+srcPtr]
rlm@1:          sub eax, ebx
rlm@1: 
rlm@1:          ; Loop counter and destination pointer
rlm@1:          mov ecx, [ebp+width]
rlm@1:          mov edx, [ebp+dstOffset]
rlm@1: 
rlm@1: ; Main loop -- one pass per source quadword (four word-sized pixels).
rlm@1: ; State at the top of every pass:
rlm@1: ;   eax -> row E of the neighbourhood (eax+ebx = row A, eax+2*ebx = row C,
rlm@1: ;          eax+3*ebx = row N)
rlm@1: ;   ebx =  source pitch in bytes
rlm@1: ;   edx -> first of the two destination lines
rlm@1: ;   ecx =  remaining width; parked on the stack because ecx is scratch below
rlm@1: .Loop:   push ecx
rlm@1: 
rlm@1:          ;-----Check Delta------------------
rlm@1:          ; Load the two outer-column quadwords (offsets -2 and +4) of each of
rlm@1:          ; the four rows.  Together they span byte offsets -2..+11, i.e.
rlm@1:          ; every source word the four lanes of this pass can read.
rlm@1:          mov ecx, [ebp+deltaPtr]
rlm@1: 
rlm@1:          movq mm0, [eax+colorI]
rlm@1:          movq mm1, [eax+colorJ]
rlm@1:          movq mm2, [eax+ebx+colorG]
rlm@1:          movq mm3, [eax+ebx+colorK]
rlm@1:          movq mm4, [eax+ebx+ebx+colorH]
rlm@1:          movq mm5, [eax+ebx+ebx+colorL]
rlm@1:          push eax                      ; 3*ebx is not encodable: step eax one row
rlm@1:          add eax, ebx
rlm@1:          movq mm6, [eax+ebx+ebx+colorM]
rlm@1:          movq mm7, [eax+ebx+ebx+colorP]
rlm@1:          pop eax
rlm@1: 
rlm@1:          ; Compare with the copy kept in the delta buffer.  That buffer is
rlm@1:          ; addressed with the same row stride (ebx) but shifted by +2 bytes.
rlm@1:          ; The top-right compare names colorJ, the symbol the value in mm1
rlm@1:          ; was loaded with (it previously named colorK, which merely has
rlm@1:          ; the same numeric offset).
rlm@1:          pcmpeqw mm0, [ecx+2+colorI]
rlm@1:          pcmpeqw mm1, [ecx+2+colorJ]
rlm@1:          pcmpeqw mm2, [ecx+ebx+2+colorG]
rlm@1:          pcmpeqw mm3, [ecx+ebx+2+colorK]
rlm@1:          pcmpeqw mm4, [ecx+ebx+ebx+2+colorH]
rlm@1:          pcmpeqw mm5, [ecx+ebx+ebx+2+colorL]
rlm@1:          add ecx, ebx
rlm@1:          pcmpeqw mm6, [ecx+ebx+ebx+2+colorM]
rlm@1:          pcmpeqw mm7, [ecx+ebx+ebx+2+colorP]
rlm@1:          sub ecx, ebx
rlm@1: 
rlm@1: 
rlm@1:          ; AND the eight results: a lane stays 0xFFFF only if it matched in
rlm@1:          ; all eight quadwords.  mm7 then becomes the inverse ("lane changed").
rlm@1:          pand mm0, mm1
rlm@1:          pand mm2, mm3
rlm@1:          pand mm4, mm5
rlm@1:          pand mm6, mm7
rlm@1:          pand mm0, mm2
rlm@1:          pand mm4, mm6
rlm@1:          pxor mm7, mm7
rlm@1:          pand mm0, mm4
rlm@1:          movq mm6, [eax+colorI]
rlm@1:          pcmpeqw mm7, mm0
rlm@1: 
rlm@1:          ; Refresh the delta buffer with the current top-row quadword
rlm@1:          ; (done whether or not anything changed)
rlm@1:          movq [ecx+2+colorI], mm6
rlm@1: 
rlm@1:          ; Squeeze the four lane flags into ecx; zero means no lane changed,
rlm@1:          ; so the whole pass can be skipped
rlm@1:          packsswb mm7, mm7
rlm@1:          movd ecx, mm7
rlm@1:          test ecx, ecx
rlm@1:          jz near .SKIP_PROCESS
rlm@1: 
rlm@1:          ;End Delta
rlm@1: 
rlm@1:          ;---------------------------------
rlm@1:          ; First output line: words are emitted as A, product, A, product...
rlm@1:          ; Mask1 marks lanes where the product is colorA, Mask2 lanes where
rlm@1:          ; it is colorB; all other lanes get the A/B average.
rlm@1: 
rlm@1: ;1
rlm@1:          ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorE) && (colorB == colorL))
rlm@1:          movq mm0, [eax+ebx+colorA]        ;mm0 and mm1 contain colorA
rlm@1:          movq mm2, [eax+ebx+colorB]        ;mm2 and mm3 contain colorB
rlm@1: 
rlm@1:          movq mm1, mm0
rlm@1:          movq mm3, mm2
rlm@1: 
rlm@1:          pcmpeqw mm0, [eax+ebx+ebx+colorD]
rlm@1:          pcmpeqw mm1, [eax+colorE]
rlm@1:          pcmpeqw mm2, [eax+ebx+ebx+colorL]
rlm@1:          pcmpeqw mm3, [eax+ebx+ebx+colorC]
rlm@1: 
rlm@1:          pand mm0, mm1
rlm@1:          pxor mm1, mm1
rlm@1:          pand mm0, mm2
rlm@1:          pcmpeqw mm3, mm1              ;invert: colorB != colorC
rlm@1:          pand mm0, mm3                 ;result in mm0
rlm@1: 
rlm@1:          ;if ((colorA == colorC) && (colorB != colorE) && (colorA == colorF) && (colorB == colorJ))
rlm@1:          movq mm4, [eax+ebx+colorA]        ;mm4 and mm5 contain colorA
rlm@1:          movq mm6, [eax+ebx+colorB]        ;mm6 and mm7 contain colorB
rlm@1:          movq mm5, mm4
rlm@1:          movq mm7, mm6
rlm@1: 
rlm@1:          pcmpeqw mm4, [eax+ebx+ebx+colorC]
rlm@1:          pcmpeqw mm5, [eax+colorF]
rlm@1:          pcmpeqw mm6, [eax+colorJ]
rlm@1:          pcmpeqw mm7, [eax+colorE]
rlm@1: 
rlm@1:          pand mm4, mm5
rlm@1:          pxor mm5, mm5
rlm@1:          pand mm4, mm6
rlm@1:          pcmpeqw mm7, mm5              ;invert: colorB != colorE
rlm@1:          pand mm4, mm7                 ;result in mm4
rlm@1: 
rlm@1:          por mm0, mm4                  ;combine the masks
rlm@1:          movq [Mask1], mm0
rlm@1: 
rlm@1:          ;--------------------------------------------
rlm@1: 
rlm@1: ;2
rlm@1:          ;if ((colorB == colorC) && (colorA != colorD) && (colorB == colorF) && (colorA == colorH))
rlm@1:          movq mm0, [eax+ebx+colorB]        ;mm0 and mm1 contain colorB
rlm@1:          movq mm2, [eax+ebx+colorA]        ;mm2 and mm3 contain colorA
rlm@1:          movq mm1, mm0
rlm@1:          movq mm3, mm2
rlm@1: 
rlm@1:          pcmpeqw mm0, [eax+ebx+ebx+colorC]
rlm@1:          pcmpeqw mm1, [eax+colorF]
rlm@1:          pcmpeqw mm2, [eax+ebx+ebx+colorH]
rlm@1:          pcmpeqw mm3, [eax+ebx+ebx+colorD]
rlm@1: 
rlm@1:          pand mm0, mm1
rlm@1:          pxor mm1, mm1
rlm@1:          pand mm0, mm2
rlm@1:          pcmpeqw mm3, mm1              ;invert: colorA != colorD
rlm@1:          pand mm0, mm3                 ;result in mm0
rlm@1: 
rlm@1:          ;if ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI))
rlm@1:          movq mm4, [eax+ebx+colorB]        ;mm4 and mm5 contain colorB
rlm@1:          movq mm6, [eax+ebx+colorA]        ;mm6 and mm7 contain colorA
rlm@1:          movq mm5, mm4
rlm@1:          movq mm7, mm6
rlm@1: 
rlm@1:          pcmpeqw mm4, [eax+ebx+ebx+colorD]
rlm@1:          pcmpeqw mm5, [eax+colorE]
rlm@1:          pcmpeqw mm6, [eax+colorI]
rlm@1:          pcmpeqw mm7, [eax+colorF]
rlm@1: 
rlm@1:          pand mm4, mm5
rlm@1:          pxor mm5, mm5
rlm@1:          pand mm4, mm6
rlm@1:          pcmpeqw mm7, mm5              ;invert: colorA != colorF
rlm@1:          pand mm4, mm7                 ;result in mm4
rlm@1: 
rlm@1:          por mm0, mm4                  ;combine the masks
rlm@1:          movq [Mask2], mm0
rlm@1: 
rlm@1: 
rlm@1: ;interpolate colorA and colorB
rlm@1:          ; Per-lane average: (A & colorMask)/2 + (B & colorMask)/2, plus the
rlm@1:          ; bits selected by lowPixelMask that are set in both A and B.
rlm@1:          ; NOTE(review): colorMask / lowPixelMask are defined elsewhere;
rlm@1:          ; presumably colorMask clears each field's low bit so the shift
rlm@1:          ; cannot bleed between fields -- confirm against their initialiser.
rlm@1:          movq mm0, [eax+ebx+colorA]
rlm@1:          movq mm1, [eax+ebx+colorB]
rlm@1: 
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1: 
rlm@1:          pand mm0, [colorMask]
rlm@1:          pand mm1, [colorMask]
rlm@1: 
rlm@1:          psrlw mm0, 1
rlm@1:          psrlw mm1, 1
rlm@1: 
rlm@1:          pand mm3, [lowPixelMask]
rlm@1:          paddw mm0, mm1
rlm@1: 
rlm@1:          pand mm3, mm2
rlm@1:          paddw mm0, mm3                ;mm0 contains the interpolated values
rlm@1: 
rlm@1:          ;assemble the pixels
rlm@1:          ; product = A where Mask1, B where Mask2, the average elsewhere;
rlm@1:          ; mm5/mm6 keep untouched copies of A for the interleave.
rlm@1:          movq mm1, [eax+ebx+colorA]
rlm@1:          movq mm2, [eax+ebx+colorB]
rlm@1: 
rlm@1:          movq mm3, [Mask1]
rlm@1:          movq mm5, mm1
rlm@1:          movq mm4, [Mask2]
rlm@1:          movq mm6, mm1
rlm@1: 
rlm@1:          pand mm1, mm3
rlm@1:          por mm3, mm4
rlm@1:          pxor mm7, mm7
rlm@1:          pand mm2, mm4
rlm@1: 
rlm@1:          pcmpeqw mm3, mm7              ;lanes where neither mask is set
rlm@1:          por mm1, mm2
rlm@1:          pand mm0, mm3
rlm@1: 
rlm@1:          por mm0, mm1
rlm@1: 
rlm@1:          punpcklwd mm5, mm0            ;A0, product0, A1, product1
rlm@1:          punpckhwd mm6, mm0            ;A2, product2, A3, product3
rlm@1: 
rlm@1: %ifdef FAR_POINTER
rlm@1:          movq [fs:edx], mm5
rlm@1:          movq [fs:edx+8], mm6
rlm@1: %else
rlm@1:          movq [edx], mm5
rlm@1:          movq [edx+8], mm6
rlm@1: %endif
rlm@1: 
rlm@1: ;------------------------------------------------
rlm@1: ;        Create the Nextline
rlm@1: ;------------------------------------------------
rlm@1: ; Mask1 / Mask2 are reused: here they select colorA / colorC for the pixel
rlm@1: ; directly below A; the result is kept in ACPixel.
rlm@1: ;3       ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorG) && (colorC == colorO))
rlm@1:          movq mm0, [eax+ebx+colorA]        ;mm0 and mm1 contain colorA
rlm@1:          movq mm2, [eax+ebx+ebx+colorC]        ;mm2 and mm3 contain colorC
rlm@1:          movq mm1, mm0
rlm@1:          movq mm3, mm2
rlm@1: 
rlm@1:          push eax
rlm@1:          add eax, ebx                  ;eax -> row A, so eax+2*ebx reaches row N/O
rlm@1:          pcmpeqw mm0, [eax+ebx+colorD]
rlm@1:          pcmpeqw mm1, [eax+colorG]
rlm@1:          pcmpeqw mm2, [eax+ebx+ebx+colorO]
rlm@1:          pcmpeqw mm3, [eax+colorB]
rlm@1:          pop eax
rlm@1: 
rlm@1:          pand mm0, mm1
rlm@1:          pxor mm1, mm1
rlm@1:          pand mm0, mm2
rlm@1:          pcmpeqw mm3, mm1              ;invert: colorB != colorC
rlm@1:          pand mm0, mm3                 ;result in mm0
rlm@1: 
rlm@1:          ;if ((colorA == colorB) && (colorG != colorC) && (colorA == colorH) && (colorC == colorM))
rlm@1:          movq mm4, [eax+ebx+colorA]        ;mm4 and mm5 contain colorA
rlm@1:          movq mm6, [eax+ebx+ebx+colorC]        ;mm6 and mm7 contain colorC
rlm@1:          movq mm5, mm4
rlm@1:          movq mm7, mm6
rlm@1: 
rlm@1:          push eax
rlm@1:          add eax, ebx                  ;eax -> row A again for the row-M access
rlm@1:          pcmpeqw mm4, [eax+ebx+colorH]
rlm@1:          pcmpeqw mm5, [eax+colorB]
rlm@1:          pcmpeqw mm6, [eax+ebx+ebx+colorM]
rlm@1:          pcmpeqw mm7, [eax+colorG]
rlm@1:          pop eax
rlm@1: 
rlm@1:          pand mm4, mm5
rlm@1:          pxor mm5, mm5
rlm@1:          pand mm4, mm6
rlm@1:          pcmpeqw mm7, mm5              ;invert: colorG != colorC
rlm@1:          pand mm4, mm7                 ;result in mm4
rlm@1: 
rlm@1:          por mm0, mm4                  ;combine the masks
rlm@1:          movq [Mask1], mm0
rlm@1:          ;--------------------------------------------
rlm@1: 
rlm@1: ;4
rlm@1:          ;if ((colorB == colorC) && (colorA != colorD) && (colorC == colorH) && (colorA == colorF))
rlm@1:          movq mm0, [eax+ebx+ebx+colorC]        ;mm0 and mm1 contain colorC
rlm@1:          movq mm2, [eax+ebx+colorA]        ;mm2 and mm3 contain colorA
rlm@1:          movq mm1, mm0
rlm@1:          movq mm3, mm2
rlm@1: 
rlm@1:          pcmpeqw mm0, [eax+ebx+colorB]
rlm@1:          pcmpeqw mm1, [eax+ebx+ebx+colorH]
rlm@1:          pcmpeqw mm2, [eax+colorF]
rlm@1:          pcmpeqw mm3, [eax+ebx+ebx+colorD]
rlm@1: 
rlm@1:          pand mm0, mm1
rlm@1:          pxor mm1, mm1
rlm@1:          pand mm0, mm2
rlm@1:          pcmpeqw mm3, mm1              ;invert: colorA != colorD
rlm@1:          pand mm0, mm3                 ;result in mm0
rlm@1: 
rlm@1:          ;if ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI))
rlm@1:          movq mm4, [eax+ebx+ebx+colorC]        ;mm4 and mm5 contain colorC
rlm@1:          movq mm6, [eax+ebx+colorA]        ;mm6 and mm7 contain colorA
rlm@1:          movq mm5, mm4
rlm@1:          movq mm7, mm6
rlm@1: 
rlm@1:          pcmpeqw mm4, [eax+ebx+ebx+colorD]
rlm@1:          pcmpeqw mm5, [eax+ebx+colorG]
rlm@1:          pcmpeqw mm6, [eax+colorI]
rlm@1:          pcmpeqw mm7, [eax+ebx+ebx+colorH]
rlm@1: 
rlm@1:          pand mm4, mm5
rlm@1:          pxor mm5, mm5
rlm@1:          pand mm4, mm6
rlm@1:          pcmpeqw mm7, mm5              ;invert: colorA != colorH
rlm@1:          pand mm4, mm7                 ;result in mm4
rlm@1: 
rlm@1:          por mm0, mm4                  ;combine the masks
rlm@1:          movq [Mask2], mm0
rlm@1:          ;----------------------------------------------
rlm@1: 
rlm@1: ;interpolate colorA and colorC
rlm@1:          ; Same averaging scheme as for A/B above, applied to A and C
rlm@1:          movq mm0, [eax+ebx+colorA]
rlm@1:          movq mm1, [eax+ebx+ebx+colorC]
rlm@1: 
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1: 
rlm@1:          pand mm0, [colorMask]
rlm@1:          pand mm1, [colorMask]
rlm@1: 
rlm@1:          psrlw mm0, 1
rlm@1:          psrlw mm1, 1
rlm@1: 
rlm@1:          pand mm3, [lowPixelMask]
rlm@1:          paddw mm0, mm1
rlm@1: 
rlm@1:          pand mm3, mm2
rlm@1:          paddw mm0, mm3                ;mm0 contains the interpolated values
rlm@1:          ;-------------
rlm@1: 
rlm@1:          ;assemble the pixels
rlm@1:          ; ACPixel = A where Mask1, C where Mask2, the A/C average elsewhere
rlm@1:          movq mm1, [eax+ebx+colorA]
rlm@1:          movq mm2, [eax+ebx+ebx+colorC]
rlm@1: 
rlm@1:          movq mm3, [Mask1]
rlm@1:          movq mm4, [Mask2]
rlm@1: 
rlm@1:          pand mm1, mm3
rlm@1:          pand mm2, mm4
rlm@1: 
rlm@1:          por mm3, mm4
rlm@1:          pxor mm7, mm7
rlm@1:          por mm1, mm2
rlm@1: 
rlm@1:          pcmpeqw mm3, mm7              ;lanes where neither mask is set
rlm@1:          pand mm0, mm3
rlm@1:          por mm0, mm1
rlm@1:          movq [ACPixel], mm0
rlm@1: 
rlm@1: ;////////////////////////////////
rlm@1: ; Decide which "branch" to take
rlm@1: ;--------------------------------
rlm@1: ; For the diagonal pixel this leaves
rlm@1: ;   Mask1 = (A == D && B != C) or (A, B, C, D all equal)
rlm@1: ;   Mask2 = (A != D && B == C)
rlm@1: ;   mm0   = (A == D && B == C && A != B)   -- undecided lanes
rlm@1: ; and skips the vote below when no lane is undecided.
rlm@1:          movq mm0, [eax+ebx+colorA]
rlm@1:          movq mm1, [eax+ebx+colorB]
rlm@1:          movq mm6, mm0
rlm@1:          movq mm7, mm1
rlm@1:          pcmpeqw mm0, [eax+ebx+ebx+colorD]
rlm@1:          pcmpeqw mm1, [eax+ebx+ebx+colorC]
rlm@1:          pcmpeqw mm6, mm7              ;colorA == colorB
rlm@1: 
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm0
rlm@1: 
rlm@1:          pand mm0, mm1       ;colorA == colorD && colorB == colorC
rlm@1:          pxor mm7, mm7
rlm@1: 
rlm@1:          pcmpeqw mm2, mm7
rlm@1:          pand mm6, mm0       ;all four equal
rlm@1:          pand mm2, mm1       ;colorA != colorD && colorB == colorC
rlm@1: 
rlm@1:          pcmpeqw mm1, mm7
rlm@1: 
rlm@1:          pand mm1, mm3       ;colorA == colorD && colorB != colorC
rlm@1:          pxor mm0, mm6       ;drop the all-equal lanes from the undecided set
rlm@1:          por mm1, mm6
rlm@1:          movq mm7, mm0
rlm@1:          movq [Mask2], mm2
rlm@1:          packsswb mm7, mm7
rlm@1:          movq [Mask1], mm1
rlm@1: 
rlm@1:          movd ecx, mm7
rlm@1:          test ecx, ecx
rlm@1:          jz near .SKIP_GUESS
rlm@1: 
rlm@1: ;---------------------------------------------
rlm@1: ; Map of the pixels:                    I|E F|J
rlm@1: ;                                       G|A B|K
rlm@1: ;                                       H|C D|L
rlm@1: ;                                       M|N O|P
rlm@1: ;
rlm@1: ; Vote among four pairs of neighbours (E,G) (F,K) (H,N) (L,O).  For each
rlm@1: ; pair, x = how many of the two equal colorA and y = how many equal colorB;
rlm@1: ; the running score in mm7 gains 1 when x <= 1 and loses 1 when y <= 1.
rlm@1: ; mm6 holds 1 in undecided lanes and 0 elsewhere, so only undecided lanes
rlm@1: ; ever receive a non-zero score.
rlm@1:          movq mm6, mm0
rlm@1:          movq mm4, [eax+ebx+colorA]
rlm@1:          movq mm5, [eax+ebx+colorB]
rlm@1:          pxor mm7, mm7
rlm@1:          pand mm6, [ONE]
rlm@1: 
rlm@1:          ; pair (E, G)
rlm@1:          movq mm0, [eax+colorE]
rlm@1:          movq mm1, [eax+ebx+colorG]
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1:          pcmpeqw mm0, mm4
rlm@1:          pcmpeqw mm1, mm4
rlm@1:          pcmpeqw mm2, mm5
rlm@1:          pcmpeqw mm3, mm5
rlm@1:          pand mm0, mm6
rlm@1:          pand mm1, mm6
rlm@1:          pand mm2, mm6
rlm@1:          pand mm3, mm6
rlm@1:          paddw mm0, mm1                ;x
rlm@1:          paddw mm2, mm3                ;y
rlm@1: 
rlm@1:          pxor mm3, mm3
rlm@1:          pcmpgtw mm0, mm6              ;x > 1
rlm@1:          pcmpgtw mm2, mm6              ;y > 1
rlm@1:          pcmpeqw mm0, mm3              ;invert: x <= 1
rlm@1:          pcmpeqw mm2, mm3              ;invert: y <= 1
rlm@1:          pand mm0, mm6
rlm@1:          pand mm2, mm6
rlm@1:          paddw mm7, mm0
rlm@1:          psubw mm7, mm2
rlm@1: 
rlm@1:          ; pair (F, K)
rlm@1:          movq mm0, [eax+colorF]
rlm@1:          movq mm1, [eax+ebx+colorK]
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1:          pcmpeqw mm0, mm4
rlm@1:          pcmpeqw mm1, mm4
rlm@1:          pcmpeqw mm2, mm5
rlm@1:          pcmpeqw mm3, mm5
rlm@1:          pand mm0, mm6
rlm@1:          pand mm1, mm6
rlm@1:          pand mm2, mm6
rlm@1:          pand mm3, mm6
rlm@1:          paddw mm0, mm1
rlm@1:          paddw mm2, mm3
rlm@1: 
rlm@1:          pxor mm3, mm3
rlm@1:          pcmpgtw mm0, mm6
rlm@1:          pcmpgtw mm2, mm6
rlm@1:          pcmpeqw mm0, mm3
rlm@1:          pcmpeqw mm2, mm3
rlm@1:          pand mm0, mm6
rlm@1:          pand mm2, mm6
rlm@1:          paddw mm7, mm0
rlm@1:          psubw mm7, mm2
rlm@1: 
rlm@1:          ; pair (H, N) -- eax is stepped one row down until the pop below
rlm@1:          push eax
rlm@1:          add eax, ebx
rlm@1:          movq mm0, [eax+ebx+colorH]
rlm@1:          movq mm1, [eax+ebx+ebx+colorN]
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1:          pcmpeqw mm0, mm4
rlm@1:          pcmpeqw mm1, mm4
rlm@1:          pcmpeqw mm2, mm5
rlm@1:          pcmpeqw mm3, mm5
rlm@1:          pand mm0, mm6
rlm@1:          pand mm1, mm6
rlm@1:          pand mm2, mm6
rlm@1:          pand mm3, mm6
rlm@1:          paddw mm0, mm1
rlm@1:          paddw mm2, mm3
rlm@1: 
rlm@1:          pxor mm3, mm3
rlm@1:          pcmpgtw mm0, mm6
rlm@1:          pcmpgtw mm2, mm6
rlm@1:          pcmpeqw mm0, mm3
rlm@1:          pcmpeqw mm2, mm3
rlm@1:          pand mm0, mm6
rlm@1:          pand mm2, mm6
rlm@1:          paddw mm7, mm0
rlm@1:          psubw mm7, mm2
rlm@1: 
rlm@1:          ; pair (L, O)
rlm@1:          movq mm0, [eax+ebx+colorL]
rlm@1:          movq mm1, [eax+ebx+ebx+colorO]
rlm@1:          movq mm2, mm0
rlm@1:          movq mm3, mm1
rlm@1:          pcmpeqw mm0, mm4
rlm@1:          pcmpeqw mm1, mm4
rlm@1:          pcmpeqw mm2, mm5
rlm@1:          pcmpeqw mm3, mm5
rlm@1:          pand mm0, mm6
rlm@1:          pand mm1, mm6
rlm@1:          pand mm2, mm6
rlm@1:          pand mm3, mm6
rlm@1:          paddw mm0, mm1
rlm@1:          paddw mm2, mm3
rlm@1: 
rlm@1:          pxor mm3, mm3
rlm@1:          pcmpgtw mm0, mm6
rlm@1:          pcmpgtw mm2, mm6
rlm@1:          pcmpeqw mm0, mm3
rlm@1:          pcmpeqw mm2, mm3
rlm@1:          pand mm0, mm6
rlm@1:          pand mm2, mm6
rlm@1:          paddw mm7, mm0
rlm@1:          psubw mm7, mm2
rlm@1: 
rlm@1:          ; Turn the score into masks: positive -> colorA (Mask1),
rlm@1:          ; negative -> colorB (Mask2); zero leaves both masks untouched
rlm@1:          pop eax
rlm@1:          movq mm1, mm7
rlm@1:          pxor mm0, mm0
rlm@1:          pcmpgtw mm7, mm0              ;score > 0
rlm@1:          pcmpgtw mm0, mm1              ;score < 0
rlm@1: 
rlm@1:          por mm7, [Mask1]
rlm@1:          por mm0, [Mask2]
rlm@1:          movq [Mask1], mm7
rlm@1:          movq [Mask2], mm0
rlm@1: 
rlm@1: .SKIP_GUESS:
rlm@1:          ; Build the second output line: ACPixel interleaved with the
rlm@1:          ; diagonal pixel.  The diagonal is colorA where Mask1 is set,
rlm@1:          ; colorB where Mask2 is set and the four-way blend of A, B, C and D
rlm@1:          ; in every other lane.
rlm@1:          ;
rlm@1:          ; Blend = sum of ((X & qcolorMask) >> 2) over the four pixels, plus
rlm@1:          ;         ((sum of (X & qlowpixelMask)) >> 2) & qlowpixelMask.
rlm@1:          ; NOTE(review): qcolorMask / qlowpixelMask are defined elsewhere;
rlm@1:          ; presumably they split each colour field into "upper bits" and
rlm@1:          ; "two low bits" -- confirm against their initialiser.
rlm@1: 
rlm@1:          ; --- fetch the four pixels, keep raw copies of A, B, C for later
rlm@1:          movq mm7, [qcolorMask]
rlm@1:          movq mm0, [eax+ebx+colorA]
rlm@1:          movq mm1, [eax+ebx+colorB]
rlm@1:          movq mm2, [eax+ebx+ebx+colorC]
rlm@1:          movq mm3, [eax+ebx+ebx+colorD]
rlm@1:          movq mm4, mm0
rlm@1:          movq mm5, mm1
rlm@1:          movq mm6, mm2
rlm@1: 
rlm@1:          ; --- upper-bit part: mask, quarter, accumulate into mm0
rlm@1:          pand mm0, mm7
rlm@1:          pand mm1, mm7
rlm@1:          pand mm2, mm7
rlm@1:          pand mm7, mm3                 ; mm7 = colorD & qcolorMask
rlm@1:          psrlw mm0, 2
rlm@1:          psrlw mm1, 2
rlm@1:          psrlw mm2, 2
rlm@1:          psrlw mm7, 2
rlm@1:          paddw mm0, mm1
rlm@1:          paddw mm2, mm7
rlm@1:          paddw mm0, mm2
rlm@1: 
rlm@1:          ; --- low-bit part: add the masked low bits, then quarter the sum
rlm@1:          movq mm7, [qlowpixelMask]
rlm@1:          pand mm4, mm7
rlm@1:          pand mm5, mm7
rlm@1:          pand mm6, mm7
rlm@1:          pand mm3, mm7                 ; mm3 = colorD & qlowpixelMask
rlm@1:          paddw mm4, mm5
rlm@1:          paddw mm6, mm3
rlm@1:          paddw mm4, mm6
rlm@1:          psrlw mm4, 2
rlm@1:          pand mm4, mm7
rlm@1:          paddw mm0, mm4                ; mm0 = blend of A, B, C and D
rlm@1: 
rlm@1:          ; --- choose per lane between A, B and the blend
rlm@1:          movq mm1, [Mask1]
rlm@1:          movq mm2, [Mask2]
rlm@1:          movq mm3, mm1
rlm@1:          por mm3, mm2
rlm@1:          pxor mm7, mm7
rlm@1:          pcmpeqw mm3, mm7              ; lanes where neither mask is set
rlm@1:          pand mm0, mm3
rlm@1:          pand mm1, [eax+ebx+colorA]
rlm@1:          pand mm2, [eax+ebx+colorB]
rlm@1:          por mm0, mm1
rlm@1:          por mm0, mm2                  ; mm0 = diagonal pixels
rlm@1: 
rlm@1:          ; --- interleave with ACPixel: AC0, diag0, AC1, diag1, ...
rlm@1:          movq mm4, [ACPixel]
rlm@1:          movq mm5, mm4
rlm@1:          punpcklwd mm4, mm0
rlm@1:          punpckhwd mm5, mm0
rlm@1: 
rlm@1:          ; --- store one destination pitch below edx.  ecx is free scratch
rlm@1:          ;     here: the loop tail reloads it before using it.
rlm@1:          mov ecx, [ebp+dstPitch]
rlm@1: %ifdef FAR_POINTER
rlm@1:          movq [fs:edx+ecx], mm4
rlm@1:          movq [fs:edx+ecx+8], mm5
rlm@1: %else
rlm@1:          movq [edx+ecx], mm4
rlm@1:          movq [edx+ecx+8], mm5
rlm@1: %endif
rlm@1: 
rlm@1: .SKIP_PROCESS:
rlm@1:          mov ecx, [ebp+deltaPtr]
rlm@1:          add ecx, 8
rlm@1:          mov [ebp+deltaPtr], ecx
rlm@1:          add edx, 16
rlm@1:          add eax, 8
rlm@1: 
rlm@1:          pop ecx
rlm@1:          sub ecx, 4
rlm@1:          cmp ecx, 0
rlm@1:          jg  near .Loop
rlm@1: 
rlm@1: ; Restore some stuff
rlm@1:          popad
rlm@1:          mov esp, ebp
rlm@1:          pop ebp
rlm@1:          emms
rlm@1:          ret
rlm@1: 
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ;-------------------------------------------------------------------------
rlm@1: 
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ; Init_2xSaIMMX (assembled as _Init_2xSaIMMX when __DJGPP__ is defined)
rlm@1: ;
rlm@1: ; Stores the four averaging masks that match the requested 16-bit pixel
rlm@1: ; format into colorMask, lowPixelMask, qcolorMask and qlowpixelMask.
rlm@1: ;
rlm@1: ; In:    first stack argument ([ebp+8] after the prologue) = PixelFormat,
rlm@1: ;        either 555 or 565
rlm@1: ; Out:   eax = 0  masks were written
rlm@1: ;        eax = 1  PixelFormat was neither 555 nor 565; nothing was written
rlm@1: ; Regs:  only eax is modified; ebp is restored before returning
rlm@1: ;-------------------------------------------------------------------------
rlm@1: %ifdef __DJGPP__
rlm@1: _Init_2xSaIMMX:
rlm@1: %else
rlm@1: Init_2xSaIMMX:
rlm@1: %endif
rlm@1:          push ebp
rlm@1:          mov ebp, esp             ;frame pointer, used only to reach the argument
rlm@1: 
rlm@1: ;MMX detection, disabled by the original author because it did not work:
rlm@1: ;        mov eax,1
rlm@1: ;        cpuid
rlm@1: ;        test edx, 0x00800000     ;test bit 23
rlm@1: ;        jz end2 ;bit not set => no MMX detected
rlm@1: ;NOTE(review): CPUID overwrites eax, ebx, ecx and edx, and this routine
rlm@1: ;saves neither ebx nor ecx - possibly the reason it failed; unconfirmed.
rlm@1: 
rlm@1:          mov eax, [ebp+8]         ;eax = PixelFormat
rlm@1:          cmp eax, 555
rlm@1:          je Bits555
rlm@1:          cmp eax, 565
rlm@1:          je Bits565
rlm@1: end2:
rlm@1:          mov eax, 1               ;unsupported format: report failure
rlm@1:          jmp end3
rlm@1: 
rlm@1: Bits555:
rlm@1:          ;each mask is one 16-bit pattern replicated over all 8 bytes
rlm@1:          mov dword [colorMask], 0x7BDE7BDE
rlm@1:          mov dword [colorMask+4], 0x7BDE7BDE
rlm@1:          mov dword [lowPixelMask], 0x04210421
rlm@1:          mov dword [lowPixelMask+4], 0x04210421
rlm@1:          mov dword [qcolorMask], 0x739C739C
rlm@1:          mov dword [qcolorMask+4], 0x739C739C
rlm@1:          mov dword [qlowpixelMask], 0x0C630C63
rlm@1:          mov dword [qlowpixelMask+4], 0x0C630C63
rlm@1:          mov eax, 0               ;success
rlm@1:          jmp end3
rlm@1: 
rlm@1: Bits565:
rlm@1:          ;same values the .data section starts out with
rlm@1:          mov dword [colorMask], 0xF7DEF7DE
rlm@1:          mov dword [colorMask+4], 0xF7DEF7DE
rlm@1:          mov dword [lowPixelMask], 0x08210821
rlm@1:          mov dword [lowPixelMask+4], 0x08210821
rlm@1:          mov dword [qcolorMask], 0xE79CE79C
rlm@1:          mov dword [qcolorMask+4], 0xE79CE79C
rlm@1:          mov dword [qlowpixelMask], 0x18631863
rlm@1:          mov dword [qlowpixelMask+4], 0x18631863
rlm@1:          mov eax, 0               ;success; falls through to the common exit
rlm@1: 
rlm@1: end3:
rlm@1:          leave                    ;mov esp, ebp / pop ebp
rlm@1:          ret
rlm@1: 
rlm@1: 
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ;-------------------------------------------------------------------------
rlm@1: ;-------------------------------------------------------------------------
rlm@1: 
rlm@1:         SECTION .data ALIGN = 32
rlm@1: ;Constants, 8 bytes each: one 16-bit pattern repeated in all four words so
rlm@1: ;that a single movq/pand applies it to four 16-bit pixels at once.
rlm@1: ;The initial values of the first four entries are the ones Init_2xSaIMMX
rlm@1: ;stores for PixelFormat 565; Init_2xSaIMMX replaces them for 555.
rlm@1: colorMask     dd 0xF7DEF7DE,0xF7DEF7DE    ;all bits except 0, 5 and 11 of each word
rlm@1: lowPixelMask  dd 0x08210821,0x08210821    ;complement of colorMask: bits 0, 5 and 11
rlm@1: 
rlm@1: ;Masks for the four-way average: qcolorMask is applied before "psrlw 2",
rlm@1: ;qlowpixelMask isolates the bits that shift would otherwise discard.
rlm@1: qcolorMask    dd 0xE79CE79C,0xE79CE79C    ;all bits except 0-1, 5-6 and 11-12 of each word
rlm@1: qlowpixelMask dd 0x18631863,0x18631863    ;complement of qcolorMask: bits 0-1, 5-6, 11-12
rlm@1: 
rlm@1: ;NOTE(review): Init_2xSaIMMX never rewrites the next three entries, so they
rlm@1: ;keep these values for PixelFormat 555 as well. The bit patterns look like
rlm@1: ;a 5-6-5 layout - confirm before relying on them with 555 data.
rlm@1: darkenMask    dd 0xC718C718,0xC718C718    ;bits 3-4, 8-10 and 14-15 of each word
rlm@1: GreenMask     dd 0x07E007E0,0x07E007E0    ;bits 5-10 of each word
rlm@1: RedBlueMask   dd 0xF81FF81F,0xF81FF81F    ;complement of GreenMask: bits 0-4 and 11-15
rlm@1: 
rlm@1: ;Generic quadword constants
rlm@1: FALSE         dd 0x00000000,0x00000000    ;all bits clear
rlm@1: TRUE          dd 0xffffffff,0xffffffff    ;all bits set
rlm@1: ONE           dd 0x00010001,0x00010001    ;the value 1 in each 16-bit word
rlm@1: 
rlm@1: 
rlm@1:         SECTION .bss ALIGN = 32
rlm@1: ;Uninitialised scratch storage; every slot is one quadword (8 bytes), the
rlm@1: ;size of an MMX register.
rlm@1: 
rlm@1: ;Written and read back with movq by the line routine that ends just
rlm@1: ;before Init_2xSaIMMX.
rlm@1: ACPixel:      resq 1
rlm@1: Mask1:        resq 1
rlm@1: Mask2:        resq 1
rlm@1: 
rlm@1: ;The remaining slots are not referenced near these declarations;
rlm@1: ;presumably temporaries for the other line routines in this file (verify
rlm@1: ;against their bodies).
rlm@1: I56Pixel:     resq 1
rlm@1: I23Pixel:     resq 1
rlm@1: I5556Pixel:   resq 1
rlm@1: I2223Pixel:   resq 1
rlm@1: I5666Pixel:   resq 1
rlm@1: I2333Pixel:   resq 1
rlm@1: Mask26:       resq 1
rlm@1: Mask35:       resq 1
rlm@1: Mask26b:      resq 1
rlm@1: Mask35b:      resq 1
rlm@1: product1a:    resq 1
rlm@1: product1b:    resq 1
rlm@1: product2a:    resq 1
rlm@1: product2b:    resq 1
rlm@1: final1a:      resq 1
rlm@1: final1b:      resq 1
rlm@1: final2a:      resq 1
rlm@1: final2b:      resq 1