Mercurial > vba-linux
diff src/filters/2xSaImmx.asm @ 1:f9f4f1b99eed
importing src directory
author | Robert McIntyre <rlm@mit.edu> |
---|---|
date | Sat, 03 Mar 2012 10:31:27 -0600 |
parents | |
children |
line wrap: on
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/filters/2xSaImmx.asm Sat Mar 03 10:31:27 2012 -0600 1.3 @@ -0,0 +1,2109 @@ 1.4 +;/*---------------------------------------------------------------------* 1.5 +; * The following (piece of) code, (part of) the 2xSaI engine, * 1.6 +; * copyright (c) 1999 - 2001 by Derek Liauw Kie Fa. * 1.7 +; * Non-Commercial use of this software is allowed and is encouraged, * 1.8 +; * provided that appropriate credit be given. * 1.9 +; * You may freely modify this code, but I request * 1.10 +; * that any improvements to the engine be submitted to me, so * 1.11 +; * that I can implement these improvements in newer versions of * 1.12 +; * the software. * 1.13 +; * If you need more information, have any comments or suggestions, * 1.14 +; * you can e-mail me. My e-mail: derek-liauw@usa.net. * 1.15 +; *---------------------------------------------------------------------*/ 1.16 + 1.17 +;---------------------- 1.18 +; 2xSaI version 0.59 WIP, soon to become version 0.60 1.19 +;---------------------- 1.20 + 1.21 +;%define FAR_POINTER 1.22 + 1.23 + 1.24 + 1.25 + BITS 32 1.26 +%ifdef __DJGPP__ 1.27 + GLOBAL __2xSaILine 1.28 + GLOBAL __2xSaISuperEagleLine 1.29 + GLOBAL __2xSaISuper2xSaILine 1.30 + GLOBAL _Init_2xSaIMMX 1.31 +%else 1.32 + GLOBAL _2xSaILine 1.33 + GLOBAL _2xSaISuperEagleLine 1.34 + GLOBAL _2xSaISuper2xSaILine 1.35 + GLOBAL Init_2xSaIMMX 1.36 +%endif 1.37 + SECTION .text ALIGN = 32 1.38 + 1.39 +%ifdef FAR_POINTER 1.40 +;EXTERN_C void _2xSaILine (uint8 *srcPtr, uint8 *deltaPtr, uint32 srcPitch, uint32 width, 1.41 +; uint8 *dstPtr, uint32 dstPitch, uint16 dstSegment); 1.42 +%else 1.43 +;EXTERN_C void _2xSaILine (uint8 *srcPtr, uint8 *deltaPtr, uint32 srcPitch, uint32 width, 1.44 +; uint8 *dstPtr, uint32 dstPitch); 1.45 +%endif 1.46 + 1.47 +srcPtr equ 8 1.48 +deltaPtr equ 12 1.49 +srcPitch equ 16 1.50 +width equ 20 1.51 +dstOffset equ 24 1.52 +dstPitch equ 28 1.53 +dstSegment equ 32 1.54 + 1.55 + 1.56 + 1.57 + 1.58 +colorB0 equ -2 1.59 +colorB1 equ 0 
colorB2 equ 2
colorB3 equ 4

color7 equ -2
color8 equ 0
color9 equ 2

color4 equ -2
color5 equ 0
color6 equ 2
colorS2 equ 4

color1 equ -2
color2 equ 0
color3 equ 2
colorS1 equ 4

colorA0 equ -2
colorA1 equ 0
colorA2 equ 2
colorA3 equ 4


;=========================================================================
; Helper macros shared by the Super2xSaI and SuperEagle line routines.
;
; Register roles inside both main loops:
;   eax = source pointer minus one pitch (row "B", the row above the
;         current one); rows are reached as eax, eax+ebx, eax+2*ebx and
;         (after a temporary "add eax, ebx") eax+3*ebx
;   ebx = source pitch in bytes
;   edx = destination pointer
;   ebp = frame pointer, arguments at [ebp+srcPtr] .. [ebp+dstSegment]
;   ecx = scratch (the remaining-width counter lives on the stack)
;=========================================================================

;-------------------------------------------------------------------------
; SAI_DELTA_SCAN
;   Compares eight quadwords of the source neighbourhood (rows B, 4, 1, A
;   at byte offsets -2 and +4) word-by-word against the delta buffer at
;   [ebp+deltaPtr].  The delta buffer is addressed with the same pitch
;   (ebx) and an extra +2 byte displacement.  The quadword at
;   [eax+colorB0] is then copied into the delta buffer.
; Out:   ecx != 0 if at least one compared word differed
; Clobb: ecx, mm0-mm7, flags
;-------------------------------------------------------------------------
%macro SAI_DELTA_SCAN 0
        mov     ecx, [ebp+deltaPtr]

        ; load source image
        movq    mm0, [eax+colorB0]
        movq    mm1, [eax+colorB3]
        movq    mm2, [eax+ebx+color4]
        movq    mm3, [eax+ebx+colorS2]
        movq    mm4, [eax+ebx+ebx+color1]
        movq    mm5, [eax+ebx+ebx+colorS1]
        push    eax
        add     eax, ebx                        ; temporarily step down one row
        movq    mm6, [eax+ebx+ebx+colorA0]
        movq    mm7, [eax+ebx+ebx+colorA3]
        pop     eax

        ; compare to delta
        pcmpeqw mm0, [ecx+2+colorB0]
        pcmpeqw mm1, [ecx+2+colorB3]
        pcmpeqw mm2, [ecx+ebx+2+color4]
        pcmpeqw mm3, [ecx+ebx+2+colorS2]
        pcmpeqw mm4, [ecx+ebx+ebx+2+color1]
        pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1]
        add     ecx, ebx
        pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0]
        pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3]
        sub     ecx, ebx

        ; compose results: mm0 = AND of all eight comparisons
        pand    mm0, mm1
        pand    mm2, mm3
        pand    mm4, mm5
        pand    mm6, mm7
        pand    mm0, mm2
        pand    mm4, mm6
        pxor    mm7, mm7
        pand    mm0, mm4
        movq    mm6, [eax+colorB0]
        pcmpeqw mm7, mm0                        ; did any compare give us a zero ?

        movq    [ecx+2+colorB0], mm6            ; refresh the delta buffer

        packsswb mm7, mm7
        movd    ecx, mm7
%endmacro

;-------------------------------------------------------------------------
; SAI_BLEND
;   Per-word average of mm0 and mm1:
;     mm0 = ((mm0 & colorMask) >> 1) + ((mm1 & colorMask) >> 1)
;           + (mm0 & mm1 & lowPixelMask)
;   colorMask / lowPixelMask are defined outside this part of the file;
;   presumably they hold the "all but lowest bit" and "lowest bit" of
;   each colour channel -- confirm against their definition.
; Clobb: mm1, mm2, mm3
;-------------------------------------------------------------------------
%macro SAI_BLEND 0
        movq    mm2, mm0
        movq    mm3, mm1

        pand    mm0, [colorMask]
        pand    mm1, [colorMask]

        psrlw   mm0, 1
        psrlw   mm1, 1

        pand    mm3, [lowPixelMask]
        paddw   mm0, mm1

        pand    mm3, mm2
        paddw   mm0, mm3
%endmacro

;-------------------------------------------------------------------------
; SAI_VOTE addr1, addr2
;   Loads the two neighbour quadwords at [addr1] and [addr2] and updates
;   the per-word tally in mm7:
;     mm7 += mm6 in lanes where the neighbours are NOT both equal to mm4
;     mm7 -= mm6 in lanes where the neighbours are NOT both equal to mm5
; In:    mm4, mm5 = the two reference pixels, mm6 = per-lane weight
;        (guess mask ANDed with [ONE]; presumably 1 or 0 per word)
; Clobb: mm0-mm3
;-------------------------------------------------------------------------
%macro SAI_VOTE 2
        movq    mm0, [%1]
        movq    mm1, [%2]
        movq    mm2, mm0
        movq    mm3, mm1
        pcmpeqw mm0, mm4
        pcmpeqw mm1, mm4
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand    mm0, mm6
        pand    mm1, mm6
        pand    mm2, mm6
        pand    mm3, mm6
        paddw   mm0, mm1
        paddw   mm2, mm3

        pxor    mm3, mm3
        pcmpgtw mm0, mm6
        pcmpgtw mm2, mm6
        pcmpeqw mm0, mm3
        pcmpeqw mm2, mm3
        pand    mm0, mm6
        pand    mm2, mm6
        paddw   mm7, mm0
        psubw   mm7, mm2
%endmacro

;-------------------------------------------------------------------------
; SAI_EMIT_2X2
;   Interleaves final1a/final1b into the upper output row and
;   final2a/final2b into the lower output row and stores 16 bytes per
;   row at edx and edx+dstPitch (through fs: when FAR_POINTER is set).
; Clobb: ecx, mm0, mm1, mm2, mm4, mm5, mm6
;-------------------------------------------------------------------------
%macro SAI_EMIT_2X2 0
        movq    mm0, [final1a]
        movq    mm4, [final2a]
        movq    mm2, [final1b]
        movq    mm6, [final2b]

        movq    mm1, mm0
        movq    mm5, mm4

        punpcklwd mm0, mm2
        punpckhwd mm1, mm2

        punpcklwd mm4, mm6
        punpckhwd mm5, mm6

        mov     ecx, [ebp+dstPitch]             ; ecx is free here: it is reloaded by "pop ecx"
%ifdef FAR_POINTER
        movq    [fs:edx], mm0
        movq    [fs:edx+8], mm1
        movq    [fs:edx+ecx], mm4
        movq    [fs:edx+ecx+8], mm5
%else
        movq    [edx], mm0
        movq    [edx+8], mm1
        movq    [edx+ecx], mm4
        movq    [edx+ecx+8], mm5
%endif
%endmacro


;=========================================================================
; void _2xSaISuper2xSaILine (srcPtr, deltaPtr, srcPitch, width,
;                            dstPtr, dstPitch [, dstSegment])
;
; All arguments are read from the stack (offsets srcPtr .. dstSegment
; relative to ebp).  Each loop iteration consumes 8 source bytes (four
; 16-bit words), produces 16 bytes on each of two destination rows and
; advances deltaPtr by 8.  Groups whose neighbourhood matches the delta
; buffer are skipped without writing any output.
; All general registers are preserved (pushad/popad); MMX state is
; released with emms before returning.
;=========================================================================
%ifdef __DJGPP__
__2xSaISuper2xSaILine:
%else
_2xSaISuper2xSaILine:
%endif
; Store some stuff
        push    ebp
        mov     ebp, esp
        pushad

; Prepare the destination
%ifdef FAR_POINTER
        ; Set the selector
        mov     eax, [ebp+dstSegment]
        mov     fs, ax
%endif
        mov     edx, [ebp+dstOffset]            ; edx points to the screen
; Prepare the source
        mov     eax, [ebp+srcPtr]               ; eax points to colorA
        mov     ebx, [ebp+srcPitch]             ; ebx contains the source pitch
        mov     ecx, [ebp+width]                ; ecx contains the number of pixels to process
        sub     eax, ebx                        ; eax points to B1 which is the base

; Main Loop
.Loop:  push    ecx                             ; remaining width stays on the stack

        ;-----Check Delta------------------
        SAI_DELTA_SCAN
        test    ecx, ecx
        jz      near .SKIP_PROCESS              ; nothing changed, so we can skip
        ;End Delta

        ;---------------------------------
        ; I56 = average(color5, color6); mm4/mm5 keep the originals
        movq    mm0, [eax+ebx+color5]
        movq    mm1, [eax+ebx+color6]
        movq    mm4, mm0
        movq    mm5, mm1
        SAI_BLEND                               ; mm0 contains the interpolated values
        movq    [I56Pixel], mm0
        movq    mm7, mm0

        ;-------------------
        movq    mm0, mm7
        movq    mm1, mm4                        ; 5,5,5,6
        SAI_BLEND                               ; mm0 contains the interpolated values
        movq    [I5556Pixel], mm0

        ;--------------------
        movq    mm0, mm7
        movq    mm1, mm5                        ; 6,6,6,5
        SAI_BLEND
        movq    [I5666Pixel], mm0

        ;-------------------------
        ; I23 = average(color2, color3); mm4/mm5 keep the originals
        movq    mm0, [eax+ebx+ebx+color2]
        movq    mm1, [eax+ebx+ebx+color3]
        movq    mm4, mm0
        movq    mm5, mm1
        SAI_BLEND
        movq    [I23Pixel], mm0
        movq    mm7, mm0

        ;---------------------
        movq    mm0, mm7
        movq    mm1, mm4                        ; 2,2,2,3
        SAI_BLEND
        movq    [I2223Pixel], mm0

        ;----------------------
        movq    mm0, mm7
        movq    mm1, mm5                        ; 3,3,3,2
        SAI_BLEND
        movq    [I2333Pixel], mm0

;////////////////////////////////
; Decide which "branch" to take
;--------------------------------
        movq    mm0, [eax+ebx+color5]
        movq    mm1, [eax+ebx+color6]
        movq    mm6, mm0
        movq    mm7, mm1
        pcmpeqw mm0, [eax+ebx+ebx+color3]
        pcmpeqw mm1, [eax+ebx+ebx+color2]
        pcmpeqw mm6, mm7

        movq    mm2, mm0
        movq    mm3, mm0

        pand    mm0, mm1                        ; colorA == colorD && colorB == colorC
        pxor    mm7, mm7

        pcmpeqw mm2, mm7
        pand    mm6, mm0
        pand    mm2, mm1                        ; colorA != colorD && colorB == colorC

        pcmpeqw mm1, mm7

        pand    mm1, mm3                        ; colorA == colorD && colorB != colorC
        pxor    mm0, mm6
        por     mm1, mm6
        movq    mm7, mm0
        movq    [Mask26], mm2
        packsswb mm7, mm7
        movq    [Mask35], mm1

        movd    ecx, mm7
        test    ecx, ecx
        jz      near .SKIP_GUESS                ; no lane needs the neighbour vote

;---------------------------------------------
        movq    mm6, mm0
        movq    mm4, [eax+ebx+colorA]
        movq    mm5, [eax+ebx+colorB]
        pxor    mm7, mm7                        ; mm7 = running tally
        pand    mm6, [ONE]

        SAI_VOTE eax+colorE, eax+ebx+colorG
        SAI_VOTE eax+colorF, eax+ebx+colorK

        push    eax
        add     eax, ebx                        ; temporarily step down one row
        SAI_VOTE eax+ebx+colorH, eax+ebx+ebx+colorN
        SAI_VOTE eax+ebx+colorL, eax+ebx+ebx+colorO
        pop     eax

        movq    mm1, mm7
        pxor    mm0, mm0
        pcmpgtw mm7, mm0                        ; tally > 0
        pcmpgtw mm0, mm1                        ; tally < 0

        por     mm7, [Mask35]
        por     mm0, [Mask26]
        movq    [Mask35], mm7
        movq    [Mask26], mm0

.SKIP_GUESS:

        ;Start the ASSEMBLY !!! eh... compose all the results together to form the final image...

        ; mm0 = average(color5, color2), used by final1a and final2a below
        movq    mm0, [eax+ebx+color5]
        movq    mm1, [eax+ebx+ebx+color2]
        movq    mm4, mm0
        movq    mm5, mm1
        SAI_BLEND                               ; mm0 contains the interpolated values
        ;---------------------------

        ; The guard symbol below is not defined anywhere in the visible
        ; source, so this section acts as a block comment holding the
        ; reference pseudo-code.
%ifdef dfhsdfhsdahdsfhdsfh

            if (color5 == color3 && color2 != color6 && color4 == color5 && color5 != colorA2)
                product2a = INTERPOLATE (color2, color5);
            else
            if (color5 == color1 && color6 == color5 && color4 != color2 && color5 != colorA0)
                product2a = INTERPOLATE(color2, color5);
            else
                product2a = color2;

            if (color2 == color6 && color5 != color3 && color1 == color2 && color2 != colorB2)
                product1a = INTERPOLATE (color2, color5);
            else
            if (color4 == color2 && color3 == color2 && color1 != color5 && color2 != colorB0)
                product1a = INTERPOLATE(color2, color5);
            else
                product1a = color5;

%endif

        ;---- final1a ----
        movq    mm7, [Mask26]
        movq    mm6, [eax+colorB2]
        movq    mm5, [eax+ebx+ebx+color2]
        movq    mm4, [eax+ebx+ebx+color1]
        pcmpeqw mm4, mm5
        pcmpeqw mm6, mm5
        pxor    mm5, mm5
        pand    mm7, mm4
        pcmpeqw mm6, mm5
        pand    mm7, mm6

        movq    mm6, [eax+ebx+ebx+color3]
        movq    mm5, [eax+ebx+ebx+color2]
        movq    mm4, [eax+ebx+ebx+color1]
        movq    mm2, [eax+ebx+color5]
        movq    mm1, [eax+ebx+color4]
        movq    mm3, [eax+colorB0]

        pcmpeqw mm2, mm4
        pcmpeqw mm6, mm5
        pcmpeqw mm1, mm5
        pcmpeqw mm3, mm5
        pxor    mm5, mm5
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand    mm6, mm1
        pand    mm2, mm3
        pand    mm6, mm2
        por     mm7, mm6                        ; mm7 = lanes that take the interpolation

        movq    mm6, mm7
        pcmpeqw mm6, mm5                        ; mm6 = lanes that keep color5
        pand    mm7, mm0

        movq    mm1, [eax+ebx+color5]
        pand    mm6, mm1
        por     mm7, mm6
        movq    [final1a], mm7                  ; finished 1a

        ;--------------------------------
        ;---- final2a ----
        movq    mm7, [Mask35]
        push    eax
        add     eax, ebx
        movq    mm6, [eax+ebx+ebx+colorA2]
        pop     eax
        movq    mm5, [eax+ebx+color5]
        movq    mm4, [eax+ebx+color4]
        pcmpeqw mm4, mm5
        pcmpeqw mm6, mm5
        pxor    mm5, mm5
        pand    mm7, mm4
        pcmpeqw mm6, mm5
        pand    mm7, mm6

        movq    mm6, [eax+ebx+color6]
        movq    mm5, [eax+ebx+color5]
        movq    mm4, [eax+ebx+color4]
        movq    mm2, [eax+ebx+ebx+color2]
        movq    mm1, [eax+ebx+ebx+color1]
        push    eax
        add     eax, ebx
        movq    mm3, [eax+ebx+ebx+colorA0]
        pop     eax

        pcmpeqw mm2, mm4
        pcmpeqw mm6, mm5
        pcmpeqw mm1, mm5
        pcmpeqw mm3, mm5
        pxor    mm5, mm5
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand    mm6, mm1
        pand    mm2, mm3
        pand    mm6, mm2
        por     mm7, mm6                        ; mm7 = lanes that take the interpolation

        movq    mm6, mm7
        pcmpeqw mm6, mm5                        ; mm6 = lanes that keep color2
        pand    mm7, mm0

        movq    mm1, [eax+ebx+ebx+color2]
        pand    mm6, mm1
        por     mm7, mm6
        movq    [final2a], mm7                  ; finished 2a

        ;--------------------------------------------

        ; Reference pseudo-code (guard symbol not defined in the visible
        ; source, so this is skipped by the preprocessor).
%ifdef dfhsdfhsdahdsfhdsfh
            if (color6 == color3 && color3 == colorA1 && color2 != colorA2 && color3 != colorA0)
                product2b = Q_INTERPOLATE (color3, color3, color3, color2);
            else
            if (color5 == color2 && color2 == colorA2 && colorA1 != color3 && color2 != colorA3)
                product2b = Q_INTERPOLATE (color2, color2, color2, color3);
            else
                product2b = INTERPOLATE (color2, color3);

            if (color6 == color3 && color6 == colorB1 && color5 != colorB2 && color6 != colorB0)
                product1b = Q_INTERPOLATE (color6, color6, color6, color5);
            else
            if (color5 == color2 && color5 == colorB2 && colorB1 != color6 && color5 != colorB3)
                product1b = Q_INTERPOLATE (color6, color5, color5, color5);
            else
                product1b = INTERPOLATE (color5, color6);
%endif

        ;---- final2b ----
        push    eax
        add     eax, ebx
        pxor    mm7, mm7
        movq    mm0, [eax+ebx+ebx+colorA0]
        movq    mm1, [eax+ebx+ebx+colorA1]
        movq    mm2, [eax+ebx+ebx+colorA2]
        movq    mm3, [eax+ebx+ebx+colorA3]
        pop     eax
        movq    mm4, [eax+ebx+ebx+color2]
        movq    mm5, [eax+ebx+ebx+color3]
        movq    mm6, [eax+ebx+color6]

        pcmpeqw mm6, mm5
        pcmpeqw mm1, mm5
        pcmpeqw mm4, mm2
        pcmpeqw mm0, mm5
        pcmpeqw mm4, mm7
        pcmpeqw mm0, mm7
        pand    mm0, mm4
        pand    mm6, mm1
        pand    mm0, mm6                        ; mm0 = first condition of the pseudo-code

        push    eax
        add     eax, ebx
        movq    mm1, [eax+ebx+ebx+colorA1]
        pop     eax
        movq    mm4, [eax+ebx+ebx+color2]
        movq    mm5, [eax+ebx+color5]
        movq    mm6, [eax+ebx+ebx+color3]

        pcmpeqw mm5, mm4
        pcmpeqw mm2, mm4
        pcmpeqw mm1, mm6
        pcmpeqw mm3, mm4
        pcmpeqw mm1, mm7
        pcmpeqw mm3, mm7
        pand    mm2, mm5
        pand    mm1, mm3
        pand    mm1, mm2                        ; mm1 = second condition of the pseudo-code

        movq    mm7, mm0
        por     mm7, mm1

        movq    mm4, [Mask35]
        movq    mm3, [Mask26]

        movq    mm6, mm4
        pand    mm6, mm7
        pxor    mm4, mm6                        ; drop Mask35 lanes already decided above

        movq    mm6, mm3
        pand    mm6, mm7
        pxor    mm3, mm6                        ; drop Mask26 lanes already decided above

        movq    mm2, mm0
        movq    mm7, [I2333Pixel]
        movq    mm6, [I2223Pixel]
        movq    mm5, [I23Pixel]

        por     mm2, mm4
        pand    mm4, [eax+ebx+ebx+color3]
        por     mm2, mm3
        pand    mm3, [eax+ebx+ebx+color2]
        por     mm2, mm1
        pand    mm0, mm7
        pand    mm1, mm6
        pxor    mm7, mm7
        pcmpeqw mm2, mm7                        ; mm2 = lanes no mask selected -> I23
        por     mm0, mm1
        por     mm3, mm4
        pand    mm2, mm5
        por     mm0, mm3
        por     mm0, mm2
        movq    [final2b], mm0

        ;-----------------------------------
        ;---- final1b ----
        pxor    mm7, mm7
        movq    mm0, [eax+colorB0]
        movq    mm1, [eax+colorB1]
        movq    mm2, [eax+colorB2]
        movq    mm3, [eax+colorB3]
        movq    mm4, [eax+ebx+color5]
        movq    mm5, [eax+ebx+color6]
        movq    mm6, [eax+ebx+ebx+color3]

        pcmpeqw mm6, mm5
        pcmpeqw mm1, mm5
        pcmpeqw mm4, mm2
        pcmpeqw mm0, mm5
        pcmpeqw mm4, mm7
        pcmpeqw mm0, mm7
        pand    mm0, mm4
        pand    mm6, mm1
        pand    mm0, mm6                        ; mm0 = first condition of the pseudo-code

        movq    mm1, [eax+colorB1]
        movq    mm4, [eax+ebx+color5]
        movq    mm5, [eax+ebx+ebx+color2]
        movq    mm6, [eax+ebx+color6]

        pcmpeqw mm5, mm4
        pcmpeqw mm2, mm4
        pcmpeqw mm1, mm6
        pcmpeqw mm3, mm4
        pcmpeqw mm1, mm7
        pcmpeqw mm3, mm7
        pand    mm2, mm5
        pand    mm1, mm3
        pand    mm1, mm2                        ; mm1 = second condition of the pseudo-code

        movq    mm7, mm0
        por     mm7, mm1

        movq    mm4, [Mask35]
        movq    mm3, [Mask26]

        movq    mm6, mm4
        pand    mm6, mm7
        pxor    mm4, mm6

        movq    mm6, mm3
        pand    mm6, mm7
        pxor    mm3, mm6

        movq    mm2, mm0
        movq    mm7, [I5666Pixel]
        movq    mm6, [I5556Pixel]
        movq    mm5, [I56Pixel]

        por     mm2, mm4
        pand    mm4, [eax+ebx+color5]
        por     mm2, mm3
        pand    mm3, [eax+ebx+color6]
        por     mm2, mm1
        pand    mm0, mm7
        pand    mm1, mm6
        pxor    mm7, mm7
        pcmpeqw mm2, mm7                        ; mm2 = lanes no mask selected -> I56
        por     mm0, mm1
        por     mm3, mm4
        pand    mm2, mm5
        por     mm0, mm3
        por     mm0, mm2
        movq    [final1b], mm0

        ;---------
        SAI_EMIT_2X2

.SKIP_PROCESS:
        add     dword [ebp+deltaPtr], 8         ; advance the delta pointer kept in the argument slot
        add     edx, 16
        add     eax, 8

        pop     ecx                             ; remaining width
        sub     ecx, 4
        test    ecx, ecx
        jg      near .Loop

; Restore some stuff
        popad
        mov     esp, ebp
        pop     ebp
        emms
        ret


;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------


;=========================================================================
; void _2xSaISuperEagleLine (srcPtr, deltaPtr, srcPitch, width,
;                            dstPtr, dstPitch [, dstSegment])
;
; Same stack layout, register roles, per-iteration stride (8 source
; bytes in, 16 bytes out on each of two rows) and delta-buffer skip
; logic as the Super2xSaI routine; only the pixel selection differs.
; All general registers are preserved (pushad/popad); MMX state is
; released with emms before returning.
;=========================================================================
%ifdef __DJGPP__
__2xSaISuperEagleLine:
%else
_2xSaISuperEagleLine:
%endif
; Store some stuff
        push    ebp
        mov     ebp, esp
        pushad

; Prepare the destination
%ifdef FAR_POINTER
        ; Set the selector
        mov     eax, [ebp+dstSegment]
        mov     fs, ax
%endif
        mov     edx, [ebp+dstOffset]            ; edx points to the screen
; Prepare the source
        mov     eax, [ebp+srcPtr]               ; eax points to colorA
        mov     ebx, [ebp+srcPitch]
        mov     ecx, [ebp+width]
        sub     eax, ebx                        ; eax now points to colorB1

; Main Loop
.Loop:  push    ecx                             ; remaining width stays on the stack

        ;-----Check Delta------------------
        SAI_DELTA_SCAN
        test    ecx, ecx
        jz      near .SKIP_PROCESS
        ;End Delta

        ;---------------------------------
        ; I56 = average(color5, color6); mm4/mm5 keep the originals
        movq    mm0, [eax+ebx+color5]
        movq    mm1, [eax+ebx+color6]
        movq    mm4, mm0
        movq    mm5, mm1
        SAI_BLEND                               ; mm0 contains the interpolated values
        movq    [I56Pixel], mm0
        movq    mm7, mm0

        ;-------------------
        movq    mm0, mm7
        movq    mm1, mm4                        ; 5,5,5,6
        SAI_BLEND                               ; mm0 contains the interpolated values
        movq    [product1a], mm0

        ;--------------------
        movq    mm0, mm7
        movq    mm1, mm5                        ; 6,6,6,5
        SAI_BLEND
        movq    [product1b], mm0

        ;-------------------------
        ; I23 = average(color2, color3); mm4/mm5 keep the originals
        movq    mm0, [eax+ebx+ebx+color2]
        movq    mm1, [eax+ebx+ebx+color3]
        movq    mm4, mm0
        movq    mm5, mm1
        SAI_BLEND
        movq    [I23Pixel], mm0
        movq    mm7, mm0

        ;---------------------
        movq    mm0, mm7
        movq    mm1, mm4                        ; 2,2,2,3
        SAI_BLEND
        movq    [product2a], mm0

        ;----------------------
        movq    mm0, mm7
        movq    mm1, mm5                        ; 3,3,3,2
        SAI_BLEND
        movq    [product2b], mm0

        ;////////////////////////////////
        ; Decide which "branch" to take
        ;--------------------------------
        movq    mm4, [eax+ebx+color5]
        movq    mm5, [eax+ebx+color6]
        movq    mm6, [eax+ebx+ebx+color3]
        movq    mm7, [eax+ebx+ebx+color2]

        ; Mask35 = (color5 == color3) && (color6 != color2)
        pxor    mm3, mm3
        movq    mm0, mm4
        movq    mm1, mm5

        pcmpeqw mm0, mm6
        pcmpeqw mm1, mm7
        pcmpeqw mm1, mm3
        pand    mm0, mm1
        movq    [Mask35], mm0

        ; Mask35b = Mask35 && ((S1 == 5 && 4 == 5) || (A2 == 5 && B1 == 5))
        movq    mm0, [eax+ebx+ebx+colorS1]
        movq    mm1, [eax+ebx+color4]
        push    eax
        add     eax, ebx
        movq    mm2, [eax+ebx+ebx+colorA2]
        pop     eax
        movq    mm3, [eax+colorB1]
        pcmpeqw mm0, mm4
        pcmpeqw mm1, mm4
        pcmpeqw mm2, mm4
        pcmpeqw mm3, mm4
        pand    mm0, mm1
        pand    mm2, mm3
        por     mm0, mm2
        pand    mm0, [Mask35]
        movq    [Mask35b], mm0

        ;-----------
        ; Mask26 = (color5 != color3) && (color6 == color2)
        pxor    mm3, mm3
        movq    mm0, mm4
        movq    mm1, mm5

        pcmpeqw mm0, mm6
        pcmpeqw mm1, mm7
        pcmpeqw mm0, mm3
        pand    mm0, mm1
        movq    [Mask26], mm0

        ; Mask26b = Mask26 && ((1 == 6 && S2 == 6) || (A1 == 6 && B2 == 6))
        movq    mm0, [eax+ebx+ebx+color1]
        movq    mm1, [eax+ebx+colorS2]
        push    eax
        add     eax, ebx
        movq    mm2, [eax+ebx+ebx+colorA1]
        pop     eax
        movq    mm3, [eax+colorB2]
        pcmpeqw mm0, mm5
        pcmpeqw mm1, mm5
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand    mm0, mm1
        pand    mm2, mm3
        por     mm0, mm2
        pand    mm0, [Mask26]
        movq    [Mask26b], mm0

        ;--------------------
        ; mm0 = (5 == 3) && (6 == 2) && !(5 == 6): lanes needing the vote
        movq    mm0, mm4
        movq    mm1, mm5
        movq    mm2, mm0

        pcmpeqw mm2, mm1
        pcmpeqw mm0, mm6
        pcmpeqw mm1, mm7
        pand    mm0, mm1
        pand    mm2, mm0
        pxor    mm0, mm2
        movq    mm7, mm0

        ;------------------
        packsswb mm7, mm7
        movd    ecx, mm7
        test    ecx, ecx
        jz      near .SKIP_GUESS

;---------------------------------------------
; Map of the pixels:           I|E F|J
;                              G|A B|K
;                              H|C D|L
;                              M|N O|P
        movq    mm6, mm0
        movq    mm4, [eax+ebx+color5]
        movq    mm5, [eax+ebx+color6]
        pxor    mm7, mm7                        ; mm7 = running tally
        pand    mm6, [ONE]

        SAI_VOTE eax+colorB1, eax+ebx+color4
        SAI_VOTE eax+colorB2, eax+ebx+colorS2

        push    eax
        add     eax, ebx                        ; temporarily step down one row
        SAI_VOTE eax+ebx+color1, eax+ebx+ebx+colorA1
        SAI_VOTE eax+ebx+colorS1, eax+ebx+ebx+colorA2
        pop     eax

        movq    mm1, mm7
        pxor    mm0, mm0
        pcmpgtw mm7, mm0                        ; tally > 0
        pcmpgtw mm0, mm1                        ; tally < 0

        por     mm7, [Mask35]
        por     mm0, [Mask26]
        movq    [Mask35], mm7
        movq    [Mask26], mm0

.SKIP_GUESS:
        ;Start the ASSEMBLY !!!

        movq    mm4, [Mask35]
        movq    mm5, [Mask26]
        movq    mm6, [Mask35b]
        movq    mm7, [Mask26b]

        movq    mm0, [eax+ebx+color5]
        movq    mm1, [eax+ebx+color6]
        movq    mm2, [eax+ebx+ebx+color2]
        movq    mm3, [eax+ebx+ebx+color3]
        pcmpeqw mm0, mm2
        pcmpeqw mm1, mm3
        movq    mm2, mm4
        movq    mm3, mm5
        por     mm0, mm1
        por     mm2, mm3
        pand    mm2, mm0
        pxor    mm0, mm2
        movq    mm3, mm0                        ; mm3 = (5==2 || 6==3) && !(Mask35 || Mask26)

        movq    mm2, mm0
        pxor    mm0, mm0
        por     mm2, mm4
        pxor    mm4, mm6                        ; mm4 = Mask35 without the Mask35b lanes
        por     mm2, mm5
        pxor    mm5, mm7                        ; mm5 = Mask26 without the Mask26b lanes
        pcmpeqw mm2, mm0                        ; mm2 = lanes selected by none of the masks
        ;----------------

        ;---- final1a ----
        movq    mm0, [eax+ebx+color5]
        movq    mm1, mm3
        por     mm1, mm4
        por     mm1, mm6
        pand    mm0, mm1
        movq    mm1, mm5
        pand    mm1, [I56Pixel]
        por     mm0, mm1
        movq    mm1, mm7
        pand    mm1, [product1b]
        por     mm0, mm1
        movq    mm1, mm2
        pand    mm1, [product1a]
        por     mm0, mm1
        movq    [final1a], mm0

        ;---- final1b ----
        movq    mm0, [eax+ebx+color6]
        movq    mm1, mm3
        por     mm1, mm5
        por     mm1, mm7
        pand    mm0, mm1
        movq    mm1, mm4
        pand    mm1, [I56Pixel]
        por     mm0, mm1
        movq    mm1, mm6
        pand    mm1, [product1a]
        por     mm0, mm1
        movq    mm1, mm2
        pand    mm1, [product1b]
        por     mm0, mm1
        movq    [final1b], mm0

        ;---- final2a ----
        movq    mm0, [eax+ebx+ebx+color2]
        movq    mm1, mm3
        por     mm1, mm5
        por     mm1, mm7
        pand    mm0, mm1
        movq    mm1, mm4
        pand    mm1, [I23Pixel]
        por     mm0, mm1
        movq    mm1, mm6
        pand    mm1, [product2b]
        por     mm0, mm1
        movq    mm1, mm2
        pand    mm1, [product2a]
        por     mm0, mm1
        movq    [final2a], mm0

        ;---- final2b ----
        movq    mm0, [eax+ebx+ebx+color3]
        movq    mm1, mm3
        por     mm1, mm4
        por     mm1, mm6
        pand    mm0, mm1
        movq    mm1, mm5
        pand    mm1, [I23Pixel]
        por     mm0, mm1
        movq    mm1, mm7
        pand    mm1, [product2a]
        por     mm0, mm1
        movq    mm1, mm2
        pand    mm1, [product2b]
        por     mm0, mm1
        movq    [final2b], mm0

        ;---------
        SAI_EMIT_2X2

.SKIP_PROCESS:
        add     dword [ebp+deltaPtr], 8         ; advance the delta pointer kept in the argument slot
        add     edx, 16
        add     eax, 8

        pop     ecx                             ; remaining width
        sub     ecx, 4
        test    ecx, ecx
        jg      near .Loop

; Restore some stuff
        popad
        mov     esp, ebp
        pop     ebp
        emms
        ret


;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------


;This is
;version 0.50

;-------------------------------------------------------------------------
; Pixel neighbourhood used by the 2xSaI line filter.  Each equate is a byte
; offset (16-bit pixels) relative to the row pointer it is combined with:
;
;       row eax            : I | E F | J
;       row eax+ebx        : G | A B | K
;       row eax+ebx+ebx    : H | C D | L
;       row eax+3*ebx      : M | N O | P
;
; A is the pixel being scaled.  Every movq handles four adjacent pixels.
;-------------------------------------------------------------------------
colorI  equ     -2
colorE  equ     0
colorF  equ     2
colorJ  equ     4

colorG  equ     -2
colorA  equ     0
colorB  equ     2
colorK  equ     4

colorH  equ     -2
colorC  equ     0
colorD  equ     2
colorL  equ     4

colorM  equ     -2
colorN  equ     0
colorO  equ     2
colorP  equ     4

;-------------------------------------------------------------------------
; _2xSaILine  (__2xSaILine when built with __DJGPP__)
;
; Scales one source line to two destination lines.  Arguments are read from
; the stack through the equates declared at the top of this file:
;
;   [ebp+srcPtr]     pointer to the source line (pixel A of the first group)
;   [ebp+deltaPtr]   pointer to the delta buffer; advanced in place by
;                    8 bytes per loop iteration
;   [ebp+srcPitch]   byte distance between source rows
;   [ebp+width]      pixel count; the loop consumes 4 pixels per iteration
;                    and its body always runs at least once
;   [ebp+dstOffset]  pointer to the first destination line
;   [ebp+dstPitch]   byte distance between destination rows
;   [ebp+dstSegment] selector loaded into fs (FAR_POINTER builds only)
;
; Register roles inside the loop:
;   eax = source pointer, one row above A (row of I/E/F/J)
;   ebx = srcPitch
;   ecx = scratch (remaining width is kept on the stack)
;   edx = destination pointer
;
; All general registers are preserved (pushad/popad).  MMX state is cleared
; with emms before returning.  Mask1, Mask2 and ACPixel in .bss are used as
; scratch storage, so the routine is not reentrant.
;-------------------------------------------------------------------------
%ifdef __DJGPP__
__2xSaILine:
%else
_2xSaILine:
%endif
; Store some stuff
        push    ebp
        mov     ebp, esp
        pushad

; Prepare the destination
%ifdef FAR_POINTER
        ; Set the selector
        mov     eax, [ebp+dstSegment]
        mov     fs, ax
%endif
        mov     edx, [ebp+dstOffset]            ; edx points to the screen
; Prepare the source
        ; eax points to colorA
        mov     eax, [ebp+srcPtr]
        mov     ebx, [ebp+srcPitch]
        mov     ecx, [ebp+width]
        ; eax now points to colorE
        sub     eax, ebx


; Main Loop
.Loop:  push    ecx                             ; remaining width lives on the stack

        ;-----Check Delta------------------
        ; Compare the 8 quadwords surrounding this group of pixels with the
        ; copy kept in the delta buffer.  If all of them match, the group is
        ; skipped and nothing is written to the destination.
        mov     ecx, [ebp+deltaPtr]

        movq    mm0, [eax+colorI]
        movq    mm1, [eax+colorJ]
        movq    mm2, [eax+ebx+colorG]
        movq    mm3, [eax+ebx+colorK]
        movq    mm4, [eax+ebx+ebx+colorH]
        movq    mm5, [eax+ebx+ebx+colorL]
        push    eax
        add     eax, ebx
        movq    mm6, [eax+ebx+ebx+colorM]
        movq    mm7, [eax+ebx+ebx+colorP]
        pop     eax

        pcmpeqw mm0, [ecx+2+colorI]
        pcmpeqw mm1, [ecx+2+colorJ]             ; same row as the colorJ load above
        pcmpeqw mm2, [ecx+ebx+2+colorG]
        pcmpeqw mm3, [ecx+ebx+2+colorK]
        pcmpeqw mm4, [ecx+ebx+ebx+2+colorH]
        pcmpeqw mm5, [ecx+ebx+ebx+2+colorL]
        add     ecx, ebx
        pcmpeqw mm6, [ecx+ebx+ebx+2+colorM]
        pcmpeqw mm7, [ecx+ebx+ebx+2+colorP]
        sub     ecx, ebx


        pand    mm0, mm1
        pand    mm2, mm3
        pand    mm4, mm5
        pand    mm6, mm7
        pand    mm0, mm2
        pand    mm4, mm6
        pxor    mm7, mm7
        pand    mm0, mm4                        ; mm0 = lanes where everything matched
        movq    mm6, [eax+colorI]
        pcmpeqw mm7, mm0                        ; mm7 = lanes where something changed

        movq    [ecx+2+colorI], mm6             ; refresh the delta buffer

        packsswb mm7, mm7
        movd    ecx, mm7
        test    ecx, ecx
        jz      near .SKIP_PROCESS              ; no lane changed

        ;End Delta

        ;---------------------------------


;1
        ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorE) && (colorB == colorL)
        movq    mm0, [eax+ebx+colorA]           ;mm0 and mm1 contain colorA
        movq    mm2, [eax+ebx+colorB]           ;mm2 and mm3 contain colorB

        movq    mm1, mm0
        movq    mm3, mm2

        pcmpeqw mm0, [eax+ebx+ebx+colorD]
        pcmpeqw mm1, [eax+colorE]
        pcmpeqw mm2, [eax+ebx+ebx+colorL]
        pcmpeqw mm3, [eax+ebx+ebx+colorC]

        pand    mm0, mm1
        pxor    mm1, mm1
        pand    mm0, mm2
        pcmpeqw mm3, mm1                        ; invert: colorB != colorC
        pand    mm0, mm3                        ;result in mm0

        ;if ((colorA == colorC) && (colorB != colorE) && (colorA == colorF) && (colorB == colorJ)
        movq    mm4, [eax+ebx+colorA]           ;mm4 and mm5 contain colorA
        movq    mm6, [eax+ebx+colorB]           ;mm6 and mm7 contain colorB
        movq    mm5, mm4
        movq    mm7, mm6

        pcmpeqw mm4, [eax+ebx+ebx+colorC]
        pcmpeqw mm5, [eax+colorF]
        pcmpeqw mm6, [eax+colorJ]
        pcmpeqw mm7, [eax+colorE]

        pand    mm4, mm5
        pxor    mm5, mm5
        pand    mm4, mm6
        pcmpeqw mm7, mm5                        ; invert: colorB != colorE
        pand    mm4, mm7                        ;result in mm4

        por     mm0, mm4                        ;combine the masks
        movq    [Mask1], mm0                    ; Mask1: lanes that take colorA

        ;--------------------------------------------

;2
        ;if ((colorB == colorC) && (colorA != colorD) && (colorB == colorF) && (colorA == colorH)
        movq    mm0, [eax+ebx+colorB]           ;mm0 and mm1 contain colorB
        movq    mm2, [eax+ebx+colorA]           ;mm2 and mm3 contain colorA
        movq    mm1, mm0
        movq    mm3, mm2

        pcmpeqw mm0, [eax+ebx+ebx+colorC]
        pcmpeqw mm1, [eax+colorF]
        pcmpeqw mm2, [eax+ebx+ebx+colorH]
        pcmpeqw mm3, [eax+ebx+ebx+colorD]

        pand    mm0, mm1
        pxor    mm1, mm1
        pand    mm0, mm2
        pcmpeqw mm3, mm1                        ; invert: colorA != colorD
        pand    mm0, mm3                        ;result in mm0

        ;if ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI)
        movq    mm4, [eax+ebx+colorB]           ;mm4 and mm5 contain colorB
        movq    mm6, [eax+ebx+colorA]           ;mm6 and mm7 contain colorA
        movq    mm5, mm4
        movq    mm7, mm6

        pcmpeqw mm4, [eax+ebx+ebx+colorD]
        pcmpeqw mm5, [eax+colorE]
        pcmpeqw mm6, [eax+colorI]
        pcmpeqw mm7, [eax+colorF]

        pand    mm4, mm5
        pxor    mm5, mm5
        pand    mm4, mm6
        pcmpeqw mm7, mm5                        ; invert: colorA != colorF
        pand    mm4, mm7                        ;result in mm4

        por     mm0, mm4                        ;combine the masks
        movq    [Mask2], mm0                    ; Mask2: lanes that take colorB


;interpolate colorA and colorB
        ; (A & colorMask)/2 + (B & colorMask)/2 + (A & B & lowPixelMask)
        movq    mm0, [eax+ebx+colorA]
        movq    mm1, [eax+ebx+colorB]

        movq    mm2, mm0
        movq    mm3, mm1

        pand    mm0, [colorMask]
        pand    mm1, [colorMask]

        psrlw   mm0, 1
        psrlw   mm1, 1

        pand    mm3, [lowPixelMask]
        paddw   mm0, mm1

        pand    mm3, mm2
        paddw   mm0, mm3                        ;mm0 contains the interpolated values

        ;assemble the pixels
        movq    mm1, [eax+ebx+colorA]
        movq    mm2, [eax+ebx+colorB]

        movq    mm3, [Mask1]
        movq    mm5, mm1
        movq    mm4, [Mask2]
        movq    mm6, mm1

        pand    mm1, mm3
        por     mm3, mm4
        pxor    mm7, mm7
        pand    mm2, mm4

        pcmpeqw mm3, mm7                        ; lanes selected by neither mask
        por     mm1, mm2
        pand    mm0, mm3                        ; those lanes keep the interpolation

        por     mm0, mm1

        punpcklwd mm5, mm0                      ; interleave: A, result, A, result ...
        punpckhwd mm6, mm0

%ifdef FAR_POINTER
        movq    [fs:edx], mm5
        movq    [fs:edx+8], mm6
%else
        movq    [edx], mm5
        movq    [edx+8], mm6
%endif

;------------------------------------------------
; Create the Nextline
;------------------------------------------------
;3      ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorG) && (colorC == colorO)
        movq    mm0, [eax+ebx+colorA]           ;mm0 and mm1 contain colorA
        movq    mm2, [eax+ebx+ebx+colorC]       ;mm2 and mm3 contain colorC
        movq    mm1, mm0
        movq    mm3, mm2

        push    eax
        add     eax, ebx                        ; eax temporarily points at row G/A/B/K
        pcmpeqw mm0, [eax+ebx+colorD]
        pcmpeqw mm1, [eax+colorG]
        pcmpeqw mm2, [eax+ebx+ebx+colorO]
        pcmpeqw mm3, [eax+colorB]
        pop     eax

        pand    mm0, mm1
        pxor    mm1, mm1
        pand    mm0, mm2
        pcmpeqw mm3, mm1                        ; invert: colorB != colorC
        pand    mm0, mm3                        ;result in mm0

        ;if ((colorA == colorB) && (colorG != colorC) && (colorA == colorH) && (colorC == colorM)
        movq    mm4, [eax+ebx+colorA]           ;mm4 and mm5 contain colorA
        movq    mm6, [eax+ebx+ebx+colorC]       ;mm6 and mm7 contain colorC
        movq    mm5, mm4
        movq    mm7, mm6

        push    eax
        add     eax, ebx                        ; eax temporarily points at row G/A/B/K
        pcmpeqw mm4, [eax+ebx+colorH]
        pcmpeqw mm5, [eax+colorB]
        pcmpeqw mm6, [eax+ebx+ebx+colorM]
        pcmpeqw mm7, [eax+colorG]
        pop     eax

        pand    mm4, mm5
        pxor    mm5, mm5
        pand    mm4, mm6
        pcmpeqw mm7, mm5                        ; invert: colorG != colorC
        pand    mm4, mm7                        ;result in mm4

        por     mm0, mm4                        ;combine the masks
        movq    [Mask1], mm0                    ; Mask1: lanes that take colorA
        ;--------------------------------------------

;4
        ;if ((colorB == colorC) && (colorA != colorD) && (colorC == colorH) && (colorA == colorF)
        movq    mm0, [eax+ebx+ebx+colorC]       ;mm0 and mm1 contain colorC
        movq    mm2, [eax+ebx+colorA]           ;mm2 and mm3 contain colorA
        movq    mm1, mm0
        movq    mm3, mm2

        pcmpeqw mm0, [eax+ebx+colorB]
        pcmpeqw mm1, [eax+ebx+ebx+colorH]
        pcmpeqw mm2, [eax+colorF]
        pcmpeqw mm3, [eax+ebx+ebx+colorD]

        pand    mm0, mm1
        pxor    mm1, mm1
        pand    mm0, mm2
        pcmpeqw mm3, mm1                        ; invert: colorA != colorD
        pand    mm0, mm3                        ;result in mm0

        ;if ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI)
        movq    mm4, [eax+ebx+ebx+colorC]       ;mm4 and mm5 contain colorC
        movq    mm6, [eax+ebx+colorA]           ;mm6 and mm7 contain colorA
        movq    mm5, mm4
        movq    mm7, mm6

        pcmpeqw mm4, [eax+ebx+ebx+colorD]
        pcmpeqw mm5, [eax+ebx+colorG]
        pcmpeqw mm6, [eax+colorI]
        pcmpeqw mm7, [eax+ebx+ebx+colorH]

        pand    mm4, mm5
        pxor    mm5, mm5
        pand    mm4, mm6
        pcmpeqw mm7, mm5                        ; invert: colorA != colorH
        pand    mm4, mm7                        ;result in mm4

        por     mm0, mm4                        ;combine the masks
        movq    [Mask2], mm0                    ; Mask2: lanes that take colorC
        ;----------------------------------------------

;interpolate colorA and colorC
        ; (A & colorMask)/2 + (C & colorMask)/2 + (A & C & lowPixelMask)
        movq    mm0, [eax+ebx+colorA]
        movq    mm1, [eax+ebx+ebx+colorC]

        movq    mm2, mm0
        movq    mm3, mm1

        pand    mm0, [colorMask]
        pand    mm1, [colorMask]

        psrlw   mm0, 1
        psrlw   mm1, 1

        pand    mm3, [lowPixelMask]
        paddw   mm0, mm1

        pand    mm3, mm2
        paddw   mm0, mm3                        ;mm0 contains the interpolated values
        ;-------------

        ;assemble the pixels
        movq    mm1, [eax+ebx+colorA]
        movq    mm2, [eax+ebx+ebx+colorC]

        movq    mm3, [Mask1]
        movq    mm4, [Mask2]

        pand    mm1, mm3
        pand    mm2, mm4

        por     mm3, mm4
        pxor    mm7, mm7
        por     mm1, mm2

        pcmpeqw mm3, mm7                        ; lanes selected by neither mask
        pand    mm0, mm3
        por     mm0, mm1
        movq    [ACPixel], mm0                  ; left pixel of the second output row

;////////////////////////////////
; Decide which "branch" to take
;--------------------------------
        movq    mm0, [eax+ebx+colorA]
        movq    mm1, [eax+ebx+colorB]
        movq    mm6, mm0
        movq    mm7, mm1
        pcmpeqw mm0, [eax+ebx+ebx+colorD]
        pcmpeqw mm1, [eax+ebx+ebx+colorC]
        pcmpeqw mm6, mm7                        ; colorA == colorB

        movq    mm2, mm0
        movq    mm3, mm0

        pand    mm0, mm1                        ;colorA == colorD && colorB == colorC
        pxor    mm7, mm7

        pcmpeqw mm2, mm7
        pand    mm6, mm0                        ; all four pixels equal
        pand    mm2, mm1                        ;colorA != colorD && colorB == colorC

        pcmpeqw mm1, mm7

        pand    mm1, mm3                        ;colorA == colorD && colorB != colorC
        pxor    mm0, mm6                        ; both diagonals match but A != B: must guess
        por     mm1, mm6
        movq    mm7, mm0
        movq    [Mask2], mm2                    ; Mask2: lanes that take colorB
        packsswb mm7, mm7
        movq    [Mask1], mm1                    ; Mask1: lanes that take colorA

        movd    ecx, mm7
        test    ecx, ecx
        jz      near .SKIP_GUESS                ; no ambiguous lane

;---------------------------------------------
; Map of the pixels:                    I|E F|J
;                                       G|A B|K
;                                       H|C D|L
;                                       M|N O|P
;
; For the ambiguous lanes (mm6 = 1 there, 0 elsewhere) four pairs of outer
; pixels are examined.  A pair adds 1 to the running score in mm7 when at
; most one of its pixels equals colorA, and subtracts 1 when at most one
; equals colorB.  A positive final score selects colorA, a negative one
; selects colorB.
        movq    mm6, mm0
        movq    mm4, [eax+ebx+colorA]
        movq    mm5, [eax+ebx+colorB]
        pxor    mm7, mm7
        pand    mm6, [ONE]

        ; pair E, G
        movq    mm0, [eax+colorE]
        movq    mm1, [eax+ebx+colorG]
        movq    mm2, mm0
        movq    mm3, mm1
        pcmpeqw mm0, mm4
        pcmpeqw mm1, mm4
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand    mm0, mm6
        pand    mm1, mm6
        pand    mm2, mm6
        pand    mm3, mm6
        paddw   mm0, mm1
        paddw   mm2, mm3

        pxor    mm3, mm3
        pcmpgtw mm0, mm6
        pcmpgtw mm2, mm6
        pcmpeqw mm0, mm3
        pcmpeqw mm2, mm3
        pand    mm0, mm6
        pand    mm2, mm6
        paddw   mm7, mm0
        psubw   mm7, mm2

        ; pair F, K
        movq    mm0, [eax+colorF]
        movq    mm1, [eax+ebx+colorK]
        movq    mm2, mm0
        movq    mm3, mm1
        pcmpeqw mm0, mm4
        pcmpeqw mm1, mm4
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand    mm0, mm6
        pand    mm1, mm6
        pand    mm2, mm6
        pand    mm3, mm6
        paddw   mm0, mm1
        paddw   mm2, mm3

        pxor    mm3, mm3
        pcmpgtw mm0, mm6
        pcmpgtw mm2, mm6
        pcmpeqw mm0, mm3
        pcmpeqw mm2, mm3
        pand    mm0, mm6
        pand    mm2, mm6
        paddw   mm7, mm0
        psubw   mm7, mm2

        ; pair H, N   (eax is moved down one row until the pop below)
        push    eax
        add     eax, ebx
        movq    mm0, [eax+ebx+colorH]
        movq    mm1, [eax+ebx+ebx+colorN]
        movq    mm2, mm0
        movq    mm3, mm1
        pcmpeqw mm0, mm4
        pcmpeqw mm1, mm4
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand    mm0, mm6
        pand    mm1, mm6
        pand    mm2, mm6
        pand    mm3, mm6
        paddw   mm0, mm1
        paddw   mm2, mm3

        pxor    mm3, mm3
        pcmpgtw mm0, mm6
        pcmpgtw mm2, mm6
        pcmpeqw mm0, mm3
        pcmpeqw mm2, mm3
        pand    mm0, mm6
        pand    mm2, mm6
        paddw   mm7, mm0
        psubw   mm7, mm2

        ; pair L, O
        movq    mm0, [eax+ebx+colorL]
        movq    mm1, [eax+ebx+ebx+colorO]
        movq    mm2, mm0
        movq    mm3, mm1
        pcmpeqw mm0, mm4
        pcmpeqw mm1, mm4
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand    mm0, mm6
        pand    mm1, mm6
        pand    mm2, mm6
        pand    mm3, mm6
        paddw   mm0, mm1
        paddw   mm2, mm3

        pxor    mm3, mm3
        pcmpgtw mm0, mm6
        pcmpgtw mm2, mm6
        pcmpeqw mm0, mm3
        pcmpeqw mm2, mm3
        pand    mm0, mm6
        pand    mm2, mm6
        paddw   mm7, mm0
        psubw   mm7, mm2

        pop     eax
        movq    mm1, mm7
        pxor    mm0, mm0
        pcmpgtw mm7, mm0                        ; score > 0
        pcmpgtw mm0, mm1                        ; score < 0

        por     mm7, [Mask1]
        por     mm0, [Mask2]
        movq    [Mask1], mm7
        movq    [Mask2], mm0

.SKIP_GUESS:
        ;----------------------------
        ;interpolate A, B, C and D
        ; sum of (x & qcolorMask)/4 over the four pixels, plus the carry
        ; recovered from their low bits: ((sum of x & qlowpixelMask)/4) & qlowpixelMask
        movq    mm0, [eax+ebx+colorA]
        movq    mm1, [eax+ebx+colorB]
        movq    mm4, mm0
        movq    mm2, [eax+ebx+ebx+colorC]
        movq    mm5, mm1
        movq    mm3, [qcolorMask]
        movq    mm6, mm2
        movq    mm7, [qlowpixelMask]

        pand    mm0, mm3
        pand    mm1, mm3
        pand    mm2, mm3
        pand    mm3, [eax+ebx+ebx+colorD]

        psrlw   mm0, 2
        pand    mm4, mm7
        psrlw   mm1, 2
        pand    mm5, mm7
        psrlw   mm2, 2
        pand    mm6, mm7
        psrlw   mm3, 2
        pand    mm7, [eax+ebx+ebx+colorD]

        paddw   mm0, mm1
        paddw   mm2, mm3

        paddw   mm4, mm5
        paddw   mm6, mm7

        paddw   mm4, mm6
        paddw   mm0, mm2
        psrlw   mm4, 2
        pand    mm4, [qlowpixelMask]
        paddw   mm0, mm4                        ;mm0 contains the interpolated value of A, B, C and D

;\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
        ;assemble the pixels
        movq    mm1, [Mask1]
        movq    mm2, [Mask2]
        movq    mm4, [eax+ebx+colorA]
        movq    mm5, [eax+ebx+colorB]
        pand    mm4, mm1
        pand    mm5, mm2

        pxor    mm7, mm7
        por     mm1, mm2
        por     mm4, mm5
        pcmpeqw mm1, mm7                        ; lanes selected by neither mask
        pand    mm0, mm1
        por     mm4, mm0                        ;mm4 contains the diagonal pixels

        movq    mm0, [ACPixel]
        movq    mm1, mm0
        punpcklwd mm0, mm4                      ; interleave: AC result, diagonal, ...
        punpckhwd mm1, mm4

        push    edx
        add     edx, [ebp+dstPitch]             ; second destination row

%ifdef FAR_POINTER
        movq    [fs:edx], mm0
        movq    [fs:edx+8], mm1
%else
        movq    [edx], mm0
        movq    [edx+8], mm1
%endif
        pop     edx

.SKIP_PROCESS:
        ; advance: 4 source pixels (8 bytes), 8 destination pixels (16 bytes)
        mov     ecx, [ebp+deltaPtr]
        add     ecx, 8
        mov     [ebp+deltaPtr], ecx
        add     edx, 16
        add     eax, 8

        pop     ecx                             ; remaining width
        sub     ecx, 4
        cmp     ecx, 0
        jg      near .Loop

; Restore some stuff
        popad
        mov     esp, ebp
        pop     ebp
        emms
        ret

;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------

;-------------------------------------------------------------------------
; Init_2xSaIMMX  (_Init_2xSaIMMX when built with __DJGPP__)
;
; Selects the interpolation masks for the given 16-bit pixel layout.
;
;   In:   [ebp+8] = PixelFormat, either 555 or 565
;   Out:  eax = 0 when the masks were set, 1 for any other PixelFormat
;         (the masks are left untouched in that case)
;
; Writes colorMask, lowPixelMask, qcolorMask and qlowpixelMask in .data.
; Only eax and the flags are modified.
;-------------------------------------------------------------------------
%ifdef __DJGPP__
_Init_2xSaIMMX:
%else
Init_2xSaIMMX:
%endif
        push    ebp
        mov     ebp, esp

; CPUID-based MMX detection is disabled; the original note reads
; "Damn thing doesn't work".
; NOTE(review): presumably it failed because cpuid overwrites ebx and ecx
; and ebx was not saved -- confirm before re-enabling.
;       mov eax,1
;       cpuid
;       test edx, 0x00800000    ;test bit 23
;       jz end2                 ;bit not set => no MMX detected

        mov     eax, [ebp+8]                    ;PixelFormat
        cmp     eax, 555
        jz      Bits555
        cmp     eax, 565
        jz      Bits565
end2:
        mov     eax, 1                          ; unsupported pixel format
        jmp     end3

Bits555:
        ; 0x0421 = lowest bit of each 5-bit channel, 0x0C63 = lowest two bits
        mov     dword [colorMask],       0x7BDE7BDE
        mov     dword [colorMask+4],     0x7BDE7BDE
        mov     dword [lowPixelMask],    0x04210421
        mov     dword [lowPixelMask+4],  0x04210421
        mov     dword [qcolorMask],      0x739C739C
        mov     dword [qcolorMask+4],    0x739C739C
        mov     dword [qlowpixelMask],   0x0C630C63
        mov     dword [qlowpixelMask+4], 0x0C630C63
        xor     eax, eax
        jmp     end3

Bits565:
        ; 0x0821 = lowest bit of each channel, 0x1863 = lowest two bits
        mov     dword [colorMask],       0xF7DEF7DE
        mov     dword [colorMask+4],     0xF7DEF7DE
        mov     dword [lowPixelMask],    0x08210821
        mov     dword [lowPixelMask+4],  0x08210821
        mov     dword [qcolorMask],      0xE79CE79C
        mov     dword [qcolorMask+4],    0xE79CE79C
        mov     dword [qlowpixelMask],   0x18631863
        mov     dword [qlowpixelMask+4], 0x18631863
        xor     eax, eax

end3:
        pop     ebp
        ret


;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------

        SECTION .data ALIGN = 32
;Some constants
; The four interpolation masks hold the 565 values until Init_2xSaIMMX
; overwrites them, which is why they live in .data.
colorMask       dd 0xF7DEF7DE,0xF7DEF7DE
lowPixelMask    dd 0x08210821,0x08210821

qcolorMask      dd 0xE79CE79C,0xE79CE79C
qlowpixelMask   dd 0x18631863,0x18631863

darkenMask      dd 0xC718C718,0xC718C718
GreenMask       dd 0x07E007E0,0x07E007E0
RedBlueMask     dd 0xF81FF81F,0xF81FF81F

FALSE           dd 0x00000000,0x00000000
TRUE            dd 0xffffffff,0xffffffff
ONE             dd 0x00010001,0x00010001


        SECTION .bss ALIGN = 32
; Scratch quadwords shared by the line filters in this file.
ACPixel         resb 8
Mask1           resb 8
Mask2           resb 8

I56Pixel        resb 8
I23Pixel        resb 8
I5556Pixel      resb 8
I2223Pixel      resb 8
I5666Pixel      resb 8
I2333Pixel      resb 8
Mask26          resb 8
Mask35          resb 8
Mask26b         resb 8
Mask35b         resb 8
product1a       resb 8
product1b       resb 8
product2a       resb 8
product2b       resb 8
final1a         resb 8
final1b         resb 8
final2a         resb 8
final2b         resb 8

; Tell the GNU linker that this object does not need an executable stack.
; Without this note an ELF link marks the whole program's stack executable.
%ifidn __OUTPUT_FORMAT__,elf
        SECTION .note.GNU-stack noalloc noexec nowrite progbits
%endif
%ifidn __OUTPUT_FORMAT__,elf32
        SECTION .note.GNU-stack noalloc noexec nowrite progbits
%endif