annotate src/filters/2xSaImmx.asm @ 294:9f466a332448

merge
author Robert McIntyre <rlm@mit.edu>
date Fri, 30 Mar 2012 18:15:28 -0500
parents f9f4f1b99eed
children
rev   line source
rlm@1 1 ;/*---------------------------------------------------------------------*
rlm@1 2 ; * The following (piece of) code, (part of) the 2xSaI engine, *
rlm@1 3 ; * copyright (c) 1999 - 2001 by Derek Liauw Kie Fa. *
rlm@1 4 ; * Non-Commercial use of this software is allowed and is encouraged, *
rlm@1 5 ; * provided that appropriate credit be given. *
rlm@1 6 ; * You may freely modify this code, but I request *
rlm@1 7 ; * that any improvements to the engine be submitted to me, so *
rlm@1 8 ; * that I can implement these improvements in newer versions of *
rlm@1 9 ; * the software. *
rlm@1 10 ; * If you need more information, have any comments or suggestions, *
rlm@1 11 ; * you can e-mail me. My e-mail: derek-liauw@usa.net. *
rlm@1 12 ; *---------------------------------------------------------------------*/
rlm@1 13
rlm@1 14 ;----------------------
rlm@1 15 ; 2xSaI version 0.59 WIP, soon to become version 0.60
rlm@1 16 ;----------------------
rlm@1 17
rlm@1 18 ;%define FAR_POINTER
rlm@1 19
rlm@1 20
rlm@1 21
rlm@1 22 BITS 32 ; everything below is assembled as 32-bit code
rlm@1 23 %ifdef __DJGPP__ ; DJGPP builds export the names with one more leading underscore
rlm@1 24 GLOBAL __2xSaILine
rlm@1 25 GLOBAL __2xSaISuperEagleLine
rlm@1 26 GLOBAL __2xSaISuper2xSaILine
rlm@1 27 GLOBAL _Init_2xSaIMMX
rlm@1 28 %else ; all other builds
rlm@1 29 GLOBAL _2xSaILine
rlm@1 30 GLOBAL _2xSaISuperEagleLine
rlm@1 31 GLOBAL _2xSaISuper2xSaILine
rlm@1 32 GLOBAL Init_2xSaIMMX ; NOTE(review): the only export without a leading underscore in this branch - confirm the C side expects that
rlm@1 33 %endif
rlm@1 34 SECTION .text ALIGN = 32 ; the routines below are placed in the code section
rlm@1 35
rlm@1 36 %ifdef FAR_POINTER
rlm@1 37 ;EXTERN_C void _2xSaILine (uint8 *srcPtr, uint32 srcPitch, uint32 width,
rlm@1 38 ; uint8 *dstPtr, uint32 dstPitch, uint16 dstSegment);
rlm@1 39 %else
rlm@1 40 ;EXTERN_C void _2xSaILine (uint8 *srcPtr, uint32 srcPitch, uint32 width,
rlm@1 41 ; uint8 *dstPtr, uint32 dstPitch);
rlm@1 42 %endif
rlm@1 43 ; Offsets of the stack arguments from ebp, valid after the routines' "push ebp / mov ebp, esp" entry sequence.
rlm@1 44 srcPtr equ 8 ; first stack argument
rlm@1 45 deltaPtr equ 12 ; NOTE(review): the prototype comments above list no delta argument, yet this slot sits between srcPtr and srcPitch - confirm against the C caller
rlm@1 46 srcPitch equ 16 ; byte distance between source rows (added to row pointers by the routines)
rlm@1 47 width equ 20 ; pixel count, consumed 4 at a time by the loops
rlm@1 48 dstOffset equ 24 ; destination pointer
rlm@1 49 dstPitch equ 28 ; byte distance between the two destination rows written per pass
rlm@1 50 dstSegment equ 32 ; only read when FAR_POINTER is defined (loaded into fs)
rlm@1 51 ; Byte displacements of neighbouring pixels inside one source row, relative to a row pointer.
rlm@1 52 ; Pixels are handled as 16-bit words, so consecutive pixels are 2 bytes apart:
rlm@1 53 ; -2 = one pixel before the pointer, 0 = at the pointer, 2 and 4 = the next two pixels.
rlm@1 54 ; colorB*: used with the base row pointer [eax+...]
rlm@1 55 colorB0 equ -2
rlm@1 56 colorB1 equ 0
rlm@1 57 colorB2 equ 2
rlm@1 58 colorB3 equ 4
rlm@1 59 ; color7..color9: not referenced by the routines visible in this part of the file
rlm@1 60 color7 equ -2
rlm@1 61 color8 equ 0
rlm@1 62 color9 equ 2
rlm@1 63 ; color4..colorS2: used with the row one pitch below the base [eax+ebx+...]
rlm@1 64 color4 equ -2
rlm@1 65 color5 equ 0
rlm@1 66 color6 equ 2
rlm@1 67 colorS2 equ 4
rlm@1 68 ; color1..colorS1: used with the row two pitches below the base [eax+ebx+ebx+...]
rlm@1 69 color1 equ -2
rlm@1 70 color2 equ 0
rlm@1 71 color3 equ 2
rlm@1 72 colorS1 equ 4
rlm@1 73 ; colorA*: used with the row three pitches below the base (eax is temporarily advanced by ebx to reach it)
rlm@1 74 colorA0 equ -2
rlm@1 75 colorA1 equ 0
rlm@1 76 colorA2 equ 2
rlm@1 77 colorA3 equ 4
rlm@1 78 ; Super2xSaI line scaler (MMX). Arguments are read from the stack through the srcPtr..dstSegment offsets from ebp.
rlm@1 79 ; Each loop pass consumes 4 source pixels (16-bit words) and writes 16 bytes to each of two destination rows.
rlm@1 80 ; A pass is skipped (nothing written) when the compared source quadwords all equal the copy addressed by deltaPtr.
rlm@1 81 ; General registers are preserved with pushad/popad; emms is executed before returning.
rlm@1 82 %ifdef __DJGPP__
rlm@1 83 __2xSaISuper2xSaILine:
rlm@1 84 %else
rlm@1 85 _2xSaISuper2xSaILine:
rlm@1 86 %endif
rlm@1 87 ; Store some stuff: set up the ebp frame, then save all general registers
rlm@1 88 push ebp
rlm@1 89 mov ebp, esp
rlm@1 90 pushad
rlm@1 91 ; colorMask, lowPixelMask, ONE, Mask26/Mask35 and the I*Pixel / final* scratch quadwords are defined outside this part of the file
rlm@1 92 ; Prepare the destination
rlm@1 93 %ifdef FAR_POINTER
rlm@1 94 ; Set the selector
rlm@1 95 mov eax, [ebp+dstSegment]
rlm@1 96 mov fs, ax
rlm@1 97 %endif
rlm@1 98 mov edx, [ebp+dstOffset] ; edx points to the screen
rlm@1 99 ; Prepare the source
rlm@1 100 ; eax points to colorA
rlm@1 101 mov eax, [ebp+srcPtr] ;eax points to colorA
rlm@1 102 mov ebx, [ebp+srcPitch] ;ebx contains the source pitch
rlm@1 103 mov ecx, [ebp+width] ;ecx contains the number of pixels to process
rlm@1 104 ; eax now points to colorB1
rlm@1 105 sub eax, ebx ;eax points to B1 which is the base
rlm@1 106 ; from here on: eax = base row (colorB*), eax+ebx = color4..colorS2 row, eax+2*ebx = color1..colorS1 row
rlm@1 107 ; Main Loop: the remaining pixel count is parked on the stack because ecx is reused as scratch inside the pass
rlm@1 108 .Loop: push ecx
rlm@1 109
rlm@1 110 ;-----Check Delta------------------
rlm@1 111 mov ecx, [ebp+deltaPtr]
rlm@1 112 ; ecx = current delta pointer
rlm@1 113
rlm@1 114 ;load source img: the quadwords at displacements -2 and +4 of each of the four source rows
rlm@1 115 movq mm0, [eax+colorB0]
rlm@1 116 movq mm1, [eax+colorB3]
rlm@1 117 movq mm2, [eax+ebx+color4]
rlm@1 118 movq mm3, [eax+ebx+colorS2]
rlm@1 119 movq mm4, [eax+ebx+ebx+color1]
rlm@1 120 movq mm5, [eax+ebx+ebx+colorS1]
rlm@1 121 push eax ; the fourth row is eax+3*ebx: advance eax by one row for the next two loads, then restore it
rlm@1 122 add eax, ebx
rlm@1 123 movq mm6, [eax+ebx+ebx+colorA0]
rlm@1 124 movq mm7, [eax+ebx+ebx+colorA3]
rlm@1 125 pop eax
rlm@1 126
rlm@1 127 ;compare to delta (the delta copy is addressed 2 bytes further on than the matching source quadword)
rlm@1 128 pcmpeqw mm0, [ecx+2+colorB0]
rlm@1 129 pcmpeqw mm1, [ecx+2+colorB3]
rlm@1 130 pcmpeqw mm2, [ecx+ebx+2+color4]
rlm@1 131 pcmpeqw mm3, [ecx+ebx+2+colorS2]
rlm@1 132 pcmpeqw mm4, [ecx+ebx+ebx+2+color1]
rlm@1 133 pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1]
rlm@1 134 add ecx, ebx
rlm@1 135 pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0]
rlm@1 136 pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3]
rlm@1 137 sub ecx, ebx
rlm@1 138
rlm@1 139
rlm@1 140 ;compose results: mm0 = AND of all eight compares, so a lane stays all-ones only if it matched in every quadword
rlm@1 141 pand mm0, mm1
rlm@1 142 pand mm2, mm3
rlm@1 143 pand mm4, mm5
rlm@1 144 pand mm6, mm7
rlm@1 145 pand mm0, mm2
rlm@1 146 pand mm4, mm6
rlm@1 147 pxor mm7, mm7
rlm@1 148 pand mm0, mm4
rlm@1 149 movq mm6, [eax+colorB0]
rlm@1 150 pcmpeqw mm7, mm0 ;did any compare give us a zero ?
rlm@1 151 ; store the current base-row quadword into the delta copy (done whether or not the pass is skipped)
rlm@1 152 movq [ecx+2+colorB0], mm6
rlm@1 153 ; pack the four lane flags into 32 bits so one integer test covers them all
rlm@1 154 packsswb mm7, mm7
rlm@1 155 movd ecx, mm7
rlm@1 156 test ecx, ecx
rlm@1 157 jz near .SKIP_PROCESS ;no, so we can skip
rlm@1 158
rlm@1 159 ;End Delta
rlm@1 160
rlm@1 161 ;--------------------------------- I56Pixel = interpolation of color5 and color6
rlm@1 162 movq mm0, [eax+ebx+color5]
rlm@1 163 movq mm1, [eax+ebx+color6]
rlm@1 164 movq mm2, mm0
rlm@1 165 movq mm3, mm1
rlm@1 166 movq mm4, mm0
rlm@1 167 movq mm5, mm1
rlm@1 168 ; interpolation pattern used throughout: (a & colorMask)>>1 + (b & colorMask)>>1 + (a & b & lowPixelMask)
rlm@1 169 pand mm0, [colorMask]
rlm@1 170 pand mm1, [colorMask]
rlm@1 171
rlm@1 172 psrlw mm0, 1
rlm@1 173 psrlw mm1, 1
rlm@1 174
rlm@1 175 pand mm3, [lowPixelMask]
rlm@1 176 paddw mm0, mm1
rlm@1 177
rlm@1 178 pand mm3, mm2
rlm@1 179 paddw mm0, mm3 ;mm0 contains the interpolated values
rlm@1 180 movq [I56Pixel], mm0
rlm@1 181 movq mm7, mm0
rlm@1 182
rlm@1 183 ;------------------- I5556Pixel = interpolation of I56Pixel (mm7) and color5 (mm4)
rlm@1 184 movq mm0, mm7
rlm@1 185 movq mm1, mm4 ;5,5,5,6
rlm@1 186 movq mm2, mm0
rlm@1 187 movq mm3, mm1
rlm@1 188
rlm@1 189 pand mm0, [colorMask]
rlm@1 190 pand mm1, [colorMask]
rlm@1 191
rlm@1 192 psrlw mm0, 1
rlm@1 193 psrlw mm1, 1
rlm@1 194
rlm@1 195 pand mm3, [lowPixelMask]
rlm@1 196 paddw mm0, mm1
rlm@1 197
rlm@1 198 pand mm3, mm2
rlm@1 199 paddw mm0, mm3 ;mm0 contains the interpolated values
rlm@1 200 movq [I5556Pixel], mm0
rlm@1 201 ;--------------------
rlm@1 202 ; I5666Pixel = interpolation of I56Pixel (mm7) and color6 (mm5)
rlm@1 203 movq mm0, mm7
rlm@1 204 movq mm1, mm5 ;6,6,6,5
rlm@1 205 movq mm2, mm0
rlm@1 206 movq mm3, mm1
rlm@1 207
rlm@1 208 pand mm0, [colorMask]
rlm@1 209 pand mm1, [colorMask]
rlm@1 210
rlm@1 211 psrlw mm0, 1
rlm@1 212 psrlw mm1, 1
rlm@1 213
rlm@1 214 pand mm3, [lowPixelMask]
rlm@1 215 paddw mm0, mm1
rlm@1 216
rlm@1 217 pand mm3, mm2
rlm@1 218 paddw mm0, mm3
rlm@1 219 movq [I5666Pixel], mm0
rlm@1 220
rlm@1 221 ;-------------------------
rlm@1 222 ;------------------------- the same three blends for color2 / color3 of the next row; first I23Pixel
rlm@1 223 movq mm0, [eax+ebx+ebx+color2]
rlm@1 224 movq mm1, [eax+ebx+ebx+color3]
rlm@1 225 movq mm2, mm0
rlm@1 226 movq mm3, mm1
rlm@1 227 movq mm4, mm0
rlm@1 228 movq mm5, mm1
rlm@1 229
rlm@1 230 pand mm0, [colorMask]
rlm@1 231 pand mm1, [colorMask]
rlm@1 232
rlm@1 233 psrlw mm0, 1
rlm@1 234 psrlw mm1, 1
rlm@1 235
rlm@1 236 pand mm3, [lowPixelMask]
rlm@1 237 paddw mm0, mm1
rlm@1 238
rlm@1 239 pand mm3, mm2
rlm@1 240 paddw mm0, mm3
rlm@1 241 movq [I23Pixel], mm0
rlm@1 242 movq mm7, mm0
rlm@1 243
rlm@1 244 ;--------------------- I2223Pixel = interpolation of I23Pixel (mm7) and color2 (mm4)
rlm@1 245 movq mm0, mm7
rlm@1 246 movq mm1, mm4 ;2,2,2,3
rlm@1 247 movq mm2, mm0
rlm@1 248 movq mm3, mm1
rlm@1 249
rlm@1 250 pand mm0, [colorMask]
rlm@1 251 pand mm1, [colorMask]
rlm@1 252
rlm@1 253 psrlw mm0, 1
rlm@1 254 psrlw mm1, 1
rlm@1 255
rlm@1 256 pand mm3, [lowPixelMask]
rlm@1 257 paddw mm0, mm1
rlm@1 258
rlm@1 259 pand mm3, mm2
rlm@1 260 paddw mm0, mm3
rlm@1 261 movq [I2223Pixel], mm0
rlm@1 262
rlm@1 263 ;---------------------- I2333Pixel = interpolation of I23Pixel (mm7) and color3 (mm5)
rlm@1 264 movq mm0, mm7
rlm@1 265 movq mm1, mm5 ;3,3,3,2
rlm@1 266 movq mm2, mm0
rlm@1 267 movq mm3, mm1
rlm@1 268
rlm@1 269 pand mm0, [colorMask]
rlm@1 270 pand mm1, [colorMask]
rlm@1 271
rlm@1 272 psrlw mm0, 1
rlm@1 273 psrlw mm1, 1
rlm@1 274
rlm@1 275 pand mm3, [lowPixelMask]
rlm@1 276 paddw mm0, mm1
rlm@1 277
rlm@1 278 pand mm3, mm2
rlm@1 279 paddw mm0, mm3
rlm@1 280 movq [I2333Pixel], mm0
rlm@1 281
rlm@1 282
rlm@1 283 ;--------------------
rlm@1 284 ;////////////////////////////////
rlm@1 285 ; Decide which "branch" to take
rlm@1 286 ;--------------------------------
rlm@1 287 movq mm0, [eax+ebx+color5]
rlm@1 288 movq mm1, [eax+ebx+color6]
rlm@1 289 movq mm6, mm0
rlm@1 290 movq mm7, mm1
rlm@1 291 pcmpeqw mm0, [eax+ebx+ebx+color3]
rlm@1 292 pcmpeqw mm1, [eax+ebx+ebx+color2]
rlm@1 293 pcmpeqw mm6, mm7
rlm@1 294 ; per 16-bit lane: mm0 = (color5 == color3), mm1 = (color6 == color2), mm6 = (color5 == color6)
rlm@1 295 movq mm2, mm0
rlm@1 296 movq mm3, mm0
rlm@1 297
rlm@1 298 pand mm0, mm1 ;colorA == colorD && colorB == colorC
rlm@1 299 pxor mm7, mm7
rlm@1 300
rlm@1 301 pcmpeqw mm2, mm7
rlm@1 302 pand mm6, mm0
rlm@1 303 pand mm2, mm1 ;colorA != colorD && colorB == colorC
rlm@1 304
rlm@1 305 pcmpeqw mm1, mm7
rlm@1 306
rlm@1 307 pand mm1, mm3 ;colorA == colorD && colorB != colorC
rlm@1 308 pxor mm0, mm6
rlm@1 309 por mm1, mm6
rlm@1 310 movq mm7, mm0
rlm@1 311 movq [Mask26], mm2
rlm@1 312 packsswb mm7, mm7
rlm@1 313 movq [Mask35], mm1
rlm@1 314 ; Mask26 = 5!=3 && 6==2; Mask35 = (5==3 && 6!=2) or all four equal; mm0 = lanes with 5==3 && 6==2 but 5!=6 (undecided)
rlm@1 315 movd ecx, mm7
rlm@1 316 test ecx, ecx
rlm@1 317 jz near .SKIP_GUESS
rlm@1 318
rlm@1 319 ;--------------------------------------------- undecided lanes: let the surrounding pixels vote (colorA..colorO offsets are not defined in this part of the file)
rlm@1 320 movq mm6, mm0
rlm@1 321 movq mm4, [eax+ebx+colorA]
rlm@1 322 movq mm5, [eax+ebx+colorB]
rlm@1 323 pxor mm7, mm7
rlm@1 324 pand mm6, [ONE]
rlm@1 325 ; mm6 = ONE restricted to the undecided lanes, mm7 = running vote total; four neighbour pairs follow
rlm@1 326 movq mm0, [eax+colorE]
rlm@1 327 movq mm1, [eax+ebx+colorG]
rlm@1 328 movq mm2, mm0
rlm@1 329 movq mm3, mm1
rlm@1 330 pcmpeqw mm0, mm4
rlm@1 331 pcmpeqw mm1, mm4
rlm@1 332 pcmpeqw mm2, mm5
rlm@1 333 pcmpeqw mm3, mm5
rlm@1 334 pand mm0, mm6
rlm@1 335 pand mm1, mm6
rlm@1 336 pand mm2, mm6
rlm@1 337 pand mm3, mm6
rlm@1 338 paddw mm0, mm1
rlm@1 339 paddw mm2, mm3
rlm@1 340 ; mm0 / mm2 = match totals against mm4 / mm5; a total not greater than mm6 adds mm6 to mm7 (mm4 side) or subtracts it (mm5 side)
rlm@1 341 pxor mm3, mm3
rlm@1 342 pcmpgtw mm0, mm6
rlm@1 343 pcmpgtw mm2, mm6
rlm@1 344 pcmpeqw mm0, mm3
rlm@1 345 pcmpeqw mm2, mm3
rlm@1 346 pand mm0, mm6
rlm@1 347 pand mm2, mm6
rlm@1 348 paddw mm7, mm0
rlm@1 349 psubw mm7, mm2
rlm@1 350 ; second neighbour pair, same scheme
rlm@1 351 movq mm0, [eax+colorF]
rlm@1 352 movq mm1, [eax+ebx+colorK]
rlm@1 353 movq mm2, mm0
rlm@1 354 movq mm3, mm1
rlm@1 355 pcmpeqw mm0, mm4
rlm@1 356 pcmpeqw mm1, mm4
rlm@1 357 pcmpeqw mm2, mm5
rlm@1 358 pcmpeqw mm3, mm5
rlm@1 359 pand mm0, mm6
rlm@1 360 pand mm1, mm6
rlm@1 361 pand mm2, mm6
rlm@1 362 pand mm3, mm6
rlm@1 363 paddw mm0, mm1
rlm@1 364 paddw mm2, mm3
rlm@1 365
rlm@1 366 pxor mm3, mm3
rlm@1 367 pcmpgtw mm0, mm6
rlm@1 368 pcmpgtw mm2, mm6
rlm@1 369 pcmpeqw mm0, mm3
rlm@1 370 pcmpeqw mm2, mm3
rlm@1 371 pand mm0, mm6
rlm@1 372 pand mm2, mm6
rlm@1 373 paddw mm7, mm0
rlm@1 374 psubw mm7, mm2
rlm@1 375 ; third and fourth pairs are read with eax advanced by one row; eax is restored by the pop below
rlm@1 376 push eax
rlm@1 377 add eax, ebx
rlm@1 378 movq mm0, [eax+ebx+colorH]
rlm@1 379 movq mm1, [eax+ebx+ebx+colorN]
rlm@1 380 movq mm2, mm0
rlm@1 381 movq mm3, mm1
rlm@1 382 pcmpeqw mm0, mm4
rlm@1 383 pcmpeqw mm1, mm4
rlm@1 384 pcmpeqw mm2, mm5
rlm@1 385 pcmpeqw mm3, mm5
rlm@1 386 pand mm0, mm6
rlm@1 387 pand mm1, mm6
rlm@1 388 pand mm2, mm6
rlm@1 389 pand mm3, mm6
rlm@1 390 paddw mm0, mm1
rlm@1 391 paddw mm2, mm3
rlm@1 392
rlm@1 393 pxor mm3, mm3
rlm@1 394 pcmpgtw mm0, mm6
rlm@1 395 pcmpgtw mm2, mm6
rlm@1 396 pcmpeqw mm0, mm3
rlm@1 397 pcmpeqw mm2, mm3
rlm@1 398 pand mm0, mm6
rlm@1 399 pand mm2, mm6
rlm@1 400 paddw mm7, mm0
rlm@1 401 psubw mm7, mm2
rlm@1 402
rlm@1 403 movq mm0, [eax+ebx+colorL]
rlm@1 404 movq mm1, [eax+ebx+ebx+colorO]
rlm@1 405 movq mm2, mm0
rlm@1 406 movq mm3, mm1
rlm@1 407 pcmpeqw mm0, mm4
rlm@1 408 pcmpeqw mm1, mm4
rlm@1 409 pcmpeqw mm2, mm5
rlm@1 410 pcmpeqw mm3, mm5
rlm@1 411 pand mm0, mm6
rlm@1 412 pand mm1, mm6
rlm@1 413 pand mm2, mm6
rlm@1 414 pand mm3, mm6
rlm@1 415 paddw mm0, mm1
rlm@1 416 paddw mm2, mm3
rlm@1 417
rlm@1 418 pxor mm3, mm3
rlm@1 419 pcmpgtw mm0, mm6
rlm@1 420 pcmpgtw mm2, mm6
rlm@1 421 pcmpeqw mm0, mm3
rlm@1 422 pcmpeqw mm2, mm3
rlm@1 423 pand mm0, mm6
rlm@1 424 pand mm2, mm6
rlm@1 425 paddw mm7, mm0
rlm@1 426 psubw mm7, mm2
rlm@1 427
rlm@1 428 pop eax
rlm@1 429 movq mm1, mm7
rlm@1 430 pxor mm0, mm0
rlm@1 431 pcmpgtw mm7, mm0
rlm@1 432 pcmpgtw mm0, mm1
rlm@1 433 ; lanes with a positive total are added to Mask35, lanes with a negative total to Mask26
rlm@1 434 por mm7, [Mask35]
rlm@1 435 por mm0, [Mask26]
rlm@1 436 movq [Mask35], mm7
rlm@1 437 movq [Mask26], mm0
rlm@1 438
rlm@1 439 .SKIP_GUESS:
rlm@1 440
rlm@1 441 ;Start the ASSEMBLY !!! eh... compose all the results together to form the final image...
rlm@1 442
rlm@1 443 ; mm0 = interpolation of color5 and color2; it stays live until final2a has been stored
rlm@1 444 movq mm0, [eax+ebx+color5]
rlm@1 445 movq mm1, [eax+ebx+ebx+color2]
rlm@1 446 movq mm2, mm0
rlm@1 447 movq mm3, mm1
rlm@1 448 movq mm4, mm0
rlm@1 449 movq mm5, mm1
rlm@1 450
rlm@1 451 pand mm0, [colorMask]
rlm@1 452 pand mm1, [colorMask]
rlm@1 453
rlm@1 454 psrlw mm0, 1
rlm@1 455 psrlw mm1, 1
rlm@1 456
rlm@1 457 pand mm3, [lowPixelMask]
rlm@1 458 paddw mm0, mm1
rlm@1 459
rlm@1 460 pand mm3, mm2
rlm@1 461 paddw mm0, mm3 ;mm0 contains the interpolated values
rlm@1 462 ;---------------------------
rlm@1 463
rlm@1 464
rlm@1 465 ; The %ifdef block below holds reference C pseudo-code; it is only assembled if that macro is defined (presumably never)
rlm@1 466 %ifdef dfhsdfhsdahdsfhdsfh
rlm@1 467
rlm@1 468 if (color5 == color3 && color2 != color6 && color4 == color5 && color5 != colorA2)
rlm@1 469 product2a = INTERPOLATE (color2, color5);
rlm@1 470 else
rlm@1 471 if (color5 == color1 && color6 == color5 && color4 != color2 && color5 != colorA0)
rlm@1 472 product2a = INTERPOLATE(color2, color5);
rlm@1 473 else
rlm@1 474 product2a = color2;
rlm@1 475
rlm@1 476 if (color2 == color6 && color5 != color3 && color1 == color2 && color2 != colorB2)
rlm@1 477 product1a = INTERPOLATE (color2, color5);
rlm@1 478 else
rlm@1 479 if (color4 == color2 && color3 == color2 && color1 != color5 && color2 != colorB0)
rlm@1 480 product1a = INTERPOLATE(color2, color5);
rlm@1 481 else
rlm@1 482 product1a = color5;
rlm@1 483
rlm@1 484 %endif
rlm@1 485
rlm@1 486 ; final1a, first term: mm7 = Mask26 && color1 == color2 && colorB2 != color2
rlm@1 487 movq mm7, [Mask26]
rlm@1 488 movq mm6, [eax+colorB2]
rlm@1 489 movq mm5, [eax+ebx+ebx+color2]
rlm@1 490 movq mm4, [eax+ebx+ebx+color1]
rlm@1 491 pcmpeqw mm4, mm5
rlm@1 492 pcmpeqw mm6, mm5
rlm@1 493 pxor mm5, mm5
rlm@1 494 pand mm7, mm4
rlm@1 495 pcmpeqw mm6, mm5
rlm@1 496 pand mm7, mm6
rlm@1 497
rlm@1 498 ; second term, ORed into mm7: color3 == color2 && color4 == color2 && color5 != color1 && colorB0 != color2
rlm@1 499
rlm@1 500 movq mm6, [eax+ebx+ebx+color3]
rlm@1 501 movq mm5, [eax+ebx+ebx+color2]
rlm@1 502 movq mm4, [eax+ebx+ebx+color1]
rlm@1 503 movq mm2, [eax+ebx+color5]
rlm@1 504 movq mm1, [eax+ebx+color4]
rlm@1 505 movq mm3, [eax+colorB0]
rlm@1 506
rlm@1 507 pcmpeqw mm2, mm4
rlm@1 508 pcmpeqw mm6, mm5
rlm@1 509 pcmpeqw mm1, mm5
rlm@1 510 pcmpeqw mm3, mm5
rlm@1 511 pxor mm5, mm5
rlm@1 512 pcmpeqw mm2, mm5
rlm@1 513 pcmpeqw mm3, mm5
rlm@1 514 pand mm6, mm1
rlm@1 515 pand mm2, mm3
rlm@1 516 pand mm6, mm2
rlm@1 517 por mm7, mm6
rlm@1 518
rlm@1 519 ; final1a = the mm0 blend in the selected lanes, color5 in all others
rlm@1 520 movq mm6, mm7
rlm@1 521 pcmpeqw mm6, mm5
rlm@1 522 pand mm7, mm0
rlm@1 523
rlm@1 524 movq mm1, [eax+ebx+color5]
rlm@1 525 pand mm6, mm1
rlm@1 526 por mm7, mm6
rlm@1 527 movq [final1a], mm7 ;finished 1a
rlm@1 528
rlm@1 529
rlm@1 530
rlm@1 531 ;--------------------------------
rlm@1 532 ; final2a, first term: mm7 = Mask35 && color4 == color5 && colorA2 != color5
rlm@1 533 movq mm7, [Mask35]
rlm@1 534 push eax
rlm@1 535 add eax, ebx
rlm@1 536 movq mm6, [eax+ebx+ebx+colorA2]
rlm@1 537 pop eax
rlm@1 538 movq mm5, [eax+ebx+color5]
rlm@1 539 movq mm4, [eax+ebx+color4]
rlm@1 540 pcmpeqw mm4, mm5
rlm@1 541 pcmpeqw mm6, mm5
rlm@1 542 pxor mm5, mm5
rlm@1 543 pand mm7, mm4
rlm@1 544 pcmpeqw mm6, mm5
rlm@1 545 pand mm7, mm6
rlm@1 546
rlm@1 547 ; second term, ORed into mm7: color6 == color5 && color1 == color5 && color2 != color4 && colorA0 != color5
rlm@1 548
rlm@1 549 movq mm6, [eax+ebx+color6]
rlm@1 550 movq mm5, [eax+ebx+color5]
rlm@1 551 movq mm4, [eax+ebx+color4]
rlm@1 552 movq mm2, [eax+ebx+ebx+color2]
rlm@1 553 movq mm1, [eax+ebx+ebx+color1]
rlm@1 554 push eax
rlm@1 555 add eax, ebx
rlm@1 556 movq mm3, [eax+ebx+ebx+colorA0]
rlm@1 557 pop eax
rlm@1 558
rlm@1 559 pcmpeqw mm2, mm4
rlm@1 560 pcmpeqw mm6, mm5
rlm@1 561 pcmpeqw mm1, mm5
rlm@1 562 pcmpeqw mm3, mm5
rlm@1 563 pxor mm5, mm5
rlm@1 564 pcmpeqw mm2, mm5
rlm@1 565 pcmpeqw mm3, mm5
rlm@1 566 pand mm6, mm1
rlm@1 567 pand mm2, mm3
rlm@1 568 pand mm6, mm2
rlm@1 569 por mm7, mm6
rlm@1 570
rlm@1 571 ; final2a = the mm0 blend in the selected lanes, color2 in all others
rlm@1 572 movq mm6, mm7
rlm@1 573 pcmpeqw mm6, mm5
rlm@1 574 pand mm7, mm0
rlm@1 575
rlm@1 576 movq mm1, [eax+ebx+ebx+color2]
rlm@1 577 pand mm6, mm1
rlm@1 578 por mm7, mm6
rlm@1 579 movq [final2a], mm7 ;finished 2a
rlm@1 580
rlm@1 581
rlm@1 582 ;--------------------------------------------
rlm@1 583
rlm@1 584 ; Reference C pseudo-code for the two "b" outputs, guarded by the same macro as the earlier pseudo-code block
rlm@1 585 %ifdef dfhsdfhsdahdsfhdsfh
rlm@1 586 if (color6 == color3 && color3 == colorA1 && color2 != colorA2 && color3 != colorA0)
rlm@1 587 product2b = Q_INTERPOLATE (color3, color3, color3, color2);
rlm@1 588 else
rlm@1 589 if (color5 == color2 && color2 == colorA2 && colorA1 != color3 && color2 != colorA3)
rlm@1 590 product2b = Q_INTERPOLATE (color2, color2, color2, color3);
rlm@1 591 else
rlm@1 592 product2b = INTERPOLATE (color2, color3);
rlm@1 593
rlm@1 594 if (color6 == color3 && color6 == colorB1 && color5 != colorB2 && color6 != colorB0)
rlm@1 595 product1b = Q_INTERPOLATE (color6, color6, color6, color5);
rlm@1 596 else
rlm@1 597 if (color5 == color2 && color5 == colorB2 && colorB1 != color6 && color5 != colorB3)
rlm@1 598 product1b = Q_INTERPOLATE (color6, color5, color5, color5);
rlm@1 599 else
rlm@1 600 product1b = INTERPOLATE (color5, color6);
rlm@1 601 %endif
rlm@1 602 ; final2b. mm0 = lanes with color6 == color3 && colorA1 == color3 && color2 != colorA2 && colorA0 != color3
rlm@1 603 push eax
rlm@1 604 add eax, ebx
rlm@1 605 pxor mm7, mm7
rlm@1 606 movq mm0, [eax+ebx+ebx+colorA0]
rlm@1 607 movq mm1, [eax+ebx+ebx+colorA1]
rlm@1 608 movq mm2, [eax+ebx+ebx+colorA2]
rlm@1 609 movq mm3, [eax+ebx+ebx+colorA3]
rlm@1 610 pop eax
rlm@1 611 movq mm4, [eax+ebx+ebx+color2]
rlm@1 612 movq mm5, [eax+ebx+ebx+color3]
rlm@1 613 movq mm6, [eax+ebx+color6]
rlm@1 614
rlm@1 615 pcmpeqw mm6, mm5
rlm@1 616 pcmpeqw mm1, mm5
rlm@1 617 pcmpeqw mm4, mm2
rlm@1 618 pcmpeqw mm0, mm5
rlm@1 619 pcmpeqw mm4, mm7
rlm@1 620 pcmpeqw mm0, mm7
rlm@1 621 pand mm0, mm4
rlm@1 622 pand mm6, mm1
rlm@1 623 pand mm0, mm6
rlm@1 624
rlm@1 625 ; mm1 = lanes with color5 == color2 && colorA2 == color2 && colorA1 != color3 && colorA3 != color2 (mm2 / mm3 still hold colorA2 / colorA3)
rlm@1 626 push eax
rlm@1 627 add eax, ebx
rlm@1 628 movq mm1, [eax+ebx+ebx+colorA1]
rlm@1 629 pop eax
rlm@1 630 movq mm4, [eax+ebx+ebx+color2]
rlm@1 631 movq mm5, [eax+ebx+color5]
rlm@1 632 movq mm6, [eax+ebx+ebx+color3]
rlm@1 633
rlm@1 634 pcmpeqw mm5, mm4
rlm@1 635 pcmpeqw mm2, mm4
rlm@1 636 pcmpeqw mm1, mm6
rlm@1 637 pcmpeqw mm3, mm4
rlm@1 638 pcmpeqw mm1, mm7
rlm@1 639 pcmpeqw mm3, mm7
rlm@1 640 pand mm2, mm5
rlm@1 641 pand mm1, mm3
rlm@1 642 pand mm1, mm2
rlm@1 643
rlm@1 644
rlm@1 645 movq mm7, mm0
rlm@1 646 por mm7, mm1
rlm@1 647 ; mm4 = Mask35 and mm3 = Mask26, each with the lanes already claimed by mm0 / mm1 removed
rlm@1 648 movq mm4, [Mask35]
rlm@1 649 movq mm3, [Mask26]
rlm@1 650
rlm@1 651 movq mm6, mm4
rlm@1 652 pand mm6, mm7
rlm@1 653 pxor mm4, mm6
rlm@1 654
rlm@1 655 movq mm6, mm3
rlm@1 656 pand mm6, mm7
rlm@1 657 pxor mm3, mm6
rlm@1 658 ; per lane: mm0 -> I2333Pixel, mm1 -> I2223Pixel, mm4 -> color3, mm3 -> color2, no mask set -> I23Pixel
rlm@1 659 movq mm2, mm0
rlm@1 660 movq mm7, [I2333Pixel]
rlm@1 661 movq mm6, [I2223Pixel]
rlm@1 662 movq mm5, [I23Pixel]
rlm@1 663
rlm@1 664
rlm@1 665 por mm2, mm4
rlm@1 666 pand mm4, [eax+ebx+ebx+color3]
rlm@1 667 por mm2, mm3
rlm@1 668 pand mm3, [eax+ebx+ebx+color2]
rlm@1 669 por mm2, mm1
rlm@1 670 pand mm0, mm7
rlm@1 671 pand mm1, mm6
rlm@1 672 pxor mm7, mm7
rlm@1 673 pcmpeqw mm2, mm7
rlm@1 674 por mm0, mm1
rlm@1 675 por mm3, mm4
rlm@1 676 pand mm2, mm5
rlm@1 677 por mm0, mm3
rlm@1 678 por mm0, mm2
rlm@1 679 movq [final2b], mm0
rlm@1 680
rlm@1 681 ;-----------------------------------
rlm@1 682 ; final1b, same scheme one row up. mm0 = lanes with color3 == color6 && colorB1 == color6 && color5 != colorB2 && colorB0 != color6
rlm@1 683
rlm@1 684 pxor mm7, mm7
rlm@1 685 movq mm0, [eax+colorB0]
rlm@1 686 movq mm1, [eax+colorB1]
rlm@1 687 movq mm2, [eax+colorB2]
rlm@1 688 movq mm3, [eax+colorB3]
rlm@1 689 movq mm4, [eax+ebx+color5]
rlm@1 690 movq mm5, [eax+ebx+color6]
rlm@1 691 movq mm6, [eax+ebx+ebx+color3]
rlm@1 692
rlm@1 693 pcmpeqw mm6, mm5
rlm@1 694 pcmpeqw mm1, mm5
rlm@1 695 pcmpeqw mm4, mm2
rlm@1 696 pcmpeqw mm0, mm5
rlm@1 697 pcmpeqw mm4, mm7
rlm@1 698 pcmpeqw mm0, mm7
rlm@1 699 pand mm0, mm4
rlm@1 700 pand mm6, mm1
rlm@1 701 pand mm0, mm6
rlm@1 702 ; mm1 = lanes with color2 == color5 && colorB2 == color5 && colorB1 != color6 && colorB3 != color5 (mm2 / mm3 still hold colorB2 / colorB3)
rlm@1 703 movq mm1, [eax+colorB1]
rlm@1 704 movq mm4, [eax+ebx+color5]
rlm@1 705 movq mm5, [eax+ebx+ebx+color2]
rlm@1 706 movq mm6, [eax+ebx+color6]
rlm@1 707
rlm@1 708 pcmpeqw mm5, mm4
rlm@1 709 pcmpeqw mm2, mm4
rlm@1 710 pcmpeqw mm1, mm6
rlm@1 711 pcmpeqw mm3, mm4
rlm@1 712 pcmpeqw mm1, mm7
rlm@1 713 pcmpeqw mm3, mm7
rlm@1 714 pand mm2, mm5
rlm@1 715 pand mm1, mm3
rlm@1 716 pand mm1, mm2
rlm@1 717
rlm@1 718
rlm@1 719 movq mm7, mm0
rlm@1 720 por mm7, mm1
rlm@1 721 ; mm4 = Mask35 and mm3 = Mask26, each with the lanes already claimed by mm0 / mm1 removed
rlm@1 722 movq mm4, [Mask35]
rlm@1 723 movq mm3, [Mask26]
rlm@1 724
rlm@1 725 movq mm6, mm4
rlm@1 726 pand mm6, mm7
rlm@1 727 pxor mm4, mm6
rlm@1 728
rlm@1 729 movq mm6, mm3
rlm@1 730 pand mm6, mm7
rlm@1 731 pxor mm3, mm6
rlm@1 732 ; per lane: mm0 -> I5666Pixel, mm1 -> I5556Pixel, mm4 -> color5, mm3 -> color6, no mask set -> I56Pixel
rlm@1 733 movq mm2, mm0
rlm@1 734 movq mm7, [I5666Pixel]
rlm@1 735 movq mm6, [I5556Pixel]
rlm@1 736 movq mm5, [I56Pixel]
rlm@1 737
rlm@1 738
rlm@1 739 por mm2, mm4
rlm@1 740 pand mm4, [eax+ebx+color5]
rlm@1 741 por mm2, mm3
rlm@1 742 pand mm3, [eax+ebx+color6]
rlm@1 743 por mm2, mm1
rlm@1 744 pand mm0, mm7
rlm@1 745 pand mm1, mm6
rlm@1 746 pxor mm7, mm7
rlm@1 747 pcmpeqw mm2, mm7
rlm@1 748 por mm0, mm1
rlm@1 749 por mm3, mm4
rlm@1 750 pand mm2, mm5
rlm@1 751 por mm0, mm3
rlm@1 752 por mm0, mm2
rlm@1 753 movq [final1b], mm0
rlm@1 754
rlm@1 755 ;--------- interleave the words: upper output row = final1a/final1b pairs, lower output row = final2a/final2b pairs
rlm@1 756
rlm@1 757 movq mm0, [final1a]
rlm@1 758 movq mm4, [final2a]
rlm@1 759 movq mm2, [final1b]
rlm@1 760 movq mm6, [final2b]
rlm@1 761
rlm@1 762
rlm@1 763 movq mm1, mm0
rlm@1 764 movq mm5, mm4
rlm@1 765
rlm@1 766
rlm@1 767 punpcklwd mm0, mm2
rlm@1 768 punpckhwd mm1, mm2
rlm@1 769
rlm@1 770 punpcklwd mm4, mm6
rlm@1 771 punpckhwd mm5, mm6
rlm@1 772
rlm@1 773 ; write 16 bytes at edx and 16 bytes one dstPitch further on; edx itself is restored by the pop
rlm@1 774 %ifdef FAR_POINTER
rlm@1 775 movq [fs:edx], mm0
rlm@1 776 movq [fs:edx+8], mm1
rlm@1 777 push edx
rlm@1 778 add edx, [ebp+dstPitch]
rlm@1 779 movq [fs:edx], mm4
rlm@1 780 movq [fs:edx+8], mm5
rlm@1 781 pop edx
rlm@1 782 %else
rlm@1 783 movq [edx], mm0
rlm@1 784 movq [edx+8], mm1
rlm@1 785 push edx
rlm@1 786 add edx, [ebp+dstPitch]
rlm@1 787 movq [edx], mm4
rlm@1 788 movq [edx+8], mm5
rlm@1 789 pop edx
rlm@1 790 %endif
rlm@1 791 .SKIP_PROCESS:
rlm@1 792 mov ecx, [ebp+deltaPtr]
rlm@1 793 add ecx, 8
rlm@1 794 mov [ebp+deltaPtr], ecx ; the advanced delta pointer is kept in the argument slot itself
rlm@1 795 add edx, 16 ; destination moves on by 16 bytes
rlm@1 796 add eax, 8 ; source moves on by 8 bytes (4 words)
rlm@1 797
rlm@1 798 pop ecx ; recover the pixel count pushed at .Loop
rlm@1 799 sub ecx, 4
rlm@1 800 cmp ecx, 0
rlm@1 801 jg near .Loop ; signed test: repeat while the remaining count is above zero
rlm@1 802
rlm@1 803 ; Restore some stuff
rlm@1 804 popad
rlm@1 805 mov esp, ebp
rlm@1 806 pop ebp
rlm@1 807 emms
rlm@1 808 ret
rlm@1 809
rlm@1 810
rlm@1 811 ;-------------------------------------------------------------------------
rlm@1 812 ;-------------------------------------------------------------------------
rlm@1 813 ;-------------------------------------------------------------------------
rlm@1 814 ;-------------------------------------------------------------------------
rlm@1 815 ;-------------------------------------------------------------------------
rlm@1 816 ;-------------------------------------------------------------------------
rlm@1 817 ;-------------------------------------------------------------------------
rlm@1 818
rlm@1 819
rlm@1 820
rlm@1 821 %ifdef __DJGPP__
rlm@1 822 __2xSaISuperEagleLine:
rlm@1 823 %else
rlm@1 824 _2xSaISuperEagleLine:
rlm@1 825 %endif
rlm@1 826 ; Store some stuff
rlm@1 827 push ebp
rlm@1 828 mov ebp, esp
rlm@1 829 pushad
rlm@1 830
rlm@1 831 ; Prepare the destination
rlm@1 832 %ifdef FAR_POINTER
rlm@1 833 ; Set the selector
rlm@1 834 mov eax, [ebp+dstSegment]
rlm@1 835 mov fs, ax
rlm@1 836 %endif
rlm@1 837 mov edx, [ebp+dstOffset] ; edx points to the screen
rlm@1 838 ; Prepare the source
rlm@1 839 ; eax points to colorA
rlm@1 840 mov eax, [ebp+srcPtr]
rlm@1 841 mov ebx, [ebp+srcPitch]
rlm@1 842 mov ecx, [ebp+width]
rlm@1 843 ; eax now points to colorB1
rlm@1 844 sub eax, ebx
rlm@1 845
rlm@1 846 ; Main Loop
rlm@1 847 .Loop: push ecx
rlm@1 848
rlm@1 849 ;-----Check Delta------------------
rlm@1 850 mov ecx, [ebp+deltaPtr]
rlm@1 851
rlm@1 852 movq mm0, [eax+colorB0]
rlm@1 853 movq mm1, [eax+colorB3]
rlm@1 854 movq mm2, [eax+ebx+color4]
rlm@1 855 movq mm3, [eax+ebx+colorS2]
rlm@1 856 movq mm4, [eax+ebx+ebx+color1]
rlm@1 857 movq mm5, [eax+ebx+ebx+colorS1]
rlm@1 858 push eax
rlm@1 859 add eax, ebx
rlm@1 860 movq mm6, [eax+ebx+ebx+colorA0]
rlm@1 861 movq mm7, [eax+ebx+ebx+colorA3]
rlm@1 862 pop eax
rlm@1 863
rlm@1 864 pcmpeqw mm0, [ecx+2+colorB0]
rlm@1 865 pcmpeqw mm1, [ecx+2+colorB3]
rlm@1 866 pcmpeqw mm2, [ecx+ebx+2+color4]
rlm@1 867 pcmpeqw mm3, [ecx+ebx+2+colorS2]
rlm@1 868 pcmpeqw mm4, [ecx+ebx+ebx+2+color1]
rlm@1 869 pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1]
rlm@1 870 add ecx, ebx
rlm@1 871 pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0]
rlm@1 872 pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3]
rlm@1 873 sub ecx, ebx
rlm@1 874
rlm@1 875
rlm@1 876 pand mm0, mm1
rlm@1 877 pand mm2, mm3
rlm@1 878 pand mm4, mm5
rlm@1 879 pand mm6, mm7
rlm@1 880 pand mm0, mm2
rlm@1 881 pand mm4, mm6
rlm@1 882 pxor mm7, mm7
rlm@1 883 pand mm0, mm4
rlm@1 884 movq mm6, [eax+colorB0]
rlm@1 885 pcmpeqw mm7, mm0
rlm@1 886
rlm@1 887 movq [ecx+2+colorB0], mm6
rlm@1 888
rlm@1 889 packsswb mm7, mm7
rlm@1 890 movd ecx, mm7
rlm@1 891 test ecx, ecx
rlm@1 892 jz near .SKIP_PROCESS
rlm@1 893
rlm@1 894 ;End Delta
rlm@1 895
rlm@1 896 ;---------------------------------
rlm@1 897 movq mm0, [eax+ebx+color5]
rlm@1 898 movq mm1, [eax+ebx+color6]
rlm@1 899 movq mm2, mm0
rlm@1 900 movq mm3, mm1
rlm@1 901 movq mm4, mm0
rlm@1 902 movq mm5, mm1
rlm@1 903
rlm@1 904 pand mm0, [colorMask]
rlm@1 905 pand mm1, [colorMask]
rlm@1 906
rlm@1 907 psrlw mm0, 1
rlm@1 908 psrlw mm1, 1
rlm@1 909
rlm@1 910 pand mm3, [lowPixelMask]
rlm@1 911 paddw mm0, mm1
rlm@1 912
rlm@1 913 pand mm3, mm2
rlm@1 914 paddw mm0, mm3 ;mm0 contains the interpolated values
rlm@1 915 movq [I56Pixel], mm0
rlm@1 916 movq mm7, mm0
rlm@1 917
rlm@1 918 ;-------------------
rlm@1 919 movq mm0, mm7
rlm@1 920 movq mm1, mm4 ;5,5,5,6
rlm@1 921 movq mm2, mm0
rlm@1 922 movq mm3, mm1
rlm@1 923
rlm@1 924 pand mm0, [colorMask]
rlm@1 925 pand mm1, [colorMask]
rlm@1 926
rlm@1 927 psrlw mm0, 1
rlm@1 928 psrlw mm1, 1
rlm@1 929
rlm@1 930 pand mm3, [lowPixelMask]
rlm@1 931 paddw mm0, mm1
rlm@1 932
rlm@1 933 pand mm3, mm2
rlm@1 934 paddw mm0, mm3 ;mm0 contains the interpolated values
rlm@1 935 movq [product1a], mm0
rlm@1 936 ;--------------------
rlm@1 937
rlm@1 938 movq mm0, mm7
rlm@1 939 movq mm1, mm5 ;6,6,6,5
rlm@1 940 movq mm2, mm0
rlm@1 941 movq mm3, mm1
rlm@1 942
rlm@1 943 pand mm0, [colorMask]
rlm@1 944 pand mm1, [colorMask]
rlm@1 945
rlm@1 946 psrlw mm0, 1
rlm@1 947 psrlw mm1, 1
rlm@1 948
rlm@1 949 pand mm3, [lowPixelMask]
rlm@1 950 paddw mm0, mm1
rlm@1 951
rlm@1 952 pand mm3, mm2
rlm@1 953 paddw mm0, mm3
rlm@1 954 movq [product1b], mm0
rlm@1 955
rlm@1 956 ;-------------------------
rlm@1 957 ;-------------------------
rlm@1 958 movq mm0, [eax+ebx+ebx+color2]
rlm@1 959 movq mm1, [eax+ebx+ebx+color3]
rlm@1 960 movq mm2, mm0
rlm@1 961 movq mm3, mm1
rlm@1 962 movq mm4, mm0
rlm@1 963 movq mm5, mm1
rlm@1 964
rlm@1 965 pand mm0, [colorMask]
rlm@1 966 pand mm1, [colorMask]
rlm@1 967
rlm@1 968 psrlw mm0, 1
rlm@1 969 psrlw mm1, 1
rlm@1 970
rlm@1 971 pand mm3, [lowPixelMask]
rlm@1 972 paddw mm0, mm1
rlm@1 973
rlm@1 974 pand mm3, mm2
rlm@1 975 paddw mm0, mm3
rlm@1 976 movq [I23Pixel], mm0
rlm@1 977 movq mm7, mm0
rlm@1 978
rlm@1 979 ;---------------------
rlm@1 980 movq mm0, mm7
rlm@1 981 movq mm1, mm4 ;2,2,2,3
rlm@1 982 movq mm2, mm0
rlm@1 983 movq mm3, mm1
rlm@1 984
rlm@1 985 pand mm0, [colorMask]
rlm@1 986 pand mm1, [colorMask]
rlm@1 987
rlm@1 988 psrlw mm0, 1
rlm@1 989 psrlw mm1, 1
rlm@1 990
rlm@1 991 pand mm3, [lowPixelMask]
rlm@1 992 paddw mm0, mm1
rlm@1 993
rlm@1 994 pand mm3, mm2
rlm@1 995 paddw mm0, mm3
rlm@1 996 movq [product2a], mm0
rlm@1 997
rlm@1 998 ;----------------------
rlm@1 999 movq mm0, mm7
rlm@1 1000 movq mm1, mm5 ;3,3,3,2
rlm@1 1001 movq mm2, mm0
rlm@1 1002 movq mm3, mm1
rlm@1 1003
rlm@1 1004 pand mm0, [colorMask]
rlm@1 1005 pand mm1, [colorMask]
rlm@1 1006
rlm@1 1007 psrlw mm0, 1
rlm@1 1008 psrlw mm1, 1
rlm@1 1009
rlm@1 1010 pand mm3, [lowPixelMask]
rlm@1 1011 paddw mm0, mm1
rlm@1 1012
rlm@1 1013 pand mm3, mm2
rlm@1 1014 paddw mm0, mm3
rlm@1 1015 movq [product2b], mm0
rlm@1 1016
rlm@1 1017
rlm@1 1018 ;////////////////////////////////
rlm@1 1019 ; Decide which "branch" to take
rlm@1 1020 ;--------------------------------
rlm@1 1021 movq mm4, [eax+ebx+color5]
rlm@1 1022 movq mm5, [eax+ebx+color6]
rlm@1 1023 movq mm6, [eax+ebx+ebx+color3]
rlm@1 1024 movq mm7, [eax+ebx+ebx+color2]
rlm@1 1025
rlm@1 1026 pxor mm3, mm3
rlm@1 1027 movq mm0, mm4
rlm@1 1028 movq mm1, mm5
rlm@1 1029
rlm@1 1030 pcmpeqw mm0, mm6
rlm@1 1031 pcmpeqw mm1, mm7
rlm@1 1032 pcmpeqw mm1, mm3
rlm@1 1033 pand mm0, mm1
rlm@1 1034 movq [Mask35], mm0
rlm@1 1035
rlm@1 1036 movq mm0, [eax+ebx+ebx+colorS1]
rlm@1 1037 movq mm1, [eax+ebx+color4]
rlm@1 1038 push eax
rlm@1 1039 add eax, ebx
rlm@1 1040 movq mm2, [eax+ebx+ebx+colorA2]
rlm@1 1041 pop eax
rlm@1 1042 movq mm3, [eax+colorB1]
rlm@1 1043 pcmpeqw mm0, mm4
rlm@1 1044 pcmpeqw mm1, mm4
rlm@1 1045 pcmpeqw mm2, mm4
rlm@1 1046 pcmpeqw mm3, mm4
rlm@1 1047 pand mm0, mm1
rlm@1 1048 pand mm2, mm3
rlm@1 1049 por mm0, mm2
rlm@1 1050 pand mm0, [Mask35]
rlm@1 1051 movq [Mask35b], mm0
rlm@1 1052
rlm@1 1053 ;-----------
rlm@1 1054 pxor mm3, mm3
rlm@1 1055 movq mm0, mm4
rlm@1 1056 movq mm1, mm5
rlm@1 1057
rlm@1 1058 pcmpeqw mm0, mm6
rlm@1 1059 pcmpeqw mm1, mm7
rlm@1 1060 pcmpeqw mm0, mm3
rlm@1 1061 pand mm0, mm1
rlm@1 1062 movq [Mask26], mm0
rlm@1 1063
rlm@1 1064 movq mm0, [eax+ebx+ebx+color1]
rlm@1 1065 movq mm1, [eax+ebx+colorS2]
rlm@1 1066 push eax
rlm@1 1067 add eax, ebx
rlm@1 1068 movq mm2, [eax+ebx+ebx+colorA1]
rlm@1 1069 pop eax
rlm@1 1070 movq mm3, [eax+colorB2]
rlm@1 1071 pcmpeqw mm0, mm5
rlm@1 1072 pcmpeqw mm1, mm5
rlm@1 1073 pcmpeqw mm2, mm5
rlm@1 1074 pcmpeqw mm3, mm5
rlm@1 1075 pand mm0, mm1
rlm@1 1076 pand mm2, mm3
rlm@1 1077 por mm0, mm2
rlm@1 1078 pand mm0, [Mask26]
rlm@1 1079 movq [Mask26b], mm0
rlm@1 1080
rlm@1 1081 ;--------------------
rlm@1 1082 movq mm0, mm4
rlm@1 1083 movq mm1, mm5
rlm@1 1084 movq mm2, mm0
rlm@1 1085
rlm@1 1086 pcmpeqw mm2, mm1
rlm@1 1087 pcmpeqw mm0, mm6
rlm@1 1088 pcmpeqw mm1, mm7
rlm@1 1089 pand mm0, mm1
rlm@1 1090 pand mm2, mm0
rlm@1 1091 pxor mm0, mm2
rlm@1 1092 movq mm7, mm0
rlm@1 1093
rlm@1 1094 ;------------------
rlm@1 1095 packsswb mm7, mm7
rlm@1 1096 movd ecx, mm7
rlm@1 1097 test ecx, ecx
rlm@1 1098 jz near .SKIP_GUESS
rlm@1 1099
rlm@1 1100 ;---------------------------------------------
rlm@1 1101 ; Map of the pixels: I|E F|J
rlm@1 1102 ; G|A B|K
rlm@1 1103 ; H|C D|L
rlm@1 1104 ; M|N O|P
rlm@1 1105 movq mm6, mm0
rlm@1 1106 movq mm4, [eax+ebx+color5]
rlm@1 1107 movq mm5, [eax+ebx+color6]
rlm@1 1108 pxor mm7, mm7
rlm@1 1109 pand mm6, [ONE]
rlm@1 1110
rlm@1 1111 movq mm0, [eax+colorB1]
rlm@1 1112 movq mm1, [eax+ebx+color4]
rlm@1 1113 movq mm2, mm0
rlm@1 1114 movq mm3, mm1
rlm@1 1115 pcmpeqw mm0, mm4
rlm@1 1116 pcmpeqw mm1, mm4
rlm@1 1117 pcmpeqw mm2, mm5
rlm@1 1118 pcmpeqw mm3, mm5
rlm@1 1119 pand mm0, mm6
rlm@1 1120 pand mm1, mm6
rlm@1 1121 pand mm2, mm6
rlm@1 1122 pand mm3, mm6
rlm@1 1123 paddw mm0, mm1
rlm@1 1124 paddw mm2, mm3
rlm@1 1125
rlm@1 1126 pxor mm3, mm3
rlm@1 1127 pcmpgtw mm0, mm6
rlm@1 1128 pcmpgtw mm2, mm6
rlm@1 1129 pcmpeqw mm0, mm3
rlm@1 1130 pcmpeqw mm2, mm3
rlm@1 1131 pand mm0, mm6
rlm@1 1132 pand mm2, mm6
rlm@1 1133 paddw mm7, mm0
rlm@1 1134 psubw mm7, mm2
rlm@1 1135
rlm@1 1136 movq mm0, [eax+colorB2]
rlm@1 1137 movq mm1, [eax+ebx+colorS2]
rlm@1 1138 movq mm2, mm0
rlm@1 1139 movq mm3, mm1
rlm@1 1140 pcmpeqw mm0, mm4
rlm@1 1141 pcmpeqw mm1, mm4
rlm@1 1142 pcmpeqw mm2, mm5
rlm@1 1143 pcmpeqw mm3, mm5
rlm@1 1144 pand mm0, mm6
rlm@1 1145 pand mm1, mm6
rlm@1 1146 pand mm2, mm6
rlm@1 1147 pand mm3, mm6
rlm@1 1148 paddw mm0, mm1
rlm@1 1149 paddw mm2, mm3
rlm@1 1150
rlm@1 1151 pxor mm3, mm3
rlm@1 1152 pcmpgtw mm0, mm6
rlm@1 1153 pcmpgtw mm2, mm6
rlm@1 1154 pcmpeqw mm0, mm3
rlm@1 1155 pcmpeqw mm2, mm3
rlm@1 1156 pand mm0, mm6
rlm@1 1157 pand mm2, mm6
rlm@1 1158 paddw mm7, mm0
rlm@1 1159 psubw mm7, mm2
rlm@1 1160
rlm@1 1161 push eax
rlm@1 1162 add eax, ebx
rlm@1 1163 movq mm0, [eax+ebx+color1]
rlm@1 1164 movq mm1, [eax+ebx+ebx+colorA1]
rlm@1 1165 movq mm2, mm0
rlm@1 1166 movq mm3, mm1
rlm@1 1167 pcmpeqw mm0, mm4
rlm@1 1168 pcmpeqw mm1, mm4
rlm@1 1169 pcmpeqw mm2, mm5
rlm@1 1170 pcmpeqw mm3, mm5
rlm@1 1171 pand mm0, mm6
rlm@1 1172 pand mm1, mm6
rlm@1 1173 pand mm2, mm6
rlm@1 1174 pand mm3, mm6
rlm@1 1175 paddw mm0, mm1
rlm@1 1176 paddw mm2, mm3
rlm@1 1177
rlm@1 1178 pxor mm3, mm3
rlm@1 1179 pcmpgtw mm0, mm6
rlm@1 1180 pcmpgtw mm2, mm6
rlm@1 1181 pcmpeqw mm0, mm3
rlm@1 1182 pcmpeqw mm2, mm3
rlm@1 1183 pand mm0, mm6
rlm@1 1184 pand mm2, mm6
rlm@1 1185 paddw mm7, mm0
rlm@1 1186 psubw mm7, mm2
rlm@1 1187
rlm@1 1188 movq mm0, [eax+ebx+colorS1]
rlm@1 1189 movq mm1, [eax+ebx+ebx+colorA2]
rlm@1 1190 movq mm2, mm0
rlm@1 1191 movq mm3, mm1
rlm@1 1192 pcmpeqw mm0, mm4
rlm@1 1193 pcmpeqw mm1, mm4
rlm@1 1194 pcmpeqw mm2, mm5
rlm@1 1195 pcmpeqw mm3, mm5
rlm@1 1196 pand mm0, mm6
rlm@1 1197 pand mm1, mm6
rlm@1 1198 pand mm2, mm6
rlm@1 1199 pand mm3, mm6
rlm@1 1200 paddw mm0, mm1
rlm@1 1201 paddw mm2, mm3
rlm@1 1202
rlm@1 1203 pxor mm3, mm3
rlm@1 1204 pcmpgtw mm0, mm6
rlm@1 1205 pcmpgtw mm2, mm6
rlm@1 1206 pcmpeqw mm0, mm3
rlm@1 1207 pcmpeqw mm2, mm3
rlm@1 1208 pand mm0, mm6
rlm@1 1209 pand mm2, mm6
rlm@1 1210 paddw mm7, mm0
rlm@1 1211 psubw mm7, mm2
rlm@1 1212
rlm@1 1213 pop eax
rlm@1 1214 movq mm1, mm7
rlm@1 1215 pxor mm0, mm0
rlm@1 1216 pcmpgtw mm7, mm0
rlm@1 1217 pcmpgtw mm0, mm1
rlm@1 1218
rlm@1 1219 por mm7, [Mask35]
rlm@1 1220 por mm0, [Mask26]
rlm@1 1221 movq [Mask35], mm7
rlm@1 1222 movq [Mask26], mm0
rlm@1 1223
rlm@1 1224 .SKIP_GUESS:
rlm@1 1225 ;Start the ASSEMBLY !!!
rlm@1 1226
rlm@1 1227 movq mm4, [Mask35]
rlm@1 1228 movq mm5, [Mask26]
rlm@1 1229 movq mm6, [Mask35b]
rlm@1 1230 movq mm7, [Mask26b]
rlm@1 1231
rlm@1 1232 movq mm0, [eax+ebx+color5]
rlm@1 1233 movq mm1, [eax+ebx+color6]
rlm@1 1234 movq mm2, [eax+ebx+ebx+color2]
rlm@1 1235 movq mm3, [eax+ebx+ebx+color3]
rlm@1 1236 pcmpeqw mm0, mm2
rlm@1 1237 pcmpeqw mm1, mm3
rlm@1 1238 movq mm2, mm4
rlm@1 1239 movq mm3, mm5
rlm@1 1240 por mm0, mm1
rlm@1 1241 por mm2, mm3
rlm@1 1242 pand mm2, mm0
rlm@1 1243 pxor mm0, mm2
rlm@1 1244 movq mm3, mm0
rlm@1 1245
rlm@1 1246 movq mm2, mm0
rlm@1 1247 pxor mm0, mm0
rlm@1 1248 por mm2, mm4
rlm@1 1249 pxor mm4, mm6
rlm@1 1250 por mm2, mm5
rlm@1 1251 pxor mm5, mm7
rlm@1 1252 pcmpeqw mm2, mm0
rlm@1 1253 ;----------------
rlm@1 1254
rlm@1 1255 movq mm0, [eax+ebx+color5]
rlm@1 1256 movq mm1, mm3
rlm@1 1257 por mm1, mm4
rlm@1 1258 por mm1, mm6
rlm@1 1259 pand mm0, mm1
rlm@1 1260 movq mm1, mm5
rlm@1 1261 pand mm1, [I56Pixel]
rlm@1 1262 por mm0, mm1
rlm@1 1263 movq mm1, mm7
rlm@1 1264 pand mm1, [product1b]
rlm@1 1265 por mm0, mm1
rlm@1 1266 movq mm1, mm2
rlm@1 1267 pand mm1, [product1a]
rlm@1 1268 por mm0, mm1
rlm@1 1269 movq [final1a], mm0
rlm@1 1270
rlm@1 1271 movq mm0, [eax+ebx+color6]
rlm@1 1272 movq mm1, mm3
rlm@1 1273 por mm1, mm5
rlm@1 1274 por mm1, mm7
rlm@1 1275 pand mm0, mm1
rlm@1 1276 movq mm1, mm4
rlm@1 1277 pand mm1, [I56Pixel]
rlm@1 1278 por mm0, mm1
rlm@1 1279 movq mm1, mm6
rlm@1 1280 pand mm1, [product1a]
rlm@1 1281 por mm0, mm1
rlm@1 1282 movq mm1, mm2
rlm@1 1283 pand mm1, [product1b]
rlm@1 1284 por mm0, mm1
rlm@1 1285 movq [final1b], mm0
rlm@1 1286
rlm@1 1287 movq mm0, [eax+ebx+ebx+color2]
rlm@1 1288 movq mm1, mm3
rlm@1 1289 por mm1, mm5
rlm@1 1290 por mm1, mm7
rlm@1 1291 pand mm0, mm1
rlm@1 1292 movq mm1, mm4
rlm@1 1293 pand mm1, [I23Pixel]
rlm@1 1294 por mm0, mm1
rlm@1 1295 movq mm1, mm6
rlm@1 1296 pand mm1, [product2b]
rlm@1 1297 por mm0, mm1
rlm@1 1298 movq mm1, mm2
rlm@1 1299 pand mm1, [product2a]
rlm@1 1300 por mm0, mm1
rlm@1 1301 movq [final2a], mm0
rlm@1 1302
rlm@1 1303 movq mm0, [eax+ebx+ebx+color3]
rlm@1 1304 movq mm1, mm3
rlm@1 1305 por mm1, mm4
rlm@1 1306 por mm1, mm6
rlm@1 1307 pand mm0, mm1
rlm@1 1308 movq mm1, mm5
rlm@1 1309 pand mm1, [I23Pixel]
rlm@1 1310 por mm0, mm1
rlm@1 1311 movq mm1, mm7
rlm@1 1312 pand mm1, [product2a]
rlm@1 1313 por mm0, mm1
rlm@1 1314 movq mm1, mm2
rlm@1 1315 pand mm1, [product2b]
rlm@1 1316 por mm0, mm1
rlm@1 1317 movq [final2b], mm0
rlm@1 1318
rlm@1 1319
rlm@1 1320 movq mm0, [final1a]
rlm@1 1321 movq mm2, [final1b]
rlm@1 1322 movq mm1, mm0
rlm@1 1323 movq mm4, [final2a]
rlm@1 1324 movq mm6, [final2b]
rlm@1 1325 movq mm5, mm4
rlm@1 1326 punpcklwd mm0, mm2
rlm@1 1327 punpckhwd mm1, mm2
rlm@1 1328 punpcklwd mm4, mm6
rlm@1 1329 punpckhwd mm5, mm6
rlm@1 1330
rlm@1 1331
rlm@1 1332
rlm@1 1333
rlm@1 1334 %ifdef FAR_POINTER
rlm@1 1335 movq [fs:edx], mm0
rlm@1 1336 movq [fs:edx+8], mm1
rlm@1 1337 push edx
rlm@1 1338 add edx, [ebp+dstPitch]
rlm@1 1339 movq [fs:edx], mm4
rlm@1 1340 movq [fs:edx+8], mm5
rlm@1 1341 pop edx
rlm@1 1342 %else
rlm@1 1343 movq [edx], mm0
rlm@1 1344 movq [edx+8], mm1
rlm@1 1345 push edx
rlm@1 1346 add edx, [ebp+dstPitch]
rlm@1 1347 movq [edx], mm4
rlm@1 1348 movq [edx+8], mm5
rlm@1 1349 pop edx
rlm@1 1350 %endif
rlm@1 1351 .SKIP_PROCESS:
rlm@1 1352 mov ecx, [ebp+deltaPtr]
rlm@1 1353 add ecx, 8
rlm@1 1354 mov [ebp+deltaPtr], ecx
rlm@1 1355 add edx, 16
rlm@1 1356 add eax, 8
rlm@1 1357
rlm@1 1358 pop ecx
rlm@1 1359 sub ecx, 4
rlm@1 1360 cmp ecx, 0
rlm@1 1361 jg near .Loop
rlm@1 1362
rlm@1 1363 ; Restore some stuff
rlm@1 1364 popad
rlm@1 1365 mov esp, ebp
rlm@1 1366 pop ebp
rlm@1 1367 emms
rlm@1 1368 ret
rlm@1 1369
rlm@1 1370
rlm@1 1371 ;-------------------------------------------------------------------------
rlm@1 1372 ;-------------------------------------------------------------------------
rlm@1 1373 ;-------------------------------------------------------------------------
rlm@1 1374 ;-------------------------------------------------------------------------
rlm@1 1375 ;-------------------------------------------------------------------------
rlm@1 1376 ;-------------------------------------------------------------------------
rlm@1 1377 ;-------------------------------------------------------------------------
rlm@1 1378
rlm@1 1379
; Byte offsets of the 16 neighbourhood pixels used by _2xSaILine, relative to
; the start of their source row. Pixels are 16-bit words, so the four columns
; sit at -2, 0, +2 and +4 bytes. The row itself is selected in the code by
; adding 0..3 times srcPitch (ebx) to eax. Pixel map (rows top to bottom):
;     I|E F|J
;     G|A B|K
;     H|C D|L
;     M|N O|P
rlm@1 1380 ;This is version 0.50
rlm@1 1381 colorI equ -2 ; row 0 (addressed as [eax+...]), column -1
rlm@1 1382 colorE equ 0 ; row 0, column 0
rlm@1 1383 colorF equ 2 ; row 0, column +1
rlm@1 1384 colorJ equ 4 ; row 0, column +2
rlm@1 1385
rlm@1 1386 colorG equ -2 ; row 1 (addressed as [eax+ebx+...]), column -1
rlm@1 1387 colorA equ 0 ; row 1, column 0: the pixel being scaled
rlm@1 1388 colorB equ 2 ; row 1, column +1
rlm@1 1389 colorK equ 4 ; row 1, column +2
rlm@1 1390
rlm@1 1391 colorH equ -2 ; row 2 (addressed as [eax+ebx+ebx+...]), column -1
rlm@1 1392 colorC equ 0 ; row 2, column 0
rlm@1 1393 colorD equ 2 ; row 2, column +1
rlm@1 1394 colorL equ 4 ; row 2, column +2
rlm@1 1395
rlm@1 1396 colorM equ -2 ; row 3 (eax is bumped by ebx first, then [eax+ebx+ebx+...]), column -1
rlm@1 1397 colorN equ 0 ; row 3, column 0
rlm@1 1398 colorO equ 2 ; row 3, column +1
rlm@1 1399 colorP equ 4 ; row 3, column +2
rlm@1 1400
;-------------------------------------------------------------------------
; _2xSaILine (exported as __2xSaILine when __DJGPP__ is defined)
;
; Scales one line of 16-bit pixels to twice its width and writes two
; destination rows (at dstOffset and at dstOffset + dstPitch).
;
; Stack arguments, read through ebp after the standard frame is set up:
;   [ebp+srcPtr]     source line; points at pixel A of the first column
;   [ebp+deltaPtr]   copy of earlier source data used to skip unchanged
;                    pixels; this argument slot is advanced in place (+8 per
;                    iteration), so the caller's stack copy is modified
;   [ebp+srcPitch]   bytes per source row; also used as the pitch of the
;                    delta buffer
;   [ebp+width]      number of source pixels; consumed 4 per iteration
;   [ebp+dstOffset]  destination pointer
;   [ebp+dstPitch]   bytes per destination row
;   [ebp+dstSegment] destination selector, only read when FAR_POINTER is set
; (the equ values for width/dst* are defined outside this part of the file)
;
; Register roles inside the loop:
;   eax = source row 0 (the row above srcPtr), advanced 8 bytes per iteration
;   ebx = srcPitch
;   edx = destination, advanced 16 bytes per iteration
;   ecx = scratch; the remaining pixel count lives on the stack
;
; Notes:
;   * The loop body runs before the count is tested, so at least one group
;     of 4 pixels is processed even if width <= 0.
;   * Each iteration reads rows 0..3 at byte offsets -2..+11, so the caller
;     must provide readable pixels around the line - TODO confirm margins
;     against the caller.
;   * Mask1, Mask2 and ACPixel are fixed static scratch locations.
;   * All general registers are preserved (pushad/popad); MMX state is
;     released with emms. With FAR_POINTER, fs is loaded and not restored.
;-------------------------------------------------------------------------
rlm@1 1401 %ifdef __DJGPP__
rlm@1 1402 __2xSaILine:
rlm@1 1403 %else
rlm@1 1404 _2xSaILine:
rlm@1 1405 %endif
rlm@1 1406 ; Store some stuff
rlm@1 1407 push ebp
rlm@1 1408 mov ebp, esp
rlm@1 1409 pushad
rlm@1 1410
rlm@1 1411 ; Prepare the destination
rlm@1 1412 %ifdef FAR_POINTER
rlm@1 1413 ; Set the selector
rlm@1 1414 mov eax, [ebp+dstSegment]
rlm@1 1415 mov fs, ax
rlm@1 1416 %endif
rlm@1 1417 mov edx, [ebp+dstOffset] ; edx points to the screen
rlm@1 1418 ; Prepare the source
rlm@1 1419 ; eax points to colorA
rlm@1 1420 mov eax, [ebp+srcPtr]
rlm@1 1421 mov ebx, [ebp+srcPitch]
rlm@1 1422 mov ecx, [ebp+width]
rlm@1 1423 ; eax now points to colorE
rlm@1 1424 sub eax, ebx
rlm@1 1425
rlm@1 1426
rlm@1 1427 ; Main Loop
rlm@1 1428 .Loop: push ecx ; park the remaining pixel count; ecx is scratch below
rlm@1 1429
rlm@1 1430 ;-----Check Delta------------------
rlm@1 1431 mov ecx, [ebp+deltaPtr]
rlm@1 1432
rlm@1 1433 movq mm0, [eax+colorI] ; each movq fetches 4 pixels; the -2 and +4 loads of a row together span bytes -2..+11
rlm@1 1434 movq mm1, [eax+colorJ]
rlm@1 1435 movq mm2, [eax+ebx+colorG]
rlm@1 1436 movq mm3, [eax+ebx+colorK]
rlm@1 1437 movq mm4, [eax+ebx+ebx+colorH]
rlm@1 1438 movq mm5, [eax+ebx+ebx+colorL]
rlm@1 1439 push eax ; row 3 needs eax+3*ebx, so eax is bumped by one pitch temporarily
rlm@1 1440 add eax, ebx
rlm@1 1441 movq mm6, [eax+ebx+ebx+colorM]
rlm@1 1442 movq mm7, [eax+ebx+ebx+colorP]
rlm@1 1443 pop eax
rlm@1 1444
rlm@1 1445 pcmpeqw mm0, [ecx+2+colorI] ; compare every loaded quad with the delta buffer (same offsets, +2 bytes)
rlm@1 1446 pcmpeqw mm1, [ecx+2+colorJ] ; mm1 holds the row-0 colorJ quad (was written colorK; both equ 4, same encoding)
rlm@1 1447 pcmpeqw mm2, [ecx+ebx+2+colorG]
rlm@1 1448 pcmpeqw mm3, [ecx+ebx+2+colorK]
rlm@1 1449 pcmpeqw mm4, [ecx+ebx+ebx+2+colorH]
rlm@1 1450 pcmpeqw mm5, [ecx+ebx+ebx+2+colorL]
rlm@1 1451 add ecx, ebx ; same one-pitch bump for the delta pointer to reach row 3
rlm@1 1452 pcmpeqw mm6, [ecx+ebx+ebx+2+colorM]
rlm@1 1453 pcmpeqw mm7, [ecx+ebx+ebx+2+colorP]
rlm@1 1454 sub ecx, ebx
rlm@1 1455
rlm@1 1456
rlm@1 1457 pand mm0, mm1 ; AND all eight results: a word stays 0xFFFF only if every compare matched
rlm@1 1458 pand mm2, mm3
rlm@1 1459 pand mm4, mm5
rlm@1 1460 pand mm6, mm7
rlm@1 1461 pand mm0, mm2
rlm@1 1462 pand mm4, mm6
rlm@1 1463 pxor mm7, mm7
rlm@1 1464 pand mm0, mm4
rlm@1 1465 movq mm6, [eax+colorI] ; current row-0 quad, written to the delta buffer below
rlm@1 1466 pcmpeqw mm7, mm0 ; invert: mm7 word = 0xFFFF where at least one compare differed
rlm@1 1467
rlm@1 1468 movq [ecx+2+colorI], mm6 ; refresh the delta buffer (row 0 only)
rlm@1 1469
rlm@1 1470 packsswb mm7, mm7 ; squeeze the four word flags into the low dword
rlm@1 1471 movd ecx, mm7
rlm@1 1472 test ecx, ecx
rlm@1 1473 jz near .SKIP_PROCESS ; nothing changed: leave the destination as it is
rlm@1 1474
rlm@1 1475 ;End Delta
rlm@1 1476
rlm@1 1477 ;---------------------------------
rlm@1 1478
rlm@1 1479
rlm@1 1480 ;1
rlm@1 1481 ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorE) && (colorB == colorL)
rlm@1 1482 movq mm0, [eax+ebx+colorA] ;mm0 and mm1 contain colorA
rlm@1 1483 movq mm2, [eax+ebx+colorB] ;mm2 and mm3 contain colorB
rlm@1 1484
rlm@1 1485 movq mm1, mm0
rlm@1 1486 movq mm3, mm2
rlm@1 1487
rlm@1 1488 pcmpeqw mm0, [eax+ebx+ebx+colorD]
rlm@1 1489 pcmpeqw mm1, [eax+colorE]
rlm@1 1490 pcmpeqw mm2, [eax+ebx+ebx+colorL]
rlm@1 1491 pcmpeqw mm3, [eax+ebx+ebx+colorC]
rlm@1 1492
rlm@1 1493 pand mm0, mm1
rlm@1 1494 pxor mm1, mm1
rlm@1 1495 pand mm0, mm2
rlm@1 1496 pcmpeqw mm3, mm1 ; compare with zero = logical NOT, giving (colorB != colorC)
rlm@1 1497 pand mm0, mm3 ;result in mm0
rlm@1 1498
rlm@1 1499 ;if ((colorA == colorC) && (colorB != colorE) && (colorA == colorF) && (colorB == colorJ)
rlm@1 1500 movq mm4, [eax+ebx+colorA] ;mm4 and mm5 contain colorA
rlm@1 1501 movq mm6, [eax+ebx+colorB] ;mm6 and mm7 contain colorB
rlm@1 1502 movq mm5, mm4
rlm@1 1503 movq mm7, mm6
rlm@1 1504
rlm@1 1505 pcmpeqw mm4, [eax+ebx+ebx+colorC]
rlm@1 1506 pcmpeqw mm5, [eax+colorF]
rlm@1 1507 pcmpeqw mm6, [eax+colorJ]
rlm@1 1508 pcmpeqw mm7, [eax+colorE]
rlm@1 1509
rlm@1 1510 pand mm4, mm5
rlm@1 1511 pxor mm5, mm5
rlm@1 1512 pand mm4, mm6
rlm@1 1513 pcmpeqw mm7, mm5 ; NOT: (colorB != colorE)
rlm@1 1514 pand mm4, mm7 ;result in mm4
rlm@1 1515
rlm@1 1516 por mm0, mm4 ;combine the masks
rlm@1 1517 movq [Mask1], mm0 ; Mask1: lanes where the right-hand output pixel takes colorA
rlm@1 1518
rlm@1 1519 ;--------------------------------------------
rlm@1 1520
rlm@1 1521 ;2
rlm@1 1522 ;if ((colorB == colorC) && (colorA != colorD) && (colorB == colorF) && (colorA == colorH)
rlm@1 1523 movq mm0, [eax+ebx+colorB] ;mm0 and mm1 contain colorB
rlm@1 1524 movq mm2, [eax+ebx+colorA] ;mm2 and mm3 contain colorA
rlm@1 1525 movq mm1, mm0
rlm@1 1526 movq mm3, mm2
rlm@1 1527
rlm@1 1528 pcmpeqw mm0, [eax+ebx+ebx+colorC]
rlm@1 1529 pcmpeqw mm1, [eax+colorF]
rlm@1 1530 pcmpeqw mm2, [eax+ebx+ebx+colorH]
rlm@1 1531 pcmpeqw mm3, [eax+ebx+ebx+colorD]
rlm@1 1532
rlm@1 1533 pand mm0, mm1
rlm@1 1534 pxor mm1, mm1
rlm@1 1535 pand mm0, mm2
rlm@1 1536 pcmpeqw mm3, mm1 ; NOT: (colorA != colorD)
rlm@1 1537 pand mm0, mm3 ;result in mm0
rlm@1 1538
rlm@1 1539 ;if ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI)
rlm@1 1540 movq mm4, [eax+ebx+colorB] ;mm4 and mm5 contain colorB
rlm@1 1541 movq mm6, [eax+ebx+colorA] ;mm6 and mm7 contain colorA
rlm@1 1542 movq mm5, mm4
rlm@1 1543 movq mm7, mm6
rlm@1 1544
rlm@1 1545 pcmpeqw mm4, [eax+ebx+ebx+colorD]
rlm@1 1546 pcmpeqw mm5, [eax+colorE]
rlm@1 1547 pcmpeqw mm6, [eax+colorI]
rlm@1 1548 pcmpeqw mm7, [eax+colorF]
rlm@1 1549
rlm@1 1550 pand mm4, mm5
rlm@1 1551 pxor mm5, mm5
rlm@1 1552 pand mm4, mm6
rlm@1 1553 pcmpeqw mm7, mm5 ; NOT: (colorA != colorF)
rlm@1 1554 pand mm4, mm7 ;result in mm4
rlm@1 1555
rlm@1 1556 por mm0, mm4 ;combine the masks
rlm@1 1557 movq [Mask2], mm0 ; Mask2: lanes where the right-hand output pixel takes colorB
rlm@1 1558
rlm@1 1559
rlm@1 1560 ;interpolate colorA and colorB
rlm@1 1561 movq mm0, [eax+ebx+colorA] ; result = ((A & colorMask) >> 1) + ((B & colorMask) >> 1) + (A & B & lowPixelMask)
rlm@1 1562 movq mm1, [eax+ebx+colorB]
rlm@1 1563
rlm@1 1564 movq mm2, mm0
rlm@1 1565 movq mm3, mm1
rlm@1 1566
rlm@1 1567 pand mm0, [colorMask]
rlm@1 1568 pand mm1, [colorMask]
rlm@1 1569
rlm@1 1570 psrlw mm0, 1
rlm@1 1571 psrlw mm1, 1
rlm@1 1572
rlm@1 1573 pand mm3, [lowPixelMask]
rlm@1 1574 paddw mm0, mm1
rlm@1 1575
rlm@1 1576 pand mm3, mm2
rlm@1 1577 paddw mm0, mm3 ;mm0 contains the interpolated values
rlm@1 1578
rlm@1 1579 ;assemble the pixels
rlm@1 1580 movq mm1, [eax+ebx+colorA] ; right pixel = A where Mask1, B where Mask2, the A/B average elsewhere
rlm@1 1581 movq mm2, [eax+ebx+colorB]
rlm@1 1582
rlm@1 1583 movq mm3, [Mask1]
rlm@1 1584 movq mm5, mm1
rlm@1 1585 movq mm4, [Mask2]
rlm@1 1586 movq mm6, mm1
rlm@1 1587
rlm@1 1588 pand mm1, mm3
rlm@1 1589 por mm3, mm4
rlm@1 1590 pxor mm7, mm7
rlm@1 1591 pand mm2, mm4
rlm@1 1592
rlm@1 1593 pcmpeqw mm3, mm7 ; mm3 = lanes where neither mask is set
rlm@1 1594 por mm1, mm2
rlm@1 1595 pand mm0, mm3
rlm@1 1596
rlm@1 1597 por mm0, mm1
rlm@1 1598
rlm@1 1599 punpcklwd mm5, mm0 ; interleave: each source pixel A is followed by its computed right neighbour
rlm@1 1600 punpckhwd mm6, mm0
rlm@1 1601
rlm@1 1602 %ifdef FAR_POINTER
rlm@1 1603 movq [fs:edx], mm5
rlm@1 1604 movq [fs:edx+8], mm6
rlm@1 1605 %else
rlm@1 1606 movq [edx], mm5 ; first destination row: 8 output pixels (16 bytes)
rlm@1 1607 movq [edx+8], mm6
rlm@1 1608 %endif
rlm@1 1609
rlm@1 1610 ;------------------------------------------------
rlm@1 1611 ; Create the Nextline
rlm@1 1612 ;------------------------------------------------
rlm@1 1613 ;3 ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorG) && (colorC == colorO)
rlm@1 1614 movq mm0, [eax+ebx+colorA] ;mm0 and mm1 contain colorA
rlm@1 1615 movq mm2, [eax+ebx+ebx+colorC] ;mm2 and mm3 contain colorC
rlm@1 1616 movq mm1, mm0
rlm@1 1617 movq mm3, mm2
rlm@1 1618
rlm@1 1619 push eax
rlm@1 1620 add eax, ebx ; eax = row 1 inside this push/pop pair, so [eax+ebx+ebx] reaches row 3
rlm@1 1621 pcmpeqw mm0, [eax+ebx+colorD]
rlm@1 1622 pcmpeqw mm1, [eax+colorG]
rlm@1 1623 pcmpeqw mm2, [eax+ebx+ebx+colorO]
rlm@1 1624 pcmpeqw mm3, [eax+colorB]
rlm@1 1625 pop eax
rlm@1 1626
rlm@1 1627 pand mm0, mm1
rlm@1 1628 pxor mm1, mm1
rlm@1 1629 pand mm0, mm2
rlm@1 1630 pcmpeqw mm3, mm1 ; NOT: (colorB != colorC)
rlm@1 1631 pand mm0, mm3 ;result in mm0
rlm@1 1632
rlm@1 1633 ;if ((colorA == colorB) && (colorG != colorC) && (colorA == colorH) && (colorC == colorM)
rlm@1 1634 movq mm4, [eax+ebx+colorA] ;mm4 and mm5 contain colorA
rlm@1 1635 movq mm6, [eax+ebx+ebx+colorC] ;mm6 and mm7 contain colorC
rlm@1 1636 movq mm5, mm4
rlm@1 1637 movq mm7, mm6
rlm@1 1638
rlm@1 1639 push eax
rlm@1 1640 add eax, ebx ; eax = row 1 again (see above)
rlm@1 1641 pcmpeqw mm4, [eax+ebx+colorH]
rlm@1 1642 pcmpeqw mm5, [eax+colorB]
rlm@1 1643 pcmpeqw mm6, [eax+ebx+ebx+colorM]
rlm@1 1644 pcmpeqw mm7, [eax+colorG]
rlm@1 1645 pop eax
rlm@1 1646
rlm@1 1647 pand mm4, mm5
rlm@1 1648 pxor mm5, mm5
rlm@1 1649 pand mm4, mm6
rlm@1 1650 pcmpeqw mm7, mm5 ; NOT: (colorG != colorC)
rlm@1 1651 pand mm4, mm7 ;result in mm4
rlm@1 1652
rlm@1 1653 por mm0, mm4 ;combine the masks
rlm@1 1654 movq [Mask1], mm0 ; Mask1 reused: lanes where the lower-left output pixel takes colorA
rlm@1 1655 ;--------------------------------------------
rlm@1 1656
rlm@1 1657 ;4
rlm@1 1658 ;if ((colorB == colorC) && (colorA != colorD) && (colorC == colorH) && (colorA == colorF)
rlm@1 1659 movq mm0, [eax+ebx+ebx+colorC] ;mm0 and mm1 contain colorC
rlm@1 1660 movq mm2, [eax+ebx+colorA] ;mm2 and mm3 contain colorA
rlm@1 1661 movq mm1, mm0
rlm@1 1662 movq mm3, mm2
rlm@1 1663
rlm@1 1664 pcmpeqw mm0, [eax+ebx+colorB]
rlm@1 1665 pcmpeqw mm1, [eax+ebx+ebx+colorH]
rlm@1 1666 pcmpeqw mm2, [eax+colorF]
rlm@1 1667 pcmpeqw mm3, [eax+ebx+ebx+colorD]
rlm@1 1668
rlm@1 1669 pand mm0, mm1
rlm@1 1670 pxor mm1, mm1
rlm@1 1671 pand mm0, mm2
rlm@1 1672 pcmpeqw mm3, mm1 ; NOT: (colorA != colorD)
rlm@1 1673 pand mm0, mm3 ;result in mm0
rlm@1 1674
rlm@1 1675 ;if ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI)
rlm@1 1676 movq mm4, [eax+ebx+ebx+colorC] ;mm4 and mm5 contain colorC
rlm@1 1677 movq mm6, [eax+ebx+colorA] ;mm6 and mm7 contain colorA
rlm@1 1678 movq mm5, mm4
rlm@1 1679 movq mm7, mm6
rlm@1 1680
rlm@1 1681 pcmpeqw mm4, [eax+ebx+ebx+colorD]
rlm@1 1682 pcmpeqw mm5, [eax+ebx+colorG]
rlm@1 1683 pcmpeqw mm6, [eax+colorI]
rlm@1 1684 pcmpeqw mm7, [eax+ebx+ebx+colorH]
rlm@1 1685
rlm@1 1686 pand mm4, mm5
rlm@1 1687 pxor mm5, mm5
rlm@1 1688 pand mm4, mm6
rlm@1 1689 pcmpeqw mm7, mm5 ; NOT: (colorA != colorH)
rlm@1 1690 pand mm4, mm7 ;result in mm4
rlm@1 1691
rlm@1 1692 por mm0, mm4 ;combine the masks
rlm@1 1693 movq [Mask2], mm0 ; Mask2 reused: lanes where the lower-left output pixel takes colorC
rlm@1 1694 ;----------------------------------------------
rlm@1 1695
rlm@1 1696 ;interpolate colorA and colorC
rlm@1 1697 movq mm0, [eax+ebx+colorA] ; same averaging formula as for A/B above, applied to A and C
rlm@1 1698 movq mm1, [eax+ebx+ebx+colorC]
rlm@1 1699
rlm@1 1700 movq mm2, mm0
rlm@1 1701 movq mm3, mm1
rlm@1 1702
rlm@1 1703 pand mm0, [colorMask]
rlm@1 1704 pand mm1, [colorMask]
rlm@1 1705
rlm@1 1706 psrlw mm0, 1
rlm@1 1707 psrlw mm1, 1
rlm@1 1708
rlm@1 1709 pand mm3, [lowPixelMask]
rlm@1 1710 paddw mm0, mm1
rlm@1 1711
rlm@1 1712 pand mm3, mm2
rlm@1 1713 paddw mm0, mm3 ;mm0 contains the interpolated values
rlm@1 1714 ;-------------
rlm@1 1715
rlm@1 1716 ;assemble the pixels
rlm@1 1717 movq mm1, [eax+ebx+colorA] ; lower-left pixel = A where Mask1, C where Mask2, the A/C average elsewhere
rlm@1 1718 movq mm2, [eax+ebx+ebx+colorC]
rlm@1 1719
rlm@1 1720 movq mm3, [Mask1]
rlm@1 1721 movq mm4, [Mask2]
rlm@1 1722
rlm@1 1723 pand mm1, mm3
rlm@1 1724 pand mm2, mm4
rlm@1 1725
rlm@1 1726 por mm3, mm4
rlm@1 1727 pxor mm7, mm7
rlm@1 1728 por mm1, mm2
rlm@1 1729
rlm@1 1730 pcmpeqw mm3, mm7 ; mm3 = lanes where neither mask is set
rlm@1 1731 pand mm0, mm3
rlm@1 1732 por mm0, mm1
rlm@1 1733 movq [ACPixel], mm0 ; kept in memory until the second destination row is assembled
rlm@1 1734
rlm@1 1735 ;////////////////////////////////
rlm@1 1736 ; Decide which "branch" to take
rlm@1 1737 ;--------------------------------
rlm@1 1738 movq mm0, [eax+ebx+colorA]
rlm@1 1739 movq mm1, [eax+ebx+colorB]
rlm@1 1740 movq mm6, mm0
rlm@1 1741 movq mm7, mm1
rlm@1 1742 pcmpeqw mm0, [eax+ebx+ebx+colorD] ; mm0 = (A == D)
rlm@1 1743 pcmpeqw mm1, [eax+ebx+ebx+colorC] ; mm1 = (B == C)
rlm@1 1744 pcmpeqw mm6, mm7 ; mm6 = (A == B)
rlm@1 1745
rlm@1 1746 movq mm2, mm0
rlm@1 1747 movq mm3, mm0
rlm@1 1748
rlm@1 1749 pand mm0, mm1 ;colorA == colorD && colorB == colorC
rlm@1 1750 pxor mm7, mm7
rlm@1 1751
rlm@1 1752 pcmpeqw mm2, mm7
rlm@1 1753 pand mm6, mm0 ; mm6 = all four of A, B, C, D are equal
rlm@1 1754 pand mm2, mm1 ;colorA != colorD && colorB == colorC
rlm@1 1755
rlm@1 1756 pcmpeqw mm1, mm7
rlm@1 1757
rlm@1 1758 pand mm1, mm3 ;colorA == colorD && colorB != colorC
rlm@1 1759 pxor mm0, mm6 ; mm0 = both diagonals match but A != B: the lanes that need the neighbour vote
rlm@1 1760 por mm1, mm6 ; the all-equal lanes are folded into the "use colorA" mask
rlm@1 1761 movq mm7, mm0
rlm@1 1762 movq [Mask2], mm2
rlm@1 1763 packsswb mm7, mm7
rlm@1 1764 movq [Mask1], mm1
rlm@1 1765
rlm@1 1766 movd ecx, mm7
rlm@1 1767 test ecx, ecx
rlm@1 1768 jz near .SKIP_GUESS ; no lane is ambiguous: skip the vote
rlm@1 1769
rlm@1 1770 ;---------------------------------------------
rlm@1 1771 ; Map of the pixels: I|E F|J
rlm@1 1772 ; G|A B|K
rlm@1 1773 ; H|C D|L
rlm@1 1774 ; M|N O|P
; Neighbour vote for the ambiguous lanes. For each of four neighbour pairs:
; add 1 to mm7 if at most one of the pair equals colorA, subtract 1 if at
; most one of the pair equals colorB. mm6 holds 1 in ambiguous lanes and 0
; elsewhere, so other lanes contribute nothing.
rlm@1 1775 movq mm6, mm0
rlm@1 1776 movq mm4, [eax+ebx+colorA]
rlm@1 1777 movq mm5, [eax+ebx+colorB]
rlm@1 1778 pxor mm7, mm7 ; mm7 = running vote, starts at 0
rlm@1 1779 pand mm6, [ONE]
rlm@1 1780
rlm@1 1781 movq mm0, [eax+colorE] ; pair 1: E and G
rlm@1 1782 movq mm1, [eax+ebx+colorG]
rlm@1 1783 movq mm2, mm0
rlm@1 1784 movq mm3, mm1
rlm@1 1785 pcmpeqw mm0, mm4
rlm@1 1786 pcmpeqw mm1, mm4
rlm@1 1787 pcmpeqw mm2, mm5
rlm@1 1788 pcmpeqw mm3, mm5
rlm@1 1789 pand mm0, mm6
rlm@1 1790 pand mm1, mm6
rlm@1 1791 pand mm2, mm6
rlm@1 1792 pand mm3, mm6
rlm@1 1793 paddw mm0, mm1 ; mm0 = how many of the pair equal colorA (0..2)
rlm@1 1794 paddw mm2, mm3 ; mm2 = how many of the pair equal colorB (0..2)
rlm@1 1795
rlm@1 1796 pxor mm3, mm3
rlm@1 1797 pcmpgtw mm0, mm6 ; true only when the count is 2
rlm@1 1798 pcmpgtw mm2, mm6
rlm@1 1799 pcmpeqw mm0, mm3 ; invert: true when the count is 0 or 1
rlm@1 1800 pcmpeqw mm2, mm3
rlm@1 1801 pand mm0, mm6 ; reduce to 1 / 0, ambiguous lanes only
rlm@1 1802 pand mm2, mm6
rlm@1 1803 paddw mm7, mm0
rlm@1 1804 psubw mm7, mm2
rlm@1 1805
rlm@1 1806 movq mm0, [eax+colorF] ; pair 2: F and K
rlm@1 1807 movq mm1, [eax+ebx+colorK]
rlm@1 1808 movq mm2, mm0
rlm@1 1809 movq mm3, mm1
rlm@1 1810 pcmpeqw mm0, mm4
rlm@1 1811 pcmpeqw mm1, mm4
rlm@1 1812 pcmpeqw mm2, mm5
rlm@1 1813 pcmpeqw mm3, mm5
rlm@1 1814 pand mm0, mm6
rlm@1 1815 pand mm1, mm6
rlm@1 1816 pand mm2, mm6
rlm@1 1817 pand mm3, mm6
rlm@1 1818 paddw mm0, mm1
rlm@1 1819 paddw mm2, mm3
rlm@1 1820
rlm@1 1821 pxor mm3, mm3
rlm@1 1822 pcmpgtw mm0, mm6
rlm@1 1823 pcmpgtw mm2, mm6
rlm@1 1824 pcmpeqw mm0, mm3
rlm@1 1825 pcmpeqw mm2, mm3
rlm@1 1826 pand mm0, mm6
rlm@1 1827 pand mm2, mm6
rlm@1 1828 paddw mm7, mm0
rlm@1 1829 psubw mm7, mm2
rlm@1 1830
rlm@1 1831 push eax ; eax = row 1 until the matching pop, so rows 2 and 3 are reachable
rlm@1 1832 add eax, ebx
rlm@1 1833 movq mm0, [eax+ebx+colorH] ; pair 3: H and N
rlm@1 1834 movq mm1, [eax+ebx+ebx+colorN]
rlm@1 1835 movq mm2, mm0
rlm@1 1836 movq mm3, mm1
rlm@1 1837 pcmpeqw mm0, mm4
rlm@1 1838 pcmpeqw mm1, mm4
rlm@1 1839 pcmpeqw mm2, mm5
rlm@1 1840 pcmpeqw mm3, mm5
rlm@1 1841 pand mm0, mm6
rlm@1 1842 pand mm1, mm6
rlm@1 1843 pand mm2, mm6
rlm@1 1844 pand mm3, mm6
rlm@1 1845 paddw mm0, mm1
rlm@1 1846 paddw mm2, mm3
rlm@1 1847
rlm@1 1848 pxor mm3, mm3
rlm@1 1849 pcmpgtw mm0, mm6
rlm@1 1850 pcmpgtw mm2, mm6
rlm@1 1851 pcmpeqw mm0, mm3
rlm@1 1852 pcmpeqw mm2, mm3
rlm@1 1853 pand mm0, mm6
rlm@1 1854 pand mm2, mm6
rlm@1 1855 paddw mm7, mm0
rlm@1 1856 psubw mm7, mm2
rlm@1 1857
rlm@1 1858 movq mm0, [eax+ebx+colorL] ; pair 4: L and O
rlm@1 1859 movq mm1, [eax+ebx+ebx+colorO]
rlm@1 1860 movq mm2, mm0
rlm@1 1861 movq mm3, mm1
rlm@1 1862 pcmpeqw mm0, mm4
rlm@1 1863 pcmpeqw mm1, mm4
rlm@1 1864 pcmpeqw mm2, mm5
rlm@1 1865 pcmpeqw mm3, mm5
rlm@1 1866 pand mm0, mm6
rlm@1 1867 pand mm1, mm6
rlm@1 1868 pand mm2, mm6
rlm@1 1869 pand mm3, mm6
rlm@1 1870 paddw mm0, mm1
rlm@1 1871 paddw mm2, mm3
rlm@1 1872
rlm@1 1873 pxor mm3, mm3
rlm@1 1874 pcmpgtw mm0, mm6
rlm@1 1875 pcmpgtw mm2, mm6
rlm@1 1876 pcmpeqw mm0, mm3
rlm@1 1877 pcmpeqw mm2, mm3
rlm@1 1878 pand mm0, mm6
rlm@1 1879 pand mm2, mm6
rlm@1 1880 paddw mm7, mm0
rlm@1 1881 psubw mm7, mm2
rlm@1 1882
rlm@1 1883 pop eax
rlm@1 1884 movq mm1, mm7
rlm@1 1885 pxor mm0, mm0
rlm@1 1886 pcmpgtw mm7, mm0 ; mm7 = lanes with a positive vote
rlm@1 1887 pcmpgtw mm0, mm1 ; mm0 = lanes with a negative vote
rlm@1 1888
rlm@1 1889 por mm7, [Mask1] ; positive vote -> use colorA, negative -> use colorB; a tie leaves both masks clear
rlm@1 1890 por mm0, [Mask2]
rlm@1 1891 movq [Mask1], mm7
rlm@1 1892 movq [Mask2], mm0
rlm@1 1893
rlm@1 1894 .SKIP_GUESS:
rlm@1 1895 ;----------------------------
rlm@1 1896 ;interpolate A, B, C and D
; Four-way average: each pixel's high bits (qcolorMask) are shifted right by
; 2 and summed; the low two bits per channel (qlowpixelMask) are summed
; separately, shifted right by 2, masked and added back in.
rlm@1 1897 movq mm0, [eax+ebx+colorA]
rlm@1 1898 movq mm1, [eax+ebx+colorB]
rlm@1 1899 movq mm4, mm0
rlm@1 1900 movq mm2, [eax+ebx+ebx+colorC]
rlm@1 1901 movq mm5, mm1
rlm@1 1902 movq mm3, [qcolorMask]
rlm@1 1903 movq mm6, mm2
rlm@1 1904 movq mm7, [qlowpixelMask]
rlm@1 1905
rlm@1 1906 pand mm0, mm3
rlm@1 1907 pand mm1, mm3
rlm@1 1908 pand mm2, mm3
rlm@1 1909 pand mm3, [eax+ebx+ebx+colorD] ; mm3 = D & qcolorMask (the mask register is consumed here)
rlm@1 1910
rlm@1 1911 psrlw mm0, 2
rlm@1 1912 pand mm4, mm7
rlm@1 1913 psrlw mm1, 2
rlm@1 1914 pand mm5, mm7
rlm@1 1915 psrlw mm2, 2
rlm@1 1916 pand mm6, mm7
rlm@1 1917 psrlw mm3, 2
rlm@1 1918 pand mm7, [eax+ebx+ebx+colorD] ; mm7 = D & qlowpixelMask
rlm@1 1919
rlm@1 1920 paddw mm0, mm1
rlm@1 1921 paddw mm2, mm3
rlm@1 1922
rlm@1 1923 paddw mm4, mm5
rlm@1 1924 paddw mm6, mm7
rlm@1 1925
rlm@1 1926 paddw mm4, mm6
rlm@1 1927 paddw mm0, mm2
rlm@1 1928 psrlw mm4, 2
rlm@1 1929 pand mm4, [qlowpixelMask]
rlm@1 1930 paddw mm0, mm4 ;mm0 contains the interpolated value of A, B, C and D
rlm@1 1931
rlm@1 1932 ;\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
rlm@1 1933 ;assemble the pixels
rlm@1 1934 movq mm1, [Mask1] ; diagonal pixel = A where Mask1, B where Mask2, the 4-way average elsewhere
rlm@1 1935 movq mm2, [Mask2]
rlm@1 1936 movq mm4, [eax+ebx+colorA]
rlm@1 1937 movq mm5, [eax+ebx+colorB]
rlm@1 1938 pand mm4, mm1
rlm@1 1939 pand mm5, mm2
rlm@1 1940
rlm@1 1941 pxor mm7, mm7
rlm@1 1942 por mm1, mm2
rlm@1 1943 por mm4, mm5
rlm@1 1944 pcmpeqw mm1, mm7 ; mm1 = lanes where neither mask is set
rlm@1 1945 pand mm0, mm1
rlm@1 1946 por mm4, mm0 ;mm4 contains the diagonal pixels
rlm@1 1947
rlm@1 1948 movq mm0, [ACPixel]
rlm@1 1949 movq mm1, mm0
rlm@1 1950 punpcklwd mm0, mm4 ; interleave: lower-left (A/C) pixel followed by the diagonal pixel
rlm@1 1951 punpckhwd mm1, mm4
rlm@1 1952
rlm@1 1953 push edx
rlm@1 1954 add edx, [ebp+dstPitch] ; second destination row; edx is restored by the pop below
rlm@1 1955
rlm@1 1956 %ifdef FAR_POINTER
rlm@1 1957 movq [fs:edx], mm0
rlm@1 1958 movq [fs:edx+8], mm1
rlm@1 1959 %else
rlm@1 1960 movq [edx], mm0
rlm@1 1961 movq [edx+8], mm1
rlm@1 1962 %endif
rlm@1 1963 pop edx
rlm@1 1964
rlm@1 1965 .SKIP_PROCESS:
rlm@1 1966 mov ecx, [ebp+deltaPtr] ; advance the delta pointer in its argument slot: 4 pixels = 8 bytes
rlm@1 1967 add ecx, 8
rlm@1 1968 mov [ebp+deltaPtr], ecx
rlm@1 1969 add edx, 16 ; 8 output pixels per destination row
rlm@1 1970 add eax, 8 ; 4 source pixels
rlm@1 1971
rlm@1 1972 pop ecx ; remaining pixel count pushed at .Loop
rlm@1 1973 sub ecx, 4
rlm@1 1974 cmp ecx, 0
rlm@1 1975 jg near .Loop ; signed test: keep going while pixels remain
rlm@1 1976
rlm@1 1977 ; Restore some stuff
rlm@1 1978 popad
rlm@1 1979 mov esp, ebp
rlm@1 1980 pop ebp
rlm@1 1981 emms ; leave MMX state so the caller can use the FPU again
rlm@1 1982 ret
rlm@1 1983
rlm@1 1984 ;-------------------------------------------------------------------------
rlm@1 1985 ;-------------------------------------------------------------------------
rlm@1 1986 ;-------------------------------------------------------------------------
rlm@1 1987 ;-------------------------------------------------------------------------
rlm@1 1988 ;-------------------------------------------------------------------------
rlm@1 1989 ;-------------------------------------------------------------------------
rlm@1 1990 ;-------------------------------------------------------------------------
rlm@1 1991
;-------------------------------------------------------------------------
; Init_2xSaIMMX (exported as _Init_2xSaIMMX when __DJGPP__ is defined)
;
; Selects the interpolation masks for the given 16-bit pixel format.
;
; In:   [ebp+8] = pixel format, the decimal value 555 or 565
; Out:  eax = 0 when the masks were written,
;       eax = 1 when the format is neither 555 nor 565 (masks untouched)
; Writes colorMask, lowPixelMask, qcolorMask and qlowpixelMask (both dwords
; of each 8-byte constant). darkenMask, GreenMask and RedBlueMask are not
; modified here.
; Preserves edx; no other register besides eax is changed.
;-------------------------------------------------------------------------
rlm@1 1992 %ifdef __DJGPP__
rlm@1 1993 _Init_2xSaIMMX:
rlm@1 1994 %else
rlm@1 1995 Init_2xSaIMMX:
rlm@1 1996 %endif
rlm@1 1997 ; Store some stuff
rlm@1 1998 push ebp
rlm@1 1999 mov ebp, esp
rlm@1 2000 push edx
rlm@1 2001
rlm@1 2002
rlm@1 2003 ;Damn thing doesn't work
rlm@1 2004 ; mov eax,1
rlm@1 2005 ; cpuid
rlm@1 2006 ; test edx, 0x00800000 ;test bit 23
rlm@1 2007 ; jz end2 ;bit not set => no MMX detected
; NOTE(review): cpuid overwrites eax, ebx, ecx and edx, and only edx is saved
; in this routine - possibly why the disabled check misbehaved; unverified.
rlm@1 2008
rlm@1 2009 mov eax, [ebp+8] ;PixelFormat
rlm@1 2010 cmp eax, 555
rlm@1 2011 jz Bits555
rlm@1 2012 cmp eax, 565
rlm@1 2013 jz Bits565
rlm@1 2014 end2: ; unsupported format (also the target of the disabled MMX check)
rlm@1 2015 mov eax, 1 ; return 1 = failure
rlm@1 2016 jmp end3
rlm@1 2017 Bits555: ; 5-5-5 layout: channel low bits are bits 0, 5 and 10; bit 15 is unused
rlm@1 2018 mov edx, 0x7BDE7BDE ; every bit except each channel's lowest (and bit 15)
rlm@1 2019 mov eax, colorMask
rlm@1 2020 mov [eax], edx
rlm@1 2021 mov [eax+4], edx
rlm@1 2022 mov edx, 0x04210421 ; lowest bit of each channel
rlm@1 2023 mov eax, lowPixelMask
rlm@1 2024 mov [eax], edx
rlm@1 2025 mov [eax+4], edx
rlm@1 2026 mov edx, 0x739C739C ; every bit except each channel's two lowest (and bit 15)
rlm@1 2027 mov eax, qcolorMask
rlm@1 2028 mov [eax], edx
rlm@1 2029 mov [eax+4], edx
rlm@1 2030 mov edx, 0x0C630C63 ; two lowest bits of each channel
rlm@1 2031 mov eax, qlowpixelMask
rlm@1 2032 mov [eax], edx
rlm@1 2033 mov [eax+4], edx
rlm@1 2034 xor eax, eax ; return 0 = success
rlm@1 2035 jmp end3
rlm@1 2036 Bits565: ; 5-6-5 layout: channel low bits are bits 0, 5 and 11
rlm@1 2037 mov edx, 0xF7DEF7DE ; every bit except each channel's lowest
rlm@1 2038 mov eax, colorMask
rlm@1 2039 mov [eax], edx
rlm@1 2040 mov [eax+4], edx
rlm@1 2041 mov edx, 0x08210821 ; lowest bit of each channel
rlm@1 2042 mov eax, lowPixelMask
rlm@1 2043 mov [eax], edx
rlm@1 2044 mov [eax+4], edx
rlm@1 2045 mov edx, 0xE79CE79C ; every bit except each channel's two lowest
rlm@1 2046 mov eax, qcolorMask
rlm@1 2047 mov [eax], edx
rlm@1 2048 mov [eax+4], edx
rlm@1 2049 mov edx, 0x18631863 ; two lowest bits of each channel
rlm@1 2050 mov eax, qlowpixelMask
rlm@1 2051 mov [eax], edx
rlm@1 2052 mov [eax+4], edx
rlm@1 2053 xor eax, eax ; return 0 = success
rlm@1 2054 ; (redundant "jmp end3" dropped: end3 is the very next instruction, so execution falls through)
rlm@1 2055 end3: ; common exit, eax already holds the return value
rlm@1 2056 pop edx
rlm@1 2057 mov esp, ebp
rlm@1 2058 pop ebp
rlm@1 2059 ret
rlm@1 2060
rlm@1 2061
rlm@1 2062 ;-------------------------------------------------------------------------
rlm@1 2063 ;-------------------------------------------------------------------------
rlm@1 2064 ;-------------------------------------------------------------------------
rlm@1 2065 ;-------------------------------------------------------------------------
rlm@1 2066 ;-------------------------------------------------------------------------
rlm@1 2067 ;-------------------------------------------------------------------------
rlm@1 2068 ;-------------------------------------------------------------------------
rlm@1 2069
; Initialised constants. Each is 8 bytes: one 16-bit pattern repeated four
; times so it can be applied to four pixels with a single MMX operation.
; The first four hold the 5-6-5 values by default and are rewritten by
; Init_2xSaIMMX to match the requested pixel format.
rlm@1 2070 SECTION .data ALIGN = 32
rlm@1 2071 ;Some constants
rlm@1 2072 colorMask dd 0xF7DEF7DE,0xF7DEF7DE ; clears the lowest bit of every channel; applied before the >>1 in the 2-pixel average
rlm@1 2073 lowPixelMask dd 0x08210821,0x08210821 ; selects the lowest bit of every channel
rlm@1 2074
rlm@1 2075 qcolorMask dd 0xE79CE79C,0xE79CE79C ; clears the two lowest bits of every channel; applied before the >>2 in the 4-pixel average
rlm@1 2076 qlowpixelMask dd 0x18631863,0x18631863 ; selects the two lowest bits of every channel
rlm@1 2077
rlm@1 2078 darkenMask dd 0xC718C718,0xC718C718 ; the next three are not written by Init_2xSaIMMX and not referenced by _2xSaILine
rlm@1 2079 GreenMask dd 0x07E007E0,0x07E007E0 ; bits 5..10 of each word
rlm@1 2080 RedBlueMask dd 0xF81FF81F,0xF81FF81F ; bits 0..4 and 11..15 of each word
rlm@1 2081
rlm@1 2082 FALSE dd 0x00000000,0x00000000 ; all-zero quadword
rlm@1 2083 TRUE dd 0xffffffff,0xffffffff ; all-ones quadword
rlm@1 2084 ONE dd 0x00010001,0x00010001 ; the value 1 in each word; turns a 0xFFFF compare mask into a 0/1 count
rlm@1 2085
rlm@1 2086
; Uninitialised 8-byte scratch slots, each holding one MMX register's worth
; (four 16-bit lanes) of intermediate data. They are fixed static locations,
; so two calls running at the same time would overwrite each other's values.
rlm@1 2087 SECTION .bss ALIGN = 32
rlm@1 2088 ACPixel resb 8 ; _2xSaILine: lower-left output pixels, kept until the second row is stored
rlm@1 2089 Mask1 resb 8 ; _2xSaILine: per-lane selection masks, reused for each output pixel
rlm@1 2090 Mask2 resb 8
rlm@1 2091
rlm@1 2092 I56Pixel resb 8 ; this one and I23Pixel are read by the routine that precedes _2xSaILine
rlm@1 2093 I23Pixel resb 8
rlm@1 2094 I5556Pixel resb 8 ; the four I....Pixel slots below have no reference in this part of the file - presumably used by routines further up
rlm@1 2095 I2223Pixel resb 8
rlm@1 2096 I5666Pixel resb 8
rlm@1 2097 I2333Pixel resb 8
rlm@1 2098 Mask26 resb 8 ; Mask26/Mask35 and their b variants: selection masks of the routine that precedes _2xSaILine
rlm@1 2099 Mask35 resb 8
rlm@1 2100 Mask26b resb 8
rlm@1 2101 Mask35b resb 8
rlm@1 2102 product1a resb 8 ; product*: candidate pixels that routine chooses between via the masks
rlm@1 2103 product1b resb 8
rlm@1 2104 product2a resb 8
rlm@1 2105 product2b resb 8
rlm@1 2106 final1a resb 8 ; final1a/final1b are interleaved into the first output row, final2a/final2b into the second
rlm@1 2107 final1b resb 8
rlm@1 2108 final2a resb 8
rlm@1 2109 final2b resb 8