annotate src/filters/interframe.cpp @ 294:9f466a332448

merge
author Robert McIntyre <rlm@mit.edu>
date Fri, 30 Mar 2012 18:15:28 -0500
parents f9f4f1b99eed
children
rev   line source
rlm@1 1 #include <cstdlib>
rlm@1 2 #include <cstring>
rlm@1 3 #include "../Port.h"
rlm@1 4
rlm@1 5 #ifdef MMX
rlm@1 6 extern "C" bool cpu_mmx;
rlm@1 7 #endif
rlm@1 8
rlm@1 9 /*
rlm@1 10 * Thanks to Kawaks' Mr. K for the code
rlm@1 11
rlm@1 12 Incorporated into vba by Anthony Di Franco
rlm@1 13 */
rlm@1 14
// History buffers shared by every filter in this file. They are allocated
// lazily by Init() and released by InterframeCleanup(). Per the notes in
// Init(): frm1 is the frame from 1 call ago, frm2 from 2 calls ago and
// frm3 from 3 calls ago. The SmartIB variants rotate the three pointers
// after each frame; the MotionBlur/Interlace variants only touch frm1.
rlm@1 15 static u8 *frm1 = NULL;
rlm@1 16 static u8 *frm2 = NULL;
rlm@1 17 static u8 *frm3 = NULL;
rlm@1 18
// Defined elsewhere. The 16-bit C paths use ~RGB_LOW_BITS_MASK as the mask
// applied to a pixel before it is halved with a right shift.
rlm@1 19 extern u32 RGB_LOW_BITS_MASK;
// Defined elsewhere. The MMX paths load these 8 bytes into mm7 and use them
// as the equivalent per-pixel mask.
rlm@1 20 extern u32 qRGB_COLOR_MASK[2];
rlm@1 21
rlm@1 22 static void Init()
rlm@1 23 {
rlm@1 24 frm1 = (u8 *)calloc(322 * 242, 4);
rlm@1 25 // 1 frame ago
rlm@1 26 frm2 = (u8 *)calloc(322 * 242, 4);
rlm@1 27 // 2 frames ago
rlm@1 28 frm3 = (u8 *)calloc(322 * 242, 4);
rlm@1 29 // 3 frames ago
rlm@1 30 }
rlm@1 31
rlm@1 32 void InterframeCleanup()
rlm@1 33 {
rlm@1 34 if (frm1)
rlm@1 35 free(frm1);
rlm@1 36 if (frm2)
rlm@1 37 free(frm2);
rlm@1 38 if (frm3)
rlm@1 39 free(frm3);
rlm@1 40 frm1 = frm2 = frm3 = NULL;
rlm@1 41 }
rlm@1 42
rlm@1 43 #ifdef MMX
/*
 * MMX implementation of SmartIB for 16-bit pixels.
 *
 * For every row, `width >> 2` groups of 8 bytes (four 16-bit pixels) are
 * processed. For each group the code:
 *   - loads the current pixels (src0) and the matching pixels of frm1, frm2
 *     and frm3 (src1..src3);
 *   - stores the unmodified current pixels into frm3;
 *   - builds a per-pixel mask  !(A|B) & (C|D)  where
 *       A: src1 == src2,  B: src3 == src0,
 *       C: src0 == src2,  D: src1 == src3;
 *   - where the mask is set, replaces the pixel with
 *       ((src0 & colorMask) >> 1) + ((src1 & colorMask) >> 1),
 *     and keeps src0 otherwise.
 * colorMask is the 8 bytes at qRGB_COLOR_MASK, held in mm7.
 *
 * After each row all four pointers are advanced by a further 2 elements;
 * srcPitch is not used by this function.
 * NOTE(review): this presumably assumes both the source and the history
 * buffers use a row stride of width + 2 pixels -- confirm against callers.
 *
 * When all rows are done the three history pointers are rotated so that the
 * buffer just written (old frm3) becomes frm1.
 *
 * NOTE(review): the inner loop decrements before testing (dec / jnz), so a
 * count of 0 (width < 4) would not stop after zero iterations.
 * NOTE(review): the GNU asm uses 32-bit `addl` on pointer operands, modifies
 * the input operand %4 (saved/restored with push/pop) and declares no
 * clobbers; the MSVC asm uses eax, ebx, ecx, edx and edi. Looks like it
 * targets 32-bit x86 only -- confirm before building elsewhere.
 *
 * Callers must ensure frm1..frm3 are allocated (SmartIB calls Init() first).
 */
rlm@1 44 static void SmartIB_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
rlm@1 45 {
rlm@1 46 u16 *src0 = (u16 *)srcPtr;
rlm@1 47 u16 *src1 = (u16 *)frm1;
rlm@1 48 u16 *src2 = (u16 *)frm2;
rlm@1 49 u16 *src3 = (u16 *)frm3;
rlm@1 50
// Number of 8-byte groups (4 pixels of 16 bits) handled per row.
rlm@1 51 int count = width >> 2;
rlm@1 52
rlm@1 53 for (int i = 0; i < height; i++)
rlm@1 54 {
// GNU operands: %0..%3 = src0..src3 (read/write), %4 = count, %5 = mask address.
rlm@1 55 #ifdef __GNUC__
rlm@1 56 asm volatile (
rlm@1 57 "push %4\n"
rlm@1 58 "movq 0(%5), %%mm7\n" // colorMask
rlm@1 59 "0:\n"
rlm@1 60 "movq 0(%0), %%mm0\n" // src0
rlm@1 61 "movq 0(%1), %%mm1\n" // src1
rlm@1 62 "movq 0(%2), %%mm2\n" // src2
rlm@1 63 "movq 0(%3), %%mm3\n" // src3
rlm@1 64 "movq %%mm0, 0(%3)\n" // src3 = src0
rlm@1 65 "movq %%mm0, %%mm4\n"
rlm@1 66 "movq %%mm1, %%mm5\n"
rlm@1 67 "pcmpeqw %%mm2, %%mm5\n" // src1 == src2 (A)
rlm@1 68 "pcmpeqw %%mm3, %%mm4\n" // src3 == src0 (B)
rlm@1 69 "por %%mm5, %%mm4\n" // A | B
rlm@1 70 "movq %%mm2, %%mm5\n"
rlm@1 71 "pcmpeqw %%mm0, %%mm5\n" // src0 == src2 (C)
rlm@1 72 "pcmpeqw %%mm1, %%mm3\n" // src1 == src3 (D)
rlm@1 73 "por %%mm3, %%mm5\n" // C|D
rlm@1 74 "pandn %%mm5, %%mm4\n" // (!(A|B))&(C|D)
rlm@1 75 "movq %%mm0, %%mm2\n"
rlm@1 76 "pand %%mm7, %%mm2\n" // color & colorMask
rlm@1 77 "pand %%mm7, %%mm1\n" // src1 & colorMask
rlm@1 78 "psrlw $1, %%mm2\n" // (color & colorMask) >> 1 (E)
rlm@1 79 "psrlw $1, %%mm1\n" // (src & colorMask) >> 1 (F)
rlm@1 80 "paddw %%mm2, %%mm1\n" // E+F
rlm@1 81 "pand %%mm4, %%mm1\n" // (E+F) & res
rlm@1 82 "pandn %%mm0, %%mm4\n" // color& !res
rlm@1 83
rlm@1 84 "por %%mm1, %%mm4\n"
rlm@1 85 "movq %%mm4, 0(%0)\n" // src0 = res
rlm@1 86
rlm@1 87 "addl $8, %0\n"
rlm@1 88 "addl $8, %1\n"
rlm@1 89 "addl $8, %2\n"
rlm@1 90 "addl $8, %3\n"
rlm@1 91
rlm@1 92 "decl %4\n"
rlm@1 93 "jnz 0b\n"
rlm@1 94 "pop %4\n"
rlm@1 95 "emms\n"
rlm@1 96 : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (src3)
rlm@1 97 : "r" (count), "r" (qRGB_COLOR_MASK)
rlm@1 98 );
rlm@1 99 #else
rlm@1 100 __asm {
rlm@1 101 movq mm7, qword ptr [qRGB_COLOR_MASK];
rlm@1 102 mov eax, src0;
rlm@1 103 mov ebx, src1;
rlm@1 104 mov ecx, src2;
rlm@1 105 mov edx, src3;
rlm@1 106 mov edi, count;
rlm@1 107 label0:
rlm@1 108 movq mm0, qword ptr [eax]; // src0
rlm@1 109 movq mm1, qword ptr [ebx]; // src1
rlm@1 110 movq mm2, qword ptr [ecx]; // src2
rlm@1 111 movq mm3, qword ptr [edx]; // src3
rlm@1 112 movq qword ptr [edx], mm0; // src3 = src0
rlm@1 113 movq mm4, mm0;
rlm@1 114 movq mm5, mm1;
rlm@1 115 pcmpeqw mm5, mm2; // src1 == src2 (A)
rlm@1 116 pcmpeqw mm4, mm3; // src3 == src0 (B)
rlm@1 117 por mm4, mm5; // A | B
rlm@1 118 movq mm5, mm2;
rlm@1 119 pcmpeqw mm5, mm0; // src0 == src2 (C)
rlm@1 120 pcmpeqw mm3, mm1; // src1 == src3 (D)
rlm@1 121 por mm5, mm3; // C|D
rlm@1 122 pandn mm4, mm5; // (!(A|B))&(C|D)
rlm@1 123 movq mm2, mm0;
rlm@1 124 pand mm2, mm7; // color & colorMask
rlm@1 125 pand mm1, mm7; // src1 & colorMask
rlm@1 126 psrlw mm2, 1; // (color & colorMask) >> 1 (E)
rlm@1 127 psrlw mm1, 1; // (src & colorMask) >> 1 (F)
rlm@1 128 paddw mm1, mm2; // E+F
rlm@1 129 pand mm1, mm4; // (E+F) & res
rlm@1 130 pandn mm4, mm0; // color & !res
rlm@1 131
rlm@1 132 por mm4, mm1;
rlm@1 133 movq qword ptr [eax], mm4; // src0 = res
rlm@1 134
rlm@1 135 add eax, 8;
rlm@1 136 add ebx, 8;
rlm@1 137 add ecx, 8;
rlm@1 138 add edx, 8;
rlm@1 139
rlm@1 140 dec edi;
rlm@1 141 jnz label0;
rlm@1 142 mov src0, eax;
rlm@1 143 mov src1, ebx;
rlm@1 144 mov src2, ecx;
rlm@1 145 mov src3, edx;
rlm@1 146 emms;
rlm@1 147 }
rlm@1 148 #endif
// Skip 2 further pixels in every buffer before starting the next row.
rlm@1 149 src0 += 2;
rlm@1 150 src1 += 2;
rlm@1 151 src2 += 2;
rlm@1 152 src3 += 2;
rlm@1 153 }
rlm@1 154
rlm@1 155 /* Swap buffers around */
// New order: frm1 <- old frm3 (now holds this frame), frm2 <- old frm1, frm3 <- old frm2.
rlm@1 156 u8 *temp = frm1;
rlm@1 157 frm1 = frm3;
rlm@1 158 frm3 = frm2;
rlm@1 159 frm2 = temp;
rlm@1 160 }
rlm@1 161
rlm@1 162 #endif
rlm@1 163
rlm@1 164 void SmartIB(u8 *srcPtr, u32 srcPitch, int width, int height)
rlm@1 165 {
rlm@1 166 if (frm1 == NULL)
rlm@1 167 {
rlm@1 168 Init();
rlm@1 169 }
rlm@1 170 #ifdef MMX
rlm@1 171 if (cpu_mmx)
rlm@1 172 {
rlm@1 173 SmartIB_MMX(srcPtr, srcPitch, width, height);
rlm@1 174 return;
rlm@1 175 }
rlm@1 176 #endif
rlm@1 177
rlm@1 178 u16 colorMask = ~RGB_LOW_BITS_MASK;
rlm@1 179
rlm@1 180 u16 *src0 = (u16 *)srcPtr;
rlm@1 181 u16 *src1 = (u16 *)frm1;
rlm@1 182 u16 *src2 = (u16 *)frm2;
rlm@1 183 u16 *src3 = (u16 *)frm3;
rlm@1 184
rlm@1 185 int sPitch = srcPitch >> 1;
rlm@1 186
rlm@1 187 int pos = 0;
rlm@1 188 for (int j = 0; j < height; j++)
rlm@1 189 for (int i = 0; i < sPitch; i++)
rlm@1 190 {
rlm@1 191 u16 color = src0[pos];
rlm@1 192 src0[pos] =
rlm@1 193 (src1[pos] != src2[pos]) &&
rlm@1 194 (src3[pos] != color) &&
rlm@1 195 ((color == src2[pos]) || (src1[pos] == src3[pos]))
rlm@1 196 ? (((color & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1)) :
rlm@1 197 color;
rlm@1 198 src3[pos] = color; /* oldest buffer now holds newest frame */
rlm@1 199 pos++;
rlm@1 200 }
rlm@1 201
rlm@1 202 /* Swap buffers around */
rlm@1 203 u8 *temp = frm1;
rlm@1 204 frm1 = frm3;
rlm@1 205 frm3 = frm2;
rlm@1 206 frm2 = temp;
rlm@1 207 }
rlm@1 208
rlm@1 209 #ifdef MMX
/*
 * MMX implementation of SmartIB32 for 32-bit pixels.
 *
 * For every row, `width >> 1` groups of 8 bytes (two 32-bit pixels) are
 * processed. For each group the code:
 *   - loads the current pixels (src0) and the matching pixels of frm1, frm2
 *     and frm3 (src1..src3);
 *   - stores the unmodified current pixels into frm3;
 *   - builds a per-pixel mask  !(A|B) & (C|D)  where
 *       A: src1 == src2,  B: src3 == src0,
 *       C: src0 == src2,  D: src1 == src3;
 *   - where the mask is set, replaces the pixel with
 *       ((src0 & colorMask) >> 1) + ((src1 & colorMask) >> 1),
 *     and keeps src0 otherwise.
 * colorMask is the 8 bytes at qRGB_COLOR_MASK, held in mm7.
 *
 * After each row all four pointers are advanced by one further element;
 * srcPitch is not used by this function.
 * NOTE(review): this presumably assumes both the source and the history
 * buffers use a row stride of width + 1 pixels -- confirm against callers.
 *
 * When all rows are done the three history pointers are rotated so that the
 * buffer just written (old frm3) becomes frm1.
 *
 * NOTE(review): the inner loop decrements before testing (dec / jnz), so a
 * count of 0 (width < 2) would not stop after zero iterations.
 * NOTE(review): the GNU asm uses 32-bit `addl` on pointer operands, modifies
 * the input operand %4 (saved/restored with push/pop) and declares no
 * clobbers; the MSVC asm uses eax, ebx, ecx, edx and edi. Looks like it
 * targets 32-bit x86 only -- confirm before building elsewhere.
 *
 * Callers must ensure frm1..frm3 are allocated (SmartIB32 calls Init() first).
 */
rlm@1 210 static void SmartIB32_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
rlm@1 211 {
rlm@1 212 u32 *src0 = (u32 *)srcPtr;
rlm@1 213 u32 *src1 = (u32 *)frm1;
rlm@1 214 u32 *src2 = (u32 *)frm2;
rlm@1 215 u32 *src3 = (u32 *)frm3;
rlm@1 216
// Number of 8-byte groups (2 pixels of 32 bits) handled per row.
rlm@1 217 int count = width >> 1;
rlm@1 218
rlm@1 219 for (int i = 0; i < height; i++)
rlm@1 220 {
// GNU operands: %0..%3 = src0..src3 (read/write), %4 = count, %5 = mask address.
rlm@1 221 #ifdef __GNUC__
rlm@1 222 asm volatile (
rlm@1 223 "push %4\n"
rlm@1 224 "movq 0(%5), %%mm7\n" // colorMask
rlm@1 225 "0:\n"
rlm@1 226 "movq 0(%0), %%mm0\n" // src0
rlm@1 227 "movq 0(%1), %%mm1\n" // src1
rlm@1 228 "movq 0(%2), %%mm2\n" // src2
rlm@1 229 "movq 0(%3), %%mm3\n" // src3
rlm@1 230 "movq %%mm0, 0(%3)\n" // src3 = src0
rlm@1 231 "movq %%mm0, %%mm4\n"
rlm@1 232 "movq %%mm1, %%mm5\n"
rlm@1 233 "pcmpeqd %%mm2, %%mm5\n" // src1 == src2 (A)
rlm@1 234 "pcmpeqd %%mm3, %%mm4\n" // src3 == src0 (B)
rlm@1 235 "por %%mm5, %%mm4\n" // A | B
rlm@1 236 "movq %%mm2, %%mm5\n"
rlm@1 237 "pcmpeqd %%mm0, %%mm5\n" // src0 == src2 (C)
rlm@1 238 "pcmpeqd %%mm1, %%mm3\n" // src1 == src3 (D)
rlm@1 239 "por %%mm3, %%mm5\n" // C|D
rlm@1 240 "pandn %%mm5, %%mm4\n" // (!(A|B))&(C|D)
rlm@1 241 "movq %%mm0, %%mm2\n"
rlm@1 242 "pand %%mm7, %%mm2\n" // color & colorMask
rlm@1 243 "pand %%mm7, %%mm1\n" // src1 & colorMask
rlm@1 244 "psrld $1, %%mm2\n" // (color & colorMask) >> 1 (E)
rlm@1 245 "psrld $1, %%mm1\n" // (src & colorMask) >> 1 (F)
rlm@1 246 "paddd %%mm2, %%mm1\n" // E+F
rlm@1 247 "pand %%mm4, %%mm1\n" // (E+F) & res
rlm@1 248 "pandn %%mm0, %%mm4\n" // color& !res
rlm@1 249
rlm@1 250 "por %%mm1, %%mm4\n"
rlm@1 251 "movq %%mm4, 0(%0)\n" // src0 = res
rlm@1 252
rlm@1 253 "addl $8, %0\n"
rlm@1 254 "addl $8, %1\n"
rlm@1 255 "addl $8, %2\n"
rlm@1 256 "addl $8, %3\n"
rlm@1 257
rlm@1 258 "decl %4\n"
rlm@1 259 "jnz 0b\n"
rlm@1 260 "pop %4\n"
rlm@1 261 "emms\n"
rlm@1 262 : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (src3)
rlm@1 263 : "r" (count), "r" (qRGB_COLOR_MASK)
rlm@1 264 );
rlm@1 265 #else
rlm@1 266 __asm {
rlm@1 267 movq mm7, qword ptr [qRGB_COLOR_MASK];
rlm@1 268 mov eax, src0;
rlm@1 269 mov ebx, src1;
rlm@1 270 mov ecx, src2;
rlm@1 271 mov edx, src3;
rlm@1 272 mov edi, count;
rlm@1 273 label0:
rlm@1 274 movq mm0, qword ptr [eax]; // src0
rlm@1 275 movq mm1, qword ptr [ebx]; // src1
rlm@1 276 movq mm2, qword ptr [ecx]; // src2
rlm@1 277 movq mm3, qword ptr [edx]; // src3
rlm@1 278 movq qword ptr [edx], mm0; // src3 = src0
rlm@1 279 movq mm4, mm0;
rlm@1 280 movq mm5, mm1;
rlm@1 281 pcmpeqd mm5, mm2; // src1 == src2 (A)
rlm@1 282 pcmpeqd mm4, mm3; // src3 == src0 (B)
rlm@1 283 por mm4, mm5; // A | B
rlm@1 284 movq mm5, mm2;
rlm@1 285 pcmpeqd mm5, mm0; // src0 == src2 (C)
rlm@1 286 pcmpeqd mm3, mm1; // src1 == src3 (D)
rlm@1 287 por mm5, mm3; // C|D
rlm@1 288 pandn mm4, mm5; // (!(A|B))&(C|D)
rlm@1 289 movq mm2, mm0;
rlm@1 290 pand mm2, mm7; // color & colorMask
rlm@1 291 pand mm1, mm7; // src1 & colorMask
rlm@1 292 psrld mm2, 1; // (color & colorMask) >> 1 (E)
rlm@1 293 psrld mm1, 1; // (src & colorMask) >> 1 (F)
rlm@1 294 paddd mm1, mm2; // E+F
rlm@1 295 pand mm1, mm4; // (E+F) & res
rlm@1 296 pandn mm4, mm0; // color & !res
rlm@1 297
rlm@1 298 por mm4, mm1;
rlm@1 299 movq qword ptr [eax], mm4; // src0 = res
rlm@1 300
rlm@1 301 add eax, 8;
rlm@1 302 add ebx, 8;
rlm@1 303 add ecx, 8;
rlm@1 304 add edx, 8;
rlm@1 305
rlm@1 306 dec edi;
rlm@1 307 jnz label0;
rlm@1 308 mov src0, eax;
rlm@1 309 mov src1, ebx;
rlm@1 310 mov src2, ecx;
rlm@1 311 mov src3, edx;
rlm@1 312 emms;
rlm@1 313 }
rlm@1 314 #endif
rlm@1 315
// Skip 1 further pixel in every buffer before starting the next row.
rlm@1 316 src0++;
rlm@1 317 src1++;
rlm@1 318 src2++;
rlm@1 319 src3++;
rlm@1 320 }
rlm@1 321 /* Swap buffers around */
// New order: frm1 <- old frm3 (now holds this frame), frm2 <- old frm1, frm3 <- old frm2.
rlm@1 322 u8 *temp = frm1;
rlm@1 323 frm1 = frm3;
rlm@1 324 frm3 = frm2;
rlm@1 325 frm2 = temp;
rlm@1 326 }
rlm@1 327
rlm@1 328 #endif
rlm@1 329
rlm@1 330 void SmartIB32(u8 *srcPtr, u32 srcPitch, int width, int height)
rlm@1 331 {
rlm@1 332 if (frm1 == NULL)
rlm@1 333 {
rlm@1 334 Init();
rlm@1 335 }
rlm@1 336 #ifdef MMX
rlm@1 337 if (cpu_mmx)
rlm@1 338 {
rlm@1 339 SmartIB32_MMX(srcPtr, srcPitch, width, height);
rlm@1 340 return;
rlm@1 341 }
rlm@1 342 #endif
rlm@1 343
rlm@1 344 u32 *src0 = (u32 *)srcPtr;
rlm@1 345 u32 *src1 = (u32 *)frm1;
rlm@1 346 u32 *src2 = (u32 *)frm2;
rlm@1 347 u32 *src3 = (u32 *)frm3;
rlm@1 348
rlm@1 349 u32 colorMask = 0xfefefe;
rlm@1 350
rlm@1 351 int sPitch = srcPitch >> 2;
rlm@1 352 int pos = 0;
rlm@1 353
rlm@1 354 for (int j = 0; j < height; j++)
rlm@1 355 for (int i = 0; i < sPitch; i++)
rlm@1 356 {
rlm@1 357 u32 color = src0[pos];
rlm@1 358 src0[pos] =
rlm@1 359 (src1[pos] != src2[pos]) &&
rlm@1 360 (src3[pos] != color) &&
rlm@1 361 ((color == src2[pos]) || (src1[pos] == src3[pos]))
rlm@1 362 ? (((color & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1)) :
rlm@1 363 color;
rlm@1 364 src3[pos] = color; /* oldest buffer now holds newest frame */
rlm@1 365 pos++;
rlm@1 366 }
rlm@1 367
rlm@1 368 /* Swap buffers around */
rlm@1 369 u8 *temp = frm1;
rlm@1 370 frm1 = frm3;
rlm@1 371 frm3 = frm2;
rlm@1 372 frm2 = temp;
rlm@1 373 }
rlm@1 374
rlm@1 375 #ifdef MMX
/*
 * MMX implementation of MotionBlurIB for 16-bit pixels.
 *
 * For every row, `width >> 2` groups of 8 bytes (four 16-bit pixels) are
 * processed. Each group of current pixels (src0) is replaced by
 *   ((src0 & colorMask) >> 1) + ((src1 & colorMask) >> 1)
 * where src1 is the matching group in frm1; the unmodified current pixels
 * are stored into frm1 first. colorMask is the 8 bytes at qRGB_COLOR_MASK,
 * held in mm7.
 *
 * After each row both pointers are advanced by a further 2 elements;
 * srcPitch is not used by this function.
 * NOTE(review): this presumably assumes both buffers use a row stride of
 * width + 2 pixels -- confirm against callers.
 *
 * NOTE(review): the inner loop decrements before testing (dec / jnz), so a
 * count of 0 (width < 4) would not stop after zero iterations.
 * NOTE(review): the GNU asm uses 32-bit `addl` on pointer operands, modifies
 * the input operand %2 (saved/restored with push/pop) and declares no
 * clobbers; the MSVC asm uses eax, ebx and edi. Looks like it targets
 * 32-bit x86 only -- confirm before building elsewhere.
 *
 * Callers must ensure frm1 is allocated (MotionBlurIB calls Init() first).
 */
rlm@1 376 static void MotionBlurIB_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
rlm@1 377 {
rlm@1 378 u16 *src0 = (u16 *)srcPtr;
rlm@1 379 u16 *src1 = (u16 *)frm1;
rlm@1 380
// Number of 8-byte groups (4 pixels of 16 bits) handled per row.
rlm@1 381 int count = width >> 2;
rlm@1 382
rlm@1 383 for (int i = 0; i < height; i++)
rlm@1 384 {
// GNU operands: %0 = src0, %1 = src1 (read/write), %2 = count, %3 = mask address.
rlm@1 385 #ifdef __GNUC__
rlm@1 386 asm volatile (
rlm@1 387 "push %2\n"
rlm@1 388 "movq 0(%3), %%mm7\n" // colorMask
rlm@1 389 "0:\n"
rlm@1 390 "movq 0(%0), %%mm0\n" // src0
rlm@1 391 "movq 0(%1), %%mm1\n" // src1
rlm@1 392 "movq %%mm0, 0(%1)\n" // src1 = src0
rlm@1 393 "pand %%mm7, %%mm0\n" // color & colorMask
rlm@1 394 "pand %%mm7, %%mm1\n" // src1 & colorMask
rlm@1 395 "psrlw $1, %%mm0\n" // (color & colorMask) >> 1 (E)
rlm@1 396 "psrlw $1, %%mm1\n" // (src & colorMask) >> 1 (F)
rlm@1 397 "paddw %%mm1, %%mm0\n" // E+F
rlm@1 398
rlm@1 399 "movq %%mm0, 0(%0)\n" // src0 = res
rlm@1 400
rlm@1 401 "addl $8, %0\n"
rlm@1 402 "addl $8, %1\n"
rlm@1 403
rlm@1 404 "decl %2\n"
rlm@1 405 "jnz 0b\n"
rlm@1 406 "pop %2\n"
rlm@1 407 "emms\n"
rlm@1 408 : "+r" (src0), "+r" (src1)
rlm@1 409 : "r" (count), "r" (qRGB_COLOR_MASK)
rlm@1 410 );
rlm@1 411 #else
rlm@1 412 __asm {
rlm@1 413 movq mm7, qword ptr [qRGB_COLOR_MASK];
rlm@1 414 mov eax, src0;
rlm@1 415 mov ebx, src1;
rlm@1 416 mov edi, count;
rlm@1 417 label0:
rlm@1 418 movq mm0, qword ptr [eax]; // src0
rlm@1 419 movq mm1, qword ptr [ebx]; // src1
rlm@1 420 movq qword ptr [ebx], mm0; // src1 = src0
rlm@1 421 pand mm0, mm7; // color & colorMask
rlm@1 422 pand mm1, mm7; // src1 & colorMask
rlm@1 423 psrlw mm0, 1; // (color & colorMask) >> 1 (E)
rlm@1 424 psrlw mm1, 1; // (src & colorMask) >> 1 (F)
rlm@1 425 paddw mm0, mm1; // E+F
rlm@1 426
rlm@1 427 movq qword ptr [eax], mm0; // src0 = res
rlm@1 428
rlm@1 429 add eax, 8;
rlm@1 430 add ebx, 8;
rlm@1 431
rlm@1 432 dec edi;
rlm@1 433 jnz label0;
rlm@1 434 mov src0, eax;
rlm@1 435 mov src1, ebx;
rlm@1 436 emms;
rlm@1 437 }
rlm@1 438 #endif
// Skip 2 further pixels in both buffers before starting the next row.
rlm@1 439 src0 += 2;
rlm@1 440 src1 += 2;
rlm@1 441 }
rlm@1 442 }
rlm@1 443
rlm@1 444 #endif
rlm@1 445
rlm@1 446 void MotionBlurIB(u8 *srcPtr, u32 srcPitch, int width, int height)
rlm@1 447 {
rlm@1 448 if (frm1 == NULL)
rlm@1 449 {
rlm@1 450 Init();
rlm@1 451 }
rlm@1 452
rlm@1 453 #ifdef MMX
rlm@1 454 if (cpu_mmx)
rlm@1 455 {
rlm@1 456 MotionBlurIB_MMX(srcPtr, srcPitch, width, height);
rlm@1 457 return;
rlm@1 458 }
rlm@1 459 #endif
rlm@1 460
rlm@1 461 u16 colorMask = ~RGB_LOW_BITS_MASK;
rlm@1 462
rlm@1 463 u16 *src0 = (u16 *)srcPtr;
rlm@1 464 u16 *src1 = (u16 *)frm1;
rlm@1 465
rlm@1 466 int sPitch = srcPitch >> 1;
rlm@1 467
rlm@1 468 int pos = 0;
rlm@1 469 for (int j = 0; j < height; j++)
rlm@1 470 for (int i = 0; i < sPitch; i++)
rlm@1 471 {
rlm@1 472 u16 color = src0[pos];
rlm@1 473 src0[pos] =
rlm@1 474 (((color & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1));
rlm@1 475 src1[pos] = color;
rlm@1 476 pos++;
rlm@1 477 }
rlm@1 478 }
rlm@1 479
rlm@1 480 #ifdef MMX
/*
 * MMX implementation of MotionBlurIB32 for 32-bit pixels.
 *
 * For every row, `width >> 1` groups of 8 bytes (two 32-bit pixels) are
 * processed. Each group of current pixels (src0) is replaced by
 *   ((src0 & colorMask) >> 1) + ((src1 & colorMask) >> 1)
 * where src1 is the matching group in frm1; the unmodified current pixels
 * are stored into frm1 first. colorMask is the 8 bytes at qRGB_COLOR_MASK,
 * held in mm7.
 *
 * After each row both pointers are advanced by one further element;
 * srcPitch is not used by this function.
 * NOTE(review): this presumably assumes both buffers use a row stride of
 * width + 1 pixels -- confirm against callers.
 *
 * NOTE(review): the inner loop decrements before testing (dec / jnz), so a
 * count of 0 (width < 2) would not stop after zero iterations.
 * NOTE(review): the GNU asm uses 32-bit `addl` on pointer operands, modifies
 * the input operand %2 (saved/restored with push/pop) and declares no
 * clobbers; the MSVC asm uses eax, ebx and edi. Looks like it targets
 * 32-bit x86 only -- confirm before building elsewhere.
 *
 * Callers must ensure frm1 is allocated (MotionBlurIB32 calls Init() first).
 */
rlm@1 481 static void MotionBlurIB32_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
rlm@1 482 {
rlm@1 483 u32 *src0 = (u32 *)srcPtr;
rlm@1 484 u32 *src1 = (u32 *)frm1;
rlm@1 485
// Number of 8-byte groups (2 pixels of 32 bits) handled per row.
rlm@1 486 int count = width >> 1;
rlm@1 487
rlm@1 488 for (int i = 0; i < height; i++)
rlm@1 489 {
// GNU operands: %0 = src0, %1 = src1 (read/write), %2 = count, %3 = mask address.
rlm@1 490 #ifdef __GNUC__
rlm@1 491 asm volatile (
rlm@1 492 "push %2\n"
rlm@1 493 "movq 0(%3), %%mm7\n" // colorMask
rlm@1 494 "0:\n"
rlm@1 495 "movq 0(%0), %%mm0\n" // src0
rlm@1 496 "movq 0(%1), %%mm1\n" // src1
rlm@1 497 "movq %%mm0, 0(%1)\n" // src1 = src0
rlm@1 498 "pand %%mm7, %%mm0\n" // color & colorMask
rlm@1 499 "pand %%mm7, %%mm1\n" // src1 & colorMask
rlm@1 500 "psrld $1, %%mm0\n" // (color & colorMask) >> 1 (E)
rlm@1 501 "psrld $1, %%mm1\n" // (src & colorMask) >> 1 (F)
rlm@1 502 "paddd %%mm1, %%mm0\n" // E+F
rlm@1 503
rlm@1 504 "movq %%mm0, 0(%0)\n" // src0 = res
rlm@1 505
rlm@1 506 "addl $8, %0\n"
rlm@1 507 "addl $8, %1\n"
rlm@1 508
rlm@1 509 "decl %2\n"
rlm@1 510 "jnz 0b\n"
rlm@1 511 "pop %2\n"
rlm@1 512 "emms\n"
rlm@1 513 : "+r" (src0), "+r" (src1)
rlm@1 514 : "r" (count), "r" (qRGB_COLOR_MASK)
rlm@1 515 );
rlm@1 516 #else
rlm@1 517 __asm {
rlm@1 518 movq mm7, qword ptr [qRGB_COLOR_MASK];
rlm@1 519 mov eax, src0;
rlm@1 520 mov ebx, src1;
rlm@1 521 mov edi, count;
rlm@1 522 label0:
rlm@1 523 movq mm0, qword ptr [eax]; // src0
rlm@1 524 movq mm1, qword ptr [ebx]; // src1
rlm@1 525 movq qword ptr [ebx], mm0; // src1 = src0
rlm@1 526 pand mm0, mm7; // color & colorMask
rlm@1 527 pand mm1, mm7; // src1 & colorMask
rlm@1 528 psrld mm0, 1; // (color & colorMask) >> 1 (E)
rlm@1 529 psrld mm1, 1; // (src & colorMask) >> 1 (F)
rlm@1 530 paddd mm0, mm1; // E+F
rlm@1 531
rlm@1 532 movq qword ptr [eax], mm0; // src0 = res
rlm@1 533
rlm@1 534 add eax, 8;
rlm@1 535 add ebx, 8;
rlm@1 536
rlm@1 537 dec edi;
rlm@1 538 jnz label0;
rlm@1 539 mov src0, eax;
rlm@1 540 mov src1, ebx;
rlm@1 541 emms;
rlm@1 542 }
rlm@1 543 #endif
// Skip 1 further pixel in both buffers before starting the next row.
rlm@1 544 src0++;
rlm@1 545 src1++;
rlm@1 546 }
rlm@1 547 }
rlm@1 548
rlm@1 549 #endif
rlm@1 550
rlm@1 551 void MotionBlurIB32(u8 *srcPtr, u32 srcPitch, int width, int height)
rlm@1 552 {
rlm@1 553 if (frm1 == NULL)
rlm@1 554 {
rlm@1 555 Init();
rlm@1 556 }
rlm@1 557
rlm@1 558 #ifdef MMX
rlm@1 559 if (cpu_mmx)
rlm@1 560 {
rlm@1 561 MotionBlurIB32_MMX(srcPtr, srcPitch, width, height);
rlm@1 562 return;
rlm@1 563 }
rlm@1 564 #endif
rlm@1 565
rlm@1 566 u32 *src0 = (u32 *)srcPtr;
rlm@1 567 u32 *src1 = (u32 *)frm1;
rlm@1 568
rlm@1 569 u32 colorMask = 0xfefefe;
rlm@1 570
rlm@1 571 int sPitch = srcPitch >> 2;
rlm@1 572 int pos = 0;
rlm@1 573
rlm@1 574 for (int j = 0; j < height; j++)
rlm@1 575 for (int i = 0; i < sPitch; i++)
rlm@1 576 {
rlm@1 577 u32 color = src0[pos];
rlm@1 578 src0[pos] = (((color & colorMask) >> 1) +
rlm@1 579 ((src1[pos] & colorMask) >> 1));
rlm@1 580 src1[pos] = color;
rlm@1 581 pos++;
rlm@1 582 }
rlm@1 583 }
rlm@1 584
// Parity toggle for InterlaceIB: selects which rows (odd or even) get the
// "render" weighting on this call, and is flipped (XOR 1) at the end of
// every InterlaceIB call.
rlm@1 585 static int count = 0;
rlm@1 586
rlm@1 587 void InterlaceIB(u8 *srcPtr, u32 srcPitch, int width, int height)
rlm@1 588 {
rlm@1 589 if (frm1 == NULL)
rlm@1 590 {
rlm@1 591 Init();
rlm@1 592 }
rlm@1 593
rlm@1 594 u16 colorMask = ~RGB_LOW_BITS_MASK;
rlm@1 595
rlm@1 596 u16 *src0 = (u16 *)srcPtr;
rlm@1 597 u16 *src1 = (u16 *)frm1;
rlm@1 598
rlm@1 599 int sPitch = srcPitch >> 1;
rlm@1 600
rlm@1 601 int pos = 0;
rlm@1 602 for (int j = 0; j < height; j++)
rlm@1 603 {
rlm@1 604 bool render = count ? (j & 1) != 0 : (j & 1) == 0;
rlm@1 605 if (render)
rlm@1 606 {
rlm@1 607 for (int i = 0; i < sPitch; i++)
rlm@1 608 {
rlm@1 609 u16 color = src0[pos];
rlm@1 610 src0[pos] =
rlm@1 611 (((color & colorMask) >> 1) + ((((src1[pos] & colorMask) >> 1) & colorMask) >> 1));
rlm@1 612 src1[pos] = color;
rlm@1 613 pos++;
rlm@1 614 }
rlm@1 615 }
rlm@1 616 else
rlm@1 617 {
rlm@1 618 for (int i = 0; i < sPitch; i++)
rlm@1 619 {
rlm@1 620 u16 color = src0[pos];
rlm@1 621 src0[pos] =
rlm@1 622 (((((color & colorMask) >> 1) & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1));
rlm@1 623 src1[pos] = color;
rlm@1 624 pos++;
rlm@1 625 }
rlm@1 626 }
rlm@1 627 }
rlm@1 628 count = count ^ 1;
rlm@1 629 }
rlm@1 630