view src/filters/interframe.cpp @ 39:3e36553d0cbf

got some speedruns to work!
author Robert McIntyre <rlm@mit.edu>
date Mon, 05 Mar 2012 16:37:38 -0600
parents f9f4f1b99eed
children
line wrap: on
line source
1 #include <cstdlib>
2 #include <cstring>
3 #include "../Port.h"
5 #ifdef MMX
6 extern "C" bool cpu_mmx;
7 #endif
9 /*
10 * Thanks to Kawaks' Mr. K for the code
12 Incorporated into vba by Anthony Di Franco
13 */
15 static u8 *frm1 = NULL;
16 static u8 *frm2 = NULL;
17 static u8 *frm3 = NULL;
19 extern u32 RGB_LOW_BITS_MASK;
20 extern u32 qRGB_COLOR_MASK[2];
22 static void Init()
23 {
24 frm1 = (u8 *)calloc(322 * 242, 4);
25 // 1 frame ago
26 frm2 = (u8 *)calloc(322 * 242, 4);
27 // 2 frames ago
28 frm3 = (u8 *)calloc(322 * 242, 4);
29 // 3 frames ago
30 }
32 void InterframeCleanup()
33 {
34 if (frm1)
35 free(frm1);
36 if (frm2)
37 free(frm2);
38 if (frm3)
39 free(frm3);
40 frm1 = frm2 = frm3 = NULL;
41 }
#ifdef MMX
/*
 * MMX version of the "smart" interframe blend for 16-bit pixels.
 *
 * srcPtr   - frame to filter in place.
 * srcPitch - not used by this implementation.
 * width    - pixels per row; width >> 2 groups of four pixels (8 bytes)
 *            are processed per row.
 * height   - number of rows.
 *
 * For each pixel, with src0 the current frame and src1/src2/src3 the
 * history buffers frm1/frm2/frm3:
 *     blend = !(src1 == src2 || src3 == src0) && (src0 == src2 || src1 == src3)
 * Where blend holds, the pixel becomes the masked average of src0 and src1;
 * otherwise it is left unchanged.  The unfiltered src0 value is always
 * stored into src3, and the three buffers are rotated at the end.
 *
 * After every row each pointer is advanced by two further u16 elements.
 * NOTE(review): presumably this skips row padding of 4 bytes - confirm
 * against the callers' pitch.
 *
 * The GCC path advances the pointers with 32-bit `addl`, so it targets
 * 32-bit x86.  The caller is expected to have allocated frm1..frm3.
 */
static void SmartIB_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
{
    u16 *src0 = (u16 *)srcPtr;
    u16 *src1 = (u16 *)frm1;
    u16 *src2 = (u16 *)frm2;
    u16 *src3 = (u16 *)frm3;

    int count = width >> 2;

    // The asm loop decrements before testing, so a zero count would wrap
    // around; skip the rows entirely when there is no full group to process.
    for (int i = 0; count > 0 && i < height; i++)
    {
#ifdef __GNUC__
        asm volatile (
            "push %4\n"                 // preserve count; it is decremented below
            "movq 0(%5), %%mm7\n"       // colorMask
            "0:\n"
            "movq 0(%0), %%mm0\n"       // src0
            "movq 0(%1), %%mm1\n"       // src1
            "movq 0(%2), %%mm2\n"       // src2
            "movq 0(%3), %%mm3\n"       // src3
            "movq %%mm0, 0(%3)\n"       // src3 = src0
            "movq %%mm0, %%mm4\n"
            "movq %%mm1, %%mm5\n"
            "pcmpeqw %%mm2, %%mm5\n"    // src1 == src2 (A)
            "pcmpeqw %%mm3, %%mm4\n"    // src3 == src0 (B)
            "por %%mm5, %%mm4\n"        // A | B
            "movq %%mm2, %%mm5\n"
            "pcmpeqw %%mm0, %%mm5\n"    // src0 == src2 (C)
            "pcmpeqw %%mm1, %%mm3\n"    // src1 == src3 (D)
            "por %%mm3, %%mm5\n"        // C|D
            "pandn %%mm5, %%mm4\n"      // (!(A|B))&(C|D)
            "movq %%mm0, %%mm2\n"
            "pand %%mm7, %%mm2\n"       // color & colorMask
            "pand %%mm7, %%mm1\n"       // src1 & colorMask
            "psrlw $1, %%mm2\n"         // (color & colorMask) >> 1 (E)
            "psrlw $1, %%mm1\n"         // (src1 & colorMask) >> 1 (F)
            "paddw %%mm2, %%mm1\n"      // E+F
            "pand %%mm4, %%mm1\n"       // (E+F) & res
            "pandn %%mm0, %%mm4\n"      // color & !res

            "por %%mm1, %%mm4\n"
            "movq %%mm4, 0(%0)\n"       // src0 = res

            "addl $8, %0\n"
            "addl $8, %1\n"
            "addl $8, %2\n"
            "addl $8, %3\n"

            "decl %4\n"
            "jnz 0b\n"
            "pop %4\n"
            "emms\n"
            : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (src3)
            : "r" (count), "r" (qRGB_COLOR_MASK)
            // The asm reads and writes memory through the pointer operands
            // and changes the flags; tell the compiler so it does not cache
            // or reorder memory accesses across this statement.
            : "memory", "cc"
        );
#else
        __asm {
            movq mm7, qword ptr [qRGB_COLOR_MASK];
            mov eax, src0;
            mov ebx, src1;
            mov ecx, src2;
            mov edx, src3;
            mov edi, count;
label0:
            movq mm0, qword ptr [eax]; // src0
            movq mm1, qword ptr [ebx]; // src1
            movq mm2, qword ptr [ecx]; // src2
            movq mm3, qword ptr [edx]; // src3
            movq qword ptr [edx], mm0; // src3 = src0
            movq mm4, mm0;
            movq mm5, mm1;
            pcmpeqw mm5, mm2; // src1 == src2 (A)
            pcmpeqw mm4, mm3; // src3 == src0 (B)
            por mm4, mm5; // A | B
            movq mm5, mm2;
            pcmpeqw mm5, mm0; // src0 == src2 (C)
            pcmpeqw mm3, mm1; // src1 == src3 (D)
            por mm5, mm3; // C|D
            pandn mm4, mm5; // (!(A|B))&(C|D)
            movq mm2, mm0;
            pand mm2, mm7; // color & colorMask
            pand mm1, mm7; // src1 & colorMask
            psrlw mm2, 1; // (color & colorMask) >> 1 (E)
            psrlw mm1, 1; // (src1 & colorMask) >> 1 (F)
            paddw mm1, mm2; // E+F
            pand mm1, mm4; // (E+F) & res
            pandn mm4, mm0; // color & !res

            por mm4, mm1;
            movq qword ptr [eax], mm4; // src0 = res

            add eax, 8;
            add ebx, 8;
            add ecx, 8;
            add edx, 8;

            dec edi;
            jnz label0;
            mov src0, eax;
            mov src1, ebx;
            mov src2, ecx;
            mov src3, edx;
            emms;
        }
#endif
        // Step over the two extra elements at the end of each row.
        src0 += 2;
        src1 += 2;
        src2 += 2;
        src3 += 2;
    }

    /* Swap buffers around: the buffer that just received the current
       frame becomes frm1, and the other two each age by one frame. */
    u8 *temp = frm1;
    frm1 = frm3;
    frm3 = frm2;
    frm2 = temp;
}

#endif
164 void SmartIB(u8 *srcPtr, u32 srcPitch, int width, int height)
165 {
166 if (frm1 == NULL)
167 {
168 Init();
169 }
170 #ifdef MMX
171 if (cpu_mmx)
172 {
173 SmartIB_MMX(srcPtr, srcPitch, width, height);
174 return;
175 }
176 #endif
178 u16 colorMask = ~RGB_LOW_BITS_MASK;
180 u16 *src0 = (u16 *)srcPtr;
181 u16 *src1 = (u16 *)frm1;
182 u16 *src2 = (u16 *)frm2;
183 u16 *src3 = (u16 *)frm3;
185 int sPitch = srcPitch >> 1;
187 int pos = 0;
188 for (int j = 0; j < height; j++)
189 for (int i = 0; i < sPitch; i++)
190 {
191 u16 color = src0[pos];
192 src0[pos] =
193 (src1[pos] != src2[pos]) &&
194 (src3[pos] != color) &&
195 ((color == src2[pos]) || (src1[pos] == src3[pos]))
196 ? (((color & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1)) :
197 color;
198 src3[pos] = color; /* oldest buffer now holds newest frame */
199 pos++;
200 }
202 /* Swap buffers around */
203 u8 *temp = frm1;
204 frm1 = frm3;
205 frm3 = frm2;
206 frm2 = temp;
207 }
#ifdef MMX
/*
 * MMX version of the "smart" interframe blend for 32-bit pixels.
 *
 * srcPtr   - frame to filter in place.
 * srcPitch - not used by this implementation.
 * width    - pixels per row; width >> 1 pairs of pixels (8 bytes) are
 *            processed per row.
 * height   - number of rows.
 *
 * Per pixel, with src0 the current frame and src1/src2/src3 the history
 * buffers frm1/frm2/frm3:
 *     blend = !(src1 == src2 || src3 == src0) && (src0 == src2 || src1 == src3)
 * Where blend holds, the pixel becomes the masked average of src0 and src1;
 * otherwise it is left unchanged.  The unfiltered src0 value is always
 * stored into src3, and the three buffers are rotated at the end.
 *
 * After every row each pointer is advanced by one further u32 element.
 * NOTE(review): presumably this skips row padding of 4 bytes - confirm
 * against the callers' pitch.
 *
 * The GCC path advances the pointers with 32-bit `addl`, so it targets
 * 32-bit x86.  The caller is expected to have allocated frm1..frm3.
 */
static void SmartIB32_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
{
    u32 *src0 = (u32 *)srcPtr;
    u32 *src1 = (u32 *)frm1;
    u32 *src2 = (u32 *)frm2;
    u32 *src3 = (u32 *)frm3;

    int count = width >> 1;

    // The asm loop decrements before testing, so a zero count would wrap
    // around; skip the rows entirely when there is no full pair to process.
    for (int i = 0; count > 0 && i < height; i++)
    {
#ifdef __GNUC__
        asm volatile (
            "push %4\n"                 // preserve count; it is decremented below
            "movq 0(%5), %%mm7\n"       // colorMask
            "0:\n"
            "movq 0(%0), %%mm0\n"       // src0
            "movq 0(%1), %%mm1\n"       // src1
            "movq 0(%2), %%mm2\n"       // src2
            "movq 0(%3), %%mm3\n"       // src3
            "movq %%mm0, 0(%3)\n"       // src3 = src0
            "movq %%mm0, %%mm4\n"
            "movq %%mm1, %%mm5\n"
            "pcmpeqd %%mm2, %%mm5\n"    // src1 == src2 (A)
            "pcmpeqd %%mm3, %%mm4\n"    // src3 == src0 (B)
            "por %%mm5, %%mm4\n"        // A | B
            "movq %%mm2, %%mm5\n"
            "pcmpeqd %%mm0, %%mm5\n"    // src0 == src2 (C)
            "pcmpeqd %%mm1, %%mm3\n"    // src1 == src3 (D)
            "por %%mm3, %%mm5\n"        // C|D
            "pandn %%mm5, %%mm4\n"      // (!(A|B))&(C|D)
            "movq %%mm0, %%mm2\n"
            "pand %%mm7, %%mm2\n"       // color & colorMask
            "pand %%mm7, %%mm1\n"       // src1 & colorMask
            "psrld $1, %%mm2\n"         // (color & colorMask) >> 1 (E)
            "psrld $1, %%mm1\n"         // (src1 & colorMask) >> 1 (F)
            "paddd %%mm2, %%mm1\n"      // E+F
            "pand %%mm4, %%mm1\n"       // (E+F) & res
            "pandn %%mm0, %%mm4\n"      // color & !res

            "por %%mm1, %%mm4\n"
            "movq %%mm4, 0(%0)\n"       // src0 = res

            "addl $8, %0\n"
            "addl $8, %1\n"
            "addl $8, %2\n"
            "addl $8, %3\n"

            "decl %4\n"
            "jnz 0b\n"
            "pop %4\n"
            "emms\n"
            : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (src3)
            : "r" (count), "r" (qRGB_COLOR_MASK)
            // The asm reads and writes memory through the pointer operands
            // and changes the flags; tell the compiler so it does not cache
            // or reorder memory accesses across this statement.
            : "memory", "cc"
        );
#else
        __asm {
            movq mm7, qword ptr [qRGB_COLOR_MASK];
            mov eax, src0;
            mov ebx, src1;
            mov ecx, src2;
            mov edx, src3;
            mov edi, count;
label0:
            movq mm0, qword ptr [eax]; // src0
            movq mm1, qword ptr [ebx]; // src1
            movq mm2, qword ptr [ecx]; // src2
            movq mm3, qword ptr [edx]; // src3
            movq qword ptr [edx], mm0; // src3 = src0
            movq mm4, mm0;
            movq mm5, mm1;
            pcmpeqd mm5, mm2; // src1 == src2 (A)
            pcmpeqd mm4, mm3; // src3 == src0 (B)
            por mm4, mm5; // A | B
            movq mm5, mm2;
            pcmpeqd mm5, mm0; // src0 == src2 (C)
            pcmpeqd mm3, mm1; // src1 == src3 (D)
            por mm5, mm3; // C|D
            pandn mm4, mm5; // (!(A|B))&(C|D)
            movq mm2, mm0;
            pand mm2, mm7; // color & colorMask
            pand mm1, mm7; // src1 & colorMask
            psrld mm2, 1; // (color & colorMask) >> 1 (E)
            psrld mm1, 1; // (src1 & colorMask) >> 1 (F)
            paddd mm1, mm2; // E+F
            pand mm1, mm4; // (E+F) & res
            pandn mm4, mm0; // color & !res

            por mm4, mm1;
            movq qword ptr [eax], mm4; // src0 = res

            add eax, 8;
            add ebx, 8;
            add ecx, 8;
            add edx, 8;

            dec edi;
            jnz label0;
            mov src0, eax;
            mov src1, ebx;
            mov src2, ecx;
            mov src3, edx;
            emms;
        }
#endif
        // Step over the one extra element at the end of each row.
        src0++;
        src1++;
        src2++;
        src3++;
    }

    /* Swap buffers around: the buffer that just received the current
       frame becomes frm1, and the other two each age by one frame. */
    u8 *temp = frm1;
    frm1 = frm3;
    frm3 = frm2;
    frm2 = temp;
}

#endif
330 void SmartIB32(u8 *srcPtr, u32 srcPitch, int width, int height)
331 {
332 if (frm1 == NULL)
333 {
334 Init();
335 }
336 #ifdef MMX
337 if (cpu_mmx)
338 {
339 SmartIB32_MMX(srcPtr, srcPitch, width, height);
340 return;
341 }
342 #endif
344 u32 *src0 = (u32 *)srcPtr;
345 u32 *src1 = (u32 *)frm1;
346 u32 *src2 = (u32 *)frm2;
347 u32 *src3 = (u32 *)frm3;
349 u32 colorMask = 0xfefefe;
351 int sPitch = srcPitch >> 2;
352 int pos = 0;
354 for (int j = 0; j < height; j++)
355 for (int i = 0; i < sPitch; i++)
356 {
357 u32 color = src0[pos];
358 src0[pos] =
359 (src1[pos] != src2[pos]) &&
360 (src3[pos] != color) &&
361 ((color == src2[pos]) || (src1[pos] == src3[pos]))
362 ? (((color & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1)) :
363 color;
364 src3[pos] = color; /* oldest buffer now holds newest frame */
365 pos++;
366 }
368 /* Swap buffers around */
369 u8 *temp = frm1;
370 frm1 = frm3;
371 frm3 = frm2;
372 frm2 = temp;
373 }
#ifdef MMX
/*
 * MMX version of the motion-blur filter for 16-bit pixels.
 *
 * srcPtr   - frame to filter in place.
 * srcPitch - not used by this implementation.
 * width    - pixels per row; width >> 2 groups of four pixels (8 bytes)
 *            are processed per row.
 * height   - number of rows.
 *
 * Every pixel becomes the masked average of its current value and the
 * value stored in frm1; the unfiltered current value is then stored in
 * frm1 for the next call.
 *
 * After every row both pointers are advanced by two further u16 elements.
 * NOTE(review): presumably this skips row padding of 4 bytes - confirm
 * against the callers' pitch.
 *
 * The GCC path advances the pointers with 32-bit `addl`, so it targets
 * 32-bit x86.  The caller is expected to have allocated frm1.
 */
static void MotionBlurIB_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
{
    u16 *src0 = (u16 *)srcPtr;
    u16 *src1 = (u16 *)frm1;

    int count = width >> 2;

    // The asm loop decrements before testing, so a zero count would wrap
    // around; skip the rows entirely when there is no full group to process.
    for (int i = 0; count > 0 && i < height; i++)
    {
#ifdef __GNUC__
        asm volatile (
            "push %2\n"                 // preserve count; it is decremented below
            "movq 0(%3), %%mm7\n"       // colorMask
            "0:\n"
            "movq 0(%0), %%mm0\n"       // src0
            "movq 0(%1), %%mm1\n"       // src1
            "movq %%mm0, 0(%1)\n"       // src1 = src0
            "pand %%mm7, %%mm0\n"       // color & colorMask
            "pand %%mm7, %%mm1\n"       // src1 & colorMask
            "psrlw $1, %%mm0\n"         // (color & colorMask) >> 1 (E)
            "psrlw $1, %%mm1\n"         // (src1 & colorMask) >> 1 (F)
            "paddw %%mm1, %%mm0\n"      // E+F

            "movq %%mm0, 0(%0)\n"       // src0 = res

            "addl $8, %0\n"
            "addl $8, %1\n"

            "decl %2\n"
            "jnz 0b\n"
            "pop %2\n"
            "emms\n"
            : "+r" (src0), "+r" (src1)
            : "r" (count), "r" (qRGB_COLOR_MASK)
            // The asm reads and writes memory through the pointer operands
            // and changes the flags; tell the compiler so it does not cache
            // or reorder memory accesses across this statement.
            : "memory", "cc"
        );
#else
        __asm {
            movq mm7, qword ptr [qRGB_COLOR_MASK];
            mov eax, src0;
            mov ebx, src1;
            mov edi, count;
label0:
            movq mm0, qword ptr [eax]; // src0
            movq mm1, qword ptr [ebx]; // src1
            movq qword ptr [ebx], mm0; // src1 = src0
            pand mm0, mm7; // color & colorMask
            pand mm1, mm7; // src1 & colorMask
            psrlw mm0, 1; // (color & colorMask) >> 1 (E)
            psrlw mm1, 1; // (src1 & colorMask) >> 1 (F)
            paddw mm0, mm1; // E+F

            movq qword ptr [eax], mm0; // src0 = res

            add eax, 8;
            add ebx, 8;

            dec edi;
            jnz label0;
            mov src0, eax;
            mov src1, ebx;
            emms;
        }
#endif
        // Step over the two extra elements at the end of each row.
        src0 += 2;
        src1 += 2;
    }
}

#endif
446 void MotionBlurIB(u8 *srcPtr, u32 srcPitch, int width, int height)
447 {
448 if (frm1 == NULL)
449 {
450 Init();
451 }
453 #ifdef MMX
454 if (cpu_mmx)
455 {
456 MotionBlurIB_MMX(srcPtr, srcPitch, width, height);
457 return;
458 }
459 #endif
461 u16 colorMask = ~RGB_LOW_BITS_MASK;
463 u16 *src0 = (u16 *)srcPtr;
464 u16 *src1 = (u16 *)frm1;
466 int sPitch = srcPitch >> 1;
468 int pos = 0;
469 for (int j = 0; j < height; j++)
470 for (int i = 0; i < sPitch; i++)
471 {
472 u16 color = src0[pos];
473 src0[pos] =
474 (((color & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1));
475 src1[pos] = color;
476 pos++;
477 }
478 }
#ifdef MMX
/*
 * MMX version of the motion-blur filter for 32-bit pixels.
 *
 * srcPtr   - frame to filter in place.
 * srcPitch - not used by this implementation.
 * width    - pixels per row; width >> 1 pairs of pixels (8 bytes) are
 *            processed per row.
 * height   - number of rows.
 *
 * Every pixel becomes the masked average of its current value and the
 * value stored in frm1; the unfiltered current value is then stored in
 * frm1 for the next call.
 *
 * After every row both pointers are advanced by one further u32 element.
 * NOTE(review): presumably this skips row padding of 4 bytes - confirm
 * against the callers' pitch.
 *
 * The GCC path advances the pointers with 32-bit `addl`, so it targets
 * 32-bit x86.  The caller is expected to have allocated frm1.
 */
static void MotionBlurIB32_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
{
    u32 *src0 = (u32 *)srcPtr;
    u32 *src1 = (u32 *)frm1;

    int count = width >> 1;

    // The asm loop decrements before testing, so a zero count would wrap
    // around; skip the rows entirely when there is no full pair to process.
    for (int i = 0; count > 0 && i < height; i++)
    {
#ifdef __GNUC__
        asm volatile (
            "push %2\n"                 // preserve count; it is decremented below
            "movq 0(%3), %%mm7\n"       // colorMask
            "0:\n"
            "movq 0(%0), %%mm0\n"       // src0
            "movq 0(%1), %%mm1\n"       // src1
            "movq %%mm0, 0(%1)\n"       // src1 = src0
            "pand %%mm7, %%mm0\n"       // color & colorMask
            "pand %%mm7, %%mm1\n"       // src1 & colorMask
            "psrld $1, %%mm0\n"         // (color & colorMask) >> 1 (E)
            "psrld $1, %%mm1\n"         // (src1 & colorMask) >> 1 (F)
            "paddd %%mm1, %%mm0\n"      // E+F

            "movq %%mm0, 0(%0)\n"       // src0 = res

            "addl $8, %0\n"
            "addl $8, %1\n"

            "decl %2\n"
            "jnz 0b\n"
            "pop %2\n"
            "emms\n"
            : "+r" (src0), "+r" (src1)
            : "r" (count), "r" (qRGB_COLOR_MASK)
            // The asm reads and writes memory through the pointer operands
            // and changes the flags; tell the compiler so it does not cache
            // or reorder memory accesses across this statement.
            : "memory", "cc"
        );
#else
        __asm {
            movq mm7, qword ptr [qRGB_COLOR_MASK];
            mov eax, src0;
            mov ebx, src1;
            mov edi, count;
label0:
            movq mm0, qword ptr [eax]; // src0
            movq mm1, qword ptr [ebx]; // src1
            movq qword ptr [ebx], mm0; // src1 = src0
            pand mm0, mm7; // color & colorMask
            pand mm1, mm7; // src1 & colorMask
            psrld mm0, 1; // (color & colorMask) >> 1 (E)
            psrld mm1, 1; // (src1 & colorMask) >> 1 (F)
            paddd mm0, mm1; // E+F

            movq qword ptr [eax], mm0; // src0 = res

            add eax, 8;
            add ebx, 8;

            dec edi;
            jnz label0;
            mov src0, eax;
            mov src1, ebx;
            emms;
        }
#endif
        // Step over the one extra element at the end of each row.
        src0++;
        src1++;
    }
}

#endif
551 void MotionBlurIB32(u8 *srcPtr, u32 srcPitch, int width, int height)
552 {
553 if (frm1 == NULL)
554 {
555 Init();
556 }
558 #ifdef MMX
559 if (cpu_mmx)
560 {
561 MotionBlurIB32_MMX(srcPtr, srcPitch, width, height);
562 return;
563 }
564 #endif
566 u32 *src0 = (u32 *)srcPtr;
567 u32 *src1 = (u32 *)frm1;
569 u32 colorMask = 0xfefefe;
571 int sPitch = srcPitch >> 2;
572 int pos = 0;
574 for (int j = 0; j < height; j++)
575 for (int i = 0; i < sPitch; i++)
576 {
577 u32 color = src0[pos];
578 src0[pos] = (((color & colorMask) >> 1) +
579 ((src1[pos] & colorMask) >> 1));
580 src1[pos] = color;
581 pos++;
582 }
583 }
585 static int count = 0;
587 void InterlaceIB(u8 *srcPtr, u32 srcPitch, int width, int height)
588 {
589 if (frm1 == NULL)
590 {
591 Init();
592 }
594 u16 colorMask = ~RGB_LOW_BITS_MASK;
596 u16 *src0 = (u16 *)srcPtr;
597 u16 *src1 = (u16 *)frm1;
599 int sPitch = srcPitch >> 1;
601 int pos = 0;
602 for (int j = 0; j < height; j++)
603 {
604 bool render = count ? (j & 1) != 0 : (j & 1) == 0;
605 if (render)
606 {
607 for (int i = 0; i < sPitch; i++)
608 {
609 u16 color = src0[pos];
610 src0[pos] =
611 (((color & colorMask) >> 1) + ((((src1[pos] & colorMask) >> 1) & colorMask) >> 1));
612 src1[pos] = color;
613 pos++;
614 }
615 }
616 else
617 {
618 for (int i = 0; i < sPitch; i++)
619 {
620 u16 color = src0[pos];
621 src0[pos] =
622 (((((color & colorMask) >> 1) & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1));
623 src1[pos] = color;
624 pos++;
625 }
626 }
627 }
628 count = count ^ 1;
629 }