diff src/filters/interframe.cpp @ 1:f9f4f1b99eed

importing src directory
author Robert McIntyre <rlm@mit.edu>
date Sat, 03 Mar 2012 10:31:27 -0600
parents
children
line wrap: on
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/filters/interframe.cpp	Sat Mar 03 10:31:27 2012 -0600
     1.3 @@ -0,0 +1,630 @@
     1.4 +#include <cstdlib>
     1.5 +#include <cstring>
     1.6 +#include "../Port.h"
     1.7 +
     1.8 +#ifdef MMX
     1.9 +extern "C" bool cpu_mmx;
    1.10 +#endif
    1.11 +
    1.12 +/*
    1.13 + * Thanks to Kawaks' Mr. K for the code
    1.14 +
    1.15 +   Incorporated into vba by Anthony Di Franco
    1.16 + */
    1.17 +
    1.18 +static u8 *frm1 = NULL; // history buffers, allocated together by Init(); frm1 = 1 frame ago
    1.19 +static u8 *frm2 = NULL; // 2 frames ago
    1.20 +static u8 *frm3 = NULL; // 3 frames ago
    1.21 +
    1.22 +extern u32 RGB_LOW_BITS_MASK; // inverted to form the 16-bit colorMask in the C code paths
    1.23 +extern u32 qRGB_COLOR_MASK[2]; // loaded as one 64-bit value into mm7 by the MMX code paths
    1.24 +
    1.25 +static void Init()
    1.26 +{
    1.27 +	// Allocate the three zero-filled history buffers; the results are not checked here.
    1.28 +	const size_t entryCount = 322 * 242;
    1.29 +	const size_t entryBytes = 4;
    1.30 +	frm1 = (u8 *)calloc(entryCount, entryBytes); // 1 frame ago
    1.31 +	frm2 = (u8 *)calloc(entryCount, entryBytes); // 2 frames ago
    1.32 +	frm3 = (u8 *)calloc(entryCount, entryBytes); // 3 frames ago
    1.33 +}
    1.34 +
    1.35 +void InterframeCleanup()
    1.36 +{
    1.37 +	// Release the three history buffers and reset the pointers, so the next
    1.38 +	// filter call sees frm1 == NULL and allocates them again through Init().
    1.39 +	// free(NULL) is a no-op, so the pointers need no individual guards.
    1.40 +	free(frm1);
    1.41 +	free(frm2);
    1.42 +	free(frm3);
    1.43 +	frm1 = frm2 = frm3 = NULL;
    1.44 +}
    1.45 +
    1.46 +#ifdef MMX
    1.47 +static void SmartIB_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
    1.48 +{ // MMX path of SmartIB for 16-bit pixels; srcPitch is not read in this function
    1.49 +	u16 *src0 = (u16 *)srcPtr; // incoming frame, filtered in place
    1.50 +	u16 *src1 = (u16 *)frm1; // 1 frame ago
    1.51 +	u16 *src2 = (u16 *)frm2; // 2 frames ago
    1.52 +	u16 *src3 = (u16 *)frm3; // 3 frames ago; overwritten with the unfiltered incoming frame
    1.53 +
    1.54 +	int count = width >> 2; // loop iterations per row; each one handles 8 bytes (four u16 pixels)
    1.55 +	// NOTE(review): with width < 4 count is 0 and the dec/jnz loops below would wrap around - confirm callers never pass that.
    1.56 +	for (int i = 0; i < height; i++)
    1.57 +	{
    1.58 +#ifdef __GNUC__
    1.59 +		asm volatile (
    1.60 +		    "push %4\n"       // save count: decl below modifies this input operand
    1.61 +		    "movq 0(%5), %%mm7\n"       // colorMask
    1.62 +		    "0:\n"
    1.63 +		    "movq 0(%0), %%mm0\n"       // src0
    1.64 +		    "movq 0(%1), %%mm1\n"       // src1
    1.65 +		    "movq 0(%2), %%mm2\n"       // src2
    1.66 +		    "movq 0(%3), %%mm3\n"       // src3
    1.67 +		    "movq %%mm0, 0(%3)\n"       // src3 = src0
    1.68 +		    "movq %%mm0, %%mm4\n"
    1.69 +		    "movq %%mm1, %%mm5\n"
    1.70 +		    "pcmpeqw %%mm2, %%mm5\n"       // src1 == src2 (A)
    1.71 +		    "pcmpeqw %%mm3, %%mm4\n"       // src3 == src0 (B)
    1.72 +		    "por %%mm5, %%mm4\n"       // A | B
    1.73 +		    "movq %%mm2, %%mm5\n"
    1.74 +		    "pcmpeqw %%mm0, %%mm5\n"       // src0 == src2 (C)
    1.75 +		    "pcmpeqw %%mm1, %%mm3\n"       // src1 == src3 (D)
    1.76 +		    "por %%mm3, %%mm5\n"       // C|D
    1.77 +		    "pandn %%mm5, %%mm4\n"       // res = (!(A|B))&(C|D): per-pixel blend mask
    1.78 +		    "movq %%mm0, %%mm2\n"
    1.79 +		    "pand %%mm7, %%mm2\n"       // color & colorMask
    1.80 +		    "pand %%mm7, %%mm1\n"       // src1 & colorMask
    1.81 +		    "psrlw $1, %%mm2\n"       // (color & colorMask) >> 1 (E)
    1.82 +		    "psrlw $1, %%mm1\n"       // (src1 & colorMask) >> 1 (F)
    1.83 +		    "paddw %%mm2, %%mm1\n"       // E+F
    1.84 +		    "pand %%mm4, %%mm1\n"       // (E+F) & res
    1.85 +		    "pandn %%mm0, %%mm4\n"       // color& !res
    1.86 +
    1.87 +		    "por %%mm1, %%mm4\n"       // blended where res is set, original color elsewhere
    1.88 +		    "movq %%mm4, 0(%0)\n"       // src0 = res
    1.89 +
    1.90 +		    "addl $8, %0\n"       // NOTE(review): 32-bit add on a pointer operand - presumably x86-32 only, confirm
    1.91 +		    "addl $8, %1\n"
    1.92 +		    "addl $8, %2\n"
    1.93 +		    "addl $8, %3\n"
    1.94 +
    1.95 +		    "decl %4\n"
    1.96 +		    "jnz 0b\n"
    1.97 +		    "pop %4\n"       // restore count for the next row
    1.98 +		    "emms\n"
    1.99 +			: "+r" (src0), "+r" (src1), "+r" (src2), "+r" (src3)
   1.100 +			: "r" (count), "r" (qRGB_COLOR_MASK)
   1.101 +		    ); // NOTE(review): no clobber list (mm0-mm7, "memory", "cc") is declared - confirm this is safe with the compiler in use
   1.102 +#else
   1.103 +		__asm {
   1.104 +			movq mm7, qword ptr [qRGB_COLOR_MASK];
   1.105 +			mov	 eax, src0;
   1.106 +			mov	 ebx, src1;
   1.107 +			mov	 ecx, src2;
   1.108 +			mov	 edx, src3;
   1.109 +			mov	 edi, count;
   1.110 +label0:
   1.111 +			movq mm0, qword ptr [eax]; // src0
   1.112 +			movq	   mm1, qword ptr [ebx]; // src1
   1.113 +			movq	   mm2, qword ptr [ecx]; // src2
   1.114 +			movq	   mm3, qword ptr [edx]; // src3
   1.115 +			movq qword ptr [edx], mm0; // src3 = src0
   1.116 +			movq	   mm4, mm0;
   1.117 +			movq	   mm5, mm1;
   1.118 +			pcmpeqw	   mm5, mm2; // src1 == src2 (A)
   1.119 +			pcmpeqw	   mm4, mm3; // src3 == src0 (B)
   1.120 +			por		   mm4, mm5; // A | B
   1.121 +			movq	   mm5, mm2;
   1.122 +			pcmpeqw	   mm5, mm0; // src0 == src2 (C)
   1.123 +			pcmpeqw	   mm3, mm1; // src1 == src3 (D)
   1.124 +			por		   mm5, mm3; // C|D
   1.125 +			pandn	   mm4, mm5; // res = (!(A|B))&(C|D): per-pixel blend mask
   1.126 +			movq	   mm2, mm0;
   1.127 +			pand	   mm2, mm7; // color & colorMask
   1.128 +			pand	   mm1, mm7; // src1 & colorMask
   1.129 +			psrlw	   mm2, 1; // (color & colorMask) >> 1 (E)
   1.130 +			psrlw	   mm1, 1; // (src1 & colorMask) >> 1 (F)
   1.131 +			paddw	   mm1, mm2; // E+F
   1.132 +			pand	   mm1, mm4; // (E+F) & res
   1.133 +			pandn	   mm4, mm0; // color & !res
   1.134 +
   1.135 +			por		   mm4, mm1; // blended where res is set, original color elsewhere
   1.136 +			movq qword ptr [eax], mm4; // src0 = res
   1.137 +
   1.138 +			add eax, 8;
   1.139 +			add ebx, 8;
   1.140 +			add ecx, 8;
   1.141 +			add edx, 8;
   1.142 +
   1.143 +			dec edi;
   1.144 +			jnz label0;
   1.145 +			mov src0, eax; // write the advanced pointers back to the C variables
   1.146 +			mov src1, ebx;
   1.147 +			mov src2, ecx;
   1.148 +			mov src3, edx;
   1.149 +			emms;
   1.150 +		}
   1.151 +#endif
   1.152 +		src0 += 2; // step two more u16 entries after each row; presumably row padding - confirm against caller
   1.153 +		src1 += 2;
   1.154 +		src2 += 2;
   1.155 +		src3 += 2;
   1.156 +	}
   1.157 +
   1.158 +	/* Rotate history: frm3 (now holding this frame) becomes frm1; old frm1 -> frm2, old frm2 -> frm3 */
   1.159 +	u8 *temp = frm1;
   1.160 +	frm1 = frm3;
   1.161 +	frm3 = frm2;
   1.162 +	frm2 = temp;
   1.163 +}
   1.164 +
   1.165 +#endif
   1.166 +
   1.167 +void SmartIB(u8 *srcPtr, u32 srcPitch, int width, int height)
   1.168 +{
   1.169 +	// In-place filter for 16-bit pixels: blends the frame at srcPtr with the previous frame where a pixel keeps alternating.
   1.170 +	if (frm1 == NULL)
   1.171 +		Init();
   1.172 +	if (frm1 == NULL || frm2 == NULL || frm3 == NULL)
   1.173 +		return; // history allocation failed: leave the frame unfiltered instead of dereferencing NULL
   1.174 +#ifdef MMX
   1.175 +	if (cpu_mmx)
   1.176 +	{
   1.177 +		SmartIB_MMX(srcPtr, srcPitch, width, height);
   1.178 +		return;
   1.179 +	}
   1.180 +#endif
   1.181 +
   1.182 +	const u16 colorMask = ~RGB_LOW_BITS_MASK; // applied to a pixel before every >> 1
   1.183 +	u16 *cur   = (u16 *)srcPtr; // incoming frame, modified in place
   1.184 +	u16 *prev1 = (u16 *)frm1;   // 1 frame ago
   1.185 +	u16 *prev2 = (u16 *)frm2;   // 2 frames ago
   1.186 +	u16 *prev3 = (u16 *)frm3;   // 3 frames ago; receives the unfiltered incoming frame
   1.187 +	const int rowEntries = srcPitch >> 1; // u16 entries per row, taken from the pitch rather than width
   1.188 +	// NOTE(review): nothing checks that rowEntries * height fits the 322 * 242 * 4 bytes allocated by Init().
   1.189 +	int pos = 0;
   1.190 +	for (int j = 0; j < height; j++)
   1.191 +	{
   1.192 +		for (int i = 0; i < rowEntries; i++, pos++)
   1.193 +		{
   1.194 +			const u16 color = cur[pos];
   1.195 +			const u16 last  = prev1[pos];
   1.196 +			// Blend only if the last two frames differ, the new value differs from 3 frames ago, and a value repeats two frames apart.
   1.197 +			const bool flicker = (last != prev2[pos]) && (prev3[pos] != color) &&
   1.198 +			                     ((color == prev2[pos]) || (last == prev3[pos]));
   1.199 +			if (flicker)
   1.200 +				cur[pos] = ((color & colorMask) >> 1) + ((last & colorMask) >> 1);
   1.201 +			prev3[pos] = color; /* oldest buffer now holds newest frame */
   1.202 +		}
   1.203 +	}
   1.204 +
   1.205 +	/* Rotate history: frm3 (now holding this frame) becomes frm1; old frm1 -> frm2, old frm2 -> frm3 */
   1.206 +	u8 *temp = frm1;
   1.207 +	frm1 = frm3;
   1.208 +	frm3 = frm2;
   1.209 +	frm2 = temp;
   1.210 +}
   1.211 +
   1.212 +#ifdef MMX
   1.213 +static void SmartIB32_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
   1.214 +{ // MMX path of SmartIB32 for 32-bit pixels; srcPitch is not read in this function
   1.215 +	u32 *src0 = (u32 *)srcPtr; // incoming frame, filtered in place
   1.216 +	u32 *src1 = (u32 *)frm1; // 1 frame ago
   1.217 +	u32 *src2 = (u32 *)frm2; // 2 frames ago
   1.218 +	u32 *src3 = (u32 *)frm3; // 3 frames ago; overwritten with the unfiltered incoming frame
   1.219 +
   1.220 +	int count = width >> 1; // loop iterations per row; each one handles 8 bytes (two u32 pixels)
   1.221 +	// NOTE(review): with width < 2 count is 0 and the dec/jnz loops below would wrap around - confirm callers never pass that.
   1.222 +	for (int i = 0; i < height; i++)
   1.223 +	{
   1.224 +#ifdef __GNUC__
   1.225 +		asm volatile (
   1.226 +		    "push %4\n"       // save count: decl below modifies this input operand
   1.227 +		    "movq 0(%5), %%mm7\n"       // colorMask
   1.228 +		    "0:\n"
   1.229 +		    "movq 0(%0), %%mm0\n"       // src0
   1.230 +		    "movq 0(%1), %%mm1\n"       // src1
   1.231 +		    "movq 0(%2), %%mm2\n"       // src2
   1.232 +		    "movq 0(%3), %%mm3\n"       // src3
   1.233 +		    "movq %%mm0, 0(%3)\n"       // src3 = src0
   1.234 +		    "movq %%mm0, %%mm4\n"
   1.235 +		    "movq %%mm1, %%mm5\n"
   1.236 +		    "pcmpeqd %%mm2, %%mm5\n"       // src1 == src2 (A)
   1.237 +		    "pcmpeqd %%mm3, %%mm4\n"       // src3 == src0 (B)
   1.238 +		    "por %%mm5, %%mm4\n"       // A | B
   1.239 +		    "movq %%mm2, %%mm5\n"
   1.240 +		    "pcmpeqd %%mm0, %%mm5\n"       // src0 == src2 (C)
   1.241 +		    "pcmpeqd %%mm1, %%mm3\n"       // src1 == src3 (D)
   1.242 +		    "por %%mm3, %%mm5\n"       // C|D
   1.243 +		    "pandn %%mm5, %%mm4\n"       // res = (!(A|B))&(C|D): per-pixel blend mask
   1.244 +		    "movq %%mm0, %%mm2\n"
   1.245 +		    "pand %%mm7, %%mm2\n"       // color & colorMask
   1.246 +		    "pand %%mm7, %%mm1\n"       // src1 & colorMask
   1.247 +		    "psrld $1, %%mm2\n"       // (color & colorMask) >> 1 (E)
   1.248 +		    "psrld $1, %%mm1\n"       // (src1 & colorMask) >> 1 (F)
   1.249 +		    "paddd %%mm2, %%mm1\n"       // E+F
   1.250 +		    "pand %%mm4, %%mm1\n"       // (E+F) & res
   1.251 +		    "pandn %%mm0, %%mm4\n"       // color& !res
   1.252 +
   1.253 +		    "por %%mm1, %%mm4\n"       // blended where res is set, original color elsewhere
   1.254 +		    "movq %%mm4, 0(%0)\n"       // src0 = res
   1.255 +
   1.256 +		    "addl $8, %0\n"       // NOTE(review): 32-bit add on a pointer operand - presumably x86-32 only, confirm
   1.257 +		    "addl $8, %1\n"
   1.258 +		    "addl $8, %2\n"
   1.259 +		    "addl $8, %3\n"
   1.260 +
   1.261 +		    "decl %4\n"
   1.262 +		    "jnz 0b\n"
   1.263 +		    "pop %4\n"       // restore count for the next row
   1.264 +		    "emms\n"
   1.265 +			: "+r" (src0), "+r" (src1), "+r" (src2), "+r" (src3)
   1.266 +			: "r" (count), "r" (qRGB_COLOR_MASK)
   1.267 +		    ); // NOTE(review): no clobber list (mm0-mm7, "memory", "cc") is declared - confirm this is safe with the compiler in use
   1.268 +#else
   1.269 +		__asm {
   1.270 +			movq mm7, qword ptr [qRGB_COLOR_MASK];
   1.271 +			mov	 eax, src0;
   1.272 +			mov	 ebx, src1;
   1.273 +			mov	 ecx, src2;
   1.274 +			mov	 edx, src3;
   1.275 +			mov	 edi, count;
   1.276 +label0:
   1.277 +			movq mm0, qword ptr [eax]; // src0
   1.278 +			movq	   mm1, qword ptr [ebx]; // src1
   1.279 +			movq	   mm2, qword ptr [ecx]; // src2
   1.280 +			movq	   mm3, qword ptr [edx]; // src3
   1.281 +			movq qword ptr [edx], mm0; // src3 = src0
   1.282 +			movq	   mm4, mm0;
   1.283 +			movq	   mm5, mm1;
   1.284 +			pcmpeqd	   mm5, mm2; // src1 == src2 (A)
   1.285 +			pcmpeqd	   mm4, mm3; // src3 == src0 (B)
   1.286 +			por		   mm4, mm5; // A | B
   1.287 +			movq	   mm5, mm2;
   1.288 +			pcmpeqd	   mm5, mm0; // src0 == src2 (C)
   1.289 +			pcmpeqd	   mm3, mm1; // src1 == src3 (D)
   1.290 +			por		   mm5, mm3; // C|D
   1.291 +			pandn	   mm4, mm5; // res = (!(A|B))&(C|D): per-pixel blend mask
   1.292 +			movq	   mm2, mm0;
   1.293 +			pand	   mm2, mm7; // color & colorMask
   1.294 +			pand	   mm1, mm7; // src1 & colorMask
   1.295 +			psrld	   mm2, 1; // (color & colorMask) >> 1 (E)
   1.296 +			psrld	   mm1, 1; // (src1 & colorMask) >> 1 (F)
   1.297 +			paddd	   mm1, mm2; // E+F
   1.298 +			pand	   mm1, mm4; // (E+F) & res
   1.299 +			pandn	   mm4, mm0; // color & !res
   1.300 +
   1.301 +			por		   mm4, mm1; // blended where res is set, original color elsewhere
   1.302 +			movq qword ptr [eax], mm4; // src0 = res
   1.303 +
   1.304 +			add eax, 8;
   1.305 +			add ebx, 8;
   1.306 +			add ecx, 8;
   1.307 +			add edx, 8;
   1.308 +
   1.309 +			dec edi;
   1.310 +			jnz label0;
   1.311 +			mov src0, eax; // write the advanced pointers back to the C variables
   1.312 +			mov src1, ebx;
   1.313 +			mov src2, ecx;
   1.314 +			mov src3, edx;
   1.315 +			emms;
   1.316 +		}
   1.317 +#endif
   1.318 +
   1.319 +		src0++; // step one more u32 entry after each row; presumably row padding - confirm against caller
   1.320 +		src1++;
   1.321 +		src2++;
   1.322 +		src3++;
   1.323 +	}
   1.324 +	/* Rotate history: frm3 (now holding this frame) becomes frm1; old frm1 -> frm2, old frm2 -> frm3 */
   1.325 +	u8 *temp = frm1;
   1.326 +	frm1 = frm3;
   1.327 +	frm3 = frm2;
   1.328 +	frm2 = temp;
   1.329 +}
   1.330 +
   1.331 +#endif
   1.332 +
   1.333 +void SmartIB32(u8 *srcPtr, u32 srcPitch, int width, int height)
   1.334 +{
   1.335 +	// In-place filter for 32-bit pixels: blends the frame at srcPtr with the previous frame where a pixel keeps alternating.
   1.336 +	if (frm1 == NULL)
   1.337 +		Init();
   1.338 +	if (frm1 == NULL || frm2 == NULL || frm3 == NULL)
   1.339 +		return; // history allocation failed: leave the frame unfiltered instead of dereferencing NULL
   1.340 +#ifdef MMX
   1.341 +	if (cpu_mmx)
   1.342 +	{
   1.343 +		SmartIB32_MMX(srcPtr, srcPitch, width, height);
   1.344 +		return;
   1.345 +	}
   1.346 +#endif
   1.347 +
   1.348 +	const u32 colorMask = 0xfefefe; // clears bit 0 of each of the three low bytes before every >> 1
   1.349 +	u32 *cur   = (u32 *)srcPtr; // incoming frame, modified in place
   1.350 +	u32 *prev1 = (u32 *)frm1;   // 1 frame ago
   1.351 +	u32 *prev2 = (u32 *)frm2;   // 2 frames ago
   1.352 +	u32 *prev3 = (u32 *)frm3;   // 3 frames ago; receives the unfiltered incoming frame
   1.353 +	const int rowEntries = srcPitch >> 2; // u32 entries per row, taken from the pitch rather than width
   1.354 +	// NOTE(review): nothing checks that rowEntries * height fits the 322 * 242 * 4 bytes allocated by Init().
   1.355 +	int pos = 0;
   1.356 +	for (int j = 0; j < height; j++)
   1.357 +	{
   1.358 +		for (int i = 0; i < rowEntries; i++, pos++)
   1.359 +		{
   1.360 +			const u32 color = cur[pos];
   1.361 +			const u32 last  = prev1[pos];
   1.362 +			// Blend only if the last two frames differ, the new value differs from 3 frames ago, and a value repeats two frames apart.
   1.363 +			const bool flicker = (last != prev2[pos]) && (prev3[pos] != color) &&
   1.364 +			                     ((color == prev2[pos]) || (last == prev3[pos]));
   1.365 +			if (flicker)
   1.366 +				cur[pos] = ((color & colorMask) >> 1) + ((last & colorMask) >> 1);
   1.367 +			prev3[pos] = color; /* oldest buffer now holds newest frame */
   1.368 +		}
   1.369 +	}
   1.370 +
   1.371 +	/* Rotate history: frm3 (now holding this frame) becomes frm1; old frm1 -> frm2, old frm2 -> frm3 */
   1.372 +	u8 *temp = frm1;
   1.373 +	frm1 = frm3;
   1.374 +	frm3 = frm2;
   1.375 +	frm2 = temp;
   1.376 +}
   1.377 +
   1.378 +#ifdef MMX
   1.379 +static void MotionBlurIB_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
   1.380 +{ // MMX path of MotionBlurIB for 16-bit pixels; srcPitch is not read in this function
   1.381 +	u16 *src0 = (u16 *)srcPtr; // incoming frame, blended in place
   1.382 +	u16 *src1 = (u16 *)frm1; // stored previous frame; overwritten with the unblended incoming frame
   1.383 +
   1.384 +	int count = width >> 2; // loop iterations per row; each one handles 8 bytes (four u16 pixels)
   1.385 +	// NOTE(review): with width < 4 count is 0 and the dec/jnz loops below would wrap around - confirm callers never pass that.
   1.386 +	for (int i = 0; i < height; i++)
   1.387 +	{
   1.388 +#ifdef __GNUC__
   1.389 +		asm volatile (
   1.390 +		    "push %2\n"       // save count: decl below modifies this input operand
   1.391 +		    "movq 0(%3), %%mm7\n"       // colorMask
   1.392 +		    "0:\n"
   1.393 +		    "movq 0(%0), %%mm0\n"       // src0
   1.394 +		    "movq 0(%1), %%mm1\n"       // src1
   1.395 +		    "movq %%mm0, 0(%1)\n"       // src1 = src0
   1.396 +		    "pand %%mm7, %%mm0\n"       // color & colorMask
   1.397 +		    "pand %%mm7, %%mm1\n"       // src1 & colorMask
   1.398 +		    "psrlw $1, %%mm0\n"       // (color & colorMask) >> 1 (E)
   1.399 +		    "psrlw $1, %%mm1\n"       // (src1 & colorMask) >> 1 (F)
   1.400 +		    "paddw %%mm1, %%mm0\n"       // E+F
   1.401 +
   1.402 +		    "movq %%mm0, 0(%0)\n"       // src0 = res
   1.403 +
   1.404 +		    "addl $8, %0\n"       // NOTE(review): 32-bit add on a pointer operand - presumably x86-32 only, confirm
   1.405 +		    "addl $8, %1\n"
   1.406 +
   1.407 +		    "decl %2\n"
   1.408 +		    "jnz 0b\n"
   1.409 +		    "pop %2\n"       // restore count for the next row
   1.410 +		    "emms\n"
   1.411 +			: "+r" (src0), "+r" (src1)
   1.412 +			: "r" (count), "r" (qRGB_COLOR_MASK)
   1.413 +		    ); // NOTE(review): no clobber list (mm0, mm1, mm7, "memory", "cc") is declared - confirm this is safe with the compiler in use
   1.414 +#else
   1.415 +		__asm {
   1.416 +			movq mm7, qword ptr [qRGB_COLOR_MASK];
   1.417 +			mov	 eax, src0;
   1.418 +			mov	 ebx, src1;
   1.419 +			mov	 edi, count;
   1.420 +label0:
   1.421 +			movq mm0, qword ptr [eax]; // src0
   1.422 +			movq	   mm1, qword ptr [ebx]; // src1
   1.423 +			movq qword ptr [ebx], mm0; // src1 = src0
   1.424 +			pand	   mm0, mm7; // color & colorMask
   1.425 +			pand	   mm1, mm7; // src1 & colorMask
   1.426 +			psrlw	   mm0, 1; // (color & colorMask) >> 1 (E)
   1.427 +			psrlw	   mm1, 1; // (src1 & colorMask) >> 1 (F)
   1.428 +			paddw	   mm0, mm1; // E+F
   1.429 +
   1.430 +			movq qword ptr [eax], mm0; // src0 = res
   1.431 +
   1.432 +			add eax, 8;
   1.433 +			add ebx, 8;
   1.434 +
   1.435 +			dec edi;
   1.436 +			jnz label0;
   1.437 +			mov src0, eax; // write the advanced pointers back to the C variables
   1.438 +			mov src1, ebx;
   1.439 +			emms;
   1.440 +		}
   1.441 +#endif
   1.442 +		src0 += 2; // step two more u16 entries after each row; presumably row padding - confirm against caller
   1.443 +		src1 += 2;
   1.444 +	}
   1.445 +}
   1.446 +
   1.447 +#endif
   1.448 +
   1.449 +void MotionBlurIB(u8 *srcPtr, u32 srcPitch, int width, int height)
   1.450 +{
   1.451 +	// Averages every 16-bit pixel of the frame at srcPtr with the frame stored in frm1, in place.
   1.452 +	if (frm1 == NULL)
   1.453 +		Init();
   1.454 +	if (frm1 == NULL)
   1.455 +		return; // history allocation failed: leave the frame unfiltered instead of dereferencing NULL
   1.456 +
   1.457 +#ifdef MMX
   1.458 +	if (cpu_mmx)
   1.459 +	{
   1.460 +		MotionBlurIB_MMX(srcPtr, srcPitch, width, height);
   1.461 +		return;
   1.462 +	}
   1.463 +#endif
   1.464 +
   1.465 +	const u16 colorMask = ~RGB_LOW_BITS_MASK; // applied to a pixel before every >> 1
   1.466 +	u16 *cur  = (u16 *)srcPtr; // incoming frame, modified in place
   1.467 +	u16 *last = (u16 *)frm1;   // previous frame as it was before blending
   1.468 +	const int rowEntries = srcPitch >> 1; // u16 entries per row, taken from the pitch rather than width
   1.469 +
   1.470 +	// NOTE(review): nothing checks that rowEntries * height fits the 322 * 242 * 4 bytes allocated by Init().
   1.471 +	int pos = 0;
   1.472 +	for (int j = 0; j < height; j++)
   1.473 +	{
   1.474 +		for (int i = 0; i < rowEntries; i++, pos++)
   1.475 +		{
   1.476 +			const u16 color = cur[pos];
   1.477 +			cur[pos] = ((color & colorMask) >> 1) + ((last[pos] & colorMask) >> 1);
   1.478 +			last[pos] = color; // keep the unblended pixel for the next call
   1.479 +		}
   1.480 +	}
   1.481 +}
   1.482 +
   1.483 +#ifdef MMX
   1.484 +static void MotionBlurIB32_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
   1.485 +{ // MMX path of MotionBlurIB32 for 32-bit pixels; srcPitch is not read in this function
   1.486 +	u32 *src0 = (u32 *)srcPtr; // incoming frame, blended in place
   1.487 +	u32 *src1 = (u32 *)frm1; // stored previous frame; overwritten with the unblended incoming frame
   1.488 +
   1.489 +	int count = width >> 1; // loop iterations per row; each one handles 8 bytes (two u32 pixels)
   1.490 +	// NOTE(review): with width < 2 count is 0 and the dec/jnz loops below would wrap around - confirm callers never pass that.
   1.491 +	for (int i = 0; i < height; i++)
   1.492 +	{
   1.493 +#ifdef __GNUC__
   1.494 +		asm volatile (
   1.495 +		    "push %2\n"       // save count: decl below modifies this input operand
   1.496 +		    "movq 0(%3), %%mm7\n"       // colorMask
   1.497 +		    "0:\n"
   1.498 +		    "movq 0(%0), %%mm0\n"       // src0
   1.499 +		    "movq 0(%1), %%mm1\n"       // src1
   1.500 +		    "movq %%mm0, 0(%1)\n"       // src1 = src0
   1.501 +		    "pand %%mm7, %%mm0\n"       // color & colorMask
   1.502 +		    "pand %%mm7, %%mm1\n"       // src1 & colorMask
   1.503 +		    "psrld $1, %%mm0\n"       // (color & colorMask) >> 1 (E)
   1.504 +		    "psrld $1, %%mm1\n"       // (src1 & colorMask) >> 1 (F)
   1.505 +		    "paddd %%mm1, %%mm0\n"       // E+F
   1.506 +
   1.507 +		    "movq %%mm0, 0(%0)\n"       // src0 = res
   1.508 +
   1.509 +		    "addl $8, %0\n"       // NOTE(review): 32-bit add on a pointer operand - presumably x86-32 only, confirm
   1.510 +		    "addl $8, %1\n"
   1.511 +
   1.512 +		    "decl %2\n"
   1.513 +		    "jnz 0b\n"
   1.514 +		    "pop %2\n"       // restore count for the next row
   1.515 +		    "emms\n"
   1.516 +			: "+r" (src0), "+r" (src1)
   1.517 +			: "r" (count), "r" (qRGB_COLOR_MASK)
   1.518 +		    ); // NOTE(review): no clobber list (mm0, mm1, mm7, "memory", "cc") is declared - confirm this is safe with the compiler in use
   1.519 +#else
   1.520 +		__asm {
   1.521 +			movq mm7, qword ptr [qRGB_COLOR_MASK];
   1.522 +			mov	 eax, src0;
   1.523 +			mov	 ebx, src1;
   1.524 +			mov	 edi, count;
   1.525 +label0:
   1.526 +			movq mm0, qword ptr [eax]; // src0
   1.527 +			movq	   mm1, qword ptr [ebx]; // src1
   1.528 +			movq qword ptr [ebx], mm0; // src1 = src0
   1.529 +			pand	   mm0, mm7; // color & colorMask
   1.530 +			pand	   mm1, mm7; // src1 & colorMask
   1.531 +			psrld	   mm0, 1; // (color & colorMask) >> 1 (E)
   1.532 +			psrld	   mm1, 1; // (src1 & colorMask) >> 1 (F)
   1.533 +			paddd	   mm0, mm1; // E+F
   1.534 +
   1.535 +			movq qword ptr [eax], mm0; // src0 = res
   1.536 +
   1.537 +			add eax, 8;
   1.538 +			add ebx, 8;
   1.539 +
   1.540 +			dec edi;
   1.541 +			jnz label0;
   1.542 +			mov src0, eax; // write the advanced pointers back to the C variables
   1.543 +			mov src1, ebx;
   1.544 +			emms;
   1.545 +		}
   1.546 +#endif
   1.547 +		src0++; // step one more u32 entry after each row; presumably row padding - confirm against caller
   1.548 +		src1++;
   1.549 +	}
   1.550 +}
   1.551 +
   1.552 +#endif
   1.553 +
   1.554 +void MotionBlurIB32(u8 *srcPtr, u32 srcPitch, int width, int height)
   1.555 +{
   1.556 +	// Averages every 32-bit pixel of the frame at srcPtr with the frame stored in frm1, in place.
   1.557 +	if (frm1 == NULL)
   1.558 +		Init();
   1.559 +	if (frm1 == NULL)
   1.560 +		return; // history allocation failed: leave the frame unfiltered instead of dereferencing NULL
   1.561 +
   1.562 +#ifdef MMX
   1.563 +	if (cpu_mmx)
   1.564 +	{
   1.565 +		MotionBlurIB32_MMX(srcPtr, srcPitch, width, height);
   1.566 +		return;
   1.567 +	}
   1.568 +#endif
   1.569 +
   1.570 +	const u32 colorMask = 0xfefefe; // clears bit 0 of each of the three low bytes before every >> 1
   1.571 +	u32 *cur  = (u32 *)srcPtr; // incoming frame, modified in place
   1.572 +	u32 *last = (u32 *)frm1;   // previous frame as it was before blending
   1.573 +	const int rowEntries = srcPitch >> 2; // u32 entries per row, taken from the pitch rather than width
   1.574 +
   1.575 +	// NOTE(review): nothing checks that rowEntries * height fits the 322 * 242 * 4 bytes allocated by Init().
   1.576 +	int pos = 0;
   1.577 +	for (int j = 0; j < height; j++)
   1.578 +	{
   1.579 +		for (int i = 0; i < rowEntries; i++, pos++)
   1.580 +		{
   1.581 +			const u32 color = cur[pos];
   1.582 +			cur[pos] = ((color & colorMask) >> 1) + ((last[pos] & colorMask) >> 1);
   1.583 +			last[pos] = color; // keep the unblended pixel for the next call
   1.584 +		}
   1.585 +	}
   1.586 +}
   1.587 +
   1.588 +static int count = 0; // read by InterlaceIB to pick the row parity, then toggled (count ^ 1) on every call
   1.589 +
   1.590 +void InterlaceIB(u8 *srcPtr, u32 srcPitch, int width, int height)
   1.591 +{
   1.592 +	// In-place blend of the 16-bit frame at srcPtr with the previous frame kept in frm1.
   1.593 +	// Rows alternate between two weightings, and the file-level `count` flips which
   1.594 +	// row parity gets which weighting on every call. `width` is not read here.
   1.595 +	// frm1 always ends up holding the unblended incoming frame.
   1.596 +	if (frm1 == NULL)
   1.597 +		Init();
   1.598 +	if (frm1 == NULL)
   1.599 +		return; // history allocation failed: leave the frame unfiltered instead of dereferencing NULL
   1.600 +
   1.601 +	const u16 colorMask = ~RGB_LOW_BITS_MASK; // applied to a value before every >> 1
   1.602 +	u16 *cur  = (u16 *)srcPtr; // incoming frame, modified in place
   1.603 +	u16 *last = (u16 *)frm1;   // previous frame as it was before blending
   1.604 +	const int rowEntries = srcPitch >> 1; // u16 entries per row, taken from the pitch rather than width
   1.605 +	// NOTE(review): nothing checks that rowEntries * height fits the 322 * 242 * 4 bytes allocated by Init().
   1.606 +
   1.607 +	int pos = 0;
   1.608 +	for (int j = 0; j < height; j++)
   1.609 +	{
   1.610 +		// count == 0: even rows favour the new frame; otherwise odd rows do.
   1.611 +		const bool favourNew = count ? (j & 1) != 0 : (j & 1) == 0;
   1.612 +
   1.613 +		for (int i = 0; i < rowEntries; i++, pos++)
   1.614 +		{
   1.615 +			const u16 color   = cur[pos];
   1.616 +			const u16 halfNew = (color & colorMask) >> 1;
   1.617 +			const u16 halfOld = (last[pos] & colorMask) >> 1;
   1.618 +
   1.619 +			// The favoured side contributes its halved value; the other side is
   1.620 +			// masked and halved a second time (roughly a quarter weight).
   1.621 +			if (favourNew)
   1.622 +				cur[pos] = halfNew + ((halfOld & colorMask) >> 1);
   1.623 +			else
   1.624 +				cur[pos] = ((halfNew & colorMask) >> 1) + halfOld;
   1.625 +
   1.626 +			last[pos] = color; // keep the unblended pixel for the next call
   1.627 +		}
   1.628 +	}
   1.629 +
   1.630 +	// Flip the parity used by the next call.
   1.631 +	count = count ^ 1;
   1.632 +}
   1.633 +