changeset 27:b970226568d2

brought in filters package
author Robert McIntyre <rlm@mit.edu>
date Sun, 04 Mar 2012 20:32:31 -0600 (2012-03-05)
parents 18eaae41bde3
children 2efb971df515
files configure.ac src/Makefile.am src/filters/2xSaI.cpp src/filters/2xSaImmx.asm src/filters/Makefile src/filters/Makefile.am src/filters/admame.cpp src/filters/bilinear.cpp src/filters/filters.h src/filters/hq2x.cpp src/filters/hq2x.h src/filters/hq3x32.cpp src/filters/hq3x32.h src/filters/hq_shared32.cpp src/filters/hq_shared32.h src/filters/interframe.cpp src/filters/interp.h src/filters/lq2x.h src/filters/motionblur.cpp src/filters/pixel.cpp src/filters/scanline.cpp src/filters/simple2x.cpp
diffstat 22 files changed, 15956 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
     1.1 --- a/configure.ac	Sun Mar 04 18:30:06 2012 -0600
     1.2 +++ b/configure.ac	Sun Mar 04 20:32:31 2012 -0600
     1.3 @@ -18,7 +18,7 @@
     1.4  AC_PROG_RANLIB
     1.5  AC_PROG_CPP
     1.6  AC_PROG_MKDIR_P
     1.7 -
     1.8 +AC_PATH_PROG(NASM, nasm)
     1.9  
    1.10  # Checks for libraries.
    1.11  AC_CHECK_LIB([SDL], [SDL_Init])
    1.12 @@ -61,7 +61,8 @@
    1.13  		 src/gb/Makefile
    1.14  		 src/gba/Makefile
    1.15  		 src/common/Makefile
    1.16 -		 src/SFMT/Makefile])
    1.17 +		 src/SFMT/Makefile
    1.18 +		 src/filters/Makefile])
    1.19  
    1.20  
    1.21  
     2.1 --- a/src/Makefile.am	Sun Mar 04 18:30:06 2012 -0600
     2.2 +++ b/src/Makefile.am	Sun Mar 04 20:32:31 2012 -0600
     2.3 @@ -1,3 +1,3 @@
     2.4 -SUBDIRS = SFMT lua gb gba common
     2.5 +SUBDIRS = SFMT lua gb gba common filters
     2.6  
     2.7  noinst_HEADERS = Port.h NLS.h
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/src/filters/2xSaI.cpp	Sun Mar 04 20:32:31 2012 -0600
     3.3 @@ -0,0 +1,1434 @@
     3.4 +#include "../common/System.h"
     3.5 +
     3.6 +extern "C" // C-linkage declarations for the MMX line routines; only present when MMX is defined
     3.7 +{
     3.8 +#ifdef MMX
     3.9 +	void _2xSaILine(u8 *srcPtr, u8 *deltaPtr, u32 srcPitch, // called once per source row by _2xSaI()
    3.10 +	                u32 width, u8 *dstPtr, u32 dstPitch);
    3.11 +	void _2xSaISuperEagleLine(u8 *srcPtr, u8 *deltaPtr, // called once per source row by SuperEagle()
    3.12 +	                          u32 srcPitch, u32 width,
    3.13 +	                          u8 *dstPtr, u32 dstPitch);
    3.14 +	void _2xSaISuper2xSaILine(u8 *srcPtr, u8 *deltaPtr, // called once per source row by Super2xSaI()
    3.15 +	                          u32 srcPitch, u32 width,
    3.16 +	                          u8 *dstPtr, u32 dstPitch);
    3.17 +	void Init_2xSaIMMX(u32 BitFormat); // called at the end of Init_2xSaI() with the same BitFormat
    3.18 +	void BilinearMMX(u16 *A, u16 *B, u16 *C, u16 *D, // the three Bilinear* routines and EndMMX are only declared in the part of the file reviewed here
    3.19 +	                 u16 *dx, u16 *dy, u8 *dP);
    3.20 +	void BilinearMMXGrid0(u16 *A, u16 *B, u16 *C, u16 *D,
    3.21 +	                      u16 *dx, u16 *dy, u8 *dP);
    3.22 +	void BilinearMMXGrid1(u16 *A, u16 *B, u16 *C, u16 *D,
    3.23 +	                      u16 *dx, u16 *dy, u8 *dP);
    3.24 +	void EndMMX();
    3.25 +	// Run-time switch tested by the 16-bit filters: non-zero selects the MMX line routines above.
    3.26 +	bool cpu_mmx = 1; // defined (not just declared) here, and enabled by default
    3.27 +#endif
    3.28 +}
    3.29 +static u32 colorMask	 = 0xF7DEF7DE; // applied to both inputs before the >> 1 in INTERPOLATE()
    3.30 +static u32 lowPixelMask	 = 0x08210821; // selects the bits INTERPOLATE() adds back via A & B
    3.31 +static u32 qcolorMask	 = 0xE79CE79C; // applied to all four inputs before the >> 2 in Q_INTERPOLATE()
    3.32 +static u32 qlowpixelMask = 0x18631863; // selects the low bits Q_INTERPOLATE() sums separately
    3.33 +static u32 redblueMask	 = 0xF81F; // assigned by Init_2xSaI(); not read in the part of the file reviewed here
    3.34 +static u32 greenMask	 = 0x7E0; // assigned by Init_2xSaI(); not read in the part of the file reviewed here
    3.35 +// The initial values above are the same ones Init_2xSaI() installs for 16-bit colour with BitFormat 565.
    3.36 +u32 qRGB_COLOR_MASK[2] = { 0xF7DEF7DE, 0xF7DEF7DE }; // non-static; both entries are rewritten by Init_2xSaI()
    3.37 +// hq2x_init is defined elsewhere; Init_2xSaI() calls it with 16, 15 or 32.
    3.38 +extern void hq2x_init(unsigned);
    3.39 +
    3.40 +int Init_2xSaI(u32 BitFormat) // Installs the interpolation masks for the current pixel format; returns 1 on success, 0 for an unhandled format.
    3.41 +{
    3.42 +	if (systemColorDepth == 16) // BitFormat is only examined on this 16-bit path
    3.43 +	{
    3.44 +		if (BitFormat == 565)
    3.45 +		{
    3.46 +			colorMask		   = 0xF7DEF7DE;
    3.47 +			lowPixelMask	   = 0x08210821;
    3.48 +			qcolorMask		   = 0xE79CE79C;
    3.49 +			qlowpixelMask	   = 0x18631863;
    3.50 +			redblueMask		   = 0xF81F;
    3.51 +			greenMask		   = 0x7E0;
    3.52 +			qRGB_COLOR_MASK[0] = qRGB_COLOR_MASK[1] = 0xF7DEF7DE;
    3.53 +			hq2x_init(16);
    3.54 +		}
    3.55 +		else if (BitFormat == 555)
    3.56 +		{
    3.57 +			colorMask		   = 0x7BDE7BDE;
    3.58 +			lowPixelMask	   = 0x04210421;
    3.59 +			qcolorMask		   = 0x739C739C;
    3.60 +			qlowpixelMask	   = 0x0C630C63;
    3.61 +			redblueMask		   = 0x7C1F;
    3.62 +			greenMask		   = 0x3E0;
    3.63 +			qRGB_COLOR_MASK[0] = qRGB_COLOR_MASK[1] = 0x7BDE7BDE;
    3.64 +			hq2x_init(15); // 555 is reported to hq2x_init as 15
    3.65 +		}
    3.66 +		else
    3.67 +		{
    3.68 +			return 0; // 16-bit depth with any other BitFormat: nothing is changed
    3.69 +		}
    3.70 +	}
    3.71 +	else if (systemColorDepth == 32)
    3.72 +	{
    3.73 +		colorMask		   = 0xfefefe; // redblueMask and greenMask are left untouched on this path
    3.74 +		lowPixelMask	   = 0x010101;
    3.75 +		qcolorMask		   = 0xfcfcfc;
    3.76 +		qlowpixelMask	   = 0x030303;
    3.77 +		qRGB_COLOR_MASK[0] = qRGB_COLOR_MASK[1] = 0xfefefe;
    3.78 +		hq2x_init(32);
    3.79 +	}
    3.80 +	else
    3.81 +		return 0; // any depth other than 16 or 32 is rejected
    3.82 +	// Reached for every accepted format, including 32-bit: the MMX side is initialised with the caller's BitFormat.
    3.83 +#ifdef MMX
    3.84 +	Init_2xSaIMMX(BitFormat);
    3.85 +#endif
    3.86 +
    3.87 +	return 1;
    3.88 +}
    3.89 +
    3.90 +static inline int GetResult1(u32 A, u32 B, u32 C, u32 D, // Returns -1 when C and D both equal A, +1 when both equal B (and neither equals A), otherwise 0.
    3.91 +                             u32 /* E */) // fifth argument is accepted but ignored
    3.92 +{
    3.93 +	int x = 0; // how many of C, D equal A
    3.94 +	int y = 0; // how many of C, D equal B without equalling A
    3.95 +	int r = 0;
    3.96 +
    3.97 +	if (A == C)
    3.98 +		x += 1;
    3.99 +	else if (B == C)
   3.100 +		y += 1;
   3.101 +	if (A == D)
   3.102 +		x += 1;
   3.103 +	else if (B == D)
   3.104 +		y += 1;
   3.105 +	if (x <= 1) // not both matched A
   3.106 +		r += 1;
   3.107 +	if (y <= 1) // not both matched B
   3.108 +		r -= 1;
   3.109 +	return r;
   3.110 +}
   3.111 +
   3.112 +static inline int GetResult2(u32 A, u32 B, u32 C, u32 D, // Sign-flipped counterpart of GetResult1: +1 when C and D both equal A, -1 when both equal B (and neither equals A), otherwise 0.
   3.113 +                             u32 /* E */) // fifth argument is accepted but ignored
   3.114 +{
   3.115 +	int x = 0; // how many of C, D equal A
   3.116 +	int y = 0; // how many of C, D equal B without equalling A
   3.117 +	int r = 0;
   3.118 +
   3.119 +	if (A == C)
   3.120 +		x += 1;
   3.121 +	else if (B == C)
   3.122 +		y += 1;
   3.123 +	if (A == D)
   3.124 +		x += 1;
   3.125 +	else if (B == D)
   3.126 +		y += 1;
   3.127 +	if (x <= 1) // not both matched A
   3.128 +		r -= 1;
   3.129 +	if (y <= 1) // not both matched B
   3.130 +		r += 1;
   3.131 +	return r;
   3.132 +}
   3.133 +
   3.134 +static inline int GetResult(u32 A, u32 B, u32 C, u32 D) // Returns -1 when C and D both equal A, +1 when both equal B (and neither equals A), otherwise 0.
   3.135 +{
   3.136 +	int x = 0; // how many of C, D equal A
   3.137 +	int y = 0; // how many of C, D equal B without equalling A
   3.138 +	int r = 0;
   3.139 +
   3.140 +	if (A == C)
   3.141 +		x += 1;
   3.142 +	else if (B == C)
   3.143 +		y += 1;
   3.144 +	if (A == D)
   3.145 +		x += 1;
   3.146 +	else if (B == D)
   3.147 +		y += 1;
   3.148 +	if (x <= 1) // not both matched A
   3.149 +		r += 1;
   3.150 +	if (y <= 1) // not both matched B
   3.151 +		r -= 1;
   3.152 +	return r;
   3.153 +}
   3.154 +
   3.155 +static inline u32 INTERPOLATE(u32 A, u32 B) // Two-way blend of A and B using the file-level colorMask / lowPixelMask; returns A unchanged when A == B.
   3.156 +{
   3.157 +	if (A != B)
   3.158 +	{
   3.159 +		return (((A & colorMask) >> 1) + ((B & colorMask) >> 1) + // each input is masked and halved before the add
   3.160 +		        (A & B & lowPixelMask)); // adds back the lowPixelMask bits that are set in both inputs
   3.161 +	}
   3.162 +	else
   3.163 +		return A;
   3.164 +}
   3.165 +
   3.166 +static inline u32 Q_INTERPOLATE(u32 A, u32 B, u32 C, u32 D) // Four-way blend of A..D using the file-level qcolorMask / qlowpixelMask.
   3.167 +{
   3.168 +	register u32 x = ((A & qcolorMask) >> 2) + // x: sum of the four inputs, each masked and quartered first
   3.169 +	                 ((B & qcolorMask) >> 2) +
   3.170 +	                 ((C & qcolorMask) >> 2) + ((D & qcolorMask) >> 2);
   3.171 +	register u32 y = (A & qlowpixelMask) + // y: sum of the qlowpixelMask bits of the four inputs
   3.172 +	                 (B & qlowpixelMask) + (C & qlowpixelMask) + (D & qlowpixelMask);
   3.173 +
   3.174 +	y = (y >> 2) & qlowpixelMask; // quarter the low-bit sum and keep only the qlowpixelMask bits
   3.175 +	return x + y;
   3.176 +}
   3.177 +
   3.178 +static inline int GetResult1_32(u32 A, u32 B, u32 C, u32 D, // Same statements as GetResult1: -1 when C and D both equal A, +1 when both equal B (and neither equals A), otherwise 0.
   3.179 +                                u32 /* E */) // fifth argument is accepted but ignored
   3.180 +{
   3.181 +	int x = 0; // how many of C, D equal A
   3.182 +	int y = 0; // how many of C, D equal B without equalling A
   3.183 +	int r = 0;
   3.184 +
   3.185 +	if (A == C)
   3.186 +		x += 1;
   3.187 +	else if (B == C)
   3.188 +		y += 1;
   3.189 +	if (A == D)
   3.190 +		x += 1;
   3.191 +	else if (B == D)
   3.192 +		y += 1;
   3.193 +	if (x <= 1) // not both matched A
   3.194 +		r += 1;
   3.195 +	if (y <= 1) // not both matched B
   3.196 +		r -= 1;
   3.197 +	return r;
   3.198 +}
   3.199 +
   3.200 +static inline int GetResult2_32(u32 A, u32 B, u32 C, u32 D, // Same statements as GetResult2: +1 when C and D both equal A, -1 when both equal B (and neither equals A), otherwise 0.
   3.201 +                                u32 /* E */) // fifth argument is accepted but ignored
   3.202 +{
   3.203 +	int x = 0; // how many of C, D equal A
   3.204 +	int y = 0; // how many of C, D equal B without equalling A
   3.205 +	int r = 0;
   3.206 +
   3.207 +	if (A == C)
   3.208 +		x += 1;
   3.209 +	else if (B == C)
   3.210 +		y += 1;
   3.211 +	if (A == D)
   3.212 +		x += 1;
   3.213 +	else if (B == D)
   3.214 +		y += 1;
   3.215 +	if (x <= 1) // not both matched A
   3.216 +		r -= 1;
   3.217 +	if (y <= 1) // not both matched B
   3.218 +		r += 1;
   3.219 +	return r;
   3.220 +}
   3.221 +
   3.222 +#define BLUE_MASK565 0x001F001F // each constant repeats one 16-bit pattern in both halves of a 32-bit word
   3.223 +#define RED_MASK565 0xF800F800
   3.224 +#define GREEN_MASK565 0x07E007E0
   3.225 +// 555 counterparts; BLUE_MASK555 has the same value as BLUE_MASK565.
   3.226 +#define BLUE_MASK555 0x001F001F
   3.227 +#define RED_MASK555 0x7C007C00
   3.228 +#define GREEN_MASK555 0x03E003E0
   3.229 +
   3.230 +void Super2xSaI(u8 *srcPtr, u32 srcPitch, // 2x upscale of 16-bit pixels: every source pixel produces a 2x2 block in dstPtr.
   3.231 +                u8 *deltaPtr, u8 *dstPtr, u32 dstPitch, // deltaPtr is passed to the MMX line routine; the C path only advances it.
   3.232 +                int width, int height) // width = source pixels per row, height = source rows; both pitches are byte strides.
   3.233 +{
   3.234 +	u16 *bP; // current source pixel
   3.235 +	u8 * dP; // current destination position, in bytes
   3.236 +	u32	 inc_bP; // source step per iteration; always 1 in this function
   3.237 +	u32	 Nextline = srcPitch >> 1; // source row stride counted in u16 pixels
   3.238 +#ifdef MMX
   3.239 +	if (cpu_mmx)
   3.240 +	{
   3.241 +		for (; height; height--) // one assembly call per source row
   3.242 +		{
   3.243 +			_2xSaISuper2xSaILine(srcPtr, deltaPtr, srcPitch, width,
   3.244 +			                     dstPtr, dstPitch);
   3.245 +			srcPtr	 += srcPitch;
   3.246 +			dstPtr	 += dstPitch * 2; // each source row yields two destination rows
   3.247 +			deltaPtr += srcPitch;
   3.248 +		}
   3.249 +	}
   3.250 +	else
   3.251 +#endif
   3.252 +	{
   3.253 +		inc_bP = 1;
   3.254 +
   3.255 +		for (; height; height--) // NOTE(review): both loops stop only when the counter reaches 0, so width and height are presumably non-negative - confirm
   3.256 +		{
   3.257 +			bP = (u16 *) srcPtr;
   3.258 +			dP = (u8 *) dstPtr;
   3.259 +
   3.260 +			for (u32 finish = width; finish; finish -= inc_bP) // one iteration per source pixel
   3.261 +			{
   3.262 +				u32 color4, color5, color6;
   3.263 +				u32 color1, color2, color3;
   3.264 +				u32 colorA0, colorA1, colorA2, colorA3,
   3.265 +				    colorB0, colorB1, colorB2, colorB3, colorS1, colorS2;
   3.266 +				u32 product1a, product1b, product2a, product2b; // 1a/1b: left/right pixel of the upper output row, 2a/2b: of the lower output row
   3.267 +				// NOTE(review): the loads below reach one row up, two rows down, one pixel left and two right of every pixel, edges included - presumably the caller pads the source buffer; confirm.
   3.268 +				//---------------------------------------    B1 B2
   3.269 +				//                                         4  5  6 S2
   3.270 +				//                                         1  2  3 S1
   3.271 +				//                                           A1 A2
   3.272 +				// Row above the current pixel, columns -1..+2.
   3.273 +				colorB0 = *(bP - Nextline - 1);
   3.274 +				colorB1 = *(bP - Nextline);
   3.275 +				colorB2 = *(bP - Nextline + 1);
   3.276 +				colorB3 = *(bP - Nextline + 2);
   3.277 +				// Current row, columns -1..+2 (color5 is the pixel being scaled).
   3.278 +				color4	= *(bP - 1);
   3.279 +				color5	= *(bP);
   3.280 +				color6	= *(bP + 1);
   3.281 +				colorS2 = *(bP + 2);
   3.282 +				// Next row, columns -1..+2.
   3.283 +				color1	= *(bP + Nextline - 1);
   3.284 +				color2	= *(bP + Nextline);
   3.285 +				color3	= *(bP + Nextline + 1);
   3.286 +				colorS1 = *(bP + Nextline + 2);
   3.287 +				// Two rows below, columns -1..+2.
   3.288 +				colorA0 = *(bP + Nextline + Nextline - 1);
   3.289 +				colorA1 = *(bP + Nextline + Nextline);
   3.290 +				colorA2 = *(bP + Nextline + Nextline + 1);
   3.291 +				colorA3 = *(bP + Nextline + Nextline + 2);
   3.292 +
   3.293 +				//--------------------------------------
   3.294 +				if (color2 == color6 && color5 != color3) // first: the two right-hand output pixels (product1b, product2b)
   3.295 +				{
   3.296 +					product2b = product1b = color2;
   3.297 +				}
   3.298 +				else if (color5 == color3 && color2 != color6)
   3.299 +				{
   3.300 +					product2b = product1b = color5;
   3.301 +				}
   3.302 +				else if (color5 == color3 && color2 == color6) // both diagonals match: decide by the GetResult sum
   3.303 +				{
   3.304 +					register int r = 0;
   3.305 +
   3.306 +					r += GetResult(color6, color5, color1, colorA1);
   3.307 +					r += GetResult(color6, color5, color4, colorB1);
   3.308 +					r += GetResult(color6, color5, colorA2, colorS1);
   3.309 +					r += GetResult(color6, color5, colorB2, colorS2);
   3.310 +
   3.311 +					if (r > 0)
   3.312 +						product2b = product1b = color6;
   3.313 +					else if (r < 0)
   3.314 +						product2b = product1b = color5;
   3.315 +					else
   3.316 +					{
   3.317 +						product2b = product1b = INTERPOLATE(color5, color6); // tie: blend the two candidates
   3.318 +					}
   3.319 +				}
   3.320 +				else // neither diagonal matches: product2b and product1b are computed independently
   3.321 +				{
   3.322 +					if (color6 == color3 && color3 == colorA1
   3.323 +					    && color2 != colorA2 && color3 != colorA0)
   3.324 +						product2b =
   3.325 +						    Q_INTERPOLATE(color3, color3, color3, color2); // color3 passed three times, color2 once
   3.326 +					else if (color5 == color2 && color2 == colorA2
   3.327 +					         && colorA1 != color3 && color2 != colorA3)
   3.328 +						product2b =
   3.329 +						    Q_INTERPOLATE(color2, color2, color2, color3);
   3.330 +					else
   3.331 +						product2b = INTERPOLATE(color2, color3);
   3.332 +
   3.333 +					if (color6 == color3 && color6 == colorB1
   3.334 +					    && color5 != colorB2 && color6 != colorB0)
   3.335 +						product1b =
   3.336 +						    Q_INTERPOLATE(color6, color6, color6, color5);
   3.337 +					else if (color5 == color2 && color5 == colorB2
   3.338 +					         && colorB1 != color6 && color5 != colorB3)
   3.339 +						product1b =
   3.340 +						    Q_INTERPOLATE(color6, color5, color5, color5); // Q_INTERPOLATE treats its four arguments alike, so the position of color6 does not matter
   3.341 +					else
   3.342 +						product1b = INTERPOLATE(color5, color6);
   3.343 +				}
   3.344 +				// product2a: left pixel of the lower output row; falls back to color2.
   3.345 +				if (color5 == color3 && color2 != color6 && color4 == color5
   3.346 +				    && color5 != colorA2)
   3.347 +					product2a = INTERPOLATE(color2, color5);
   3.348 +				else
   3.349 +					if (color5 == color1 && color6 == color5
   3.350 +					    && color4 != color2 && color5 != colorA0)
   3.351 +						product2a = INTERPOLATE(color2, color5);
   3.352 +					else
   3.353 +						product2a = color2;
   3.354 +				// product1a: left pixel of the upper output row; falls back to color5.
   3.355 +				if (color2 == color6 && color5 != color3 && color1 == color2
   3.356 +				    && color2 != colorB2)
   3.357 +					product1a = INTERPOLATE(color2, color5);
   3.358 +				else
   3.359 +					if (color4 == color2 && color3 == color2
   3.360 +					    && color1 != color5 && color2 != colorB0)
   3.361 +						product1a = INTERPOLATE(color2, color5);
   3.362 +					else
   3.363 +						product1a = color5;
   3.364 +				// Pack each left/right pair into one u32 so the "a" pixel lands at the lower address on either endianness.
   3.365 +#ifdef WORDS_BIGENDIAN
   3.366 +				product1a = (product1a << 16) | product1b;
   3.367 +				product2a = (product2a << 16) | product2b;
   3.368 +#else
   3.369 +				product1a = product1a | (product1b << 16);
   3.370 +				product2a = product2a | (product2b << 16);
   3.371 +#endif
   3.372 +				// Upper pair goes to the current destination row, lower pair one destination row (dstPitch bytes) further.
   3.373 +				*((u32 *) dP) = product1a;
   3.374 +				*((u32 *) (dP + dstPitch)) = product2a;
   3.375 +
   3.376 +				bP += inc_bP;
   3.377 +				dP += sizeof(u32); // two 16-bit output pixels per source pixel
   3.378 +			}                   // end of for ( finish= width etc..)
   3.379 +
   3.380 +			srcPtr	 += srcPitch;
   3.381 +			dstPtr	 += dstPitch << 1; // skip the two destination rows just written
   3.382 +			deltaPtr += srcPitch; // advanced but never dereferenced on this C path
   3.383 +		}               // endof: for (; height; height--)
   3.384 +	}
   3.385 +}
   3.386 +
   3.387 +void Super2xSaI32(u8 *srcPtr, u32 srcPitch, // 32-bit-pixel version of Super2xSaI: every source pixel produces a 2x2 block in dstPtr; no MMX path.
   3.388 +                  u8 * /* deltaPtr */, u8 *dstPtr, u32 dstPitch, // the delta buffer argument is accepted but unused
   3.389 +                  int width, int height) // width = source pixels per row, height = source rows; both pitches are byte strides.
   3.390 +{
   3.391 +	u32 *bP; // current source pixel
   3.392 +	u32 *dP; // current destination pixel
   3.393 +	u32	 inc_bP; // source step per iteration; always 1 in this function
   3.394 +	u32	 Nextline = srcPitch >> 2; // source row stride counted in u32 pixels
   3.395 +	inc_bP = 1;
   3.396 +
   3.397 +	for (; height; height--) // NOTE(review): both loops stop only when the counter reaches 0, so width and height are presumably non-negative - confirm
   3.398 +	{
   3.399 +		bP = (u32 *) srcPtr;
   3.400 +		dP = (u32 *) dstPtr;
   3.401 +
   3.402 +		for (u32 finish = width; finish; finish -= inc_bP) // one iteration per source pixel
   3.403 +		{
   3.404 +			u32 color4, color5, color6;
   3.405 +			u32 color1, color2, color3;
   3.406 +			u32 colorA0, colorA1, colorA2, colorA3,
   3.407 +			    colorB0, colorB1, colorB2, colorB3, colorS1, colorS2;
   3.408 +			u32 product1a, product1b, product2a, product2b; // 1a/1b: left/right pixel of the upper output row, 2a/2b: of the lower output row
   3.409 +			// NOTE(review): the loads below reach one row up, two rows down, one pixel left and two right of every pixel, edges included - presumably the caller pads the source buffer; confirm.
   3.410 +			//---------------------------------------    B1 B2
   3.411 +			//                                         4  5  6 S2
   3.412 +			//                                         1  2  3 S1
   3.413 +			//                                           A1 A2
   3.414 +			// Row above the current pixel, columns -1..+2.
   3.415 +			colorB0 = *(bP - Nextline - 1);
   3.416 +			colorB1 = *(bP - Nextline);
   3.417 +			colorB2 = *(bP - Nextline + 1);
   3.418 +			colorB3 = *(bP - Nextline + 2);
   3.419 +			// Current row, columns -1..+2 (color5 is the pixel being scaled).
   3.420 +			color4	= *(bP - 1);
   3.421 +			color5	= *(bP);
   3.422 +			color6	= *(bP + 1);
   3.423 +			colorS2 = *(bP + 2);
   3.424 +			// Next row, columns -1..+2.
   3.425 +			color1	= *(bP + Nextline - 1);
   3.426 +			color2	= *(bP + Nextline);
   3.427 +			color3	= *(bP + Nextline + 1);
   3.428 +			colorS1 = *(bP + Nextline + 2);
   3.429 +			// Two rows below, columns -1..+2.
   3.430 +			colorA0 = *(bP + Nextline + Nextline - 1);
   3.431 +			colorA1 = *(bP + Nextline + Nextline);
   3.432 +			colorA2 = *(bP + Nextline + Nextline + 1);
   3.433 +			colorA3 = *(bP + Nextline + Nextline + 2);
   3.434 +
   3.435 +			//--------------------------------------
   3.436 +			if (color2 == color6 && color5 != color3) // first: the two right-hand output pixels (product1b, product2b)
   3.437 +			{
   3.438 +				product2b = product1b = color2;
   3.439 +			}
   3.440 +			else if (color5 == color3 && color2 != color6)
   3.441 +			{
   3.442 +				product2b = product1b = color5;
   3.443 +			}
   3.444 +			else if (color5 == color3 && color2 == color6) // both diagonals match: decide by the GetResult sum
   3.445 +			{
   3.446 +				register int r = 0;
   3.447 +
   3.448 +				r += GetResult(color6, color5, color1, colorA1);
   3.449 +				r += GetResult(color6, color5, color4, colorB1);
   3.450 +				r += GetResult(color6, color5, colorA2, colorS1);
   3.451 +				r += GetResult(color6, color5, colorB2, colorS2);
   3.452 +
   3.453 +				if (r > 0)
   3.454 +					product2b = product1b = color6;
   3.455 +				else if (r < 0)
   3.456 +					product2b = product1b = color5;
   3.457 +				else
   3.458 +				{
   3.459 +					product2b = product1b = INTERPOLATE(color5, color6); // tie: blend the two candidates
   3.460 +				}
   3.461 +			}
   3.462 +			else // neither diagonal matches: product2b and product1b are computed independently
   3.463 +			{
   3.464 +				if (color6 == color3 && color3 == colorA1
   3.465 +				    && color2 != colorA2 && color3 != colorA0)
   3.466 +					product2b =
   3.467 +					    Q_INTERPOLATE(color3, color3, color3, color2); // color3 passed three times, color2 once
   3.468 +				else if (color5 == color2 && color2 == colorA2
   3.469 +				         && colorA1 != color3 && color2 != colorA3)
   3.470 +					product2b =
   3.471 +					    Q_INTERPOLATE(color2, color2, color2, color3);
   3.472 +				else
   3.473 +					product2b = INTERPOLATE(color2, color3);
   3.474 +
   3.475 +				if (color6 == color3 && color6 == colorB1
   3.476 +				    && color5 != colorB2 && color6 != colorB0)
   3.477 +					product1b =
   3.478 +					    Q_INTERPOLATE(color6, color6, color6, color5);
   3.479 +				else if (color5 == color2 && color5 == colorB2
   3.480 +				         && colorB1 != color6 && color5 != colorB3)
   3.481 +					product1b =
   3.482 +					    Q_INTERPOLATE(color6, color5, color5, color5); // Q_INTERPOLATE treats its four arguments alike, so the position of color6 does not matter
   3.483 +				else
   3.484 +					product1b = INTERPOLATE(color5, color6);
   3.485 +			}
   3.486 +			// product2a: left pixel of the lower output row; falls back to color2.
   3.487 +			if (color5 == color3 && color2 != color6 && color4 == color5
   3.488 +			    && color5 != colorA2)
   3.489 +				product2a = INTERPOLATE(color2, color5);
   3.490 +			else
   3.491 +				if (color5 == color1 && color6 == color5
   3.492 +				    && color4 != color2 && color5 != colorA0)
   3.493 +					product2a = INTERPOLATE(color2, color5);
   3.494 +				else
   3.495 +					product2a = color2;
   3.496 +			// product1a: left pixel of the upper output row; falls back to color5.
   3.497 +			if (color2 == color6 && color5 != color3 && color1 == color2
   3.498 +			    && color2 != colorB2)
   3.499 +				product1a = INTERPOLATE(color2, color5);
   3.500 +			else
   3.501 +				if (color4 == color2 && color3 == color2
   3.502 +				    && color1 != color5 && color2 != colorB0)
   3.503 +					product1a = INTERPOLATE(color2, color5);
   3.504 +				else
   3.505 +					product1a = color5;
   3.506 +			*(dP)	  = product1a; // upper output row: left, then right
   3.507 +			*(dP + 1) = product1b;
   3.508 +			*(dP + (dstPitch >> 2))		= product2a; // lower output row: one destination row (dstPitch bytes) further
   3.509 +			*(dP + (dstPitch >> 2) + 1) = product2b;
   3.510 +
   3.511 +			bP += inc_bP;
   3.512 +			dP += 2; // two 32-bit output pixels per source pixel
   3.513 +		}                   // end of for ( finish= width etc..)
   3.514 +
   3.515 +		srcPtr += srcPitch;
   3.516 +		dstPtr += dstPitch << 1; // skip the two destination rows just written
   3.517 +		//        deltaPtr += srcPitch;
   3.518 +	}               // endof: for (; height; height--)
   3.519 +}
   3.520 +
   3.521 +void SuperEagle(u8 *srcPtr, u32 srcPitch, u8 *deltaPtr, // 2x upscale of 16-bit pixels (Super Eagle variant): every source pixel produces a 2x2 block in dstPtr.
   3.522 +                u8 *dstPtr, u32 dstPitch, int width, int height) // width = source pixels per row, height = source rows; both pitches are byte strides.
   3.523 +{
   3.524 +	u8 * dP; // current destination position, in bytes
   3.525 +	u16 *bP; // current source pixel
   3.526 +	u16 *xP; // current position in the delta buffer
   3.527 +	u32	 inc_bP; // source step per iteration; always 1 in this function
   3.528 +
   3.529 +#ifdef MMX
   3.530 +	if (cpu_mmx)
   3.531 +	{
   3.532 +		for (; height; height--) // one assembly call per source row
   3.533 +		{
   3.534 +			_2xSaISuperEagleLine(srcPtr, deltaPtr, srcPitch, width,
   3.535 +			                     dstPtr, dstPitch);
   3.536 +			srcPtr	 += srcPitch;
   3.537 +			dstPtr	 += dstPitch * 2; // each source row yields two destination rows
   3.538 +			deltaPtr += srcPitch;
   3.539 +		}
   3.540 +	}
   3.541 +	else
   3.542 +#endif
   3.543 +	{
   3.544 +		inc_bP = 1;
   3.545 +
   3.546 +		u32 Nextline = srcPitch >> 1; // source row stride counted in u16 pixels
   3.547 +
   3.548 +		for (; height; height--) // NOTE(review): both loops stop only when the counter reaches 0, so width and height are presumably non-negative - confirm
   3.549 +		{
   3.550 +			bP = (u16 *) srcPtr;
   3.551 +			xP = (u16 *) deltaPtr;
   3.552 +			dP = dstPtr;
   3.553 +			for (u32 finish = width; finish; finish -= inc_bP) // one iteration per source pixel
   3.554 +			{
   3.555 +				u32 color4, color5, color6;
   3.556 +				u32 color1, color2, color3;
   3.557 +				u32 colorA1, colorA2, colorB1, colorB2, colorS1, colorS2;
   3.558 +				u32 product1a, product1b, product2a, product2b; // 1a/1b: left/right pixel of the upper output row, 2a/2b: of the lower output row
   3.559 +				// NOTE(review): the loads below reach one row up, two rows down, one pixel left and two right of every pixel, edges included - presumably the caller pads the source buffer; confirm.
   3.560 +				colorB1 = *(bP - Nextline); // row above, columns 0 and +1
   3.561 +				colorB2 = *(bP - Nextline + 1);
   3.562 +				// Current row, columns -1..+2 (color5 is the pixel being scaled).
   3.563 +				color4	= *(bP - 1);
   3.564 +				color5	= *(bP);
   3.565 +				color6	= *(bP + 1);
   3.566 +				colorS2 = *(bP + 2);
   3.567 +				// Next row, columns -1..+2.
   3.568 +				color1	= *(bP + Nextline - 1);
   3.569 +				color2	= *(bP + Nextline);
   3.570 +				color3	= *(bP + Nextline + 1);
   3.571 +				colorS1 = *(bP + Nextline + 2);
   3.572 +				// Two rows below, columns 0 and +1.
   3.573 +				colorA1 = *(bP + Nextline + Nextline);
   3.574 +				colorA2 = *(bP + Nextline + Nextline + 1);
   3.575 +
   3.576 +				// --------------------------------------
   3.577 +				if (color2 == color6 && color5 != color3) // only the 2-6 diagonal matches: it fills upper-right and lower-left
   3.578 +				{
   3.579 +					product1b = product2a = color2;
   3.580 +					if ((color1 == color2) || (color6 == colorB2))
   3.581 +					{
   3.582 +						product1a = INTERPOLATE(color2, color5);
   3.583 +						product1a = INTERPOLATE(color2, product1a); // second blend moves the result further towards color2
   3.584 +						//                       product1a = color2;
   3.585 +					}
   3.586 +					else
   3.587 +					{
   3.588 +						product1a = INTERPOLATE(color5, color6);
   3.589 +					}
   3.590 +
   3.591 +					if ((color6 == colorS2) || (color2 == colorA1))
   3.592 +					{
   3.593 +						product2b = INTERPOLATE(color2, color3);
   3.594 +						product2b = INTERPOLATE(color2, product2b); // second blend moves the result further towards color2
   3.595 +						//                       product2b = color2;
   3.596 +					}
   3.597 +					else
   3.598 +					{
   3.599 +						product2b = INTERPOLATE(color2, color3);
   3.600 +					}
   3.601 +				}
   3.602 +				else if (color5 == color3 && color2 != color6) // only the 5-3 diagonal matches: it fills upper-left and lower-right
   3.603 +				{
   3.604 +					product2b = product1a = color5;
   3.605 +
   3.606 +					if ((colorB1 == color5) || (color3 == colorS1))
   3.607 +					{
   3.608 +						product1b = INTERPOLATE(color5, color6);
   3.609 +						product1b = INTERPOLATE(color5, product1b); // second blend moves the result further towards color5
   3.610 +						//                       product1b = color5;
   3.611 +					}
   3.612 +					else
   3.613 +					{
   3.614 +						product1b = INTERPOLATE(color5, color6);
   3.615 +					}
   3.616 +
   3.617 +					if ((color3 == colorA2) || (color4 == color5))
   3.618 +					{
   3.619 +						product2a = INTERPOLATE(color5, color2);
   3.620 +						product2a = INTERPOLATE(color5, product2a); // second blend moves the result further towards color5
   3.621 +						//                       product2a = color5;
   3.622 +					}
   3.623 +					else
   3.624 +					{
   3.625 +						product2a = INTERPOLATE(color2, color3);
   3.626 +					}
   3.627 +				}
   3.628 +				else if (color5 == color3 && color2 == color6) // both diagonals match: decide by the GetResult sum
   3.629 +				{
   3.630 +					register int r = 0;
   3.631 +
   3.632 +					r += GetResult(color6, color5, color1, colorA1);
   3.633 +					r += GetResult(color6, color5, color4, colorB1);
   3.634 +					r += GetResult(color6, color5, colorA2, colorS1);
   3.635 +					r += GetResult(color6, color5, colorB2, colorS2);
   3.636 +
   3.637 +					if (r > 0)
   3.638 +					{
   3.639 +						product1b = product2a = color2; // color2 == color6 in this branch
   3.640 +						product1a = product2b = INTERPOLATE(color5, color6);
   3.641 +					}
   3.642 +					else if (r < 0)
   3.643 +					{
   3.644 +						product2b = product1a = color5;
   3.645 +						product1b = product2a = INTERPOLATE(color5, color6);
   3.646 +					}
   3.647 +					else // tie: keep both diagonals unblended
   3.648 +					{
   3.649 +						product2b = product1a = color5;
   3.650 +						product1b = product2a = color2;
   3.651 +					}
   3.652 +				}
   3.653 +				else // neither diagonal matches: each output pixel is its own source pixel combined with a blend of two neighbours
   3.654 +				{
   3.655 +					product2b = product1a = INTERPOLATE(color2, color6);
   3.656 +					product2b =
   3.657 +					    Q_INTERPOLATE(color3, color3, color3, product2b);
   3.658 +					product1a =
   3.659 +					    Q_INTERPOLATE(color5, color5, color5, product1a);
   3.660 +
   3.661 +					product2a = product1b = INTERPOLATE(color5, color3);
   3.662 +					product2a =
   3.663 +					    Q_INTERPOLATE(color2, color2, color2, product2a);
   3.664 +					product1b =
   3.665 +					    Q_INTERPOLATE(color6, color6, color6, product1b);
   3.666 +
   3.667 +					//                    product1a = color5;
   3.668 +					//                    product1b = color6;
   3.669 +					//                    product2a = color2;
   3.670 +					//                    product2b = color3;
   3.671 +				}
   3.672 +#ifdef WORDS_BIGENDIAN
   3.673 +				product1a = (product1a << 16) | product1b; // pack each left/right pair so the "a" pixel lands at the lower address
   3.674 +				product2a = (product2a << 16) | product2b;
   3.675 +#else
   3.676 +				product1a = product1a | (product1b << 16); // little-endian packing: same memory order as the branch above
   3.677 +				product2a = product2a | (product2b << 16);
   3.678 +#endif
   3.679 +				// Upper pair goes to the current destination row, lower pair one destination row (dstPitch bytes) further.
   3.680 +				*((u32 *) dP) = product1a;
   3.681 +				*((u32 *) (dP + dstPitch)) = product2a;
   3.682 +				*xP = color5; // copy the source pixel into the delta buffer
   3.683 +
   3.684 +				bP += inc_bP;
   3.685 +				xP += inc_bP;
   3.686 +				dP += sizeof(u32); // two 16-bit output pixels per source pixel
   3.687 +			}           // end of for ( finish= width etc..)
   3.688 +
   3.689 +			srcPtr	 += srcPitch;
   3.690 +			dstPtr	 += dstPitch << 1; // skip the two destination rows just written
   3.691 +			deltaPtr += srcPitch;
   3.692 +		}               // endof: for (height; height; height--)
   3.693 +	}
   3.694 +}
   3.695 +
   3.696 +void SuperEagle32(u8 *srcPtr, u32 srcPitch, u8 *deltaPtr, // 32-bit-pixel version of SuperEagle: every source pixel produces a 2x2 block in dstPtr; no MMX path.
   3.697 +                  u8 *dstPtr, u32 dstPitch, int width, int height) // width = source pixels per row, height = source rows; both pitches are byte strides.
   3.698 +{
   3.699 +	u32 *dP; // current destination pixel
   3.700 +	u32 *bP; // current source pixel
   3.701 +	u32 *xP; // current position in the delta buffer
   3.702 +	u32	 inc_bP; // source step per iteration; always 1 in this function
   3.703 +
   3.704 +	inc_bP = 1;
   3.705 +
   3.706 +	u32 Nextline = srcPitch >> 2; // source row stride counted in u32 pixels
   3.707 +
   3.708 +	for (; height; height--) // NOTE(review): both loops stop only when the counter reaches 0, so width and height are presumably non-negative - confirm
   3.709 +	{
   3.710 +		bP = (u32 *) srcPtr;
   3.711 +		xP = (u32 *) deltaPtr;
   3.712 +		dP = (u32 *)dstPtr;
   3.713 +		for (u32 finish = width; finish; finish -= inc_bP) // one iteration per source pixel
   3.714 +		{
   3.715 +			u32 color4, color5, color6;
   3.716 +			u32 color1, color2, color3;
   3.717 +			u32 colorA1, colorA2, colorB1, colorB2, colorS1, colorS2;
   3.718 +			u32 product1a, product1b, product2a, product2b; // 1a/1b: left/right pixel of the upper output row, 2a/2b: of the lower output row
   3.719 +			// NOTE(review): the loads below reach one row up, two rows down, one pixel left and two right of every pixel, edges included - presumably the caller pads the source buffer; confirm.
   3.720 +			colorB1 = *(bP - Nextline); // row above, columns 0 and +1
   3.721 +			colorB2 = *(bP - Nextline + 1);
   3.722 +			// Current row, columns -1..+2 (color5 is the pixel being scaled).
   3.723 +			color4	= *(bP - 1);
   3.724 +			color5	= *(bP);
   3.725 +			color6	= *(bP + 1);
   3.726 +			colorS2 = *(bP + 2);
   3.727 +			// Next row, columns -1..+2.
   3.728 +			color1	= *(bP + Nextline - 1);
   3.729 +			color2	= *(bP + Nextline);
   3.730 +			color3	= *(bP + Nextline + 1);
   3.731 +			colorS1 = *(bP + Nextline + 2);
   3.732 +			// Two rows below, columns 0 and +1.
   3.733 +			colorA1 = *(bP + Nextline + Nextline);
   3.734 +			colorA2 = *(bP + Nextline + Nextline + 1);
   3.735 +
   3.736 +			// --------------------------------------
   3.737 +			if (color2 == color6 && color5 != color3) // only the 2-6 diagonal matches: it fills upper-right and lower-left
   3.738 +			{
   3.739 +				product1b = product2a = color2;
   3.740 +				if ((color1 == color2) || (color6 == colorB2))
   3.741 +				{
   3.742 +					product1a = INTERPOLATE(color2, color5);
   3.743 +					product1a = INTERPOLATE(color2, product1a); // second blend moves the result further towards color2
   3.744 +					//                       product1a = color2;
   3.745 +				}
   3.746 +				else
   3.747 +				{
   3.748 +					product1a = INTERPOLATE(color5, color6);
   3.749 +				}
   3.750 +
   3.751 +				if ((color6 == colorS2) || (color2 == colorA1))
   3.752 +				{
   3.753 +					product2b = INTERPOLATE(color2, color3);
   3.754 +					product2b = INTERPOLATE(color2, product2b); // second blend moves the result further towards color2
   3.755 +					//                       product2b = color2;
   3.756 +				}
   3.757 +				else
   3.758 +				{
   3.759 +					product2b = INTERPOLATE(color2, color3);
   3.760 +				}
   3.761 +			}
   3.762 +			else if (color5 == color3 && color2 != color6) // only the 5-3 diagonal matches: it fills upper-left and lower-right
   3.763 +			{
   3.764 +				product2b = product1a = color5;
   3.765 +
   3.766 +				if ((colorB1 == color5) || (color3 == colorS1))
   3.767 +				{
   3.768 +					product1b = INTERPOLATE(color5, color6);
   3.769 +					product1b = INTERPOLATE(color5, product1b); // second blend moves the result further towards color5
   3.770 +					//                       product1b = color5;
   3.771 +				}
   3.772 +				else
   3.773 +				{
   3.774 +					product1b = INTERPOLATE(color5, color6);
   3.775 +				}
   3.776 +
   3.777 +				if ((color3 == colorA2) || (color4 == color5))
   3.778 +				{
   3.779 +					product2a = INTERPOLATE(color5, color2);
   3.780 +					product2a = INTERPOLATE(color5, product2a); // second blend moves the result further towards color5
   3.781 +					//                       product2a = color5;
   3.782 +				}
   3.783 +				else
   3.784 +				{
   3.785 +					product2a = INTERPOLATE(color2, color3);
   3.786 +				}
   3.787 +			}
   3.788 +			else if (color5 == color3 && color2 == color6) // both diagonals match: decide by the GetResult sum
   3.789 +			{
   3.790 +				register int r = 0;
   3.791 +
   3.792 +				r += GetResult(color6, color5, color1, colorA1);
   3.793 +				r += GetResult(color6, color5, color4, colorB1);
   3.794 +				r += GetResult(color6, color5, colorA2, colorS1);
   3.795 +				r += GetResult(color6, color5, colorB2, colorS2);
   3.796 +
   3.797 +				if (r > 0)
   3.798 +				{
   3.799 +					product1b = product2a = color2; // color2 == color6 in this branch
   3.800 +					product1a = product2b = INTERPOLATE(color5, color6);
   3.801 +				}
   3.802 +				else if (r < 0)
   3.803 +				{
   3.804 +					product2b = product1a = color5;
   3.805 +					product1b = product2a = INTERPOLATE(color5, color6);
   3.806 +				}
   3.807 +				else // tie: keep both diagonals unblended
   3.808 +				{
   3.809 +					product2b = product1a = color5;
   3.810 +					product1b = product2a = color2;
   3.811 +				}
   3.812 +			}
   3.813 +			else // neither diagonal matches: each output pixel is its own source pixel combined with a blend of two neighbours
   3.814 +			{
   3.815 +				product2b = product1a = INTERPOLATE(color2, color6);
   3.816 +				product2b =
   3.817 +				    Q_INTERPOLATE(color3, color3, color3, product2b);
   3.818 +				product1a =
   3.819 +				    Q_INTERPOLATE(color5, color5, color5, product1a);
   3.820 +
   3.821 +				product2a = product1b = INTERPOLATE(color5, color3);
   3.822 +				product2a =
   3.823 +				    Q_INTERPOLATE(color2, color2, color2, product2a);
   3.824 +				product1b =
   3.825 +				    Q_INTERPOLATE(color6, color6, color6, product1b);
   3.826 +
   3.827 +				//                    product1a = color5;
   3.828 +				//                    product1b = color6;
   3.829 +				//                    product2a = color2;
   3.830 +				//                    product2b = color3;
   3.831 +			}
   3.832 +			*(dP)	  = product1a; // upper output row: left, then right
   3.833 +			*(dP + 1) = product1b;
   3.834 +			*(dP + (dstPitch >> 2))		= product2a; // lower output row: one destination row (dstPitch bytes) further
   3.835 +			*(dP + (dstPitch >> 2) + 1) = product2b;
   3.836 +			*xP = color5; // copy the source pixel into the delta buffer
   3.837 +
   3.838 +			bP += inc_bP;
   3.839 +			xP += inc_bP;
   3.840 +			dP += 2; // two 32-bit output pixels per source pixel
   3.841 +		}             // end of for ( finish= width etc..)
   3.842 +
   3.843 +		srcPtr	 += srcPitch;
   3.844 +		dstPtr	 += dstPitch << 1; // skip the two destination rows just written
   3.845 +		deltaPtr += srcPitch;
   3.846 +	}                 // endof: for (height; height; height--)
   3.847 +}
   3.848 +
   3.849 +void _2xSaI(u8 *srcPtr, u32 srcPitch, u8 *deltaPtr,  // 2xSaI 2x upscaler for 16-bit (u16) pixels: writes a 2x2 output block per source pixel
   3.850 +            u8 *dstPtr, u32 dstPitch, int width, int height)  // srcPitch/dstPitch are byte strides; width/height count source pixels/rows
   3.851 +{
   3.852 +	u8 * dP;  // write cursor in the destination (byte pointer)
   3.853 +	u16 *bP;  // read cursor: the source pixel labelled A in the map below
   3.854 +	u32	 inc_bP;  // source pixels consumed per iteration (set to 1 below)
   3.855 +
   3.856 +#ifdef MMX
   3.857 +	if (cpu_mmx)  // MMX path: each source row is handed to the external _2xSaILine routine
   3.858 +	{
   3.859 +		for (; height; height -= 1)
   3.860 +		{
   3.861 +			_2xSaILine(srcPtr, deltaPtr, srcPitch, width, dstPtr, dstPitch);
   3.862 +			srcPtr	 += srcPitch;  // next source row
   3.863 +			dstPtr	 += dstPitch * 2;  // every source row produces two destination rows
   3.864 +			deltaPtr += srcPitch;  // delta buffer is stepped with the source stride
   3.865 +		}
   3.866 +	}
   3.867 +	else
   3.868 +#endif
   3.869 +	{  // portable C path; deltaPtr is advanced here but never read
   3.870 +		inc_bP = 1;
   3.871 +
   3.872 +		u32 Nextline = srcPitch >> 1;  // source row stride expressed in u16 pixels
   3.873 +
   3.874 +		for (; height; height--)
   3.875 +		{
   3.876 +			bP = (u16 *) srcPtr;
   3.877 +			dP = dstPtr;
   3.878 +
   3.879 +			for (u32 finish = width; finish; finish -= inc_bP)  // runs width times because inc_bP is 1
   3.880 +			{
   3.881 +				register u32 colorA, colorB;
   3.882 +				u32 colorC, colorD,
   3.883 +				    colorE, colorF, colorG, colorH,
   3.884 +				    colorI, colorJ, colorK, colorL,
   3.885 +
   3.886 +				    colorM, colorN, colorO, colorP;
   3.887 +				u32 product, product1, product2;  // output pixels right of, below, and diagonal to A (see the stores at the end)
   3.888 +				// The loads below read one row above, two rows below, one pixel left and two pixels right of A, so the caller must provide that border.
   3.889 +				//---------------------------------------
   3.890 +				// Map of the pixels:                    I|E F|J
   3.891 +				//                                       G|A B|K
   3.892 +				//                                       H|C D|L
   3.893 +				//                                       M|N O|P
   3.894 +				colorI = *(bP - Nextline - 1);
   3.895 +				colorE = *(bP - Nextline);
   3.896 +				colorF = *(bP - Nextline + 1);
   3.897 +				colorJ = *(bP - Nextline + 2);
   3.898 +
   3.899 +				colorG = *(bP - 1);
   3.900 +				colorA = *(bP);
   3.901 +				colorB = *(bP + 1);
   3.902 +				colorK = *(bP + 2);
   3.903 +
   3.904 +				colorH = *(bP + Nextline - 1);
   3.905 +				colorC = *(bP + Nextline);
   3.906 +				colorD = *(bP + Nextline + 1);
   3.907 +				colorL = *(bP + Nextline + 2);
   3.908 +
   3.909 +				colorM = *(bP + Nextline + Nextline - 1);
   3.910 +				colorN = *(bP + Nextline + Nextline);
   3.911 +				colorO = *(bP + Nextline + Nextline + 1);
   3.912 +				colorP = *(bP + Nextline + Nextline + 2);
   3.913 +
   3.914 +				if ((colorA == colorD) && (colorB != colorC))  // case 1: only the A-D diagonal matches
   3.915 +				{
   3.916 +					if (((colorA == colorE) && (colorB == colorL)) ||
   3.917 +					    ((colorA == colorC) && (colorA == colorF)
   3.918 +					     && (colorB != colorE) && (colorB == colorJ)))
   3.919 +					{
   3.920 +						product = colorA;
   3.921 +					}
   3.922 +					else
   3.923 +					{
   3.924 +						product = INTERPOLATE(colorA, colorB);
   3.925 +					}
   3.926 +
   3.927 +					if (((colorA == colorG) && (colorC == colorO)) ||
   3.928 +					    ((colorA == colorB) && (colorA == colorH)
   3.929 +					     && (colorG != colorC) && (colorC == colorM)))
   3.930 +					{
   3.931 +						product1 = colorA;
   3.932 +					}
   3.933 +					else
   3.934 +					{
   3.935 +						product1 = INTERPOLATE(colorA, colorC);
   3.936 +					}
   3.937 +					product2 = colorA;  // diagonal output takes the matching diagonal colour
   3.938 +				}
   3.939 +				else if ((colorB == colorC) && (colorA != colorD))  // case 2: only the B-C diagonal matches
   3.940 +				{
   3.941 +					if (((colorB == colorF) && (colorA == colorH)) ||
   3.942 +					    ((colorB == colorE) && (colorB == colorD)
   3.943 +					     && (colorA != colorF) && (colorA == colorI)))
   3.944 +					{
   3.945 +						product = colorB;
   3.946 +					}
   3.947 +					else
   3.948 +					{
   3.949 +						product = INTERPOLATE(colorA, colorB);
   3.950 +					}
   3.951 +
   3.952 +					if (((colorC == colorH) && (colorA == colorF)) ||
   3.953 +					    ((colorC == colorG) && (colorC == colorD)
   3.954 +					     && (colorA != colorH) && (colorA == colorI)))
   3.955 +					{
   3.956 +						product1 = colorC;
   3.957 +					}
   3.958 +					else
   3.959 +					{
   3.960 +						product1 = INTERPOLATE(colorA, colorC);
   3.961 +					}
   3.962 +					product2 = colorB;  // diagonal output takes the matching diagonal colour
   3.963 +				}
   3.964 +				else if ((colorA == colorD) && (colorB == colorC))  // case 3: both diagonals match
   3.965 +				{
   3.966 +					if (colorA == colorB)  // A, B, C and D are all equal: plain copy
   3.967 +					{
   3.968 +						product	 = colorA;
   3.969 +						product1 = colorA;
   3.970 +						product2 = colorA;
   3.971 +					}
   3.972 +					else
   3.973 +					{
   3.974 +						register int r = 0;  // signed tally of the four GetResult calls (helpers defined outside this block)
   3.975 +
   3.976 +						product1 = INTERPOLATE(colorA, colorC);
   3.977 +						product	 = INTERPOLATE(colorA, colorB);
   3.978 +
   3.979 +						r +=
   3.980 +						    GetResult1(colorA, colorB, colorG, colorE,
   3.981 +						               colorI);
   3.982 +						r +=
   3.983 +						    GetResult2(colorB, colorA, colorK, colorF,
   3.984 +						               colorJ);
   3.985 +						r +=
   3.986 +						    GetResult2(colorB, colorA, colorH, colorN,
   3.987 +						               colorM);
   3.988 +						r +=
   3.989 +						    GetResult1(colorA, colorB, colorL, colorO,
   3.990 +						               colorP);
   3.991 +
   3.992 +						if (r > 0)  // positive tally selects A
   3.993 +							product2 = colorA;
   3.994 +						else if (r < 0)  // negative tally selects B
   3.995 +							product2 = colorB;
   3.996 +						else  // tie: combine all four centre pixels
   3.997 +						{
   3.998 +							product2 =
   3.999 +							    Q_INTERPOLATE(colorA, colorB, colorC,
  3.1000 +							                  colorD);
  3.1001 +						}
  3.1002 +					}
  3.1003 +				}
  3.1004 +				else  // case 4: neither diagonal matches (A != D and B != C)
  3.1005 +				{
  3.1006 +					product2 = Q_INTERPOLATE(colorA, colorB, colorC, colorD);
  3.1007 +
  3.1008 +					if ((colorA == colorC) && (colorA == colorF)
  3.1009 +					    && (colorB != colorE) && (colorB == colorJ))
  3.1010 +					{
  3.1011 +						product = colorA;
  3.1012 +					}
  3.1013 +					else if ((colorB == colorE) && (colorB == colorD)
  3.1014 +					         && (colorA != colorF) && (colorA == colorI))
  3.1015 +					{
  3.1016 +						product = colorB;
  3.1017 +					}
  3.1018 +					else
  3.1019 +					{
  3.1020 +						product = INTERPOLATE(colorA, colorB);
  3.1021 +					}
  3.1022 +
  3.1023 +					if ((colorA == colorB) && (colorA == colorH)
  3.1024 +					    && (colorG != colorC) && (colorC == colorM))
  3.1025 +					{
  3.1026 +						product1 = colorA;
  3.1027 +					}
  3.1028 +					else if ((colorC == colorG) && (colorC == colorD)
  3.1029 +					         && (colorA != colorH) && (colorA == colorI))
  3.1030 +					{
  3.1031 +						product1 = colorC;
  3.1032 +					}
  3.1033 +					else
  3.1034 +					{
  3.1035 +						product1 = INTERPOLATE(colorA, colorC);
  3.1036 +					}
  3.1037 +				}
  3.1038 +				// Pack two 16-bit pixels per 32-bit word so that colorA / product1 land at the lower address on either byte order.
  3.1039 +#ifdef WORDS_BIGENDIAN
  3.1040 +				product	 = (colorA << 16) | product;
  3.1041 +				product1 = (product1 << 16) | product2;
  3.1042 +#else
  3.1043 +				product	 = colorA | (product << 16);
  3.1044 +				product1 = product1 | (product2 << 16);
  3.1045 +#endif
  3.1046 +				*((s32 *) dP) = product;  // top output row: A followed by the right-hand pixel
  3.1047 +				*((u32 *) (dP + dstPitch)) = product1;  // bottom output row, dstPitch bytes further on
  3.1048 +
  3.1049 +				bP += inc_bP;
  3.1050 +				dP += sizeof(u32);  // two 16-bit output pixels were written
  3.1051 +			}           // end of the per-pixel loop
  3.1052 +
  3.1053 +			srcPtr	 += srcPitch;
  3.1054 +			dstPtr	 += dstPitch << 1;  // skip the two destination rows just written
  3.1055 +			deltaPtr += srcPitch;  // kept in step with srcPtr although unused on this path
  3.1056 +		}               // end of the per-row loop
  3.1057 +	}
  3.1058 +}
  3.1059 +
  3.1060 +void _2xSaI32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,  // 2xSaI 2x upscaler for 32-bit (u32) pixels; the delta buffer argument is ignored
  3.1061 +              u8 *dstPtr, u32 dstPitch, int width, int height)  // srcPitch/dstPitch are byte strides; width/height count source pixels/rows
  3.1062 +{
  3.1063 +	u32 *dP;  // write cursor: top-left pixel of the current 2x2 output block
  3.1064 +	u32 *bP;  // read cursor: the source pixel labelled A in the map below
  3.1065 +	u32	 inc_bP = 1;  // source pixels consumed per iteration
  3.1066 +
  3.1067 +	u32 Nextline = srcPitch >> 2;  // source row stride expressed in u32 pixels
  3.1068 +
  3.1069 +	for (; height; height--)
  3.1070 +	{
  3.1071 +		bP = (u32 *) srcPtr;
  3.1072 +		dP = (u32 *) dstPtr;
  3.1073 +
  3.1074 +		for (u32 finish = width; finish; finish -= inc_bP)  // runs width times because inc_bP is 1
  3.1075 +		{
  3.1076 +			register u32 colorA, colorB;
  3.1077 +			u32 colorC, colorD,
  3.1078 +			    colorE, colorF, colorG, colorH,
  3.1079 +			    colorI, colorJ, colorK, colorL,
  3.1080 +
  3.1081 +			    colorM, colorN, colorO, colorP;
  3.1082 +			u32 product, product1, product2;  // output pixels right of, below, and diagonal to A (see the stores at the end)
  3.1083 +			// The loads below read one row above, two rows below, one pixel left and two pixels right of A, so the caller must provide that border.
  3.1084 +			//---------------------------------------
  3.1085 +			// Map of the pixels:                    I|E F|J
  3.1086 +			//                                       G|A B|K
  3.1087 +			//                                       H|C D|L
  3.1088 +			//                                       M|N O|P
  3.1089 +			colorI = *(bP - Nextline - 1);
  3.1090 +			colorE = *(bP - Nextline);
  3.1091 +			colorF = *(bP - Nextline + 1);
  3.1092 +			colorJ = *(bP - Nextline + 2);
  3.1093 +
  3.1094 +			colorG = *(bP - 1);
  3.1095 +			colorA = *(bP);
  3.1096 +			colorB = *(bP + 1);
  3.1097 +			colorK = *(bP + 2);
  3.1098 +
  3.1099 +			colorH = *(bP + Nextline - 1);
  3.1100 +			colorC = *(bP + Nextline);
  3.1101 +			colorD = *(bP + Nextline + 1);
  3.1102 +			colorL = *(bP + Nextline + 2);
  3.1103 +
  3.1104 +			colorM = *(bP + Nextline + Nextline - 1);
  3.1105 +			colorN = *(bP + Nextline + Nextline);
  3.1106 +			colorO = *(bP + Nextline + Nextline + 1);
  3.1107 +			colorP = *(bP + Nextline + Nextline + 2);
  3.1108 +
  3.1109 +			if ((colorA == colorD) && (colorB != colorC))  // case 1: only the A-D diagonal matches
  3.1110 +			{
  3.1111 +				if (((colorA == colorE) && (colorB == colorL)) ||
  3.1112 +				    ((colorA == colorC) && (colorA == colorF)
  3.1113 +				     && (colorB != colorE) && (colorB == colorJ)))
  3.1114 +				{
  3.1115 +					product = colorA;
  3.1116 +				}
  3.1117 +				else
  3.1118 +				{
  3.1119 +					product = INTERPOLATE(colorA, colorB);
  3.1120 +				}
  3.1121 +
  3.1122 +				if (((colorA == colorG) && (colorC == colorO)) ||
  3.1123 +				    ((colorA == colorB) && (colorA == colorH)
  3.1124 +				     && (colorG != colorC) && (colorC == colorM)))
  3.1125 +				{
  3.1126 +					product1 = colorA;
  3.1127 +				}
  3.1128 +				else
  3.1129 +				{
  3.1130 +					product1 = INTERPOLATE(colorA, colorC);
  3.1131 +				}
  3.1132 +				product2 = colorA;  // diagonal output takes the matching diagonal colour
  3.1133 +			}
  3.1134 +			else if ((colorB == colorC) && (colorA != colorD))  // case 2: only the B-C diagonal matches
  3.1135 +			{
  3.1136 +				if (((colorB == colorF) && (colorA == colorH)) ||
  3.1137 +				    ((colorB == colorE) && (colorB == colorD)
  3.1138 +				     && (colorA != colorF) && (colorA == colorI)))
  3.1139 +				{
  3.1140 +					product = colorB;
  3.1141 +				}
  3.1142 +				else
  3.1143 +				{
  3.1144 +					product = INTERPOLATE(colorA, colorB);
  3.1145 +				}
  3.1146 +
  3.1147 +				if (((colorC == colorH) && (colorA == colorF)) ||
  3.1148 +				    ((colorC == colorG) && (colorC == colorD)
  3.1149 +				     && (colorA != colorH) && (colorA == colorI)))
  3.1150 +				{
  3.1151 +					product1 = colorC;
  3.1152 +				}
  3.1153 +				else
  3.1154 +				{
  3.1155 +					product1 = INTERPOLATE(colorA, colorC);
  3.1156 +				}
  3.1157 +				product2 = colorB;  // diagonal output takes the matching diagonal colour
  3.1158 +			}
  3.1159 +			else if ((colorA == colorD) && (colorB == colorC))  // case 3: both diagonals match
  3.1160 +			{
  3.1161 +				if (colorA == colorB)  // A, B, C and D are all equal: plain copy
  3.1162 +				{
  3.1163 +					product	 = colorA;
  3.1164 +					product1 = colorA;
  3.1165 +					product2 = colorA;
  3.1166 +				}
  3.1167 +				else
  3.1168 +				{
  3.1169 +					register int r = 0;  // signed tally of the four GetResult calls (helpers defined outside this block)
  3.1170 +
  3.1171 +					product1 = INTERPOLATE(colorA, colorC);
  3.1172 +					product	 = INTERPOLATE(colorA, colorB);
  3.1173 +
  3.1174 +					r +=
  3.1175 +					    GetResult1(colorA, colorB, colorG, colorE,
  3.1176 +					               colorI);
  3.1177 +					r +=
  3.1178 +					    GetResult2(colorB, colorA, colorK, colorF,
  3.1179 +					               colorJ);
  3.1180 +					r +=
  3.1181 +					    GetResult2(colorB, colorA, colorH, colorN,
  3.1182 +					               colorM);
  3.1183 +					r +=
  3.1184 +					    GetResult1(colorA, colorB, colorL, colorO,
  3.1185 +					               colorP);
  3.1186 +
  3.1187 +					if (r > 0)  // positive tally selects A
  3.1188 +						product2 = colorA;
  3.1189 +					else if (r < 0)  // negative tally selects B
  3.1190 +						product2 = colorB;
  3.1191 +					else  // tie: combine all four centre pixels
  3.1192 +					{
  3.1193 +						product2 =
  3.1194 +						    Q_INTERPOLATE(colorA, colorB, colorC,
  3.1195 +						                  colorD);
  3.1196 +					}
  3.1197 +				}
  3.1198 +			}
  3.1199 +			else  // case 4: neither diagonal matches (A != D and B != C)
  3.1200 +			{
  3.1201 +				product2 = Q_INTERPOLATE(colorA, colorB, colorC, colorD);
  3.1202 +
  3.1203 +				if ((colorA == colorC) && (colorA == colorF)
  3.1204 +				    && (colorB != colorE) && (colorB == colorJ))
  3.1205 +				{
  3.1206 +					product = colorA;
  3.1207 +				}
  3.1208 +				else if ((colorB == colorE) && (colorB == colorD)
  3.1209 +				         && (colorA != colorF) && (colorA == colorI))
  3.1210 +				{
  3.1211 +					product = colorB;
  3.1212 +				}
  3.1213 +				else
  3.1214 +				{
  3.1215 +					product = INTERPOLATE(colorA, colorB);
  3.1216 +				}
  3.1217 +
  3.1218 +				if ((colorA == colorB) && (colorA == colorH)
  3.1219 +				    && (colorG != colorC) && (colorC == colorM))
  3.1220 +				{
  3.1221 +					product1 = colorA;
  3.1222 +				}
  3.1223 +				else if ((colorC == colorG) && (colorC == colorD)
  3.1224 +				         && (colorA != colorH) && (colorA == colorI))
  3.1225 +				{
  3.1226 +					product1 = colorC;
  3.1227 +				}
  3.1228 +				else
  3.1229 +				{
  3.1230 +					product1 = INTERPOLATE(colorA, colorC);
  3.1231 +				}
  3.1232 +			}
  3.1233 +			*(dP)	  = colorA;  // top-left output: the source pixel unchanged
  3.1234 +			*(dP + 1) = product;  // top-right output
  3.1235 +			*(dP + (dstPitch >> 2))		= product1;  // bottom-left output (dstPitch >> 2 is the destination stride in u32 pixels)
  3.1236 +			*(dP + (dstPitch >> 2) + 1) = product2;  // bottom-right output
  3.1237 +
  3.1238 +			bP += inc_bP;
  3.1239 +			dP += 2;  // two output pixels per source pixel
  3.1240 +		}             // end of the per-pixel loop
  3.1241 +
  3.1242 +		srcPtr += srcPitch;
  3.1243 +		dstPtr += dstPitch << 1;  // skip the two destination rows just written
  3.1244 +		//    deltaPtr += srcPitch;
  3.1245 +	}                 // end of the per-row loop
  3.1246 +}
  3.1247 +
  3.1248 +static u32 Bilinear(u32 A, u32 B, u32 x)  // Weighted blend of pixels A and B; the weight of B comes from bits 11-15 of x, A gets the remainder of 32
  3.1249 +{
  3.1250 +	unsigned long areaA, areaB;  // blend weights; they always sum to 0x20
  3.1251 +	unsigned long result;
  3.1252 +
  3.1253 +	if (A == B)  // identical inputs need no arithmetic
  3.1254 +		return A;
  3.1255 +
  3.1256 +	areaB = (x >> 11) & 0x1f;   // reduce 16 bit fraction to 5 bits
  3.1257 +	areaA = 0x20 - areaB;
  3.1258 +	// Move the green field 16 bits up so each channel has headroom for the multiply by a weight of up to 0x20 (masks are defined outside this block).
  3.1259 +	A = (A & redblueMask) | ((A & greenMask) << 16);
  3.1260 +	B = (B & redblueMask) | ((B & greenMask) << 16);
  3.1261 +
  3.1262 +	result = ((areaA * A) + (areaB * B)) >> 5;  // weighted sum divided by the total weight of 32
  3.1263 +
  3.1264 +	return (result & redblueMask) | ((result >> 16) & greenMask);  // fold green back into its original position
  3.1265 +}
  3.1266 +
  3.1267 +static u32 Bilinear4(u32 A, u32 B, u32 C, u32 D, u32 x,  // Four-pixel weighted blend of A, B, C, D; weights are derived from bits 11-15 of x and y
  3.1268 +                     u32 y)
  3.1269 +{
  3.1270 +	unsigned long areaA, areaB, areaC, areaD;  // per-pixel weights; by construction below they sum to 0x20
  3.1271 +	unsigned long result, xy;
  3.1272 +
  3.1273 +	x  = (x >> 11) & 0x1f;  // keep 5 bits of each fraction
  3.1274 +	y  = (y >> 11) & 0x1f;
  3.1275 +	xy = (x * y) >> 5;  // product of the two 5-bit fractions, rescaled to 5 bits
  3.1276 +	// Move the green field 16 bits up so each channel has headroom for the weighted sum (masks are defined outside this block).
  3.1277 +	A = (A & redblueMask) | ((A & greenMask) << 16);
  3.1278 +	B = (B & redblueMask) | ((B & greenMask) << 16);
  3.1279 +	C = (C & redblueMask) | ((C & greenMask) << 16);
  3.1280 +	D = (D & redblueMask) | ((D & greenMask) << 16);
  3.1281 +
  3.1282 +	areaA = 0x20 + xy - x - y;  // weight of A: whatever is left of 32 after B, C and D
  3.1283 +	areaB = x - xy;  // weight of B grows with x
  3.1284 +	areaC = y - xy;  // weight of C grows with y
  3.1285 +	areaD = xy;  // weight of D grows with both
  3.1286 +
  3.1287 +	result = ((areaA * A) + (areaB * B) + (areaC * C) + (areaD * D)) >> 5;  // weighted sum divided by the total weight of 32
  3.1288 +
  3.1289 +	return (result & redblueMask) | ((result >> 16) & greenMask);  // fold green back into its original position
  3.1290 +}
  3.1291 +
  3.1292 +void Scale_2xSaI(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,  // Rescales a u16-pixel image by walking the source in 16.16 fixed-point steps; the delta buffer is ignored
  3.1293 +                 u8 *dstPtr, u32 dstPitch,  // srcPitch/dstPitch are byte strides
  3.1294 +                 u32 dstWidth, u32 dstHeight, int width, int height)  // width/height describe the source, dstWidth/dstHeight the requested output
  3.1295 +{
  3.1296 +	u8 * dP;  // write cursor in the current destination row
  3.1297 +	u16 *bP;  // start of the source row being sampled
  3.1298 +
  3.1299 +	u32 w;  // horizontal source position, 16.16 fixed point
  3.1300 +	u32 h;  // vertical source position, 16.16 fixed point
  3.1301 +	u32 dw;  // horizontal step per output pixel
  3.1302 +	u32 dh;  // vertical step per output row
  3.1303 +	u32 hfinish;  // exclusive upper bound for h
  3.1304 +	u32 wfinish;  // exclusive upper bound for w
  3.1305 +
  3.1306 +	u32 Nextline = srcPitch >> 1;  // source row stride expressed in u16 pixels
  3.1307 +
  3.1308 +	wfinish = (width - 1) << 16; // convert to fixed point
  3.1309 +	dw		= wfinish / (dstWidth - 1);  // NOTE(review): divides by zero when dstWidth == 1
  3.1310 +	hfinish = (height - 1) << 16; // convert to fixed point
  3.1311 +	dh		= hfinish / (dstHeight - 1);  // NOTE(review): divides by zero when dstHeight == 1
  3.1312 +
  3.1313 +	for (h = 0; h < hfinish; h += dh)  // one iteration per destination row
  3.1314 +	{
  3.1315 +		u32 y1, y2;
  3.1316 +
  3.1317 +		y1 = h & 0xffff; // fraction part of fixed point
  3.1318 +		bP = (u16 *) (srcPtr + ((h >> 16) * srcPitch));  // integer part of h selects the source row
  3.1319 +		dP = dstPtr;
  3.1320 +		y2 = 0x10000 - y1;  // complement of the vertical fraction
  3.1321 +
  3.1322 +		w = 0;
  3.1323 +
  3.1324 +		for (; w < wfinish; )  // one iteration per destination pixel; w is advanced at the bottom
  3.1325 +		{
  3.1326 +			u32 A, B, C, D;
  3.1327 +			u32 E, F, G, H;
  3.1328 +			u32 I, J, K, L;
  3.1329 +			u32 x1, x2, a1, f1, f2;
  3.1330 +			u32 position, product1;
  3.1331 +
  3.1332 +			position = w >> 16;  // integer part of w selects the source column
  3.1333 +			A		 = bP[position]; // current pixel
  3.1334 +			B		 = bP[position + 1]; // next pixel
  3.1335 +			C		 = bP[position + Nextline];  // below A
  3.1336 +			D		 = bP[position + Nextline + 1];  // below B
  3.1337 +			E		 = bP[position - Nextline];  // above A; NOTE(review): the neighbour loads reach one row above, two rows below, one pixel left and two pixels right of A
  3.1338 +			F		 = bP[position - Nextline + 1];  // above B
  3.1339 +			G		 = bP[position - 1];  // left of A
  3.1340 +			H		 = bP[position + Nextline - 1];  // left of C
  3.1341 +			I		 = bP[position + 2];  // right of B
  3.1342 +			J		 = bP[position + Nextline + 2];  // right of D
  3.1343 +			K		 = bP[position + Nextline + Nextline];  // below C
  3.1344 +			L		 = bP[position + Nextline + Nextline + 1];  // below D
  3.1345 +
  3.1346 +			x1 = w & 0xffff; // fraction part of fixed point
  3.1347 +			x2 = 0x10000 - x1;  // complement of the horizontal fraction
  3.1348 +
  3.1349 +			/*0*/
  3.1350 +			if (A == B && C == D && A == C)  // flat 2x2 area: copy
  3.1351 +				product1 = A;
  3.1352 +			else /*1*/ if (A == D && B != C)  // only the A-D diagonal matches
  3.1353 +			{
  3.1354 +				f1 = (x1 >> 1) + (0x10000 >> 2);  // x1 / 2 + 0.25 in 16.16 fixed point
  3.1355 +				f2 = (y1 >> 1) + (0x10000 >> 2);  // y1 / 2 + 0.25 in 16.16 fixed point
  3.1356 +				if (y1 <= f1 && A == J && A != E) // close to B
  3.1357 +				{
  3.1358 +					a1		 = f1 - y1;
  3.1359 +					product1 = Bilinear(A, B, a1);
  3.1360 +				}
  3.1361 +				else if (y1 >= f1 && A == G && A != L)  // close to C
  3.1362 +				{
  3.1363 +					a1		 = y1 - f1;
  3.1364 +					product1 = Bilinear(A, C, a1);
  3.1365 +				}
  3.1366 +				else if (x1 >= f2 && A == E && A != J) // close to B
  3.1367 +				{
  3.1368 +					a1		 = x1 - f2;
  3.1369 +					product1 = Bilinear(A, B, a1);
  3.1370 +				}
  3.1371 +				else if (x1 <= f2 && A == L && A != G) // close to C
  3.1372 +				{
  3.1373 +					a1		 = f2 - x1;
  3.1374 +					product1 = Bilinear(A, C, a1);
  3.1375 +				}
  3.1376 +				else if (y1 >= x1) // close to C
  3.1377 +				{
  3.1378 +					a1		 = y1 - x1;
  3.1379 +					product1 = Bilinear(A, C, a1);
  3.1380 +				}
  3.1381 +				else if (y1 <= x1) // close to B; together with the previous test this covers every case, so product1 is always set
  3.1382 +				{
  3.1383 +					a1		 = x1 - y1;
  3.1384 +					product1 = Bilinear(A, B, a1);
  3.1385 +				}
  3.1386 +			}
  3.1387 +			else
  3.1388 +				/*2*/
  3.1389 +				if (B == C && A != D)  // only the B-C diagonal matches
  3.1390 +				{
  3.1391 +					f1 = (x1 >> 1) + (0x10000 >> 2);  // x1 / 2 + 0.25 in 16.16 fixed point
  3.1392 +					f2 = (y1 >> 1) + (0x10000 >> 2);  // y1 / 2 + 0.25 in 16.16 fixed point
  3.1393 +					if (y2 >= f1 && B == H && B != F) // close to A
  3.1394 +					{
  3.1395 +						a1		 = y2 - f1;
  3.1396 +						product1 = Bilinear(B, A, a1);
  3.1397 +					}
  3.1398 +					else if (y2 <= f1 && B == I && B != K) // close to D
  3.1399 +					{
  3.1400 +						a1		 = f1 - y2;
  3.1401 +						product1 = Bilinear(B, D, a1);
  3.1402 +					}
  3.1403 +					else if (x2 >= f2 && B == F && B != H) // close to A
  3.1404 +					{
  3.1405 +						a1		 = x2 - f2;
  3.1406 +						product1 = Bilinear(B, A, a1);
  3.1407 +					}
  3.1408 +					else if (x2 <= f2 && B == K && B != I) // close to D
  3.1409 +					{
  3.1410 +						a1		 = f2 - x2;
  3.1411 +						product1 = Bilinear(B, D, a1);
  3.1412 +					}
  3.1413 +					else if (y2 >= x1) // close to A
  3.1414 +					{
  3.1415 +						a1		 = y2 - x1;
  3.1416 +						product1 = Bilinear(B, A, a1);
  3.1417 +					}
  3.1418 +					else if (y2 <= x1) // close to D; together with the previous test this covers every case, so product1 is always set
  3.1419 +					{
  3.1420 +						a1		 = x1 - y2;
  3.1421 +						product1 = Bilinear(B, D, a1);
  3.1422 +					}
  3.1423 +				}
  3.1424 +				/*3*/
  3.1425 +				else  // no usable diagonal: four-pixel blend weighted by both fractions
  3.1426 +				{
  3.1427 +					product1 = Bilinear4(A, B, C, D, x1, y1);
  3.1428 +				}
  3.1429 +
  3.1430 +			//end First Pixel
  3.1431 +			*(u32 *) dP = product1;  // NOTE(review): 32-bit store while dP advances only 2 bytes, so each store also touches the 2 bytes after the pixel; confirm the row has that slack and that this is intended on big-endian hosts
  3.1432 +			dP += 2;  // one u16 output pixel
  3.1433 +			w  += dw;
  3.1434 +		}
  3.1435 +		dstPtr += dstPitch;  // next destination row
  3.1436 +	}
  3.1437 +}
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/src/filters/2xSaImmx.asm	Sun Mar 04 20:32:31 2012 -0600
     4.3 @@ -0,0 +1,2109 @@
     4.4 +;/*---------------------------------------------------------------------*
     4.5 +; * The following (piece of) code, (part of) the 2xSaI engine,          *
     4.6 +; * copyright (c) 1999 - 2001 by Derek Liauw Kie Fa.                    *
     4.7 +; * Non-Commercial use of this software is allowed and is encouraged,   *
     4.8 +; * provided that appropriate credit be given.                          *
     4.9 +; * You may freely modify this code, but I request                      *
    4.10 +; * that any improvements to the engine be submitted to me, so          *
    4.11 +; * that I can implement these improvements in newer versions of        *
    4.12 +; * the software.                                                       *
    4.13 +; * If you need more information, have any comments or suggestions,     *
    4.14 +; * you can e-mail me. My e-mail: derek-liauw@usa.net.                  *
    4.15 +; *---------------------------------------------------------------------*/
    4.16 +
    4.17 +;----------------------
    4.18 +; 2xSaI version 0.59 WIP, soon to become version 0.60
    4.19 +;----------------------
    4.20 +
    4.21 +;%define FAR_POINTER
    4.22 +
    4.23 +
    4.24 +
    4.25 +          BITS 32                        ; assemble as 32-bit code
    4.26 +%ifdef __DJGPP__
    4.27 +          GLOBAL __2xSaILine             ; DJGPP builds export every symbol with one extra leading underscore
    4.28 +          GLOBAL __2xSaISuperEagleLine
    4.29 +                  GLOBAL __2xSaISuper2xSaILine
    4.30 +          GLOBAL _Init_2xSaIMMX
    4.31 +%else
    4.32 +          GLOBAL _2xSaILine              ; same four entry points under their plain names
    4.33 +          GLOBAL _2xSaISuperEagleLine
    4.34 +                  GLOBAL _2xSaISuper2xSaILine
    4.35 +          GLOBAL Init_2xSaIMMX
    4.36 +%endif
    4.37 +          SECTION .text ALIGN = 32       ; code section, 32-byte aligned
    4.38 +
    4.39 +%ifdef FAR_POINTER
    4.40 +;EXTERN_C void _2xSaILine (uint8 *srcPtr, uint8 *deltaPtr, uint32 srcPitch, uint32 width,
    4.41 +;                        uint8 *dstPtr, uint32 dstPitch, uint16 dstSegment);
    4.42 +%else
    4.43 +;EXTERN_C void _2xSaILine (uint8 *srcPtr, uint8 *deltaPtr, uint32 srcPitch, uint32 width,
    4.44 +;                        uint8 *dstPtr, uint32 dstPitch);
    4.45 +%endif
    4.46 +
    4.47 +srcPtr        equ 8     ; stack-argument offsets, read as [ebp+name] after the push ebp / mov ebp, esp prologue
    4.48 +deltaPtr      equ 12    ; second argument: delta buffer
    4.49 +srcPitch      equ 16    ; third argument: source pitch (loaded into ebx)
    4.50 +width         equ 20    ; fourth argument: pixels to process (loaded into ecx)
    4.51 +dstOffset     equ 24    ; fifth argument: destination pointer (loaded into edx)
    4.52 +dstPitch      equ 28    ; sixth argument
    4.53 +dstSegment    equ 32    ; seventh argument, only read when FAR_POINTER is defined
    4.54 +
    4.55 +
    4.56 +
    4.57 +
    4.58 +colorB0   equ -2        ; byte offsets within the row at [eax]; steps of 2 bytes, consistent with 16-bit pixels
    4.59 +colorB1   equ 0
    4.60 +colorB2   equ 2
    4.61 +colorB3   equ 4
    4.62 +
    4.63 +color7   equ -2
    4.64 +color8   equ 0
    4.65 +color9   equ 2
    4.66 +
    4.67 +color4   equ -2         ; used with the row at [eax+ebx] in _2xSaISuper2xSaILine
    4.68 +color5   equ 0
    4.69 +color6   equ 2
    4.70 +colorS2   equ 4
    4.71 +
    4.72 +color1   equ -2         ; used with the row at [eax+ebx+ebx] in _2xSaISuper2xSaILine
    4.73 +color2   equ 0
    4.74 +color3   equ 2
    4.75 +colorS1   equ 4
    4.76 +
    4.77 +colorA0   equ -2        ; used with the row three pitches below [eax] in _2xSaISuper2xSaILine
    4.78 +colorA1   equ 0
    4.79 +colorA2   equ 2
    4.80 +colorA3   equ 4
    4.81 +
    4.82 +
    4.83 +
    4.84 +
    4.81 +
    4.82 +
    4.83 +
    4.84 +
    4.85 +%ifdef __DJGPP__
    4.86 +__2xSaISuper2xSaILine:
    4.87 +%else
    4.88 +_2xSaISuper2xSaILine:
    4.89 +%endif
    4.90 +; Store some stuff
    4.91 +         push ebp
    4.92 +         mov ebp, esp
    4.93 +         pushad
    4.94 +
    4.95 +; Prepare the destination
    4.96 +%ifdef FAR_POINTER
    4.97 +         ; Set the selector
    4.98 +         mov eax, [ebp+dstSegment]
    4.99 +         mov fs, ax
   4.100 +%endif
   4.101 +         mov edx, [ebp+dstOffset]         ; edx points to the screen
   4.102 +; Prepare the source
   4.103 +         ; eax points to colorA
   4.104 +         mov eax, [ebp+srcPtr]                          ;eax points to colorA
   4.105 +         mov ebx, [ebp+srcPitch]                        ;ebx contains the source pitch
   4.106 +         mov ecx, [ebp+width]                           ;ecx contains the number of pixels to process
   4.107 +         ; eax now points to colorB1
   4.108 +         sub eax, ebx                                           ;eax points to B1 which is the base 
   4.109 +
   4.110 +; Main Loop
   4.111 +.Loop:   push ecx
   4.112 +
   4.113 +         ;-----Check Delta------------------
   4.114 +         mov ecx, [ebp+deltaPtr]
   4.115 +
   4.116 +
   4.117 +                ;load source img
   4.118 +         movq mm0, [eax+colorB0]
   4.119 +         movq mm1, [eax+colorB3]
   4.120 +         movq mm2, [eax+ebx+color4]
   4.121 +         movq mm3, [eax+ebx+colorS2]
   4.122 +         movq mm4, [eax+ebx+ebx+color1]
   4.123 +         movq mm5, [eax+ebx+ebx+colorS1]
   4.124 +         push eax
   4.125 +         add eax, ebx
   4.126 +         movq mm6, [eax+ebx+ebx+colorA0]
   4.127 +         movq mm7, [eax+ebx+ebx+colorA3]
   4.128 +         pop eax
   4.129 +
   4.130 +                ;compare to delta
   4.131 +         pcmpeqw mm0, [ecx+2+colorB0]
   4.132 +         pcmpeqw mm1, [ecx+2+colorB3]
   4.133 +         pcmpeqw mm2, [ecx+ebx+2+color4]
   4.134 +         pcmpeqw mm3, [ecx+ebx+2+colorS2]
   4.135 +         pcmpeqw mm4, [ecx+ebx+ebx+2+color1]
   4.136 +         pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1]
   4.137 +         add ecx, ebx
   4.138 +         pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0]
   4.139 +         pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3]
   4.140 +         sub ecx, ebx
   4.141 +
   4.142 +
   4.143 +                ;compose results
   4.144 +         pand mm0, mm1
   4.145 +         pand mm2, mm3
   4.146 +         pand mm4, mm5
   4.147 +         pand mm6, mm7
   4.148 +         pand mm0, mm2
   4.149 +         pand mm4, mm6
   4.150 +         pxor mm7, mm7
   4.151 +         pand mm0, mm4
   4.152 +         movq mm6, [eax+colorB0]
   4.153 +         pcmpeqw mm7, mm0                       ;did any compare give us a zero ?
   4.154 +
   4.155 +         movq [ecx+2+colorB0], mm6
   4.156 +
   4.157 +         packsswb mm7, mm7
   4.158 +         movd ecx, mm7
   4.159 +         test ecx, ecx                          
   4.160 +         jz near .SKIP_PROCESS          ;no, so we can skip
   4.161 +
   4.162 +         ;End Delta
   4.163 +
   4.164 +         ;---------------------------------
   4.165 +         movq mm0, [eax+ebx+color5]
   4.166 +         movq mm1, [eax+ebx+color6]
   4.167 +         movq mm2, mm0
   4.168 +         movq mm3, mm1
   4.169 +         movq mm4, mm0
   4.170 +         movq mm5, mm1
   4.171 +
   4.172 +         pand mm0, [colorMask]
   4.173 +         pand mm1, [colorMask]
   4.174 +
   4.175 +         psrlw mm0, 1
   4.176 +         psrlw mm1, 1
   4.177 +
   4.178 +         pand mm3, [lowPixelMask]
   4.179 +         paddw mm0, mm1
   4.180 +
   4.181 +         pand mm3, mm2
   4.182 +         paddw mm0, mm3                ;mm0 contains the interpolated values
   4.183 +         movq [I56Pixel], mm0
   4.184 +         movq mm7, mm0
   4.185 +
   4.186 +         ;-------------------
   4.187 +         movq mm0, mm7
   4.188 +         movq mm1, mm4  ;5,5,5,6
   4.189 +         movq mm2, mm0
   4.190 +         movq mm3, mm1
   4.191 +
   4.192 +         pand mm0, [colorMask]
   4.193 +         pand mm1, [colorMask]
   4.194 +
   4.195 +         psrlw mm0, 1
   4.196 +         psrlw mm1, 1
   4.197 +
   4.198 +         pand mm3, [lowPixelMask]
   4.199 +         paddw mm0, mm1
   4.200 +
   4.201 +         pand mm3, mm2
   4.202 +         paddw mm0, mm3                ;mm0 contains the interpolated values
   4.203 +         movq [I5556Pixel], mm0
   4.204 +         ;--------------------
   4.205 +
   4.206 +         movq mm0, mm7
   4.207 +         movq mm1, mm5  ;6,6,6,5
   4.208 +         movq mm2, mm0
   4.209 +         movq mm3, mm1
   4.210 +
   4.211 +         pand mm0, [colorMask]
   4.212 +         pand mm1, [colorMask]
   4.213 +
   4.214 +         psrlw mm0, 1
   4.215 +         psrlw mm1, 1
   4.216 +
   4.217 +         pand mm3, [lowPixelMask]
   4.218 +         paddw mm0, mm1
   4.219 +
   4.220 +         pand mm3, mm2
   4.221 +         paddw mm0, mm3
   4.222 +         movq [I5666Pixel], mm0
   4.223 +
   4.224 +         ;-------------------------
   4.225 +         ;-------------------------
   4.226 +         movq mm0, [eax+ebx+ebx+color2]
   4.227 +         movq mm1, [eax+ebx+ebx+color3]
   4.228 +         movq mm2, mm0
   4.229 +         movq mm3, mm1
   4.230 +         movq mm4, mm0
   4.231 +         movq mm5, mm1
   4.232 +
   4.233 +         pand mm0, [colorMask]
   4.234 +         pand mm1, [colorMask]
   4.235 +
   4.236 +         psrlw mm0, 1
   4.237 +         psrlw mm1, 1
   4.238 +
   4.239 +         pand mm3, [lowPixelMask]
   4.240 +         paddw mm0, mm1
   4.241 +
   4.242 +         pand mm3, mm2
   4.243 +         paddw mm0, mm3
   4.244 +         movq [I23Pixel], mm0
   4.245 +         movq mm7, mm0
   4.246 +
   4.247 +         ;---------------------
   4.248 +         movq mm0, mm7
   4.249 +         movq mm1, mm4  ;2,2,2,3
   4.250 +         movq mm2, mm0
   4.251 +         movq mm3, mm1
   4.252 +
   4.253 +         pand mm0, [colorMask]
   4.254 +         pand mm1, [colorMask]
   4.255 +
   4.256 +         psrlw mm0, 1
   4.257 +         psrlw mm1, 1
   4.258 +
   4.259 +         pand mm3, [lowPixelMask]
   4.260 +         paddw mm0, mm1
   4.261 +
   4.262 +         pand mm3, mm2
   4.263 +         paddw mm0, mm3
   4.264 +         movq [I2223Pixel], mm0
   4.265 +
   4.266 +         ;----------------------
   4.267 +         movq mm0, mm7
   4.268 +         movq mm1, mm5  ;3,3,3,2
   4.269 +         movq mm2, mm0
   4.270 +         movq mm3, mm1
   4.271 +
   4.272 +         pand mm0, [colorMask]
   4.273 +         pand mm1, [colorMask]
   4.274 +
   4.275 +         psrlw mm0, 1
   4.276 +         psrlw mm1, 1
   4.277 +
   4.278 +         pand mm3, [lowPixelMask]
   4.279 +         paddw mm0, mm1
   4.280 +
   4.281 +         pand mm3, mm2
   4.282 +         paddw mm0, mm3
   4.283 +         movq [I2333Pixel], mm0
   4.284 +
   4.285 +
   4.286 +         ;--------------------
   4.287 +;////////////////////////////////
   4.288 +; Decide which "branch" to take
   4.289 +;--------------------------------
   4.290 +         movq mm0, [eax+ebx+color5]
   4.291 +         movq mm1, [eax+ebx+color6]
   4.292 +         movq mm6, mm0
   4.293 +         movq mm7, mm1
   4.294 +         pcmpeqw mm0, [eax+ebx+ebx+color3]
   4.295 +         pcmpeqw mm1, [eax+ebx+ebx+color2]
   4.296 +         pcmpeqw mm6, mm7
   4.297 +
   4.298 +         movq mm2, mm0
   4.299 +         movq mm3, mm0
   4.300 +
   4.301 +         pand mm0, mm1       ;colorA == colorD && colorB == colorC
   4.302 +         pxor mm7, mm7
   4.303 +
   4.304 +         pcmpeqw mm2, mm7
   4.305 +         pand mm6, mm0
   4.306 +         pand mm2, mm1       ;colorA != colorD && colorB == colorC
   4.307 +
   4.308 +         pcmpeqw mm1, mm7
   4.309 +
   4.310 +         pand mm1, mm3       ;colorA == colorD && colorB != colorC
   4.311 +         pxor mm0, mm6
   4.312 +         por mm1, mm6
   4.313 +         movq mm7, mm0
   4.314 +         movq [Mask26], mm2
   4.315 +         packsswb mm7, mm7
   4.316 +         movq [Mask35], mm1
   4.317 +
   4.318 +         movd ecx, mm7
   4.319 +         test ecx, ecx
   4.320 +         jz near .SKIP_GUESS
   4.321 +
   4.322 +;---------------------------------------------
   4.323 +         movq mm6, mm0
   4.324 +         movq mm4, [eax+ebx+colorA]
   4.325 +         movq mm5, [eax+ebx+colorB]
   4.326 +         pxor mm7, mm7
   4.327 +         pand mm6, [ONE]
   4.328 +
   4.329 +         movq mm0, [eax+colorE]
   4.330 +         movq mm1, [eax+ebx+colorG]
   4.331 +         movq mm2, mm0
   4.332 +         movq mm3, mm1
   4.333 +         pcmpeqw mm0, mm4
   4.334 +         pcmpeqw mm1, mm4
   4.335 +         pcmpeqw mm2, mm5
   4.336 +         pcmpeqw mm3, mm5
   4.337 +         pand mm0, mm6
   4.338 +         pand mm1, mm6
   4.339 +         pand mm2, mm6
   4.340 +         pand mm3, mm6
   4.341 +         paddw mm0, mm1
   4.342 +         paddw mm2, mm3
   4.343 +
   4.344 +         pxor mm3, mm3
   4.345 +         pcmpgtw mm0, mm6
   4.346 +         pcmpgtw mm2, mm6
   4.347 +         pcmpeqw mm0, mm3
   4.348 +         pcmpeqw mm2, mm3
   4.349 +         pand mm0, mm6
   4.350 +         pand mm2, mm6
   4.351 +         paddw mm7, mm0
   4.352 +         psubw mm7, mm2
   4.353 +
   4.354 +         movq mm0, [eax+colorF]
   4.355 +         movq mm1, [eax+ebx+colorK]
   4.356 +         movq mm2, mm0
   4.357 +         movq mm3, mm1
   4.358 +         pcmpeqw mm0, mm4
   4.359 +         pcmpeqw mm1, mm4
   4.360 +         pcmpeqw mm2, mm5
   4.361 +         pcmpeqw mm3, mm5
   4.362 +         pand mm0, mm6
   4.363 +         pand mm1, mm6
   4.364 +         pand mm2, mm6
   4.365 +         pand mm3, mm6
   4.366 +         paddw mm0, mm1
   4.367 +         paddw mm2, mm3
   4.368 +
   4.369 +         pxor mm3, mm3
   4.370 +         pcmpgtw mm0, mm6
   4.371 +         pcmpgtw mm2, mm6
   4.372 +         pcmpeqw mm0, mm3
   4.373 +         pcmpeqw mm2, mm3
   4.374 +         pand mm0, mm6
   4.375 +         pand mm2, mm6
   4.376 +         paddw mm7, mm0
   4.377 +         psubw mm7, mm2
   4.378 +
   4.379 +         push eax
   4.380 +         add eax, ebx
   4.381 +         movq mm0, [eax+ebx+colorH]
   4.382 +         movq mm1, [eax+ebx+ebx+colorN]
   4.383 +         movq mm2, mm0
   4.384 +         movq mm3, mm1
   4.385 +         pcmpeqw mm0, mm4
   4.386 +         pcmpeqw mm1, mm4
   4.387 +         pcmpeqw mm2, mm5
   4.388 +         pcmpeqw mm3, mm5
   4.389 +         pand mm0, mm6
   4.390 +         pand mm1, mm6
   4.391 +         pand mm2, mm6
   4.392 +         pand mm3, mm6
   4.393 +         paddw mm0, mm1
   4.394 +         paddw mm2, mm3
   4.395 +
   4.396 +         pxor mm3, mm3
   4.397 +         pcmpgtw mm0, mm6
   4.398 +         pcmpgtw mm2, mm6
   4.399 +         pcmpeqw mm0, mm3
   4.400 +         pcmpeqw mm2, mm3
   4.401 +         pand mm0, mm6
   4.402 +         pand mm2, mm6
   4.403 +         paddw mm7, mm0
   4.404 +         psubw mm7, mm2
   4.405 +
   4.406 +         movq mm0, [eax+ebx+colorL]
   4.407 +         movq mm1, [eax+ebx+ebx+colorO]
   4.408 +         movq mm2, mm0
   4.409 +         movq mm3, mm1
   4.410 +         pcmpeqw mm0, mm4
   4.411 +         pcmpeqw mm1, mm4
   4.412 +         pcmpeqw mm2, mm5
   4.413 +         pcmpeqw mm3, mm5
   4.414 +         pand mm0, mm6
   4.415 +         pand mm1, mm6
   4.416 +         pand mm2, mm6
   4.417 +         pand mm3, mm6
   4.418 +         paddw mm0, mm1
   4.419 +         paddw mm2, mm3
   4.420 +
   4.421 +         pxor mm3, mm3
   4.422 +         pcmpgtw mm0, mm6
   4.423 +         pcmpgtw mm2, mm6
   4.424 +         pcmpeqw mm0, mm3
   4.425 +         pcmpeqw mm2, mm3
   4.426 +         pand mm0, mm6
   4.427 +         pand mm2, mm6
   4.428 +         paddw mm7, mm0
   4.429 +         psubw mm7, mm2
   4.430 +
   4.431 +         pop eax
   4.432 +         movq mm1, mm7
   4.433 +         pxor mm0, mm0
   4.434 +         pcmpgtw mm7, mm0
   4.435 +         pcmpgtw mm0, mm1
   4.436 +
   4.437 +         por mm7, [Mask35]
   4.438 +         por mm0, [Mask26] 
   4.439 +         movq [Mask35], mm7
   4.440 +         movq [Mask26], mm0
   4.441 +
   4.442 +.SKIP_GUESS:
   4.443 +
   4.444 +         ;Start the assembly: compose all the results together to form the final image.
   4.445 +
   4.446 +                 
   4.447 +         movq mm0, [eax+ebx+color5]
   4.448 +         movq mm1, [eax+ebx+ebx+color2]
   4.449 +         movq mm2, mm0
   4.450 +         movq mm3, mm1
   4.451 +         movq mm4, mm0
   4.452 +         movq mm5, mm1
   4.453 +
   4.454 +         pand mm0, [colorMask]
   4.455 +         pand mm1, [colorMask]
   4.456 +
   4.457 +         psrlw mm0, 1
   4.458 +         psrlw mm1, 1
   4.459 +
   4.460 +         pand mm3, [lowPixelMask]
   4.461 +         paddw mm0, mm1
   4.462 +
   4.463 +         pand mm3, mm2
   4.464 +         paddw mm0, mm3                ;mm0 contains the interpolated values
   4.465 +                 ;---------------------------
   4.466 +
   4.467 +
   4.468 +
   4.469 +%ifdef dfhsdfhsdahdsfhdsfh
   4.470 +
   4.471 +                if (color5 == color3 && color2 != color6 && color4 == color5 && color5 != colorA2)
   4.472 +                   product2a = INTERPOLATE (color2, color5);
   4.473 +                else
   4.474 +                if (color5 == color1 && color6 == color5 && color4 != color2 && color5 != colorA0)
   4.475 +                   product2a = INTERPOLATE(color2, color5);
   4.476 +                else
   4.477 +                   product2a = color2;
   4.478 +
   4.479 +                if (color2 == color6 && color5 != color3 && color1 == color2 && color2 != colorB2)
   4.480 +                   product1a = INTERPOLATE (color2, color5);
   4.481 +                else
   4.482 +                if (color4 == color2 && color3 == color2 && color1 != color5 && color2 != colorB0)
   4.483 +                   product1a = INTERPOLATE(color2, color5);
   4.484 +                else
   4.485 +                   product1a = color5;
   4.486 +
   4.487 +%endif
   4.488 +
   4.489 +
   4.490 +                 movq mm7, [Mask26]
   4.491 +                 movq mm6, [eax+colorB2]
   4.492 +                 movq mm5, [eax+ebx+ebx+color2]
   4.493 +                 movq mm4, [eax+ebx+ebx+color1]
   4.494 +                 pcmpeqw mm4, mm5
   4.495 +                 pcmpeqw mm6, mm5
   4.496 +                 pxor mm5, mm5
   4.497 +                 pand mm7, mm4
   4.498 +                 pcmpeqw mm6, mm5
   4.499 +                 pand mm7, mm6
   4.500 +
   4.501 +
   4.502 +
   4.503 +                 movq mm6, [eax+ebx+ebx+color3]
   4.504 +                 movq mm5, [eax+ebx+ebx+color2]
   4.505 +                 movq mm4, [eax+ebx+ebx+color1]
   4.506 +                 movq mm2, [eax+ebx+color5]
   4.507 +                 movq mm1, [eax+ebx+color4]
   4.508 +                 movq mm3, [eax+colorB0]
   4.509 +
   4.510 +                 pcmpeqw mm2, mm4
   4.511 +                 pcmpeqw mm6, mm5
   4.512 +                 pcmpeqw mm1, mm5
   4.513 +                 pcmpeqw mm3, mm5
   4.514 +                 pxor mm5, mm5
   4.515 +                 pcmpeqw mm2, mm5
   4.516 +                 pcmpeqw mm3, mm5
   4.517 +                 pand mm6, mm1
   4.518 +                 pand mm2, mm3
   4.519 +                 pand mm6, mm2
   4.520 +                 por mm7, mm6
   4.521 +
   4.522 +                 
   4.523 +                 movq mm6, mm7
   4.524 +                 pcmpeqw mm6, mm5
   4.525 +                 pand mm7, mm0
   4.526 +
   4.527 +                 movq mm1, [eax+ebx+color5]
   4.528 +                 pand mm6, mm1
   4.529 +                 por mm7, mm6
   4.530 +                 movq [final1a], mm7                    ;finished  1a
   4.531 +
   4.532 +
   4.533 +         
   4.534 +             ;--------------------------------           
   4.535 +
   4.536 +                 movq mm7, [Mask35]
   4.537 +                 push eax
   4.538 +                 add eax, ebx
   4.539 +                 movq mm6, [eax+ebx+ebx+colorA2]
   4.540 +                 pop eax
   4.541 +                 movq mm5, [eax+ebx+color5]
   4.542 +                 movq mm4, [eax+ebx+color4]
   4.543 +                 pcmpeqw mm4, mm5
   4.544 +                 pcmpeqw mm6, mm5
   4.545 +                 pxor mm5, mm5
   4.546 +                 pand mm7, mm4
   4.547 +                 pcmpeqw mm6, mm5
   4.548 +                 pand mm7, mm6
   4.549 +
   4.550 +
   4.551 +
   4.552 +                 movq mm6, [eax+ebx+color6]
   4.553 +                 movq mm5, [eax+ebx+color5]
   4.554 +                 movq mm4, [eax+ebx+color4]
   4.555 +                 movq mm2, [eax+ebx+ebx+color2]
   4.556 +                 movq mm1, [eax+ebx+ebx+color1]
   4.557 +                 push eax
   4.558 +                 add eax, ebx
   4.559 +                 movq mm3, [eax+ebx+ebx+colorA0]
   4.560 +                 pop eax
   4.561 +
   4.562 +                 pcmpeqw mm2, mm4
   4.563 +                 pcmpeqw mm6, mm5
   4.564 +                 pcmpeqw mm1, mm5
   4.565 +                 pcmpeqw mm3, mm5
   4.566 +                 pxor mm5, mm5
   4.567 +                 pcmpeqw mm2, mm5
   4.568 +                 pcmpeqw mm3, mm5
   4.569 +                 pand mm6, mm1
   4.570 +                 pand mm2, mm3
   4.571 +                 pand mm6, mm2
   4.572 +                 por mm7, mm6
   4.573 +
   4.574 +                 
   4.575 +                 movq mm6, mm7
   4.576 +                 pcmpeqw mm6, mm5
   4.577 +                 pand mm7, mm0
   4.578 +
   4.579 +                 movq mm1, [eax+ebx+ebx+color2]
   4.580 +                 pand mm6, mm1
   4.581 +                 por mm7, mm6
   4.582 +                 movq [final2a], mm7                    ;finished  2a
   4.583 +
   4.584 +
   4.585 +                 ;--------------------------------------------
   4.586 + 
   4.587 +
   4.588 +%ifdef dfhsdfhsdahdsfhdsfh
   4.589 +                   if (color6 == color3 && color3 == colorA1 && color2 != colorA2 && color3 != colorA0)
   4.590 +                      product2b = Q_INTERPOLATE (color3, color3, color3, color2);
   4.591 +                   else
   4.592 +                   if (color5 == color2 && color2 == colorA2 && colorA1 != color3 && color2 != colorA3)
   4.593 +                      product2b = Q_INTERPOLATE (color2, color2, color2, color3);
   4.594 +                   else
   4.595 +                      product2b = INTERPOLATE (color2, color3);
   4.596 +
   4.597 +                   if (color6 == color3 && color6 == colorB1 && color5 != colorB2 && color6 != colorB0)
   4.598 +                      product1b = Q_INTERPOLATE (color6, color6, color6, color5);
   4.599 +                   else
   4.600 +                   if (color5 == color2 && color5 == colorB2 && colorB1 != color6 && color5 != colorB3)
   4.601 +                      product1b = Q_INTERPOLATE (color6, color5, color5, color5);
   4.602 +                   else
   4.603 +                      product1b = INTERPOLATE (color5, color6);
   4.604 +%endif
   4.605 +
   4.606 +                 push eax
   4.607 +                 add eax, ebx
   4.608 +                 pxor mm7, mm7
   4.609 +                 movq mm0, [eax+ebx+ebx+colorA0]
   4.610 +                 movq mm1, [eax+ebx+ebx+colorA1]
   4.611 +                 movq mm2, [eax+ebx+ebx+colorA2]
   4.612 +                 movq mm3, [eax+ebx+ebx+colorA3]
   4.613 +                 pop eax
   4.614 +                 movq mm4, [eax+ebx+ebx+color2]
   4.615 +                 movq mm5, [eax+ebx+ebx+color3]
   4.616 +                 movq mm6, [eax+ebx+color6]
   4.617 +
   4.618 +                 pcmpeqw mm6, mm5
   4.619 +                 pcmpeqw mm1, mm5
   4.620 +                 pcmpeqw mm4, mm2
   4.621 +                 pcmpeqw mm0, mm5
   4.622 +                 pcmpeqw mm4, mm7
   4.623 +                 pcmpeqw mm0, mm7
   4.624 +                 pand mm0, mm4
   4.625 +                 pand mm6, mm1
   4.626 +                 pand mm0, mm6
   4.627 +
   4.628 +
   4.629 +                 push eax
   4.630 +                 add eax, ebx
   4.631 +                 movq mm1, [eax+ebx+ebx+colorA1]
   4.632 +                 pop eax
   4.633 +                 movq mm4, [eax+ebx+ebx+color2]
   4.634 +                 movq mm5, [eax+ebx+color5]
   4.635 +                 movq mm6, [eax+ebx+ebx+color3]
   4.636 +
   4.637 +                 pcmpeqw mm5, mm4
   4.638 +                 pcmpeqw mm2, mm4
   4.639 +                 pcmpeqw mm1, mm6
   4.640 +                 pcmpeqw mm3, mm4
   4.641 +                 pcmpeqw mm1, mm7
   4.642 +                 pcmpeqw mm3, mm7
   4.643 +                 pand mm2, mm5
   4.644 +                 pand mm1, mm3
   4.645 +                 pand mm1, mm2
   4.646 +
   4.647 +
   4.648 +                 movq mm7, mm0
   4.649 +                 por mm7, mm1
   4.650 +
   4.651 +                 movq mm4, [Mask35]
   4.652 +                 movq mm3, [Mask26]
   4.653 +                 
   4.654 +                 movq mm6, mm4
   4.655 +                 pand mm6, mm7
   4.656 +                 pxor mm4, mm6
   4.657 +
   4.658 +                 movq mm6, mm3
   4.659 +                 pand mm6, mm7
   4.660 +                 pxor mm3, mm6
   4.661 +
   4.662 +                 movq mm2, mm0
   4.663 +                 movq mm7, [I2333Pixel]
   4.664 +                 movq mm6, [I2223Pixel]
   4.665 +                 movq mm5, [I23Pixel]
   4.666 +
   4.667 +
   4.668 +                 por mm2, mm4
   4.669 +                 pand mm4, [eax+ebx+ebx+color3]
   4.670 +                 por mm2, mm3
   4.671 +                 pand mm3, [eax+ebx+ebx+color2]
   4.672 +                 por mm2, mm1
   4.673 +                 pand mm0, mm7
   4.674 +                 pand mm1, mm6
   4.675 +                 pxor mm7, mm7
   4.676 +                 pcmpeqw mm2, mm7
   4.677 +                 por mm0, mm1
   4.678 +                 por mm3, mm4
   4.679 +                 pand mm2, mm5
   4.680 +                 por mm0, mm3
   4.681 +                 por mm0, mm2
   4.682 +                 movq [final2b], mm0
   4.683 +
   4.684 +                 ;-----------------------------------
   4.685 +                 
   4.686 +
   4.687 +                 pxor mm7, mm7
   4.688 +                 movq mm0, [eax+colorB0]
   4.689 +                 movq mm1, [eax+colorB1]
   4.690 +                 movq mm2, [eax+colorB2]
   4.691 +                 movq mm3, [eax+colorB3]
   4.692 +                 movq mm4, [eax+ebx+color5]
   4.693 +                 movq mm5, [eax+ebx+color6]
   4.694 +                 movq mm6, [eax+ebx+ebx+color3]
   4.695 +
   4.696 +                 pcmpeqw mm6, mm5
   4.697 +                 pcmpeqw mm1, mm5
   4.698 +                 pcmpeqw mm4, mm2
   4.699 +                 pcmpeqw mm0, mm5
   4.700 +                 pcmpeqw mm4, mm7
   4.701 +                 pcmpeqw mm0, mm7
   4.702 +                 pand mm0, mm4
   4.703 +                 pand mm6, mm1
   4.704 +                 pand mm0, mm6
   4.705 +
   4.706 +                 movq mm1, [eax+colorB1]
   4.707 +                 movq mm4, [eax+ebx+color5]
   4.708 +                 movq mm5, [eax+ebx+ebx+color2]
   4.709 +                 movq mm6, [eax+ebx+color6]
   4.710 +
   4.711 +                 pcmpeqw mm5, mm4
   4.712 +                 pcmpeqw mm2, mm4
   4.713 +                 pcmpeqw mm1, mm6
   4.714 +                 pcmpeqw mm3, mm4
   4.715 +                 pcmpeqw mm1, mm7
   4.716 +                 pcmpeqw mm3, mm7
   4.717 +                 pand mm2, mm5
   4.718 +                 pand mm1, mm3
   4.719 +                 pand mm1, mm2
   4.720 +
   4.721 +
   4.722 +                 movq mm7, mm0
   4.723 +                 por mm7, mm1
   4.724 +
   4.725 +                 movq mm4, [Mask35]
   4.726 +                 movq mm3, [Mask26]
   4.727 +                 
   4.728 +                 movq mm6, mm4
   4.729 +                 pand mm6, mm7
   4.730 +                 pxor mm4, mm6
   4.731 +
   4.732 +                 movq mm6, mm3
   4.733 +                 pand mm6, mm7
   4.734 +                 pxor mm3, mm6
   4.735 +
   4.736 +                 movq mm2, mm0
   4.737 +                 movq mm7, [I5666Pixel]
   4.738 +                 movq mm6, [I5556Pixel]
   4.739 +                 movq mm5, [I56Pixel]
   4.740 +
   4.741 +
   4.742 +                 por mm2, mm4
   4.743 +                 pand mm4, [eax+ebx+color5]
   4.744 +                 por mm2, mm3
   4.745 +                 pand mm3, [eax+ebx+color6]
   4.746 +                 por mm2, mm1
   4.747 +                 pand mm0, mm7
   4.748 +                 pand mm1, mm6
   4.749 +                 pxor mm7, mm7
   4.750 +                 pcmpeqw mm2, mm7
   4.751 +                 por mm0, mm1
   4.752 +                 por mm3, mm4
   4.753 +                 pand mm2, mm5
   4.754 +                 por mm0, mm3
   4.755 +                 por mm0, mm2
   4.756 +                 movq [final1b], mm0
   4.757 +                 
   4.758 +          ;---------
   4.759 +
   4.760 +                 movq mm0, [final1a]
   4.761 +                 movq mm4, [final2a]
   4.762 +                 movq mm2, [final1b]
   4.763 +                 movq mm6, [final2b]
   4.764 +
   4.765 +
   4.766 +                 movq mm1, mm0
   4.767 +                 movq mm5, mm4
   4.768 +
   4.769 +
   4.770 +         punpcklwd mm0, mm2
   4.771 +         punpckhwd mm1, mm2
   4.772 +
   4.773 +         punpcklwd mm4, mm6
   4.774 +         punpckhwd mm5, mm6
   4.775 +
   4.776 +
   4.777 +%ifdef FAR_POINTER
   4.778 +         movq [fs:edx], mm0
   4.779 +         movq [fs:edx+8], mm1
   4.780 +         push edx
   4.781 +         add edx, [ebp+dstPitch]
   4.782 +         movq [fs:edx], mm4
   4.783 +         movq [fs:edx+8], mm5
   4.784 +         pop edx
   4.785 +%else
   4.786 +         movq [edx], mm0
   4.787 +         movq [edx+8], mm1
   4.788 +         push edx
   4.789 +         add edx, [ebp+dstPitch]
   4.790 +         movq [edx], mm4
   4.791 +         movq [edx+8], mm5
   4.792 +         pop edx
   4.793 +%endif
   4.794 +.SKIP_PROCESS:
   4.795 +         mov ecx, [ebp+deltaPtr]
   4.796 +         add ecx, 8
   4.797 +         mov [ebp+deltaPtr], ecx
   4.798 +         add edx, 16
   4.799 +         add eax, 8
   4.800 +
   4.801 +         pop ecx
   4.802 +         sub ecx, 4
   4.803 +         cmp ecx, 0
   4.804 +         jg  near .Loop
   4.805 +
   4.806 +; Restore the saved registers and the caller's stack frame
   4.807 +         popad
   4.808 +         mov esp, ebp
   4.809 +         pop ebp
   4.810 +         emms
   4.811 +         ret
   4.812 +
   4.813 +
   4.814 +;-------------------------------------------------------------------------
   4.815 +;-------------------------------------------------------------------------
   4.816 +;-------------------------------------------------------------------------
   4.817 +;-------------------------------------------------------------------------
   4.818 +;-------------------------------------------------------------------------
   4.819 +;-------------------------------------------------------------------------
   4.820 +;-------------------------------------------------------------------------
   4.821 +
   4.822 +
   4.823 +
   4.824 +%ifdef __DJGPP__
   4.825 +__2xSaISuperEagleLine:
   4.826 +%else
   4.827 +_2xSaISuperEagleLine:
   4.828 +%endif
   4.829 +; Set up the stack frame and save all general-purpose registers
   4.830 +         push ebp
   4.831 +         mov ebp, esp
   4.832 +         pushad
   4.833 +
   4.834 +; Prepare the destination
   4.835 +%ifdef FAR_POINTER
   4.836 +         ; Set the selector
   4.837 +         mov eax, [ebp+dstSegment]
   4.838 +         mov fs, ax
   4.839 +%endif
   4.840 +         mov edx, [ebp+dstOffset]         ; edx points to the screen
   4.841 +; Prepare the source
   4.842 +         ; eax points to colorA
   4.843 +         mov eax, [ebp+srcPtr]
   4.844 +         mov ebx, [ebp+srcPitch]
   4.845 +         mov ecx, [ebp+width]
   4.846 +         ; eax now points to colorB1
   4.847 +         sub eax, ebx
   4.848 +
   4.849 +; Main Loop
   4.850 +.Loop:   push ecx
   4.851 +
   4.852 +         ;-----Check Delta------------------
   4.853 +         mov ecx, [ebp+deltaPtr]
   4.854 +
   4.855 +         movq mm0, [eax+colorB0]
   4.856 +         movq mm1, [eax+colorB3]
   4.857 +         movq mm2, [eax+ebx+color4]
   4.858 +         movq mm3, [eax+ebx+colorS2]
   4.859 +         movq mm4, [eax+ebx+ebx+color1]
   4.860 +         movq mm5, [eax+ebx+ebx+colorS1]
   4.861 +         push eax
   4.862 +         add eax, ebx
   4.863 +         movq mm6, [eax+ebx+ebx+colorA0]
   4.864 +         movq mm7, [eax+ebx+ebx+colorA3]
   4.865 +         pop eax
   4.866 +
   4.867 +         pcmpeqw mm0, [ecx+2+colorB0]
   4.868 +         pcmpeqw mm1, [ecx+2+colorB3]
   4.869 +         pcmpeqw mm2, [ecx+ebx+2+color4]
   4.870 +         pcmpeqw mm3, [ecx+ebx+2+colorS2]
   4.871 +         pcmpeqw mm4, [ecx+ebx+ebx+2+color1]
   4.872 +         pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1]
   4.873 +         add ecx, ebx
   4.874 +         pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0]
   4.875 +         pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3]
   4.876 +         sub ecx, ebx
   4.877 +
   4.878 +
   4.879 +         pand mm0, mm1
   4.880 +         pand mm2, mm3
   4.881 +         pand mm4, mm5
   4.882 +         pand mm6, mm7
   4.883 +         pand mm0, mm2
   4.884 +         pand mm4, mm6
   4.885 +         pxor mm7, mm7
   4.886 +         pand mm0, mm4
   4.887 +         movq mm6, [eax+colorB0]
   4.888 +         pcmpeqw mm7, mm0
   4.889 +
   4.890 +         movq [ecx+2+colorB0], mm6
   4.891 +
   4.892 +         packsswb mm7, mm7
   4.893 +         movd ecx, mm7
   4.894 +         test ecx, ecx
   4.895 +         jz near .SKIP_PROCESS
   4.896 +
   4.897 +         ;End Delta
   4.898 +
   4.899 +         ;---------------------------------
   4.900 +         movq mm0, [eax+ebx+color5]
   4.901 +         movq mm1, [eax+ebx+color6]
   4.902 +         movq mm2, mm0
   4.903 +         movq mm3, mm1
   4.904 +         movq mm4, mm0
   4.905 +         movq mm5, mm1
   4.906 +
   4.907 +         pand mm0, [colorMask]
   4.908 +         pand mm1, [colorMask]
   4.909 +
   4.910 +         psrlw mm0, 1
   4.911 +         psrlw mm1, 1
   4.912 +
   4.913 +         pand mm3, [lowPixelMask]
   4.914 +         paddw mm0, mm1
   4.915 +
   4.916 +         pand mm3, mm2
   4.917 +         paddw mm0, mm3                ;mm0 contains the interpolated values
   4.918 +         movq [I56Pixel], mm0
   4.919 +         movq mm7, mm0
   4.920 +
   4.921 +         ;-------------------
   4.922 +         movq mm0, mm7
   4.923 +         movq mm1, mm4  ;5,5,5,6
   4.924 +         movq mm2, mm0
   4.925 +         movq mm3, mm1
   4.926 +
   4.927 +         pand mm0, [colorMask]
   4.928 +         pand mm1, [colorMask]
   4.929 +
   4.930 +         psrlw mm0, 1
   4.931 +         psrlw mm1, 1
   4.932 +
   4.933 +         pand mm3, [lowPixelMask]
   4.934 +         paddw mm0, mm1
   4.935 +
   4.936 +         pand mm3, mm2
   4.937 +         paddw mm0, mm3                ;mm0 contains the interpolated values
   4.938 +         movq [product1a], mm0
   4.939 +         ;--------------------
   4.940 +
   4.941 +         movq mm0, mm7
   4.942 +         movq mm1, mm5  ;6,6,6,5
   4.943 +         movq mm2, mm0
   4.944 +         movq mm3, mm1
   4.945 +
   4.946 +         pand mm0, [colorMask]
   4.947 +         pand mm1, [colorMask]
   4.948 +
   4.949 +         psrlw mm0, 1
   4.950 +         psrlw mm1, 1
   4.951 +
   4.952 +         pand mm3, [lowPixelMask]
   4.953 +         paddw mm0, mm1
   4.954 +
   4.955 +         pand mm3, mm2
   4.956 +         paddw mm0, mm3
   4.957 +         movq [product1b], mm0
   4.958 +
   4.959 +         ;-------------------------
   4.960 +         ;-------------------------
   4.961 +         movq mm0, [eax+ebx+ebx+color2]
   4.962 +         movq mm1, [eax+ebx+ebx+color3]
   4.963 +         movq mm2, mm0
   4.964 +         movq mm3, mm1
   4.965 +         movq mm4, mm0
   4.966 +         movq mm5, mm1
   4.967 +
   4.968 +         pand mm0, [colorMask]
   4.969 +         pand mm1, [colorMask]
   4.970 +
   4.971 +         psrlw mm0, 1
   4.972 +         psrlw mm1, 1
   4.973 +
   4.974 +         pand mm3, [lowPixelMask]
   4.975 +         paddw mm0, mm1
   4.976 +
   4.977 +         pand mm3, mm2
   4.978 +         paddw mm0, mm3
   4.979 +         movq [I23Pixel], mm0
   4.980 +         movq mm7, mm0
   4.981 +
   4.982 +         ;---------------------
   4.983 +         movq mm0, mm7
   4.984 +         movq mm1, mm4  ;2,2,2,3
   4.985 +         movq mm2, mm0
   4.986 +         movq mm3, mm1
   4.987 +
   4.988 +         pand mm0, [colorMask]
   4.989 +         pand mm1, [colorMask]
   4.990 +
   4.991 +         psrlw mm0, 1
   4.992 +         psrlw mm1, 1
   4.993 +
   4.994 +         pand mm3, [lowPixelMask]
   4.995 +         paddw mm0, mm1
   4.996 +
   4.997 +         pand mm3, mm2
   4.998 +         paddw mm0, mm3
   4.999 +         movq [product2a], mm0
  4.1000 +
  4.1001 +         ;----------------------
  4.1002 +         movq mm0, mm7
  4.1003 +         movq mm1, mm5  ;3,3,3,2
  4.1004 +         movq mm2, mm0
  4.1005 +         movq mm3, mm1
  4.1006 +
  4.1007 +         pand mm0, [colorMask]
  4.1008 +         pand mm1, [colorMask]
  4.1009 +
  4.1010 +         psrlw mm0, 1
  4.1011 +         psrlw mm1, 1
  4.1012 +
  4.1013 +         pand mm3, [lowPixelMask]
  4.1014 +         paddw mm0, mm1
  4.1015 +
  4.1016 +         pand mm3, mm2
  4.1017 +         paddw mm0, mm3
  4.1018 +         movq [product2b], mm0
  4.1019 +
  4.1020 +
  4.1021 +         ;////////////////////////////////
  4.1022 +         ; Decide which "branch" to take
  4.1023 +         ;--------------------------------
  4.1024 +         movq mm4, [eax+ebx+color5]
  4.1025 +         movq mm5, [eax+ebx+color6]
  4.1026 +         movq mm6, [eax+ebx+ebx+color3]
  4.1027 +         movq mm7, [eax+ebx+ebx+color2]
  4.1028 +
  4.1029 +         pxor mm3, mm3
  4.1030 +         movq mm0, mm4
  4.1031 +         movq mm1, mm5
  4.1032 +
  4.1033 +         pcmpeqw mm0, mm6
  4.1034 +         pcmpeqw mm1, mm7
  4.1035 +         pcmpeqw mm1, mm3
  4.1036 +         pand mm0, mm1
  4.1037 +         movq [Mask35], mm0
  4.1038 +
  4.1039 +         movq mm0, [eax+ebx+ebx+colorS1]
  4.1040 +         movq mm1, [eax+ebx+color4]
  4.1041 +         push eax
  4.1042 +         add eax, ebx
  4.1043 +         movq mm2, [eax+ebx+ebx+colorA2]
  4.1044 +         pop eax
  4.1045 +         movq mm3, [eax+colorB1]
  4.1046 +         pcmpeqw mm0, mm4
  4.1047 +         pcmpeqw mm1, mm4
  4.1048 +         pcmpeqw mm2, mm4
  4.1049 +         pcmpeqw mm3, mm4
  4.1050 +         pand mm0, mm1
  4.1051 +         pand mm2, mm3
  4.1052 +         por mm0, mm2
  4.1053 +         pand mm0, [Mask35]
  4.1054 +         movq [Mask35b], mm0
  4.1055 +
  4.1056 +         ;-----------
  4.1057 +         pxor mm3, mm3
  4.1058 +         movq mm0, mm4
  4.1059 +         movq mm1, mm5
  4.1060 +
  4.1061 +         pcmpeqw mm0, mm6
  4.1062 +         pcmpeqw mm1, mm7
  4.1063 +         pcmpeqw mm0, mm3
  4.1064 +         pand mm0, mm1
  4.1065 +         movq [Mask26], mm0
  4.1066 +
  4.1067 +         movq mm0, [eax+ebx+ebx+color1]
  4.1068 +         movq mm1, [eax+ebx+colorS2]
  4.1069 +         push eax
  4.1070 +         add eax, ebx
  4.1071 +         movq mm2, [eax+ebx+ebx+colorA1]
  4.1072 +         pop eax
  4.1073 +         movq mm3, [eax+colorB2]
  4.1074 +         pcmpeqw mm0, mm5
  4.1075 +         pcmpeqw mm1, mm5
  4.1076 +         pcmpeqw mm2, mm5
  4.1077 +         pcmpeqw mm3, mm5
  4.1078 +         pand mm0, mm1
  4.1079 +         pand mm2, mm3
  4.1080 +         por mm0, mm2
  4.1081 +         pand mm0, [Mask26]
  4.1082 +         movq [Mask26b], mm0
  4.1083 +
  4.1084 +         ;--------------------
  4.1085 +         movq mm0, mm4
  4.1086 +         movq mm1, mm5
  4.1087 +         movq mm2, mm0
  4.1088 +
  4.1089 +         pcmpeqw mm2, mm1
  4.1090 +         pcmpeqw mm0, mm6
  4.1091 +         pcmpeqw mm1, mm7
  4.1092 +         pand mm0, mm1
  4.1093 +         pand mm2, mm0
  4.1094 +         pxor mm0, mm2
  4.1095 +         movq mm7, mm0
  4.1096 +
  4.1097 +         ;------------------
  4.1098 +         packsswb mm7, mm7
  4.1099 +         movd ecx, mm7
  4.1100 +         test ecx, ecx
  4.1101 +         jz near .SKIP_GUESS
  4.1102 +
  4.1103 +;---------------------------------------------
  4.1104 +; Map of the pixels:                    I|E F|J
  4.1105 +;                                       G|A B|K
  4.1106 +;                                       H|C D|L
  4.1107 +;                                       M|N O|P
  4.1108 +         movq mm6, mm0
  4.1109 +         movq mm4, [eax+ebx+color5]
  4.1110 +         movq mm5, [eax+ebx+color6]
  4.1111 +         pxor mm7, mm7
  4.1112 +         pand mm6, [ONE]
  4.1113 +
  4.1114 +         movq mm0, [eax+colorB1]
  4.1115 +         movq mm1, [eax+ebx+color4]
  4.1116 +         movq mm2, mm0
  4.1117 +         movq mm3, mm1
  4.1118 +         pcmpeqw mm0, mm4
  4.1119 +         pcmpeqw mm1, mm4
  4.1120 +         pcmpeqw mm2, mm5
  4.1121 +         pcmpeqw mm3, mm5
  4.1122 +         pand mm0, mm6
  4.1123 +         pand mm1, mm6
  4.1124 +         pand mm2, mm6
  4.1125 +         pand mm3, mm6
  4.1126 +         paddw mm0, mm1
  4.1127 +         paddw mm2, mm3
  4.1128 +
  4.1129 +         pxor mm3, mm3
  4.1130 +         pcmpgtw mm0, mm6
  4.1131 +         pcmpgtw mm2, mm6
  4.1132 +         pcmpeqw mm0, mm3
  4.1133 +         pcmpeqw mm2, mm3
  4.1134 +         pand mm0, mm6
  4.1135 +         pand mm2, mm6
  4.1136 +         paddw mm7, mm0
  4.1137 +         psubw mm7, mm2
  4.1138 +
  4.1139 +         movq mm0, [eax+colorB2]
  4.1140 +         movq mm1, [eax+ebx+colorS2]
  4.1141 +         movq mm2, mm0
  4.1142 +         movq mm3, mm1
  4.1143 +         pcmpeqw mm0, mm4
  4.1144 +         pcmpeqw mm1, mm4
  4.1145 +         pcmpeqw mm2, mm5
  4.1146 +         pcmpeqw mm3, mm5
  4.1147 +         pand mm0, mm6
  4.1148 +         pand mm1, mm6
  4.1149 +         pand mm2, mm6
  4.1150 +         pand mm3, mm6
  4.1151 +         paddw mm0, mm1
  4.1152 +         paddw mm2, mm3
  4.1153 +
  4.1154 +         pxor mm3, mm3
  4.1155 +         pcmpgtw mm0, mm6
  4.1156 +         pcmpgtw mm2, mm6
  4.1157 +         pcmpeqw mm0, mm3
  4.1158 +         pcmpeqw mm2, mm3
  4.1159 +         pand mm0, mm6
  4.1160 +         pand mm2, mm6
  4.1161 +         paddw mm7, mm0
  4.1162 +         psubw mm7, mm2
  4.1163 +
  4.1164 +         push eax
  4.1165 +         add eax, ebx
  4.1166 +         movq mm0, [eax+ebx+color1]
  4.1167 +         movq mm1, [eax+ebx+ebx+colorA1]
  4.1168 +         movq mm2, mm0
  4.1169 +         movq mm3, mm1
  4.1170 +         pcmpeqw mm0, mm4
  4.1171 +         pcmpeqw mm1, mm4
  4.1172 +         pcmpeqw mm2, mm5
  4.1173 +         pcmpeqw mm3, mm5
  4.1174 +         pand mm0, mm6
  4.1175 +         pand mm1, mm6
  4.1176 +         pand mm2, mm6
  4.1177 +         pand mm3, mm6
  4.1178 +         paddw mm0, mm1
  4.1179 +         paddw mm2, mm3
  4.1180 +
  4.1181 +         pxor mm3, mm3
  4.1182 +         pcmpgtw mm0, mm6
  4.1183 +         pcmpgtw mm2, mm6
  4.1184 +         pcmpeqw mm0, mm3
  4.1185 +         pcmpeqw mm2, mm3
  4.1186 +         pand mm0, mm6
  4.1187 +         pand mm2, mm6
  4.1188 +         paddw mm7, mm0
  4.1189 +         psubw mm7, mm2
  4.1190 +
  4.1191 +         movq mm0, [eax+ebx+colorS1]
  4.1192 +         movq mm1, [eax+ebx+ebx+colorA2]
  4.1193 +         movq mm2, mm0
  4.1194 +         movq mm3, mm1
  4.1195 +         pcmpeqw mm0, mm4
  4.1196 +         pcmpeqw mm1, mm4
  4.1197 +         pcmpeqw mm2, mm5
  4.1198 +         pcmpeqw mm3, mm5
  4.1199 +         pand mm0, mm6
  4.1200 +         pand mm1, mm6
  4.1201 +         pand mm2, mm6
  4.1202 +         pand mm3, mm6
  4.1203 +         paddw mm0, mm1
  4.1204 +         paddw mm2, mm3
  4.1205 +
  4.1206 +         pxor mm3, mm3
  4.1207 +         pcmpgtw mm0, mm6
  4.1208 +         pcmpgtw mm2, mm6
  4.1209 +         pcmpeqw mm0, mm3
  4.1210 +         pcmpeqw mm2, mm3
  4.1211 +         pand mm0, mm6
  4.1212 +         pand mm2, mm6
  4.1213 +         paddw mm7, mm0
  4.1214 +         psubw mm7, mm2
  4.1215 +
  4.1216 +         pop eax
  4.1217 +         movq mm1, mm7
  4.1218 +         pxor mm0, mm0
  4.1219 +         pcmpgtw mm7, mm0
  4.1220 +         pcmpgtw mm0, mm1
  4.1221 +
  4.1222 +         por mm7, [Mask35]
  4.1223 +         por mm0, [Mask26]
  4.1224 +         movq [Mask35], mm7
  4.1225 +         movq [Mask26], mm0
  4.1226 +
  4.1227 +.SKIP_GUESS:
  4.1228 +         ;Start the assembly: combine the masks and interpolated products into the final pixel values.
  4.1229 +
  4.1230 +         movq mm4, [Mask35]
  4.1231 +         movq mm5, [Mask26]
  4.1232 +         movq mm6, [Mask35b]
  4.1233 +         movq mm7, [Mask26b]
  4.1234 +
  4.1235 +         movq mm0, [eax+ebx+color5]
  4.1236 +         movq mm1, [eax+ebx+color6]
  4.1237 +         movq mm2, [eax+ebx+ebx+color2]
  4.1238 +         movq mm3, [eax+ebx+ebx+color3]
  4.1239 +         pcmpeqw mm0, mm2
  4.1240 +         pcmpeqw mm1, mm3
  4.1241 +         movq mm2, mm4
  4.1242 +         movq mm3, mm5
  4.1243 +         por mm0, mm1
  4.1244 +         por mm2, mm3
  4.1245 +         pand mm2, mm0
  4.1246 +         pxor mm0, mm2
  4.1247 +         movq mm3, mm0
  4.1248 +
  4.1249 +         movq mm2, mm0
  4.1250 +         pxor mm0, mm0
  4.1251 +         por mm2, mm4
  4.1252 +         pxor mm4, mm6
  4.1253 +         por mm2, mm5
  4.1254 +         pxor mm5, mm7
  4.1255 +         pcmpeqw mm2, mm0
  4.1256 +         ;----------------
  4.1257 +
  4.1258 +         movq mm0, [eax+ebx+color5]
  4.1259 +         movq mm1, mm3
  4.1260 +         por mm1, mm4
  4.1261 +         por mm1, mm6
  4.1262 +         pand mm0, mm1
  4.1263 +         movq mm1, mm5
  4.1264 +         pand mm1, [I56Pixel]
  4.1265 +         por mm0, mm1
  4.1266 +         movq mm1, mm7
  4.1267 +         pand mm1, [product1b]
  4.1268 +         por mm0, mm1
  4.1269 +         movq mm1, mm2
  4.1270 +         pand mm1, [product1a]
  4.1271 +         por mm0, mm1
  4.1272 +         movq [final1a], mm0
  4.1273 +
  4.1274 +         movq mm0, [eax+ebx+color6]
  4.1275 +         movq mm1, mm3
  4.1276 +         por mm1, mm5
  4.1277 +         por mm1, mm7
  4.1278 +         pand mm0, mm1
  4.1279 +         movq mm1, mm4
  4.1280 +         pand mm1, [I56Pixel]
  4.1281 +         por mm0, mm1
  4.1282 +         movq mm1, mm6
  4.1283 +         pand mm1, [product1a]
  4.1284 +         por mm0, mm1
  4.1285 +         movq mm1, mm2
  4.1286 +         pand mm1, [product1b]
  4.1287 +         por mm0, mm1
  4.1288 +         movq [final1b], mm0
  4.1289 +
  4.1290 +         movq mm0, [eax+ebx+ebx+color2]
  4.1291 +         movq mm1, mm3
  4.1292 +         por mm1, mm5
  4.1293 +         por mm1, mm7
  4.1294 +         pand mm0, mm1
  4.1295 +         movq mm1, mm4
  4.1296 +         pand mm1, [I23Pixel]
  4.1297 +         por mm0, mm1
  4.1298 +         movq mm1, mm6
  4.1299 +         pand mm1, [product2b]
  4.1300 +         por mm0, mm1
  4.1301 +         movq mm1, mm2
  4.1302 +         pand mm1, [product2a]
  4.1303 +         por mm0, mm1
  4.1304 +         movq [final2a], mm0
  4.1305 +
  4.1306 +         movq mm0, [eax+ebx+ebx+color3]
  4.1307 +         movq mm1, mm3
  4.1308 +         por mm1, mm4
  4.1309 +         por mm1, mm6
  4.1310 +         pand mm0, mm1
  4.1311 +         movq mm1, mm5
  4.1312 +         pand mm1, [I23Pixel]
  4.1313 +         por mm0, mm1
  4.1314 +         movq mm1, mm7
  4.1315 +         pand mm1, [product2a]
  4.1316 +         por mm0, mm1
  4.1317 +         movq mm1, mm2
  4.1318 +         pand mm1, [product2b]
  4.1319 +         por mm0, mm1
  4.1320 +         movq [final2b], mm0
  4.1321 +
  4.1322 +
  4.1323 +         movq mm0, [final1a]
  4.1324 +         movq mm2, [final1b]
  4.1325 +         movq mm1, mm0
  4.1326 +         movq mm4, [final2a]
  4.1327 +         movq mm6, [final2b]
  4.1328 +         movq mm5, mm4
  4.1329 +         punpcklwd mm0, mm2
  4.1330 +         punpckhwd mm1, mm2
  4.1331 +         punpcklwd mm4, mm6
  4.1332 +         punpckhwd mm5, mm6
  4.1333 +
  4.1334 +
  4.1335 +
  4.1336 +
  4.1337 +%ifdef FAR_POINTER
  4.1338 +         movq [fs:edx], mm0
  4.1339 +         movq [fs:edx+8], mm1
  4.1340 +         push edx
  4.1341 +         add edx, [ebp+dstPitch]
  4.1342 +         movq [fs:edx], mm4
  4.1343 +         movq [fs:edx+8], mm5
  4.1344 +         pop edx
  4.1345 +%else
  4.1346 +         movq [edx], mm0
  4.1347 +         movq [edx+8], mm1
  4.1348 +         push edx
  4.1349 +         add edx, [ebp+dstPitch]
  4.1350 +         movq [edx], mm4
  4.1351 +         movq [edx+8], mm5
  4.1352 +         pop edx
  4.1353 +%endif
  4.1354 +.SKIP_PROCESS:
  4.1355 +         mov ecx, [ebp+deltaPtr]
  4.1356 +         add ecx, 8
  4.1357 +         mov [ebp+deltaPtr], ecx
  4.1358 +         add edx, 16
  4.1359 +         add eax, 8
  4.1360 +
  4.1361 +         pop ecx
  4.1362 +         sub ecx, 4
  4.1363 +         cmp ecx, 0
  4.1364 +         jg  near .Loop
  4.1365 +
  4.1366 +; Restore some stuff
  4.1367 +         popad
  4.1368 +         mov esp, ebp
  4.1369 +         pop ebp
  4.1370 +         emms
  4.1371 +         ret
  4.1372 +
  4.1373 +
  4.1374 +;-------------------------------------------------------------------------
  4.1375 +;-------------------------------------------------------------------------
  4.1376 +;-------------------------------------------------------------------------
  4.1377 +;-------------------------------------------------------------------------
  4.1378 +;-------------------------------------------------------------------------
  4.1379 +;-------------------------------------------------------------------------
  4.1380 +;-------------------------------------------------------------------------
  4.1381 +
  4.1382 +
  4.1383 +;This is version 0.50. The equates below are byte offsets into one row of the 4x4 pixel neighbourhood (word-sized pixels, so one pixel = 2 bytes).
  4.1384 +colorI   equ -2                  ; row 0 (I E F J), addressed as [eax+...]
  4.1385 +colorE   equ 0
  4.1386 +colorF   equ 2
  4.1387 +colorJ   equ 4
  4.1388 +
  4.1389 +colorG   equ -2                  ; row 1 (G A B K), addressed as [eax+ebx+...]
  4.1390 +colorA   equ 0
  4.1391 +colorB   equ 2
  4.1392 +colorK   equ 4
  4.1393 +
  4.1394 +colorH   equ -2                  ; row 2 (H C D L), addressed as [eax+ebx+ebx+...]
  4.1395 +colorC   equ 0
  4.1396 +colorD   equ 2
  4.1397 +colorL   equ 4
  4.1398 +
  4.1399 +colorM   equ -2                  ; row 3 (M N O P), reached as [eax+ebx+ebx+...] after a temporary "add eax, ebx"
  4.1400 +colorN   equ 0
  4.1401 +colorO   equ 2
  4.1402 +colorP   equ 4
  4.1403 +; _2xSaILine: each loop pass examines the 4x4 neighbourhoods of 4 adjacent source pixels and writes 8 pixels to each of two destination rows.
  4.1404 +%ifdef __DJGPP__
  4.1405 +__2xSaILine:
  4.1406 +%else
  4.1407 +_2xSaILine:
  4.1408 +%endif
  4.1409 +; Prologue: build the stack frame and save every general-purpose register (undone by popad at the end)
  4.1410 +         push ebp
  4.1411 +         mov ebp, esp
  4.1412 +         pushad
  4.1413 +
  4.1414 +; Prepare the destination
  4.1415 +%ifdef FAR_POINTER
  4.1416 +         ; Set the selector
  4.1417 +         mov eax, [ebp+dstSegment]
  4.1418 +         mov fs, ax
  4.1419 +%endif
  4.1420 +         mov edx, [ebp+dstOffset]         ; edx points to the screen
  4.1421 +; Prepare the source
  4.1422 +         ; eax points to colorA
  4.1423 +         mov eax, [ebp+srcPtr]
  4.1424 +         mov ebx, [ebp+srcPitch]
  4.1425 +         mov ecx, [ebp+width]
  4.1426 +         ; eax now points to colorE
  4.1427 +         sub eax, ebx
  4.1428 +
  4.1429 +
  4.1430 +; Main Loop
  4.1431 +.Loop:   push ecx                         ; save the remaining pixel count; ecx is reused as scratch until the pop at .SKIP_PROCESS
  4.1432 +
  4.1433 +         ;-----Check Delta------------------
  4.1434 +         mov ecx, [ebp+deltaPtr]
  4.1435 +; load the left (offset -2) and right (offset +4) quadword of each of the four neighbourhood rows
  4.1436 +         movq mm0, [eax+colorI]
  4.1437 +         movq mm1, [eax+colorJ]
  4.1438 +         movq mm2, [eax+ebx+colorG]
  4.1439 +         movq mm3, [eax+ebx+colorK]
  4.1440 +         movq mm4, [eax+ebx+ebx+colorH]
  4.1441 +         movq mm5, [eax+ebx+ebx+colorL]
  4.1442 +         push eax
  4.1443 +         add eax, ebx                     ; step down one row so that [eax+ebx+ebx+...] reaches row 3
  4.1444 +         movq mm6, [eax+ebx+ebx+colorM]
  4.1445 +         movq mm7, [eax+ebx+ebx+colorP]
  4.1446 +         pop eax
  4.1447 +; compare each quadword with the delta buffer, which is indexed with the same pitch (ebx) and a +2 byte bias
  4.1448 +         pcmpeqw mm0, [ecx+2+colorI]
  4.1449 +         pcmpeqw mm1, [ecx+2+colorK]      ; colorK and colorJ are both 4, so this addresses the counterpart of the colorJ load above
  4.1450 +         pcmpeqw mm2, [ecx+ebx+2+colorG]
  4.1451 +         pcmpeqw mm3, [ecx+ebx+2+colorK]
  4.1452 +         pcmpeqw mm4, [ecx+ebx+ebx+2+colorH]
  4.1453 +         pcmpeqw mm5, [ecx+ebx+ebx+2+colorL]
  4.1454 +         add ecx, ebx
  4.1455 +         pcmpeqw mm6, [ecx+ebx+ebx+2+colorM]
  4.1456 +         pcmpeqw mm7, [ecx+ebx+ebx+2+colorP]
  4.1457 +         sub ecx, ebx
  4.1458 +
  4.1459 +; AND the eight results together: a lane of mm0 stays 0xFFFF only if all eight comparisons matched
  4.1460 +         pand mm0, mm1
  4.1461 +         pand mm2, mm3
  4.1462 +         pand mm4, mm5
  4.1463 +         pand mm6, mm7
  4.1464 +         pand mm0, mm2
  4.1465 +         pand mm4, mm6
  4.1466 +         pxor mm7, mm7
  4.1467 +         pand mm0, mm4
  4.1468 +         movq mm6, [eax+colorI]
  4.1469 +         pcmpeqw mm7, mm0                 ; mm7 = 0xFFFF in every lane where at least one comparison differed
  4.1470 +
  4.1471 +         movq [ecx+2+colorI], mm6         ; copy the current row-0 quadword ([eax+colorI]) into the delta buffer
  4.1472 +
  4.1473 +         packsswb mm7, mm7                ; squeeze the four word flags into the low dword
  4.1474 +         movd ecx, mm7
  4.1475 +         test ecx, ecx
  4.1476 +         jz near .SKIP_PROCESS            ; no lane differed from the delta buffer: skip the pixel computation and stores
  4.1477 +
  4.1478 +         ;End Delta
  4.1479 +
  4.1480 +         ;---------------------------------
  4.1481 +
  4.1482 +
  4.1483 +;1 -- build Mask1: lanes where the computed (right-hand) pixel of the first output row is colorA
  4.1484 +         ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorE) && (colorB == colorL)
  4.1485 +         movq mm0, [eax+ebx+colorA]        ;mm0 and mm1 contain colorA
  4.1486 +         movq mm2, [eax+ebx+colorB]        ;mm2 and mm3 contain colorB
  4.1487 +
  4.1488 +         movq mm1, mm0
  4.1489 +         movq mm3, mm2
  4.1490 +
  4.1491 +         pcmpeqw mm0, [eax+ebx+ebx+colorD]
  4.1492 +         pcmpeqw mm1, [eax+colorE]
  4.1493 +         pcmpeqw mm2, [eax+ebx+ebx+colorL]
  4.1494 +         pcmpeqw mm3, [eax+ebx+ebx+colorC]
  4.1495 +
  4.1496 +         pand mm0, mm1
  4.1497 +         pxor mm1, mm1
  4.1498 +         pand mm0, mm2
  4.1499 +         pcmpeqw mm3, mm1                  ; comparing the equality mask with zero inverts it: lanes where colorB != colorC
  4.1500 +         pand mm0, mm3                 ;result in mm0
  4.1501 +
  4.1502 +         ;if ((colorA == colorC) && (colorB != colorE) && (colorA == colorF) && (colorB == colorJ)
  4.1503 +         movq mm4, [eax+ebx+colorA]        ;mm4 and mm5 contain colorA
  4.1504 +         movq mm6, [eax+ebx+colorB]        ;mm6 and mm7 contain colorB
  4.1505 +         movq mm5, mm4
  4.1506 +         movq mm7, mm6
  4.1507 +
  4.1508 +         pcmpeqw mm4, [eax+ebx+ebx+colorC]
  4.1509 +         pcmpeqw mm5, [eax+colorF]
  4.1510 +         pcmpeqw mm6, [eax+colorJ]
  4.1511 +         pcmpeqw mm7, [eax+colorE]
  4.1512 +
  4.1513 +         pand mm4, mm5
  4.1514 +         pxor mm5, mm5
  4.1515 +         pand mm4, mm6
  4.1516 +         pcmpeqw mm7, mm5                  ; inverted: lanes where colorB != colorE
  4.1517 +         pand mm4, mm7                 ;result in mm4
  4.1518 +
  4.1519 +         por mm0, mm4                  ;combine the masks
  4.1520 +         movq [Mask1], mm0
  4.1521 +
  4.1522 +         ;--------------------------------------------
  4.1523 +
  4.1524 +;2 -- build Mask2: lanes where that pixel is colorB
  4.1525 +         ;if ((colorB == colorC) && (colorA != colorD) && (colorB == colorF) && (colorA == colorH)
  4.1526 +         movq mm0, [eax+ebx+colorB]        ;mm0 and mm1 contain colorB
  4.1527 +         movq mm2, [eax+ebx+colorA]        ;mm2 and mm3 contain colorA
  4.1528 +         movq mm1, mm0
  4.1529 +         movq mm3, mm2
  4.1530 +
  4.1531 +         pcmpeqw mm0, [eax+ebx+ebx+colorC]
  4.1532 +         pcmpeqw mm1, [eax+colorF]
  4.1533 +         pcmpeqw mm2, [eax+ebx+ebx+colorH]
  4.1534 +         pcmpeqw mm3, [eax+ebx+ebx+colorD]
  4.1535 +
  4.1536 +         pand mm0, mm1
  4.1537 +         pxor mm1, mm1
  4.1538 +         pand mm0, mm2
  4.1539 +         pcmpeqw mm3, mm1                  ; inverted: lanes where colorA != colorD
  4.1540 +         pand mm0, mm3                 ;result in mm0
  4.1541 +
  4.1542 +         ;if ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI)
  4.1543 +         movq mm4, [eax+ebx+colorB]        ;mm4 and mm5 contain colorB
  4.1544 +         movq mm6, [eax+ebx+colorA]        ;mm6 and mm7 contain colorA
  4.1545 +         movq mm5, mm4
  4.1546 +         movq mm7, mm6
  4.1547 +
  4.1548 +         pcmpeqw mm4, [eax+ebx+ebx+colorD]
  4.1549 +         pcmpeqw mm5, [eax+colorE]
  4.1550 +         pcmpeqw mm6, [eax+colorI]
  4.1551 +         pcmpeqw mm7, [eax+colorF]
  4.1552 +
  4.1553 +         pand mm4, mm5
  4.1554 +         pxor mm5, mm5
  4.1555 +         pand mm4, mm6
  4.1556 +         pcmpeqw mm7, mm5                  ; inverted: lanes where colorA != colorF
  4.1557 +         pand mm4, mm7                 ;result in mm4
  4.1558 +
  4.1559 +         por mm0, mm4                  ;combine the masks
  4.1560 +         movq [Mask2], mm0
  4.1561 +
  4.1562 +
  4.1563 +;interpolate colorA and colorB: (A & colorMask)/2 + (B & colorMask)/2 + (A & B & lowPixelMask)
  4.1564 +         movq mm0, [eax+ebx+colorA]
  4.1565 +         movq mm1, [eax+ebx+colorB]
  4.1566 +
  4.1567 +         movq mm2, mm0
  4.1568 +         movq mm3, mm1
  4.1569 +
  4.1570 +         pand mm0, [colorMask]
  4.1571 +         pand mm1, [colorMask]
  4.1572 +
  4.1573 +         psrlw mm0, 1
  4.1574 +         psrlw mm1, 1
  4.1575 +
  4.1576 +         pand mm3, [lowPixelMask]
  4.1577 +         paddw mm0, mm1
  4.1578 +
  4.1579 +         pand mm3, mm2                 ; bits of lowPixelMask that are set in both A and B
  4.1580 +         paddw mm0, mm3                ;mm0 contains the interpolated values
  4.1581 +
  4.1582 +         ;assemble the pixels: colorA where Mask1, colorB where Mask2, the interpolated value in all other lanes
  4.1583 +         movq mm1, [eax+ebx+colorA]
  4.1584 +         movq mm2, [eax+ebx+colorB]
  4.1585 +
  4.1586 +         movq mm3, [Mask1]
  4.1587 +         movq mm5, mm1
  4.1588 +         movq mm4, [Mask2]
  4.1589 +         movq mm6, mm1
  4.1590 +
  4.1591 +         pand mm1, mm3
  4.1592 +         por mm3, mm4
  4.1593 +         pxor mm7, mm7
  4.1594 +         pand mm2, mm4
  4.1595 +
  4.1596 +         pcmpeqw mm3, mm7              ; mm3 = lanes where neither Mask1 nor Mask2 is set
  4.1597 +         por mm1, mm2
  4.1598 +         pand mm0, mm3
  4.1599 +
  4.1600 +         por mm0, mm1
  4.1601 +
  4.1602 +         punpcklwd mm5, mm0            ; interleave colorA with the computed pixel for lanes 0-1
  4.1603 +         punpckhwd mm6, mm0            ; same for lanes 2-3
  4.1604 +
  4.1605 +%ifdef FAR_POINTER
  4.1606 +         movq [fs:edx], mm5
  4.1607 +         movq [fs:edx+8], mm6
  4.1608 +%else
  4.1609 +         movq [edx], mm5
  4.1610 +         movq [edx+8], mm6
  4.1611 +%endif
  4.1612 +
  4.1613 +;------------------------------------------------
  4.1614 +;        Create the Nextline (left-hand pixel of the second output row, kept in ACPixel)
  4.1615 +;------------------------------------------------
  4.1616 +;3       ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorG) && (colorC == colorO)
  4.1617 +         movq mm0, [eax+ebx+colorA]        ;mm0 and mm1 contain colorA
  4.1618 +         movq mm2, [eax+ebx+ebx+colorC]        ;mm2 and mm3 contain colorC
  4.1619 +         movq mm1, mm0
  4.1620 +         movq mm3, mm2
  4.1621 +
  4.1622 +         push eax
  4.1623 +         add eax, ebx                      ; eax temporarily points at row 1 (G A B K) so row 3 is reachable
  4.1624 +         pcmpeqw mm0, [eax+ebx+colorD]
  4.1625 +         pcmpeqw mm1, [eax+colorG]
  4.1626 +         pcmpeqw mm2, [eax+ebx+ebx+colorO]
  4.1627 +         pcmpeqw mm3, [eax+colorB]
  4.1628 +         pop eax
  4.1629 +
  4.1630 +         pand mm0, mm1
  4.1631 +         pxor mm1, mm1
  4.1632 +         pand mm0, mm2
  4.1633 +         pcmpeqw mm3, mm1                  ; inverted: lanes where colorC != colorB
  4.1634 +         pand mm0, mm3                 ;result in mm0
  4.1635 +
  4.1636 +         ;if ((colorA == colorB) && (colorG != colorC) && (colorA == colorH) && (colorC == colorM)
  4.1637 +         movq mm4, [eax+ebx+colorA]        ;mm4 and mm5 contain colorA
  4.1638 +         movq mm6, [eax+ebx+ebx+colorC]        ;mm6 and mm7 contain colorC
  4.1639 +         movq mm5, mm4
  4.1640 +         movq mm7, mm6
  4.1641 +
  4.1642 +         push eax
  4.1643 +         add eax, ebx                      ; same temporary one-row step as above
  4.1644 +         pcmpeqw mm4, [eax+ebx+colorH]
  4.1645 +         pcmpeqw mm5, [eax+colorB]
  4.1646 +         pcmpeqw mm6, [eax+ebx+ebx+colorM]
  4.1647 +         pcmpeqw mm7, [eax+colorG]
  4.1648 +         pop eax
  4.1649 +
  4.1650 +         pand mm4, mm5
  4.1651 +         pxor mm5, mm5
  4.1652 +         pand mm4, mm6
  4.1653 +         pcmpeqw mm7, mm5                  ; inverted: lanes where colorC != colorG
  4.1654 +         pand mm4, mm7                 ;result in mm4
  4.1655 +
  4.1656 +         por mm0, mm4                  ;combine the masks
  4.1657 +         movq [Mask1], mm0
  4.1658 +         ;--------------------------------------------
  4.1659 +
  4.1660 +;4
  4.1661 +         ;if ((colorB == colorC) && (colorA != colorD) && (colorC == colorH) && (colorA == colorF)
  4.1662 +         movq mm0, [eax+ebx+ebx+colorC]        ;mm0 and mm1 contain colorC
  4.1663 +         movq mm2, [eax+ebx+colorA]        ;mm2 and mm3 contain colorA
  4.1664 +         movq mm1, mm0
  4.1665 +         movq mm3, mm2
  4.1666 +
  4.1667 +         pcmpeqw mm0, [eax+ebx+colorB]
  4.1668 +         pcmpeqw mm1, [eax+ebx+ebx+colorH]
  4.1669 +         pcmpeqw mm2, [eax+colorF]
  4.1670 +         pcmpeqw mm3, [eax+ebx+ebx+colorD]
  4.1671 +
  4.1672 +         pand mm0, mm1
  4.1673 +         pxor mm1, mm1
  4.1674 +         pand mm0, mm2
  4.1675 +         pcmpeqw mm3, mm1                  ; inverted: lanes where colorA != colorD
  4.1676 +         pand mm0, mm3                 ;result in mm0
  4.1677 +
  4.1678 +         ;if ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI)
  4.1679 +         movq mm4, [eax+ebx+ebx+colorC]        ;mm4 and mm5 contain colorC
  4.1680 +         movq mm6, [eax+ebx+colorA]        ;mm6 and mm7 contain colorA
  4.1681 +         movq mm5, mm4
  4.1682 +         movq mm7, mm6
  4.1683 +
  4.1684 +         pcmpeqw mm4, [eax+ebx+ebx+colorD]
  4.1685 +         pcmpeqw mm5, [eax+ebx+colorG]
  4.1686 +         pcmpeqw mm6, [eax+colorI]
  4.1687 +         pcmpeqw mm7, [eax+ebx+ebx+colorH]
  4.1688 +
  4.1689 +         pand mm4, mm5
  4.1690 +         pxor mm5, mm5
  4.1691 +         pand mm4, mm6
  4.1692 +         pcmpeqw mm7, mm5                  ; inverted: lanes where colorA != colorH
  4.1693 +         pand mm4, mm7                 ;result in mm4
  4.1694 +
  4.1695 +         por mm0, mm4                  ;combine the masks
  4.1696 +         movq [Mask2], mm0
  4.1697 +         ;----------------------------------------------
  4.1698 +
  4.1699 +;interpolate colorA and colorC: (A & colorMask)/2 + (C & colorMask)/2 + (A & C & lowPixelMask)
  4.1700 +         movq mm0, [eax+ebx+colorA]
  4.1701 +         movq mm1, [eax+ebx+ebx+colorC]
  4.1702 +
  4.1703 +         movq mm2, mm0
  4.1704 +         movq mm3, mm1
  4.1705 +
  4.1706 +         pand mm0, [colorMask]
  4.1707 +         pand mm1, [colorMask]
  4.1708 +
  4.1709 +         psrlw mm0, 1
  4.1710 +         psrlw mm1, 1
  4.1711 +
  4.1712 +         pand mm3, [lowPixelMask]
  4.1713 +         paddw mm0, mm1
  4.1714 +
  4.1715 +         pand mm3, mm2                 ; bits of lowPixelMask that are set in both A and C
  4.1716 +         paddw mm0, mm3                ;mm0 contains the interpolated values
  4.1717 +         ;-------------
  4.1718 +
  4.1719 +         ;assemble the pixels: colorA where Mask1, colorC where Mask2, the interpolated value in all other lanes
  4.1720 +         movq mm1, [eax+ebx+colorA]
  4.1721 +         movq mm2, [eax+ebx+ebx+colorC]
  4.1722 +
  4.1723 +         movq mm3, [Mask1]
  4.1724 +         movq mm4, [Mask2]
  4.1725 +
  4.1726 +         pand mm1, mm3
  4.1727 +         pand mm2, mm4
  4.1728 +
  4.1729 +         por mm3, mm4
  4.1730 +         pxor mm7, mm7
  4.1731 +         por mm1, mm2
  4.1732 +
  4.1733 +         pcmpeqw mm3, mm7              ; mm3 = lanes where neither Mask1 nor Mask2 is set
  4.1734 +         pand mm0, mm3
  4.1735 +         por mm0, mm1
  4.1736 +         movq [ACPixel], mm0           ; parked in memory; reloaded after .SKIP_GUESS and interleaved with the diagonal pixel
  4.1737 +
  4.1738 +;////////////////////////////////
  4.1739 +; Decide which "branch" to take (per lane: Mask1 selects colorA, Mask2 selects colorB for the diagonal pixel)
  4.1740 +;--------------------------------
  4.1741 +         movq mm0, [eax+ebx+colorA]
  4.1742 +         movq mm1, [eax+ebx+colorB]
  4.1743 +         movq mm6, mm0
  4.1744 +         movq mm7, mm1
  4.1745 +         pcmpeqw mm0, [eax+ebx+ebx+colorD]
  4.1746 +         pcmpeqw mm1, [eax+ebx+ebx+colorC]
  4.1747 +         pcmpeqw mm6, mm7              ; mm6 = lanes where colorA == colorB
  4.1748 +
  4.1749 +         movq mm2, mm0
  4.1750 +         movq mm3, mm0
  4.1751 +
  4.1752 +         pand mm0, mm1       ;colorA == colorD && colorB == colorC
  4.1753 +         pxor mm7, mm7
  4.1754 +
  4.1755 +         pcmpeqw mm2, mm7
  4.1756 +         pand mm6, mm0                 ; mm6 = lanes where A, B, C and D are all equal
  4.1757 +         pand mm2, mm1       ;colorA != colorD && colorB == colorC
  4.1758 +
  4.1759 +         pcmpeqw mm1, mm7
  4.1760 +
  4.1761 +         pand mm1, mm3       ;colorA == colorD && colorB != colorC
  4.1762 +         pxor mm0, mm6                 ; mm0 = both diagonals match but A != B: these lanes need the neighbour vote below
  4.1763 +         por mm1, mm6                  ; all-equal lanes are folded into the colorA mask
  4.1764 +         movq mm7, mm0
  4.1765 +         movq [Mask2], mm2
  4.1766 +         packsswb mm7, mm7
  4.1767 +         movq [Mask1], mm1
  4.1768 +
  4.1769 +         movd ecx, mm7
  4.1770 +         test ecx, ecx
  4.1771 +         jz near .SKIP_GUESS           ; no lane needs the vote
  4.1772 +
  4.1773 +;---------------------------------------------
  4.1774 +; Map of the pixels:                    I|E F|J
  4.1775 +;                                       G|A B|K
  4.1776 +;                                       H|C D|L
  4.1777 +;                                       M|N O|P
  4.1778 +         movq mm6, mm0
  4.1779 +         movq mm4, [eax+ebx+colorA]
  4.1780 +         movq mm5, [eax+ebx+colorB]
  4.1781 +         pxor mm7, mm7                 ; mm7 = per-lane signed vote accumulator
  4.1782 +         pand mm6, [ONE]               ; mm6 = 1 in every lane that takes part in the vote, 0 elsewhere
  4.1783 +; neighbours E and G: count how many equal colorA (mm0) and how many equal colorB (mm2)
  4.1784 +         movq mm0, [eax+colorE]
  4.1785 +         movq mm1, [eax+ebx+colorG]
  4.1786 +         movq mm2, mm0
  4.1787 +         movq mm3, mm1
  4.1788 +         pcmpeqw mm0, mm4
  4.1789 +         pcmpeqw mm1, mm4
  4.1790 +         pcmpeqw mm2, mm5
  4.1791 +         pcmpeqw mm3, mm5
  4.1792 +         pand mm0, mm6
  4.1793 +         pand mm1, mm6
  4.1794 +         pand mm2, mm6
  4.1795 +         pand mm3, mm6
  4.1796 +         paddw mm0, mm1                ; 0..2 matches with colorA
  4.1797 +         paddw mm2, mm3                ; 0..2 matches with colorB
  4.1798 +; vote: mm7 += (matchesA <= 1) - (matchesB <= 1), in voting lanes only
  4.1799 +         pxor mm3, mm3
  4.1800 +         pcmpgtw mm0, mm6              ; count > 1
  4.1801 +         pcmpgtw mm2, mm6
  4.1802 +         pcmpeqw mm0, mm3              ; inverted: count <= 1
  4.1803 +         pcmpeqw mm2, mm3
  4.1804 +         pand mm0, mm6
  4.1805 +         pand mm2, mm6
  4.1806 +         paddw mm7, mm0
  4.1807 +         psubw mm7, mm2
  4.1808 +; same vote for neighbours F and K
  4.1809 +         movq mm0, [eax+colorF]
  4.1810 +         movq mm1, [eax+ebx+colorK]
  4.1811 +         movq mm2, mm0
  4.1812 +         movq mm3, mm1
  4.1813 +         pcmpeqw mm0, mm4
  4.1814 +         pcmpeqw mm1, mm4
  4.1815 +         pcmpeqw mm2, mm5
  4.1816 +         pcmpeqw mm3, mm5
  4.1817 +         pand mm0, mm6
  4.1818 +         pand mm1, mm6
  4.1819 +         pand mm2, mm6
  4.1820 +         pand mm3, mm6
  4.1821 +         paddw mm0, mm1
  4.1822 +         paddw mm2, mm3
  4.1823 +
  4.1824 +         pxor mm3, mm3
  4.1825 +         pcmpgtw mm0, mm6
  4.1826 +         pcmpgtw mm2, mm6
  4.1827 +         pcmpeqw mm0, mm3
  4.1828 +         pcmpeqw mm2, mm3
  4.1829 +         pand mm0, mm6
  4.1830 +         pand mm2, mm6
  4.1831 +         paddw mm7, mm0
  4.1832 +         psubw mm7, mm2
  4.1833 +; same vote for neighbours H and N (eax is stepped down one row until the pop below)
  4.1834 +         push eax
  4.1835 +         add eax, ebx
  4.1836 +         movq mm0, [eax+ebx+colorH]
  4.1837 +         movq mm1, [eax+ebx+ebx+colorN]
  4.1838 +         movq mm2, mm0
  4.1839 +         movq mm3, mm1
  4.1840 +         pcmpeqw mm0, mm4
  4.1841 +         pcmpeqw mm1, mm4
  4.1842 +         pcmpeqw mm2, mm5
  4.1843 +         pcmpeqw mm3, mm5
  4.1844 +         pand mm0, mm6
  4.1845 +         pand mm1, mm6
  4.1846 +         pand mm2, mm6
  4.1847 +         pand mm3, mm6
  4.1848 +         paddw mm0, mm1
  4.1849 +         paddw mm2, mm3
  4.1850 +
  4.1851 +         pxor mm3, mm3
  4.1852 +         pcmpgtw mm0, mm6
  4.1853 +         pcmpgtw mm2, mm6
  4.1854 +         pcmpeqw mm0, mm3
  4.1855 +         pcmpeqw mm2, mm3
  4.1856 +         pand mm0, mm6
  4.1857 +         pand mm2, mm6
  4.1858 +         paddw mm7, mm0
  4.1859 +         psubw mm7, mm2
  4.1860 +; same vote for neighbours L and O
  4.1861 +         movq mm0, [eax+ebx+colorL]
  4.1862 +         movq mm1, [eax+ebx+ebx+colorO]
  4.1863 +         movq mm2, mm0
  4.1864 +         movq mm3, mm1
  4.1865 +         pcmpeqw mm0, mm4
  4.1866 +         pcmpeqw mm1, mm4
  4.1867 +         pcmpeqw mm2, mm5
  4.1868 +         pcmpeqw mm3, mm5
  4.1869 +         pand mm0, mm6
  4.1870 +         pand mm1, mm6
  4.1871 +         pand mm2, mm6
  4.1872 +         pand mm3, mm6
  4.1873 +         paddw mm0, mm1
  4.1874 +         paddw mm2, mm3
  4.1875 +
  4.1876 +         pxor mm3, mm3
  4.1877 +         pcmpgtw mm0, mm6
  4.1878 +         pcmpgtw mm2, mm6
  4.1879 +         pcmpeqw mm0, mm3
  4.1880 +         pcmpeqw mm2, mm3
  4.1881 +         pand mm0, mm6
  4.1882 +         pand mm2, mm6
  4.1883 +         paddw mm7, mm0
  4.1884 +         psubw mm7, mm2
  4.1885 +
  4.1886 +         pop eax
  4.1887 +         movq mm1, mm7
  4.1888 +         pxor mm0, mm0
  4.1889 +         pcmpgtw mm7, mm0              ; mm7 = lanes whose vote total is positive
  4.1890 +         pcmpgtw mm0, mm1              ; mm0 = lanes whose vote total is negative
  4.1891 +
  4.1892 +         por mm7, [Mask1]              ; positive total: add the lane to the colorA mask
  4.1893 +         por mm0, [Mask2]              ; negative total: add the lane to the colorB mask
  4.1894 +         movq [Mask1], mm7
  4.1895 +         movq [Mask2], mm0
  4.1896 +
  4.1897 +.SKIP_GUESS:
  4.1898 +         ;----------------------------
  4.1899 +         ;interpolate A, B, C and D: sum of (x & qcolorMask)/4, plus the carry recovered from the summed low bits
  4.1900 +         movq mm0, [eax+ebx+colorA]
  4.1901 +         movq mm1, [eax+ebx+colorB]
  4.1902 +         movq mm4, mm0
  4.1903 +         movq mm2, [eax+ebx+ebx+colorC]
  4.1904 +         movq mm5, mm1
  4.1905 +         movq mm3, [qcolorMask]
  4.1906 +         movq mm6, mm2
  4.1907 +         movq mm7, [qlowpixelMask]
  4.1908 +
  4.1909 +         pand mm0, mm3
  4.1910 +         pand mm1, mm3
  4.1911 +         pand mm2, mm3
  4.1912 +         pand mm3, [eax+ebx+ebx+colorD]   ; mm3 becomes colorD & qcolorMask (the mask register is reused for D)
  4.1913 +
  4.1914 +         psrlw mm0, 2
  4.1915 +         pand mm4, mm7
  4.1916 +         psrlw mm1, 2
  4.1917 +         pand mm5, mm7
  4.1918 +         psrlw mm2, 2
  4.1919 +         pand mm6, mm7
  4.1920 +         psrlw mm3, 2
  4.1921 +         pand mm7, [eax+ebx+ebx+colorD]   ; mm7 becomes colorD & qlowpixelMask
  4.1922 +
  4.1923 +         paddw mm0, mm1
  4.1924 +         paddw mm2, mm3
  4.1925 +
  4.1926 +         paddw mm4, mm5
  4.1927 +         paddw mm6, mm7
  4.1928 +
  4.1929 +         paddw mm4, mm6                ; sum of the four pixels' low two bits per channel
  4.1930 +         paddw mm0, mm2                ; sum of the four quartered high parts
  4.1931 +         psrlw mm4, 2
  4.1932 +         pand mm4, [qlowpixelMask]
  4.1933 +         paddw mm0, mm4      ;mm0 contains the interpolated value of A, B, C and D
  4.1934 +
  4.1935 +;\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
  4.1936 +         ;assemble the pixels: colorA where Mask1, colorB where Mask2, the four-way blend in all other lanes
  4.1937 +         movq mm1, [Mask1]
  4.1938 +         movq mm2, [Mask2]
  4.1939 +         movq mm4, [eax+ebx+colorA]
  4.1940 +         movq mm5, [eax+ebx+colorB]
  4.1941 +         pand mm4, mm1
  4.1942 +         pand mm5, mm2
  4.1943 +
  4.1944 +         pxor mm7, mm7
  4.1945 +         por mm1, mm2
  4.1946 +         por mm4, mm5
  4.1947 +         pcmpeqw mm1, mm7              ; mm1 = lanes where neither Mask1 nor Mask2 is set
  4.1948 +         pand mm0, mm1
  4.1949 +         por mm4, mm0        ;mm4 contains the diagonal pixels
  4.1950 +
  4.1951 +         movq mm0, [ACPixel]
  4.1952 +         movq mm1, mm0
  4.1953 +         punpcklwd mm0, mm4            ; interleave ACPixel with the diagonal pixel for lanes 0-1
  4.1954 +         punpckhwd mm1, mm4            ; same for lanes 2-3
  4.1955 +
  4.1956 +         push edx
  4.1957 +         add edx, [ebp+dstPitch]       ; second output row; edx is restored by the pop below
  4.1958 +
  4.1959 +%ifdef FAR_POINTER
  4.1960 +         movq [fs:edx], mm0
  4.1961 +         movq [fs:edx+8], mm1
  4.1962 +%else
  4.1963 +         movq [edx], mm0
  4.1964 +         movq [edx+8], mm1
  4.1965 +%endif
  4.1966 +         pop edx
  4.1967 +
  4.1968 +.SKIP_PROCESS:
  4.1969 +         mov ecx, [ebp+deltaPtr]       ; the delta pointer lives in its stack argument slot, not in a register
  4.1970 +         add ecx, 8
  4.1971 +         mov [ebp+deltaPtr], ecx       ; write the advanced pointer back for the next pass
  4.1972 +         add edx, 16                   ; destination advances 16 bytes per pass
  4.1973 +         add eax, 8                    ; source advances 8 bytes per pass
  4.1974 +
  4.1975 +         pop ecx                       ; remaining pixel count pushed at .Loop
  4.1976 +         sub ecx, 4
  4.1977 +         cmp ecx, 0
  4.1978 +         jg  near .Loop                ; signed test: continue while the remaining count is above zero
  4.1979 +
  4.1980 +; Epilogue: restore the registers saved by pushad, tear down the frame, and leave MMX state with emms
  4.1981 +         popad
  4.1982 +         mov esp, ebp
  4.1983 +         pop ebp
  4.1984 +         emms
  4.1985 +         ret
  4.1986 +
  4.1987 +;-------------------------------------------------------------------------
  4.1988 +;-------------------------------------------------------------------------
  4.1989 +;-------------------------------------------------------------------------
  4.1990 +;-------------------------------------------------------------------------
  4.1991 +;-------------------------------------------------------------------------
  4.1992 +;-------------------------------------------------------------------------
  4.1993 +;-------------------------------------------------------------------------
  4.1994 +; Init_2xSaIMMX: takes the pixel format (555 or 565) as its stack argument, fills the four blend masks accordingly, and returns eax = 0; any other format returns eax = 1 and leaves the masks untouched.
  4.1995 +%ifdef __DJGPP__
  4.1996 +_Init_2xSaIMMX:
  4.1997 +%else
  4.1998 +Init_2xSaIMMX:
  4.1999 +%endif
  4.2000 +; Prologue: set up the frame and save edx, the only scratch register preserved here (eax carries the return value)
  4.2001 +         push ebp
  4.2002 +         mov ebp, esp
  4.2003 +         push edx
  4.2004 +
  4.2005 +
  4.2006 +;Disabled MMX detection (original note: "Damn thing doesn't work"). NOTE(review): cpuid overwrites eax, ebx, ecx and edx while only edx is saved above -- possibly the cause; confirm before re-enabling.
  4.2007 +;        mov eax,1
  4.2008 +;        cpuid
  4.2009 +;        test edx, 0x00800000     ;test bit 23
  4.2010 +;        jz end2 ;bit not set => no MMX detected
  4.2011 +
  4.2012 +         mov eax, [ebp+8]         ;PixelFormat
  4.2013 +         cmp eax, 555
  4.2014 +         jz Bits555
  4.2015 +         cmp eax, 565
  4.2016 +         jz Bits565
  4.2017 +end2:                             ; reached by fall-through for unsupported formats; only the disabled code above jumps here
  4.2018 +         mov eax, 1               ; failure return value
  4.2019 +         jmp end3
  4.2020 +Bits555:                          ; each mask is written as two identical dwords (one quadword)
  4.2021 +         mov edx, 0x7BDE7BDE      ; every channel bit except the lowest of each 5-bit field
  4.2022 +         mov eax, colorMask
  4.2023 +         mov [eax], edx
  4.2024 +         mov [eax+4], edx
  4.2025 +         mov edx, 0x04210421      ; lowest bit of each 5-bit field
  4.2026 +         mov eax, lowPixelMask
  4.2027 +         mov [eax], edx
  4.2028 +         mov [eax+4], edx
  4.2029 +         mov edx, 0x739C739C      ; every channel bit except the lowest two of each field
  4.2030 +         mov eax, qcolorMask
  4.2031 +         mov [eax], edx
  4.2032 +         mov [eax+4], edx
  4.2033 +         mov edx, 0x0C630C63      ; lowest two bits of each field
  4.2034 +         mov eax, qlowpixelMask
  4.2035 +         mov [eax], edx
  4.2036 +         mov [eax+4], edx
  4.2037 +         mov eax, 0               ; success return value
  4.2038 +         jmp end3
  4.2039 +Bits565:                          ; same four masks for the 5/6/5 layout (identical to the assembled defaults in .data)
  4.2040 +         mov edx, 0xF7DEF7DE
  4.2041 +         mov eax, colorMask
  4.2042 +         mov [eax], edx
  4.2043 +         mov [eax+4], edx
  4.2044 +         mov edx, 0x08210821
  4.2045 +         mov eax, lowPixelMask
  4.2046 +         mov [eax], edx
  4.2047 +         mov [eax+4], edx
  4.2048 +         mov edx, 0xE79CE79C
  4.2049 +         mov eax, qcolorMask
  4.2050 +         mov [eax], edx
  4.2051 +         mov [eax+4], edx
  4.2052 +         mov edx, 0x18631863
  4.2053 +         mov eax, qlowpixelMask
  4.2054 +         mov [eax], edx
  4.2055 +         mov [eax+4], edx
  4.2056 +         mov eax, 0               ; success return value
  4.2057 +         jmp end3                 ; end3 is the very next instruction, so this jump changes nothing
  4.2058 +end3:   ; common exit: restore edx and the caller's frame; eax holds the result
  4.2059 +         pop edx
  4.2060 +         mov esp, ebp
  4.2061 +         pop ebp
  4.2062 +         ret
  4.2063 +
  4.2064 +
  4.2065 +;-------------------------------------------------------------------------
  4.2066 +;-------------------------------------------------------------------------
  4.2067 +;-------------------------------------------------------------------------
  4.2068 +;-------------------------------------------------------------------------
  4.2069 +;-------------------------------------------------------------------------
  4.2070 +;-------------------------------------------------------------------------
  4.2071 +;-------------------------------------------------------------------------
  4.2072 +
  4.2073 +        SECTION .data ALIGN = 32
  4.2074 +;Some constants: 64-bit masks (the same 16-bit pattern in all four lanes). The first four start with the values Init_2xSaIMMX writes for format 565 and are overwritten by it for 555.
  4.2075 +colorMask     dd 0xF7DEF7DE,0xF7DEF7DE   ; all bits except the lowest bit of each 5/6/5 field; applied before the >>1 in two-pixel blends
  4.2076 +lowPixelMask  dd 0x08210821,0x08210821   ; lowest bit of each field
  4.2077 +
  4.2078 +qcolorMask    dd 0xE79CE79C,0xE79CE79C   ; all bits except the lowest two of each field; applied before the >>2 in the four-pixel blend
  4.2079 +qlowpixelMask dd 0x18631863,0x18631863   ; lowest two bits of each field
  4.2080 +
  4.2081 +darkenMask    dd 0xC718C718,0xC718C718   ; this and the next two masks are not referenced in this part of the file
  4.2082 +GreenMask     dd 0x07E007E0,0x07E007E0
  4.2083 +RedBlueMask   dd 0xF81FF81F,0xF81FF81F
  4.2084 +
  4.2085 +FALSE         dd 0x00000000,0x00000000
  4.2086 +TRUE          dd 0xffffffff,0xffffffff
  4.2087 +ONE           dd 0x00010001,0x00010001   ; the value 1 in each lane; ANDed with a lane mask to turn 0xFFFF into a countable 1
  4.2088 +
  4.2089 +
  4.2090 +        SECTION .bss ALIGN = 32
  4.2091 +ACPixel       resb 8     ; 8-byte scratch slots written and re-read by the routines above; being static, they are shared by every call
  4.2092 +Mask1         resb 8     ; _2xSaILine: lanes that take colorA
  4.2093 +Mask2         resb 8     ; _2xSaILine: lanes that take the other candidate colour (colorB or colorC)
  4.2094 +
  4.2095 +I56Pixel      resb 8     ; the slots from here down are not used by _2xSaILine
  4.2096 +I23Pixel      resb 8
  4.2097 +I5556Pixel    resb 8     ; the four I5556/I2223/I5666/I2333 slots are not referenced in this part of the file
  4.2098 +I2223Pixel    resb 8
  4.2099 +I5666Pixel    resb 8
  4.2100 +I2333Pixel    resb 8
  4.2101 +Mask26        resb 8
  4.2102 +Mask35        resb 8
  4.2103 +Mask26b       resb 8
  4.2104 +Mask35b       resb 8
  4.2105 +product1a     resb 8
  4.2106 +product1b     resb 8
  4.2107 +product2a     resb 8
  4.2108 +product2b     resb 8
  4.2109 +final1a       resb 8
  4.2110 +final1b       resb 8
  4.2111 +final2a       resb 8
  4.2112 +final2b       resb 8
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/src/filters/Makefile	Sun Mar 04 20:32:31 2012 -0600
     5.3 @@ -0,0 +1,488 @@
     5.4 +# Makefile.in generated by automake 1.10.1 from Makefile.am.
     5.5 +# src/filters/Makefile.  Generated from Makefile.in by configure.
     5.6 +
     5.7 +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
     5.8 +# 2003, 2004, 2005, 2006, 2007, 2008  Free Software Foundation, Inc.
     5.9 +# This Makefile.in is free software; the Free Software Foundation
    5.10 +# gives unlimited permission to copy and/or distribute it,
    5.11 +# with or without modifications, as long as this notice is preserved.
    5.12 +
    5.13 +# This program is distributed in the hope that it will be useful,
    5.14 +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
    5.15 +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
    5.16 +# PARTICULAR PURPOSE.
    5.17 +
    5.18 +
    5.19 +
    5.20 +
    5.21 +pkgdatadir = $(datadir)/VisualBoyAdvance
    5.22 +pkglibdir = $(libdir)/VisualBoyAdvance
    5.23 +pkgincludedir = $(includedir)/VisualBoyAdvance
    5.24 +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
    5.25 +install_sh_DATA = $(install_sh) -c -m 644
    5.26 +install_sh_PROGRAM = $(install_sh) -c
    5.27 +install_sh_SCRIPT = $(install_sh) -c
    5.28 +INSTALL_HEADER = $(INSTALL_DATA)
    5.29 +transform = $(program_transform_name)
    5.30 +NORMAL_INSTALL = :
    5.31 +PRE_INSTALL = :
    5.32 +POST_INSTALL = :
    5.33 +NORMAL_UNINSTALL = :
    5.34 +PRE_UNINSTALL = :
    5.35 +POST_UNINSTALL = :
    5.36 +build_triplet = x86_64-unknown-linux-gnu
    5.37 +host_triplet = x86_64-unknown-linux-gnu
    5.38 +target_triplet = x86_64-unknown-linux-gnu
    5.39 +subdir = src/filters
    5.40 +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
    5.41 +OBJDIR = $(top_srcdir)/src/obj
    5.42 +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
    5.43 +am__aclocal_m4_deps = $(top_srcdir)/m4/gettext.m4 \
    5.44 +	$(top_srcdir)/m4/iconv.m4 $(top_srcdir)/m4/lib-ld.m4 \
    5.45 +	$(top_srcdir)/m4/lib-link.m4 $(top_srcdir)/m4/lib-prefix.m4 \
    5.46 +	$(top_srcdir)/m4/nls.m4 $(top_srcdir)/m4/po.m4 \
    5.47 +	$(top_srcdir)/m4/progtest.m4 $(top_srcdir)/acinclude.m4 \
    5.48 +	$(top_srcdir)/configure.in
    5.49 +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
    5.50 +	$(ACLOCAL_M4)
    5.51 +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
    5.52 +CONFIG_CLEAN_FILES =
    5.53 +LIBRARIES = $(noinst_LIBRARIES)
    5.54 +AR = ar
    5.55 +ARFLAGS = cru
    5.56 +lib386_a_AR = $(AR) $(ARFLAGS)
    5.57 +lib386_a_LIBADD =
    5.58 +am_lib386_a_OBJECTS = 2xSaImmx.$(OBJEXT)
    5.59 +lib386_a_OBJECTS = $(patsubst %,$(OBJDIR)/%,$(am_lib386_a_OBJECTS))
    5.60 +libfilter_a_AR = $(AR) $(ARFLAGS)
    5.61 +libfilter_a_LIBADD =
    5.62 +am_libfilter_a_OBJECTS = 2xSaI.$(OBJEXT) admame.$(OBJEXT) \
    5.63 +	bilinear.$(OBJEXT) hq2x.$(OBJEXT) interframe.$(OBJEXT) \
    5.64 +	motionblur.$(OBJEXT) pixel.$(OBJEXT) scanline.$(OBJEXT) \
    5.65 +	simple2x.$(OBJEXT)
    5.66 +libfilter_a_OBJECTS = $(patsubst %,$(OBJDIR)/%,$(am_libfilter_a_OBJECTS))
    5.67 +DEFAULT_INCLUDES = -I.
    5.68 +depcomp = $(SHELL) $(top_srcdir)/depcomp
    5.69 +am__depfiles_maybe = depfiles
    5.70 +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
    5.71 +	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
    5.72 +CXXLD = $(CXX)
    5.73 +CXXLINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \
    5.74 +	-o $@
    5.75 +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
    5.76 +	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
    5.77 +CCLD = $(CC)
    5.78 +LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
    5.79 +SOURCES = $(lib386_a_SOURCES) $(libfilter_a_SOURCES)
    5.80 +DIST_SOURCES = $(lib386_a_SOURCES) $(libfilter_a_SOURCES)
    5.81 +ETAGS = etags
    5.82 +CTAGS = ctags
    5.83 +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
    5.84 +ACLOCAL = ${SHELL} /home/r/proj/vba/trunk/missing --run aclocal-1.10
    5.85 +AMTAR = ${SHELL} /home/r/proj/vba/trunk/missing --run tar
    5.86 +AUTOCONF = ${SHELL} /home/r/proj/vba/trunk/missing --run autoconf
    5.87 +AUTOHEADER = ${SHELL} /home/r/proj/vba/trunk/missing --run autoheader
    5.88 +AUTOMAKE = ${SHELL} /home/r/proj/vba/trunk/missing --run automake-1.10
    5.89 +AWK = gawk
    5.90 +CC = gcc
    5.91 +CCDEPMODE = depmode=gcc3
    5.92 +CFLAGS = -g -O2
    5.93 +CPP = gcc -E
    5.94 +CPPFLAGS = 
    5.95 +CXX = g++
    5.96 +CXXDEPMODE = depmode=gcc3
    5.97 +CXXFLAGS = -g -O2 -DC_CORE -DPROFILING -DDEV_VERSION
    5.98 +CYGPATH_W = echo
    5.99 +DEFS = -DPACKAGE_NAME=\"\" -DPACKAGE_TARNAME=\"\" -DPACKAGE_VERSION=\"\" -DPACKAGE_STRING=\"\" -DPACKAGE_BUGREPORT=\"\" -DPACKAGE_URL=\"\" -DPACKAGE=\"VisualBoyAdvance\" -DVERSION=\"1.7.2\" -DYYTEXT_POINTER=1 -DHAVE_LIBZ=1 -DHAVE_LIBPNG=1 -DHAVE_LIBPTHREAD=1 -DSTDC_HEADERS=1 -DHAVE_SYS_TYPES_H=1 -DHAVE_SYS_STAT_H=1 -DHAVE_STDLIB_H=1 -DHAVE_STRING_H=1 -DHAVE_MEMORY_H=1 -DHAVE_STRINGS_H=1 -DHAVE_INTTYPES_H=1 -DHAVE_STDINT_H=1 -DHAVE_UNISTD_H=1 -DHAVE_MALLOC_H=1 -DHAVE_STRINGS_H=1 -DHAVE_UNISTD_H=1 -DHAVE_ARPA_INET_H=1 -DHAVE_NETINET_IN_H=1
   5.100 +DEPDIR = .deps
   5.101 +ECHO_C = 
   5.102 +ECHO_N = -n
   5.103 +ECHO_T = 
   5.104 +EGREP = /bin/grep -E
   5.105 +EXEEXT = 
   5.106 +GETTEXT_PACKAGE = 
   5.107 +GMSGFMT = 
   5.108 +GREP = /bin/grep
   5.109 +GTKMM_CFLAGS = 
   5.110 +GTKMM_CPPFLAGS = 
   5.111 +GTKMM_LIBS = 
   5.112 +INSTALL = /usr/bin/install -c
   5.113 +INSTALL_DATA = ${INSTALL} -m 644
   5.114 +INSTALL_PROGRAM = ${INSTALL}
   5.115 +INSTALL_SCRIPT = ${INSTALL}
   5.116 +INSTALL_STRIP_PROGRAM = $(install_sh) -c -s
   5.117 +INTLLIBS = 
   5.118 +LDFLAGS = 
   5.119 +LEX = flex
   5.120 +LEXLIB = -lfl
   5.121 +LEX_OUTPUT_ROOT = lex.yy
   5.122 +LIBICONV = 
   5.123 +LIBINTL = 
   5.124 +LIBOBJS = 
   5.125 +LIBS = -lpthread -lpng -lz 
   5.126 +LTLIBICONV = 
   5.127 +LTLIBINTL = 
   5.128 +LTLIBOBJS = 
   5.129 +MAKEINFO = ${SHELL} /home/r/proj/vba/trunk/missing --run makeinfo
   5.130 +MKDIR_P = /bin/mkdir -p
   5.131 +MKINSTALLDIRS = 
   5.132 +MSGFMT = 
   5.133 +MSGMERGE = 
   5.134 +NASM = /usr/bin/nasm
   5.135 +OBJEXT = o
   5.136 +PACKAGE = VisualBoyAdvance
   5.137 +PACKAGE_BUGREPORT = 
   5.138 +PACKAGE_NAME = 
   5.139 +PACKAGE_STRING = 
   5.140 +PACKAGE_TARNAME = 
   5.141 +PACKAGE_VERSION = 
   5.142 +PATH_SEPARATOR = :
   5.143 +PKG_CONFIG = 
   5.144 +POSUB = 
   5.145 +RANLIB = ranlib
   5.146 +SDL_CFLAGS = -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT
   5.147 +SDL_CONFIG = /usr/bin/sdl-config
   5.148 +SDL_LIBS = -L/usr/lib -Wl,-rpath,/usr/lib -lSDL -lpthread
   5.149 +SET_MAKE = 
   5.150 +SHELL = /bin/sh
   5.151 +STRIP = 
   5.152 +USE_NLS = 
   5.153 +VBA_EXTRA = 
   5.154 +VBA_LIBS = ../gba/libgba.a ../gb/libgb.a ../common/libgbcom.a ../filters/libfilter.a ../lua/libgblua.a ../prof/libprof.a
   5.155 +VBA_SRC_EXTRA =  lua prof sdl
   5.156 +VERSION = 1.7.2
   5.157 +XGETTEXT = 
   5.158 +XMKMF = 
   5.159 +YACC = bison -y
   5.160 +YFLAGS = 
   5.161 +abs_builddir = /home/r/proj/vba/trunk/src/filters
   5.162 +abs_srcdir = /home/r/proj/vba/trunk/src/filters
   5.163 +abs_top_builddir = /home/r/proj/vba/trunk
   5.164 +abs_top_srcdir = /home/r/proj/vba/trunk
   5.165 +ac_ct_CC = gcc
   5.166 +ac_ct_CXX = g++
   5.167 +am__include = include
   5.168 +am__leading_dot = .
   5.169 +am__quote = 
   5.170 +am__tar = ${AMTAR} chof - "$$tardir"
   5.171 +am__untar = ${AMTAR} xf -
   5.172 +bindir = ${exec_prefix}/bin
   5.173 +build = x86_64-unknown-linux-gnu
   5.174 +build_alias = 
   5.175 +build_cpu = x86_64
   5.176 +build_os = linux-gnu
   5.177 +build_vendor = unknown
   5.178 +builddir = .
   5.179 +datadir = ${datarootdir}
   5.180 +datarootdir = ${prefix}/share
   5.181 +docdir = ${datarootdir}/doc/${PACKAGE}
   5.182 +dvidir = ${docdir}
   5.183 +exec_prefix = ${prefix}
   5.184 +host = x86_64-unknown-linux-gnu
   5.185 +host_alias = 
   5.186 +host_cpu = x86_64
   5.187 +host_os = linux-gnu
   5.188 +host_vendor = unknown
   5.189 +htmldir = ${docdir}
   5.190 +includedir = ${prefix}/include
   5.191 +infodir = ${datarootdir}/info
   5.192 +install_sh = $(SHELL) /home/r/proj/vba/trunk/install-sh
   5.193 +libdir = ${exec_prefix}/lib
   5.194 +libexecdir = ${exec_prefix}/libexec
   5.195 +localedir = ${datarootdir}/locale
   5.196 +localstatedir = ${prefix}/var
   5.197 +mandir = ${datarootdir}/man
   5.198 +mkdir_p = /bin/mkdir -p
   5.199 +oldincludedir = /usr/include
   5.200 +pdfdir = ${docdir}
   5.201 +prefix = /usr/local
   5.202 +program_transform_name = s,x,x,
   5.203 +psdir = ${docdir}
   5.204 +sbindir = ${exec_prefix}/sbin
   5.205 +sharedstatedir = ${prefix}/com
   5.206 +srcdir = .
   5.207 +sysconfdir = ${prefix}/etc
   5.208 +target = x86_64-unknown-linux-gnu
   5.209 +target_alias = 
   5.210 +target_cpu = x86_64
   5.211 +target_os = linux-gnu
   5.212 +target_vendor = unknown
   5.213 +top_builddir = ../..
   5.214 +top_srcdir = ../..
   5.215 +SUFFIXES = .asm
   5.216 +noinst_LIBRARIES = lib386.a libfilter.a
   5.217 +lib386_a_SOURCES = 2xSaImmx.asm
   5.218 +libfilter_a_SOURCES = \
   5.219 +	2xSaI.cpp		\
   5.220 +	admame.cpp		\
   5.221 +	bilinear.cpp		\
   5.222 +	hq2x.cpp		\
   5.223 +	hq2x.h			\
   5.224 +	interframe.cpp		\
   5.225 +	interp.h		\
   5.226 +	lq2x.h			\
   5.227 +	motionblur.cpp		\
   5.228 +	pixel.cpp		\
   5.229 +	scanline.cpp		\
   5.230 +	simple2x.cpp
   5.231 +
   5.232 +all: all-am
   5.233 +
   5.234 +.SUFFIXES:
   5.235 +.SUFFIXES: .asm .cpp .o .obj
   5.236 +$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
   5.237 +	@for dep in $?; do \
   5.238 +	  case '$(am__configure_deps)' in \
   5.239 +	    *$$dep*) \
   5.240 +	      cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
   5.241 +		&& exit 0; \
   5.242 +	      exit 1;; \
   5.243 +	  esac; \
   5.244 +	done; \
   5.245 +	echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu  src/filters/Makefile'; \
   5.246 +	cd $(top_srcdir) && \
   5.247 +	  $(AUTOMAKE) --gnu  src/filters/Makefile
   5.248 +.PRECIOUS: Makefile
   5.249 +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
   5.250 +	@case '$?' in \
   5.251 +	  *config.status*) \
   5.252 +	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
   5.253 +	  *) \
   5.254 +	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
   5.255 +	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
   5.256 +	esac;
   5.257 +
   5.258 +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
   5.259 +	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
   5.260 +
   5.261 +$(top_srcdir)/configure:  $(am__configure_deps)
   5.262 +	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
   5.263 +$(ACLOCAL_M4):  $(am__aclocal_m4_deps)
   5.264 +	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
   5.265 +
   5.266 +clean-noinstLIBRARIES:
   5.267 +	-test -z "$(noinst_LIBRARIES)" || rm -f $(noinst_LIBRARIES)
   5.268 +lib386.a: $(lib386_a_OBJECTS) $(lib386_a_DEPENDENCIES) 
   5.269 +	-rm -f lib386.a
   5.270 +	$(lib386_a_AR) lib386.a $(lib386_a_OBJECTS) $(lib386_a_LIBADD)
   5.271 +	$(RANLIB) lib386.a
   5.272 +libfilter.a: $(libfilter_a_OBJECTS) $(libfilter_a_DEPENDENCIES) 
   5.273 +	-rm -f libfilter.a
   5.274 +	$(libfilter_a_AR) libfilter.a $(libfilter_a_OBJECTS) $(libfilter_a_LIBADD)
   5.275 +	$(RANLIB) libfilter.a
   5.276 +
   5.277 +mostlyclean-compile:
   5.278 +	-rm -f *.$(OBJEXT)
   5.279 +
   5.280 +distclean-compile:
   5.281 +	-rm -f *.tab.c
   5.282 +
   5.283 +include ./$(DEPDIR)/2xSaI.Po
   5.284 +include ./$(DEPDIR)/admame.Po
   5.285 +include ./$(DEPDIR)/bilinear.Po
   5.286 +include ./$(DEPDIR)/hq2x.Po
   5.287 +include ./$(DEPDIR)/interframe.Po
   5.288 +include ./$(DEPDIR)/motionblur.Po
   5.289 +include ./$(DEPDIR)/pixel.Po
   5.290 +include ./$(DEPDIR)/scanline.Po
   5.291 +include ./$(DEPDIR)/simple2x.Po
   5.292 +# Compile each .cpp into $(OBJDIR) rather than the current directory; dependency info goes to $(DEPDIR)/<stem>.Tpo and is renamed to .Po once the compile succeeds.
   5.293 +$(OBJDIR)/%.o: %.cpp
   5.294 +	$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
   5.295 +	mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
   5.296 +#	source='$<' object='$@' libtool=no \
   5.297 +#	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) \
   5.298 +#	$(CXXCOMPILE) -c -o $@ $<
   5.299 +
   5.300 +.cpp.obj:
   5.301 +	$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
   5.302 +	mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
   5.303 +#	source='$<' object='$@' libtool=no \
   5.304 +#	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) \
   5.305 +#	$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
   5.306 +# ID: run mkid over the de-duplicated source/header list; awk only prints when `nonempty` is set, so the flag must be spelled the same in both clauses.
   5.307 +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
   5.308 +	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
   5.309 +	unique=`for i in $$list; do \
   5.310 +	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
   5.311 +	  done | \
   5.312 +	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
   5.313 +	      END { if (nonempty) { for (i in files) print i; }; }'`; \
   5.314 +	mkid -fID $$unique
   5.315 +tags: TAGS
   5.316 +
   5.317 +TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
   5.318 +		$(TAGS_FILES) $(LISP)
   5.319 +	tags=; \
   5.320 +	here=`pwd`; \
   5.321 +	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
   5.322 +	unique=`for i in $$list; do \
   5.323 +	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
   5.324 +	  done | \
   5.325 +	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
   5.326 +	      END { if (nonempty) { for (i in files) print i; }; }'`; \
   5.327 +	if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
   5.328 +	  test -n "$$unique" || unique=$$empty_fix; \
   5.329 +	  $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
   5.330 +	    $$tags $$unique; \
   5.331 +	fi
   5.332 +ctags: CTAGS
   5.333 +CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
   5.334 +		$(TAGS_FILES) $(LISP)
   5.335 +	tags=; \
   5.336 +	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
   5.337 +	unique=`for i in $$list; do \
   5.338 +	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
   5.339 +	  done | \
   5.340 +	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
   5.341 +	      END { if (nonempty) { for (i in files) print i; }; }'`; \
   5.342 +	test -z "$(CTAGS_ARGS)$$tags$$unique" \
   5.343 +	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
   5.344 +	     $$tags $$unique
   5.345 +
   5.346 +GTAGS:
   5.347 +	here=`$(am__cd) $(top_builddir) && pwd` \
   5.348 +	  && cd $(top_srcdir) \
   5.349 +	  && gtags -i $(GTAGS_ARGS) $$here
   5.350 +
   5.351 +distclean-tags:
   5.352 +	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
   5.353 +
   5.354 +distdir: $(DISTFILES)
   5.355 +	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
   5.356 +	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
   5.357 +	list='$(DISTFILES)'; \
   5.358 +	  dist_files=`for file in $$list; do echo $$file; done | \
   5.359 +	  sed -e "s|^$$srcdirstrip/||;t" \
   5.360 +	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
   5.361 +	case $$dist_files in \
   5.362 +	  */*) $(MKDIR_P) `echo "$$dist_files" | \
   5.363 +			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
   5.364 +			   sort -u` ;; \
   5.365 +	esac; \
   5.366 +	for file in $$dist_files; do \
   5.367 +	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
   5.368 +	  if test -d $$d/$$file; then \
   5.369 +	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
   5.370 +	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
   5.371 +	      cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
   5.372 +	    fi; \
   5.373 +	    cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
   5.374 +	  else \
   5.375 +	    test -f $(distdir)/$$file \
   5.376 +	    || cp -p $$d/$$file $(distdir)/$$file \
   5.377 +	    || exit 1; \
   5.378 +	  fi; \
   5.379 +	done
   5.380 +check-am: all-am
   5.381 +check: check-am
   5.382 +all-am: Makefile $(lib386_a_OBJECTS) $(libfilter_a_OBJECTS)
   5.383 +installdirs:
   5.384 +install: install-am
   5.385 +install-exec: install-exec-am
   5.386 +install-data: install-data-am
   5.387 +uninstall: uninstall-am
   5.388 +
   5.389 +install-am: all-am
   5.390 +	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
   5.391 +
   5.392 +installcheck: installcheck-am
   5.393 +install-strip:
   5.394 +	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
   5.395 +	  install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
   5.396 +	  `test -z '$(STRIP)' || \
   5.397 +	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
   5.398 +mostlyclean-generic:
   5.399 +
   5.400 +clean-generic:
   5.401 +
   5.402 +distclean-generic:
   5.403 +	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
   5.404 +
   5.405 +maintainer-clean-generic:
   5.406 +	@echo "This command is intended for maintainers to use"
   5.407 +	@echo "it deletes files that may require special tools to rebuild."
   5.408 +clean: clean-am
   5.409 +
   5.410 +clean-am: clean-generic clean-noinstLIBRARIES mostlyclean-am
   5.411 +
   5.412 +distclean: distclean-am
   5.413 +	-rm -rf ./$(DEPDIR)
   5.414 +	-rm -f Makefile
   5.415 +distclean-am: clean-am distclean-compile distclean-generic \
   5.416 +	distclean-tags
   5.417 +
   5.418 +dvi: dvi-am
   5.419 +
   5.420 +dvi-am:
   5.421 +
   5.422 +html: html-am
   5.423 +
   5.424 +info: info-am
   5.425 +
   5.426 +info-am:
   5.427 +
   5.428 +install-data-am:
   5.429 +
   5.430 +install-dvi: install-dvi-am
   5.431 +
   5.432 +install-exec-am:
   5.433 +
   5.434 +install-html: install-html-am
   5.435 +
   5.436 +install-info: install-info-am
   5.437 +
   5.438 +install-man:
   5.439 +
   5.440 +install-pdf: install-pdf-am
   5.441 +
   5.442 +install-ps: install-ps-am
   5.443 +
   5.444 +installcheck-am:
   5.445 +
   5.446 +maintainer-clean: maintainer-clean-am
   5.447 +	-rm -rf ./$(DEPDIR)
   5.448 +	-rm -f Makefile
   5.449 +maintainer-clean-am: distclean-am maintainer-clean-generic
   5.450 +
   5.451 +mostlyclean: mostlyclean-am
   5.452 +
   5.453 +mostlyclean-am: mostlyclean-compile mostlyclean-generic
   5.454 +
   5.455 +pdf: pdf-am
   5.456 +
   5.457 +pdf-am:
   5.458 +
   5.459 +ps: ps-am
   5.460 +
   5.461 +ps-am:
   5.462 +
   5.463 +uninstall-am:
   5.464 +
   5.465 +.MAKE: install-am install-strip
   5.466 +
   5.467 +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
   5.468 +	clean-noinstLIBRARIES ctags distclean distclean-compile \
   5.469 +	distclean-generic distclean-tags distdir dvi dvi-am html \
   5.470 +	html-am info info-am install install-am install-data \
   5.471 +	install-data-am install-dvi install-dvi-am install-exec \
   5.472 +	install-exec-am install-html install-html-am install-info \
   5.473 +	install-info-am install-man install-pdf install-pdf-am \
   5.474 +	install-ps install-ps-am install-strip installcheck \
   5.475 +	installcheck-am installdirs maintainer-clean \
   5.476 +	maintainer-clean-generic mostlyclean mostlyclean-compile \
   5.477 +	mostlyclean-generic pdf pdf-am ps ps-am tags uninstall \
   5.478 +	uninstall-am
   5.479 +# Choose the NASM output format from the build machine's word size as reported by `getconf LONG_BIT`: elf64 when it is 64, elf otherwise.
   5.480 +LBITS := $(shell getconf LONG_BIT)
   5.481 +ifeq ($(LBITS),64)
   5.482 +   ELFFLAG = elf64
   5.483 +else
   5.484 +   ELFFLAG = elf
   5.485 +endif
   5.486 +# Assemble each .asm into $(OBJDIR) with NASM. NOTE(review): Makefile.am's .asm.o rule hard-codes "-f elf" and has no OBJDIR, so regenerating this file would drop both - confirm which is intended.
   5.487 +$(OBJDIR)/%.o: %.asm
   5.488 +	$(NASM) -f $(ELFFLAG) -o $@ $<
   5.489 +# Tell versions [3.59,3.63) of GNU make to not export all variables.
   5.490 +# Otherwise a system limit (for SysV at least) may be exceeded.
   5.491 +.NOEXPORT:
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/src/filters/Makefile.am	Sun Mar 04 20:32:31 2012 -0600
     6.3 @@ -0,0 +1,22 @@
     6.4 +SUFFIXES = .asm
     6.5 +# Two static archives that are built but not installed (noinst): the NASM objects and the C++ filters.
     6.6 +noinst_LIBRARIES = lib386.a libfilter.a
     6.7 +# lib386.a is built from the single NASM source.
     6.8 +lib386_a_SOURCES = 2xSaImmx.asm
     6.9 +# Suffix rule for NASM sources. NOTE(review): "-f elf" is hard-coded, while the checked-in src/filters/Makefile picks elf64 on 64-bit hosts via ELFFLAG - confirm which is intended.
    6.10 +.asm.o:
    6.11 +	$(NASM) -f elf -o $@ $<
    6.12 +# NOTE(review): this changeset also adds hq3x32.cpp, hq_shared32.cpp and filters.h, none of which are listed below - confirm the omission is deliberate.
    6.13 +libfilter_a_SOURCES = \
    6.14 +	2xSaI.cpp		\
    6.15 +	admame.cpp		\
    6.16 +	bilinear.cpp		\
    6.17 +	hq2x.cpp		\
    6.18 +	hq2x.h			\
    6.19 +	interframe.cpp		\
    6.20 +	interp.h		\
    6.21 +	lq2x.h			\
    6.22 +	motionblur.cpp		\
    6.23 +	pixel.cpp		\
    6.24 +	scanline.cpp		\
    6.25 +	simple2x.cpp
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/src/filters/admame.cpp	Sun Mar 04 20:32:31 2012 -0600
     7.3 @@ -0,0 +1,1036 @@
     7.4 +/*
     7.5 + * This file is part of the Advance project.
     7.6 + *
     7.7 + * Copyright (C) 1999-2002 Andrea Mazzoleni
     7.8 + *
     7.9 + * This program is free software; you can redistribute it and/or modify
    7.10 + * it under the terms of the GNU General Public License as published by
    7.11 + * the Free Software Foundation; either version 2 of the License, or
    7.12 + * (at your option) any later version.
    7.13 + *
    7.14 + * This program is distributed in the hope that it will be useful,
    7.15 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
    7.16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    7.17 + * GNU General Public License for more details.
    7.18 + *
    7.19 + * You should have received a copy of the GNU General Public License
    7.20 + * along with this program; if not, write to the Free Software
    7.21 + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
    7.22 + *
    7.23 + * In addition, as a special exception, Andrea Mazzoleni
    7.24 + * gives permission to link the code of this program with
    7.25 + * the MAME library (or with modified versions of MAME that use the
    7.26 + * same license as MAME), and distribute linked combinations including
    7.27 + * the two.  You must obey the GNU General Public License in all
    7.28 + * respects for all of the code used other than MAME.  If you modify
    7.29 + * this file, you may extend this exception to your version of the
    7.30 + * file, but you are not obligated to do so.  If you do not wish to
    7.31 + * do so, delete this exception statement from your version.
    7.32 + */
    7.33 +
    7.34 +/*
    7.35 + * Alternatively at the previous license terms, you are allowed to use this
    7.36 + * code in your program with these conditions:
    7.37 + * - the program is not used in commercial activities.
    7.38 + * - the whole source code of the program is released with the binary.
    7.39 + */
    7.40 +
    7.41 +#include "../Port.h"
    7.42 +
    7.43 +#ifdef MMX
    7.44 +extern "C" bool cpu_mmx;
    7.45 +#endif
    7.46 +/* Portable 16-bit scale2x row kernel: writes 2*count pixels to dst, two per src1 pixel, chosen by comparing it with its left/right neighbours and with src0/src2 at the same column. */
    7.47 +static void internal_scale2x_16_def(u16 *dst, const u16 *src0, const u16 *src1, const u16 *src2, unsigned count)
    7.48 +{
    7.49 +	/* first pixel: src1[-1] is never read here, so the left output is a plain copy of src1[0] */
    7.50 +	dst[0] = src1[0];
    7.51 +	if (src1[1] == src0[0] && src2[0] != src0[0])
    7.52 +		dst[1] = src0[0];
    7.53 +	else
    7.54 +		dst[1] = src1[0];
    7.55 +	++src0;
    7.56 +	++src1;
    7.57 +	++src2;
    7.58 +	dst += 2;
    7.59 +
    7.60 +	/* central pixels: every column that has both a left and a right neighbour */
    7.61 +	count -= 2; /* first and last pixels are handled outside the loop; count is unsigned, so callers must pass count >= 2 or this wraps */
    7.62 +	while (count)
    7.63 +	{
    7.64 +		if (src0[0] != src2[0] && src1[-1] != src1[1]) /* src0 and src2 differ, and left and right differ */
    7.65 +		{
    7.66 +			dst[0] = src1[-1] == src0[0] ? src0[0] : src1[0];
    7.67 +			dst[1] = src1[1] == src0[0] ? src0[0] : src1[0];
    7.68 +		}
    7.69 +		else
    7.70 +		{
    7.71 +			dst[0] = src1[0];
    7.72 +			dst[1] = src1[0];
    7.73 +		}
    7.74 +
    7.75 +		++src0;
    7.76 +		++src1;
    7.77 +		++src2;
    7.78 +		dst += 2;
    7.79 +		--count;
    7.80 +	}
    7.81 +
    7.82 +	/* last pixel: src1[1] is never read here, so the right output is a plain copy of src1[0] */
    7.83 +	if (src1[-1] == src0[0] && src2[0] != src0[0])
    7.84 +		dst[0] = src0[0];
    7.85 +	else
    7.86 +		dst[0] = src1[0];
    7.87 +	dst[1] = src1[0];
    7.88 +}
    7.89 +/* Portable 32-bit scale2x row kernel: same selection logic as the 16-bit version, writing 2*count u32 pixels to dst. */
    7.90 +static void internal_scale2x_32_def(u32 *dst,
    7.91 +                                    const u32 *src0,
    7.92 +                                    const u32 *src1,
    7.93 +                                    const u32 *src2,
    7.94 +                                    unsigned count)
    7.95 +{
    7.96 +	/* first pixel: src1[-1] is never read here, so the left output is a plain copy of src1[0] */
    7.97 +	dst[0] = src1[0];
    7.98 +	if (src1[1] == src0[0] && src2[0] != src0[0])
    7.99 +		dst[1] = src0[0];
   7.100 +	else
   7.101 +		dst[1] = src1[0];
   7.102 +	++src0;
   7.103 +	++src1;
   7.104 +	++src2;
   7.105 +	dst += 2;
   7.106 +
   7.107 +	/* central pixels: every column that has both a left and a right neighbour */
   7.108 +	count -= 2; /* first and last pixels are handled outside the loop; count is unsigned, so callers must pass count >= 2 or this wraps */
   7.109 +	while (count)
   7.110 +	{
   7.111 +		if (src0[0] != src2[0] && src1[-1] != src1[1]) /* src0 and src2 differ, and left and right differ */
   7.112 +		{
   7.113 +			dst[0] = src1[-1] == src0[0] ? src0[0] : src1[0];
   7.114 +			dst[1] = src1[1] == src0[0] ? src0[0] : src1[0];
   7.115 +		}
   7.116 +		else
   7.117 +		{
   7.118 +			dst[0] = src1[0];
   7.119 +			dst[1] = src1[0];
   7.120 +		}
   7.121 +
   7.122 +		++src0;
   7.123 +		++src1;
   7.124 +		++src2;
   7.125 +		dst += 2;
   7.126 +		--count;
   7.127 +	}
   7.128 +
   7.129 +	/* last pixel: src1[1] is never read here, so the right output is a plain copy of src1[0] */
   7.130 +	if (src1[-1] == src0[0] && src2[0] != src0[0])
   7.131 +		dst[0] = src0[0];
   7.132 +	else
   7.133 +		dst[0] = src1[0];
   7.134 +	dst[1] = src1[0];
   7.135 +}
   7.136 +
   7.137 +#ifdef MMX
   7.138 +static void internal_scale2x_16_mmx_single(u16 *dst, const u16 *src0, const u16 *src1, const u16 *src2, unsigned count)
   7.139 +{
   7.140 +	/* always do the first and last run */
   7.141 +	count -= 2 * 4;
   7.142 +
   7.143 +#ifdef __GNUC__
   7.144 +	__asm__ __volatile__ (
   7.145 +	    /* first run */
   7.146 +	    /* set the current, current_pre, current_next registers */
   7.147 +	    "movq 0(%1), %%mm0\n"
   7.148 +	    "movq 0(%1),%%mm7\n"
   7.149 +	    "movq 8(%1),%%mm1\n"
   7.150 +	    "psllq $48,%%mm0\n"
   7.151 +	    "psllq $48,%%mm1\n"
   7.152 +	    "psrlq $48, %%mm0\n"
   7.153 +	    "movq %%mm7,%%mm2\n"
   7.154 +	    "movq %%mm7,%%mm3\n"
   7.155 +	    "psllq $16,%%mm2\n"
   7.156 +	    "psrlq $16,%%mm3\n"
   7.157 +	    "por %%mm2,%%mm0\n"
   7.158 +	    "por %%mm3,%%mm1\n"
   7.159 +
   7.160 +	    /* current_upper */
   7.161 +	    "movq (%0),%%mm6\n"
   7.162 +
   7.163 +	    /* compute the upper-left pixel for dst on %%mm2 */
   7.164 +	    /* compute the upper-right pixel for dst on %%mm4 */
   7.165 +	    "movq %%mm0,%%mm2\n"
   7.166 +	    "movq %%mm1,%%mm4\n"
   7.167 +	    "movq %%mm0,%%mm3\n"
   7.168 +	    "movq %%mm1,%%mm5\n"
   7.169 +	    "pcmpeqw %%mm6,%%mm2\n"
   7.170 +	    "pcmpeqw %%mm6,%%mm4\n"
   7.171 +	    "pcmpeqw (%2),%%mm3\n"
   7.172 +	    "pcmpeqw (%2),%%mm5\n"
   7.173 +	    "pandn %%mm2,%%mm3\n"
   7.174 +	    "pandn %%mm4,%%mm5\n"
   7.175 +	    "movq %%mm0,%%mm2\n"
   7.176 +	    "movq %%mm1,%%mm4\n"
   7.177 +	    "pcmpeqw %%mm1,%%mm2\n"
   7.178 +	    "pcmpeqw %%mm0,%%mm4\n"
   7.179 +	    "pandn %%mm3,%%mm2\n"
   7.180 +	    "pandn %%mm5,%%mm4\n"
   7.181 +	    "movq %%mm2,%%mm3\n"
   7.182 +	    "movq %%mm4,%%mm5\n"
   7.183 +	    "pand %%mm6,%%mm2\n"
   7.184 +	    "pand %%mm6,%%mm4\n"
   7.185 +	    "pandn %%mm7,%%mm3\n"
   7.186 +	    "pandn %%mm7,%%mm5\n"
   7.187 +	    "por %%mm3,%%mm2\n"
   7.188 +	    "por %%mm5,%%mm4\n"
   7.189 +
   7.190 +	    /* set *dst */
   7.191 +	    "movq %%mm2,%%mm3\n"
   7.192 +	    "punpcklwd %%mm4,%%mm2\n"
   7.193 +	    "punpckhwd %%mm4,%%mm3\n"
   7.194 +	    "movq %%mm2,(%3)\n"
   7.195 +	    "movq %%mm3,8(%3)\n"
   7.196 +
   7.197 +	    /* next */
   7.198 +	    "addl $8,%0\n"
   7.199 +	    "addl $8,%1\n"
   7.200 +	    "addl $8,%2\n"
   7.201 +	    "addl $16,%3\n"
   7.202 +
   7.203 +	    /* central runs */
   7.204 +	    "shrl $2,%4\n"
   7.205 +	    "jz 1f\n"
   7.206 +
   7.207 +	    "0:\n"
   7.208 +
   7.209 +	    /* set the current, current_pre, current_next registers */
   7.210 +	    "movq -8(%1),%%mm0\n"
   7.211 +	    "movq (%1),%%mm7\n"
   7.212 +	    "movq 8(%1),%%mm1\n"
   7.213 +	    "psrlq $48,%%mm0\n"
   7.214 +	    "psllq $48,%%mm1\n"
   7.215 +	    "movq %%mm7,%%mm2\n"
   7.216 +	    "movq %%mm7,%%mm3\n"
   7.217 +	    "psllq $16,%%mm2\n"
   7.218 +	    "psrlq $16,%%mm3\n"
   7.219 +	    "por %%mm2,%%mm0\n"
   7.220 +	    "por %%mm3,%%mm1\n"
   7.221 +
   7.222 +	    /* current_upper */
   7.223 +	    "movq (%0),%%mm6\n"
   7.224 +
   7.225 +	    /* compute the upper-left pixel for dst on %%mm2 */
   7.226 +	    /* compute the upper-right pixel for dst on %%mm4 */
   7.227 +	    "movq %%mm0,%%mm2\n"
   7.228 +	    "movq %%mm1,%%mm4\n"
   7.229 +	    "movq %%mm0,%%mm3\n"
   7.230 +	    "movq %%mm1,%%mm5\n"
   7.231 +	    "pcmpeqw %%mm6,%%mm2\n"
   7.232 +	    "pcmpeqw %%mm6,%%mm4\n"
   7.233 +	    "pcmpeqw (%2),%%mm3\n"
   7.234 +	    "pcmpeqw (%2),%%mm5\n"
   7.235 +	    "pandn %%mm2,%%mm3\n"
   7.236 +	    "pandn %%mm4,%%mm5\n"
   7.237 +	    "movq %%mm0,%%mm2\n"
   7.238 +	    "movq %%mm1,%%mm4\n"
   7.239 +	    "pcmpeqw %%mm1,%%mm2\n"
   7.240 +	    "pcmpeqw %%mm0,%%mm4\n"
   7.241 +	    "pandn %%mm3,%%mm2\n"
   7.242 +	    "pandn %%mm5,%%mm4\n"
   7.243 +	    "movq %%mm2,%%mm3\n"
   7.244 +	    "movq %%mm4,%%mm5\n"
   7.245 +	    "pand %%mm6,%%mm2\n"
   7.246 +	    "pand %%mm6,%%mm4\n"
   7.247 +	    "pandn %%mm7,%%mm3\n"
   7.248 +	    "pandn %%mm7,%%mm5\n"
   7.249 +	    "por %%mm3,%%mm2\n"
   7.250 +	    "por %%mm5,%%mm4\n"
   7.251 +
   7.252 +	    /* set *dst */
   7.253 +	    "movq %%mm2,%%mm3\n"
   7.254 +	    "punpcklwd %%mm4,%%mm2\n"
   7.255 +	    "punpckhwd %%mm4,%%mm3\n"
   7.256 +	    "movq %%mm2,(%3)\n"
   7.257 +	    "movq %%mm3,8(%3)\n"
   7.258 +
   7.259 +	    /* next */
   7.260 +	    "addl $8,%0\n"
   7.261 +	    "addl $8,%1\n"
   7.262 +	    "addl $8,%2\n"
   7.263 +	    "addl $16,%3\n"
   7.264 +
   7.265 +	    "decl %4\n"
   7.266 +	    "jnz 0b\n"
   7.267 +	    "1:\n"
   7.268 +
   7.269 +	    /* final run */
   7.270 +	    /* set the current, current_pre, current_next registers */
   7.271 +	    "movq (%1),%%mm1\n"
   7.272 +	    "movq (%1),%%mm7\n"
   7.273 +	    "movq -8(%1),%%mm0\n"
   7.274 +	    "psrlq $48,%%mm1\n"
   7.275 +	    "psrlq $48,%%mm0\n"
   7.276 +	    "psllq $48,%%mm1\n"
   7.277 +	    "movq %%mm7,%%mm2\n"
   7.278 +	    "movq %%mm7,%%mm3\n"
   7.279 +	    "psllq $16,%%mm2\n"
   7.280 +	    "psrlq $16,%%mm3\n"
   7.281 +	    "por %%mm2,%%mm0\n"
   7.282 +	    "por %%mm3,%%mm1\n"
   7.283 +
   7.284 +	    /* current_upper */
   7.285 +	    "movq (%0),%%mm6\n"
   7.286 +
   7.287 +	    /* compute the upper-left pixel for dst on %%mm2 */
   7.288 +	    /* compute the upper-right pixel for dst on %%mm4 */
   7.289 +	    "movq %%mm0,%%mm2\n"
   7.290 +	    "movq %%mm1,%%mm4\n"
   7.291 +	    "movq %%mm0,%%mm3\n"
   7.292 +	    "movq %%mm1,%%mm5\n"
   7.293 +	    "pcmpeqw %%mm6,%%mm2\n"
   7.294 +	    "pcmpeqw %%mm6,%%mm4\n"
   7.295 +	    "pcmpeqw (%2),%%mm3\n"
   7.296 +	    "pcmpeqw (%2),%%mm5\n"
   7.297 +	    "pandn %%mm2,%%mm3\n"
   7.298 +	    "pandn %%mm4,%%mm5\n"
   7.299 +	    "movq %%mm0,%%mm2\n"
   7.300 +	    "movq %%mm1,%%mm4\n"
   7.301 +	    "pcmpeqw %%mm1,%%mm2\n"
   7.302 +	    "pcmpeqw %%mm0,%%mm4\n"
   7.303 +	    "pandn %%mm3,%%mm2\n"
   7.304 +	    "pandn %%mm5,%%mm4\n"
   7.305 +	    "movq %%mm2,%%mm3\n"
   7.306 +	    "movq %%mm4,%%mm5\n"
   7.307 +	    "pand %%mm6,%%mm2\n"
   7.308 +	    "pand %%mm6,%%mm4\n"
   7.309 +	    "pandn %%mm7,%%mm3\n"
   7.310 +	    "pandn %%mm7,%%mm5\n"
   7.311 +	    "por %%mm3,%%mm2\n"
   7.312 +	    "por %%mm5,%%mm4\n"
   7.313 +
   7.314 +	    /* set *dst */
   7.315 +	    "movq %%mm2,%%mm3\n"
   7.316 +	    "punpcklwd %%mm4,%%mm2\n"
   7.317 +	    "punpckhwd %%mm4,%%mm3\n"
   7.318 +	    "movq %%mm2,(%3)\n"
   7.319 +	    "movq %%mm3,8(%3)\n"
   7.320 +	    "emms\n"
   7.321 +
   7.322 +		: "+r" (src0), "+r" (src1), "+r" (src2), "+r" (dst), "+r" (count)
   7.323 +		:
   7.324 +		: "cc"
   7.325 +	    );
   7.326 +#else
   7.327 +	__asm {
   7.328 +		mov eax, src0;
   7.329 +		mov ebx, src1;
   7.330 +		mov ecx, src2;
   7.331 +		mov edx, dst;
   7.332 +		mov esi, count;
   7.333 +
   7.334 +		/* first run */
   7.335 +		/* set the current, current_pre, current_next registers */
   7.336 +		movq  mm0, qword ptr [ebx];
   7.337 +		movq  mm7, qword ptr [ebx];
   7.338 +		movq  mm1, qword ptr [ebx + 8];
   7.339 +		psllq mm0, 48;
   7.340 +		psllq mm1, 48;
   7.341 +		psrlq mm0, 48;
   7.342 +		movq  mm2, mm7;
   7.343 +		movq  mm3, mm7;
   7.344 +		psllq mm2, 16;
   7.345 +		psrlq mm3, 16;
   7.346 +		por	  mm0, mm2;
   7.347 +		por	  mm1, mm3;
   7.348 +
   7.349 +		/* current_upper */
   7.350 +		movq mm6, qword ptr [eax];
   7.351 +
   7.352 +		/* compute the upper-left pixel for dst on %%mm2 */
   7.353 +		/* compute the upper-right pixel for dst on %%mm4 */
   7.354 +		movq	mm2, mm0;
   7.355 +		movq	mm4, mm1;
   7.356 +		movq	mm3, mm0;
   7.357 +		movq	mm5, mm1;
   7.358 +		pcmpeqw mm2, mm6;
   7.359 +		pcmpeqw mm4, mm6;
   7.360 +		pcmpeqw mm3, qword ptr [ecx];
   7.361 +		pcmpeqw mm5, qword ptr [ecx];
   7.362 +		pandn	mm3, mm2;
   7.363 +		pandn	mm5, mm4;
   7.364 +		movq	mm2, mm0;
   7.365 +		movq	mm4, mm1;
   7.366 +		pcmpeqw mm2, mm1;
   7.367 +		pcmpeqw mm4, mm0;
   7.368 +		pandn	mm2, mm3;
   7.369 +		pandn	mm4, mm5;
   7.370 +		movq	mm3, mm2;
   7.371 +		movq	mm5, mm4;
   7.372 +		pand	mm2, mm6;
   7.373 +		pand	mm4, mm6;
   7.374 +		pandn	mm3, mm7;
   7.375 +		pandn	mm5, mm7;
   7.376 +		por		mm2, mm3;
   7.377 +		por		mm4, mm5;
   7.378 +
   7.379 +		/* set *dst0 */
   7.380 +		movq	   mm3, mm2;
   7.381 +		punpcklwd  mm2, mm4;
   7.382 +		punpckhwd  mm3, mm4;
   7.383 +		movq qword ptr [edx], mm2;
   7.384 +		movq qword ptr [edx + 8], mm3;
   7.385 +
   7.386 +		/* next */
   7.387 +		add eax, 8;
   7.388 +		add ebx, 8;
   7.389 +		add ecx, 8;
   7.390 +		add edx, 16;
   7.391 +
   7.392 +		/* central runs */
   7.393 +		shr esi, 2;
   7.394 +		jz	label1;
   7.395 +		align 4;
   7.396 +label0:
   7.397 +
   7.398 +		/* set the current, current_pre, current_next registers */
   7.399 +		movq mm0, qword ptr [ebx - 8];
   7.400 +		movq  mm7, qword ptr [ebx];
   7.401 +		movq  mm1, qword ptr [ebx + 8];
   7.402 +		psrlq mm0, 48;
   7.403 +		psllq mm1, 48;
   7.404 +		movq  mm2, mm7;
   7.405 +		movq  mm3, mm7;
   7.406 +		psllq mm2, 16;
   7.407 +		psrlq mm3, 16;
   7.408 +		por	  mm0, mm2;
   7.409 +		por	  mm1, mm3;
   7.410 +
   7.411 +		/* current_upper */
   7.412 +		movq mm6, qword ptr [eax];
   7.413 +
   7.414 +		/* compute the upper-left pixel for dst on %%mm2 */
   7.415 +		/* compute the upper-right pixel for dst on %%mm4 */
   7.416 +		movq	mm2, mm0;
   7.417 +		movq	mm4, mm1;
   7.418 +		movq	mm3, mm0;
   7.419 +		movq	mm5, mm1;
   7.420 +		pcmpeqw mm2, mm6;
   7.421 +		pcmpeqw mm4, mm6;
   7.422 +		pcmpeqw mm3, qword ptr [ecx];
   7.423 +		pcmpeqw mm5, qword ptr [ecx];
   7.424 +		pandn	mm3, mm2;
   7.425 +		pandn	mm5, mm4;
   7.426 +		movq	mm2, mm0;
   7.427 +		movq	mm4, mm1;
   7.428 +		pcmpeqw mm2, mm1;
   7.429 +		pcmpeqw mm4, mm0;
   7.430 +		pandn	mm2, mm3;
   7.431 +		pandn	mm4, mm5;
   7.432 +		movq	mm3, mm2;
   7.433 +		movq	mm5, mm4;
   7.434 +		pand	mm2, mm6;
   7.435 +		pand	mm4, mm6;
   7.436 +		pandn	mm3, mm7;
   7.437 +		pandn	mm5, mm7;
   7.438 +		por		mm2, mm3;
   7.439 +		por		mm4, mm5;
   7.440 +
   7.441 +		/* set *dst */
   7.442 +		movq	   mm3, mm2;
   7.443 +		punpcklwd  mm2, mm4;
   7.444 +		punpckhwd  mm3, mm4;
   7.445 +		movq qword ptr [edx], mm2;
   7.446 +		movq qword ptr [edx + 8], mm3;
   7.447 +
   7.448 +		/* next */
   7.449 +		add eax, 8;
   7.450 +		add ebx, 8;
   7.451 +		add ecx, 8;
   7.452 +		add edx, 16;
   7.453 +
   7.454 +		dec esi;
   7.455 +		jnz label0;
   7.456 +label1:
   7.457 +
   7.458 +		/* final run */
   7.459 +		/* set the current, current_pre, current_next registers */
   7.460 +		movq mm1, qword ptr [ebx];
   7.461 +		movq  mm7, qword ptr [ebx];
   7.462 +		movq  mm0, qword ptr [ebx - 8];
   7.463 +		psrlq mm1, 48;
   7.464 +		psrlq mm0, 48;
   7.465 +		psllq mm1, 48;
   7.466 +		movq  mm2, mm7;
   7.467 +		movq  mm3, mm7;
   7.468 +		psllq mm2, 16;
   7.469 +		psrlq mm3, 16;
   7.470 +		por	  mm0, mm2;
   7.471 +		por	  mm1, mm3;
   7.472 +
   7.473 +		/* current_upper */
   7.474 +		movq mm6, qword ptr [eax];
   7.475 +
   7.476 +		/* compute the upper-left pixel for dst on %%mm2 */
   7.477 +		/* compute the upper-right pixel for dst on %%mm4 */
   7.478 +		movq	mm2, mm0;
   7.479 +		movq	mm4, mm1;
   7.480 +		movq	mm3, mm0;
   7.481 +		movq	mm5, mm1;
   7.482 +		pcmpeqw mm2, mm6;
   7.483 +		pcmpeqw mm4, mm6;
   7.484 +		pcmpeqw mm3, qword ptr [ecx];
   7.485 +		pcmpeqw mm5, qword ptr [ecx];
   7.486 +		pandn	mm3, mm2;
   7.487 +		pandn	mm5, mm4;
   7.488 +		movq	mm2, mm0;
   7.489 +		movq	mm4, mm1;
   7.490 +		pcmpeqw mm2, mm1;
   7.491 +		pcmpeqw mm4, mm0;
   7.492 +		pandn	mm2, mm3;
   7.493 +		pandn	mm4, mm5;
   7.494 +		movq	mm3, mm2;
   7.495 +		movq	mm5, mm4;
   7.496 +		pand	mm2, mm6;
   7.497 +		pand	mm4, mm6;
   7.498 +		pandn	mm3, mm7;
   7.499 +		pandn	mm5, mm7;
   7.500 +		por		mm2, mm3;
   7.501 +		por		mm4, mm5;
   7.502 +
   7.503 +		/* set *dst */
   7.504 +		movq	   mm3, mm2;
   7.505 +		punpcklwd  mm2, mm4;
   7.506 +		punpckhwd  mm3, mm4;
   7.507 +		movq qword ptr [edx], mm2;
   7.508 +		movq qword ptr [edx + 8], mm3;
   7.509 +
   7.510 +		mov src0, eax;
   7.511 +		mov src1, ebx;
   7.512 +		mov src2, ecx;
   7.513 +		mov dst, edx;
   7.514 +		mov count, esi;
   7.515 +
   7.516 +		emms;
   7.517 +	}
   7.518 +#endif
   7.519 +}
   7.520 +/* Scale2x kernel for one output row of 32-bit pixels (MMX, two source pixels per 8-byte register): writes two output pixels to dst for every pixel of src1, using src0 and src2 as the neighbouring rows. */
   7.521 +static void internal_scale2x_32_mmx_single(u32 *dst, const u32 *src0, const u32 *src1, const u32 *src2, unsigned count)
   7.522 +{
   7.523 +	/* always do the first and last run: each handles one 8-byte group (2 pixels), so only count - 4 pixels are left for the central loop; count is unsigned, so a value below 4 would wrap (not checked here) */
   7.524 +	count -= 2 * 2;
   7.525 +	/* Per output pixel: take the src0 pixel when it equals the left (resp. right) neighbour in src1 and that neighbour differs from both the src2 pixel and the opposite neighbour; otherwise keep the src1 pixel. */
   7.526 +#ifdef __GNUC__
   7.527 +	__asm__ __volatile__ (
   7.528 +	    /* first run: the first pixel pair of the row */
   7.529 +	    /* mm7 = current pair; mm0 = left neighbours (pixel 0 doubles as its own left neighbour); mm1 = right neighbours (second pixel of this pair, first pixel of the next pair) */
   7.530 +	    "movq 0(%1),%%mm0\n"
   7.531 +	    "movq 0(%1),%%mm7\n"
   7.532 +	    "movq 8(%1),%%mm1\n"
   7.533 +	    "psllq $32,%%mm0\n"
   7.534 +	    "psllq $32,%%mm1\n"
   7.535 +	    "psrlq $32,%%mm0\n"
   7.536 +	    "movq %%mm7,%%mm2\n"
   7.537 +	    "movq %%mm7,%%mm3\n"
   7.538 +	    "psllq $32,%%mm2\n"
   7.539 +	    "psrlq $32,%%mm3\n"
   7.540 +	    "por %%mm2,%%mm0\n"
   7.541 +	    "por %%mm3,%%mm1\n"
   7.542 +
   7.543 +	    /* current_upper: the matching pixel pair of src0 */
   7.544 +	    "movq (%0),%%mm6\n"
   7.545 +
   7.546 +	    /* compute the upper-left pixel for dst on %%mm2 (left output of each source pixel) */
   7.547 +	    /* compute the upper-right pixel for dst on %%mm4 (right output of each source pixel) */
   7.548 +	    "movq %%mm0,%%mm2\n"
   7.549 +	    "movq %%mm1,%%mm4\n"
   7.550 +	    "movq %%mm0,%%mm3\n"
   7.551 +	    "movq %%mm1,%%mm5\n"
   7.552 +	    "pcmpeqd %%mm6,%%mm2\n" /* mm2 = (left == upper) */
   7.553 +	    "pcmpeqd %%mm6,%%mm4\n" /* mm4 = (right == upper) */
   7.554 +	    "pcmpeqd (%2),%%mm3\n" /* mm3 = (left == lower), lower being the src2 pair */
   7.555 +	    "pcmpeqd (%2),%%mm5\n" /* mm5 = (right == lower) */
   7.556 +	    "pandn %%mm2,%%mm3\n" /* mm3 = (left == upper) & ~(left == lower) */
   7.557 +	    "pandn %%mm4,%%mm5\n" /* mm5 = (right == upper) & ~(right == lower) */
   7.558 +	    "movq %%mm0,%%mm2\n"
   7.559 +	    "movq %%mm1,%%mm4\n"
   7.560 +	    "pcmpeqd %%mm1,%%mm2\n" /* mm2 = (left == right) */
   7.561 +	    "pcmpeqd %%mm0,%%mm4\n" /* mm4 = (right == left) */
   7.562 +	    "pandn %%mm3,%%mm2\n" /* mm2 = selection mask for the left output */
   7.563 +	    "pandn %%mm5,%%mm4\n" /* mm4 = selection mask for the right output */
   7.564 +	    "movq %%mm2,%%mm3\n"
   7.565 +	    "movq %%mm4,%%mm5\n"
   7.566 +	    "pand %%mm6,%%mm2\n" /* mask set: take the upper pixel */
   7.567 +	    "pand %%mm6,%%mm4\n"
   7.568 +	    "pandn %%mm7,%%mm3\n" /* mask clear: keep the current pixel */
   7.569 +	    "pandn %%mm7,%%mm5\n"
   7.570 +	    "por %%mm3,%%mm2\n"
   7.571 +	    "por %%mm5,%%mm4\n"
   7.572 +
   7.573 +	    /* set *dst: interleave the left/right results so each source pixel yields two adjacent output pixels (16 bytes in total) */
   7.574 +	    "movq %%mm2,%%mm3\n"
   7.575 +	    "punpckldq %%mm4,%%mm2\n"
   7.576 +	    "punpckhdq %%mm4,%%mm3\n"
   7.577 +	    "movq %%mm2,(%3)\n"
   7.578 +	    "movq %%mm3, 8(%3)\n"
   7.579 +
   7.580 +	    /* next: advance the sources by 8 bytes (2 pixels) and dst by 16 bytes (4 pixels); NOTE(review): addl on pointer operands assumes a 32-bit x86 target */
   7.581 +	    "addl $8,%0\n"
   7.582 +	    "addl $8,%1\n"
   7.583 +	    "addl $8,%2\n"
   7.584 +	    "addl $16,%3\n"
   7.585 +
   7.586 +	    /* central runs: count / 2 pixel pairs remain; skip the loop when that is zero */
   7.587 +	    "shrl $1,%4\n"
   7.588 +	    "jz 1f\n"
   7.589 +
   7.590 +	    "0:\n"
   7.591 +
   7.592 +	    /* mm7 = current pair; mm0 = left neighbours (last pixel of the previous pair, first pixel of this pair); mm1 = right neighbours (second pixel of this pair, first pixel of the next pair) */
   7.593 +	    "movq -8(%1),%%mm0\n"
   7.594 +	    "movq (%1),%%mm7\n"
   7.595 +	    "movq 8(%1),%%mm1\n"
   7.596 +	    "psrlq $32,%%mm0\n"
   7.597 +	    "psllq $32,%%mm1\n"
   7.598 +	    "movq %%mm7,%%mm2\n"
   7.599 +	    "movq %%mm7,%%mm3\n"
   7.600 +	    "psllq $32,%%mm2\n"
   7.601 +	    "psrlq $32,%%mm3\n"
   7.602 +	    "por %%mm2,%%mm0\n"
   7.603 +	    "por %%mm3,%%mm1\n"
   7.604 +
   7.605 +	    /* current_upper: the matching pixel pair of src0 */
   7.606 +	    "movq (%0),%%mm6\n"
   7.607 +
   7.608 +	    /* compute the upper-left pixel for dst on %%mm2 (same mask-and-select sequence as in the first run) */
   7.609 +	    /* compute the upper-right pixel for dst on %%mm4 */
   7.610 +	    "movq %%mm0,%%mm2\n"
   7.611 +	    "movq %%mm1,%%mm4\n"
   7.612 +	    "movq %%mm0,%%mm3\n"
   7.613 +	    "movq %%mm1,%%mm5\n"
   7.614 +	    "pcmpeqd %%mm6,%%mm2\n"
   7.615 +	    "pcmpeqd %%mm6,%%mm4\n"
   7.616 +	    "pcmpeqd (%2),%%mm3\n"
   7.617 +	    "pcmpeqd (%2),%%mm5\n"
   7.618 +	    "pandn %%mm2,%%mm3\n"
   7.619 +	    "pandn %%mm4,%%mm5\n"
   7.620 +	    "movq %%mm0,%%mm2\n"
   7.621 +	    "movq %%mm1,%%mm4\n"
   7.622 +	    "pcmpeqd %%mm1,%%mm2\n"
   7.623 +	    "pcmpeqd %%mm0,%%mm4\n"
   7.624 +	    "pandn %%mm3,%%mm2\n"
   7.625 +	    "pandn %%mm5,%%mm4\n"
   7.626 +	    "movq %%mm2,%%mm3\n"
   7.627 +	    "movq %%mm4,%%mm5\n"
   7.628 +	    "pand %%mm6,%%mm2\n"
   7.629 +	    "pand %%mm6,%%mm4\n"
   7.630 +	    "pandn %%mm7,%%mm3\n"
   7.631 +	    "pandn %%mm7,%%mm5\n"
   7.632 +	    "por %%mm3,%%mm2\n"
   7.633 +	    "por %%mm5,%%mm4\n"
   7.634 +
   7.635 +	    /* set *dst: interleave the left/right results and store 16 bytes */
   7.636 +	    "movq %%mm2,%%mm3\n"
   7.637 +	    "punpckldq %%mm4,%%mm2\n"
   7.638 +	    "punpckhdq %%mm4,%%mm3\n"
   7.639 +	    "movq %%mm2,(%3)\n"
   7.640 +	    "movq %%mm3,8(%3)\n"
   7.641 +
   7.642 +	    /* next: sources advance by 8 bytes, dst by 16 bytes */
   7.643 +	    "addl $8,%0\n"
   7.644 +	    "addl $8,%1\n"
   7.645 +	    "addl $8,%2\n"
   7.646 +	    "addl $16,%3\n"
   7.647 +
   7.648 +	    "decl %4\n"
   7.649 +	    "jnz 0b\n"
   7.650 +	    "1:\n"
   7.651 +
   7.652 +	    /* final run: the last pixel pair of the row */
   7.653 +	    /* mm7 = current pair; mm0 = left neighbours (last pixel of the previous pair, first pixel of this pair); mm1 = right neighbours (the last pixel doubles as its own right neighbour) */
   7.654 +	    "movq (%1),%%mm1\n"
   7.655 +	    "movq (%1),%%mm7\n"
   7.656 +	    "movq -8(%1), %%mm0\n"
   7.657 +	    "psrlq $32,%%mm1\n"
   7.658 +	    "psrlq $32,%%mm0\n"
   7.659 +	    "psllq $32,%%mm1\n"
   7.660 +	    "movq %%mm7,%%mm2\n"
   7.661 +	    "movq %%mm7,%%mm3\n"
   7.662 +	    "psllq $32,%%mm2\n"
   7.663 +	    "psrlq $32,%%mm3\n"
   7.664 +	    "por %%mm2,%%mm0\n"
   7.665 +	    "por %%mm3,%%mm1\n"
   7.666 +
   7.667 +	    /* current_upper: the matching pixel pair of src0 */
   7.668 +	    "movq (%0),%%mm6\n"
   7.669 +
   7.670 +	    /* compute the upper-left pixel for dst on %%mm2 (same mask-and-select sequence as in the first run) */
   7.671 +	    /* compute the upper-right pixel for dst on %%mm4 */
   7.672 +	    "movq %%mm0,%%mm2\n"
   7.673 +	    "movq %%mm1,%%mm4\n"
   7.674 +	    "movq %%mm0,%%mm3\n"
   7.675 +	    "movq %%mm1,%%mm5\n"
   7.676 +	    "pcmpeqd %%mm6,%%mm2\n"
   7.677 +	    "pcmpeqd %%mm6,%%mm4\n"
   7.678 +	    "pcmpeqd (%2),%%mm3\n"
   7.679 +	    "pcmpeqd (%2),%%mm5\n"
   7.680 +	    "pandn %%mm2,%%mm3\n"
   7.681 +	    "pandn %%mm4,%%mm5\n"
   7.682 +	    "movq %%mm0,%%mm2\n"
   7.683 +	    "movq %%mm1,%%mm4\n"
   7.684 +	    "pcmpeqd %%mm1,%%mm2\n"
   7.685 +	    "pcmpeqd %%mm0,%%mm4\n"
   7.686 +	    "pandn %%mm3,%%mm2\n"
   7.687 +	    "pandn %%mm5,%%mm4\n"
   7.688 +	    "movq %%mm2,%%mm3\n"
   7.689 +	    "movq %%mm4,%%mm5\n"
   7.690 +	    "pand %%mm6,%%mm2\n"
   7.691 +	    "pand %%mm6,%%mm4\n"
   7.692 +	    "pandn %%mm7,%%mm3\n"
   7.693 +	    "pandn %%mm7,%%mm5\n"
   7.694 +	    "por %%mm3,%%mm2\n"
   7.695 +	    "por %%mm5,%%mm4\n"
   7.696 +
   7.697 +	    /* set *dst: interleave the left/right results, store 16 bytes, then leave MMX state with emms */
   7.698 +	    "movq %%mm2,%%mm3\n"
   7.699 +	    "punpckldq %%mm4,%%mm2\n"
   7.700 +	    "punpckhdq %%mm4,%%mm3\n"
   7.701 +	    "movq %%mm2,(%3)\n"
   7.702 +	    "movq %%mm3,8(%3)\n"
   7.703 +	    "emms\n"
   7.704 +
   7.705 +		: "+r" (src0), "+r" (src1), "+r" (src2), "+r" (dst), "+r" (count)
   7.706 +		:
   7.707 +		: "cc", "memory" /* the asm stores through dst and loads through src0..src2; none of that memory is listed as an operand, so it must be declared as clobbered */
   7.708 +	    );
   7.709 +#else
   7.710 +	__asm {
   7.711 +		mov eax, src0;
   7.712 +		mov ebx, src1;
   7.713 +		mov ecx, src2;
   7.714 +		mov edx, dst;
   7.715 +		mov esi, count;
   7.716 +
   7.717 +		/* first run: the first pixel pair of the row */
   7.718 +		/* mm7 = current pair; mm0 = left neighbours (pixel 0 doubles as its own left neighbour); mm1 = right neighbours (second pixel of this pair, first pixel of the next pair) */
   7.719 +		movq  mm0, qword ptr [ebx];
   7.720 +		movq  mm7, qword ptr [ebx];
   7.721 +		movq  mm1, qword ptr [ebx + 8];
   7.722 +		psllq mm0, 32;
   7.723 +		psllq mm1, 32;
   7.724 +		psrlq mm0, 32;
   7.725 +		movq  mm2, mm7;
   7.726 +		movq  mm3, mm7;
   7.727 +		psllq mm2, 32;
   7.728 +		psrlq mm3, 32;
   7.729 +		por	  mm0, mm2;
   7.730 +		por	  mm1, mm3;
   7.731 +
   7.732 +		/* current_upper: the matching pixel pair of src0 */
   7.733 +		movq mm6, qword ptr [eax];
   7.734 +
   7.735 +		/* compute the upper-left pixel for dst on mm2: mask = (left == upper) and not (left == lower) and not (left == right); result = mask ? upper : current */
   7.736 +		/* compute the upper-right pixel for dst on mm4: the same with left and right exchanged */
   7.737 +		movq	mm2, mm0;
   7.738 +		movq	mm4, mm1;
   7.739 +		movq	mm3, mm0;
   7.740 +		movq	mm5, mm1;
   7.741 +		pcmpeqd mm2, mm6;
   7.742 +		pcmpeqd mm4, mm6;
   7.743 +		pcmpeqd mm3, qword ptr [ecx];
   7.744 +		pcmpeqd mm5, qword ptr [ecx];
   7.745 +		pandn	mm3, mm2;
   7.746 +		pandn	mm5, mm4;
   7.747 +		movq	mm2, mm0;
   7.748 +		movq	mm4, mm1;
   7.749 +		pcmpeqd mm2, mm1;
   7.750 +		pcmpeqd mm4, mm0;
   7.751 +		pandn	mm2, mm3;
   7.752 +		pandn	mm4, mm5;
   7.753 +		movq	mm3, mm2;
   7.754 +		movq	mm5, mm4;
   7.755 +		pand	mm2, mm6;
   7.756 +		pand	mm4, mm6;
   7.757 +		pandn	mm3, mm7;
   7.758 +		pandn	mm5, mm7;
   7.759 +		por		mm2, mm3;
   7.760 +		por		mm4, mm5;
   7.761 +
   7.762 +		/* set *dst: interleave the left/right results so each source pixel yields two adjacent output pixels (16 bytes in total) */
   7.763 +		movq	   mm3, mm2;
   7.764 +		punpckldq  mm2, mm4;
   7.765 +		punpckhdq  mm3, mm4;
   7.766 +		movq qword ptr [edx], mm2;
   7.767 +		movq qword ptr [edx + 8], mm3;
   7.768 +
   7.769 +		/* next: advance the sources by 8 bytes (2 pixels) and dst by 16 bytes (4 pixels) */
   7.770 +		add eax, 8;
   7.771 +		add ebx, 8;
   7.772 +		add ecx, 8;
   7.773 +		add edx, 16;
   7.774 +
   7.775 +		/* central runs: count / 2 pixel pairs remain; skip the loop when that is zero */
   7.776 +		shr esi, 1;
   7.777 +		jz	label1;
   7.778 +label0:
   7.779 +
   7.780 +		/* mm7 = current pair; mm0 = left neighbours (last pixel of the previous pair, first pixel of this pair); mm1 = right neighbours (second pixel of this pair, first pixel of the next pair) */
   7.781 +		movq mm0, qword ptr [ebx - 8];
   7.782 +		movq  mm7, qword ptr [ebx];
   7.783 +		movq  mm1, qword ptr [ebx + 8];
   7.784 +		psrlq mm0, 32;
   7.785 +		psllq mm1, 32;
   7.786 +		movq  mm2, mm7;
   7.787 +		movq  mm3, mm7;
   7.788 +		psllq mm2, 32;
   7.789 +		psrlq mm3, 32;
   7.790 +		por	  mm0, mm2;
   7.791 +		por	  mm1, mm3;
   7.792 +
   7.793 +		/* current_upper: the matching pixel pair of src0 */
   7.794 +		movq mm6, qword ptr[eax];
   7.795 +
   7.796 +		/* compute the upper-left pixel for dst on mm2 (same mask-and-select sequence as in the first run) */
   7.797 +		/* compute the upper-right pixel for dst on mm4 */
   7.798 +		movq	mm2, mm0;
   7.799 +		movq	mm4, mm1;
   7.800 +		movq	mm3, mm0;
   7.801 +		movq	mm5, mm1;
   7.802 +		pcmpeqd mm2, mm6;
   7.803 +		pcmpeqd mm4, mm6;
   7.804 +		pcmpeqd mm3, qword ptr[ecx];
   7.805 +		pcmpeqd mm5, qword ptr[ecx];
   7.806 +		pandn	mm3, mm2;
   7.807 +		pandn	mm5, mm4;
   7.808 +		movq	mm2, mm0;
   7.809 +		movq	mm4, mm1;
   7.810 +		pcmpeqd mm2, mm1;
   7.811 +		pcmpeqd mm4, mm0;
   7.812 +		pandn	mm2, mm3;
   7.813 +		pandn	mm4, mm5;
   7.814 +		movq	mm3, mm2;
   7.815 +		movq	mm5, mm4;
   7.816 +		pand	mm2, mm6;
   7.817 +		pand	mm4, mm6;
   7.818 +		pandn	mm3, mm7;
   7.819 +		pandn	mm5, mm7;
   7.820 +		por		mm2, mm3;
   7.821 +		por		mm4, mm5;
   7.822 +
   7.823 +		/* set *dst: interleave the left/right results and store 16 bytes */
   7.824 +		movq	   mm3, mm2;
   7.825 +		punpckldq  mm2, mm4;
   7.826 +		punpckhdq  mm3, mm4;
   7.827 +		movq qword ptr [edx], mm2;
   7.828 +		movq qword ptr [edx + 8], mm3;
   7.829 +
   7.830 +		/* next: sources advance by 8 bytes, dst by 16 bytes */
   7.831 +		add eax, 8;
   7.832 +		add ebx, 8;
   7.833 +		add ecx, 8;
   7.834 +		add edx, 16;
   7.835 +
   7.836 +		dec esi;
   7.837 +		jnz label0;
   7.838 +label1:
   7.839 +
   7.840 +		/* final run: the last pixel pair of the row */
   7.841 +		/* mm7 = current pair; mm0 = left neighbours (last pixel of the previous pair, first pixel of this pair); mm1 = right neighbours (the last pixel doubles as its own right neighbour) */
   7.842 +		movq mm1, qword ptr [ebx];
   7.843 +		movq  mm7, qword ptr [ebx];
   7.844 +		movq  mm0, qword ptr [ebx - 8];
   7.845 +		psrlq mm1, 32;
   7.846 +		psrlq mm0, 32;
   7.847 +		psllq mm1, 32;
   7.848 +		movq  mm2, mm7;
   7.849 +		movq  mm3, mm7;
   7.850 +		psllq mm2, 32;
   7.851 +		psrlq mm3, 32;
   7.852 +		por	  mm0, mm2;
   7.853 +		por	  mm1, mm3;
   7.854 +
   7.855 +		/* current_upper: the matching pixel pair of src0 */
   7.856 +		movq mm6, qword ptr [eax];
   7.857 +
   7.858 +		/* compute the upper-left pixel for dst on mm2 (same mask-and-select sequence as in the first run) */
   7.859 +		/* compute the upper-right pixel for dst on mm4 */
   7.860 +		movq	mm2, mm0;
   7.861 +		movq	mm4, mm1;
   7.862 +		movq	mm3, mm0;
   7.863 +		movq	mm5, mm1;
   7.864 +		pcmpeqd mm2, mm6;
   7.865 +		pcmpeqd mm4, mm6;
   7.866 +		pcmpeqd mm3, qword ptr [ecx];
   7.867 +		pcmpeqd mm5, qword ptr [ecx];
   7.868 +		pandn	mm3, mm2;
   7.869 +		pandn	mm5, mm4;
   7.870 +		movq	mm2, mm0;
   7.871 +		movq	mm4, mm1;
   7.872 +		pcmpeqd mm2, mm1;
   7.873 +		pcmpeqd mm4, mm0;
   7.874 +		pandn	mm2, mm3;
   7.875 +		pandn	mm4, mm5;
   7.876 +		movq	mm3, mm2;
   7.877 +		movq	mm5, mm4;
   7.878 +		pand	mm2, mm6;
   7.879 +		pand	mm4, mm6;
   7.880 +		pandn	mm3, mm7;
   7.881 +		pandn	mm5, mm7;
   7.882 +		por		mm2, mm3;
   7.883 +		por		mm4, mm5;
   7.884 +
   7.885 +		/* set *dst: interleave the left/right results and store 16 bytes */
   7.886 +		movq	   mm3, mm2;
   7.887 +		punpckldq  mm2, mm4;
   7.888 +		punpckhdq  mm3, mm4;
   7.889 +		movq qword ptr [edx], mm2;
   7.890 +		movq qword ptr [edx + 8], mm3;
   7.891 +
   7.892 +		mov src0, eax;
   7.893 +		mov src1, ebx;
   7.894 +		mov src2, ecx;
   7.895 +		mov dst, edx;
   7.896 +		mov count, esi;
   7.897 +
   7.898 +		emms;
   7.899 +	}
   7.900 +#endif
   7.901 +}
   7.902 +
   7.903 +static void internal_scale2x_16_mmx(u16 *dst0, u16 *dst1, const u16 *src0, const u16 *src1, const u16 *src2, unsigned count)
   7.904 +{	// Produces both output rows for source row src1; callers are expected to pass count >= 2*4 (the assert for this is not enabled).
   7.905 +	const u16 *const rowAbove = src0, *const rowBelow = src2;
   7.906 +	internal_scale2x_16_mmx_single(dst0, rowAbove, src1, rowBelow, count);	// first output row
   7.907 +	internal_scale2x_16_mmx_single(dst1, rowBelow, src1, rowAbove, count);	// second output row: neighbour rows exchanged
   7.908 +}
   7.909 +
   7.910 +static void internal_scale2x_32_mmx(u32 *dst0, u32 *dst1, const u32 *src0, const u32 *src1, const u32 *src2, unsigned count)
   7.911 +{	// Produces both output rows for source row src1; callers are expected to pass count >= 2*2 (the assert for this is not enabled).
   7.912 +	const u32 *const rowAbove = src0, *const rowBelow = src2;
   7.913 +	internal_scale2x_32_mmx_single(dst0, rowAbove, src1, rowBelow, count);	// first output row
   7.914 +	internal_scale2x_32_mmx_single(dst1, rowBelow, src1, rowAbove, count);	// second output row: neighbour rows exchanged
   7.915 +}
   7.916 +
   7.917 +#endif
   7.918 +
   7.919 +// Doubles a 16-bit image with the scale2x kernels: every source row produces
   7.920 +// two destination rows.  srcPitch and dstPitch are byte strides; deltaPtr is
   7.921 +// unused.  The kernels need the rows above and below the current one, so the
   7.922 +// first and last rows stand in for their own missing neighbour.
   7.923 +void AdMame2x(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
   7.924 +              u8 *dstPtr, u32 dstPitch, int width, int height)
   7.925 +{
   7.926 +	u16 *dst0  = (u16 *)dstPtr;
   7.927 +	u16 *above = (u16 *)srcPtr;
   7.928 +	u16 *cur   = above;
   7.929 +
   7.930 +	// A bounded row loop: nothing is drawn for height <= 0 and a single row is
   7.931 +	// its own upper and lower neighbour, so small heights cannot run away.
   7.932 +	for (int y = 0; y < height; ++y)
   7.933 +	{
   7.934 +		u16 *dst1  = dst0 + (dstPitch >> 1);	// second output row of this pair
   7.935 +		u16 *below = (y + 1 < height) ? cur + (srcPitch >> 1) : cur;
   7.936 +
   7.937 +#ifdef MMX
   7.938 +		if (cpu_mmx)
   7.939 +		{
   7.940 +			internal_scale2x_16_mmx(dst0, dst1, above, cur, below, width);
   7.941 +		}
   7.942 +		else
   7.943 +#endif
   7.944 +		{
   7.945 +			internal_scale2x_16_def(dst0, above, cur, below, width);
   7.946 +			internal_scale2x_16_def(dst1, below, cur, above, width);
   7.947 +		}
   7.948 +
   7.949 +		above = cur;
   7.950 +		cur	  = below;
   7.951 +		dst0 += dstPitch;	// u16 elements: dstPitch of them span 2 * dstPitch bytes, i.e. two output rows
   7.952 +	}
   7.953 +}
   7.979 +
   7.980 +// Doubles a 32-bit image with the scale2x kernels: every source row produces
   7.981 +// two destination rows.  srcPitch and dstPitch are byte strides; deltaPtr is
   7.982 +// unused.  The kernels need the rows above and below the current one, so the
   7.983 +// first and last rows stand in for their own missing neighbour.
   7.984 +void AdMame2x32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
   7.985 +                u8 *dstPtr, u32 dstPitch, int width, int height)
   7.986 +{
   7.987 +	u32 *dst0  = (u32 *)dstPtr;
   7.988 +	u32 *above = (u32 *)srcPtr;
   7.989 +	u32 *cur   = above;
   7.990 +
   7.991 +	// A bounded row loop: nothing is drawn for height <= 0 and a single row is
   7.992 +	// its own upper and lower neighbour, so small heights cannot run away.
   7.993 +	for (int y = 0; y < height; ++y)
   7.994 +	{
   7.995 +		u32 *dst1  = dst0 + (dstPitch >> 2);	// second output row of this pair
   7.996 +		u32 *below = (y + 1 < height) ? cur + (srcPitch >> 2) : cur;
   7.997 +
   7.998 +#ifdef MMX
   7.999 +		if (cpu_mmx)
  7.1000 +		{
  7.1001 +			internal_scale2x_32_mmx(dst0, dst1, above, cur, below, width);
  7.1002 +		}
  7.1003 +		else
  7.1004 +#endif
  7.1005 +		{
  7.1006 +			internal_scale2x_32_def(dst0, above, cur, below, width);
  7.1007 +			internal_scale2x_32_def(dst1, below, cur, above, width);
  7.1008 +		}
  7.1009 +
  7.1010 +		above = cur;
  7.1011 +		cur	  = below;
  7.1012 +		dst0 += dstPitch >> 1;	// u32 elements: dstPitch / 2 of them span 2 * dstPitch bytes, i.e. two output rows
  7.1013 +	}
  7.1014 +}
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/src/filters/bilinear.cpp	Sun Mar 04 20:32:31 2012 -0600
     8.3 @@ -0,0 +1,418 @@
     8.4 +/**     Code adapted from Exult source code by Forgotten
     8.5 +**	Scale.cc - Trying to scale with bilinear interpolation.
     8.6 +**
     8.7 +**	Written: 6/14/00 - JSF
     8.8 +**/
     8.9 +
    8.10 +#include "../common/System.h"
    8.11 +
    8.12 +static u8 row_cur[3 * 322];	// scratch row of unpacked pixels: 3 bytes (r, g, b) per pixel, room for 322 pixels
    8.13 +static u8 row_next[3 * 322];	// second scratch row of the same size
    8.14 +	// The Bilinear* functions fill these with width + 1 pixels per row and swap the two pointers below after every source row.
    8.15 +static u8 *rgb_row_cur	= row_cur;	// unpacked copy of the source row currently being scaled
    8.16 +static u8 *rgb_row_next = row_next;	// unpacked copy of the source row below it
    8.17 +
    8.18 +#ifdef RGB
    8.19 +#undef RGB  // wingdi.h defines its own RGB()
    8.20 +#endif
    8.21 +// Packs 8-bit r, g, b into a pixel: each channel keeps its top five bits and is shifted to its system*Shift position. The expansion is fully parenthesised so it can be used inside larger expressions.
    8.22 +#define RGB(r, g, b) \
    8.23 +    ((((r) >> 3) << systemRedShift) | \
    8.24 +     (((g) >> 3) << systemGreenShift) | \
    8.25 +     (((b) >> 3) << systemBlueShift))
    8.26 +// Unpacks src_width 16-bit pixels from 'from' into 'row' as r, g, b byte
    8.27 +// triplets (each 5-bit channel moved up by 3 bits), then repeats the last
    8.28 +// triplet until 'row' holds 'width' pixels.
    8.29 +static void fill_rgb_row_16(u16 *from, int src_width, u8 *row, int width)
    8.30 +{
    8.31 +	u8 *const unpack_end = row + src_width * 3;
    8.32 +	u8 *const fill_end	 = row + width * 3;
    8.33 +
    8.34 +	for (; row < unpack_end; row += 3)
    8.35 +	{
    8.36 +		const u16 pixel = *from++;
    8.37 +		row[0] = ((pixel >> systemRedShift) & 0x1f) << 3;
    8.38 +		row[1] = ((pixel >> systemGreenShift) & 0x1f) << 3;
    8.39 +		row[2] = ((pixel >> systemBlueShift) & 0x1f) << 3;
    8.40 +	}
    8.41 +
    8.42 +	// padding: every byte copies the one three positions back, i.e. the same
    8.43 +	// channel of the preceding pixel (both ends are multiples of 3 from 'row')
    8.44 +	for (; row < fill_end; ++row)
    8.45 +		*row = row[-3];
    8.46 +}
    8.50 +
    8.51 +// Unpacks src_width 32-bit pixels from 'from' into 'row' as r, g, b byte
    8.52 +// triplets (five bits per channel are read and moved up by 3 bits), then
    8.53 +// repeats the last triplet until 'row' holds 'width' pixels.
    8.54 +static void fill_rgb_row_32(u32 *from, int src_width, u8 *row, int width)
    8.55 +{
    8.56 +	u8 *const unpack_end = row + src_width * 3;
    8.57 +	u8 *const fill_end	 = row + width * 3;
    8.58 +
    8.59 +	for (; row < unpack_end; row += 3)
    8.60 +	{
    8.61 +		const u32 pixel = *from++;
    8.62 +		row[0] = ((pixel >> systemRedShift) & 0x1f) << 3;
    8.63 +		row[1] = ((pixel >> systemGreenShift) & 0x1f) << 3;
    8.64 +		row[2] = ((pixel >> systemBlueShift) & 0x1f) << 3;
    8.65 +	}
    8.66 +
    8.67 +	// padding: every byte copies the one three positions back, i.e. the same
    8.68 +	// channel of the preceding pixel (both ends are multiples of 3 from 'row')
    8.69 +	for (; row < fill_end; ++row)
    8.70 +		*row = row[-3];
    8.71 +}
    8.75 +
    8.76 +// 2x bilinear enlargement of a 16-bit image: each source pixel 'a' becomes a
    8.77 +// 2x2 quad blended from 'a', its right neighbour 'b', the pixel below 'c' and
    8.78 +// the pixel below-right 'd'.  srcPitch and dstPitch are byte strides; deltaPtr
    8.79 +// is unused.
    8.80 +void Bilinear(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
    8.81 +              u8 *dstPtr, u32 dstPitch, int width, int height)
    8.82 +{
    8.83 +	// The static scratch rows receive width + 1 pixels (the extra one repeats
    8.84 +	// the right edge); refuse empty images and widths that would overrun them.
    8.85 +	if (width <= 0 || height <= 0 || (unsigned)width + 1 > sizeof(row_cur) / 3)
    8.86 +		return;
    8.87 +
    8.88 +	u16 *to		= (u16 *)dstPtr;
    8.89 +	u16 *to_odd = (u16 *)(dstPtr + dstPitch);
    8.90 +	u16 *from	= (u16 *)srcPtr;
    8.91 +
    8.92 +	fill_rgb_row_16(from, width, rgb_row_cur, width + 1);
    8.93 +
    8.94 +	for (int y = 0; y < height; y++)
    8.95 +	{
    8.96 +		// The row below is found through srcPitch, the stride that also
    8.97 +		// advances 'from', rather than through an assumed row length; the
    8.98 +		// last row is blended with itself.
    8.99 +		u16 *below = (y + 1 < height) ? (u16 *)((u8 *)from + srcPitch) : from;
   8.100 +		fill_rgb_row_16(below, width, rgb_row_next, width + 1);
   8.101 +
   8.102 +		const u8 *a	   = rgb_row_cur;	// current pixel (r, g, b)
   8.103 +		const u8 *c	   = rgb_row_next;	// pixel below
   8.104 +		u16		 *even = to;			// upper output row of the pair
   8.105 +		u16		 *odd  = to_odd;		// lower output row of the pair
   8.106 +		for (int x = 0; x < width; x++, a += 3, c += 3)
   8.107 +		{
   8.108 +			const u8 *b = a + 3;	// right neighbour
   8.109 +			const u8 *d = c + 3;	// below-right neighbour
   8.110 +
   8.111 +			// upper left: the source pixel itself
   8.112 +			*even++ = RGB(a[0], a[1], a[2]);
   8.113 +
   8.114 +			// upper right: average of a and b
   8.115 +			*even++ = RGB((a[0] + b[0]) >> 1, (a[1] + b[1]) >> 1, (a[2] + b[2]) >> 1);
   8.116 +
   8.117 +			// lower left: average of a and c
   8.118 +			*odd++ = RGB((a[0] + c[0]) >> 1, (a[1] + c[1]) >> 1, (a[2] + c[2]) >> 1);
   8.119 +
   8.120 +			// lower right: average of all four
   8.121 +			*odd++ = RGB((a[0] + b[0] + c[0] + d[0]) >> 2,
   8.122 +			             (a[1] + b[1] + c[1] + d[1]) >> 2,
   8.123 +			             (a[2] + b[2] + c[2] + d[2]) >> 2);
   8.124 +		}
   8.125 +
   8.126 +		// the "next" rgb row becomes the current one; the old current row is
   8.127 +		// recycled as the new "next" row
   8.128 +		u8 *spare	 = rgb_row_cur;
   8.129 +		rgb_row_cur	 = rgb_row_next;
   8.130 +		rgb_row_next = spare;
   8.131 +
   8.132 +		// move to the next source row and the next pair of output rows
   8.133 +		from   = below;
   8.134 +		to	   = (u16 *)((u8 *)to + (dstPitch << 1));
   8.135 +		to_odd = (u16 *)((u8 *)to + dstPitch);
   8.136 +	}
   8.137 +}
   8.155 +
   8.156 +// Variant of the 2x bilinear enlargement for 16-bit images in which the upper
   8.157 +// left pixel of every 2x2 quad is a weighted blend instead of a plain copy.
   8.158 +// 'a' is the source pixel, 'b' its right neighbour, 'c' the pixel below and
   8.159 +// 'd' the pixel below-right.  srcPitch and dstPitch are byte strides; deltaPtr
   8.160 +// is unused.
   8.161 +void BilinearPlus(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
   8.162 +                  u8 *dstPtr, u32 dstPitch, int width, int height)
   8.163 +{
   8.164 +	// The static scratch rows receive width + 1 pixels (the extra one repeats
   8.165 +	// the right edge); refuse empty images and widths that would overrun them.
   8.166 +	if (width <= 0 || height <= 0 || (unsigned)width + 1 > sizeof(row_cur) / 3)
   8.167 +		return;
   8.168 +
   8.169 +	u16 *to		= (u16 *)dstPtr;
   8.170 +	u16 *to_odd = (u16 *)(dstPtr + dstPitch);
   8.171 +	u16 *from	= (u16 *)srcPtr;
   8.172 +
   8.173 +	fill_rgb_row_16(from, width, rgb_row_cur, width + 1);
   8.174 +
   8.175 +	for (int y = 0; y < height; y++)
   8.176 +	{
   8.177 +		// The row below is found through srcPitch, the stride that also
   8.178 +		// advances 'from', rather than through an assumed row length; the
   8.179 +		// last row is blended with itself.
   8.180 +		u16 *below = (y + 1 < height) ? (u16 *)((u8 *)from + srcPitch) : from;
   8.181 +		fill_rgb_row_16(below, width, rgb_row_next, width + 1);
   8.182 +
   8.183 +		const u8 *a	   = rgb_row_cur;	// current pixel (r, g, b)
   8.184 +		const u8 *c	   = rgb_row_next;	// pixel below
   8.185 +		u16		 *even = to;			// upper output row of the pair
   8.186 +		u16		 *odd  = to_odd;		// lower output row of the pair
   8.187 +		for (int x = 0; x < width; x++, a += 3, c += 3)
   8.188 +		{
   8.189 +			const u8 *b = a + 3;	// right neighbour
   8.190 +			const u8 *d = c + 3;	// below-right neighbour
   8.191 +
   8.192 +#ifdef USE_ORIGINAL_BILINEAR_PLUS
   8.193 +			// upper left: (5a + 2b + c) / 8
   8.194 +			*even++ = RGB(
   8.195 +			    ((a[0] << 2) + a[0] + (c[0] + b[0] + b[0])) >> 3,
   8.196 +			    ((a[1] << 2) + a[1] + (c[1] + b[1] + b[1])) >> 3,
   8.197 +			    ((a[2] << 2) + a[2] + (c[2] + b[2] + b[2])) >> 3);
   8.198 +#else
   8.199 +			// upper left: (10a + 2b + 2c) / 16
   8.200 +			// NOTE(review): these weights sum to 14/16, so this pixel comes
   8.201 +			// out slightly darker than its inputs - confirm that is intended
   8.202 +			*even++ = RGB(
   8.203 +			    ((a[0] << 3) + (a[0] << 1) + (c[0] + b[0] + b[0] + c[0])) >> 4,
   8.204 +			    ((a[1] << 3) + (a[1] << 1) + (c[1] + b[1] + b[1] + c[1])) >> 4,
   8.205 +			    ((a[2] << 3) + (a[2] << 1) + (c[2] + b[2] + b[2] + c[2])) >> 4);
   8.206 +#endif
   8.207 +
   8.208 +			// upper right: average of a and b
   8.209 +			*even++ = RGB((a[0] + b[0]) >> 1, (a[1] + b[1]) >> 1, (a[2] + b[2]) >> 1);
   8.210 +
   8.211 +			// lower left: average of a and c
   8.212 +			*odd++ = RGB((a[0] + c[0]) >> 1, (a[1] + c[1]) >> 1, (a[2] + c[2]) >> 1);
   8.213 +
   8.214 +			// lower right: average of all four
   8.215 +			*odd++ = RGB((a[0] + b[0] + c[0] + d[0]) >> 2,
   8.216 +			             (a[1] + b[1] + c[1] + d[1]) >> 2,
   8.217 +			             (a[2] + b[2] + c[2] + d[2]) >> 2);
   8.218 +		}
   8.219 +
   8.220 +		// the "next" rgb row becomes the current one; the old current row is
   8.221 +		// recycled as the new "next" row
   8.222 +		u8 *spare	 = rgb_row_cur;
   8.223 +		rgb_row_cur	 = rgb_row_next;
   8.224 +		rgb_row_next = spare;
   8.225 +
   8.226 +		// move to the next source row and the next pair of output rows
   8.227 +		from   = below;
   8.228 +		to	   = (u16 *)((u8 *)to + (dstPitch << 1));
   8.229 +		to_odd = (u16 *)((u8 *)to + dstPitch);
   8.230 +	}
   8.231 +}
   8.246 +
   8.247 +// 2x bilinear enlargement of a 32-bit image: each source pixel 'a' becomes a
   8.248 +// 2x2 quad blended from 'a', its right neighbour 'b', the pixel below 'c' and
   8.249 +// the pixel below-right 'd'.  srcPitch and dstPitch are byte strides; deltaPtr
   8.250 +// is unused.
   8.251 +void Bilinear32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
   8.252 +                u8 *dstPtr, u32 dstPitch, int width, int height)
   8.253 +{
   8.254 +	// The static scratch rows receive width + 1 pixels (the extra one repeats
   8.255 +	// the right edge); refuse empty images and widths that would overrun them.
   8.256 +	if (width <= 0 || height <= 0 || (unsigned)width + 1 > sizeof(row_cur) / 3)
   8.257 +		return;
   8.258 +
   8.259 +	u32 *to		= (u32 *)dstPtr;
   8.260 +	u32 *to_odd = (u32 *)(dstPtr + dstPitch);
   8.261 +	u32 *from	= (u32 *)srcPtr;
   8.262 +
   8.263 +	fill_rgb_row_32(from, width, rgb_row_cur, width + 1);
   8.264 +
   8.265 +	for (int y = 0; y < height; y++)
   8.266 +	{
   8.267 +		// The row below is found through srcPitch, the stride that also
   8.268 +		// advances 'from', rather than through an assumed row length; the
   8.269 +		// last row is blended with itself.
   8.270 +		u32 *below = (y + 1 < height) ? (u32 *)((u8 *)from + srcPitch) : from;
   8.271 +		fill_rgb_row_32(below, width, rgb_row_next, width + 1);
   8.272 +
   8.273 +		const u8 *a	   = rgb_row_cur;	// current pixel (r, g, b)
   8.274 +		const u8 *c	   = rgb_row_next;	// pixel below
   8.275 +		u32		 *even = to;			// upper output row of the pair
   8.276 +		u32		 *odd  = to_odd;		// lower output row of the pair
   8.277 +		for (int x = 0; x < width; x++, a += 3, c += 3)
   8.278 +		{
   8.279 +			const u8 *b = a + 3;	// right neighbour
   8.280 +			const u8 *d = c + 3;	// below-right neighbour
   8.281 +
   8.282 +			// upper left: the source pixel itself
   8.283 +			*even++ = RGB(a[0], a[1], a[2]);
   8.284 +
   8.285 +			// upper right: average of a and b
   8.286 +			*even++ = RGB((a[0] + b[0]) >> 1, (a[1] + b[1]) >> 1, (a[2] + b[2]) >> 1);
   8.287 +
   8.288 +			// lower left: average of a and c
   8.289 +			*odd++ = RGB((a[0] + c[0]) >> 1, (a[1] + c[1]) >> 1, (a[2] + c[2]) >> 1);
   8.290 +
   8.291 +			// lower right: average of all four
   8.292 +			*odd++ = RGB((a[0] + b[0] + c[0] + d[0]) >> 2,
   8.293 +			             (a[1] + b[1] + c[1] + d[1]) >> 2,
   8.294 +			             (a[2] + b[2] + c[2] + d[2]) >> 2);
   8.295 +		}
   8.296 +
   8.297 +		// the "next" rgb row becomes the current one; the old current row is
   8.298 +		// recycled as the new "next" row
   8.299 +		u8 *spare	 = rgb_row_cur;
   8.300 +		rgb_row_cur	 = rgb_row_next;
   8.301 +		rgb_row_next = spare;
   8.302 +
   8.303 +		// move to the next source row and the next pair of output rows
   8.304 +		from   = below;
   8.305 +		to	   = (u32 *)((u8 *)to + (dstPitch << 1));
   8.306 +		to_odd = (u32 *)((u8 *)to + dstPitch);
   8.307 +	}
   8.308 +}
   8.328 +
   8.329 +void BilinearPlus32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, // 2x bilinear upscale of 32-bit pixels with a blended upper-left pixel; deltaPtr is unused
   8.330 +                    u8 *dstPtr, u32 dstPitch, int width, int height) // pitches are byte strides; width/height count source pixels
   8.331 +{
   8.332 +	u32 *to		= (u32 *)dstPtr;              // even (upper) output line of the current pair
   8.333 +	u32 *to_odd = (u32 *)(dstPtr + dstPitch); // odd (lower) output line of the current pair
   8.334 +
   8.335 +	// Each source row contributes exactly 'width' pixels to a row buffer that
   8.336 +	// is read below as width + 1 RGB triples, so the last pixel still has a
   8.337 +	// right-hand neighbour (presumably replicated by fill_rgb_row_32 -- confirm).
   8.338 +	u32 *from = (u32 *)srcPtr;
   8.339 +	fill_rgb_row_32(from, width, rgb_row_cur, width + 1);
   8.340 +
   8.341 +	for (int y = 0; y < height; y++)
   8.342 +	{
   8.343 +		u32 *from_orig = from;
   8.344 +		u32 *to_orig   = to;
   8.345 +		// the row below starts srcPitch bytes further on; the last row is paired with itself
   8.346 +		if (y + 1 < height)
   8.347 +			fill_rgb_row_32((u32 *)((u8 *)from + srcPitch), width,
   8.348 +			                rgb_row_next, width + 1);
   8.349 +		else
   8.350 +			fill_rgb_row_32(from, width, rgb_row_next, width + 1);
   8.351 +
   8.352 +		// every pixel in the src region, is extended to 4 pixels in the
   8.353 +		// destination, arranged in a square 'quad'; if the current src
   8.354 +		// pixel is 'a', then in what follows 'b' is the src pixel to the
   8.355 +		// right, 'c' is the src pixel below, and 'd' is the src pixel to
   8.356 +		// the right and down
   8.357 +		u8 *cur_row	 = rgb_row_cur;
   8.358 +		u8 *next_row = rgb_row_next;
   8.359 +		u8 *ar		 = cur_row++;
   8.360 +		u8 *ag		 = cur_row++;
   8.361 +		u8 *ab		 = cur_row++;
   8.362 +		u8 *cr		 = next_row++;
   8.363 +		u8 *cg		 = next_row++;
   8.364 +		u8 *cb		 = next_row++;
   8.365 +		for (int x = 0; x < width; x++)
   8.366 +		{
   8.367 +			u8 *br = cur_row++;
   8.368 +			u8 *bg = cur_row++;
   8.369 +			u8 *bb = cur_row++;
   8.370 +			u8 *dr = next_row++;
   8.371 +			u8 *dg = next_row++;
   8.372 +			u8 *db = next_row++;
   8.373 +
   8.374 +			// upper left pixel in quad: 'a' blended with 'b' and 'c' --
   8.375 +			// (5a + 2b + c) / 8 in the original variant, (10a + 2b + 2c) / 16 otherwise
   8.376 +#ifdef USE_ORIGINAL_BILINEAR_PLUS
   8.377 +			*to++ = RGB(
   8.378 +			    (((*ar) << 2) + ((*ar)) + (*cr + *br + *br)) >> 3,
   8.379 +			    (((*ag) << 2) + ((*ag)) + (*cg + *bg + *bg)) >> 3,
   8.380 +			    (((*ab) << 2) + ((*ab)) + (*cb + *bb + *bb)) >> 3);
   8.381 +#else
   8.382 +			*to++ = RGB(
   8.383 +			    (((*ar) << 3) + ((*ar) << 1) + (*cr + *br + *br + *cr)) >> 4,
   8.384 +			    (((*ag) << 3) + ((*ag) << 1) + (*cg + *bg + *bg + *cg)) >> 4,
   8.385 +			    (((*ab) << 3) + ((*ab) << 1) + (*cb + *bb + *bb + *cb)) >> 4);
   8.386 +#endif
   8.387 +
   8.388 +			// upper right: average of 'a' and 'b'
   8.389 +			*to++ = RGB((*ar + *br) >> 1, (*ag + *bg) >> 1, (*ab + *bb) >> 1);
   8.390 +
   8.391 +			// lower left: average of 'a' and 'c'
   8.392 +			*to_odd++ = RGB((*ar + *cr) >> 1, (*ag + *cg) >> 1, (*ab + *cb) >> 1);
   8.393 +
   8.394 +			// lower right: average of all four
   8.395 +			*to_odd++ = RGB((*ar + *br + *cr + *dr) >> 2,
   8.396 +			                (*ag + *bg + *cg + *dg) >> 2,
   8.397 +			                (*ab + *bb + *cb + *db) >> 2);
   8.398 +
   8.399 +			// 'b' becomes 'a', 'd' becomes 'c'
   8.400 +			ar = br;
   8.401 +			ag = bg;
   8.402 +			ab = bb;
   8.403 +			cr = dr;
   8.404 +			cg = dg;
   8.405 +			cb = db;
   8.406 +		}
   8.407 +
   8.408 +		// the "next" rgb row becomes the current; the old current rgb row is
   8.409 +		// recycled and serves as the new "next" row
   8.410 +		u8 *temp;
   8.411 +		temp		 = rgb_row_cur;
   8.412 +		rgb_row_cur	 = rgb_row_next;
   8.413 +		rgb_row_next = temp;
   8.414 +
   8.415 +		// update the pointers for start of next pair of lines
   8.416 +		from   = (u32 *)((u8 *)from_orig + srcPitch);
   8.417 +		to	   = (u32 *)((u8 *)to_orig + (dstPitch << 1));
   8.418 +		to_odd = (u32 *)((u8 *)to + dstPitch);
   8.419 +	}
   8.420 +}
   8.421 +
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/src/filters/filters.h	Sun Mar 04 20:32:31 2012 -0600
     9.3 @@ -0,0 +1,58 @@
     9.4 +
     9.5 +#ifndef VBA_FILTERS_H
     9.6 +#define VBA_FILTERS_H
     9.7 +// Declarations of the video filter entry points. Nothing is included here, so the u8/u32 typedefs must already be visible.
     9.8 +#if _MSC_VER > 1000
     9.9 +#pragma once
    9.10 +#endif // _MSC_VER > 1000
    9.11 +// Bilinear32/BilinearPlus32 are defined as (srcPtr, srcPitch, deltaPtr, dstPtr, dstPitch, width, height); the other 7-argument filters share the types and presumably the order -- confirm.
    9.12 +extern void Pixelate2x16(u8*, u32, u8*, u8*, u32, int, int);
    9.13 +extern void Pixelate2x32(u8*, u32, u8*, u8*, u32, int, int);
    9.14 +extern void (*Pixelate3x16)(u8*, u32, u8*, u8*, u32, int, int); // function-pointer variable, not a function
    9.15 +extern void (*Pixelate3x32)(u8*, u32, u8*, u8*, u32, int, int); // function-pointer variable
    9.16 +extern void (*Pixelate4x16)(u8*, u32, u8*, u8*, u32, int, int); // function-pointer variable
    9.17 +extern void (*Pixelate4x32)(u8*, u32, u8*, u8*, u32, int, int); // function-pointer variable
    9.18 +extern void MotionBlur(u8*, u32, u8*, u8*, u32, int, int);
    9.19 +extern void MotionBlur32(u8*, u32, u8*, u8*, u32, int, int);
    9.20 +extern void _2xSaI(u8*, u32, u8*, u8*, u32, int, int);
    9.21 +extern void _2xSaI32(u8*, u32, u8*, u8*, u32, int, int);
    9.22 +extern void Super2xSaI(u8*, u32, u8*, u8*, u32, int, int);
    9.23 +extern void Super2xSaI32(u8*, u32, u8*, u8*, u32, int, int);
    9.24 +extern void SuperEagle(u8*, u32, u8*, u8*, u32, int, int);
    9.25 +extern void SuperEagle32(u8*, u32, u8*, u8*, u32, int, int);
    9.26 +extern void AdMame2x(u8*, u32, u8*, u8*, u32, int, int);
    9.27 +extern void AdMame2x32(u8*, u32, u8*, u8*, u32, int, int);
    9.28 +extern void Simple2x16(u8*, u32, u8*, u8*, u32, int, int);
    9.29 +extern void Simple2x32(u8*, u32, u8*, u8*, u32, int, int);
    9.30 +extern void (*Simple3x16)(u8*, u32, u8*, u8*, u32, int, int); // function-pointer variable, not a function
    9.31 +extern void (*Simple3x32)(u8*, u32, u8*, u8*, u32, int, int); // function-pointer variable
    9.32 +extern void (*Simple4x16)(u8*, u32, u8*, u8*, u32, int, int); // function-pointer variable
    9.33 +extern void (*Simple4x32)(u8*, u32, u8*, u8*, u32, int, int); // function-pointer variable
    9.34 +extern void Bilinear(u8*, u32, u8*, u8*, u32, int, int);
    9.35 +extern void Bilinear32(u8*, u32, u8*, u8*, u32, int, int); // writes u32 destination pixels (see bilinear.cpp)
    9.36 +extern void BilinearPlus(u8*, u32, u8*, u8*, u32, int, int);
    9.37 +extern void BilinearPlus32(u8*, u32, u8*, u8*, u32, int, int); // writes u32 destination pixels (see bilinear.cpp)
    9.38 +extern void Scanlines(u8*, u32, u8*, u8*, u32, int, int);
    9.39 +extern void Scanlines32(u8*, u32, u8*, u8*, u32, int, int);
    9.40 +extern void ScanlinesTV(u8*, u32, u8*, u8*, u32, int, int);
    9.41 +extern void ScanlinesTV32(u8*, u32, u8*, u8*, u32, int, int);
    9.42 +extern void hq2x(u8*, u32, u8*, u8*, u32, int, int);
    9.43 +extern void hq2x32(u8*, u32, u8*, u8*, u32, int, int);
    9.44 +extern void hq2xS(u8*, u32, u8*, u8*, u32, int, int);
    9.45 +extern void hq2xS32(u8*, u32, u8*, u8*, u32, int, int);
    9.46 +extern void lq2x(u8*, u32, u8*, u8*, u32, int, int);
    9.47 +extern void lq2x32(u8*, u32, u8*, u8*, u32, int, int);
    9.48 +extern void hq3x(u8*, u32, u8*, u8*, u32, int, int);
    9.49 +extern void hq3x32(u8*, u32, u8*, u8*, u32, int, int);
    9.50 +extern void hq3xS(u8*, u32, u8*, u8*, u32, int, int);
    9.51 +extern void hq3xS32(u8*, u32, u8*, u8*, u32, int, int);
    9.52 +// The *IB routines take a single image pointer, a u32 and two ints; presumably (srcPtr, srcPitch, width, height) -- confirm against interframe.cpp.
    9.53 +extern void SmartIB(u8*, u32, int, int);
    9.54 +extern void SmartIB32(u8*, u32, int, int);
    9.55 +extern void MotionBlurIB(u8*, u32, int, int);
    9.56 +extern void InterlaceIB(u8*, u32, int, int);
    9.57 +extern void MotionBlurIB32(u8*, u32, int, int);
    9.58 +
    9.59 +extern void InterframeCleanup();
    9.60 +
    9.61 +#endif // VBA_FILTERS_H
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/src/filters/hq2x.cpp	Sun Mar 04 20:32:31 2012 -0600
    10.3 @@ -0,0 +1,966 @@
    10.4 +/*
    10.5 + * This file is part of the Advance project.
    10.6 + *
    10.7 + * Copyright (C) 2003 Andrea Mazzoleni
    10.8 + *
    10.9 + * This program is free software; you can redistribute it and/or modify
   10.10 + * it under the terms of the GNU General Public License as published by
   10.11 + * the Free Software Foundation; either version 2 of the License, or
   10.12 + * (at your option) any later version.
   10.13 + *
   10.14 + * This program is distributed in the hope that it will be useful,
   10.15 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
   10.16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   10.17 + * GNU General Public License for more details.
   10.18 + *
   10.19 + * You should have received a copy of the GNU General Public License
   10.20 + * along with this program; if not, write to the Free Software
   10.21 + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   10.22 + *
   10.23 + * In addition, as a special exception, Andrea Mazzoleni
   10.24 + * gives permission to link the code of this program with
   10.25 + * the MAME library (or with modified versions of MAME that use the
   10.26 + * same license as MAME), and distribute linked combinations including
   10.27 + * the two.  You must obey the GNU General Public License in all
   10.28 + * respects for all of the code used other than MAME.  If you modify
   10.29 + * this file, you may extend this exception to your version of the
   10.30 + * file, but you are not obligated to do so.  If you do not wish to
   10.31 + * do so, delete this exception statement from your version.
   10.32 + */
   10.33 +#include "../Port.h"
   10.34 +#include "interp.h"
   10.35 +
   10.36 +unsigned interp_mask[2]; // not referenced in the visible part of this file; presumably consumed by the interp.h helpers -- confirm
   10.37 +unsigned interp_bits_per_pixel; // 16-bit format selector: hq2xS_16_def decodes 5-6-5 channels when this is 16, 5-5-5 otherwise
   10.38 +
   10.39 +/***************************************************************************/
   10.40 +/* HQ2x C implementation */
   10.41 +
   10.42 +/*
   10.43 + * This effect is a rewritten implementation of the hq2x effect made by Maxim Stepin
   10.44 + */
   10.45 +
   10.46 +static void hq2x_16_def(u16 *dst0, u16 *dst1, const u16 *src0, const u16 *src1, const u16 *src2, unsigned count)
   10.47 +{
   10.48 +	unsigned i;
   10.49 +	// each of the 'count' pixels of the src1 row maps to a 2x2 output quad: dst0[0..1] and dst1[0..1] (P0..P3 below)
   10.50 +	for (i = 0; i < count; ++i)
   10.51 +	{
   10.52 +		unsigned char mask;
   10.53 +		// 3x3 neighbourhood in row-major order: c[0..2] from src0, c[3..5] from src1, c[6..8] from src2; c[4] is the centre
   10.54 +		u16 c[9];
   10.55 +		// centre column
   10.56 +		c[1] = src0[0];
   10.57 +		c[4] = src1[0];
   10.58 +		c[7] = src2[0];
   10.59 +		// left column: the first pixel has no src[-1] inside the row, so the centre column is replicated
   10.60 +		if (i > 0)
   10.61 +		{
   10.62 +			c[0] = src0[-1];
   10.63 +			c[3] = src1[-1];
   10.64 +			c[6] = src2[-1];
   10.65 +		}
   10.66 +		else
   10.67 +		{
   10.68 +			c[0] = c[1];
   10.69 +			c[3] = c[4];
   10.70 +			c[6] = c[7];
   10.71 +		}
   10.72 +		// right column: replicated the same way at the last pixel of the row
   10.73 +		if (i < count - 1)
   10.74 +		{
   10.75 +			c[2] = src0[1];
   10.76 +			c[5] = src1[1];
   10.77 +			c[8] = src2[1];
   10.78 +		}
   10.79 +		else
   10.80 +		{
   10.81 +			c[2] = c[1];
   10.82 +			c[5] = c[4];
   10.83 +			c[8] = c[7];
   10.84 +		}
   10.85 +		// mask bits 0..7 stand for c[0], c[1], c[2], c[3], c[5], c[6], c[7], c[8]; a bit is set when interp_16_diff reports that neighbour as different from the centre
   10.86 +		mask = 0;
   10.87 +
   10.88 +		if (interp_16_diff(c[0], c[4]))
   10.89 +			mask |= 1 << 0;
   10.90 +		if (interp_16_diff(c[1], c[4]))
   10.91 +			mask |= 1 << 1;
   10.92 +		if (interp_16_diff(c[2], c[4]))
   10.93 +			mask |= 1 << 2;
   10.94 +		if (interp_16_diff(c[3], c[4]))
   10.95 +			mask |= 1 << 3;
   10.96 +		if (interp_16_diff(c[5], c[4]))
   10.97 +			mask |= 1 << 4;
   10.98 +		if (interp_16_diff(c[6], c[4]))
   10.99 +			mask |= 1 << 5;
  10.100 +		if (interp_16_diff(c[7], c[4]))
  10.101 +			mask |= 1 << 6;
  10.102 +		if (interp_16_diff(c[8], c[4]))
  10.103 +			mask |= 1 << 7;
  10.104 +		// vocabulary for the case bodies pulled in from hq2x.h (presumably they assign P0..P3 from these helpers -- confirm); the arguments of IC/I* are indices into c[]
  10.105 +#define P0 dst0[0]
  10.106 +#define P1 dst0[1]
  10.107 +#define P2 dst1[0]
  10.108 +#define P3 dst1[1]
  10.109 +#define MUR interp_16_diff(c[1], c[5]) // top-right
  10.110 +#define MDR interp_16_diff(c[5], c[7]) // bottom-right
  10.111 +#define MDL interp_16_diff(c[7], c[3]) // bottom-left
  10.112 +#define MUL interp_16_diff(c[3], c[1]) // top-left
  10.113 +#define IC(p0) c[p0]
  10.114 +#define I11(p0, p1) interp_16_11(c[p0], c[p1])
  10.115 +#define I211(p0, p1, p2) interp_16_211(c[p0], c[p1], c[p2])
  10.116 +#define I31(p0, p1) interp_16_31(c[p0], c[p1])
  10.117 +#define I332(p0, p1, p2) interp_16_332(c[p0], c[p1], c[p2])
  10.118 +#define I431(p0, p1, p2) interp_16_431(c[p0], c[p1], c[p2])
  10.119 +#define I521(p0, p1, p2) interp_16_521(c[p0], c[p1], c[p2])
  10.120 +#define I53(p0, p1) interp_16_53(c[p0], c[p1])
  10.121 +#define I611(p0, p1, p2) interp_16_611(c[p0], c[p1], c[p2])
  10.122 +#define I71(p0, p1) interp_16_71(c[p0], c[p1])
  10.123 +#define I772(p0, p1, p2) interp_16_772(c[p0], c[p1], c[p2])
  10.124 +#define I97(p0, p1) interp_16_97(c[p0], c[p1])
  10.125 +#define I1411(p0, p1, p2) interp_16_1411(c[p0], c[p1], c[p2])
  10.126 +#define I151(p0, p1) interp_16_151(c[p0], c[p1])
  10.127 +		// the case labels for every mask value come from the included table
  10.128 +		switch (mask)
  10.129 +		{
  10.130 +#include "hq2x.h"
  10.131 +		}
  10.132 +		// drop the helper macros again so that the next *_def function can define its own set
  10.133 +#undef P0
  10.134 +#undef P1
  10.135 +#undef P2
  10.136 +#undef P3
  10.137 +#undef MUR
  10.138 +#undef MDR
  10.139 +#undef MDL
  10.140 +#undef MUL
  10.141 +#undef IC
  10.142 +#undef I11
  10.143 +#undef I211
  10.144 +#undef I31
  10.145 +#undef I332
  10.146 +#undef I431
  10.147 +#undef I521
  10.148 +#undef I53
  10.149 +#undef I611
  10.150 +#undef I71
  10.151 +#undef I772
  10.152 +#undef I97
  10.153 +#undef I1411
  10.154 +#undef I151
  10.155 +		// step one source pixel to the right and two destination pixels to the right
  10.156 +		src0 += 1;
  10.157 +		src1 += 1;
  10.158 +		src2 += 1;
  10.159 +		dst0 += 2;
  10.160 +		dst1 += 2;
  10.161 +	}
  10.162 +}
  10.163 +
  10.164 +static void hq2x_32_def(u32 *dst0, u32 *dst1, const u32 *src0, const u32 *src1, const u32 *src2, unsigned count)
  10.165 +{
  10.166 +	unsigned i;
  10.167 +	// 32-bit counterpart of hq2x_16_def: each of the 'count' pixels of the src1 row maps to dst0[0..1] and dst1[0..1] (P0..P3 below)
  10.168 +	for (i = 0; i < count; ++i)
  10.169 +	{
  10.170 +		unsigned char mask;
  10.171 +		// 3x3 neighbourhood in row-major order: c[0..2] from src0, c[3..5] from src1, c[6..8] from src2; c[4] is the centre
  10.172 +		u32 c[9];
  10.173 +		// centre column
  10.174 +		c[1] = src0[0];
  10.175 +		c[4] = src1[0];
  10.176 +		c[7] = src2[0];
  10.177 +		// left column: the first pixel has no src[-1] inside the row, so the centre column is replicated
  10.178 +		if (i > 0)
  10.179 +		{
  10.180 +			c[0] = src0[-1];
  10.181 +			c[3] = src1[-1];
  10.182 +			c[6] = src2[-1];
  10.183 +		}
  10.184 +		else
  10.185 +		{
  10.186 +			c[0] = c[1];
  10.187 +			c[3] = c[4];
  10.188 +			c[6] = c[7];
  10.189 +		}
  10.190 +		// right column: replicated the same way at the last pixel of the row
  10.191 +		if (i < count - 1)
  10.192 +		{
  10.193 +			c[2] = src0[1];
  10.194 +			c[5] = src1[1];
  10.195 +			c[8] = src2[1];
  10.196 +		}
  10.197 +		else
  10.198 +		{
  10.199 +			c[2] = c[1];
  10.200 +			c[5] = c[4];
  10.201 +			c[8] = c[7];
  10.202 +		}
  10.203 +		// mask bits 0..7 stand for c[0], c[1], c[2], c[3], c[5], c[6], c[7], c[8]; a bit is set when interp_32_diff reports that neighbour as different from the centre
  10.204 +		mask = 0;
  10.205 +
  10.206 +		if (interp_32_diff(c[0], c[4]))
  10.207 +			mask |= 1 << 0;
  10.208 +		if (interp_32_diff(c[1], c[4]))
  10.209 +			mask |= 1 << 1;
  10.210 +		if (interp_32_diff(c[2], c[4]))
  10.211 +			mask |= 1 << 2;
  10.212 +		if (interp_32_diff(c[3], c[4]))
  10.213 +			mask |= 1 << 3;
  10.214 +		if (interp_32_diff(c[5], c[4]))
  10.215 +			mask |= 1 << 4;
  10.216 +		if (interp_32_diff(c[6], c[4]))
  10.217 +			mask |= 1 << 5;
  10.218 +		if (interp_32_diff(c[7], c[4]))
  10.219 +			mask |= 1 << 6;
  10.220 +		if (interp_32_diff(c[8], c[4]))
  10.221 +			mask |= 1 << 7;
  10.222 +		// vocabulary for the case bodies pulled in from hq2x.h (presumably they assign P0..P3 from these helpers -- confirm); the arguments of IC/I* are indices into c[]
  10.223 +#define P0 dst0[0]
  10.224 +#define P1 dst0[1]
  10.225 +#define P2 dst1[0]
  10.226 +#define P3 dst1[1]
  10.227 +#define MUR interp_32_diff(c[1], c[5]) // top-right
  10.228 +#define MDR interp_32_diff(c[5], c[7]) // bottom-right
  10.229 +#define MDL interp_32_diff(c[7], c[3]) // bottom-left
  10.230 +#define MUL interp_32_diff(c[3], c[1]) // top-left
  10.231 +#define IC(p0) c[p0]
  10.232 +#define I11(p0, p1) interp_32_11(c[p0], c[p1])
  10.233 +#define I211(p0, p1, p2) interp_32_211(c[p0], c[p1], c[p2])
  10.234 +#define I31(p0, p1) interp_32_31(c[p0], c[p1])
  10.235 +#define I332(p0, p1, p2) interp_32_332(c[p0], c[p1], c[p2])
  10.236 +#define I431(p0, p1, p2) interp_32_431(c[p0], c[p1], c[p2])
  10.237 +#define I521(p0, p1, p2) interp_32_521(c[p0], c[p1], c[p2])
  10.238 +#define I53(p0, p1) interp_32_53(c[p0], c[p1])
  10.239 +#define I611(p0, p1, p2) interp_32_611(c[p0], c[p1], c[p2])
  10.240 +#define I71(p0, p1) interp_32_71(c[p0], c[p1])
  10.241 +#define I772(p0, p1, p2) interp_32_772(c[p0], c[p1], c[p2])
  10.242 +#define I97(p0, p1) interp_32_97(c[p0], c[p1])
  10.243 +#define I1411(p0, p1, p2) interp_32_1411(c[p0], c[p1], c[p2])
  10.244 +#define I151(p0, p1) interp_32_151(c[p0], c[p1])
  10.245 +		// the case labels for every mask value come from the included table
  10.246 +		switch (mask)
  10.247 +		{
  10.248 +#include "hq2x.h"
  10.249 +		}
  10.250 +		// drop the helper macros again so that the next *_def function can define its own set
  10.251 +#undef P0
  10.252 +#undef P1
  10.253 +#undef P2
  10.254 +#undef P3
  10.255 +#undef MUR
  10.256 +#undef MDR
  10.257 +#undef MDL
  10.258 +#undef MUL
  10.259 +#undef IC
  10.260 +#undef I11
  10.261 +#undef I211
  10.262 +#undef I31
  10.263 +#undef I332
  10.264 +#undef I431
  10.265 +#undef I521
  10.266 +#undef I53
  10.267 +#undef I611
  10.268 +#undef I71
  10.269 +#undef I772
  10.270 +#undef I97
  10.271 +#undef I1411
  10.272 +#undef I151
  10.273 +		// step one source pixel to the right and two destination pixels to the right
  10.274 +		src0 += 1;
  10.275 +		src1 += 1;
  10.276 +		src2 += 1;
  10.277 +		dst0 += 2;
  10.278 +		dst1 += 2;
  10.279 +	}
  10.280 +}
  10.281 +
  10.282 +/***************************************************************************/
  10.283 +/* HQ2xS C implementation */
  10.284 +
  10.285 +/*
  10.286 + * This effect is derived from the hq2x effect made by Maxim Stepin
  10.287 + */
  10.288 +
  10.289 +static void hq2xS_16_def(u16 *dst0, u16 *dst1, const u16 *src0, const u16 *src1, const u16 *src2, unsigned count)
  10.290 +{
  10.291 +	unsigned i;
  10.292 +	// each of the 'count' pixels of the src1 row maps to a 2x2 output quad: dst0[0..1] and dst1[0..1] (P0..P3 below)
  10.293 +	for (i = 0; i < count; ++i)
  10.294 +	{
  10.295 +		unsigned char mask;
  10.296 +		// 3x3 neighbourhood in row-major order: c[0..2] from src0, c[3..5] from src1, c[6..8] from src2; c[4] is the centre
  10.297 +		u16 c[9];
  10.298 +		// centre column
  10.299 +		c[1] = src0[0];
  10.300 +		c[4] = src1[0];
  10.301 +		c[7] = src2[0];
  10.302 +		// left column: replicate the centre column at the first pixel instead of reading src[-1] outside the row (as hq2x_16_def does)
  10.303 +		c[0] = (i > 0) ? src0[-1] : c[1];
  10.304 +		c[3] = (i > 0) ? src1[-1] : c[4];
  10.305 +		c[6] = (i > 0) ? src2[-1] : c[7];
  10.306 +		// right column: replicate the centre column at the last pixel instead of reading src[1] outside the row
  10.307 +		c[2] = (i < count - 1) ? src0[1] : c[1];
  10.308 +		c[5] = (i < count - 1) ? src1[1] : c[4];
  10.309 +		c[8] = (i < count - 1) ? src2[1] : c[7];
  10.310 +
  10.311 +		mask = 0;
  10.312 +
  10.313 +		// hq2xS dynamic edge detection:
  10.314 +		// simply comparing the center color against its surroundings will give bad results in many cases,
  10.315 +		// so, instead, compare the center color relative to the max difference in brightness of this 3x3 block
  10.316 +		int brightArray[9];
  10.317 +		int maxBright = 0, minBright = 999999;
  10.318 +		for (int j = 0; j < 9; j++)
  10.319 +		{
  10.320 +			int r, g, b; // each channel widened to an 8-bit scale
  10.321 +			if (interp_bits_per_pixel == 16)
  10.322 +			{
  10.323 +				b = (int)((c[j] & 0x1F)) << 3;   // 5-6-5 layout
  10.324 +				g = (int)((c[j] & 0x7E0)) >> 3;
  10.325 +				r = (int)((c[j] & 0xF800)) >> 8;
  10.326 +			}
  10.327 +			else
  10.328 +			{
  10.329 +				b = (int)((c[j] & 0x1F)) << 3;   // 5-5-5 layout
  10.330 +				g = (int)((c[j] & 0x3E0)) >> 2;
  10.331 +				r = (int)((c[j] & 0x7C00)) >> 7;
  10.332 +			}
  10.333 +			const int bright = r + r + r + g + g + g + b + b; // weighted 3:3:2
  10.334 +			if (bright > maxBright) maxBright = bright;
  10.335 +			if (bright < minBright) minBright = bright;
  10.336 +
  10.337 +			brightArray[j] = bright;
  10.338 +		}
  10.339 +		int diffBright = ((maxBright - minBright) * 7) >> 4; // threshold: 7/16 of the block's brightness range
  10.340 +		if (diffBright > 7) // otherwise the block counts as flat and mask stays 0
  10.341 +		{
  10.342 +		#define ABS(x) ((x) < 0 ? -(x) : (x))
  10.343 +			// mask bits 0..7 stand for c[0], c[1], c[2], c[3], c[5], c[6], c[7], c[8]
  10.344 +			const int centerBright = brightArray[4];
  10.345 +			if (ABS(brightArray[0] - centerBright) > diffBright)
  10.346 +				mask |= 1 << 0;
  10.347 +			if (ABS(brightArray[1] - centerBright) > diffBright)
  10.348 +				mask |= 1 << 1;
  10.349 +			if (ABS(brightArray[2] - centerBright) > diffBright)
  10.350 +				mask |= 1 << 2;
  10.351 +			if (ABS(brightArray[3] - centerBright) > diffBright)
  10.352 +				mask |= 1 << 3;
  10.353 +			if (ABS(brightArray[5] - centerBright) > diffBright)
  10.354 +				mask |= 1 << 4;
  10.355 +			if (ABS(brightArray[6] - centerBright) > diffBright)
  10.356 +				mask |= 1 << 5;
  10.357 +			if (ABS(brightArray[7] - centerBright) > diffBright)
  10.358 +				mask |= 1 << 6;
  10.359 +			if (ABS(brightArray[8] - centerBright) > diffBright)
  10.360 +				mask |= 1 << 7;
  10.361 +		}
  10.362 +		// vocabulary for the case bodies pulled in from hq2x.h; the four diagonal tests are hard-wired to false in this variant
  10.363 +#define P0 dst0[0]
  10.364 +#define P1 dst0[1]
  10.365 +#define P2 dst1[0]
  10.366 +#define P3 dst1[1]
  10.367 +#define MUR false //(ABS(brightArray[1] - brightArray[5]) > diffBright) // top-right
  10.368 +#define MDR false //(ABS(brightArray[5] - brightArray[7]) > diffBright) // bottom-right
  10.369 +#define MDL false //(ABS(brightArray[7] - brightArray[3]) > diffBright) // bottom-left
  10.370 +#define MUL false //(ABS(brightArray[3] - brightArray[1]) > diffBright) // top-left
  10.371 +#define IC(p0) c[p0]
  10.372 +#define I11(p0, p1) interp_16_11(c[p0], c[p1])
  10.373 +#define I211(p0, p1, p2) interp_16_211(c[p0], c[p1], c[p2])
  10.374 +#define I31(p0, p1) interp_16_31(c[p0], c[p1])
  10.375 +#define I332(p0, p1, p2) interp_16_332(c[p0], c[p1], c[p2])
  10.376 +#define I431(p0, p1, p2) interp_16_431(c[p0], c[p1], c[p2])
  10.377 +#define I521(p0, p1, p2) interp_16_521(c[p0], c[p1], c[p2])
  10.378 +#define I53(p0, p1) interp_16_53(c[p0], c[p1])
  10.379 +#define I611(p0, p1, p2) interp_16_611(c[p0], c[p1], c[p2])
  10.380 +#define I71(p0, p1) interp_16_71(c[p0], c[p1])
  10.381 +#define I772(p0, p1, p2) interp_16_772(c[p0], c[p1], c[p2])
  10.382 +#define I97(p0, p1) interp_16_97(c[p0], c[p1])
  10.383 +#define I1411(p0, p1, p2) interp_16_1411(c[p0], c[p1], c[p2])
  10.384 +#define I151(p0, p1) interp_16_151(c[p0], c[p1])
  10.385 +		// the case labels for every mask value come from the included table
  10.386 +		switch (mask)
  10.387 +		{
  10.388 +#include "hq2x.h"
  10.389 +		}
  10.390 +		// drop the helper macros again (ABS is left defined, exactly as before)
  10.391 +#undef P0
  10.392 +#undef P1
  10.393 +#undef P2
  10.394 +#undef P3
  10.395 +#undef MUR
  10.396 +#undef MDR
  10.397 +#undef MDL
  10.398 +#undef MUL
  10.399 +#undef IC
  10.400 +#undef I11
  10.401 +#undef I211
  10.402 +#undef I31
  10.403 +#undef I332
  10.404 +#undef I431
  10.405 +#undef I521
  10.406 +#undef I53
  10.407 +#undef I611
  10.408 +#undef I71
  10.409 +#undef I772
  10.410 +#undef I97
  10.411 +#undef I1411
  10.412 +#undef I151
  10.413 +		// step one source pixel to the right and two destination pixels to the right
  10.414 +		src0 += 1;
  10.415 +		src1 += 1;
  10.416 +		src2 += 1;
  10.417 +		dst0 += 2;
  10.418 +		dst1 += 2;
  10.419 +	}
  10.420 +}
  10.421 +
  10.422 +static void hq2xS_32_def(u32 *dst0, u32 *dst1, const u32 *src0, const u32 *src1, const u32 *src2, unsigned count)
  10.423 +{
  10.424 +	unsigned i;
  10.425 +	// each of the 'count' pixels of the src1 row maps to a 2x2 output quad: dst0[0..1] and dst1[0..1] (P0..P3 below)
  10.426 +	for (i = 0; i < count; ++i)
  10.427 +	{
  10.428 +		unsigned char mask;
  10.429 +		// 3x3 neighbourhood in row-major order: c[0..2] from src0, c[3..5] from src1, c[6..8] from src2; c[4] is the centre
  10.430 +		u32 c[9];
  10.431 +		// centre column
  10.432 +		c[1] = src0[0];
  10.433 +		c[4] = src1[0];
  10.434 +		c[7] = src2[0];
  10.435 +		// left column: replicate the centre column at the first pixel instead of reading src[-1] outside the row (as hq2x_32_def does)
  10.436 +		c[0] = (i > 0) ? src0[-1] : c[1];
  10.437 +		c[3] = (i > 0) ? src1[-1] : c[4];
  10.438 +		c[6] = (i > 0) ? src2[-1] : c[7];
  10.439 +		// right column: replicate the centre column at the last pixel instead of reading src[1] outside the row
  10.440 +		c[2] = (i < count - 1) ? src0[1] : c[1];
  10.441 +		c[5] = (i < count - 1) ? src1[1] : c[4];
  10.442 +		c[8] = (i < count - 1) ? src2[1] : c[7];
  10.443 +
  10.444 +		mask = 0;
  10.445 +
  10.446 +		// hq2xS dynamic edge detection:
  10.447 +		// simply comparing the center color against its surroundings will give bad results in many cases,
  10.448 +		// so, instead, compare the center color relative to the max difference in brightness of this 3x3 block
  10.449 +		int brightArray[9];
  10.450 +		int maxBright = 0, minBright = 999999;
  10.451 +		for (int j = 0; j < 9; j++)
  10.452 +		{
  10.453 +			const int b		 = (int)((c[j] & 0xF8));             // top five bits of the low byte
  10.454 +			const int g		 = (int)((c[j] & 0xF800)) >> 8;      // top five bits of the second byte
  10.455 +			const int r		 = (int)((c[j] & 0xF80000)) >> 16;   // top five bits of the third byte
  10.456 +			const int bright = r + r + r + g + g + g + b + b; // weighted 3:3:2
  10.457 +			if (bright > maxBright) maxBright = bright;
  10.458 +			if (bright < minBright) minBright = bright;
  10.459 +
  10.460 +			brightArray[j] = bright;
  10.461 +		}
  10.462 +		int diffBright = ((maxBright - minBright) * 7) >> 4; // threshold: 7/16 of the block's brightness range
  10.463 +		if (diffBright > 7) // otherwise the block counts as flat and mask stays 0
  10.464 +		{
  10.465 +		#define ABS(x) ((x) < 0 ? -(x) : (x))
  10.466 +			// mask bits 0..7 stand for c[0], c[1], c[2], c[3], c[5], c[6], c[7], c[8]
  10.467 +			const int centerBright = brightArray[4];
  10.468 +			if (ABS(brightArray[0] - centerBright) > diffBright)
  10.469 +				mask |= 1 << 0;
  10.470 +			if (ABS(brightArray[1] - centerBright) > diffBright)
  10.471 +				mask |= 1 << 1;
  10.472 +			if (ABS(brightArray[2] - centerBright) > diffBright)
  10.473 +				mask |= 1 << 2;
  10.474 +			if (ABS(brightArray[3] - centerBright) > diffBright)
  10.475 +				mask |= 1 << 3;
  10.476 +			if (ABS(brightArray[5] - centerBright) > diffBright)
  10.477 +				mask |= 1 << 4;
  10.478 +			if (ABS(brightArray[6] - centerBright) > diffBright)
  10.479 +				mask |= 1 << 5;
  10.480 +			if (ABS(brightArray[7] - centerBright) > diffBright)
  10.481 +				mask |= 1 << 6;
  10.482 +			if (ABS(brightArray[8] - centerBright) > diffBright)
  10.483 +				mask |= 1 << 7;
  10.484 +		}
  10.485 +		// vocabulary for the case bodies pulled in from hq2x.h; the four diagonal tests are hard-wired to false in this variant
  10.486 +#define P0 dst0[0]
  10.487 +#define P1 dst0[1]
  10.488 +#define P2 dst1[0]
  10.489 +#define P3 dst1[1]
  10.490 +#define MUR false //(ABS(brightArray[1] - brightArray[5]) > diffBright) // top-right
  10.491 +#define MDR false //(ABS(brightArray[5] - brightArray[7]) > diffBright) // bottom-right
  10.492 +#define MDL false //(ABS(brightArray[7] - brightArray[3]) > diffBright) // bottom-left
  10.493 +#define MUL false //(ABS(brightArray[3] - brightArray[1]) > diffBright) // top-left
  10.494 +#define IC(p0) c[p0]
  10.495 +#define I11(p0, p1) interp_32_11(c[p0], c[p1])
  10.496 +#define I211(p0, p1, p2) interp_32_211(c[p0], c[p1], c[p2])
  10.497 +#define I31(p0, p1) interp_32_31(c[p0], c[p1])
  10.498 +#define I332(p0, p1, p2) interp_32_332(c[p0], c[p1], c[p2])
  10.499 +#define I431(p0, p1, p2) interp_32_431(c[p0], c[p1], c[p2])
  10.500 +#define I521(p0, p1, p2) interp_32_521(c[p0], c[p1], c[p2])
  10.501 +#define I53(p0, p1) interp_32_53(c[p0], c[p1])
  10.502 +#define I611(p0, p1, p2) interp_32_611(c[p0], c[p1], c[p2])
  10.503 +#define I71(p0, p1) interp_32_71(c[p0], c[p1])
  10.504 +#define I772(p0, p1, p2) interp_32_772(c[p0], c[p1], c[p2])
  10.505 +#define I97(p0, p1) interp_32_97(c[p0], c[p1])
  10.506 +#define I1411(p0, p1, p2) interp_32_1411(c[p0], c[p1], c[p2])
  10.507 +#define I151(p0, p1) interp_32_151(c[p0], c[p1])
  10.508 +		// the case labels for every mask value come from the included table
  10.509 +		switch (mask)
  10.510 +		{
  10.511 +#include "hq2x.h"
  10.512 +		}
  10.513 +		// drop the helper macros again (ABS is left defined, exactly as before)
  10.514 +#undef P0
  10.515 +#undef P1
  10.516 +#undef P2
  10.517 +#undef P3
  10.518 +#undef MUR
  10.519 +#undef MDR
  10.520 +#undef MDL
  10.521 +#undef MUL
  10.522 +#undef IC
  10.523 +#undef I11
  10.524 +#undef I211
  10.525 +#undef I31
  10.526 +#undef I332
  10.527 +#undef I431
  10.528 +#undef I521
  10.529 +#undef I53
  10.530 +#undef I611
  10.531 +#undef I71
  10.532 +#undef I772
  10.533 +#undef I97
  10.534 +#undef I1411
  10.535 +#undef I151
  10.536 +		// step one source pixel to the right and two destination pixels to the right
  10.537 +		src0 += 1;
  10.538 +		src1 += 1;
  10.539 +		src2 += 1;
  10.540 +		dst0 += 2;
  10.541 +		dst1 += 2;
  10.542 +	}
  10.543 +}
  10.544 +
  10.545 +/***************************************************************************/
  10.546 +/* LQ2x C implementation */
  10.547 +
  10.548 +/*
  10.549 + * This effect is derived from the hq2x effect made by Maxim Stepin
  10.550 + */
  10.551 +
  10.552 +static void lq2x_16_def(u16 *dst0, u16 *dst1, const u16 *src0, const u16 *src1, const u16 *src2, unsigned count)
  10.553 +{
  10.554 +	unsigned i;
  10.555 +	// each of the 'count' pixels of the src1 row maps to a 2x2 output quad: dst0[0..1] and dst1[0..1] (P0..P3 below)
  10.556 +	for (i = 0; i < count; ++i)
  10.557 +	{
  10.558 +		unsigned char mask;
  10.559 +		// 3x3 neighbourhood in row-major order: c[0..2] from src0, c[3..5] from src1, c[6..8] from src2; c[4] is the centre
  10.560 +		u16 c[9];
  10.561 +		// centre column
  10.562 +		c[1] = src0[0];
  10.563 +		c[4] = src1[0];
  10.564 +		c[7] = src2[0];
  10.565 +		// left column: the first pixel has no src[-1] inside the row, so the centre column is replicated
  10.566 +		if (i > 0)
  10.567 +		{
  10.568 +			c[0] = src0[-1];
  10.569 +			c[3] = src1[-1];
  10.570 +			c[6] = src2[-1];
  10.571 +		}
  10.572 +		else
  10.573 +		{
  10.574 +			c[0] = c[1];
  10.575 +			c[3] = c[4];
  10.576 +			c[6] = c[7];
  10.577 +		}
  10.578 +		// right column: replicated the same way at the last pixel of the row
  10.579 +		if (i < count - 1)
  10.580 +		{
  10.581 +			c[2] = src0[1];
  10.582 +			c[5] = src1[1];
  10.583 +			c[8] = src2[1];
  10.584 +		}
  10.585 +		else
  10.586 +		{
  10.587 +			c[2] = c[1];
  10.588 +			c[5] = c[4];
  10.589 +			c[8] = c[7];
  10.590 +		}
  10.591 +		// mask bits 0..7 stand for c[0], c[1], c[2], c[3], c[5], c[6], c[7], c[8]; unlike hq2x_16_def the test is plain inequality with the centre
  10.592 +		mask = 0;
  10.593 +
  10.594 +		if (c[0] != c[4])
  10.595 +			mask |= 1 << 0;
  10.596 +		if (c[1] != c[4])
  10.597 +			mask |= 1 << 1;
  10.598 +		if (c[2] != c[4])
  10.599 +			mask |= 1 << 2;
  10.600 +		if (c[3] != c[4])
  10.601 +			mask |= 1 << 3;
  10.602 +		if (c[5] != c[4])
  10.603 +			mask |= 1 << 4;
  10.604 +		if (c[6] != c[4])
  10.605 +			mask |= 1 << 5;
  10.606 +		if (c[7] != c[4])
  10.607 +			mask |= 1 << 6;
  10.608 +		if (c[8] != c[4])
  10.609 +			mask |= 1 << 7;
  10.610 +		// vocabulary for the case bodies pulled in from lq2x.h (presumably they assign P0..P3 from these helpers -- confirm); the arguments of IC/I* are indices into c[]
  10.611 +#define P0 dst0[0]
  10.612 +#define P1 dst0[1]
  10.613 +#define P2 dst1[0]
  10.614 +#define P3 dst1[1]
  10.615 +#define MUR (c[1] != c[5])
  10.616 +#define MDR (c[5] != c[7])
  10.617 +#define MDL (c[7] != c[3])
  10.618 +#define MUL (c[3] != c[1])
  10.619 +#define IC(p0) c[p0]
  10.620 +#define I11(p0, p1) interp_16_11(c[p0], c[p1])
  10.621 +#define I211(p0, p1, p2) interp_16_211(c[p0], c[p1], c[p2])
  10.622 +#define I31(p0, p1) interp_16_31(c[p0], c[p1])
  10.623 +#define I332(p0, p1, p2) interp_16_332(c[p0], c[p1], c[p2])
  10.624 +#define I431(p0, p1, p2) interp_16_431(c[p0], c[p1], c[p2])
  10.625 +#define I521(p0, p1, p2) interp_16_521(c[p0], c[p1], c[p2])
  10.626 +#define I53(p0, p1) interp_16_53(c[p0], c[p1])
  10.627 +#define I611(p0, p1, p2) interp_16_611(c[p0], c[p1], c[p2])
  10.628 +#define I71(p0, p1) interp_16_71(c[p0], c[p1])
  10.629 +#define I772(p0, p1, p2) interp_16_772(c[p0], c[p1], c[p2])
  10.630 +#define I97(p0, p1) interp_16_97(c[p0], c[p1])
  10.631 +#define I1411(p0, p1, p2) interp_16_1411(c[p0], c[p1], c[p2])
  10.632 +#define I151(p0, p1) interp_16_151(c[p0], c[p1])
  10.633 +		// the case labels for every mask value come from the included table
  10.634 +		switch (mask)
  10.635 +		{
  10.636 +#include "lq2x.h"
  10.637 +		}
  10.638 +		// drop the helper macros again so that the next *_def function can define its own set
  10.639 +#undef P0
  10.640 +#undef P1
  10.641 +#undef P2
  10.642 +#undef P3
  10.643 +#undef MUR
  10.644 +#undef MDR
  10.645 +#undef MDL
  10.646 +#undef MUL
  10.647 +#undef IC
  10.648 +#undef I11
  10.649 +#undef I211
  10.650 +#undef I31
  10.651 +#undef I332
  10.652 +#undef I431
  10.653 +#undef I521
  10.654 +#undef I53
  10.655 +#undef I611
  10.656 +#undef I71
  10.657 +#undef I772
  10.658 +#undef I97
  10.659 +#undef I1411
  10.660 +#undef I151
  10.661 +		// step one source pixel to the right and two destination pixels to the right
  10.662 +		src0 += 1;
  10.663 +		src1 += 1;
  10.664 +		src2 += 1;
  10.665 +		dst0 += 2;
  10.666 +		dst1 += 2;
  10.667 +	}
  10.668 +}
  10.669 +
  10.670 +static void lq2x_32_def(u32 *dst0, u32 *dst1, const u32 *src0, const u32 *src1, const u32 *src2, unsigned count)
  10.671 +{
  10.672 +	unsigned i;
  10.673 +
  10.674 +	for (i = 0; i < count; ++i)
  10.675 +	{
  10.676 +		unsigned char mask;
  10.677 +
  10.678 +		u32 c[9];
  10.679 +
  10.680 +		c[1] = src0[0];
  10.681 +		c[4] = src1[0];
  10.682 +		c[7] = src2[0];
  10.683 +
  10.684 +		if (i > 0)
  10.685 +		{
  10.686 +			c[0] = src0[-1];
  10.687 +			c[3] = src1[-1];
  10.688 +			c[6] = src2[-1];
  10.689 +		}
  10.690 +		else
  10.691 +		{
  10.692 +			c[0] = c[1];
  10.693 +			c[3] = c[4];
  10.694 +			c[6] = c[7];
  10.695 +		}
  10.696 +
  10.697 +		if (i < count - 1)
  10.698 +		{
  10.699 +			c[2] = src0[1];
  10.700 +			c[5] = src1[1];
  10.701 +			c[8] = src2[1];
  10.702 +		}
  10.703 +		else
  10.704 +		{
  10.705 +			c[2] = c[1];
  10.706 +			c[5] = c[4];
  10.707 +			c[8] = c[7];
  10.708 +		}
  10.709 +
  10.710 +		mask = 0;
  10.711 +
  10.712 +		if (c[0] != c[4])
  10.713 +			mask |= 1 << 0;
  10.714 +		if (c[1] != c[4])
  10.715 +			mask |= 1 << 1;
  10.716 +		if (c[2] != c[4])
  10.717 +			mask |= 1 << 2;
  10.718 +		if (c[3] != c[4])
  10.719 +			mask |= 1 << 3;
  10.720 +		if (c[5] != c[4])
  10.721 +			mask |= 1 << 4;
  10.722 +		if (c[6] != c[4])
  10.723 +			mask |= 1 << 5;
  10.724 +		if (c[7] != c[4])
  10.725 +			mask |= 1 << 6;
  10.726 +		if (c[8] != c[4])
  10.727 +			mask |= 1 << 7;
  10.728 +
  10.729 +#define P0 dst0[0]
  10.730 +#define P1 dst0[1]
  10.731 +#define P2 dst1[0]
  10.732 +#define P3 dst1[1]
  10.733 +#define MUR (c[1] != c[5])
  10.734 +#define MDR (c[5] != c[7])
  10.735 +#define MDL (c[7] != c[3])
  10.736 +#define MUL (c[3] != c[1])
  10.737 +#define IC(p0) c[p0]
  10.738 +#define I11(p0, p1) interp_32_11(c[p0], c[p1])
  10.739 +#define I211(p0, p1, p2) interp_32_211(c[p0], c[p1], c[p2])
  10.740 +#define I31(p0, p1) interp_32_31(c[p0], c[p1])
  10.741 +#define I332(p0, p1, p2) interp_32_332(c[p0], c[p1], c[p2])
  10.742 +#define I431(p0, p1, p2) interp_32_431(c[p0], c[p1], c[p2])
  10.743 +#define I521(p0, p1, p2) interp_32_521(c[p0], c[p1], c[p2])
  10.744 +#define I53(p0, p1) interp_32_53(c[p0], c[p1])
  10.745 +#define I611(p0, p1, p2) interp_32_611(c[p0], c[p1], c[p2])
  10.746 +#define I71(p0, p1) interp_32_71(c[p0], c[p1])
  10.747 +#define I772(p0, p1, p2) interp_32_772(c[p0], c[p1], c[p2])
  10.748 +#define I97(p0, p1) interp_32_97(c[p0], c[p1])
  10.749 +#define I1411(p0, p1, p2) interp_32_1411(c[p0], c[p1], c[p2])
  10.750 +#define I151(p0, p1) interp_32_151(c[p0], c[p1])
  10.751 +
  10.752 +		switch (mask)
  10.753 +		{
  10.754 +#include "lq2x.h"
  10.755 +		}
  10.756 +
  10.757 +#undef P0
  10.758 +#undef P1
  10.759 +#undef P2
  10.760 +#undef P3
  10.761 +#undef MUR
  10.762 +#undef MDR
  10.763 +#undef MDL
  10.764 +#undef MUL
  10.765 +#undef IC
  10.766 +#undef I11
  10.767 +#undef I211
  10.768 +#undef I31
  10.769 +#undef I332
  10.770 +#undef I431
  10.771 +#undef I521
  10.772 +#undef I53
  10.773 +#undef I611
  10.774 +#undef I71
  10.775 +#undef I772
  10.776 +#undef I97
  10.777 +#undef I1411
  10.778 +#undef I151
  10.779 +
  10.780 +		src0 += 1;
  10.781 +		src1 += 1;
  10.782 +		src2 += 1;
  10.783 +		dst0 += 2;
  10.784 +		dst1 += 2;
  10.785 +	}
  10.786 +}
  10.787 +
  10.788 +void hq2x(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
  10.789 +          u8 *dstPtr, u32 dstPitch, int width, int height)
  10.790 +{
  10.791 +	u16 *dst0 = (u16 *)dstPtr;
  10.792 +	u16 *dst1 = dst0 + (dstPitch >> 1);
  10.793 +
  10.794 +	u16 *src0 = (u16 *)srcPtr;
  10.795 +	u16 *src1 = src0 + (srcPitch >> 1);
  10.796 +	u16 *src2 = src1 + (srcPitch >> 1);
  10.797 +
  10.798 +	hq2x_16_def(dst0, dst1, src0, src0, src1, width);
  10.799 +
  10.800 +	int count = height;
  10.801 +
  10.802 +	count -= 2;
  10.803 +	while (count)
  10.804 +	{
  10.805 +		dst0 += dstPitch;
  10.806 +		dst1 += dstPitch;
  10.807 +		hq2x_16_def(dst0, dst1, src0, src1, src2, width);
  10.808 +		src0  = src1;
  10.809 +		src1  = src2;
  10.810 +		src2 += srcPitch >> 1;
  10.811 +		--count;
  10.812 +	}
  10.813 +	dst0 += dstPitch;
  10.814 +	dst1 += dstPitch;
  10.815 +	hq2x_16_def(dst0, dst1, src0, src1, src1, width);
  10.816 +}
  10.817 +
  10.818 +void hq2x32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
  10.819 +            u8 *dstPtr, u32 dstPitch, int width, int height)
  10.820 +{
  10.821 +	u32 *dst0 = (u32 *)dstPtr;
  10.822 +	u32 *dst1 = dst0 + (dstPitch >> 2);
  10.823 +
  10.824 +	u32 *src0 = (u32 *)srcPtr;
  10.825 +	u32 *src1 = src0 + (srcPitch >> 2);
  10.826 +	u32 *src2 = src1 + (srcPitch >> 2);
  10.827 +	hq2x_32_def(dst0, dst1, src0, src0, src1, width);
  10.828 +
  10.829 +	int count = height;
  10.830 +
  10.831 +	count -= 2;
  10.832 +	while (count)
  10.833 +	{
  10.834 +		dst0 += dstPitch >> 1;
  10.835 +		dst1 += dstPitch >> 1;
  10.836 +		hq2x_32_def(dst0, dst1, src0, src1, src2, width);
  10.837 +		src0  = src1;
  10.838 +		src1  = src2;
  10.839 +		src2 += srcPitch >> 2;
  10.840 +		--count;
  10.841 +	}
  10.842 +	dst0 += dstPitch >> 1;
  10.843 +	dst1 += dstPitch >> 1;
  10.844 +	hq2x_32_def(dst0, dst1, src0, src1, src1, width);
  10.845 +}
  10.846 +
  10.847 +void hq2xS(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
  10.848 +           u8 *dstPtr, u32 dstPitch, int width, int height)
  10.849 +{
  10.850 +	u16 *dst0 = (u16 *)dstPtr;
  10.851 +	u16 *dst1 = dst0 + (dstPitch >> 1);
  10.852 +
  10.853 +	u16 *src0 = (u16 *)srcPtr;
  10.854 +	u16 *src1 = src0 + (srcPitch >> 1);
  10.855 +	u16 *src2 = src1 + (srcPitch >> 1);
  10.856 +
  10.857 +	hq2xS_16_def(dst0, dst1, src0, src0, src1, width);
  10.858 +
  10.859 +	int count = height;
  10.860 +
  10.861 +	count -= 2;
  10.862 +	while (count)
  10.863 +	{
  10.864 +		dst0 += dstPitch;
  10.865 +		dst1 += dstPitch;
  10.866 +		hq2xS_16_def(dst0, dst1, src0, src1, src2, width);
  10.867 +		src0  = src1;
  10.868 +		src1  = src2;
  10.869 +		src2 += srcPitch >> 1;
  10.870 +		--count;
  10.871 +	}
  10.872 +	dst0 += dstPitch;
  10.873 +	dst1 += dstPitch;
  10.874 +	hq2xS_16_def(dst0, dst1, src0, src1, src1, width);
  10.875 +}
  10.876 +
  10.877 +void hq2xS32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
  10.878 +             u8 *dstPtr, u32 dstPitch, int width, int height)
  10.879 +{
  10.880 +	u32 *dst0 = (u32 *)dstPtr;
  10.881 +	u32 *dst1 = dst0 + (dstPitch >> 2);
  10.882 +
  10.883 +	u32 *src0 = (u32 *)srcPtr;
  10.884 +	u32 *src1 = src0 + (srcPitch >> 2);
  10.885 +	u32 *src2 = src1 + (srcPitch >> 2);
  10.886 +	hq2xS_32_def(dst0, dst1, src0, src0, src1, width);
  10.887 +
  10.888 +	int count = height;
  10.889 +
  10.890 +	count -= 2;
  10.891 +	while (count)
  10.892 +	{
  10.893 +		dst0 += dstPitch >> 1;
  10.894 +		dst1 += dstPitch >> 1;
  10.895 +		hq2xS_32_def(dst0, dst1, src0, src1, src2, width);
  10.896 +		src0  = src1;
  10.897 +		src1  = src2;
  10.898 +		src2 += srcPitch >> 2;
  10.899 +		--count;
  10.900 +	}
  10.901 +	dst0 += dstPitch >> 1;
  10.902 +	dst1 += dstPitch >> 1;
  10.903 +	hq2xS_32_def(dst0, dst1, src0, src1, src1, width);
  10.904 +}
  10.905 +
  10.906 +void lq2x(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
  10.907 +          u8 *dstPtr, u32 dstPitch, int width, int height)
  10.908 +{
  10.909 +	u16 *dst0 = (u16 *)dstPtr;
  10.910 +	u16 *dst1 = dst0 + (dstPitch >> 1);
  10.911 +
  10.912 +	u16 *src0 = (u16 *)srcPtr;
  10.913 +	u16 *src1 = src0 + (srcPitch >> 1);
  10.914 +	u16 *src2 = src1 + (srcPitch >> 1);
  10.915 +
  10.916 +	lq2x_16_def(dst0, dst1, src0, src0, src1, width);
  10.917 +
  10.918 +	int count = height;
  10.919 +
  10.920 +	count -= 2;
  10.921 +	while (count)
  10.922 +	{
  10.923 +		dst0 += dstPitch;
  10.924 +		dst1 += dstPitch;
  10.925 +		lq2x_16_def(dst0, dst1, src0, src1, src2, width);
  10.926 +		src0  = src1;
  10.927 +		src1  = src2;
  10.928 +		src2 += srcPitch >> 1;
  10.929 +		--count;
  10.930 +	}
  10.931 +	dst0 += dstPitch;
  10.932 +	dst1 += dstPitch;
  10.933 +	lq2x_16_def(dst0, dst1, src0, src1, src1, width);
  10.934 +}
  10.935 +
  10.936 +void lq2x32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
  10.937 +            u8 *dstPtr, u32 dstPitch, int width, int height)
  10.938 +{
  10.939 +	u32 *dst0 = (u32 *)dstPtr;
  10.940 +	u32 *dst1 = dst0 + (dstPitch >> 2);
  10.941 +
  10.942 +	u32 *src0 = (u32 *)srcPtr;
  10.943 +	u32 *src1 = src0 + (srcPitch >> 2);
  10.944 +	u32 *src2 = src1 + (srcPitch >> 2);
  10.945 +	lq2x_32_def(dst0, dst1, src0, src0, src1, width);
  10.946 +
  10.947 +	int count = height;
  10.948 +
  10.949 +	count -= 2;
  10.950 +	while (count)
  10.951 +	{
  10.952 +		dst0 += dstPitch >> 1;
  10.953 +		dst1 += dstPitch >> 1;
  10.954 +		lq2x_32_def(dst0, dst1, src0, src1, src2, width);
  10.955 +		src0  = src1;
  10.956 +		src1  = src2;
  10.957 +		src2 += srcPitch >> 2;
  10.958 +		--count;
  10.959 +	}
  10.960 +	dst0 += dstPitch >> 1;
  10.961 +	dst1 += dstPitch >> 1;
  10.962 +	lq2x_32_def(dst0, dst1, src0, src1, src1, width);
  10.963 +}
  10.964 +
  10.965 +void hq2x_init(unsigned bits_per_pixel)
  10.966 +{
  10.967 +	interp_set(bits_per_pixel);
  10.968 +}
  10.969 +
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/src/filters/hq2x.h	Sun Mar 04 20:32:31 2012 -0600
    11.3 @@ -0,0 +1,1824 @@
    11.4 +case 0 : 
    11.5 +case 1 : 
    11.6 +case 4 : 
    11.7 +case 5 : 
    11.8 +case 32 : 
    11.9 +case 33 : 
   11.10 +case 36 : 
   11.11 +case 37 : 
   11.12 +case 128 : 
   11.13 +case 129 : 
   11.14 +case 132 : 
   11.15 +case 133 : 
   11.16 +case 160 : 
   11.17 +case 161 : 
   11.18 +case 164 : 
   11.19 +case 165 : 
   11.20 +{
   11.21 +  P0 = I211(4, 1, 3);
   11.22 +  P1 = I211(4, 1, 5);
   11.23 +  P2 = I211(4, 3, 7);
   11.24 +  P3 = I211(4, 5, 7);
   11.25 +} break;
   11.26 +case 2 : 
   11.27 +case 34 : 
   11.28 +case 130 : 
   11.29 +case 162 : 
   11.30 +{
   11.31 +  P0 = I31(4, 0);
   11.32 +  P1 = I31(4, 2);
   11.33 +  P2 = I211(4, 3, 7);
   11.34 +  P3 = I211(4, 5, 7);
   11.35 +} break;
   11.36 +case 3 : 
   11.37 +case 35 : 
   11.38 +case 131 : 
   11.39 +case 163 : 
   11.40 +{
   11.41 +  P0 = I31(4, 3);
   11.42 +  P1 = I31(4, 2);
   11.43 +  P2 = I211(4, 3, 7);
   11.44 +  P3 = I211(4, 5, 7);
   11.45 +} break;
   11.46 +case 6 : 
   11.47 +case 38 : 
   11.48 +case 134 : 
   11.49 +case 166 : 
   11.50 +{
   11.51 +  P0 = I31(4, 0);
   11.52 +  P1 = I31(4, 5);
   11.53 +  P2 = I211(4, 3, 7);
   11.54 +  P3 = I211(4, 5, 7);
   11.55 +} break;
   11.56 +case 7 : 
   11.57 +case 39 : 
   11.58 +case 135 : 
   11.59 +case 167 : 
   11.60 +{
   11.61 +  P0 = I31(4, 3);
   11.62 +  P1 = I31(4, 5);
   11.63 +  P2 = I211(4, 3, 7);
   11.64 +  P3 = I211(4, 5, 7);
   11.65 +} break;
   11.66 +case 8 : 
   11.67 +case 12 : 
   11.68 +case 136 : 
   11.69 +case 140 : 
   11.70 +{
   11.71 +  P0 = I31(4, 0);
   11.72 +  P1 = I211(4, 1, 5);
   11.73 +  P2 = I31(4, 6);
   11.74 +  P3 = I211(4, 5, 7);
   11.75 +} break;
   11.76 +case 9 : 
   11.77 +case 13 : 
   11.78 +case 137 : 
   11.79 +case 141 : 
   11.80 +{
   11.81 +  P0 = I31(4, 1);
   11.82 +  P1 = I211(4, 1, 5);
   11.83 +  P2 = I31(4, 6);
   11.84 +  P3 = I211(4, 5, 7);
   11.85 +} break;
   11.86 +case 10 : 
   11.87 +case 138 : 
   11.88 +{
   11.89 +  P1 = I31(4, 2);
   11.90 +  P2 = I31(4, 6);
   11.91 +  P3 = I211(4, 5, 7);
   11.92 +  if (MUL) {
   11.93 +    P0 = I31(4, 0);
   11.94 +  } else {
   11.95 +    P0 = I211(4, 1, 3);
   11.96 +  }
   11.97 +} break;
   11.98 +case 11 : 
   11.99 +case 139 : 
  11.100 +{
  11.101 +  P1 = I31(4, 2);
  11.102 +  P2 = I31(4, 6);
  11.103 +  P3 = I211(4, 5, 7);
  11.104 +  if (MUL) {
  11.105 +    P0 = IC(4);
  11.106 +  } else {
  11.107 +    P0 = I211(4, 1, 3);
  11.108 +  }
  11.109 +} break;
  11.110 +case 14 : 
  11.111 +case 142 : 
  11.112 +{
  11.113 +  P2 = I31(4, 6);
  11.114 +  P3 = I211(4, 5, 7);
  11.115 +  if (MUL) {
  11.116 +    P0 = I31(4, 0);
  11.117 +    P1 = I31(4, 5);
  11.118 +  } else {
  11.119 +    P0 = I332(1, 3, 4);
  11.120 +    P1 = I521(4, 1, 5);
  11.121 +  }
  11.122 +} break;
  11.123 +case 15 : 
  11.124 +case 143 : 
  11.125 +{
  11.126 +  P2 = I31(4, 6);
  11.127 +  P3 = I211(4, 5, 7);
  11.128 +  if (MUL) {
  11.129 +    P0 = IC(4);
  11.130 +    P1 = I31(4, 5);
  11.131 +  } else {
  11.132 +    P0 = I332(1, 3, 4);
  11.133 +    P1 = I521(4, 1, 5);
  11.134 +  }
  11.135 +} break;
  11.136 +case 16 : 
  11.137 +case 17 : 
  11.138 +case 48 : 
  11.139 +case 49 : 
  11.140 +{
  11.141 +  P0 = I211(4, 1, 3);
  11.142 +  P1 = I31(4, 2);
  11.143 +  P2 = I211(4, 3, 7);
  11.144 +  P3 = I31(4, 8);
  11.145 +} break;
  11.146 +case 18 : 
  11.147 +case 50 : 
  11.148 +{
  11.149 +  P0 = I31(4, 0);
  11.150 +  P2 = I211(4, 3, 7);
  11.151 +  P3 = I31(4, 8);
  11.152 +  if (MUR) {
  11.153 +    P1 = I31(4, 2);
  11.154 +  } else {
  11.155 +    P1 = I211(4, 1, 5);
  11.156 +  }
  11.157 +} break;
  11.158 +case 19 : 
  11.159 +case 51 : 
  11.160 +{
  11.161 +  P2 = I211(4, 3, 7);
  11.162 +  P3 = I31(4, 8);
  11.163 +  if (MUR) {
  11.164 +    P0 = I31(4, 3);
  11.165 +    P1 = I31(4, 2);
  11.166 +  } else {
  11.167 +    P0 = I521(4, 1, 3);
  11.168 +    P1 = I332(1, 5, 4);
  11.169 +  }
  11.170 +} break;
  11.171 +case 20 : 
  11.172 +case 21 : 
  11.173 +case 52 : 
  11.174 +case 53 : 
  11.175 +{
  11.176 +  P0 = I211(4, 1, 3);
  11.177 +  P1 = I31(4, 1);
  11.178 +  P2 = I211(4, 3, 7);
  11.179 +  P3 = I31(4, 8);
  11.180 +} break;
  11.181 +case 22 : 
  11.182 +case 54 : 
  11.183 +{
  11.184 +  P0 = I31(4, 0);
  11.185 +  P2 = I211(4, 3, 7);
  11.186 +  P3 = I31(4, 8);
  11.187 +  if (MUR) {
  11.188 +    P1 = IC(4);
  11.189 +  } else {
  11.190 +    P1 = I211(4, 1, 5);
  11.191 +  }
  11.192 +} break;
  11.193 +case 23 : 
  11.194 +case 55 : 
  11.195 +{
  11.196 +  P2 = I211(4, 3, 7);
  11.197 +  P3 = I31(4, 8);
  11.198 +  if (MUR) {
  11.199 +    P0 = I31(4, 3);
  11.200 +    P1 = IC(4);
  11.201 +  } else {
  11.202 +    P0 = I521(4, 1, 3);
  11.203 +    P1 = I332(1, 5, 4);
  11.204 +  }
  11.205 +} break;
  11.206 +case 24 : 
  11.207 +case 66 : 
  11.208 +{
  11.209 +  P0 = I31(4, 0);
  11.210 +  P1 = I31(4, 2);
  11.211 +  P2 = I31(4, 6);
  11.212 +  P3 = I31(4, 8);
  11.213 +} break;
  11.214 +case 25 : 
  11.215 +{
  11.216 +  P0 = I31(4, 1);
  11.217 +  P1 = I31(4, 2);
  11.218 +  P2 = I31(4, 6);
  11.219 +  P3 = I31(4, 8);
  11.220 +} break;
  11.221 +case 26 : 
  11.222 +case 31 : 
  11.223 +case 95 : 
  11.224 +{
  11.225 +  P2 = I31(4, 6);
  11.226 +  P3 = I31(4, 8);
  11.227 +  if (MUL) {
  11.228 +    P0 = IC(4);
  11.229 +  } else {
  11.230 +    P0 = I211(4, 1, 3);
  11.231 +  }
  11.232 +  if (MUR) {
  11.233 +    P1 = IC(4);
  11.234 +  } else {
  11.235 +    P1 = I211(4, 1, 5);
  11.236 +  }
  11.237 +} break;
  11.238 +case 27 : 
  11.239 +case 75 : 
  11.240 +{
  11.241 +  P1 = I31(4, 2);
  11.242 +  P2 = I31(4, 6);
  11.243 +  P3 = I31(4, 8);
  11.244 +  if (MUL) {
  11.245 +    P0 = IC(4);
  11.246 +  } else {
  11.247 +    P0 = I211(4, 1, 3);
  11.248 +  }
  11.249 +} break;
  11.250 +case 28 : 
  11.251 +{
  11.252 +  P0 = I31(4, 0);
  11.253 +  P1 = I31(4, 1);
  11.254 +  P2 = I31(4, 6);
  11.255 +  P3 = I31(4, 8);
  11.256 +} break;
  11.257 +case 29 : 
  11.258 +{
  11.259 +  P0 = I31(4, 1);
  11.260 +  P1 = I31(4, 1);
  11.261 +  P2 = I31(4, 6);
  11.262 +  P3 = I31(4, 8);
  11.263 +} break;
  11.264 +case 30 : 
  11.265 +case 86 : 
  11.266 +{
  11.267 +  P0 = I31(4, 0);
  11.268 +  P2 = I31(4, 6);
  11.269 +  P3 = I31(4, 8);
  11.270 +  if (MUR) {
  11.271 +    P1 = IC(4);
  11.272 +  } else {
  11.273 +    P1 = I211(4, 1, 5);
  11.274 +  }
  11.275 +} break;
  11.276 +case 40 : 
  11.277 +case 44 : 
  11.278 +case 168 : 
  11.279 +case 172 : 
  11.280 +{
  11.281 +  P0 = I31(4, 0);
  11.282 +  P1 = I211(4, 1, 5);
  11.283 +  P2 = I31(4, 7);
  11.284 +  P3 = I211(4, 5, 7);
  11.285 +} break;
  11.286 +case 41 : 
  11.287 +case 45 : 
  11.288 +case 169 : 
  11.289 +case 173 : 
  11.290 +{
  11.291 +  P0 = I31(4, 1);
  11.292 +  P1 = I211(4, 1, 5);
  11.293 +  P2 = I31(4, 7);
  11.294 +  P3 = I211(4, 5, 7);
  11.295 +} break;
  11.296 +case 42 : 
  11.297 +case 170 : 
  11.298 +{
  11.299 +  P1 = I31(4, 2);
  11.300 +  P3 = I211(4, 5, 7);
  11.301 +  if (MUL) {
  11.302 +    P0 = I31(4, 0);
  11.303 +    P2 = I31(4, 7);
  11.304 +  } else {
  11.305 +    P0 = I332(1, 3, 4);
  11.306 +    P2 = I521(4, 3, 7);
  11.307 +  }
  11.308 +} break;
  11.309 +case 43 : 
  11.310 +case 171 : 
  11.311 +{
  11.312 +  P1 = I31(4, 2);
  11.313 +  P3 = I211(4, 5, 7);
  11.314 +  if (MUL) {
  11.315 +    P0 = IC(4);
  11.316 +    P2 = I31(4, 7);
  11.317 +  } else {
  11.318 +    P0 = I332(1, 3, 4);
  11.319 +    P2 = I521(4, 3, 7);
  11.320 +  }
  11.321 +} break;
  11.322 +case 46 : 
  11.323 +case 174 : 
  11.324 +{
  11.325 +  P1 = I31(4, 5);
  11.326 +  P2 = I31(4, 7);
  11.327 +  P3 = I211(4, 5, 7);
  11.328 +  if (MUL) {
  11.329 +    P0 = I31(4, 0);
  11.330 +  } else {
  11.331 +    P0 = I611(4, 1, 3);
  11.332 +  }
  11.333 +} break;
  11.334 +case 47 : 
  11.335 +case 175 : 
  11.336 +{
  11.337 +  P1 = I31(4, 5);
  11.338 +  P2 = I31(4, 7);
  11.339 +  P3 = I211(4, 5, 7);
  11.340 +  if (MUL) {
  11.341 +    P0 = IC(4);
  11.342 +  } else {
  11.343 +    P0 = I1411(4, 1, 3);
  11.344 +  }
  11.345 +} break;
  11.346 +case 56 : 
  11.347 +{
  11.348 +  P0 = I31(4, 0);
  11.349 +  P1 = I31(4, 2);
  11.350 +  P2 = I31(4, 7);
  11.351 +  P3 = I31(4, 8);
  11.352 +} break;
  11.353 +case 57 : 
  11.354 +{
  11.355 +  P0 = I31(4, 1);
  11.356 +  P1 = I31(4, 2);
  11.357 +  P2 = I31(4, 7);
  11.358 +  P3 = I31(4, 8);
  11.359 +} break;
  11.360 +case 58 : 
  11.361 +{
  11.362 +  P2 = I31(4, 7);
  11.363 +  P3 = I31(4, 8);
  11.364 +  if (MUL) {
  11.365 +    P0 = I31(4, 0);
  11.366 +  } else {
  11.367 +    P0 = I611(4, 1, 3);
  11.368 +  }
  11.369 +  if (MUR) {
  11.370 +    P1 = I31(4, 2);
  11.371 +  } else {
  11.372 +    P1 = I611(4, 1, 5);
  11.373 +  }
  11.374 +} break;
  11.375 +case 59 : 
  11.376 +{
  11.377 +  P2 = I31(4, 7);
  11.378 +  P3 = I31(4, 8);
  11.379 +  if (MUL) {
  11.380 +    P0 = IC(4);
  11.381 +  } else {
  11.382 +    P0 = I211(4, 1, 3);
  11.383 +  }
  11.384 +  if (MUR) {
  11.385 +    P1 = I31(4, 2);
  11.386 +  } else {
  11.387 +    P1 = I611(4, 1, 5);
  11.388 +  }
  11.389 +} break;
  11.390 +case 60 : 
  11.391 +{
  11.392 +  P0 = I31(4, 0);
  11.393 +  P1 = I31(4, 1);
  11.394 +  P2 = I31(4, 7);
  11.395 +  P3 = I31(4, 8);
  11.396 +} break;
  11.397 +case 61 : 
  11.398 +{
  11.399 +  P0 = I31(4, 1);
  11.400 +  P1 = I31(4, 1);
  11.401 +  P2 = I31(4, 7);
  11.402 +  P3 = I31(4, 8);
  11.403 +} break;
  11.404 +case 62 : 
  11.405 +{
  11.406 +  P0 = I31(4, 0);
  11.407 +  P2 = I31(4, 7);
  11.408 +  P3 = I31(4, 8);
  11.409 +  if (MUR) {
  11.410 +    P1 = IC(4);
  11.411 +  } else {
  11.412 +    P1 = I211(4, 1, 5);
  11.413 +  }
  11.414 +} break;
  11.415 +case 63 : 
  11.416 +{
  11.417 +  P2 = I31(4, 7);
  11.418 +  P3 = I31(4, 8);
  11.419 +  if (MUL) {
  11.420 +    P0 = IC(4);
  11.421 +  } else {
  11.422 +    P0 = I1411(4, 1, 3);
  11.423 +  }
  11.424 +  if (MUR) {
  11.425 +    P1 = IC(4);
  11.426 +  } else {
  11.427 +    P1 = I211(4, 1, 5);
  11.428 +  }
  11.429 +} break;
  11.430 +case 64 : 
  11.431 +case 65 : 
  11.432 +case 68 : 
  11.433 +case 69 : 
  11.434 +{
  11.435 +  P0 = I211(4, 1, 3);
  11.436 +  P1 = I211(4, 1, 5);
  11.437 +  P2 = I31(4, 6);
  11.438 +  P3 = I31(4, 8);
  11.439 +} break;
  11.440 +case 67 : 
  11.441 +{
  11.442 +  P0 = I31(4, 3);
  11.443 +  P1 = I31(4, 2);
  11.444 +  P2 = I31(4, 6);
  11.445 +  P3 = I31(4, 8);
  11.446 +} break;
  11.447 +case 70 : 
  11.448 +{
  11.449 +  P0 = I31(4, 0);
  11.450 +  P1 = I31(4, 5);
  11.451 +  P2 = I31(4, 6);
  11.452 +  P3 = I31(4, 8);
  11.453 +} break;
  11.454 +case 71 : 
  11.455 +{
  11.456 +  P0 = I31(4, 3);
  11.457 +  P1 = I31(4, 5);
  11.458 +  P2 = I31(4, 6);
  11.459 +  P3 = I31(4, 8);
  11.460 +} break;
  11.461 +case 72 : 
  11.462 +case 76 : 
  11.463 +{
  11.464 +  P0 = I31(4, 0);
  11.465 +  P1 = I211(4, 1, 5);
  11.466 +  P3 = I31(4, 8);
  11.467 +  if (MDL) {
  11.468 +    P2 = I31(4, 6);
  11.469 +  } else {
  11.470 +    P2 = I211(4, 3, 7);
  11.471 +  }
  11.472 +} break;
  11.473 +case 73 : 
  11.474 +case 77 : 
  11.475 +{
  11.476 +  P1 = I211(4, 1, 5);
  11.477 +  P3 = I31(4, 8);
  11.478 +  if (MDL) {
  11.479 +    P0 = I31(4, 1);
  11.480 +    P2 = I31(4, 6);
  11.481 +  } else {
  11.482 +    P0 = I521(4, 3, 1);
  11.483 +    P2 = I332(3, 7, 4);
  11.484 +  }
  11.485 +} break;
  11.486 +case 74 : 
  11.487 +case 107 : 
  11.488 +case 123 : 
  11.489 +{
  11.490 +  P1 = I31(4, 2);
  11.491 +  P3 = I31(4, 8);
  11.492 +  if (MDL) {
  11.493 +    P2 = IC(4);
  11.494 +  } else {
  11.495 +    P2 = I211(4, 3, 7);
  11.496 +  }
  11.497 +  if (MUL) {
  11.498 +    P0 = IC(4);
  11.499 +  } else {
  11.500 +    P0 = I211(4, 1, 3);
  11.501 +  }
  11.502 +} break;
  11.503 +case 78 : 
  11.504 +{
  11.505 +  P1 = I31(4, 5);
  11.506 +  P3 = I31(4, 8);
  11.507 +  if (MDL) {
  11.508 +    P2 = I31(4, 6);
  11.509 +  } else {
  11.510 +    P2 = I611(4, 3, 7);
  11.511 +  }
  11.512 +  if (MUL) {
  11.513 +    P0 = I31(4, 0);
  11.514 +  } else {
  11.515 +    P0 = I611(4, 1, 3);
  11.516 +  }
  11.517 +} break;
  11.518 +case 79 : 
  11.519 +{
  11.520 +  P1 = I31(4, 5);
  11.521 +  P3 = I31(4, 8);
  11.522 +  if (MDL) {
  11.523 +    P2 = I31(4, 6);
  11.524 +  } else {
  11.525 +    P2 = I611(4, 3, 7);
  11.526 +  }
  11.527 +  if (MUL) {
  11.528 +    P0 = IC(4);
  11.529 +  } else {
  11.530 +    P0 = I211(4, 1, 3);
  11.531 +  }
  11.532 +} break;
  11.533 +case 80 : 
  11.534 +case 81 : 
  11.535 +{
  11.536 +  P0 = I211(4, 1, 3);
  11.537 +  P1 = I31(4, 2);
  11.538 +  P2 = I31(4, 6);
  11.539 +  if (MDR) {
  11.540 +    P3 = I31(4, 8);
  11.541 +  } else {
  11.542 +    P3 = I211(4, 5, 7);
  11.543 +  }
  11.544 +} break;
  11.545 +case 82 : 
  11.546 +case 214 : 
  11.547 +case 222 : 
  11.548 +{
  11.549 +  P0 = I31(4, 0);
  11.550 +  P2 = I31(4, 6);
  11.551 +  if (MDR) {
  11.552 +    P3 = IC(4);
  11.553 +  } else {
  11.554 +    P3 = I211(4, 5, 7);
  11.555 +  }
  11.556 +  if (MUR) {
  11.557 +    P1 = IC(4);
  11.558 +  } else {
  11.559 +    P1 = I211(4, 1, 5);
  11.560 +  }
  11.561 +} break;
  11.562 +case 83 : 
  11.563 +{
  11.564 +  P0 = I31(4, 3);
  11.565 +  P2 = I31(4, 6);
  11.566 +  if (MDR) {
  11.567 +    P3 = I31(4, 8);
  11.568 +  } else {
  11.569 +    P3 = I611(4, 5, 7);
  11.570 +  }
  11.571 +  if (MUR) {
  11.572 +    P1 = I31(4, 2);
  11.573 +  } else {
  11.574 +    P1 = I611(4, 1, 5);
  11.575 +  }
  11.576 +} break;
  11.577 +case 84 : 
  11.578 +case 85 : 
  11.579 +{
  11.580 +  P0 = I211(4, 1, 3);
  11.581 +  P2 = I31(4, 6);
  11.582 +  if (MDR) {
  11.583 +    P1 = I31(4, 1);
  11.584 +    P3 = I31(4, 8);
  11.585 +  } else {
  11.586 +    P1 = I521(4, 5, 1);
  11.587 +    P3 = I332(5, 7, 4);
  11.588 +  }
  11.589 +} break;
  11.590 +case 87 : 
  11.591 +{
  11.592 +  P0 = I31(4, 3);
  11.593 +  P2 = I31(4, 6);
  11.594 +  if (MDR) {
  11.595 +    P3 = I31(4, 8);
  11.596 +  } else {
  11.597 +    P3 = I611(4, 5, 7);
  11.598 +  }
  11.599 +  if (MUR) {
  11.600 +    P1 = IC(4);
  11.601 +  } else {
  11.602 +    P1 = I211(4, 1, 5);
  11.603 +  }
  11.604 +} break;
  11.605 +case 88 : 
  11.606 +case 248 : 
  11.607 +case 250 : 
  11.608 +{
  11.609 +  P0 = I31(4, 0);
  11.610 +  P1 = I31(4, 2);
  11.611 +  if (MDL) {
  11.612 +    P2 = IC(4);
  11.613 +  } else {
  11.614 +    P2 = I211(4, 3, 7);
  11.615 +  }
  11.616 +  if (MDR) {
  11.617 +    P3 = IC(4);
  11.618 +  } else {
  11.619 +    P3 = I211(4, 5, 7);
  11.620 +  }
  11.621 +} break;
  11.622 +case 89 : 
  11.623 +{
  11.624 +  P0 = I31(4, 1);
  11.625 +  P1 = I31(4, 2);
  11.626 +  if (MDL) {
  11.627 +    P2 = I31(4, 6);
  11.628 +  } else {
  11.629 +    P2 = I611(4, 3, 7);
  11.630 +  }
  11.631 +  if (MDR) {
  11.632 +    P3 = I31(4, 8);
  11.633 +  } else {
  11.634 +    P3 = I611(4, 5, 7);
  11.635 +  }
  11.636 +} break;
  11.637 +case 90 : 
  11.638 +{
  11.639 +  if (MDL) {
  11.640 +    P2 = I31(4, 6);
  11.641 +  } else {
  11.642 +    P2 = I611(4, 3, 7);
  11.643 +  }
  11.644 +  if (MDR) {
  11.645 +    P3 = I31(4, 8);
  11.646 +  } else {
  11.647 +    P3 = I611(4, 5, 7);
  11.648 +  }
  11.649 +  if (MUL) {
  11.650 +    P0 = I31(4, 0);
  11.651 +  } else {
  11.652 +    P0 = I611(4, 1, 3);
  11.653 +  }
  11.654 +  if (MUR) {
  11.655 +    P1 = I31(4, 2);
  11.656 +  } else {
  11.657 +    P1 = I611(4, 1, 5);
  11.658 +  }
  11.659 +} break;
  11.660 +case 91 : 
  11.661 +{
  11.662 +  if (MDL) {
  11.663 +    P2 = I31(4, 6);
  11.664 +  } else {
  11.665 +    P2 = I611(4, 3, 7);
  11.666 +  }
  11.667 +  if (MDR) {
  11.668 +    P3 = I31(4, 8);
  11.669 +  } else {
  11.670 +    P3 = I611(4, 5, 7);
  11.671 +  }
  11.672 +  if (MUL) {
  11.673 +    P0 = IC(4);
  11.674 +  } else {
  11.675 +    P0 = I211(4, 1, 3);
  11.676 +  }
  11.677 +  if (MUR) {
  11.678 +    P1 = I31(4, 2);
  11.679 +  } else {
  11.680 +    P1 = I611(4, 1, 5);
  11.681 +  }
  11.682 +} break;
  11.683 +case 92 : 
  11.684 +{
  11.685 +  P0 = I31(4, 0);
  11.686 +  P1 = I31(4, 1);
  11.687 +  if (MDL) {
  11.688 +    P2 = I31(4, 6);
  11.689 +  } else {
  11.690 +    P2 = I611(4, 3, 7);
  11.691 +  }
  11.692 +  if (MDR) {
  11.693 +    P3 = I31(4, 8);
  11.694 +  } else {
  11.695 +    P3 = I611(4, 5, 7);
  11.696 +  }
  11.697 +} break;
  11.698 +case 93 : 
  11.699 +{
  11.700 +  P0 = I31(4, 1);
  11.701 +  P1 = I31(4, 1);
  11.702 +  if (MDL) {
  11.703 +    P2 = I31(4, 6);
  11.704 +  } else {
  11.705 +    P2 = I611(4, 3, 7);
  11.706 +  }
  11.707 +  if (MDR) {
  11.708 +    P3 = I31(4, 8);
  11.709 +  } else {
  11.710 +    P3 = I611(4, 5, 7);
  11.711 +  }
  11.712 +} break;
  11.713 +case 94 : 
  11.714 +{
  11.715 +  if (MDL) {
  11.716 +    P2 = I31(4, 6);
  11.717 +  } else {
  11.718 +    P2 = I611(4, 3, 7);
  11.719 +  }
  11.720 +  if (MDR) {
  11.721 +    P3 = I31(4, 8);
  11.722 +  } else {
  11.723 +    P3 = I611(4, 5, 7);
  11.724 +  }
  11.725 +  if (MUL) {
  11.726 +    P0 = I31(4, 0);
  11.727 +  } else {
  11.728 +    P0 = I611(4, 1, 3);
  11.729 +  }
  11.730 +  if (MUR) {
  11.731 +    P1 = IC(4);
  11.732 +  } else {
  11.733 +    P1 = I211(4, 1, 5);
  11.734 +  }
  11.735 +} break;
  11.736 +case 96 : 
  11.737 +case 97 : 
  11.738 +case 100 : 
  11.739 +case 101 : 
  11.740 +{
  11.741 +  P0 = I211(4, 1, 3);
  11.742 +  P1 = I211(4, 1, 5);
  11.743 +  P2 = I31(4, 3);
  11.744 +  P3 = I31(4, 8);
  11.745 +} break;
  11.746 +case 98 : 
  11.747 +{
  11.748 +  P0 = I31(4, 0);
  11.749 +  P1 = I31(4, 2);
  11.750 +  P2 = I31(4, 3);
  11.751 +  P3 = I31(4, 8);
  11.752 +} break;
  11.753 +case 99 : 
  11.754 +{
  11.755 +  P0 = I31(4, 3);
  11.756 +  P1 = I31(4, 2);
  11.757 +  P2 = I31(4, 3);
  11.758 +  P3 = I31(4, 8);
  11.759 +} break;
  11.760 +case 102 : 
  11.761 +{
  11.762 +  P0 = I31(4, 0);
  11.763 +  P1 = I31(4, 5);
  11.764 +  P2 = I31(4, 3);
  11.765 +  P3 = I31(4, 8);
  11.766 +} break;
  11.767 +case 103 : 
  11.768 +{
  11.769 +  P0 = I31(4, 3);
  11.770 +  P1 = I31(4, 5);
  11.771 +  P2 = I31(4, 3);
  11.772 +  P3 = I31(4, 8);
  11.773 +} break;
  11.774 +case 104 : 
  11.775 +case 108 : 
  11.776 +{
  11.777 +  P0 = I31(4, 0);
  11.778 +  P1 = I211(4, 1, 5);
  11.779 +  P3 = I31(4, 8);
  11.780 +  if (MDL) {
  11.781 +    P2 = IC(4);
  11.782 +  } else {
  11.783 +    P2 = I211(4, 3, 7);
  11.784 +  }
  11.785 +} break;
  11.786 +case 105 : 
  11.787 +case 109 : 
  11.788 +{
  11.789 +  P1 = I211(4, 1, 5);
  11.790 +  P3 = I31(4, 8);
  11.791 +  if (MDL) {
  11.792 +    P0 = I31(4, 1);
  11.793 +    P2 = IC(4);
  11.794 +  } else {
  11.795 +    P0 = I521(4, 3, 1);
  11.796 +    P2 = I332(3, 7, 4);
  11.797 +  }
  11.798 +} break;
  11.799 +case 106 : 
  11.800 +case 120 : 
  11.801 +{
  11.802 +  P0 = I31(4, 0);
  11.803 +  P1 = I31(4, 2);
  11.804 +  P3 = I31(4, 8);
  11.805 +  if (MDL) {
  11.806 +    P2 = IC(4);
  11.807 +  } else {
  11.808 +    P2 = I211(4, 3, 7);
  11.809 +  }
  11.810 +} break;
  11.811 +case 110 : 
  11.812 +{
  11.813 +  P0 = I31(4, 0);
  11.814 +  P1 = I31(4, 5);
  11.815 +  P3 = I31(4, 8);
  11.816 +  if (MDL) {
  11.817 +    P2 = IC(4);
  11.818 +  } else {
  11.819 +    P2 = I211(4, 3, 7);
  11.820 +  }
  11.821 +} break;
  11.822 +case 111 : 
  11.823 +{
  11.824 +  P1 = I31(4, 5);
  11.825 +  P3 = I31(4, 8);
  11.826 +  if (MDL) {
  11.827 +    P2 = IC(4);
  11.828 +  } else {
  11.829 +    P2 = I211(4, 3, 7);
  11.830 +  }
  11.831 +  if (MUL) {
  11.832 +    P0 = IC(4);
  11.833 +  } else {
  11.834 +    P0 = I1411(4, 1, 3);
  11.835 +  }
  11.836 +} break;
  11.837 +case 112 : 
  11.838 +case 113 : 
  11.839 +{
  11.840 +  P0 = I211(4, 1, 3);
  11.841 +  P1 = I31(4, 2);
  11.842 +  if (MDR) {
  11.843 +    P2 = I31(4, 3);
  11.844 +    P3 = I31(4, 8);
  11.845 +  } else {
  11.846 +    P2 = I521(4, 7, 3);
  11.847 +    P3 = I332(5, 7, 4);
  11.848 +  }
  11.849 +} break;
  11.850 +case 114 : 
  11.851 +{
  11.852 +  P0 = I31(4, 0);
  11.853 +  P2 = I31(4, 3);
  11.854 +  if (MDR) {
  11.855 +    P3 = I31(4, 8);
  11.856 +  } else {
  11.857 +    P3 = I611(4, 5, 7);
  11.858 +  }
  11.859 +  if (MUR) {
  11.860 +    P1 = I31(4, 2);
  11.861 +  } else {
  11.862 +    P1 = I611(4, 1, 5);
  11.863 +  }
  11.864 +} break;
  11.865 +case 115 : 
  11.866 +{
  11.867 +  P0 = I31(4, 3);
  11.868 +  P2 = I31(4, 3);
  11.869 +  if (MDR) {
  11.870 +    P3 = I31(4, 8);
  11.871 +  } else {
  11.872 +    P3 = I611(4, 5, 7);
  11.873 +  }
  11.874 +  if (MUR) {
  11.875 +    P1 = I31(4, 2);
  11.876 +  } else {
  11.877 +    P1 = I611(4, 1, 5);
  11.878 +  }
  11.879 +} break;
  11.880 +case 116 : 
  11.881 +case 117 : 
  11.882 +{
  11.883 +  P0 = I211(4, 1, 3);
  11.884 +  P1 = I31(4, 1);
  11.885 +  P2 = I31(4, 3);
  11.886 +  if (MDR) {
  11.887 +    P3 = I31(4, 8);
  11.888 +  } else {
  11.889 +    P3 = I611(4, 5, 7);
  11.890 +  }
  11.891 +} break;
  11.892 +case 118 : 
  11.893 +{
  11.894 +  P0 = I31(4, 0);
  11.895 +  P2 = I31(4, 3);
  11.896 +  P3 = I31(4, 8);
  11.897 +  if (MUR) {
  11.898 +    P1 = IC(4);
  11.899 +  } else {
  11.900 +    P1 = I211(4, 1, 5);
  11.901 +  }
  11.902 +} break;
  11.903 +case 119 : 
  11.904 +{
  11.905 +  P2 = I31(4, 3);
  11.906 +  P3 = I31(4, 8);
  11.907 +  if (MUR) {
  11.908 +    P0 = I31(4, 3);
  11.909 +    P1 = IC(4);
  11.910 +  } else {
  11.911 +    P0 = I521(4, 1, 3);
  11.912 +    P1 = I332(1, 5, 4);
  11.913 +  }
  11.914 +} break;
  11.915 +case 121 : 
  11.916 +{
  11.917 +  P0 = I31(4, 1);
  11.918 +  P1 = I31(4, 2);
  11.919 +  if (MDL) {
  11.920 +    P2 = IC(4);
  11.921 +  } else {
  11.922 +    P2 = I211(4, 3, 7);
  11.923 +  }
  11.924 +  if (MDR) {
  11.925 +    P3 = I31(4, 8);
  11.926 +  } else {
  11.927 +    P3 = I611(4, 5, 7);
  11.928 +  }
  11.929 +} break;
  11.930 +case 122 : 
  11.931 +{
  11.932 +  if (MDL) {
  11.933 +    P2 = IC(4);
  11.934 +  } else {
  11.935 +    P2 = I211(4, 3, 7);
  11.936 +  }
  11.937 +  if (MDR) {
  11.938 +    P3 = I31(4, 8);
  11.939 +  } else {
  11.940 +    P3 = I611(4, 5, 7);
  11.941 +  }
  11.942 +  if (MUL) {
  11.943 +    P0 = I31(4, 0);
  11.944 +  } else {
  11.945 +    P0 = I611(4, 1, 3);
  11.946 +  }
  11.947 +  if (MUR) {
  11.948 +    P1 = I31(4, 2);
  11.949 +  } else {
  11.950 +    P1 = I611(4, 1, 5);
  11.951 +  }
  11.952 +} break;
  11.953 +case 124 : 
  11.954 +{
  11.955 +  P0 = I31(4, 0);
  11.956 +  P1 = I31(4, 1);
  11.957 +  P3 = I31(4, 8);
  11.958 +  if (MDL) {
  11.959 +    P2 = IC(4);
  11.960 +  } else {
  11.961 +    P2 = I211(4, 3, 7);
  11.962 +  }
  11.963 +} break;
  11.964 +case 125 : 
  11.965 +{
  11.966 +  P1 = I31(4, 1);
  11.967 +  P3 = I31(4, 8);
  11.968 +  if (MDL) {
  11.969 +    P0 = I31(4, 1);
  11.970 +    P2 = IC(4);
  11.971 +  } else {
  11.972 +    P0 = I521(4, 3, 1);
  11.973 +    P2 = I332(3, 7, 4);
  11.974 +  }
  11.975 +} break;
  11.976 +case 126 : 
  11.977 +{
  11.978 +  P0 = I31(4, 0);
  11.979 +  P3 = I31(4, 8);
  11.980 +  if (MDL) {
  11.981 +    P2 = IC(4);
  11.982 +  } else {
  11.983 +    P2 = I211(4, 3, 7);
  11.984 +  }
  11.985 +  if (MUR) {
  11.986 +    P1 = IC(4);
  11.987 +  } else {
  11.988 +    P1 = I211(4, 1, 5);
  11.989 +  }
  11.990 +} break;
  11.991 +case 127 : 
  11.992 +{
  11.993 +  P3 = I31(4, 8);
  11.994 +  if (MDL) {
  11.995 +    P2 = IC(4);
  11.996 +  } else {
  11.997 +    P2 = I211(4, 3, 7);
  11.998 +  }
  11.999 +  if (MUL) {
 11.1000 +    P0 = IC(4);
 11.1001 +  } else {
 11.1002 +    P0 = I1411(4, 1, 3);
 11.1003 +  }
 11.1004 +  if (MUR) {
 11.1005 +    P1 = IC(4);
 11.1006 +  } else {
 11.1007 +    P1 = I211(4, 1, 5);
 11.1008 +  }
 11.1009 +} break;
 11.1010 +case 144 : 
 11.1011 +case 145 : 
 11.1012 +case 176 : 
 11.1013 +case 177 : 
 11.1014 +{
 11.1015 +  P0 = I211(4, 1, 3);
 11.1016 +  P1 = I31(4, 2);
 11.1017 +  P2 = I211(4, 3, 7);
 11.1018 +  P3 = I31(4, 7);
 11.1019 +} break;
 11.1020 +case 146 : 
 11.1021 +case 178 : 
 11.1022 +{
 11.1023 +  P0 = I31(4, 0);
 11.1024 +  P2 = I211(4, 3, 7);
 11.1025 +  if (MUR) {
 11.1026 +    P1 = I31(4, 2);
 11.1027 +    P3 = I31(4, 7);
 11.1028 +  } else {
 11.1029 +    P1 = I332(1, 5, 4);
 11.1030 +    P3 = I521(4, 5, 7);
 11.1031 +  }
 11.1032 +} break;
 11.1033 +case 147 : 
 11.1034 +case 179 : 
 11.1035 +{
 11.1036 +  P0 = I31(4, 3);
 11.1037 +  P2 = I211(4, 3, 7);
 11.1038 +  P3 = I31(4, 7);
 11.1039 +  if (MUR) {
 11.1040 +    P1 = I31(4, 2);
 11.1041 +  } else {
 11.1042 +    P1 = I611(4, 1, 5);
 11.1043 +  }
 11.1044 +} break;
 11.1045 +case 148 : 
 11.1046 +case 149 : 
 11.1047 +case 180 : 
 11.1048 +case 181 : 
 11.1049 +{
 11.1050 +  P0 = I211(4, 1, 3);
 11.1051 +  P1 = I31(4, 1);
 11.1052 +  P2 = I211(4, 3, 7);
 11.1053 +  P3 = I31(4, 7);
 11.1054 +} break;
 11.1055 +case 150 : 
 11.1056 +case 182 : 
 11.1057 +{
 11.1058 +  P0 = I31(4, 0);
 11.1059 +  P2 = I211(4, 3, 7);
 11.1060 +  if (MUR) {
 11.1061 +    P1 = IC(4);
 11.1062 +    P3 = I31(4, 7);
 11.1063 +  } else {
 11.1064 +    P1 = I332(1, 5, 4);
 11.1065 +    P3 = I521(4, 5, 7);
 11.1066 +  }
 11.1067 +} break;
 11.1068 +case 151 : 
 11.1069 +case 183 : 
 11.1070 +{
 11.1071 +  P0 = I31(4, 3);
 11.1072 +  P2 = I211(4, 3, 7);
 11.1073 +  P3 = I31(4, 7);
 11.1074 +  if (MUR) {
 11.1075 +    P1 = IC(4);
 11.1076 +  } else {
 11.1077 +    P1 = I1411(4, 1, 5);
 11.1078 +  }
 11.1079 +} break;
 11.1080 +case 152 : 
 11.1081 +{
 11.1082 +  P0 = I31(4, 0);
 11.1083 +  P1 = I31(4, 2);
 11.1084 +  P2 = I31(4, 6);
 11.1085 +  P3 = I31(4, 7);
 11.1086 +} break;
 11.1087 +case 153 : 
 11.1088 +{
 11.1089 +  P0 = I31(4, 1);
 11.1090 +  P1 = I31(4, 2);
 11.1091 +  P2 = I31(4, 6);
 11.1092 +  P3 = I31(4, 7);
 11.1093 +} break;
 11.1094 +case 154 : 
 11.1095 +{
 11.1096 +  P2 = I31(4, 6);
 11.1097 +  P3 = I31(4, 7);
 11.1098 +  if (MUL) {
 11.1099 +    P0 = I31(4, 0);
 11.1100 +  } else {
 11.1101 +    P0 = I611(4, 1, 3);
 11.1102 +  }
 11.1103 +  if (MUR) {
 11.1104 +    P1 = I31(4, 2);
 11.1105 +  } else {
 11.1106 +    P1 = I611(4, 1, 5);
 11.1107 +  }
 11.1108 +} break;
 11.1109 +case 155 : 
 11.1110 +{
 11.1111 +  P1 = I31(4, 2);
 11.1112 +  P2 = I31(4, 6);
 11.1113 +  P3 = I31(4, 7);
 11.1114 +  if (MUL) {
 11.1115 +    P0 = IC(4);
 11.1116 +  } else {
 11.1117 +    P0 = I211(4, 1, 3);
 11.1118 +  }
 11.1119 +} break;
 11.1120 +case 156 : 
 11.1121 +{
 11.1122 +  P0 = I31(4, 0);
 11.1123 +  P1 = I31(4, 1);
 11.1124 +  P2 = I31(4, 6);
 11.1125 +  P3 = I31(4, 7);
 11.1126 +} break;
 11.1127 +case 157 : 
 11.1128 +{
 11.1129 +  P0 = I31(4, 1);
 11.1130 +  P1 = I31(4, 1);
 11.1131 +  P2 = I31(4, 6);
 11.1132 +  P3 = I31(4, 7);
 11.1133 +} break;
 11.1134 +case 158 : 
 11.1135 +{
 11.1136 +  P2 = I31(4, 6);
 11.1137 +  P3 = I31(4, 7);
 11.1138 +  if (MUL) {
 11.1139 +    P0 = I31(4, 0);
 11.1140 +  } else {
 11.1141 +    P0 = I611(4, 1, 3);
 11.1142 +  }
 11.1143 +  if (MUR) {
 11.1144 +    P1 = IC(4);
 11.1145 +  } else {
 11.1146 +    P1 = I211(4, 1, 5);
 11.1147 +  }
 11.1148 +} break;
 11.1149 +case 159 : 
 11.1150 +{
 11.1151 +  P2 = I31(4, 6);
 11.1152 +  P3 = I31(4, 7);
 11.1153 +  if (MUL) {
 11.1154 +    P0 = IC(4);
 11.1155 +  } else {
 11.1156 +    P0 = I211(4, 1, 3);
 11.1157 +  }
 11.1158 +  if (MUR) {
 11.1159 +    P1 = IC(4);
 11.1160 +  } else {
 11.1161 +    P1 = I1411(4, 1, 5);
 11.1162 +  }
 11.1163 +} break;
 11.1164 +case 184 : 
 11.1165 +{
 11.1166 +  P0 = I31(4, 0);
 11.1167 +  P1 = I31(4, 2);
 11.1168 +  P2 = I31(4, 7);
 11.1169 +  P3 = I31(4, 7);
 11.1170 +} break;
 11.1171 +case 185 : 
 11.1172 +{
 11.1173 +  P0 = I31(4, 1);
 11.1174 +  P1 = I31(4, 2);
 11.1175 +  P2 = I31(4, 7);
 11.1176 +  P3 = I31(4, 7);
 11.1177 +} break;
 11.1178 +case 186 : 
 11.1179 +{
 11.1180 +  P2 = I31(4, 7);
 11.1181 +  P3 = I31(4, 7);
 11.1182 +  if (MUL) {
 11.1183 +    P0 = I31(4, 0);
 11.1184 +  } else {
 11.1185 +    P0 = I611(4, 1, 3);
 11.1186 +  }
 11.1187 +  if (MUR) {
 11.1188 +    P1 = I31(4, 2);
 11.1189 +  } else {
 11.1190 +    P1 = I611(4, 1, 5);
 11.1191 +  }
 11.1192 +} break;
 11.1193 +case 187 : 
 11.1194 +{
 11.1195 +  P1 = I31(4, 2);
 11.1196 +  P3 = I31(4, 7);
 11.1197 +  if (MUL) {
 11.1198 +    P0 = IC(4);
 11.1199 +    P2 = I31(4, 7);
 11.1200 +  } else {
 11.1201 +    P0 = I332(1, 3, 4);
 11.1202 +    P2 = I521(4, 3, 7);
 11.1203 +  }
 11.1204 +} break;
 11.1205 +case 188 : 
 11.1206 +{
 11.1207 +  P0 = I31(4, 0);
 11.1208 +  P1 = I31(4, 1);
 11.1209 +  P2 = I31(4, 7);
 11.1210 +  P3 = I31(4, 7);
 11.1211 +} break;
 11.1212 +case 189 : 
 11.1213 +{
 11.1214 +  P0 = I31(4, 1);
 11.1215 +  P1 = I31(4, 1);
 11.1216 +  P2 = I31(4, 7);
 11.1217 +  P3 = I31(4, 7);
 11.1218 +} break;
 11.1219 +case 190 : 
 11.1220 +{
 11.1221 +  P0 = I31(4, 0);
 11.1222 +  P2 = I31(4, 7);
 11.1223 +  if (MUR) {
 11.1224 +    P1 = IC(4);
 11.1225 +    P3 = I31(4, 7);
 11.1226 +  } else {
 11.1227 +    P1 = I332(1, 5, 4);
 11.1228 +    P3 = I521(4, 5, 7);
 11.1229 +  }
 11.1230 +} break;
 11.1231 +case 191 : 
 11.1232 +{
 11.1233 +  P2 = I31(4, 7);
 11.1234 +  P3 = I31(4, 7);
 11.1235 +  if (MUL) {
 11.1236 +    P0 = IC(4);
 11.1237 +  } else {
 11.1238 +    P0 = I1411(4, 1, 3);
 11.1239 +  }
 11.1240 +  if (MUR) {
 11.1241 +    P1 = IC(4);
 11.1242 +  } else {
 11.1243 +    P1 = I1411(4, 1, 5);
 11.1244 +  }
 11.1245 +} break;
 11.1246 +case 192 : 
 11.1247 +case 193 : 
 11.1248 +case 196 : 
 11.1249 +case 197 : 
 11.1250 +{
 11.1251 +  P0 = I211(4, 1, 3);
 11.1252 +  P1 = I211(4, 1, 5);
 11.1253 +  P2 = I31(4, 6);
 11.1254 +  P3 = I31(4, 5);
 11.1255 +} break;
 11.1256 +case 194 : 
 11.1257 +{
 11.1258 +  P0 = I31(4, 0);
 11.1259 +  P1 = I31(4, 2);
 11.1260 +  P2 = I31(4, 6);
 11.1261 +  P3 = I31(4, 5);
 11.1262 +} break;
 11.1263 +case 195 : 
 11.1264 +{
 11.1265 +  P0 = I31(4, 3);
 11.1266 +  P1 = I31(4, 2);
 11.1267 +  P2 = I31(4, 6);
 11.1268 +  P3 = I31(4, 5);
 11.1269 +} break;
 11.1270 +case 198 : 
 11.1271 +{
 11.1272 +  P0 = I31(4, 0);
 11.1273 +  P1 = I31(4, 5);
 11.1274 +  P2 = I31(4, 6);
 11.1275 +  P3 = I31(4, 5);
 11.1276 +} break;
 11.1277 +case 199 : 
 11.1278 +{
 11.1279 +  P0 = I31(4, 3);
 11.1280 +  P1 = I31(4, 5);
 11.1281 +  P2 = I31(4, 6);
 11.1282 +  P3 = I31(4, 5);
 11.1283 +} break;
 11.1284 +case 200 : 
 11.1285 +case 204 : 
 11.1286 +{
 11.1287 +  P0 = I31(4, 0);
 11.1288 +  P1 = I211(4, 1, 5);
 11.1289 +  if (MDL) {
 11.1290 +    P2 = I31(4, 6);
 11.1291 +    P3 = I31(4, 5);
 11.1292 +  } else {
 11.1293 +    P2 = I332(3, 7, 4);
 11.1294 +    P3 = I521(4, 7, 5);
 11.1295 +  }
 11.1296 +} break;
 11.1297 +case 201 : 
 11.1298 +case 205 : 
 11.1299 +{
 11.1300 +  P0 = I31(4, 1);
 11.1301 +  P1 = I211(4, 1, 5);
 11.1302 +  P3 = I31(4, 5);
 11.1303 +  if (MDL) {
 11.1304 +    P2 = I31(4, 6);
 11.1305 +  } else {
 11.1306 +    P2 = I611(4, 3, 7);
 11.1307 +  }
 11.1308 +} break;
 11.1309 +case 202 : 
 11.1310 +{
 11.1311 +  P1 = I31(4, 2);
 11.1312 +  P3 = I31(4, 5);
 11.1313 +  if (MDL) {
 11.1314 +    P2 = I31(4, 6);
 11.1315 +  } else {
 11.1316 +    P2 = I611(4, 3, 7);
 11.1317 +  }
 11.1318 +  if (MUL) {
 11.1319 +    P0 = I31(4, 0);
 11.1320 +  } else {
 11.1321 +    P0 = I611(4, 1, 3);
 11.1322 +  }
 11.1323 +} break;
 11.1324 +case 203 : 
 11.1325 +{
 11.1326 +  P1 = I31(4, 2);
 11.1327 +  P2 = I31(4, 6);
 11.1328 +  P3 = I31(4, 5);
 11.1329 +  if (MUL) {
 11.1330 +    P0 = IC(4);
 11.1331 +  } else {
 11.1332 +    P0 = I211(4, 1, 3);
 11.1333 +  }
 11.1334 +} break;
 11.1335 +case 206 : 
 11.1336 +{
 11.1337 +  P1 = I31(4, 5);
 11.1338 +  P3 = I31(4, 5);
 11.1339 +  if (MDL) {
 11.1340 +    P2 = I31(4, 6);
 11.1341 +  } else {
 11.1342 +    P2 = I611(4, 3, 7);
 11.1343 +  }
 11.1344 +  if (MUL) {
 11.1345 +    P0 = I31(4, 0);
 11.1346 +  } else {
 11.1347 +    P0 = I611(4, 1, 3);
 11.1348 +  }
 11.1349 +} break;
 11.1350 +case 207 : 
 11.1351 +{
 11.1352 +  P2 = I31(4, 6);
 11.1353 +  P3 = I31(4, 5);
 11.1354 +  if (MUL) {
 11.1355 +    P0 = IC(4);
 11.1356 +    P1 = I31(4, 5);
 11.1357 +  } else {
 11.1358 +    P0 = I332(1, 3, 4);
 11.1359 +    P1 = I521(4, 1, 5);
 11.1360 +  }
 11.1361 +} break;
 11.1362 +case 208 : 
 11.1363 +case 209 : 
 11.1364 +{
 11.1365 +  P0 = I211(4, 1, 3);
 11.1366 +  P1 = I31(4, 2);
 11.1367 +  P2 = I31(4, 6);
 11.1368 +  if (MDR) {
 11.1369 +    P3 = IC(4);
 11.1370 +  } else {
 11.1371 +    P3 = I211(4, 5, 7);
 11.1372 +  }
 11.1373 +} break;
 11.1374 +case 210 : 
 11.1375 +case 216 : 
 11.1376 +{
 11.1377 +  P0 = I31(4, 0);
 11.1378 +  P1 = I31(4, 2);
 11.1379 +  P2 = I31(4, 6);
 11.1380 +  if (MDR) {
 11.1381 +    P3 = IC(4);
 11.1382 +  } else {
 11.1383 +    P3 = I211(4, 5, 7);
 11.1384 +  }
 11.1385 +} break;
 11.1386 +case 211 : 
 11.1387 +{
 11.1388 +  P0 = I31(4, 3);
 11.1389 +  P1 = I31(4, 2);
 11.1390 +  P2 = I31(4, 6);
 11.1391 +  if (MDR) {
 11.1392 +    P3 = IC(4);
 11.1393 +  } else {
 11.1394 +    P3 = I211(4, 5, 7);
 11.1395 +  }
 11.1396 +} break;
 11.1397 +case 212 : 
 11.1398 +case 213 : 
 11.1399 +{
 11.1400 +  P0 = I211(4, 1, 3);
 11.1401 +  P2 = I31(4, 6);
 11.1402 +  if (MDR) {
 11.1403 +    P1 = I31(4, 1);
 11.1404 +    P3 = IC(4);
 11.1405 +  } else {
 11.1406 +    P1 = I521(4, 5, 1);
 11.1407 +    P3 = I332(5, 7, 4);
 11.1408 +  }
 11.1409 +} break;
 11.1410 +case 215 : 
 11.1411 +{
 11.1412 +  P0 = I31(4, 3);
 11.1413 +  P2 = I31(4, 6);
 11.1414 +  if (MDR) {
 11.1415 +    P3 = IC(4);
 11.1416 +  } else {
 11.1417 +    P3 = I211(4, 5, 7);
 11.1418 +  }
 11.1419 +  if (MUR) {
 11.1420 +    P1 = IC(4);
 11.1421 +  } else {
 11.1422 +    P1 = I1411(4, 1, 5);
 11.1423 +  }
 11.1424 +} break;
 11.1425 +case 217 : 
 11.1426 +{
 11.1427 +  P0 = I31(4, 1);
 11.1428 +  P1 = I31(4, 2);
 11.1429 +  P2 = I31(4, 6);
 11.1430 +  if (MDR) {
 11.1431 +    P3 = IC(4);
 11.1432 +  } else {
 11.1433 +    P3 = I211(4, 5, 7);
 11.1434 +  }
 11.1435 +} break;
 11.1436 +case 218 : 
 11.1437 +{
 11.1438 +  if (MDL) {
 11.1439 +    P2 = I31(4, 6);
 11.1440 +  } else {
 11.1441 +    P2 = I611(4, 3, 7);
 11.1442 +  }
 11.1443 +  if (MDR) {
 11.1444 +    P3 = IC(4);
 11.1445 +  } else {
 11.1446 +    P3 = I211(4, 5, 7);
 11.1447 +  }
 11.1448 +  if (MUL) {
 11.1449 +    P0 = I31(4, 0);
 11.1450 +  } else {
 11.1451 +    P0 = I611(4, 1, 3);
 11.1452 +  }
 11.1453 +  if (MUR) {
 11.1454 +    P1 = I31(4, 2);
 11.1455 +  } else {
 11.1456 +    P1 = I611(4, 1, 5);
 11.1457 +  }
 11.1458 +} break;
 11.1459 +case 219 : 
 11.1460 +{
 11.1461 +  P1 = I31(4, 2);
 11.1462 +  P2 = I31(4, 6);
 11.1463 +  if (MDR) {
 11.1464 +    P3 = IC(4);
 11.1465 +  } else {
 11.1466 +    P3 = I211(4, 5, 7);
 11.1467 +  }
 11.1468 +  if (MUL) {
 11.1469 +    P0 = IC(4);
 11.1470 +  } else {
 11.1471 +    P0 = I211(4, 1, 3);
 11.1472 +  }
 11.1473 +} break;
 11.1474 +case 220 : 
 11.1475 +{
 11.1476 +  P0 = I31(4, 0);
 11.1477 +  P1 = I31(4, 1);
 11.1478 +  if (MDL) {
 11.1479 +    P2 = I31(4, 6);
 11.1480 +  } else {
 11.1481 +    P2 = I611(4, 3, 7);
 11.1482 +  }
 11.1483 +  if (MDR) {
 11.1484 +    P3 = IC(4);
 11.1485 +  } else {
 11.1486 +    P3 = I211(4, 5, 7);
 11.1487 +  }
 11.1488 +} break;
 11.1489 +case 221 : 
 11.1490 +{
 11.1491 +  P0 = I31(4, 1);
 11.1492 +  P2 = I31(4, 6);
 11.1493 +  if (MDR) {
 11.1494 +    P1 = I31(4, 1);
 11.1495 +    P3 = IC(4);
 11.1496 +  } else {
 11.1497 +    P1 = I521(4, 5, 1);
 11.1498 +    P3 = I332(5, 7, 4);
 11.1499 +  }
 11.1500 +} break;
 11.1501 +case 223 : 
 11.1502 +{
 11.1503 +  P2 = I31(4, 6);
 11.1504 +  if (MDR) {
 11.1505 +    P3 = IC(4);
 11.1506 +  } else {
 11.1507 +    P3 = I211(4, 5, 7);
 11.1508 +  }
 11.1509 +  if (MUL) {
 11.1510 +    P0 = IC(4);
 11.1511 +  } else {
 11.1512 +    P0 = I211(4, 1, 3);
 11.1513 +  }
 11.1514 +  if (MUR) {
 11.1515 +    P1 = IC(4);
 11.1516 +  } else {
 11.1517 +    P1 = I1411(4, 1, 5);
 11.1518 +  }
 11.1519 +} break;
 11.1520 +case 224 : 
 11.1521 +case 225 : 
 11.1522 +case 228 : 
 11.1523 +case 229 : 
 11.1524 +{
 11.1525 +  P0 = I211(4, 1, 3);
 11.1526 +  P1 = I211(4, 1, 5);
 11.1527 +  P2 = I31(4, 3);
 11.1528 +  P3 = I31(4, 5);
 11.1529 +} break;
 11.1530 +case 226 : 
 11.1531 +{
 11.1532 +  P0 = I31(4, 0);
 11.1533 +  P1 = I31(4, 2);
 11.1534 +  P2 = I31(4, 3);
 11.1535 +  P3 = I31(4, 5);
 11.1536 +} break;
 11.1537 +case 227 : 
 11.1538 +{
 11.1539 +  P0 = I31(4, 3);
 11.1540 +  P1 = I31(4, 2);
 11.1541 +  P2 = I31(4, 3);
 11.1542 +  P3 = I31(4, 5);
 11.1543 +} break;
 11.1544 +case 230 : 
 11.1545 +{
 11.1546 +  P0 = I31(4, 0);
 11.1547 +  P1 = I31(4, 5);
 11.1548 +  P2 = I31(4, 3);
 11.1549 +  P3 = I31(4, 5);
 11.1550 +} break;
 11.1551 +case 231 : 
 11.1552 +{
 11.1553 +  P0 = I31(4, 3);
 11.1554 +  P1 = I31(4, 5);
 11.1555 +  P2 = I31(4, 3);
 11.1556 +  P3 = I31(4, 5);
 11.1557 +} break;
 11.1558 +case 232 : 
 11.1559 +case 236 : 
 11.1560 +{
 11.1561 +  P0 = I31(4, 0);
 11.1562 +  P1 = I211(4, 1, 5);
 11.1563 +  if (MDL) {
 11.1564 +    P2 = IC(4);
 11.1565 +    P3 = I31(4, 5);
 11.1566 +  } else {
 11.1567 +    P2 = I332(3, 7, 4);
 11.1568 +    P3 = I521(4, 7, 5);
 11.1569 +  }
 11.1570 +} break;
 11.1571 +case 233 : 
 11.1572 +case 237 : 
 11.1573 +{
 11.1574 +  P0 = I31(4, 1);
 11.1575 +  P1 = I211(4, 1, 5);
 11.1576 +  P3 = I31(4, 5);
 11.1577 +  if (MDL) {
 11.1578 +    P2 = IC(4);
 11.1579 +  } else {
 11.1580 +    P2 = I1411(4, 3, 7);
 11.1581 +  }
 11.1582 +} break;
 11.1583 +case 234 : 
 11.1584 +{
 11.1585 +  P1 = I31(4, 2);
 11.1586 +  P3 = I31(4, 5);
 11.1587 +  if (MDL) {
 11.1588 +    P2 = IC(4);
 11.1589 +  } else {
 11.1590 +    P2 = I211(4, 3, 7);
 11.1591 +  }
 11.1592 +  if (MUL) {
 11.1593 +    P0 = I31(4, 0);
 11.1594 +  } else {
 11.1595 +    P0 = I611(4, 1, 3);
 11.1596 +  }
 11.1597 +} break;
 11.1598 +case 235 : 
 11.1599 +{
 11.1600 +  P1 = I31(4, 2);
 11.1601 +  P3 = I31(4, 5);
 11.1602 +  if (MDL) {
 11.1603 +    P2 = IC(4);
 11.1604 +  } else {
 11.1605 +    P2 = I1411(4, 3, 7);
 11.1606 +  }
 11.1607 +  if (MUL) {
 11.1608 +    P0 = IC(4);
 11.1609 +  } else {
 11.1610 +    P0 = I211(4, 1, 3);
 11.1611 +  }
 11.1612 +} break;
 11.1613 +case 238 : 
 11.1614 +{
 11.1615 +  P0 = I31(4, 0);
 11.1616 +  P1 = I31(4, 5);
 11.1617 +  if (MDL) {
 11.1618 +    P2 = IC(4);
 11.1619 +    P3 = I31(4, 5);
 11.1620 +  } else {
 11.1621 +    P2 = I332(3, 7, 4);
 11.1622 +    P3 = I521(4, 7, 5);
 11.1623 +  }
 11.1624 +} break;
 11.1625 +case 239 : 
 11.1626 +{
 11.1627 +  P1 = I31(4, 5);
 11.1628 +  P3 = I31(4, 5);
 11.1629 +  if (MDL) {
 11.1630 +    P2 = IC(4);
 11.1631 +  } else {
 11.1632 +    P2 = I1411(4, 3, 7);
 11.1633 +  }
 11.1634 +  if (MUL) {
 11.1635 +    P0 = IC(4);
 11.1636 +  } else {
 11.1637 +    P0 = I1411(4, 1, 3);
 11.1638 +  }
 11.1639 +} break;
 11.1640 +case 240 : 
 11.1641 +case 241 : 
 11.1642 +{
 11.1643 +  P0 = I211(4, 1, 3);
 11.1644 +  P1 = I31(4, 2);
 11.1645 +  if (MDR) {
 11.1646 +    P2 = I31(4, 3);
 11.1647 +    P3 = IC(4);
 11.1648 +  } else {
 11.1649 +    P2 = I521(4, 7, 3);
 11.1650 +    P3 = I332(5, 7, 4);
 11.1651 +  }
 11.1652 +} break;
 11.1653 +case 242 : 
 11.1654 +{
 11.1655 +  P0 = I31(4, 0);
 11.1656 +  P2 = I31(4, 3);
 11.1657 +  if (MDR) {
 11.1658 +    P3 = IC(4);
 11.1659 +  } else {
 11.1660 +    P3 = I211(4, 5, 7);
 11.1661 +  }
 11.1662 +  if (MUR) {
 11.1663 +    P1 = I31(4, 2);
 11.1664 +  } else {
 11.1665 +    P1 = I611(4, 1, 5);
 11.1666 +  }
 11.1667 +} break;
 11.1668 +case 243 : 
 11.1669 +{
 11.1670 +  P0 = I31(4, 3);
 11.1671 +  P1 = I31(4, 2);
 11.1672 +  if (MDR) {
 11.1673 +    P2 = I31(4, 3);
 11.1674 +    P3 = IC(4);
 11.1675 +  } else {
 11.1676 +    P2 = I521(4, 7, 3);
 11.1677 +    P3 = I332(5, 7, 4);
 11.1678 +  }
 11.1679 +} break;
 11.1680 +case 244 : 
 11.1681 +case 245 : 
 11.1682 +{
 11.1683 +  P0 = I211(4, 1, 3);
 11.1684 +  P1 = I31(4, 1);
 11.1685 +  P2 = I31(4, 3);
 11.1686 +  if (MDR) {
 11.1687 +    P3 = IC(4);
 11.1688 +  } else {
 11.1689 +    P3 = I1411(4, 5, 7);
 11.1690 +  }
 11.1691 +} break;
 11.1692 +case 246 : 
 11.1693 +{
 11.1694 +  P0 = I31(4, 0);
 11.1695 +  P2 = I31(4, 3);
 11.1696 +  if (MDR) {
 11.1697 +    P3 = IC(4);
 11.1698 +  } else {
 11.1699 +    P3 = I1411(4, 5, 7);
 11.1700 +  }
 11.1701 +  if (MUR) {
 11.1702 +    P1 = IC(4);
 11.1703 +  } else {
 11.1704 +    P1 = I211(4, 1, 5);
 11.1705 +  }
 11.1706 +} break;
 11.1707 +case 247 : 
 11.1708 +{
 11.1709 +  P0 = I31(4, 3);
 11.1710 +  P2 = I31(4, 3);
 11.1711 +  if (MDR) {
 11.1712 +    P3 = IC(4);
 11.1713 +  } else {
 11.1714 +    P3 = I1411(4, 5, 7);
 11.1715 +  }
 11.1716 +  if (MUR) {
 11.1717 +    P1 = IC(4);
 11.1718 +  } else {
 11.1719 +    P1 = I1411(4, 1, 5);
 11.1720 +  }
 11.1721 +} break;
 11.1722 +case 249 : 
 11.1723 +{
 11.1724 +  P0 = I31(4, 1);
 11.1725 +  P1 = I31(4, 2);
 11.1726 +  if (MDL) {
 11.1727 +    P2 = IC(4);
 11.1728 +  } else {
 11.1729 +    P2 = I1411(4, 3, 7);
 11.1730 +  }
 11.1731 +  if (MDR) {
 11.1732 +    P3 = IC(4);
 11.1733 +  } else {
 11.1734 +    P3 = I211(4, 5, 7);
 11.1735 +  }
 11.1736 +} break;
 11.1737 +case 251 : 
 11.1738 +{
 11.1739 +  P1 = I31(4, 2);
 11.1740 +  if (MDL) {
 11.1741 +    P2 = IC(4);
 11.1742 +  } else {
 11.1743 +    P2 = I1411(4, 3, 7);
 11.1744 +  }
 11.1745 +  if (MDR) {
 11.1746 +    P3 = IC(4);
 11.1747 +  } else {
 11.1748 +    P3 = I211(4, 5, 7);
 11.1749 +  }
 11.1750 +  if (MUL) {
 11.1751 +    P0 = IC(4);
 11.1752 +  } else {
 11.1753 +    P0 = I211(4, 1, 3);
 11.1754 +  }
 11.1755 +} break;
 11.1756 +case 252 : 
 11.1757 +{
 11.1758 +  P0 = I31(4, 0);
 11.1759 +  P1 = I31(4, 1);
 11.1760 +  if (MDL) {
 11.1761 +    P2 = IC(4);
 11.1762 +  } else {
 11.1763 +    P2 = I211(4, 3, 7);
 11.1764 +  }
 11.1765 +  if (MDR) {
 11.1766 +    P3 = IC(4);
 11.1767 +  } else {
 11.1768 +    P3 = I1411(4, 5, 7);
 11.1769 +  }
 11.1770 +} break;
 11.1771 +case 253 : 
 11.1772 +{
 11.1773 +  P0 = I31(4, 1);
 11.1774 +  P1 = I31(4, 1);
 11.1775 +  if (MDL) {
 11.1776 +    P2 = IC(4);
 11.1777 +  } else {
 11.1778 +    P2 = I1411(4, 3, 7);
 11.1779 +  }
 11.1780 +  if (MDR) {
 11.1781 +    P3 = IC(4);
 11.1782 +  } else {
 11.1783 +    P3 = I1411(4, 5, 7);
 11.1784 +  }
 11.1785 +} break;
 11.1786 +case 254 : 
 11.1787 +{
 11.1788 +  P0 = I31(4, 0);
 11.1789 +  if (MDL) {
 11.1790 +    P2 = IC(4);
 11.1791 +  } else {
 11.1792 +    P2 = I211(4, 3, 7);
 11.1793 +  }
 11.1794 +  if (MDR) {
 11.1795 +    P3 = IC(4);
 11.1796 +  } else {
 11.1797 +    P3 = I1411(4, 5, 7);
 11.1798 +  }
 11.1799 +  if (MUR) {
 11.1800 +    P1 = IC(4);
 11.1801 +  } else {
 11.1802 +    P1 = I211(4, 1, 5);
 11.1803 +  }
 11.1804 +} break;
 11.1805 +case 255 : 
 11.1806 +{
 11.1807 +  if (MDL) {
 11.1808 +    P2 = IC(4);
 11.1809 +  } else {
 11.1810 +    P2 = I1411(4, 3, 7);
 11.1811 +  }
 11.1812 +  if (MDR) {
 11.1813 +    P3 = IC(4);
 11.1814 +  } else {
 11.1815 +    P3 = I1411(4, 5, 7);
 11.1816 +  }
 11.1817 +  if (MUL) {
 11.1818 +    P0 = IC(4);
 11.1819 +  } else {
 11.1820 +    P0 = I1411(4, 1, 3);
 11.1821 +  }
 11.1822 +  if (MUR) {
 11.1823 +    P1 = IC(4);
 11.1824 +  } else {
 11.1825 +    P1 = I1411(4, 1, 5);
 11.1826 +  }
 11.1827 +} break;
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/src/filters/hq3x32.cpp	Sun Mar 04 20:32:31 2012 -0600
    12.3 @@ -0,0 +1,445 @@
    12.4 +#include "../Port.h"
    12.5 +#include "hq_shared32.h"
    12.6 +#include "interp.h"
    12.7 +
    12.8 +#define SIZE_PIXEL 2 // 16bit = 2 bytes
    12.9 +#define PIXELTYPE unsigned short // one 16-bit pixel, the element type read by hq3x() and hq3xS()
   12.10 +#define Interp1 Interp1_16 // map the generic Interp1..Interp5 names onto their *_16 counterparts
   12.11 +#define Interp2 Interp2_16 // (presumably the 16-bit blend helpers used by the included hq3x32.h -- confirm)
   12.12 +#define Interp3 Interp3_16
   12.13 +#define Interp4 Interp4_16
   12.14 +#define Interp5 Interp5_16
   12.15 +// hq3x: 3x upscale of a 16-bit image; builds an 8-bit "which neighbours differ" pattern per source pixel for the pasted-in hq3x32.h.
   12.16 +void hq3x(unsigned char *pIn,  unsigned int srcPitch, // source pixels; bytes per source row
   12.17 +          unsigned char *,                            // unnamed and never read
   12.18 +          unsigned char *pOut, unsigned int dstPitch, // output pixels; bytes per output row
   12.19 +          int Xres, int Yres)                         // source width and height in pixels
   12.20 +{
   12.21 +	int i, j;
   12.22 +	unsigned int line; // byte distance to the rows above and below; 0 on the first and last row
   12.23 +	PIXELTYPE	 c[10]; // c[1]..c[9] = 3x3 neighbourhood, c[5] is the centre; c[0] is not set here
   12.24 +
   12.25 +	// +----+----+----+
   12.26 +	// |    |    |    |
   12.27 +	// | c1 | c2 | c3 |
   12.28 +	// +----+----+----+
   12.29 +	// |    |    |    |
   12.30 +	// | c4 | c5 | c6 |
   12.31 +	// +----+----+----+
   12.32 +	// |    |    |    |
   12.33 +	// | c7 | c8 | c9 |
   12.34 +	// +----+----+----+
   12.35 +
   12.36 +	for (j = 0; j < Yres; j++)
   12.37 +	{
   12.38 +		if ((j > 0) && (j < Yres - 1)) // interior row: a real row exists above and below
   12.39 +			line = srcPitch;
   12.40 +		else
   12.41 +			line = 0; // first/last row: the current row stands in, so no row outside the image is read
   12.42 +
   12.43 +		for (i = 0; i < Xres; i++)
   12.44 +		{
   12.45 +			c[2] = *((PIXELTYPE *)(pIn - line));
   12.46 +			c[5] = *((PIXELTYPE *)(pIn));
   12.47 +			c[8] = *((PIXELTYPE *)(pIn + line));
   12.48 +
   12.49 +			if (i > 0)
   12.50 +			{
   12.51 +				c[1] = *((PIXELTYPE *)(pIn - line - SIZE_PIXEL));
   12.52 +				c[4] = *((PIXELTYPE *)(pIn        - SIZE_PIXEL));
   12.53 +				c[7] = *((PIXELTYPE *)(pIn + line - SIZE_PIXEL));
   12.54 +			}
   12.55 +			else
   12.56 +			{
   12.57 +				c[1] = c[2]; // leftmost column: repeat the centre column
   12.58 +				c[4] = c[5];
   12.59 +				c[7] = c[8];
   12.60 +			}
   12.61 +
   12.62 +			if (i < Xres - 1)
   12.63 +			{
   12.64 +				c[3] = *((PIXELTYPE *)(pIn - line + SIZE_PIXEL));
   12.65 +				c[6] = *((PIXELTYPE *)(pIn        + SIZE_PIXEL));
   12.66 +				c[9] = *((PIXELTYPE *)(pIn + line + SIZE_PIXEL));
   12.67 +			}
   12.68 +			else
   12.69 +			{
   12.70 +				c[3] = c[2]; // rightmost column: repeat the centre column
   12.71 +				c[6] = c[5];
   12.72 +				c[9] = c[8];
   12.73 +			}
   12.74 +
   12.75 +			int pattern = 0; // bits 0..7 are set when c1,c2,c3,c4,c6,c7,c8,c9 differ from c5 per interp_16_diff()
   12.76 +
   12.77 +			if (interp_16_diff(c[1], c[5]))
   12.78 +				pattern |= 1 << 0;
   12.79 +			if (interp_16_diff(c[2], c[5]))
   12.80 +				pattern |= 1 << 1;
   12.81 +			if (interp_16_diff(c[3], c[5]))
   12.82 +				pattern |= 1 << 2;
   12.83 +			if (interp_16_diff(c[4], c[5]))
   12.84 +				pattern |= 1 << 3;
   12.85 +			if (interp_16_diff(c[6], c[5]))
   12.86 +				pattern |= 1 << 4;
   12.87 +			if (interp_16_diff(c[7], c[5]))
   12.88 +				pattern |= 1 << 5;
   12.89 +			if (interp_16_diff(c[8], c[5]))
   12.90 +				pattern |= 1 << 6;
   12.91 +			if (interp_16_diff(c[9], c[5]))
   12.92 +				pattern |= 1 << 7;
   12.93 +			// hq3x32.h is pasted in below with Diff mapped to interp_16_diff; presumably it selects on 'pattern' and writes the 3x3 block at pOut -- confirm against the header.
   12.94 +#define Diff interp_16_diff
   12.95 +#include "hq3x32.h"
   12.96 +#undef Diff
   12.97 +			pIn	 += SIZE_PIXEL;
   12.98 +			pOut += 3 << 1; // three 2-byte output pixels per source pixel
   12.99 +		}
  12.100 +		// Step to the next row. pIn has already moved 2*Xres bytes, so adding
  12.101 +		// srcPitch - 2*Xres lands on the start of the next source row. pOut has
  12.102 +		// moved 3*2*Xres bytes; the two additions below leave it 3 * dstPitch
  12.103 +		// past the start of this output row, i.e. every source row consumes
  12.104 +		// three output rows.
  12.105 +
  12.106 +		pIn	 += srcPitch - (Xres << 1);
  12.107 +		pOut += dstPitch - (3 * Xres << 1);
  12.108 +		pOut += dstPitch << 1;
  12.109 +	}
  12.110 +}
  12.111 +// hq3xS: 3x upscale of a 16-bit image; a neighbour counts as different when its brightness is far from the centre's relative to the 3x3 block's own contrast.
  12.112 +void hq3xS(unsigned char *pIn,  unsigned int srcPitch, // source pixels; bytes per source row
  12.113 +           unsigned char *,                            // unnamed and never read
  12.114 +           unsigned char *pOut, unsigned int dstPitch, // output pixels; bytes per output row
  12.115 +           int Xres, int Yres)                         // source width and height in pixels
  12.116 +{
  12.117 +	int i, j;
  12.118 +	unsigned int line; // byte distance to the rows above and below; 0 on the first and last row
  12.119 +	PIXELTYPE c[10];   // c[1]..c[9] = 3x3 neighbourhood, c[5] is the centre; c[0] is not set here
  12.120 +
  12.121 +	// +----+----+----+
  12.122 +	// |    |    |    |
  12.123 +	// | c1 | c2 | c3 |
  12.124 +	// +----+----+----+
  12.125 +	// |    |    |    |
  12.126 +	// | c4 | c5 | c6 |
  12.127 +	// +----+----+----+
  12.128 +	// |    |    |    |
  12.129 +	// | c7 | c8 | c9 |
  12.130 +	// +----+----+----+
  12.131 +
  12.132 +	for (j = 0; j < Yres; j++)
  12.133 +	{
  12.134 +		if ((j > 0) && (j < Yres - 1)) // interior row: a real row exists above and below
  12.135 +			line = srcPitch;
  12.136 +		else
  12.137 +			line = 0; // first/last row: the current row stands in, as in hq3x32() and hq3xS32()
  12.138 +
  12.139 +		for (i = 0; i < Xres; i++)
  12.140 +		{
  12.141 +			c[2] = *((PIXELTYPE *)(pIn - line));
  12.142 +			c[5] = *((PIXELTYPE *)(pIn));
  12.143 +			c[8] = *((PIXELTYPE *)(pIn + line));
  12.144 +
  12.145 +			c[1] = (i > 0) ? *((PIXELTYPE *)(pIn - line - SIZE_PIXEL)) : c[2]; // leftmost column repeats the centre column
  12.146 +			c[4] = (i > 0) ? *((PIXELTYPE *)(pIn        - SIZE_PIXEL)) : c[5];
  12.147 +			c[7] = (i > 0) ? *((PIXELTYPE *)(pIn + line - SIZE_PIXEL)) : c[8];
  12.148 +
  12.149 +			c[3] = (i < Xres - 1) ? *((PIXELTYPE *)(pIn - line + SIZE_PIXEL)) : c[2]; // rightmost column likewise
  12.150 +			c[6] = (i < Xres - 1) ? *((PIXELTYPE *)(pIn        + SIZE_PIXEL)) : c[5];
  12.151 +			c[9] = (i < Xres - 1) ? *((PIXELTYPE *)(pIn + line + SIZE_PIXEL)) : c[8];
  12.152 +
  12.153 +			int pattern = 0; // bits 0..7 correspond to c1,c2,c3,c4,c6,c7,c8,c9
  12.154 +
  12.155 +			// hq3xS dynamic edge detection:
  12.156 +			// simply comparing the center color against its surroundings will give bad results in many cases,
  12.157 +			// so, instead, compare the center color relative to the max difference in brightness of this 3x3 block
  12.158 +			int brightArray[10];
  12.159 +			int maxBright = 0, minBright = 999999;
  12.160 +			for (int n = 1; n < 10; n++) // own index name, so the row counter j is not shadowed
  12.161 +			{
  12.162 +				int r, g, b;
  12.163 +				if (interp_bits_per_pixel == 16)
  12.164 +				{
  12.165 +					b = (int)((c[n] & 0x1F)) << 3; // 5-6-5 masks, each channel scaled up towards 8 bits
  12.166 +					g = (int)((c[n] & 0x7E0)) >> 3;
  12.167 +					r = (int)((c[n] & 0xF800)) >> 8;
  12.168 +				}
  12.169 +				else
  12.170 +				{
  12.171 +					b = (int)((c[n] & 0x1F)) << 3; // 5-5-5 masks
  12.172 +					g = (int)((c[n] & 0x3E0)) >> 2;
  12.173 +					r = (int)((c[n] & 0x7C00)) >> 7;
  12.174 +				}
  12.175 +				const int bright = r + r + r + g + g + g + b + b; // red and green weigh 3, blue weighs 2
  12.176 +				if (bright > maxBright) maxBright = bright;
  12.177 +				if (bright < minBright) minBright = bright;
  12.178 +
  12.179 +				brightArray[n] = bright;
  12.180 +			}
  12.181 +			const int diffBright = ((maxBright - minBright) * 7) >> 4; // threshold: 7/16 of the block's brightness range
  12.182 +			if (diffBright > 7) // flatter blocks keep pattern == 0
  12.183 +			{
  12.184 +				#define ABS(x) ((x) < 0 ? -(x) : (x))
  12.185 +
  12.186 +				const int centerBright = brightArray[5];
  12.187 +				if (ABS(brightArray[1] - centerBright) > diffBright)
  12.188 +					pattern |= 1 << 0;
  12.189 +				if (ABS(brightArray[2] - centerBright) > diffBright)
  12.190 +					pattern |= 1 << 1;
  12.191 +				if (ABS(brightArray[3] - centerBright) > diffBright)
  12.192 +					pattern |= 1 << 2;
  12.193 +				if (ABS(brightArray[4] - centerBright) > diffBright)
  12.194 +					pattern |= 1 << 3;
  12.195 +				if (ABS(brightArray[6] - centerBright) > diffBright)
  12.196 +					pattern |= 1 << 4;
  12.197 +				if (ABS(brightArray[7] - centerBright) > diffBright)
  12.198 +					pattern |= 1 << 5;
  12.199 +				if (ABS(brightArray[8] - centerBright) > diffBright)
  12.200 +					pattern |= 1 << 6;
  12.201 +				if (ABS(brightArray[9] - centerBright) > diffBright)
  12.202 +					pattern |= 1 << 7;
  12.203 +			}
  12.204 +			// hq3x32.h is pasted in below with Diff forced to false and cget(x) reading brightArray; presumably it writes the 3x3 block at pOut -- confirm against the header.
  12.205 +#define Diff(x, y) false //(ABS((x) - (y)) > diffBright)
  12.206 +#undef cget
  12.207 +#define cget(x) brightArray[x]
  12.208 +#include "hq3x32.h"
  12.209 +#undef cget
  12.210 +#undef Diff
  12.211 +			pIn	 += SIZE_PIXEL;
  12.212 +			pOut += 3 << 1; // three 2-byte output pixels per source pixel
  12.213 +		}
  12.214 +		pIn	 += srcPitch - (Xres << 1);      // on to the start of the next source row
  12.215 +		pOut += dstPitch - (3 * Xres << 1); // back to the start of this output row plus one row ...
  12.216 +		pOut += dstPitch << 1;              // ... plus two more: three output rows per source row
  12.217 +	}
  12.218 +}
  12.219 +
  12.220 +#undef Interp1 // drop the 16-bit Interp aliases; they are not redefined here for the 32-bit functions below
  12.221 +#undef Interp2
  12.222 +#undef Interp3
  12.223 +#undef Interp4
  12.224 +#undef Interp5
  12.225 +#undef SIZE_PIXEL
  12.226 +#undef PIXELTYPE
  12.227 +#define SIZE_PIXEL 4 // 32bit = 4 bytes
  12.228 +#define PIXELTYPE unsigned int // one 32-bit pixel, the element type read by hq3x32() and hq3xS32()
  12.229 +// hq3x32: 3x upscale of a 32-bit image; a neighbour counts as different when its RGBtoYUV() value is further than trY/trU/trV from the centre's.
  12.230 +void hq3x32(unsigned char *pIn,  unsigned int srcPitch, // source pixels; bytes per source row
  12.231 +            unsigned char *, // unnamed and never read
  12.232 +            unsigned char *pOut, unsigned int dstPitch, // output pixels; bytes per output row
  12.233 +            int Xres, int Yres) // source width and height in pixels
  12.234 +{
  12.235 +	unsigned int YUV1, YUV2; // RGBtoYUV() of the centre pixel and of the neighbour being tested
  12.236 +	int i, j, k;
  12.237 +	unsigned int line; // byte distance to the rows above and below; 0 on the first and last row
  12.238 +	PIXELTYPE c[10]; // c[1]..c[9] = 3x3 neighbourhood, c[5] is the centre; c[0] is not set here
  12.239 +
  12.240 +	// +----+----+----+
  12.241 +	// |    |    |    |
  12.242 +	// | c1 | c2 | c3 |
  12.243 +	// +----+----+----+
  12.244 +	// |    |    |    |
  12.245 +	// | c4 | c5 | c6 |
  12.246 +	// +----+----+----+
  12.247 +	// |    |    |    |
  12.248 +	// | c7 | c8 | c9 |
  12.249 +	// +----+----+----+
  12.250 +
  12.251 +	for (j = 0; j < Yres; j++)
  12.252 +	{
  12.253 +		if ((j > 0) && (j < Yres - 1)) // interior row: a real row exists above and below
  12.254 +			line = srcPitch;
  12.255 +		else
  12.256 +			line = 0; // first/last row: the current row stands in for both vertical neighbours
  12.257 +
  12.258 +		for (i = 0; i < Xres; i++)
  12.259 +		{
  12.260 +			c[2] = *((PIXELTYPE *)(pIn - line));
  12.261 +			c[5] = *((PIXELTYPE *)(pIn));
  12.262 +			c[8] = *((PIXELTYPE *)(pIn + line));
  12.263 +
  12.264 +			if (i > 0)
  12.265 +			{
  12.266 +				c[1] = *((PIXELTYPE *)(pIn - line - SIZE_PIXEL));
  12.267 +				c[4] = *((PIXELTYPE *)(pIn        - SIZE_PIXEL));
  12.268 +				c[7] = *((PIXELTYPE *)(pIn + line - SIZE_PIXEL));
  12.269 +			}
  12.270 +			else
  12.271 +			{
  12.272 +				c[1] = c[2]; // leftmost column: repeat the centre column
  12.273 +				c[4] = c[5];
  12.274 +				c[7] = c[8];
  12.275 +			}
  12.276 +
  12.277 +			if (i < Xres - 1)
  12.278 +			{
  12.279 +				c[3] = *((PIXELTYPE *)(pIn - line + SIZE_PIXEL));
  12.280 +				c[6] = *((PIXELTYPE *)(pIn        + SIZE_PIXEL));
  12.281 +				c[9] = *((PIXELTYPE *)(pIn + line + SIZE_PIXEL));
  12.282 +			}
  12.283 +			else
  12.284 +			{
  12.285 +				c[3] = c[2]; // rightmost column: repeat the centre column
  12.286 +				c[6] = c[5];
  12.287 +				c[9] = c[8];
  12.288 +			}
  12.289 +
  12.290 +			int pattern = 0; // bits 0..7 end up describing c1,c2,c3,c4,c6,c7,c8,c9 in that order
  12.291 +			int flag	= 1; // bit to set for the neighbour currently being tested
  12.292 +
  12.293 +			YUV1 = RGBtoYUV(c[5]);
  12.294 +
  12.295 +			for (k = 1; k <= 9; k++)
  12.296 +			{
  12.297 +				if (k == 5) continue; // the centre is not compared with itself and consumes no bit
  12.298 +
  12.299 +				if (c[k] != c[5]) // identical pixels skip the conversion entirely
  12.300 +				{
  12.301 +					YUV2 = RGBtoYUV(c[k]);
  12.302 +					if (
  12.303 +					    (abs32((YUV1 & Ymask) - (YUV2 & Ymask)) > trY) ||
  12.304 +					    (abs32((YUV1 & Umask) - (YUV2 & Umask)) > trU) ||
  12.305 +					    (abs32((YUV1 & Vmask) - (YUV2 & Vmask)) > trV)
  12.306 +					    )
  12.307 +						pattern |= flag;
  12.308 +				}
  12.309 +				flag <<= 1; // advances for every neighbour, whether or not it differed
  12.310 +			}
  12.311 +			// hq3x32.h is pasted in below; presumably it selects on 'pattern' and writes the 3x3 block at pOut -- confirm against the header.
  12.312 +#include "hq3x32.h"
  12.313 +			pIn	 += SIZE_PIXEL;
  12.314 +			pOut += 3 << 2; // three 4-byte output pixels per source pixel
  12.315 +		}
  12.316 +		pIn	 += srcPitch - (Xres << 2);
  12.317 +		pOut += dstPitch - (3 * Xres << 2);
  12.318 +		pOut += dstPitch << 1;
  12.319 +		// Row advance performed by the three statements above:
  12.320 +		//   pIn has already moved 4*Xres bytes, so it now sits at the start of
  12.321 +		//   the next source row (srcPitch bytes after the start of this one);
  12.322 +		//   pOut has already moved 3*4*Xres bytes, so it now sits 3 * dstPitch
  12.323 +		//   past the start of this output row, i.e. every source row consumes
  12.324 +		//   three output rows.
  12.325 +	}
  12.326 +}
  12.327 +
  12.328 +void hq3xS32(unsigned char *pIn,  unsigned int srcPitch, // source pixels; srcPitch is the byte stride between source rows
  12.329 +             unsigned char *, // unnamed parameter, not used by this function
  12.330 +             unsigned char *pOut, unsigned int dstPitch, // destination pixels; dstPitch is the byte stride between destination rows
  12.331 +             int Xres, int Yres) // source width and height in pixels
  12.332 +{
  12.333 +	int i, j;
  12.334 +	unsigned int line; // byte offset to the row above/below; 0 on the first and last rows
  12.335 +	PIXELTYPE c[10]; // c[1]..c[9] hold the 3x3 neighbourhood; c[0] is never written in this function
  12.336 +	// hq3xS variant: each source pixel produces a 3x3 block of output pixels, selected by the switch in hq3x32.h.
  12.337 +	// +----+----+----+
  12.338 +	// |    |    |    |
  12.339 +	// | c1 | c2 | c3 |
  12.340 +	// +----+----+----+
  12.341 +	// |    |    |    |
  12.342 +	// | c4 | c5 | c6 |
  12.343 +	// +----+----+----+
  12.344 +	// |    |    |    |
  12.345 +	// | c7 | c8 | c9 |
  12.346 +	// +----+----+----+
  12.347 +
  12.348 +	for (j = 0; j < Yres; j++)
  12.349 +	{
  12.350 +		if ((j > 0) && (j < Yres - 1))
  12.351 +			line = srcPitch;
  12.352 +		else
  12.353 +			line = 0; // first and last rows: "above" and "below" both alias the current row
  12.354 +
  12.355 +		for (i = 0; i < Xres; i++)
  12.356 +		{
  12.357 +			c[2] = *((PIXELTYPE *)(pIn - line)); // above
  12.358 +			c[5] = *((PIXELTYPE *)(pIn)); // centre (the pixel being scaled)
  12.359 +			c[8] = *((PIXELTYPE *)(pIn + line)); // below
  12.360 +
  12.361 +			if (i > 0)
  12.362 +			{
  12.363 +				c[1] = *((PIXELTYPE *)(pIn - line - SIZE_PIXEL));
  12.364 +				c[4] = *((PIXELTYPE *)(pIn        - SIZE_PIXEL));
  12.365 +				c[7] = *((PIXELTYPE *)(pIn + line - SIZE_PIXEL));
  12.366 +			}
  12.367 +			else
  12.368 +			{ // leftmost pixel: the left column duplicates the centre column
  12.369 +				c[1] = c[2];
  12.370 +				c[4] = c[5];
  12.371 +				c[7] = c[8];
  12.372 +			}
  12.373 +
  12.374 +			if (i < Xres - 1)
  12.375 +			{
  12.376 +				c[3] = *((PIXELTYPE *)(pIn - line + SIZE_PIXEL));
  12.377 +				c[6] = *((PIXELTYPE *)(pIn        + SIZE_PIXEL));
  12.378 +				c[9] = *((PIXELTYPE *)(pIn + line + SIZE_PIXEL));
  12.379 +			}
  12.380 +			else
  12.381 +			{ // rightmost pixel: the right column duplicates the centre column
  12.382 +				c[3] = c[2];
  12.383 +				c[6] = c[5];
  12.384 +				c[9] = c[8];
  12.385 +			}
  12.386 +
  12.387 +			int pattern = 0; // 8-bit mask, one bit per non-centre neighbour; it is the switch selector in hq3x32.h
  12.388 +
  12.389 +			// hq3xS dynamic edge detection:
  12.390 +			// simply comparing the center color against its surroundings gives bad results in many cases,
  12.391 +			// so instead compare each neighbour's brightness to the center's, relative to the brightness range of this 3x3 block
  12.392 +			int brightArray[10]; // indices 1..9 mirror c[]; index 0 is unused
  12.393 +			int maxBright = 0, minBright = 999999;
  12.394 +			for (int j = 1; j < 10; j++) // NOTE: this j shadows the row counter j declared at function scope
  12.395 +			{
  12.396 +				const int b		 = (int)((c[j] & 0xF8)); // top 5 bits of byte 0 of the pixel
  12.397 +				const int g		 = (int)((c[j] & 0xF800)) >> 8; // top 5 bits of byte 1
  12.398 +				const int r		 = (int)((c[j] & 0xF80000)) >> 16; // top 5 bits of byte 2
  12.399 +				const int bright = r + r + r + g + g + g + b + b; // weighted sum 3*r + 3*g + 2*b
  12.400 +				if (bright > maxBright) maxBright = bright;
  12.401 +				if (bright < minBright) minBright = bright;
  12.402 +
  12.403 +				brightArray[j] = bright;
  12.404 +			}
  12.405 +			int diffBright = ((maxBright - minBright) * 7) >> 4; // threshold: 7/16 of the block's brightness range
  12.406 +			if (diffBright > 7) // below this the block is treated as flat and pattern stays 0
  12.407 +			{
  12.408 +				#define ABS(x) ((x) < 0 ? -(x) : (x)) // NOTE: never #undef'd within this function
  12.409 +
  12.410 +				const int centerBright = brightArray[5];
  12.411 +				if (ABS(brightArray[1] - centerBright) > diffBright)
  12.412 +					pattern |= 1 << 0; // c1
  12.413 +				if (ABS(brightArray[2] - centerBright) > diffBright)
  12.414 +					pattern |= 1 << 1; // c2
  12.415 +				if (ABS(brightArray[3] - centerBright) > diffBright)
  12.416 +					pattern |= 1 << 2; // c3
  12.417 +				if (ABS(brightArray[4] - centerBright) > diffBright)
  12.418 +					pattern |= 1 << 3; // c4
  12.419 +				if (ABS(brightArray[6] - centerBright) > diffBright)
  12.420 +					pattern |= 1 << 4; // c6 (the centre c5 has no bit)
  12.421 +				if (ABS(brightArray[7] - centerBright) > diffBright)
  12.422 +					pattern |= 1 << 5; // c7
  12.423 +				if (ABS(brightArray[8] - centerBright) > diffBright)
  12.424 +					pattern |= 1 << 6; // c8
  12.425 +				if (ABS(brightArray[9] - centerBright) > diffBright)
  12.426 +					pattern |= 1 << 7; // c9
  12.427 +			}
  12.428 +			// Diff() expands to false below, so each "if (Diff(...))" in hq3x32.h takes its else branch and the cget() arguments are discarded.
  12.429 +#define Diff(x, y) false //(ABS((x) - (y)) > diffBright)
  12.430 +#undef cget
  12.431 +#define cget(x) brightArray[x]
  12.432 +#include "hq3x32.h"
  12.433 +#undef cget
  12.434 +#undef Diff
  12.435 +			pIn	 += SIZE_PIXEL;
  12.436 +			pOut += 3 << 2; // 12 bytes: three output pixels at 4 bytes each
  12.437 +		}
  12.438 +		pIn	 += srcPitch - (Xres << 2); // advance to the next source row; hard-codes 4 bytes per pixel (assumes SIZE_PIXEL == 4 -- TODO confirm)
  12.439 +		pOut += dstPitch - (3 * Xres << 2); // rewind to the start of this output row and step down one row...
  12.440 +		pOut += dstPitch << 1; // ...then two more: three destination rows per source row
  12.441 +		//	pIn+=SIZE_PIXEL;
  12.442 +		//	pOut+=3*SIZE_PIXEL;
  12.443 +		//}
  12.444 +		//pIn+=srcPitch-(4*Xres);
  12.445 +		//pOut+=dstPitch-(3*Xres*SIZE_PIXEL);
  12.446 +		//pOut+=2*dstPitch;
  12.447 +	}
  12.448 +}
  12.449 \ No newline at end of file
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/src/filters/hq3x32.h	Sun Mar 04 20:32:31 2012 -0600
    13.3 @@ -0,0 +1,3674 @@
    13.4 +#define PIXEL00_1M  Interp1( pOut, c[5], c[1]       ); // PIXELrc_*: writes the output pixel at row r, column c of the 3x3 block at pOut
    13.5 +#define PIXEL00_1U  Interp1( pOut, c[5], c[2]       ); // each macro carries its own ';' so call sites use it as a bare statement
    13.6 +#define PIXEL00_1L  Interp1( pOut, c[5], c[4]       ); // Interp1..Interp5 are defined outside this header
    13.7 +#define PIXEL00_2   Interp2( pOut, c[5], c[4], c[2] );
    13.8 +#define PIXEL00_4   Interp4( pOut, c[5], c[4], c[2] );
    13.9 +#define PIXEL00_5   Interp5( pOut, c[4], c[2]       );
   13.10 +#define PIXEL00_C   *((PIXELTYPE*)(pOut)) = c[5]; // _C variants store the centre pixel c[5] unchanged
   13.11 +
   13.12 +#define PIXEL01_1   Interp1( pOut+SIZE_PIXEL, c[5], c[2] ); // row 0, column 1 (one pixel right of pOut)
   13.13 +#define PIXEL01_3   Interp3( pOut+SIZE_PIXEL, c[5], c[2] );
   13.14 +#define PIXEL01_6   Interp1( pOut+SIZE_PIXEL, c[2], c[5] ); // _6 variants call Interp1 with the two colour arguments swapped relative to _1
   13.15 +#define PIXEL01_C   *((PIXELTYPE*)(pOut+SIZE_PIXEL)) = c[5];
   13.16 +
   13.17 +#define PIXEL02_1M  Interp1( pOut+SIZE_PIXEL+SIZE_PIXEL, c[5], c[3]       ); // row 0, column 2
   13.18 +#define PIXEL02_1U  Interp1( pOut+SIZE_PIXEL+SIZE_PIXEL, c[5], c[2]       );
   13.19 +#define PIXEL02_1R  Interp1( pOut+SIZE_PIXEL+SIZE_PIXEL, c[5], c[6]       );
   13.20 +#define PIXEL02_2   Interp2( pOut+SIZE_PIXEL+SIZE_PIXEL, c[5], c[2], c[6] );
   13.21 +#define PIXEL02_4   Interp4( pOut+SIZE_PIXEL+SIZE_PIXEL, c[5], c[2], c[6] );
   13.22 +#define PIXEL02_5   Interp5( pOut+SIZE_PIXEL+SIZE_PIXEL, c[2], c[6]       );
   13.23 +#define PIXEL02_C   *((PIXELTYPE*)(pOut+SIZE_PIXEL+SIZE_PIXEL)) = c[5];
   13.24 +
   13.25 +#define PIXEL10_1   Interp1( pOut+dstPitch, c[5], c[4] ); // row 1, column 0 (one destination row below pOut)
   13.26 +#define PIXEL10_3   Interp3( pOut+dstPitch, c[5], c[4] );
   13.27 +#define PIXEL10_6   Interp1( pOut+dstPitch, c[4], c[5] );
   13.28 +#define PIXEL10_C   *((PIXELTYPE*)(pOut+dstPitch)) = c[5];
   13.29 +
   13.30 +#define PIXEL11     *((PIXELTYPE*)(pOut+dstPitch+SIZE_PIXEL)) = c[5]; // centre of the block: always the source pixel itself
   13.31 +
   13.32 +#define PIXEL12_1   Interp1( pOut+dstPitch+SIZE_PIXEL+SIZE_PIXEL, c[5], c[6] ); // row 1, column 2
   13.33 +#define PIXEL12_3   Interp3( pOut+dstPitch+SIZE_PIXEL+SIZE_PIXEL, c[5], c[6] );
   13.34 +#define PIXEL12_6   Interp1( pOut+dstPitch+SIZE_PIXEL+SIZE_PIXEL, c[6], c[5] );
   13.35 +#define PIXEL12_C   *((PIXELTYPE*)(pOut+dstPitch+SIZE_PIXEL+SIZE_PIXEL)) = c[5];
   13.36 +
   13.37 +#define PIXEL20_1M  Interp1( pOut+dstPitch+dstPitch, c[5], c[7]       ); // row 2, column 0
   13.38 +#define PIXEL20_1D  Interp1( pOut+dstPitch+dstPitch, c[5], c[8]       );
   13.39 +#define PIXEL20_1L  Interp1( pOut+dstPitch+dstPitch, c[5], c[4]       );
   13.40 +#define PIXEL20_2   Interp2( pOut+dstPitch+dstPitch, c[5], c[8], c[4] );
   13.41 +#define PIXEL20_4   Interp4( pOut+dstPitch+dstPitch, c[5], c[8], c[4] );
   13.42 +#define PIXEL20_5   Interp5( pOut+dstPitch+dstPitch, c[8], c[4]       );
   13.43 +#define PIXEL20_C   *((PIXELTYPE*)(pOut+dstPitch+dstPitch)) = c[5];
   13.44 +
   13.45 +#define PIXEL21_1   Interp1( pOut+dstPitch+dstPitch+SIZE_PIXEL, c[5], c[8] ); // row 2, column 1
   13.46 +#define PIXEL21_3   Interp3( pOut+dstPitch+dstPitch+SIZE_PIXEL, c[5], c[8] );
   13.47 +#define PIXEL21_6   Interp1( pOut+dstPitch+dstPitch+SIZE_PIXEL, c[8], c[5] );
   13.48 +#define PIXEL21_C   *((PIXELTYPE*)(pOut+dstPitch+dstPitch+SIZE_PIXEL)) = c[5];
   13.49 +
   13.50 +#define PIXEL22_1M  Interp1( pOut+dstPitch+dstPitch+SIZE_PIXEL+SIZE_PIXEL, c[5], c[9]       ); // row 2, column 2
   13.51 +#define PIXEL22_1D  Interp1( pOut+dstPitch+dstPitch+SIZE_PIXEL+SIZE_PIXEL, c[5], c[8]       );
   13.52 +#define PIXEL22_1R  Interp1( pOut+dstPitch+dstPitch+SIZE_PIXEL+SIZE_PIXEL, c[5], c[6]       );
   13.53 +#define PIXEL22_2   Interp2( pOut+dstPitch+dstPitch+SIZE_PIXEL+SIZE_PIXEL, c[5], c[6], c[8] );
   13.54 +#define PIXEL22_4   Interp4( pOut+dstPitch+dstPitch+SIZE_PIXEL+SIZE_PIXEL, c[5], c[6], c[8] );
   13.55 +#define PIXEL22_5   Interp5( pOut+dstPitch+dstPitch+SIZE_PIXEL+SIZE_PIXEL, c[6], c[8]       );
   13.56 +#define PIXEL22_C   *((PIXELTYPE*)(pOut+dstPitch+dstPitch+SIZE_PIXEL+SIZE_PIXEL)) = c[5];
   13.57 +
   13.58 +#ifndef cget // the including file may predefine cget to change what Diff() is given
   13.59 +#define cget(x) c[x]
   13.60 +#endif
   13.61 +
   13.62 +      switch (pattern)
   13.63 +      {
   13.64 +        case 0:
   13.65 +        case 1:
   13.66 +        case 4:
   13.67 +        case 32:
   13.68 +        case 128:
   13.69 +        case 5:
   13.70 +        case 132:
   13.71 +        case 160:
   13.72 +        case 33:
   13.73 +        case 129:
   13.74 +        case 36:
   13.75 +        case 133:
   13.76 +        case 164:
   13.77 +        case 161:
   13.78 +        case 37:
   13.79 +        case 165:
   13.80 +        {
   13.81 +          PIXEL00_2
   13.82 +          PIXEL01_1
   13.83 +          PIXEL02_2
   13.84 +          PIXEL10_1
   13.85 +          PIXEL11
   13.86 +          PIXEL12_1
   13.87 +          PIXEL20_2
   13.88 +          PIXEL21_1
   13.89 +          PIXEL22_2
   13.90 +          break;
   13.91 +        }
   13.92 +        case 2:
   13.93 +        case 34:
   13.94 +        case 130:
   13.95 +        case 162:
   13.96 +        {
   13.97 +          PIXEL00_1M
   13.98 +          PIXEL01_C
   13.99 +          PIXEL02_1M
  13.100 +          PIXEL10_1
  13.101 +          PIXEL11
  13.102 +          PIXEL12_1
  13.103 +          PIXEL20_2
  13.104 +          PIXEL21_1
  13.105 +          PIXEL22_2
  13.106 +          break;
  13.107 +        }
  13.108 +        case 16:
  13.109 +        case 17:
  13.110 +        case 48:
  13.111 +        case 49:
  13.112 +        {
  13.113 +          PIXEL00_2
  13.114 +          PIXEL01_1
  13.115 +          PIXEL02_1M
  13.116 +          PIXEL10_1
  13.117 +          PIXEL11
  13.118 +          PIXEL12_C
  13.119 +          PIXEL20_2
  13.120 +          PIXEL21_1
  13.121 +          PIXEL22_1M
  13.122 +          break;
  13.123 +        }
  13.124 +        case 64:
  13.125 +        case 65:
  13.126 +        case 68:
  13.127 +        case 69:
  13.128 +        {
  13.129 +          PIXEL00_2
  13.130 +          PIXEL01_1
  13.131 +          PIXEL02_2
  13.132 +          PIXEL10_1
  13.133 +          PIXEL11
  13.134 +          PIXEL12_1
  13.135 +          PIXEL20_1M
  13.136 +          PIXEL21_C
  13.137 +          PIXEL22_1M
  13.138 +          break;
  13.139 +        }
  13.140 +        case 8:
  13.141 +        case 12:
  13.142 +        case 136:
  13.143 +        case 140:
  13.144 +        {
  13.145 +          PIXEL00_1M
  13.146 +          PIXEL01_1
  13.147 +          PIXEL02_2
  13.148 +          PIXEL10_C
  13.149 +          PIXEL11
  13.150 +          PIXEL12_1
  13.151 +          PIXEL20_1M
  13.152 +          PIXEL21_1
  13.153 +          PIXEL22_2
  13.154 +          break;
  13.155 +        }
  13.156 +        case 3:
  13.157 +        case 35:
  13.158 +        case 131:
  13.159 +        case 163:
  13.160 +        {
  13.161 +          PIXEL00_1L
  13.162 +          PIXEL01_C
  13.163 +          PIXEL02_1M
  13.164 +          PIXEL10_1
  13.165 +          PIXEL11
  13.166 +          PIXEL12_1
  13.167 +          PIXEL20_2
  13.168 +          PIXEL21_1
  13.169 +          PIXEL22_2
  13.170 +          break;
  13.171 +        }
  13.172 +        case 6:
  13.173 +        case 38:
  13.174 +        case 134:
  13.175 +        case 166:
  13.176 +        {
  13.177 +          PIXEL00_1M
  13.178 +          PIXEL01_C
  13.179 +          PIXEL02_1R
  13.180 +          PIXEL10_1
  13.181 +          PIXEL11
  13.182 +          PIXEL12_1
  13.183 +          PIXEL20_2
  13.184 +          PIXEL21_1
  13.185 +          PIXEL22_2
  13.186 +          break;
  13.187 +        }
  13.188 +        case 20:
  13.189 +        case 21:
  13.190 +        case 52:
  13.191 +        case 53:
  13.192 +        {
  13.193 +          PIXEL00_2
  13.194 +          PIXEL01_1
  13.195 +          PIXEL02_1U
  13.196 +          PIXEL10_1
  13.197 +          PIXEL11
  13.198 +          PIXEL12_C
  13.199 +          PIXEL20_2
  13.200 +          PIXEL21_1
  13.201 +          PIXEL22_1M
  13.202 +          break;
  13.203 +        }
  13.204 +        case 144:
  13.205 +        case 145:
  13.206 +        case 176:
  13.207 +        case 177:
  13.208 +        {
  13.209 +          PIXEL00_2
  13.210 +          PIXEL01_1
  13.211 +          PIXEL02_1M
  13.212 +          PIXEL10_1
  13.213 +          PIXEL11
  13.214 +          PIXEL12_C
  13.215 +          PIXEL20_2
  13.216 +          PIXEL21_1
  13.217 +          PIXEL22_1D
  13.218 +          break;
  13.219 +        }
  13.220 +        case 192:
  13.221 +        case 193:
  13.222 +        case 196:
  13.223 +        case 197:
  13.224 +        {
  13.225 +          PIXEL00_2
  13.226 +          PIXEL01_1
  13.227 +          PIXEL02_2
  13.228 +          PIXEL10_1
  13.229 +          PIXEL11
  13.230 +          PIXEL12_1
  13.231 +          PIXEL20_1M
  13.232 +          PIXEL21_C
  13.233 +          PIXEL22_1R
  13.234 +          break;
  13.235 +        }
  13.236 +        case 96:
  13.237 +        case 97:
  13.238 +        case 100:
  13.239 +        case 101:
  13.240 +        {
  13.241 +          PIXEL00_2
  13.242 +          PIXEL01_1
  13.243 +          PIXEL02_2
  13.244 +          PIXEL10_1
  13.245 +          PIXEL11
  13.246 +          PIXEL12_1
  13.247 +          PIXEL20_1L
  13.248 +          PIXEL21_C
  13.249 +          PIXEL22_1M
  13.250 +          break;
  13.251 +        }
  13.252 +        case 40:
  13.253 +        case 44:
  13.254 +        case 168:
  13.255 +        case 172:
  13.256 +        {
  13.257 +          PIXEL00_1M
  13.258 +          PIXEL01_1
  13.259 +          PIXEL02_2
  13.260 +          PIXEL10_C
  13.261 +          PIXEL11
  13.262 +          PIXEL12_1
  13.263 +          PIXEL20_1D
  13.264 +          PIXEL21_1
  13.265 +          PIXEL22_2
  13.266 +          break;
  13.267 +        }
  13.268 +        case 9:
  13.269 +        case 13:
  13.270 +        case 137:
  13.271 +        case 141:
  13.272 +        {
  13.273 +          PIXEL00_1U
  13.274 +          PIXEL01_1
  13.275 +          PIXEL02_2
  13.276 +          PIXEL10_C
  13.277 +          PIXEL11
  13.278 +          PIXEL12_1
  13.279 +          PIXEL20_1M
  13.280 +          PIXEL21_1
  13.281 +          PIXEL22_2
  13.282 +          break;
  13.283 +        }
  13.284 +        case 18:
  13.285 +        case 50:
  13.286 +        {
  13.287 +          PIXEL00_1M
  13.288 +			  
  13.289 +		if (Diff(cget(2), cget(6)))
  13.290 +          {
  13.291 +            PIXEL01_C
  13.292 +            PIXEL02_1M
  13.293 +            PIXEL12_C
  13.294 +          }
  13.295 +          else
  13.296 +          {
  13.297 +            PIXEL01_3
  13.298 +            PIXEL02_4
  13.299 +            PIXEL12_3
  13.300 +          }
  13.301 +          PIXEL10_1
  13.302 +          PIXEL11
  13.303 +          PIXEL20_2
  13.304 +          PIXEL21_1
  13.305 +          PIXEL22_1M
  13.306 +          break;
  13.307 +        }
  13.308 +        case 80:
  13.309 +        case 81:
  13.310 +        {
  13.311 +          PIXEL00_2
  13.312 +          PIXEL01_1
  13.313 +          PIXEL02_1M
  13.314 +          PIXEL10_1
  13.315 +          PIXEL11
  13.316 +          PIXEL20_1M
  13.317 +          if (Diff(cget(6), cget(8)))
  13.318 +          {
  13.319 +            PIXEL12_C
  13.320 +            PIXEL21_C
  13.321 +            PIXEL22_1M
  13.322 +          }
  13.323 +          else
  13.324 +          {
  13.325 +            PIXEL12_3
  13.326 +            PIXEL21_3
  13.327 +            PIXEL22_4
  13.328 +          }
  13.329 +          break;
  13.330 +        }
  13.331 +        case 72:
  13.332 +        case 76:
  13.333 +        {
  13.334 +          PIXEL00_1M
  13.335 +          PIXEL01_1
  13.336 +          PIXEL02_2
  13.337 +          PIXEL11
  13.338 +          PIXEL12_1
  13.339 +          if (Diff(cget(8), cget(4)))
  13.340 +          {
  13.341 +            PIXEL10_C
  13.342 +            PIXEL20_1M
  13.343 +            PIXEL21_C
  13.344 +          }
  13.345 +          else
  13.346 +          {
  13.347 +            PIXEL10_3
  13.348 +            PIXEL20_4
  13.349 +            PIXEL21_3
  13.350 +          }
  13.351 +          PIXEL22_1M
  13.352 +          break;
  13.353 +        }
  13.354 +        case 10:
  13.355 +        case 138:
  13.356 +        {
  13.357 +          if (Diff(cget(4), cget(2)))
  13.358 +          {
  13.359 +            PIXEL00_1M
  13.360 +            PIXEL01_C
  13.361 +            PIXEL10_C
  13.362 +          }
  13.363 +          else
  13.364 +          {
  13.365 +            PIXEL00_4
  13.366 +            PIXEL01_3
  13.367 +            PIXEL10_3
  13.368 +          }
  13.369 +          PIXEL02_1M
  13.370 +          PIXEL11
  13.371 +          PIXEL12_1
  13.372 +          PIXEL20_1M
  13.373 +          PIXEL21_1
  13.374 +          PIXEL22_2
  13.375 +          break;
  13.376 +        }
  13.377 +        case 66:
  13.378 +        {
  13.379 +          PIXEL00_1M
  13.380 +          PIXEL01_C
  13.381 +          PIXEL02_1M
  13.382 +          PIXEL10_1
  13.383 +          PIXEL11
  13.384 +          PIXEL12_1
  13.385 +          PIXEL20_1M
  13.386 +          PIXEL21_C
  13.387 +          PIXEL22_1M
  13.388 +          break;
  13.389 +        }
  13.390 +        case 24:
  13.391 +        {
  13.392 +          PIXEL00_1M
  13.393 +          PIXEL01_1
  13.394 +          PIXEL02_1M
  13.395 +          PIXEL10_C
  13.396 +          PIXEL11
  13.397 +          PIXEL12_C
  13.398 +          PIXEL20_1M
  13.399 +          PIXEL21_1
  13.400 +          PIXEL22_1M
  13.401 +          break;
  13.402 +        }
  13.403 +        case 7:
  13.404 +        case 39:
  13.405 +        case 135:
  13.406 +        {
  13.407 +          PIXEL00_1L
  13.408 +          PIXEL01_C
  13.409 +          PIXEL02_1R
  13.410 +          PIXEL10_1
  13.411 +          PIXEL11
  13.412 +          PIXEL12_1
  13.413 +          PIXEL20_2
  13.414 +          PIXEL21_1
  13.415 +          PIXEL22_2
  13.416 +          break;
  13.417 +        }
  13.418 +        case 148:
  13.419 +        case 149:
  13.420 +        case 180:
  13.421 +        {
  13.422 +          PIXEL00_2
  13.423 +          PIXEL01_1
  13.424 +          PIXEL02_1U
  13.425 +          PIXEL10_1
  13.426 +          PIXEL11
  13.427 +          PIXEL12_C
  13.428 +          PIXEL20_2
  13.429 +          PIXEL21_1
  13.430 +          PIXEL22_1D
  13.431 +          break;
  13.432 +        }
  13.433 +        case 224:
  13.434 +        case 228:
  13.435 +        case 225:
  13.436 +        {
  13.437 +          PIXEL00_2
  13.438 +          PIXEL01_1
  13.439 +          PIXEL02_2
  13.440 +          PIXEL10_1
  13.441 +          PIXEL11
  13.442 +          PIXEL12_1
  13.443 +          PIXEL20_1L
  13.444 +          PIXEL21_C
  13.445 +          PIXEL22_1R
  13.446 +          break;
  13.447 +        }
  13.448 +        case 41:
  13.449 +        case 169:
  13.450 +        case 45:
  13.451 +        {
  13.452 +          PIXEL00_1U
  13.453 +          PIXEL01_1
  13.454 +          PIXEL02_2
  13.455 +          PIXEL10_C
  13.456 +          PIXEL11
  13.457 +          PIXEL12_1
  13.458 +          PIXEL20_1D
  13.459 +          PIXEL21_1
  13.460 +          PIXEL22_2
  13.461 +          break;
  13.462 +        }
  13.463 +        case 22:
  13.464 +        case 54:
  13.465 +        {
  13.466 +          PIXEL00_1M
  13.467 +          if (Diff(cget(2), cget(6)))
  13.468 +          {
  13.469 +            PIXEL01_C
  13.470 +            PIXEL02_C
  13.471 +            PIXEL12_C
  13.472 +          }
  13.473 +          else
  13.474 +          {
  13.475 +            PIXEL01_3
  13.476 +            PIXEL02_4
  13.477 +            PIXEL12_3
  13.478 +          }
  13.479 +          PIXEL10_1
  13.480 +          PIXEL11
  13.481 +          PIXEL20_2
  13.482 +          PIXEL21_1
  13.483 +          PIXEL22_1M
  13.484 +          break;
  13.485 +        }
  13.486 +        case 208:
  13.487 +        case 209:
  13.488 +        {
  13.489 +          PIXEL00_2
  13.490 +          PIXEL01_1
  13.491 +          PIXEL02_1M
  13.492 +          PIXEL10_1
  13.493 +          PIXEL11
  13.494 +          PIXEL20_1M
  13.495 +          if (Diff(cget(6), cget(8)))
  13.496 +          {
  13.497 +            PIXEL12_C
  13.498 +            PIXEL21_C
  13.499 +            PIXEL22_C
  13.500 +          }
  13.501 +          else
  13.502 +          {
  13.503 +            PIXEL12_3
  13.504 +            PIXEL21_3
  13.505 +            PIXEL22_4
  13.506 +          }
  13.507 +          break;
  13.508 +        }
  13.509 +        case 104:
  13.510 +        case 108:
  13.511 +        {
  13.512 +          PIXEL00_1M
  13.513 +          PIXEL01_1
  13.514 +          PIXEL02_2
  13.515 +          PIXEL11
  13.516 +          PIXEL12_1
  13.517 +          if (Diff(cget(8), cget(4)))
  13.518 +          {
  13.519 +            PIXEL10_C
  13.520 +            PIXEL20_C
  13.521 +            PIXEL21_C
  13.522 +          }
  13.523 +          else
  13.524 +          {
  13.525 +            PIXEL10_3
  13.526 +            PIXEL20_4
  13.527 +            PIXEL21_3
  13.528 +          }
  13.529 +          PIXEL22_1M
  13.530 +          break;
  13.531 +        }
  13.532 +        case 11:
  13.533 +        case 139:
  13.534 +        {
  13.535 +          if (Diff(cget(4), cget(2)))
  13.536 +          {
  13.537 +            PIXEL00_C
  13.538 +            PIXEL01_C
  13.539 +            PIXEL10_C
  13.540 +          }
  13.541 +          else
  13.542 +          {
  13.543 +            PIXEL00_4
  13.544 +            PIXEL01_3
  13.545 +            PIXEL10_3
  13.546 +          }
  13.547 +          PIXEL02_1M
  13.548 +          PIXEL11
  13.549 +          PIXEL12_1
  13.550 +          PIXEL20_1M
  13.551 +          PIXEL21_1
  13.552 +          PIXEL22_2
  13.553 +          break;
  13.554 +        }
  13.555 +        case 19:
  13.556 +        case 51:
  13.557 +        {
  13.558 +          if (Diff(cget(2), cget(6)))
  13.559 +          {
  13.560 +            PIXEL00_1L
  13.561 +            PIXEL01_C
  13.562 +            PIXEL02_1M
  13.563 +            PIXEL12_C
  13.564 +          }
  13.565 +          else
  13.566 +          {
  13.567 +            PIXEL00_2
  13.568 +            PIXEL01_6
  13.569 +            PIXEL02_5
  13.570 +            PIXEL12_1
  13.571 +          }
  13.572 +          PIXEL10_1
  13.573 +          PIXEL11
  13.574 +          PIXEL20_2
  13.575 +          PIXEL21_1
  13.576 +          PIXEL22_1M
  13.577 +          break;
  13.578 +        }
  13.579 +        case 146:
  13.580 +        case 178:
  13.581 +        {
  13.582 +          if (Diff(cget(2), cget(6)))
  13.583 +          {
  13.584 +            PIXEL01_C
  13.585 +            PIXEL02_1M
  13.586 +            PIXEL12_C
  13.587 +            PIXEL22_1D
  13.588 +          }
  13.589 +          else
  13.590 +          {
  13.591 +            PIXEL01_1
  13.592 +            PIXEL02_5
  13.593 +            PIXEL12_6
  13.594 +            PIXEL22_2
  13.595 +          }
  13.596 +          PIXEL00_1M
  13.597 +          PIXEL10_1
  13.598 +          PIXEL11
  13.599 +          PIXEL20_2
  13.600 +          PIXEL21_1
  13.601 +          break;
  13.602 +        }
  13.603 +        case 84:
  13.604 +        case 85:
  13.605 +        {
  13.606 +          if (Diff(cget(6), cget(8)))
  13.607 +          {
  13.608 +            PIXEL02_1U
  13.609 +            PIXEL12_C
  13.610 +            PIXEL21_C
  13.611 +            PIXEL22_1M
  13.612 +          }
  13.613 +          else
  13.614 +          {
  13.615 +            PIXEL02_2
  13.616 +            PIXEL12_6
  13.617 +            PIXEL21_1
  13.618 +            PIXEL22_5
  13.619 +          }
  13.620 +          PIXEL00_2
  13.621 +          PIXEL01_1
  13.622 +          PIXEL10_1
  13.623 +          PIXEL11
  13.624 +          PIXEL20_1M
  13.625 +          break;
  13.626 +        }
  13.627 +        case 112:
  13.628 +        case 113:
  13.629 +        {
  13.630 +          if (Diff(cget(6), cget(8)))
  13.631 +          {
  13.632 +            PIXEL12_C
  13.633 +            PIXEL20_1L
  13.634 +            PIXEL21_C
  13.635 +            PIXEL22_1M
  13.636 +          }
  13.637 +          else
  13.638 +          {
  13.639 +            PIXEL12_1
  13.640 +            PIXEL20_2
  13.641 +            PIXEL21_6
  13.642 +            PIXEL22_5
  13.643 +          }
  13.644 +          PIXEL00_2
  13.645 +          PIXEL01_1
  13.646 +          PIXEL02_1M
  13.647 +          PIXEL10_1
  13.648 +          PIXEL11
  13.649 +          break;
  13.650 +        }
  13.651 +        case 200:
  13.652 +        case 204:
  13.653 +        {
  13.654 +          if (Diff(cget(8), cget(4)))
  13.655 +          {
  13.656 +            PIXEL10_C
  13.657 +            PIXEL20_1M
  13.658 +            PIXEL21_C
  13.659 +            PIXEL22_1R
  13.660 +          }
  13.661 +          else
  13.662 +          {
  13.663 +            PIXEL10_1
  13.664 +            PIXEL20_5
  13.665 +            PIXEL21_6
  13.666 +            PIXEL22_2
  13.667 +          }
  13.668 +          PIXEL00_1M
  13.669 +          PIXEL01_1
  13.670 +          PIXEL02_2
  13.671 +          PIXEL11
  13.672 +          PIXEL12_1
  13.673 +          break;
  13.674 +        }
  13.675 +        case 73:
  13.676 +        case 77:
  13.677 +        {
  13.678 +          if (Diff(cget(8), cget(4)))
  13.679 +          {
  13.680 +            PIXEL00_1U
  13.681 +            PIXEL10_C
  13.682 +            PIXEL20_1M
  13.683 +            PIXEL21_C
  13.684 +          }
  13.685 +          else
  13.686 +          {
  13.687 +            PIXEL00_2
  13.688 +            PIXEL10_6
  13.689 +            PIXEL20_5
  13.690 +            PIXEL21_1
  13.691 +          }
  13.692 +          PIXEL01_1
  13.693 +          PIXEL02_2
  13.694 +          PIXEL11
  13.695 +          PIXEL12_1
  13.696 +          PIXEL22_1M
  13.697 +          break;
  13.698 +        }
  13.699 +        case 42:
  13.700 +        case 170:
  13.701 +        {
  13.702 +          if (Diff(cget(4), cget(2)))
  13.703 +          {
  13.704 +            PIXEL00_1M
  13.705 +            PIXEL01_C
  13.706 +            PIXEL10_C
  13.707 +            PIXEL20_1D
  13.708 +          }
  13.709 +          else
  13.710 +          {
  13.711 +            PIXEL00_5
  13.712 +            PIXEL01_1
  13.713 +            PIXEL10_6
  13.714 +            PIXEL20_2
  13.715 +          }
  13.716 +          PIXEL02_1M
  13.717 +          PIXEL11
  13.718 +          PIXEL12_1
  13.719 +          PIXEL21_1
  13.720 +          PIXEL22_2
  13.721 +          break;
  13.722 +        }
  13.723 +        case 14:
  13.724 +        case 142:
  13.725 +        {
  13.726 +          if (Diff(cget(4), cget(2)))
  13.727 +          {
  13.728 +            PIXEL00_1M
  13.729 +            PIXEL01_C
  13.730 +            PIXEL02_1R
  13.731 +            PIXEL10_C
  13.732 +          }
  13.733 +          else
  13.734 +          {
  13.735 +            PIXEL00_5
  13.736 +            PIXEL01_6
  13.737 +            PIXEL02_2
  13.738 +            PIXEL10_1
  13.739 +          }
  13.740 +          PIXEL11
  13.741 +          PIXEL12_1
  13.742 +          PIXEL20_1M
  13.743 +          PIXEL21_1
  13.744 +          PIXEL22_2
  13.745 +          break;
  13.746 +        }
  13.747 +        case 67:
  13.748 +        {
  13.749 +          PIXEL00_1L
  13.750 +          PIXEL01_C
  13.751 +          PIXEL02_1M
  13.752 +          PIXEL10_1
  13.753 +          PIXEL11
  13.754 +          PIXEL12_1
  13.755 +          PIXEL20_1M
  13.756 +          PIXEL21_C
  13.757 +          PIXEL22_1M
  13.758 +          break;
  13.759 +        }
  13.760 +        case 70:
  13.761 +        {
  13.762 +          PIXEL00_1M
  13.763 +          PIXEL01_C
  13.764 +          PIXEL02_1R
  13.765 +          PIXEL10_1
  13.766 +          PIXEL11
  13.767 +          PIXEL12_1
  13.768 +          PIXEL20_1M
  13.769 +          PIXEL21_C
  13.770 +          PIXEL22_1M
  13.771 +          break;
  13.772 +        }
  13.773 +        case 28:
  13.774 +        {
  13.775 +          PIXEL00_1M
  13.776 +          PIXEL01_1
  13.777 +          PIXEL02_1U
  13.778 +          PIXEL10_C
  13.779 +          PIXEL11
  13.780 +          PIXEL12_C
  13.781 +          PIXEL20_1M
  13.782 +          PIXEL21_1
  13.783 +          PIXEL22_1M
  13.784 +          break;
  13.785 +        }
  13.786 +        case 152:
  13.787 +        {
  13.788 +          PIXEL00_1M
  13.789 +          PIXEL01_1
  13.790 +          PIXEL02_1M
  13.791 +          PIXEL10_C
  13.792 +          PIXEL11
  13.793 +          PIXEL12_C
  13.794 +          PIXEL20_1M
  13.795 +          PIXEL21_1
  13.796 +          PIXEL22_1D
  13.797 +          break;
  13.798 +        }
  13.799 +        case 194:
  13.800 +        {
  13.801 +          PIXEL00_1M
  13.802 +          PIXEL01_C
  13.803 +          PIXEL02_1M
  13.804 +          PIXEL10_1
  13.805 +          PIXEL11
  13.806 +          PIXEL12_1
  13.807 +          PIXEL20_1M
  13.808 +          PIXEL21_C
  13.809 +          PIXEL22_1R
  13.810 +          break;
  13.811 +        }
  13.812 +        case 98:
  13.813 +        {
  13.814 +          PIXEL00_1M
  13.815 +          PIXEL01_C
  13.816 +          PIXEL02_1M
  13.817 +          PIXEL10_1
  13.818 +          PIXEL11
  13.819 +          PIXEL12_1
  13.820 +          PIXEL20_1L
  13.821 +          PIXEL21_C
  13.822 +          PIXEL22_1M
  13.823 +          break;
  13.824 +        }
  13.825 +        case 56:
  13.826 +        {
  13.827 +          PIXEL00_1M
  13.828 +          PIXEL01_1
  13.829 +          PIXEL02_1M
  13.830 +          PIXEL10_C
  13.831 +          PIXEL11
  13.832 +          PIXEL12_C
  13.833 +          PIXEL20_1D
  13.834 +          PIXEL21_1
  13.835 +          PIXEL22_1M
  13.836 +          break;
  13.837 +        }
  13.838 +        case 25:
  13.839 +        {
  13.840 +          PIXEL00_1U
  13.841 +          PIXEL01_1
  13.842 +          PIXEL02_1M
  13.843 +          PIXEL10_C
  13.844 +          PIXEL11
  13.845 +          PIXEL12_C
  13.846 +          PIXEL20_1M
  13.847 +          PIXEL21_1
  13.848 +          PIXEL22_1M
  13.849 +          break;
  13.850 +        }
  13.851 +        case 26:
  13.852 +        case 31:
  13.853 +        {
  13.854 +          if (Diff(cget(4), cget(2)))
  13.855 +          {
  13.856 +            PIXEL00_C
  13.857 +            PIXEL10_C
  13.858 +          }
  13.859 +          else
  13.860 +          {
  13.861 +            PIXEL00_4
  13.862 +            PIXEL10_3
  13.863 +          }
  13.864 +          PIXEL01_C
  13.865 +          if (Diff(cget(2), cget(6)))
  13.866 +          {
  13.867 +            PIXEL02_C
  13.868 +            PIXEL12_C
  13.869 +          }
  13.870 +          else
  13.871 +          {
  13.872 +            PIXEL02_4
  13.873 +            PIXEL12_3
  13.874 +          }
  13.875 +          PIXEL11
  13.876 +          PIXEL20_1M
  13.877 +          PIXEL21_1
  13.878 +          PIXEL22_1M
  13.879 +          break;
  13.880 +        }
  13.881 +        case 82:
  13.882 +        case 214:
  13.883 +        {
  13.884 +          PIXEL00_1M
  13.885 +          if (Diff(cget(2), cget(6)))
  13.886 +          {
  13.887 +            PIXEL01_C
  13.888 +            PIXEL02_C
  13.889 +          }
  13.890 +          else
  13.891 +          {
  13.892 +            PIXEL01_3
  13.893 +            PIXEL02_4
  13.894 +          }
  13.895 +          PIXEL10_1
  13.896 +          PIXEL11
  13.897 +          PIXEL12_C
  13.898 +          PIXEL20_1M
  13.899 +          if (Diff(cget(6), cget(8)))
  13.900 +          {
  13.901 +            PIXEL21_C
  13.902 +            PIXEL22_C
  13.903 +          }
  13.904 +          else
  13.905 +          {
  13.906 +            PIXEL21_3
  13.907 +            PIXEL22_4
  13.908 +          }
  13.909 +          break;
  13.910 +        }
  13.911 +        case 88:
  13.912 +        case 248:
  13.913 +        {
  13.914 +          PIXEL00_1M
  13.915 +          PIXEL01_1
  13.916 +          PIXEL02_1M
  13.917 +          PIXEL11
  13.918 +          if (Diff(cget(8), cget(4)))
  13.919 +          {
  13.920 +            PIXEL10_C
  13.921 +            PIXEL20_C
  13.922 +          }
  13.923 +          else
  13.924 +          {
  13.925 +            PIXEL10_3
  13.926 +            PIXEL20_4
  13.927 +          }
  13.928 +          PIXEL21_C
  13.929 +          if (Diff(cget(6), cget(8)))
  13.930 +          {
  13.931 +            PIXEL12_C
  13.932 +            PIXEL22_C
  13.933 +          }
  13.934 +          else
  13.935 +          {
  13.936 +            PIXEL12_3
  13.937 +            PIXEL22_4
  13.938 +          }
  13.939 +          break;
  13.940 +        }
  13.941 +        case 74:
  13.942 +        case 107:
  13.943 +        {
  13.944 +          if (Diff(cget(4), cget(2)))
  13.945 +          {
  13.946 +            PIXEL00_C
  13.947 +            PIXEL01_C
  13.948 +          }
  13.949 +          else
  13.950 +          {
  13.951 +            PIXEL00_4
  13.952 +            PIXEL01_3
  13.953 +          }
  13.954 +          PIXEL02_1M
  13.955 +          PIXEL10_C
  13.956 +          PIXEL11
  13.957 +          PIXEL12_1
  13.958 +          if (Diff(cget(8), cget(4)))
  13.959 +          {
  13.960 +            PIXEL20_C
  13.961 +            PIXEL21_C
  13.962 +          }
  13.963 +          else
  13.964 +          {
  13.965 +            PIXEL20_4
  13.966 +            PIXEL21_3
  13.967 +          }
  13.968 +          PIXEL22_1M
  13.969 +          break;
  13.970 +        }
  13.971 +        case 27:
  13.972 +        {
  13.973 +          if (Diff(cget(4), cget(2)))
  13.974 +          {
  13.975 +            PIXEL00_C
  13.976 +            PIXEL01_C
  13.977 +            PIXEL10_C
  13.978 +          }
  13.979 +          else
  13.980 +          {
  13.981 +            PIXEL00_4
  13.982 +            PIXEL01_3
  13.983 +            PIXEL10_3
  13.984 +          }
  13.985 +          PIXEL02_1M
  13.986 +          PIXEL11
  13.987 +          PIXEL12_C
  13.988 +          PIXEL20_1M
  13.989 +          PIXEL21_1
  13.990 +          PIXEL22_1M
  13.991 +          break;
  13.992 +        }
  13.993 +        case 86:
  13.994 +        {
  13.995 +          PIXEL00_1M
  13.996 +          if (Diff(cget(2), cget(6)))
  13.997 +          {
  13.998 +            PIXEL01_C
  13.999 +            PIXEL02_C
 13.1000 +            PIXEL12_C
 13.1001 +          }
 13.1002 +          else
 13.1003 +          {
 13.1004 +            PIXEL01_3
 13.1005 +            PIXEL02_4
 13.1006 +            PIXEL12_3
 13.1007 +          }
 13.1008 +          PIXEL10_1
 13.1009 +          PIXEL11
 13.1010 +          PIXEL20_1M
 13.1011 +          PIXEL21_C
 13.1012 +          PIXEL22_1M
 13.1013 +          break;
 13.1014 +        }
 13.1015 +        case 216:
 13.1016 +        {
 13.1017 +          PIXEL00_1M
 13.1018 +          PIXEL01_1
 13.1019 +          PIXEL02_1M
 13.1020 +          PIXEL10_C
 13.1021 +          PIXEL11
 13.1022 +          PIXEL20_1M
 13.1023 +          if (Diff(cget(6), cget(8)))
 13.1024 +          {
 13.1025 +            PIXEL12_C
 13.1026 +            PIXEL21_C
 13.1027 +            PIXEL22_C
 13.1028 +          }
 13.1029 +          else
 13.1030 +          {
 13.1031 +            PIXEL12_3
 13.1032 +            PIXEL21_3
 13.1033 +            PIXEL22_4
 13.1034 +          }
 13.1035 +          break;
 13.1036 +        }
 13.1037 +        case 106:
 13.1038 +        {
 13.1039 +          PIXEL00_1M
 13.1040 +          PIXEL01_C
 13.1041 +          PIXEL02_1M
 13.1042 +          PIXEL11
 13.1043 +          PIXEL12_1
 13.1044 +          if (Diff(cget(8), cget(4)))
 13.1045 +          {
 13.1046 +            PIXEL10_C
 13.1047 +            PIXEL20_C
 13.1048 +            PIXEL21_C
 13.1049 +          }
 13.1050 +          else
 13.1051 +          {
 13.1052 +            PIXEL10_3
 13.1053 +            PIXEL20_4
 13.1054 +            PIXEL21_3
 13.1055 +          }
 13.1056 +          PIXEL22_1M
 13.1057 +          break;
 13.1058 +        }
 13.1059 +        case 30:
 13.1060 +        {
 13.1061 +          PIXEL00_1M
 13.1062 +          if (Diff(cget(2), cget(6)))
 13.1063 +          {
 13.1064 +            PIXEL01_C
 13.1065 +            PIXEL02_C
 13.1066 +            PIXEL12_C
 13.1067 +          }
 13.1068 +          else
 13.1069 +          {
 13.1070 +            PIXEL01_3
 13.1071 +            PIXEL02_4
 13.1072 +            PIXEL12_3
 13.1073 +          }
 13.1074 +          PIXEL10_C
 13.1075 +          PIXEL11
 13.1076 +          PIXEL20_1M
 13.1077 +          PIXEL21_1
 13.1078 +          PIXEL22_1M
 13.1079 +          break;
 13.1080 +        }
 13.1081 +        case 210:
 13.1082 +        {
 13.1083 +          PIXEL00_1M
 13.1084 +          PIXEL01_C
 13.1085 +          PIXEL02_1M
 13.1086 +          PIXEL10_1
 13.1087 +          PIXEL11
 13.1088 +          PIXEL20_1M
 13.1089 +          if (Diff(cget(6), cget(8)))
 13.1090 +          {
 13.1091 +            PIXEL12_C
 13.1092 +            PIXEL21_C
 13.1093 +            PIXEL22_C
 13.1094 +          }
 13.1095 +          else
 13.1096 +          {
 13.1097 +            PIXEL12_3
 13.1098 +            PIXEL21_3
 13.1099 +            PIXEL22_4
 13.1100 +          }
 13.1101 +          break;
 13.1102 +        }
 13.1103 +        case 120:
 13.1104 +        {
 13.1105 +          PIXEL00_1M
 13.1106 +          PIXEL01_1
 13.1107 +          PIXEL02_1M
 13.1108 +          PIXEL11
 13.1109 +          PIXEL12_C
 13.1110 +          if (Diff(cget(8), cget(4)))
 13.1111 +          {
 13.1112 +            PIXEL10_C
 13.1113 +            PIXEL20_C
 13.1114 +            PIXEL21_C
 13.1115 +          }
 13.1116 +          else
 13.1117 +          {
 13.1118 +            PIXEL10_3
 13.1119 +            PIXEL20_4
 13.1120 +            PIXEL21_3
 13.1121 +          }
 13.1122 +          PIXEL22_1M
 13.1123 +          break;
 13.1124 +        }
 13.1125 +        case 75:
 13.1126 +        {
 13.1127 +          if (Diff(cget(4), cget(2)))
 13.1128 +          {
 13.1129 +            PIXEL00_C
 13.1130 +            PIXEL01_C
 13.1131 +            PIXEL10_C
 13.1132 +          }
 13.1133 +          else
 13.1134 +          {
 13.1135 +            PIXEL00_4
 13.1136 +            PIXEL01_3
 13.1137 +            PIXEL10_3
 13.1138 +          }
 13.1139 +          PIXEL02_1M
 13.1140 +          PIXEL11
 13.1141 +          PIXEL12_1
 13.1142 +          PIXEL20_1M
 13.1143 +          PIXEL21_C
 13.1144 +          PIXEL22_1M
 13.1145 +          break;
 13.1146 +        }
 13.1147 +        case 29:
 13.1148 +        {
 13.1149 +          PIXEL00_1U
 13.1150 +          PIXEL01_1
 13.1151 +          PIXEL02_1U
 13.1152 +          PIXEL10_C
 13.1153 +          PIXEL11
 13.1154 +          PIXEL12_C
 13.1155 +          PIXEL20_1M
 13.1156 +          PIXEL21_1
 13.1157 +          PIXEL22_1M
 13.1158 +          break;
 13.1159 +        }
 13.1160 +        case 198:
 13.1161 +        {
 13.1162 +          PIXEL00_1M
 13.1163 +          PIXEL01_C
 13.1164 +          PIXEL02_1R
 13.1165 +          PIXEL10_1
 13.1166 +          PIXEL11
 13.1167 +          PIXEL12_1
 13.1168 +          PIXEL20_1M
 13.1169 +          PIXEL21_C
 13.1170 +          PIXEL22_1R
 13.1171 +          break;
 13.1172 +        }
 13.1173 +        case 184:
 13.1174 +        {
 13.1175 +          PIXEL00_1M
 13.1176 +          PIXEL01_1
 13.1177 +          PIXEL02_1M
 13.1178 +          PIXEL10_C
 13.1179 +          PIXEL11
 13.1180 +          PIXEL12_C
 13.1181 +          PIXEL20_1D
 13.1182 +          PIXEL21_1
 13.1183 +          PIXEL22_1D
 13.1184 +          break;
 13.1185 +        }
 13.1186 +        case 99:
 13.1187 +        {
 13.1188 +          PIXEL00_1L
 13.1189 +          PIXEL01_C
 13.1190 +          PIXEL02_1M
 13.1191 +          PIXEL10_1
 13.1192 +          PIXEL11
 13.1193 +          PIXEL12_1
 13.1194 +          PIXEL20_1L
 13.1195 +          PIXEL21_C
 13.1196 +          PIXEL22_1M
 13.1197 +          break;
 13.1198 +        }
 13.1199 +        case 57:
 13.1200 +        {
 13.1201 +          PIXEL00_1U
 13.1202 +          PIXEL01_1
 13.1203 +          PIXEL02_1M
 13.1204 +          PIXEL10_C
 13.1205 +          PIXEL11
 13.1206 +          PIXEL12_C
 13.1207 +          PIXEL20_1D
 13.1208 +          PIXEL21_1
 13.1209 +          PIXEL22_1M
 13.1210 +          break;
 13.1211 +        }
 13.1212 +        case 71:
 13.1213 +        {
 13.1214 +          PIXEL00_1L
 13.1215 +          PIXEL01_C
 13.1216 +          PIXEL02_1R
 13.1217 +          PIXEL10_1
 13.1218 +          PIXEL11
 13.1219 +          PIXEL12_1
 13.1220 +          PIXEL20_1M
 13.1221 +          PIXEL21_C
 13.1222 +          PIXEL22_1M
 13.1223 +          break;
 13.1224 +        }
 13.1225 +        case 156:
 13.1226 +        {
 13.1227 +          PIXEL00_1M
 13.1228 +          PIXEL01_1
 13.1229 +          PIXEL02_1U
 13.1230 +          PIXEL10_C
 13.1231 +          PIXEL11
 13.1232 +          PIXEL12_C
 13.1233 +          PIXEL20_1M
 13.1234 +          PIXEL21_1
 13.1235 +          PIXEL22_1D
 13.1236 +          break;
 13.1237 +        }
 13.1238 +        case 226:
 13.1239 +        {
 13.1240 +          PIXEL00_1M
 13.1241 +          PIXEL01_C
 13.1242 +          PIXEL02_1M
 13.1243 +          PIXEL10_1
 13.1244 +          PIXEL11
 13.1245 +          PIXEL12_1
 13.1246 +          PIXEL20_1L
 13.1247 +          PIXEL21_C
 13.1248 +          PIXEL22_1R
 13.1249 +          break;
 13.1250 +        }
 13.1251 +        case 60:
 13.1252 +        {
 13.1253 +          PIXEL00_1M
 13.1254 +          PIXEL01_1
 13.1255 +          PIXEL02_1U
 13.1256 +          PIXEL10_C
 13.1257 +          PIXEL11
 13.1258 +          PIXEL12_C
 13.1259 +          PIXEL20_1D
 13.1260 +          PIXEL21_1
 13.1261 +          PIXEL22_1M
 13.1262 +          break;
 13.1263 +        }
 13.1264 +        case 195:
 13.1265 +        {
 13.1266 +          PIXEL00_1L
 13.1267 +          PIXEL01_C
 13.1268 +          PIXEL02_1M
 13.1269 +          PIXEL10_1
 13.1270 +          PIXEL11
 13.1271 +          PIXEL12_1
 13.1272 +          PIXEL20_1M
 13.1273 +          PIXEL21_C
 13.1274 +          PIXEL22_1R
 13.1275 +          break;
 13.1276 +        }
 13.1277 +        case 102:
 13.1278 +        {
 13.1279 +          PIXEL00_1M
 13.1280 +          PIXEL01_C
 13.1281 +          PIXEL02_1R
 13.1282 +          PIXEL10_1
 13.1283 +          PIXEL11
 13.1284 +          PIXEL12_1
 13.1285 +          PIXEL20_1L
 13.1286 +          PIXEL21_C
 13.1287 +          PIXEL22_1M
 13.1288 +          break;
 13.1289 +        }
 13.1290 +        case 153:
 13.1291 +        {
 13.1292 +          PIXEL00_1U
 13.1293 +          PIXEL01_1
 13.1294 +          PIXEL02_1M
 13.1295 +          PIXEL10_C
 13.1296 +          PIXEL11
 13.1297 +          PIXEL12_C
 13.1298 +          PIXEL20_1M
 13.1299 +          PIXEL21_1
 13.1300 +          PIXEL22_1D
 13.1301 +          break;
 13.1302 +        }
 13.1303 +        case 58:
 13.1304 +        {
 13.1305 +          if (Diff(cget(4), cget(2)))
 13.1306 +          {
 13.1307 +            PIXEL00_1M
 13.1308 +          }
 13.1309 +          else
 13.1310 +          {
 13.1311 +            PIXEL00_2
 13.1312 +          }
 13.1313 +          PIXEL01_C
 13.1314 +          if (Diff(cget(2), cget(6)))
 13.1315 +          {
 13.1316 +            PIXEL02_1M
 13.1317 +          }
 13.1318 +          else
 13.1319 +          {
 13.1320 +            PIXEL02_2
 13.1321 +          }
 13.1322 +          PIXEL10_C
 13.1323 +          PIXEL11
 13.1324 +          PIXEL12_C
 13.1325 +          PIXEL20_1D
 13.1326 +          PIXEL21_1
 13.1327 +          PIXEL22_1M
 13.1328 +          break;
 13.1329 +        }
 13.1330 +        case 83:
 13.1331 +        {
 13.1332 +          PIXEL00_1L
 13.1333 +          PIXEL01_C
 13.1334 +          if (Diff(cget(2), cget(6)))
 13.1335 +          {
 13.1336 +            PIXEL02_1M
 13.1337 +          }
 13.1338 +          else
 13.1339 +          {
 13.1340 +            PIXEL02_2
 13.1341 +          }
 13.1342 +          PIXEL10_1
 13.1343 +          PIXEL11
 13.1344 +          PIXEL12_C
 13.1345 +          PIXEL20_1M
 13.1346 +          PIXEL21_C
 13.1347 +          if (Diff(cget(6), cget(8)))
 13.1348 +          {
 13.1349 +            PIXEL22_1M
 13.1350 +          }
 13.1351 +          else
 13.1352 +          {
 13.1353 +            PIXEL22_2
 13.1354 +          }
 13.1355 +          break;
 13.1356 +        }
 13.1357 +        case 92:
 13.1358 +        {
 13.1359 +          PIXEL00_1M
 13.1360 +          PIXEL01_1
 13.1361 +          PIXEL02_1U
 13.1362 +          PIXEL10_C
 13.1363 +          PIXEL11
 13.1364 +          PIXEL12_C
 13.1365 +          if (Diff(cget(8), cget(4)))
 13.1366 +          {
 13.1367 +            PIXEL20_1M
 13.1368 +          }
 13.1369 +          else
 13.1370 +          {
 13.1371 +            PIXEL20_2
 13.1372 +          }
 13.1373 +          PIXEL21_C
 13.1374 +          if (Diff(cget(6), cget(8)))
 13.1375 +          {
 13.1376 +            PIXEL22_1M
 13.1377 +          }
 13.1378 +          else
 13.1379 +          {
 13.1380 +            PIXEL22_2
 13.1381 +          }
 13.1382 +          break;
 13.1383 +        }
 13.1384 +        case 202:
 13.1385 +        {
 13.1386 +          if (Diff(cget(4), cget(2)))
 13.1387 +          {
 13.1388 +            PIXEL00_1M
 13.1389 +          }
 13.1390 +          else
 13.1391 +          {
 13.1392 +            PIXEL00_2
 13.1393 +          }
 13.1394 +          PIXEL01_C
 13.1395 +          PIXEL02_1M
 13.1396 +          PIXEL10_C
 13.1397 +          PIXEL11
 13.1398 +          PIXEL12_1
 13.1399 +          if (Diff(cget(8), cget(4)))
 13.1400 +          {
 13.1401 +            PIXEL20_1M
 13.1402 +          }
 13.1403 +          else
 13.1404 +          {
 13.1405 +            PIXEL20_2
 13.1406 +          }
 13.1407 +          PIXEL21_C
 13.1408 +          PIXEL22_1R
 13.1409 +          break;
 13.1410 +        }
 13.1411 +        case 78:
 13.1412 +        {
 13.1413 +          if (Diff(cget(4), cget(2)))
 13.1414 +          {
 13.1415 +            PIXEL00_1M
 13.1416 +          }
 13.1417 +          else
 13.1418 +          {
 13.1419 +            PIXEL00_2
 13.1420 +          }
 13.1421 +          PIXEL01_C
 13.1422 +          PIXEL02_1R
 13.1423 +          PIXEL10_C
 13.1424 +          PIXEL11
 13.1425 +          PIXEL12_1
 13.1426 +          if (Diff(cget(8), cget(4)))
 13.1427 +          {
 13.1428 +            PIXEL20_1M
 13.1429 +          }
 13.1430 +          else
 13.1431 +          {
 13.1432 +            PIXEL20_2
 13.1433 +          }
 13.1434 +          PIXEL21_C
 13.1435 +          PIXEL22_1M
 13.1436 +          break;
 13.1437 +        }
 13.1438 +        case 154:
 13.1439 +        {
 13.1440 +          if (Diff(cget(4), cget(2)))
 13.1441 +          {
 13.1442 +            PIXEL00_1M
 13.1443 +          }
 13.1444 +          else
 13.1445 +          {
 13.1446 +            PIXEL00_2
 13.1447 +          }
 13.1448 +          PIXEL01_C
 13.1449 +          if (Diff(cget(2), cget(6)))
 13.1450 +          {
 13.1451 +            PIXEL02_1M
 13.1452 +          }
 13.1453 +          else
 13.1454 +          {
 13.1455 +            PIXEL02_2
 13.1456 +          }
 13.1457 +          PIXEL10_C
 13.1458 +          PIXEL11
 13.1459 +          PIXEL12_C
 13.1460 +          PIXEL20_1M
 13.1461 +          PIXEL21_1
 13.1462 +          PIXEL22_1D
 13.1463 +          break;
 13.1464 +        }
 13.1465 +        case 114:
 13.1466 +        {
 13.1467 +          PIXEL00_1M
 13.1468 +          PIXEL01_C
 13.1469 +          if (Diff(cget(2), cget(6)))
 13.1470 +          {
 13.1471 +            PIXEL02_1M
 13.1472 +          }
 13.1473 +          else
 13.1474 +          {
 13.1475 +            PIXEL02_2
 13.1476 +          }
 13.1477 +          PIXEL10_1
 13.1478 +          PIXEL11
 13.1479 +          PIXEL12_C
 13.1480 +          PIXEL20_1L
 13.1481 +          PIXEL21_C
 13.1482 +          if (Diff(cget(6), cget(8)))
 13.1483 +          {
 13.1484 +            PIXEL22_1M
 13.1485 +          }
 13.1486 +          else
 13.1487 +          {
 13.1488 +            PIXEL22_2
 13.1489 +          }
 13.1490 +          break;
 13.1491 +        }
 13.1492 +        case 89:
 13.1493 +        {
 13.1494 +          PIXEL00_1U
 13.1495 +          PIXEL01_1
 13.1496 +          PIXEL02_1M
 13.1497 +          PIXEL10_C
 13.1498 +          PIXEL11
 13.1499 +          PIXEL12_C
 13.1500 +          if (Diff(cget(8), cget(4)))
 13.1501 +          {
 13.1502 +            PIXEL20_1M
 13.1503 +          }
 13.1504 +          else
 13.1505 +          {
 13.1506 +            PIXEL20_2
 13.1507 +          }
 13.1508 +          PIXEL21_C
 13.1509 +          if (Diff(cget(6), cget(8)))
 13.1510 +          {
 13.1511 +            PIXEL22_1M
 13.1512 +          }
 13.1513 +          else
 13.1514 +          {
 13.1515 +            PIXEL22_2
 13.1516 +          }
 13.1517 +          break;
 13.1518 +        }
 13.1519 +        case 90:
 13.1520 +        {
 13.1521 +          if (Diff(cget(4), cget(2)))
 13.1522 +          {
 13.1523 +            PIXEL00_1M
 13.1524 +          }
 13.1525 +          else
 13.1526 +          {
 13.1527 +            PIXEL00_2
 13.1528 +          }
 13.1529 +          PIXEL01_C
 13.1530 +          if (Diff(cget(2), cget(6)))
 13.1531 +          {
 13.1532 +            PIXEL02_1M
 13.1533 +          }
 13.1534 +          else
 13.1535 +          {
 13.1536 +            PIXEL02_2
 13.1537 +          }
 13.1538 +          PIXEL10_C
 13.1539 +          PIXEL11
 13.1540 +          PIXEL12_C
 13.1541 +          if (Diff(cget(8), cget(4)))
 13.1542 +          {
 13.1543 +            PIXEL20_1M
 13.1544 +          }
 13.1545 +          else
 13.1546 +          {
 13.1547 +            PIXEL20_2
 13.1548 +          }
 13.1549 +          PIXEL21_C
 13.1550 +          if (Diff(cget(6), cget(8)))
 13.1551 +          {
 13.1552 +            PIXEL22_1M
 13.1553 +          }
 13.1554 +          else
 13.1555 +          {
 13.1556 +            PIXEL22_2
 13.1557 +          }
 13.1558 +          break;
 13.1559 +        }
 13.1560 +        case 55:
 13.1561 +        case 23:
 13.1562 +        {
 13.1563 +          if (Diff(cget(2), cget(6)))
 13.1564 +          {
 13.1565 +            PIXEL00_1L
 13.1566 +            PIXEL01_C
 13.1567 +            PIXEL02_C
 13.1568 +            PIXEL12_C
 13.1569 +          }
 13.1570 +          else
 13.1571 +          {
 13.1572 +            PIXEL00_2
 13.1573 +            PIXEL01_6
 13.1574 +            PIXEL02_5
 13.1575 +            PIXEL12_1
 13.1576 +          }
 13.1577 +          PIXEL10_1
 13.1578 +          PIXEL11
 13.1579 +          PIXEL20_2
 13.1580 +          PIXEL21_1
 13.1581 +          PIXEL22_1M
 13.1582 +          break;
 13.1583 +        }
 13.1584 +        case 182:
 13.1585 +        case 150:
 13.1586 +        {
 13.1587 +          if (Diff(cget(2), cget(6)))
 13.1588 +          {
 13.1589 +            PIXEL01_C
 13.1590 +            PIXEL02_C
 13.1591 +            PIXEL12_C
 13.1592 +            PIXEL22_1D
 13.1593 +          }
 13.1594 +          else
 13.1595 +          {
 13.1596 +            PIXEL01_1
 13.1597 +            PIXEL02_5
 13.1598 +            PIXEL12_6
 13.1599 +            PIXEL22_2
 13.1600 +          }
 13.1601 +          PIXEL00_1M
 13.1602 +          PIXEL10_1
 13.1603 +          PIXEL11
 13.1604 +          PIXEL20_2
 13.1605 +          PIXEL21_1
 13.1606 +          break;
 13.1607 +        }
 13.1608 +        case 213:
 13.1609 +        case 212:
 13.1610 +        {
 13.1611 +          if (Diff(cget(6), cget(8)))
 13.1612 +          {
 13.1613 +            PIXEL02_1U
 13.1614 +            PIXEL12_C
 13.1615 +            PIXEL21_C
 13.1616 +            PIXEL22_C
 13.1617 +          }
 13.1618 +          else
 13.1619 +          {
 13.1620 +            PIXEL02_2
 13.1621 +            PIXEL12_6
 13.1622 +            PIXEL21_1
 13.1623 +            PIXEL22_5
 13.1624 +          }
 13.1625 +          PIXEL00_2
 13.1626 +          PIXEL01_1
 13.1627 +          PIXEL10_1
 13.1628 +          PIXEL11
 13.1629 +          PIXEL20_1M
 13.1630 +          break;
 13.1631 +        }
 13.1632 +        case 241:
 13.1633 +        case 240:
 13.1634 +        {
 13.1635 +          if (Diff(cget(6), cget(8)))
 13.1636 +          {
 13.1637 +            PIXEL12_C
 13.1638 +            PIXEL20_1L
 13.1639 +            PIXEL21_C
 13.1640 +            PIXEL22_C
 13.1641 +          }
 13.1642 +          else
 13.1643 +          {
 13.1644 +            PIXEL12_1
 13.1645 +            PIXEL20_2
 13.1646 +            PIXEL21_6
 13.1647 +            PIXEL22_5
 13.1648 +          }
 13.1649 +          PIXEL00_2
 13.1650 +          PIXEL01_1
 13.1651 +          PIXEL02_1M
 13.1652 +          PIXEL10_1
 13.1653 +          PIXEL11
 13.1654 +          break;
 13.1655 +        }
 13.1656 +        case 236:
 13.1657 +        case 232:
 13.1658 +        {
 13.1659 +          if (Diff(cget(8), cget(4)))
 13.1660 +          {
 13.1661 +            PIXEL10_C
 13.1662 +            PIXEL20_C
 13.1663 +            PIXEL21_C
 13.1664 +            PIXEL22_1R
 13.1665 +          }
 13.1666 +          else
 13.1667 +          {
 13.1668 +            PIXEL10_1
 13.1669 +            PIXEL20_5
 13.1670 +            PIXEL21_6
 13.1671 +            PIXEL22_2
 13.1672 +          }
 13.1673 +          PIXEL00_1M
 13.1674 +          PIXEL01_1
 13.1675 +          PIXEL02_2
 13.1676 +          PIXEL11
 13.1677 +          PIXEL12_1
 13.1678 +          break;
 13.1679 +        }
 13.1680 +        case 109:
 13.1681 +        case 105:
 13.1682 +        {
 13.1683 +          if (Diff(cget(8), cget(4)))
 13.1684 +          {
 13.1685 +            PIXEL00_1U
 13.1686 +            PIXEL10_C
 13.1687 +            PIXEL20_C
 13.1688 +            PIXEL21_C
 13.1689 +          }
 13.1690 +          else
 13.1691 +          {
 13.1692 +            PIXEL00_2
 13.1693 +            PIXEL10_6
 13.1694 +            PIXEL20_5
 13.1695 +            PIXEL21_1
 13.1696 +          }
 13.1697 +          PIXEL01_1
 13.1698 +          PIXEL02_2
 13.1699 +          PIXEL11
 13.1700 +          PIXEL12_1
 13.1701 +          PIXEL22_1M
 13.1702 +          break;
 13.1703 +        }
 13.1704 +        case 171:
 13.1705 +        case 43:
 13.1706 +        {
 13.1707 +          if (Diff(cget(4), cget(2)))
 13.1708 +          {
 13.1709 +            PIXEL00_C
 13.1710 +            PIXEL01_C
 13.1711 +            PIXEL10_C
 13.1712 +            PIXEL20_1D
 13.1713 +          }
 13.1714 +          else
 13.1715 +          {
 13.1716 +            PIXEL00_5
 13.1717 +            PIXEL01_1
 13.1718 +            PIXEL10_6
 13.1719 +            PIXEL20_2
 13.1720 +          }
 13.1721 +          PIXEL02_1M
 13.1722 +          PIXEL11
 13.1723 +          PIXEL12_1
 13.1724 +          PIXEL21_1
 13.1725 +          PIXEL22_2
 13.1726 +          break;
 13.1727 +        }
 13.1728 +        case 143:
 13.1729 +        case 15:
 13.1730 +        {
 13.1731 +          if (Diff(cget(4), cget(2)))
 13.1732 +          {
 13.1733 +            PIXEL00_C
 13.1734 +            PIXEL01_C
 13.1735 +            PIXEL02_1R
 13.1736 +            PIXEL10_C
 13.1737 +          }
 13.1738 +          else
 13.1739 +          {
 13.1740 +            PIXEL00_5
 13.1741 +            PIXEL01_6
 13.1742 +            PIXEL02_2
 13.1743 +            PIXEL10_1
 13.1744 +          }
 13.1745 +          PIXEL11
 13.1746 +          PIXEL12_1
 13.1747 +          PIXEL20_1M
 13.1748 +          PIXEL21_1
 13.1749 +          PIXEL22_2
 13.1750 +          break;
 13.1751 +        }
 13.1752 +        case 124:
 13.1753 +        {
 13.1754 +          PIXEL00_1M
 13.1755 +          PIXEL01_1
 13.1756 +          PIXEL02_1U
 13.1757 +          PIXEL11
 13.1758 +          PIXEL12_C
 13.1759 +          if (Diff(cget(8), cget(4)))
 13.1760 +          {
 13.1761 +            PIXEL10_C
 13.1762 +            PIXEL20_C
 13.1763 +            PIXEL21_C
 13.1764 +          }
 13.1765 +          else
 13.1766 +          {
 13.1767 +            PIXEL10_3
 13.1768 +            PIXEL20_4
 13.1769 +            PIXEL21_3
 13.1770 +          }
 13.1771 +          PIXEL22_1M
 13.1772 +          break;
 13.1773 +        }
 13.1774 +        case 203:
 13.1775 +        {
 13.1776 +          if (Diff(cget(4), cget(2)))
 13.1777 +          {
 13.1778 +            PIXEL00_C
 13.1779 +            PIXEL01_C
 13.1780 +            PIXEL10_C
 13.1781 +          }
 13.1782 +          else
 13.1783 +          {
 13.1784 +            PIXEL00_4
 13.1785 +            PIXEL01_3
 13.1786 +            PIXEL10_3
 13.1787 +          }
 13.1788 +          PIXEL02_1M
 13.1789 +          PIXEL11
 13.1790 +          PIXEL12_1
 13.1791 +          PIXEL20_1M
 13.1792 +          PIXEL21_C
 13.1793 +          PIXEL22_1R
 13.1794 +          break;
 13.1795 +        }
 13.1796 +        case 62:
 13.1797 +        {
 13.1798 +          PIXEL00_1M
 13.1799 +          if (Diff(cget(2), cget(6)))
 13.1800 +          {
 13.1801 +            PIXEL01_C
 13.1802 +            PIXEL02_C
 13.1803 +            PIXEL12_C
 13.1804 +          }
 13.1805 +          else
 13.1806 +          {
 13.1807 +            PIXEL01_3
 13.1808 +            PIXEL02_4
 13.1809 +            PIXEL12_3
 13.1810 +          }
 13.1811 +          PIXEL10_C
 13.1812 +          PIXEL11
 13.1813 +          PIXEL20_1D
 13.1814 +          PIXEL21_1
 13.1815 +          PIXEL22_1M
 13.1816 +          break;
 13.1817 +        }
 13.1818 +        case 211:
 13.1819 +        {
 13.1820 +          PIXEL00_1L
 13.1821 +          PIXEL01_C
 13.1822 +          PIXEL02_1M
 13.1823 +          PIXEL10_1
 13.1824 +          PIXEL11
 13.1825 +          PIXEL20_1M
 13.1826 +          if (Diff(cget(6), cget(8)))
 13.1827 +          {
 13.1828 +            PIXEL12_C
 13.1829 +            PIXEL21_C
 13.1830 +            PIXEL22_C
 13.1831 +          }
 13.1832 +          else
 13.1833 +          {
 13.1834 +            PIXEL12_3
 13.1835 +            PIXEL21_3
 13.1836 +            PIXEL22_4
 13.1837 +          }
 13.1838 +          break;
 13.1839 +        }
 13.1840 +        case 118:
 13.1841 +        {
 13.1842 +          PIXEL00_1M
 13.1843 +          if (Diff(cget(2), cget(6)))
 13.1844 +          {
 13.1845 +            PIXEL01_C
 13.1846 +            PIXEL02_C
 13.1847 +            PIXEL12_C
 13.1848 +          }
 13.1849 +          else
 13.1850 +          {
 13.1851 +            PIXEL01_3
 13.1852 +            PIXEL02_4
 13.1853 +            PIXEL12_3
 13.1854 +          }
 13.1855 +          PIXEL10_1
 13.1856 +          PIXEL11
 13.1857 +          PIXEL20_1L
 13.1858 +          PIXEL21_C
 13.1859 +          PIXEL22_1M
 13.1860 +          break;
 13.1861 +        }
 13.1862 +        case 217:
 13.1863 +        {
 13.1864 +          PIXEL00_1U
 13.1865 +          PIXEL01_1
 13.1866 +          PIXEL02_1M
 13.1867 +          PIXEL10_C
 13.1868 +          PIXEL11
 13.1869 +          PIXEL20_1M
 13.1870 +          if (Diff(cget(6), cget(8)))
 13.1871 +          {
 13.1872 +            PIXEL12_C
 13.1873 +            PIXEL21_C
 13.1874 +            PIXEL22_C
 13.1875 +          }
 13.1876 +          else
 13.1877 +          {
 13.1878 +            PIXEL12_3
 13.1879 +            PIXEL21_3
 13.1880 +            PIXEL22_4
 13.1881 +          }
 13.1882 +          break;
 13.1883 +        }
 13.1884 +        case 110:
 13.1885 +        {
 13.1886 +          PIXEL00_1M
 13.1887 +          PIXEL01_C
 13.1888 +          PIXEL02_1R
 13.1889 +          PIXEL11
 13.1890 +          PIXEL12_1
 13.1891 +          if (Diff(cget(8), cget(4)))
 13.1892 +          {
 13.1893 +            PIXEL10_C
 13.1894 +            PIXEL20_C
 13.1895 +            PIXEL21_C
 13.1896 +          }
 13.1897 +          else
 13.1898 +          {
 13.1899 +            PIXEL10_3
 13.1900 +            PIXEL20_4
 13.1901 +            PIXEL21_3
 13.1902 +          }
 13.1903 +          PIXEL22_1M
 13.1904 +          break;
 13.1905 +        }
 13.1906 +        case 155:
 13.1907 +        {
 13.1908 +          if (Diff(cget(4), cget(2)))
 13.1909 +          {
 13.1910 +            PIXEL00_C
 13.1911 +            PIXEL01_C
 13.1912 +            PIXEL10_C
 13.1913 +          }
 13.1914 +          else
 13.1915 +          {
 13.1916 +            PIXEL00_4
 13.1917 +            PIXEL01_3
 13.1918 +            PIXEL10_3
 13.1919 +          }
 13.1920 +          PIXEL02_1M
 13.1921 +          PIXEL11
 13.1922 +          PIXEL12_C
 13.1923 +          PIXEL20_1M
 13.1924 +          PIXEL21_1
 13.1925 +          PIXEL22_1D
 13.1926 +          break;
 13.1927 +        }
 13.1928 +        case 188:
 13.1929 +        {
 13.1930 +          PIXEL00_1M
 13.1931 +          PIXEL01_1
 13.1932 +          PIXEL02_1U
 13.1933 +          PIXEL10_C
 13.1934 +          PIXEL11
 13.1935 +          PIXEL12_C
 13.1936 +          PIXEL20_1D
 13.1937 +          PIXEL21_1
 13.1938 +          PIXEL22_1D
 13.1939 +          break;
 13.1940 +        }
 13.1941 +        case 185:
 13.1942 +        {
 13.1943 +          PIXEL00_1U
 13.1944 +          PIXEL01_1
 13.1945 +          PIXEL02_1M
 13.1946 +          PIXEL10_C
 13.1947 +          PIXEL11
 13.1948 +          PIXEL12_C
 13.1949 +          PIXEL20_1D
 13.1950 +          PIXEL21_1
 13.1951 +          PIXEL22_1D
 13.1952 +          break;
 13.1953 +        }
 13.1954 +        case 61:
 13.1955 +        {
 13.1956 +          PIXEL00_1U
 13.1957 +          PIXEL01_1
 13.1958 +          PIXEL02_1U
 13.1959 +          PIXEL10_C
 13.1960 +          PIXEL11
 13.1961 +          PIXEL12_C
 13.1962 +          PIXEL20_1D
 13.1963 +          PIXEL21_1
 13.1964 +          PIXEL22_1M
 13.1965 +          break;
 13.1966 +        }
 13.1967 +        case 157:
 13.1968 +        {
 13.1969 +          PIXEL00_1U
 13.1970 +          PIXEL01_1
 13.1971 +          PIXEL02_1U
 13.1972 +          PIXEL10_C
 13.1973 +          PIXEL11
 13.1974 +          PIXEL12_C
 13.1975 +          PIXEL20_1M
 13.1976 +          PIXEL21_1
 13.1977 +          PIXEL22_1D
 13.1978 +          break;
 13.1979 +        }
 13.1980 +        case 103:
 13.1981 +        {
 13.1982 +          PIXEL00_1L
 13.1983 +          PIXEL01_C
 13.1984 +          PIXEL02_1R
 13.1985 +          PIXEL10_1
 13.1986 +          PIXEL11
 13.1987 +          PIXEL12_1
 13.1988 +          PIXEL20_1L
 13.1989 +          PIXEL21_C
 13.1990 +          PIXEL22_1M
 13.1991 +          break;
 13.1992 +        }
 13.1993 +        case 227:
 13.1994 +        {
 13.1995 +          PIXEL00_1L
 13.1996 +          PIXEL01_C
 13.1997 +          PIXEL02_1M
 13.1998 +          PIXEL10_1
 13.1999 +          PIXEL11
 13.2000 +          PIXEL12_1
 13.2001 +          PIXEL20_1L
 13.2002 +          PIXEL21_C
 13.2003 +          PIXEL22_1R
 13.2004 +          break;
 13.2005 +        }
 13.2006 +        case 230:
 13.2007 +        {
 13.2008 +          PIXEL00_1M
 13.2009 +          PIXEL01_C
 13.2010 +          PIXEL02_1R
 13.2011 +          PIXEL10_1
 13.2012 +          PIXEL11
 13.2013 +          PIXEL12_1
 13.2014 +          PIXEL20_1L
 13.2015 +          PIXEL21_C
 13.2016 +          PIXEL22_1R
 13.2017 +          break;
 13.2018 +        }
 13.2019 +        case 199:
 13.2020 +        {
 13.2021 +          PIXEL00_1L
 13.2022 +          PIXEL01_C
 13.2023 +          PIXEL02_1R
 13.2024 +          PIXEL10_1
 13.2025 +          PIXEL11
 13.2026 +          PIXEL12_1
 13.2027 +          PIXEL20_1M
 13.2028 +          PIXEL21_C
 13.2029 +          PIXEL22_1R
 13.2030 +          break;
 13.2031 +        }
 13.2032 +        case 220:
 13.2033 +        {
 13.2034 +          PIXEL00_1M
 13.2035 +          PIXEL01_1
 13.2036 +          PIXEL02_1U
 13.2037 +          PIXEL10_C
 13.2038 +          PIXEL11
 13.2039 +          if (Diff(cget(8), cget(4)))
 13.2040 +          {
 13.2041 +            PIXEL20_1M
 13.2042 +          }
 13.2043 +          else
 13.2044 +          {
 13.2045 +            PIXEL20_2
 13.2046 +          }
 13.2047 +          if (Diff(cget(6), cget(8)))
 13.2048 +          {
 13.2049 +            PIXEL12_C
 13.2050 +            PIXEL21_C
 13.2051 +            PIXEL22_C
 13.2052 +          }
 13.2053 +          else
 13.2054 +          {
 13.2055 +            PIXEL12_3
 13.2056 +            PIXEL21_3
 13.2057 +            PIXEL22_4
 13.2058 +          }
 13.2059 +          break;
 13.2060 +        }
 13.2061 +        case 158:
 13.2062 +        {
 13.2063 +          if (Diff(cget(4), cget(2)))
 13.2064 +          {
 13.2065 +            PIXEL00_1M
 13.2066 +          }
 13.2067 +          else
 13.2068 +          {
 13.2069 +            PIXEL00_2
 13.2070 +          }
 13.2071 +          if (Diff(cget(2), cget(6)))
 13.2072 +          {
 13.2073 +            PIXEL01_C
 13.2074 +            PIXEL02_C
 13.2075 +            PIXEL12_C
 13.2076 +          }
 13.2077 +          else
 13.2078 +          {
 13.2079 +            PIXEL01_3
 13.2080 +            PIXEL02_4
 13.2081 +            PIXEL12_3
 13.2082 +          }
 13.2083 +          PIXEL10_C
 13.2084 +          PIXEL11
 13.2085 +          PIXEL20_1M
 13.2086 +          PIXEL21_1
 13.2087 +          PIXEL22_1D
 13.2088 +          break;
 13.2089 +        }
 13.2090 +        case 234:
 13.2091 +        {
 13.2092 +          if (Diff(cget(4), cget(2)))
 13.2093 +          {
 13.2094 +            PIXEL00_1M
 13.2095 +          }
 13.2096 +          else
 13.2097 +          {
 13.2098 +            PIXEL00_2
 13.2099 +          }
 13.2100 +          PIXEL01_C
 13.2101 +          PIXEL02_1M
 13.2102 +          PIXEL11
 13.2103 +          PIXEL12_1
 13.2104 +          if (Diff(cget(8), cget(4)))
 13.2105 +          {
 13.2106 +            PIXEL10_C
 13.2107 +            PIXEL20_C
 13.2108 +            PIXEL21_C
 13.2109 +          }
 13.2110 +          else
 13.2111 +          {
 13.2112 +            PIXEL10_3
 13.2113 +            PIXEL20_4
 13.2114 +            PIXEL21_3
 13.2115 +          }
 13.2116 +          PIXEL22_1R
 13.2117 +          break;
 13.2118 +        }
 13.2119 +        case 242:
 13.2120 +        {
 13.2121 +          PIXEL00_1M
 13.2122 +          PIXEL01_C
 13.2123 +          if (Diff(cget(2), cget(6)))
 13.2124 +          {
 13.2125 +            PIXEL02_1M
 13.2126 +          }
 13.2127 +          else
 13.2128 +          {
 13.2129 +            PIXEL02_2
 13.2130 +          }
 13.2131 +          PIXEL10_1
 13.2132 +          PIXEL11
 13.2133 +          PIXEL20_1L
 13.2134 +          if (Diff(cget(6), cget(8)))
 13.2135 +          {
 13.2136 +            PIXEL12_C
 13.2137 +            PIXEL21_C
 13.2138 +            PIXEL22_C
 13.2139 +          }
 13.2140 +          else
 13.2141 +          {
 13.2142 +            PIXEL12_3
 13.2143 +            PIXEL21_3
 13.2144 +            PIXEL22_4
 13.2145 +          }
 13.2146 +          break;
 13.2147 +        }
 13.2148 +        case 59:
 13.2149 +        {
 13.2150 +          if (Diff(cget(4), cget(2)))
 13.2151 +          {
 13.2152 +            PIXEL00_C
 13.2153 +            PIXEL01_C
 13.2154 +            PIXEL10_C
 13.2155 +          }
 13.2156 +          else
 13.2157 +          {
 13.2158 +            PIXEL00_4
 13.2159 +            PIXEL01_3
 13.2160 +            PIXEL10_3
 13.2161 +          }
 13.2162 +          if (Diff(cget(2), cget(6)))
 13.2163 +          {
 13.2164 +            PIXEL02_1M
 13.2165 +          }
 13.2166 +          else
 13.2167 +          {
 13.2168 +            PIXEL02_2
 13.2169 +          }
 13.2170 +          PIXEL11
 13.2171 +          PIXEL12_C
 13.2172 +          PIXEL20_1D
 13.2173 +          PIXEL21_1
 13.2174 +          PIXEL22_1M
 13.2175 +          break;
 13.2176 +        }
 13.2177 +        case 121:
 13.2178 +        {
 13.2179 +          PIXEL00_1U
 13.2180 +          PIXEL01_1
 13.2181 +          PIXEL02_1M
 13.2182 +          PIXEL11
 13.2183 +          PIXEL12_C
 13.2184 +          if (Diff(cget(8), cget(4)))
 13.2185 +          {
 13.2186 +            PIXEL10_C
 13.2187 +            PIXEL20_C
 13.2188 +            PIXEL21_C
 13.2189 +          }
 13.2190 +          else
 13.2191 +          {
 13.2192 +            PIXEL10_3
 13.2193 +            PIXEL20_4
 13.2194 +            PIXEL21_3
 13.2195 +          }
 13.2196 +          if (Diff(cget(6), cget(8)))
 13.2197 +          {
 13.2198 +            PIXEL22_1M
 13.2199 +          }
 13.2200 +          else
 13.2201 +          {
 13.2202 +            PIXEL22_2
 13.2203 +          }
 13.2204 +          break;
 13.2205 +        }
 13.2206 +        case 87:
 13.2207 +        {
 13.2208 +          PIXEL00_1L
 13.2209 +          if (Diff(cget(2), cget(6)))
 13.2210 +          {
 13.2211 +            PIXEL01_C
 13.2212 +            PIXEL02_C
 13.2213 +            PIXEL12_C
 13.2214 +          }
 13.2215 +          else
 13.2216 +          {
 13.2217 +            PIXEL01_3
 13.2218 +            PIXEL02_4
 13.2219 +            PIXEL12_3
 13.2220 +          }
 13.2221 +          PIXEL10_1
 13.2222 +          PIXEL11
 13.2223 +          PIXEL20_1M
 13.2224 +          PIXEL21_C
 13.2225 +          if (Diff(cget(6), cget(8)))
 13.2226 +          {
 13.2227 +            PIXEL22_1M
 13.2228 +          }
 13.2229 +          else
 13.2230 +          {
 13.2231 +            PIXEL22_2
 13.2232 +          }
 13.2233 +          break;
 13.2234 +        }
 13.2235 +        case 79:
 13.2236 +        {
 13.2237 +          if (Diff(cget(4), cget(2)))
 13.2238 +          {
 13.2239 +            PIXEL00_C
 13.2240 +            PIXEL01_C
 13.2241 +            PIXEL10_C
 13.2242 +          }
 13.2243 +          else
 13.2244 +          {
 13.2245 +            PIXEL00_4
 13.2246 +            PIXEL01_3
 13.2247 +            PIXEL10_3
 13.2248 +          }
 13.2249 +          PIXEL02_1R
 13.2250 +          PIXEL11
 13.2251 +          PIXEL12_1
 13.2252 +          if (Diff(cget(8), cget(4)))
 13.2253 +          {
 13.2254 +            PIXEL20_1M
 13.2255 +          }
 13.2256 +          else
 13.2257 +          {
 13.2258 +            PIXEL20_2
 13.2259 +          }
 13.2260 +          PIXEL21_C
 13.2261 +          PIXEL22_1M
 13.2262 +          break;
 13.2263 +        }
 13.2264 +        case 122:
 13.2265 +        {
 13.2266 +          if (Diff(cget(4), cget(2)))
 13.2267 +          {
 13.2268 +            PIXEL00_1M
 13.2269 +          }
 13.2270 +          else
 13.2271 +          {
 13.2272 +            PIXEL00_2
 13.2273 +          }
 13.2274 +          PIXEL01_C
 13.2275 +          if (Diff(cget(2), cget(6)))
 13.2276 +          {
 13.2277 +            PIXEL02_1M
 13.2278 +          }
 13.2279 +          else
 13.2280 +          {
 13.2281 +            PIXEL02_2
 13.2282 +          }
 13.2283 +          PIXEL11
 13.2284 +          PIXEL12_C
 13.2285 +          if (Diff(cget(8), cget(4)))
 13.2286 +          {
 13.2287 +            PIXEL10_C
 13.2288 +            PIXEL20_C
 13.2289 +            PIXEL21_C
 13.2290 +          }
 13.2291 +          else
 13.2292 +          {
 13.2293 +            PIXEL10_3
 13.2294 +            PIXEL20_4
 13.2295 +            PIXEL21_3
 13.2296 +          }
 13.2297 +          if (Diff(cget(6), cget(8)))
 13.2298 +          {
 13.2299 +            PIXEL22_1M
 13.2300 +          }
 13.2301 +          else
 13.2302 +          {
 13.2303 +            PIXEL22_2
 13.2304 +          }
 13.2305 +          break;
 13.2306 +        }
 13.2307 +        case 94:
 13.2308 +        {
 13.2309 +          if (Diff(cget(4), cget(2)))
 13.2310 +          {
 13.2311 +            PIXEL00_1M
 13.2312 +          }
 13.2313 +          else
 13.2314 +          {
 13.2315 +            PIXEL00_2
 13.2316 +          }
 13.2317 +          if (Diff(cget(2), cget(6)))
 13.2318 +          {
 13.2319 +            PIXEL01_C
 13.2320 +            PIXEL02_C
 13.2321 +            PIXEL12_C
 13.2322 +          }
 13.2323 +          else
 13.2324 +          {
 13.2325 +            PIXEL01_3
 13.2326 +            PIXEL02_4
 13.2327 +            PIXEL12_3
 13.2328 +          }
 13.2329 +          PIXEL10_C
 13.2330 +          PIXEL11
 13.2331 +          if (Diff(cget(8), cget(4)))
 13.2332 +          {
 13.2333 +            PIXEL20_1M
 13.2334 +          }
 13.2335 +          else
 13.2336 +          {
 13.2337 +            PIXEL20_2
 13.2338 +          }
 13.2339 +          PIXEL21_C
 13.2340 +          if (Diff(cget(6), cget(8)))
 13.2341 +          {
 13.2342 +            PIXEL22_1M
 13.2343 +          }
 13.2344 +          else
 13.2345 +          {
 13.2346 +            PIXEL22_2
 13.2347 +          }
 13.2348 +          break;
 13.2349 +        }
 13.2350 +        case 218:
 13.2351 +        {
 13.2352 +          if (Diff(cget(4), cget(2)))
 13.2353 +          {
 13.2354 +            PIXEL00_1M
 13.2355 +          }
 13.2356 +          else
 13.2357 +          {
 13.2358 +            PIXEL00_2
 13.2359 +          }
 13.2360 +          PIXEL01_C
 13.2361 +          if (Diff(cget(2), cget(6)))
 13.2362 +          {
 13.2363 +            PIXEL02_1M
 13.2364 +          }
 13.2365 +          else
 13.2366 +          {
 13.2367 +            PIXEL02_2
 13.2368 +          }
 13.2369 +          PIXEL10_C
 13.2370 +          PIXEL11
 13.2371 +          if (Diff(cget(8), cget(4)))
 13.2372 +          {
 13.2373 +            PIXEL20_1M
 13.2374 +          }
 13.2375 +          else
 13.2376 +          {
 13.2377 +            PIXEL20_2
 13.2378 +          }
 13.2379 +          if (Diff(cget(6), cget(8)))
 13.2380 +          {
 13.2381 +            PIXEL12_C
 13.2382 +            PIXEL21_C
 13.2383 +            PIXEL22_C
 13.2384 +          }
 13.2385 +          else
 13.2386 +          {
 13.2387 +            PIXEL12_3
 13.2388 +            PIXEL21_3
 13.2389 +            PIXEL22_4
 13.2390 +          }
 13.2391 +          break;
 13.2392 +        }
 13.2393 +        case 91:
 13.2394 +        {
 13.2395 +          if (Diff(cget(4), cget(2)))
 13.2396 +          {
 13.2397 +            PIXEL00_C
 13.2398 +            PIXEL01_C
 13.2399 +            PIXEL10_C
 13.2400 +          }
 13.2401 +          else
 13.2402 +          {
 13.2403 +            PIXEL00_4
 13.2404 +            PIXEL01_3
 13.2405 +            PIXEL10_3
 13.2406 +          }
 13.2407 +          if (Diff(cget(2), cget(6)))
 13.2408 +          {
 13.2409 +            PIXEL02_1M
 13.2410 +          }
 13.2411 +          else
 13.2412 +          {
 13.2413 +            PIXEL02_2
 13.2414 +          }
 13.2415 +          PIXEL11
 13.2416 +          PIXEL12_C
 13.2417 +          if (Diff(cget(8), cget(4)))
 13.2418 +          {
 13.2419 +            PIXEL20_1M
 13.2420 +          }
 13.2421 +          else
 13.2422 +          {
 13.2423 +            PIXEL20_2
 13.2424 +          }
 13.2425 +          PIXEL21_C
 13.2426 +          if (Diff(cget(6), cget(8)))
 13.2427 +          {
 13.2428 +            PIXEL22_1M
 13.2429 +          }
 13.2430 +          else
 13.2431 +          {
 13.2432 +            PIXEL22_2
 13.2433 +          }
 13.2434 +          break;
 13.2435 +        }
 13.2436 +        case 229:
 13.2437 +        {
 13.2438 +          PIXEL00_2
 13.2439 +          PIXEL01_1
 13.2440 +          PIXEL02_2
 13.2441 +          PIXEL10_1
 13.2442 +          PIXEL11
 13.2443 +          PIXEL12_1
 13.2444 +          PIXEL20_1L
 13.2445 +          PIXEL21_C
 13.2446 +          PIXEL22_1R
 13.2447 +          break;
 13.2448 +        }
 13.2449 +        case 167:
 13.2450 +        {
 13.2451 +          PIXEL00_1L
 13.2452 +          PIXEL01_C
 13.2453 +          PIXEL02_1R
 13.2454 +          PIXEL10_1
 13.2455 +          PIXEL11
 13.2456 +          PIXEL12_1
 13.2457 +          PIXEL20_2
 13.2458 +          PIXEL21_1
 13.2459 +          PIXEL22_2
 13.2460 +          break;
 13.2461 +        }
 13.2462 +        case 173:
 13.2463 +        {
 13.2464 +          PIXEL00_1U
 13.2465 +          PIXEL01_1
 13.2466 +          PIXEL02_2
 13.2467 +          PIXEL10_C
 13.2468 +          PIXEL11
 13.2469 +          PIXEL12_1
 13.2470 +          PIXEL20_1D
 13.2471 +          PIXEL21_1
 13.2472 +          PIXEL22_2
 13.2473 +          break;
 13.2474 +        }
 13.2475 +        case 181:
 13.2476 +        {
 13.2477 +          PIXEL00_2
 13.2478 +          PIXEL01_1
 13.2479 +          PIXEL02_1U
 13.2480 +          PIXEL10_1
 13.2481 +          PIXEL11
 13.2482 +          PIXEL12_C
 13.2483 +          PIXEL20_2
 13.2484 +          PIXEL21_1
 13.2485 +          PIXEL22_1D
 13.2486 +          break;
 13.2487 +        }
 13.2488 +        case 186:
 13.2489 +        {
 13.2490 +          if (Diff(cget(4), cget(2)))
 13.2491 +          {
 13.2492 +            PIXEL00_1M
 13.2493 +          }
 13.2494 +          else
 13.2495 +          {
 13.2496 +            PIXEL00_2
 13.2497 +          }
 13.2498 +          PIXEL01_C
 13.2499 +          if (Diff(cget(2), cget(6)))
 13.2500 +          {
 13.2501 +            PIXEL02_1M
 13.2502 +          }
 13.2503 +          else
 13.2504 +          {
 13.2505 +            PIXEL02_2
 13.2506 +          }
 13.2507 +          PIXEL10_C
 13.2508 +          PIXEL11
 13.2509 +          PIXEL12_C
 13.2510 +          PIXEL20_1D
 13.2511 +          PIXEL21_1
 13.2512 +          PIXEL22_1D
 13.2513 +          break;
 13.2514 +        }
 13.2515 +        case 115:
 13.2516 +        {
 13.2517 +          PIXEL00_1L
 13.2518 +          PIXEL01_C
 13.2519 +          if (Diff(cget(2), cget(6)))
 13.2520 +          {
 13.2521 +            PIXEL02_1M
 13.2522 +          }
 13.2523 +          else
 13.2524 +          {
 13.2525 +            PIXEL02_2
 13.2526 +          }
 13.2527 +          PIXEL10_1
 13.2528 +          PIXEL11
 13.2529 +          PIXEL12_C
 13.2530 +          PIXEL20_1L
 13.2531 +          PIXEL21_C
 13.2532 +          if (Diff(cget(6), cget(8)))
 13.2533 +          {
 13.2534 +            PIXEL22_1M
 13.2535 +          }
 13.2536 +          else
 13.2537 +          {
 13.2538 +            PIXEL22_2
 13.2539 +          }
 13.2540 +          break;
 13.2541 +        }
 13.2542 +        case 93:
 13.2543 +        {
 13.2544 +          PIXEL00_1U
 13.2545 +          PIXEL01_1
 13.2546 +          PIXEL02_1U
 13.2547 +          PIXEL10_C
 13.2548 +          PIXEL11
 13.2549 +          PIXEL12_C
 13.2550 +          if (Diff(cget(8), cget(4)))
 13.2551 +          {
 13.2552 +            PIXEL20_1M
 13.2553 +          }
 13.2554 +          else
 13.2555 +          {
 13.2556 +            PIXEL20_2
 13.2557 +          }
 13.2558 +          PIXEL21_C
 13.2559 +          if (Diff(cget(6), cget(8)))
 13.2560 +          {
 13.2561 +            PIXEL22_1M
 13.2562 +          }
 13.2563 +          else
 13.2564 +          {
 13.2565 +            PIXEL22_2
 13.2566 +          }
 13.2567 +          break;
 13.2568 +        }
 13.2569 +        case 206:
 13.2570 +        {
 13.2571 +          if (Diff(cget(4), cget(2)))
 13.2572 +          {
 13.2573 +            PIXEL00_1M
 13.2574 +          }
 13.2575 +          else
 13.2576 +          {
 13.2577 +            PIXEL00_2
 13.2578 +          }
 13.2579 +          PIXEL01_C
 13.2580 +          PIXEL02_1R
 13.2581 +          PIXEL10_C
 13.2582 +          PIXEL11
 13.2583 +          PIXEL12_1
 13.2584 +          if (Diff(cget(8), cget(4)))
 13.2585 +          {
 13.2586 +            PIXEL20_1M
 13.2587 +          }
 13.2588 +          else
 13.2589 +          {
 13.2590 +            PIXEL20_2
 13.2591 +          }
 13.2592 +          PIXEL21_C
 13.2593 +          PIXEL22_1R
 13.2594 +          break;
 13.2595 +        }
 13.2596 +        case 205:
 13.2597 +        case 201:
 13.2598 +        {
 13.2599 +          PIXEL00_1U
 13.2600 +          PIXEL01_1
 13.2601 +          PIXEL02_2
 13.2602 +          PIXEL10_C
 13.2603 +          PIXEL11
 13.2604 +          PIXEL12_1
 13.2605 +          if (Diff(cget(8), cget(4)))
 13.2606 +          {
 13.2607 +            PIXEL20_1M
 13.2608 +          }
 13.2609 +          else
 13.2610 +          {
 13.2611 +            PIXEL20_2
 13.2612 +          }
 13.2613 +          PIXEL21_C
 13.2614 +          PIXEL22_1R
 13.2615 +          break;
 13.2616 +        }
 13.2617 +        case 174:
 13.2618 +        case 46:
 13.2619 +        {
 13.2620 +          if (Diff(cget(4), cget(2)))
 13.2621 +          {
 13.2622 +            PIXEL00_1M
 13.2623 +          }
 13.2624 +          else
 13.2625 +          {
 13.2626 +            PIXEL00_2
 13.2627 +          }
 13.2628 +          PIXEL01_C
 13.2629 +          PIXEL02_1R
 13.2630 +          PIXEL10_C
 13.2631 +          PIXEL11
 13.2632 +          PIXEL12_1
 13.2633 +          PIXEL20_1D
 13.2634 +          PIXEL21_1
 13.2635 +          PIXEL22_2
 13.2636 +          break;
 13.2637 +        }
 13.2638 +        case 179:
 13.2639 +        case 147:
 13.2640 +        {
 13.2641 +          PIXEL00_1L
 13.2642 +          PIXEL01_C
 13.2643 +          if (Diff(cget(2), cget(6)))
 13.2644 +          {
 13.2645 +            PIXEL02_1M
 13.2646 +          }
 13.2647 +          else
 13.2648 +          {
 13.2649 +            PIXEL02_2
 13.2650 +          }
 13.2651 +          PIXEL10_1
 13.2652 +          PIXEL11
 13.2653 +          PIXEL12_C
 13.2654 +          PIXEL20_2
 13.2655 +          PIXEL21_1
 13.2656 +          PIXEL22_1D
 13.2657 +          break;
 13.2658 +        }
 13.2659 +        case 117:
 13.2660 +        case 116:
 13.2661 +        {
 13.2662 +          PIXEL00_2
 13.2663 +          PIXEL01_1
 13.2664 +          PIXEL02_1U
 13.2665 +          PIXEL10_1
 13.2666 +          PIXEL11
 13.2667 +          PIXEL12_C
 13.2668 +          PIXEL20_1L
 13.2669 +          PIXEL21_C
 13.2670 +          if (Diff(cget(6), cget(8)))
 13.2671 +          {
 13.2672 +            PIXEL22_1M
 13.2673 +          }
 13.2674 +          else
 13.2675 +          {
 13.2676 +            PIXEL22_2
 13.2677 +          }
 13.2678 +          break;
 13.2679 +        }
 13.2680 +        case 189:
 13.2681 +        {
 13.2682 +          PIXEL00_1U
 13.2683 +          PIXEL01_1
 13.2684 +          PIXEL02_1U
 13.2685 +          PIXEL10_C
 13.2686 +          PIXEL11
 13.2687 +          PIXEL12_C
 13.2688 +          PIXEL20_1D
 13.2689 +          PIXEL21_1
 13.2690 +          PIXEL22_1D
 13.2691 +          break;
 13.2692 +        }
 13.2693 +        case 231:
 13.2694 +        {
 13.2695 +          PIXEL00_1L
 13.2696 +          PIXEL01_C
 13.2697 +          PIXEL02_1R
 13.2698 +          PIXEL10_1
 13.2699 +          PIXEL11
 13.2700 +          PIXEL12_1
 13.2701 +          PIXEL20_1L
 13.2702 +          PIXEL21_C
 13.2703 +          PIXEL22_1R
 13.2704 +          break;
 13.2705 +        }
 13.2706 +        case 126:
 13.2707 +        {
 13.2708 +          PIXEL00_1M
 13.2709 +          if (Diff(cget(2), cget(6)))
 13.2710 +          {
 13.2711 +            PIXEL01_C
 13.2712 +            PIXEL02_C
 13.2713 +            PIXEL12_C
 13.2714 +          }
 13.2715 +          else
 13.2716 +          {
 13.2717 +            PIXEL01_3
 13.2718 +            PIXEL02_4
 13.2719 +            PIXEL12_3
 13.2720 +          }
 13.2721 +          PIXEL11
 13.2722 +          if (Diff(cget(8), cget(4)))
 13.2723 +          {
 13.2724 +            PIXEL10_C
 13.2725 +            PIXEL20_C
 13.2726 +            PIXEL21_C
 13.2727 +          }
 13.2728 +          else
 13.2729 +          {
 13.2730 +            PIXEL10_3
 13.2731 +            PIXEL20_4
 13.2732 +            PIXEL21_3
 13.2733 +          }
 13.2734 +          PIXEL22_1M
 13.2735 +          break;
 13.2736 +        }
 13.2737 +        case 219:
 13.2738 +        {
 13.2739 +          if (Diff(cget(4), cget(2)))
 13.2740 +          {
 13.2741 +            PIXEL00_C
 13.2742 +            PIXEL01_C
 13.2743 +            PIXEL10_C
 13.2744 +          }
 13.2745 +          else
 13.2746 +          {
 13.2747 +            PIXEL00_4
 13.2748 +            PIXEL01_3
 13.2749 +            PIXEL10_3
 13.2750 +          }
 13.2751 +          PIXEL02_1M
 13.2752 +          PIXEL11
 13.2753 +          PIXEL20_1M
 13.2754 +          if (Diff(cget(6), cget(8)))
 13.2755 +          {
 13.2756 +            PIXEL12_C
 13.2757 +            PIXEL21_C
 13.2758 +            PIXEL22_C
 13.2759 +          }
 13.2760 +          else
 13.2761 +          {
 13.2762 +            PIXEL12_3
 13.2763 +            PIXEL21_3
 13.2764 +            PIXEL22_4
 13.2765 +          }
 13.2766 +          break;
 13.2767 +        }
 13.2768 +        case 125:
 13.2769 +        {
 13.2770 +          if (Diff(cget(8), cget(4)))
 13.2771 +          {
 13.2772 +            PIXEL00_1U
 13.2773 +            PIXEL10_C
 13.2774 +            PIXEL20_C
 13.2775 +            PIXEL21_C
 13.2776 +          }
 13.2777 +          else
 13.2778 +          {
 13.2779 +            PIXEL00_2
 13.2780 +            PIXEL10_6
 13.2781 +            PIXEL20_5
 13.2782 +            PIXEL21_1
 13.2783 +          }
 13.2784 +          PIXEL01_1
 13.2785 +          PIXEL02_1U
 13.2786 +          PIXEL11
 13.2787 +          PIXEL12_C
 13.2788 +          PIXEL22_1M
 13.2789 +          break;
 13.2790 +        }
 13.2791 +        case 221:
 13.2792 +        {
 13.2793 +          if (Diff(cget(6), cget(8)))
 13.2794 +          {
 13.2795 +            PIXEL02_1U
 13.2796 +            PIXEL12_C
 13.2797 +            PIXEL21_C
 13.2798 +            PIXEL22_C
 13.2799 +          }
 13.2800 +          else
 13.2801 +          {
 13.2802 +            PIXEL02_2
 13.2803 +            PIXEL12_6
 13.2804 +            PIXEL21_1
 13.2805 +            PIXEL22_5
 13.2806 +          }
 13.2807 +          PIXEL00_1U
 13.2808 +          PIXEL01_1
 13.2809 +          PIXEL10_C
 13.2810 +          PIXEL11
 13.2811 +          PIXEL20_1M
 13.2812 +          break;
 13.2813 +        }
 13.2814 +        case 207:
 13.2815 +        {
 13.2816 +          if (Diff(cget(4), cget(2)))
 13.2817 +          {
 13.2818 +            PIXEL00_C
 13.2819 +            PIXEL01_C
 13.2820 +            PIXEL02_1R
 13.2821 +            PIXEL10_C
 13.2822 +          }
 13.2823 +          else
 13.2824 +          {
 13.2825 +            PIXEL00_5
 13.2826 +            PIXEL01_6
 13.2827 +            PIXEL02_2
 13.2828 +            PIXEL10_1
 13.2829 +          }
 13.2830 +          PIXEL11
 13.2831 +          PIXEL12_1
 13.2832 +          PIXEL20_1M
 13.2833 +          PIXEL21_C
 13.2834 +          PIXEL22_1R
 13.2835 +          break;
 13.2836 +        }
 13.2837 +        case 238:
 13.2838 +        {
 13.2839 +          if (Diff(cget(8), cget(4)))
 13.2840 +          {
 13.2841 +            PIXEL10_C
 13.2842 +            PIXEL20_C
 13.2843 +            PIXEL21_C
 13.2844 +            PIXEL22_1R
 13.2845 +          }
 13.2846 +          else
 13.2847 +          {
 13.2848 +            PIXEL10_1
 13.2849 +            PIXEL20_5
 13.2850 +            PIXEL21_6
 13.2851 +            PIXEL22_2
 13.2852 +          }
 13.2853 +          PIXEL00_1M
 13.2854 +          PIXEL01_C
 13.2855 +          PIXEL02_1R
 13.2856 +          PIXEL11
 13.2857 +          PIXEL12_1
 13.2858 +          break;
 13.2859 +        }
 13.2860 +        case 190:
 13.2861 +        {
 13.2862 +          if (Diff(cget(2), cget(6)))
 13.2863 +          {
 13.2864 +            PIXEL01_C
 13.2865 +            PIXEL02_C
 13.2866 +            PIXEL12_C
 13.2867 +            PIXEL22_1D
 13.2868 +          }
 13.2869 +          else
 13.2870 +          {
 13.2871 +            PIXEL01_1
 13.2872 +            PIXEL02_5
 13.2873 +            PIXEL12_6
 13.2874 +            PIXEL22_2
 13.2875 +          }
 13.2876 +          PIXEL00_1M
 13.2877 +          PIXEL10_C
 13.2878 +          PIXEL11
 13.2879 +          PIXEL20_1D
 13.2880 +          PIXEL21_1
 13.2881 +          break;
 13.2882 +        }
 13.2883 +        case 187:
 13.2884 +        {
 13.2885 +          if (Diff(cget(4), cget(2)))
 13.2886 +          {
 13.2887 +            PIXEL00_C
 13.2888 +            PIXEL01_C
 13.2889 +            PIXEL10_C
 13.2890 +            PIXEL20_1D
 13.2891 +          }
 13.2892 +          else
 13.2893 +          {
 13.2894 +            PIXEL00_5
 13.2895 +            PIXEL01_1
 13.2896 +            PIXEL10_6
 13.2897 +            PIXEL20_2
 13.2898 +          }
 13.2899 +          PIXEL02_1M
 13.2900 +          PIXEL11
 13.2901 +          PIXEL12_C
 13.2902 +          PIXEL21_1
 13.2903 +          PIXEL22_1D
 13.2904 +          break;
 13.2905 +        }
 13.2906 +        case 243:
 13.2907 +        {
 13.2908 +          if (Diff(cget(6), cget(8)))
 13.2909 +          {
 13.2910 +            PIXEL12_C
 13.2911 +            PIXEL20_1L
 13.2912 +            PIXEL21_C
 13.2913 +            PIXEL22_C
 13.2914 +          }
 13.2915 +          else
 13.2916 +          {
 13.2917 +            PIXEL12_1
 13.2918 +            PIXEL20_2
 13.2919 +            PIXEL21_6
 13.2920 +            PIXEL22_5
 13.2921 +          }
 13.2922 +          PIXEL00_1L
 13.2923 +          PIXEL01_C
 13.2924 +          PIXEL02_1M
 13.2925 +          PIXEL10_1
 13.2926 +          PIXEL11
 13.2927 +          break;
 13.2928 +        }
 13.2929 +        case 119:
 13.2930 +        {
 13.2931 +          if (Diff(cget(2), cget(6)))
 13.2932 +          {
 13.2933 +            PIXEL00_1L
 13.2934 +            PIXEL01_C
 13.2935 +            PIXEL02_C
 13.2936 +            PIXEL12_C
 13.2937 +          }
 13.2938 +          else
 13.2939 +          {
 13.2940 +            PIXEL00_2
 13.2941 +            PIXEL01_6
 13.2942 +            PIXEL02_5
 13.2943 +            PIXEL12_1
 13.2944 +          }
 13.2945 +          PIXEL10_1
 13.2946 +          PIXEL11
 13.2947 +          PIXEL20_1L
 13.2948 +          PIXEL21_C
 13.2949 +          PIXEL22_1M
 13.2950 +          break;
 13.2951 +        }
 13.2952 +        case 237:
 13.2953 +        case 233:
 13.2954 +        {
 13.2955 +          PIXEL00_1U
 13.2956 +          PIXEL01_1
 13.2957 +          PIXEL02_2
 13.2958 +          PIXEL10_C
 13.2959 +          PIXEL11
 13.2960 +          PIXEL12_1
 13.2961 +          if (Diff(cget(8), cget(4)))
 13.2962 +          {
 13.2963 +            PIXEL20_C
 13.2964 +          }
 13.2965 +          else
 13.2966 +          {
 13.2967 +            PIXEL20_2
 13.2968 +          }
 13.2969 +          PIXEL21_C
 13.2970 +          PIXEL22_1R
 13.2971 +          break;
 13.2972 +        }
 13.2973 +        case 175:
 13.2974 +        case 47:
 13.2975 +        {
 13.2976 +          if (Diff(cget(4), cget(2)))
 13.2977 +          {
 13.2978 +            PIXEL00_C
 13.2979 +          }
 13.2980 +          else
 13.2981 +          {
 13.2982 +            PIXEL00_2
 13.2983 +          }
 13.2984 +          PIXEL01_C
 13.2985 +          PIXEL02_1R
 13.2986 +          PIXEL10_C
 13.2987 +          PIXEL11
 13.2988 +          PIXEL12_1
 13.2989 +          PIXEL20_1D
 13.2990 +          PIXEL21_1
 13.2991 +          PIXEL22_2
 13.2992 +          break;
 13.2993 +        }
 13.2994 +        case 183:
 13.2995 +        case 151:
 13.2996 +        {
 13.2997 +          PIXEL00_1L
 13.2998 +          PIXEL01_C
 13.2999 +          if (Diff(cget(2), cget(6)))
 13.3000 +          {
 13.3001 +            PIXEL02_C
 13.3002 +          }
 13.3003 +          else
 13.3004 +          {
 13.3005 +            PIXEL02_2
 13.3006 +          }
 13.3007 +          PIXEL10_1
 13.3008 +          PIXEL11
 13.3009 +          PIXEL12_C
 13.3010 +          PIXEL20_2
 13.3011 +          PIXEL21_1
 13.3012 +          PIXEL22_1D
 13.3013 +          break;
 13.3014 +        }
 13.3015 +        case 245:
 13.3016 +        case 244:
 13.3017 +        {
 13.3018 +          PIXEL00_2
 13.3019 +          PIXEL01_1
 13.3020 +          PIXEL02_1U
 13.3021 +          PIXEL10_1
 13.3022 +          PIXEL11
 13.3023 +          PIXEL12_C
 13.3024 +          PIXEL20_1L
 13.3025 +          PIXEL21_C
 13.3026 +          if (Diff(cget(6), cget(8)))
 13.3027 +          {
 13.3028 +            PIXEL22_C
 13.3029 +          }
 13.3030 +          else
 13.3031 +          {
 13.3032 +            PIXEL22_2
 13.3033 +          }
 13.3034 +          break;
 13.3035 +        }
 13.3036 +        case 250:
 13.3037 +        {
 13.3038 +          PIXEL00_1M
 13.3039 +          PIXEL01_C
 13.3040 +          PIXEL02_1M
 13.3041 +          PIXEL11
 13.3042 +          if (Diff(cget(8), cget(4)))
 13.3043 +          {
 13.3044 +            PIXEL10_C
 13.3045 +            PIXEL20_C
 13.3046 +          }
 13.3047 +          else
 13.3048 +          {
 13.3049 +            PIXEL10_3
 13.3050 +            PIXEL20_4
 13.3051 +          }
 13.3052 +          PIXEL21_C
 13.3053 +          if (Diff(cget(6), cget(8)))
 13.3054 +          {
 13.3055 +            PIXEL12_C
 13.3056 +            PIXEL22_C
 13.3057 +          }
 13.3058 +          else
 13.3059 +          {
 13.3060 +            PIXEL12_3
 13.3061 +            PIXEL22_4
 13.3062 +          }
 13.3063 +          break;
 13.3064 +        }
 13.3065 +        case 123:
 13.3066 +        {
 13.3067 +          if (Diff(cget(4), cget(2)))
 13.3068 +          {
 13.3069 +            PIXEL00_C
 13.3070 +            PIXEL01_C
 13.3071 +          }
 13.3072 +          else
 13.3073 +          {
 13.3074 +            PIXEL00_4
 13.3075 +            PIXEL01_3
 13.3076 +          }
 13.3077 +          PIXEL02_1M
 13.3078 +          PIXEL10_C
 13.3079 +          PIXEL11
 13.3080 +          PIXEL12_C
 13.3081 +          if (Diff(cget(8), cget(4)))
 13.3082 +          {
 13.3083 +            PIXEL20_C
 13.3084 +            PIXEL21_C
 13.3085 +          }
 13.3086 +          else
 13.3087 +          {
 13.3088 +            PIXEL20_4
 13.3089 +            PIXEL21_3
 13.3090 +          }
 13.3091 +          PIXEL22_1M
 13.3092 +          break;
 13.3093 +        }
 13.3094 +        case 95:
 13.3095 +        {
 13.3096 +          if (Diff(cget(4), cget(2)))
 13.3097 +          {
 13.3098 +            PIXEL00_C
 13.3099 +            PIXEL10_C
 13.3100 +          }
 13.3101 +          else
 13.3102 +          {
 13.3103 +            PIXEL00_4
 13.3104 +            PIXEL10_3
 13.3105 +          }
 13.3106 +          PIXEL01_C
 13.3107 +          if (Diff(cget(2), cget(6)))
 13.3108 +          {
 13.3109 +            PIXEL02_C
 13.3110 +            PIXEL12_C
 13.3111 +          }
 13.3112 +          else
 13.3113 +          {
 13.3114 +            PIXEL02_4
 13.3115 +            PIXEL12_3
 13.3116 +          }
 13.3117 +          PIXEL11
 13.3118 +          PIXEL20_1M
 13.3119 +          PIXEL21_C
 13.3120 +          PIXEL22_1M
 13.3121 +          break;
 13.3122 +        }
 13.3123 +        case 222:
 13.3124 +        {
 13.3125 +          PIXEL00_1M
 13.3126 +          if (Diff(cget(2), cget(6)))
 13.3127 +          {
 13.3128 +            PIXEL01_C
 13.3129 +            PIXEL02_C
 13.3130 +          }
 13.3131 +          else
 13.3132 +          {
 13.3133 +            PIXEL01_3
 13.3134 +            PIXEL02_4
 13.3135 +          }
 13.3136 +          PIXEL10_C
 13.3137 +          PIXEL11
 13.3138 +          PIXEL12_C
 13.3139 +          PIXEL20_1M
 13.3140 +          if (Diff(cget(6), cget(8)))
 13.3141 +          {
 13.3142 +            PIXEL21_C
 13.3143 +            PIXEL22_C
 13.3144 +          }
 13.3145 +          else
 13.3146 +          {
 13.3147 +            PIXEL21_3
 13.3148 +            PIXEL22_4
 13.3149 +          }
 13.3150 +          break;
 13.3151 +        }
 13.3152 +        case 252:
 13.3153 +        {
 13.3154 +          PIXEL00_1M
 13.3155 +          PIXEL01_1
 13.3156 +          PIXEL02_1U
 13.3157 +          PIXEL11
 13.3158 +          PIXEL12_C
 13.3159 +          if (Diff(cget(8), cget(4)))
 13.3160 +          {
 13.3161 +            PIXEL10_C
 13.3162 +            PIXEL20_C
 13.3163 +          }
 13.3164 +          else
 13.3165 +          {
 13.3166 +            PIXEL10_3
 13.3167 +            PIXEL20_4
 13.3168 +          }
 13.3169 +          PIXEL21_C
 13.3170 +          if (Diff(cget(6), cget(8)))
 13.3171 +          {
 13.3172 +            PIXEL22_C
 13.3173 +          }
 13.3174 +          else
 13.3175 +          {
 13.3176 +            PIXEL22_2
 13.3177 +          }
 13.3178 +          break;
 13.3179 +        }
 13.3180 +        case 249:
 13.3181 +        {
 13.3182 +          PIXEL00_1U
 13.3183 +          PIXEL01_1
 13.3184 +          PIXEL02_1M
 13.3185 +          PIXEL10_C
 13.3186 +          PIXEL11
 13.3187 +          if (Diff(cget(8), cget(4)))
 13.3188 +          {
 13.3189 +            PIXEL20_C
 13.3190 +          }
 13.3191 +          else
 13.3192 +          {
 13.3193 +            PIXEL20_2
 13.3194 +          }
 13.3195 +          PIXEL21_C
 13.3196 +          if (Diff(cget(6), cget(8)))
 13.3197 +          {
 13.3198 +            PIXEL12_C
 13.3199 +            PIXEL22_C
 13.3200 +          }
 13.3201 +          else
 13.3202 +          {
 13.3203 +            PIXEL12_3
 13.3204 +            PIXEL22_4
 13.3205 +          }
 13.3206 +          break;
 13.3207 +        }
 13.3208 +        case 235:
 13.3209 +        {
 13.3210 +          if (Diff(cget(4), cget(2)))
 13.3211 +          {
 13.3212 +            PIXEL00_C
 13.3213 +            PIXEL01_C
 13.3214 +          }
 13.3215 +          else
 13.3216 +          {
 13.3217 +            PIXEL00_4
 13.3218 +            PIXEL01_3
 13.3219 +          }
 13.3220 +          PIXEL02_1M
 13.3221 +          PIXEL10_C
 13.3222 +          PIXEL11
 13.3223 +          PIXEL12_1
 13.3224 +          if (Diff(cget(8), cget(4)))
 13.3225 +          {
 13.3226 +            PIXEL20_C
 13.3227 +          }
 13.3228 +          else
 13.3229 +          {
 13.3230 +            PIXEL20_2
 13.3231 +          }
 13.3232 +          PIXEL21_C
 13.3233 +          PIXEL22_1R
 13.3234 +          break;
 13.3235 +        }
 13.3236 +        case 111:
 13.3237 +        {
 13.3238 +          if (Diff(cget(4), cget(2)))
 13.3239 +          {
 13.3240 +            PIXEL00_C
 13.3241 +          }
 13.3242 +          else
 13.3243 +          {
 13.3244 +            PIXEL00_2
 13.3245 +          }
 13.3246 +          PIXEL01_C
 13.3247 +          PIXEL02_1R
 13.3248 +          PIXEL10_C
 13.3249 +          PIXEL11
 13.3250 +          PIXEL12_1
 13.3251 +          if (Diff(cget(8), cget(4)))
 13.3252 +          {
 13.3253 +            PIXEL20_C
 13.3254 +            PIXEL21_C
 13.3255 +          }
 13.3256 +          else
 13.3257 +          {
 13.3258 +            PIXEL20_4
 13.3259 +            PIXEL21_3
 13.3260 +          }
 13.3261 +          PIXEL22_1M
 13.3262 +          break;
 13.3263 +        }
 13.3264 +        case 63:
 13.3265 +        {
 13.3266 +          if (Diff(cget(4), cget(2)))
 13.3267 +          {
 13.3268 +            PIXEL00_C
 13.3269 +          }
 13.3270 +          else
 13.3271 +          {
 13.3272 +            PIXEL00_2
 13.3273 +          }
 13.3274 +          PIXEL01_C
 13.3275 +          if (Diff(cget(2), cget(6)))
 13.3276 +          {
 13.3277 +            PIXEL02_C
 13.3278 +            PIXEL12_C
 13.3279 +          }
 13.3280 +          else
 13.3281 +          {
 13.3282 +            PIXEL02_4
 13.3283 +            PIXEL12_3
 13.3284 +          }
 13.3285 +          PIXEL10_C
 13.3286 +          PIXEL11
 13.3287 +          PIXEL20_1D
 13.3288 +          PIXEL21_1
 13.3289 +          PIXEL22_1M
 13.3290 +          break;
 13.3291 +        }
 13.3292 +        case 159:
 13.3293 +        {
 13.3294 +          if (Diff(cget(4), cget(2)))
 13.3295 +          {
 13.3296 +            PIXEL00_C
 13.3297 +            PIXEL10_C
 13.3298 +          }
 13.3299 +          else
 13.3300 +          {
 13.3301 +            PIXEL00_4
 13.3302 +            PIXEL10_3
 13.3303 +          }
 13.3304 +          PIXEL01_C
 13.3305 +          if (Diff(cget(2), cget(6)))
 13.3306 +          {
 13.3307 +            PIXEL02_C
 13.3308 +          }
 13.3309 +          else
 13.3310 +          {
 13.3311 +            PIXEL02_2
 13.3312 +          }
 13.3313 +          PIXEL11
 13.3314 +          PIXEL12_C
 13.3315 +          PIXEL20_1M
 13.3316 +          PIXEL21_1
 13.3317 +          PIXEL22_1D
 13.3318 +          break;
 13.3319 +        }
 13.3320 +        case 215:
 13.3321 +        {
 13.3322 +          PIXEL00_1L
 13.3323 +          PIXEL01_C
 13.3324 +          if (Diff(cget(2), cget(6)))
 13.3325 +          {
 13.3326 +            PIXEL02_C
 13.3327 +          }
 13.3328 +          else
 13.3329 +          {
 13.3330 +            PIXEL02_2
 13.3331 +          }
 13.3332 +          PIXEL10_1
 13.3333 +          PIXEL11
 13.3334 +          PIXEL12_C
 13.3335 +          PIXEL20_1M
 13.3336 +          if (Diff(cget(6), cget(8)))
 13.3337 +          {
 13.3338 +            PIXEL21_C
 13.3339 +            PIXEL22_C
 13.3340 +          }
 13.3341 +          else
 13.3342 +          {
 13.3343 +            PIXEL21_3
 13.3344 +            PIXEL22_4
 13.3345 +          }
 13.3346 +          break;
 13.3347 +        }
 13.3348 +        case 246:
 13.3349 +        {
 13.3350 +          PIXEL00_1M
 13.3351 +          if (Diff(cget(2), cget(6)))
 13.3352 +          {
 13.3353 +            PIXEL01_C
 13.3354 +            PIXEL02_C
 13.3355 +          }
 13.3356 +          else
 13.3357 +          {
 13.3358 +            PIXEL01_3
 13.3359 +            PIXEL02_4
 13.3360 +          }
 13.3361 +          PIXEL10_1
 13.3362 +          PIXEL11
 13.3363 +          PIXEL12_C
 13.3364 +          PIXEL20_1L
 13.3365 +          PIXEL21_C
 13.3366 +          if (Diff(cget(6), cget(8)))
 13.3367 +          {
 13.3368 +            PIXEL22_C
 13.3369 +          }
 13.3370 +          else
 13.3371 +          {
 13.3372 +            PIXEL22_2
 13.3373 +          }
 13.3374 +          break;
 13.3375 +        }
 13.3376 +        case 254:
 13.3377 +        {
 13.3378 +          PIXEL00_1M
 13.3379 +          if (Diff(cget(2), cget(6)))
 13.3380 +          {
 13.3381 +            PIXEL01_C
 13.3382 +            PIXEL02_C
 13.3383 +          }
 13.3384 +          else
 13.3385 +          {
 13.3386 +            PIXEL01_3
 13.3387 +            PIXEL02_4
 13.3388 +          }
 13.3389 +          PIXEL11
 13.3390 +          if (Diff(cget(8), cget(4)))
 13.3391 +          {
 13.3392 +            PIXEL10_C
 13.3393 +            PIXEL20_C
 13.3394 +          }
 13.3395 +          else
 13.3396 +          {
 13.3397 +            PIXEL10_3
 13.3398 +            PIXEL20_4
 13.3399 +          }
 13.3400 +          if (Diff(cget(6), cget(8)))
 13.3401 +          {
 13.3402 +            PIXEL12_C
 13.3403 +            PIXEL21_C
 13.3404 +            PIXEL22_C
 13.3405 +          }
 13.3406 +          else
 13.3407 +          {
 13.3408 +            PIXEL12_3
 13.3409 +            PIXEL21_3
 13.3410 +            PIXEL22_2
 13.3411 +          }
 13.3412 +          break;
 13.3413 +        }
 13.3414 +        case 253:
 13.3415 +        {
 13.3416 +          PIXEL00_1U
 13.3417 +          PIXEL01_1
 13.3418 +          PIXEL02_1U
 13.3419 +          PIXEL10_C
 13.3420 +          PIXEL11
 13.3421 +          PIXEL12_C
 13.3422 +          if (Diff(cget(8), cget(4)))
 13.3423 +          {
 13.3424 +            PIXEL20_C
 13.3425 +          }
 13.3426 +          else
 13.3427 +          {
 13.3428 +            PIXEL20_2
 13.3429 +          }
 13.3430 +          PIXEL21_C
 13.3431 +          if (Diff(cget(6), cget(8)))
 13.3432 +          {
 13.3433 +            PIXEL22_C
 13.3434 +          }
 13.3435 +          else
 13.3436 +          {
 13.3437 +            PIXEL22_2
 13.3438 +          }
 13.3439 +          break;
 13.3440 +        }
 13.3441 +        case 251:
 13.3442 +        {
 13.3443 +          if (Diff(cget(4), cget(2)))
 13.3444 +          {
 13.3445 +            PIXEL00_C
 13.3446 +            PIXEL01_C
 13.3447 +          }
 13.3448 +          else
 13.3449 +          {
 13.3450 +            PIXEL00_4
 13.3451 +            PIXEL01_3
 13.3452 +          }
 13.3453 +          PIXEL02_1M
 13.3454 +          PIXEL11
 13.3455 +          if (Diff(cget(8), cget(4)))
 13.3456 +          {
 13.3457 +            PIXEL10_C
 13.3458 +            PIXEL20_C
 13.3459 +            PIXEL21_C
 13.3460 +          }
 13.3461 +          else
 13.3462 +          {
 13.3463 +            PIXEL10_3
 13.3464 +            PIXEL20_2
 13.3465 +            PIXEL21_3
 13.3466 +          }
 13.3467 +          if (Diff(cget(6), cget(8)))
 13.3468 +          {
 13.3469 +            PIXEL12_C
 13.3470 +            PIXEL22_C
 13.3471 +          }
 13.3472 +          else
 13.3473 +          {
 13.3474 +            PIXEL12_3
 13.3475 +            PIXEL22_4
 13.3476 +          }
 13.3477 +          break;
 13.3478 +        }
 13.3479 +        case 239:
 13.3480 +        {
 13.3481 +          if (Diff(cget(4), cget(2)))
 13.3482 +          {
 13.3483 +            PIXEL00_C
 13.3484 +          }
 13.3485 +          else
 13.3486 +          {
 13.3487 +            PIXEL00_2
 13.3488 +          }
 13.3489 +          PIXEL01_C
 13.3490 +          PIXEL02_1R
 13.3491 +          PIXEL10_C
 13.3492 +          PIXEL11
 13.3493 +          PIXEL12_1
 13.3494 +          if (Diff(cget(8), cget(4)))
 13.3495 +          {
 13.3496 +            PIXEL20_C
 13.3497 +          }
 13.3498 +          else
 13.3499 +          {
 13.3500 +            PIXEL20_2
 13.3501 +          }
 13.3502 +          PIXEL21_C
 13.3503 +          PIXEL22_1R
 13.3504 +          break;
 13.3505 +        }
 13.3506 +        case 127:
 13.3507 +        {
 13.3508 +          if (Diff(cget(4), cget(2)))
 13.3509 +          {
 13.3510 +            PIXEL00_C
 13.3511 +            PIXEL01_C
 13.3512 +            PIXEL10_C
 13.3513 +          }
 13.3514 +          else
 13.3515 +          {
 13.3516 +            PIXEL00_2
 13.3517 +            PIXEL01_3
 13.3518 +            PIXEL10_3
 13.3519 +          }
 13.3520 +          if (Diff(cget(2), cget(6)))
 13.3521 +          {
 13.3522 +            PIXEL02_C
 13.3523 +            PIXEL12_C
 13.3524 +          }
 13.3525 +          else
 13.3526 +          {
 13.3527 +            PIXEL02_4
 13.3528 +            PIXEL12_3
 13.3529 +          }
 13.3530 +          PIXEL11
 13.3531 +          if (Diff(cget(8), cget(4)))
 13.3532 +          {
 13.3533 +            PIXEL20_C
 13.3534 +            PIXEL21_C
 13.3535 +          }
 13.3536 +          else
 13.3537 +          {
 13.3538 +            PIXEL20_4
 13.3539 +            PIXEL21_3
 13.3540 +          }
 13.3541 +          PIXEL22_1M
 13.3542 +          break;
 13.3543 +        }
 13.3544 +        case 191:
 13.3545 +        {
 13.3546 +          if (Diff(cget(4), cget(2)))
 13.3547 +          {
 13.3548 +            PIXEL00_C
 13.3549 +          }
 13.3550 +          else
 13.3551 +          {
 13.3552 +            PIXEL00_2
 13.3553 +          }
 13.3554 +          PIXEL01_C
 13.3555 +          if (Diff(cget(2), cget(6)))
 13.3556 +          {
 13.3557 +            PIXEL02_C
 13.3558 +          }
 13.3559 +          else
 13.3560 +          {
 13.3561 +            PIXEL02_2
 13.3562 +          }
 13.3563 +          PIXEL10_C
 13.3564 +          PIXEL11
 13.3565 +          PIXEL12_C
 13.3566 +          PIXEL20_1D
 13.3567 +          PIXEL21_1
 13.3568 +          PIXEL22_1D
 13.3569 +          break;
 13.3570 +        }
 13.3571 +        case 223:
 13.3572 +        {
 13.3573 +          if (Diff(cget(4), cget(2)))
 13.3574 +          {
 13.3575 +            PIXEL00_C
 13.3576 +            PIXEL10_C
 13.3577 +          }
 13.3578 +          else
 13.3579 +          {
 13.3580 +            PIXEL00_4
 13.3581 +            PIXEL10_3
 13.3582 +          }
 13.3583 +          if (Diff(cget(2), cget(6)))
 13.3584 +          {
 13.3585 +            PIXEL01_C
 13.3586 +            PIXEL02_C
 13.3587 +            PIXEL12_C
 13.3588 +          }
 13.3589 +          else
 13.3590 +          {
 13.3591 +            PIXEL01_3
 13.3592 +            PIXEL02_2
 13.3593 +            PIXEL12_3
 13.3594 +          }
 13.3595 +          PIXEL11
 13.3596 +          PIXEL20_1M
 13.3597 +          if (Diff(cget(6), cget(8)))
 13.3598 +          {
 13.3599 +            PIXEL21_C
 13.3600 +            PIXEL22_C
 13.3601 +          }
 13.3602 +          else
 13.3603 +          {
 13.3604 +            PIXEL21_3
 13.3605 +            PIXEL22_4
 13.3606 +          }
 13.3607 +          break;
 13.3608 +        }
 13.3609 +        case 247:
 13.3610 +        {
 13.3611 +          PIXEL00_1L
 13.3612 +          PIXEL01_C
 13.3613 +          if (Diff(cget(2), cget(6)))
 13.3614 +          {
 13.3615 +            PIXEL02_C
 13.3616 +          }
 13.3617 +          else
 13.3618 +          {
 13.3619 +            PIXEL02_2
 13.3620 +          }
 13.3621 +          PIXEL10_1
 13.3622 +          PIXEL11
 13.3623 +          PIXEL12_C
 13.3624 +          PIXEL20_1L
 13.3625 +          PIXEL21_C
 13.3626 +          if (Diff(cget(6), cget(8)))
 13.3627 +          {
 13.3628 +            PIXEL22_C
 13.3629 +          }
 13.3630 +          else
 13.3631 +          {
 13.3632 +            PIXEL22_2
 13.3633 +          }
 13.3634 +          break;
 13.3635 +        }
 13.3636 +        case 255:
 13.3637 +        {
 13.3638 +          if (Diff(cget(4), cget(2)))
 13.3639 +          {
 13.3640 +            PIXEL00_C
 13.3641 +          }
 13.3642 +          else
 13.3643 +          {
 13.3644 +            PIXEL00_2
 13.3645 +          }
 13.3646 +          PIXEL01_C
 13.3647 +          if (Diff(cget(2), cget(6)))
 13.3648 +          {
 13.3649 +            PIXEL02_C
 13.3650 +          }
 13.3651 +          else
 13.3652 +          {
 13.3653 +            PIXEL02_2
 13.3654 +          }
 13.3655 +          PIXEL10_C
 13.3656 +          PIXEL11
 13.3657 +          PIXEL12_C
 13.3658 +          if (Diff(cget(8), cget(4)))
 13.3659 +          {
 13.3660 +            PIXEL20_C
 13.3661 +          }
 13.3662 +          else
 13.3663 +          {
 13.3664 +            PIXEL20_2
 13.3665 +          }
 13.3666 +          PIXEL21_C
 13.3667 +          if (Diff(cget(6), cget(8)))
 13.3668 +          {
 13.3669 +            PIXEL22_C
 13.3670 +          }
 13.3671 +          else
 13.3672 +          {
 13.3673 +            PIXEL22_2
 13.3674 +          }
 13.3675 +          break;
 13.3676 +        }
 13.3677 +      }
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/src/filters/hq_shared32.cpp	Sun Mar 04 20:32:31 2012 -0600
    14.3 @@ -0,0 +1,389 @@
    14.4 +#include "../Port.h"
    14.5 +#include "hq_shared32.h"
    14.6 +#include "interp.h"
    14.7 +
    14.8 +const unsigned __int64 reg_blank = 0x0000000000000000; // all-zero qword: the zero operand for punpcklbw/packuswb in Interp3 and Interp4
    14.9 +const unsigned __int64 const7	 = 0x0000000700070007; // 7 in each of the three low 16-bit lanes, 0 in the top lane: the pmullw multiplier in Interp3 and Interp4
   14.10 +const unsigned __int64 treshold	 = 0x0000000000300706; // bytes 0x30, 0x07, 0x06 - the same magnitudes as trY/trU/trV in hq_shared32.h; not referenced anywhere in this file (name is a misspelling of "threshold")
   14.11 +
   14.12 +void Interp1(unsigned char *pc, unsigned int c1, unsigned int c2) // Stores a 3:1 mix of c1 and c2 in the 32-bit word at pc.
   14.13 +{ // Both paths work on the whole 32-bit value; nothing is masked per colour channel.
   14.14 +	// Equivalent C: *((int*)pc) = (c1*3+c2)/4;  computed as ((c1 << 2) - c1 + c2) >> 2.
   14.15 +
   14.16 +#ifdef MMX // MSVC-style inline asm using MMX registers
   14.17 +	__asm
   14.18 +	{
   14.19 +		mov eax, pc
   14.20 +		movd mm1, c1
   14.21 +		movd mm2, c2
   14.22 +		movq mm0, mm1
   14.23 +		pslld mm0, 2
   14.24 +		psubd mm0, mm1
   14.25 +		paddd mm0, mm2
   14.26 +		psrld mm0, 2
   14.27 +		movd    [eax], mm0
   14.28 +		    EMMS
   14.29 +	}
   14.30 +#else // MSVC-style inline asm using general-purpose registers
   14.31 +	__asm
   14.32 +	{
   14.33 +		mov eax, pc
   14.34 +		mov edx, c1
   14.35 +		shl edx, 2
   14.36 +		add edx, c2
   14.37 +		sub edx, c1
   14.38 +		shr edx, 2
   14.39 +		mov        [eax], edx
   14.40 +	}
   14.41 +#endif
   14.42 +}
   14.43 +
   14.44 +void Interp2(unsigned char *pc, unsigned int c1, unsigned int c2, unsigned int c3) // Stores a 2:1:1 mix of c1, c2 and c3 in the 32-bit word at pc.
   14.45 +{ // Both paths work on the whole 32-bit value; nothing is masked per colour channel.
   14.46 +	// Equivalent C: *((int*)pc) = (c1*2+c2+c3)/4;  The MMX path shifts logically (psrld) so it agrees with the shr in the non-MMX path when bit 31 of the sum is set.
   14.47 +
   14.48 +#ifdef MMX // MSVC-style inline asm using MMX registers
   14.49 +	__asm
   14.50 +	{
   14.51 +		mov eax, pc
   14.52 +		movd mm0, c1
   14.53 +		movd mm1, c2
   14.54 +		movd mm2, c3
   14.55 +		pslld mm0, 1
   14.56 +		paddd mm0, mm1
   14.57 +		paddd mm0, mm2
   14.58 +		psrld mm0, 2
   14.59 +		movd [eax], mm0
   14.60 +		    EMMS
   14.61 +	}
   14.62 +#else // MSVC-style inline asm using general-purpose registers
   14.63 +	__asm
   14.64 +	{
   14.65 +		mov eax, pc
   14.66 +		mov edx, c1
   14.67 +		shl edx, 1
   14.68 +		add edx, c2
   14.69 +		add edx, c3
   14.70 +		shr edx, 2
   14.71 +		mov        [eax], edx
   14.72 +	}
   14.73 +#endif
   14.74 +}
   14.75 +
   14.76 +void Interp3(unsigned char *pc, unsigned int c1, unsigned int c2) // Stores a 7:1 mix of c1 and c2 in the 32-bit word at pc.
   14.77 +{
   14.78 +	// Whole-word form:  *((int*)pc) = (c1*7+c2)/8;
   14.79 +	// Per-channel form: *((int*)pc) = ((((c1 & 0x00FF00)*7 + (c2 & 0x00FF00) ) & 0x0007F800) +
   14.80 +	//	            (((c1 & 0xFF00FF)*7 + (c2 & 0xFF00FF) ) & 0x07F807F8)) >> 3;
   14.81 +
   14.82 +#ifdef MMX // widens each byte to a 16-bit lane, computes (c1*7 + c2) >> 3 in the three low lanes (const7 is 0 in the top lane), then repacks with packuswb
   14.83 +	__asm
   14.84 +	{
   14.85 +		mov eax, pc
   14.86 +		movd mm1, c1
   14.87 +		movd mm2, c2
   14.88 +		punpcklbw mm1, reg_blank
   14.89 +		punpcklbw mm2, reg_blank
   14.90 +		pmullw mm1, const7
   14.91 +		paddw mm1, mm2
   14.92 +		psrlw mm1, 3
   14.93 +		packuswb mm1, reg_blank
   14.94 +		    movd       [eax], mm1
   14.95 +		    EMMS
   14.96 +	}
   14.97 +#else // computes ((c1 << 3) - c1 + c2) >> 3 on the whole 32-bit value; NOTE(review): no per-channel masking here, unlike the MMX path - confirm the difference is acceptable
   14.98 +	__asm
   14.99 +	{
  14.100 +		mov eax, c1
  14.101 +		mov ebx, c2
  14.102 +		mov ecx, eax
  14.103 +		shl ecx, 3
  14.104 +		sub ecx, eax
  14.105 +		add ecx, ebx
  14.106 +		shr ecx, 3
  14.107 +		mov eax, pc
  14.108 +		    mov     [eax], ecx
  14.109 +	}
  14.110 +#endif
  14.111 +}
  14.112 +
  14.113 +void Interp4(unsigned char *pc, unsigned int c1, unsigned int c2, unsigned int c3) // Stores a 2:7:7 mix of c1, c2 and c3 in the 32-bit word at pc.
  14.114 +{
  14.115 +	// Whole-word form:  *((int*)pc) = (c1*2+(c2+c3)*7)/16;
  14.116 +	// Per-channel form: *((int*)pc) = ((((c1 & 0x00FF00)*2 + ((c2 & 0x00FF00) + (c3 & 0x00FF00))*7 ) & 0x000FF000) +
  14.117 +	//              (((c1 & 0xFF00FF)*2 + ((c2 & 0xFF00FF) + (c3 & 0xFF00FF))*7 ) & 0x0FF00FF0)) >> 4;
  14.118 +
  14.119 +#ifdef MMX // widens each byte to a 16-bit lane, computes (c1*2 + (c2+c3)*7) >> 4 in the three low lanes (const7 is 0 in the top lane), then repacks with packuswb
  14.120 +	__asm
  14.121 +	{
  14.122 +		mov eax, pc
  14.123 +		movd mm1, c1
  14.124 +		movd mm2, c2
  14.125 +		movd mm3, c3
  14.126 +		punpcklbw mm1, reg_blank
  14.127 +		punpcklbw mm2, reg_blank
  14.128 +		punpcklbw mm3, reg_blank
  14.129 +		psllw mm1, 1
  14.130 +		paddw mm2, mm3
  14.131 +		pmullw mm2, const7
  14.132 +		paddw mm1, mm2
  14.133 +		psrlw mm1, 4
  14.134 +		packuswb mm1, reg_blank
  14.135 +		    movd       [eax], mm1
  14.136 +		    EMMS
  14.137 +	}
  14.138 +#else // implements the per-channel form above: bits 8-15 are blended in eax, bits 0-7 and 16-23 together in ebx; bits 24-31 are masked out
  14.139 +
  14.140 +	__asm
  14.141 +	{
  14.142 +		mov eax, [c1]
  14.143 +		and     eax, 0FF00h
  14.144 +		shl eax, 1
  14.145 +		mov ecx, [c2]
  14.146 +		and     ecx, 0FF00h
  14.147 +		mov edx, [c3]
  14.148 +		and     edx, 0FF00h
  14.149 +		add ecx, edx
  14.150 +		imul ecx, ecx, 7
  14.151 +		add eax, ecx
  14.152 +		and     eax, 0FF000h
  14.153 +
  14.154 +		mov ebx, [c1]
  14.155 +		and     ebx, 0FF00FFh
  14.156 +		shl ebx, 1
  14.157 +		mov ecx, [c2]
  14.158 +		and     ecx, 0FF00FFh
  14.159 +		mov edx, [c3]
  14.160 +		and     edx, 0FF00FFh
  14.161 +		add ecx, edx
  14.162 +		imul ecx, ecx, 7
  14.163 +		add ebx, ecx
  14.164 +		and     ebx, 0FF00FF0h
  14.165 +
  14.166 +		add eax, ebx
  14.167 +		shr eax, 4
  14.168 +
  14.169 +		mov ebx, pc
  14.170 +		    mov     [ebx], eax
  14.171 +	}
  14.172 +#endif
  14.173 +}
  14.174 +
  14.175 +void Interp5(unsigned char *pc, unsigned int c1, unsigned int c2) // Stores the average of c1 and c2 in the 32-bit word at pc.
  14.176 +{ // Both paths work on the whole 32-bit value; nothing is masked per colour channel.
  14.177 +	// Equivalent C: *((int*)pc) = (c1+c2)/2;  The MMX path shifts logically (psrld) so it agrees with the shr in the non-MMX path when bit 31 of the sum is set.
  14.178 +
  14.179 +#ifdef MMX // MSVC-style inline asm using MMX registers
  14.180 +	__asm
  14.181 +	{
  14.182 +		mov eax, pc
  14.183 +		movd mm0, c1
  14.184 +		movd mm1, c2
  14.185 +		paddd mm0, mm1
  14.186 +		psrld mm0, 1
  14.187 +		movd    [eax], mm0
  14.188 +		    EMMS
  14.189 +	}
  14.190 +#else // MSVC-style inline asm using general-purpose registers
  14.191 +	__asm
  14.192 +	{
  14.193 +		mov eax, pc
  14.194 +		mov edx, c1
  14.195 +		add edx, c2
  14.196 +		shr edx, 1
  14.197 +		mov        [eax], edx
  14.198 +	}
  14.199 +#endif
  14.200 +}
  14.201 +
  14.202 +void Interp1_16(unsigned char *pc, unsigned short c1, unsigned short c2)
  14.203 +{
  14.204 +	// 16-bit counterpart of Interp1 (original note: (c1*3+c2)/4); the blend itself is delegated to interp_16_31.
  14.205 +	((unsigned short *)pc)[0] = interp_16_31(c1, c2);
  14.206 +}
  14.207 +
  14.208 +void Interp2_16(unsigned char *pc, unsigned short c1, unsigned short c2, unsigned short c3)
  14.209 +{
  14.210 +	// 16-bit counterpart of Interp2 (original note: (c1*2+c2+c3)/4); the blend itself is delegated to interp_16_211.
  14.211 +	((unsigned short *)pc)[0] = interp_16_211(c1, c2, c3);
  14.212 +}
  14.213 +
  14.214 +void Interp3_16(unsigned char *pc, unsigned short c1, unsigned short c2)
  14.215 +{
  14.216 +	// 16-bit counterpart of Interp3; the blend itself is delegated to interp_16_71.
  14.217 +	// Original notes for the intended weighting: (c1*7+c2)/8, or per channel
  14.218 +	// ((((c1 & 0x00FF00)*7 + (c2 & 0x00FF00)) & 0x0007F800) + (((c1 & 0xFF00FF)*7 + (c2 & 0xFF00FF)) & 0x07F807F8)) >> 3
  14.219 +	((unsigned short *)pc)[0] = interp_16_71(c1, c2);
  14.220 +}
  14.221 +
  14.222 +void Interp4_16(unsigned char *pc, unsigned short c1, unsigned short c2, unsigned short c3)
  14.223 +{
  14.224 +	// 16-bit counterpart of Interp4; the blend is delegated to interp_16_772, called with (c2, c3, c1) - presumably the two 7-weighted colours first; verify against interp.h.
  14.225 +	// Original notes for the intended weighting: (c1*2+(c2+c3)*7)/16, or per channel
  14.226 +	// ((((c1 & 0x00FF00)*2 + ((c2 & 0x00FF00) + (c3 & 0x00FF00))*7) & 0x000FF000) + (((c1 & 0xFF00FF)*2 + ((c2 & 0xFF00FF) + (c3 & 0xFF00FF))*7) & 0x0FF00FF0)) >> 4
  14.227 +	((unsigned short *)pc)[0] = interp_16_772(c2, c3, c1);
  14.228 +}
  14.229 +
  14.230 +void Interp5_16(unsigned char *pc, unsigned short c1, unsigned short c2) // 16-bit counterpart of Interp5; the blend itself is delegated to interp_16_11.
  14.231 +{
  14.232 +	((unsigned short *)pc)[0] = interp_16_11(c1, c2);
  14.233 +}
  14.234 +
  14.235 +bool Diff(unsigned int c1, unsigned int c2) // Returns true when c1 and c2 are "different enough" in their RGBtoYUV() encodings.
  14.236 +{ // A field counts as different when its masked difference exceeds trY / trU / trV (declared in hq_shared32.h).
  14.237 +	unsigned int
  14.238 +	    YUV1 = RGBtoYUV(c1),
  14.239 +	    YUV2 = RGBtoYUV(c2);
  14.240 +
  14.241 +	if (YUV1 == YUV2) return false;  // Identical encodings cannot exceed any threshold; skip the comparisons
  14.242 +
  14.243 +#ifdef MMX // NOTE(review): both paths clear bit 31 instead of taking a true absolute value, so a field where YUV1 < YUV2 wraps to a large value and compares as "different" - confirm this asymmetry is intended
  14.244 +	unsigned int retval;
  14.245 +	__asm
  14.246 +	{
  14.247 +		mov eax, 0x7FFFFFFF
  14.248 +		movd mm7, eax; mm7 = ABS_MASK = 0x7FFFFFFF
  14.249 +
  14.250 +		; Copy source colors in first reg
  14.251 +		movd mm0, YUV1
  14.252 +		movd mm1, YUV2
  14.253 +
  14.254 +		mov eax, 0x00FF0000
  14.255 +		movd mm6, eax; mm6 = Ymask = 0x00FF0000
  14.256 +
  14.257 +		; Calculate color Y difference
  14.258 +		movq mm2, mm0
  14.259 +		movq mm3, mm1
  14.260 +		pand mm2, mm6
  14.261 +		pand mm3, mm6
  14.262 +		psubd mm2, mm3
  14.263 +		pand mm2, mm7
  14.264 +
  14.265 +		mov eax, 0x0000FF00
  14.266 +		movd mm6, eax; mm6 = Umask = 0x0000FF00
  14.267 +
  14.268 +		; Calculate color U difference
  14.269 +		movq mm3, mm0
  14.270 +		movq mm4, mm1
  14.271 +		pand mm3, mm6
  14.272 +		pand mm4, mm6
  14.273 +		psubd mm3, mm4
  14.274 +		pand mm3, mm7
  14.275 +
  14.276 +		mov eax, 0x000000FF
  14.277 +		movd mm6, eax; mm6 = Vmask = 0x000000FF
  14.278 +
  14.279 +		; Calculate color V difference
  14.280 +		movq mm4, mm0
  14.281 +		movq mm5, mm1
  14.282 +		pand mm4, mm6
  14.283 +		pand mm5, mm6
  14.284 +		psubd mm4, mm5
  14.285 +		pand mm4, mm7
  14.286 +
  14.287 +		mov eax, 0x00300000
  14.288 +		movd mm5, eax; mm5 = trY = 0x00300000
  14.289 +		                           mov eax, 0x00000700
  14.290 +		movd mm6, eax; mm6 = trU = 0x00000700
  14.291 +		                           mov eax, 0x00000006
  14.292 +		movd mm7, eax; mm7 = trV = 0x00000006
  14.293 +
  14.294 +		; Compare the results against the thresholds held in mm5-mm7 (not the 32-bit trY/trU/trV objects as 64-bit memory operands)
  14.295 +		pcmpgtd mm2, mm5
  14.296 +		pcmpgtd mm3, mm6
  14.297 +		pcmpgtd mm4, mm7
  14.298 +		por mm2, mm3
  14.299 +		por mm2, mm4
  14.300 +
  14.301 +		movd retval, mm2
  14.302 +
  14.303 +		        EMMS
  14.304 +	}
  14.305 +	return (retval != 0);
  14.306 +#else // plain C path: same three masked-difference comparisons, short-circuited with ||
  14.307 +	return
  14.308 +	    (abs32((YUV1 & Ymask) - (YUV2 & Ymask)) > trY) ||
  14.309 +	    (abs32((YUV1 & Umask) - (YUV2 & Umask)) > trU) ||
  14.310 +	    (abs32((YUV1 & Vmask) - (YUV2 & Vmask)) > trV);
  14.311 +#endif
  14.312 +}
  14.313 +
  14.314 +unsigned int RGBtoYUV(unsigned int c) // Packs a luma/chroma approximation of colour c into one word: Y in bits 16-23, u in bits 8-15, v in bits 0-7 (non-MMX path).
  14.315 +{   // Original note: dividing by 3 slowed the emulation by about 10% (presumably why Y uses a shift by 2 below).
  14.316 +#ifdef MMX // NOTE(review): this path masks G and B but leaves them at bits 8-15 / 16-23 before summing, unlike the non-MMX path which shifts them down first - confirm the two paths are meant to agree
  14.317 +	unsigned int retval;
  14.318 +	__asm
  14.319 +	{
  14.320 +		movd mm0, c
  14.321 +		movq mm1, mm0
  14.322 +		movq mm2, mm0; mm0 = mm1 = mm2 = c
  14.323 +
  14.324 +		                                 mov eax, 0x000000FF
  14.325 +		movd mm5, eax; mm5 = REDMASK = 0x000000FF
  14.326 +		                               mov eax, 0x0000FF00
  14.327 +		movd mm6, eax; mm6 = GREENMASK = 0x0000FF00
  14.328 +		                                 mov eax, 0x00FF0000
  14.329 +		movd mm7, eax; mm7 = BLUEMASK = 0x00FF0000
  14.330 +
  14.331 +		                                pand mm0, mm5
  14.332 +		pand mm1, mm6
  14.333 +		pand mm2, mm7; mm0 = R mm1 = G mm2 = B
  14.334 +
  14.335 +		                                     movq mm3, mm0
  14.336 +		paddd mm3, mm1
  14.337 +		paddd mm3, mm2
  14.338 +		;       psrld mm3, 2; mm3 = Y
  14.339 +		;       pslld mm3, 16
  14.340 +		pslld mm3, 14; mm3 = Y << 16
  14.341 +
  14.342 +		                     mov eax, 512
  14.343 +		movd mm7, eax; mm7 = 128 << 2 = 512
  14.344 +
  14.345 +		                                movq mm4, mm0
  14.346 +		psubd mm4, mm2
  14.347 +		;       psrld mm4, 2
  14.348 +		;       paddd mm4, mm7; mm4 = U
  14.349 +		;       pslld mm4, 8; mm4 = U << 8
  14.350 +		                            paddd mm4, mm7
  14.351 +		pslld mm4, 6
  14.352 +
  14.353 +		mov eax, 128
  14.354 +		movd mm7, eax; mm7 = 128
  14.355 +
  14.356 +		                     movq mm5, mm1
  14.357 +		pslld mm5, 1
  14.358 +		psubd mm5, mm0
  14.359 +		psubd mm5, mm2
  14.360 +		psrld mm5, 3
  14.361 +		paddd mm5, mm7; mm5 = V
  14.362 +
  14.363 +		                      paddd mm5, mm4
  14.364 +		paddd mm5, mm3
  14.365 +
  14.366 +		movd retval, mm5
  14.367 +
  14.368 +		EMMS
  14.369 +	}
  14.370 +	return retval;
  14.371 +#else // plain C path
  14.372 +	unsigned char r, g, b, Y, u, v;
  14.373 +	r = (c & 0x000000FF); // bits 0-7 of c
  14.374 +	g = (c & 0x0000FF00) >> 8; // bits 8-15 of c
  14.375 +	b = (c & 0x00FF0000) >> 16; // bits 16-23 of c
  14.376 +	Y = (r + g + b) >> 2; // sum of the three channels divided by 4 (not 3)
  14.377 +	u = 128 + ((r - b) >> 2); // red/blue difference, biased by 128; relies on >> of a negative int
  14.378 +	v = 128 + ((-r + 2 * g - b) >> 3); // green against red+blue, biased by 128; relies on >> of a negative int
  14.379 +	return (Y << 16) + (u << 8) + v; // field positions match Ymask / Umask / Vmask in hq_shared32.h
  14.380 +
  14.381 +	// Extremely High Quality Code
  14.382 +	//unsigned char r, g, b;
  14.383 +	//r = c & 0xFF;
  14.384 +	//g = (c >> 8) & 0xFF;
  14.385 +	//b = (c >> 16) & 0xFF;
  14.386 +	//unsigned char y, u, v;
  14.387 +	//y = (0.256788 * r  +  0.504129 * g  +  0.097906 * b) + 16;
  14.388 +	//u = (-0.148223 * r  -  0.290993 * g  +  0.439216 * b) + 128;
  14.389 +	//v = (0.439216 * r  -  0.367788 * g  -  0.071427 * b) + 128;
  14.390 +	//return (y << 16) + (u << 8) + v;
  14.391 +#endif
  14.392 +}
  14.393 \ No newline at end of file
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/src/filters/hq_shared32.h	Sun Mar 04 20:32:31 2012 -0600
    15.3 @@ -0,0 +1,31 @@
    15.4 +#ifndef VBA_HQ_SHARED32_H
    15.5 +#define VBA_HQ_SHARED32_H
    15.6 +
    15.7 +#if _MSC_VER > 1000
    15.8 +#pragma once
    15.9 +#endif // _MSC_VER > 1000
   15.10 +
   15.11 +#define abs32(value) ((value) & 0x7FFFFFFF) // keeps the low 31 bits (clears bit 31); this is not a true absolute value. Argument is parenthesised so any expression can be passed.
   15.12 +#define abs16(value) ((value) & 0x7FFF) // keeps the low 15 bits; argument is parenthesised so any expression can be passed
   15.13 +
   15.14 +const int Ymask = 0x00FF0000; // selects bits 16-23 of a packed YUV word
   15.15 +const int Umask = 0x0000FF00; // selects bits 8-15
   15.16 +const int Vmask = 0x000000FF; // selects bits 0-7
   15.17 +const int trY   = 0x00300000; // Diff() threshold for the Ymask field (0x30 in that byte)
   15.18 +const int trU   = 0x00000700; // Diff() threshold for the Umask field (0x07 in that byte)
   15.19 +const int trV   = 0x00000006; // Diff() threshold for the Vmask field
   15.20 +
   15.21 +void Interp1(unsigned char *pc, unsigned int c1, unsigned int c2); // 3:1 mix of c1 and c2, written to pc
   15.22 +void Interp2(unsigned char *pc, unsigned int c1, unsigned int c2, unsigned int c3); // 2:1:1 mix
   15.23 +void Interp3(unsigned char *pc, unsigned int c1, unsigned int c2); // 7:1 mix
   15.24 +void Interp4(unsigned char *pc, unsigned int c1, unsigned int c2, unsigned int c3); // 2:7:7 mix
   15.25 +void Interp5(unsigned char *pc, unsigned int c1, unsigned int c2); // 1:1 mix
   15.26 +void Interp1_16(unsigned char *pc, unsigned short c1, unsigned short c2); // 16-bit variants of the five functions above
   15.27 +void Interp2_16(unsigned char *pc, unsigned short c1, unsigned short c2, unsigned short c3);
   15.28 +void Interp3_16(unsigned char *pc, unsigned short c1, unsigned short c2);
   15.29 +void Interp4_16(unsigned char *pc, unsigned short c1, unsigned short c2, unsigned short c3);
   15.30 +void Interp5_16(unsigned char *pc, unsigned short c1, unsigned short c2);
   15.31 +bool Diff(unsigned int c1, unsigned int c2); // true when the RGBtoYUV() encodings of c1 and c2 differ beyond trY/trU/trV
   15.32 +unsigned int RGBtoYUV(unsigned int c); // packs a Y/u/v approximation of c into one word
   15.33 +
   15.34 +#endif // VBA_HQ_SHARED32_H
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/src/filters/interframe.cpp	Sun Mar 04 20:32:31 2012 -0600
    16.3 @@ -0,0 +1,630 @@
    16.4 +#include <cstdlib>
    16.5 +#include <cstring>
    16.6 +#include "../Port.h"
    16.7 +
    16.8 +#ifdef MMX
    16.9 +extern "C" bool cpu_mmx;
   16.10 +#endif
   16.11 +
   16.12 +/*
   16.13 + * Thanks to Kawaks' Mr. K for the code
   16.14 +
   16.15 +   Incorporated into vba by Anthony Di Franco
   16.16 + */
   16.17 +
   16.18 +static u8 *frm1 = NULL;
   16.19 +static u8 *frm2 = NULL;
   16.20 +static u8 *frm3 = NULL;
   16.21 +
   16.22 +extern u32 RGB_LOW_BITS_MASK;
   16.23 +extern u32 qRGB_COLOR_MASK[2];
   16.24 +
   16.25 +static void Init()
   16.26 +{
   16.27 +	// Allocates the three zero-filled history buffers, each 322 * 242 elements of 4 bytes.
   16.28 +	const int elems = 322 * 242;
   16.29 +	frm1 = static_cast<u8 *>(calloc(elems, 4)); // 1 frame ago
   16.30 +	frm2 = static_cast<u8 *>(calloc(elems, 4)); // 2 frames ago
   16.31 +	frm3 = static_cast<u8 *>(calloc(elems, 4)); // 3 frames ago
   16.32 +	// NOTE(review): the calloc results are not checked here.
   16.33 +}
   16.34 +
   16.35 +// Releases the three frame-history buffers and clears their pointers.
   16.36 +// free(NULL) is a no-op, so no NULL guards are needed; resetting the
   16.37 +// pointers lets SmartIB() run Init() again on its next call.
   16.38 +void InterframeCleanup()
   16.39 +{
   16.40 +	free(frm1);
   16.41 +	free(frm2);
   16.42 +	free(frm3);
   16.43 +	frm1 = frm2 = frm3 = NULL;
   16.44 +}
   16.45 +
   16.46 +#ifdef MMX
   16.47 +static void SmartIB_MMX(u8 *srcPtr, u32 srcPitch, int width, int height) // MMX version of SmartIB(): filters the frame at srcPtr in place using the three history buffers; srcPitch is not used here.
   16.48 +{
   16.49 +	u16 *src0 = (u16 *)srcPtr; // current frame (read and overwritten with the result)
   16.50 +	u16 *src1 = (u16 *)frm1; // history buffers, all accessed as 16-bit pixels
   16.51 +	u16 *src2 = (u16 *)frm2;
   16.52 +	u16 *src3 = (u16 *)frm3; // also receives a copy of the unfiltered current frame
   16.53 +
   16.54 +	int count = width >> 2; // inner-loop iterations per row: each handles 8 bytes (four u16 pixels)
   16.55 +
   16.56 +	for (int i = 0; i < height; i++)
   16.57 +	{
   16.58 +#ifdef __GNUC__ // NOTE(review): uses addl on pointer operands, push/pop around the modified input %4, and declares no clobbers - looks 32-bit-x86 only; confirm
   16.59 +		asm volatile (
   16.60 +		    "push %4\n"
   16.61 +		    "movq 0(%5), %%mm7\n"       // colorMask
   16.62 +		    "0:\n"
   16.63 +		    "movq 0(%0), %%mm0\n"       // src0
   16.64 +		    "movq 0(%1), %%mm1\n"       // src1
   16.65 +		    "movq 0(%2), %%mm2\n"       // src2
   16.66 +		    "movq 0(%3), %%mm3\n"       // src3
   16.67 +		    "movq %%mm0, 0(%3)\n"       // src3 = src0
   16.68 +		    "movq %%mm0, %%mm4\n"
   16.69 +		    "movq %%mm1, %%mm5\n"
   16.70 +		    "pcmpeqw %%mm2, %%mm5\n"       // src1 == src2 (A)
   16.71 +		    "pcmpeqw %%mm3, %%mm4\n"       // src3 == src0 (B)
   16.72 +		    "por %%mm5, %%mm4\n"       // A | B
   16.73 +		    "movq %%mm2, %%mm5\n"
   16.74 +		    "pcmpeqw %%mm0, %%mm5\n"       // src0 == src2 (C)
   16.75 +		    "pcmpeqw %%mm1, %%mm3\n"       // src1 == src3 (D)
   16.76 +		    "por %%mm3, %%mm5\n"       // C|D
   16.77 +		    "pandn %%mm5, %%mm4\n"       // res = (!(A|B))&(C|D): per-pixel mask of pixels to blend
   16.78 +		    "movq %%mm0, %%mm2\n"
   16.79 +		    "pand %%mm7, %%mm2\n"       // color & colorMask
   16.80 +		    "pand %%mm7, %%mm1\n"       // src1 & colorMask
   16.81 +		    "psrlw $1, %%mm2\n"       // (color & colorMask) >> 1 (E)
   16.82 +		    "psrlw $1, %%mm1\n"       // (src1 & colorMask) >> 1 (F)
   16.83 +		    "paddw %%mm2, %%mm1\n"       // E+F
   16.84 +		    "pand %%mm4, %%mm1\n"       // (E+F) & res
   16.85 +		    "pandn %%mm0, %%mm4\n"       // color& !res
   16.86 +
   16.87 +		    "por %%mm1, %%mm4\n"       // blended value where res is set, original color elsewhere
   16.88 +		    "movq %%mm4, 0(%0)\n"       // src0 = res
   16.89 +
   16.90 +		    "addl $8, %0\n"
   16.91 +		    "addl $8, %1\n"
   16.92 +		    "addl $8, %2\n"
   16.93 +		    "addl $8, %3\n"
   16.94 +
   16.95 +		    "decl %4\n"
   16.96 +		    "jnz 0b\n"
   16.97 +		    "pop %4\n"
   16.98 +		    "emms\n"
   16.99 +			: "+r" (src0), "+r" (src1), "+r" (src2), "+r" (src3)
  16.100 +			: "r" (count), "r" (qRGB_COLOR_MASK)
  16.101 +		    );
  16.102 +#else // MSVC-style inline asm: same algorithm as the GNU block above
  16.103 +		__asm {
  16.104 +			movq mm7, qword ptr [qRGB_COLOR_MASK];
  16.105 +			mov	 eax, src0;
  16.106 +			mov	 ebx, src1;
  16.107 +			mov	 ecx, src2;
  16.108 +			mov	 edx, src3;
  16.109 +			mov	 edi, count;
  16.110 +label0:
  16.111 +			movq mm0, qword ptr [eax]; // src0
  16.112 +			movq	   mm1, qword ptr [ebx]; // src1
  16.113 +			movq	   mm2, qword ptr [ecx]; // src2
  16.114 +			movq	   mm3, qword ptr [edx]; // src3
  16.115 +			movq qword ptr [edx], mm0; // src3 = src0
  16.116 +			movq	   mm4, mm0;
  16.117 +			movq	   mm5, mm1;
  16.118 +			pcmpeqw	   mm5, mm2; // src1 == src2 (A)
  16.119 +			pcmpeqw	   mm4, mm3; // src3 == src0 (B)
  16.120 +			por		   mm4, mm5; // A | B
  16.121 +			movq	   mm5, mm2;
  16.122 +			pcmpeqw	   mm5, mm0; // src0 == src2 (C)
  16.123 +			pcmpeqw	   mm3, mm1; // src1 == src3 (D)
  16.124 +			por		   mm5, mm3; // C|D
  16.125 +			pandn	   mm4, mm5; // res = (!(A|B))&(C|D): per-pixel mask of pixels to blend
  16.126 +			movq	   mm2, mm0;
  16.127 +			pand	   mm2, mm7; // color & colorMask
  16.128 +			pand	   mm1, mm7; // src1 & colorMask
  16.129 +			psrlw	   mm2, 1; // (color & colorMask) >> 1 (E)
  16.130 +			psrlw	   mm1, 1; // (src1 & colorMask) >> 1 (F)
  16.131 +			paddw	   mm1, mm2; // E+F
  16.132 +			pand	   mm1, mm4; // (E+F) & res
  16.133 +			pandn	   mm4, mm0; // color & !res
  16.134 +
  16.135 +			por		   mm4, mm1;
  16.136 +			movq qword ptr [eax], mm4; // src0 = res
  16.137 +
  16.138 +			add eax, 8;
  16.139 +			add ebx, 8;
  16.140 +			add ecx, 8;
  16.141 +			add edx, 8;
  16.142 +
  16.143 +			dec edi;
  16.144 +			jnz label0;
  16.145 +			mov src0, eax;
  16.146 +			mov src1, ebx;
  16.147 +			mov src2, ecx;
  16.148 +			mov src3, edx;
  16.149 +			emms;
  16.150 +		}
  16.151 +#endif
  16.152 +		src0 += 2; // step two more u16 pixels at the end of each row - assumes rows are padded by 2 pixels; TODO confirm against the caller's pitch
  16.153 +		src1 += 2;
  16.154 +		src2 += 2;
  16.155 +		src3 += 2;
  16.156 +	}
  16.157 +
  16.158 +	/* Rotate the history: frm3 (now holding this frame) becomes frm1, old frm1 becomes frm2, old frm2 becomes frm3 */
  16.159 +	u8 *temp = frm1;
  16.160 +	frm1 = frm3;
  16.161 +	frm3 = frm2;
  16.162 +	frm2 = temp;
  16.163 +}
  16.164 +
  16.165 +#endif
  16.166 +
  16.167 +void SmartIB(u8 *srcPtr, u32 srcPitch, int width, int height)
  16.168 +{ // 16-bit "smart" interframe blend: rewrites srcPtr in place using the history buffers frm1/frm2/frm3.
  16.169 +	if (frm1 == NULL) // first use; presumably Init() sets up the frm buffers (it is defined outside this view)
  16.170 +	{
  16.171 +		Init();
  16.172 +	}
  16.173 +#ifdef MMX
  16.174 +	if (cpu_mmx) // MMX build and cpu_mmx set: delegate and skip the scalar loop below
  16.175 +	{
  16.176 +		SmartIB_MMX(srcPtr, srcPitch, width, height);
  16.177 +		return;
  16.178 +	}
  16.179 +#endif
  16.180 +	// Scalar path: width is not used here; every u16 of each srcPitch-sized row is visited.
  16.181 +	u16 colorMask = ~RGB_LOW_BITS_MASK; // applied before each >> 1 so the two halves can be summed
  16.182 +
  16.183 +	u16 *src0 = (u16 *)srcPtr; // current frame, overwritten with the result
  16.184 +	u16 *src1 = (u16 *)frm1; // history buffer that is blended with the current pixel
  16.185 +	u16 *src2 = (u16 *)frm2; // history buffer, only compared
  16.186 +	u16 *src3 = (u16 *)frm3; // history buffer, compared and then overwritten with the unblended current frame
  16.187 +
  16.188 +	int sPitch = srcPitch >> 1; // row length in u16 units
  16.189 +
  16.190 +	int pos = 0; // linear index shared by all four buffers
  16.191 +	for (int j = 0; j < height; j++)
  16.192 +		for (int i = 0; i < sPitch; i++)
  16.193 +		{
  16.194 +			u16 color = src0[pos];
  16.195 +			src0[pos] = // blend only if src1 != src2, src3 != color, and (color == src2 or src1 == src3)
  16.196 +			    (src1[pos] != src2[pos]) &&
  16.197 +			    (src3[pos] != color) &&
  16.198 +			    ((color == src2[pos]) || (src1[pos] == src3[pos]))
  16.199 +			    ? (((color & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1)) :
  16.200 +			    color;
  16.201 +			src3[pos] = color; /* oldest buffer now holds newest frame */
  16.202 +			pos++;
  16.203 +		}
  16.204 +
  16.205 +	/* Swap buffers around */
  16.206 +	u8 *temp = frm1; // rotation: frm1 <- old frm3 (just filled with this frame), frm3 <- old frm2, frm2 <- old frm1
  16.207 +	frm1 = frm3;
  16.208 +	frm3 = frm2;
  16.209 +	frm2 = temp;
  16.210 +}
  16.211 +
  16.212 +#ifdef MMX
  16.213 +static void SmartIB32_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
  16.214 +{ // MMX version of the 32-bit smart interframe blend; srcPitch is not used in this function.
  16.215 +	u32 *src0 = (u32 *)srcPtr; // current frame, overwritten with the result
  16.216 +	u32 *src1 = (u32 *)frm1; // history buffer that is blended with the current pixels
  16.217 +	u32 *src2 = (u32 *)frm2; // history buffer, only compared
  16.218 +	u32 *src3 = (u32 *)frm3; // history buffer, compared and then overwritten with the unblended current frame
  16.219 +
  16.220 +	int count = width >> 1; // loop iterations per row: each movq covers two 32-bit pixels
  16.221 +
  16.222 +	for (int i = 0; i < height; i++)
  16.223 +	{
  16.224 +#ifdef __GNUC__
  16.225 +		asm volatile (
  16.226 +		    "push %4\n"       // save count; the loop decrements its register in place
  16.227 +		    "movq 0(%5), %%mm7\n"       // colorMask
  16.228 +		    "0:\n"       // loop head; the body runs before the counter is tested
  16.229 +		    "movq 0(%0), %%mm0\n"       // src0
  16.230 +		    "movq 0(%1), %%mm1\n"       // src1
  16.231 +		    "movq 0(%2), %%mm2\n"       // src2
  16.232 +		    "movq 0(%3), %%mm3\n"       // src3
  16.233 +		    "movq %%mm0, 0(%3)\n"       // src3 = src0
  16.234 +		    "movq %%mm0, %%mm4\n"       // working copy of src0 for comparison B
  16.235 +		    "movq %%mm1, %%mm5\n"       // working copy of src1 for comparison A
  16.236 +		    "pcmpeqd %%mm2, %%mm5\n"       // src1 == src2 (A)
  16.237 +		    "pcmpeqd %%mm3, %%mm4\n"       // src3 == src0 (B)
  16.238 +		    "por %%mm5, %%mm4\n"       // A | B
  16.239 +		    "movq %%mm2, %%mm5\n"
  16.240 +		    "pcmpeqd %%mm0, %%mm5\n"       // src0 == src2 (C)
  16.241 +		    "pcmpeqd %%mm1, %%mm3\n"       // src1 == src3 (D)
  16.242 +		    "por %%mm3, %%mm5\n"       // C|D
  16.243 +		    "pandn %%mm5, %%mm4\n"       // (!(A|B))&(C|D)
  16.244 +		    "movq %%mm0, %%mm2\n"
  16.245 +		    "pand %%mm7, %%mm2\n"       // color & colorMask
  16.246 +		    "pand %%mm7, %%mm1\n"       // src1 & colorMask
  16.247 +		    "psrld $1, %%mm2\n"       // (color & colorMask) >> 1 (E)
  16.248 +		    "psrld $1, %%mm1\n"       // (src & colorMask) >> 1 (F)
  16.249 +		    "paddd %%mm2, %%mm1\n"       // E+F
  16.250 +		    "pand %%mm4, %%mm1\n"       // (E+F) & res
  16.251 +		    "pandn %%mm0, %%mm4\n"       // color& !res
  16.252 +
  16.253 +		    "por %%mm1, %%mm4\n"       // select: blended value where res is set, original color elsewhere
  16.254 +		    "movq %%mm4, 0(%0)\n"       // src0 = res
  16.255 +
  16.256 +		    "addl $8, %0\n"       // advance each pointer by 8 bytes (two pixels)
  16.257 +		    "addl $8, %1\n"       // NOTE(review): addl on pointer operands looks 32-bit only - confirm target
  16.258 +		    "addl $8, %2\n"
  16.259 +		    "addl $8, %3\n"
  16.260 +
  16.261 +		    "decl %4\n"       // NOTE(review): %4 is an input-only "r" operand yet is modified; a zero count would wrap
  16.262 +		    "jnz 0b\n"
  16.263 +		    "pop %4\n"       // restore count for the next row
  16.264 +		    "emms\n"       // leave MMX state
  16.265 +			: "+r" (src0), "+r" (src1), "+r" (src2), "+r" (src3)
  16.266 +			: "r" (count), "r" (qRGB_COLOR_MASK) // NOTE(review): no clobber list (mm registers, flags, memory)
  16.267 +		    );
  16.268 +#else
  16.269 +		__asm {
  16.270 +			movq mm7, qword ptr [qRGB_COLOR_MASK];
  16.271 +			mov	 eax, src0; // load the row pointers and the counter into registers
  16.272 +			mov	 ebx, src1;
  16.273 +			mov	 ecx, src2;
  16.274 +			mov	 edx, src3;
  16.275 +			mov	 edi, count;
  16.276 +label0:
  16.277 +			movq mm0, qword ptr [eax]; // src0
  16.278 +			movq	   mm1, qword ptr [ebx]; // src1
  16.279 +			movq	   mm2, qword ptr [ecx]; // src2
  16.280 +			movq	   mm3, qword ptr [edx]; // src3
  16.281 +			movq qword ptr [edx], mm0; // src3 = src0
  16.282 +			movq	   mm4, mm0;
  16.283 +			movq	   mm5, mm1;
  16.284 +			pcmpeqd	   mm5, mm2; // src1 == src2 (A)
  16.285 +			pcmpeqd	   mm4, mm3; // src3 == src0 (B)
  16.286 +			por		   mm4, mm5; // A | B
  16.287 +			movq	   mm5, mm2;
  16.288 +			pcmpeqd	   mm5, mm0; // src0 == src2 (C)
  16.289 +			pcmpeqd	   mm3, mm1; // src1 == src3 (D)
  16.290 +			por		   mm5, mm3; // C|D
  16.291 +			pandn	   mm4, mm5; // (!(A|B))&(C|D)
  16.292 +			movq	   mm2, mm0;
  16.293 +			pand	   mm2, mm7; // color & colorMask
  16.294 +			pand	   mm1, mm7; // src1 & colorMask
  16.295 +			psrld	   mm2, 1; // (color & colorMask) >> 1 (E)
  16.296 +			psrld	   mm1, 1; // (src & colorMask) >> 1 (F)
  16.297 +			paddd	   mm1, mm2; // E+F
  16.298 +			pand	   mm1, mm4; // (E+F) & res
  16.299 +			pandn	   mm4, mm0; // color & !res
  16.300 +
  16.301 +			por		   mm4, mm1;
  16.302 +			movq qword ptr [eax], mm4; // src0 = res
  16.303 +
  16.304 +			add eax, 8;
  16.305 +			add ebx, 8;
  16.306 +			add ecx, 8;
  16.307 +			add edx, 8;
  16.308 +
  16.309 +			dec edi;
  16.310 +			jnz label0;
  16.311 +			mov src0, eax; // write the advanced pointers back for the next row
  16.312 +			mov src1, ebx;
  16.313 +			mov src2, ecx;
  16.314 +			mov src3, edx;
  16.315 +			emms;
  16.316 +		}
  16.317 +#endif
  16.318 +		// After each row, step one extra u32 in every buffer (presumably row padding - TODO confirm).
  16.319 +		src0++;
  16.320 +		src1++;
  16.321 +		src2++;
  16.322 +		src3++;
  16.323 +	}
  16.324 +	/* Swap buffers around */
  16.325 +	u8 *temp = frm1; // rotation: frm1 <- old frm3 (just filled with this frame), frm3 <- old frm2, frm2 <- old frm1
  16.326 +	frm1 = frm3;
  16.327 +	frm3 = frm2;
  16.328 +	frm2 = temp;
  16.329 +}
  16.330 +
  16.331 +#endif
  16.332 +
  16.333 +void SmartIB32(u8 *srcPtr, u32 srcPitch, int width, int height)
  16.334 +{ // 32-bit "smart" interframe blend: rewrites srcPtr in place using the history buffers frm1/frm2/frm3.
  16.335 +	if (frm1 == NULL) // first use; presumably Init() sets up the frm buffers (it is defined outside this view)
  16.336 +	{
  16.337 +		Init();
  16.338 +	}
  16.339 +#ifdef MMX
  16.340 +	if (cpu_mmx) // MMX build and cpu_mmx set: delegate and skip the scalar loop below
  16.341 +	{
  16.342 +		SmartIB32_MMX(srcPtr, srcPitch, width, height);
  16.343 +		return;
  16.344 +	}
  16.345 +#endif
  16.346 +	// Scalar path: width is not used here; every u32 of each srcPitch-sized row is visited.
  16.347 +	u32 *src0 = (u32 *)srcPtr; // current frame, overwritten with the result
  16.348 +	u32 *src1 = (u32 *)frm1; // history buffer that is blended with the current pixel
  16.349 +	u32 *src2 = (u32 *)frm2; // history buffer, only compared
  16.350 +	u32 *src3 = (u32 *)frm3; // history buffer, compared and then overwritten with the unblended current frame
  16.351 +
  16.352 +	u32 colorMask = 0xfefefe; // clears bit 0 of the three low bytes and all of bits 24-31 before halving
  16.353 +
  16.354 +	int sPitch = srcPitch >> 2; // row length in u32 units
  16.355 +	int pos	   = 0; // linear index shared by all four buffers
  16.356 +
  16.357 +	for (int j = 0; j < height; j++)
  16.358 +		for (int i = 0; i < sPitch; i++)
  16.359 +		{
  16.360 +			u32 color = src0[pos];
  16.361 +			src0[pos] = // blend only if src1 != src2, src3 != color, and (color == src2 or src1 == src3)
  16.362 +			    (src1[pos] != src2[pos]) &&
  16.363 +			    (src3[pos] != color) &&
  16.364 +			    ((color == src2[pos]) || (src1[pos] == src3[pos]))
  16.365 +			    ? (((color & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1)) :
  16.366 +			    color;
  16.367 +			src3[pos] = color; /* oldest buffer now holds newest frame */
  16.368 +			pos++;
  16.369 +		}
  16.370 +
  16.371 +	/* Swap buffers around */
  16.372 +	u8 *temp = frm1; // rotation: frm1 <- old frm3 (just filled with this frame), frm3 <- old frm2, frm2 <- old frm1
  16.373 +	frm1 = frm3;
  16.374 +	frm3 = frm2;
  16.375 +	frm2 = temp;
  16.376 +}
  16.377 +
  16.378 +#ifdef MMX
  16.379 +static void MotionBlurIB_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
  16.380 +{ // MMX version of the 16-bit motion blur: averages each pixel with frm1; srcPitch is not used here.
  16.381 +	u16 *src0 = (u16 *)srcPtr; // current frame, overwritten with the average
  16.382 +	u16 *src1 = (u16 *)frm1; // previous-frame buffer, overwritten with the unblended current frame
  16.383 +
  16.384 +	int count = width >> 2; // loop iterations per row: each movq covers four 16-bit pixels
  16.385 +
  16.386 +	for (int i = 0; i < height; i++)
  16.387 +	{
  16.388 +#ifdef __GNUC__
  16.389 +		asm volatile (
  16.390 +		    "push %2\n"       // save count; the loop decrements its register in place
  16.391 +		    "movq 0(%3), %%mm7\n"       // colorMask
  16.392 +		    "0:\n"       // loop head; the body runs before the counter is tested
  16.393 +		    "movq 0(%0), %%mm0\n"       // src0
  16.394 +		    "movq 0(%1), %%mm1\n"       // src1
  16.395 +		    "movq %%mm0, 0(%1)\n"       // src1 = src0
  16.396 +		    "pand %%mm7, %%mm0\n"       // color & colorMask
  16.397 +		    "pand %%mm7, %%mm1\n"       // src1 & colorMask
  16.398 +		    "psrlw $1, %%mm0\n"       // (color & colorMask) >> 1 (E)
  16.399 +		    "psrlw $1, %%mm1\n"       // (src & colorMask) >> 1 (F)
  16.400 +		    "paddw %%mm1, %%mm0\n"       // E+F
  16.401 +
  16.402 +		    "movq %%mm0, 0(%0)\n"       // src0 = res
  16.403 +
  16.404 +		    "addl $8, %0\n"       // advance both pointers by 8 bytes (four pixels)
  16.405 +		    "addl $8, %1\n"       // NOTE(review): addl on pointer operands looks 32-bit only - confirm target
  16.406 +
  16.407 +		    "decl %2\n"       // NOTE(review): %2 is an input-only "r" operand yet is modified; a zero count would wrap
  16.408 +		    "jnz 0b\n"
  16.409 +		    "pop %2\n"       // restore count for the next row
  16.410 +		    "emms\n"       // leave MMX state
  16.411 +			: "+r" (src0), "+r" (src1)
  16.412 +			: "r" (count), "r" (qRGB_COLOR_MASK) // NOTE(review): no clobber list (mm registers, flags, memory)
  16.413 +		    );
  16.414 +#else
  16.415 +		__asm {
  16.416 +			movq mm7, qword ptr [qRGB_COLOR_MASK];
  16.417 +			mov	 eax, src0; // load the row pointers and the counter into registers
  16.418 +			mov	 ebx, src1;
  16.419 +			mov	 edi, count;
  16.420 +label0:
  16.421 +			movq mm0, qword ptr [eax]; // src0
  16.422 +			movq	   mm1, qword ptr [ebx]; // src1
  16.423 +			movq qword ptr [ebx], mm0; // src1 = src0
  16.424 +			pand	   mm0, mm7; // color & colorMask
  16.425 +			pand	   mm1, mm7; // src1 & colorMask
  16.426 +			psrlw	   mm0, 1; // (color & colorMask) >> 1 (E)
  16.427 +			psrlw	   mm1, 1; // (src & colorMask) >> 1 (F)
  16.428 +			paddw	   mm0, mm1; // E+F
  16.429 +
  16.430 +			movq qword ptr [eax], mm0; // src0 = res
  16.431 +
  16.432 +			add eax, 8;
  16.433 +			add ebx, 8;
  16.434 +
  16.435 +			dec edi;
  16.436 +			jnz label0;
  16.437 +			mov src0, eax; // write the advanced pointers back for the next row
  16.438 +			mov src1, ebx;
  16.439 +			emms;
  16.440 +		}
  16.441 +#endif
  16.442 +		src0 += 2; // step two extra u16 after each row (presumably row padding - TODO confirm)
  16.443 +		src1 += 2;
  16.444 +	}
  16.445 +}
  16.446 +
  16.447 +#endif
  16.448 +
  16.449 +void MotionBlurIB(u8 *srcPtr, u32 srcPitch, int width, int height)
  16.450 +{ // 16-bit motion blur: replaces every pixel of srcPtr with the average of itself and the frame kept in frm1.
  16.451 +	if (frm1 == NULL) // first use; presumably Init() sets up the frm buffers (it is defined outside this view)
  16.452 +	{
  16.453 +		Init();
  16.454 +	}
  16.455 +
  16.456 +#ifdef MMX
  16.457 +	if (cpu_mmx) // MMX build and cpu_mmx set: delegate and skip the scalar loop below
  16.458 +	{
  16.459 +		MotionBlurIB_MMX(srcPtr, srcPitch, width, height);
  16.460 +		return;
  16.461 +	}
  16.462 +#endif
  16.463 +	// Scalar path: width is not used here; every u16 of each srcPitch-sized row is visited.
  16.464 +	u16 colorMask = ~RGB_LOW_BITS_MASK; // applied before each >> 1 so the two halves can be summed
  16.465 +
  16.466 +	u16 *src0 = (u16 *)srcPtr; // current frame, overwritten with the average
  16.467 +	u16 *src1 = (u16 *)frm1; // previous-frame buffer
  16.468 +
  16.469 +	int sPitch = srcPitch >> 1; // row length in u16 units
  16.470 +
  16.471 +	int pos = 0; // linear index shared by both buffers
  16.472 +	for (int j = 0; j < height; j++)
  16.473 +		for (int i = 0; i < sPitch; i++)
  16.474 +		{
  16.475 +			u16 color = src0[pos];
  16.476 +			src0[pos] = // half of the current pixel plus half of the stored pixel
  16.477 +			    (((color & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1));
  16.478 +			src1[pos] = color; // remember the unblended pixel for the next call
  16.479 +			pos++;
  16.480 +		}
  16.481 +}
  16.482 +
  16.483 +#ifdef MMX
  16.484 +static void MotionBlurIB32_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
  16.485 +{ // MMX version of the 32-bit motion blur: averages each pixel with frm1; srcPitch is not used here.
  16.486 +	u32 *src0 = (u32 *)srcPtr; // current frame, overwritten with the average
  16.487 +	u32 *src1 = (u32 *)frm1; // previous-frame buffer, overwritten with the unblended current frame
  16.488 +
  16.489 +	int count = width >> 1; // loop iterations per row: each movq covers two 32-bit pixels
  16.490 +
  16.491 +	for (int i = 0; i < height; i++)
  16.492 +	{
  16.493 +#ifdef __GNUC__
  16.494 +		asm volatile (
  16.495 +		    "push %2\n"       // save count; the loop decrements its register in place
  16.496 +		    "movq 0(%3), %%mm7\n"       // colorMask
  16.497 +		    "0:\n"       // loop head; the body runs before the counter is tested
  16.498 +		    "movq 0(%0), %%mm0\n"       // src0
  16.499 +		    "movq 0(%1), %%mm1\n"       // src1
  16.500 +		    "movq %%mm0, 0(%1)\n"       // src1 = src0
  16.501 +		    "pand %%mm7, %%mm0\n"       // color & colorMask
  16.502 +		    "pand %%mm7, %%mm1\n"       // src1 & colorMask
  16.503 +		    "psrld $1, %%mm0\n"       // (color & colorMask) >> 1 (E)
  16.504 +		    "psrld $1, %%mm1\n"       // (src & colorMask) >> 1 (F)
  16.505 +		    "paddd %%mm1, %%mm0\n"       // E+F
  16.506 +
  16.507 +		    "movq %%mm0, 0(%0)\n"       // src0 = res
  16.508 +
  16.509 +		    "addl $8, %0\n"       // advance both pointers by 8 bytes (two pixels)
  16.510 +		    "addl $8, %1\n"       // NOTE(review): addl on pointer operands looks 32-bit only - confirm target
  16.511 +
  16.512 +		    "decl %2\n"       // NOTE(review): %2 is an input-only "r" operand yet is modified; a zero count would wrap
  16.513 +		    "jnz 0b\n"
  16.514 +		    "pop %2\n"       // restore count for the next row
  16.515 +		    "emms\n"       // leave MMX state
  16.516 +			: "+r" (src0), "+r" (src1)
  16.517 +			: "r" (count), "r" (qRGB_COLOR_MASK) // NOTE(review): no clobber list (mm registers, flags, memory)
  16.518 +		    );
  16.519 +#else
  16.520 +		__asm {
  16.521 +			movq mm7, qword ptr [qRGB_COLOR_MASK];
  16.522 +			mov	 eax, src0; // load the row pointers and the counter into registers
  16.523 +			mov	 ebx, src1;
  16.524 +			mov	 edi, count;
  16.525 +label0:
  16.526 +			movq mm0, qword ptr [eax]; // src0
  16.527 +			movq	   mm1, qword ptr [ebx]; // src1
  16.528 +			movq qword ptr [ebx], mm0; // src1 = src0
  16.529 +			pand	   mm0, mm7; // color & colorMask
  16.530 +			pand	   mm1, mm7; // src1 & colorMask
  16.531 +			psrld	   mm0, 1; // (color & colorMask) >> 1 (E)
  16.532 +			psrld	   mm1, 1; // (src & colorMask) >> 1 (F)
  16.533 +			paddd	   mm0, mm1; // E+F
  16.534 +
  16.535 +			movq qword ptr [eax], mm0; // src0 = res
  16.536 +
  16.537 +			add eax, 8;
  16.538 +			add ebx, 8;
  16.539 +
  16.540 +			dec edi;
  16.541 +			jnz label0;
  16.542 +			mov src0, eax; // write the advanced pointers back for the next row
  16.543 +			mov src1, ebx;
  16.544 +			emms;
  16.545 +		}
  16.546 +#endif
  16.547 +		src0++; // step one extra u32 after each row (presumably row padding - TODO confirm)
  16.548 +		src1++;
  16.549 +	}
  16.550 +}
  16.551 +
  16.552 +#endif
  16.553 +
  16.554 +void MotionBlurIB32(u8 *srcPtr, u32 srcPitch, int width, int height)
  16.555 +{ // 32-bit motion blur: replaces every pixel of srcPtr with the average of itself and the frame kept in frm1.
  16.556 +	if (frm1 == NULL) // first use; presumably Init() sets up the frm buffers (it is defined outside this view)
  16.557 +	{
  16.558 +		Init();
  16.559 +	}
  16.560 +
  16.561 +#ifdef MMX
  16.562 +	if (cpu_mmx) // MMX build and cpu_mmx set: delegate and skip the scalar loop below
  16.563 +	{
  16.564 +		MotionBlurIB32_MMX(srcPtr, srcPitch, width, height);
  16.565 +		return;
  16.566 +	}
  16.567 +#endif
  16.568 +	// Scalar path: width is not used here; every u32 of each srcPitch-sized row is visited.
  16.569 +	u32 *src0 = (u32 *)srcPtr; // current frame, overwritten with the average
  16.570 +	u32 *src1 = (u32 *)frm1; // previous-frame buffer
  16.571 +
  16.572 +	u32 colorMask = 0xfefefe; // clears bit 0 of the three low bytes and all of bits 24-31 before halving
  16.573 +
  16.574 +	int sPitch = srcPitch >> 2; // row length in u32 units
  16.575 +	int pos	   = 0; // linear index shared by both buffers
  16.576 +
  16.577 +	for (int j = 0; j < height; j++)
  16.578 +		for (int i = 0; i < sPitch; i++)
  16.579 +		{
  16.580 +			u32 color = src0[pos];
  16.581 +			src0[pos] = (((color & colorMask) >> 1) + // half of the current pixel ...
  16.582 +			             ((src1[pos] & colorMask) >> 1)); // ... plus half of the stored pixel
  16.583 +			src1[pos] = color; // remember the unblended pixel for the next call
  16.584 +			pos++;
  16.585 +		}
  16.586 +}
  16.587 +
  16.588 +static int count = 0; // toggles 0/1 on every InterlaceIB call to alternate the favoured row parity
  16.589 +
  16.590 +void InterlaceIB(u8 *srcPtr, u32 srcPitch, int width, int height)
  16.591 +{ // 16-bit interlace-style blend: rows alternate between favouring the current frame and the frame kept in frm1.
  16.592 +	if (frm1 == NULL) // first use; presumably Init() sets up the frm buffers (it is defined outside this view)
  16.593 +	{
  16.594 +		Init();
  16.595 +	}
  16.596 +	// There is no MMX variant here; width is not used and every u16 of each srcPitch-sized row is visited.
  16.597 +	u16 colorMask = ~RGB_LOW_BITS_MASK; // re-applied before every >> 1
  16.598 +
  16.599 +	u16 *src0 = (u16 *)srcPtr; // current frame, overwritten with the result
  16.600 +	u16 *src1 = (u16 *)frm1; // previous-frame buffer
  16.601 +
  16.602 +	int sPitch = srcPitch >> 1; // row length in u16 units
  16.603 +
  16.604 +	int pos = 0; // linear index shared by both buffers
  16.605 +	for (int j = 0; j < height; j++)
  16.606 +	{
  16.607 +		bool render = count ? (j & 1) != 0 : (j & 1) == 0; // count == 0: even rows; otherwise odd rows
  16.608 +		if (render)
  16.609 +		{
  16.610 +			for (int i = 0; i < sPitch; i++)
  16.611 +			{
  16.612 +				u16 color = src0[pos];
  16.613 +				src0[pos] = // current pixel halved once, stored pixel halved twice
  16.614 +				    (((color & colorMask) >> 1) + ((((src1[pos] & colorMask) >> 1) & colorMask) >> 1));
  16.615 +				src1[pos] = color; // remember the unblended pixel for the next call
  16.616 +				pos++;
  16.617 +			}
  16.618 +		}
  16.619 +		else
  16.620 +		{
  16.621 +			for (int i = 0; i < sPitch; i++)
  16.622 +			{
  16.623 +				u16 color = src0[pos];
  16.624 +				src0[pos] = // current pixel halved twice, stored pixel halved once
  16.625 +				    (((((color & colorMask) >> 1) & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1));
  16.626 +				src1[pos] = color; // remember the unblended pixel for the next call
  16.627 +				pos++;
  16.628 +			}
  16.629 +		}
  16.630 +	}
  16.631 +	count = count ^ 1; // flip the favoured parity for the next frame
  16.632 +}
  16.633 +
    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/src/filters/interp.h	Sun Mar 04 20:32:31 2012 -0600
    17.3 @@ -0,0 +1,351 @@
    17.4 +/*
    17.5 + * This file is part of the Advance project.
    17.6 + *
    17.7 + * Copyright (C) 2003 Andrea Mazzoleni
    17.8 + *
    17.9 + * This program is free software; you can redistribute it and/or modify
   17.10 + * it under the terms of the GNU General Public License as published by
   17.11 + * the Free Software Foundation; either version 2 of the License, or
   17.12 + * (at your option) any later version.
   17.13 + *
   17.14 + * This program is distributed in the hope that it will be useful,
   17.15 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
   17.16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   17.17 + * GNU General Public License for more details.
   17.18 + *
   17.19 + * You should have received a copy of the GNU General Public License
   17.20 + * along with this program; if not, write to the Free Software
   17.21 + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   17.22 + *
   17.23 + * In addition, as a special exception, Andrea Mazzoleni
   17.24 + * gives permission to link the code of this program with
   17.25 + * the MAME library (or with modified versions of MAME that use the
   17.26 + * same license as MAME), and distribute linked combinations including
   17.27 + * the two.  You must obey the GNU General Public License in all
   17.28 + * respects for all of the code used other than MAME.  If you modify
   17.29 + * this file, you may extend this exception to your version of the
   17.30 + * file, but you are not obligated to do so.  If you do not wish to
   17.31 + * do so, delete this exception statement from your version.
   17.32 + */
   17.33 +
   17.34 +#ifndef __INTERP_H
   17.35 +#define __INTERP_H
   17.36 +
   17.37 +/***************************************************************************/
   17.38 +/* Basic types */
   17.39 +
   17.40 +/***************************************************************************/
   17.41 +/* interpolation */
   17.42 +
   17.43 +extern unsigned interp_mask[2];
   17.44 +extern unsigned interp_bits_per_pixel;
   17.45 +
   17.46 +#define INTERP_16_MASK_1(v) ((v) & interp_mask[0]) /* keep the interp_mask[0] channel group; v is parenthesized so any expression masks as a whole */
   17.47 +#define INTERP_16_MASK_2(v) ((v) & interp_mask[1]) /* keep the interp_mask[1] channel group; v is parenthesized so any expression masks as a whole */
   17.48 +
   17.49 +static inline u16 interp_16_521(u16 p1, u16 p2, u16 p3)
   17.50 +{ // (5*p1 + 2*p2 + p3) / 8, evaluated separately on the two interp_mask channel groups and OR-ed together
   17.51 +  return INTERP_16_MASK_1((INTERP_16_MASK_1(p1)*5 + INTERP_16_MASK_1(p2)*2 + INTERP_16_MASK_1(p3)*1) / 8)
   17.52 +    | INTERP_16_MASK_2((INTERP_16_MASK_2(p1)*5 + INTERP_16_MASK_2(p2)*2 + INTERP_16_MASK_2(p3)*1) / 8);
   17.53 +}
   17.54 +
   17.55 +static inline u16 interp_16_332(u16 p1, u16 p2, u16 p3)
   17.56 +{ // (3*p1 + 3*p2 + 2*p3) / 8, evaluated separately on the two interp_mask channel groups and OR-ed together
   17.57 +  return INTERP_16_MASK_1((INTERP_16_MASK_1(p1)*3 + INTERP_16_MASK_1(p2)*3 + INTERP_16_MASK_1(p3)*2) / 8)
   17.58 +    | INTERP_16_MASK_2((INTERP_16_MASK_2(p1)*3 + INTERP_16_MASK_2(p2)*3 + INTERP_16_MASK_2(p3)*2) / 8);
   17.59 +}
   17.60 +
   17.61 +static inline u16 interp_16_611(u16 p1, u16 p2, u16 p3)
   17.62 +{ // (6*p1 + p2 + p3) / 8, evaluated separately on the two interp_mask channel groups and OR-ed together
   17.63 +  return INTERP_16_MASK_1((INTERP_16_MASK_1(p1)*6 + INTERP_16_MASK_1(p2) + INTERP_16_MASK_1(p3)) / 8)
   17.64 +    | INTERP_16_MASK_2((INTERP_16_MASK_2(p1)*6 + INTERP_16_MASK_2(p2) + INTERP_16_MASK_2(p3)) / 8);
   17.65 +}
   17.66 +
   17.67 +static inline u16 interp_16_71(u16 p1, u16 p2)
   17.68 +{ // (7*p1 + p2) / 8, evaluated separately on the two interp_mask channel groups and OR-ed together
   17.69 +  return INTERP_16_MASK_1((INTERP_16_MASK_1(p1)*7 + INTERP_16_MASK_1(p2)) / 8)
   17.70 +    | INTERP_16_MASK_2((INTERP_16_MASK_2(p1)*7 + INTERP_16_MASK_2(p2)) / 8);
   17.71 +}
   17.72 +
   17.73 +static inline u16 interp_16_211(u16 p1, u16 p2, u16 p3)
   17.74 +{ // (2*p1 + p2 + p3) / 4, evaluated separately on the two interp_mask channel groups and OR-ed together
   17.75 +  return INTERP_16_MASK_1((INTERP_16_MASK_1(p1)*2 + INTERP_16_MASK_1(p2) + INTERP_16_MASK_1(p3)) / 4)
   17.76 +    | INTERP_16_MASK_2((INTERP_16_MASK_2(p1)*2 + INTERP_16_MASK_2(p2) + INTERP_16_MASK_2(p3)) / 4);
   17.77 +}
   17.78 +
   17.79 +static inline u16 interp_16_772(u16 p1, u16 p2, u16 p3)
   17.80 +{ // (7*(p1 + p2) + 2*p3) / 16, evaluated separately on the two interp_mask channel groups and OR-ed together
   17.81 +  return INTERP_16_MASK_1(((INTERP_16_MASK_1(p1) + INTERP_16_MASK_1(p2))*7 + INTERP_16_MASK_1(p3)*2) / 16)
   17.82 +    | INTERP_16_MASK_2(((INTERP_16_MASK_2(p1) + INTERP_16_MASK_2(p2))*7 + INTERP_16_MASK_2(p3)*2) / 16);
   17.83 +}
   17.84 +
   17.85 +static inline u16 interp_16_11(u16 p1, u16 p2)
   17.86 +{ // (p1 + p2) / 2, evaluated separately on the two interp_mask channel groups and OR-ed together
   17.87 +  return INTERP_16_MASK_1((INTERP_16_MASK_1(p1) + INTERP_16_MASK_1(p2)) / 2)
   17.88 +    | INTERP_16_MASK_2((INTERP_16_MASK_2(p1) + INTERP_16_MASK_2(p2)) / 2);
   17.89 +}
   17.90 +
   17.91 +static inline u16 interp_16_31(u16 p1, u16 p2)
   17.92 +{ // (3*p1 + p2) / 4, evaluated separately on the two interp_mask channel groups and OR-ed together
   17.93 +  return INTERP_16_MASK_1((INTERP_16_MASK_1(p1)*3 + INTERP_16_MASK_1(p2)) / 4)
   17.94 +    | INTERP_16_MASK_2((INTERP_16_MASK_2(p1)*3 + INTERP_16_MASK_2(p2)) / 4);
   17.95 +}
   17.96 +
   17.97 +static inline u16 interp_16_1411(u16 p1, u16 p2, u16 p3)
   17.98 +{ // (14*p1 + p2 + p3) / 16, evaluated separately on the two interp_mask channel groups and OR-ed together
   17.99 +  return INTERP_16_MASK_1((INTERP_16_MASK_1(p1)*14 + INTERP_16_MASK_1(p2) + INTERP_16_MASK_1(p3)) / 16)
  17.100 +    | INTERP_16_MASK_2((INTERP_16_MASK_2(p1)*14 + INTERP_16_MASK_2(p2) + INTERP_16_MASK_2(p3)) / 16);
  17.101 +}
  17.102 +
  17.103 +static inline u16 interp_16_431(u16 p1, u16 p2, u16 p3)
  17.104 +{ // (4*p1 + 3*p2 + p3) / 8, evaluated separately on the two interp_mask channel groups and OR-ed together
  17.105 +  return INTERP_16_MASK_1((INTERP_16_MASK_1(p1)*4 + INTERP_16_MASK_1(p2)*3 + INTERP_16_MASK_1(p3)) / 8)
  17.106 +    | INTERP_16_MASK_2((INTERP_16_MASK_2(p1)*4 + INTERP_16_MASK_2(p2)*3 + INTERP_16_MASK_2(p3)) / 8);
  17.107 +}
  17.108 +
  17.109 +static inline u16 interp_16_53(u16 p1, u16 p2)
  17.110 +{ // (5*p1 + 3*p2) / 8, evaluated separately on the two interp_mask channel groups and OR-ed together
  17.111 +  return INTERP_16_MASK_1((INTERP_16_MASK_1(p1)*5 + INTERP_16_MASK_1(p2)*3) / 8)
  17.112 +    | INTERP_16_MASK_2((INTERP_16_MASK_2(p1)*5 + INTERP_16_MASK_2(p2)*3) / 8);
  17.113 +}
  17.114 +
  17.115 +static inline u16 interp_16_151(u16 p1, u16 p2)
  17.116 +{ // (15*p1 + p2) / 16, evaluated separately on the two interp_mask channel groups and OR-ed together
  17.117 +  return INTERP_16_MASK_1((INTERP_16_MASK_1(p1)*15 + INTERP_16_MASK_1(p2)) / 16)
  17.118 +    | INTERP_16_MASK_2((INTERP_16_MASK_2(p1)*15 + INTERP_16_MASK_2(p2)) / 16);
  17.119 +}
  17.120 +
  17.121 +static inline u16 interp_16_97(u16 p1, u16 p2)
  17.122 +{ // (9*p1 + 7*p2) / 16, evaluated separately on the two interp_mask channel groups and OR-ed together
  17.123 +  return INTERP_16_MASK_1((INTERP_16_MASK_1(p1)*9 + INTERP_16_MASK_1(p2)*7) / 16)
  17.124 +    | INTERP_16_MASK_2((INTERP_16_MASK_2(p1)*9 + INTERP_16_MASK_2(p2)*7) / 16);
  17.125 +}
  17.126 +
  17.127 +#define INTERP_32_MASK_1(v) ((v) & 0xFF00FF) /* keep bits 16-23 and 0-7; v is parenthesized so any expression masks as a whole */
  17.128 +#define INTERP_32_MASK_2(v) ((v) & 0x00FF00) /* keep bits 8-15; v is parenthesized so any expression masks as a whole */
  17.129 +
  17.130 +static inline u32 interp_32_521(u32 p1, u32 p2, u32 p3)
  17.131 +{ // (5*p1 + 2*p2 + p3) / 8 on the 0xFF00FF and 0x00FF00 channel groups; bits 24-31 are not carried into the result
  17.132 +  return INTERP_32_MASK_1((INTERP_32_MASK_1(p1)*5 + INTERP_32_MASK_1(p2)*2 + INTERP_32_MASK_1(p3)*1) / 8)
  17.133 +    | INTERP_32_MASK_2((INTERP_32_MASK_2(p1)*5 + INTERP_32_MASK_2(p2)*2 + INTERP_32_MASK_2(p3)*1) / 8);
  17.134 +}
  17.135 +
  17.136 +static inline u32 interp_32_332(u32 p1, u32 p2, u32 p3)
  17.137 +{ // (3*p1 + 3*p2 + 2*p3) / 8 on the 0xFF00FF and 0x00FF00 channel groups; bits 24-31 are not carried into the result
  17.138 +  return INTERP_32_MASK_1((INTERP_32_MASK_1(p1)*3 + INTERP_32_MASK_1(p2)*3 + INTERP_32_MASK_1(p3)*2) / 8)
  17.139 +    | INTERP_32_MASK_2((INTERP_32_MASK_2(p1)*3 + INTERP_32_MASK_2(p2)*3 + INTERP_32_MASK_2(p3)*2) / 8);
  17.140 +}
  17.141 +
  17.142 +static inline u32 interp_32_211(u32 p1, u32 p2, u32 p3)
  17.143 +{ // (2*p1 + p2 + p3) / 4 on the 0xFF00FF and 0x00FF00 channel groups; bits 24-31 are not carried into the result
  17.144 +  return INTERP_32_MASK_1((INTERP_32_MASK_1(p1)*2 + INTERP_32_MASK_1(p2) + INTERP_32_MASK_1(p3)) / 4)
  17.145 +    | INTERP_32_MASK_2((INTERP_32_MASK_2(p1)*2 + INTERP_32_MASK_2(p2) + INTERP_32_MASK_2(p3)) / 4);
  17.146 +}
  17.147 +
  17.148 +static inline u32 interp_32_611(u32 p1, u32 p2, u32 p3)
  17.149 +{ // (6*p1 + p2 + p3) / 8 on the 0xFF00FF and 0x00FF00 channel groups; bits 24-31 are not carried into the result
  17.150 +  return INTERP_32_MASK_1((INTERP_32_MASK_1(p1)*6 + INTERP_32_MASK_1(p2) + INTERP_32_MASK_1(p3)) / 8)
  17.151 +    | INTERP_32_MASK_2((INTERP_32_MASK_2(p1)*6 + INTERP_32_MASK_2(p2) + INTERP_32_MASK_2(p3)) / 8);
  17.152 +}
  17.153 +
  17.154 +static inline u32 interp_32_71(u32 p1, u32 p2)
  17.155 +{ // (7*p1 + p2) / 8 on the 0xFF00FF and 0x00FF00 channel groups; bits 24-31 are not carried into the result
  17.156 +  return INTERP_32_MASK_1((INTERP_32_MASK_1(p1)*7 + INTERP_32_MASK_1(p2)) / 8)
  17.157 +    | INTERP_32_MASK_2((INTERP_32_MASK_2(p1)*7 + INTERP_32_MASK_2(p2)) / 8);
  17.158 +}
  17.159 +
  17.160 +static inline u32 interp_32_772(u32 p1, u32 p2, u32 p3)
  17.161 +{ // (7*(p1 + p2) + 2*p3) / 16 on the 0xFF00FF and 0x00FF00 channel groups; bits 24-31 are not carried into the result
  17.162 +  return INTERP_32_MASK_1(((INTERP_32_MASK_1(p1) + INTERP_32_MASK_1(p2))*7 + INTERP_32_MASK_1(p3)*2) / 16)
  17.163 +    | INTERP_32_MASK_2(((INTERP_32_MASK_2(p1) + INTERP_32_MASK_2(p2))*7 + INTERP_32_MASK_2(p3)*2) / 16);
  17.164 +}
  17.165 +
  17.166 +static inline u32 interp_32_11(u32 p1, u32 p2)
  17.167 +{ // (p1 + p2) / 2 on the 0xFF00FF and 0x00FF00 channel groups; bits 24-31 are not carried into the result
  17.168 +  return INTERP_32_MASK_1((INTERP_32_MASK_1(p1) + INTERP_32_MASK_1(p2)) / 2)
  17.169 +    | INTERP_32_MASK_2((INTERP_32_MASK_2(p1) + INTERP_32_MASK_2(p2)) / 2);
  17.170 +}
  17.171 +
  17.172 +static inline u32 interp_32_31(u32 p1, u32 p2)
  17.173 +{ // (3*p1 + p2) / 4 on the 0xFF00FF and 0x00FF00 channel groups; bits 24-31 are not carried into the result
  17.174 +  return INTERP_32_MASK_1((INTERP_32_MASK_1(p1)*3 + INTERP_32_MASK_1(p2)) / 4)
  17.175 +    | INTERP_32_MASK_2((INTERP_32_MASK_2(p1)*3 + INTERP_32_MASK_2(p2)) / 4);
  17.176 +}
  17.177 +
  17.178 +static inline u32 interp_32_1411(u32 p1, u32 p2, u32 p3)
  17.179 +{ // (14*p1 + p2 + p3) / 16 on the 0xFF00FF and 0x00FF00 channel groups; bits 24-31 are not carried into the result
  17.180 +  return INTERP_32_MASK_1((INTERP_32_MASK_1(p1)*14 + INTERP_32_MASK_1(p2) + INTERP_32_MASK_1(p3)) / 16)
  17.181 +    | INTERP_32_MASK_2((INTERP_32_MASK_2(p1)*14 + INTERP_32_MASK_2(p2) + INTERP_32_MASK_2(p3)) / 16);
  17.182 +}
  17.183 +
  17.184 +static inline u32 interp_32_431(u32 p1, u32 p2, u32 p3)
  17.185 +{ // (4*p1 + 3*p2 + p3) / 8 on the 0xFF00FF and 0x00FF00 channel groups; bits 24-31 are not carried into the result
  17.186 +  return INTERP_32_MASK_1((INTERP_32_MASK_1(p1)*4 + INTERP_32_MASK_1(p2)*3 + INTERP_32_MASK_1(p3)) / 8)
  17.187 +    | INTERP_32_MASK_2((INTERP_32_MASK_2(p1)*4 + INTERP_32_MASK_2(p2)*3 + INTERP_32_MASK_2(p3)) / 8);
  17.188 +}
  17.189 +
  17.190 +static inline u32 interp_32_53(u32 p1, u32 p2)
  17.191 +{ // (5*p1 + 3*p2) / 8 on the 0xFF00FF and 0x00FF00 channel groups; bits 24-31 are not carried into the result
  17.192 +  return INTERP_32_MASK_1((INTERP_32_MASK_1(p1)*5 + INTERP_32_MASK_1(p2)*3) / 8)
  17.193 +    | INTERP_32_MASK_2((INTERP_32_MASK_2(p1)*5 + INTERP_32_MASK_2(p2)*3) / 8);
  17.194 +}
  17.195 +
  17.196 +static inline u32 interp_32_151(u32 p1, u32 p2)
  17.197 +{ // (15*p1 + p2) / 16 on the 0xFF00FF and 0x00FF00 channel groups; bits 24-31 are not carried into the result
  17.198 +  return INTERP_32_MASK_1((INTERP_32_MASK_1(p1)*15 + INTERP_32_MASK_1(p2)) / 16)
  17.199 +    | INTERP_32_MASK_2((INTERP_32_MASK_2(p1)*15 + INTERP_32_MASK_2(p2)) / 16);
  17.200 +}
  17.201 +
  17.202 +static inline u32 interp_32_97(u32 p1, u32 p2)
  17.203 +{ // (9*p1 + 7*p2) / 16 on the 0xFF00FF and 0x00FF00 channel groups; bits 24-31 are not carried into the result
  17.204 +  return INTERP_32_MASK_1((INTERP_32_MASK_1(p1)*9 + INTERP_32_MASK_1(p2)*7) / 16)
  17.205 +    | INTERP_32_MASK_2((INTERP_32_MASK_2(p1)*9 + INTERP_32_MASK_2(p2)*7) / 16);
  17.206 +}
  17.207 +
  17.208 +/***************************************************************************/
  17.209 +/* diff */
  17.210 +
  17.211 +#define INTERP_Y_LIMIT (0x30*4) /* bound on |y| = |r + g + b| in interp_16_diff / interp_32_diff */
  17.212 +#define INTERP_U_LIMIT (0x07*4) /* bound on |u| = |r - b| in interp_16_diff / interp_32_diff */
  17.213 +#define INTERP_V_LIMIT (0x06*8) /* bound on |v| = |-r + 2*g - b| in interp_16_diff / interp_32_diff */
  17.214 +
  17.215 +static int interp_16_diff(u16 p1, u16 p2)
  17.216 +{ // Returns 1 when p1 and p2 differ by more than the INTERP_Y/U/V limits, 0 otherwise.
  17.217 +  int r, g, b; // signed per-channel differences, each scaled to an 8-bit channel range
  17.218 +  int y, u, v; // sum / difference combinations of r, g, b that are tested against the limits
  17.219 +
  17.220 +  if (p1 == p2)
  17.221 +    return 0;
  17.222 +
  17.223 +  if (interp_bits_per_pixel == 16) { // channel fields 0x1F / 0x7E0 / 0xF800
  17.224 +    b = (int)((p1 & 0x1F) - (p2 & 0x1F)) * 8; // * 8, not << 3: left-shifting a negative difference is undefined
  17.225 +    g = (int)((p1 & 0x7E0) - (p2 & 0x7E0)) / 8; // exact division: the difference is a multiple of 0x20
  17.226 +    r = (int)((p1 & 0xF800) - (p2 & 0xF800)) / 256; // exact division: the difference is a multiple of 0x800
  17.227 +  } else { // channel fields 0x1F / 0x3E0 / 0x7C00
  17.228 +    b = (int)((p1 & 0x1F) - (p2 & 0x1F)) * 8; // * 8, not << 3: left-shifting a negative difference is undefined
  17.229 +    g = (int)((p1 & 0x3E0) - (p2 & 0x3E0)) / 4; // exact division: the difference is a multiple of 0x20
  17.230 +    r = (int)((p1 & 0x7C00) - (p2 & 0x7C00)) / 128; // exact division: the difference is a multiple of 0x400
  17.231 +  }
  17.232 +
  17.233 +  y = r + g + b;
  17.234 +  u = r - b;
  17.235 +  v = -r + 2*g - b;
  17.236 +
  17.237 +  if (y < -INTERP_Y_LIMIT || y > INTERP_Y_LIMIT)
  17.238 +    return 1;
  17.239 +
  17.240 +  if (u < -INTERP_U_LIMIT || u > INTERP_U_LIMIT)
  17.241 +    return 1;
  17.242 +
  17.243 +  if (v < -INTERP_V_LIMIT || v > INTERP_V_LIMIT)
  17.244 +    return 1;
  17.245 +
  17.246 +  return 0;
  17.247 +}
  17.248 +
  17.249 +static int interp_32_diff(u32 p1, u32 p2)
  17.250 +{ // Returns 1 when p1 and p2 differ by more than the INTERP_Y/U/V limits, 0 otherwise.
  17.251 +  int r, g, b; // signed per-channel differences of the three low bytes
  17.252 +  int y, u, v; // sum / difference combinations of r, g, b that are tested against the limits
  17.253 +
  17.254 +  if ((p1 & 0xF8F8F8) == (p2 & 0xF8F8F8)) // equal in the top five bits of every channel: treated as the same
  17.255 +    return 0;
  17.256 +
  17.257 +  b = (int)(p1 & 0xFF) - (int)(p2 & 0xFF); // channels are extracted before subtracting, so no unsigned wrap or negative shift
  17.258 +  g = (int)((p1 >> 8) & 0xFF) - (int)((p2 >> 8) & 0xFF);
  17.259 +  r = (int)((p1 >> 16) & 0xFF) - (int)((p2 >> 16) & 0xFF);
  17.260 +
  17.261 +  y = r + g + b;
  17.262 +  u = r - b;
  17.263 +  v = -r + 2*g - b;
  17.264 +
  17.265 +  if (y < -INTERP_Y_LIMIT || y > INTERP_Y_LIMIT)
  17.266 +    return 1;
  17.267 +
  17.268 +  if (u < -INTERP_U_LIMIT || u > INTERP_U_LIMIT)
  17.269 +    return 1;
  17.270 +
  17.271 +  if (v < -INTERP_V_LIMIT || v > INTERP_V_LIMIT)
  17.272 +    return 1;
  17.273 +
  17.274 +  return 0;
  17.275 +}
  17.276 +
  17.277 +
  17.278 +#define INTERP_LIMIT2 (96000) /* bound on 11*|y| + 8*|u| + 6*|v| in interp_16_diff2 / interp_32_diff2 */
  17.279 +#define ABS(x) ((x) < 0 ? -(x) : (x)) /* evaluates x twice - do not pass expressions with side effects */
  17.280 +#define MAX(x,y) ((x) > (y) ? (x) : (y)) /* evaluates one argument twice; NOTE(review): unguarded, may clash with other headers */
  17.281 +#define MIN(x,y) ((x) < (y) ? (x) : (y)) /* evaluates one argument twice; NOTE(review): unguarded, may clash with other headers */
  17.282 +
  17.283 +static int interp_16_diff2(u16 p1, u16 p2)
  17.284 +{ // Returns 1 when the weighted sum 11*|y| + 8*|u| + 6*|v| of the pixel difference exceeds INTERP_LIMIT2, else 0.
  17.285 +  int r, g, b; // signed per-channel differences, each scaled to an 8-bit channel range
  17.286 +  int y, u, v; // weighted combinations of r, g, b
  17.287 +
  17.288 +  if ((p1 & 0xF79E) == (p2 & 0xF79E)) // equal under mask 0xF79E: treated as the same
  17.289 +    return 0;
  17.290 +
  17.291 +  if (interp_bits_per_pixel == 16) { // channel fields 0x1F / 0x7E0 / 0xF800
  17.292 +    b = (int)((p1 & 0x1F) - (p2 & 0x1F)) * 8; // * 8, not << 3: left-shifting a negative difference is undefined
  17.293 +    g = (int)((p1 & 0x7E0) - (p2 & 0x7E0)) / 8; // exact division: the difference is a multiple of 0x20
  17.294 +    r = (int)((p1 & 0xF800) - (p2 & 0xF800)) / 256; // exact division: the difference is a multiple of 0x800
  17.295 +  } else { // channel fields 0x1F / 0x3E0 / 0x7C00
  17.296 +    b = (int)((p1 & 0x1F) - (p2 & 0x1F)) * 8; // * 8, not << 3: left-shifting a negative difference is undefined
  17.297 +    g = (int)((p1 & 0x3E0) - (p2 & 0x3E0)) / 4; // exact division: the difference is a multiple of 0x20
  17.298 +    r = (int)((p1 & 0x7C00) - (p2 & 0x7C00)) / 128; // exact division: the difference is a multiple of 0x400
  17.299 +  }
  17.300 +
  17.301 +//  yb =  30*r + 58*g + 12*b;
  17.302 +  y =  33*r + 36*g + 31*b;
  17.303 +  u = -14*r - 29*g + 44*b; 
  17.304 +  v =  62*r - 51*g - 10*b;
  17.305 +
  17.306 +  if (11*ABS(y) + 8*ABS(u) + 6*ABS(v) > INTERP_LIMIT2)
  17.307 +    return 1;
  17.308 +  return 0;
  17.309 +}
  17.310 +
  17.311 +static int interp_32_diff2(u32 p1, u32 p2)
  17.312 +{ // Returns 1 when the weighted sum 11*|y| + 8*|u| + 6*|v| of the pixel difference exceeds INTERP_LIMIT2, else 0.
  17.313 +  int r, g, b; // signed differences of the top five bits of each of the three low bytes
  17.314 +  int y, u, v; // weighted combinations of r, g, b
  17.315 +
  17.316 +  if ((p1 & 0xF0F0F0) == (p2 & 0xF0F0F0)) // equal in the top four bits of every channel: treated as the same
  17.317 +    return 0;
  17.318 +
  17.319 +  b = (int)(p1 & 0xF8) - (int)(p2 & 0xF8); // channels are extracted before subtracting, so no unsigned wrap or negative shift
  17.320 +  g = (int)((p1 >> 8) & 0xF8) - (int)((p2 >> 8) & 0xF8);
  17.321 +  r = (int)((p1 >> 16) & 0xF8) - (int)((p2 >> 16) & 0xF8);
  17.322 +
  17.323 +//  y =  30*r + 58*g + 12*b;
  17.324 +  y =  33*r + 36*g + 31*b;
  17.325 +  u = -14*r - 29*g + 44*b; 
  17.326 +  v =  62*r - 51*g - 10*b;
  17.327 +
  17.328 +  if (11*ABS(y) + 8*ABS(u) + 6*ABS(v) > INTERP_LIMIT2)
  17.329 +    return 1;
  17.330 +
  17.331 +  return 0;
  17.332 +}
  17.333 +
  17.334 +static void interp_set(unsigned bits_per_pixel)
  17.335 +{ // Records the pixel depth and loads the matching pair of channel masks into interp_mask.
  17.336 +  interp_bits_per_pixel = bits_per_pixel;
  17.337 +
  17.338 +  switch (bits_per_pixel) { // NOTE(review): no default - any other depth leaves interp_mask unchanged
  17.339 +  case 15 :
  17.340 +    interp_mask[0] = 0x7C1F; // bits 10-14 and 0-4
  17.341 +    interp_mask[1] = 0x03E0; // bits 5-9
  17.342 +    break;
  17.343 +  case 16 :
  17.344 +    interp_mask[0] = 0xF81F; // bits 11-15 and 0-4
  17.345 +    interp_mask[1] = 0x07E0; // bits 5-10
  17.346 +    break;
  17.347 +  case 32 :
  17.348 +    interp_mask[0] = 0xFF00FF; // bits 16-23 and 0-7
  17.349 +    interp_mask[1] = 0x00FF00; // bits 8-15
  17.350 +    break;
  17.351 +  }
  17.352 +}
  17.353 +
  17.354 +#endif
    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/src/filters/lq2x.h	Sun Mar 04 20:32:31 2012 -0600
    18.3 @@ -0,0 +1,1284 @@
    18.4 +case 0 : /* This file is a bare list of case labels and bodies with no enclosing switch of its own; presumably it is #included inside a switch in the lq2x scaler -- confirm against the including file. */
    18.5 +case 2 : /* P0..P3, IC(), I31(), I211(), I332(), I611(), I1411(), MUL, MUR, MDL and MDR are all defined outside this fragment. */
    18.6 +case 4 : 
    18.7 +case 6 : 
    18.8 +case 8 : 
    18.9 +case 12 : 
   18.10 +case 16 : 
   18.11 +case 20 : 
   18.12 +case 24 : 
   18.13 +case 28 : 
   18.14 +case 32 : 
   18.15 +case 34 : 
   18.16 +case 36 : 
   18.17 +case 38 : 
   18.18 +case 40 : 
   18.19 +case 44 : 
   18.20 +case 48 : 
   18.21 +case 52 : 
   18.22 +case 56 : 
   18.23 +case 60 : 
   18.24 +case 64 : 
   18.25 +case 66 : 
   18.26 +case 68 : 
   18.27 +case 70 : 
   18.28 +case 96 : 
   18.29 +case 98 : 
   18.30 +case 100 : 
   18.31 +case 102 : 
   18.32 +case 128 : 
   18.33 +case 130 : 
   18.34 +case 132 : 
   18.35 +case 134 : 
   18.36 +case 136 : 
   18.37 +case 140 : 
   18.38 +case 144 : 
   18.39 +case 148 : 
   18.40 +case 152 : 
   18.41 +case 156 : 
   18.42 +case 160 : 
   18.43 +case 162 : 
   18.44 +case 164 : 
   18.45 +case 166 : 
   18.46 +case 168 : 
   18.47 +case 172 : 
   18.48 +case 176 : 
   18.49 +case 180 : 
   18.50 +case 184 : 
   18.51 +case 188 : 
   18.52 +case 192 : 
   18.53 +case 194 : 
   18.54 +case 196 : 
   18.55 +case 198 : 
   18.56 +case 224 : 
   18.57 +case 226 : 
   18.58 +case 228 : 
   18.59 +case 230 : 
   18.60 +{ /* single body shared by every case label listed above */
   18.61 +  P0 = IC(0); /* all four outputs receive the same IC(0) value, with no conditional */
   18.62 +  P1 = IC(0);
   18.63 +  P2 = IC(0);
   18.64 +  P3 = IC(0);
   18.65 +} break;
   18.66 +case 1 : 
   18.67 +case 5 : 
   18.68 +case 9 : 
   18.69 +case 13 : 
   18.70 +case 17 : 
   18.71 +case 21 : 
   18.72 +case 25 : 
   18.73 +case 29 : 
   18.74 +case 33 : 
   18.75 +case 37 : 
   18.76 +case 41 : 
   18.77 +case 45 : 
   18.78 +case 49 : 
   18.79 +case 53 : 
   18.80 +case 57 : 
   18.81 +case 61 : 
   18.82 +case 65 : 
   18.83 +case 69 : 
   18.84 +case 97 : 
   18.85 +case 101 : 
   18.86 +case 129 : 
   18.87 +case 133 : 
   18.88 +case 137 : 
   18.89 +case 141 : 
   18.90 +case 145 : 
   18.91 +case 149 : 
   18.92 +case 153 : 
   18.93 +case 157 : 
   18.94 +case 161 : 
   18.95 +case 165 : 
   18.96 +case 169 : 
   18.97 +case 173 : 
   18.98 +case 177 : 
   18.99 +case 181 : 
  18.100 +case 185 : 
  18.101 +case 189 : 
  18.102 +case 193 : 
  18.103 +case 197 : 
  18.104 +case 225 : 
  18.105 +case 229 : 
  18.106 +{
  18.107 +  P0 = IC(1);
  18.108 +  P1 = IC(1);
  18.109 +  P2 = IC(1);
  18.110 +  P3 = IC(1);
  18.111 +} break;
  18.112 +case 3 : 
  18.113 +case 35 : 
  18.114 +case 67 : 
  18.115 +case 99 : 
  18.116 +case 131 : 
  18.117 +case 163 : 
  18.118 +case 195 : 
  18.119 +case 227 : 
  18.120 +{
  18.121 +  P0 = IC(2);
  18.122 +  P1 = IC(2);
  18.123 +  P2 = IC(2);
  18.124 +  P3 = IC(2);
  18.125 +} break;
  18.126 +case 7 : 
  18.127 +case 39 : 
  18.128 +case 71 : 
  18.129 +case 103 : 
  18.130 +case 135 : 
  18.131 +case 167 : 
  18.132 +case 199 : 
  18.133 +case 231 : 
  18.134 +{
  18.135 +  P0 = IC(3);
  18.136 +  P1 = IC(3);
  18.137 +  P2 = IC(3);
  18.138 +  P3 = IC(3);
  18.139 +} break;
  18.140 +case 10 : 
  18.141 +case 138 : 
  18.142 +{
  18.143 +  P1 = IC(0);
  18.144 +  P2 = IC(0);
  18.145 +  P3 = IC(0);
  18.146 +  if (MUL) {
  18.147 +    P0 = IC(0);
  18.148 +  } else {
  18.149 +    P0 = I211(0, 1, 3);
  18.150 +  }
  18.151 +} break;
  18.152 +case 11 : 
  18.153 +case 27 : 
  18.154 +case 75 : 
  18.155 +case 139 : 
  18.156 +case 155 : 
  18.157 +case 203 : 
  18.158 +{
  18.159 +  P1 = IC(2);
  18.160 +  P2 = IC(2);
  18.161 +  P3 = IC(2);
  18.162 +  if (MUL) {
  18.163 +    P0 = IC(2);
  18.164 +  } else {
  18.165 +    P0 = I211(2, 1, 3);
  18.166 +  }
  18.167 +} break;
  18.168 +case 14 : 
  18.169 +case 142 : 
  18.170 +{
  18.171 +  P2 = IC(0);
  18.172 +  P3 = IC(0);
  18.173 +  if (MUL) {
  18.174 +    P0 = IC(0);
  18.175 +    P1 = IC(0);
  18.176 +  } else {
  18.177 +    P0 = I332(1, 3, 0);
  18.178 +    P1 = I31(0, 1);
  18.179 +  }
  18.180 +} break;
  18.181 +case 15 : 
  18.182 +case 143 : 
  18.183 +case 207 : 
  18.184 +{
  18.185 +  P2 = IC(4);
  18.186 +  P3 = IC(4);
  18.187 +  if (MUL) {
  18.188 +    P0 = IC(4);
  18.189 +    P1 = IC(4);
  18.190 +  } else {
  18.191 +    P0 = I332(1, 3, 4);
  18.192 +    P1 = I31(4, 1);
  18.193 +  }
  18.194 +} break;
  18.195 +case 18 : 
  18.196 +case 22 : 
  18.197 +case 30 : 
  18.198 +case 50 : 
  18.199 +case 54 : 
  18.200 +case 62 : 
  18.201 +case 86 : 
  18.202 +case 118 : 
  18.203 +{
  18.204 +  P0 = IC(0);
  18.205 +  P2 = IC(0);
  18.206 +  P3 = IC(0);
  18.207 +  if (MUR) {
  18.208 +    P1 = IC(0);
  18.209 +  } else {
  18.210 +    P1 = I211(0, 1, 5);
  18.211 +  }
  18.212 +} break;
  18.213 +case 19 : 
  18.214 +case 51 : 
  18.215 +{
  18.216 +  P2 = IC(2);
  18.217 +  P3 = IC(2);
  18.218 +  if (MUR) {
  18.219 +    P0 = IC(2);
  18.220 +    P1 = IC(2);
  18.221 +  } else {
  18.222 +    P0 = I31(2, 1);
  18.223 +    P1 = I332(1, 5, 2);
  18.224 +  }
  18.225 +} break;
  18.226 +case 23 : 
  18.227 +case 55 : 
  18.228 +case 119 : 
  18.229 +{
  18.230 +  P2 = IC(3);
  18.231 +  P3 = IC(3);
  18.232 +  if (MUR) {
  18.233 +    P0 = IC(3);
  18.234 +    P1 = IC(3);
  18.235 +  } else {
  18.236 +    P0 = I31(3, 1);
  18.237 +    P1 = I332(1, 5, 3);
  18.238 +  }
  18.239 +} break;
  18.240 +case 26 : 
  18.241 +{
  18.242 +  P2 = IC(0);
  18.243 +  P3 = IC(0);
  18.244 +  if (MUL) {
  18.245 +    P0 = IC(0);
  18.246 +  } else {
  18.247 +    P0 = I211(0, 1, 3);
  18.248 +  }
  18.249 +  if (MUR) {
  18.250 +    P1 = IC(0);
  18.251 +  } else {
  18.252 +    P1 = I211(0, 1, 5);
  18.253 +  }
  18.254 +} break;
  18.255 +case 31 : 
  18.256 +case 95 : 
  18.257 +{
  18.258 +  P2 = IC(4);
  18.259 +  P3 = IC(4);
  18.260 +  if (MUL) {
  18.261 +    P0 = IC(4);
  18.262 +  } else {
  18.263 +    P0 = I211(4, 1, 3);
  18.264 +  }
  18.265 +  if (MUR) {
  18.266 +    P1 = IC(4);
  18.267 +  } else {
  18.268 +    P1 = I211(4, 1, 5);
  18.269 +  }
  18.270 +} break;
  18.271 +case 42 : 
  18.272 +case 170 : 
  18.273 +{
  18.274 +  P1 = IC(0);
  18.275 +  P3 = IC(0);
  18.276 +  if (MUL) {
  18.277 +    P0 = IC(0);
  18.278 +    P2 = IC(0);
  18.279 +  } else {
  18.280 +    P0 = I332(1, 3, 0);
  18.281 +    P2 = I31(0, 3);
  18.282 +  }
  18.283 +} break;
  18.284 +case 43 : 
  18.285 +case 171 : 
  18.286 +case 187 : 
  18.287 +{
  18.288 +  P1 = IC(2);
  18.289 +  P3 = IC(2);
  18.290 +  if (MUL) {
  18.291 +    P0 = IC(2);
  18.292 +    P2 = IC(2);
  18.293 +  } else {
  18.294 +    P0 = I332(1, 3, 2);
  18.295 +    P2 = I31(2, 3);
  18.296 +  }
  18.297 +} break;
  18.298 +case 46 : 
  18.299 +case 174 : 
  18.300 +{
  18.301 +  P1 = IC(0);
  18.302 +  P2 = IC(0);
  18.303 +  P3 = IC(0);
  18.304 +  if (MUL) {
  18.305 +    P0 = IC(0);
  18.306 +  } else {
  18.307 +    P0 = I611(0, 1, 3);
  18.308 +  }
  18.309 +} break;
  18.310 +case 47 : 
  18.311 +case 175 : 
  18.312 +{
  18.313 +  P1 = IC(4);
  18.314 +  P2 = IC(4);
  18.315 +  P3 = IC(4);
  18.316 +  if (MUL) {
  18.317 +    P0 = IC(4);
  18.318 +  } else {
  18.319 +    P0 = I1411(4, 1, 3);
  18.320 +  }
  18.321 +} break;
  18.322 +case 58 : 
  18.323 +case 154 : 
  18.324 +case 186 : 
  18.325 +{
  18.326 +  P2 = IC(0);
  18.327 +  P3 = IC(0);
  18.328 +  if (MUL) {
  18.329 +    P0 = IC(0);
  18.330 +  } else {
  18.331 +    P0 = I611(0, 1, 3);
  18.332 +  }
  18.333 +  if (MUR) {
  18.334 +    P1 = IC(0);
  18.335 +  } else {
  18.336 +    P1 = I611(0, 1, 5);
  18.337 +  }
  18.338 +} break;
  18.339 +case 59 : 
  18.340 +{
  18.341 +  P2 = IC(2);
  18.342 +  P3 = IC(2);
  18.343 +  if (MUL) {
  18.344 +    P0 = IC(2);
  18.345 +  } else {
  18.346 +    P0 = I211(2, 1, 3);
  18.347 +  }
  18.348 +  if (MUR) {
  18.349 +    P1 = IC(2);
  18.350 +  } else {
  18.351 +    P1 = I611(2, 1, 5);
  18.352 +  }
  18.353 +} break;
  18.354 +case 63 : 
  18.355 +{
  18.356 +  P2 = IC(4);
  18.357 +  P3 = IC(4);
  18.358 +  if (MUL) {
  18.359 +    P0 = IC(4);
  18.360 +  } else {
  18.361 +    P0 = I1411(4, 1, 3);
  18.362 +  }
  18.363 +  if (MUR) {
  18.364 +    P1 = IC(4);
  18.365 +  } else {
  18.366 +    P1 = I211(4, 1, 5);
  18.367 +  }
  18.368 +} break;
  18.369 +case 72 : 
  18.370 +case 76 : 
  18.371 +case 104 : 
  18.372 +case 106 : 
  18.373 +case 108 : 
  18.374 +case 110 : 
  18.375 +case 120 : 
  18.376 +case 124 : 
  18.377 +{
  18.378 +  P0 = IC(0);
  18.379 +  P1 = IC(0);
  18.380 +  P3 = IC(0);
  18.381 +  if (MDL) {
  18.382 +    P2 = IC(0);
  18.383 +  } else {
  18.384 +    P2 = I211(0, 3, 7);
  18.385 +  }
  18.386 +} break;
  18.387 +case 73 : 
  18.388 +case 77 : 
  18.389 +case 105 : 
  18.390 +case 109 : 
  18.391 +case 125 : 
  18.392 +{
  18.393 +  P1 = IC(1);
  18.394 +  P3 = IC(1);
  18.395 +  if (MDL) {
  18.396 +    P0 = IC(1);
  18.397 +    P2 = IC(1);
  18.398 +  } else {
  18.399 +    P0 = I31(1, 3);
  18.400 +    P2 = I332(3, 7, 1);
  18.401 +  }
  18.402 +} break;
  18.403 +case 74 : 
  18.404 +{
  18.405 +  P1 = IC(0);
  18.406 +  P3 = IC(0);
  18.407 +  if (MDL) {
  18.408 +    P2 = IC(0);
  18.409 +  } else {
  18.410 +    P2 = I211(0, 3, 7);
  18.411 +  }
  18.412 +  if (MUL) {
  18.413 +    P0 = IC(0);
  18.414 +  } else {
  18.415 +    P0 = I211(0, 1, 3);
  18.416 +  }
  18.417 +} break;
  18.418 +case 78 : 
  18.419 +case 202 : 
  18.420 +case 206 : 
  18.421 +{
  18.422 +  P1 = IC(0);
  18.423 +  P3 = IC(0);
  18.424 +  if (MDL) {
  18.425 +    P2 = IC(0);
  18.426 +  } else {
  18.427 +    P2 = I611(0, 3, 7);
  18.428 +  }
  18.429 +  if (MUL) {
  18.430 +    P0 = IC(0);
  18.431 +  } else {
  18.432 +    P0 = I611(0, 1, 3);
  18.433 +  }
  18.434 +} break;
  18.435 +case 79 : 
  18.436 +{
  18.437 +  P1 = IC(4);
  18.438 +  P3 = IC(4);
  18.439 +  if (MDL) {
  18.440 +    P2 = IC(4);
  18.441 +  } else {
  18.442 +    P2 = I611(4, 3, 7);
  18.443 +  }
  18.444 +  if (MUL) {
  18.445 +    P0 = IC(4);
  18.446 +  } else {
  18.447 +    P0 = I211(4, 1, 3);
  18.448 +  }
  18.449 +} break;
  18.450 +case 80 : 
  18.451 +case 208 : 
  18.452 +case 210 : 
  18.453 +case 216 : 
  18.454 +{
  18.455 +  P0 = IC(0);
  18.456 +  P1 = IC(0);
  18.457 +  P2 = IC(0);
  18.458 +  if (MDR) {
  18.459 +    P3 = IC(0);
  18.460 +  } else {
  18.461 +    P3 = I211(0, 5, 7);
  18.462 +  }
  18.463 +} break;
  18.464 +case 81 : 
  18.465 +case 209 : 
  18.466 +case 217 : 
  18.467 +{
  18.468 +  P0 = IC(1);
  18.469 +  P1 = IC(1);
  18.470 +  P2 = IC(1);
  18.471 +  if (MDR) {
  18.472 +    P3 = IC(1);
  18.473 +  } else {
  18.474 +    P3 = I211(1, 5, 7);
  18.475 +  }
  18.476 +} break;
  18.477 +case 82 : 
  18.478 +case 214 : 
  18.479 +case 222 : 
  18.480 +{
  18.481 +  P0 = IC(0);
  18.482 +  P2 = IC(0);
  18.483 +  if (MDR) {
  18.484 +    P3 = IC(0);
  18.485 +  } else {
  18.486 +    P3 = I211(0, 5, 7);
  18.487 +  }
  18.488 +  if (MUR) {
  18.489 +    P1 = IC(0);
  18.490 +  } else {
  18.491 +    P1 = I211(0, 1, 5);
  18.492 +  }
  18.493 +} break;
  18.494 +case 83 : 
  18.495 +case 115 : 
  18.496 +{
  18.497 +  P0 = IC(2);
  18.498 +  P2 = IC(2);
  18.499 +  if (MDR) {
  18.500 +    P3 = IC(2);
  18.501 +  } else {
  18.502 +    P3 = I611(2, 5, 7);
  18.503 +  }
  18.504 +  if (MUR) {
  18.505 +    P1 = IC(2);
  18.506 +  } else {
  18.507 +    P1 = I611(2, 1, 5);
  18.508 +  }
  18.509 +} break;
  18.510 +case 84 : 
  18.511 +case 212 : 
  18.512 +{
  18.513 +  P0 = IC(0);
  18.514 +  P2 = IC(0);
  18.515 +  if (MDR) {
  18.516 +    P1 = IC(0);
  18.517 +    P3 = IC(0);
  18.518 +  } else {
  18.519 +    P1 = I31(0, 5);
  18.520 +    P3 = I332(5, 7, 0);
  18.521 +  }
  18.522 +} break;
  18.523 +case 85 : 
  18.524 +case 213 : 
  18.525 +case 221 : 
  18.526 +{
  18.527 +  P0 = IC(1);
  18.528 +  P2 = IC(1);
  18.529 +  if (MDR) {
  18.530 +    P1 = IC(1);
  18.531 +    P3 = IC(1);
  18.532 +  } else {
  18.533 +    P1 = I31(1, 5);
  18.534 +    P3 = I332(5, 7, 1);
  18.535 +  }
  18.536 +} break;
  18.537 +case 87 : 
  18.538 +{
  18.539 +  P0 = IC(3);
  18.540 +  P2 = IC(3);
  18.541 +  if (MDR) {
  18.542 +    P3 = IC(3);
  18.543 +  } else {
  18.544 +    P3 = I611(3, 5, 7);
  18.545 +  }
  18.546 +  if (MUR) {
  18.547 +    P1 = IC(3);
  18.548 +  } else {
  18.549 +    P1 = I211(3, 1, 5);
  18.550 +  }
  18.551 +} break;
  18.552 +case 88 : 
  18.553 +case 248 : 
  18.554 +case 250 : 
  18.555 +{
  18.556 +  P0 = IC(0);
  18.557 +  P1 = IC(0);
  18.558 +  if (MDL) {
  18.559 +    P2 = IC(0);
  18.560 +  } else {
  18.561 +    P2 = I211(0, 3, 7);
  18.562 +  }
  18.563 +  if (MDR) {
  18.564 +    P3 = IC(0);
  18.565 +  } else {
  18.566 +    P3 = I211(0, 5, 7);
  18.567 +  }
  18.568 +} break;
  18.569 +case 89 : 
  18.570 +case 93 : 
  18.571 +{
  18.572 +  P0 = IC(1);
  18.573 +  P1 = IC(1);
  18.574 +  if (MDL) {
  18.575 +    P2 = IC(1);
  18.576 +  } else {
  18.577 +    P2 = I611(1, 3, 7);
  18.578 +  }
  18.579 +  if (MDR) {
  18.580 +    P3 = IC(1);
  18.581 +  } else {
  18.582 +    P3 = I611(1, 5, 7);
  18.583 +  }
  18.584 +} break;
  18.585 +case 90 : 
  18.586 +{
  18.587 +  if (MDL) {
  18.588 +    P2 = IC(0);
  18.589 +  } else {
  18.590 +    P2 = I611(0, 3, 7);
  18.591 +  }
  18.592 +  if (MDR) {
  18.593 +    P3 = IC(0);
  18.594 +  } else {
  18.595 +    P3 = I611(0, 5, 7);
  18.596 +  }
  18.597 +  if (MUL) {
  18.598 +    P0 = IC(0);
  18.599 +  } else {
  18.600 +    P0 = I611(0, 1, 3);
  18.601 +  }
  18.602 +  if (MUR) {
  18.603 +    P1 = IC(0);
  18.604 +  } else {
  18.605 +    P1 = I611(0, 1, 5);
  18.606 +  }
  18.607 +} break;
  18.608 +case 91 : 
  18.609 +{
  18.610 +  if (MDL) {
  18.611 +    P2 = IC(2);
  18.612 +  } else {
  18.613 +    P2 = I611(2, 3, 7);
  18.614 +  }
  18.615 +  if (MDR) {
  18.616 +    P3 = IC(2);
  18.617 +  } else {
  18.618 +    P3 = I611(2, 5, 7);
  18.619 +  }
  18.620 +  if (MUL) {
  18.621 +    P0 = IC(2);
  18.622 +  } else {
  18.623 +    P0 = I211(2, 1, 3);
  18.624 +  }
  18.625 +  if (MUR) {
  18.626 +    P1 = IC(2);
  18.627 +  } else {
  18.628 +    P1 = I611(2, 1, 5);
  18.629 +  }
  18.630 +} break;
  18.631 +case 92 : 
  18.632 +{
  18.633 +  P0 = IC(0);
  18.634 +  P1 = IC(0);
  18.635 +  if (MDL) {
  18.636 +    P2 = IC(0);
  18.637 +  } else {
  18.638 +    P2 = I611(0, 3, 7);
  18.639 +  }
  18.640 +  if (MDR) {
  18.641 +    P3 = IC(0);
  18.642 +  } else {
  18.643 +    P3 = I611(0, 5, 7);
  18.644 +  }
  18.645 +} break;
  18.646 +case 94 : 
  18.647 +{
  18.648 +  if (MDL) {
  18.649 +    P2 = IC(0);
  18.650 +  } else {
  18.651 +    P2 = I611(0, 3, 7);
  18.652 +  }
  18.653 +  if (MDR) {
  18.654 +    P3 = IC(0);
  18.655 +  } else {
  18.656 +    P3 = I611(0, 5, 7);
  18.657 +  }
  18.658 +  if (MUL) {
  18.659 +    P0 = IC(0);
  18.660 +  } else {
  18.661 +    P0 = I611(0, 1, 3);
  18.662 +  }
  18.663 +  if (MUR) {
  18.664 +    P1 = IC(0);
  18.665 +  } else {
  18.666 +    P1 = I211(0, 1, 5);
  18.667 +  }
  18.668 +} break;
  18.669 +case 107 : 
  18.670 +case 123 : 
  18.671 +{
  18.672 +  P1 = IC(2);
  18.673 +  P3 = IC(2);
  18.674 +  if (MDL) {
  18.675 +    P2 = IC(2);
  18.676 +  } else {
  18.677 +    P2 = I211(2, 3, 7);
  18.678 +  }
  18.679 +  if (MUL) {
  18.680 +    P0 = IC(2);
  18.681 +  } else {
  18.682 +    P0 = I211(2, 1, 3);
  18.683 +  }
  18.684 +} break;
  18.685 +case 111 : 
  18.686 +{
  18.687 +  P1 = IC(4);
  18.688 +  P3 = IC(4);
  18.689 +  if (MDL) {
  18.690 +    P2 = IC(4);
  18.691 +  } else {
  18.692 +    P2 = I211(4, 3, 7);
  18.693 +  }
  18.694 +  if (MUL) {
  18.695 +    P0 = IC(4);
  18.696 +  } else {
  18.697 +    P0 = I1411(4, 1, 3);
  18.698 +  }
  18.699 +} break;
  18.700 +case 112 : 
  18.701 +case 240 : 
  18.702 +{
  18.703 +  P0 = IC(0);
  18.704 +  P1 = IC(0);
  18.705 +  if (MDR) {
  18.706 +    P2 = IC(0);
  18.707 +    P3 = IC(0);
  18.708 +  } else {
  18.709 +    P2 = I31(0, 7);
  18.710 +    P3 = I332(5, 7, 0);
  18.711 +  }
  18.712 +} break;
  18.713 +case 113 : 
  18.714 +case 241 : 
  18.715 +{
  18.716 +  P0 = IC(1);
  18.717 +  P1 = IC(1);
  18.718 +  if (MDR) {
  18.719 +    P2 = IC(1);
  18.720 +    P3 = IC(1);
  18.721 +  } else {
  18.722 +    P2 = I31(1, 7);
  18.723 +    P3 = I332(5, 7, 1);
  18.724 +  }
  18.725 +} break;
  18.726 +case 114 : 
  18.727 +{
  18.728 +  P0 = IC(0);
  18.729 +  P2 = IC(0);
  18.730 +  if (MDR) {
  18.731 +    P3 = IC(0);
  18.732 +  } else {
  18.733 +    P3 = I611(0, 5, 7);
  18.734 +  }
  18.735 +  if (MUR) {
  18.736 +    P1 = IC(0);
  18.737 +  } else {
  18.738 +    P1 = I611(0, 1, 5);
  18.739 +  }
  18.740 +} break;
  18.741 +case 116 : 
  18.742 +{
  18.743 +  P0 = IC(0);
  18.744 +  P1 = IC(0);
  18.745 +  P2 = IC(0);
  18.746 +  if (MDR) {
  18.747 +    P3 = IC(0);
  18.748 +  } else {
  18.749 +    P3 = I611(0, 5, 7);
  18.750 +  }
  18.751 +} break;
  18.752 +case 117 : 
  18.753 +{
  18.754 +  P0 = IC(1);
  18.755 +  P1 = IC(1);
  18.756 +  P2 = IC(1);
  18.757 +  if (MDR) {
  18.758 +    P3 = IC(1);
  18.759 +  } else {
  18.760 +    P3 = I611(1, 5, 7);
  18.761 +  }
  18.762 +} break;
  18.763 +case 121 : 
  18.764 +{
  18.765 +  P0 = IC(1);
  18.766 +  P1 = IC(1);
  18.767 +  if (MDL) {
  18.768 +    P2 = IC(1);
  18.769 +  } else {
  18.770 +    P2 = I211(1, 3, 7);
  18.771 +  }
  18.772 +  if (MDR) {
  18.773 +    P3 = IC(1);
  18.774 +  } else {
  18.775 +    P3 = I611(1, 5, 7);
  18.776 +  }
  18.777 +} break;
  18.778 +case 122 : 
  18.779 +{
  18.780 +  if (MDL) {
  18.781 +    P2 = IC(0);
  18.782 +  } else {
  18.783 +    P2 = I211(0, 3, 7);
  18.784 +  }
  18.785 +  if (MDR) {
  18.786 +    P3 = IC(0);
  18.787 +  } else {
  18.788 +    P3 = I611(0, 5, 7);
  18.789 +  }
  18.790 +  if (MUL) {
  18.791 +    P0 = IC(0);
  18.792 +  } else {
  18.793 +    P0 = I611(0, 1, 3);
  18.794 +  }
  18.795 +  if (MUR) {
  18.796 +    P1 = IC(0);
  18.797 +  } else {
  18.798 +    P1 = I611(0, 1, 5);
  18.799 +  }
  18.800 +} break;
  18.801 +case 126 : 
  18.802 +{
  18.803 +  P0 = IC(0);
  18.804 +  P3 = IC(0);
  18.805 +  if (MDL) {
  18.806 +    P2 = IC(0);
  18.807 +  } else {
  18.808 +    P2 = I211(0, 3, 7);
  18.809 +  }
  18.810 +  if (MUR) {
  18.811 +    P1 = IC(0);
  18.812 +  } else {
  18.813 +    P1 = I211(0, 1, 5);
  18.814 +  }
  18.815 +} break;
  18.816 +case 127 : 
  18.817 +{
  18.818 +  P3 = IC(4);
  18.819 +  if (MDL) {
  18.820 +    P2 = IC(4);
  18.821 +  } else {
  18.822 +    P2 = I211(4, 3, 7);
  18.823 +  }
  18.824 +  if (MUL) {
  18.825 +    P0 = IC(4);
  18.826 +  } else {
  18.827 +    P0 = I1411(4, 1, 3);
  18.828 +  }
  18.829 +  if (MUR) {
  18.830 +    P1 = IC(4);
  18.831 +  } else {
  18.832 +    P1 = I211(4, 1, 5);
  18.833 +  }
  18.834 +} break;
  18.835 +case 146 : 
  18.836 +case 150 : 
  18.837 +case 178 : 
  18.838 +case 182 : 
  18.839 +case 190 : 
  18.840 +{
  18.841 +  P0 = IC(0);
  18.842 +  P2 = IC(0);
  18.843 +  if (MUR) {
  18.844 +    P1 = IC(0);
  18.845 +    P3 = IC(0);
  18.846 +  } else {
  18.847 +    P1 = I332(1, 5, 0);
  18.848 +    P3 = I31(0, 5);
  18.849 +  }
  18.850 +} break;
  18.851 +case 147 : 
  18.852 +case 179 : 
  18.853 +{
  18.854 +  P0 = IC(2);
  18.855 +  P2 = IC(2);
  18.856 +  P3 = IC(2);
  18.857 +  if (MUR) {
  18.858 +    P1 = IC(2);
  18.859 +  } else {
  18.860 +    P1 = I611(2, 1, 5);
  18.861 +  }
  18.862 +} break;
  18.863 +case 151 : 
  18.864 +case 183 : 
  18.865 +{
  18.866 +  P0 = IC(3);
  18.867 +  P2 = IC(3);
  18.868 +  P3 = IC(3);
  18.869 +  if (MUR) {
  18.870 +    P1 = IC(3);
  18.871 +  } else {
  18.872 +    P1 = I1411(3, 1, 5);
  18.873 +  }
  18.874 +} break;
  18.875 +case 158 : 
  18.876 +{
  18.877 +  P2 = IC(0);
  18.878 +  P3 = IC(0);
  18.879 +  if (MUL) {
  18.880 +    P0 = IC(0);
  18.881 +  } else {
  18.882 +    P0 = I611(0, 1, 3);
  18.883 +  }
  18.884 +  if (MUR) {
  18.885 +    P1 = IC(0);
  18.886 +  } else {
  18.887 +    P1 = I211(0, 1, 5);
  18.888 +  }
  18.889 +} break;
  18.890 +case 159 : 
  18.891 +{
  18.892 +  P2 = IC(4);
  18.893 +  P3 = IC(4);
  18.894 +  if (MUL) {
  18.895 +    P0 = IC(4);
  18.896 +  } else {
  18.897 +    P0 = I211(4, 1, 3);
  18.898 +  }
  18.899 +  if (MUR) {
  18.900 +    P1 = IC(4);
  18.901 +  } else {
  18.902 +    P1 = I1411(4, 1, 5);
  18.903 +  }
  18.904 +} break;
  18.905 +case 191 : 
  18.906 +{
  18.907 +  P2 = IC(4);
  18.908 +  P3 = IC(4);
  18.909 +  if (MUL) {
  18.910 +    P0 = IC(4);
  18.911 +  } else {
  18.912 +    P0 = I1411(4, 1, 3);
  18.913 +  }
  18.914 +  if (MUR) {
  18.915 +    P1 = IC(4);
  18.916 +  } else {
  18.917 +    P1 = I1411(4, 1, 5);
  18.918 +  }
  18.919 +} break;
  18.920 +case 200 : 
  18.921 +case 204 : 
  18.922 +case 232 : 
  18.923 +case 236 : 
  18.924 +case 238 : 
  18.925 +{
  18.926 +  P0 = IC(0);
  18.927 +  P1 = IC(0);
  18.928 +  if (MDL) {
  18.929 +    P2 = IC(0);
  18.930 +    P3 = IC(0);
  18.931 +  } else {
  18.932 +    P2 = I332(3, 7, 0);
  18.933 +    P3 = I31(0, 7);
  18.934 +  }
  18.935 +} break;
  18.936 +case 201 : 
  18.937 +case 205 : 
  18.938 +{
  18.939 +  P0 = IC(1);
  18.940 +  P1 = IC(1);
  18.941 +  P3 = IC(1);
  18.942 +  if (MDL) {
  18.943 +    P2 = IC(1);
  18.944 +  } else {
  18.945 +    P2 = I611(1, 3, 7);
  18.946 +  }
  18.947 +} break;
  18.948 +case 211 : 
  18.949 +{
  18.950 +  P0 = IC(2);
  18.951 +  P1 = IC(2);
  18.952 +  P2 = IC(2);
  18.953 +  if (MDR) {
  18.954 +    P3 = IC(2);
  18.955 +  } else {
  18.956 +    P3 = I211(2, 5, 7);
  18.957 +  }
  18.958 +} break;
  18.959 +case 215 : 
  18.960 +{
  18.961 +  P0 = IC(3);
  18.962 +  P2 = IC(3);
  18.963 +  if (MDR) {
  18.964 +    P3 = IC(3);
  18.965 +  } else {
  18.966 +    P3 = I211(3, 5, 7);
  18.967 +  }
  18.968 +  if (MUR) {
  18.969 +    P1 = IC(3);
  18.970 +  } else {
  18.971 +    P1 = I1411(3, 1, 5);
  18.972 +  }
  18.973 +} break;
  18.974 +case 218 : 
  18.975 +{
  18.976 +  if (MDL) {
  18.977 +    P2 = IC(0);
  18.978 +  } else {
  18.979 +    P2 = I611(0, 3, 7);
  18.980 +  }
  18.981 +  if (MDR) {
  18.982 +    P3 = IC(0);
  18.983 +  } else {
  18.984 +    P3 = I211(0, 5, 7);
  18.985 +  }
  18.986 +  if (MUL) {
  18.987 +    P0 = IC(0);
  18.988 +  } else {
  18.989 +    P0 = I611(0, 1, 3);
  18.990 +  }
  18.991 +  if (MUR) {
  18.992 +    P1 = IC(0);
  18.993 +  } else {
  18.994 +    P1 = I611(0, 1, 5);
  18.995 +  }
  18.996 +} break;
  18.997 +case 219 : 
  18.998 +{
  18.999 +  P1 = IC(2);
 18.1000 +  P2 = IC(2);
 18.1001 +  if (MDR) {
 18.1002 +    P3 = IC(2);
 18.1003 +  } else {
 18.1004 +    P3 = I211(2, 5, 7);
 18.1005 +  }
 18.1006 +  if (MUL) {
 18.1007 +    P0 = IC(2);
 18.1008 +  } else {
 18.1009 +    P0 = I211(2, 1, 3);
 18.1010 +  }
 18.1011 +} break;
 18.1012 +case 220 : 
 18.1013 +{
 18.1014 +  P0 = IC(0);
 18.1015 +  P1 = IC(0);
 18.1016 +  if (MDL) {
 18.1017 +    P2 = IC(0);
 18.1018 +  } else {
 18.1019 +    P2 = I611(0, 3, 7);
 18.1020 +  }
 18.1021 +  if (MDR) {
 18.1022 +    P3 = IC(0);
 18.1023 +  } else {
 18.1024 +    P3 = I211(0, 5, 7);
 18.1025 +  }
 18.1026 +} break;
 18.1027 +case 223 : 
 18.1028 +{
 18.1029 +  P2 = IC(4);
 18.1030 +  if (MDR) {
 18.1031 +    P3 = IC(4);
 18.1032 +  } else {
 18.1033 +    P3 = I211(4, 5, 7);
 18.1034 +  }
 18.1035 +  if (MUL) {
 18.1036 +    P0 = IC(4);
 18.1037 +  } else {
 18.1038 +    P0 = I211(4, 1, 3);
 18.1039 +  }
 18.1040 +  if (MUR) {
 18.1041 +    P1 = IC(4);
 18.1042 +  } else {
 18.1043 +    P1 = I1411(4, 1, 5);
 18.1044 +  }
 18.1045 +} break;
 18.1046 +case 233 : 
 18.1047 +case 237 : 
 18.1048 +{
 18.1049 +  P0 = IC(1);
 18.1050 +  P1 = IC(1);
 18.1051 +  P3 = IC(1);
 18.1052 +  if (MDL) {
 18.1053 +    P2 = IC(1);
 18.1054 +  } else {
 18.1055 +    P2 = I1411(1, 3, 7);
 18.1056 +  }
 18.1057 +} break;
 18.1058 +case 234 : 
 18.1059 +{
 18.1060 +  P1 = IC(0);
 18.1061 +  P3 = IC(0);
 18.1062 +  if (MDL) {
 18.1063 +    P2 = IC(0);
 18.1064 +  } else {
 18.1065 +    P2 = I211(0, 3, 7);
 18.1066 +  }
 18.1067 +  if (MUL) {
 18.1068 +    P0 = IC(0);
 18.1069 +  } else {
 18.1070 +    P0 = I611(0, 1, 3);
 18.1071 +  }
 18.1072 +} break;
 18.1073 +case 235 : 
 18.1074 +{
 18.1075 +  P1 = IC(2);
 18.1076 +  P3 = IC(2);
 18.1077 +  if (MDL) {
 18.1078 +    P2 = IC(2);
 18.1079 +  } else {
 18.1080 +    P2 = I1411(2, 3, 7);
 18.1081 +  }
 18.1082 +  if (MUL) {
 18.1083 +    P0 = IC(2);
 18.1084 +  } else {
 18.1085 +    P0 = I211(2, 1, 3);
 18.1086 +  }
 18.1087 +} break;
 18.1088 +case 239 : 
 18.1089 +{
 18.1090 +  P1 = IC(4);
 18.1091 +  P3 = IC(4);
 18.1092 +  if (MDL) {
 18.1093 +    P2 = IC(4);
 18.1094 +  } else {
 18.1095 +    P2 = I1411(4, 3, 7);
 18.1096 +  }
 18.1097 +  if (MUL) {
 18.1098 +    P0 = IC(4);
 18.1099 +  } else {
 18.1100 +    P0 = I1411(4, 1, 3);
 18.1101 +  }
 18.1102 +} break;
 18.1103 +case 242 : 
 18.1104 +{
 18.1105 +  P0 = IC(0);
 18.1106 +  P2 = IC(0);
 18.1107 +  if (MDR) {
 18.1108 +    P3 = IC(0);
 18.1109 +  } else {
 18.1110 +    P3 = I211(0, 5, 7);
 18.1111 +  }
 18.1112 +  if (MUR) {
 18.1113 +    P1 = IC(0);
 18.1114 +  } else {
 18.1115 +    P1 = I611(0, 1, 5);
 18.1116 +  }
 18.1117 +} break;
 18.1118 +case 243 : 
 18.1119 +{
 18.1120 +  P0 = IC(2);
 18.1121 +  P1 = IC(2);
 18.1122 +  if (MDR) {
 18.1123 +    P2 = IC(2);
 18.1124 +    P3 = IC(2);
 18.1125 +  } else {
 18.1126 +    P2 = I31(2, 7);
 18.1127 +    P3 = I332(5, 7, 2);
 18.1128 +  }
 18.1129 +} break;
 18.1130 +case 244 : 
 18.1131 +{
 18.1132 +  P0 = IC(0);
 18.1133 +  P1 = IC(0);
 18.1134 +  P2 = IC(0);
 18.1135 +  if (MDR) {
 18.1136 +    P3 = IC(0);
 18.1137 +  } else {
 18.1138 +    P3 = I1411(0, 5, 7);
 18.1139 +  }
 18.1140 +} break;
 18.1141 +case 245 : 
 18.1142 +{
 18.1143 +  P0 = IC(1);
 18.1144 +  P1 = IC(1);
 18.1145 +  P2 = IC(1);
 18.1146 +  if (MDR) {
 18.1147 +    P3 = IC(1);
 18.1148 +  } else {
 18.1149 +    P3 = I1411(1, 5, 7);
 18.1150 +  }
 18.1151 +} break;
 18.1152 +case 246 : 
 18.1153 +{
 18.1154 +  P0 = IC(0);
 18.1155 +  P2 = IC(0);
 18.1156 +  if (MDR) {
 18.1157 +    P3 = IC(0);
 18.1158 +  } else {
 18.1159 +    P3 = I1411(0, 5, 7);
 18.1160 +  }
 18.1161 +  if (MUR) {
 18.1162 +    P1 = IC(0);
 18.1163 +  } else {
 18.1164 +    P1 = I211(0, 1, 5);
 18.1165 +  }
 18.1166 +} break;
 18.1167 +case 247 : 
 18.1168 +{
 18.1169 +  P0 = IC(3);
 18.1170 +  P2 = IC(3);
 18.1171 +  if (MDR) {
 18.1172 +    P3 = IC(3);
 18.1173 +  } else {
 18.1174 +    P3 = I1411(3, 5, 7);
 18.1175 +  }
 18.1176 +  if (MUR) {
 18.1177 +    P1 = IC(3);
 18.1178 +  } else {
 18.1179 +    P1 = I1411(3, 1, 5);
 18.1180 +  }
 18.1181 +} break;
 18.1182 +case 249 : 
 18.1183 +{
 18.1184 +  P0 = IC(1);
 18.1185 +  P1 = IC(1);
 18.1186 +  if (MDL) {
 18.1187 +    P2 = IC(1);
 18.1188 +  } else {
 18.1189 +    P2 = I1411(1, 3, 7);
 18.1190 +  }
 18.1191 +  if (MDR) {
 18.1192 +    P3 = IC(1);
 18.1193 +  } else {
 18.1194 +    P3 = I211(1, 5, 7);
 18.1195 +  }
 18.1196 +} break;
 18.1197 +case 251 : 
 18.1198 +{
 18.1199 +  P1 = IC(2);
 18.1200 +  if (MDL) {
 18.1201 +    P2 = IC(2);
 18.1202 +  } else {
 18.1203 +    P2 = I1411(2, 3, 7);
 18.1204 +  }
 18.1205 +  if (MDR) {
 18.1206 +    P3 = IC(2);
 18.1207 +  } else {
 18.1208 +    P3 = I211(2, 5, 7);
 18.1209 +  }
 18.1210 +  if (MUL) {
 18.1211 +    P0 = IC(2);
 18.1212 +  } else {
 18.1213 +    P0 = I211(2, 1, 3);
 18.1214 +  }
 18.1215 +} break;
 18.1216 +case 252 : 
 18.1217 +{
 18.1218 +  P0 = IC(0);
 18.1219 +  P1 = IC(0);
 18.1220 +  if (MDL) {
 18.1221 +    P2 = IC(0);
 18.1222 +  } else {
 18.1223 +    P2 = I211(0, 3, 7);
 18.1224 +  }
 18.1225 +  if (MDR) {
 18.1226 +    P3 = IC(0);
 18.1227 +  } else {
 18.1228 +    P3 = I1411(0, 5, 7);
 18.1229 +  }
 18.1230 +} break;
 18.1231 +case 253 : 
 18.1232 +{
 18.1233 +  P0 = IC(1);
 18.1234 +  P1 = IC(1);
 18.1235 +  if (MDL) {
 18.1236 +    P2 = IC(1);
 18.1237 +  } else {
 18.1238 +    P2 = I1411(1, 3, 7);
 18.1239 +  }
 18.1240 +  if (MDR) {
 18.1241 +    P3 = IC(1);
 18.1242 +  } else {
 18.1243 +    P3 = I1411(1, 5, 7);
 18.1244 +  }
 18.1245 +} break;
 18.1246 +case 254 : 
 18.1247 +{
 18.1248 +  P0 = IC(0);
 18.1249 +  if (MDL) {
 18.1250 +    P2 = IC(0);
 18.1251 +  } else {
 18.1252 +    P2 = I211(0, 3, 7);
 18.1253 +  }
 18.1254 +  if (MDR) {
 18.1255 +    P3 = IC(0);
 18.1256 +  } else {
 18.1257 +    P3 = I1411(0, 5, 7);
 18.1258 +  }
 18.1259 +  if (MUR) {
 18.1260 +    P1 = IC(0);
 18.1261 +  } else {
 18.1262 +    P1 = I211(0, 1, 5);
 18.1263 +  }
 18.1264 +} break;
 18.1265 +case 255 : 
 18.1266 +{
 18.1267 +  if (MDL) {
 18.1268 +    P2 = IC(4);
 18.1269 +  } else {
 18.1270 +    P2 = I1411(4, 3, 7);
 18.1271 +  }
 18.1272 +  if (MDR) {
 18.1273 +    P3 = IC(4);
 18.1274 +  } else {
 18.1275 +    P3 = I1411(4, 5, 7);
 18.1276 +  }
 18.1277 +  if (MUL) {
 18.1278 +    P0 = IC(4);
 18.1279 +  } else {
 18.1280 +    P0 = I1411(4, 1, 3);
 18.1281 +  }
 18.1282 +  if (MUR) {
 18.1283 +    P1 = IC(4);
 18.1284 +  } else {
 18.1285 +    P1 = I1411(4, 1, 5);
 18.1286 +  }
 18.1287 +} break;
    19.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.2 +++ b/src/filters/motionblur.cpp	Sun Mar 04 20:32:31 2012 -0600
    19.3 @@ -0,0 +1,183 @@
    19.4 +#include "../Port.h"
    19.5 +
    19.6 +extern u32 RGB_LOW_BITS_MASK;
    19.7 +
    19.8 +void MotionBlur(u8 *srcPtr, u32 srcPitch, u8 *deltaPtr, /* Writes a 2x-enlarged image in which each source pixel is blended with the value held in deltaPtr; every 32-bit source word is handled as two 16-bit pixels. */
    19.9 +                u8 *dstPtr, u32 dstPitch, int width, int height) /* srcPitch: byte step per row for both srcPtr and deltaPtr. dstPitch: byte step per output row. deltaPtr is read and then overwritten with the current source words. */
   19.10 +{
   19.11 +	u8 *nextLine, *finish;
   19.12 +	u32 colorMask	 = ~(RGB_LOW_BITS_MASK | (RGB_LOW_BITS_MASK << 16)); /* clears the RGB_LOW_BITS_MASK bits in both 16-bit halves */
   19.13 +	u32 lowPixelMask = RGB_LOW_BITS_MASK; /* extern; presumably the lowest bit of each colour channel -- confirm at its definition */
   19.14 +
   19.15 +	nextLine = dstPtr + dstPitch; /* second of the two output rows produced per source row */
   19.16 +
   19.17 +	do /* one pass per source row. NOTE(review): the body runs before height is tested, so height <= 0 is not guarded. */
   19.18 +	{
   19.19 +		u32 *bP = (u32 *) srcPtr;
   19.20 +		u32 *xP = (u32 *) deltaPtr;
   19.21 +		u32 *dP = (u32 *) dstPtr;
   19.22 +		u32 *nL = (u32 *) nextLine;
   19.23 +		u32	 currentPixel;
   19.24 +		u32	 nextPixel;
   19.25 +		u32	 currentDelta;
   19.26 +		u32	 nextDelta;
   19.27 +
   19.28 +		finish	  = (u8 *) bP + ((width + 2) << 1); /* row start + (width + 2) * 2 bytes; the inner loop stops once bP reaches it */
   19.29 +		nextPixel = *bP++; /* prime the one-word read-ahead for source ... */
   19.30 +		nextDelta = *xP++; /* ... and for the delta buffer */
   19.31 +
   19.32 +		do /* one pass per 32-bit source word (two 16-bit pixels) */
   19.33 +		{
   19.34 +			currentPixel = nextPixel;
   19.35 +			currentDelta = nextDelta;
   19.36 +			nextPixel	 = *bP++; /* read-ahead: consumed by the next iteration, or discarded after the last one */
   19.37 +			nextDelta	 = *xP++;
   19.38 +
   19.39 +			if (currentPixel != currentDelta) /* source word differs from the stored delta word: blend the two */
   19.40 +			{
   19.41 +				u32 colorA, product, colorB;
   19.42 +
   19.43 +				*(xP - 2) = currentPixel; /* xP has advanced twice past this word's slot; store the source word there */
   19.44 +#ifdef WORDS_BIGENDIAN
   19.45 +				colorA = currentPixel >> 16;
   19.46 +				colorB = currentDelta >> 16;
   19.47 +#else
   19.48 +				colorA = currentPixel & 0xffff; /* first pixel of the pair lives in the low half on little-endian builds */
   19.49 +				colorB = currentDelta & 0xffff;
   19.50 +#endif
   19.51 +
   19.52 +				product =   ((((colorA & colorMask) >> 1) + /* half of A plus half of B, plus the masked low bits common to both */
   19.53 +				              ((colorB & colorMask) >> 1) +
   19.54 +				              (colorA & colorB & lowPixelMask)));
   19.55 +
   19.56 +				*(dP) = product | product << 16; /* same 16-bit value in both halves: pixel doubled horizontally */
   19.57 +				*(nL) = product | product << 16; /* and repeated on the row below */
   19.58 +
   19.59 +#ifdef WORDS_BIGENDIAN
   19.60 +				colorA = (currentPixel & 0xffff);
   19.61 +				colorB = (currentDelta & 0xffff);
   19.62 +#else
   19.63 +				colorA = currentPixel >> 16; /* second pixel of the pair */
   19.64 +				colorB = currentDelta >> 16;
   19.65 +#endif
   19.66 +				product = ((((colorA & colorMask) >> 1) +
   19.67 +				            ((colorB & colorMask) >> 1) +
   19.68 +				            (colorA & colorB & lowPixelMask)));
   19.69 +
   19.70 +				*(dP + 1) = product | product << 16;
   19.71 +				*(nL + 1) = product | product << 16;
   19.72 +			}
   19.73 +			else /* words are equal: copy the pixels through without blending */
   19.74 +			{
   19.75 +				u32 colorA, product;
   19.76 +
   19.77 +				*(xP - 2) = currentPixel; /* NOTE(review): stores the value the slot already holds (currentPixel == currentDelta on this path) */
   19.78 +#ifdef WORDS_BIGENDIAN
   19.79 +				colorA = currentPixel >> 16;
   19.80 +#else
   19.81 +				colorA = currentPixel & 0xffff;
   19.82 +#endif
   19.83 +
   19.84 +				product = colorA;
   19.85 +
   19.86 +				*(dP) = product | product << 16;
   19.87 +				*(nL) = product | product << 16;
   19.88 +#ifdef WORDS_BIGENDIAN
   19.89 +				colorA = (currentPixel & 0xffff);
   19.90 +#else
   19.91 +				colorA = currentPixel >> 16;
   19.92 +#endif
   19.93 +				product = colorA;
   19.94 +
   19.95 +				*(dP + 1) = product | product << 16;
   19.96 +				*(nL + 1) = product | product << 16;
   19.97 +			}
   19.98 +
   19.99 +			dP += 2; /* two output words were written on each of the two rows */
  19.100 +			nL += 2;
  19.101 +		}
  19.102 +		while ((u8 *) bP < finish);
  19.103 +
  19.104 +		deltaPtr += srcPitch; /* the delta buffer uses the same row stride as the source */
  19.105 +		srcPtr	 += srcPitch;
  19.106 +		dstPtr	 += dstPitch << 1; /* skip two output rows */
  19.107 +		nextLine += dstPitch << 1;
  19.108 +	}
  19.109 +	while (--height);
  19.110 +}
  19.111 +
  19.112 +void MotionBlur32(u8 *srcPtr, u32 srcPitch, u8 *deltaPtr, /* Variant that treats every 32-bit word as one pixel: each source pixel is blended with its deltaPtr word and written as a 2x2 block. */
  19.113 +                  u8 *dstPtr, u32 dstPitch, int width, int height) /* srcPitch: byte step per row for both srcPtr and deltaPtr. dstPitch: byte step per output row. deltaPtr is read and then overwritten with the source pixels. */
  19.114 +{
  19.115 +	u8 *nextLine, *finish;
  19.116 +	u32 colorMask	 = ~RGB_LOW_BITS_MASK; /* clears the RGB_LOW_BITS_MASK bits before the >> 1 in the blend */
  19.117 +	u32 lowPixelMask = RGB_LOW_BITS_MASK; /* extern; presumably the lowest bit of each colour channel -- confirm at its definition */
  19.118 +
  19.119 +	nextLine = dstPtr + dstPitch; /* second of the two output rows produced per source row */
  19.120 +
  19.121 +	do /* one pass per source row. NOTE(review): the body runs before height is tested, so height <= 0 is not guarded. */
  19.122 +	{
  19.123 +		u32 *bP = (u32 *) srcPtr;
  19.124 +		u32 *xP = (u32 *) deltaPtr;
  19.125 +		u32 *dP = (u32 *) dstPtr;
  19.126 +		u32 *nL = (u32 *) nextLine;
  19.127 +		u32	 currentPixel;
  19.128 +		u32	 nextPixel;
  19.129 +		u32	 currentDelta;
  19.130 +		u32	 nextDelta;
  19.131 +
  19.132 +		finish	  = (u8 *) bP + ((width + 1) << 2); /* row start + (width + 1) * 4 bytes; the inner loop stops once bP reaches it */
  19.133 +		nextPixel = *bP++; /* prime the one-word read-ahead */
  19.134 +		nextDelta = *xP++;
  19.135 +
  19.136 +		do /* each pass handles two source pixels: currentPixel, then nextPixel */
  19.137 +		{
  19.138 +			currentPixel = nextPixel;
  19.139 +			currentDelta = nextDelta;
  19.140 +			nextPixel	 = *bP++;
  19.141 +			nextDelta	 = *xP++;
  19.142 +
  19.143 +			u32 colorA, product, colorB;
  19.144 +
  19.145 +			*(xP - 2) = currentPixel; /* delta slot of the first pixel of this pass */
  19.146 +			colorA	  = currentPixel;
  19.147 +			colorB	  = currentDelta;
  19.148 +
  19.149 +			product =   ((((colorA & colorMask) >> 1) + /* half of A plus half of B, plus the masked low bits common to both */
  19.150 +			              ((colorB & colorMask) >> 1) +
  19.151 +			              (colorA & colorB & lowPixelMask)));
  19.152 +
  19.153 +			*(dP)	  = product; /* 2x2 block for the first pixel */
  19.154 +			*(dP + 1) = product;
  19.155 +			*(nL)	  = product;
  19.156 +			*(nL + 1) = product;
  19.157 +
  19.158 +			*(xP - 1) = nextPixel; /* delta slot of the second pixel of this pass */
  19.159 +
  19.160 +			colorA = nextPixel;
  19.161 +			colorB = nextDelta;
  19.162 +
  19.163 +			product = ((((colorA & colorMask) >> 1) +
  19.164 +			            ((colorB & colorMask) >> 1) +
  19.165 +			            (colorA & colorB & lowPixelMask)));
  19.166 +
  19.167 +			*(dP + 2) = product; /* 2x2 block for the second pixel */
  19.168 +			*(dP + 3) = product;
  19.169 +			*(nL + 2) = product;
  19.170 +			*(nL + 3) = product;
  19.171 +
  19.172 +			nextPixel = *bP++; /* second advance in this pass: refill the read-ahead for the following pass */
  19.173 +			nextDelta = *xP++;
  19.174 +
  19.175 +			dP += 4; /* four output words were written on each of the two rows */
  19.176 +			nL += 4;
  19.177 +		}
  19.178 +		while ((u8 *) bP < finish);
  19.179 +
  19.180 +		deltaPtr += srcPitch; /* the delta buffer uses the same row stride as the source */
  19.181 +		srcPtr	 += srcPitch;
  19.182 +		dstPtr	 += dstPitch << 1; /* skip two output rows */
  19.183 +		nextLine += dstPitch << 1;
  19.184 +	}
  19.185 +	while (--height);
  19.186 +}
    20.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.2 +++ b/src/filters/pixel.cpp	Sun Mar 04 20:32:31 2012 -0600
    20.3 @@ -0,0 +1,196 @@
    20.4 +#include "../Port.h"
    20.5 +
    20.6 +extern u32 RGB_LOW_BITS_MASK;
    20.7 +
     20.8 +void Pixelate2x16(u8 *srcPtr, u32 srcPitch, u8 *deltaPtr, // 2x "pixelate" filter for 16-bit pixels: each source pixel becomes a 2x2 block with the pixel top-left and a darkened copy elsewhere
     20.9 +                  u8 *dstPtr, u32 dstPitch, int width, int height) // pitches are byte strides; deltaPtr caches source words so unchanged ones are skipped; height must be >= 1 (do/while)
    20.10 +{
    20.11 +	u8 *nextLine, *finish;
    20.12 +	u32 colorMask = ~(RGB_LOW_BITS_MASK | (RGB_LOW_BITS_MASK << 16)); // mask replicated into both 16-bit halves, then inverted
    20.13 +	colorMask = (colorMask >> 2) & (colorMask >> 1); // bits that remain valid after a value has been shifted right by 2
    20.14 +
    20.15 +	nextLine = dstPtr + dstPitch; // second of the two output rows produced per source row
    20.16 +
    20.17 +	do
    20.18 +	{
    20.19 +		u32 *bP = (u32 *) srcPtr; // source is read one 32-bit word (two 16-bit pixels) at a time
    20.20 +		u32 *xP = (u32 *) deltaPtr;
    20.21 +		u32 *dP = (u32 *) dstPtr;
    20.22 +		u32 *nL = (u32 *) nextLine;
    20.23 +		u32  currentPixel;
    20.24 +		u32  nextPixel;
    20.25 +		u32  currentDelta;
    20.26 +		u32  nextDelta;
    20.27 +
    20.28 +		finish    = (u8 *) bP + ((width+2) << 1); // byte address (width + 2) 16-bit pixels past the row start; loop bound for bP
    20.29 +		nextPixel = *bP++; // prime the one-word look-ahead
    20.30 +		nextDelta = *xP++;
    20.31 +
    20.32 +		do
    20.33 +		{
    20.34 +			currentPixel = nextPixel;
    20.35 +			currentDelta = nextDelta;
    20.36 +			nextPixel    = *bP++;
    20.37 +			nextDelta    = *xP++;
    20.38 +
    20.39 +			if ((nextPixel != nextDelta) || (currentPixel != currentDelta)) // output is only rewritten when this word or the following one differs from the cached copy
    20.40 +			{
    20.41 +				u32 colorA, colorB, product;
    20.42 +
    20.43 +				*(xP - 2) = currentPixel; // refresh the delta slot that currentDelta was read from
    20.44 +#ifdef WORDS_BIGENDIAN
    20.45 +				colorA = currentPixel >> 16;
    20.46 +				colorB = currentPixel & 0xffff;
    20.47 +#else
    20.48 +				colorA = currentPixel & 0xffff; // colorA = pixel at the lower address, colorB = pixel at the higher address
    20.49 +				colorB = currentPixel >> 16;
    20.50 +#endif
    20.51 +				product = (colorA >> 2) & colorMask; // darkened copy of colorA (value shifted right by 2, stray bits masked off)
    20.52 +
    20.53 +#ifdef WORDS_BIGENDIAN
    20.54 +				*(nL) = (product << 16) | (product);
    20.55 +				*(dP) = (colorA << 16) | product;
    20.56 +#else
    20.57 +				*(nL) = product | (product << 16); // lower row: darkened, darkened
    20.58 +				*(dP) = colorA | (product << 16); // upper row: original pixel followed by its darkened copy
    20.59 +#endif
    20.60 +
    20.61 +#ifdef WORDS_BIGENDIAN
    20.62 +				colorA = nextPixel >> 16;
    20.63 +#else
    20.64 +				colorA = nextPixel & 0xffff; // NOTE(review): this value is not read again before the block ends -- looks like a leftover
    20.65 +#endif
    20.66 +				product = (colorB >> 2) & colorMask; // same treatment for the second pixel of the word
    20.67 +#ifdef WORDS_BIGENDIAN
    20.68 +				*(nL + 1) = (product << 16) | (product);
    20.69 +				*(dP + 1) = (colorB << 16) | (product);
    20.70 +#else
    20.71 +				*(nL + 1) = (product) | (product << 16);
    20.72 +				*(dP + 1) = (colorB) | (product << 16);
    20.73 +#endif
    20.74 +			}
    20.75 +
    20.76 +			dP += 2; // output pointers advance even when the word was skipped
    20.77 +			nL += 2;
    20.78 +		}
    20.79 +		while ((u8 *) bP < finish);
    20.80 +
    20.81 +		deltaPtr += srcPitch; // the delta buffer is stepped with the source pitch
    20.82 +		srcPtr   += srcPitch;
    20.83 +		dstPtr   += dstPitch << 1; // advance both output row pointers by two destination rows
    20.84 +		nextLine += dstPitch << 1;
    20.85 +	}
    20.86 +	while (--height);
    20.87 +}
   20.88 +
    20.89 +void Pixelate2x32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, // 2x "pixelate" filter for 32-bit pixels; the delta buffer argument is ignored
    20.90 +                  u8 *dstPtr, u32 dstPitch, int width, int height) // pitches are byte strides; height must be >= 1 (do/while)
    20.91 +{
    20.92 +	u8 *nextLine, *finish;
    20.93 +	u32 colorMask = ((u32)~RGB_LOW_BITS_MASK >> 2) & ((u32)~RGB_LOW_BITS_MASK >> 1); // bits that remain valid after a pixel has been shifted right by 2
    20.94 +
    20.95 +	nextLine = dstPtr + dstPitch; // second of the two output rows produced per source row
    20.96 +
    20.97 +	do
    20.98 +	{
    20.99 +		u32 *bP = (u32 *) srcPtr;
   20.100 +		//    u32 *xP = (u32 *) deltaPtr;
   20.101 +		u32 *dP = (u32 *) dstPtr;
   20.102 +		u32 *nL = (u32 *) nextLine;
   20.103 +		u32  currentPixel;
   20.104 +		u32  nextPixel;
   20.105 +
   20.106 +		finish    = (u8 *) bP + ((width+1) << 2); // byte address (width + 1) 32-bit words past the row start; loop bound for bP
   20.107 +		nextPixel = *bP++; // prime the one-pixel look-ahead
   20.108 +
   20.109 +		do
   20.110 +		{
   20.111 +			u32 product;
   20.112 +
   20.113 +			currentPixel = nextPixel;
   20.114 +			nextPixel    = *bP++;
   20.115 +			product = (currentPixel >> 2) & colorMask; // darkened copy of the pixel
   20.116 +			*(nL)   = product; // lower row: darkened, darkened
   20.117 +			*(nL+1) = product;
   20.118 +			*(dP)   = currentPixel; // upper row: original pixel, then darkened copy
   20.119 +			*(dP+1) = product;
   20.120 +
   20.121 +			currentPixel = nextPixel; // second source pixel handled in the same iteration
   20.122 +			nextPixel = *bP++;
   20.123 +			product   = (currentPixel >> 2) & colorMask;
   20.124 +			*(nL + 2) = product;
   20.125 +			*(nL + 3) = product;
   20.126 +			*(dP + 2) = currentPixel;
   20.127 +			*(dP + 3) = product;
   20.128 +
   20.129 +			dP += 4; // two source pixels -> four output words per row
   20.130 +			nL += 4;
   20.131 +		}
   20.132 +		while ((u8 *) bP < finish);
   20.133 +
   20.134 +		srcPtr   += srcPitch;
   20.135 +		dstPtr   += dstPitch << 1; // advance both output row pointers by two destination rows
   20.136 +		nextLine += dstPitch << 1;
   20.137 +	}
   20.138 +	while (--height);
   20.139 +}
  20.140 +
   20.141 +// generic Pixelate Nx magnification filter: each source pixel becomes a magnification x magnification block whose first row and first column are darkened
   20.142 +template <int magnification, typename ColorType>
   20.143 +void PixelateNx(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, // the delta buffer argument is ignored
   20.144 +                u8 *dstPtr, u32 dstPitch, int width, int height) // pitches are byte strides on entry; height must be >= 1 (do/while)
   20.145 +{
   20.146 +	ColorType colorMask = ((ColorType)~RGB_LOW_BITS_MASK >> 2) & ((ColorType)~RGB_LOW_BITS_MASK >> 1); // bits that remain valid after a pixel has been shifted right by 2
   20.147 +
   20.148 +	srcPitch      = srcPitch / sizeof(ColorType) - width; // from here on: source elements to skip after the last pixel of a row
   20.149 +	u32 dstNextP  = dstPitch / sizeof(ColorType); // destination row length in elements
   20.150 +	u32 dstNextL  = (dstNextP - width) * magnification; // skip to the next magnified 'line'
   20.151 +	dstNextP     -= magnification; // from here on: step from the end of one block row to the start of the block's next row
   20.152 +
   20.153 +	u32 offset    = (dstPitch + sizeof(ColorType)) * magnification - dstPitch; // bytes from a block's first element to one past the last element of its last row
   20.154 +
   20.155 +	ColorType *src   = (ColorType *)srcPtr;
   20.156 +	ColorType *dst   = (ColorType *)dstPtr;
   20.157 +
   20.158 +	do // per src line
   20.159 +	{
   20.160 +		u8 *finishP = (u8 *)dst + offset; // end bound of the current output block
   20.161 +		for (int x = 0; x < width; ++x) // per pixel in line
   20.162 +		{
   20.163 +			ColorType col    = *src;
   20.164 +			ColorType *dst2  = dst;
   20.165 +			u8 *finishM = (u8 *)(dst + magnification); // end bound of the block's current row
   20.166 +
   20.167 +			ColorType product = (col >> 2) & colorMask; // darkened copy of the pixel
   20.168 +			do // first block row: darkened colour across the whole row
   20.169 +			{
   20.170 +				*dst2 = product;
   20.171 +			} while ((u8 *)++dst2 < finishM);
   20.172 +			dst2    += dstNextP; // move to the start of the block's next row
   20.173 +			finishM += dstPitch;
   20.174 +			do // dst magnified pixel, remaining rows
   20.175 +			{
   20.176 +				*dst2++ = product; // first element of the row is darkened ...
   20.177 +				do
   20.178 +				{
   20.179 +					*dst2 = col; // ... the rest of the row keeps the original colour
   20.180 +				} while ((u8 *)++dst2 < finishM);
   20.181 +				dst2    += dstNextP;
   20.182 +				finishM += dstPitch;
   20.183 +			} while ((u8 *)dst2 < finishP);
   20.184 +
   20.185 +			++src;
   20.186 +			dst     += magnification; // next block starts magnification elements to the right
   20.187 +			finishP += magnification * sizeof(ColorType); // keep the block end bound in step with dst
   20.188 +		}
   20.189 +		src += srcPitch;
   20.190 +		dst += dstNextL;
   20.191 +	} while (--height);
   20.192 +}
  20.193 +
   20.194 +typedef void (*PixelateNxFP)(u8*, u32, u8*, u8*, u32, int, int); // pointer type matching PixelateNx: (srcPtr, srcPitch, deltaPtr, dstPtr, dstPitch, width, height)
   20.195 +
   20.196 +PixelateNxFP Pixelate3x16 = PixelateNx<3, u16>; // 3x magnification, u16 pixels
   20.197 +PixelateNxFP Pixelate3x32 = PixelateNx<3, u32>; // 3x magnification, u32 pixels
   20.198 +PixelateNxFP Pixelate4x16 = PixelateNx<4, u16>; // 4x magnification, u16 pixels
   20.199 +PixelateNxFP Pixelate4x32 = PixelateNx<4, u32>; // 4x magnification, u32 pixels
    21.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.2 +++ b/src/filters/scanline.cpp	Sun Mar 04 20:32:31 2012 -0600
    21.3 @@ -0,0 +1,225 @@
    21.4 +#include "../Port.h"
    21.5 +
    21.6 +extern u32 RGB_LOW_BITS_MASK;
    21.7 +
     21.8 +void Scanlines(u8 *srcPtr, u32 srcPitch, u8 *, // 2x scanline filter for 16-bit pixels: pixels are doubled horizontally and every second output row is zero-filled; third argument is ignored
     21.9 +               u8 *dstPtr, u32 dstPitch, int width, int height) // pitches are byte strides; height must be >= 1 (do/while)
    21.10 +{
    21.11 +	u8 *nextLine, *finish;
    21.12 +
    21.13 +	nextLine = dstPtr + dstPitch; // the row that receives zeros
    21.14 +
    21.15 +	do
    21.16 +	{
    21.17 +		u32 *bP = (u32 *) srcPtr; // source is read one 32-bit word (two 16-bit pixels) at a time
    21.18 +		u32 *dP = (u32 *) dstPtr;
    21.19 +		u32 *nL = (u32 *) nextLine;
    21.20 +		u32	 currentPixel;
    21.21 +		u32	 nextPixel;
    21.22 +
    21.23 +		finish	  = (u8 *) bP + ((width + 2) << 1); // byte address (width + 2) 16-bit pixels past the row start; loop bound for bP
    21.24 +		nextPixel = *bP++; // prime the one-word look-ahead
    21.25 +
    21.26 +		do
    21.27 +		{
    21.28 +			currentPixel = nextPixel;
    21.29 +			nextPixel	 = *bP++;
    21.30 +			u32 colorA, colorB;
    21.31 +
    21.32 +#ifdef WORDS_BIGENDIAN
    21.33 +			colorA = currentPixel >> 16;
    21.34 +			colorB = currentPixel & 0xffff;
    21.35 +#else
    21.36 +			colorA = currentPixel & 0xffff; // colorA = pixel at the lower address, colorB = pixel at the higher address
    21.37 +			colorB = currentPixel >> 16;
    21.38 +#endif
    21.39 +
    21.40 +			*(dP) = colorA | colorA << 16; // first pixel duplicated into both halves of the output word
    21.41 +			*(nL) = 0;
    21.42 +
    21.43 +#ifdef WORDS_BIGENDIAN
    21.44 +			colorA = nextPixel >> 16;
    21.45 +#else
    21.46 +			colorA = nextPixel & 0xffff; // NOTE(review): this value is not read again before the next iteration overwrites it -- looks like a leftover
    21.47 +#endif
    21.48 +
    21.49 +			*(dP + 1) = colorB | (colorB << 16); // second pixel duplicated likewise
    21.50 +			*(nL + 1) = 0;
    21.51 +
    21.52 +			dP += 2;
    21.53 +			nL += 2;
    21.54 +		}
    21.55 +		while ((u8 *) bP < finish);
    21.56 +
    21.57 +		srcPtr	 += srcPitch;
    21.58 +		dstPtr	 += dstPitch << 1; // advance both output row pointers by two destination rows
    21.59 +		nextLine += dstPitch << 1;
    21.60 +	}
    21.61 +	while (--height);
    21.62 +}
   21.63 +
    21.64 +void Scanlines32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, // 2x scanline filter for 32-bit pixels: pixels are doubled horizontally and every second output row is zero-filled
    21.65 +                 u8 *dstPtr, u32 dstPitch, int width, int height) // pitches are byte strides; the delta buffer argument is ignored; height must be >= 1 (do/while)
    21.66 +{
    21.67 +	u8 *nextLine, *finish;
    21.68 +
    21.69 +	nextLine = dstPtr + dstPitch; // the row that receives zeros
    21.70 +
    21.71 +	do
    21.72 +	{
    21.73 +		u32 *bP = (u32 *) srcPtr;
    21.74 +		u32 *dP = (u32 *) dstPtr;
    21.75 +		u32 *nL = (u32 *) nextLine;
    21.76 +		u32	 currentPixel;
    21.77 +		u32	 nextPixel;
    21.78 +
    21.79 +		finish	  = (u8 *) bP + ((width + 1) << 2); // byte address (width + 1) 32-bit words past the row start; loop bound for bP
    21.80 +		nextPixel = *bP++; // prime the one-pixel look-ahead
    21.81 +
    21.82 +		do
    21.83 +		{
    21.84 +			currentPixel = nextPixel;
    21.85 +			nextPixel	 = *bP++;
    21.86 +
    21.87 +			u32 colorA, colorB;
    21.88 +
    21.89 +			colorA = currentPixel; // two consecutive source pixels are emitted per iteration
    21.90 +			colorB = nextPixel;
    21.91 +
    21.92 +			*(dP)	  = colorA; // first pixel, doubled horizontally
    21.93 +			*(dP + 1) = colorA;
    21.94 +			*(nL)	  = 0;
    21.95 +			*(nL + 1) = 0;
    21.96 +
    21.97 +			*(dP + 2) = colorB; // second pixel, doubled horizontally
    21.98 +			*(dP + 3) = colorB;
    21.99 +			*(nL + 2) = 0;
   21.100 +			*(nL + 3) = 0;
   21.101 +
   21.102 +			nextPixel = *bP++; // reload the look-ahead for the next iteration
   21.103 +
   21.104 +			dP += 4;
   21.105 +			nL += 4;
   21.106 +		}
   21.107 +		while ((u8 *) bP < finish);
   21.108 +
   21.109 +		srcPtr	 += srcPitch;
   21.110 +		dstPtr	 += dstPitch << 1; // advance both output row pointers by two destination rows
   21.111 +		nextLine += dstPitch << 1;
   21.112 +	}
   21.113 +	while (--height);
   21.114 +}
  21.115 +
   21.116 +void ScanlinesTV(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, // 2x "TV" scanline filter for 16-bit pixels: upper row = pixel + blend with its right neighbour, lower row = dimmed copy of the upper row
   21.117 +                 u8 *dstPtr, u32 dstPitch, int width, int height) // pitches are byte strides; the delta buffer argument is ignored; height must be >= 1 (do/while)
   21.118 +{
   21.119 +	u8 *nextLine, *finish;
   21.120 +	u32 colorMask = ~(RGB_LOW_BITS_MASK | (RGB_LOW_BITS_MASK << 16)); // mask replicated into both 16-bit halves, then inverted; applied before every >> 1
   21.121 +
   21.122 +	nextLine = dstPtr + dstPitch; // the dimmed row
   21.123 +
   21.124 +	do
   21.125 +	{
   21.126 +		u32 *bP = (u32 *) srcPtr; // source is read one 32-bit word (two 16-bit pixels) at a time
   21.127 +		u32 *dP = (u32 *) dstPtr;
   21.128 +		u32 *nL = (u32 *) nextLine;
   21.129 +		u32	 currentPixel;
   21.130 +		u32	 nextPixel;
   21.131 +
   21.132 +		finish	  = (u8 *) bP + ((width + 2) << 1); // byte address (width + 2) 16-bit pixels past the row start; loop bound for bP
   21.133 +		nextPixel = *bP++; // prime the one-word look-ahead
   21.134 +
   21.135 +		do
   21.136 +		{
   21.137 +			currentPixel = nextPixel;
   21.138 +			nextPixel	 = *bP++;
   21.139 +
   21.140 +			u32 colorA, colorB;
   21.141 +
   21.142 +#ifdef WORDS_BIGENDIAN
   21.143 +			colorA = currentPixel >> 16;
   21.144 +			colorB = currentPixel & 0xFFFF;
   21.145 +#else
   21.146 +			colorA = currentPixel & 0xFFFF; // colorA = pixel at the lower address, colorB = pixel at the higher address
   21.147 +			colorB = currentPixel >> 16;
   21.148 +#endif
   21.149 +
   21.150 +			*(dP) = colorA = colorA | ((((colorA & colorMask) >> 1) + // low half: colorA; high half: average of colorA and colorB
   21.151 +			                            ((colorB & colorMask) >> 1))) << 16;
   21.152 +			colorA	= ((colorA & colorMask) >> 1); // half of the packed pair ...
   21.153 +			colorA += ((colorA & colorMask) >> 1); // ... plus a quarter: roughly 3/4 intensity for the lower row
   21.154 +			*(nL)	= colorA;
   21.155 +
   21.156 +			colorA = nextPixel & 0xFFFF; // NOTE(review): no WORDS_BIGENDIAN variant here (nor for the dP packing above), unlike Scanlines -- confirm big-endian behaviour
   21.157 +
   21.158 +			*(dP + 1) = colorB = colorB | ((((colorA & colorMask) >> 1) + // low half: colorB; high half: average of colorB and the next word's pixel
   21.159 +			                                ((colorB & colorMask) >> 1))) << 16;
   21.160 +			colorB	= ((colorB & colorMask) >> 1); // same half-plus-quarter dimming for the lower row
   21.161 +			colorB += ((colorB & colorMask) >> 1);
   21.162 +
   21.163 +			*(nL + 1) = colorB;
   21.164 +
   21.165 +			dP += 2;
   21.166 +			nL += 2;
   21.167 +		}
   21.168 +		while ((u8 *) bP < finish);
   21.169 +
   21.170 +		srcPtr	 += srcPitch;
   21.171 +		dstPtr	 += dstPitch << 1; // advance both output row pointers by two destination rows
   21.172 +		nextLine += dstPitch << 1;
   21.173 +	}
   21.174 +	while (--height);
   21.175 +}
  21.176 +
   21.177 +void ScanlinesTV32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, // 2x "TV" scanline filter for 32-bit pixels: upper row = pixel + blend with its right neighbour, lower row = dimmed copy of the upper row
   21.178 +                   u8 *dstPtr, u32 dstPitch, int width, int height) // pitches are byte strides; the delta buffer argument is ignored; height must be >= 1 (do/while)
   21.179 +{
   21.180 +	u8 *nextLine, *finish;
   21.181 +	u32 colorMask = ~RGB_LOW_BITS_MASK; // applied before every >> 1
   21.182 +
   21.183 +	nextLine = dstPtr + dstPitch; // the dimmed row
   21.184 +
   21.185 +	do
   21.186 +	{
   21.187 +		u32 *bP = (u32 *) srcPtr;
   21.188 +		u32 *dP = (u32 *) dstPtr;
   21.189 +		u32 *nL = (u32 *) nextLine;
   21.190 +		u32	 currentPixel;
   21.191 +		u32	 nextPixel;
   21.192 +
   21.193 +		finish	  = (u8 *) bP + ((width + 1) << 2); // byte address (width + 1) 32-bit words past the row start; loop bound for bP
   21.194 +		nextPixel = *bP++; // prime the one-pixel look-ahead
   21.195 +
   21.196 +		do
   21.197 +		{
   21.198 +			currentPixel = nextPixel; // one source pixel is consumed per iteration
   21.199 +			nextPixel	 = *bP++;
   21.200 +
   21.201 +			u32 colorA, colorB, temp;
   21.202 +
   21.203 +			colorA = currentPixel;
   21.204 +			colorB = nextPixel;
   21.205 +
   21.206 +			*(dP)	  = colorA; // upper row: the pixel itself ...
   21.207 +			*(dP + 1) = temp = ((colorA & colorMask) >> 1) + // ... followed by the sum of the halved pixel and halved right neighbour
   21.208 +			                   ((colorB & colorMask) >> 1);
   21.209 +			temp	= ((temp & colorMask) >> 1); // half ...
   21.210 +			temp   += ((temp & colorMask) >> 1); // ... plus a quarter: roughly 3/4 intensity
   21.211 +			colorA	= ((colorA & colorMask) >> 1); // same dimming for the unblended pixel
   21.212 +			colorA += ((colorA & colorMask) >> 1);
   21.213 +
   21.214 +			*(nL)	  = colorA; // lower row: dimmed versions of the two upper-row words
   21.215 +			*(nL + 1) = temp;
   21.216 +
   21.217 +			dP += 2;
   21.218 +			nL += 2;
   21.219 +		}
   21.220 +		while ((u8 *) bP < finish);
   21.221 +
   21.222 +		srcPtr	 += srcPitch;
   21.223 +		dstPtr	 += dstPitch << 1; // advance both output row pointers by two destination rows
   21.224 +		nextLine += dstPitch << 1;
   21.225 +	}
   21.226 +	while (--height);
   21.227 +}
  21.228 +
    22.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.2 +++ b/src/filters/simple2x.cpp	Sun Mar 04 20:32:31 2012 -0600
    22.3 @@ -0,0 +1,189 @@
    22.4 +#include "../Port.h"
    22.5 +
     22.6 +void Simple2x16(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, // plain 2x pixel doubling for 16-bit pixels; the delta buffer argument is ignored
     22.7 +                u8 *dstPtr, u32 dstPitch, int width, int height) // pitches are byte strides; height must be >= 1 (do/while)
     22.8 +{
     22.9 +	u8 *nextLine, *finish;
    22.10 +
    22.11 +	nextLine = dstPtr + dstPitch; // second of the two output rows produced per source row
    22.12 +
    22.13 +	do
    22.14 +	{
    22.15 +		u32 *bP = (u32 *) srcPtr; // source is read one 32-bit word (two 16-bit pixels) at a time
    22.16 +		u32 *dP = (u32 *) dstPtr;
    22.17 +		u32 *nL = (u32 *) nextLine;
    22.18 +		u32	 currentPixel;
    22.19 +
    22.20 +		finish		 = (u8 *) bP + ((width + 2) << 1); // byte address (width + 2) 16-bit pixels past the row start; loop bound for bP
    22.21 +		currentPixel = *bP++; // first word is loaded before the loop; the next one is loaded at the end of each iteration
    22.22 +
    22.23 +		do
    22.24 +		{
    22.25 +#ifdef WORDS_BIGENDIAN
    22.26 +			u32 color = currentPixel >> 16;
    22.27 +#else
    22.28 +			u32 color = currentPixel & 0xffff; // pixel at the lower address
    22.29 +#endif
    22.30 +
    22.31 +			color = color | (color << 16); // same pixel in both halves of the output word
    22.32 +
    22.33 +			*(dP) = color; // written to both output rows -> 2x2 block
    22.34 +			*(nL) = color;
    22.35 +
    22.36 +#ifdef WORDS_BIGENDIAN
    22.37 +			color = currentPixel & 0xffff;
    22.38 +#else
    22.39 +			color = currentPixel >> 16; // pixel at the higher address
    22.40 +#endif
    22.41 +			color	  = color | (color << 16);
    22.42 +			*(dP + 1) = color;
    22.43 +			*(nL + 1) = color;
    22.44 +
    22.45 +			currentPixel = *bP++;
    22.46 +
    22.47 +			dP += 2;
    22.48 +			nL += 2;
    22.49 +		}
    22.50 +		while ((u8 *) bP < finish);
    22.51 +
    22.52 +		srcPtr	 += srcPitch;
    22.53 +		dstPtr	 += dstPitch << 1; // advance both output row pointers by two destination rows
    22.54 +		nextLine += dstPitch << 1;
    22.55 +	}
    22.56 +	while (--height);
    22.57 +}
   22.58 +
    22.59 +void Simple2x32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, // plain 2x pixel doubling for 32-bit pixels; the delta buffer argument is ignored
    22.60 +                u8 *dstPtr, u32 dstPitch, int width, int height) // pitches are byte strides; height must be >= 1 (do/while)
    22.61 +{
    22.62 +	u8 *nextLine, *finish;
    22.63 +
    22.64 +	nextLine = dstPtr + dstPitch; // second of the two output rows produced per source row
    22.65 +
    22.66 +	do
    22.67 +	{
    22.68 +		u32 *bP = (u32 *) srcPtr;
    22.69 +		u32 *dP = (u32 *) dstPtr;
    22.70 +		u32 *nL = (u32 *) nextLine;
    22.71 +		u32	 currentPixel;
    22.72 +
    22.73 +		finish		 = (u8 *) bP + ((width + 1) << 2); // byte address (width + 1) 32-bit words past the row start; loop bound for bP
    22.74 +		currentPixel = *bP++; // first pixel is loaded before the loop; the next one is loaded at the end of each iteration
    22.75 +
    22.76 +		do
    22.77 +		{
    22.78 +			u32 color = currentPixel;
    22.79 +
    22.80 +			*(dP)	  = color; // copy the pixel into a 2x2 block
    22.81 +			*(dP + 1) = color;
    22.82 +			*(nL)	  = color;
    22.83 +			*(nL + 1) = color;
    22.84 +
    22.85 +			currentPixel = *bP++;
    22.86 +
    22.87 +			dP += 2;
    22.88 +			nL += 2;
    22.89 +		}
    22.90 +		while ((u8 *) bP < finish);
    22.91 +
    22.92 +		srcPtr	 += srcPitch;
    22.93 +		dstPtr	 += dstPitch << 1; // advance both output row pointers by two destination rows
    22.94 +		nextLine += dstPitch << 1;
    22.95 +	}
    22.96 +	while (--height);
    22.97 +}
   22.98 +
   22.99 +#if 0
   22.100 +// generic Simple Nx magnification filter (pointer-bound variant; compiled out by the enclosing #if 0)
   22.101 +template <int magnification, typename ColorType>
   22.102 +void SimpleNx(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, // the delta buffer argument is ignored
   22.103 +              u8 *dstPtr, u32 dstPitch, int width, int height) // pitches are byte strides on entry; height must be >= 1 (do/while)
   22.104 +{
   22.105 +	srcPitch = srcPitch / sizeof(ColorType) - width; // from here on: source elements to skip after the last pixel of a row
   22.106 +	u32 dstNextP = dstPitch / sizeof(ColorType); // destination row length in elements
   22.107 +	u32 dstNextL = (dstNextP - width) * magnification;  // skip to the next magnified 'line'
   22.108 +	dstNextP -= magnification; // from here on: step from the end of one block row to the start of the block's next row
   22.109 +
   22.110 +	u32 offset = (dstPitch + sizeof(ColorType)) * magnification - dstPitch; // bytes from a block's first element to one past the last element of its last row
   22.111 +
   22.112 +	ColorType *src = (ColorType *)srcPtr;
   22.113 +	ColorType *dst = (ColorType *)dstPtr;
   22.114 +
   22.115 +	do // per src line
   22.116 +	{
   22.117 +		u8 *finishP = (u8 *)dst + offset; // end bound of the current output block
   22.118 +		for (int x = 0; x < width; ++x) // per pixel in line
   22.119 +		{
   22.120 +			ColorType  col	   = *src;
   22.121 +			ColorType *dst2	   = dst;
   22.122 +			u8 *	   finishM = (u8 *)(dst + magnification); // end bound of the block's current row
   22.123 +			do // dst magnified pixel
   22.124 +			{
   22.125 +				do
   22.126 +				{
   22.127 +					*dst2 = col; // fill one block row with the source colour
   22.128 +				}
   22.129 +				while ((u8 *)++dst2 < finishM);
   22.130 +				dst2	+= dstNextP; // move to the start of the block's next row
   22.131 +				finishM += dstPitch;
   22.132 +			}
   22.133 +			while ((u8 *)dst2 < finishP);
   22.134 +
   22.135 +			++src;
   22.136 +			dst		+= magnification; // next block starts magnification elements to the right
   22.137 +			finishP += magnification * sizeof(ColorType); // keep the block end bound in step with dst
   22.138 +		}
   22.139 +		src += srcPitch;
   22.140 +		dst += dstNextL;
   22.141 +	}
   22.142 +	while (--height);
   22.143 +}
  22.144 +
  22.145 +#else
  22.146 +
   22.147 +// generic Simple Nx magnification filter: every source pixel is copied into a magnification x magnification block (counted-loop variant, the one actually compiled)
   22.148 +template <int magnification, typename ColorType>
   22.149 +void SimpleNx(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, // the delta buffer argument is ignored
   22.150 +              u8 *dstPtr, u32 dstPitch, int width, int height) // pitches are byte strides on entry; height must be >= 1 (do/while)
   22.151 +{
   22.152 +	srcPitch  = srcPitch / sizeof(ColorType) - width; // from here on: source elements to skip after the last pixel of a row
   22.153 +	dstPitch /= sizeof(ColorType); // destination row length in elements
   22.154 +	u32 dstBlank = (dstPitch - width) * magnification; // skip to the next magnified 'line'
   22.155 +	dstPitch -= magnification; // from here on: step from the end of one block row to the start of the block's next row
   22.156 +
   22.157 +	ColorType *src = (ColorType *)srcPtr;
   22.158 +	ColorType *dst = (ColorType *)dstPtr;
   22.159 +
   22.160 +	do // per src line
   22.161 +	{
   22.162 +		for (int x = 0; x < width; ++x) // per pixel in src line
   22.163 +		{
   22.164 +			ColorType  col	= *src;
   22.165 +			ColorType *dst2 = dst; // write cursor inside the current output block
   22.166 +			for (int dy = 0; dy < magnification; ++dy) // dst magnified pixel
   22.167 +			{
   22.168 +				for (int dx = 0; dx < magnification; ++dx)
   22.169 +				{
   22.170 +					*dst2 = col;
   22.171 +					++dst2;
   22.172 +				}
   22.173 +				dst2 += dstPitch; // jump to the start of the block's next row
   22.174 +			}
   22.175 +
   22.176 +			++src;
   22.177 +			dst += magnification; // next block starts magnification elements to the right
   22.178 +		}
   22.179 +		src += srcPitch;
   22.180 +		dst += dstBlank;
   22.181 +	}
   22.182 +	while (--height);
   22.183 +}
  22.184 +
  22.185 +#endif
  22.186 +
   22.187 +typedef void (*SimpleNxFP)(u8 *, u32, u8 *, u8 *, u32, int, int); // pointer type matching SimpleNx: (srcPtr, srcPitch, deltaPtr, dstPtr, dstPitch, width, height)
   22.188 +
   22.189 +SimpleNxFP Simple3x16 = SimpleNx<3, u16>; // 3x magnification, u16 pixels
   22.190 +SimpleNxFP Simple3x32 = SimpleNx<3, u32>; // 3x magnification, u32 pixels
   22.191 +SimpleNxFP Simple4x16 = SimpleNx<4, u16>; // 4x magnification, u16 pixels
   22.192 +SimpleNxFP Simple4x32 = SimpleNx<4, u32>; // 4x magnification, u32 pixels