# HG changeset patch # User Robert McIntyre # Date 1330914751 21600 # Node ID b970226568d2b1b9d00cd68282563a521d93930e # Parent 18eaae41bde3c0921a431c10c383783b8d25e1a5 brought in filters package diff -r 18eaae41bde3 -r b970226568d2 configure.ac --- a/configure.ac Sun Mar 04 18:30:06 2012 -0600 +++ b/configure.ac Sun Mar 04 20:32:31 2012 -0600 @@ -18,7 +18,7 @@ AC_PROG_RANLIB AC_PROG_CPP AC_PROG_MKDIR_P - +AC_PATH_PROG(NASM, nasm) # Checks for libraries. AC_CHECK_LIB([SDL], [SDL_Init]) @@ -61,7 +61,8 @@ src/gb/Makefile src/gba/Makefile src/common/Makefile - src/SFMT/Makefile]) + src/SFMT/Makefile + src/filters/Makefile]) diff -r 18eaae41bde3 -r b970226568d2 src/Makefile.am --- a/src/Makefile.am Sun Mar 04 18:30:06 2012 -0600 +++ b/src/Makefile.am Sun Mar 04 20:32:31 2012 -0600 @@ -1,3 +1,3 @@ -SUBDIRS = SFMT lua gb gba common +SUBDIRS = SFMT lua gb gba common filters noinst_HEADERS = Port.h NLS.h diff -r 18eaae41bde3 -r b970226568d2 src/filters/2xSaI.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/filters/2xSaI.cpp Sun Mar 04 20:32:31 2012 -0600 @@ -0,0 +1,1434 @@ +#include "../common/System.h" + +extern "C" +{ +#ifdef MMX + void _2xSaILine(u8 *srcPtr, u8 *deltaPtr, u32 srcPitch, + u32 width, u8 *dstPtr, u32 dstPitch); + void _2xSaISuperEagleLine(u8 *srcPtr, u8 *deltaPtr, + u32 srcPitch, u32 width, + u8 *dstPtr, u32 dstPitch); + void _2xSaISuper2xSaILine(u8 *srcPtr, u8 *deltaPtr, + u32 srcPitch, u32 width, + u8 *dstPtr, u32 dstPitch); + void Init_2xSaIMMX(u32 BitFormat); + void BilinearMMX(u16 *A, u16 *B, u16 *C, u16 *D, + u16 *dx, u16 *dy, u8 *dP); + void BilinearMMXGrid0(u16 *A, u16 *B, u16 *C, u16 *D, + u16 *dx, u16 *dy, u8 *dP); + void BilinearMMXGrid1(u16 *A, u16 *B, u16 *C, u16 *D, + u16 *dx, u16 *dy, u8 *dP); + void EndMMX(); + + bool cpu_mmx = 1; +#endif +} +static u32 colorMask = 0xF7DEF7DE; +static u32 lowPixelMask = 0x08210821; +static u32 qcolorMask = 0xE79CE79C; +static u32 qlowpixelMask = 0x18631863; +static u32 redblueMask = 0xF81F; 
+static u32 greenMask = 0x7E0;
+
+u32 qRGB_COLOR_MASK[2] = { 0xF7DEF7DE, 0xF7DEF7DE };
+
+extern void hq2x_init(unsigned);
+
+// Selects the interpolation masks for the current systemColorDepth (and, in
+// 16-bit mode, for the 565 or 555 layout named by BitFormat), then
+// initialises hq2x and - when built with MMX - the assembly tables.
+// Returns 1 on success and 0 for an unsupported depth/format combination.
+// Note: the 32-bit branch leaves redblueMask/greenMask untouched.
+int Init_2xSaI(u32 BitFormat)
+{
+    if (systemColorDepth == 16)
+    {
+        if (BitFormat == 565)
+        {
+            colorMask     = 0xF7DEF7DE;
+            lowPixelMask  = 0x08210821;
+            qcolorMask    = 0xE79CE79C;
+            qlowpixelMask = 0x18631863;
+            redblueMask   = 0xF81F;
+            greenMask     = 0x7E0;
+            qRGB_COLOR_MASK[0] = qRGB_COLOR_MASK[1] = 0xF7DEF7DE;
+            hq2x_init(16);
+        }
+        else if (BitFormat == 555)
+        {
+            colorMask     = 0x7BDE7BDE;
+            lowPixelMask  = 0x04210421;
+            qcolorMask    = 0x739C739C;
+            qlowpixelMask = 0x0C630C63;
+            redblueMask   = 0x7C1F;
+            greenMask     = 0x3E0;
+            qRGB_COLOR_MASK[0] = qRGB_COLOR_MASK[1] = 0x7BDE7BDE;
+            hq2x_init(15);
+        }
+        else
+        {
+            return 0;
+        }
+    }
+    else if (systemColorDepth == 32)
+    {
+        colorMask     = 0xfefefe;
+        lowPixelMask  = 0x010101;
+        qcolorMask    = 0xfcfcfc;
+        qlowpixelMask = 0x030303;
+        qRGB_COLOR_MASK[0] = qRGB_COLOR_MASK[1] = 0xfefefe;
+        hq2x_init(32);
+    }
+    else
+    {
+        return 0;
+    }
+
+#ifdef MMX
+    Init_2xSaIMMX(BitFormat);
+#endif
+
+    return 1;
+}
+
+// Neighbourhood vote used to break ties between two candidate colours.
+// x counts how many of C and D equal A; y counts how many of the ones that
+// did not equal A equal B.  The result is (x <= 1) - (y <= 1), i.e. -1, 0
+// or +1.
+static inline int GetResult(u32 A, u32 B, u32 C, u32 D)
+{
+    int x = 0;
+    int y = 0;
+
+    if (A == C)
+        x += 1;
+    else if (B == C)
+        y += 1;
+    if (A == D)
+        x += 1;
+    else if (B == D)
+        y += 1;
+
+    return (x <= 1 ? 1 : 0) - (y <= 1 ? 1 : 0);
+}
+
+// Same vote as GetResult(); the fifth argument is accepted and ignored.
+static inline int GetResult1(u32 A, u32 B, u32 C, u32 D, u32 /* E */)
+{
+    return GetResult(A, B, C, D);
+}
+
+// GetResult() with the sign flipped; the fifth argument is ignored.
+static inline int GetResult2(u32 A, u32 B, u32 C, u32 D, u32 /* E */)
+{
+    return -GetResult(A, B, C, D);
+}
+
+// Per-channel average of two pixels using the masks chosen by Init_2xSaI().
+static inline u32 INTERPOLATE(u32 A, u32 B)
+{
+    if (A == B)
+        return A;
+
+    return ((A & colorMask) >> 1) + ((B & colorMask) >> 1)
+           + (A & B & lowPixelMask);
+}
+
+// Per-channel average of four pixels using the masks chosen by Init_2xSaI().
+static inline u32 Q_INTERPOLATE(u32 A, u32 B, u32 C, u32 D)
+{
+    const u32 high = ((A & qcolorMask) >> 2) + ((B & qcolorMask) >> 2)
+                     + ((C & qcolorMask) >> 2) + ((D & qcolorMask) >> 2);
+    const u32 low  = (A & qlowpixelMask) + (B & qlowpixelMask)
+                     + (C & qlowpixelMask) + (D & qlowpixelMask);
+
+    return high + ((low >> 2) & qlowpixelMask);
+}
+
+#define BLUE_MASK565 0x001F001F
+#define RED_MASK565 0xF800F800
+#define GREEN_MASK565 0x07E007E0
+
+#define BLUE_MASK555 0x001F001F
+#define RED_MASK555 0x7C007C00
+#define GREEN_MASK555 0x03E003E0
+
+// Writes two horizontally adjacent 16-bit pixels with one 32-bit store,
+// "left" landing at the lower address on either byte order.
+static inline void StorePair16(u8 *dst, u32 left, u32 right)
+{
+#ifdef WORDS_BIGENDIAN
+    *((u32 *) dst) = (left << 16) | right;
+#else
+    *((u32 *) dst) = left | (right << 16);
+#endif
+}
+
+// Super2xSaI kernel: computes the 2x2 output quad for the source pixel at
+// bP.  Nextline is the source pitch in pixels.  Reads the 4x4 neighbourhood
+// from one row/column before bP to two rows/columns after it.
+template <typename Pixel>
+static inline void Super2xSaIQuad(const Pixel *bP, u32 Nextline,
+                                  u32 &product1a, u32 &product1b,
+                                  u32 &product2a, u32 &product2b)
+{
+    //---------------------------------------  B0 B1 B2 B3
+    //                                          4  5  6 S2
+    //                                          1  2  3 S1
+    //                                         A0 A1 A2 A3
+    const u32 colorB0 = *(bP - Nextline - 1);
+    const u32 colorB1 = *(bP - Nextline);
+    const u32 colorB2 = *(bP - Nextline + 1);
+    const u32 colorB3 = *(bP - Nextline + 2);
+
+    const u32 color4  = *(bP - 1);
+    const u32 color5  = *(bP);
+    const u32 color6  = *(bP + 1);
+    const u32 colorS2 = *(bP + 2);
+
+    const u32 color1  = *(bP + Nextline - 1);
+    const u32 color2  = *(bP + Nextline);
+    const u32 color3  = *(bP + Nextline + 1);
+    const u32 colorS1 = *(bP + Nextline + 2);
+
+    const u32 colorA0 = *(bP + Nextline + Nextline - 1);
+    const u32 colorA1 = *(bP + Nextline + Nextline);
+    const u32 colorA2 = *(bP + Nextline + Nextline + 1);
+    const u32 colorA3 = *(bP + Nextline + Nextline + 2);
+
+    //--------------------------------------
+    if (color2 == color6 && color5 != color3)
+    {
+        product2b = product1b = color2;
+    }
+    else if (color5 == color3 && color2 != color6)
+    {
+        product2b = product1b = color5;
+    }
+    else if (color5 == color3 && color2 == color6)
+    {
+        int r = 0;
+
+        r += GetResult(color6, color5, color1, colorA1);
+        r += GetResult(color6, color5, color4, colorB1);
+        r += GetResult(color6, color5, colorA2, colorS1);
+        r += GetResult(color6, color5, colorB2, colorS2);
+
+        if (r > 0)
+            product2b = product1b = color6;
+        else if (r < 0)
+            product2b = product1b = color5;
+        else
+            product2b = product1b = INTERPOLATE(color5, color6);
+    }
+    else
+    {
+        if (color6 == color3 && color3 == colorA1
+            && color2 != colorA2 && color3 != colorA0)
+            product2b = Q_INTERPOLATE(color3, color3, color3, color2);
+        else if (color5 == color2 && color2 == colorA2
+                 && colorA1 != color3 && color2 != colorA3)
+            product2b = Q_INTERPOLATE(color2, color2, color2, color3);
+        else
+            product2b = INTERPOLATE(color2, color3);
+
+        if (color6 == color3 && color6 == colorB1
+            && color5 != colorB2 && color6 != colorB0)
+            product1b = Q_INTERPOLATE(color6, color6, color6, color5);
+        else if (color5 == color2 && color5 == colorB2
+                 && colorB1 != color6 && color5 != colorB3)
+            product1b = Q_INTERPOLATE(color6, color5, color5, color5);
+        else
+            product1b = INTERPOLATE(color5, color6);
+    }
+
+    if (color5 == color3 && color2 != color6 && color4 == color5
+        && color5 != colorA2)
+        product2a = INTERPOLATE(color2, color5);
+    else if (color5 == color1 && color6 == color5
+             && color4 != color2 && color5 != colorA0)
+        product2a = INTERPOLATE(color2, color5);
+    else
+        product2a = color2;
+
+    if (color2 == color6 && color5 != color3 && color1 == color2
+        && color2 != colorB2)
+        product1a = INTERPOLATE(color2, color5);
+    else if (color4 == color2 && color3 == color2
+             && color1 != color5 && color2 != colorB0)
+        product1a = INTERPOLATE(color2, color5);
+    else
+        product1a = color5;
+}
+
+// Super2xSaI, 16 bits per pixel.  Doubles a width x height source into
+// dstPtr; pitches are in bytes.  deltaPtr is only consumed by the MMX line
+// routine.  Non-positive sizes are ignored (they used to wrap the unsigned
+// loop counters).
+void Super2xSaI(u8 *srcPtr, u32 srcPitch,
+                u8 *deltaPtr, u8 *dstPtr, u32 dstPitch,
+                int width, int height)
+{
+    (void) deltaPtr; // unused when built without MMX
+
+    if (width <= 0 || height <= 0)
+        return;
+
+#ifdef MMX
+    if (cpu_mmx)
+    {
+        for (; height; height--)
+        {
+            _2xSaISuper2xSaILine(srcPtr, deltaPtr, srcPitch, width,
+                                 dstPtr, dstPitch);
+            srcPtr   += srcPitch;
+            dstPtr   += dstPitch * 2;
+            deltaPtr += srcPitch;
+        }
+        return;
+    }
+#endif
+
+    const u32 Nextline = srcPitch >> 1;
+
+    for (; height; height--)
+    {
+        const u16 *bP = (const u16 *) srcPtr;
+        u8 *       dP = dstPtr;
+
+        for (int col = 0; col < width; ++col)
+        {
+            u32 product1a, product1b, product2a, product2b;
+
+            Super2xSaIQuad(bP, Nextline,
+                           product1a, product1b, product2a, product2b);
+
+            StorePair16(dP, product1a, product1b);
+            StorePair16(dP + dstPitch, product2a, product2b);
+
+            bP += 1;
+            dP += sizeof(u32);
+        }
+
+        srcPtr += srcPitch;
+        dstPtr += dstPitch << 1;
+    }
+}
+
+// Super2xSaI, 32 bits per pixel.  The delta buffer is not used.
+void Super2xSaI32(u8 *srcPtr, u32 srcPitch,
+                  u8 * /* deltaPtr */, u8 *dstPtr, u32 dstPitch,
+                  int width, int height)
+{
+    if (width <= 0 || height <= 0)
+        return;
+
+    const u32 Nextline = srcPitch >> 2;
+    const u32 dstNext  = dstPitch >> 2;
+
+    for (; height; height--)
+    {
+        const u32 *bP = (const u32 *) srcPtr;
+        u32 *      dP = (u32 *) dstPtr;
+
+        for (int col = 0; col < width; ++col)
+        {
+            u32 product1a, product1b, product2a, product2b;
+
+            Super2xSaIQuad(bP, Nextline,
+                           product1a, product1b, product2a, product2b);
+
+            *(dP)               = product1a;
+            *(dP + 1)           = product1b;
+            *(dP + dstNext)     = product2a;
+            *(dP + dstNext + 1) = product2b;
+
+            bP += 1;
+            dP += 2;
+        }
+
+        srcPtr += srcPitch;
+        dstPtr += dstPitch << 1;
+    }
+}
+
+// SuperEagle kernel: computes the 2x2 output quad for the source pixel at
+// bP.  Nextline is the source pitch in pixels.
+template <typename Pixel>
+static inline void SuperEagleQuad(const Pixel *bP, u32 Nextline,
+                                  u32 &product1a, u32 &product1b,
+                                  u32 &product2a, u32 &product2b)
+{
+    const u32 colorB1 = *(bP - Nextline);
+    const u32 colorB2 = *(bP - Nextline + 1);
+
+    const u32 color4  = *(bP - 1);
+    const u32 color5  = *(bP);
+    const u32 color6  = *(bP + 1);
+    const u32 colorS2 = *(bP + 2);
+
+    const u32 color1  = *(bP + Nextline - 1);
+    const u32 color2  = *(bP + Nextline);
+    const u32 color3  = *(bP + Nextline + 1);
+    const u32 colorS1 = *(bP + Nextline + 2);
+
+    const u32 colorA1 = *(bP + Nextline + Nextline);
+    const u32 colorA2 = *(bP + Nextline + Nextline + 1);
+
+    // --------------------------------------
+    if (color2 == color6 && color5 != color3)
+    {
+        product1b = product2a = color2;
+
+        if ((color1 == color2) || (color6 == colorB2))
+        {
+            product1a = INTERPOLATE(color2, color5);
+            product1a = INTERPOLATE(color2, product1a);
+        }
+        else
+        {
+            product1a = INTERPOLATE(color5, color6);
+        }
+
+        if ((color6 == colorS2) || (color2 == colorA1))
+        {
+            product2b = INTERPOLATE(color2, color3);
+            product2b = INTERPOLATE(color2, product2b);
+        }
+        else
+        {
+            product2b = INTERPOLATE(color2, color3);
+        }
+    }
+    else if (color5 == color3 && color2 != color6)
+    {
+        product2b = product1a = color5;
+
+        if ((colorB1 == color5) || (color3 == colorS1))
+        {
+            product1b = INTERPOLATE(color5, color6);
+            product1b = INTERPOLATE(color5, product1b);
+        }
+        else
+        {
+            product1b = INTERPOLATE(color5, color6);
+        }
+
+        if ((color3 == colorA2) || (color4 == color5))
+        {
+            product2a = INTERPOLATE(color5, color2);
+            product2a = INTERPOLATE(color5, product2a);
+        }
+        else
+        {
+            product2a = INTERPOLATE(color2, color3);
+        }
+    }
+    else if (color5 == color3 && color2 == color6)
+    {
+        int r = 0;
+
+        r += GetResult(color6, color5, color1, colorA1);
+        r += GetResult(color6, color5, color4, colorB1);
+        r += GetResult(color6, color5, colorA2, colorS1);
+        r += GetResult(color6, color5, colorB2, colorS2);
+
+        if (r > 0)
+        {
+            product1b = product2a = color2;
+            product1a = product2b = INTERPOLATE(color5, color6);
+        }
+        else if (r < 0)
+        {
+            product2b = product1a = color5;
+            product1b = product2a = INTERPOLATE(color5, color6);
+        }
+        else
+        {
+            product2b = product1a = color5;
+            product1b = product2a = color2;
+        }
+    }
+    else
+    {
+        product2b = product1a = INTERPOLATE(color2, color6);
+        product2b = Q_INTERPOLATE(color3, color3, color3, product2b);
+        product1a = Q_INTERPOLATE(color5, color5, color5, product1a);
+
+        product2a = product1b = INTERPOLATE(color5, color3);
+        product2a = Q_INTERPOLATE(color2, color2, color2, product2a);
+        product1b = Q_INTERPOLATE(color6, color6, color6, product1b);
+    }
+}
+
+// SuperEagle, 16 bits per pixel.  In the C path every processed source
+// pixel is also copied into the delta buffer.  Non-positive sizes are
+// ignored.
+void SuperEagle(u8 *srcPtr, u32 srcPitch, u8 *deltaPtr,
+                u8 *dstPtr, u32 dstPitch, int width, int height)
+{
+    if (width <= 0 || height <= 0)
+        return;
+
+#ifdef MMX
+    if (cpu_mmx)
+    {
+        for (; height; height--)
+        {
+            _2xSaISuperEagleLine(srcPtr, deltaPtr, srcPitch, width,
+                                 dstPtr, dstPitch);
+            srcPtr   += srcPitch;
+            dstPtr   += dstPitch * 2;
+            deltaPtr += srcPitch;
+        }
+        return;
+    }
+#endif
+
+    const u32 Nextline = srcPitch >> 1;
+
+    for (; height; height--)
+    {
+        const u16 *bP = (const u16 *) srcPtr;
+        u16 *      xP = (u16 *) deltaPtr;
+        u8 *       dP = dstPtr;
+
+        for (int col = 0; col < width; ++col)
+        {
+            u32 product1a, product1b, product2a, product2b;
+
+            SuperEagleQuad(bP, Nextline,
+                           product1a, product1b, product2a, product2b);
+
+            StorePair16(dP, product1a, product1b);
+            StorePair16(dP + dstPitch, product2a, product2b);
+            *xP = *bP;
+
+            bP += 1;
+            xP += 1;
+            dP += sizeof(u32);
+        }
+
+        srcPtr   += srcPitch;
+        dstPtr   += dstPitch << 1;
+        deltaPtr += srcPitch;
+    }
+}
+
+// SuperEagle, 32 bits per pixel.  Every processed source pixel is also
+// copied into the delta buffer.  Non-positive sizes are ignored.
+void SuperEagle32(u8 *srcPtr, u32 srcPitch, u8 *deltaPtr,
+                  u8 *dstPtr, u32 dstPitch, int width, int height)
+{
+    if (width <= 0 || height <= 0)
+        return;
+
+    const u32 Nextline = srcPitch >> 2;
+    const u32 dstNext  = dstPitch >> 2;
+
+    for (; height; height--)
+    {
+        const u32 *bP = (const u32 *) srcPtr;
+        u32 *      xP = (u32 *) deltaPtr;
+        u32 *      dP = (u32 *) dstPtr;
+
+        for (int col = 0; col < width; ++col)
+        {
+            u32 product1a, product1b, product2a, product2b;
+
+            SuperEagleQuad(bP, Nextline,
+                           product1a, product1b, product2a, product2b);
+
+            *(dP)               = product1a;
+            *(dP + 1)           = product1b;
+            *(dP + dstNext)     = product2a;
+            *(dP + dstNext + 1) = product2b;
+            *xP = *bP;
+
+            bP += 1;
+            xP += 1;
+            dP += 2;
+        }
+
+        srcPtr   += srcPitch;
+        dstPtr   += dstPitch << 1;
+        deltaPtr += srcPitch;
+    }
+}
+
+// 2xSaI kernel: for the source pixel A at bP computes the three
+// interpolated members of the output quad (the fourth is A itself):
+// product = right of A, product1 = below A, product2 = diagonal.
+// Nextline is the source pitch in pixels.
+template <typename Pixel>
+static inline void _2xSaIQuad(const Pixel *bP, u32 Nextline,
+                              u32 &product, u32 &product1, u32 &product2)
+{
+    //---------------------------------------
+    // Map of the pixels:                    I|E F|J
+    //                                       G|A B|K
+    //                                       H|C D|L
+    //                                       M|N O|P
+    const u32 colorI = *(bP - Nextline - 1);
+    const u32 colorE = *(bP - Nextline);
+    const u32 colorF = *(bP - Nextline + 1);
+    const u32 colorJ = *(bP - Nextline + 2);
+
+    const u32 colorG = *(bP - 1);
+    const u32 colorA = *(bP);
+    const u32 colorB = *(bP + 1);
+    const u32 colorK = *(bP + 2);
+
+    const u32 colorH = *(bP + Nextline - 1);
+    const u32 colorC = *(bP + Nextline);
+    const u32 colorD = *(bP + Nextline + 1);
+    const u32 colorL = *(bP + Nextline + 2);
+
+    const u32 colorM = *(bP + Nextline + Nextline - 1);
+    const u32 colorN = *(bP + Nextline + Nextline);
+    const u32 colorO = *(bP + Nextline + Nextline + 1);
+    const u32 colorP = *(bP + Nextline + Nextline + 2);
+
+    if ((colorA == colorD) && (colorB != colorC))
+    {
+        if (((colorA == colorE) && (colorB == colorL)) ||
+            ((colorA == colorC) && (colorA == colorF)
+             && (colorB != colorE) && (colorB == colorJ)))
+            product = colorA;
+        else
+            product = INTERPOLATE(colorA, colorB);
+
+        if (((colorA == colorG) && (colorC == colorO)) ||
+            ((colorA == colorB) && (colorA == colorH)
+             && (colorG != colorC) && (colorC == colorM)))
+            product1 = colorA;
+        else
+            product1 = INTERPOLATE(colorA, colorC);
+
+        product2 = colorA;
+    }
+    else if ((colorB == colorC) && (colorA != colorD))
+    {
+        if (((colorB == colorF) && (colorA == colorH)) ||
+            ((colorB == colorE) && (colorB == colorD)
+             && (colorA != colorF) && (colorA == colorI)))
+            product = colorB;
+        else
+            product = INTERPOLATE(colorA, colorB);
+
+        if (((colorC == colorH) && (colorA == colorF)) ||
+            ((colorC == colorG) && (colorC == colorD)
+             && (colorA != colorH) && (colorA == colorI)))
+            product1 = colorC;
+        else
+            product1 = INTERPOLATE(colorA, colorC);
+
+        product2 = colorB;
+    }
+    else if ((colorA == colorD) && (colorB == colorC))
+    {
+        if (colorA == colorB)
+        {
+            product  = colorA;
+            product1 = colorA;
+            product2 = colorA;
+        }
+        else
+        {
+            int r = 0;
+
+            product1 = INTERPOLATE(colorA, colorC);
+            product  = INTERPOLATE(colorA, colorB);
+
+            r += GetResult1(colorA, colorB, colorG, colorE, colorI);
+            r += GetResult2(colorB, colorA, colorK, colorF, colorJ);
+            r += GetResult2(colorB, colorA, colorH, colorN, colorM);
+            r += GetResult1(colorA, colorB, colorL, colorO, colorP);
+
+            if (r > 0)
+                product2 = colorA;
+            else if (r < 0)
+                product2 = colorB;
+            else
+                product2 = Q_INTERPOLATE(colorA, colorB, colorC, colorD);
+        }
+    }
+    else
+    {
+        product2 = Q_INTERPOLATE(colorA, colorB, colorC, colorD);
+
+        if ((colorA == colorC) && (colorA == colorF)
+            && (colorB != colorE) && (colorB == colorJ))
+            product = colorA;
+        else if ((colorB == colorE) && (colorB == colorD)
+                 && (colorA != colorF) && (colorA == colorI))
+            product = colorB;
+        else
+            product = INTERPOLATE(colorA, colorB);
+
+        if ((colorA == colorB) && (colorA == colorH)
+            && (colorG != colorC) && (colorC == colorM))
+            product1 = colorA;
+        else if ((colorC == colorG) && (colorC == colorD)
+                 && (colorA != colorH) && (colorA == colorI))
+            product1 = colorC;
+        else
+            product1 = INTERPOLATE(colorA, colorC);
+    }
+}
+
+// 2xSaI, 16 bits per pixel.  deltaPtr is only consumed by the MMX line
+// routine.  Non-positive sizes are ignored.
+void _2xSaI(u8 *srcPtr, u32 srcPitch, u8 *deltaPtr,
+            u8 *dstPtr, u32 dstPitch, int width, int height)
+{
+    (void) deltaPtr; // unused when built without MMX
+
+    if (width <= 0 || height <= 0)
+        return;
+
+#ifdef MMX
+    if (cpu_mmx)
+    {
+        for (; height; height -= 1)
+        {
+            _2xSaILine(srcPtr, deltaPtr, srcPitch, width, dstPtr, dstPitch);
+            srcPtr   += srcPitch;
+            dstPtr   += dstPitch * 2;
+            deltaPtr += srcPitch;
+        }
+        return;
+    }
+#endif
+
+    const u32 Nextline = srcPitch >> 1;
+
+    for (; height; height--)
+    {
+        const u16 *bP = (const u16 *) srcPtr;
+        u8 *       dP = dstPtr;
+
+        for (int col = 0; col < width; ++col)
+        {
+            const u32 colorA = *bP;
+            u32       product, product1, product2;
+
+            _2xSaIQuad(bP, Nextline, product, product1, product2);
+
+            StorePair16(dP, colorA, product);
+            StorePair16(dP + dstPitch, product1, product2);
+
+            bP += 1;
+            dP += sizeof(u32);
+        }
+
+        srcPtr += srcPitch;
+        dstPtr += dstPitch << 1;
+    }
+}
+
+// 2xSaI, 32 bits per pixel.  The delta buffer is not used.
+void _2xSaI32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
+              u8 *dstPtr, u32 dstPitch, int width, int height)
+{
+    if (width <= 0 || height <= 0)
+        return;
+
+    const u32 Nextline = srcPitch >> 2;
+    const u32 dstNext  = dstPitch >> 2;
+
+    for (; height; height--)
+    {
+        const u32 *bP = (const u32 *) srcPtr;
+        u32 *      dP = (u32 *) dstPtr;
+
+        for (int col = 0; col < width; ++col)
+        {
+            const u32 colorA = *bP;
+            u32       product, product1, product2;
+
+            _2xSaIQuad(bP, Nextline, product, product1, product2);
+
+            *(dP)               = colorA;
+            *(dP + 1)           = product;
+            *(dP + dstNext)     = product1;
+            *(dP + dstNext + 1) = product2;
+
+            bP += 1;
+            dP += 2;
+        }
+
+        srcPtr += srcPitch;
+        dstPtr += dstPitch << 1;
+    }
+}
+
+// Blends two 16-bit pixels; x is a 16-bit fraction (weight of B) that is
+// reduced to 5 bits.  Uses redblueMask/greenMask set by Init_2xSaI().
+static u32 Bilinear(u32 A, u32 B, u32 x)
+{
+    unsigned long areaA, areaB;
+    unsigned long result;
+
+    if (A == B)
+        return A;
+
+    areaB = (x >> 11) & 0x1f; // reduce 16 bit fraction to 5 bits
+    areaA = 0x20 - areaB;
+
+    // Move green into the upper half so all channels can be scaled at once.
+    A = (A & redblueMask) | ((A & greenMask) << 16);
+    B = (B & redblueMask) | ((B & greenMask) << 16);
+
+    result = ((areaA * A) + (areaB * B)) >> 5;
+
+    return (result & redblueMask) | ((result >> 16) & greenMask);
+}
+
+// Blends four 16-bit pixels; x and y are 16-bit fractions that are reduced
+// to 5 bits each.  Uses redblueMask/greenMask set by Init_2xSaI().
+static u32 Bilinear4(u32 A, u32 B, u32 C, u32 D, u32 x, u32 y)
+{
+    unsigned long areaA, areaB, areaC, areaD;
+    unsigned long result, xy;
+
+    x  = (x >> 11) & 0x1f;
+    y  = (y >> 11) & 0x1f;
+    xy = (x * y) >> 5;
+
+    A = (A & redblueMask) | ((A & greenMask) << 16);
+    B = (B & redblueMask) | ((B & greenMask) << 16);
+    C = (C & redblueMask) | ((C & greenMask) << 16);
+    D = (D & redblueMask) | ((D & greenMask) << 16);
+
+    areaA = 0x20 + xy - x - y;
+    areaB = x - xy;
+    areaC = y - xy;
+    areaD = xy;
+
+    result = ((areaA * A) + (areaB * B) + (areaC * C) + (areaD * D)) >> 5;
+
+    return (result & redblueMask) | ((result >> 16) & greenMask);
+}
+
+// Arbitrary-ratio scaler for 16-bit pixels: steps through the source in
+// 16.16 fixed point and writes one 16-bit pixel per step.  Pitches are in
+// bytes.  Nothing is drawn when either source dimension is below 2 (as
+// before) or when a destination dimension is below 2 / the step rounds to
+// zero (these used to divide by zero or never terminate).
+void Scale_2xSaI(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
+                 u8 *dstPtr, u32 dstPitch,
+                 u32 dstWidth, u32 dstHeight, int width, int height)
+{
+    if (width < 2 || height < 2 || dstWidth < 2 || dstHeight < 2)
+        return;
+
+    const u32 Nextline = srcPitch >> 1;
+
+    const u32 wfinish = (u32) (width - 1) << 16; // convert to fixed point
+    const u32 dw      = wfinish / (dstWidth - 1);
+    const u32 hfinish = (u32) (height - 1) << 16; // convert to fixed point
+    const u32 dh      = hfinish / (dstHeight - 1);
+
+    if (dw == 0 || dh == 0)
+        return;
+
+    for (u32 h = 0; h < hfinish; h += dh)
+    {
+        const u32  y1 = h & 0xffff; // fraction part of fixed point
+        const u32  y2 = 0x10000 - y1;
+        const u16 *bP = (const u16 *) (srcPtr + ((h >> 16) * srcPitch));
+        u8 *       dP = dstPtr;
+
+        for (u32 w = 0; w < wfinish; w += dw)
+        {
+            // Address neighbours through a pointer: "position - Nextline"
+            // as an unsigned index wraps and is out of range on 64-bit.
+            const u16 *px = bP + (w >> 16);
+
+            const u32 A = *(px);                // current pixel
+            const u32 B = *(px + 1);            // next pixel
+            const u32 C = *(px + Nextline);
+            const u32 D = *(px + Nextline + 1);
+            const u32 E = *(px - Nextline);
+            const u32 F = *(px - Nextline + 1);
+            const u32 G = *(px - 1);
+            const u32 H = *(px + Nextline - 1);
+            const u32 I = *(px + 2);
+            const u32 J = *(px + Nextline + 2);
+            const u32 K = *(px + Nextline + Nextline);
+            const u32 L = *(px + Nextline + Nextline + 1);
+
+            const u32 x1 = w & 0xffff; // fraction part of fixed point
+            const u32 x2 = 0x10000 - x1;
+
+            u32 product1;
+
+            /*0*/
+            if (A == B && C == D && A == C)
+            {
+                product1 = A;
+            }
+            /*1*/
+            else if (A == D && B != C)
+            {
+                const u32 f1 = (x1 >> 1) + (0x10000 >> 2);
+                const u32 f2 = (y1 >> 1) + (0x10000 >> 2);
+
+                if (y1 <= f1 && A == J && A != E)      // close to B
+                    product1 = Bilinear(A, B, f1 - y1);
+                else if (y1 >= f1 && A == G && A != L) // close to C
+                    product1 = Bilinear(A, C, y1 - f1);
+                else if (x1 >= f2 && A == E && A != J) // close to B
+                    product1 = Bilinear(A, B, x1 - f2);
+                else if (x1 <= f2 && A == L && A != G) // close to C
+                    product1 = Bilinear(A, C, f2 - x1);
+                else if (y1 >= x1)                     // close to C
+                    product1 = Bilinear(A, C, y1 - x1);
+                else                                   // close to B
+                    product1 = Bilinear(A, B, x1 - y1);
+            }
+            /*2*/
+            else if (B == C && A != D)
+            {
+                const u32 f1 = (x1 >> 1) + (0x10000 >> 2);
+                const u32 f2 = (y1 >> 1) + (0x10000 >> 2);
+
+                if (y2 >= f1 && B == H && B != F)      // close to A
+                    product1 = Bilinear(B, A, y2 - f1);
+                else if (y2 <= f1 && B == I && B != K) // close to D
+                    product1 = Bilinear(B, D, f1 - y2);
+                else if (x2 >= f2 && B == F && B != H) // close to A
+                    product1 = Bilinear(B, A, x2 - f2);
+                else if (x2 <= f2 && B == K && B != I) // close to D
+                    product1 = Bilinear(B, D, f2 - x2);
+                else if (y2 >= x1)                     // close to A
+                    product1 = Bilinear(B, A, y2 - x1);
+                else                                   // close to D
+                    product1 = Bilinear(B, D, x1 - y2);
+            }
+            /*3*/
+            else
+            {
+                product1 = Bilinear4(A, B, C, D, x1, y1);
+            }
+
+            // One 16-bit pixel per step: a 32-bit store here overran the
+            // row by two bytes and was wrong on big-endian hosts.
+            *(u16 *) dP = (u16) product1;
+            dP += 2;
+        }
+        dstPtr += dstPitch;
+    }
+}
diff -r 18eaae41bde3 -r b970226568d2 src/filters/2xSaImmx.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/filters/2xSaImmx.asm Sun Mar 04 20:32:31 2012 -0600
@@ -0,0 +1,2109 @@
+;/*---------------------------------------------------------------------*
+; * The following (piece of) code, (part of) the 2xSaI engine, *
+; * copyright (c) 1999 - 2001 by Derek Liauw Kie Fa. *
+; * Non-Commercial use of this software is allowed and is encouraged, *
+; * provided that appropriate credit be given. *
+; * You may freely modify this code, but I request *
+; * that any improvements to the engine be submitted to me, so *
+; * that I can implement these improvements in newer versions of *
+; * the software. *
+; * If you need more information, have any comments or suggestions, *
+; * you can e-mail me. My e-mail: derek-liauw@usa.net.
* +; *---------------------------------------------------------------------*/ + +;---------------------- +; 2xSaI version 0.59 WIP, soon to become version 0.60 +;---------------------- + +;%define FAR_POINTER + + + + BITS 32 +%ifdef __DJGPP__ + GLOBAL __2xSaILine + GLOBAL __2xSaISuperEagleLine + GLOBAL __2xSaISuper2xSaILine + GLOBAL _Init_2xSaIMMX +%else + GLOBAL _2xSaILine + GLOBAL _2xSaISuperEagleLine + GLOBAL _2xSaISuper2xSaILine + GLOBAL Init_2xSaIMMX +%endif + SECTION .text ALIGN = 32 + +%ifdef FAR_POINTER +;EXTERN_C void _2xSaILine (uint8 *srcPtr, uint8 *deltaPtr, uint32 srcPitch, uint32 width, +; uint8 *dstPtr, uint32 dstPitch, uint16 dstSegment); +%else +;EXTERN_C void _2xSaILine (uint8 *srcPtr, uint8 *deltaPtr, uint32 srcPitch, uint32 width, +; uint8 *dstPtr, uint32 dstPitch); +%endif + +srcPtr equ 8 ;stack arguments, addressed as [ebp+N] after "push ebp / mov ebp, esp" +deltaPtr equ 12 ;buffer the source pixels are compared against to skip unchanged groups +srcPitch equ 16 ;added to a row pointer to reach the next source row +width equ 20 ;pixels to process; every loop iteration consumes 4 +dstOffset equ 24 ;the dstPtr argument of the prototype above +dstPitch equ 28 ;added to the destination pointer to reach the second output row +dstSegment equ 32 ;only read when FAR_POINTER is defined + + + + +colorB0 equ -2 ;byte offsets inside one source row; neighbours are 2 bytes (one 16-bit word) apart +colorB1 equ 0 +colorB2 equ 2 +colorB3 equ 4 + +color7 equ -2 +color8 equ 0 +color9 equ 2 + +color4 equ -2 +color5 equ 0 +color6 equ 2 +colorS2 equ 4 + +color1 equ -2 +color2 equ 0 +color3 equ 2 +colorS1 equ 4 + +colorA0 equ -2 +colorA1 equ 0 +colorA2 equ 2 +colorA3 equ 4 + + + + +%ifdef __DJGPP__ +__2xSaISuper2xSaILine: +%else +_2xSaISuper2xSaILine: +%endif +; Store some stuff + push ebp + mov ebp, esp + pushad + +; Prepare the destination +%ifdef FAR_POINTER + ; Set the selector + mov eax, [ebp+dstSegment] + mov fs, ax +%endif + mov edx, [ebp+dstOffset] ; edx points to the screen +; Prepare the source + ; eax points to colorA + mov eax, [ebp+srcPtr] ;eax points to colorA + mov ebx, [ebp+srcPitch] ;ebx contains the source pitch + mov ecx, [ebp+width] ;ecx contains the number of pixels to process + ; eax now points to colorB1 + sub eax, ebx ;eax points to B1 which is the base + +; Main Loop +.Loop: push ecx + + ;-----Check Delta------------------ + mov ecx, [ebp+deltaPtr] + + + ;load source img + movq mm0, [eax+colorB0] + movq mm1, [eax+colorB3] + movq mm2, 
[eax+ebx+color4] + movq mm3, [eax+ebx+colorS2] + movq mm4, [eax+ebx+ebx+color1] + movq mm5, [eax+ebx+ebx+colorS1] + push eax + add eax, ebx + movq mm6, [eax+ebx+ebx+colorA0] + movq mm7, [eax+ebx+ebx+colorA3] + pop eax + + ;compare to delta + pcmpeqw mm0, [ecx+2+colorB0] + pcmpeqw mm1, [ecx+2+colorB3] + pcmpeqw mm2, [ecx+ebx+2+color4] + pcmpeqw mm3, [ecx+ebx+2+colorS2] + pcmpeqw mm4, [ecx+ebx+ebx+2+color1] + pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1] + add ecx, ebx + pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0] + pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3] + sub ecx, ebx + + + ;compose results + pand mm0, mm1 + pand mm2, mm3 + pand mm4, mm5 + pand mm6, mm7 + pand mm0, mm2 + pand mm4, mm6 + pxor mm7, mm7 + pand mm0, mm4 + movq mm6, [eax+colorB0] + pcmpeqw mm7, mm0 ;did any compare give us a zero ? + + movq [ecx+2+colorB0], mm6 + + packsswb mm7, mm7 + movd ecx, mm7 + test ecx, ecx + jz near .SKIP_PROCESS ;no, so we can skip + + ;End Delta + + ;--------------------------------- + movq mm0, [eax+ebx+color5] + movq mm1, [eax+ebx+color6] + movq mm2, mm0 + movq mm3, mm1 + movq mm4, mm0 + movq mm5, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 ;mm0 contains the interpolated values + movq [I56Pixel], mm0 + movq mm7, mm0 + + ;------------------- + movq mm0, mm7 + movq mm1, mm4 ;5,5,5,6 + movq mm2, mm0 + movq mm3, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 ;mm0 contains the interpolated values + movq [I5556Pixel], mm0 + ;-------------------- + + movq mm0, mm7 + movq mm1, mm5 ;6,6,6,5 + movq mm2, mm0 + movq mm3, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 + movq [I5666Pixel], mm0 + + ;------------------------- + 
;------------------------- + movq mm0, [eax+ebx+ebx+color2] + movq mm1, [eax+ebx+ebx+color3] + movq mm2, mm0 + movq mm3, mm1 + movq mm4, mm0 + movq mm5, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 + movq [I23Pixel], mm0 + movq mm7, mm0 + + ;--------------------- + movq mm0, mm7 + movq mm1, mm4 ;2,2,2,3 + movq mm2, mm0 + movq mm3, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 + movq [I2223Pixel], mm0 + + ;---------------------- + movq mm0, mm7 + movq mm1, mm5 ;3,3,3,2 + movq mm2, mm0 + movq mm3, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 + movq [I2333Pixel], mm0 + + + ;-------------------- +;//////////////////////////////// +; Decide which "branch" to take +;-------------------------------- + movq mm0, [eax+ebx+color5] + movq mm1, [eax+ebx+color6] + movq mm6, mm0 + movq mm7, mm1 + pcmpeqw mm0, [eax+ebx+ebx+color3] + pcmpeqw mm1, [eax+ebx+ebx+color2] + pcmpeqw mm6, mm7 + + movq mm2, mm0 + movq mm3, mm0 + + pand mm0, mm1 ;colorA == colorD && colorB == colorC + pxor mm7, mm7 + + pcmpeqw mm2, mm7 + pand mm6, mm0 + pand mm2, mm1 ;colorA != colorD && colorB == colorC + + pcmpeqw mm1, mm7 + + pand mm1, mm3 ;colorA == colorD && colorB != colorC + pxor mm0, mm6 + por mm1, mm6 + movq mm7, mm0 + movq [Mask26], mm2 + packsswb mm7, mm7 + movq [Mask35], mm1 + + movd ecx, mm7 + test ecx, ecx + jz near .SKIP_GUESS + +;--------------------------------------------- + movq mm6, mm0 + movq mm4, [eax+ebx+colorA] + movq mm5, [eax+ebx+colorB] + pxor mm7, mm7 + pand mm6, [ONE] + + movq mm0, [eax+colorE] + movq mm1, [eax+ebx+colorG] + movq mm2, mm0 + movq mm3, mm1 + pcmpeqw mm0, mm4 + pcmpeqw mm1, mm4 + pcmpeqw mm2, mm5 + pcmpeqw 
mm3, mm5 + pand mm0, mm6 + pand mm1, mm6 + pand mm2, mm6 + pand mm3, mm6 + paddw mm0, mm1 + paddw mm2, mm3 + + pxor mm3, mm3 + pcmpgtw mm0, mm6 + pcmpgtw mm2, mm6 + pcmpeqw mm0, mm3 + pcmpeqw mm2, mm3 + pand mm0, mm6 + pand mm2, mm6 + paddw mm7, mm0 + psubw mm7, mm2 + + movq mm0, [eax+colorF] + movq mm1, [eax+ebx+colorK] + movq mm2, mm0 + movq mm3, mm1 + pcmpeqw mm0, mm4 + pcmpeqw mm1, mm4 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand mm0, mm6 + pand mm1, mm6 + pand mm2, mm6 + pand mm3, mm6 + paddw mm0, mm1 + paddw mm2, mm3 + + pxor mm3, mm3 + pcmpgtw mm0, mm6 + pcmpgtw mm2, mm6 + pcmpeqw mm0, mm3 + pcmpeqw mm2, mm3 + pand mm0, mm6 + pand mm2, mm6 + paddw mm7, mm0 + psubw mm7, mm2 + + push eax + add eax, ebx + movq mm0, [eax+ebx+colorH] + movq mm1, [eax+ebx+ebx+colorN] + movq mm2, mm0 + movq mm3, mm1 + pcmpeqw mm0, mm4 + pcmpeqw mm1, mm4 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand mm0, mm6 + pand mm1, mm6 + pand mm2, mm6 + pand mm3, mm6 + paddw mm0, mm1 + paddw mm2, mm3 + + pxor mm3, mm3 + pcmpgtw mm0, mm6 + pcmpgtw mm2, mm6 + pcmpeqw mm0, mm3 + pcmpeqw mm2, mm3 + pand mm0, mm6 + pand mm2, mm6 + paddw mm7, mm0 + psubw mm7, mm2 + + movq mm0, [eax+ebx+colorL] + movq mm1, [eax+ebx+ebx+colorO] + movq mm2, mm0 + movq mm3, mm1 + pcmpeqw mm0, mm4 + pcmpeqw mm1, mm4 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand mm0, mm6 + pand mm1, mm6 + pand mm2, mm6 + pand mm3, mm6 + paddw mm0, mm1 + paddw mm2, mm3 + + pxor mm3, mm3 + pcmpgtw mm0, mm6 + pcmpgtw mm2, mm6 + pcmpeqw mm0, mm3 + pcmpeqw mm2, mm3 + pand mm0, mm6 + pand mm2, mm6 + paddw mm7, mm0 + psubw mm7, mm2 + + pop eax + movq mm1, mm7 + pxor mm0, mm0 + pcmpgtw mm7, mm0 + pcmpgtw mm0, mm1 + + por mm7, [Mask35] + por mm0, [Mask26] + movq [Mask35], mm7 + movq [Mask26], mm0 + +.SKIP_GUESS: + + ;Start the ASSEMBLY !!! eh... compose all the results together to form the final image... 
+ + + movq mm0, [eax+ebx+color5] + movq mm1, [eax+ebx+ebx+color2] + movq mm2, mm0 + movq mm3, mm1 + movq mm4, mm0 + movq mm5, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 ;mm0 contains the interpolated values + ;--------------------------- + + + +%ifdef dfhsdfhsdahdsfhdsfh + + if (color5 == color3 && color2 != color6 && color4 == color5 && color5 != colorA2) + product2a = INTERPOLATE (color2, color5); + else + if (color5 == color1 && color6 == color5 && color4 != color2 && color5 != colorA0) + product2a = INTERPOLATE(color2, color5); + else + product2a = color2; + + if (color2 == color6 && color5 != color3 && color1 == color2 && color2 != colorB2) + product1a = INTERPOLATE (color2, color5); + else + if (color4 == color2 && color3 == color2 && color1 != color5 && color2 != colorB0) + product1a = INTERPOLATE(color2, color5); + else + product1a = color5; + +%endif + + + movq mm7, [Mask26] + movq mm6, [eax+colorB2] + movq mm5, [eax+ebx+ebx+color2] + movq mm4, [eax+ebx+ebx+color1] + pcmpeqw mm4, mm5 + pcmpeqw mm6, mm5 + pxor mm5, mm5 + pand mm7, mm4 + pcmpeqw mm6, mm5 + pand mm7, mm6 + + + + movq mm6, [eax+ebx+ebx+color3] + movq mm5, [eax+ebx+ebx+color2] + movq mm4, [eax+ebx+ebx+color1] + movq mm2, [eax+ebx+color5] + movq mm1, [eax+ebx+color4] + movq mm3, [eax+colorB0] + + pcmpeqw mm2, mm4 + pcmpeqw mm6, mm5 + pcmpeqw mm1, mm5 + pcmpeqw mm3, mm5 + pxor mm5, mm5 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand mm6, mm1 + pand mm2, mm3 + pand mm6, mm2 + por mm7, mm6 + + + movq mm6, mm7 + pcmpeqw mm6, mm5 + pand mm7, mm0 + + movq mm1, [eax+ebx+color5] + pand mm6, mm1 + por mm7, mm6 + movq [final1a], mm7 ;finished 1a + + + + ;-------------------------------- + + movq mm7, [Mask35] + push eax + add eax, ebx + movq mm6, [eax+ebx+ebx+colorA2] + pop eax + movq mm5, [eax+ebx+color5] + movq mm4, [eax+ebx+color4] + pcmpeqw mm4, mm5 + pcmpeqw mm6, mm5 + pxor mm5, 
mm5 + pand mm7, mm4 + pcmpeqw mm6, mm5 + pand mm7, mm6 + + + + movq mm6, [eax+ebx+color6] + movq mm5, [eax+ebx+color5] + movq mm4, [eax+ebx+color4] + movq mm2, [eax+ebx+ebx+color2] + movq mm1, [eax+ebx+ebx+color1] + push eax + add eax, ebx + movq mm3, [eax+ebx+ebx+colorA0] + pop eax + + pcmpeqw mm2, mm4 + pcmpeqw mm6, mm5 + pcmpeqw mm1, mm5 + pcmpeqw mm3, mm5 + pxor mm5, mm5 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand mm6, mm1 + pand mm2, mm3 + pand mm6, mm2 + por mm7, mm6 + + + movq mm6, mm7 + pcmpeqw mm6, mm5 + pand mm7, mm0 + + movq mm1, [eax+ebx+ebx+color2] + pand mm6, mm1 + por mm7, mm6 + movq [final2a], mm7 ;finished 2a + + + ;-------------------------------------------- + + +%ifdef dfhsdfhsdahdsfhdsfh + if (color6 == color3 && color3 == colorA1 && color2 != colorA2 && color3 != colorA0) + product2b = Q_INTERPOLATE (color3, color3, color3, color2); + else + if (color5 == color2 && color2 == colorA2 && colorA1 != color3 && color2 != colorA3) + product2b = Q_INTERPOLATE (color2, color2, color2, color3); + else + product2b = INTERPOLATE (color2, color3); + + if (color6 == color3 && color6 == colorB1 && color5 != colorB2 && color6 != colorB0) + product1b = Q_INTERPOLATE (color6, color6, color6, color5); + else + if (color5 == color2 && color5 == colorB2 && colorB1 != color6 && color5 != colorB3) + product1b = Q_INTERPOLATE (color6, color5, color5, color5); + else + product1b = INTERPOLATE (color5, color6); +%endif + + push eax + add eax, ebx + pxor mm7, mm7 + movq mm0, [eax+ebx+ebx+colorA0] + movq mm1, [eax+ebx+ebx+colorA1] + movq mm2, [eax+ebx+ebx+colorA2] + movq mm3, [eax+ebx+ebx+colorA3] + pop eax + movq mm4, [eax+ebx+ebx+color2] + movq mm5, [eax+ebx+ebx+color3] + movq mm6, [eax+ebx+color6] + + pcmpeqw mm6, mm5 + pcmpeqw mm1, mm5 + pcmpeqw mm4, mm2 + pcmpeqw mm0, mm5 + pcmpeqw mm4, mm7 + pcmpeqw mm0, mm7 + pand mm0, mm4 + pand mm6, mm1 + pand mm0, mm6 + + + push eax + add eax, ebx + movq mm1, [eax+ebx+ebx+colorA1] + pop eax + movq mm4, [eax+ebx+ebx+color2] + 
movq mm5, [eax+ebx+color5] + movq mm6, [eax+ebx+ebx+color3] + + pcmpeqw mm5, mm4 + pcmpeqw mm2, mm4 + pcmpeqw mm1, mm6 + pcmpeqw mm3, mm4 + pcmpeqw mm1, mm7 + pcmpeqw mm3, mm7 + pand mm2, mm5 + pand mm1, mm3 + pand mm1, mm2 + + + movq mm7, mm0 + por mm7, mm1 + + movq mm4, [Mask35] + movq mm3, [Mask26] + + movq mm6, mm4 + pand mm6, mm7 + pxor mm4, mm6 + + movq mm6, mm3 + pand mm6, mm7 + pxor mm3, mm6 + + movq mm2, mm0 + movq mm7, [I2333Pixel] + movq mm6, [I2223Pixel] + movq mm5, [I23Pixel] + + + por mm2, mm4 + pand mm4, [eax+ebx+ebx+color3] + por mm2, mm3 + pand mm3, [eax+ebx+ebx+color2] + por mm2, mm1 + pand mm0, mm7 + pand mm1, mm6 + pxor mm7, mm7 + pcmpeqw mm2, mm7 + por mm0, mm1 + por mm3, mm4 + pand mm2, mm5 + por mm0, mm3 + por mm0, mm2 + movq [final2b], mm0 + + ;----------------------------------- + + + pxor mm7, mm7 + movq mm0, [eax+colorB0] + movq mm1, [eax+colorB1] + movq mm2, [eax+colorB2] + movq mm3, [eax+colorB3] + movq mm4, [eax+ebx+color5] + movq mm5, [eax+ebx+color6] + movq mm6, [eax+ebx+ebx+color3] + + pcmpeqw mm6, mm5 + pcmpeqw mm1, mm5 + pcmpeqw mm4, mm2 + pcmpeqw mm0, mm5 + pcmpeqw mm4, mm7 + pcmpeqw mm0, mm7 + pand mm0, mm4 + pand mm6, mm1 + pand mm0, mm6 + + movq mm1, [eax+colorB1] + movq mm4, [eax+ebx+color5] + movq mm5, [eax+ebx+ebx+color2] + movq mm6, [eax+ebx+color6] + + pcmpeqw mm5, mm4 + pcmpeqw mm2, mm4 + pcmpeqw mm1, mm6 + pcmpeqw mm3, mm4 + pcmpeqw mm1, mm7 + pcmpeqw mm3, mm7 + pand mm2, mm5 + pand mm1, mm3 + pand mm1, mm2 + + + movq mm7, mm0 + por mm7, mm1 + + movq mm4, [Mask35] + movq mm3, [Mask26] + + movq mm6, mm4 + pand mm6, mm7 + pxor mm4, mm6 + + movq mm6, mm3 + pand mm6, mm7 + pxor mm3, mm6 + + movq mm2, mm0 + movq mm7, [I5666Pixel] + movq mm6, [I5556Pixel] + movq mm5, [I56Pixel] + + + por mm2, mm4 + pand mm4, [eax+ebx+color5] + por mm2, mm3 + pand mm3, [eax+ebx+color6] + por mm2, mm1 + pand mm0, mm7 + pand mm1, mm6 + pxor mm7, mm7 + pcmpeqw mm2, mm7 + por mm0, mm1 + por mm3, mm4 + pand mm2, mm5 + por mm0, mm3 + por mm0, mm2 + 
movq [final1b], mm0 + + ;--------- + + movq mm0, [final1a] + movq mm4, [final2a] + movq mm2, [final1b] + movq mm6, [final2b] + + + movq mm1, mm0 + movq mm5, mm4 + + + punpcklwd mm0, mm2 + punpckhwd mm1, mm2 + + punpcklwd mm4, mm6 + punpckhwd mm5, mm6 + + +%ifdef FAR_POINTER + movq [fs:edx], mm0 + movq [fs:edx+8], mm1 + push edx + add edx, [ebp+dstPitch] + movq [fs:edx], mm4 + movq [fs:edx+8], mm5 + pop edx +%else + movq [edx], mm0 + movq [edx+8], mm1 + push edx + add edx, [ebp+dstPitch] + movq [edx], mm4 + movq [edx+8], mm5 + pop edx +%endif +.SKIP_PROCESS: + mov ecx, [ebp+deltaPtr] + add ecx, 8 + mov [ebp+deltaPtr], ecx + add edx, 16 + add eax, 8 + + pop ecx + sub ecx, 4 + cmp ecx, 0 + jg near .Loop + +; Restore some stuff + popad + mov esp, ebp + pop ebp + emms + ret + + +;------------------------------------------------------------------------- +;------------------------------------------------------------------------- +;------------------------------------------------------------------------- +;------------------------------------------------------------------------- +;------------------------------------------------------------------------- +;------------------------------------------------------------------------- +;------------------------------------------------------------------------- + + + +%ifdef __DJGPP__ +__2xSaISuperEagleLine: +%else +_2xSaISuperEagleLine: +%endif +; Store some stuff + push ebp + mov ebp, esp + pushad + +; Prepare the destination +%ifdef FAR_POINTER + ; Set the selector + mov eax, [ebp+dstSegment] + mov fs, ax +%endif + mov edx, [ebp+dstOffset] ; edx points to the screen +; Prepare the source + ; eax points to colorA + mov eax, [ebp+srcPtr] + mov ebx, [ebp+srcPitch] + mov ecx, [ebp+width] + ; eax now points to colorB1 + sub eax, ebx + +; Main Loop +.Loop: push ecx + + ;-----Check Delta------------------ + mov ecx, [ebp+deltaPtr] + + movq mm0, [eax+colorB0] + movq mm1, [eax+colorB3] + movq mm2, [eax+ebx+color4] + movq mm3, 
[eax+ebx+colorS2] + movq mm4, [eax+ebx+ebx+color1] + movq mm5, [eax+ebx+ebx+colorS1] + push eax + add eax, ebx + movq mm6, [eax+ebx+ebx+colorA0] + movq mm7, [eax+ebx+ebx+colorA3] + pop eax + + pcmpeqw mm0, [ecx+2+colorB0] + pcmpeqw mm1, [ecx+2+colorB3] + pcmpeqw mm2, [ecx+ebx+2+color4] + pcmpeqw mm3, [ecx+ebx+2+colorS2] + pcmpeqw mm4, [ecx+ebx+ebx+2+color1] + pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1] + add ecx, ebx + pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0] + pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3] + sub ecx, ebx + + + pand mm0, mm1 + pand mm2, mm3 + pand mm4, mm5 + pand mm6, mm7 + pand mm0, mm2 + pand mm4, mm6 + pxor mm7, mm7 + pand mm0, mm4 + movq mm6, [eax+colorB0] + pcmpeqw mm7, mm0 + + movq [ecx+2+colorB0], mm6 + + packsswb mm7, mm7 + movd ecx, mm7 + test ecx, ecx + jz near .SKIP_PROCESS + + ;End Delta + + ;--------------------------------- + movq mm0, [eax+ebx+color5] + movq mm1, [eax+ebx+color6] + movq mm2, mm0 + movq mm3, mm1 + movq mm4, mm0 + movq mm5, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 ;mm0 contains the interpolated values + movq [I56Pixel], mm0 + movq mm7, mm0 + + ;------------------- + movq mm0, mm7 + movq mm1, mm4 ;5,5,5,6 + movq mm2, mm0 + movq mm3, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 ;mm0 contains the interpolated values + movq [product1a], mm0 + ;-------------------- + + movq mm0, mm7 + movq mm1, mm5 ;6,6,6,5 + movq mm2, mm0 + movq mm3, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 + movq [product1b], mm0 + + ;------------------------- + ;------------------------- + movq mm0, [eax+ebx+ebx+color2] + movq mm1, [eax+ebx+ebx+color3] + movq mm2, mm0 + movq mm3, mm1 + movq mm4, mm0 + movq 
mm5, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 + movq [I23Pixel], mm0 + movq mm7, mm0 + + ;--------------------- + movq mm0, mm7 + movq mm1, mm4 ;2,2,2,3 + movq mm2, mm0 + movq mm3, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 + movq [product2a], mm0 + + ;---------------------- + movq mm0, mm7 + movq mm1, mm5 ;3,3,3,2 + movq mm2, mm0 + movq mm3, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 + movq [product2b], mm0 + + + ;//////////////////////////////// + ; Decide which "branch" to take + ;-------------------------------- + movq mm4, [eax+ebx+color5] + movq mm5, [eax+ebx+color6] + movq mm6, [eax+ebx+ebx+color3] + movq mm7, [eax+ebx+ebx+color2] + + pxor mm3, mm3 + movq mm0, mm4 + movq mm1, mm5 + + pcmpeqw mm0, mm6 + pcmpeqw mm1, mm7 + pcmpeqw mm1, mm3 + pand mm0, mm1 + movq [Mask35], mm0 + + movq mm0, [eax+ebx+ebx+colorS1] + movq mm1, [eax+ebx+color4] + push eax + add eax, ebx + movq mm2, [eax+ebx+ebx+colorA2] + pop eax + movq mm3, [eax+colorB1] + pcmpeqw mm0, mm4 + pcmpeqw mm1, mm4 + pcmpeqw mm2, mm4 + pcmpeqw mm3, mm4 + pand mm0, mm1 + pand mm2, mm3 + por mm0, mm2 + pand mm0, [Mask35] + movq [Mask35b], mm0 + + ;----------- + pxor mm3, mm3 + movq mm0, mm4 + movq mm1, mm5 + + pcmpeqw mm0, mm6 + pcmpeqw mm1, mm7 + pcmpeqw mm0, mm3 + pand mm0, mm1 + movq [Mask26], mm0 + + movq mm0, [eax+ebx+ebx+color1] + movq mm1, [eax+ebx+colorS2] + push eax + add eax, ebx + movq mm2, [eax+ebx+ebx+colorA1] + pop eax + movq mm3, [eax+colorB2] + pcmpeqw mm0, mm5 + pcmpeqw mm1, mm5 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand mm0, mm1 + pand mm2, mm3 + por mm0, mm2 + pand mm0, [Mask26] + movq [Mask26b], mm0 + + 
;-------------------- + movq mm0, mm4 + movq mm1, mm5 + movq mm2, mm0 + + pcmpeqw mm2, mm1 + pcmpeqw mm0, mm6 + pcmpeqw mm1, mm7 + pand mm0, mm1 + pand mm2, mm0 + pxor mm0, mm2 + movq mm7, mm0 + + ;------------------ + packsswb mm7, mm7 + movd ecx, mm7 + test ecx, ecx + jz near .SKIP_GUESS + +;--------------------------------------------- +; Map of the pixels: I|E F|J +; G|A B|K +; H|C D|L +; M|N O|P + movq mm6, mm0 + movq mm4, [eax+ebx+color5] + movq mm5, [eax+ebx+color6] + pxor mm7, mm7 + pand mm6, [ONE] + + movq mm0, [eax+colorB1] + movq mm1, [eax+ebx+color4] + movq mm2, mm0 + movq mm3, mm1 + pcmpeqw mm0, mm4 + pcmpeqw mm1, mm4 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand mm0, mm6 + pand mm1, mm6 + pand mm2, mm6 + pand mm3, mm6 + paddw mm0, mm1 + paddw mm2, mm3 + + pxor mm3, mm3 + pcmpgtw mm0, mm6 + pcmpgtw mm2, mm6 + pcmpeqw mm0, mm3 + pcmpeqw mm2, mm3 + pand mm0, mm6 + pand mm2, mm6 + paddw mm7, mm0 + psubw mm7, mm2 + + movq mm0, [eax+colorB2] + movq mm1, [eax+ebx+colorS2] + movq mm2, mm0 + movq mm3, mm1 + pcmpeqw mm0, mm4 + pcmpeqw mm1, mm4 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand mm0, mm6 + pand mm1, mm6 + pand mm2, mm6 + pand mm3, mm6 + paddw mm0, mm1 + paddw mm2, mm3 + + pxor mm3, mm3 + pcmpgtw mm0, mm6 + pcmpgtw mm2, mm6 + pcmpeqw mm0, mm3 + pcmpeqw mm2, mm3 + pand mm0, mm6 + pand mm2, mm6 + paddw mm7, mm0 + psubw mm7, mm2 + + push eax + add eax, ebx + movq mm0, [eax+ebx+color1] + movq mm1, [eax+ebx+ebx+colorA1] + movq mm2, mm0 + movq mm3, mm1 + pcmpeqw mm0, mm4 + pcmpeqw mm1, mm4 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand mm0, mm6 + pand mm1, mm6 + pand mm2, mm6 + pand mm3, mm6 + paddw mm0, mm1 + paddw mm2, mm3 + + pxor mm3, mm3 + pcmpgtw mm0, mm6 + pcmpgtw mm2, mm6 + pcmpeqw mm0, mm3 + pcmpeqw mm2, mm3 + pand mm0, mm6 + pand mm2, mm6 + paddw mm7, mm0 + psubw mm7, mm2 + + movq mm0, [eax+ebx+colorS1] + movq mm1, [eax+ebx+ebx+colorA2] + movq mm2, mm0 + movq mm3, mm1 + pcmpeqw mm0, mm4 + pcmpeqw mm1, mm4 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand 
mm0, mm6 + pand mm1, mm6 + pand mm2, mm6 + pand mm3, mm6 + paddw mm0, mm1 + paddw mm2, mm3 + + pxor mm3, mm3 + pcmpgtw mm0, mm6 + pcmpgtw mm2, mm6 + pcmpeqw mm0, mm3 + pcmpeqw mm2, mm3 + pand mm0, mm6 + pand mm2, mm6 + paddw mm7, mm0 + psubw mm7, mm2 + + pop eax + movq mm1, mm7 + pxor mm0, mm0 + pcmpgtw mm7, mm0 + pcmpgtw mm0, mm1 + + por mm7, [Mask35] + por mm0, [Mask26] + movq [Mask35], mm7 + movq [Mask26], mm0 + +.SKIP_GUESS: + ;Start the ASSEMBLY !!! + + movq mm4, [Mask35] + movq mm5, [Mask26] + movq mm6, [Mask35b] + movq mm7, [Mask26b] + + movq mm0, [eax+ebx+color5] + movq mm1, [eax+ebx+color6] + movq mm2, [eax+ebx+ebx+color2] + movq mm3, [eax+ebx+ebx+color3] + pcmpeqw mm0, mm2 + pcmpeqw mm1, mm3 + movq mm2, mm4 + movq mm3, mm5 + por mm0, mm1 + por mm2, mm3 + pand mm2, mm0 + pxor mm0, mm2 + movq mm3, mm0 + + movq mm2, mm0 + pxor mm0, mm0 + por mm2, mm4 + pxor mm4, mm6 + por mm2, mm5 + pxor mm5, mm7 + pcmpeqw mm2, mm0 + ;---------------- + + movq mm0, [eax+ebx+color5] + movq mm1, mm3 + por mm1, mm4 + por mm1, mm6 + pand mm0, mm1 + movq mm1, mm5 + pand mm1, [I56Pixel] + por mm0, mm1 + movq mm1, mm7 + pand mm1, [product1b] + por mm0, mm1 + movq mm1, mm2 + pand mm1, [product1a] + por mm0, mm1 + movq [final1a], mm0 + + movq mm0, [eax+ebx+color6] + movq mm1, mm3 + por mm1, mm5 + por mm1, mm7 + pand mm0, mm1 + movq mm1, mm4 + pand mm1, [I56Pixel] + por mm0, mm1 + movq mm1, mm6 + pand mm1, [product1a] + por mm0, mm1 + movq mm1, mm2 + pand mm1, [product1b] + por mm0, mm1 + movq [final1b], mm0 + + movq mm0, [eax+ebx+ebx+color2] + movq mm1, mm3 + por mm1, mm5 + por mm1, mm7 + pand mm0, mm1 + movq mm1, mm4 + pand mm1, [I23Pixel] + por mm0, mm1 + movq mm1, mm6 + pand mm1, [product2b] + por mm0, mm1 + movq mm1, mm2 + pand mm1, [product2a] + por mm0, mm1 + movq [final2a], mm0 + + movq mm0, [eax+ebx+ebx+color3] + movq mm1, mm3 + por mm1, mm4 + por mm1, mm6 + pand mm0, mm1 + movq mm1, mm5 + pand mm1, [I23Pixel] + por mm0, mm1 + movq mm1, mm7 + pand mm1, [product2a] + por mm0, 
mm1 + movq mm1, mm2 + pand mm1, [product2b] + por mm0, mm1 + movq [final2b], mm0 + + + movq mm0, [final1a] + movq mm2, [final1b] + movq mm1, mm0 + movq mm4, [final2a] + movq mm6, [final2b] + movq mm5, mm4 + punpcklwd mm0, mm2 + punpckhwd mm1, mm2 + punpcklwd mm4, mm6 + punpckhwd mm5, mm6 + + + + +%ifdef FAR_POINTER + movq [fs:edx], mm0 + movq [fs:edx+8], mm1 + push edx + add edx, [ebp+dstPitch] + movq [fs:edx], mm4 + movq [fs:edx+8], mm5 + pop edx +%else + movq [edx], mm0 + movq [edx+8], mm1 + push edx + add edx, [ebp+dstPitch] + movq [edx], mm4 + movq [edx+8], mm5 + pop edx +%endif +.SKIP_PROCESS: + mov ecx, [ebp+deltaPtr] + add ecx, 8 + mov [ebp+deltaPtr], ecx + add edx, 16 + add eax, 8 + + pop ecx + sub ecx, 4 + cmp ecx, 0 + jg near .Loop + +; Restore some stuff + popad + mov esp, ebp + pop ebp + emms + ret + + +;------------------------------------------------------------------------- +;------------------------------------------------------------------------- +;------------------------------------------------------------------------- +;------------------------------------------------------------------------- +;------------------------------------------------------------------------- +;------------------------------------------------------------------------- +;------------------------------------------------------------------------- + + +;This is version 0.50 +colorI equ -2 +colorE equ 0 +colorF equ 2 +colorJ equ 4 + +colorG equ -2 +colorA equ 0 +colorB equ 2 +colorK equ 4 + +colorH equ -2 +colorC equ 0 +colorD equ 2 +colorL equ 4 + +colorM equ -2 +colorN equ 0 +colorO equ 2 +colorP equ 4 + +%ifdef __DJGPP__ +__2xSaILine: +%else +_2xSaILine: +%endif +; Store some stuff + push ebp + mov ebp, esp + pushad + +; Prepare the destination +%ifdef FAR_POINTER + ; Set the selector + mov eax, [ebp+dstSegment] + mov fs, ax +%endif + mov edx, [ebp+dstOffset] ; edx points to the screen +; Prepare the source + ; eax points to colorA + mov eax, [ebp+srcPtr] + mov ebx, 
[ebp+srcPitch] + mov ecx, [ebp+width] + ; eax now points to colorE + sub eax, ebx + + +; Main Loop +.Loop: push ecx + + ;-----Check Delta------------------ + mov ecx, [ebp+deltaPtr] ;ecx = delta buffer, compared against the source below + + movq mm0, [eax+colorI] + movq mm1, [eax+colorJ] + movq mm2, [eax+ebx+colorG] + movq mm3, [eax+ebx+colorK] + movq mm4, [eax+ebx+ebx+colorH] + movq mm5, [eax+ebx+ebx+colorL] + push eax + add eax, ebx + movq mm6, [eax+ebx+ebx+colorM] + movq mm7, [eax+ebx+ebx+colorP] + pop eax + + pcmpeqw mm0, [ecx+2+colorI] + pcmpeqw mm1, [ecx+2+colorJ] ;same top-row offset mm1 was loaded from (colorJ and colorK are both 4) + pcmpeqw mm2, [ecx+ebx+2+colorG] + pcmpeqw mm3, [ecx+ebx+2+colorK] + pcmpeqw mm4, [ecx+ebx+ebx+2+colorH] + pcmpeqw mm5, [ecx+ebx+ebx+2+colorL] + add ecx, ebx + pcmpeqw mm6, [ecx+ebx+ebx+2+colorM] + pcmpeqw mm7, [ecx+ebx+ebx+2+colorP] + sub ecx, ebx + + + pand mm0, mm1 + pand mm2, mm3 + pand mm4, mm5 + pand mm6, mm7 + pand mm0, mm2 + pand mm4, mm6 + pxor mm7, mm7 + pand mm0, mm4 ;mm0 word = FFFF only where all eight compares matched + movq mm6, [eax+colorI] + pcmpeqw mm7, mm0 ;mm7 word = FFFF where at least one compare failed + + movq [ecx+2+colorI], mm6 ;store the current top-row pixels into the delta buffer + + packsswb mm7, mm7 + movd ecx, mm7 + test ecx, ecx + jz near .SKIP_PROCESS ;no word differed, so this 4-pixel group is skipped + + ;End Delta + + ;--------------------------------- + + +;1 + ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorE) && (colorB == colorL)) + movq mm0, [eax+ebx+colorA] ;mm0 and mm1 contain colorA + movq mm2, [eax+ebx+colorB] ;mm2 and mm3 contain colorB + + movq mm1, mm0 + movq mm3, mm2 + + pcmpeqw mm0, [eax+ebx+ebx+colorD] + pcmpeqw mm1, [eax+colorE] + pcmpeqw mm2, [eax+ebx+ebx+colorL] + pcmpeqw mm3, [eax+ebx+ebx+colorC] + + pand mm0, mm1 + pxor mm1, mm1 + pand mm0, mm2 + pcmpeqw mm3, mm1 + pand mm0, mm3 ;result in mm0 + + ;if ((colorA == colorC) && (colorB != colorE) && (colorA == colorF) && (colorB == colorJ)) + movq mm4, [eax+ebx+colorA] ;mm4 and mm5 contain colorA + movq mm6, [eax+ebx+colorB] ;mm6 and mm7 contain colorB + movq mm5, mm4 + movq mm7, mm6 + + pcmpeqw mm4, [eax+ebx+ebx+colorC] + pcmpeqw mm5, [eax+colorF] + pcmpeqw mm6, [eax+colorJ] + pcmpeqw mm7, [eax+colorE] + + pand mm4, mm5 + pxor mm5, mm5 + pand 
mm4, mm6 + pcmpeqw mm7, mm5 + pand mm4, mm7 ;result in mm4 + + por mm0, mm4 ;combine the masks + movq [Mask1], mm0 + + ;-------------------------------------------- + +;2 + ;if ((colorB == colorC) && (colorA != colorD) && (colorB == colorF) && (colorA == colorH) + movq mm0, [eax+ebx+colorB] ;mm0 and mm1 contain colorB + movq mm2, [eax+ebx+colorA] ;mm2 and mm3 contain colorA + movq mm1, mm0 + movq mm3, mm2 + + pcmpeqw mm0, [eax+ebx+ebx+colorC] + pcmpeqw mm1, [eax+colorF] + pcmpeqw mm2, [eax+ebx+ebx+colorH] + pcmpeqw mm3, [eax+ebx+ebx+colorD] + + pand mm0, mm1 + pxor mm1, mm1 + pand mm0, mm2 + pcmpeqw mm3, mm1 + pand mm0, mm3 ;result in mm0 + + ;if ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI) + movq mm4, [eax+ebx+colorB] ;mm4 and mm5 contain colorB + movq mm6, [eax+ebx+colorA] ;mm6 and mm7 contain colorA + movq mm5, mm4 + movq mm7, mm6 + + pcmpeqw mm4, [eax+ebx+ebx+colorD] + pcmpeqw mm5, [eax+colorE] + pcmpeqw mm6, [eax+colorI] + pcmpeqw mm7, [eax+colorF] + + pand mm4, mm5 + pxor mm5, mm5 + pand mm4, mm6 + pcmpeqw mm7, mm5 + pand mm4, mm7 ;result in mm4 + + por mm0, mm4 ;combine the masks + movq [Mask2], mm0 + + +;interpolate colorA and colorB + movq mm0, [eax+ebx+colorA] + movq mm1, [eax+ebx+colorB] + + movq mm2, mm0 + movq mm3, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 ;mm0 contains the interpolated values + + ;assemble the pixels + movq mm1, [eax+ebx+colorA] + movq mm2, [eax+ebx+colorB] + + movq mm3, [Mask1] + movq mm5, mm1 + movq mm4, [Mask2] + movq mm6, mm1 + + pand mm1, mm3 + por mm3, mm4 + pxor mm7, mm7 + pand mm2, mm4 + + pcmpeqw mm3, mm7 + por mm1, mm2 + pand mm0, mm3 + + por mm0, mm1 + + punpcklwd mm5, mm0 + punpckhwd mm6, mm0 + +%ifdef FAR_POINTER + movq [fs:edx], mm5 + movq [fs:edx+8], mm6 +%else + movq [edx], mm5 + movq [edx+8], mm6 +%endif + 
+;------------------------------------------------ +; Create the Nextline +;------------------------------------------------ +;3 ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorG) && (colorC == colorO) + movq mm0, [eax+ebx+colorA] ;mm0 and mm1 contain colorA + movq mm2, [eax+ebx+ebx+colorC] ;mm2 and mm3 contain colorC + movq mm1, mm0 + movq mm3, mm2 + + push eax + add eax, ebx + pcmpeqw mm0, [eax+ebx+colorD] + pcmpeqw mm1, [eax+colorG] + pcmpeqw mm2, [eax+ebx+ebx+colorO] + pcmpeqw mm3, [eax+colorB] + pop eax + + pand mm0, mm1 + pxor mm1, mm1 + pand mm0, mm2 + pcmpeqw mm3, mm1 + pand mm0, mm3 ;result in mm0 + + ;if ((colorA == colorB) && (colorG != colorC) && (colorA == colorH) && (colorC == colorM) + movq mm4, [eax+ebx+colorA] ;mm4 and mm5 contain colorA + movq mm6, [eax+ebx+ebx+colorC] ;mm6 and mm7 contain colorC + movq mm5, mm4 + movq mm7, mm6 + + push eax + add eax, ebx + pcmpeqw mm4, [eax+ebx+colorH] + pcmpeqw mm5, [eax+colorB] + pcmpeqw mm6, [eax+ebx+ebx+colorM] + pcmpeqw mm7, [eax+colorG] + pop eax + + pand mm4, mm5 + pxor mm5, mm5 + pand mm4, mm6 + pcmpeqw mm7, mm5 + pand mm4, mm7 ;result in mm4 + + por mm0, mm4 ;combine the masks + movq [Mask1], mm0 + ;-------------------------------------------- + +;4 + ;if ((colorB == colorC) && (colorA != colorD) && (colorC == colorH) && (colorA == colorF) + movq mm0, [eax+ebx+ebx+colorC] ;mm0 and mm1 contain colorC + movq mm2, [eax+ebx+colorA] ;mm2 and mm3 contain colorA + movq mm1, mm0 + movq mm3, mm2 + + pcmpeqw mm0, [eax+ebx+colorB] + pcmpeqw mm1, [eax+ebx+ebx+colorH] + pcmpeqw mm2, [eax+colorF] + pcmpeqw mm3, [eax+ebx+ebx+colorD] + + pand mm0, mm1 + pxor mm1, mm1 + pand mm0, mm2 + pcmpeqw mm3, mm1 + pand mm0, mm3 ;result in mm0 + + ;if ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI) + movq mm4, [eax+ebx+ebx+colorC] ;mm4 and mm5 contain colorC + movq mm6, [eax+ebx+colorA] ;mm6 and mm7 contain colorA + movq mm5, mm4 + movq mm7, mm6 + + pcmpeqw mm4, 
[eax+ebx+ebx+colorD] + pcmpeqw mm5, [eax+ebx+colorG] + pcmpeqw mm6, [eax+colorI] + pcmpeqw mm7, [eax+ebx+ebx+colorH] + + pand mm4, mm5 + pxor mm5, mm5 + pand mm4, mm6 + pcmpeqw mm7, mm5 + pand mm4, mm7 ;result in mm4 + + por mm0, mm4 ;combine the masks + movq [Mask2], mm0 + ;---------------------------------------------- + +;interpolate colorA and colorC + movq mm0, [eax+ebx+colorA] + movq mm1, [eax+ebx+ebx+colorC] + + movq mm2, mm0 + movq mm3, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 ;mm0 contains the interpolated values + ;------------- + + ;assemble the pixels + movq mm1, [eax+ebx+colorA] + movq mm2, [eax+ebx+ebx+colorC] + + movq mm3, [Mask1] + movq mm4, [Mask2] + + pand mm1, mm3 + pand mm2, mm4 + + por mm3, mm4 + pxor mm7, mm7 + por mm1, mm2 + + pcmpeqw mm3, mm7 + pand mm0, mm3 + por mm0, mm1 + movq [ACPixel], mm0 + +;//////////////////////////////// +; Decide which "branch" to take +;-------------------------------- + movq mm0, [eax+ebx+colorA] + movq mm1, [eax+ebx+colorB] + movq mm6, mm0 + movq mm7, mm1 + pcmpeqw mm0, [eax+ebx+ebx+colorD] + pcmpeqw mm1, [eax+ebx+ebx+colorC] + pcmpeqw mm6, mm7 + + movq mm2, mm0 + movq mm3, mm0 + + pand mm0, mm1 ;colorA == colorD && colorB == colorC + pxor mm7, mm7 + + pcmpeqw mm2, mm7 + pand mm6, mm0 + pand mm2, mm1 ;colorA != colorD && colorB == colorC + + pcmpeqw mm1, mm7 + + pand mm1, mm3 ;colorA == colorD && colorB != colorC + pxor mm0, mm6 + por mm1, mm6 + movq mm7, mm0 + movq [Mask2], mm2 + packsswb mm7, mm7 + movq [Mask1], mm1 + + movd ecx, mm7 + test ecx, ecx + jz near .SKIP_GUESS + +;--------------------------------------------- +; Map of the pixels: I|E F|J +; G|A B|K +; H|C D|L +; M|N O|P + movq mm6, mm0 + movq mm4, [eax+ebx+colorA] + movq mm5, [eax+ebx+colorB] + pxor mm7, mm7 + pand mm6, [ONE] + + movq mm0, [eax+colorE] + movq mm1, [eax+ebx+colorG] + movq mm2, mm0 + movq mm3, mm1 + pcmpeqw 
mm0, mm4 + pcmpeqw mm1, mm4 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand mm0, mm6 + pand mm1, mm6 + pand mm2, mm6 + pand mm3, mm6 + paddw mm0, mm1 + paddw mm2, mm3 + + pxor mm3, mm3 + pcmpgtw mm0, mm6 + pcmpgtw mm2, mm6 + pcmpeqw mm0, mm3 + pcmpeqw mm2, mm3 + pand mm0, mm6 + pand mm2, mm6 + paddw mm7, mm0 + psubw mm7, mm2 + + movq mm0, [eax+colorF] + movq mm1, [eax+ebx+colorK] + movq mm2, mm0 + movq mm3, mm1 + pcmpeqw mm0, mm4 + pcmpeqw mm1, mm4 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand mm0, mm6 + pand mm1, mm6 + pand mm2, mm6 + pand mm3, mm6 + paddw mm0, mm1 + paddw mm2, mm3 + + pxor mm3, mm3 + pcmpgtw mm0, mm6 + pcmpgtw mm2, mm6 + pcmpeqw mm0, mm3 + pcmpeqw mm2, mm3 + pand mm0, mm6 + pand mm2, mm6 + paddw mm7, mm0 + psubw mm7, mm2 + + push eax + add eax, ebx + movq mm0, [eax+ebx+colorH] + movq mm1, [eax+ebx+ebx+colorN] + movq mm2, mm0 + movq mm3, mm1 + pcmpeqw mm0, mm4 + pcmpeqw mm1, mm4 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand mm0, mm6 + pand mm1, mm6 + pand mm2, mm6 + pand mm3, mm6 + paddw mm0, mm1 + paddw mm2, mm3 + + pxor mm3, mm3 + pcmpgtw mm0, mm6 + pcmpgtw mm2, mm6 + pcmpeqw mm0, mm3 + pcmpeqw mm2, mm3 + pand mm0, mm6 + pand mm2, mm6 + paddw mm7, mm0 + psubw mm7, mm2 + + movq mm0, [eax+ebx+colorL] + movq mm1, [eax+ebx+ebx+colorO] + movq mm2, mm0 + movq mm3, mm1 + pcmpeqw mm0, mm4 + pcmpeqw mm1, mm4 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand mm0, mm6 + pand mm1, mm6 + pand mm2, mm6 + pand mm3, mm6 + paddw mm0, mm1 + paddw mm2, mm3 + + pxor mm3, mm3 + pcmpgtw mm0, mm6 + pcmpgtw mm2, mm6 + pcmpeqw mm0, mm3 + pcmpeqw mm2, mm3 + pand mm0, mm6 + pand mm2, mm6 + paddw mm7, mm0 + psubw mm7, mm2 + + pop eax + movq mm1, mm7 + pxor mm0, mm0 + pcmpgtw mm7, mm0 + pcmpgtw mm0, mm1 + + por mm7, [Mask1] + por mm0, [Mask2] + movq [Mask1], mm7 + movq [Mask2], mm0 + +.SKIP_GUESS: + ;---------------------------- + ;interpolate A, B, C and D + movq mm0, [eax+ebx+colorA] + movq mm1, [eax+ebx+colorB] + movq mm4, mm0 + movq mm2, [eax+ebx+ebx+colorC] + movq mm5, mm1 + 
movq mm3, [qcolorMask]
+	movq mm6, mm2
+	movq mm7, [qlowpixelMask]
+
+	pand mm0, mm3
+	pand mm1, mm3
+	pand mm2, mm3
+	pand mm3, [eax+ebx+ebx+colorD]
+
+	psrlw mm0, 2
+	pand mm4, mm7
+	psrlw mm1, 2
+	pand mm5, mm7
+	psrlw mm2, 2
+	pand mm6, mm7
+	psrlw mm3, 2
+	pand mm7, [eax+ebx+ebx+colorD]
+
+	paddw mm0, mm1
+	paddw mm2, mm3
+
+	paddw mm4, mm5
+	paddw mm6, mm7
+
+	paddw mm4, mm6
+	paddw mm0, mm2
+	psrlw mm4, 2
+	pand mm4, [qlowpixelMask]
+	paddw mm0, mm4 ;mm0 contains the interpolated value of A, B, C and D
+
+;\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
+	;assemble the pixels
+	movq mm1, [Mask1]
+	movq mm2, [Mask2]
+	movq mm4, [eax+ebx+colorA]
+	movq mm5, [eax+ebx+colorB]
+	pand mm4, mm1
+	pand mm5, mm2
+
+	pxor mm7, mm7
+	por mm1, mm2
+	por mm4, mm5
+	pcmpeqw mm1, mm7
+	pand mm0, mm1
+	por mm4, mm0 ;mm4 contains the diagonal pixels
+
+	movq mm0, [ACPixel]
+	movq mm1, mm0
+	punpcklwd mm0, mm4
+	punpckhwd mm1, mm4
+
+	push edx
+	add edx, [ebp+dstPitch]
+
+%ifdef FAR_POINTER
+	movq [fs:edx], mm0
+	movq [fs:edx+8], mm1
+%else
+	movq [edx], mm0
+	movq [edx+8], mm1
+%endif
+	pop edx
+
+.SKIP_PROCESS:
+	mov ecx, [ebp+deltaPtr]
+	add ecx, 8
+	mov [ebp+deltaPtr], ecx
+	add edx, 16
+	add eax, 8
+
+	pop ecx
+	sub ecx, 4
+	cmp ecx, 0
+	jg near .Loop
+
+; Restore some stuff
+	popad
+	mov esp, ebp
+	pop ebp
+	emms
+	ret
+
+;-------------------------------------------------------------------------
+;-------------------------------------------------------------------------
+;-------------------------------------------------------------------------
+;-------------------------------------------------------------------------
+;-------------------------------------------------------------------------
+;-------------------------------------------------------------------------
+;-------------------------------------------------------------------------
+
+%ifdef __DJGPP__
+_Init_2xSaIMMX:
+%else
+Init_2xSaIMMX:
+%endif
+; Init_2xSaIMMX(PixelFormat)
+;   Rewrites the 64-bit interpolation masks that the MMX line routines in
+;   this file read (colorMask, lowPixelMask, qcolorMask, qlowpixelMask).
+;   In:  [ebp+8]  pixel format, 555 or 565
+;   Out: eax = 0  the four masks now hold the constants for that format
+;        eax = 1  any other format; the masks are left as they were
+	push ebp
+	mov ebp, esp
+
+; CPUID-based MMX detection was disabled by the original author, who noted
+; that it did not work; the sequence is kept here for reference only.
+;	mov eax,1
+;	cpuid
+;	test edx, 0x00800000	;test bit 23
+;	jz end2			;bit not set => no MMX detected
+
+	mov eax, [ebp+8]	;PixelFormat
+	cmp eax, 555
+	je Bits555
+	cmp eax, 565
+	je Bits565
+end2:
+	mov eax, 1		;format not recognised
+	jmp end3
+
+Bits555:
+	; each mask is a qword: store the same dword into both halves
+	mov dword [colorMask], 0x7BDE7BDE
+	mov dword [colorMask+4], 0x7BDE7BDE
+	mov dword [lowPixelMask], 0x04210421
+	mov dword [lowPixelMask+4], 0x04210421
+	mov dword [qcolorMask], 0x739C739C
+	mov dword [qcolorMask+4], 0x739C739C
+	mov dword [qlowpixelMask], 0x0C630C63
+	mov dword [qlowpixelMask+4], 0x0C630C63
+	mov eax, 0
+	jmp end3
+
+Bits565:
+	mov dword [colorMask], 0xF7DEF7DE
+	mov dword [colorMask+4], 0xF7DEF7DE
+	mov dword [lowPixelMask], 0x08210821
+	mov dword [lowPixelMask+4], 0x08210821
+	mov dword [qcolorMask], 0xE79CE79C
+	mov dword [qcolorMask+4], 0xE79CE79C
+	mov dword [qlowpixelMask], 0x18631863
+	mov dword [qlowpixelMask+4], 0x18631863
+	mov eax, 0
+end3:
+	mov esp, ebp
+	pop ebp
+	ret
+
+
+;-------------------------------------------------------------------------
+;-------------------------------------------------------------------------
+;-------------------------------------------------------------------------
+;-------------------------------------------------------------------------
+;-------------------------------------------------------------------------
+;-------------------------------------------------------------------------
+;-------------------------------------------------------------------------
+
+	SECTION .data ALIGN = 32
+;Some constants
+colorMask dd 0xF7DEF7DE,0xF7DEF7DE
+lowPixelMask dd 0x08210821,0x08210821
+
+qcolorMask dd 0xE79CE79C,0xE79CE79C
+qlowpixelMask dd 0x18631863,0x18631863
+
+darkenMask dd 0xC718C718,0xC718C718
+GreenMask dd 0x07E007E0,0x07E007E0
+RedBlueMask dd 0xF81FF81F,0xF81FF81F
+
+FALSE dd 0x00000000,0x00000000
+TRUE dd 0xffffffff,0xffffffff
+ONE dd 0x00010001,0x00010001
+
+
+	SECTION .bss ALIGN = 32
+ACPixel resb 8
+Mask1 resb 8
+Mask2 resb 8
+
+I56Pixel resb 8
+I23Pixel resb 8
+I5556Pixel resb 8
+I2223Pixel resb 8
+I5666Pixel resb 8
+I2333Pixel resb 8
+Mask26 resb 8
+Mask35 resb 8
+Mask26b resb 8
+Mask35b resb 8
+product1a resb 8
+product1b resb 8
+product2a resb 8
+product2b resb 8
+final1a resb 8
+final1b resb 8
+final2a resb 8
+final2b resb 8
diff -r 18eaae41bde3 -r b970226568d2 src/filters/Makefile
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/filters/Makefile	Sun Mar 04 20:32:31 2012 -0600
@@ -0,0 +1,488 @@
+# Makefile.in generated by automake 1.10.1 from Makefile.am.
+# src/filters/Makefile.  Generated from Makefile.in by configure.
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008  Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+ + + + +pkgdatadir = $(datadir)/VisualBoyAdvance +pkglibdir = $(libdir)/VisualBoyAdvance +pkgincludedir = $(includedir)/VisualBoyAdvance +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = x86_64-unknown-linux-gnu +host_triplet = x86_64-unknown-linux-gnu +target_triplet = x86_64-unknown-linux-gnu +subdir = src/filters +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +OBJDIR = $(top_srcdir)/src/obj +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/gettext.m4 \ + $(top_srcdir)/m4/iconv.m4 $(top_srcdir)/m4/lib-ld.m4 \ + $(top_srcdir)/m4/lib-link.m4 $(top_srcdir)/m4/lib-prefix.m4 \ + $(top_srcdir)/m4/nls.m4 $(top_srcdir)/m4/po.m4 \ + $(top_srcdir)/m4/progtest.m4 $(top_srcdir)/acinclude.m4 \ + $(top_srcdir)/configure.in +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_CLEAN_FILES = +LIBRARIES = $(noinst_LIBRARIES) +AR = ar +ARFLAGS = cru +lib386_a_AR = $(AR) $(ARFLAGS) +lib386_a_LIBADD = +am_lib386_a_OBJECTS = 2xSaImmx.$(OBJEXT) +lib386_a_OBJECTS = $(patsubst %,$(OBJDIR)/%,$(am_lib386_a_OBJECTS)) +libfilter_a_AR = $(AR) $(ARFLAGS) +libfilter_a_LIBADD = +am_libfilter_a_OBJECTS = 2xSaI.$(OBJEXT) admame.$(OBJEXT) \ + bilinear.$(OBJEXT) hq2x.$(OBJEXT) interframe.$(OBJEXT) \ + motionblur.$(OBJEXT) pixel.$(OBJEXT) scanline.$(OBJEXT) \ + simple2x.$(OBJEXT) +libfilter_a_OBJECTS = $(patsubst %,$(OBJDIR)/%,$(am_libfilter_a_OBJECTS)) +DEFAULT_INCLUDES = -I. 
+depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +CXXLD = $(CXX) +CXXLINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ + -o $@ +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +SOURCES = $(lib386_a_SOURCES) $(libfilter_a_SOURCES) +DIST_SOURCES = $(lib386_a_SOURCES) $(libfilter_a_SOURCES) +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = ${SHELL} /home/r/proj/vba/trunk/missing --run aclocal-1.10 +AMTAR = ${SHELL} /home/r/proj/vba/trunk/missing --run tar +AUTOCONF = ${SHELL} /home/r/proj/vba/trunk/missing --run autoconf +AUTOHEADER = ${SHELL} /home/r/proj/vba/trunk/missing --run autoheader +AUTOMAKE = ${SHELL} /home/r/proj/vba/trunk/missing --run automake-1.10 +AWK = gawk +CC = gcc +CCDEPMODE = depmode=gcc3 +CFLAGS = -g -O2 +CPP = gcc -E +CPPFLAGS = +CXX = g++ +CXXDEPMODE = depmode=gcc3 +CXXFLAGS = -g -O2 -DC_CORE -DPROFILING -DDEV_VERSION +CYGPATH_W = echo +DEFS = -DPACKAGE_NAME=\"\" -DPACKAGE_TARNAME=\"\" -DPACKAGE_VERSION=\"\" -DPACKAGE_STRING=\"\" -DPACKAGE_BUGREPORT=\"\" -DPACKAGE_URL=\"\" -DPACKAGE=\"VisualBoyAdvance\" -DVERSION=\"1.7.2\" -DYYTEXT_POINTER=1 -DHAVE_LIBZ=1 -DHAVE_LIBPNG=1 -DHAVE_LIBPTHREAD=1 -DSTDC_HEADERS=1 -DHAVE_SYS_TYPES_H=1 -DHAVE_SYS_STAT_H=1 -DHAVE_STDLIB_H=1 -DHAVE_STRING_H=1 -DHAVE_MEMORY_H=1 -DHAVE_STRINGS_H=1 -DHAVE_INTTYPES_H=1 -DHAVE_STDINT_H=1 -DHAVE_UNISTD_H=1 -DHAVE_MALLOC_H=1 -DHAVE_STRINGS_H=1 -DHAVE_UNISTD_H=1 -DHAVE_ARPA_INET_H=1 -DHAVE_NETINET_IN_H=1 +DEPDIR = .deps +ECHO_C = +ECHO_N = -n +ECHO_T = +EGREP = /bin/grep -E +EXEEXT = +GETTEXT_PACKAGE = +GMSGFMT = +GREP = /bin/grep +GTKMM_CFLAGS = +GTKMM_CPPFLAGS = +GTKMM_LIBS = +INSTALL = /usr/bin/install -c 
+INSTALL_DATA = ${INSTALL} -m 644 +INSTALL_PROGRAM = ${INSTALL} +INSTALL_SCRIPT = ${INSTALL} +INSTALL_STRIP_PROGRAM = $(install_sh) -c -s +INTLLIBS = +LDFLAGS = +LEX = flex +LEXLIB = -lfl +LEX_OUTPUT_ROOT = lex.yy +LIBICONV = +LIBINTL = +LIBOBJS = +LIBS = -lpthread -lpng -lz +LTLIBICONV = +LTLIBINTL = +LTLIBOBJS = +MAKEINFO = ${SHELL} /home/r/proj/vba/trunk/missing --run makeinfo +MKDIR_P = /bin/mkdir -p +MKINSTALLDIRS = +MSGFMT = +MSGMERGE = +NASM = /usr/bin/nasm +OBJEXT = o +PACKAGE = VisualBoyAdvance +PACKAGE_BUGREPORT = +PACKAGE_NAME = +PACKAGE_STRING = +PACKAGE_TARNAME = +PACKAGE_VERSION = +PATH_SEPARATOR = : +PKG_CONFIG = +POSUB = +RANLIB = ranlib +SDL_CFLAGS = -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT +SDL_CONFIG = /usr/bin/sdl-config +SDL_LIBS = -L/usr/lib -Wl,-rpath,/usr/lib -lSDL -lpthread +SET_MAKE = +SHELL = /bin/sh +STRIP = +USE_NLS = +VBA_EXTRA = +VBA_LIBS = ../gba/libgba.a ../gb/libgb.a ../common/libgbcom.a ../filters/libfilter.a ../lua/libgblua.a ../prof/libprof.a +VBA_SRC_EXTRA = lua prof sdl +VERSION = 1.7.2 +XGETTEXT = +XMKMF = +YACC = bison -y +YFLAGS = +abs_builddir = /home/r/proj/vba/trunk/src/filters +abs_srcdir = /home/r/proj/vba/trunk/src/filters +abs_top_builddir = /home/r/proj/vba/trunk +abs_top_srcdir = /home/r/proj/vba/trunk +ac_ct_CC = gcc +ac_ct_CXX = g++ +am__include = include +am__leading_dot = . +am__quote = +am__tar = ${AMTAR} chof - "$$tardir" +am__untar = ${AMTAR} xf - +bindir = ${exec_prefix}/bin +build = x86_64-unknown-linux-gnu +build_alias = +build_cpu = x86_64 +build_os = linux-gnu +build_vendor = unknown +builddir = . 
+datadir = ${datarootdir} +datarootdir = ${prefix}/share +docdir = ${datarootdir}/doc/${PACKAGE} +dvidir = ${docdir} +exec_prefix = ${prefix} +host = x86_64-unknown-linux-gnu +host_alias = +host_cpu = x86_64 +host_os = linux-gnu +host_vendor = unknown +htmldir = ${docdir} +includedir = ${prefix}/include +infodir = ${datarootdir}/info +install_sh = $(SHELL) /home/r/proj/vba/trunk/install-sh +libdir = ${exec_prefix}/lib +libexecdir = ${exec_prefix}/libexec +localedir = ${datarootdir}/locale +localstatedir = ${prefix}/var +mandir = ${datarootdir}/man +mkdir_p = /bin/mkdir -p +oldincludedir = /usr/include +pdfdir = ${docdir} +prefix = /usr/local +program_transform_name = s,x,x, +psdir = ${docdir} +sbindir = ${exec_prefix}/sbin +sharedstatedir = ${prefix}/com +srcdir = . +sysconfdir = ${prefix}/etc +target = x86_64-unknown-linux-gnu +target_alias = +target_cpu = x86_64 +target_os = linux-gnu +target_vendor = unknown +top_builddir = ../.. +top_srcdir = ../.. +SUFFIXES = .asm +noinst_LIBRARIES = lib386.a libfilter.a +lib386_a_SOURCES = 2xSaImmx.asm +libfilter_a_SOURCES = \ + 2xSaI.cpp \ + admame.cpp \ + bilinear.cpp \ + hq2x.cpp \ + hq2x.h \ + interframe.cpp \ + interp.h \ + lq2x.h \ + motionblur.cpp \ + pixel.cpp \ + scanline.cpp \ + simple2x.cpp + +all: all-am + +.SUFFIXES: +.SUFFIXES: .asm .cpp .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/filters/Makefile'; \ + cd $(top_srcdir) && \ + $(AUTOMAKE) --gnu src/filters/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+clean-noinstLIBRARIES:
+	-test -z "$(noinst_LIBRARIES)" || rm -f $(noinst_LIBRARIES)
+lib386.a: $(lib386_a_OBJECTS) $(lib386_a_DEPENDENCIES)
+	-rm -f lib386.a
+	$(lib386_a_AR) lib386.a $(lib386_a_OBJECTS) $(lib386_a_LIBADD)
+	$(RANLIB) lib386.a
+libfilter.a: $(libfilter_a_OBJECTS) $(libfilter_a_DEPENDENCIES)
+	-rm -f libfilter.a
+	$(libfilter_a_AR) libfilter.a $(libfilter_a_OBJECTS) $(libfilter_a_LIBADD)
+	$(RANLIB) libfilter.a
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+include ./$(DEPDIR)/2xSaI.Po
+include ./$(DEPDIR)/admame.Po
+include ./$(DEPDIR)/bilinear.Po
+include ./$(DEPDIR)/hq2x.Po
+include ./$(DEPDIR)/interframe.Po
+include ./$(DEPDIR)/motionblur.Po
+include ./$(DEPDIR)/pixel.Po
+include ./$(DEPDIR)/scanline.Po
+include ./$(DEPDIR)/simple2x.Po
+
+$(OBJDIR)/%.o: %.cpp
+	$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+	mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+#	source='$<' object='$@' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) \
+#	$(CXXCOMPILE) -c -o $@ $<
+
+.cpp.obj:
+	$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+	mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+#	source='$<' object='$@' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) \
+#	$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+# ID: build an identifier database with mkid from the de-duplicated list of
+# sources/headers.  The awk filter prints the collected names only when its
+# `nonempty' flag was set, so the flag must be spelled the same in both places.
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	tags=; \
+	here=`pwd`; \
+	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	    $$tags $$unique; \
+	fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	tags=; \
+	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	test -z "$(CTAGS_ARGS)$$tags$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$tags $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && cd $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed
'/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(lib386_a_OBJECTS) $(libfilter_a_OBJECTS) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-noinstLIBRARIES mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-exec-am: + +install-html: install-html-am + +install-info: install-info-am + +install-man: + +install-pdf: install-pdf-am + +install-ps: install-ps-am + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ + clean-noinstLIBRARIES ctags distclean distclean-compile \ + distclean-generic distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic pdf pdf-am ps ps-am tags uninstall \ + uninstall-am + +LBITS := $(shell getconf LONG_BIT) +ifeq ($(LBITS),64) + ELFFLAG = elf64 +else + ELFFLAG = elf +endif + +$(OBJDIR)/%.o: %.asm + $(NASM) -f $(ELFFLAG) -o $@ $< +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff -r 18eaae41bde3 -r b970226568d2 src/filters/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/filters/Makefile.am Sun Mar 04 20:32:31 2012 -0600 @@ -0,0 +1,22 @@ +SUFFIXES = .asm + +noinst_LIBRARIES = lib386.a libfilter.a + +lib386_a_SOURCES = 2xSaImmx.asm + +.asm.o: + $(NASM) -f elf -o $@ $< + +libfilter_a_SOURCES = \ + 2xSaI.cpp \ + admame.cpp \ + bilinear.cpp \ + hq2x.cpp \ + hq2x.h \ + interframe.cpp \ + interp.h \ + lq2x.h \ + motionblur.cpp \ + pixel.cpp \ + scanline.cpp \ + simple2x.cpp diff -r 18eaae41bde3 -r b970226568d2 src/filters/admame.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/filters/admame.cpp Sun Mar 04 20:32:31 2012 -0600 @@ -0,0 +1,1036 @@ +/* + * This file is part of the Advance project. + * + * Copyright (C) 1999-2002 Andrea Mazzoleni + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * In addition, as a special exception, Andrea Mazzoleni + * gives permission to link the code of this program with + * the MAME library (or with modified versions of MAME that use the + * same license as MAME), and distribute linked combinations including + * the two. You must obey the GNU General Public License in all + * respects for all of the code used other than MAME. 
If you modify + * this file, you may extend this exception to your version of the + * file, but you are not obligated to do so. If you do not wish to + * do so, delete this exception statement from your version. + */ + +/* + * Alternatively at the previous license terms, you are allowed to use this + * code in your program with these conditions: + * - the program is not used in commercial activities. + * - the whole source code of the program is released with the binary. + */ + +#include "../Port.h" + +#ifdef MMX +extern "C" bool cpu_mmx; +#endif + +static void internal_scale2x_16_def(u16 *dst, const u16 *src0, const u16 *src1, const u16 *src2, unsigned count) +{ + /* first pixel */ + dst[0] = src1[0]; + if (src1[1] == src0[0] && src2[0] != src0[0]) + dst[1] = src0[0]; + else + dst[1] = src1[0]; + ++src0; + ++src1; + ++src2; + dst += 2; + + /* central pixels */ + count -= 2; + while (count) + { + if (src0[0] != src2[0] && src1[-1] != src1[1]) + { + dst[0] = src1[-1] == src0[0] ? src0[0] : src1[0]; + dst[1] = src1[1] == src0[0] ? src0[0] : src1[0]; + } + else + { + dst[0] = src1[0]; + dst[1] = src1[0]; + } + + ++src0; + ++src1; + ++src2; + dst += 2; + --count; + } + + /* last pixel */ + if (src1[-1] == src0[0] && src2[0] != src0[0]) + dst[0] = src0[0]; + else + dst[0] = src1[0]; + dst[1] = src1[0]; +} + +static void internal_scale2x_32_def(u32 *dst, + const u32 *src0, + const u32 *src1, + const u32 *src2, + unsigned count) +{ + /* first pixel */ + dst[0] = src1[0]; + if (src1[1] == src0[0] && src2[0] != src0[0]) + dst[1] = src0[0]; + else + dst[1] = src1[0]; + ++src0; + ++src1; + ++src2; + dst += 2; + + /* central pixels */ + count -= 2; + while (count) + { + if (src0[0] != src2[0] && src1[-1] != src1[1]) + { + dst[0] = src1[-1] == src0[0] ? src0[0] : src1[0]; + dst[1] = src1[1] == src0[0] ? 
src0[0] : src1[0]; + } + else + { + dst[0] = src1[0]; + dst[1] = src1[0]; + } + + ++src0; + ++src1; + ++src2; + dst += 2; + --count; + } + + /* last pixel */ + if (src1[-1] == src0[0] && src2[0] != src0[0]) + dst[0] = src0[0]; + else + dst[0] = src1[0]; + dst[1] = src1[0]; +} + +#ifdef MMX +static void internal_scale2x_16_mmx_single(u16 *dst, const u16 *src0, const u16 *src1, const u16 *src2, unsigned count) +{ + /* always do the first and last run */ + count -= 2 * 4; + +#ifdef __GNUC__ + __asm__ __volatile__ ( + /* first run */ + /* set the current, current_pre, current_next registers */ + "movq 0(%1), %%mm0\n" + "movq 0(%1),%%mm7\n" + "movq 8(%1),%%mm1\n" + "psllq $48,%%mm0\n" + "psllq $48,%%mm1\n" + "psrlq $48, %%mm0\n" + "movq %%mm7,%%mm2\n" + "movq %%mm7,%%mm3\n" + "psllq $16,%%mm2\n" + "psrlq $16,%%mm3\n" + "por %%mm2,%%mm0\n" + "por %%mm3,%%mm1\n" + + /* current_upper */ + "movq (%0),%%mm6\n" + + /* compute the upper-left pixel for dst on %%mm2 */ + /* compute the upper-right pixel for dst on %%mm4 */ + "movq %%mm0,%%mm2\n" + "movq %%mm1,%%mm4\n" + "movq %%mm0,%%mm3\n" + "movq %%mm1,%%mm5\n" + "pcmpeqw %%mm6,%%mm2\n" + "pcmpeqw %%mm6,%%mm4\n" + "pcmpeqw (%2),%%mm3\n" + "pcmpeqw (%2),%%mm5\n" + "pandn %%mm2,%%mm3\n" + "pandn %%mm4,%%mm5\n" + "movq %%mm0,%%mm2\n" + "movq %%mm1,%%mm4\n" + "pcmpeqw %%mm1,%%mm2\n" + "pcmpeqw %%mm0,%%mm4\n" + "pandn %%mm3,%%mm2\n" + "pandn %%mm5,%%mm4\n" + "movq %%mm2,%%mm3\n" + "movq %%mm4,%%mm5\n" + "pand %%mm6,%%mm2\n" + "pand %%mm6,%%mm4\n" + "pandn %%mm7,%%mm3\n" + "pandn %%mm7,%%mm5\n" + "por %%mm3,%%mm2\n" + "por %%mm5,%%mm4\n" + + /* set *dst */ + "movq %%mm2,%%mm3\n" + "punpcklwd %%mm4,%%mm2\n" + "punpckhwd %%mm4,%%mm3\n" + "movq %%mm2,(%3)\n" + "movq %%mm3,8(%3)\n" + + /* next */ + "addl $8,%0\n" + "addl $8,%1\n" + "addl $8,%2\n" + "addl $16,%3\n" + + /* central runs */ + "shrl $2,%4\n" + "jz 1f\n" + + "0:\n" + + /* set the current, current_pre, current_next registers */ + "movq -8(%1),%%mm0\n" + "movq (%1),%%mm7\n" + 
"movq 8(%1),%%mm1\n" + "psrlq $48,%%mm0\n" + "psllq $48,%%mm1\n" + "movq %%mm7,%%mm2\n" + "movq %%mm7,%%mm3\n" + "psllq $16,%%mm2\n" + "psrlq $16,%%mm3\n" + "por %%mm2,%%mm0\n" + "por %%mm3,%%mm1\n" + + /* current_upper */ + "movq (%0),%%mm6\n" + + /* compute the upper-left pixel for dst on %%mm2 */ + /* compute the upper-right pixel for dst on %%mm4 */ + "movq %%mm0,%%mm2\n" + "movq %%mm1,%%mm4\n" + "movq %%mm0,%%mm3\n" + "movq %%mm1,%%mm5\n" + "pcmpeqw %%mm6,%%mm2\n" + "pcmpeqw %%mm6,%%mm4\n" + "pcmpeqw (%2),%%mm3\n" + "pcmpeqw (%2),%%mm5\n" + "pandn %%mm2,%%mm3\n" + "pandn %%mm4,%%mm5\n" + "movq %%mm0,%%mm2\n" + "movq %%mm1,%%mm4\n" + "pcmpeqw %%mm1,%%mm2\n" + "pcmpeqw %%mm0,%%mm4\n" + "pandn %%mm3,%%mm2\n" + "pandn %%mm5,%%mm4\n" + "movq %%mm2,%%mm3\n" + "movq %%mm4,%%mm5\n" + "pand %%mm6,%%mm2\n" + "pand %%mm6,%%mm4\n" + "pandn %%mm7,%%mm3\n" + "pandn %%mm7,%%mm5\n" + "por %%mm3,%%mm2\n" + "por %%mm5,%%mm4\n" + + /* set *dst */ + "movq %%mm2,%%mm3\n" + "punpcklwd %%mm4,%%mm2\n" + "punpckhwd %%mm4,%%mm3\n" + "movq %%mm2,(%3)\n" + "movq %%mm3,8(%3)\n" + + /* next */ + "addl $8,%0\n" + "addl $8,%1\n" + "addl $8,%2\n" + "addl $16,%3\n" + + "decl %4\n" + "jnz 0b\n" + "1:\n" + + /* final run */ + /* set the current, current_pre, current_next registers */ + "movq (%1),%%mm1\n" + "movq (%1),%%mm7\n" + "movq -8(%1),%%mm0\n" + "psrlq $48,%%mm1\n" + "psrlq $48,%%mm0\n" + "psllq $48,%%mm1\n" + "movq %%mm7,%%mm2\n" + "movq %%mm7,%%mm3\n" + "psllq $16,%%mm2\n" + "psrlq $16,%%mm3\n" + "por %%mm2,%%mm0\n" + "por %%mm3,%%mm1\n" + + /* current_upper */ + "movq (%0),%%mm6\n" + + /* compute the upper-left pixel for dst on %%mm2 */ + /* compute the upper-right pixel for dst on %%mm4 */ + "movq %%mm0,%%mm2\n" + "movq %%mm1,%%mm4\n" + "movq %%mm0,%%mm3\n" + "movq %%mm1,%%mm5\n" + "pcmpeqw %%mm6,%%mm2\n" + "pcmpeqw %%mm6,%%mm4\n" + "pcmpeqw (%2),%%mm3\n" + "pcmpeqw (%2),%%mm5\n" + "pandn %%mm2,%%mm3\n" + "pandn %%mm4,%%mm5\n" + "movq %%mm0,%%mm2\n" + "movq %%mm1,%%mm4\n" + "pcmpeqw 
%%mm1,%%mm2\n" + "pcmpeqw %%mm0,%%mm4\n" + "pandn %%mm3,%%mm2\n" + "pandn %%mm5,%%mm4\n" + "movq %%mm2,%%mm3\n" + "movq %%mm4,%%mm5\n" + "pand %%mm6,%%mm2\n" + "pand %%mm6,%%mm4\n" + "pandn %%mm7,%%mm3\n" + "pandn %%mm7,%%mm5\n" + "por %%mm3,%%mm2\n" + "por %%mm5,%%mm4\n" + + /* set *dst */ + "movq %%mm2,%%mm3\n" + "punpcklwd %%mm4,%%mm2\n" + "punpckhwd %%mm4,%%mm3\n" + "movq %%mm2,(%3)\n" + "movq %%mm3,8(%3)\n" + "emms\n" + + : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (dst), "+r" (count) + : + : "cc" + ); +#else + __asm { + mov eax, src0; + mov ebx, src1; + mov ecx, src2; + mov edx, dst; + mov esi, count; + + /* first run */ + /* set the current, current_pre, current_next registers */ + movq mm0, qword ptr [ebx]; + movq mm7, qword ptr [ebx]; + movq mm1, qword ptr [ebx + 8]; + psllq mm0, 48; + psllq mm1, 48; + psrlq mm0, 48; + movq mm2, mm7; + movq mm3, mm7; + psllq mm2, 16; + psrlq mm3, 16; + por mm0, mm2; + por mm1, mm3; + + /* current_upper */ + movq mm6, qword ptr [eax]; + + /* compute the upper-left pixel for dst on %%mm2 */ + /* compute the upper-right pixel for dst on %%mm4 */ + movq mm2, mm0; + movq mm4, mm1; + movq mm3, mm0; + movq mm5, mm1; + pcmpeqw mm2, mm6; + pcmpeqw mm4, mm6; + pcmpeqw mm3, qword ptr [ecx]; + pcmpeqw mm5, qword ptr [ecx]; + pandn mm3, mm2; + pandn mm5, mm4; + movq mm2, mm0; + movq mm4, mm1; + pcmpeqw mm2, mm1; + pcmpeqw mm4, mm0; + pandn mm2, mm3; + pandn mm4, mm5; + movq mm3, mm2; + movq mm5, mm4; + pand mm2, mm6; + pand mm4, mm6; + pandn mm3, mm7; + pandn mm5, mm7; + por mm2, mm3; + por mm4, mm5; + + /* set *dst0 */ + movq mm3, mm2; + punpcklwd mm2, mm4; + punpckhwd mm3, mm4; + movq qword ptr [edx], mm2; + movq qword ptr [edx + 8], mm3; + + /* next */ + add eax, 8; + add ebx, 8; + add ecx, 8; + add edx, 16; + + /* central runs */ + shr esi, 2; + jz label1; + align 4; +label0: + + /* set the current, current_pre, current_next registers */ + movq mm0, qword ptr [ebx - 8]; + movq mm7, qword ptr [ebx]; + movq mm1, qword ptr [ebx + 
8]; + psrlq mm0, 48; + psllq mm1, 48; + movq mm2, mm7; + movq mm3, mm7; + psllq mm2, 16; + psrlq mm3, 16; + por mm0, mm2; + por mm1, mm3; + + /* current_upper */ + movq mm6, qword ptr [eax]; + + /* compute the upper-left pixel for dst on %%mm2 */ + /* compute the upper-right pixel for dst on %%mm4 */ + movq mm2, mm0; + movq mm4, mm1; + movq mm3, mm0; + movq mm5, mm1; + pcmpeqw mm2, mm6; + pcmpeqw mm4, mm6; + pcmpeqw mm3, qword ptr [ecx]; + pcmpeqw mm5, qword ptr [ecx]; + pandn mm3, mm2; + pandn mm5, mm4; + movq mm2, mm0; + movq mm4, mm1; + pcmpeqw mm2, mm1; + pcmpeqw mm4, mm0; + pandn mm2, mm3; + pandn mm4, mm5; + movq mm3, mm2; + movq mm5, mm4; + pand mm2, mm6; + pand mm4, mm6; + pandn mm3, mm7; + pandn mm5, mm7; + por mm2, mm3; + por mm4, mm5; + + /* set *dst */ + movq mm3, mm2; + punpcklwd mm2, mm4; + punpckhwd mm3, mm4; + movq qword ptr [edx], mm2; + movq qword ptr [edx + 8], mm3; + + /* next */ + add eax, 8; + add ebx, 8; + add ecx, 8; + add edx, 16; + + dec esi; + jnz label0; +label1: + + /* final run */ + /* set the current, current_pre, current_next registers */ + movq mm1, qword ptr [ebx]; + movq mm7, qword ptr [ebx]; + movq mm0, qword ptr [ebx - 8]; + psrlq mm1, 48; + psrlq mm0, 48; + psllq mm1, 48; + movq mm2, mm7; + movq mm3, mm7; + psllq mm2, 16; + psrlq mm3, 16; + por mm0, mm2; + por mm1, mm3; + + /* current_upper */ + movq mm6, qword ptr [eax]; + + /* compute the upper-left pixel for dst on %%mm2 */ + /* compute the upper-right pixel for dst on %%mm4 */ + movq mm2, mm0; + movq mm4, mm1; + movq mm3, mm0; + movq mm5, mm1; + pcmpeqw mm2, mm6; + pcmpeqw mm4, mm6; + pcmpeqw mm3, qword ptr [ecx]; + pcmpeqw mm5, qword ptr [ecx]; + pandn mm3, mm2; + pandn mm5, mm4; + movq mm2, mm0; + movq mm4, mm1; + pcmpeqw mm2, mm1; + pcmpeqw mm4, mm0; + pandn mm2, mm3; + pandn mm4, mm5; + movq mm3, mm2; + movq mm5, mm4; + pand mm2, mm6; + pand mm4, mm6; + pandn mm3, mm7; + pandn mm5, mm7; + por mm2, mm3; + por mm4, mm5; + + /* set *dst */ + movq mm3, mm2; + punpcklwd mm2, 
mm4; + punpckhwd mm3, mm4; + movq qword ptr [edx], mm2; + movq qword ptr [edx + 8], mm3; + + mov src0, eax; + mov src1, ebx; + mov src2, ecx; + mov dst, edx; + mov count, esi; + + emms; + } +#endif +} + +static void internal_scale2x_32_mmx_single(u32 *dst, const u32 *src0, const u32 *src1, const u32 *src2, unsigned count) +{ + /* always do the first and last run */ + count -= 2 * 2; + +#ifdef __GNUC__ + __asm__ __volatile__ ( + /* first run */ + /* set the current, current_pre, current_next registers */ + "movq 0(%1),%%mm0\n" + "movq 0(%1),%%mm7\n" + "movq 8(%1),%%mm1\n" + "psllq $32,%%mm0\n" + "psllq $32,%%mm1\n" + "psrlq $32,%%mm0\n" + "movq %%mm7,%%mm2\n" + "movq %%mm7,%%mm3\n" + "psllq $32,%%mm2\n" + "psrlq $32,%%mm3\n" + "por %%mm2,%%mm0\n" + "por %%mm3,%%mm1\n" + + /* current_upper */ + "movq (%0),%%mm6\n" + + /* compute the upper-left pixel for dst on %%mm2 */ + /* compute the upper-right pixel for dst on %%mm4 */ + "movq %%mm0,%%mm2\n" + "movq %%mm1,%%mm4\n" + "movq %%mm0,%%mm3\n" + "movq %%mm1,%%mm5\n" + "pcmpeqd %%mm6,%%mm2\n" + "pcmpeqd %%mm6,%%mm4\n" + "pcmpeqd (%2),%%mm3\n" + "pcmpeqd (%2),%%mm5\n" + "pandn %%mm2,%%mm3\n" + "pandn %%mm4,%%mm5\n" + "movq %%mm0,%%mm2\n" + "movq %%mm1,%%mm4\n" + "pcmpeqd %%mm1,%%mm2\n" + "pcmpeqd %%mm0,%%mm4\n" + "pandn %%mm3,%%mm2\n" + "pandn %%mm5,%%mm4\n" + "movq %%mm2,%%mm3\n" + "movq %%mm4,%%mm5\n" + "pand %%mm6,%%mm2\n" + "pand %%mm6,%%mm4\n" + "pandn %%mm7,%%mm3\n" + "pandn %%mm7,%%mm5\n" + "por %%mm3,%%mm2\n" + "por %%mm5,%%mm4\n" + + /* set *dst */ + "movq %%mm2,%%mm3\n" + "punpckldq %%mm4,%%mm2\n" + "punpckhdq %%mm4,%%mm3\n" + "movq %%mm2,(%3)\n" + "movq %%mm3, 8(%3)\n" + + /* next */ + "addl $8,%0\n" + "addl $8,%1\n" + "addl $8,%2\n" + "addl $16,%3\n" + + /* central runs */ + "shrl $1,%4\n" + "jz 1f\n" + + "0:\n" + + /* set the current, current_pre, current_next registers */ + "movq -8(%1),%%mm0\n" + "movq (%1),%%mm7\n" + "movq 8(%1),%%mm1\n" + "psrlq $32,%%mm0\n" + "psllq $32,%%mm1\n" + "movq %%mm7,%%mm2\n" + 
"movq %%mm7,%%mm3\n" + "psllq $32,%%mm2\n" + "psrlq $32,%%mm3\n" + "por %%mm2,%%mm0\n" + "por %%mm3,%%mm1\n" + + /* current_upper */ + "movq (%0),%%mm6\n" + + /* compute the upper-left pixel for dst on %%mm2 */ + /* compute the upper-right pixel for dst on %%mm4 */ + "movq %%mm0,%%mm2\n" + "movq %%mm1,%%mm4\n" + "movq %%mm0,%%mm3\n" + "movq %%mm1,%%mm5\n" + "pcmpeqd %%mm6,%%mm2\n" + "pcmpeqd %%mm6,%%mm4\n" + "pcmpeqd (%2),%%mm3\n" + "pcmpeqd (%2),%%mm5\n" + "pandn %%mm2,%%mm3\n" + "pandn %%mm4,%%mm5\n" + "movq %%mm0,%%mm2\n" + "movq %%mm1,%%mm4\n" + "pcmpeqd %%mm1,%%mm2\n" + "pcmpeqd %%mm0,%%mm4\n" + "pandn %%mm3,%%mm2\n" + "pandn %%mm5,%%mm4\n" + "movq %%mm2,%%mm3\n" + "movq %%mm4,%%mm5\n" + "pand %%mm6,%%mm2\n" + "pand %%mm6,%%mm4\n" + "pandn %%mm7,%%mm3\n" + "pandn %%mm7,%%mm5\n" + "por %%mm3,%%mm2\n" + "por %%mm5,%%mm4\n" + + /* set *dst */ + "movq %%mm2,%%mm3\n" + "punpckldq %%mm4,%%mm2\n" + "punpckhdq %%mm4,%%mm3\n" + "movq %%mm2,(%3)\n" + "movq %%mm3,8(%3)\n" + + /* next */ + "addl $8,%0\n" + "addl $8,%1\n" + "addl $8,%2\n" + "addl $16,%3\n" + + "decl %4\n" + "jnz 0b\n" + "1:\n" + + /* final run */ + /* set the current, current_pre, current_next registers */ + "movq (%1),%%mm1\n" + "movq (%1),%%mm7\n" + "movq -8(%1), %%mm0\n" + "psrlq $32,%%mm1\n" + "psrlq $32,%%mm0\n" + "psllq $32,%%mm1\n" + "movq %%mm7,%%mm2\n" + "movq %%mm7,%%mm3\n" + "psllq $32,%%mm2\n" + "psrlq $32,%%mm3\n" + "por %%mm2,%%mm0\n" + "por %%mm3,%%mm1\n" + + /* current_upper */ + "movq (%0),%%mm6\n" + + /* compute the upper-left pixel for dst on %%mm2 */ + /* compute the upper-right pixel for dst on %%mm4 */ + "movq %%mm0,%%mm2\n" + "movq %%mm1,%%mm4\n" + "movq %%mm0,%%mm3\n" + "movq %%mm1,%%mm5\n" + "pcmpeqd %%mm6,%%mm2\n" + "pcmpeqd %%mm6,%%mm4\n" + "pcmpeqd (%2),%%mm3\n" + "pcmpeqd (%2),%%mm5\n" + "pandn %%mm2,%%mm3\n" + "pandn %%mm4,%%mm5\n" + "movq %%mm0,%%mm2\n" + "movq %%mm1,%%mm4\n" + "pcmpeqd %%mm1,%%mm2\n" + "pcmpeqd %%mm0,%%mm4\n" + "pandn %%mm3,%%mm2\n" + "pandn %%mm5,%%mm4\n" + 
"movq %%mm2,%%mm3\n" + "movq %%mm4,%%mm5\n" + "pand %%mm6,%%mm2\n" + "pand %%mm6,%%mm4\n" + "pandn %%mm7,%%mm3\n" + "pandn %%mm7,%%mm5\n" + "por %%mm3,%%mm2\n" + "por %%mm5,%%mm4\n" + + /* set *dst */ + "movq %%mm2,%%mm3\n" + "punpckldq %%mm4,%%mm2\n" + "punpckhdq %%mm4,%%mm3\n" + "movq %%mm2,(%3)\n" + "movq %%mm3,8(%3)\n" + "emms\n" + + : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (dst), "+r" (count) + : + : "cc" + ); +#else + __asm { + mov eax, src0; + mov ebx, src1; + mov ecx, src2; + mov edx, dst; + mov esi, count; + + /* first run */ + /* set the current, current_pre, current_next registers */ + movq mm0, qword ptr [ebx]; + movq mm7, qword ptr [ebx]; + movq mm1, qword ptr [ebx + 8]; + psllq mm0, 32; + psllq mm1, 32; + psrlq mm0, 32; + movq mm2, mm7; + movq mm3, mm7; + psllq mm2, 32; + psrlq mm3, 32; + por mm0, mm2; + por mm1, mm3; + + /* current_upper */ + movq mm6, qword ptr [eax]; + + /* compute the upper-left pixel for dst on %%mm2 */ + /* compute the upper-right pixel for dst on %%mm4 */ + movq mm2, mm0; + movq mm4, mm1; + movq mm3, mm0; + movq mm5, mm1; + pcmpeqd mm2, mm6; + pcmpeqd mm4, mm6; + pcmpeqd mm3, qword ptr [ecx]; + pcmpeqd mm5, qword ptr [ecx]; + pandn mm3, mm2; + pandn mm5, mm4; + movq mm2, mm0; + movq mm4, mm1; + pcmpeqd mm2, mm1; + pcmpeqd mm4, mm0; + pandn mm2, mm3; + pandn mm4, mm5; + movq mm3, mm2; + movq mm5, mm4; + pand mm2, mm6; + pand mm4, mm6; + pandn mm3, mm7; + pandn mm5, mm7; + por mm2, mm3; + por mm4, mm5; + + /* set *dst */ + movq mm3, mm2; + punpckldq mm2, mm4; + punpckhdq mm3, mm4; + movq qword ptr [edx], mm2; + movq qword ptr [edx + 8], mm3; + + /* next */ + add eax, 8; + add ebx, 8; + add ecx, 8; + add edx, 16; + + /* central runs */ + shr esi, 1; + jz label1; +label0: + + /* set the current, current_pre, current_next registers */ + movq mm0, qword ptr [ebx - 8]; + movq mm7, qword ptr [ebx]; + movq mm1, qword ptr [ebx + 8]; + psrlq mm0, 32; + psllq mm1, 32; + movq mm2, mm7; + movq mm3, mm7; + psllq mm2, 32; + psrlq mm3, 32; 
+ por mm0, mm2; + por mm1, mm3; + + /* current_upper */ + movq mm6, qword ptr[eax]; + + /* compute the upper-left pixel for dst on %%mm2 */ + /* compute the upper-right pixel for dst on %%mm4 */ + movq mm2, mm0; + movq mm4, mm1; + movq mm3, mm0; + movq mm5, mm1; + pcmpeqd mm2, mm6; + pcmpeqd mm4, mm6; + pcmpeqd mm3, qword ptr[ecx]; + pcmpeqd mm5, qword ptr[ecx]; + pandn mm3, mm2; + pandn mm5, mm4; + movq mm2, mm0; + movq mm4, mm1; + pcmpeqd mm2, mm1; + pcmpeqd mm4, mm0; + pandn mm2, mm3; + pandn mm4, mm5; + movq mm3, mm2; + movq mm5, mm4; + pand mm2, mm6; + pand mm4, mm6; + pandn mm3, mm7; + pandn mm5, mm7; + por mm2, mm3; + por mm4, mm5; + + /* set *dst */ + movq mm3, mm2; + punpckldq mm2, mm4; + punpckhdq mm3, mm4; + movq qword ptr [edx], mm2; + movq qword ptr [edx + 8], mm3; + + /* next */ + add eax, 8; + add ebx, 8; + add ecx, 8; + add edx, 16; + + dec esi; + jnz label0; +label1: + + /* final run */ + /* set the current, current_pre, current_next registers */ + movq mm1, qword ptr [ebx]; + movq mm7, qword ptr [ebx]; + movq mm0, qword ptr [ebx - 8]; + psrlq mm1, 32; + psrlq mm0, 32; + psllq mm1, 32; + movq mm2, mm7; + movq mm3, mm7; + psllq mm2, 32; + psrlq mm3, 32; + por mm0, mm2; + por mm1, mm3; + + /* current_upper */ + movq mm6, qword ptr [eax]; + + /* compute the upper-left pixel for dst on %%mm2 */ + /* compute the upper-right pixel for dst on %%mm4 */ + movq mm2, mm0; + movq mm4, mm1; + movq mm3, mm0; + movq mm5, mm1; + pcmpeqd mm2, mm6; + pcmpeqd mm4, mm6; + pcmpeqd mm3, qword ptr [ecx]; + pcmpeqd mm5, qword ptr [ecx]; + pandn mm3, mm2; + pandn mm5, mm4; + movq mm2, mm0; + movq mm4, mm1; + pcmpeqd mm2, mm1; + pcmpeqd mm4, mm0; + pandn mm2, mm3; + pandn mm4, mm5; + movq mm3, mm2; + movq mm5, mm4; + pand mm2, mm6; + pand mm4, mm6; + pandn mm3, mm7; + pandn mm5, mm7; + por mm2, mm3; + por mm4, mm5; + + /* set *dst */ + movq mm3, mm2; + punpckldq mm2, mm4; + punpckhdq mm3, mm4; + movq qword ptr [edx], mm2; + movq qword ptr [edx + 8], mm3; + + mov src0, eax; 
+ mov src1, ebx; + mov src2, ecx; + mov dst, edx; + mov count, esi; + + emms; + } +#endif +} + +static void internal_scale2x_16_mmx(u16 *dst0, u16 *dst1, const u16 *src0, const u16 *src1, const u16 *src2, unsigned count) +{ + // assert( count >= 2*4 ); + internal_scale2x_16_mmx_single(dst0, src0, src1, src2, count); + internal_scale2x_16_mmx_single(dst1, src2, src1, src0, count); +} + +static void internal_scale2x_32_mmx(u32 *dst0, u32 *dst1, const u32 *src0, const u32 *src1, const u32 *src2, unsigned count) +{ + // assert( count >= 2*2 ); + internal_scale2x_32_mmx_single(dst0, src0, src1, src2, count); + internal_scale2x_32_mmx_single(dst1, src2, src1, src0, count); +} + +#endif + +void AdMame2x(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, + u8 *dstPtr, u32 dstPitch, int width, int height) +{ + u16 *dst0 = (u16 *)dstPtr; + u16 *dst1 = dst0 + (dstPitch >> 1); + + u16 *src0 = (u16 *)srcPtr; + u16 *src1 = src0 + (srcPitch >> 1); + u16 *src2 = src1 + (srcPitch >> 1); +#ifdef MMX + if (cpu_mmx) + { + internal_scale2x_16_mmx(dst0, dst1, src0, src0, src1, width); + + int count = height; + + count -= 2; + while (count) + { + dst0 += dstPitch; + dst1 += dstPitch; + internal_scale2x_16_mmx(dst0, dst1, src0, src1, src2, width); + src0 = src1; + src1 = src2; + src2 += srcPitch >> 1; + --count; + } + dst0 += dstPitch; + dst1 += dstPitch; + internal_scale2x_16_mmx(dst0, dst1, src0, src1, src1, width); + } + else + { +#endif + internal_scale2x_16_def(dst0, src0, src0, src1, width); + internal_scale2x_16_def(dst1, src1, src0, src0, width); + + int count = height; + + count -= 2; + while (count) + { + dst0 += dstPitch; + dst1 += dstPitch; + internal_scale2x_16_def(dst0, src0, src1, src2, width); + internal_scale2x_16_def(dst1, src2, src1, src0, width); + src0 = src1; + src1 = src2; + src2 += srcPitch >> 1; + --count; + } + dst0 += dstPitch; + dst1 += dstPitch; + internal_scale2x_16_def(dst0, src0, src1, src1, width); + internal_scale2x_16_def(dst1, src1, src1, src0, width); 
+#ifdef MMX +} + +#endif +} + +void AdMame2x32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, + u8 *dstPtr, u32 dstPitch, int width, int height) +{ + u32 *dst0 = (u32 *)dstPtr; + u32 *dst1 = dst0 + (dstPitch >> 2); + + u32 *src0 = (u32 *)srcPtr; + u32 *src1 = src0 + (srcPitch >> 2); + u32 *src2 = src1 + (srcPitch >> 2); +#ifdef MMX + if (cpu_mmx) + { + internal_scale2x_32_mmx(dst0, dst1, src0, src0, src1, width); + + int count = height; + + count -= 2; + while (count) + { + dst0 += dstPitch >> 1; + dst1 += dstPitch >> 1; + internal_scale2x_32_mmx(dst0, dst1, src0, src1, src2, width); + src0 = src1; + src1 = src2; + src2 += srcPitch >> 2; + --count; + } + dst0 += dstPitch >> 1; + dst1 += dstPitch >> 1; + internal_scale2x_32_mmx(dst0, dst1, src0, src1, src1, width); + } + else + { +#endif + internal_scale2x_32_def(dst0, src0, src0, src1, width); + internal_scale2x_32_def(dst1, src1, src0, src0, width); + + int count = height; + + count -= 2; + while (count) + { + dst0 += dstPitch >> 1; + dst1 += dstPitch >> 1; + internal_scale2x_32_def(dst0, src0, src1, src2, width); + internal_scale2x_32_def(dst1, src2, src1, src0, width); + src0 = src1; + src1 = src2; + src2 += srcPitch >> 2; + --count; + } + dst0 += dstPitch >> 1; + dst1 += dstPitch >> 1; + internal_scale2x_32_def(dst0, src0, src1, src1, width); + internal_scale2x_32_def(dst1, src1, src1, src0, width); +#ifdef MMX +} + +#endif +} diff -r 18eaae41bde3 -r b970226568d2 src/filters/bilinear.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/filters/bilinear.cpp Sun Mar 04 20:32:31 2012 -0600 @@ -0,0 +1,418 @@ +/** Code adapted from Exult source code by Forgotten +** Scale.cc - Trying to scale with bilinear interpolation. 
+** +** Written: 6/14/00 - JSF +**/ + +#include "../common/System.h" + +static u8 row_cur[3 * 322]; +static u8 row_next[3 * 322]; + +static u8 *rgb_row_cur = row_cur; +static u8 *rgb_row_next = row_next; + +#ifdef RGB +#undef RGB // wingdi.h has it +#endif +#define RGB(r, g, b) \ + ((r) >> 3) << systemRedShift | \ + ((g) >> 3) << systemGreenShift | \ + ((b) >> 3) << systemBlueShift \ + +static void fill_rgb_row_16(u16 *from, int src_width, u8 *row, int width) +{ + u8 *copy_start = row + src_width * 3; + u8 *all_stop = row + width * 3; + while (row < copy_start) + { + u16 color = *from++; + *row++ = ((color >> systemRedShift) & 0x1f) << 3; + *row++ = ((color >> systemGreenShift) & 0x1f) << 3; + *row++ = ((color >> systemBlueShift) & 0x1f) << 3; + } + // any remaining elements to be written to 'row' are a replica of the + // preceding pixel + u8 *p = row - 3; + while (row < all_stop) + { + // we're guaranteed three elements per pixel; could unroll the loop + // further, especially with a Duff's Device, but the gains would be + // probably limited (judging by profiler output) + *row++ = *p++; + *row++ = *p++; + *row++ = *p++; + } +} + +static void fill_rgb_row_32(u32 *from, int src_width, u8 *row, int width) +{ + u8 *copy_start = row + src_width * 3; + u8 *all_stop = row + width * 3; + while (row < copy_start) + { + u32 color = *from++; + *row++ = ((color >> systemRedShift) & 0x1f) << 3; + *row++ = ((color >> systemGreenShift) & 0x1f) << 3; + *row++ = ((color >> systemBlueShift) & 0x1f) << 3; + } + // any remaining elements to be written to 'row' are a replica of the + // preceding pixel + u8 *p = row - 3; + while (row < all_stop) + { + // we're guaranteed three elements per pixel; could unroll the loop + // further, especially with a Duff's Device, but the gains would be + // probably limited (judging by profiler output) + *row++ = *p++; + *row++ = *p++; + *row++ = *p++; + } +} + +void Bilinear(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, + u8 *dstPtr, u32 
dstPitch, int width, int height) +{ + u16 *to = (u16 *)dstPtr; + u16 *to_odd = (u16 *)(dstPtr + dstPitch); + + int from_width = width; + u16 *from = (u16 *)srcPtr; + fill_rgb_row_16(from, from_width, rgb_row_cur, width + 1); + + for (int y = 0; y < height; y++) + { + u16 *from_orig = from; + u16 *to_orig = to; + + if (y + 1 < height) + fill_rgb_row_16(from + width + 2, from_width, rgb_row_next, + width + 1); + else + fill_rgb_row_16(from, from_width, rgb_row_next, width + 1); + + // every pixel in the src region, is extended to 4 pixels in the + // destination, arranged in a square 'quad'; if the current src + // pixel is 'a', then in what follows 'b' is the src pixel to the + // right, 'c' is the src pixel below, and 'd' is the src pixel to + // the right and down + u8 *cur_row = rgb_row_cur; + u8 *next_row = rgb_row_next; + u8 *ar = cur_row++; + u8 *ag = cur_row++; + u8 *ab = cur_row++; + u8 *cr = next_row++; + u8 *cg = next_row++; + u8 *cb = next_row++; + for (int x = 0; x < width; x++) + { + u8 *br = cur_row++; + u8 *bg = cur_row++; + u8 *bb = cur_row++; + u8 *dr = next_row++; + u8 *dg = next_row++; + u8 *db = next_row++; + + // upper left pixel in quad: just copy it in + *to++ = RGB(*ar, *ag, *ab); + + // upper right + *to++ = RGB((*ar + *br) >> 1, (*ag + *bg) >> 1, (*ab + *bb) >> 1); + + // lower left + *to_odd++ = RGB((*ar + *cr) >> 1, (*ag + *cg) >> 1, (*ab + *cb) >> 1); + + // lower right + *to_odd++ = RGB((*ar + *br + *cr + *dr) >> 2, + (*ag + *bg + *cg + *dg) >> 2, + (*ab + *bb + *cb + *db) >> 2); + + // 'b' becomes 'a', 'd' becomes 'c' + ar = br; + ag = bg; + ab = bb; + cr = dr; + cg = dg; + cb = db; + } + + // the "next" rgb row becomes the current; the old current rgb row is + // recycled and serves as the new "next" row + u8 *temp; + temp = rgb_row_cur; + rgb_row_cur = rgb_row_next; + rgb_row_next = temp; + + // update the pointers for start of next pair of lines + from = (u16 *)((u8 *)from_orig + srcPitch); + to = (u16 *)((u8 *)to_orig + (dstPitch 
<< 1)); + to_odd = (u16 *)((u8 *)to + dstPitch); + } +} + +void BilinearPlus(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, + u8 *dstPtr, u32 dstPitch, int width, int height) +{ + u16 *to = (u16 *)dstPtr; + u16 *to_odd = (u16 *)(dstPtr + dstPitch); + + int from_width = width; + u16 *from = (u16 *)srcPtr; + fill_rgb_row_16(from, from_width, rgb_row_cur, width + 1); + + for (int y = 0; y < height; y++) + { + u16 *from_orig = from; + u16 *to_orig = to; + + if (y + 1 < height) + fill_rgb_row_16(from + width + 2, from_width, rgb_row_next, + width + 1); + else + fill_rgb_row_16(from, from_width, rgb_row_next, width + 1); + + // every pixel in the src region, is extended to 4 pixels in the + // destination, arranged in a square 'quad'; if the current src + // pixel is 'a', then in what follows 'b' is the src pixel to the + // right, 'c' is the src pixel below, and 'd' is the src pixel to + // the right and down + u8 *cur_row = rgb_row_cur; + u8 *next_row = rgb_row_next; + u8 *ar = cur_row++; + u8 *ag = cur_row++; + u8 *ab = cur_row++; + u8 *cr = next_row++; + u8 *cg = next_row++; + u8 *cb = next_row++; + for (int x = 0; x < width; x++) + { + u8 *br = cur_row++; + u8 *bg = cur_row++; + u8 *bb = cur_row++; + u8 *dr = next_row++; + u8 *dg = next_row++; + u8 *db = next_row++; + + // upper left pixel in quad: just copy it in + //*to++ = manip.rgb(*ar, *ag, *ab); +#ifdef USE_ORIGINAL_BILINEAR_PLUS + *to++ = RGB( + (((*ar) << 2) + ((*ar)) + (*cr + *br + *br)) >> 3, + (((*ag) << 2) + ((*ag)) + (*cg + *bg + *bg)) >> 3, + (((*ab) << 2) + ((*ab)) + (*cb + *bb + *bb)) >> 3); +#else + *to++ = RGB( + (((*ar) << 3) + ((*ar) << 1) + (*cr + *br + *br + *cr)) >> 4, + (((*ag) << 3) + ((*ag) << 1) + (*cg + *bg + *bg + *cg)) >> 4, + (((*ab) << 3) + ((*ab) << 1) + (*cb + *bb + *bb + *cb)) >> 4); +#endif + + // upper right + *to++ = RGB((*ar + *br) >> 1, (*ag + *bg) >> 1, (*ab + *bb) >> 1); + + // lower left + *to_odd++ = RGB((*ar + *cr) >> 1, (*ag + *cg) >> 1, (*ab + *cb) >> 1); + + // lower 
right + *to_odd++ = RGB((*ar + *br + *cr + *dr) >> 2, + (*ag + *bg + *cg + *dg) >> 2, + (*ab + *bb + *cb + *db) >> 2); + + // 'b' becomes 'a', 'd' becomes 'c' + ar = br; + ag = bg; + ab = bb; + cr = dr; + cg = dg; + cb = db; + } + + // the "next" rgb row becomes the current; the old current rgb row is + // recycled and serves as the new "next" row + u8 *temp; + temp = rgb_row_cur; + rgb_row_cur = rgb_row_next; + rgb_row_next = temp; + + // update the pointers for start of next pair of lines + from = (u16 *)((u8 *)from_orig + srcPitch); + to = (u16 *)((u8 *)to_orig + (dstPitch << 1)); + to_odd = (u16 *)((u8 *)to + dstPitch); + } +} + +void Bilinear32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, + u8 *dstPtr, u32 dstPitch, int width, int height) +{ + u32 *to = (u32 *)dstPtr; + u32 *to_odd = (u32 *)(dstPtr + dstPitch); + + int from_width = width; + if (width + 1 < from_width) + from_width = width + 1; + u32 *from = (u32 *)srcPtr; + fill_rgb_row_32(from, from_width, rgb_row_cur, width + 1); + + for (int y = 0; y < height; y++) + { + u32 *from_orig = from; + u32 *to_orig = to; + + if (y + 1 < height) + fill_rgb_row_32(from + width + 1, from_width, rgb_row_next, + width + 1); + else + fill_rgb_row_32(from, from_width, rgb_row_next, width + 1); + + // every pixel in the src region, is extended to 4 pixels in the + // destination, arranged in a square 'quad'; if the current src + // pixel is 'a', then in what follows 'b' is the src pixel to the + // right, 'c' is the src pixel below, and 'd' is the src pixel to + // the right and down + u8 *cur_row = rgb_row_cur; + u8 *next_row = rgb_row_next; + u8 *ar = cur_row++; + u8 *ag = cur_row++; + u8 *ab = cur_row++; + u8 *cr = next_row++; + u8 *cg = next_row++; + u8 *cb = next_row++; + for (int x = 0; x < width; x++) + { + u8 *br = cur_row++; + u8 *bg = cur_row++; + u8 *bb = cur_row++; + u8 *dr = next_row++; + u8 *dg = next_row++; + u8 *db = next_row++; + + // upper left pixel in quad: just copy it in + *to++ = RGB(*ar, *ag, 
*ab); + + // upper right + *to++ = RGB((*ar + *br) >> 1, (*ag + *bg) >> 1, (*ab + *bb) >> 1); + + // lower left + *to_odd++ = RGB((*ar + *cr) >> 1, (*ag + *cg) >> 1, (*ab + *cb) >> 1); + + // lower right + *to_odd++ = RGB((*ar + *br + *cr + *dr) >> 2, + (*ag + *bg + *cg + *dg) >> 2, + (*ab + *bb + *cb + *db) >> 2); + + // 'b' becomes 'a', 'd' becomes 'c' + ar = br; + ag = bg; + ab = bb; + cr = dr; + cg = dg; + cb = db; + } + + // the "next" rgb row becomes the current; the old current rgb row is + // recycled and serves as the new "next" row + u8 *temp; + temp = rgb_row_cur; + rgb_row_cur = rgb_row_next; + rgb_row_next = temp; + + // update the pointers for start of next pair of lines + from = (u32 *)((u8 *)from_orig + srcPitch); + to = (u32 *)((u8 *)to_orig + (dstPitch << 1)); + to_odd = (u32 *)((u8 *)to + dstPitch); + } +} + +void BilinearPlus32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, + u8 *dstPtr, u32 dstPitch, int width, int height) +{ + u32 *to = (u32 *)dstPtr; + u32 *to_odd = (u32 *)(dstPtr + dstPitch); + + int from_width = width; + if (width + 1 < from_width) + from_width = width + 1; + u32 *from = (u32 *)srcPtr; + fill_rgb_row_32(from, from_width, rgb_row_cur, width + 1); + + for (int y = 0; y < height; y++) + { + u32 *from_orig = from; + u32 *to_orig = to; + + if (y + 1 < height) + fill_rgb_row_32(from + width + 1, from_width, rgb_row_next, + width + 1); + else + fill_rgb_row_32(from, from_width, rgb_row_next, width + 1); + + // every pixel in the src region, is extended to 4 pixels in the + // destination, arranged in a square 'quad'; if the current src + // pixel is 'a', then in what follows 'b' is the src pixel to the + // right, 'c' is the src pixel below, and 'd' is the src pixel to + // the right and down + u8 *cur_row = rgb_row_cur; + u8 *next_row = rgb_row_next; + u8 *ar = cur_row++; + u8 *ag = cur_row++; + u8 *ab = cur_row++; + u8 *cr = next_row++; + u8 *cg = next_row++; + u8 *cb = next_row++; + for (int x = 0; x < width; x++) + { + u8 *br 
= cur_row++; + u8 *bg = cur_row++; + u8 *bb = cur_row++; + u8 *dr = next_row++; + u8 *dg = next_row++; + u8 *db = next_row++; + + // upper left pixel in quad: just copy it in + //*to++ = manip.rgb(*ar, *ag, *ab); +#ifdef USE_ORIGINAL_BILINEAR_PLUS + *to++ = RGB( + (((*ar) << 2) + ((*ar)) + (*cr + *br + *br)) >> 3, + (((*ag) << 2) + ((*ag)) + (*cg + *bg + *bg)) >> 3, + (((*ab) << 2) + ((*ab)) + (*cb + *bb + *bb)) >> 3); +#else + *to++ = RGB( + (((*ar) << 3) + ((*ar) << 1) + (*cr + *br + *br + *cr)) >> 4, + (((*ag) << 3) + ((*ag) << 1) + (*cg + *bg + *bg + *cg)) >> 4, + (((*ab) << 3) + ((*ab) << 1) + (*cb + *bb + *bb + *cb)) >> 4); +#endif + + // upper right + *to++ = RGB((*ar + *br) >> 1, (*ag + *bg) >> 1, (*ab + *bb) >> 1); + + // lower left + *to_odd++ = RGB((*ar + *cr) >> 1, (*ag + *cg) >> 1, (*ab + *cb) >> 1); + + // lower right + *to_odd++ = RGB((*ar + *br + *cr + *dr) >> 2, + (*ag + *bg + *cg + *dg) >> 2, + (*ab + *bb + *cb + *db) >> 2); + + // 'b' becomes 'a', 'd' becomes 'c' + ar = br; + ag = bg; + ab = bb; + cr = dr; + cg = dg; + cb = db; + } + + // the "next" rgb row becomes the current; the old current rgb row is + // recycled and serves as the new "next" row + u8 *temp; + temp = rgb_row_cur; + rgb_row_cur = rgb_row_next; + rgb_row_next = temp; + + // update the pointers for start of next pair of lines + from = (u32 *)((u8 *)from_orig + srcPitch); + to = (u32 *)((u8 *)to_orig + (dstPitch << 1)); + to_odd = (u32 *)((u8 *)to + dstPitch); + } +} + diff -r 18eaae41bde3 -r b970226568d2 src/filters/filters.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/filters/filters.h Sun Mar 04 20:32:31 2012 -0600 @@ -0,0 +1,58 @@ + +#ifndef VBA_FILTERS_H +#define VBA_FILTERS_H + +#if _MSC_VER > 1000 +#pragma once +#endif // _MSC_VER > 1000 + +extern void Pixelate2x16(u8*, u32, u8*, u8*, u32, int, int); +extern void Pixelate2x32(u8*, u32, u8*, u8*, u32, int, int); +extern void (*Pixelate3x16)(u8*, u32, u8*, u8*, u32, int, int); +extern void (*Pixelate3x32)(u8*, 
u32, u8*, u8*, u32, int, int); +extern void (*Pixelate4x16)(u8*, u32, u8*, u8*, u32, int, int); +extern void (*Pixelate4x32)(u8*, u32, u8*, u8*, u32, int, int); +extern void MotionBlur(u8*, u32, u8*, u8*, u32, int, int); +extern void MotionBlur32(u8*, u32, u8*, u8*, u32, int, int); +extern void _2xSaI(u8*, u32, u8*, u8*, u32, int, int); +extern void _2xSaI32(u8*, u32, u8*, u8*, u32, int, int); +extern void Super2xSaI(u8*, u32, u8*, u8*, u32, int, int); +extern void Super2xSaI32(u8*, u32, u8*, u8*, u32, int, int); +extern void SuperEagle(u8*, u32, u8*, u8*, u32, int, int); +extern void SuperEagle32(u8*, u32, u8*, u8*, u32, int, int); +extern void AdMame2x(u8*, u32, u8*, u8*, u32, int, int); +extern void AdMame2x32(u8*, u32, u8*, u8*, u32, int, int); +extern void Simple2x16(u8*, u32, u8*, u8*, u32, int, int); +extern void Simple2x32(u8*, u32, u8*, u8*, u32, int, int); +extern void (*Simple3x16)(u8*, u32, u8*, u8*, u32, int, int); +extern void (*Simple3x32)(u8*, u32, u8*, u8*, u32, int, int); +extern void (*Simple4x16)(u8*, u32, u8*, u8*, u32, int, int); +extern void (*Simple4x32)(u8*, u32, u8*, u8*, u32, int, int); +extern void Bilinear(u8*, u32, u8*, u8*, u32, int, int); +extern void Bilinear32(u8*, u32, u8*, u8*, u32, int, int); +extern void BilinearPlus(u8*, u32, u8*, u8*, u32, int, int); +extern void BilinearPlus32(u8*, u32, u8*, u8*, u32, int, int); +extern void Scanlines(u8*, u32, u8*, u8*, u32, int, int); +extern void Scanlines32(u8*, u32, u8*, u8*, u32, int, int); +extern void ScanlinesTV(u8*, u32, u8*, u8*, u32, int, int); +extern void ScanlinesTV32(u8*, u32, u8*, u8*, u32, int, int); +extern void hq2x(u8*, u32, u8*, u8*, u32, int, int); +extern void hq2x32(u8*, u32, u8*, u8*, u32, int, int); +extern void hq2xS(u8*, u32, u8*, u8*, u32, int, int); +extern void hq2xS32(u8*, u32, u8*, u8*, u32, int, int); +extern void lq2x(u8*, u32, u8*, u8*, u32, int, int); +extern void lq2x32(u8*, u32, u8*, u8*, u32, int, int); +extern void hq3x(u8*, u32, u8*, u8*, u32, int, 
int); +extern void hq3x32(u8*, u32, u8*, u8*, u32, int, int); +extern void hq3xS(u8*, u32, u8*, u8*, u32, int, int); +extern void hq3xS32(u8*, u32, u8*, u8*, u32, int, int); + +extern void SmartIB(u8*, u32, int, int); +extern void SmartIB32(u8*, u32, int, int); +extern void MotionBlurIB(u8*, u32, int, int); +extern void InterlaceIB(u8*, u32, int, int); +extern void MotionBlurIB32(u8*, u32, int, int); + +extern void InterframeCleanup(); + +#endif // VBA_FILTERS_H diff -r 18eaae41bde3 -r b970226568d2 src/filters/hq2x.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/filters/hq2x.cpp Sun Mar 04 20:32:31 2012 -0600 @@ -0,0 +1,966 @@ +/* + * This file is part of the Advance project. + * + * Copyright (C) 2003 Andrea Mazzoleni + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * In addition, as a special exception, Andrea Mazzoleni + * gives permission to link the code of this program with + * the MAME library (or with modified versions of MAME that use the + * same license as MAME), and distribute linked combinations including + * the two. You must obey the GNU General Public License in all + * respects for all of the code used other than MAME. If you modify + * this file, you may extend this exception to your version of the + * file, but you are not obligated to do so. 
If you do not wish to + * do so, delete this exception statement from your version. + */ +#include "../Port.h" +#include "interp.h" + +unsigned interp_mask[2]; +unsigned interp_bits_per_pixel; + +/***************************************************************************/ +/* HQ2x C implementation */ + +/* + * This effect is a rewritten implementation of the hq2x effect made by Maxim Stepin + */ + +static void hq2x_16_def(u16 *dst0, u16 *dst1, const u16 *src0, const u16 *src1, const u16 *src2, unsigned count) +{ + unsigned i; + + for (i = 0; i < count; ++i) + { + unsigned char mask; + + u16 c[9]; + + c[1] = src0[0]; + c[4] = src1[0]; + c[7] = src2[0]; + + if (i > 0) + { + c[0] = src0[-1]; + c[3] = src1[-1]; + c[6] = src2[-1]; + } + else + { + c[0] = c[1]; + c[3] = c[4]; + c[6] = c[7]; + } + + if (i < count - 1) + { + c[2] = src0[1]; + c[5] = src1[1]; + c[8] = src2[1]; + } + else + { + c[2] = c[1]; + c[5] = c[4]; + c[8] = c[7]; + } + + mask = 0; + + if (interp_16_diff(c[0], c[4])) + mask |= 1 << 0; + if (interp_16_diff(c[1], c[4])) + mask |= 1 << 1; + if (interp_16_diff(c[2], c[4])) + mask |= 1 << 2; + if (interp_16_diff(c[3], c[4])) + mask |= 1 << 3; + if (interp_16_diff(c[5], c[4])) + mask |= 1 << 4; + if (interp_16_diff(c[6], c[4])) + mask |= 1 << 5; + if (interp_16_diff(c[7], c[4])) + mask |= 1 << 6; + if (interp_16_diff(c[8], c[4])) + mask |= 1 << 7; + +#define P0 dst0[0] +#define P1 dst0[1] +#define P2 dst1[0] +#define P3 dst1[1] +#define MUR interp_16_diff(c[1], c[5]) // top-right +#define MDR interp_16_diff(c[5], c[7]) // bottom-right +#define MDL interp_16_diff(c[7], c[3]) // bottom-left +#define MUL interp_16_diff(c[3], c[1]) // top-left +#define IC(p0) c[p0] +#define I11(p0, p1) interp_16_11(c[p0], c[p1]) +#define I211(p0, p1, p2) interp_16_211(c[p0], c[p1], c[p2]) +#define I31(p0, p1) interp_16_31(c[p0], c[p1]) +#define I332(p0, p1, p2) interp_16_332(c[p0], c[p1], c[p2]) +#define I431(p0, p1, p2) interp_16_431(c[p0], c[p1], c[p2]) +#define I521(p0, p1, 
p2) interp_16_521(c[p0], c[p1], c[p2]) +#define I53(p0, p1) interp_16_53(c[p0], c[p1]) +#define I611(p0, p1, p2) interp_16_611(c[p0], c[p1], c[p2]) +#define I71(p0, p1) interp_16_71(c[p0], c[p1]) +#define I772(p0, p1, p2) interp_16_772(c[p0], c[p1], c[p2]) +#define I97(p0, p1) interp_16_97(c[p0], c[p1]) +#define I1411(p0, p1, p2) interp_16_1411(c[p0], c[p1], c[p2]) +#define I151(p0, p1) interp_16_151(c[p0], c[p1]) + + switch (mask) + { +#include "hq2x.h" + } + +#undef P0 +#undef P1 +#undef P2 +#undef P3 +#undef MUR +#undef MDR +#undef MDL +#undef MUL +#undef IC +#undef I11 +#undef I211 +#undef I31 +#undef I332 +#undef I431 +#undef I521 +#undef I53 +#undef I611 +#undef I71 +#undef I772 +#undef I97 +#undef I1411 +#undef I151 + + src0 += 1; + src1 += 1; + src2 += 1; + dst0 += 2; + dst1 += 2; + } +} + +static void hq2x_32_def(u32 *dst0, u32 *dst1, const u32 *src0, const u32 *src1, const u32 *src2, unsigned count) +{ + unsigned i; + + for (i = 0; i < count; ++i) + { + unsigned char mask; + + u32 c[9]; + + c[1] = src0[0]; + c[4] = src1[0]; + c[7] = src2[0]; + + if (i > 0) + { + c[0] = src0[-1]; + c[3] = src1[-1]; + c[6] = src2[-1]; + } + else + { + c[0] = c[1]; + c[3] = c[4]; + c[6] = c[7]; + } + + if (i < count - 1) + { + c[2] = src0[1]; + c[5] = src1[1]; + c[8] = src2[1]; + } + else + { + c[2] = c[1]; + c[5] = c[4]; + c[8] = c[7]; + } + + mask = 0; + + if (interp_32_diff(c[0], c[4])) + mask |= 1 << 0; + if (interp_32_diff(c[1], c[4])) + mask |= 1 << 1; + if (interp_32_diff(c[2], c[4])) + mask |= 1 << 2; + if (interp_32_diff(c[3], c[4])) + mask |= 1 << 3; + if (interp_32_diff(c[5], c[4])) + mask |= 1 << 4; + if (interp_32_diff(c[6], c[4])) + mask |= 1 << 5; + if (interp_32_diff(c[7], c[4])) + mask |= 1 << 6; + if (interp_32_diff(c[8], c[4])) + mask |= 1 << 7; + +#define P0 dst0[0] +#define P1 dst0[1] +#define P2 dst1[0] +#define P3 dst1[1] +#define MUR interp_32_diff(c[1], c[5]) // top-right +#define MDR interp_32_diff(c[5], c[7]) // bottom-right +#define MDL 
interp_32_diff(c[7], c[3]) // bottom-left +#define MUL interp_32_diff(c[3], c[1]) // top-left +#define IC(p0) c[p0] +#define I11(p0, p1) interp_32_11(c[p0], c[p1]) +#define I211(p0, p1, p2) interp_32_211(c[p0], c[p1], c[p2]) +#define I31(p0, p1) interp_32_31(c[p0], c[p1]) +#define I332(p0, p1, p2) interp_32_332(c[p0], c[p1], c[p2]) +#define I431(p0, p1, p2) interp_32_431(c[p0], c[p1], c[p2]) +#define I521(p0, p1, p2) interp_32_521(c[p0], c[p1], c[p2]) +#define I53(p0, p1) interp_32_53(c[p0], c[p1]) +#define I611(p0, p1, p2) interp_32_611(c[p0], c[p1], c[p2]) +#define I71(p0, p1) interp_32_71(c[p0], c[p1]) +#define I772(p0, p1, p2) interp_32_772(c[p0], c[p1], c[p2]) +#define I97(p0, p1) interp_32_97(c[p0], c[p1]) +#define I1411(p0, p1, p2) interp_32_1411(c[p0], c[p1], c[p2]) +#define I151(p0, p1) interp_32_151(c[p0], c[p1]) + + switch (mask) + { +#include "hq2x.h" + } + +#undef P0 +#undef P1 +#undef P2 +#undef P3 +#undef MUR +#undef MDR +#undef MDL +#undef MUL +#undef IC +#undef I11 +#undef I211 +#undef I31 +#undef I332 +#undef I431 +#undef I521 +#undef I53 +#undef I611 +#undef I71 +#undef I772 +#undef I97 +#undef I1411 +#undef I151 + + src0 += 1; + src1 += 1; + src2 += 1; + dst0 += 2; + dst1 += 2; + } +} + +/***************************************************************************/ +/* HQ2xS C implementation */ + +/* + * This effect is derived from the hq2x effect made by Maxim Stepin + */ + +static void hq2xS_16_def(u16 *dst0, u16 *dst1, const u16 *src0, const u16 *src1, const u16 *src2, unsigned count) +{ + unsigned i; + + for (i = 0; i < count; ++i) + { + unsigned char mask; + + u16 c[9]; + + c[1] = src0[0]; + c[4] = src1[0]; + c[7] = src2[0]; + + c[0] = src0[-1]; + c[3] = src1[-1]; + c[6] = src2[-1]; + + c[2] = src0[1]; + c[5] = src1[1]; + c[8] = src2[1]; + + mask = 0; + + // hq2xS dynamic edge detection: + // simply comparing the center color against its surroundings will give bad results in many cases, + // so, instead, compare the center color relative to 
the max difference in brightness of this 3x3 block + int brightArray[9]; + int maxBright = 0, minBright = 999999; + for (int j = 0; j < 9; j++) + { + int r, g, b; + if (interp_bits_per_pixel == 16) + { + b = (int)((c[j] & 0x1F)) << 3; + g = (int)((c[j] & 0x7E0)) >> 3; + r = (int)((c[j] & 0xF800)) >> 8; + } + else + { + b = (int)((c[j] & 0x1F)) << 3; + g = (int)((c[j] & 0x3E0)) >> 2; + r = (int)((c[j] & 0x7C00)) >> 7; + } + const int bright = r + r + r + g + g + g + b + b; + if (bright > maxBright) maxBright = bright; + if (bright < minBright) minBright = bright; + + brightArray[j] = bright; + } + int diffBright = ((maxBright - minBright) * 7) >> 4; + if (diffBright > 7) + { + #define ABS(x) ((x) < 0 ? -(x) : (x)) + + const int centerBright = brightArray[4]; + if (ABS(brightArray[0] - centerBright) > diffBright) + mask |= 1 << 0; + if (ABS(brightArray[1] - centerBright) > diffBright) + mask |= 1 << 1; + if (ABS(brightArray[2] - centerBright) > diffBright) + mask |= 1 << 2; + if (ABS(brightArray[3] - centerBright) > diffBright) + mask |= 1 << 3; + if (ABS(brightArray[5] - centerBright) > diffBright) + mask |= 1 << 4; + if (ABS(brightArray[6] - centerBright) > diffBright) + mask |= 1 << 5; + if (ABS(brightArray[7] - centerBright) > diffBright) + mask |= 1 << 6; + if (ABS(brightArray[8] - centerBright) > diffBright) + mask |= 1 << 7; + } + +#define P0 dst0[0] +#define P1 dst0[1] +#define P2 dst1[0] +#define P3 dst1[1] +#define MUR false //(ABS(brightArray[1] - brightArray[5]) > diffBright) // top-right +#define MDR false //(ABS(brightArray[5] - brightArray[7]) > diffBright) // bottom-right +#define MDL false //(ABS(brightArray[7] - brightArray[3]) > diffBright) // bottom-left +#define MUL false //(ABS(brightArray[3] - brightArray[1]) > diffBright) // top-left +#define IC(p0) c[p0] +#define I11(p0, p1) interp_16_11(c[p0], c[p1]) +#define I211(p0, p1, p2) interp_16_211(c[p0], c[p1], c[p2]) +#define I31(p0, p1) interp_16_31(c[p0], c[p1]) +#define I332(p0, p1, p2) 
interp_16_332(c[p0], c[p1], c[p2]) +#define I431(p0, p1, p2) interp_16_431(c[p0], c[p1], c[p2]) +#define I521(p0, p1, p2) interp_16_521(c[p0], c[p1], c[p2]) +#define I53(p0, p1) interp_16_53(c[p0], c[p1]) +#define I611(p0, p1, p2) interp_16_611(c[p0], c[p1], c[p2]) +#define I71(p0, p1) interp_16_71(c[p0], c[p1]) +#define I772(p0, p1, p2) interp_16_772(c[p0], c[p1], c[p2]) +#define I97(p0, p1) interp_16_97(c[p0], c[p1]) +#define I1411(p0, p1, p2) interp_16_1411(c[p0], c[p1], c[p2]) +#define I151(p0, p1) interp_16_151(c[p0], c[p1]) + + switch (mask) + { +#include "hq2x.h" + } + +#undef P0 +#undef P1 +#undef P2 +#undef P3 +#undef MUR +#undef MDR +#undef MDL +#undef MUL +#undef IC +#undef I11 +#undef I211 +#undef I31 +#undef I332 +#undef I431 +#undef I521 +#undef I53 +#undef I611 +#undef I71 +#undef I772 +#undef I97 +#undef I1411 +#undef I151 + + src0 += 1; + src1 += 1; + src2 += 1; + dst0 += 2; + dst1 += 2; + } +} + +static void hq2xS_32_def(u32 *dst0, u32 *dst1, const u32 *src0, const u32 *src1, const u32 *src2, unsigned count) +{ + unsigned i; + + for (i = 0; i < count; ++i) + { + unsigned char mask; + + u32 c[9]; + + c[1] = src0[0]; + c[4] = src1[0]; + c[7] = src2[0]; + + c[0] = src0[-1]; + c[3] = src1[-1]; + c[6] = src2[-1]; + + c[2] = src0[1]; + c[5] = src1[1]; + c[8] = src2[1]; + + mask = 0; + + // hq2xS dynamic edge detection: + // simply comparing the center color against its surroundings will give bad results in many cases, + // so, instead, compare the center color relative to the max difference in brightness of this 3x3 block + int brightArray[9]; + int maxBright = 0, minBright = 999999; + for (int j = 0; j < 9; j++) + { + const int b = (int)((c[j] & 0xF8)); + const int g = (int)((c[j] & 0xF800)) >> 8; + const int r = (int)((c[j] & 0xF80000)) >> 16; + const int bright = r + r + r + g + g + g + b + b; + if (bright > maxBright) maxBright = bright; + if (bright < minBright) minBright = bright; + + brightArray[j] = bright; + } + int diffBright = ((maxBright - 
minBright) * 7) >> 4; + if (diffBright > 7) + { + #define ABS(x) ((x) < 0 ? -(x) : (x)) + + const int centerBright = brightArray[4]; + if (ABS(brightArray[0] - centerBright) > diffBright) + mask |= 1 << 0; + if (ABS(brightArray[1] - centerBright) > diffBright) + mask |= 1 << 1; + if (ABS(brightArray[2] - centerBright) > diffBright) + mask |= 1 << 2; + if (ABS(brightArray[3] - centerBright) > diffBright) + mask |= 1 << 3; + if (ABS(brightArray[5] - centerBright) > diffBright) + mask |= 1 << 4; + if (ABS(brightArray[6] - centerBright) > diffBright) + mask |= 1 << 5; + if (ABS(brightArray[7] - centerBright) > diffBright) + mask |= 1 << 6; + if (ABS(brightArray[8] - centerBright) > diffBright) + mask |= 1 << 7; + } + +#define P0 dst0[0] +#define P1 dst0[1] +#define P2 dst1[0] +#define P3 dst1[1] +#define MUR false //(ABS(brightArray[1] - brightArray[5]) > diffBright) // top-right +#define MDR false //(ABS(brightArray[5] - brightArray[7]) > diffBright) // bottom-right +#define MDL false //(ABS(brightArray[7] - brightArray[3]) > diffBright) // bottom-left +#define MUL false //(ABS(brightArray[3] - brightArray[1]) > diffBright) // top-left +#define IC(p0) c[p0] +#define I11(p0, p1) interp_32_11(c[p0], c[p1]) +#define I211(p0, p1, p2) interp_32_211(c[p0], c[p1], c[p2]) +#define I31(p0, p1) interp_32_31(c[p0], c[p1]) +#define I332(p0, p1, p2) interp_32_332(c[p0], c[p1], c[p2]) +#define I431(p0, p1, p2) interp_32_431(c[p0], c[p1], c[p2]) +#define I521(p0, p1, p2) interp_32_521(c[p0], c[p1], c[p2]) +#define I53(p0, p1) interp_32_53(c[p0], c[p1]) +#define I611(p0, p1, p2) interp_32_611(c[p0], c[p1], c[p2]) +#define I71(p0, p1) interp_32_71(c[p0], c[p1]) +#define I772(p0, p1, p2) interp_32_772(c[p0], c[p1], c[p2]) +#define I97(p0, p1) interp_32_97(c[p0], c[p1]) +#define I1411(p0, p1, p2) interp_32_1411(c[p0], c[p1], c[p2]) +#define I151(p0, p1) interp_32_151(c[p0], c[p1]) + + switch (mask) + { +#include "hq2x.h" + } + +#undef P0 +#undef P1 +#undef P2 +#undef P3 +#undef MUR 
+#undef MDR +#undef MDL +#undef MUL +#undef IC +#undef I11 +#undef I211 +#undef I31 +#undef I332 +#undef I431 +#undef I521 +#undef I53 +#undef I611 +#undef I71 +#undef I772 +#undef I97 +#undef I1411 +#undef I151 + + src0 += 1; + src1 += 1; + src2 += 1; + dst0 += 2; + dst1 += 2; + } +} + +/***************************************************************************/ +/* LQ2x C implementation */ + +/* + * This effect is derived from the hq2x effect made by Maxim Stepin + */ + +static void lq2x_16_def(u16 *dst0, u16 *dst1, const u16 *src0, const u16 *src1, const u16 *src2, unsigned count) +{ + unsigned i; + + for (i = 0; i < count; ++i) + { + unsigned char mask; + + u16 c[9]; + + c[1] = src0[0]; + c[4] = src1[0]; + c[7] = src2[0]; + + if (i > 0) + { + c[0] = src0[-1]; + c[3] = src1[-1]; + c[6] = src2[-1]; + } + else + { + c[0] = c[1]; + c[3] = c[4]; + c[6] = c[7]; + } + + if (i < count - 1) + { + c[2] = src0[1]; + c[5] = src1[1]; + c[8] = src2[1]; + } + else + { + c[2] = c[1]; + c[5] = c[4]; + c[8] = c[7]; + } + + mask = 0; + + if (c[0] != c[4]) + mask |= 1 << 0; + if (c[1] != c[4]) + mask |= 1 << 1; + if (c[2] != c[4]) + mask |= 1 << 2; + if (c[3] != c[4]) + mask |= 1 << 3; + if (c[5] != c[4]) + mask |= 1 << 4; + if (c[6] != c[4]) + mask |= 1 << 5; + if (c[7] != c[4]) + mask |= 1 << 6; + if (c[8] != c[4]) + mask |= 1 << 7; + +#define P0 dst0[0] +#define P1 dst0[1] +#define P2 dst1[0] +#define P3 dst1[1] +#define MUR (c[1] != c[5]) +#define MDR (c[5] != c[7]) +#define MDL (c[7] != c[3]) +#define MUL (c[3] != c[1]) +#define IC(p0) c[p0] +#define I11(p0, p1) interp_16_11(c[p0], c[p1]) +#define I211(p0, p1, p2) interp_16_211(c[p0], c[p1], c[p2]) +#define I31(p0, p1) interp_16_31(c[p0], c[p1]) +#define I332(p0, p1, p2) interp_16_332(c[p0], c[p1], c[p2]) +#define I431(p0, p1, p2) interp_16_431(c[p0], c[p1], c[p2]) +#define I521(p0, p1, p2) interp_16_521(c[p0], c[p1], c[p2]) +#define I53(p0, p1) interp_16_53(c[p0], c[p1]) +#define I611(p0, p1, p2) interp_16_611(c[p0], c[p1], 
c[p2]) +#define I71(p0, p1) interp_16_71(c[p0], c[p1]) +#define I772(p0, p1, p2) interp_16_772(c[p0], c[p1], c[p2]) +#define I97(p0, p1) interp_16_97(c[p0], c[p1]) +#define I1411(p0, p1, p2) interp_16_1411(c[p0], c[p1], c[p2]) +#define I151(p0, p1) interp_16_151(c[p0], c[p1]) + + switch (mask) + { +#include "lq2x.h" + } + +#undef P0 +#undef P1 +#undef P2 +#undef P3 +#undef MUR +#undef MDR +#undef MDL +#undef MUL +#undef IC +#undef I11 +#undef I211 +#undef I31 +#undef I332 +#undef I431 +#undef I521 +#undef I53 +#undef I611 +#undef I71 +#undef I772 +#undef I97 +#undef I1411 +#undef I151 + + src0 += 1; + src1 += 1; + src2 += 1; + dst0 += 2; + dst1 += 2; + } +} + +static void lq2x_32_def(u32 *dst0, u32 *dst1, const u32 *src0, const u32 *src1, const u32 *src2, unsigned count) +{ + unsigned i; + + for (i = 0; i < count; ++i) + { + unsigned char mask; + + u32 c[9]; + + c[1] = src0[0]; + c[4] = src1[0]; + c[7] = src2[0]; + + if (i > 0) + { + c[0] = src0[-1]; + c[3] = src1[-1]; + c[6] = src2[-1]; + } + else + { + c[0] = c[1]; + c[3] = c[4]; + c[6] = c[7]; + } + + if (i < count - 1) + { + c[2] = src0[1]; + c[5] = src1[1]; + c[8] = src2[1]; + } + else + { + c[2] = c[1]; + c[5] = c[4]; + c[8] = c[7]; + } + + mask = 0; + + if (c[0] != c[4]) + mask |= 1 << 0; + if (c[1] != c[4]) + mask |= 1 << 1; + if (c[2] != c[4]) + mask |= 1 << 2; + if (c[3] != c[4]) + mask |= 1 << 3; + if (c[5] != c[4]) + mask |= 1 << 4; + if (c[6] != c[4]) + mask |= 1 << 5; + if (c[7] != c[4]) + mask |= 1 << 6; + if (c[8] != c[4]) + mask |= 1 << 7; + +#define P0 dst0[0] +#define P1 dst0[1] +#define P2 dst1[0] +#define P3 dst1[1] +#define MUR (c[1] != c[5]) +#define MDR (c[5] != c[7]) +#define MDL (c[7] != c[3]) +#define MUL (c[3] != c[1]) +#define IC(p0) c[p0] +#define I11(p0, p1) interp_32_11(c[p0], c[p1]) +#define I211(p0, p1, p2) interp_32_211(c[p0], c[p1], c[p2]) +#define I31(p0, p1) interp_32_31(c[p0], c[p1]) +#define I332(p0, p1, p2) interp_32_332(c[p0], c[p1], c[p2]) +#define I431(p0, p1, p2) 
interp_32_431(c[p0], c[p1], c[p2]) +#define I521(p0, p1, p2) interp_32_521(c[p0], c[p1], c[p2]) +#define I53(p0, p1) interp_32_53(c[p0], c[p1]) +#define I611(p0, p1, p2) interp_32_611(c[p0], c[p1], c[p2]) +#define I71(p0, p1) interp_32_71(c[p0], c[p1]) +#define I772(p0, p1, p2) interp_32_772(c[p0], c[p1], c[p2]) +#define I97(p0, p1) interp_32_97(c[p0], c[p1]) +#define I1411(p0, p1, p2) interp_32_1411(c[p0], c[p1], c[p2]) +#define I151(p0, p1) interp_32_151(c[p0], c[p1]) + + switch (mask) + { +#include "lq2x.h" + } + +#undef P0 +#undef P1 +#undef P2 +#undef P3 +#undef MUR +#undef MDR +#undef MDL +#undef MUL +#undef IC +#undef I11 +#undef I211 +#undef I31 +#undef I332 +#undef I431 +#undef I521 +#undef I53 +#undef I611 +#undef I71 +#undef I772 +#undef I97 +#undef I1411 +#undef I151 + + src0 += 1; + src1 += 1; + src2 += 1; + dst0 += 2; + dst1 += 2; + } +} + +void hq2x(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, + u8 *dstPtr, u32 dstPitch, int width, int height) +{ + u16 *dst0 = (u16 *)dstPtr; + u16 *dst1 = dst0 + (dstPitch >> 1); + + u16 *src0 = (u16 *)srcPtr; + u16 *src1 = src0 + (srcPitch >> 1); + u16 *src2 = src1 + (srcPitch >> 1); + + hq2x_16_def(dst0, dst1, src0, src0, src1, width); + + int count = height; + + count -= 2; + while (count) + { + dst0 += dstPitch; + dst1 += dstPitch; + hq2x_16_def(dst0, dst1, src0, src1, src2, width); + src0 = src1; + src1 = src2; + src2 += srcPitch >> 1; + --count; + } + dst0 += dstPitch; + dst1 += dstPitch; + hq2x_16_def(dst0, dst1, src0, src1, src1, width); +} + +void hq2x32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, + u8 *dstPtr, u32 dstPitch, int width, int height) +{ + u32 *dst0 = (u32 *)dstPtr; + u32 *dst1 = dst0 + (dstPitch >> 2); + + u32 *src0 = (u32 *)srcPtr; + u32 *src1 = src0 + (srcPitch >> 2); + u32 *src2 = src1 + (srcPitch >> 2); + hq2x_32_def(dst0, dst1, src0, src0, src1, width); + + int count = height; + + count -= 2; + while (count) + { + dst0 += dstPitch >> 1; + dst1 += dstPitch >> 1; + hq2x_32_def(dst0, 
dst1, src0, src1, src2, width); + src0 = src1; + src1 = src2; + src2 += srcPitch >> 2; + --count; + } + dst0 += dstPitch >> 1; + dst1 += dstPitch >> 1; + hq2x_32_def(dst0, dst1, src0, src1, src1, width); +} + +void hq2xS(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, + u8 *dstPtr, u32 dstPitch, int width, int height) +{ + u16 *dst0 = (u16 *)dstPtr; + u16 *dst1 = dst0 + (dstPitch >> 1); + + u16 *src0 = (u16 *)srcPtr; + u16 *src1 = src0 + (srcPitch >> 1); + u16 *src2 = src1 + (srcPitch >> 1); + + hq2xS_16_def(dst0, dst1, src0, src0, src1, width); + + int count = height; + + count -= 2; + while (count) + { + dst0 += dstPitch; + dst1 += dstPitch; + hq2xS_16_def(dst0, dst1, src0, src1, src2, width); + src0 = src1; + src1 = src2; + src2 += srcPitch >> 1; + --count; + } + dst0 += dstPitch; + dst1 += dstPitch; + hq2xS_16_def(dst0, dst1, src0, src1, src1, width); +} + +void hq2xS32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, + u8 *dstPtr, u32 dstPitch, int width, int height) +{ + u32 *dst0 = (u32 *)dstPtr; + u32 *dst1 = dst0 + (dstPitch >> 2); + + u32 *src0 = (u32 *)srcPtr; + u32 *src1 = src0 + (srcPitch >> 2); + u32 *src2 = src1 + (srcPitch >> 2); + hq2xS_32_def(dst0, dst1, src0, src0, src1, width); + + int count = height; + + count -= 2; + while (count) + { + dst0 += dstPitch >> 1; + dst1 += dstPitch >> 1; + hq2xS_32_def(dst0, dst1, src0, src1, src2, width); + src0 = src1; + src1 = src2; + src2 += srcPitch >> 2; + --count; + } + dst0 += dstPitch >> 1; + dst1 += dstPitch >> 1; + hq2xS_32_def(dst0, dst1, src0, src1, src1, width); +} + +void lq2x(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, + u8 *dstPtr, u32 dstPitch, int width, int height) +{ + u16 *dst0 = (u16 *)dstPtr; + u16 *dst1 = dst0 + (dstPitch >> 1); + + u16 *src0 = (u16 *)srcPtr; + u16 *src1 = src0 + (srcPitch >> 1); + u16 *src2 = src1 + (srcPitch >> 1); + + lq2x_16_def(dst0, dst1, src0, src0, src1, width); + + int count = height; + + count -= 2; + while (count) + { + dst0 += dstPitch; + dst1 += dstPitch; + 
lq2x_16_def(dst0, dst1, src0, src1, src2, width); + src0 = src1; + src1 = src2; + src2 += srcPitch >> 1; + --count; + } + dst0 += dstPitch; + dst1 += dstPitch; + lq2x_16_def(dst0, dst1, src0, src1, src1, width); +} + +void lq2x32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, + u8 *dstPtr, u32 dstPitch, int width, int height) +{ + u32 *dst0 = (u32 *)dstPtr; + u32 *dst1 = dst0 + (dstPitch >> 2); + + u32 *src0 = (u32 *)srcPtr; + u32 *src1 = src0 + (srcPitch >> 2); + u32 *src2 = src1 + (srcPitch >> 2); + lq2x_32_def(dst0, dst1, src0, src0, src1, width); + + int count = height; + + count -= 2; + while (count) + { + dst0 += dstPitch >> 1; + dst1 += dstPitch >> 1; + lq2x_32_def(dst0, dst1, src0, src1, src2, width); + src0 = src1; + src1 = src2; + src2 += srcPitch >> 2; + --count; + } + dst0 += dstPitch >> 1; + dst1 += dstPitch >> 1; + lq2x_32_def(dst0, dst1, src0, src1, src1, width); +} + +void hq2x_init(unsigned bits_per_pixel) +{ + interp_set(bits_per_pixel); +} + diff -r 18eaae41bde3 -r b970226568d2 src/filters/hq2x.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/filters/hq2x.h Sun Mar 04 20:32:31 2012 -0600 @@ -0,0 +1,1824 @@ +case 0 : +case 1 : +case 4 : +case 5 : +case 32 : +case 33 : +case 36 : +case 37 : +case 128 : +case 129 : +case 132 : +case 133 : +case 160 : +case 161 : +case 164 : +case 165 : +{ + P0 = I211(4, 1, 3); + P1 = I211(4, 1, 5); + P2 = I211(4, 3, 7); + P3 = I211(4, 5, 7); +} break; +case 2 : +case 34 : +case 130 : +case 162 : +{ + P0 = I31(4, 0); + P1 = I31(4, 2); + P2 = I211(4, 3, 7); + P3 = I211(4, 5, 7); +} break; +case 3 : +case 35 : +case 131 : +case 163 : +{ + P0 = I31(4, 3); + P1 = I31(4, 2); + P2 = I211(4, 3, 7); + P3 = I211(4, 5, 7); +} break; +case 6 : +case 38 : +case 134 : +case 166 : +{ + P0 = I31(4, 0); + P1 = I31(4, 5); + P2 = I211(4, 3, 7); + P3 = I211(4, 5, 7); +} break; +case 7 : +case 39 : +case 135 : +case 167 : +{ + P0 = I31(4, 3); + P1 = I31(4, 5); + P2 = I211(4, 3, 7); + P3 = I211(4, 5, 7); +} break; +case 8 : +case 
12 : +case 136 : +case 140 : +{ + P0 = I31(4, 0); + P1 = I211(4, 1, 5); + P2 = I31(4, 6); + P3 = I211(4, 5, 7); +} break; +case 9 : +case 13 : +case 137 : +case 141 : +{ + P0 = I31(4, 1); + P1 = I211(4, 1, 5); + P2 = I31(4, 6); + P3 = I211(4, 5, 7); +} break; +case 10 : +case 138 : +{ + P1 = I31(4, 2); + P2 = I31(4, 6); + P3 = I211(4, 5, 7); + if (MUL) { + P0 = I31(4, 0); + } else { + P0 = I211(4, 1, 3); + } +} break; +case 11 : +case 139 : +{ + P1 = I31(4, 2); + P2 = I31(4, 6); + P3 = I211(4, 5, 7); + if (MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } +} break; +case 14 : +case 142 : +{ + P2 = I31(4, 6); + P3 = I211(4, 5, 7); + if (MUL) { + P0 = I31(4, 0); + P1 = I31(4, 5); + } else { + P0 = I332(1, 3, 4); + P1 = I521(4, 1, 5); + } +} break; +case 15 : +case 143 : +{ + P2 = I31(4, 6); + P3 = I211(4, 5, 7); + if (MUL) { + P0 = IC(4); + P1 = I31(4, 5); + } else { + P0 = I332(1, 3, 4); + P1 = I521(4, 1, 5); + } +} break; +case 16 : +case 17 : +case 48 : +case 49 : +{ + P0 = I211(4, 1, 3); + P1 = I31(4, 2); + P2 = I211(4, 3, 7); + P3 = I31(4, 8); +} break; +case 18 : +case 50 : +{ + P0 = I31(4, 0); + P2 = I211(4, 3, 7); + P3 = I31(4, 8); + if (MUR) { + P1 = I31(4, 2); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 19 : +case 51 : +{ + P2 = I211(4, 3, 7); + P3 = I31(4, 8); + if (MUR) { + P0 = I31(4, 3); + P1 = I31(4, 2); + } else { + P0 = I521(4, 1, 3); + P1 = I332(1, 5, 4); + } +} break; +case 20 : +case 21 : +case 52 : +case 53 : +{ + P0 = I211(4, 1, 3); + P1 = I31(4, 1); + P2 = I211(4, 3, 7); + P3 = I31(4, 8); +} break; +case 22 : +case 54 : +{ + P0 = I31(4, 0); + P2 = I211(4, 3, 7); + P3 = I31(4, 8); + if (MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 23 : +case 55 : +{ + P2 = I211(4, 3, 7); + P3 = I31(4, 8); + if (MUR) { + P0 = I31(4, 3); + P1 = IC(4); + } else { + P0 = I521(4, 1, 3); + P1 = I332(1, 5, 4); + } +} break; +case 24 : +case 66 : +{ + P0 = I31(4, 0); + P1 = I31(4, 2); + P2 = I31(4, 6); + P3 = I31(4, 8); +} 
break; +case 25 : +{ + P0 = I31(4, 1); + P1 = I31(4, 2); + P2 = I31(4, 6); + P3 = I31(4, 8); +} break; +case 26 : +case 31 : +case 95 : +{ + P2 = I31(4, 6); + P3 = I31(4, 8); + if (MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } + if (MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 27 : +case 75 : +{ + P1 = I31(4, 2); + P2 = I31(4, 6); + P3 = I31(4, 8); + if (MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } +} break; +case 28 : +{ + P0 = I31(4, 0); + P1 = I31(4, 1); + P2 = I31(4, 6); + P3 = I31(4, 8); +} break; +case 29 : +{ + P0 = I31(4, 1); + P1 = I31(4, 1); + P2 = I31(4, 6); + P3 = I31(4, 8); +} break; +case 30 : +case 86 : +{ + P0 = I31(4, 0); + P2 = I31(4, 6); + P3 = I31(4, 8); + if (MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 40 : +case 44 : +case 168 : +case 172 : +{ + P0 = I31(4, 0); + P1 = I211(4, 1, 5); + P2 = I31(4, 7); + P3 = I211(4, 5, 7); +} break; +case 41 : +case 45 : +case 169 : +case 173 : +{ + P0 = I31(4, 1); + P1 = I211(4, 1, 5); + P2 = I31(4, 7); + P3 = I211(4, 5, 7); +} break; +case 42 : +case 170 : +{ + P1 = I31(4, 2); + P3 = I211(4, 5, 7); + if (MUL) { + P0 = I31(4, 0); + P2 = I31(4, 7); + } else { + P0 = I332(1, 3, 4); + P2 = I521(4, 3, 7); + } +} break; +case 43 : +case 171 : +{ + P1 = I31(4, 2); + P3 = I211(4, 5, 7); + if (MUL) { + P0 = IC(4); + P2 = I31(4, 7); + } else { + P0 = I332(1, 3, 4); + P2 = I521(4, 3, 7); + } +} break; +case 46 : +case 174 : +{ + P1 = I31(4, 5); + P2 = I31(4, 7); + P3 = I211(4, 5, 7); + if (MUL) { + P0 = I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } +} break; +case 47 : +case 175 : +{ + P1 = I31(4, 5); + P2 = I31(4, 7); + P3 = I211(4, 5, 7); + if (MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } +} break; +case 56 : +{ + P0 = I31(4, 0); + P1 = I31(4, 2); + P2 = I31(4, 7); + P3 = I31(4, 8); +} break; +case 57 : +{ + P0 = I31(4, 1); + P1 = I31(4, 2); + P2 = I31(4, 7); + P3 = I31(4, 8); +} break; +case 58 : +{ + P2 = I31(4, 7); + 
P3 = I31(4, 8); + if (MUL) { + P0 = I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } + if (MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 59 : +{ + P2 = I31(4, 7); + P3 = I31(4, 8); + if (MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } + if (MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 60 : +{ + P0 = I31(4, 0); + P1 = I31(4, 1); + P2 = I31(4, 7); + P3 = I31(4, 8); +} break; +case 61 : +{ + P0 = I31(4, 1); + P1 = I31(4, 1); + P2 = I31(4, 7); + P3 = I31(4, 8); +} break; +case 62 : +{ + P0 = I31(4, 0); + P2 = I31(4, 7); + P3 = I31(4, 8); + if (MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 63 : +{ + P2 = I31(4, 7); + P3 = I31(4, 8); + if (MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } + if (MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 64 : +case 65 : +case 68 : +case 69 : +{ + P0 = I211(4, 1, 3); + P1 = I211(4, 1, 5); + P2 = I31(4, 6); + P3 = I31(4, 8); +} break; +case 67 : +{ + P0 = I31(4, 3); + P1 = I31(4, 2); + P2 = I31(4, 6); + P3 = I31(4, 8); +} break; +case 70 : +{ + P0 = I31(4, 0); + P1 = I31(4, 5); + P2 = I31(4, 6); + P3 = I31(4, 8); +} break; +case 71 : +{ + P0 = I31(4, 3); + P1 = I31(4, 5); + P2 = I31(4, 6); + P3 = I31(4, 8); +} break; +case 72 : +case 76 : +{ + P0 = I31(4, 0); + P1 = I211(4, 1, 5); + P3 = I31(4, 8); + if (MDL) { + P2 = I31(4, 6); + } else { + P2 = I211(4, 3, 7); + } +} break; +case 73 : +case 77 : +{ + P1 = I211(4, 1, 5); + P3 = I31(4, 8); + if (MDL) { + P0 = I31(4, 1); + P2 = I31(4, 6); + } else { + P0 = I521(4, 3, 1); + P2 = I332(3, 7, 4); + } +} break; +case 74 : +case 107 : +case 123 : +{ + P1 = I31(4, 2); + P3 = I31(4, 8); + if (MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } + if (MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } +} break; +case 78 : +{ + P1 = I31(4, 5); + P3 = I31(4, 8); + if (MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } + if (MUL) { + P0 = 
I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } +} break; +case 79 : +{ + P1 = I31(4, 5); + P3 = I31(4, 8); + if (MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } + if (MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } +} break; +case 80 : +case 81 : +{ + P0 = I211(4, 1, 3); + P1 = I31(4, 2); + P2 = I31(4, 6); + if (MDR) { + P3 = I31(4, 8); + } else { + P3 = I211(4, 5, 7); + } +} break; +case 82 : +case 214 : +case 222 : +{ + P0 = I31(4, 0); + P2 = I31(4, 6); + if (MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } + if (MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 83 : +{ + P0 = I31(4, 3); + P2 = I31(4, 6); + if (MDR) { + P3 = I31(4, 8); + } else { + P3 = I611(4, 5, 7); + } + if (MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 84 : +case 85 : +{ + P0 = I211(4, 1, 3); + P2 = I31(4, 6); + if (MDR) { + P1 = I31(4, 1); + P3 = I31(4, 8); + } else { + P1 = I521(4, 5, 1); + P3 = I332(5, 7, 4); + } +} break; +case 87 : +{ + P0 = I31(4, 3); + P2 = I31(4, 6); + if (MDR) { + P3 = I31(4, 8); + } else { + P3 = I611(4, 5, 7); + } + if (MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 88 : +case 248 : +case 250 : +{ + P0 = I31(4, 0); + P1 = I31(4, 2); + if (MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } + if (MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } +} break; +case 89 : +{ + P0 = I31(4, 1); + P1 = I31(4, 2); + if (MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } + if (MDR) { + P3 = I31(4, 8); + } else { + P3 = I611(4, 5, 7); + } +} break; +case 90 : +{ + if (MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } + if (MDR) { + P3 = I31(4, 8); + } else { + P3 = I611(4, 5, 7); + } + if (MUL) { + P0 = I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } + if (MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 91 : +{ + if (MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } + if (MDR) { + P3 = 
I31(4, 8); + } else { + P3 = I611(4, 5, 7); + } + if (MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } + if (MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 92 : +{ + P0 = I31(4, 0); + P1 = I31(4, 1); + if (MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } + if (MDR) { + P3 = I31(4, 8); + } else { + P3 = I611(4, 5, 7); + } +} break; +case 93 : +{ + P0 = I31(4, 1); + P1 = I31(4, 1); + if (MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } + if (MDR) { + P3 = I31(4, 8); + } else { + P3 = I611(4, 5, 7); + } +} break; +case 94 : +{ + if (MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } + if (MDR) { + P3 = I31(4, 8); + } else { + P3 = I611(4, 5, 7); + } + if (MUL) { + P0 = I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } + if (MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 96 : +case 97 : +case 100 : +case 101 : +{ + P0 = I211(4, 1, 3); + P1 = I211(4, 1, 5); + P2 = I31(4, 3); + P3 = I31(4, 8); +} break; +case 98 : +{ + P0 = I31(4, 0); + P1 = I31(4, 2); + P2 = I31(4, 3); + P3 = I31(4, 8); +} break; +case 99 : +{ + P0 = I31(4, 3); + P1 = I31(4, 2); + P2 = I31(4, 3); + P3 = I31(4, 8); +} break; +case 102 : +{ + P0 = I31(4, 0); + P1 = I31(4, 5); + P2 = I31(4, 3); + P3 = I31(4, 8); +} break; +case 103 : +{ + P0 = I31(4, 3); + P1 = I31(4, 5); + P2 = I31(4, 3); + P3 = I31(4, 8); +} break; +case 104 : +case 108 : +{ + P0 = I31(4, 0); + P1 = I211(4, 1, 5); + P3 = I31(4, 8); + if (MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } +} break; +case 105 : +case 109 : +{ + P1 = I211(4, 1, 5); + P3 = I31(4, 8); + if (MDL) { + P0 = I31(4, 1); + P2 = IC(4); + } else { + P0 = I521(4, 3, 1); + P2 = I332(3, 7, 4); + } +} break; +case 106 : +case 120 : +{ + P0 = I31(4, 0); + P1 = I31(4, 2); + P3 = I31(4, 8); + if (MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } +} break; +case 110 : +{ + P0 = I31(4, 0); + P1 = I31(4, 5); + P3 = I31(4, 8); + if (MDL) { + P2 = IC(4); + } else 
{ + P2 = I211(4, 3, 7); + } +} break; +case 111 : +{ + P1 = I31(4, 5); + P3 = I31(4, 8); + if (MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } + if (MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } +} break; +case 112 : +case 113 : +{ + P0 = I211(4, 1, 3); + P1 = I31(4, 2); + if (MDR) { + P2 = I31(4, 3); + P3 = I31(4, 8); + } else { + P2 = I521(4, 7, 3); + P3 = I332(5, 7, 4); + } +} break; +case 114 : +{ + P0 = I31(4, 0); + P2 = I31(4, 3); + if (MDR) { + P3 = I31(4, 8); + } else { + P3 = I611(4, 5, 7); + } + if (MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 115 : +{ + P0 = I31(4, 3); + P2 = I31(4, 3); + if (MDR) { + P3 = I31(4, 8); + } else { + P3 = I611(4, 5, 7); + } + if (MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 116 : +case 117 : +{ + P0 = I211(4, 1, 3); + P1 = I31(4, 1); + P2 = I31(4, 3); + if (MDR) { + P3 = I31(4, 8); + } else { + P3 = I611(4, 5, 7); + } +} break; +case 118 : +{ + P0 = I31(4, 0); + P2 = I31(4, 3); + P3 = I31(4, 8); + if (MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 119 : +{ + P2 = I31(4, 3); + P3 = I31(4, 8); + if (MUR) { + P0 = I31(4, 3); + P1 = IC(4); + } else { + P0 = I521(4, 1, 3); + P1 = I332(1, 5, 4); + } +} break; +case 121 : +{ + P0 = I31(4, 1); + P1 = I31(4, 2); + if (MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } + if (MDR) { + P3 = I31(4, 8); + } else { + P3 = I611(4, 5, 7); + } +} break; +case 122 : +{ + if (MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } + if (MDR) { + P3 = I31(4, 8); + } else { + P3 = I611(4, 5, 7); + } + if (MUL) { + P0 = I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } + if (MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 124 : +{ + P0 = I31(4, 0); + P1 = I31(4, 1); + P3 = I31(4, 8); + if (MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } +} break; +case 125 : +{ + P1 = I31(4, 1); + P3 = I31(4, 8); + if (MDL) { + P0 = I31(4, 1); + P2 = 
IC(4); + } else { + P0 = I521(4, 3, 1); + P2 = I332(3, 7, 4); + } +} break; +case 126 : +{ + P0 = I31(4, 0); + P3 = I31(4, 8); + if (MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } + if (MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 127 : +{ + P3 = I31(4, 8); + if (MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } + if (MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } + if (MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 144 : +case 145 : +case 176 : +case 177 : +{ + P0 = I211(4, 1, 3); + P1 = I31(4, 2); + P2 = I211(4, 3, 7); + P3 = I31(4, 7); +} break; +case 146 : +case 178 : +{ + P0 = I31(4, 0); + P2 = I211(4, 3, 7); + if (MUR) { + P1 = I31(4, 2); + P3 = I31(4, 7); + } else { + P1 = I332(1, 5, 4); + P3 = I521(4, 5, 7); + } +} break; +case 147 : +case 179 : +{ + P0 = I31(4, 3); + P2 = I211(4, 3, 7); + P3 = I31(4, 7); + if (MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 148 : +case 149 : +case 180 : +case 181 : +{ + P0 = I211(4, 1, 3); + P1 = I31(4, 1); + P2 = I211(4, 3, 7); + P3 = I31(4, 7); +} break; +case 150 : +case 182 : +{ + P0 = I31(4, 0); + P2 = I211(4, 3, 7); + if (MUR) { + P1 = IC(4); + P3 = I31(4, 7); + } else { + P1 = I332(1, 5, 4); + P3 = I521(4, 5, 7); + } +} break; +case 151 : +case 183 : +{ + P0 = I31(4, 3); + P2 = I211(4, 3, 7); + P3 = I31(4, 7); + if (MUR) { + P1 = IC(4); + } else { + P1 = I1411(4, 1, 5); + } +} break; +case 152 : +{ + P0 = I31(4, 0); + P1 = I31(4, 2); + P2 = I31(4, 6); + P3 = I31(4, 7); +} break; +case 153 : +{ + P0 = I31(4, 1); + P1 = I31(4, 2); + P2 = I31(4, 6); + P3 = I31(4, 7); +} break; +case 154 : +{ + P2 = I31(4, 6); + P3 = I31(4, 7); + if (MUL) { + P0 = I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } + if (MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 155 : +{ + P1 = I31(4, 2); + P2 = I31(4, 6); + P3 = I31(4, 7); + if (MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } +} 
break; +case 156 : +{ + P0 = I31(4, 0); + P1 = I31(4, 1); + P2 = I31(4, 6); + P3 = I31(4, 7); +} break; +case 157 : +{ + P0 = I31(4, 1); + P1 = I31(4, 1); + P2 = I31(4, 6); + P3 = I31(4, 7); +} break; +case 158 : +{ + P2 = I31(4, 6); + P3 = I31(4, 7); + if (MUL) { + P0 = I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } + if (MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 159 : +{ + P2 = I31(4, 6); + P3 = I31(4, 7); + if (MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } + if (MUR) { + P1 = IC(4); + } else { + P1 = I1411(4, 1, 5); + } +} break; +case 184 : +{ + P0 = I31(4, 0); + P1 = I31(4, 2); + P2 = I31(4, 7); + P3 = I31(4, 7); +} break; +case 185 : +{ + P0 = I31(4, 1); + P1 = I31(4, 2); + P2 = I31(4, 7); + P3 = I31(4, 7); +} break; +case 186 : +{ + P2 = I31(4, 7); + P3 = I31(4, 7); + if (MUL) { + P0 = I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } + if (MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 187 : +{ + P1 = I31(4, 2); + P3 = I31(4, 7); + if (MUL) { + P0 = IC(4); + P2 = I31(4, 7); + } else { + P0 = I332(1, 3, 4); + P2 = I521(4, 3, 7); + } +} break; +case 188 : +{ + P0 = I31(4, 0); + P1 = I31(4, 1); + P2 = I31(4, 7); + P3 = I31(4, 7); +} break; +case 189 : +{ + P0 = I31(4, 1); + P1 = I31(4, 1); + P2 = I31(4, 7); + P3 = I31(4, 7); +} break; +case 190 : +{ + P0 = I31(4, 0); + P2 = I31(4, 7); + if (MUR) { + P1 = IC(4); + P3 = I31(4, 7); + } else { + P1 = I332(1, 5, 4); + P3 = I521(4, 5, 7); + } +} break; +case 191 : +{ + P2 = I31(4, 7); + P3 = I31(4, 7); + if (MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } + if (MUR) { + P1 = IC(4); + } else { + P1 = I1411(4, 1, 5); + } +} break; +case 192 : +case 193 : +case 196 : +case 197 : +{ + P0 = I211(4, 1, 3); + P1 = I211(4, 1, 5); + P2 = I31(4, 6); + P3 = I31(4, 5); +} break; +case 194 : +{ + P0 = I31(4, 0); + P1 = I31(4, 2); + P2 = I31(4, 6); + P3 = I31(4, 5); +} break; +case 195 : +{ + P0 = I31(4, 3); + P1 = I31(4, 2); + P2 = I31(4, 6); + 
P3 = I31(4, 5); +} break; +case 198 : +{ + P0 = I31(4, 0); + P1 = I31(4, 5); + P2 = I31(4, 6); + P3 = I31(4, 5); +} break; +case 199 : +{ + P0 = I31(4, 3); + P1 = I31(4, 5); + P2 = I31(4, 6); + P3 = I31(4, 5); +} break; +case 200 : +case 204 : +{ + P0 = I31(4, 0); + P1 = I211(4, 1, 5); + if (MDL) { + P2 = I31(4, 6); + P3 = I31(4, 5); + } else { + P2 = I332(3, 7, 4); + P3 = I521(4, 7, 5); + } +} break; +case 201 : +case 205 : +{ + P0 = I31(4, 1); + P1 = I211(4, 1, 5); + P3 = I31(4, 5); + if (MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } +} break; +case 202 : +{ + P1 = I31(4, 2); + P3 = I31(4, 5); + if (MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } + if (MUL) { + P0 = I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } +} break; +case 203 : +{ + P1 = I31(4, 2); + P2 = I31(4, 6); + P3 = I31(4, 5); + if (MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } +} break; +case 206 : +{ + P1 = I31(4, 5); + P3 = I31(4, 5); + if (MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } + if (MUL) { + P0 = I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } +} break; +case 207 : +{ + P2 = I31(4, 6); + P3 = I31(4, 5); + if (MUL) { + P0 = IC(4); + P1 = I31(4, 5); + } else { + P0 = I332(1, 3, 4); + P1 = I521(4, 1, 5); + } +} break; +case 208 : +case 209 : +{ + P0 = I211(4, 1, 3); + P1 = I31(4, 2); + P2 = I31(4, 6); + if (MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } +} break; +case 210 : +case 216 : +{ + P0 = I31(4, 0); + P1 = I31(4, 2); + P2 = I31(4, 6); + if (MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } +} break; +case 211 : +{ + P0 = I31(4, 3); + P1 = I31(4, 2); + P2 = I31(4, 6); + if (MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } +} break; +case 212 : +case 213 : +{ + P0 = I211(4, 1, 3); + P2 = I31(4, 6); + if (MDR) { + P1 = I31(4, 1); + P3 = IC(4); + } else { + P1 = I521(4, 5, 1); + P3 = I332(5, 7, 4); + } +} break; +case 215 : +{ + P0 = I31(4, 3); + P2 = I31(4, 6); + if (MDR) { + P3 = IC(4); + } else { + 
P3 = I211(4, 5, 7); + } + if (MUR) { + P1 = IC(4); + } else { + P1 = I1411(4, 1, 5); + } +} break; +case 217 : +{ + P0 = I31(4, 1); + P1 = I31(4, 2); + P2 = I31(4, 6); + if (MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } +} break; +case 218 : +{ + if (MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } + if (MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } + if (MUL) { + P0 = I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } + if (MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 219 : +{ + P1 = I31(4, 2); + P2 = I31(4, 6); + if (MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } + if (MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } +} break; +case 220 : +{ + P0 = I31(4, 0); + P1 = I31(4, 1); + if (MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } + if (MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } +} break; +case 221 : +{ + P0 = I31(4, 1); + P2 = I31(4, 6); + if (MDR) { + P1 = I31(4, 1); + P3 = IC(4); + } else { + P1 = I521(4, 5, 1); + P3 = I332(5, 7, 4); + } +} break; +case 223 : +{ + P2 = I31(4, 6); + if (MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } + if (MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } + if (MUR) { + P1 = IC(4); + } else { + P1 = I1411(4, 1, 5); + } +} break; +case 224 : +case 225 : +case 228 : +case 229 : +{ + P0 = I211(4, 1, 3); + P1 = I211(4, 1, 5); + P2 = I31(4, 3); + P3 = I31(4, 5); +} break; +case 226 : +{ + P0 = I31(4, 0); + P1 = I31(4, 2); + P2 = I31(4, 3); + P3 = I31(4, 5); +} break; +case 227 : +{ + P0 = I31(4, 3); + P1 = I31(4, 2); + P2 = I31(4, 3); + P3 = I31(4, 5); +} break; +case 230 : +{ + P0 = I31(4, 0); + P1 = I31(4, 5); + P2 = I31(4, 3); + P3 = I31(4, 5); +} break; +case 231 : +{ + P0 = I31(4, 3); + P1 = I31(4, 5); + P2 = I31(4, 3); + P3 = I31(4, 5); +} break; +case 232 : +case 236 : +{ + P0 = I31(4, 0); + P1 = I211(4, 1, 5); + if (MDL) { + P2 = IC(4); + P3 = I31(4, 5); + } else { + P2 = I332(3, 7, 4); + P3 = 
I521(4, 7, 5); + } +} break; +case 233 : +case 237 : +{ + P0 = I31(4, 1); + P1 = I211(4, 1, 5); + P3 = I31(4, 5); + if (MDL) { + P2 = IC(4); + } else { + P2 = I1411(4, 3, 7); + } +} break; +case 234 : +{ + P1 = I31(4, 2); + P3 = I31(4, 5); + if (MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } + if (MUL) { + P0 = I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } +} break; +case 235 : +{ + P1 = I31(4, 2); + P3 = I31(4, 5); + if (MDL) { + P2 = IC(4); + } else { + P2 = I1411(4, 3, 7); + } + if (MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } +} break; +case 238 : +{ + P0 = I31(4, 0); + P1 = I31(4, 5); + if (MDL) { + P2 = IC(4); + P3 = I31(4, 5); + } else { + P2 = I332(3, 7, 4); + P3 = I521(4, 7, 5); + } +} break; +case 239 : +{ + P1 = I31(4, 5); + P3 = I31(4, 5); + if (MDL) { + P2 = IC(4); + } else { + P2 = I1411(4, 3, 7); + } + if (MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } +} break; +case 240 : +case 241 : +{ + P0 = I211(4, 1, 3); + P1 = I31(4, 2); + if (MDR) { + P2 = I31(4, 3); + P3 = IC(4); + } else { + P2 = I521(4, 7, 3); + P3 = I332(5, 7, 4); + } +} break; +case 242 : +{ + P0 = I31(4, 0); + P2 = I31(4, 3); + if (MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } + if (MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 243 : +{ + P0 = I31(4, 3); + P1 = I31(4, 2); + if (MDR) { + P2 = I31(4, 3); + P3 = IC(4); + } else { + P2 = I521(4, 7, 3); + P3 = I332(5, 7, 4); + } +} break; +case 244 : +case 245 : +{ + P0 = I211(4, 1, 3); + P1 = I31(4, 1); + P2 = I31(4, 3); + if (MDR) { + P3 = IC(4); + } else { + P3 = I1411(4, 5, 7); + } +} break; +case 246 : +{ + P0 = I31(4, 0); + P2 = I31(4, 3); + if (MDR) { + P3 = IC(4); + } else { + P3 = I1411(4, 5, 7); + } + if (MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 247 : +{ + P0 = I31(4, 3); + P2 = I31(4, 3); + if (MDR) { + P3 = IC(4); + } else { + P3 = I1411(4, 5, 7); + } + if (MUR) { + P1 = IC(4); + } else { + P1 = I1411(4, 1, 5); + } 
+} break; +case 249 : +{ + P0 = I31(4, 1); + P1 = I31(4, 2); + if (MDL) { + P2 = IC(4); + } else { + P2 = I1411(4, 3, 7); + } + if (MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } +} break; +case 251 : +{ + P1 = I31(4, 2); + if (MDL) { + P2 = IC(4); + } else { + P2 = I1411(4, 3, 7); + } + if (MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } + if (MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } +} break; +case 252 : +{ + P0 = I31(4, 0); + P1 = I31(4, 1); + if (MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } + if (MDR) { + P3 = IC(4); + } else { + P3 = I1411(4, 5, 7); + } +} break; +case 253 : +{ + P0 = I31(4, 1); + P1 = I31(4, 1); + if (MDL) { + P2 = IC(4); + } else { + P2 = I1411(4, 3, 7); + } + if (MDR) { + P3 = IC(4); + } else { + P3 = I1411(4, 5, 7); + } +} break; +case 254 : +{ + P0 = I31(4, 0); + if (MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } + if (MDR) { + P3 = IC(4); + } else { + P3 = I1411(4, 5, 7); + } + if (MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 255 : +{ + if (MDL) { + P2 = IC(4); + } else { + P2 = I1411(4, 3, 7); + } + if (MDR) { + P3 = IC(4); + } else { + P3 = I1411(4, 5, 7); + } + if (MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } + if (MUR) { + P1 = IC(4); + } else { + P1 = I1411(4, 1, 5); + } +} break; diff -r 18eaae41bde3 -r b970226568d2 src/filters/hq3x32.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/filters/hq3x32.cpp Sun Mar 04 20:32:31 2012 -0600 @@ -0,0 +1,445 @@ +#include "../Port.h" +#include "hq_shared32.h" +#include "interp.h" + +#define SIZE_PIXEL 2 // 16bit = 2 bytes +#define PIXELTYPE unsigned short +#define Interp1 Interp1_16 +#define Interp2 Interp2_16 +#define Interp3 Interp3_16 +#define Interp4 Interp4_16 +#define Interp5 Interp5_16 + +void hq3x(unsigned char *pIn, unsigned int srcPitch, + unsigned char *, + unsigned char *pOut, unsigned int dstPitch, + int Xres, int Yres) +{ + int i, j; + unsigned int line; + PIXELTYPE 
c[10]; + + // +----+----+----+ + // | | | | + // | c1 | c2 | c3 | + // +----+----+----+ + // | | | | + // | c4 | c5 | c6 | + // +----+----+----+ + // | | | | + // | c7 | c8 | c9 | + // +----+----+----+ + + for (j = 0; j < Yres; j++) + { + if ((j > 0) && (j < Yres - 1)) /* only interior rows read the rows above and below; the first and last rows use line = 0 and reuse the current row, as hq3x32() and hq3xS32() do */ + line = srcPitch; + else + line = 0; + + for (i = 0; i < Xres; i++) + { + c[2] = *((PIXELTYPE *)(pIn - line)); + c[5] = *((PIXELTYPE *)(pIn)); + c[8] = *((PIXELTYPE *)(pIn + line)); + + if (i > 0) + { + c[1] = *((PIXELTYPE *)(pIn - line - SIZE_PIXEL)); + c[4] = *((PIXELTYPE *)(pIn - SIZE_PIXEL)); + c[7] = *((PIXELTYPE *)(pIn + line - SIZE_PIXEL)); + } + else /* first column: the left neighbours copy the centre column */ + { + c[1] = c[2]; + c[4] = c[5]; + c[7] = c[8]; + } + + if (i < Xres - 1) + { + c[3] = *((PIXELTYPE *)(pIn - line + SIZE_PIXEL)); + c[6] = *((PIXELTYPE *)(pIn + SIZE_PIXEL)); + c[9] = *((PIXELTYPE *)(pIn + line + SIZE_PIXEL)); + } + else /* last column: the right neighbours copy the centre column */ + { + c[3] = c[2]; + c[6] = c[5]; + c[9] = c[8]; + } + + int pattern = 0; /* bits 0..7 flag neighbours c1,c2,c3,c4,c6,c7,c8,c9 for which interp_16_diff() against c5 is non-zero */ + + if (interp_16_diff(c[1], c[5])) + pattern |= 1 << 0; + if (interp_16_diff(c[2], c[5])) + pattern |= 1 << 1; + if (interp_16_diff(c[3], c[5])) + pattern |= 1 << 2; + if (interp_16_diff(c[4], c[5])) + pattern |= 1 << 3; + if (interp_16_diff(c[6], c[5])) + pattern |= 1 << 4; + if (interp_16_diff(c[7], c[5])) + pattern |= 1 << 5; + if (interp_16_diff(c[8], c[5])) + pattern |= 1 << 6; + if (interp_16_diff(c[9], c[5])) + pattern |= 1 << 7; + +#define Diff interp_16_diff +#include "hq3x32.h" +#undef Diff + pIn += SIZE_PIXEL; + pOut += 3 << 1; /* three 2-byte output pixels per input pixel */ + } + pIn += srcPitch - (Xres << 1); + pOut += dstPitch - (3 * Xres << 1); + pOut += dstPitch << 1; /* with the line above, pOut moves down three output rows in total */ + // pIn+=SIZE_PIXEL; + // pOut+=3*SIZE_PIXEL; + //} + //pIn+=srcPitch-(4*Xres); + //pOut+=dstPitch-(3*Xres*SIZE_PIXEL); + //pOut+=2*dstPitch; + } +} + +void hq3xS(unsigned char *pIn, unsigned int srcPitch, + unsigned char *, + unsigned char *pOut, unsigned int dstPitch, + int Xres, int Yres) +{ + int i, j; + PIXELTYPE c[10]; + + // +----+----+----+ + // | | | | + // | c1 | c2 | c3 | + // +----+----+----+
+ // | | | | + // | c4 | c5 | c6 | + // +----+----+----+ + // | | | | + // | c7 | c8 | c9 | + // +----+----+----+ + + for (j = 0; j < Yres; j++) + { + for (i = 0; i < Xres; i++) /* NOTE(review): the 3x3 neighbourhood below is read with no first/last row or column checks, unlike hq3x32()/hq3xS32(); presumably the caller supplies a padded source buffer -- confirm */ + { + c[2] = *((PIXELTYPE *)(pIn - srcPitch)); + c[5] = *((PIXELTYPE *)(pIn)); + c[8] = *((PIXELTYPE *)(pIn + srcPitch)); + + c[1] = *((PIXELTYPE *)(pIn - srcPitch - SIZE_PIXEL)); + c[4] = *((PIXELTYPE *)(pIn - SIZE_PIXEL)); + c[7] = *((PIXELTYPE *)(pIn + srcPitch - SIZE_PIXEL)); + + c[3] = *((PIXELTYPE *)(pIn - srcPitch + SIZE_PIXEL)); + c[6] = *((PIXELTYPE *)(pIn + SIZE_PIXEL)); + c[9] = *((PIXELTYPE *)(pIn + srcPitch + SIZE_PIXEL)); + + int pattern = 0; + + // hq3xS dynamic edge detection: + // simply comparing the center color against its surroundings will give bad results in many cases, + // so, instead, compare the center color relative to the max difference in brightness of this 3x3 block + int brightArray[10]; + int maxBright = 0, minBright = 999999; + for (int j = 1; j < 10; j++) /* this j is a loop-local index over c[1]..c[9]; it shadows the row counter j of the outer loop */ + { + int r, g, b; + if (interp_bits_per_pixel == 16) /* this branch uses 5/6/5-bit field masks, the else branch 5/5/5-bit masks; both shift each channel up to an 8-bit scale */ + { + b = (int)((c[j] & 0x1F)) << 3; + g = (int)((c[j] & 0x7E0)) >> 3; + r = (int)((c[j] & 0xF800)) >> 8; + } + else + { + b = (int)((c[j] & 0x1F)) << 3; + g = (int)((c[j] & 0x3E0)) >> 2; + r = (int)((c[j] & 0x7C00)) >> 7; + } + const int bright = r + r + r + g + g + g + b + b; /* weighted sum 3*r + 3*g + 2*b */ + if (bright > maxBright) maxBright = bright; + if (bright < minBright) minBright = bright; + + brightArray[j] = bright; + } + const int diffBright = ((maxBright - minBright) * 7) >> 4; /* 7/16 of the brightness range found in this 3x3 block */ + if (diffBright > 7) /* otherwise pattern stays 0 */ + { + #define ABS(x) ((x) < 0 ?
-(x) : (x)) + + const int centerBright = brightArray[5]; /* brightness of the centre pixel c[5] */ + if (ABS(brightArray[1] - centerBright) > diffBright) + pattern |= 1 << 0; + if (ABS(brightArray[2] - centerBright) > diffBright) + pattern |= 1 << 1; + if (ABS(brightArray[3] - centerBright) > diffBright) + pattern |= 1 << 2; + if (ABS(brightArray[4] - centerBright) > diffBright) + pattern |= 1 << 3; + if (ABS(brightArray[6] - centerBright) > diffBright) + pattern |= 1 << 4; + if (ABS(brightArray[7] - centerBright) > diffBright) + pattern |= 1 << 5; + if (ABS(brightArray[8] - centerBright) > diffBright) + pattern |= 1 << 6; + if (ABS(brightArray[9] - centerBright) > diffBright) + pattern |= 1 << 7; + } + +#define Diff(x, y) false //(ABS((x) - (y)) > diffBright) +#undef cget +#define cget(x) brightArray[x] +#include "hq3x32.h" /* Diff() is defined as false above, so every if (Diff(...)) in the included table takes its else branch; cget(x) maps to brightArray[x] */ +#undef cget +#undef Diff + pIn += SIZE_PIXEL; + pOut += 3 << 1; /* three 2-byte output pixels per input pixel */ + } + pIn += srcPitch - (Xres << 1); + pOut += dstPitch - (3 * Xres << 1); + pOut += dstPitch << 1; /* with the line above, pOut moves down three output rows in total */ + // pIn+=SIZE_PIXEL; + // pOut+=3*SIZE_PIXEL; + //} + //pIn+=srcPitch-(4*Xres); + //pOut+=dstPitch-(3*Xres*SIZE_PIXEL); + //pOut+=2*dstPitch; + } +} + +#undef Interp1 +#undef Interp2 +#undef Interp3 +#undef Interp4 +#undef Interp5 +#undef SIZE_PIXEL +#undef PIXELTYPE +#define SIZE_PIXEL 4 // 32bit = 4 bytes +#define PIXELTYPE unsigned int + +void hq3x32(unsigned char *pIn, unsigned int srcPitch, + unsigned char *, + unsigned char *pOut, unsigned int dstPitch, + int Xres, int Yres) +{ + unsigned int YUV1, YUV2; + int i, j, k; + unsigned int line; + PIXELTYPE c[10]; + + // +----+----+----+ + // | | | | + // | c1 | c2 | c3 | + // +----+----+----+ + // | | | | + // | c4 | c5 | c6 | + // +----+----+----+ + // | | | | + // | c7 | c8 | c9 | + // +----+----+----+ + + for (j = 0; j < Yres; j++) + { + if ((j > 0) && (j < Yres - 1)) /* interior rows only; on the first and last rows line = 0, so c[2] and c[8] are read from the current row */ + line = srcPitch; + else + line = 0; + + for (i = 0; i < Xres; i++) + { + c[2] = *((PIXELTYPE *)(pIn - line)); + c[5] = *((PIXELTYPE *)(pIn)); + c[8] = *((PIXELTYPE *)(pIn + line)); + + if (i > 0) + { +
c[1] = *((PIXELTYPE *)(pIn - line - SIZE_PIXEL)); + c[4] = *((PIXELTYPE *)(pIn - SIZE_PIXEL)); + c[7] = *((PIXELTYPE *)(pIn + line - SIZE_PIXEL)); + } + else /* first column: the left neighbours copy the centre column */ + { + c[1] = c[2]; + c[4] = c[5]; + c[7] = c[8]; + } + + if (i < Xres - 1) + { + c[3] = *((PIXELTYPE *)(pIn - line + SIZE_PIXEL)); + c[6] = *((PIXELTYPE *)(pIn + SIZE_PIXEL)); + c[9] = *((PIXELTYPE *)(pIn + line + SIZE_PIXEL)); + } + else /* last column: the right neighbours copy the centre column */ + { + c[3] = c[2]; + c[6] = c[5]; + c[9] = c[8]; + } + + int pattern = 0; + int flag = 1; /* bit for the neighbour currently being tested; shifted once per neighbour, the centre is skipped */ + + YUV1 = RGBtoYUV(c[5]); + + for (k = 1; k <= 9; k++) + { + if (k == 5) continue; /* c[5] is the reference pixel, not a neighbour */ + + if (c[k] != c[5]) /* identical pixels never set a bit, so the conversion is skipped for them */ + { + YUV2 = RGBtoYUV(c[k]); + if ( + (abs32((YUV1 & Ymask) - (YUV2 & Ymask)) > trY) || + (abs32((YUV1 & Umask) - (YUV2 & Umask)) > trU) || + (abs32((YUV1 & Vmask) - (YUV2 & Vmask)) > trV) + ) + pattern |= flag; /* the masked Y, U or V difference exceeds its threshold */ + } + flag <<= 1; + } + +#include "hq3x32.h" + pIn += SIZE_PIXEL; + pOut += 3 << 2; /* three 4-byte output pixels per input pixel */ + } + pIn += srcPitch - (Xres << 2); + pOut += dstPitch - (3 * Xres << 2); + pOut += dstPitch << 1; /* with the line above, pOut moves down three output rows in total */ + // pIn+=SIZE_PIXEL; + // pOut+=3*SIZE_PIXEL; + //} + //pIn+=srcPitch-(4*Xres); + //pOut+=dstPitch-(3*Xres*SIZE_PIXEL); + //pOut+=2*dstPitch; + } +} + +void hq3xS32(unsigned char *pIn, unsigned int srcPitch, + unsigned char *, + unsigned char *pOut, unsigned int dstPitch, + int Xres, int Yres) +{ + int i, j; + unsigned int line; + PIXELTYPE c[10]; + + // +----+----+----+ + // | | | | + // | c1 | c2 | c3 | + // +----+----+----+ + // | | | | + // | c4 | c5 | c6 | + // +----+----+----+ + // | | | | + // | c7 | c8 | c9 | + // +----+----+----+ + + for (j = 0; j < Yres; j++) + { + if ((j > 0) && (j < Yres - 1)) /* interior rows only; on the first and last rows line = 0, so c[2] and c[8] are read from the current row */ + line = srcPitch; + else + line = 0; + + for (i = 0; i < Xres; i++) + { + c[2] = *((PIXELTYPE *)(pIn - line)); + c[5] = *((PIXELTYPE *)(pIn)); + c[8] = *((PIXELTYPE *)(pIn + line)); + + if (i > 0) + { + c[1] = *((PIXELTYPE *)(pIn - line - SIZE_PIXEL)); + c[4] = *((PIXELTYPE *)(pIn - SIZE_PIXEL)); + c[7] = *((PIXELTYPE *)(pIn + line - SIZE_PIXEL)); + } + else + { + c[1] = c[2]; + c[4] = c[5];
+ c[7] = c[8]; + } + + if (i < Xres - 1) + { + c[3] = *((PIXELTYPE *)(pIn - line + SIZE_PIXEL)); + c[6] = *((PIXELTYPE *)(pIn + SIZE_PIXEL)); + c[9] = *((PIXELTYPE *)(pIn + line + SIZE_PIXEL)); + } + else /* last column: the right neighbours copy the centre column */ + { + c[3] = c[2]; + c[6] = c[5]; + c[9] = c[8]; + } + + int pattern = 0; + + // hq3xS dynamic edge detection: + // simply comparing the center color against its surroundings will give bad results in many cases, + // so, instead, compare the center color relative to the max difference in brightness of this 3x3 block + int brightArray[10]; + int maxBright = 0, minBright = 999999; + for (int j = 1; j < 10; j++) /* this j is a loop-local index over c[1]..c[9]; it shadows the row counter j of the outer loop */ + { + const int b = (int)((c[j] & 0xF8)); /* the masks keep only the top 5 bits of each 8-bit channel */ + const int g = (int)((c[j] & 0xF800)) >> 8; + const int r = (int)((c[j] & 0xF80000)) >> 16; + const int bright = r + r + r + g + g + g + b + b; /* weighted sum 3*r + 3*g + 2*b */ + if (bright > maxBright) maxBright = bright; + if (bright < minBright) minBright = bright; + + brightArray[j] = bright; + } + int diffBright = ((maxBright - minBright) * 7) >> 4; /* 7/16 of the brightness range found in this 3x3 block */ + if (diffBright > 7) /* otherwise pattern stays 0 */ + { + #define ABS(x) ((x) < 0 ?
-(x) : (x)) + + const int centerBright = brightArray[5]; + if (ABS(brightArray[1] - centerBright) > diffBright) + pattern |= 1 << 0; + if (ABS(brightArray[2] - centerBright) > diffBright) + pattern |= 1 << 1; + if (ABS(brightArray[3] - centerBright) > diffBright) + pattern |= 1 << 2; + if (ABS(brightArray[4] - centerBright) > diffBright) + pattern |= 1 << 3; + if (ABS(brightArray[6] - centerBright) > diffBright) + pattern |= 1 << 4; + if (ABS(brightArray[7] - centerBright) > diffBright) + pattern |= 1 << 5; + if (ABS(brightArray[8] - centerBright) > diffBright) + pattern |= 1 << 6; + if (ABS(brightArray[9] - centerBright) > diffBright) + pattern |= 1 << 7; + } + +#define Diff(x, y) false //(ABS((x) - (y)) > diffBright) +#undef cget +#define cget(x) brightArray[x] +#include "hq3x32.h" +#undef cget +#undef Diff + pIn += SIZE_PIXEL; + pOut += 3 << 2; + } + pIn += srcPitch - (Xres << 2); + pOut += dstPitch - (3 * Xres << 2); + pOut += dstPitch << 1; + // pIn+=SIZE_PIXEL; + // pOut+=3*SIZE_PIXEL; + //} + //pIn+=srcPitch-(4*Xres); + //pOut+=dstPitch-(3*Xres*SIZE_PIXEL); + //pOut+=2*dstPitch; + } +} \ No newline at end of file diff -r 18eaae41bde3 -r b970226568d2 src/filters/hq3x32.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/filters/hq3x32.h Sun Mar 04 20:32:31 2012 -0600 @@ -0,0 +1,3674 @@ +#define PIXEL00_1M Interp1( pOut, c[5], c[1] ); +#define PIXEL00_1U Interp1( pOut, c[5], c[2] ); +#define PIXEL00_1L Interp1( pOut, c[5], c[4] ); +#define PIXEL00_2 Interp2( pOut, c[5], c[4], c[2] ); +#define PIXEL00_4 Interp4( pOut, c[5], c[4], c[2] ); +#define PIXEL00_5 Interp5( pOut, c[4], c[2] ); +#define PIXEL00_C *((PIXELTYPE*)(pOut)) = c[5]; + +#define PIXEL01_1 Interp1( pOut+SIZE_PIXEL, c[5], c[2] ); +#define PIXEL01_3 Interp3( pOut+SIZE_PIXEL, c[5], c[2] ); +#define PIXEL01_6 Interp1( pOut+SIZE_PIXEL, c[2], c[5] ); +#define PIXEL01_C *((PIXELTYPE*)(pOut+SIZE_PIXEL)) = c[5]; + +#define PIXEL02_1M Interp1( pOut+SIZE_PIXEL+SIZE_PIXEL, c[5], c[3] ); +#define 
PIXEL02_1U Interp1( pOut+SIZE_PIXEL+SIZE_PIXEL, c[5], c[2] ); +#define PIXEL02_1R Interp1( pOut+SIZE_PIXEL+SIZE_PIXEL, c[5], c[6] ); +#define PIXEL02_2 Interp2( pOut+SIZE_PIXEL+SIZE_PIXEL, c[5], c[2], c[6] ); +#define PIXEL02_4 Interp4( pOut+SIZE_PIXEL+SIZE_PIXEL, c[5], c[2], c[6] ); +#define PIXEL02_5 Interp5( pOut+SIZE_PIXEL+SIZE_PIXEL, c[2], c[6] ); +#define PIXEL02_C *((PIXELTYPE*)(pOut+SIZE_PIXEL+SIZE_PIXEL)) = c[5]; + +#define PIXEL10_1 Interp1( pOut+dstPitch, c[5], c[4] ); +#define PIXEL10_3 Interp3( pOut+dstPitch, c[5], c[4] ); +#define PIXEL10_6 Interp1( pOut+dstPitch, c[4], c[5] ); +#define PIXEL10_C *((PIXELTYPE*)(pOut+dstPitch)) = c[5]; + +#define PIXEL11 *((PIXELTYPE*)(pOut+dstPitch+SIZE_PIXEL)) = c[5]; + +#define PIXEL12_1 Interp1( pOut+dstPitch+SIZE_PIXEL+SIZE_PIXEL, c[5], c[6] ); +#define PIXEL12_3 Interp3( pOut+dstPitch+SIZE_PIXEL+SIZE_PIXEL, c[5], c[6] ); +#define PIXEL12_6 Interp1( pOut+dstPitch+SIZE_PIXEL+SIZE_PIXEL, c[6], c[5] ); +#define PIXEL12_C *((PIXELTYPE*)(pOut+dstPitch+SIZE_PIXEL+SIZE_PIXEL)) = c[5]; + +#define PIXEL20_1M Interp1( pOut+dstPitch+dstPitch, c[5], c[7] ); +#define PIXEL20_1D Interp1( pOut+dstPitch+dstPitch, c[5], c[8] ); +#define PIXEL20_1L Interp1( pOut+dstPitch+dstPitch, c[5], c[4] ); +#define PIXEL20_2 Interp2( pOut+dstPitch+dstPitch, c[5], c[8], c[4] ); +#define PIXEL20_4 Interp4( pOut+dstPitch+dstPitch, c[5], c[8], c[4] ); +#define PIXEL20_5 Interp5( pOut+dstPitch+dstPitch, c[8], c[4] ); +#define PIXEL20_C *((PIXELTYPE*)(pOut+dstPitch+dstPitch)) = c[5]; + +#define PIXEL21_1 Interp1( pOut+dstPitch+dstPitch+SIZE_PIXEL, c[5], c[8] ); +#define PIXEL21_3 Interp3( pOut+dstPitch+dstPitch+SIZE_PIXEL, c[5], c[8] ); +#define PIXEL21_6 Interp1( pOut+dstPitch+dstPitch+SIZE_PIXEL, c[8], c[5] ); +#define PIXEL21_C *((PIXELTYPE*)(pOut+dstPitch+dstPitch+SIZE_PIXEL)) = c[5]; + +#define PIXEL22_1M Interp1( pOut+dstPitch+dstPitch+SIZE_PIXEL+SIZE_PIXEL, c[5], c[9] ); +#define PIXEL22_1D Interp1( 
pOut+dstPitch+dstPitch+SIZE_PIXEL+SIZE_PIXEL, c[5], c[8] ); +#define PIXEL22_1R Interp1( pOut+dstPitch+dstPitch+SIZE_PIXEL+SIZE_PIXEL, c[5], c[6] ); +#define PIXEL22_2 Interp2( pOut+dstPitch+dstPitch+SIZE_PIXEL+SIZE_PIXEL, c[5], c[6], c[8] ); +#define PIXEL22_4 Interp4( pOut+dstPitch+dstPitch+SIZE_PIXEL+SIZE_PIXEL, c[5], c[6], c[8] ); +#define PIXEL22_5 Interp5( pOut+dstPitch+dstPitch+SIZE_PIXEL+SIZE_PIXEL, c[6], c[8] ); +#define PIXEL22_C *((PIXELTYPE*)(pOut+dstPitch+dstPitch+SIZE_PIXEL+SIZE_PIXEL)) = c[5]; + +#ifndef cget +#define cget(x) c[x] +#endif + + switch (pattern) + { + case 0: + case 1: + case 4: + case 32: + case 128: + case 5: + case 132: + case 160: + case 33: + case 129: + case 36: + case 133: + case 164: + case 161: + case 37: + case 165: + { + PIXEL00_2 + PIXEL01_1 + PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_2 + PIXEL21_1 + PIXEL22_2 + break; + } + case 2: + case 34: + case 130: + case 162: + { + PIXEL00_1M + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_2 + PIXEL21_1 + PIXEL22_2 + break; + } + case 16: + case 17: + case 48: + case 49: + { + PIXEL00_2 + PIXEL01_1 + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_2 + PIXEL21_1 + PIXEL22_1M + break; + } + case 64: + case 65: + case 68: + case 69: + { + PIXEL00_2 + PIXEL01_1 + PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1M + break; + } + case 8: + case 12: + case 136: + case 140: + { + PIXEL00_1M + PIXEL01_1 + PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_1 + PIXEL22_2 + break; + } + case 3: + case 35: + case 131: + case 163: + { + PIXEL00_1L + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_2 + PIXEL21_1 + PIXEL22_2 + break; + } + case 6: + case 38: + case 134: + case 166: + { + PIXEL00_1M + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_2 + PIXEL21_1 + PIXEL22_2 + break; + } + case 20: + case 21: + case 52: + case 53: + { + PIXEL00_2 + PIXEL01_1 + PIXEL02_1U + 
PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_2 + PIXEL21_1 + PIXEL22_1M + break; + } + case 144: + case 145: + case 176: + case 177: + { + PIXEL00_2 + PIXEL01_1 + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_2 + PIXEL21_1 + PIXEL22_1D + break; + } + case 192: + case 193: + case 196: + case 197: + { + PIXEL00_2 + PIXEL01_1 + PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1R + break; + } + case 96: + case 97: + case 100: + case 101: + { + PIXEL00_2 + PIXEL01_1 + PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1M + break; + } + case 40: + case 44: + case 168: + case 172: + { + PIXEL00_1M + PIXEL01_1 + PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_1 + PIXEL20_1D + PIXEL21_1 + PIXEL22_2 + break; + } + case 9: + case 13: + case 137: + case 141: + { + PIXEL00_1U + PIXEL01_1 + PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_1 + PIXEL22_2 + break; + } + case 18: + case 50: + { + PIXEL00_1M + + if (Diff(cget(2), cget(6))) + { + PIXEL01_C + PIXEL02_1M + PIXEL12_C + } + else + { + PIXEL01_3 + PIXEL02_4 + PIXEL12_3 + } + PIXEL10_1 + PIXEL11 + PIXEL20_2 + PIXEL21_1 + PIXEL22_1M + break; + } + case 80: + case 81: + { + PIXEL00_2 + PIXEL01_1 + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL20_1M + if (Diff(cget(6), cget(8))) + { + PIXEL12_C + PIXEL21_C + PIXEL22_1M + } + else + { + PIXEL12_3 + PIXEL21_3 + PIXEL22_4 + } + break; + } + case 72: + case 76: + { + PIXEL00_1M + PIXEL01_1 + PIXEL02_2 + PIXEL11 + PIXEL12_1 + if (Diff(cget(8), cget(4))) + { + PIXEL10_C + PIXEL20_1M + PIXEL21_C + } + else + { + PIXEL10_3 + PIXEL20_4 + PIXEL21_3 + } + PIXEL22_1M + break; + } + case 10: + case 138: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_1M + PIXEL01_C + PIXEL10_C + } + else + { + PIXEL00_4 + PIXEL01_3 + PIXEL10_3 + } + PIXEL02_1M + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_1 + PIXEL22_2 + break; + } + case 66: + { + PIXEL00_1M + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + 
PIXEL21_C + PIXEL22_1M + break; + } + case 24: + { + PIXEL00_1M + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1M + break; + } + case 7: + case 39: + case 135: + { + PIXEL00_1L + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_2 + PIXEL21_1 + PIXEL22_2 + break; + } + case 148: + case 149: + case 180: + { + PIXEL00_2 + PIXEL01_1 + PIXEL02_1U + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_2 + PIXEL21_1 + PIXEL22_1D + break; + } + case 224: + case 228: + case 225: + { + PIXEL00_2 + PIXEL01_1 + PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1R + break; + } + case 41: + case 169: + case 45: + { + PIXEL00_1U + PIXEL01_1 + PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_1 + PIXEL20_1D + PIXEL21_1 + PIXEL22_2 + break; + } + case 22: + case 54: + { + PIXEL00_1M + if (Diff(cget(2), cget(6))) + { + PIXEL01_C + PIXEL02_C + PIXEL12_C + } + else + { + PIXEL01_3 + PIXEL02_4 + PIXEL12_3 + } + PIXEL10_1 + PIXEL11 + PIXEL20_2 + PIXEL21_1 + PIXEL22_1M + break; + } + case 208: + case 209: + { + PIXEL00_2 + PIXEL01_1 + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL20_1M + if (Diff(cget(6), cget(8))) + { + PIXEL12_C + PIXEL21_C + PIXEL22_C + } + else + { + PIXEL12_3 + PIXEL21_3 + PIXEL22_4 + } + break; + } + case 104: + case 108: + { + PIXEL00_1M + PIXEL01_1 + PIXEL02_2 + PIXEL11 + PIXEL12_1 + if (Diff(cget(8), cget(4))) + { + PIXEL10_C + PIXEL20_C + PIXEL21_C + } + else + { + PIXEL10_3 + PIXEL20_4 + PIXEL21_3 + } + PIXEL22_1M + break; + } + case 11: + case 139: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + PIXEL01_C + PIXEL10_C + } + else + { + PIXEL00_4 + PIXEL01_3 + PIXEL10_3 + } + PIXEL02_1M + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_1 + PIXEL22_2 + break; + } + case 19: + case 51: + { + if (Diff(cget(2), cget(6))) + { + PIXEL00_1L + PIXEL01_C + PIXEL02_1M + PIXEL12_C + } + else + { + PIXEL00_2 + PIXEL01_6 + PIXEL02_5 + PIXEL12_1 + } + PIXEL10_1 + PIXEL11 + PIXEL20_2 + PIXEL21_1 + PIXEL22_1M + 
break; + } + case 146: + case 178: + { + if (Diff(cget(2), cget(6))) + { + PIXEL01_C + PIXEL02_1M + PIXEL12_C + PIXEL22_1D + } + else + { + PIXEL01_1 + PIXEL02_5 + PIXEL12_6 + PIXEL22_2 + } + PIXEL00_1M + PIXEL10_1 + PIXEL11 + PIXEL20_2 + PIXEL21_1 + break; + } + case 84: + case 85: + { + if (Diff(cget(6), cget(8))) + { + PIXEL02_1U + PIXEL12_C + PIXEL21_C + PIXEL22_1M + } + else + { + PIXEL02_2 + PIXEL12_6 + PIXEL21_1 + PIXEL22_5 + } + PIXEL00_2 + PIXEL01_1 + PIXEL10_1 + PIXEL11 + PIXEL20_1M + break; + } + case 112: + case 113: + { + if (Diff(cget(6), cget(8))) + { + PIXEL12_C + PIXEL20_1L + PIXEL21_C + PIXEL22_1M + } + else + { + PIXEL12_1 + PIXEL20_2 + PIXEL21_6 + PIXEL22_5 + } + PIXEL00_2 + PIXEL01_1 + PIXEL02_1M + PIXEL10_1 + PIXEL11 + break; + } + case 200: + case 204: + { + if (Diff(cget(8), cget(4))) + { + PIXEL10_C + PIXEL20_1M + PIXEL21_C + PIXEL22_1R + } + else + { + PIXEL10_1 + PIXEL20_5 + PIXEL21_6 + PIXEL22_2 + } + PIXEL00_1M + PIXEL01_1 + PIXEL02_2 + PIXEL11 + PIXEL12_1 + break; + } + case 73: + case 77: + { + if (Diff(cget(8), cget(4))) + { + PIXEL00_1U + PIXEL10_C + PIXEL20_1M + PIXEL21_C + } + else + { + PIXEL00_2 + PIXEL10_6 + PIXEL20_5 + PIXEL21_1 + } + PIXEL01_1 + PIXEL02_2 + PIXEL11 + PIXEL12_1 + PIXEL22_1M + break; + } + case 42: + case 170: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_1M + PIXEL01_C + PIXEL10_C + PIXEL20_1D + } + else + { + PIXEL00_5 + PIXEL01_1 + PIXEL10_6 + PIXEL20_2 + } + PIXEL02_1M + PIXEL11 + PIXEL12_1 + PIXEL21_1 + PIXEL22_2 + break; + } + case 14: + case 142: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_1M + PIXEL01_C + PIXEL02_1R + PIXEL10_C + } + else + { + PIXEL00_5 + PIXEL01_6 + PIXEL02_2 + PIXEL10_1 + } + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_1 + PIXEL22_2 + break; + } + case 67: + { + PIXEL00_1L + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1M + break; + } + case 70: + { + PIXEL00_1M + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M 
+ PIXEL21_C + PIXEL22_1M + break; + } + case 28: + { + PIXEL00_1M + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1M + break; + } + case 152: + { + PIXEL00_1M + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1D + break; + } + case 194: + { + PIXEL00_1M + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1R + break; + } + case 98: + { + PIXEL00_1M + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1M + break; + } + case 56: + { + PIXEL00_1M + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1M + break; + } + case 25: + { + PIXEL00_1U + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1M + break; + } + case 26: + case 31: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + PIXEL10_C + } + else + { + PIXEL00_4 + PIXEL10_3 + } + PIXEL01_C + if (Diff(cget(2), cget(6))) + { + PIXEL02_C + PIXEL12_C + } + else + { + PIXEL02_4 + PIXEL12_3 + } + PIXEL11 + PIXEL20_1M + PIXEL21_1 + PIXEL22_1M + break; + } + case 82: + case 214: + { + PIXEL00_1M + if (Diff(cget(2), cget(6))) + { + PIXEL01_C + PIXEL02_C + } + else + { + PIXEL01_3 + PIXEL02_4 + } + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1M + if (Diff(cget(6), cget(8))) + { + PIXEL21_C + PIXEL22_C + } + else + { + PIXEL21_3 + PIXEL22_4 + } + break; + } + case 88: + case 248: + { + PIXEL00_1M + PIXEL01_1 + PIXEL02_1M + PIXEL11 + if (Diff(cget(8), cget(4))) + { + PIXEL10_C + PIXEL20_C + } + else + { + PIXEL10_3 + PIXEL20_4 + } + PIXEL21_C + if (Diff(cget(6), cget(8))) + { + PIXEL12_C + PIXEL22_C + } + else + { + PIXEL12_3 + PIXEL22_4 + } + break; + } + case 74: + case 107: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + PIXEL01_C + } + else + { + PIXEL00_4 + PIXEL01_3 + } + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_1 + if (Diff(cget(8), cget(4))) + { + PIXEL20_C + 
PIXEL21_C + } + else + { + PIXEL20_4 + PIXEL21_3 + } + PIXEL22_1M + break; + } + case 27: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + PIXEL01_C + PIXEL10_C + } + else + { + PIXEL00_4 + PIXEL01_3 + PIXEL10_3 + } + PIXEL02_1M + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1M + break; + } + case 86: + { + PIXEL00_1M + if (Diff(cget(2), cget(6))) + { + PIXEL01_C + PIXEL02_C + PIXEL12_C + } + else + { + PIXEL01_3 + PIXEL02_4 + PIXEL12_3 + } + PIXEL10_1 + PIXEL11 + PIXEL20_1M + PIXEL21_C + PIXEL22_1M + break; + } + case 216: + { + PIXEL00_1M + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL20_1M + if (Diff(cget(6), cget(8))) + { + PIXEL12_C + PIXEL21_C + PIXEL22_C + } + else + { + PIXEL12_3 + PIXEL21_3 + PIXEL22_4 + } + break; + } + case 106: + { + PIXEL00_1M + PIXEL01_C + PIXEL02_1M + PIXEL11 + PIXEL12_1 + if (Diff(cget(8), cget(4))) + { + PIXEL10_C + PIXEL20_C + PIXEL21_C + } + else + { + PIXEL10_3 + PIXEL20_4 + PIXEL21_3 + } + PIXEL22_1M + break; + } + case 30: + { + PIXEL00_1M + if (Diff(cget(2), cget(6))) + { + PIXEL01_C + PIXEL02_C + PIXEL12_C + } + else + { + PIXEL01_3 + PIXEL02_4 + PIXEL12_3 + } + PIXEL10_C + PIXEL11 + PIXEL20_1M + PIXEL21_1 + PIXEL22_1M + break; + } + case 210: + { + PIXEL00_1M + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL20_1M + if (Diff(cget(6), cget(8))) + { + PIXEL12_C + PIXEL21_C + PIXEL22_C + } + else + { + PIXEL12_3 + PIXEL21_3 + PIXEL22_4 + } + break; + } + case 120: + { + PIXEL00_1M + PIXEL01_1 + PIXEL02_1M + PIXEL11 + PIXEL12_C + if (Diff(cget(8), cget(4))) + { + PIXEL10_C + PIXEL20_C + PIXEL21_C + } + else + { + PIXEL10_3 + PIXEL20_4 + PIXEL21_3 + } + PIXEL22_1M + break; + } + case 75: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + PIXEL01_C + PIXEL10_C + } + else + { + PIXEL00_4 + PIXEL01_3 + PIXEL10_3 + } + PIXEL02_1M + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1M + break; + } + case 29: + { + PIXEL00_1U + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + 
PIXEL21_1 + PIXEL22_1M + break; + } + case 198: + { + PIXEL00_1M + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1R + break; + } + case 184: + { + PIXEL00_1M + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1D + break; + } + case 99: + { + PIXEL00_1L + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1M + break; + } + case 57: + { + PIXEL00_1U + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1M + break; + } + case 71: + { + PIXEL00_1L + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1M + break; + } + case 156: + { + PIXEL00_1M + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1D + break; + } + case 226: + { + PIXEL00_1M + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1R + break; + } + case 60: + { + PIXEL00_1M + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1M + break; + } + case 195: + { + PIXEL00_1L + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1R + break; + } + case 102: + { + PIXEL00_1M + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1M + break; + } + case 153: + { + PIXEL00_1U + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1D + break; + } + case 58: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_1M + } + else + { + PIXEL00_2 + } + PIXEL01_C + if (Diff(cget(2), cget(6))) + { + PIXEL02_1M + } + else + { + PIXEL02_2 + } + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1M + break; + } + case 83: + { + PIXEL00_1L + PIXEL01_C + if (Diff(cget(2), cget(6))) + { + PIXEL02_1M + } + else + { + PIXEL02_2 + } + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1M + 
PIXEL21_C + if (Diff(cget(6), cget(8))) + { + PIXEL22_1M + } + else + { + PIXEL22_2 + } + break; + } + case 92: + { + PIXEL00_1M + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + if (Diff(cget(8), cget(4))) + { + PIXEL20_1M + } + else + { + PIXEL20_2 + } + PIXEL21_C + if (Diff(cget(6), cget(8))) + { + PIXEL22_1M + } + else + { + PIXEL22_2 + } + break; + } + case 202: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_1M + } + else + { + PIXEL00_2 + } + PIXEL01_C + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_1 + if (Diff(cget(8), cget(4))) + { + PIXEL20_1M + } + else + { + PIXEL20_2 + } + PIXEL21_C + PIXEL22_1R + break; + } + case 78: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_1M + } + else + { + PIXEL00_2 + } + PIXEL01_C + PIXEL02_1R + PIXEL10_C + PIXEL11 + PIXEL12_1 + if (Diff(cget(8), cget(4))) + { + PIXEL20_1M + } + else + { + PIXEL20_2 + } + PIXEL21_C + PIXEL22_1M + break; + } + case 154: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_1M + } + else + { + PIXEL00_2 + } + PIXEL01_C + if (Diff(cget(2), cget(6))) + { + PIXEL02_1M + } + else + { + PIXEL02_2 + } + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1D + break; + } + case 114: + { + PIXEL00_1M + PIXEL01_C + if (Diff(cget(2), cget(6))) + { + PIXEL02_1M + } + else + { + PIXEL02_2 + } + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1L + PIXEL21_C + if (Diff(cget(6), cget(8))) + { + PIXEL22_1M + } + else + { + PIXEL22_2 + } + break; + } + case 89: + { + PIXEL00_1U + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + if (Diff(cget(8), cget(4))) + { + PIXEL20_1M + } + else + { + PIXEL20_2 + } + PIXEL21_C + if (Diff(cget(6), cget(8))) + { + PIXEL22_1M + } + else + { + PIXEL22_2 + } + break; + } + case 90: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_1M + } + else + { + PIXEL00_2 + } + PIXEL01_C + if (Diff(cget(2), cget(6))) + { + PIXEL02_1M + } + else + { + PIXEL02_2 + } + PIXEL10_C + PIXEL11 + PIXEL12_C + if (Diff(cget(8), cget(4))) + { + PIXEL20_1M + } + else + { + 
PIXEL20_2 + } + PIXEL21_C + if (Diff(cget(6), cget(8))) + { + PIXEL22_1M + } + else + { + PIXEL22_2 + } + break; + } + case 55: + case 23: + { + if (Diff(cget(2), cget(6))) + { + PIXEL00_1L + PIXEL01_C + PIXEL02_C + PIXEL12_C + } + else + { + PIXEL00_2 + PIXEL01_6 + PIXEL02_5 + PIXEL12_1 + } + PIXEL10_1 + PIXEL11 + PIXEL20_2 + PIXEL21_1 + PIXEL22_1M + break; + } + case 182: + case 150: + { + if (Diff(cget(2), cget(6))) + { + PIXEL01_C + PIXEL02_C + PIXEL12_C + PIXEL22_1D + } + else + { + PIXEL01_1 + PIXEL02_5 + PIXEL12_6 + PIXEL22_2 + } + PIXEL00_1M + PIXEL10_1 + PIXEL11 + PIXEL20_2 + PIXEL21_1 + break; + } + case 213: + case 212: + { + if (Diff(cget(6), cget(8))) + { + PIXEL02_1U + PIXEL12_C + PIXEL21_C + PIXEL22_C + } + else + { + PIXEL02_2 + PIXEL12_6 + PIXEL21_1 + PIXEL22_5 + } + PIXEL00_2 + PIXEL01_1 + PIXEL10_1 + PIXEL11 + PIXEL20_1M + break; + } + case 241: + case 240: + { + if (Diff(cget(6), cget(8))) + { + PIXEL12_C + PIXEL20_1L + PIXEL21_C + PIXEL22_C + } + else + { + PIXEL12_1 + PIXEL20_2 + PIXEL21_6 + PIXEL22_5 + } + PIXEL00_2 + PIXEL01_1 + PIXEL02_1M + PIXEL10_1 + PIXEL11 + break; + } + case 236: + case 232: + { + if (Diff(cget(8), cget(4))) + { + PIXEL10_C + PIXEL20_C + PIXEL21_C + PIXEL22_1R + } + else + { + PIXEL10_1 + PIXEL20_5 + PIXEL21_6 + PIXEL22_2 + } + PIXEL00_1M + PIXEL01_1 + PIXEL02_2 + PIXEL11 + PIXEL12_1 + break; + } + case 109: + case 105: + { + if (Diff(cget(8), cget(4))) + { + PIXEL00_1U + PIXEL10_C + PIXEL20_C + PIXEL21_C + } + else + { + PIXEL00_2 + PIXEL10_6 + PIXEL20_5 + PIXEL21_1 + } + PIXEL01_1 + PIXEL02_2 + PIXEL11 + PIXEL12_1 + PIXEL22_1M + break; + } + case 171: + case 43: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + PIXEL01_C + PIXEL10_C + PIXEL20_1D + } + else + { + PIXEL00_5 + PIXEL01_1 + PIXEL10_6 + PIXEL20_2 + } + PIXEL02_1M + PIXEL11 + PIXEL12_1 + PIXEL21_1 + PIXEL22_2 + break; + } + case 143: + case 15: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + PIXEL01_C + PIXEL02_1R + PIXEL10_C + } + else + { + 
PIXEL00_5 + PIXEL01_6 + PIXEL02_2 + PIXEL10_1 + } + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_1 + PIXEL22_2 + break; + } + case 124: + { + PIXEL00_1M + PIXEL01_1 + PIXEL02_1U + PIXEL11 + PIXEL12_C + if (Diff(cget(8), cget(4))) + { + PIXEL10_C + PIXEL20_C + PIXEL21_C + } + else + { + PIXEL10_3 + PIXEL20_4 + PIXEL21_3 + } + PIXEL22_1M + break; + } + case 203: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + PIXEL01_C + PIXEL10_C + } + else + { + PIXEL00_4 + PIXEL01_3 + PIXEL10_3 + } + PIXEL02_1M + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1R + break; + } + case 62: + { + PIXEL00_1M + if (Diff(cget(2), cget(6))) + { + PIXEL01_C + PIXEL02_C + PIXEL12_C + } + else + { + PIXEL01_3 + PIXEL02_4 + PIXEL12_3 + } + PIXEL10_C + PIXEL11 + PIXEL20_1D + PIXEL21_1 + PIXEL22_1M + break; + } + case 211: + { + PIXEL00_1L + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL20_1M + if (Diff(cget(6), cget(8))) + { + PIXEL12_C + PIXEL21_C + PIXEL22_C + } + else + { + PIXEL12_3 + PIXEL21_3 + PIXEL22_4 + } + break; + } + case 118: + { + PIXEL00_1M + if (Diff(cget(2), cget(6))) + { + PIXEL01_C + PIXEL02_C + PIXEL12_C + } + else + { + PIXEL01_3 + PIXEL02_4 + PIXEL12_3 + } + PIXEL10_1 + PIXEL11 + PIXEL20_1L + PIXEL21_C + PIXEL22_1M + break; + } + case 217: + { + PIXEL00_1U + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL20_1M + if (Diff(cget(6), cget(8))) + { + PIXEL12_C + PIXEL21_C + PIXEL22_C + } + else + { + PIXEL12_3 + PIXEL21_3 + PIXEL22_4 + } + break; + } + case 110: + { + PIXEL00_1M + PIXEL01_C + PIXEL02_1R + PIXEL11 + PIXEL12_1 + if (Diff(cget(8), cget(4))) + { + PIXEL10_C + PIXEL20_C + PIXEL21_C + } + else + { + PIXEL10_3 + PIXEL20_4 + PIXEL21_3 + } + PIXEL22_1M + break; + } + case 155: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + PIXEL01_C + PIXEL10_C + } + else + { + PIXEL00_4 + PIXEL01_3 + PIXEL10_3 + } + PIXEL02_1M + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1D + break; + } + case 188: + { + PIXEL00_1M + PIXEL01_1 + PIXEL02_1U + 
PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1D + break; + } + case 185: + { + PIXEL00_1U + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1D + break; + } + case 61: + { + PIXEL00_1U + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1M + break; + } + case 157: + { + PIXEL00_1U + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1D + break; + } + case 103: + { + PIXEL00_1L + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1M + break; + } + case 227: + { + PIXEL00_1L + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1R + break; + } + case 230: + { + PIXEL00_1M + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1R + break; + } + case 199: + { + PIXEL00_1L + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1R + break; + } + case 220: + { + PIXEL00_1M + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + if (Diff(cget(8), cget(4))) + { + PIXEL20_1M + } + else + { + PIXEL20_2 + } + if (Diff(cget(6), cget(8))) + { + PIXEL12_C + PIXEL21_C + PIXEL22_C + } + else + { + PIXEL12_3 + PIXEL21_3 + PIXEL22_4 + } + break; + } + case 158: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_1M + } + else + { + PIXEL00_2 + } + if (Diff(cget(2), cget(6))) + { + PIXEL01_C + PIXEL02_C + PIXEL12_C + } + else + { + PIXEL01_3 + PIXEL02_4 + PIXEL12_3 + } + PIXEL10_C + PIXEL11 + PIXEL20_1M + PIXEL21_1 + PIXEL22_1D + break; + } + case 234: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_1M + } + else + { + PIXEL00_2 + } + PIXEL01_C + PIXEL02_1M + PIXEL11 + PIXEL12_1 + if (Diff(cget(8), cget(4))) + { + PIXEL10_C + PIXEL20_C + PIXEL21_C + } + else + { + PIXEL10_3 + PIXEL20_4 + PIXEL21_3 + } + PIXEL22_1R + break; + } + case 242: + { + PIXEL00_1M + PIXEL01_C + if (Diff(cget(2), 
cget(6))) + { + PIXEL02_1M + } + else + { + PIXEL02_2 + } + PIXEL10_1 + PIXEL11 + PIXEL20_1L + if (Diff(cget(6), cget(8))) + { + PIXEL12_C + PIXEL21_C + PIXEL22_C + } + else + { + PIXEL12_3 + PIXEL21_3 + PIXEL22_4 + } + break; + } + case 59: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + PIXEL01_C + PIXEL10_C + } + else + { + PIXEL00_4 + PIXEL01_3 + PIXEL10_3 + } + if (Diff(cget(2), cget(6))) + { + PIXEL02_1M + } + else + { + PIXEL02_2 + } + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1M + break; + } + case 121: + { + PIXEL00_1U + PIXEL01_1 + PIXEL02_1M + PIXEL11 + PIXEL12_C + if (Diff(cget(8), cget(4))) + { + PIXEL10_C + PIXEL20_C + PIXEL21_C + } + else + { + PIXEL10_3 + PIXEL20_4 + PIXEL21_3 + } + if (Diff(cget(6), cget(8))) + { + PIXEL22_1M + } + else + { + PIXEL22_2 + } + break; + } + case 87: + { + PIXEL00_1L + if (Diff(cget(2), cget(6))) + { + PIXEL01_C + PIXEL02_C + PIXEL12_C + } + else + { + PIXEL01_3 + PIXEL02_4 + PIXEL12_3 + } + PIXEL10_1 + PIXEL11 + PIXEL20_1M + PIXEL21_C + if (Diff(cget(6), cget(8))) + { + PIXEL22_1M + } + else + { + PIXEL22_2 + } + break; + } + case 79: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + PIXEL01_C + PIXEL10_C + } + else + { + PIXEL00_4 + PIXEL01_3 + PIXEL10_3 + } + PIXEL02_1R + PIXEL11 + PIXEL12_1 + if (Diff(cget(8), cget(4))) + { + PIXEL20_1M + } + else + { + PIXEL20_2 + } + PIXEL21_C + PIXEL22_1M + break; + } + case 122: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_1M + } + else + { + PIXEL00_2 + } + PIXEL01_C + if (Diff(cget(2), cget(6))) + { + PIXEL02_1M + } + else + { + PIXEL02_2 + } + PIXEL11 + PIXEL12_C + if (Diff(cget(8), cget(4))) + { + PIXEL10_C + PIXEL20_C + PIXEL21_C + } + else + { + PIXEL10_3 + PIXEL20_4 + PIXEL21_3 + } + if (Diff(cget(6), cget(8))) + { + PIXEL22_1M + } + else + { + PIXEL22_2 + } + break; + } + case 94: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_1M + } + else + { + PIXEL00_2 + } + if (Diff(cget(2), cget(6))) + { + PIXEL01_C + PIXEL02_C + PIXEL12_C + } + else + { + 
PIXEL01_3 + PIXEL02_4 + PIXEL12_3 + } + PIXEL10_C + PIXEL11 + if (Diff(cget(8), cget(4))) + { + PIXEL20_1M + } + else + { + PIXEL20_2 + } + PIXEL21_C + if (Diff(cget(6), cget(8))) + { + PIXEL22_1M + } + else + { + PIXEL22_2 + } + break; + } + case 218: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_1M + } + else + { + PIXEL00_2 + } + PIXEL01_C + if (Diff(cget(2), cget(6))) + { + PIXEL02_1M + } + else + { + PIXEL02_2 + } + PIXEL10_C + PIXEL11 + if (Diff(cget(8), cget(4))) + { + PIXEL20_1M + } + else + { + PIXEL20_2 + } + if (Diff(cget(6), cget(8))) + { + PIXEL12_C + PIXEL21_C + PIXEL22_C + } + else + { + PIXEL12_3 + PIXEL21_3 + PIXEL22_4 + } + break; + } + case 91: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + PIXEL01_C + PIXEL10_C + } + else + { + PIXEL00_4 + PIXEL01_3 + PIXEL10_3 + } + if (Diff(cget(2), cget(6))) + { + PIXEL02_1M + } + else + { + PIXEL02_2 + } + PIXEL11 + PIXEL12_C + if (Diff(cget(8), cget(4))) + { + PIXEL20_1M + } + else + { + PIXEL20_2 + } + PIXEL21_C + if (Diff(cget(6), cget(8))) + { + PIXEL22_1M + } + else + { + PIXEL22_2 + } + break; + } + case 229: + { + PIXEL00_2 + PIXEL01_1 + PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1R + break; + } + case 167: + { + PIXEL00_1L + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_2 + PIXEL21_1 + PIXEL22_2 + break; + } + case 173: + { + PIXEL00_1U + PIXEL01_1 + PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_1 + PIXEL20_1D + PIXEL21_1 + PIXEL22_2 + break; + } + case 181: + { + PIXEL00_2 + PIXEL01_1 + PIXEL02_1U + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_2 + PIXEL21_1 + PIXEL22_1D + break; + } + case 186: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_1M + } + else + { + PIXEL00_2 + } + PIXEL01_C + if (Diff(cget(2), cget(6))) + { + PIXEL02_1M + } + else + { + PIXEL02_2 + } + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1D + break; + } + case 115: + { + PIXEL00_1L + PIXEL01_C + if (Diff(cget(2), cget(6))) + { + PIXEL02_1M + } + else 
+ { + PIXEL02_2 + } + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1L + PIXEL21_C + if (Diff(cget(6), cget(8))) + { + PIXEL22_1M + } + else + { + PIXEL22_2 + } + break; + } + case 93: + { + PIXEL00_1U + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + if (Diff(cget(8), cget(4))) + { + PIXEL20_1M + } + else + { + PIXEL20_2 + } + PIXEL21_C + if (Diff(cget(6), cget(8))) + { + PIXEL22_1M + } + else + { + PIXEL22_2 + } + break; + } + case 206: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_1M + } + else + { + PIXEL00_2 + } + PIXEL01_C + PIXEL02_1R + PIXEL10_C + PIXEL11 + PIXEL12_1 + if (Diff(cget(8), cget(4))) + { + PIXEL20_1M + } + else + { + PIXEL20_2 + } + PIXEL21_C + PIXEL22_1R + break; + } + case 205: + case 201: + { + PIXEL00_1U + PIXEL01_1 + PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_1 + if (Diff(cget(8), cget(4))) + { + PIXEL20_1M + } + else + { + PIXEL20_2 + } + PIXEL21_C + PIXEL22_1R + break; + } + case 174: + case 46: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_1M + } + else + { + PIXEL00_2 + } + PIXEL01_C + PIXEL02_1R + PIXEL10_C + PIXEL11 + PIXEL12_1 + PIXEL20_1D + PIXEL21_1 + PIXEL22_2 + break; + } + case 179: + case 147: + { + PIXEL00_1L + PIXEL01_C + if (Diff(cget(2), cget(6))) + { + PIXEL02_1M + } + else + { + PIXEL02_2 + } + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_2 + PIXEL21_1 + PIXEL22_1D + break; + } + case 117: + case 116: + { + PIXEL00_2 + PIXEL01_1 + PIXEL02_1U + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1L + PIXEL21_C + if (Diff(cget(6), cget(8))) + { + PIXEL22_1M + } + else + { + PIXEL22_2 + } + break; + } + case 189: + { + PIXEL00_1U + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1D + break; + } + case 231: + { + PIXEL00_1L + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1R + break; + } + case 126: + { + PIXEL00_1M + if (Diff(cget(2), cget(6))) + { + PIXEL01_C + PIXEL02_C + PIXEL12_C + } + else + { + PIXEL01_3 + PIXEL02_4 + PIXEL12_3 + } + 
PIXEL11 + if (Diff(cget(8), cget(4))) + { + PIXEL10_C + PIXEL20_C + PIXEL21_C + } + else + { + PIXEL10_3 + PIXEL20_4 + PIXEL21_3 + } + PIXEL22_1M + break; + } + case 219: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + PIXEL01_C + PIXEL10_C + } + else + { + PIXEL00_4 + PIXEL01_3 + PIXEL10_3 + } + PIXEL02_1M + PIXEL11 + PIXEL20_1M + if (Diff(cget(6), cget(8))) + { + PIXEL12_C + PIXEL21_C + PIXEL22_C + } + else + { + PIXEL12_3 + PIXEL21_3 + PIXEL22_4 + } + break; + } + case 125: + { + if (Diff(cget(8), cget(4))) + { + PIXEL00_1U + PIXEL10_C + PIXEL20_C + PIXEL21_C + } + else + { + PIXEL00_2 + PIXEL10_6 + PIXEL20_5 + PIXEL21_1 + } + PIXEL01_1 + PIXEL02_1U + PIXEL11 + PIXEL12_C + PIXEL22_1M + break; + } + case 221: + { + if (Diff(cget(6), cget(8))) + { + PIXEL02_1U + PIXEL12_C + PIXEL21_C + PIXEL22_C + } + else + { + PIXEL02_2 + PIXEL12_6 + PIXEL21_1 + PIXEL22_5 + } + PIXEL00_1U + PIXEL01_1 + PIXEL10_C + PIXEL11 + PIXEL20_1M + break; + } + case 207: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + PIXEL01_C + PIXEL02_1R + PIXEL10_C + } + else + { + PIXEL00_5 + PIXEL01_6 + PIXEL02_2 + PIXEL10_1 + } + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1R + break; + } + case 238: + { + if (Diff(cget(8), cget(4))) + { + PIXEL10_C + PIXEL20_C + PIXEL21_C + PIXEL22_1R + } + else + { + PIXEL10_1 + PIXEL20_5 + PIXEL21_6 + PIXEL22_2 + } + PIXEL00_1M + PIXEL01_C + PIXEL02_1R + PIXEL11 + PIXEL12_1 + break; + } + case 190: + { + if (Diff(cget(2), cget(6))) + { + PIXEL01_C + PIXEL02_C + PIXEL12_C + PIXEL22_1D + } + else + { + PIXEL01_1 + PIXEL02_5 + PIXEL12_6 + PIXEL22_2 + } + PIXEL00_1M + PIXEL10_C + PIXEL11 + PIXEL20_1D + PIXEL21_1 + break; + } + case 187: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + PIXEL01_C + PIXEL10_C + PIXEL20_1D + } + else + { + PIXEL00_5 + PIXEL01_1 + PIXEL10_6 + PIXEL20_2 + } + PIXEL02_1M + PIXEL11 + PIXEL12_C + PIXEL21_1 + PIXEL22_1D + break; + } + case 243: + { + if (Diff(cget(6), cget(8))) + { + PIXEL12_C + PIXEL20_1L + PIXEL21_C + 
PIXEL22_C + } + else + { + PIXEL12_1 + PIXEL20_2 + PIXEL21_6 + PIXEL22_5 + } + PIXEL00_1L + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + break; + } + case 119: + { + if (Diff(cget(2), cget(6))) + { + PIXEL00_1L + PIXEL01_C + PIXEL02_C + PIXEL12_C + } + else + { + PIXEL00_2 + PIXEL01_6 + PIXEL02_5 + PIXEL12_1 + } + PIXEL10_1 + PIXEL11 + PIXEL20_1L + PIXEL21_C + PIXEL22_1M + break; + } + case 237: + case 233: + { + PIXEL00_1U + PIXEL01_1 + PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_1 + if (Diff(cget(8), cget(4))) + { + PIXEL20_C + } + else + { + PIXEL20_2 + } + PIXEL21_C + PIXEL22_1R + break; + } + case 175: + case 47: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + } + else + { + PIXEL00_2 + } + PIXEL01_C + PIXEL02_1R + PIXEL10_C + PIXEL11 + PIXEL12_1 + PIXEL20_1D + PIXEL21_1 + PIXEL22_2 + break; + } + case 183: + case 151: + { + PIXEL00_1L + PIXEL01_C + if (Diff(cget(2), cget(6))) + { + PIXEL02_C + } + else + { + PIXEL02_2 + } + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_2 + PIXEL21_1 + PIXEL22_1D + break; + } + case 245: + case 244: + { + PIXEL00_2 + PIXEL01_1 + PIXEL02_1U + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1L + PIXEL21_C + if (Diff(cget(6), cget(8))) + { + PIXEL22_C + } + else + { + PIXEL22_2 + } + break; + } + case 250: + { + PIXEL00_1M + PIXEL01_C + PIXEL02_1M + PIXEL11 + if (Diff(cget(8), cget(4))) + { + PIXEL10_C + PIXEL20_C + } + else + { + PIXEL10_3 + PIXEL20_4 + } + PIXEL21_C + if (Diff(cget(6), cget(8))) + { + PIXEL12_C + PIXEL22_C + } + else + { + PIXEL12_3 + PIXEL22_4 + } + break; + } + case 123: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + PIXEL01_C + } + else + { + PIXEL00_4 + PIXEL01_3 + } + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + if (Diff(cget(8), cget(4))) + { + PIXEL20_C + PIXEL21_C + } + else + { + PIXEL20_4 + PIXEL21_3 + } + PIXEL22_1M + break; + } + case 95: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + PIXEL10_C + } + else + { + PIXEL00_4 + PIXEL10_3 + } + PIXEL01_C + if (Diff(cget(2), cget(6))) + { + PIXEL02_C 
+ PIXEL12_C + } + else + { + PIXEL02_4 + PIXEL12_3 + } + PIXEL11 + PIXEL20_1M + PIXEL21_C + PIXEL22_1M + break; + } + case 222: + { + PIXEL00_1M + if (Diff(cget(2), cget(6))) + { + PIXEL01_C + PIXEL02_C + } + else + { + PIXEL01_3 + PIXEL02_4 + } + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + if (Diff(cget(6), cget(8))) + { + PIXEL21_C + PIXEL22_C + } + else + { + PIXEL21_3 + PIXEL22_4 + } + break; + } + case 252: + { + PIXEL00_1M + PIXEL01_1 + PIXEL02_1U + PIXEL11 + PIXEL12_C + if (Diff(cget(8), cget(4))) + { + PIXEL10_C + PIXEL20_C + } + else + { + PIXEL10_3 + PIXEL20_4 + } + PIXEL21_C + if (Diff(cget(6), cget(8))) + { + PIXEL22_C + } + else + { + PIXEL22_2 + } + break; + } + case 249: + { + PIXEL00_1U + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + if (Diff(cget(8), cget(4))) + { + PIXEL20_C + } + else + { + PIXEL20_2 + } + PIXEL21_C + if (Diff(cget(6), cget(8))) + { + PIXEL12_C + PIXEL22_C + } + else + { + PIXEL12_3 + PIXEL22_4 + } + break; + } + case 235: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + PIXEL01_C + } + else + { + PIXEL00_4 + PIXEL01_3 + } + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_1 + if (Diff(cget(8), cget(4))) + { + PIXEL20_C + } + else + { + PIXEL20_2 + } + PIXEL21_C + PIXEL22_1R + break; + } + case 111: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + } + else + { + PIXEL00_2 + } + PIXEL01_C + PIXEL02_1R + PIXEL10_C + PIXEL11 + PIXEL12_1 + if (Diff(cget(8), cget(4))) + { + PIXEL20_C + PIXEL21_C + } + else + { + PIXEL20_4 + PIXEL21_3 + } + PIXEL22_1M + break; + } + case 63: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + } + else + { + PIXEL00_2 + } + PIXEL01_C + if (Diff(cget(2), cget(6))) + { + PIXEL02_C + PIXEL12_C + } + else + { + PIXEL02_4 + PIXEL12_3 + } + PIXEL10_C + PIXEL11 + PIXEL20_1D + PIXEL21_1 + PIXEL22_1M + break; + } + case 159: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + PIXEL10_C + } + else + { + PIXEL00_4 + PIXEL10_3 + } + PIXEL01_C + if (Diff(cget(2), cget(6))) + { + PIXEL02_C + } + else + { + 
PIXEL02_2 + } + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1D + break; + } + case 215: + { + PIXEL00_1L + PIXEL01_C + if (Diff(cget(2), cget(6))) + { + PIXEL02_C + } + else + { + PIXEL02_2 + } + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1M + if (Diff(cget(6), cget(8))) + { + PIXEL21_C + PIXEL22_C + } + else + { + PIXEL21_3 + PIXEL22_4 + } + break; + } + case 246: + { + PIXEL00_1M + if (Diff(cget(2), cget(6))) + { + PIXEL01_C + PIXEL02_C + } + else + { + PIXEL01_3 + PIXEL02_4 + } + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1L + PIXEL21_C + if (Diff(cget(6), cget(8))) + { + PIXEL22_C + } + else + { + PIXEL22_2 + } + break; + } + case 254: + { + PIXEL00_1M + if (Diff(cget(2), cget(6))) + { + PIXEL01_C + PIXEL02_C + } + else + { + PIXEL01_3 + PIXEL02_4 + } + PIXEL11 + if (Diff(cget(8), cget(4))) + { + PIXEL10_C + PIXEL20_C + } + else + { + PIXEL10_3 + PIXEL20_4 + } + if (Diff(cget(6), cget(8))) + { + PIXEL12_C + PIXEL21_C + PIXEL22_C + } + else + { + PIXEL12_3 + PIXEL21_3 + PIXEL22_2 + } + break; + } + case 253: + { + PIXEL00_1U + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + if (Diff(cget(8), cget(4))) + { + PIXEL20_C + } + else + { + PIXEL20_2 + } + PIXEL21_C + if (Diff(cget(6), cget(8))) + { + PIXEL22_C + } + else + { + PIXEL22_2 + } + break; + } + case 251: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + PIXEL01_C + } + else + { + PIXEL00_4 + PIXEL01_3 + } + PIXEL02_1M + PIXEL11 + if (Diff(cget(8), cget(4))) + { + PIXEL10_C + PIXEL20_C + PIXEL21_C + } + else + { + PIXEL10_3 + PIXEL20_2 + PIXEL21_3 + } + if (Diff(cget(6), cget(8))) + { + PIXEL12_C + PIXEL22_C + } + else + { + PIXEL12_3 + PIXEL22_4 + } + break; + } + case 239: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + } + else + { + PIXEL00_2 + } + PIXEL01_C + PIXEL02_1R + PIXEL10_C + PIXEL11 + PIXEL12_1 + if (Diff(cget(8), cget(4))) + { + PIXEL20_C + } + else + { + PIXEL20_2 + } + PIXEL21_C + PIXEL22_1R + break; + } + case 127: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C 
+ PIXEL01_C + PIXEL10_C + } + else + { + PIXEL00_2 + PIXEL01_3 + PIXEL10_3 + } + if (Diff(cget(2), cget(6))) + { + PIXEL02_C + PIXEL12_C + } + else + { + PIXEL02_4 + PIXEL12_3 + } + PIXEL11 + if (Diff(cget(8), cget(4))) + { + PIXEL20_C + PIXEL21_C + } + else + { + PIXEL20_4 + PIXEL21_3 + } + PIXEL22_1M + break; + } + case 191: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + } + else + { + PIXEL00_2 + } + PIXEL01_C + if (Diff(cget(2), cget(6))) + { + PIXEL02_C + } + else + { + PIXEL02_2 + } + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1D + break; + } + case 223: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + PIXEL10_C + } + else + { + PIXEL00_4 + PIXEL10_3 + } + if (Diff(cget(2), cget(6))) + { + PIXEL01_C + PIXEL02_C + PIXEL12_C + } + else + { + PIXEL01_3 + PIXEL02_2 + PIXEL12_3 + } + PIXEL11 + PIXEL20_1M + if (Diff(cget(6), cget(8))) + { + PIXEL21_C + PIXEL22_C + } + else + { + PIXEL21_3 + PIXEL22_4 + } + break; + } + case 247: + { + PIXEL00_1L + PIXEL01_C + if (Diff(cget(2), cget(6))) + { + PIXEL02_C + } + else + { + PIXEL02_2 + } + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1L + PIXEL21_C + if (Diff(cget(6), cget(8))) + { + PIXEL22_C + } + else + { + PIXEL22_2 + } + break; + } + case 255: + { + if (Diff(cget(4), cget(2))) + { + PIXEL00_C + } + else + { + PIXEL00_2 + } + PIXEL01_C + if (Diff(cget(2), cget(6))) + { + PIXEL02_C + } + else + { + PIXEL02_2 + } + PIXEL10_C + PIXEL11 + PIXEL12_C + if (Diff(cget(8), cget(4))) + { + PIXEL20_C + } + else + { + PIXEL20_2 + } + PIXEL21_C + if (Diff(cget(6), cget(8))) + { + PIXEL22_C + } + else + { + PIXEL22_2 + } + break; + } + } diff -r 18eaae41bde3 -r b970226568d2 src/filters/hq_shared32.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/filters/hq_shared32.cpp Sun Mar 04 20:32:31 2012 -0600 @@ -0,0 +1,389 @@ +#include "../Port.h" +#include "hq_shared32.h" +#include "interp.h" + +const unsigned __int64 reg_blank = 0x0000000000000000; +const unsigned __int64 const7 = 
0x0000000700070007; +const unsigned __int64 treshold = 0x0000000000300706; + /* Interp1: stores (3*c1 + c2) >> 2 at pc as one 32-bit word. Both paths (MMX dword ops and the plain x86 fallback) do the arithmetic on the whole packed word, not per colour channel, so a carry out of one byte spills into the next. NOTE(review): presumably callers pass colours whose low bits per channel are clear so nothing spills - confirm. */ +void Interp1(unsigned char *pc, unsigned int c1, unsigned int c2) +{ + //*((int*)pc) = (c1*3+c2)/4; + +#ifdef MMX + __asm + { + mov eax, pc + movd mm1, c1 + movd mm2, c2 + movq mm0, mm1 + pslld mm0, 2 + psubd mm0, mm1 + paddd mm0, mm2 + psrld mm0, 2 + movd [eax], mm0 + EMMS + } +#else + __asm + { + mov eax, pc + mov edx, c1 + shl edx, 2 + add edx, c2 + sub edx, c1 + shr edx, 2 + mov [eax], edx + } +#endif +} + /* Interp2: stores (2*c1 + c2 + c3) >> 2 at pc, again computed on the whole 32-bit word. The MMX path shifts with psrad (arithmetic) while the fallback uses shr (logical); the results differ only when bit 31 of the sum is set. */ +void Interp2(unsigned char *pc, unsigned int c1, unsigned int c2, unsigned int c3) +{ + //*((int*)pc) = (c1*2+c2+c3)/4; + +#ifdef MMX + __asm + { + mov eax, pc + movd mm0, c1 + movd mm1, c2 + movd mm2, c3 + pslld mm0, 1 + paddd mm0, mm1 + paddd mm0, mm2 + psrad mm0, 2 + movd [eax], mm0 + EMMS + } +#else + __asm + { + mov eax, pc + mov edx, c1 + shl edx, 1 + add edx, c2 + add edx, c3 + shr edx, 2 + mov [eax], edx + } +#endif +} + /* Interp3: stores (7*c1 + c2) >> 3 at pc. The MMX path unpacks the four bytes into 16-bit lanes and blends every byte separately; the fallback instead evaluates ((c1 << 3) - c1 + c2) >> 3 on the whole word and uses ebx as scratch. */ +void Interp3(unsigned char *pc, unsigned int c1, unsigned int c2) +{ + //*((int*)pc) = (c1*7+c2)/8; + //*((int*)pc) = ((((c1 & 0x00FF00)*7 + (c2 & 0x00FF00) ) & 0x0007F800) + + // (((c1 & 0xFF00FF)*7 + (c2 & 0xFF00FF) ) & 0x07F807F8)) >> 3; + +#ifdef MMX + __asm + { + mov eax, pc + movd mm1, c1 + movd mm2, c2 + punpcklbw mm1, reg_blank + punpcklbw mm2, reg_blank + pmullw mm1, const7 + paddw mm1, mm2 + psrlw mm1, 3 + packuswb mm1, reg_blank + movd [eax], mm1 + EMMS + } +#else + __asm + { + mov eax, c1 + mov ebx, c2 + mov ecx, eax + shl ecx, 3 + sub ecx, eax + add ecx, ebx + shr ecx, 3 + mov eax, pc + mov [eax], ecx + } +#endif +} + /* Interp4: stores (2*c1 + 7*(c2 + c3)) >> 4 at pc, channel by channel in both paths: the MMX path unpacks bytes into 16-bit lanes, the fallback does one masked pass for the 0xFF00 field and one for the 0xFF00FF fields. */ +void Interp4(unsigned char *pc, unsigned int c1, unsigned int c2, unsigned int c3) +{ + //*((int*)pc) = (c1*2+(c2+c3)*7)/16; + //*((int*)pc) = ((((c1 & 0x00FF00)*2 + ((c2 & 0x00FF00) + (c3 & 0x00FF00))*7 ) & 0x000FF000) + + // (((c1 & 0xFF00FF)*2 + ((c2 & 0xFF00FF) + (c3 & 0xFF00FF))*7 ) & 0x0FF00FF0)) >> 4; + +#ifdef MMX + __asm + { + mov eax, pc + movd mm1, c1 + movd mm2, c2 + movd mm3, c3 + punpcklbw mm1, reg_blank + 
punpcklbw mm2, reg_blank + punpcklbw mm3, reg_blank + psllw mm1, 1 + paddw mm2, mm3 + pmullw mm2, const7 + paddw mm1, mm2 + psrlw mm1, 4 + packuswb mm1, reg_blank + movd [eax], mm1 + EMMS + } +#else + + __asm + { + mov eax, [c1] + and eax, 0FF00h + shl eax, 1 + mov ecx, [c2] + and ecx, 0FF00h + mov edx, [c3] + and edx, 0FF00h + add ecx, edx + imul ecx, ecx, 7 + add eax, ecx + and eax, 0FF000h + + mov ebx, [c1] + and ebx, 0FF00FFh + shl ebx, 1 + mov ecx, [c2] + and ecx, 0FF00FFh + mov edx, [c3] + and edx, 0FF00FFh + add ecx, edx + imul ecx, ecx, 7 + add ebx, ecx + and ebx, 0FF00FF0h + + add eax, ebx + shr eax, 4 + + mov ebx, pc + mov [ebx], eax + } +#endif +} + /* Interp5: stores (c1 + c2) >> 1 at pc as one 32-bit word, computed on the whole packed word. The MMX path uses psrad (arithmetic shift), the fallback shr (logical shift). */ +void Interp5(unsigned char *pc, unsigned int c1, unsigned int c2) +{ + //*((int*)pc) = (c1+c2)/2; + +#ifdef MMX + __asm + { + mov eax, pc + movd mm0, c1 + movd mm1, c2 + paddd mm0, mm1 + psrad mm0, 1 + movd [eax], mm0 + EMMS + } +#else + __asm + { + mov eax, pc + mov edx, c1 + add edx, c2 + shr edx, 1 + mov [eax], edx + } +#endif +} + /* Interp1_16 to Interp5_16: 16-bit variants. Each one stores the value returned by an interp_16_* helper at pc as an unsigned short. NOTE(review): the helpers are not defined in this file (presumably interp.h); the commented-out formulas repeat those of the 32-bit versions and are not what runs. */ +void Interp1_16(unsigned char *pc, unsigned short c1, unsigned short c2) +{ + *((unsigned short *)pc) = interp_16_31(c1, c2); + //*((int*)pc) = (c1*3+c2)/4; +} + +void Interp2_16(unsigned char *pc, unsigned short c1, unsigned short c2, unsigned short c3) +{ + *((unsigned short *)pc) = interp_16_211(c1, c2, c3); + //*((int*)pc) = (c1*2+c2+c3)/4; +} + +void Interp3_16(unsigned char *pc, unsigned short c1, unsigned short c2) +{ + *((unsigned short *)pc) = interp_16_71(c1, c2); +// *((unsigned short*)pc) = (c1*7+c2)/8; +// *((unsigned short*)pc) = ((((c1 & 0x00FF00)*7 + (c2 & 0x00FF00) ) & 0x0007F800) + +// (((c1 & 0xFF00FF)*7 + (c2 & 0xFF00FF) ) & 0x07F807F8)) >> 3; +} + /* Interp4_16: note the argument order - interp_16_772 is called with (c2, c3, c1). */ +void Interp4_16(unsigned char *pc, unsigned short c1, unsigned short c2, unsigned short c3) +{ + *((unsigned short *)pc) = interp_16_772(c2, c3, c1); +// *((unsigned short*)pc) = (c1*2+(c2+c3)*7)/16; +// *((unsigned short*)pc) = ((((c1 & 0x00FF00)*2 + ((c2 & 0x00FF00) + (c3 & 0x00FF00))*7 ) & 0x000FF000) + +// 
(((c1 & 0xFF00FF)*2 + ((c2 & 0xFF00FF) + (c3 & 0xFF00FF))*7 ) & 0x0FF00FF0)) >> 4; +} + +void Interp5_16(unsigned char *pc, unsigned short c1, unsigned short c2) +{ + *((unsigned short *)pc) = interp_16_11(c1, c2); +} + /* Diff: returns true when c1 and c2 are "different enough". Their RGBtoYUV() encodings are compared field by field (masks 0x00FF0000, 0x0000FF00, 0x000000FF) against the limits trY, trU and trV, and any field over its limit yields true. Identical encodings return false up front. NOTE(review): neither path takes a true absolute value - the fallback's abs32() and the MMX pand with 0x7FFFFFFF only clear bit 31, so a negative field difference turns into a large positive number that exceeds the limit. That would make Diff(a, b) and Diff(b, a) disagree; confirm whether this is intended before changing it. */ +bool Diff(unsigned int c1, unsigned int c2) +{ + unsigned int + YUV1 = RGBtoYUV(c1), + YUV2 = RGBtoYUV(c2); + + if (YUV1 == YUV2) return false; // Save some processing power + +#ifdef MMX + unsigned int retval; + __asm + { + mov eax, 0x7FFFFFFF + movd mm7, eax; mm7 = ABS_MASK = 0x7FFFFFFF + + ; Copy source colors in first reg + movd mm0, YUV1 + movd mm1, YUV2 + + mov eax, 0x00FF0000 + movd mm6, eax; mm6 = Ymask = 0x00FF0000 + + ; Calculate color Y difference + movq mm2, mm0 + movq mm3, mm1 + pand mm2, mm6 + pand mm3, mm6 + psubd mm2, mm3 + pand mm2, mm7 + + mov eax, 0x0000FF00 + movd mm6, eax; mm6 = Umask = 0x0000FF00 + + ; Calculate color U difference + movq mm3, mm0 + movq mm4, mm1 + pand mm3, mm6 + pand mm4, mm6 + psubd mm3, mm4 + pand mm3, mm7 + + mov eax, 0x000000FF + movd mm6, eax; mm6 = Vmask = 0x000000FF + + ; Calculate color V difference + movq mm4, mm0 + movq mm5, mm1 + pand mm4, mm6 + pand mm5, mm6 + psubd mm4, mm5 + pand mm4, mm7 + + mov eax, 0x00300000 + movd mm5, eax; mm5 = trY = 0x00300000 + mov eax, 0x00000700 + movd mm6, eax; mm6 = trU = 0x00000700 + mov eax, 0x00000006 + movd mm7, eax; mm7 = trV = 0x00000006 + + ; Compare the results + pcmpgtd mm2, trY + pcmpgtd mm3, trU + pcmpgtd mm4, trV + por mm2, mm3 + por mm2, mm4 + + movd retval, mm2 + + EMMS + } + return (retval != 0); +#else + return + (abs32((YUV1 & Ymask) - (YUV2 & Ymask)) > trY) || + (abs32((YUV1 & Umask) - (YUV2 & Umask)) > trU) || + (abs32((YUV1 & Vmask) - (YUV2 & Vmask)) > trV); +#endif +} + /* RGBtoYUV: returns a packed (Y << 16) + (u << 8) + v value for colour c. The C fallback reads r from bits 0-7, g from bits 8-15 and b from bits 16-23, then uses Y = (r + g + b) >> 2, u = 128 + ((r - b) >> 2) and v = 128 + ((2*g - r - b) >> 3), each stored in an unsigned char. NOTE(review): the MMX path masks the three channels but never shifts g and b down to bit 0 before adding and subtracting them, so it does not obviously produce the same value as the fallback - confirm. */ +unsigned int RGBtoYUV(unsigned int c) +{ // Division through 3 slows down the emulation about 10% !!! 
+#ifdef MMX + unsigned int retval; + __asm + { + movd mm0, c + movq mm1, mm0 + movq mm2, mm0; mm0 = mm1 = mm2 = c + + mov eax, 0x000000FF + movd mm5, eax; mm5 = REDMASK = 0x000000FF + mov eax, 0x0000FF00 + movd mm6, eax; mm6 = GREENMASK = 0x0000FF00 + mov eax, 0x00FF0000 + movd mm7, eax; mm7 = BLUEMASK = 0x00FF0000 + + pand mm0, mm5 + pand mm1, mm6 + pand mm2, mm7; mm0 = R mm1 = G mm2 = B + + movq mm3, mm0 + paddd mm3, mm1 + paddd mm3, mm2 + ; psrld mm3, 2; mm3 = Y + ; pslld mm3, 16 + pslld mm3, 14; mm3 = Y << 16 + + mov eax, 512 + movd mm7, eax; mm7 = 128 << 2 = 512 + + movq mm4, mm0 + psubd mm4, mm2 + ; psrld mm4, 2 + ; paddd mm4, mm7; mm4 = U + ; pslld mm4, 8; mm4 = U << 8 + paddd mm4, mm7 + pslld mm4, 6 + + mov eax, 128 + movd mm7, eax; mm7 = 128 + + movq mm5, mm1 + pslld mm5, 1 + psubd mm5, mm0 + psubd mm5, mm2 + psrld mm5, 3 + paddd mm5, mm7; mm5 = V + + paddd mm5, mm4 + paddd mm5, mm3 + + movd retval, mm5 + + EMMS + } + return retval; +#else + unsigned char r, g, b, Y, u, v; + r = (c & 0x000000FF); + g = (c & 0x0000FF00) >> 8; + b = (c & 0x00FF0000) >> 16; + Y = (r + g + b) >> 2; + u = 128 + ((r - b) >> 2); + v = 128 + ((-r + 2 * g - b) >> 3); + return (Y << 16) + (u << 8) + v; + + // Extremely High Quality Code + //unsigned char r, g, b; + //r = c & 0xFF; + //g = (c >> 8) & 0xFF; + //b = (c >> 16) & 0xFF; + //unsigned char y, u, v; + //y = (0.256788 * r + 0.504129 * g + 0.097906 * b) + 16; + //u = (-0.148223 * r - 0.290993 * g + 0.439216 * b) + 128; + //v = (0.439216 * r - 0.367788 * g - 0.071427 * b) + 128; + //return (y << 16) + (u << 8) + v; +#endif +} \ No newline at end of file diff -r 18eaae41bde3 -r b970226568d2 src/filters/hq_shared32.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/filters/hq_shared32.h Sun Mar 04 20:32:31 2012 -0600 @@ -0,0 +1,31 @@ +#ifndef VBA_HQ_SHARED32_H +#define VBA_HQ_SHARED32_H + +#if _MSC_VER > 1000 +#pragma once +#endif // _MSC_VER > 1000 + /* abs32 and abs16 only clear the top bit of value (bit 31 or bit 15); they do not negate a negative value. The macro argument is not parenthesised in the expansion. */ +#define abs32(value) (value & 0x7FFFFFFF) +#define abs16(value) (value & 
0x7FFF) + /* Ymask, Umask and Vmask select the three byte fields of the value packed by RGBtoYUV(); trY, trU and trV are the per-field limits that Diff() compares against, expressed at the same bit positions as their masks. */ +const int Ymask = 0x00FF0000; +const int Umask = 0x0000FF00; +const int Vmask = 0x000000FF; +const int trY = 0x00300000; +const int trU = 0x00000700; +const int trV = 0x00000006; + /* Pixel blend and colour comparison helpers; the definitions are in hq_shared32.cpp. */ +void Interp1(unsigned char *pc, unsigned int c1, unsigned int c2); +void Interp2(unsigned char *pc, unsigned int c1, unsigned int c2, unsigned int c3); +void Interp3(unsigned char *pc, unsigned int c1, unsigned int c2); +void Interp4(unsigned char *pc, unsigned int c1, unsigned int c2, unsigned int c3); +void Interp5(unsigned char *pc, unsigned int c1, unsigned int c2); +void Interp1_16(unsigned char *pc, unsigned short c1, unsigned short c2); +void Interp2_16(unsigned char *pc, unsigned short c1, unsigned short c2, unsigned short c3); +void Interp3_16(unsigned char *pc, unsigned short c1, unsigned short c2); +void Interp4_16(unsigned char *pc, unsigned short c1, unsigned short c2, unsigned short c3); +void Interp5_16(unsigned char *pc, unsigned short c1, unsigned short c2); +bool Diff(unsigned int c1, unsigned int c2); +unsigned int RGBtoYUV(unsigned int c); + +#endif // VBA_HQ_SHARED32_H diff -r 18eaae41bde3 -r b970226568d2 src/filters/interframe.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/filters/interframe.cpp Sun Mar 04 20:32:31 2012 -0600 @@ -0,0 +1,630 @@ +#include +#include +#include "../Port.h" + /* NOTE(review): the two #include directives above have no header name in this copy of the patch; calloc, free and NULL are used further down, so presumably <stdlib.h> (or <cstdlib>) was one of them - restore both from the upstream file. */ +#ifdef MMX +extern "C" bool cpu_mmx; +#endif + +/* + * Thanks to Kawaks' Mr. 
K for the code + + Incorporated into vba by Anthony Di Franco + */ + +static u8 *frm1 = NULL; +static u8 *frm2 = NULL; +static u8 *frm3 = NULL; + +extern u32 RGB_LOW_BITS_MASK; +extern u32 qRGB_COLOR_MASK[2]; + +static void Init() +{ + frm1 = (u8 *)calloc(322 * 242, 4); + // 1 frame ago + frm2 = (u8 *)calloc(322 * 242, 4); + // 2 frames ago + frm3 = (u8 *)calloc(322 * 242, 4); + // 3 frames ago +} + +void InterframeCleanup() +{ + if (frm1) + free(frm1); + if (frm2) + free(frm2); + if (frm3) + free(frm3); + frm1 = frm2 = frm3 = NULL; +} + +#ifdef MMX +static void SmartIB_MMX(u8 *srcPtr, u32 srcPitch, int width, int height) +{ + u16 *src0 = (u16 *)srcPtr; + u16 *src1 = (u16 *)frm1; + u16 *src2 = (u16 *)frm2; + u16 *src3 = (u16 *)frm3; + + int count = width >> 2; + + for (int i = 0; i < height; i++) + { +#ifdef __GNUC__ + asm volatile ( + "push %4\n" + "movq 0(%5), %%mm7\n" // colorMask + "0:\n" + "movq 0(%0), %%mm0\n" // src0 + "movq 0(%1), %%mm1\n" // src1 + "movq 0(%2), %%mm2\n" // src2 + "movq 0(%3), %%mm3\n" // src3 + "movq %%mm0, 0(%3)\n" // src3 = src0 + "movq %%mm0, %%mm4\n" + "movq %%mm1, %%mm5\n" + "pcmpeqw %%mm2, %%mm5\n" // src1 == src2 (A) + "pcmpeqw %%mm3, %%mm4\n" // src3 == src0 (B) + "por %%mm5, %%mm4\n" // A | B + "movq %%mm2, %%mm5\n" + "pcmpeqw %%mm0, %%mm5\n" // src0 == src2 (C) + "pcmpeqw %%mm1, %%mm3\n" // src1 == src3 (D) + "por %%mm3, %%mm5\n" // C|D + "pandn %%mm5, %%mm4\n" // (!(A|B))&(C|D) + "movq %%mm0, %%mm2\n" + "pand %%mm7, %%mm2\n" // color & colorMask + "pand %%mm7, %%mm1\n" // src1 & colorMask + "psrlw $1, %%mm2\n" // (color & colorMask) >> 1 (E) + "psrlw $1, %%mm1\n" // (src & colorMask) >> 1 (F) + "paddw %%mm2, %%mm1\n" // E+F + "pand %%mm4, %%mm1\n" // (E+F) & res + "pandn %%mm0, %%mm4\n" // color& !res + + "por %%mm1, %%mm4\n" + "movq %%mm4, 0(%0)\n" // src0 = res + + "addl $8, %0\n" + "addl $8, %1\n" + "addl $8, %2\n" + "addl $8, %3\n" + + "decl %4\n" + "jnz 0b\n" + "pop %4\n" + "emms\n" + : "+r" (src0), "+r" (src1), "+r" 
(src2), "+r" (src3) + : "r" (count), "r" (qRGB_COLOR_MASK) + ); +#else + __asm { + movq mm7, qword ptr [qRGB_COLOR_MASK]; + mov eax, src0; + mov ebx, src1; + mov ecx, src2; + mov edx, src3; + mov edi, count; +label0: + movq mm0, qword ptr [eax]; // src0 + movq mm1, qword ptr [ebx]; // src1 + movq mm2, qword ptr [ecx]; // src2 + movq mm3, qword ptr [edx]; // src3 + movq qword ptr [edx], mm0; // src3 = src0 + movq mm4, mm0; + movq mm5, mm1; + pcmpeqw mm5, mm2; // src1 == src2 (A) + pcmpeqw mm4, mm3; // src3 == src0 (B) + por mm4, mm5; // A | B + movq mm5, mm2; + pcmpeqw mm5, mm0; // src0 == src2 (C) + pcmpeqw mm3, mm1; // src1 == src3 (D) + por mm5, mm3; // C|D + pandn mm4, mm5; // (!(A|B))&(C|D) + movq mm2, mm0; + pand mm2, mm7; // color & colorMask + pand mm1, mm7; // src1 & colorMask + psrlw mm2, 1; // (color & colorMask) >> 1 (E) + psrlw mm1, 1; // (src & colorMask) >> 1 (F) + paddw mm1, mm2; // E+F + pand mm1, mm4; // (E+F) & res + pandn mm4, mm0; // color & !res + + por mm4, mm1; + movq qword ptr [eax], mm4; // src0 = res + + add eax, 8; + add ebx, 8; + add ecx, 8; + add edx, 8; + + dec edi; + jnz label0; + mov src0, eax; + mov src1, ebx; + mov src2, ecx; + mov src3, edx; + emms; + } +#endif + src0 += 2; + src1 += 2; + src2 += 2; + src3 += 2; + } + + /* Swap buffers around */ + u8 *temp = frm1; + frm1 = frm3; + frm3 = frm2; + frm2 = temp; +} + +#endif + +void SmartIB(u8 *srcPtr, u32 srcPitch, int width, int height) +{ + if (frm1 == NULL) + { + Init(); + } +#ifdef MMX + if (cpu_mmx) + { + SmartIB_MMX(srcPtr, srcPitch, width, height); + return; + } +#endif + + u16 colorMask = ~RGB_LOW_BITS_MASK; + + u16 *src0 = (u16 *)srcPtr; + u16 *src1 = (u16 *)frm1; + u16 *src2 = (u16 *)frm2; + u16 *src3 = (u16 *)frm3; + + int sPitch = srcPitch >> 1; + + int pos = 0; + for (int j = 0; j < height; j++) + for (int i = 0; i < sPitch; i++) + { + u16 color = src0[pos]; + src0[pos] = + (src1[pos] != src2[pos]) && + (src3[pos] != color) && + ((color == src2[pos]) || (src1[pos] == 
src3[pos])) + ? (((color & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1)) : + color; + src3[pos] = color; /* oldest buffer now holds newest frame */ + pos++; + } + + /* Swap buffers around */ + u8 *temp = frm1; + frm1 = frm3; + frm3 = frm2; + frm2 = temp; +} + +#ifdef MMX +static void SmartIB32_MMX(u8 *srcPtr, u32 srcPitch, int width, int height) +{ + u32 *src0 = (u32 *)srcPtr; + u32 *src1 = (u32 *)frm1; + u32 *src2 = (u32 *)frm2; + u32 *src3 = (u32 *)frm3; + + int count = width >> 1; + + for (int i = 0; i < height; i++) + { +#ifdef __GNUC__ + asm volatile ( + "push %4\n" + "movq 0(%5), %%mm7\n" // colorMask + "0:\n" + "movq 0(%0), %%mm0\n" // src0 + "movq 0(%1), %%mm1\n" // src1 + "movq 0(%2), %%mm2\n" // src2 + "movq 0(%3), %%mm3\n" // src3 + "movq %%mm0, 0(%3)\n" // src3 = src0 + "movq %%mm0, %%mm4\n" + "movq %%mm1, %%mm5\n" + "pcmpeqd %%mm2, %%mm5\n" // src1 == src2 (A) + "pcmpeqd %%mm3, %%mm4\n" // src3 == src0 (B) + "por %%mm5, %%mm4\n" // A | B + "movq %%mm2, %%mm5\n" + "pcmpeqd %%mm0, %%mm5\n" // src0 == src2 (C) + "pcmpeqd %%mm1, %%mm3\n" // src1 == src3 (D) + "por %%mm3, %%mm5\n" // C|D + "pandn %%mm5, %%mm4\n" // (!(A|B))&(C|D) + "movq %%mm0, %%mm2\n" + "pand %%mm7, %%mm2\n" // color & colorMask + "pand %%mm7, %%mm1\n" // src1 & colorMask + "psrld $1, %%mm2\n" // (color & colorMask) >> 1 (E) + "psrld $1, %%mm1\n" // (src & colorMask) >> 1 (F) + "paddd %%mm2, %%mm1\n" // E+F + "pand %%mm4, %%mm1\n" // (E+F) & res + "pandn %%mm0, %%mm4\n" // color& !res + + "por %%mm1, %%mm4\n" + "movq %%mm4, 0(%0)\n" // src0 = res + + "addl $8, %0\n" + "addl $8, %1\n" + "addl $8, %2\n" + "addl $8, %3\n" + + "decl %4\n" + "jnz 0b\n" + "pop %4\n" + "emms\n" + : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (src3) + : "r" (count), "r" (qRGB_COLOR_MASK) + ); +#else + __asm { + movq mm7, qword ptr [qRGB_COLOR_MASK]; + mov eax, src0; + mov ebx, src1; + mov ecx, src2; + mov edx, src3; + mov edi, count; +label0: + movq mm0, qword ptr [eax]; // src0 + movq mm1, qword ptr [ebx]; 
// src1 + movq mm2, qword ptr [ecx]; // src2 + movq mm3, qword ptr [edx]; // src3 + movq qword ptr [edx], mm0; // src3 = src0 + movq mm4, mm0; + movq mm5, mm1; + pcmpeqd mm5, mm2; // src1 == src2 (A) + pcmpeqd mm4, mm3; // src3 == src0 (B) + por mm4, mm5; // A | B + movq mm5, mm2; + pcmpeqd mm5, mm0; // src0 == src2 (C) + pcmpeqd mm3, mm1; // src1 == src3 (D) + por mm5, mm3; // C|D + pandn mm4, mm5; // (!(A|B))&(C|D) + movq mm2, mm0; + pand mm2, mm7; // color & colorMask + pand mm1, mm7; // src1 & colorMask + psrld mm2, 1; // (color & colorMask) >> 1 (E) + psrld mm1, 1; // (src & colorMask) >> 1 (F) + paddd mm1, mm2; // E+F + pand mm1, mm4; // (E+F) & res + pandn mm4, mm0; // color & !res + + por mm4, mm1; + movq qword ptr [eax], mm4; // src0 = res + + add eax, 8; + add ebx, 8; + add ecx, 8; + add edx, 8; + + dec edi; + jnz label0; + mov src0, eax; + mov src1, ebx; + mov src2, ecx; + mov src3, edx; + emms; + } +#endif + + src0++; + src1++; + src2++; + src3++; + } + /* Swap buffers around */ + u8 *temp = frm1; + frm1 = frm3; + frm3 = frm2; + frm2 = temp; +} + +#endif + +void SmartIB32(u8 *srcPtr, u32 srcPitch, int width, int height) +{ + if (frm1 == NULL) + { + Init(); + } +#ifdef MMX + if (cpu_mmx) + { + SmartIB32_MMX(srcPtr, srcPitch, width, height); + return; + } +#endif + + u32 *src0 = (u32 *)srcPtr; + u32 *src1 = (u32 *)frm1; + u32 *src2 = (u32 *)frm2; + u32 *src3 = (u32 *)frm3; + + u32 colorMask = 0xfefefe; + + int sPitch = srcPitch >> 2; + int pos = 0; + + for (int j = 0; j < height; j++) + for (int i = 0; i < sPitch; i++) + { + u32 color = src0[pos]; + src0[pos] = + (src1[pos] != src2[pos]) && + (src3[pos] != color) && + ((color == src2[pos]) || (src1[pos] == src3[pos])) + ? 
(((color & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1)) : + color; + src3[pos] = color; /* oldest buffer now holds newest frame */ + pos++; + } + + /* Swap buffers around */ + u8 *temp = frm1; + frm1 = frm3; + frm3 = frm2; + frm2 = temp; +} + +#ifdef MMX +static void MotionBlurIB_MMX(u8 *srcPtr, u32 srcPitch, int width, int height) +{ + u16 *src0 = (u16 *)srcPtr; + u16 *src1 = (u16 *)frm1; + + int count = width >> 2; + + for (int i = 0; i < height; i++) + { +#ifdef __GNUC__ + asm volatile ( + "push %2\n" + "movq 0(%3), %%mm7\n" // colorMask + "0:\n" + "movq 0(%0), %%mm0\n" // src0 + "movq 0(%1), %%mm1\n" // src1 + "movq %%mm0, 0(%1)\n" // src1 = src0 + "pand %%mm7, %%mm0\n" // color & colorMask + "pand %%mm7, %%mm1\n" // src1 & colorMask + "psrlw $1, %%mm0\n" // (color & colorMask) >> 1 (E) + "psrlw $1, %%mm1\n" // (src & colorMask) >> 1 (F) + "paddw %%mm1, %%mm0\n" // E+F + + "movq %%mm0, 0(%0)\n" // src0 = res + + "addl $8, %0\n" + "addl $8, %1\n" + + "decl %2\n" + "jnz 0b\n" + "pop %2\n" + "emms\n" + : "+r" (src0), "+r" (src1) + : "r" (count), "r" (qRGB_COLOR_MASK) + ); +#else + __asm { + movq mm7, qword ptr [qRGB_COLOR_MASK]; + mov eax, src0; + mov ebx, src1; + mov edi, count; +label0: + movq mm0, qword ptr [eax]; // src0 + movq mm1, qword ptr [ebx]; // src1 + movq qword ptr [ebx], mm0; // src1 = src0 + pand mm0, mm7; // color & colorMask + pand mm1, mm7; // src1 & colorMask + psrlw mm0, 1; // (color & colorMask) >> 1 (E) + psrlw mm1, 1; // (src & colorMask) >> 1 (F) + paddw mm0, mm1; // E+F + + movq qword ptr [eax], mm0; // src0 = res + + add eax, 8; + add ebx, 8; + + dec edi; + jnz label0; + mov src0, eax; + mov src1, ebx; + emms; + } +#endif + src0 += 2; + src1 += 2; + } +} + +#endif + +void MotionBlurIB(u8 *srcPtr, u32 srcPitch, int width, int height) +{ + if (frm1 == NULL) + { + Init(); + } + +#ifdef MMX + if (cpu_mmx) + { + MotionBlurIB_MMX(srcPtr, srcPitch, width, height); + return; + } +#endif + + u16 colorMask = ~RGB_LOW_BITS_MASK; + + u16 *src0 
= (u16 *)srcPtr; + u16 *src1 = (u16 *)frm1; + + int sPitch = srcPitch >> 1; + + int pos = 0; + for (int j = 0; j < height; j++) + for (int i = 0; i < sPitch; i++) + { + u16 color = src0[pos]; + src0[pos] = + (((color & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1)); + src1[pos] = color; + pos++; + } +} + +#ifdef MMX +static void MotionBlurIB32_MMX(u8 *srcPtr, u32 srcPitch, int width, int height) +{ + u32 *src0 = (u32 *)srcPtr; + u32 *src1 = (u32 *)frm1; + + int count = width >> 1; + + for (int i = 0; i < height; i++) + { +#ifdef __GNUC__ + asm volatile ( + "push %2\n" + "movq 0(%3), %%mm7\n" // colorMask + "0:\n" + "movq 0(%0), %%mm0\n" // src0 + "movq 0(%1), %%mm1\n" // src1 + "movq %%mm0, 0(%1)\n" // src1 = src0 + "pand %%mm7, %%mm0\n" // color & colorMask + "pand %%mm7, %%mm1\n" // src1 & colorMask + "psrld $1, %%mm0\n" // (color & colorMask) >> 1 (E) + "psrld $1, %%mm1\n" // (src & colorMask) >> 1 (F) + "paddd %%mm1, %%mm0\n" // E+F + + "movq %%mm0, 0(%0)\n" // src0 = res + + "addl $8, %0\n" + "addl $8, %1\n" + + "decl %2\n" + "jnz 0b\n" + "pop %2\n" + "emms\n" + : "+r" (src0), "+r" (src1) + : "r" (count), "r" (qRGB_COLOR_MASK) + ); +#else + __asm { + movq mm7, qword ptr [qRGB_COLOR_MASK]; + mov eax, src0; + mov ebx, src1; + mov edi, count; +label0: + movq mm0, qword ptr [eax]; // src0 + movq mm1, qword ptr [ebx]; // src1 + movq qword ptr [ebx], mm0; // src1 = src0 + pand mm0, mm7; // color & colorMask + pand mm1, mm7; // src1 & colorMask + psrld mm0, 1; // (color & colorMask) >> 1 (E) + psrld mm1, 1; // (src & colorMask) >> 1 (F) + paddd mm0, mm1; // E+F + + movq qword ptr [eax], mm0; // src0 = res + + add eax, 8; + add ebx, 8; + + dec edi; + jnz label0; + mov src0, eax; + mov src1, ebx; + emms; + } +#endif + src0++; + src1++; + } +} + +#endif + +void MotionBlurIB32(u8 *srcPtr, u32 srcPitch, int width, int height) +{ + if (frm1 == NULL) + { + Init(); + } + +#ifdef MMX + if (cpu_mmx) + { + MotionBlurIB32_MMX(srcPtr, srcPitch, width, height); + return; + } 
+#endif + + u32 *src0 = (u32 *)srcPtr; + u32 *src1 = (u32 *)frm1; + + u32 colorMask = 0xfefefe; + + int sPitch = srcPitch >> 2; + int pos = 0; + + for (int j = 0; j < height; j++) + for (int i = 0; i < sPitch; i++) + { + u32 color = src0[pos]; + src0[pos] = (((color & colorMask) >> 1) + + ((src1[pos] & colorMask) >> 1)); + src1[pos] = color; + pos++; + } +} + +static int count = 0; + +void InterlaceIB(u8 *srcPtr, u32 srcPitch, int width, int height) +{ + if (frm1 == NULL) + { + Init(); + } + + u16 colorMask = ~RGB_LOW_BITS_MASK; + + u16 *src0 = (u16 *)srcPtr; + u16 *src1 = (u16 *)frm1; + + int sPitch = srcPitch >> 1; + + int pos = 0; + for (int j = 0; j < height; j++) + { + bool render = count ? (j & 1) != 0 : (j & 1) == 0; + if (render) + { + for (int i = 0; i < sPitch; i++) + { + u16 color = src0[pos]; + src0[pos] = + (((color & colorMask) >> 1) + ((((src1[pos] & colorMask) >> 1) & colorMask) >> 1)); + src1[pos] = color; + pos++; + } + } + else + { + for (int i = 0; i < sPitch; i++) + { + u16 color = src0[pos]; + src0[pos] = + (((((color & colorMask) >> 1) & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1)); + src1[pos] = color; + pos++; + } + } + } + count = count ^ 1; +} + diff -r 18eaae41bde3 -r b970226568d2 src/filters/interp.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/filters/interp.h Sun Mar 04 20:32:31 2012 -0600 @@ -0,0 +1,351 @@ +/* + * This file is part of the Advance project. + * + * Copyright (C) 2003 Andrea Mazzoleni + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * In addition, as a special exception, Andrea Mazzoleni + * gives permission to link the code of this program with + * the MAME library (or with modified versions of MAME that use the + * same license as MAME), and distribute linked combinations including + * the two. You must obey the GNU General Public License in all + * respects for all of the code used other than MAME. If you modify + * this file, you may extend this exception to your version of the + * file, but you are not obligated to do so. If you do not wish to + * do so, delete this exception statement from your version. + */ + +#ifndef __INTERP_H +#define __INTERP_H + +/***************************************************************************/ +/* Basic types */ + +/***************************************************************************/ +/* interpolation */ + +extern unsigned interp_mask[2]; +extern unsigned interp_bits_per_pixel; + +#define INTERP_16_MASK_1(v) (v & interp_mask[0]) +#define INTERP_16_MASK_2(v) (v & interp_mask[1]) + +static inline u16 interp_16_521(u16 p1, u16 p2, u16 p3) +{ + return INTERP_16_MASK_1((INTERP_16_MASK_1(p1)*5 + INTERP_16_MASK_1(p2)*2 + INTERP_16_MASK_1(p3)*1) / 8) + | INTERP_16_MASK_2((INTERP_16_MASK_2(p1)*5 + INTERP_16_MASK_2(p2)*2 + INTERP_16_MASK_2(p3)*1) / 8); +} + +static inline u16 interp_16_332(u16 p1, u16 p2, u16 p3) +{ + return INTERP_16_MASK_1((INTERP_16_MASK_1(p1)*3 + INTERP_16_MASK_1(p2)*3 + INTERP_16_MASK_1(p3)*2) / 8) + | INTERP_16_MASK_2((INTERP_16_MASK_2(p1)*3 + INTERP_16_MASK_2(p2)*3 + INTERP_16_MASK_2(p3)*2) / 8); +} + +static inline u16 interp_16_611(u16 p1, u16 p2, u16 p3) +{ + return INTERP_16_MASK_1((INTERP_16_MASK_1(p1)*6 + INTERP_16_MASK_1(p2) + INTERP_16_MASK_1(p3)) / 8) + | INTERP_16_MASK_2((INTERP_16_MASK_2(p1)*6 + INTERP_16_MASK_2(p2) + 
INTERP_16_MASK_2(p3)) / 8); +} + +static inline u16 interp_16_71(u16 p1, u16 p2) +{ + return INTERP_16_MASK_1((INTERP_16_MASK_1(p1)*7 + INTERP_16_MASK_1(p2)) / 8) + | INTERP_16_MASK_2((INTERP_16_MASK_2(p1)*7 + INTERP_16_MASK_2(p2)) / 8); +} + +static inline u16 interp_16_211(u16 p1, u16 p2, u16 p3) +{ + return INTERP_16_MASK_1((INTERP_16_MASK_1(p1)*2 + INTERP_16_MASK_1(p2) + INTERP_16_MASK_1(p3)) / 4) + | INTERP_16_MASK_2((INTERP_16_MASK_2(p1)*2 + INTERP_16_MASK_2(p2) + INTERP_16_MASK_2(p3)) / 4); +} + +static inline u16 interp_16_772(u16 p1, u16 p2, u16 p3) +{ + return INTERP_16_MASK_1(((INTERP_16_MASK_1(p1) + INTERP_16_MASK_1(p2))*7 + INTERP_16_MASK_1(p3)*2) / 16) + | INTERP_16_MASK_2(((INTERP_16_MASK_2(p1) + INTERP_16_MASK_2(p2))*7 + INTERP_16_MASK_2(p3)*2) / 16); +} + +static inline u16 interp_16_11(u16 p1, u16 p2) +{ + return INTERP_16_MASK_1((INTERP_16_MASK_1(p1) + INTERP_16_MASK_1(p2)) / 2) + | INTERP_16_MASK_2((INTERP_16_MASK_2(p1) + INTERP_16_MASK_2(p2)) / 2); +} + +static inline u16 interp_16_31(u16 p1, u16 p2) +{ + return INTERP_16_MASK_1((INTERP_16_MASK_1(p1)*3 + INTERP_16_MASK_1(p2)) / 4) + | INTERP_16_MASK_2((INTERP_16_MASK_2(p1)*3 + INTERP_16_MASK_2(p2)) / 4); +} + +static inline u16 interp_16_1411(u16 p1, u16 p2, u16 p3) +{ + return INTERP_16_MASK_1((INTERP_16_MASK_1(p1)*14 + INTERP_16_MASK_1(p2) + INTERP_16_MASK_1(p3)) / 16) + | INTERP_16_MASK_2((INTERP_16_MASK_2(p1)*14 + INTERP_16_MASK_2(p2) + INTERP_16_MASK_2(p3)) / 16); +} + +static inline u16 interp_16_431(u16 p1, u16 p2, u16 p3) +{ + return INTERP_16_MASK_1((INTERP_16_MASK_1(p1)*4 + INTERP_16_MASK_1(p2)*3 + INTERP_16_MASK_1(p3)) / 8) + | INTERP_16_MASK_2((INTERP_16_MASK_2(p1)*4 + INTERP_16_MASK_2(p2)*3 + INTERP_16_MASK_2(p3)) / 8); +} + +static inline u16 interp_16_53(u16 p1, u16 p2) +{ + return INTERP_16_MASK_1((INTERP_16_MASK_1(p1)*5 + INTERP_16_MASK_1(p2)*3) / 8) + | INTERP_16_MASK_2((INTERP_16_MASK_2(p1)*5 + INTERP_16_MASK_2(p2)*3) / 8); +} + +static inline u16 interp_16_151(u16 p1, u16 
p2) +{ + return INTERP_16_MASK_1((INTERP_16_MASK_1(p1)*15 + INTERP_16_MASK_1(p2)) / 16) + | INTERP_16_MASK_2((INTERP_16_MASK_2(p1)*15 + INTERP_16_MASK_2(p2)) / 16); +} + +static inline u16 interp_16_97(u16 p1, u16 p2) +{ + return INTERP_16_MASK_1((INTERP_16_MASK_1(p1)*9 + INTERP_16_MASK_1(p2)*7) / 16) + | INTERP_16_MASK_2((INTERP_16_MASK_2(p1)*9 + INTERP_16_MASK_2(p2)*7) / 16); +} + +#define INTERP_32_MASK_1(v) (v & 0xFF00FF) +#define INTERP_32_MASK_2(v) (v & 0x00FF00) + +static inline u32 interp_32_521(u32 p1, u32 p2, u32 p3) +{ + return INTERP_32_MASK_1((INTERP_32_MASK_1(p1)*5 + INTERP_32_MASK_1(p2)*2 + INTERP_32_MASK_1(p3)*1) / 8) + | INTERP_32_MASK_2((INTERP_32_MASK_2(p1)*5 + INTERP_32_MASK_2(p2)*2 + INTERP_32_MASK_2(p3)*1) / 8); +} + +static inline u32 interp_32_332(u32 p1, u32 p2, u32 p3) +{ + return INTERP_32_MASK_1((INTERP_32_MASK_1(p1)*3 + INTERP_32_MASK_1(p2)*3 + INTERP_32_MASK_1(p3)*2) / 8) + | INTERP_32_MASK_2((INTERP_32_MASK_2(p1)*3 + INTERP_32_MASK_2(p2)*3 + INTERP_32_MASK_2(p3)*2) / 8); +} + +static inline u32 interp_32_211(u32 p1, u32 p2, u32 p3) +{ + return INTERP_32_MASK_1((INTERP_32_MASK_1(p1)*2 + INTERP_32_MASK_1(p2) + INTERP_32_MASK_1(p3)) / 4) + | INTERP_32_MASK_2((INTERP_32_MASK_2(p1)*2 + INTERP_32_MASK_2(p2) + INTERP_32_MASK_2(p3)) / 4); +} + +static inline u32 interp_32_611(u32 p1, u32 p2, u32 p3) +{ + return INTERP_32_MASK_1((INTERP_32_MASK_1(p1)*6 + INTERP_32_MASK_1(p2) + INTERP_32_MASK_1(p3)) / 8) + | INTERP_32_MASK_2((INTERP_32_MASK_2(p1)*6 + INTERP_32_MASK_2(p2) + INTERP_32_MASK_2(p3)) / 8); +} + +static inline u32 interp_32_71(u32 p1, u32 p2) +{ + return INTERP_32_MASK_1((INTERP_32_MASK_1(p1)*7 + INTERP_32_MASK_1(p2)) / 8) + | INTERP_32_MASK_2((INTERP_32_MASK_2(p1)*7 + INTERP_32_MASK_2(p2)) / 8); +} + +static inline u32 interp_32_772(u32 p1, u32 p2, u32 p3) +{ + return INTERP_32_MASK_1(((INTERP_32_MASK_1(p1) + INTERP_32_MASK_1(p2))*7 + INTERP_32_MASK_1(p3)*2) / 16) + | INTERP_32_MASK_2(((INTERP_32_MASK_2(p1) + INTERP_32_MASK_2(p2))*7 
+ INTERP_32_MASK_2(p3)*2) / 16); +} + +static inline u32 interp_32_11(u32 p1, u32 p2) +{ + return INTERP_32_MASK_1((INTERP_32_MASK_1(p1) + INTERP_32_MASK_1(p2)) / 2) + | INTERP_32_MASK_2((INTERP_32_MASK_2(p1) + INTERP_32_MASK_2(p2)) / 2); +} + +static inline u32 interp_32_31(u32 p1, u32 p2) +{ + return INTERP_32_MASK_1((INTERP_32_MASK_1(p1)*3 + INTERP_32_MASK_1(p2)) / 4) + | INTERP_32_MASK_2((INTERP_32_MASK_2(p1)*3 + INTERP_32_MASK_2(p2)) / 4); +} + +static inline u32 interp_32_1411(u32 p1, u32 p2, u32 p3) +{ + return INTERP_32_MASK_1((INTERP_32_MASK_1(p1)*14 + INTERP_32_MASK_1(p2) + INTERP_32_MASK_1(p3)) / 16) + | INTERP_32_MASK_2((INTERP_32_MASK_2(p1)*14 + INTERP_32_MASK_2(p2) + INTERP_32_MASK_2(p3)) / 16); +} + +static inline u32 interp_32_431(u32 p1, u32 p2, u32 p3) +{ + return INTERP_32_MASK_1((INTERP_32_MASK_1(p1)*4 + INTERP_32_MASK_1(p2)*3 + INTERP_32_MASK_1(p3)) / 8) + | INTERP_32_MASK_2((INTERP_32_MASK_2(p1)*4 + INTERP_32_MASK_2(p2)*3 + INTERP_32_MASK_2(p3)) / 8); +} + +static inline u32 interp_32_53(u32 p1, u32 p2) +{ + return INTERP_32_MASK_1((INTERP_32_MASK_1(p1)*5 + INTERP_32_MASK_1(p2)*3) / 8) + | INTERP_32_MASK_2((INTERP_32_MASK_2(p1)*5 + INTERP_32_MASK_2(p2)*3) / 8); +} + +static inline u32 interp_32_151(u32 p1, u32 p2) +{ + return INTERP_32_MASK_1((INTERP_32_MASK_1(p1)*15 + INTERP_32_MASK_1(p2)) / 16) + | INTERP_32_MASK_2((INTERP_32_MASK_2(p1)*15 + INTERP_32_MASK_2(p2)) / 16); +} + +static inline u32 interp_32_97(u32 p1, u32 p2) +{ + return INTERP_32_MASK_1((INTERP_32_MASK_1(p1)*9 + INTERP_32_MASK_1(p2)*7) / 16) + | INTERP_32_MASK_2((INTERP_32_MASK_2(p1)*9 + INTERP_32_MASK_2(p2)*7) / 16); +} + +/***************************************************************************/ +/* diff */ + +#define INTERP_Y_LIMIT (0x30*4) +#define INTERP_U_LIMIT (0x07*4) +#define INTERP_V_LIMIT (0x06*8) + +static int interp_16_diff(u16 p1, u16 p2) +{ + int r, g, b; + int y, u, v; + + if (p1 == p2) + return 0; + + if (interp_bits_per_pixel == 16) { + b = (int)((p1 & 
0x1F) - (p2 & 0x1F)) << 3; + g = (int)((p1 & 0x7E0) - (p2 & 0x7E0)) >> 3; + r = (int)((p1 & 0xF800) - (p2 & 0xF800)) >> 8; + } else { + b = (int)((p1 & 0x1F) - (p2 & 0x1F)) << 3; + g = (int)((p1 & 0x3E0) - (p2 & 0x3E0)) >> 2; + r = (int)((p1 & 0x7C00) - (p2 & 0x7C00)) >> 7; + } + + y = r + g + b; + u = r - b; + v = -r + 2*g - b; + + if (y < -INTERP_Y_LIMIT || y > INTERP_Y_LIMIT) + return 1; + + if (u < -INTERP_U_LIMIT || u > INTERP_U_LIMIT) + return 1; + + if (v < -INTERP_V_LIMIT || v > INTERP_V_LIMIT) + return 1; + +return 0; +} + +static int interp_32_diff(u32 p1, u32 p2) +{ + int r, g, b; + int y, u, v; + + if ((p1 & 0xF8F8F8) == (p2 & 0xF8F8F8)) + return 0; + + b = (int)((p1 & 0xFF) - (p2 & 0xFF)); + g = (int)((p1 & 0xFF00) - (p2 & 0xFF00)) >> 8; + r = (int)((p1 & 0xFF0000) - (p2 & 0xFF0000)) >> 16; + + y = r + g + b; + u = r - b; + v = -r + 2*g - b; + + if (y < -INTERP_Y_LIMIT || y > INTERP_Y_LIMIT) + return 1; + + if (u < -INTERP_U_LIMIT || u > INTERP_U_LIMIT) + return 1; + + if (v < -INTERP_V_LIMIT || v > INTERP_V_LIMIT) + return 1; + + return 0; +} + + +#define INTERP_LIMIT2 (96000) +#define ABS(x) ((x) < 0 ? -(x) : (x)) +#define MAX(x,y) ((x) > (y) ? (x) : (y)) +#define MIN(x,y) ((x) < (y) ? 
(x) : (y)) + +static int interp_16_diff2(u16 p1, u16 p2) +{ + int r, g, b; + int y, u, v; + + if ((p1 & 0xF79E) == (p2 & 0xF79E)) + return 0; + + if (interp_bits_per_pixel == 16) { + b = (int)((p1 & 0x1F) - (p2 & 0x1F)) << 3; + g = (int)((p1 & 0x7E0) - (p2 & 0x7E0)) >> 3; + r = (int)((p1 & 0xF800) - (p2 & 0xF800)) >> 8; + } else { + b = (int)((p1 & 0x1F) - (p2 & 0x1F)) << 3; + g = (int)((p1 & 0x3E0) - (p2 & 0x3E0)) >> 2; + r = (int)((p1 & 0x7C00) - (p2 & 0x7C00)) >> 7; + } + +// yb = 30*r + 58*g + 12*b; + y = 33*r + 36*g + 31*b; + u = -14*r - 29*g + 44*b; + v = 62*r - 51*g - 10*b; + + if (11*ABS(y) + 8*ABS(u) + 6*ABS(v) > INTERP_LIMIT2) + return 1; + return 0; +} + +static int interp_32_diff2(u32 p1, u32 p2) +{ + int r, g, b; + int y, u, v; + + if ((p1 & 0xF0F0F0) == (p2 & 0xF0F0F0)) + return 0; + + b = (int)((p1 & 0xF8) - (p2 & 0xF8)); + g = (int)((p1 & 0xF800) - (p2 & 0xF800)) >> 8; + r = (int)((p1 & 0xF80000) - (p2 & 0xF80000)) >> 16; + +// y = 30*r + 58*g + 12*b; + y = 33*r + 36*g + 31*b; + u = -14*r - 29*g + 44*b; + v = 62*r - 51*g - 10*b; + + if (11*ABS(y) + 8*ABS(u) + 6*ABS(v) > INTERP_LIMIT2) + return 1; + + return 0; +} + +static void interp_set(unsigned bits_per_pixel) +{ + interp_bits_per_pixel = bits_per_pixel; + + switch (bits_per_pixel) { + case 15 : + interp_mask[0] = 0x7C1F; + interp_mask[1] = 0x03E0; + break; + case 16 : + interp_mask[0] = 0xF81F; + interp_mask[1] = 0x07E0; + break; + case 32 : + interp_mask[0] = 0xFF00FF; + interp_mask[1] = 0x00FF00; + break; + } +} + +#endif diff -r 18eaae41bde3 -r b970226568d2 src/filters/lq2x.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/filters/lq2x.h Sun Mar 04 20:32:31 2012 -0600 @@ -0,0 +1,1284 @@ +case 0 : +case 2 : +case 4 : +case 6 : +case 8 : +case 12 : +case 16 : +case 20 : +case 24 : +case 28 : +case 32 : +case 34 : +case 36 : +case 38 : +case 40 : +case 44 : +case 48 : +case 52 : +case 56 : +case 60 : +case 64 : +case 66 : +case 68 : +case 70 : +case 96 : +case 98 : +case 100 : +case 102 : 
+case 128 : +case 130 : +case 132 : +case 134 : +case 136 : +case 140 : +case 144 : +case 148 : +case 152 : +case 156 : +case 160 : +case 162 : +case 164 : +case 166 : +case 168 : +case 172 : +case 176 : +case 180 : +case 184 : +case 188 : +case 192 : +case 194 : +case 196 : +case 198 : +case 224 : +case 226 : +case 228 : +case 230 : +{ + P0 = IC(0); + P1 = IC(0); + P2 = IC(0); + P3 = IC(0); +} break; +case 1 : +case 5 : +case 9 : +case 13 : +case 17 : +case 21 : +case 25 : +case 29 : +case 33 : +case 37 : +case 41 : +case 45 : +case 49 : +case 53 : +case 57 : +case 61 : +case 65 : +case 69 : +case 97 : +case 101 : +case 129 : +case 133 : +case 137 : +case 141 : +case 145 : +case 149 : +case 153 : +case 157 : +case 161 : +case 165 : +case 169 : +case 173 : +case 177 : +case 181 : +case 185 : +case 189 : +case 193 : +case 197 : +case 225 : +case 229 : +{ + P0 = IC(1); + P1 = IC(1); + P2 = IC(1); + P3 = IC(1); +} break; +case 3 : +case 35 : +case 67 : +case 99 : +case 131 : +case 163 : +case 195 : +case 227 : +{ + P0 = IC(2); + P1 = IC(2); + P2 = IC(2); + P3 = IC(2); +} break; +case 7 : +case 39 : +case 71 : +case 103 : +case 135 : +case 167 : +case 199 : +case 231 : +{ + P0 = IC(3); + P1 = IC(3); + P2 = IC(3); + P3 = IC(3); +} break; +case 10 : +case 138 : +{ + P1 = IC(0); + P2 = IC(0); + P3 = IC(0); + if (MUL) { + P0 = IC(0); + } else { + P0 = I211(0, 1, 3); + } +} break; +case 11 : +case 27 : +case 75 : +case 139 : +case 155 : +case 203 : +{ + P1 = IC(2); + P2 = IC(2); + P3 = IC(2); + if (MUL) { + P0 = IC(2); + } else { + P0 = I211(2, 1, 3); + } +} break; +case 14 : +case 142 : +{ + P2 = IC(0); + P3 = IC(0); + if (MUL) { + P0 = IC(0); + P1 = IC(0); + } else { + P0 = I332(1, 3, 0); + P1 = I31(0, 1); + } +} break; +case 15 : +case 143 : +case 207 : +{ + P2 = IC(4); + P3 = IC(4); + if (MUL) { + P0 = IC(4); + P1 = IC(4); + } else { + P0 = I332(1, 3, 4); + P1 = I31(4, 1); + } +} break; +case 18 : +case 22 : +case 30 : +case 50 : +case 54 : +case 62 : +case 86 : +case 
118 : +{ + P0 = IC(0); + P2 = IC(0); + P3 = IC(0); + if (MUR) { + P1 = IC(0); + } else { + P1 = I211(0, 1, 5); + } +} break; +case 19 : +case 51 : +{ + P2 = IC(2); + P3 = IC(2); + if (MUR) { + P0 = IC(2); + P1 = IC(2); + } else { + P0 = I31(2, 1); + P1 = I332(1, 5, 2); + } +} break; +case 23 : +case 55 : +case 119 : +{ + P2 = IC(3); + P3 = IC(3); + if (MUR) { + P0 = IC(3); + P1 = IC(3); + } else { + P0 = I31(3, 1); + P1 = I332(1, 5, 3); + } +} break; +case 26 : +{ + P2 = IC(0); + P3 = IC(0); + if (MUL) { + P0 = IC(0); + } else { + P0 = I211(0, 1, 3); + } + if (MUR) { + P1 = IC(0); + } else { + P1 = I211(0, 1, 5); + } +} break; +case 31 : +case 95 : +{ + P2 = IC(4); + P3 = IC(4); + if (MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } + if (MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 42 : +case 170 : +{ + P1 = IC(0); + P3 = IC(0); + if (MUL) { + P0 = IC(0); + P2 = IC(0); + } else { + P0 = I332(1, 3, 0); + P2 = I31(0, 3); + } +} break; +case 43 : +case 171 : +case 187 : +{ + P1 = IC(2); + P3 = IC(2); + if (MUL) { + P0 = IC(2); + P2 = IC(2); + } else { + P0 = I332(1, 3, 2); + P2 = I31(2, 3); + } +} break; +case 46 : +case 174 : +{ + P1 = IC(0); + P2 = IC(0); + P3 = IC(0); + if (MUL) { + P0 = IC(0); + } else { + P0 = I611(0, 1, 3); + } +} break; +case 47 : +case 175 : +{ + P1 = IC(4); + P2 = IC(4); + P3 = IC(4); + if (MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } +} break; +case 58 : +case 154 : +case 186 : +{ + P2 = IC(0); + P3 = IC(0); + if (MUL) { + P0 = IC(0); + } else { + P0 = I611(0, 1, 3); + } + if (MUR) { + P1 = IC(0); + } else { + P1 = I611(0, 1, 5); + } +} break; +case 59 : +{ + P2 = IC(2); + P3 = IC(2); + if (MUL) { + P0 = IC(2); + } else { + P0 = I211(2, 1, 3); + } + if (MUR) { + P1 = IC(2); + } else { + P1 = I611(2, 1, 5); + } +} break; +case 63 : +{ + P2 = IC(4); + P3 = IC(4); + if (MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } + if (MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } 
+} break; +case 72 : +case 76 : +case 104 : +case 106 : +case 108 : +case 110 : +case 120 : +case 124 : +{ + P0 = IC(0); + P1 = IC(0); + P3 = IC(0); + if (MDL) { + P2 = IC(0); + } else { + P2 = I211(0, 3, 7); + } +} break; +case 73 : +case 77 : +case 105 : +case 109 : +case 125 : +{ + P1 = IC(1); + P3 = IC(1); + if (MDL) { + P0 = IC(1); + P2 = IC(1); + } else { + P0 = I31(1, 3); + P2 = I332(3, 7, 1); + } +} break; +case 74 : +{ + P1 = IC(0); + P3 = IC(0); + if (MDL) { + P2 = IC(0); + } else { + P2 = I211(0, 3, 7); + } + if (MUL) { + P0 = IC(0); + } else { + P0 = I211(0, 1, 3); + } +} break; +case 78 : +case 202 : +case 206 : +{ + P1 = IC(0); + P3 = IC(0); + if (MDL) { + P2 = IC(0); + } else { + P2 = I611(0, 3, 7); + } + if (MUL) { + P0 = IC(0); + } else { + P0 = I611(0, 1, 3); + } +} break; +case 79 : +{ + P1 = IC(4); + P3 = IC(4); + if (MDL) { + P2 = IC(4); + } else { + P2 = I611(4, 3, 7); + } + if (MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } +} break; +case 80 : +case 208 : +case 210 : +case 216 : +{ + P0 = IC(0); + P1 = IC(0); + P2 = IC(0); + if (MDR) { + P3 = IC(0); + } else { + P3 = I211(0, 5, 7); + } +} break; +case 81 : +case 209 : +case 217 : +{ + P0 = IC(1); + P1 = IC(1); + P2 = IC(1); + if (MDR) { + P3 = IC(1); + } else { + P3 = I211(1, 5, 7); + } +} break; +case 82 : +case 214 : +case 222 : +{ + P0 = IC(0); + P2 = IC(0); + if (MDR) { + P3 = IC(0); + } else { + P3 = I211(0, 5, 7); + } + if (MUR) { + P1 = IC(0); + } else { + P1 = I211(0, 1, 5); + } +} break; +case 83 : +case 115 : +{ + P0 = IC(2); + P2 = IC(2); + if (MDR) { + P3 = IC(2); + } else { + P3 = I611(2, 5, 7); + } + if (MUR) { + P1 = IC(2); + } else { + P1 = I611(2, 1, 5); + } +} break; +case 84 : +case 212 : +{ + P0 = IC(0); + P2 = IC(0); + if (MDR) { + P1 = IC(0); + P3 = IC(0); + } else { + P1 = I31(0, 5); + P3 = I332(5, 7, 0); + } +} break; +case 85 : +case 213 : +case 221 : +{ + P0 = IC(1); + P2 = IC(1); + if (MDR) { + P1 = IC(1); + P3 = IC(1); + } else { + P1 = I31(1, 5); + P3 
= I332(5, 7, 1); + } +} break; +case 87 : +{ + P0 = IC(3); + P2 = IC(3); + if (MDR) { + P3 = IC(3); + } else { + P3 = I611(3, 5, 7); + } + if (MUR) { + P1 = IC(3); + } else { + P1 = I211(3, 1, 5); + } +} break; +case 88 : +case 248 : +case 250 : +{ + P0 = IC(0); + P1 = IC(0); + if (MDL) { + P2 = IC(0); + } else { + P2 = I211(0, 3, 7); + } + if (MDR) { + P3 = IC(0); + } else { + P3 = I211(0, 5, 7); + } +} break; +case 89 : +case 93 : +{ + P0 = IC(1); + P1 = IC(1); + if (MDL) { + P2 = IC(1); + } else { + P2 = I611(1, 3, 7); + } + if (MDR) { + P3 = IC(1); + } else { + P3 = I611(1, 5, 7); + } +} break; +case 90 : +{ + if (MDL) { + P2 = IC(0); + } else { + P2 = I611(0, 3, 7); + } + if (MDR) { + P3 = IC(0); + } else { + P3 = I611(0, 5, 7); + } + if (MUL) { + P0 = IC(0); + } else { + P0 = I611(0, 1, 3); + } + if (MUR) { + P1 = IC(0); + } else { + P1 = I611(0, 1, 5); + } +} break; +case 91 : +{ + if (MDL) { + P2 = IC(2); + } else { + P2 = I611(2, 3, 7); + } + if (MDR) { + P3 = IC(2); + } else { + P3 = I611(2, 5, 7); + } + if (MUL) { + P0 = IC(2); + } else { + P0 = I211(2, 1, 3); + } + if (MUR) { + P1 = IC(2); + } else { + P1 = I611(2, 1, 5); + } +} break; +case 92 : +{ + P0 = IC(0); + P1 = IC(0); + if (MDL) { + P2 = IC(0); + } else { + P2 = I611(0, 3, 7); + } + if (MDR) { + P3 = IC(0); + } else { + P3 = I611(0, 5, 7); + } +} break; +case 94 : +{ + if (MDL) { + P2 = IC(0); + } else { + P2 = I611(0, 3, 7); + } + if (MDR) { + P3 = IC(0); + } else { + P3 = I611(0, 5, 7); + } + if (MUL) { + P0 = IC(0); + } else { + P0 = I611(0, 1, 3); + } + if (MUR) { + P1 = IC(0); + } else { + P1 = I211(0, 1, 5); + } +} break; +case 107 : +case 123 : +{ + P1 = IC(2); + P3 = IC(2); + if (MDL) { + P2 = IC(2); + } else { + P2 = I211(2, 3, 7); + } + if (MUL) { + P0 = IC(2); + } else { + P0 = I211(2, 1, 3); + } +} break; +case 111 : +{ + P1 = IC(4); + P3 = IC(4); + if (MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } + if (MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } +} 
break; +case 112 : +case 240 : +{ + P0 = IC(0); + P1 = IC(0); + if (MDR) { + P2 = IC(0); + P3 = IC(0); + } else { + P2 = I31(0, 7); + P3 = I332(5, 7, 0); + } +} break; +case 113 : +case 241 : +{ + P0 = IC(1); + P1 = IC(1); + if (MDR) { + P2 = IC(1); + P3 = IC(1); + } else { + P2 = I31(1, 7); + P3 = I332(5, 7, 1); + } +} break; +case 114 : +{ + P0 = IC(0); + P2 = IC(0); + if (MDR) { + P3 = IC(0); + } else { + P3 = I611(0, 5, 7); + } + if (MUR) { + P1 = IC(0); + } else { + P1 = I611(0, 1, 5); + } +} break; +case 116 : +{ + P0 = IC(0); + P1 = IC(0); + P2 = IC(0); + if (MDR) { + P3 = IC(0); + } else { + P3 = I611(0, 5, 7); + } +} break; +case 117 : +{ + P0 = IC(1); + P1 = IC(1); + P2 = IC(1); + if (MDR) { + P3 = IC(1); + } else { + P3 = I611(1, 5, 7); + } +} break; +case 121 : +{ + P0 = IC(1); + P1 = IC(1); + if (MDL) { + P2 = IC(1); + } else { + P2 = I211(1, 3, 7); + } + if (MDR) { + P3 = IC(1); + } else { + P3 = I611(1, 5, 7); + } +} break; +case 122 : +{ + if (MDL) { + P2 = IC(0); + } else { + P2 = I211(0, 3, 7); + } + if (MDR) { + P3 = IC(0); + } else { + P3 = I611(0, 5, 7); + } + if (MUL) { + P0 = IC(0); + } else { + P0 = I611(0, 1, 3); + } + if (MUR) { + P1 = IC(0); + } else { + P1 = I611(0, 1, 5); + } +} break; +case 126 : +{ + P0 = IC(0); + P3 = IC(0); + if (MDL) { + P2 = IC(0); + } else { + P2 = I211(0, 3, 7); + } + if (MUR) { + P1 = IC(0); + } else { + P1 = I211(0, 1, 5); + } +} break; +case 127 : +{ + P3 = IC(4); + if (MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } + if (MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } + if (MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 146 : +case 150 : +case 178 : +case 182 : +case 190 : +{ + P0 = IC(0); + P2 = IC(0); + if (MUR) { + P1 = IC(0); + P3 = IC(0); + } else { + P1 = I332(1, 5, 0); + P3 = I31(0, 5); + } +} break; +case 147 : +case 179 : +{ + P0 = IC(2); + P2 = IC(2); + P3 = IC(2); + if (MUR) { + P1 = IC(2); + } else { + P1 = I611(2, 1, 5); + } +} break; +case 151 : 
+case 183 : +{ + P0 = IC(3); + P2 = IC(3); + P3 = IC(3); + if (MUR) { + P1 = IC(3); + } else { + P1 = I1411(3, 1, 5); + } +} break; +case 158 : +{ + P2 = IC(0); + P3 = IC(0); + if (MUL) { + P0 = IC(0); + } else { + P0 = I611(0, 1, 3); + } + if (MUR) { + P1 = IC(0); + } else { + P1 = I211(0, 1, 5); + } +} break; +case 159 : +{ + P2 = IC(4); + P3 = IC(4); + if (MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } + if (MUR) { + P1 = IC(4); + } else { + P1 = I1411(4, 1, 5); + } +} break; +case 191 : +{ + P2 = IC(4); + P3 = IC(4); + if (MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } + if (MUR) { + P1 = IC(4); + } else { + P1 = I1411(4, 1, 5); + } +} break; +case 200 : +case 204 : +case 232 : +case 236 : +case 238 : +{ + P0 = IC(0); + P1 = IC(0); + if (MDL) { + P2 = IC(0); + P3 = IC(0); + } else { + P2 = I332(3, 7, 0); + P3 = I31(0, 7); + } +} break; +case 201 : +case 205 : +{ + P0 = IC(1); + P1 = IC(1); + P3 = IC(1); + if (MDL) { + P2 = IC(1); + } else { + P2 = I611(1, 3, 7); + } +} break; +case 211 : +{ + P0 = IC(2); + P1 = IC(2); + P2 = IC(2); + if (MDR) { + P3 = IC(2); + } else { + P3 = I211(2, 5, 7); + } +} break; +case 215 : +{ + P0 = IC(3); + P2 = IC(3); + if (MDR) { + P3 = IC(3); + } else { + P3 = I211(3, 5, 7); + } + if (MUR) { + P1 = IC(3); + } else { + P1 = I1411(3, 1, 5); + } +} break; +case 218 : +{ + if (MDL) { + P2 = IC(0); + } else { + P2 = I611(0, 3, 7); + } + if (MDR) { + P3 = IC(0); + } else { + P3 = I211(0, 5, 7); + } + if (MUL) { + P0 = IC(0); + } else { + P0 = I611(0, 1, 3); + } + if (MUR) { + P1 = IC(0); + } else { + P1 = I611(0, 1, 5); + } +} break; +case 219 : +{ + P1 = IC(2); + P2 = IC(2); + if (MDR) { + P3 = IC(2); + } else { + P3 = I211(2, 5, 7); + } + if (MUL) { + P0 = IC(2); + } else { + P0 = I211(2, 1, 3); + } +} break; +case 220 : +{ + P0 = IC(0); + P1 = IC(0); + if (MDL) { + P2 = IC(0); + } else { + P2 = I611(0, 3, 7); + } + if (MDR) { + P3 = IC(0); + } else { + P3 = I211(0, 5, 7); + } +} break; +case 223 : +{ + P2 = 
IC(4); + if (MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } + if (MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } + if (MUR) { + P1 = IC(4); + } else { + P1 = I1411(4, 1, 5); + } +} break; +case 233 : +case 237 : +{ + P0 = IC(1); + P1 = IC(1); + P3 = IC(1); + if (MDL) { + P2 = IC(1); + } else { + P2 = I1411(1, 3, 7); + } +} break; +case 234 : +{ + P1 = IC(0); + P3 = IC(0); + if (MDL) { + P2 = IC(0); + } else { + P2 = I211(0, 3, 7); + } + if (MUL) { + P0 = IC(0); + } else { + P0 = I611(0, 1, 3); + } +} break; +case 235 : +{ + P1 = IC(2); + P3 = IC(2); + if (MDL) { + P2 = IC(2); + } else { + P2 = I1411(2, 3, 7); + } + if (MUL) { + P0 = IC(2); + } else { + P0 = I211(2, 1, 3); + } +} break; +case 239 : +{ + P1 = IC(4); + P3 = IC(4); + if (MDL) { + P2 = IC(4); + } else { + P2 = I1411(4, 3, 7); + } + if (MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } +} break; +case 242 : +{ + P0 = IC(0); + P2 = IC(0); + if (MDR) { + P3 = IC(0); + } else { + P3 = I211(0, 5, 7); + } + if (MUR) { + P1 = IC(0); + } else { + P1 = I611(0, 1, 5); + } +} break; +case 243 : +{ + P0 = IC(2); + P1 = IC(2); + if (MDR) { + P2 = IC(2); + P3 = IC(2); + } else { + P2 = I31(2, 7); + P3 = I332(5, 7, 2); + } +} break; +case 244 : +{ + P0 = IC(0); + P1 = IC(0); + P2 = IC(0); + if (MDR) { + P3 = IC(0); + } else { + P3 = I1411(0, 5, 7); + } +} break; +case 245 : +{ + P0 = IC(1); + P1 = IC(1); + P2 = IC(1); + if (MDR) { + P3 = IC(1); + } else { + P3 = I1411(1, 5, 7); + } +} break; +case 246 : +{ + P0 = IC(0); + P2 = IC(0); + if (MDR) { + P3 = IC(0); + } else { + P3 = I1411(0, 5, 7); + } + if (MUR) { + P1 = IC(0); + } else { + P1 = I211(0, 1, 5); + } +} break; +case 247 : +{ + P0 = IC(3); + P2 = IC(3); + if (MDR) { + P3 = IC(3); + } else { + P3 = I1411(3, 5, 7); + } + if (MUR) { + P1 = IC(3); + } else { + P1 = I1411(3, 1, 5); + } +} break; +case 249 : +{ + P0 = IC(1); + P1 = IC(1); + if (MDL) { + P2 = IC(1); + } else { + P2 = I1411(1, 3, 7); + } + if (MDR) { + P3 = IC(1); + } 
else { + P3 = I211(1, 5, 7); + } +} break; +case 251 : +{ + P1 = IC(2); + if (MDL) { + P2 = IC(2); + } else { + P2 = I1411(2, 3, 7); + } + if (MDR) { + P3 = IC(2); + } else { + P3 = I211(2, 5, 7); + } + if (MUL) { + P0 = IC(2); + } else { + P0 = I211(2, 1, 3); + } +} break; +case 252 : +{ + P0 = IC(0); + P1 = IC(0); + if (MDL) { + P2 = IC(0); + } else { + P2 = I211(0, 3, 7); + } + if (MDR) { + P3 = IC(0); + } else { + P3 = I1411(0, 5, 7); + } +} break; +case 253 : +{ + P0 = IC(1); + P1 = IC(1); + if (MDL) { + P2 = IC(1); + } else { + P2 = I1411(1, 3, 7); + } + if (MDR) { + P3 = IC(1); + } else { + P3 = I1411(1, 5, 7); + } +} break; +case 254 : +{ + P0 = IC(0); + if (MDL) { + P2 = IC(0); + } else { + P2 = I211(0, 3, 7); + } + if (MDR) { + P3 = IC(0); + } else { + P3 = I1411(0, 5, 7); + } + if (MUR) { + P1 = IC(0); + } else { + P1 = I211(0, 1, 5); + } +} break; +case 255 : +{ + if (MDL) { + P2 = IC(4); + } else { + P2 = I1411(4, 3, 7); + } + if (MDR) { + P3 = IC(4); + } else { + P3 = I1411(4, 5, 7); + } + if (MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } + if (MUR) { + P1 = IC(4); + } else { + P1 = I1411(4, 1, 5); + } +} break; diff -r 18eaae41bde3 -r b970226568d2 src/filters/motionblur.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/filters/motionblur.cpp Sun Mar 04 20:32:31 2012 -0600 @@ -0,0 +1,183 @@ +#include "../Port.h" + +extern u32 RGB_LOW_BITS_MASK; + +void MotionBlur(u8 *srcPtr, u32 srcPitch, u8 *deltaPtr, + u8 *dstPtr, u32 dstPitch, int width, int height) +{ + u8 *nextLine, *finish; + u32 colorMask = ~(RGB_LOW_BITS_MASK | (RGB_LOW_BITS_MASK << 16)); + u32 lowPixelMask = RGB_LOW_BITS_MASK; + + nextLine = dstPtr + dstPitch; + + do + { + u32 *bP = (u32 *) srcPtr; + u32 *xP = (u32 *) deltaPtr; + u32 *dP = (u32 *) dstPtr; + u32 *nL = (u32 *) nextLine; + u32 currentPixel; + u32 nextPixel; + u32 currentDelta; + u32 nextDelta; + + finish = (u8 *) bP + ((width + 2) << 1); + nextPixel = *bP++; + nextDelta = *xP++; + + do + { + currentPixel = 
nextPixel; + currentDelta = nextDelta; + nextPixel = *bP++; + nextDelta = *xP++; + + if (currentPixel != currentDelta) + { + u32 colorA, product, colorB; + + *(xP - 2) = currentPixel; +#ifdef WORDS_BIGENDIAN + colorA = currentPixel >> 16; + colorB = currentDelta >> 16; +#else + colorA = currentPixel & 0xffff; + colorB = currentDelta & 0xffff; +#endif + + product = ((((colorA & colorMask) >> 1) + + ((colorB & colorMask) >> 1) + + (colorA & colorB & lowPixelMask))); + + *(dP) = product | product << 16; + *(nL) = product | product << 16; + +#ifdef WORDS_BIGENDIAN + colorA = (currentPixel & 0xffff); + colorB = (currentDelta & 0xffff); +#else + colorA = currentPixel >> 16; + colorB = currentDelta >> 16; +#endif + product = ((((colorA & colorMask) >> 1) + + ((colorB & colorMask) >> 1) + + (colorA & colorB & lowPixelMask))); + + *(dP + 1) = product | product << 16; + *(nL + 1) = product | product << 16; + } + else + { + u32 colorA, product; + + *(xP - 2) = currentPixel; +#ifdef WORDS_BIGENDIAN + colorA = currentPixel >> 16; +#else + colorA = currentPixel & 0xffff; +#endif + + product = colorA; + + *(dP) = product | product << 16; + *(nL) = product | product << 16; +#ifdef WORDS_BIGENDIAN + colorA = (currentPixel & 0xffff); +#else + colorA = currentPixel >> 16; +#endif + product = colorA; + + *(dP + 1) = product | product << 16; + *(nL + 1) = product | product << 16; + } + + dP += 2; + nL += 2; + } + while ((u8 *) bP < finish); + + deltaPtr += srcPitch; + srcPtr += srcPitch; + dstPtr += dstPitch << 1; + nextLine += dstPitch << 1; + } + while (--height); +} + +void MotionBlur32(u8 *srcPtr, u32 srcPitch, u8 *deltaPtr, + u8 *dstPtr, u32 dstPitch, int width, int height) +{ + u8 *nextLine, *finish; + u32 colorMask = ~RGB_LOW_BITS_MASK; + u32 lowPixelMask = RGB_LOW_BITS_MASK; + + nextLine = dstPtr + dstPitch; + + do + { + u32 *bP = (u32 *) srcPtr; + u32 *xP = (u32 *) deltaPtr; + u32 *dP = (u32 *) dstPtr; + u32 *nL = (u32 *) nextLine; + u32 currentPixel; + u32 nextPixel; + u32 
currentDelta; + u32 nextDelta; + + finish = (u8 *) bP + ((width + 1) << 2); + nextPixel = *bP++; + nextDelta = *xP++; + + do + { + currentPixel = nextPixel; + currentDelta = nextDelta; + nextPixel = *bP++; + nextDelta = *xP++; + + u32 colorA, product, colorB; + + *(xP - 2) = currentPixel; + colorA = currentPixel; + colorB = currentDelta; + + product = ((((colorA & colorMask) >> 1) + + ((colorB & colorMask) >> 1) + + (colorA & colorB & lowPixelMask))); + + *(dP) = product; + *(dP + 1) = product; + *(nL) = product; + *(nL + 1) = product; + + *(xP - 1) = nextPixel; + + colorA = nextPixel; + colorB = nextDelta; + + product = ((((colorA & colorMask) >> 1) + + ((colorB & colorMask) >> 1) + + (colorA & colorB & lowPixelMask))); + + *(dP + 2) = product; + *(dP + 3) = product; + *(nL + 2) = product; + *(nL + 3) = product; + + nextPixel = *bP++; + nextDelta = *xP++; + + dP += 4; + nL += 4; + } + while ((u8 *) bP < finish); + + deltaPtr += srcPitch; + srcPtr += srcPitch; + dstPtr += dstPitch << 1; + nextLine += dstPitch << 1; + } + while (--height); +} diff -r 18eaae41bde3 -r b970226568d2 src/filters/pixel.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/filters/pixel.cpp Sun Mar 04 20:32:31 2012 -0600 @@ -0,0 +1,196 @@ +#include "../Port.h" + +extern u32 RGB_LOW_BITS_MASK; + +void Pixelate2x16(u8 *srcPtr, u32 srcPitch, u8 *deltaPtr, + u8 *dstPtr, u32 dstPitch, int width, int height) +{ + u8 *nextLine, *finish; + u32 colorMask = ~(RGB_LOW_BITS_MASK | (RGB_LOW_BITS_MASK << 16)); + colorMask = (colorMask >> 2) & (colorMask >> 1); + + nextLine = dstPtr + dstPitch; + + do + { + u32 *bP = (u32 *) srcPtr; + u32 *xP = (u32 *) deltaPtr; + u32 *dP = (u32 *) dstPtr; + u32 *nL = (u32 *) nextLine; + u32 currentPixel; + u32 nextPixel; + u32 currentDelta; + u32 nextDelta; + + finish = (u8 *) bP + ((width+2) << 1); + nextPixel = *bP++; + nextDelta = *xP++; + + do + { + currentPixel = nextPixel; + currentDelta = nextDelta; + nextPixel = *bP++; + nextDelta = *xP++; + + if 
((nextPixel != nextDelta) || (currentPixel != currentDelta)) + { + u32 colorA, colorB, product; + + *(xP - 2) = currentPixel; +#ifdef WORDS_BIGENDIAN + colorA = currentPixel >> 16; + colorB = currentPixel & 0xffff; +#else + colorA = currentPixel & 0xffff; + colorB = currentPixel >> 16; +#endif + product = (colorA >> 2) & colorMask; + +#ifdef WORDS_BIGENDIAN + *(nL) = (product << 16) | (product); + *(dP) = (colorA << 16) | product; +#else + *(nL) = product | (product << 16); + *(dP) = colorA | (product << 16); +#endif + +#ifdef WORDS_BIGENDIAN + colorA = nextPixel >> 16; +#else + colorA = nextPixel & 0xffff; +#endif + product = (colorB >> 2) & colorMask; +#ifdef WORDS_BIGENDIAN + *(nL + 1) = (product << 16) | (product); + *(dP + 1) = (colorB << 16) | (product); +#else + *(nL + 1) = (product) | (product << 16); + *(dP + 1) = (colorB) | (product << 16); +#endif + } + + dP += 2; + nL += 2; + } + while ((u8 *) bP < finish); + + deltaPtr += srcPitch; + srcPtr += srcPitch; + dstPtr += dstPitch << 1; + nextLine += dstPitch << 1; + } + while (--height); +} + +void Pixelate2x32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, + u8 *dstPtr, u32 dstPitch, int width, int height) +{ + u8 *nextLine, *finish; + u32 colorMask = ((u32)~RGB_LOW_BITS_MASK >> 2) & ((u32)~RGB_LOW_BITS_MASK >> 1); + + nextLine = dstPtr + dstPitch; + + do + { + u32 *bP = (u32 *) srcPtr; + // u32 *xP = (u32 *) deltaPtr; + u32 *dP = (u32 *) dstPtr; + u32 *nL = (u32 *) nextLine; + u32 currentPixel; + u32 nextPixel; + + finish = (u8 *) bP + ((width+1) << 2); + nextPixel = *bP++; + + do + { + u32 product; + + currentPixel = nextPixel; + nextPixel = *bP++; + product = (currentPixel >> 2) & colorMask; + *(nL) = product; + *(nL+1) = product; + *(dP) = currentPixel; + *(dP+1) = product; + + currentPixel = nextPixel; + nextPixel = *bP++; + product = (currentPixel >> 2) & colorMask; + *(nL + 2) = product; + *(nL + 3) = product; + *(dP + 2) = currentPixel; + *(dP + 3) = product; + + dP += 4; + nL += 4; + } + while ((u8 
*) bP < finish); + + srcPtr += srcPitch; + dstPtr += dstPitch << 1; + nextLine += dstPitch << 1; + } + while (--height); +} + +// generic Pixelate Nx magnification filter +template +void PixelateNx(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, + u8 *dstPtr, u32 dstPitch, int width, int height) +{ + ColorType colorMask = ((ColorType)~RGB_LOW_BITS_MASK >> 2) & ((ColorType)~RGB_LOW_BITS_MASK >> 1); + + srcPitch = srcPitch / sizeof(ColorType) - width; + u32 dstNextP = dstPitch / sizeof(ColorType); + u32 dstNextL = (dstNextP - width) * magnification; // skip to the next magnificated 'line' + dstNextP -= magnification; + + u32 offset = (dstPitch + sizeof(ColorType)) * magnification - dstPitch; + + ColorType *src = (ColorType *)srcPtr; + ColorType *dst = (ColorType *)dstPtr; + + do // per src line + { + u8 *finishP = (u8 *)dst + offset; + for (int x = 0; x < width; ++x) // per pixel in line + { + ColorType col = *src; + ColorType *dst2 = dst; + u8 *finishM = (u8 *)(dst + magnification); + + ColorType product = (col >> 2) & colorMask; + do + { + *dst2 = product; + } while ((u8 *)++dst2 < finishM); + dst2 += dstNextP; + finishM += dstPitch; + do // dst magnificated pixel + { + *dst2++ = product; + do + { + *dst2 = col; + } while ((u8 *)++dst2 < finishM); + dst2 += dstNextP; + finishM += dstPitch; + } while ((u8 *)dst2 < finishP); + + ++src; + dst += magnification; + finishP += magnification * sizeof(ColorType); + } + src += srcPitch; + dst += dstNextL; + } while (--height); +} + +typedef void (*PixelateNxFP)(u8*, u32, u8*, u8*, u32, int, int); + +PixelateNxFP Pixelate3x16 = PixelateNx<3, u16>; +PixelateNxFP Pixelate3x32 = PixelateNx<3, u32>; +PixelateNxFP Pixelate4x16 = PixelateNx<4, u16>; +PixelateNxFP Pixelate4x32 = PixelateNx<4, u32>; diff -r 18eaae41bde3 -r b970226568d2 src/filters/scanline.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/filters/scanline.cpp Sun Mar 04 20:32:31 2012 -0600 @@ -0,0 +1,225 @@ +#include "../Port.h" + +extern u32 
RGB_LOW_BITS_MASK; + +void Scanlines(u8 *srcPtr, u32 srcPitch, u8 *, + u8 *dstPtr, u32 dstPitch, int width, int height) +{ + u8 *nextLine, *finish; + + nextLine = dstPtr + dstPitch; + + do + { + u32 *bP = (u32 *) srcPtr; + u32 *dP = (u32 *) dstPtr; + u32 *nL = (u32 *) nextLine; + u32 currentPixel; + u32 nextPixel; + + finish = (u8 *) bP + ((width + 2) << 1); + nextPixel = *bP++; + + do + { + currentPixel = nextPixel; + nextPixel = *bP++; + u32 colorA, colorB; + +#ifdef WORDS_BIGENDIAN + colorA = currentPixel >> 16; + colorB = currentPixel & 0xffff; +#else + colorA = currentPixel & 0xffff; + colorB = currentPixel >> 16; +#endif + + *(dP) = colorA | colorA << 16; + *(nL) = 0; + +#ifdef WORDS_BIGENDIAN + colorA = nextPixel >> 16; +#else + colorA = nextPixel & 0xffff; +#endif + + *(dP + 1) = colorB | (colorB << 16); + *(nL + 1) = 0; + + dP += 2; + nL += 2; + } + while ((u8 *) bP < finish); + + srcPtr += srcPitch; + dstPtr += dstPitch << 1; + nextLine += dstPitch << 1; + } + while (--height); +} + +void Scanlines32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, + u8 *dstPtr, u32 dstPitch, int width, int height) +{ + u8 *nextLine, *finish; + + nextLine = dstPtr + dstPitch; + + do + { + u32 *bP = (u32 *) srcPtr; + u32 *dP = (u32 *) dstPtr; + u32 *nL = (u32 *) nextLine; + u32 currentPixel; + u32 nextPixel; + + finish = (u8 *) bP + ((width + 1) << 2); + nextPixel = *bP++; + + do + { + currentPixel = nextPixel; + nextPixel = *bP++; + + u32 colorA, colorB; + + colorA = currentPixel; + colorB = nextPixel; + + *(dP) = colorA; + *(dP + 1) = colorA; + *(nL) = 0; + *(nL + 1) = 0; + + *(dP + 2) = colorB; + *(dP + 3) = colorB; + *(nL + 2) = 0; + *(nL + 3) = 0; + + nextPixel = *bP++; + + dP += 4; + nL += 4; + } + while ((u8 *) bP < finish); + + srcPtr += srcPitch; + dstPtr += dstPitch << 1; + nextLine += dstPitch << 1; + } + while (--height); +} + +void ScanlinesTV(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, + u8 *dstPtr, u32 dstPitch, int width, int height) +{ + u8 *nextLine, 
*finish; + u32 colorMask = ~(RGB_LOW_BITS_MASK | (RGB_LOW_BITS_MASK << 16)); + + nextLine = dstPtr + dstPitch; + + do + { + u32 *bP = (u32 *) srcPtr; + u32 *dP = (u32 *) dstPtr; + u32 *nL = (u32 *) nextLine; + u32 currentPixel; + u32 nextPixel; + + finish = (u8 *) bP + ((width + 2) << 1); + nextPixel = *bP++; + + do + { + currentPixel = nextPixel; + nextPixel = *bP++; + + u32 colorA, colorB; + +#ifdef WORDS_BIGENDIAN + colorA = currentPixel >> 16; + colorB = currentPixel & 0xFFFF; +#else + colorA = currentPixel & 0xFFFF; + colorB = currentPixel >> 16; +#endif + + *(dP) = colorA = colorA | ((((colorA & colorMask) >> 1) + + ((colorB & colorMask) >> 1))) << 16; + colorA = ((colorA & colorMask) >> 1); + colorA += ((colorA & colorMask) >> 1); + *(nL) = colorA; + + colorA = nextPixel & 0xFFFF; + + *(dP + 1) = colorB = colorB | ((((colorA & colorMask) >> 1) + + ((colorB & colorMask) >> 1))) << 16; + colorB = ((colorB & colorMask) >> 1); + colorB += ((colorB & colorMask) >> 1); + + *(nL + 1) = colorB; + + dP += 2; + nL += 2; + } + while ((u8 *) bP < finish); + + srcPtr += srcPitch; + dstPtr += dstPitch << 1; + nextLine += dstPitch << 1; + } + while (--height); +} + +void ScanlinesTV32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, + u8 *dstPtr, u32 dstPitch, int width, int height) +{ + u8 *nextLine, *finish; + u32 colorMask = ~RGB_LOW_BITS_MASK; + + nextLine = dstPtr + dstPitch; + + do + { + u32 *bP = (u32 *) srcPtr; + u32 *dP = (u32 *) dstPtr; + u32 *nL = (u32 *) nextLine; + u32 currentPixel; + u32 nextPixel; + + finish = (u8 *) bP + ((width + 1) << 2); + nextPixel = *bP++; + + do + { + currentPixel = nextPixel; + nextPixel = *bP++; + + u32 colorA, colorB, temp; + + colorA = currentPixel; + colorB = nextPixel; + + *(dP) = colorA; + *(dP + 1) = temp = ((colorA & colorMask) >> 1) + + ((colorB & colorMask) >> 1); + temp = ((temp & colorMask) >> 1); + temp += ((temp & colorMask) >> 1); + colorA = ((colorA & colorMask) >> 1); + colorA += ((colorA & colorMask) >> 1); + + *(nL) 
= colorA; + *(nL + 1) = temp; + + dP += 2; + nL += 2; + } + while ((u8 *) bP < finish); + + srcPtr += srcPitch; + dstPtr += dstPitch << 1; + nextLine += dstPitch << 1; + } + while (--height); +} + diff -r 18eaae41bde3 -r b970226568d2 src/filters/simple2x.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/filters/simple2x.cpp Sun Mar 04 20:32:31 2012 -0600 @@ -0,0 +1,189 @@ +#include "../Port.h" + +void Simple2x16(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, + u8 *dstPtr, u32 dstPitch, int width, int height) +{ + u8 *nextLine, *finish; + + nextLine = dstPtr + dstPitch; + + do + { + u32 *bP = (u32 *) srcPtr; + u32 *dP = (u32 *) dstPtr; + u32 *nL = (u32 *) nextLine; + u32 currentPixel; + + finish = (u8 *) bP + ((width + 2) << 1); + currentPixel = *bP++; + + do + { +#ifdef WORDS_BIGENDIAN + u32 color = currentPixel >> 16; +#else + u32 color = currentPixel & 0xffff; +#endif + + color = color | (color << 16); + + *(dP) = color; + *(nL) = color; + +#ifdef WORDS_BIGENDIAN + color = currentPixel & 0xffff; +#else + color = currentPixel >> 16; +#endif + color = color | (color << 16); + *(dP + 1) = color; + *(nL + 1) = color; + + currentPixel = *bP++; + + dP += 2; + nL += 2; + } + while ((u8 *) bP < finish); + + srcPtr += srcPitch; + dstPtr += dstPitch << 1; + nextLine += dstPitch << 1; + } + while (--height); +} + +void Simple2x32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, + u8 *dstPtr, u32 dstPitch, int width, int height) +{ + u8 *nextLine, *finish; + + nextLine = dstPtr + dstPitch; + + do + { + u32 *bP = (u32 *) srcPtr; + u32 *dP = (u32 *) dstPtr; + u32 *nL = (u32 *) nextLine; + u32 currentPixel; + + finish = (u8 *) bP + ((width + 1) << 2); + currentPixel = *bP++; + + do + { + u32 color = currentPixel; + + *(dP) = color; + *(dP + 1) = color; + *(nL) = color; + *(nL + 1) = color; + + currentPixel = *bP++; + + dP += 2; + nL += 2; + } + while ((u8 *) bP < finish); + + srcPtr += srcPitch; + dstPtr += dstPitch << 1; + nextLine += dstPitch << 1; + } + while 
(--height); +} + +#if 0 +// generic Simple Nx magnification filter +template +void SimpleNx(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, + u8 *dstPtr, u32 dstPitch, int width, int height) +{ + srcPitch = srcPitch / sizeof(ColorType) - width; + u32 dstNextP = dstPitch / sizeof(ColorType); + u32 dstNextL = (dstNextP - width) * magnification; // skip to the next magnificated 'line' + dstNextP -= magnification; + + u32 offset = (dstPitch + sizeof(ColorType)) * magnification - dstPitch; + + ColorType *src = (ColorType *)srcPtr; + ColorType *dst = (ColorType *)dstPtr; + + do // per src line + { + u8 *finishP = (u8 *)dst + offset; + for (int x = 0; x < width; ++x) // per pixel in line + { + ColorType col = *src; + ColorType *dst2 = dst; + u8 * finishM = (u8 *)(dst + magnification); + do // dst magnificated pixel + { + do + { + *dst2 = col; + } + while ((u8 *)++dst2 < finishM); + dst2 += dstNextP; + finishM += dstPitch; + } + while ((u8 *)dst2 < finishP); + + ++src; + dst += magnification; + finishP += magnification * sizeof(ColorType); + } + src += srcPitch; + dst += dstNextL; + } + while (--height); +} + +#else + +// generic Simple Nx magnification filter +template +void SimpleNx(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, + u8 *dstPtr, u32 dstPitch, int width, int height) +{ + srcPitch = srcPitch / sizeof(ColorType) - width; + dstPitch /= sizeof(ColorType); + u32 dstBlank = (dstPitch - width) * magnification; // skip to the next magnificated 'line' + dstPitch -= magnification; + + ColorType *src = (ColorType *)srcPtr; + ColorType *dst = (ColorType *)dstPtr; + + do // per src line + { + for (int x = 0; x < width; ++x) // per pixel in src line + { + ColorType col = *src; + ColorType *dst2 = dst; + for (int dy = 0; dy < magnification; ++dy) // dst magnificated pixel + { + for (int dx = 0; dx < magnification; ++dx) + { + *dst2 = col; + ++dst2; + } + dst2 += dstPitch; + } + + ++src; + dst += magnification; + } + src += srcPitch; + dst += dstBlank; + } + while 
(--height); +} + +#endif + +typedef void (*SimpleNxFP)(u8 *, u32, u8 *, u8 *, u32, int, int); + +SimpleNxFP Simple3x16 = SimpleNx<3, u16>; +SimpleNxFP Simple3x32 = SimpleNx<3, u32>; +SimpleNxFP Simple4x16 = SimpleNx<4, u16>; +SimpleNxFP Simple4x32 = SimpleNx<4, u32>;