rlm@1: #include "../Port.h" rlm@1: #include "hq_shared32.h" rlm@1: #include "interp.h" rlm@1: rlm@1: const unsigned __int64 reg_blank = 0x0000000000000000; rlm@1: const unsigned __int64 const7 = 0x0000000700070007; rlm@1: const unsigned __int64 treshold = 0x0000000000300706; rlm@1: rlm@1: void Interp1(unsigned char *pc, unsigned int c1, unsigned int c2) rlm@1: { rlm@1: //*((int*)pc) = (c1*3+c2)/4; rlm@1: rlm@1: #ifdef MMX rlm@1: __asm rlm@1: { rlm@1: mov eax, pc rlm@1: movd mm1, c1 rlm@1: movd mm2, c2 rlm@1: movq mm0, mm1 rlm@1: pslld mm0, 2 rlm@1: psubd mm0, mm1 rlm@1: paddd mm0, mm2 rlm@1: psrld mm0, 2 rlm@1: movd [eax], mm0 rlm@1: EMMS rlm@1: } rlm@1: #else rlm@1: __asm rlm@1: { rlm@1: mov eax, pc rlm@1: mov edx, c1 rlm@1: shl edx, 2 rlm@1: add edx, c2 rlm@1: sub edx, c1 rlm@1: shr edx, 2 rlm@1: mov [eax], edx rlm@1: } rlm@1: #endif rlm@1: } rlm@1: rlm@1: void Interp2(unsigned char *pc, unsigned int c1, unsigned int c2, unsigned int c3) rlm@1: { rlm@1: //*((int*)pc) = (c1*2+c2+c3)/4; rlm@1: rlm@1: #ifdef MMX rlm@1: __asm rlm@1: { rlm@1: mov eax, pc rlm@1: movd mm0, c1 rlm@1: movd mm1, c2 rlm@1: movd mm2, c3 rlm@1: pslld mm0, 1 rlm@1: paddd mm0, mm1 rlm@1: paddd mm0, mm2 rlm@1: psrad mm0, 2 rlm@1: movd [eax], mm0 rlm@1: EMMS rlm@1: } rlm@1: #else rlm@1: __asm rlm@1: { rlm@1: mov eax, pc rlm@1: mov edx, c1 rlm@1: shl edx, 1 rlm@1: add edx, c2 rlm@1: add edx, c3 rlm@1: shr edx, 2 rlm@1: mov [eax], edx rlm@1: } rlm@1: #endif rlm@1: } rlm@1: rlm@1: void Interp3(unsigned char *pc, unsigned int c1, unsigned int c2) rlm@1: { rlm@1: //*((int*)pc) = (c1*7+c2)/8; rlm@1: //*((int*)pc) = ((((c1 & 0x00FF00)*7 + (c2 & 0x00FF00) ) & 0x0007F800) + rlm@1: // (((c1 & 0xFF00FF)*7 + (c2 & 0xFF00FF) ) & 0x07F807F8)) >> 3; rlm@1: rlm@1: #ifdef MMX rlm@1: __asm rlm@1: { rlm@1: mov eax, pc rlm@1: movd mm1, c1 rlm@1: movd mm2, c2 rlm@1: punpcklbw mm1, reg_blank rlm@1: punpcklbw mm2, reg_blank rlm@1: pmullw mm1, const7 rlm@1: paddw mm1, mm2 rlm@1: psrlw mm1, 3 rlm@1: packuswb mm1, reg_blank rlm@1: movd [eax], mm1 rlm@1: EMMS rlm@1: } rlm@1: #else rlm@1: __asm rlm@1: { rlm@1: mov eax, c1 rlm@1: mov ebx, c2 rlm@1: mov ecx, eax rlm@1: shl ecx, 3 rlm@1: sub ecx, eax rlm@1: add ecx, ebx rlm@1: shr ecx, 3 rlm@1: mov eax, pc rlm@1: mov [eax], ecx rlm@1: } rlm@1: #endif rlm@1: } rlm@1: rlm@1: void Interp4(unsigned char *pc, unsigned int c1, unsigned int c2, unsigned int c3) rlm@1: { rlm@1: //*((int*)pc) = (c1*2+(c2+c3)*7)/16; rlm@1: //*((int*)pc) = ((((c1 & 0x00FF00)*2 + ((c2 & 0x00FF00) + (c3 & 0x00FF00))*7 ) & 0x000FF000) + rlm@1: // (((c1 & 0xFF00FF)*2 + ((c2 & 0xFF00FF) + (c3 & 0xFF00FF))*7 ) & 0x0FF00FF0)) >> 4; rlm@1: rlm@1: #ifdef MMX rlm@1: __asm rlm@1: { rlm@1: mov eax, pc rlm@1: movd mm1, c1 rlm@1: movd mm2, c2 rlm@1: movd mm3, c3 rlm@1: punpcklbw mm1, reg_blank rlm@1: punpcklbw mm2, reg_blank rlm@1: punpcklbw mm3, reg_blank rlm@1: psllw mm1, 1 rlm@1: paddw mm2, mm3 rlm@1: pmullw mm2, const7 rlm@1: paddw mm1, mm2 rlm@1: psrlw mm1, 4 rlm@1: packuswb mm1, reg_blank rlm@1: movd [eax], mm1 rlm@1: EMMS rlm@1: } rlm@1: #else rlm@1: rlm@1: __asm rlm@1: { rlm@1: mov eax, [c1] rlm@1: and eax, 0FF00h rlm@1: shl eax, 1 rlm@1: mov ecx, [c2] rlm@1: and ecx, 0FF00h rlm@1: mov edx, [c3] rlm@1: and edx, 0FF00h rlm@1: add ecx, edx rlm@1: imul ecx, ecx, 7 rlm@1: add eax, ecx rlm@1: and eax, 0FF000h rlm@1: rlm@1: mov ebx, [c1] rlm@1: and ebx, 0FF00FFh rlm@1: shl ebx, 1 rlm@1: mov ecx, [c2] rlm@1: and ecx, 0FF00FFh rlm@1: mov edx, [c3] rlm@1: and edx, 0FF00FFh rlm@1: add ecx, edx rlm@1: imul ecx, ecx, 7 rlm@1: add ebx, ecx rlm@1: and ebx, 0FF00FF0h rlm@1: rlm@1: add eax, ebx rlm@1: shr eax, 4 rlm@1: rlm@1: mov ebx, pc rlm@1: mov [ebx], eax rlm@1: } rlm@1: #endif rlm@1: } rlm@1: rlm@1: void Interp5(unsigned char *pc, unsigned int c1, unsigned int c2) rlm@1: { rlm@1: //*((int*)pc) = (c1+c2)/2; rlm@1: rlm@1: #ifdef MMX rlm@1: __asm rlm@1: { rlm@1: mov eax, pc rlm@1: movd mm0, c1 rlm@1: movd mm1, c2 rlm@1: paddd mm0, mm1 rlm@1: psrad mm0, 1 rlm@1: movd [eax], mm0 rlm@1: EMMS rlm@1: } rlm@1: #else rlm@1: __asm rlm@1: { rlm@1: mov eax, pc rlm@1: mov edx, c1 rlm@1: add edx, c2 rlm@1: shr edx, 1 rlm@1: mov [eax], edx rlm@1: } rlm@1: #endif rlm@1: } rlm@1: rlm@1: void Interp1_16(unsigned char *pc, unsigned short c1, unsigned short c2) rlm@1: { rlm@1: *((unsigned short *)pc) = interp_16_31(c1, c2); rlm@1: //*((int*)pc) = (c1*3+c2)/4; rlm@1: } rlm@1: rlm@1: void Interp2_16(unsigned char *pc, unsigned short c1, unsigned short c2, unsigned short c3) rlm@1: { rlm@1: *((unsigned short *)pc) = interp_16_211(c1, c2, c3); rlm@1: //*((int*)pc) = (c1*2+c2+c3)/4; rlm@1: } rlm@1: rlm@1: void Interp3_16(unsigned char *pc, unsigned short c1, unsigned short c2) rlm@1: { rlm@1: *((unsigned short *)pc) = interp_16_71(c1, c2); rlm@1: // *((unsigned short*)pc) = (c1*7+c2)/8; rlm@1: // *((unsigned short*)pc) = ((((c1 & 0x00FF00)*7 + (c2 & 0x00FF00) ) & 0x0007F800) + rlm@1: // (((c1 & 0xFF00FF)*7 + (c2 & 0xFF00FF) ) & 0x07F807F8)) >> 3; rlm@1: } rlm@1: rlm@1: void Interp4_16(unsigned char *pc, unsigned short c1, unsigned short c2, unsigned short c3) rlm@1: { rlm@1: *((unsigned short *)pc) = interp_16_772(c2, c3, c1); rlm@1: // *((unsigned short*)pc) = (c1*2+(c2+c3)*7)/16; rlm@1: // *((unsigned short*)pc) = ((((c1 & 0x00FF00)*2 + ((c2 & 0x00FF00) + (c3 & 0x00FF00))*7 ) & 0x000FF000) + rlm@1: // (((c1 & 0xFF00FF)*2 + ((c2 & 0xFF00FF) + (c3 & 0xFF00FF))*7 ) & 0x0FF00FF0)) >> 4; rlm@1: } rlm@1: rlm@1: void Interp5_16(unsigned char *pc, unsigned short c1, unsigned short c2) rlm@1: { rlm@1: *((unsigned short *)pc) = interp_16_11(c1, c2); rlm@1: } rlm@1: rlm@1: bool Diff(unsigned int c1, unsigned int c2) rlm@1: { rlm@1: unsigned int rlm@1: YUV1 = RGBtoYUV(c1), rlm@1: YUV2 = RGBtoYUV(c2); rlm@1: rlm@1: if (YUV1 == YUV2) return false; // Save some processing power rlm@1: rlm@1: #ifdef MMX rlm@1: unsigned int retval; rlm@1: __asm rlm@1: { rlm@1: mov eax, 0x7FFFFFFF rlm@1: movd mm7, eax; mm7 = ABS_MASK = 0x7FFFFFFF rlm@1: rlm@1: ; Copy source colors in first reg rlm@1: movd mm0, YUV1 rlm@1: movd mm1, YUV2 rlm@1: rlm@1: mov eax, 0x00FF0000 rlm@1: movd mm6, eax; mm6 = Ymask = 0x00FF0000 rlm@1: rlm@1: ; Calculate color Y difference rlm@1: movq mm2, mm0 rlm@1: movq mm3, mm1 rlm@1: pand mm2, mm6 rlm@1: pand mm3, mm6 rlm@1: psubd mm2, mm3 rlm@1: pand mm2, mm7 rlm@1: rlm@1: mov eax, 0x0000FF00 rlm@1: movd mm6, eax; mm6 = Umask = 0x0000FF00 rlm@1: rlm@1: ; Calculate color U difference rlm@1: movq mm3, mm0 rlm@1: movq mm4, mm1 rlm@1: pand mm3, mm6 rlm@1: pand mm4, mm6 rlm@1: psubd mm3, mm4 rlm@1: pand mm3, mm7 rlm@1: rlm@1: mov eax, 0x000000FF rlm@1: movd mm6, eax; mm6 = Vmask = 0x000000FF rlm@1: rlm@1: ; Calculate color V difference rlm@1: movq mm4, mm0 rlm@1: movq mm5, mm1 rlm@1: pand mm4, mm6 rlm@1: pand mm5, mm6 rlm@1: psubd mm4, mm5 rlm@1: pand mm4, mm7 rlm@1: rlm@1: mov eax, 0x00300000 rlm@1: movd mm5, eax; mm5 = trY = 0x00300000 rlm@1: mov eax, 0x00000700 rlm@1: movd mm6, eax; mm6 = trU = 0x00000700 rlm@1: mov eax, 0x00000006 rlm@1: movd mm7, eax; mm7 = trV = 0x00000006 rlm@1: rlm@1: ; Compare the results rlm@1: pcmpgtd mm2, trY rlm@1: pcmpgtd mm3, trU rlm@1: pcmpgtd mm4, trV rlm@1: por mm2, mm3 rlm@1: por mm2, mm4 rlm@1: rlm@1: movd retval, mm2 rlm@1: rlm@1: EMMS rlm@1: } rlm@1: return (retval != 0); rlm@1: #else rlm@1: return rlm@1: (abs32((YUV1 & Ymask) - (YUV2 & Ymask)) > trY) || rlm@1: (abs32((YUV1 & Umask) - (YUV2 & Umask)) > trU) || rlm@1: (abs32((YUV1 & Vmask) - (YUV2 & Vmask)) > trV); rlm@1: #endif rlm@1: } rlm@1: rlm@1: unsigned int RGBtoYUV(unsigned int c) rlm@1: { // Division through 3 slows down the emulation about 10% !!! rlm@1: #ifdef MMX rlm@1: unsigned int retval; rlm@1: __asm rlm@1: { rlm@1: movd mm0, c rlm@1: movq mm1, mm0 rlm@1: movq mm2, mm0; mm0 = mm1 = mm2 = c rlm@1: rlm@1: mov eax, 0x000000FF rlm@1: movd mm5, eax; mm5 = REDMASK = 0x000000FF rlm@1: mov eax, 0x0000FF00 rlm@1: movd mm6, eax; mm6 = GREENMASK = 0x0000FF00 rlm@1: mov eax, 0x00FF0000 rlm@1: movd mm7, eax; mm7 = BLUEMASK = 0x00FF0000 rlm@1: rlm@1: pand mm0, mm5 rlm@1: pand mm1, mm6 rlm@1: pand mm2, mm7; mm0 = R mm1 = G mm2 = B rlm@1: rlm@1: movq mm3, mm0 rlm@1: paddd mm3, mm1 rlm@1: paddd mm3, mm2 rlm@1: ; psrld mm3, 2; mm3 = Y rlm@1: ; pslld mm3, 16 rlm@1: pslld mm3, 14; mm3 = Y << 16 rlm@1: rlm@1: mov eax, 512 rlm@1: movd mm7, eax; mm7 = 128 << 2 = 512 rlm@1: rlm@1: movq mm4, mm0 rlm@1: psubd mm4, mm2 rlm@1: ; psrld mm4, 2 rlm@1: ; paddd mm4, mm7; mm4 = U rlm@1: ; pslld mm4, 8; mm4 = U << 8 rlm@1: paddd mm4, mm7 rlm@1: pslld mm4, 6 rlm@1: rlm@1: mov eax, 128 rlm@1: movd mm7, eax; mm7 = 128 rlm@1: rlm@1: movq mm5, mm1 rlm@1: pslld mm5, 1 rlm@1: psubd mm5, mm0 rlm@1: psubd mm5, mm2 rlm@1: psrld mm5, 3 rlm@1: paddd mm5, mm7; mm5 = V rlm@1: rlm@1: paddd mm5, mm4 rlm@1: paddd mm5, mm3 rlm@1: rlm@1: movd retval, mm5 rlm@1: rlm@1: EMMS rlm@1: } rlm@1: return retval; rlm@1: #else rlm@1: unsigned char r, g, b, Y, u, v; rlm@1: r = (c & 0x000000FF); rlm@1: g = (c & 0x0000FF00) >> 8; rlm@1: b = (c & 0x00FF0000) >> 16; rlm@1: Y = (r + g + b) >> 2; rlm@1: u = 128 + ((r - b) >> 2); rlm@1: v = 128 + ((-r + 2 * g - b) >> 3); rlm@1: return (Y << 16) + (u << 8) + v; rlm@1: rlm@1: // Extremely High Quality Code rlm@1: //unsigned char r, g, b; rlm@1: //r = c & 0xFF; rlm@1: //g = (c >> 8) & 0xFF; rlm@1: //b = (c >> 16) & 0xFF; rlm@1: //unsigned char y, u, v; rlm@1: //y = (0.256788 * r + 0.504129 * g + 0.097906 * b) + 16; rlm@1: //u = (-0.148223 * r - 0.290993 * g + 0.439216 * b) + 128; rlm@1: //v = (0.439216 * r - 0.367788 * g - 0.071427 * b) + 128; rlm@1: //return (y << 16) + (u << 8) + v; rlm@1: #endif rlm@1: }