Mercurial > vba-clojure
view src/filters/hq_shared32.cpp @ 596:747d47d96d2f
determined that I cannot shorten the long B sequence during bootstrap.
author | Robert McIntyre <rlm@mit.edu> |
---|---|
date | Sun, 02 Sep 2012 06:36:57 -0500 |
parents | f9f4f1b99eed |
children |
line wrap: on
line source
1 #include "../Port.h"2 #include "hq_shared32.h"3 #include "interp.h"5 const unsigned __int64 reg_blank = 0x0000000000000000;6 const unsigned __int64 const7 = 0x0000000700070007;7 const unsigned __int64 treshold = 0x0000000000300706;9 void Interp1(unsigned char *pc, unsigned int c1, unsigned int c2)10 {11 //*((int*)pc) = (c1*3+c2)/4;13 #ifdef MMX14 __asm15 {16 mov eax, pc17 movd mm1, c118 movd mm2, c219 movq mm0, mm120 pslld mm0, 221 psubd mm0, mm122 paddd mm0, mm223 psrld mm0, 224 movd [eax], mm025 EMMS26 }27 #else28 __asm29 {30 mov eax, pc31 mov edx, c132 shl edx, 233 add edx, c234 sub edx, c135 shr edx, 236 mov [eax], edx37 }38 #endif39 }41 void Interp2(unsigned char *pc, unsigned int c1, unsigned int c2, unsigned int c3)42 {43 //*((int*)pc) = (c1*2+c2+c3)/4;45 #ifdef MMX46 __asm47 {48 mov eax, pc49 movd mm0, c150 movd mm1, c251 movd mm2, c352 pslld mm0, 153 paddd mm0, mm154 paddd mm0, mm255 psrad mm0, 256 movd [eax], mm057 EMMS58 }59 #else60 __asm61 {62 mov eax, pc63 mov edx, c164 shl edx, 165 add edx, c266 add edx, c367 shr edx, 268 mov [eax], edx69 }70 #endif71 }73 void Interp3(unsigned char *pc, unsigned int c1, unsigned int c2)74 {75 //*((int*)pc) = (c1*7+c2)/8;76 //*((int*)pc) = ((((c1 & 0x00FF00)*7 + (c2 & 0x00FF00) ) & 0x0007F800) +77 // (((c1 & 0xFF00FF)*7 + (c2 & 0xFF00FF) ) & 0x07F807F8)) >> 3;79 #ifdef MMX80 __asm81 {82 mov eax, pc83 movd mm1, c184 movd mm2, c285 punpcklbw mm1, reg_blank86 punpcklbw mm2, reg_blank87 pmullw mm1, const788 paddw mm1, mm289 psrlw mm1, 390 packuswb mm1, reg_blank91 movd [eax], mm192 EMMS93 }94 #else95 __asm96 {97 mov eax, c198 mov ebx, c299 mov ecx, eax100 shl ecx, 3101 sub ecx, eax102 add ecx, ebx103 shr ecx, 3104 mov eax, pc105 mov [eax], ecx106 }107 #endif108 }110 void Interp4(unsigned char *pc, unsigned int c1, unsigned int c2, unsigned int c3)111 {112 //*((int*)pc) = (c1*2+(c2+c3)*7)/16;113 //*((int*)pc) = ((((c1 & 0x00FF00)*2 + ((c2 & 0x00FF00) + (c3 & 0x00FF00))*7 ) & 0x000FF000) +114 // (((c1 & 0xFF00FF)*2 + ((c2 
& 0xFF00FF) + (c3 & 0xFF00FF))*7 ) & 0x0FF00FF0)) >> 4;116 #ifdef MMX117 __asm118 {119 mov eax, pc120 movd mm1, c1121 movd mm2, c2122 movd mm3, c3123 punpcklbw mm1, reg_blank124 punpcklbw mm2, reg_blank125 punpcklbw mm3, reg_blank126 psllw mm1, 1127 paddw mm2, mm3128 pmullw mm2, const7129 paddw mm1, mm2130 psrlw mm1, 4131 packuswb mm1, reg_blank132 movd [eax], mm1133 EMMS134 }135 #else137 __asm138 {139 mov eax, [c1]140 and eax, 0FF00h141 shl eax, 1142 mov ecx, [c2]143 and ecx, 0FF00h144 mov edx, [c3]145 and edx, 0FF00h146 add ecx, edx147 imul ecx, ecx, 7148 add eax, ecx149 and eax, 0FF000h151 mov ebx, [c1]152 and ebx, 0FF00FFh153 shl ebx, 1154 mov ecx, [c2]155 and ecx, 0FF00FFh156 mov edx, [c3]157 and edx, 0FF00FFh158 add ecx, edx159 imul ecx, ecx, 7160 add ebx, ecx161 and ebx, 0FF00FF0h163 add eax, ebx164 shr eax, 4166 mov ebx, pc167 mov [ebx], eax168 }169 #endif170 }172 void Interp5(unsigned char *pc, unsigned int c1, unsigned int c2)173 {174 //*((int*)pc) = (c1+c2)/2;176 #ifdef MMX177 __asm178 {179 mov eax, pc180 movd mm0, c1181 movd mm1, c2182 paddd mm0, mm1183 psrad mm0, 1184 movd [eax], mm0185 EMMS186 }187 #else188 __asm189 {190 mov eax, pc191 mov edx, c1192 add edx, c2193 shr edx, 1194 mov [eax], edx195 }196 #endif197 }199 void Interp1_16(unsigned char *pc, unsigned short c1, unsigned short c2)200 {201 *((unsigned short *)pc) = interp_16_31(c1, c2);202 //*((int*)pc) = (c1*3+c2)/4;203 }205 void Interp2_16(unsigned char *pc, unsigned short c1, unsigned short c2, unsigned short c3)206 {207 *((unsigned short *)pc) = interp_16_211(c1, c2, c3);208 //*((int*)pc) = (c1*2+c2+c3)/4;209 }211 void Interp3_16(unsigned char *pc, unsigned short c1, unsigned short c2)212 {213 *((unsigned short *)pc) = interp_16_71(c1, c2);214 // *((unsigned short*)pc) = (c1*7+c2)/8;215 // *((unsigned short*)pc) = ((((c1 & 0x00FF00)*7 + (c2 & 0x00FF00) ) & 0x0007F800) +216 // (((c1 & 0xFF00FF)*7 + (c2 & 0xFF00FF) ) & 0x07F807F8)) >> 3;217 }219 void Interp4_16(unsigned char *pc, unsigned 
short c1, unsigned short c2, unsigned short c3)220 {221 *((unsigned short *)pc) = interp_16_772(c2, c3, c1);222 // *((unsigned short*)pc) = (c1*2+(c2+c3)*7)/16;223 // *((unsigned short*)pc) = ((((c1 & 0x00FF00)*2 + ((c2 & 0x00FF00) + (c3 & 0x00FF00))*7 ) & 0x000FF000) +224 // (((c1 & 0xFF00FF)*2 + ((c2 & 0xFF00FF) + (c3 & 0xFF00FF))*7 ) & 0x0FF00FF0)) >> 4;225 }227 void Interp5_16(unsigned char *pc, unsigned short c1, unsigned short c2)228 {229 *((unsigned short *)pc) = interp_16_11(c1, c2);230 }232 bool Diff(unsigned int c1, unsigned int c2)233 {234 unsigned int235 YUV1 = RGBtoYUV(c1),236 YUV2 = RGBtoYUV(c2);238 if (YUV1 == YUV2) return false; // Save some processing power240 #ifdef MMX241 unsigned int retval;242 __asm243 {244 mov eax, 0x7FFFFFFF245 movd mm7, eax; mm7 = ABS_MASK = 0x7FFFFFFF247 ; Copy source colors in first reg248 movd mm0, YUV1249 movd mm1, YUV2251 mov eax, 0x00FF0000252 movd mm6, eax; mm6 = Ymask = 0x00FF0000254 ; Calculate color Y difference255 movq mm2, mm0256 movq mm3, mm1257 pand mm2, mm6258 pand mm3, mm6259 psubd mm2, mm3260 pand mm2, mm7262 mov eax, 0x0000FF00263 movd mm6, eax; mm6 = Umask = 0x0000FF00265 ; Calculate color U difference266 movq mm3, mm0267 movq mm4, mm1268 pand mm3, mm6269 pand mm4, mm6270 psubd mm3, mm4271 pand mm3, mm7273 mov eax, 0x000000FF274 movd mm6, eax; mm6 = Vmask = 0x000000FF276 ; Calculate color V difference277 movq mm4, mm0278 movq mm5, mm1279 pand mm4, mm6280 pand mm5, mm6281 psubd mm4, mm5282 pand mm4, mm7284 mov eax, 0x00300000285 movd mm5, eax; mm5 = trY = 0x00300000286 mov eax, 0x00000700287 movd mm6, eax; mm6 = trU = 0x00000700288 mov eax, 0x00000006289 movd mm7, eax; mm7 = trV = 0x00000006291 ; Compare the results292 pcmpgtd mm2, trY293 pcmpgtd mm3, trU294 pcmpgtd mm4, trV295 por mm2, mm3296 por mm2, mm4298 movd retval, mm2300 EMMS301 }302 return (retval != 0);303 #else304 return305 (abs32((YUV1 & Ymask) - (YUV2 & Ymask)) > trY) ||306 (abs32((YUV1 & Umask) - (YUV2 & Umask)) > trU) ||307 (abs32((YUV1 & 
Vmask) - (YUV2 & Vmask)) > trV);308 #endif309 }311 unsigned int RGBtoYUV(unsigned int c)312 { // Division through 3 slows down the emulation about 10% !!!313 #ifdef MMX314 unsigned int retval;315 __asm316 {317 movd mm0, c318 movq mm1, mm0319 movq mm2, mm0; mm0 = mm1 = mm2 = c321 mov eax, 0x000000FF322 movd mm5, eax; mm5 = REDMASK = 0x000000FF323 mov eax, 0x0000FF00324 movd mm6, eax; mm6 = GREENMASK = 0x0000FF00325 mov eax, 0x00FF0000326 movd mm7, eax; mm7 = BLUEMASK = 0x00FF0000328 pand mm0, mm5329 pand mm1, mm6330 pand mm2, mm7; mm0 = R mm1 = G mm2 = B332 movq mm3, mm0333 paddd mm3, mm1334 paddd mm3, mm2335 ; psrld mm3, 2; mm3 = Y336 ; pslld mm3, 16337 pslld mm3, 14; mm3 = Y << 16339 mov eax, 512340 movd mm7, eax; mm7 = 128 << 2 = 512342 movq mm4, mm0343 psubd mm4, mm2344 ; psrld mm4, 2345 ; paddd mm4, mm7; mm4 = U346 ; pslld mm4, 8; mm4 = U << 8347 paddd mm4, mm7348 pslld mm4, 6350 mov eax, 128351 movd mm7, eax; mm7 = 128353 movq mm5, mm1354 pslld mm5, 1355 psubd mm5, mm0356 psubd mm5, mm2357 psrld mm5, 3358 paddd mm5, mm7; mm5 = V360 paddd mm5, mm4361 paddd mm5, mm3363 movd retval, mm5365 EMMS366 }367 return retval;368 #else369 unsigned char r, g, b, Y, u, v;370 r = (c & 0x000000FF);371 g = (c & 0x0000FF00) >> 8;372 b = (c & 0x00FF0000) >> 16;373 Y = (r + g + b) >> 2;374 u = 128 + ((r - b) >> 2);375 v = 128 + ((-r + 2 * g - b) >> 3);376 return (Y << 16) + (u << 8) + v;378 // Extremely High Quality Code379 //unsigned char r, g, b;380 //r = c & 0xFF;381 //g = (c >> 8) & 0xFF;382 //b = (c >> 16) & 0xFF;383 //unsigned char y, u, v;384 //y = (0.256788 * r + 0.504129 * g + 0.097906 * b) + 16;385 //u = (-0.148223 * r - 0.290993 * g + 0.439216 * b) + 128;386 //v = (0.439216 * r - 0.367788 * g - 0.071427 * b) + 128;387 //return (y << 16) + (u << 8) + v;388 #endif389 }