Mercurial > vba-clojure
diff src/filters/hq_shared32.cpp @ 1:f9f4f1b99eed
importing src directory
author | Robert McIntyre <rlm@mit.edu> |
---|---|
date | Sat, 03 Mar 2012 10:31:27 -0600 |
parents | |
children |
line wrap: on
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/filters/hq_shared32.cpp Sat Mar 03 10:31:27 2012 -0600 1.3 @@ -0,0 +1,389 @@ 1.4 +#include "../Port.h" 1.5 +#include "hq_shared32.h" 1.6 +#include "interp.h" 1.7 + 1.8 +const unsigned __int64 reg_blank = 0x0000000000000000; 1.9 +const unsigned __int64 const7 = 0x0000000700070007; 1.10 +const unsigned __int64 treshold = 0x0000000000300706; 1.11 + 1.12 +void Interp1(unsigned char *pc, unsigned int c1, unsigned int c2) 1.13 +{ 1.14 + //*((int*)pc) = (c1*3+c2)/4; 1.15 + 1.16 +#ifdef MMX 1.17 + __asm 1.18 + { 1.19 + mov eax, pc 1.20 + movd mm1, c1 1.21 + movd mm2, c2 1.22 + movq mm0, mm1 1.23 + pslld mm0, 2 1.24 + psubd mm0, mm1 1.25 + paddd mm0, mm2 1.26 + psrld mm0, 2 1.27 + movd [eax], mm0 1.28 + EMMS 1.29 + } 1.30 +#else 1.31 + __asm 1.32 + { 1.33 + mov eax, pc 1.34 + mov edx, c1 1.35 + shl edx, 2 1.36 + add edx, c2 1.37 + sub edx, c1 1.38 + shr edx, 2 1.39 + mov [eax], edx 1.40 + } 1.41 +#endif 1.42 +} 1.43 + 1.44 +void Interp2(unsigned char *pc, unsigned int c1, unsigned int c2, unsigned int c3) 1.45 +{ 1.46 + //*((int*)pc) = (c1*2+c2+c3)/4; 1.47 + 1.48 +#ifdef MMX 1.49 + __asm 1.50 + { 1.51 + mov eax, pc 1.52 + movd mm0, c1 1.53 + movd mm1, c2 1.54 + movd mm2, c3 1.55 + pslld mm0, 1 1.56 + paddd mm0, mm1 1.57 + paddd mm0, mm2 1.58 + psrad mm0, 2 1.59 + movd [eax], mm0 1.60 + EMMS 1.61 + } 1.62 +#else 1.63 + __asm 1.64 + { 1.65 + mov eax, pc 1.66 + mov edx, c1 1.67 + shl edx, 1 1.68 + add edx, c2 1.69 + add edx, c3 1.70 + shr edx, 2 1.71 + mov [eax], edx 1.72 + } 1.73 +#endif 1.74 +} 1.75 + 1.76 +void Interp3(unsigned char *pc, unsigned int c1, unsigned int c2) 1.77 +{ 1.78 + //*((int*)pc) = (c1*7+c2)/8; 1.79 + //*((int*)pc) = ((((c1 & 0x00FF00)*7 + (c2 & 0x00FF00) ) & 0x0007F800) + 1.80 + // (((c1 & 0xFF00FF)*7 + (c2 & 0xFF00FF) ) & 0x07F807F8)) >> 3; 1.81 + 1.82 +#ifdef MMX 1.83 + __asm 1.84 + { 1.85 + mov eax, pc 1.86 + movd mm1, c1 1.87 + movd mm2, c2 1.88 + punpcklbw mm1, reg_blank 1.89 + punpcklbw mm2, reg_blank 1.90 + pmullw mm1, const7 1.91 + paddw mm1, mm2 1.92 + psrlw mm1, 3 1.93 + packuswb mm1, reg_blank 1.94 + movd [eax], mm1 1.95 + EMMS 1.96 + } 1.97 +#else 1.98 + __asm 1.99 + { 1.100 + mov eax, c1 1.101 + mov ebx, c2 1.102 + mov ecx, eax 1.103 + shl ecx, 3 1.104 + sub ecx, eax 1.105 + add ecx, ebx 1.106 + shr ecx, 3 1.107 + mov eax, pc 1.108 + mov [eax], ecx 1.109 + } 1.110 +#endif 1.111 +} 1.112 + 1.113 +void Interp4(unsigned char *pc, unsigned int c1, unsigned int c2, unsigned int c3) 1.114 +{ 1.115 + //*((int*)pc) = (c1*2+(c2+c3)*7)/16; 1.116 + //*((int*)pc) = ((((c1 & 0x00FF00)*2 + ((c2 & 0x00FF00) + (c3 & 0x00FF00))*7 ) & 0x000FF000) + 1.117 + // (((c1 & 0xFF00FF)*2 + ((c2 & 0xFF00FF) + (c3 & 0xFF00FF))*7 ) & 0x0FF00FF0)) >> 4; 1.118 + 1.119 +#ifdef MMX 1.120 + __asm 1.121 + { 1.122 + mov eax, pc 1.123 + movd mm1, c1 1.124 + movd mm2, c2 1.125 + movd mm3, c3 1.126 + punpcklbw mm1, reg_blank 1.127 + punpcklbw mm2, reg_blank 1.128 + punpcklbw mm3, reg_blank 1.129 + psllw mm1, 1 1.130 + paddw mm2, mm3 1.131 + pmullw mm2, const7 1.132 + paddw mm1, mm2 1.133 + psrlw mm1, 4 1.134 + packuswb mm1, reg_blank 1.135 + movd [eax], mm1 1.136 + EMMS 1.137 + } 1.138 +#else 1.139 + 1.140 + __asm 1.141 + { 1.142 + mov eax, [c1] 1.143 + and eax, 0FF00h 1.144 + shl eax, 1 1.145 + mov ecx, [c2] 1.146 + and ecx, 0FF00h 1.147 + mov edx, [c3] 1.148 + and edx, 0FF00h 1.149 + add ecx, edx 1.150 + imul ecx, ecx, 7 1.151 + add eax, ecx 1.152 + and eax, 0FF000h 1.153 + 1.154 + mov ebx, [c1] 1.155 + and ebx, 0FF00FFh 1.156 + shl ebx, 1 1.157 + mov ecx, [c2] 1.158 + and ecx, 0FF00FFh 1.159 + mov edx, [c3] 1.160 + and edx, 0FF00FFh 1.161 + add ecx, edx 1.162 + imul ecx, ecx, 7 1.163 + add ebx, ecx 1.164 + and ebx, 0FF00FF0h 1.165 + 1.166 + add eax, ebx 1.167 + shr eax, 4 1.168 + 1.169 + mov ebx, pc 1.170 + mov [ebx], eax 1.171 + } 1.172 +#endif 1.173 +} 1.174 + 1.175 +void Interp5(unsigned char *pc, unsigned int c1, unsigned int c2) 1.176 +{ 1.177 + //*((int*)pc) = (c1+c2)/2; 1.178 + 1.179 +#ifdef MMX 1.180 + __asm 1.181 + { 1.182 + mov eax, pc 1.183 + movd mm0, c1 1.184 + movd mm1, c2 1.185 + paddd mm0, mm1 1.186 + psrad mm0, 1 1.187 + movd [eax], mm0 1.188 + EMMS 1.189 + } 1.190 +#else 1.191 + __asm 1.192 + { 1.193 + mov eax, pc 1.194 + mov edx, c1 1.195 + add edx, c2 1.196 + shr edx, 1 1.197 + mov [eax], edx 1.198 + } 1.199 +#endif 1.200 +} 1.201 + 1.202 +void Interp1_16(unsigned char *pc, unsigned short c1, unsigned short c2) 1.203 +{ 1.204 + *((unsigned short *)pc) = interp_16_31(c1, c2); 1.205 + //*((int*)pc) = (c1*3+c2)/4; 1.206 +} 1.207 + 1.208 +void Interp2_16(unsigned char *pc, unsigned short c1, unsigned short c2, unsigned short c3) 1.209 +{ 1.210 + *((unsigned short *)pc) = interp_16_211(c1, c2, c3); 1.211 + //*((int*)pc) = (c1*2+c2+c3)/4; 1.212 +} 1.213 + 1.214 +void Interp3_16(unsigned char *pc, unsigned short c1, unsigned short c2) 1.215 +{ 1.216 + *((unsigned short *)pc) = interp_16_71(c1, c2); 1.217 +// *((unsigned short*)pc) = (c1*7+c2)/8; 1.218 +// *((unsigned short*)pc) = ((((c1 & 0x00FF00)*7 + (c2 & 0x00FF00) ) & 0x0007F800) + 1.219 +// (((c1 & 0xFF00FF)*7 + (c2 & 0xFF00FF) ) & 0x07F807F8)) >> 3; 1.220 +} 1.221 + 1.222 +void Interp4_16(unsigned char *pc, unsigned short c1, unsigned short c2, unsigned short c3) 1.223 +{ 1.224 + *((unsigned short *)pc) = interp_16_772(c2, c3, c1); 1.225 +// *((unsigned short*)pc) = (c1*2+(c2+c3)*7)/16; 1.226 +// *((unsigned short*)pc) = ((((c1 & 0x00FF00)*2 + ((c2 & 0x00FF00) + (c3 & 0x00FF00))*7 ) & 0x000FF000) + 1.227 +// (((c1 & 0xFF00FF)*2 + ((c2 & 0xFF00FF) + (c3 & 0xFF00FF))*7 ) & 0x0FF00FF0)) >> 4; 1.228 +} 1.229 + 1.230 +void Interp5_16(unsigned char *pc, unsigned short c1, unsigned short c2) 1.231 +{ 1.232 + *((unsigned short *)pc) = interp_16_11(c1, c2); 1.233 +} 1.234 + 1.235 +bool Diff(unsigned int c1, unsigned int c2) 1.236 +{ 1.237 + unsigned int 1.238 + YUV1 = RGBtoYUV(c1), 1.239 + YUV2 = RGBtoYUV(c2); 1.240 + 1.241 + if (YUV1 == YUV2) return false; // Save some processing power 1.242 + 1.243 +#ifdef MMX 1.244 + unsigned int retval; 1.245 + __asm 1.246 + { 1.247 + mov eax, 0x7FFFFFFF 1.248 + movd mm7, eax; mm7 = ABS_MASK = 0x7FFFFFFF 1.249 + 1.250 + ; Copy source colors in first reg 1.251 + movd mm0, YUV1 1.252 + movd mm1, YUV2 1.253 + 1.254 + mov eax, 0x00FF0000 1.255 + movd mm6, eax; mm6 = Ymask = 0x00FF0000 1.256 + 1.257 + ; Calculate color Y difference 1.258 + movq mm2, mm0 1.259 + movq mm3, mm1 1.260 + pand mm2, mm6 1.261 + pand mm3, mm6 1.262 + psubd mm2, mm3 1.263 + pand mm2, mm7 1.264 + 1.265 + mov eax, 0x0000FF00 1.266 + movd mm6, eax; mm6 = Umask = 0x0000FF00 1.267 + 1.268 + ; Calculate color U difference 1.269 + movq mm3, mm0 1.270 + movq mm4, mm1 1.271 + pand mm3, mm6 1.272 + pand mm4, mm6 1.273 + psubd mm3, mm4 1.274 + pand mm3, mm7 1.275 + 1.276 + mov eax, 0x000000FF 1.277 + movd mm6, eax; mm6 = Vmask = 0x000000FF 1.278 + 1.279 + ; Calculate color V difference 1.280 + movq mm4, mm0 1.281 + movq mm5, mm1 1.282 + pand mm4, mm6 1.283 + pand mm5, mm6 1.284 + psubd mm4, mm5 1.285 + pand mm4, mm7 1.286 + 1.287 + mov eax, 0x00300000 1.288 + movd mm5, eax; mm5 = trY = 0x00300000 1.289 + mov eax, 0x00000700 1.290 + movd mm6, eax; mm6 = trU = 0x00000700 1.291 + mov eax, 0x00000006 1.292 + movd mm7, eax; mm7 = trV = 0x00000006 1.293 + 1.294 + ; Compare the results 1.295 + pcmpgtd mm2, trY 1.296 + pcmpgtd mm3, trU 1.297 + pcmpgtd mm4, trV 1.298 + por mm2, mm3 1.299 + por mm2, mm4 1.300 + 1.301 + movd retval, mm2 1.302 + 1.303 + EMMS 1.304 + } 1.305 + return (retval != 0); 1.306 +#else 1.307 + return 1.308 + (abs32((YUV1 & Ymask) - (YUV2 & Ymask)) > trY) || 1.309 + (abs32((YUV1 & Umask) - (YUV2 & Umask)) > trU) || 1.310 + (abs32((YUV1 & Vmask) - (YUV2 & Vmask)) > trV); 1.311 +#endif 1.312 +} 1.313 + 1.314 +unsigned int RGBtoYUV(unsigned int c) 1.315 +{ // Division through 3 slows down the emulation about 10% !!! 1.316 +#ifdef MMX 1.317 + unsigned int retval; 1.318 + __asm 1.319 + { 1.320 + movd mm0, c 1.321 + movq mm1, mm0 1.322 + movq mm2, mm0; mm0 = mm1 = mm2 = c 1.323 + 1.324 + mov eax, 0x000000FF 1.325 + movd mm5, eax; mm5 = REDMASK = 0x000000FF 1.326 + mov eax, 0x0000FF00 1.327 + movd mm6, eax; mm6 = GREENMASK = 0x0000FF00 1.328 + mov eax, 0x00FF0000 1.329 + movd mm7, eax; mm7 = BLUEMASK = 0x00FF0000 1.330 + 1.331 + pand mm0, mm5 1.332 + pand mm1, mm6 1.333 + pand mm2, mm7; mm0 = R mm1 = G mm2 = B 1.334 + 1.335 + movq mm3, mm0 1.336 + paddd mm3, mm1 1.337 + paddd mm3, mm2 1.338 + ; psrld mm3, 2; mm3 = Y 1.339 + ; pslld mm3, 16 1.340 + pslld mm3, 14; mm3 = Y << 16 1.341 + 1.342 + mov eax, 512 1.343 + movd mm7, eax; mm7 = 128 << 2 = 512 1.344 + 1.345 + movq mm4, mm0 1.346 + psubd mm4, mm2 1.347 + ; psrld mm4, 2 1.348 + ; paddd mm4, mm7; mm4 = U 1.349 + ; pslld mm4, 8; mm4 = U << 8 1.350 + paddd mm4, mm7 1.351 + pslld mm4, 6 1.352 + 1.353 + mov eax, 128 1.354 + movd mm7, eax; mm7 = 128 1.355 + 1.356 + movq mm5, mm1 1.357 + pslld mm5, 1 1.358 + psubd mm5, mm0 1.359 + psubd mm5, mm2 1.360 + psrld mm5, 3 1.361 + paddd mm5, mm7; mm5 = V 1.362 + 1.363 + paddd mm5, mm4 1.364 + paddd mm5, mm3 1.365 + 1.366 + movd retval, mm5 1.367 + 1.368 + EMMS 1.369 + } 1.370 + return retval; 1.371 +#else 1.372 + unsigned char r, g, b, Y, u, v; 1.373 + r = (c & 0x000000FF); 1.374 + g = (c & 0x0000FF00) >> 8; 1.375 + b = (c & 0x00FF0000) >> 16; 1.376 + Y = (r + g + b) >> 2; 1.377 + u = 128 + ((r - b) >> 2); 1.378 + v = 128 + ((-r + 2 * g - b) >> 3); 1.379 + return (Y << 16) + (u << 8) + v; 1.380 + 1.381 + // Extremely High Quality Code 1.382 + //unsigned char r, g, b; 1.383 + //r = c & 0xFF; 1.384 + //g = (c >> 8) & 0xFF; 1.385 + //b = (c >> 16) & 0xFF; 1.386 + //unsigned char y, u, v; 1.387 + //y = (0.256788 * r + 0.504129 * g + 0.097906 * b) + 16; 1.388 + //u = (-0.148223 * r - 0.290993 * g + 0.439216 * b) + 128; 1.389 + //v = (0.439216 * r - 0.367788 * g - 0.071427 * b) + 128; 1.390 + //return (y << 16) + (u << 8) + v; 1.391 +#endif 1.392 +} 1.393 \ No newline at end of file