Mercurial > vba-clojure

     1 #include "../Port.h"

     2 #include "hq_shared32.h"

     3 #include "interp.h"

     4 

     5 const unsigned __int64 reg_blank = 0x0000000000000000;

     6 const unsigned __int64 const7	 = 0x0000000700070007;

     7 const unsigned __int64 treshold	 = 0x0000000000300706;

     8 

     9 void Interp1(unsigned char *pc, unsigned int c1, unsigned int c2)

    10 {

    11 	//*((int*)pc) = (c1*3+c2)/4;

    12 

    13 #ifdef MMX

    14 	__asm

    15 	{

    16 		mov eax, pc

    17 		movd mm1, c1

    18 		movd mm2, c2

    19 		movq mm0, mm1

    20 		pslld mm0, 2

    21 		psubd mm0, mm1

    22 		paddd mm0, mm2

    23 		psrld mm0, 2

    24 		movd    [eax], mm0

    25 		    EMMS

    26 	}

    27 #else

    28 	__asm

    29 	{

    30 		mov eax, pc

    31 		mov edx, c1

    32 		shl edx, 2

    33 		add edx, c2

    34 		sub edx, c1

    35 		shr edx, 2

    36 		mov        [eax], edx

    37 	}

    38 #endif

    39 }

    40 

    41 void Interp2(unsigned char *pc, unsigned int c1, unsigned int c2, unsigned int c3)

    42 {

    43 	//*((int*)pc) = (c1*2+c2+c3)/4;

    44 

    45 #ifdef MMX

    46 	__asm

    47 	{

    48 		mov eax, pc

    49 		movd mm0, c1

    50 		movd mm1, c2

    51 		movd mm2, c3

    52 		pslld mm0, 1

    53 		paddd mm0, mm1

    54 		paddd mm0, mm2

    55 		psrad mm0, 2

    56 		movd [eax], mm0

    57 		    EMMS

    58 	}

    59 #else

    60 	__asm

    61 	{

    62 		mov eax, pc

    63 		mov edx, c1

    64 		shl edx, 1

    65 		add edx, c2

    66 		add edx, c3

    67 		shr edx, 2

    68 		mov        [eax], edx

    69 	}

    70 #endif

    71 }

    72 

    73 void Interp3(unsigned char *pc, unsigned int c1, unsigned int c2)

    74 {

    75 	//*((int*)pc) = (c1*7+c2)/8;

    76 	//*((int*)pc) = ((((c1 & 0x00FF00)*7 + (c2 & 0x00FF00) ) & 0x0007F800) +

    77 	//	            (((c1 & 0xFF00FF)*7 + (c2 & 0xFF00FF) ) & 0x07F807F8)) >> 3;

    78 

    79 #ifdef MMX

    80 	__asm

    81 	{

    82 		mov eax, pc

    83 		movd mm1, c1

    84 		movd mm2, c2

    85 		punpcklbw mm1, reg_blank

    86 		punpcklbw mm2, reg_blank

    87 		pmullw mm1, const7

    88 		paddw mm1, mm2

    89 		psrlw mm1, 3

    90 		packuswb mm1, reg_blank

    91 		    movd       [eax], mm1

    92 		    EMMS

    93 	}

    94 #else

    95 	__asm

    96 	{

    97 		mov eax, c1

    98 		mov ebx, c2

    99 		mov ecx, eax

   100 		shl ecx, 3

   101 		sub ecx, eax

   102 		add ecx, ebx

   103 		shr ecx, 3

   104 		mov eax, pc

   105 		    mov     [eax], ecx

   106 	}

   107 #endif

   108 }

   109 

   110 void Interp4(unsigned char *pc, unsigned int c1, unsigned int c2, unsigned int c3)

   111 {

   112 	//*((int*)pc) = (c1*2+(c2+c3)*7)/16;

   113 	//*((int*)pc) = ((((c1 & 0x00FF00)*2 + ((c2 & 0x00FF00) + (c3 & 0x00FF00))*7 ) & 0x000FF000) +

   114 	//              (((c1 & 0xFF00FF)*2 + ((c2 & 0xFF00FF) + (c3 & 0xFF00FF))*7 ) & 0x0FF00FF0)) >> 4;

   115 

   116 #ifdef MMX

   117 	__asm

   118 	{

   119 		mov eax, pc

   120 		movd mm1, c1

   121 		movd mm2, c2

   122 		movd mm3, c3

   123 		punpcklbw mm1, reg_blank

   124 		punpcklbw mm2, reg_blank

   125 		punpcklbw mm3, reg_blank

   126 		psllw mm1, 1

   127 		paddw mm2, mm3

   128 		pmullw mm2, const7

   129 		paddw mm1, mm2

   130 		psrlw mm1, 4

   131 		packuswb mm1, reg_blank

   132 		    movd       [eax], mm1

   133 		    EMMS

   134 	}

   135 #else

   136 

   137 	__asm

   138 	{

   139 		mov eax, [c1]

   140 		and     eax, 0FF00h

   141 		shl eax, 1

   142 		mov ecx, [c2]

   143 		and     ecx, 0FF00h

   144 		mov edx, [c3]

   145 		and     edx, 0FF00h

   146 		add ecx, edx

   147 		imul ecx, ecx, 7

   148 		add eax, ecx

   149 		and     eax, 0FF000h

   150 

   151 		mov ebx, [c1]

   152 		and     ebx, 0FF00FFh

   153 		shl ebx, 1

   154 		mov ecx, [c2]

   155 		and     ecx, 0FF00FFh

   156 		mov edx, [c3]

   157 		and     edx, 0FF00FFh

   158 		add ecx, edx

   159 		imul ecx, ecx, 7

   160 		add ebx, ecx

   161 		and     ebx, 0FF00FF0h

   162 

   163 		add eax, ebx

   164 		shr eax, 4

   165 

   166 		mov ebx, pc

   167 		    mov     [ebx], eax

   168 	}

   169 #endif

   170 }

   171 

   172 void Interp5(unsigned char *pc, unsigned int c1, unsigned int c2)

   173 {

   174 	//*((int*)pc) = (c1+c2)/2;

   175 

   176 #ifdef MMX

   177 	__asm

   178 	{

   179 		mov eax, pc

   180 		movd mm0, c1

   181 		movd mm1, c2

   182 		paddd mm0, mm1

   183 		psrad mm0, 1

   184 		movd    [eax], mm0

   185 		    EMMS

   186 	}

   187 #else

   188 	__asm

   189 	{

   190 		mov eax, pc

   191 		mov edx, c1

   192 		add edx, c2

   193 		shr edx, 1

   194 		mov        [eax], edx

   195 	}

   196 #endif

   197 }

   198 

   199 void Interp1_16(unsigned char *pc, unsigned short c1, unsigned short c2)

   200 {

   201 	*((unsigned short *)pc) = interp_16_31(c1, c2);

   202 	//*((int*)pc) = (c1*3+c2)/4;

   203 }

   204 

   205 void Interp2_16(unsigned char *pc, unsigned short c1, unsigned short c2, unsigned short c3)

   206 {

   207 	*((unsigned short *)pc) = interp_16_211(c1, c2, c3);

   208 	//*((int*)pc) = (c1*2+c2+c3)/4;

   209 }

   210 

   211 void Interp3_16(unsigned char *pc, unsigned short c1, unsigned short c2)

   212 {

   213 	*((unsigned short *)pc) = interp_16_71(c1, c2);

   214 //	*((unsigned short*)pc) = (c1*7+c2)/8;

   215 //	*((unsigned short*)pc) = ((((c1 & 0x00FF00)*7 + (c2 & 0x00FF00) ) & 0x0007F800) +

   216 //		            (((c1 & 0xFF00FF)*7 + (c2 & 0xFF00FF) ) & 0x07F807F8)) >> 3;

   217 }

   218 

   219 void Interp4_16(unsigned char *pc, unsigned short c1, unsigned short c2, unsigned short c3)

   220 {

   221 	*((unsigned short *)pc) = interp_16_772(c2, c3, c1);

   222 //	*((unsigned short*)pc) = (c1*2+(c2+c3)*7)/16;

   223 //	*((unsigned short*)pc) = ((((c1 & 0x00FF00)*2 + ((c2 & 0x00FF00) + (c3 & 0x00FF00))*7 ) & 0x000FF000) +

   224 //	              (((c1 & 0xFF00FF)*2 + ((c2 & 0xFF00FF) + (c3 & 0xFF00FF))*7 ) & 0x0FF00FF0)) >> 4;

   225 }

   226 

   227 void Interp5_16(unsigned char *pc, unsigned short c1, unsigned short c2)

   228 {

   229 	*((unsigned short *)pc) = interp_16_11(c1, c2);

   230 }

   231 

   232 bool Diff(unsigned int c1, unsigned int c2)

   233 {

   234 	unsigned int

   235 	    YUV1 = RGBtoYUV(c1),

   236 	    YUV2 = RGBtoYUV(c2);

   237 

   238 	if (YUV1 == YUV2) return false;  // Save some processing power

   239 

   240 #ifdef MMX

   241 	unsigned int retval;

   242 	__asm

   243 	{

   244 		mov eax, 0x7FFFFFFF

   245 		movd mm7, eax; mm7 = ABS_MASK = 0x7FFFFFFF

   246 

   247 		; Copy source colors in first reg

   248 		movd mm0, YUV1

   249 		movd mm1, YUV2

   250 

   251 		mov eax, 0x00FF0000

   252 		movd mm6, eax; mm6 = Ymask = 0x00FF0000

   253 

   254 		; Calculate color Y difference

   255 		movq mm2, mm0

   256 		movq mm3, mm1

   257 		pand mm2, mm6

   258 		pand mm3, mm6

   259 		psubd mm2, mm3

   260 		pand mm2, mm7

   261 

   262 		mov eax, 0x0000FF00

   263 		movd mm6, eax; mm6 = Umask = 0x0000FF00

   264 

   265 		; Calculate color U difference

   266 		movq mm3, mm0

   267 		movq mm4, mm1

   268 		pand mm3, mm6

   269 		pand mm4, mm6

   270 		psubd mm3, mm4

   271 		pand mm3, mm7

   272 

   273 		mov eax, 0x000000FF

   274 		movd mm6, eax; mm6 = Vmask = 0x000000FF

   275 

   276 		; Calculate color V difference

   277 		movq mm4, mm0

   278 		movq mm5, mm1

   279 		pand mm4, mm6

   280 		pand mm5, mm6

   281 		psubd mm4, mm5

   282 		pand mm4, mm7

   283 

   284 		mov eax, 0x00300000

   285 		movd mm5, eax; mm5 = trY = 0x00300000

   286 		                           mov eax, 0x00000700

   287 		movd mm6, eax; mm6 = trU = 0x00000700

   288 		                           mov eax, 0x00000006

   289 		movd mm7, eax; mm7 = trV = 0x00000006

   290 

   291 		; Compare the results

   292 		pcmpgtd mm2, trY

   293 		pcmpgtd mm3, trU

   294 		pcmpgtd mm4, trV

   295 		por mm2, mm3

   296 		por mm2, mm4

   297 

   298 		movd retval, mm2

   299 

   300 		        EMMS

   301 	}

   302 	return (retval != 0);

   303 #else

   304 	return

   305 	    (abs32((YUV1 & Ymask) - (YUV2 & Ymask)) > trY) ||

   306 	    (abs32((YUV1 & Umask) - (YUV2 & Umask)) > trU) ||

   307 	    (abs32((YUV1 & Vmask) - (YUV2 & Vmask)) > trV);

   308 #endif

   309 }

   310 

   311 unsigned int RGBtoYUV(unsigned int c)

   312 {   // Division through 3 slows down the emulation about 10% !!!

   313 #ifdef MMX

   314 	unsigned int retval;

   315 	__asm

   316 	{

   317 		movd mm0, c

   318 		movq mm1, mm0

   319 		movq mm2, mm0; mm0 = mm1 = mm2 = c

   320 

   321 		                                 mov eax, 0x000000FF

   322 		movd mm5, eax; mm5 = REDMASK = 0x000000FF

   323 		                               mov eax, 0x0000FF00

   324 		movd mm6, eax; mm6 = GREENMASK = 0x0000FF00

   325 		                                 mov eax, 0x00FF0000

   326 		movd mm7, eax; mm7 = BLUEMASK = 0x00FF0000

   327 

   328 		                                pand mm0, mm5

   329 		pand mm1, mm6

   330 		pand mm2, mm7; mm0 = R mm1 = G mm2 = B

   331 

   332 		                                     movq mm3, mm0

   333 		paddd mm3, mm1

   334 		paddd mm3, mm2

   335 		;       psrld mm3, 2; mm3 = Y

   336 		;       pslld mm3, 16

   337 		pslld mm3, 14; mm3 = Y << 16

   338 

   339 		                     mov eax, 512

   340 		movd mm7, eax; mm7 = 128 << 2 = 512

   341 

   342 		                                movq mm4, mm0

   343 		psubd mm4, mm2

   344 		;       psrld mm4, 2

   345 		;       paddd mm4, mm7; mm4 = U

   346 		;       pslld mm4, 8; mm4 = U << 8

   347 		                            paddd mm4, mm7

   348 		pslld mm4, 6

   349 

   350 		mov eax, 128

   351 		movd mm7, eax; mm7 = 128

   352 

   353 		                     movq mm5, mm1

   354 		pslld mm5, 1

   355 		psubd mm5, mm0

   356 		psubd mm5, mm2

   357 		psrld mm5, 3

   358 		paddd mm5, mm7; mm5 = V

   359 

   360 		                      paddd mm5, mm4

   361 		paddd mm5, mm3

   362 

   363 		movd retval, mm5

   364 

   365 		EMMS

   366 	}

   367 	return retval;

   368 #else

   369 	unsigned char r, g, b, Y, u, v;

   370 	r = (c & 0x000000FF);

   371 	g = (c & 0x0000FF00) >> 8;

   372 	b = (c & 0x00FF0000) >> 16;

   373 	Y = (r + g + b) >> 2;

   374 	u = 128 + ((r - b) >> 2);

   375 	v = 128 + ((-r + 2 * g - b) >> 3);

   376 	return (Y << 16) + (u << 8) + v;

   377 

   378 	// Extremely High Quality Code

   379 	//unsigned char r, g, b;

   380 	//r = c & 0xFF;

   381 	//g = (c >> 8) & 0xFF;

   382 	//b = (c >> 16) & 0xFF;

   383 	//unsigned char y, u, v;

   384 	//y = (0.256788 * r  +  0.504129 * g  +  0.097906 * b) + 16;

   385 	//u = (-0.148223 * r  -  0.290993 * g  +  0.439216 * b) + 128;

   386 	//v = (0.439216 * r  -  0.367788 * g  -  0.071427 * b) + 128;

   387 	//return (y << 16) + (u << 8) + v;

   388 #endif

   389 }
author	Robert McIntyre <rlm@mit.edu>
date	Sat, 01 Sep 2012 10:15:04 -0500
parents	f9f4f1b99eed
children