rlm@1
|
1 #include "../Port.h"
|
rlm@1
|
2 #include "hq_shared32.h"
|
rlm@1
|
3 #include "interp.h"
|
rlm@1
|
4
|
rlm@1
|
5 const unsigned __int64 reg_blank = 0x0000000000000000;
|
rlm@1
|
6 const unsigned __int64 const7 = 0x0000000700070007;
|
rlm@1
|
7 const unsigned __int64 treshold = 0x0000000000300706;
|
rlm@1
|
8
|
rlm@1
|
/*
 * Blend two packed pixels 3:1 in favour of c1 and store the 32-bit result
 * at pc:
 *
 *     *pc = (c1 * 3 + c2) >> 2
 *
 * The arithmetic is performed on the whole 32-bit word (modulo 2^32), not
 * per colour channel, exactly as the previous assembly did.
 *
 * pc      destination; four bytes are written through it as one unsigned int
 * c1, c2  packed source pixels
 *
 * The scalar path is plain C so the function builds with any compiler and
 * on any target; the MMX path is MSVC x86-32 inline assembly and is only
 * compiled when MMX is defined.
 */
void Interp1(unsigned char *pc, unsigned int c1, unsigned int c2)
{
#ifdef MMX
    __asm
    {
        mov   eax, pc
        movd  mm1, c1
        movd  mm2, c2
        movq  mm0, mm1
        pslld mm0, 2        ; mm0 = c1 * 4
        psubd mm0, mm1      ; mm0 = c1 * 3
        paddd mm0, mm2      ; mm0 = c1 * 3 + c2
        psrld mm0, 2
        movd  [eax], mm0
        EMMS
    }
#else
    /* Unsigned arithmetic wraps modulo 2^32, matching shl/add/sub/shr. */
    *((unsigned int *)pc) = (c1 * 3u + c2) >> 2;
#endif
}
|
rlm@1
|
40
|
rlm@1
|
/*
 * Blend three packed pixels with weights 2:1:1 and store the 32-bit result
 * at pc:
 *
 *     *pc = (c1 * 2 + c2 + c3) >> 2
 *
 * The arithmetic is performed on the whole 32-bit word (modulo 2^32), not
 * per colour channel.
 *
 * pc          destination; four bytes are written as one unsigned int
 * c1, c2, c3  packed source pixels
 *
 * The scalar path is plain C so it builds everywhere.  The MMX path now
 * uses a logical shift (psrld) like the scalar path and Interp1 do; the
 * former arithmetic shift (psrad) sign-extended the sum whenever bit 31
 * was set, so the two build configurations disagreed for such inputs.
 */
void Interp2(unsigned char *pc, unsigned int c1, unsigned int c2, unsigned int c3)
{
#ifdef MMX
    __asm
    {
        mov   eax, pc
        movd  mm0, c1
        movd  mm1, c2
        movd  mm2, c3
        pslld mm0, 1        ; mm0 = c1 * 2
        paddd mm0, mm1
        paddd mm0, mm2      ; mm0 = c1 * 2 + c2 + c3
        psrld mm0, 2        ; logical shift, same as the scalar path
        movd  [eax], mm0
        EMMS
    }
#else
    /* Unsigned arithmetic wraps modulo 2^32, matching shl/add/add/shr. */
    *((unsigned int *)pc) = (c1 * 2u + c2 + c3) >> 2;
#endif
}
|
rlm@1
|
72
|
rlm@1
|
/*
 * Blend two packed pixels 7:1 in favour of c1, one colour channel at a
 * time, and store the 32-bit result at pc.  For each of the three low
 * bytes:
 *
 *     out = (c1_byte * 7 + c2_byte) >> 3
 *
 * pc      destination; four bytes are written as one unsigned int
 * c1, c2  packed source pixels, one channel per byte in bits 0..23
 *
 * Scalar path: implemented with the masked formula this function has
 * always documented.  Bits 8..15 are processed on their own and bits
 * 0..7 / 16..23 together, which keeps every intermediate sum inside its
 * own bit field.  The top byte of the inputs is ignored and the top byte
 * of the result is zero.  The previous scalar assembly computed
 * (c1 * 7 + c2) >> 3 on the whole word instead, which let the low three
 * bits of each channel leak into the channel below it and disagreed with
 * the per-byte MMX path.
 *
 * MMX path (MSVC x86-32 only, compiled when MMX is defined): widens the
 * bytes to 16-bit lanes, multiplies by const7 and packs back to bytes.
 */
void Interp3(unsigned char *pc, unsigned int c1, unsigned int c2)
{
#ifdef MMX
    __asm
    {
        mov       eax, pc
        movd      mm1, c1
        movd      mm2, c2
        punpcklbw mm1, reg_blank    ; bytes -> 16-bit lanes
        punpcklbw mm2, reg_blank
        pmullw    mm1, const7
        paddw     mm1, mm2
        psrlw     mm1, 3
        packuswb  mm1, reg_blank    ; 16-bit lanes -> bytes
        movd      [eax], mm1
        EMMS
    }
#else
    /* Bits 8..15: the weighted sum occupies bits 8..18, keep bits 11..18. */
    const unsigned int mid =
        ((c1 & 0x0000FF00u) * 7u + (c2 & 0x0000FF00u)) & 0x0007F800u;
    /* Bits 0..7 and 16..23 share one multiply; keep bits 3..10 and 19..26. */
    const unsigned int outer =
        ((c1 & 0x00FF00FFu) * 7u + (c2 & 0x00FF00FFu)) & 0x07F807F8u;

    *((unsigned int *)pc) = (mid + outer) >> 3;
#endif
}
|
rlm@1
|
109
|
rlm@1
|
/*
 * Blend three packed pixels with weights 2:7:7 (c1:c2:c3), one colour
 * channel at a time, and store the 32-bit result at pc.  For each of the
 * three low bytes:
 *
 *     out = (c1_byte * 2 + (c2_byte + c3_byte) * 7) >> 4
 *
 * pc          destination; four bytes are written as one unsigned int
 * c1, c2, c3  packed source pixels, one channel per byte in bits 0..23
 *
 * Scalar path: the same masked computation the previous inline assembly
 * performed, written in C so it builds with any compiler and target.
 * Bits 8..15 are processed on their own and bits 0..7 / 16..23 together;
 * the top byte of the inputs is ignored and the top byte of the result is
 * zero.
 *
 * MMX path (MSVC x86-32 only, compiled when MMX is defined): widens the
 * bytes to 16-bit lanes, applies the weights and packs back to bytes.
 */
void Interp4(unsigned char *pc, unsigned int c1, unsigned int c2, unsigned int c3)
{
#ifdef MMX
    __asm
    {
        mov       eax, pc
        movd      mm1, c1
        movd      mm2, c2
        movd      mm3, c3
        punpcklbw mm1, reg_blank    ; bytes -> 16-bit lanes
        punpcklbw mm2, reg_blank
        punpcklbw mm3, reg_blank
        psllw     mm1, 1            ; c1 * 2
        paddw     mm2, mm3
        pmullw    mm2, const7       ; (c2 + c3) * 7
        paddw     mm1, mm2
        psrlw     mm1, 4
        packuswb  mm1, reg_blank    ; 16-bit lanes -> bytes
        movd      [eax], mm1
        EMMS
    }
#else
    /* Bits 8..15: the weighted sum occupies bits 8..19, keep bits 12..19. */
    const unsigned int mid =
        ((c1 & 0x0000FF00u) * 2u
         + ((c2 & 0x0000FF00u) + (c3 & 0x0000FF00u)) * 7u) & 0x000FF000u;
    /* Bits 0..7 and 16..23 together; keep bits 4..11 and 20..27. */
    const unsigned int outer =
        ((c1 & 0x00FF00FFu) * 2u
         + ((c2 & 0x00FF00FFu) + (c3 & 0x00FF00FFu)) * 7u) & 0x0FF00FF0u;

    *((unsigned int *)pc) = (mid + outer) >> 4;
#endif
}
|
rlm@1
|
171
|
rlm@1
|
/*
 * Average two packed pixels and store the 32-bit result at pc:
 *
 *     *pc = (c1 + c2) >> 1
 *
 * The arithmetic is performed on the whole 32-bit word (modulo 2^32), not
 * per colour channel.
 *
 * pc      destination; four bytes are written as one unsigned int
 * c1, c2  packed source pixels
 *
 * The scalar path is plain C so it builds everywhere.  The MMX path now
 * uses a logical shift (psrld) to match the scalar path; the former
 * arithmetic shift (psrad) sign-extended the sum whenever bit 31 was set.
 */
void Interp5(unsigned char *pc, unsigned int c1, unsigned int c2)
{
#ifdef MMX
    __asm
    {
        mov   eax, pc
        movd  mm0, c1
        movd  mm1, c2
        paddd mm0, mm1
        psrld mm0, 1        ; logical shift, same as the scalar path
        movd  [eax], mm0
        EMMS
    }
#else
    /* Unsigned arithmetic wraps modulo 2^32, matching add/shr. */
    *((unsigned int *)pc) = (c1 + c2) >> 1;
#endif
}
|
rlm@1
|
198
|
rlm@1
|
/*
 * 16-bit counterpart of Interp1: stores interp_16_31(c1, c2) at pc as one
 * unsigned short.  Presumably a 3:1 blend in favour of c1, going by the
 * helper's name and the legacy "(c1*3+c2)/4" note -- confirm against the
 * helper's definition.
 */
void Interp1_16(unsigned char *pc, unsigned short c1, unsigned short c2)
{
    unsigned short *const dst = (unsigned short *)pc;

    dst[0] = interp_16_31(c1, c2);
}
|
rlm@1
|
204
|
rlm@1
|
/*
 * 16-bit counterpart of Interp2: stores interp_16_211(c1, c2, c3) at pc as
 * one unsigned short.  Presumably a 2:1:1 blend, going by the helper's name
 * and the legacy "(c1*2+c2+c3)/4" note -- confirm against the helper's
 * definition.
 */
void Interp2_16(unsigned char *pc, unsigned short c1, unsigned short c2, unsigned short c3)
{
    unsigned short *const dst = (unsigned short *)pc;

    dst[0] = interp_16_211(c1, c2, c3);
}
|
rlm@1
|
210
|
rlm@1
|
/*
 * 16-bit counterpart of Interp3: stores interp_16_71(c1, c2) at pc as one
 * unsigned short.  Presumably a 7:1 blend in favour of c1, going by the
 * helper's name and the legacy "(c1*7+c2)/8" note -- confirm against the
 * helper's definition.
 */
void Interp3_16(unsigned char *pc, unsigned short c1, unsigned short c2)
{
    unsigned short *const dst = (unsigned short *)pc;

    dst[0] = interp_16_71(c1, c2);
}
|
rlm@1
|
218
|
rlm@1
|
/*
 * 16-bit counterpart of Interp4: stores interp_16_772(c2, c3, c1) at pc as
 * one unsigned short.  Note the argument order: c2 and c3 are passed first
 * and c1 last.  Presumably the helper weights its arguments 7:7:2, which
 * would match the legacy "(c1*2+(c2+c3)*7)/16" note -- confirm against the
 * helper's definition.
 */
void Interp4_16(unsigned char *pc, unsigned short c1, unsigned short c2, unsigned short c3)
{
    unsigned short *const dst = (unsigned short *)pc;

    dst[0] = interp_16_772(c2, c3, c1);
}
|
rlm@1
|
226
|
rlm@1
|
/*
 * 16-bit counterpart of Interp5: stores interp_16_11(c1, c2) at pc as one
 * unsigned short.  Presumably an equal-weight blend, going by the helper's
 * name -- confirm against the helper's definition.
 */
void Interp5_16(unsigned char *pc, unsigned short c1, unsigned short c2)
{
    unsigned short *const dst = (unsigned short *)pc;

    dst[0] = interp_16_11(c1, c2);
}
|
rlm@1
|
231
|
rlm@1
|
232 bool Diff(unsigned int c1, unsigned int c2)
|
rlm@1
|
233 {
|
rlm@1
|
234 unsigned int
|
rlm@1
|
235 YUV1 = RGBtoYUV(c1),
|
rlm@1
|
236 YUV2 = RGBtoYUV(c2);
|
rlm@1
|
237
|
rlm@1
|
238 if (YUV1 == YUV2) return false; // Save some processing power
|
rlm@1
|
239
|
rlm@1
|
240 #ifdef MMX
|
rlm@1
|
241 unsigned int retval;
|
rlm@1
|
242 __asm
|
rlm@1
|
243 {
|
rlm@1
|
244 mov eax, 0x7FFFFFFF
|
rlm@1
|
245 movd mm7, eax; mm7 = ABS_MASK = 0x7FFFFFFF
|
rlm@1
|
246
|
rlm@1
|
247 ; Copy source colors in first reg
|
rlm@1
|
248 movd mm0, YUV1
|
rlm@1
|
249 movd mm1, YUV2
|
rlm@1
|
250
|
rlm@1
|
251 mov eax, 0x00FF0000
|
rlm@1
|
252 movd mm6, eax; mm6 = Ymask = 0x00FF0000
|
rlm@1
|
253
|
rlm@1
|
254 ; Calculate color Y difference
|
rlm@1
|
255 movq mm2, mm0
|
rlm@1
|
256 movq mm3, mm1
|
rlm@1
|
257 pand mm2, mm6
|
rlm@1
|
258 pand mm3, mm6
|
rlm@1
|
259 psubd mm2, mm3
|
rlm@1
|
260 pand mm2, mm7
|
rlm@1
|
261
|
rlm@1
|
262 mov eax, 0x0000FF00
|
rlm@1
|
263 movd mm6, eax; mm6 = Umask = 0x0000FF00
|
rlm@1
|
264
|
rlm@1
|
265 ; Calculate color U difference
|
rlm@1
|
266 movq mm3, mm0
|
rlm@1
|
267 movq mm4, mm1
|
rlm@1
|
268 pand mm3, mm6
|
rlm@1
|
269 pand mm4, mm6
|
rlm@1
|
270 psubd mm3, mm4
|
rlm@1
|
271 pand mm3, mm7
|
rlm@1
|
272
|
rlm@1
|
273 mov eax, 0x000000FF
|
rlm@1
|
274 movd mm6, eax; mm6 = Vmask = 0x000000FF
|
rlm@1
|
275
|
rlm@1
|
276 ; Calculate color V difference
|
rlm@1
|
277 movq mm4, mm0
|
rlm@1
|
278 movq mm5, mm1
|
rlm@1
|
279 pand mm4, mm6
|
rlm@1
|
280 pand mm5, mm6
|
rlm@1
|
281 psubd mm4, mm5
|
rlm@1
|
282 pand mm4, mm7
|
rlm@1
|
283
|
rlm@1
|
284 mov eax, 0x00300000
|
rlm@1
|
285 movd mm5, eax; mm5 = trY = 0x00300000
|
rlm@1
|
286 mov eax, 0x00000700
|
rlm@1
|
287 movd mm6, eax; mm6 = trU = 0x00000700
|
rlm@1
|
288 mov eax, 0x00000006
|
rlm@1
|
289 movd mm7, eax; mm7 = trV = 0x00000006
|
rlm@1
|
290
|
rlm@1
|
291 ; Compare the results
|
rlm@1
|
292 pcmpgtd mm2, trY
|
rlm@1
|
293 pcmpgtd mm3, trU
|
rlm@1
|
294 pcmpgtd mm4, trV
|
rlm@1
|
295 por mm2, mm3
|
rlm@1
|
296 por mm2, mm4
|
rlm@1
|
297
|
rlm@1
|
298 movd retval, mm2
|
rlm@1
|
299
|
rlm@1
|
300 EMMS
|
rlm@1
|
301 }
|
rlm@1
|
302 return (retval != 0);
|
rlm@1
|
303 #else
|
rlm@1
|
304 return
|
rlm@1
|
305 (abs32((YUV1 & Ymask) - (YUV2 & Ymask)) > trY) ||
|
rlm@1
|
306 (abs32((YUV1 & Umask) - (YUV2 & Umask)) > trU) ||
|
rlm@1
|
307 (abs32((YUV1 & Vmask) - (YUV2 & Vmask)) > trV);
|
rlm@1
|
308 #endif
|
rlm@1
|
309 }
|
rlm@1
|
310
|
rlm@1
|
/*
 * Convert a packed colour to the cheap luma/chroma triple used by Diff.
 *
 * With r = bits 0..7, g = bits 8..15 and b = bits 16..23 of c (bits 24..31
 * are ignored):
 *
 *     Y = (r + g + b) >> 2
 *     u = 128 + floor((r - b) / 4)
 *     v = 128 + floor((2*g - r - b) / 8)
 *
 * Returns (Y << 16) + (u << 8) + v.  Y is at most 191 and u, v lie in
 * 64..191, so each component fits in its byte.
 *
 * Y divides by 4 rather than by 3 on purpose: the original author measured
 * a division by 3 as slowing the emulation down by about 10%.
 *
 * The chroma terms are biased to be non-negative before shifting
 * (floor(x / 4) + 128 == ((x + 256) >> 2) + 64, and likewise for / 8), so
 * the result does not rely on how the compiler shifts negative integers.
 *
 * The former `#ifdef MMX` assembly variant has been removed.  It masked
 * the three channels but never shifted green and blue down to bit 0
 * before combining them, so it did not produce the value described above,
 * and Diff's thresholds are only meaningful for that value.  Both build
 * configurations now share this single implementation.
 *
 * A higher-quality alternative that was kept here for reference:
 *     y =  0.256788*r + 0.504129*g + 0.097906*b + 16
 *     u = -0.148223*r - 0.290993*g + 0.439216*b + 128
 *     v =  0.439216*r - 0.367788*g - 0.071427*b + 128
 */
unsigned int RGBtoYUV(unsigned int c)
{
    const int r = (int)(c & 0xFFu);
    const int g = (int)((c >> 8) & 0xFFu);
    const int b = (int)((c >> 16) & 0xFFu);

    const unsigned int Y = (unsigned int)(r + g + b) >> 2;
    /* r - b lies in -255..255; adding 256 keeps the shifted value >= 0. */
    const unsigned int u = 64u + ((unsigned int)(r - b + 256) >> 2);
    /* 2g - r - b lies in -510..510; adding 512 keeps it >= 0. */
    const unsigned int v = 64u + ((unsigned int)(2 * g - r - b + 512) >> 3);

    return (Y << 16) + (u << 8) + v;
}