rlm@1
|
1 #include <cstdlib>
|
rlm@1
|
2 #include <cstring>
|
rlm@1
|
3 #include "../Port.h"
|
rlm@1
|
4
|
rlm@1
|
5 #ifdef MMX
|
rlm@1
|
6 extern "C" bool cpu_mmx;
|
rlm@1
|
7 #endif
|
rlm@1
|
8
|
rlm@1
|
9 /*
|
rlm@1
|
10 * Thanks to Kawaks' Mr. K for the code
|
rlm@1
|
11
|
rlm@1
|
12 Incorporated into vba by Anthony Di Franco
|
rlm@1
|
13 */
|
rlm@1
|
14
|
rlm@1
|
15 static u8 *frm1 = NULL;
|
rlm@1
|
16 static u8 *frm2 = NULL;
|
rlm@1
|
17 static u8 *frm3 = NULL;
|
rlm@1
|
18
|
rlm@1
|
19 extern u32 RGB_LOW_BITS_MASK;
|
rlm@1
|
20 extern u32 qRGB_COLOR_MASK[2];
|
rlm@1
|
21
|
rlm@1
|
22 static void Init()
|
rlm@1
|
23 {
|
rlm@1
|
24 frm1 = (u8 *)calloc(322 * 242, 4);
|
rlm@1
|
25 // 1 frame ago
|
rlm@1
|
26 frm2 = (u8 *)calloc(322 * 242, 4);
|
rlm@1
|
27 // 2 frames ago
|
rlm@1
|
28 frm3 = (u8 *)calloc(322 * 242, 4);
|
rlm@1
|
29 // 3 frames ago
|
rlm@1
|
30 }
|
rlm@1
|
31
|
rlm@1
|
32 void InterframeCleanup()
|
rlm@1
|
33 {
|
rlm@1
|
34 if (frm1)
|
rlm@1
|
35 free(frm1);
|
rlm@1
|
36 if (frm2)
|
rlm@1
|
37 free(frm2);
|
rlm@1
|
38 if (frm3)
|
rlm@1
|
39 free(frm3);
|
rlm@1
|
40 frm1 = frm2 = frm3 = NULL;
|
rlm@1
|
41 }
|
rlm@1
|
42
|
rlm@1
|
43 #ifdef MMX
|
rlm@1
|
44 static void SmartIB_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
|
rlm@1
|
45 {
|
rlm@1
|
46 u16 *src0 = (u16 *)srcPtr;
|
rlm@1
|
47 u16 *src1 = (u16 *)frm1;
|
rlm@1
|
48 u16 *src2 = (u16 *)frm2;
|
rlm@1
|
49 u16 *src3 = (u16 *)frm3;
|
rlm@1
|
50
|
rlm@1
|
51 int count = width >> 2;
|
rlm@1
|
52
|
rlm@1
|
53 for (int i = 0; i < height; i++)
|
rlm@1
|
54 {
|
rlm@1
|
55 #ifdef __GNUC__
|
rlm@1
|
56 asm volatile (
|
rlm@1
|
57 "push %4\n"
|
rlm@1
|
58 "movq 0(%5), %%mm7\n" // colorMask
|
rlm@1
|
59 "0:\n"
|
rlm@1
|
60 "movq 0(%0), %%mm0\n" // src0
|
rlm@1
|
61 "movq 0(%1), %%mm1\n" // src1
|
rlm@1
|
62 "movq 0(%2), %%mm2\n" // src2
|
rlm@1
|
63 "movq 0(%3), %%mm3\n" // src3
|
rlm@1
|
64 "movq %%mm0, 0(%3)\n" // src3 = src0
|
rlm@1
|
65 "movq %%mm0, %%mm4\n"
|
rlm@1
|
66 "movq %%mm1, %%mm5\n"
|
rlm@1
|
67 "pcmpeqw %%mm2, %%mm5\n" // src1 == src2 (A)
|
rlm@1
|
68 "pcmpeqw %%mm3, %%mm4\n" // src3 == src0 (B)
|
rlm@1
|
69 "por %%mm5, %%mm4\n" // A | B
|
rlm@1
|
70 "movq %%mm2, %%mm5\n"
|
rlm@1
|
71 "pcmpeqw %%mm0, %%mm5\n" // src0 == src2 (C)
|
rlm@1
|
72 "pcmpeqw %%mm1, %%mm3\n" // src1 == src3 (D)
|
rlm@1
|
73 "por %%mm3, %%mm5\n" // C|D
|
rlm@1
|
74 "pandn %%mm5, %%mm4\n" // (!(A|B))&(C|D)
|
rlm@1
|
75 "movq %%mm0, %%mm2\n"
|
rlm@1
|
76 "pand %%mm7, %%mm2\n" // color & colorMask
|
rlm@1
|
77 "pand %%mm7, %%mm1\n" // src1 & colorMask
|
rlm@1
|
78 "psrlw $1, %%mm2\n" // (color & colorMask) >> 1 (E)
|
rlm@1
|
79 "psrlw $1, %%mm1\n" // (src & colorMask) >> 1 (F)
|
rlm@1
|
80 "paddw %%mm2, %%mm1\n" // E+F
|
rlm@1
|
81 "pand %%mm4, %%mm1\n" // (E+F) & res
|
rlm@1
|
82 "pandn %%mm0, %%mm4\n" // color& !res
|
rlm@1
|
83
|
rlm@1
|
84 "por %%mm1, %%mm4\n"
|
rlm@1
|
85 "movq %%mm4, 0(%0)\n" // src0 = res
|
rlm@1
|
86
|
rlm@1
|
87 "addl $8, %0\n"
|
rlm@1
|
88 "addl $8, %1\n"
|
rlm@1
|
89 "addl $8, %2\n"
|
rlm@1
|
90 "addl $8, %3\n"
|
rlm@1
|
91
|
rlm@1
|
92 "decl %4\n"
|
rlm@1
|
93 "jnz 0b\n"
|
rlm@1
|
94 "pop %4\n"
|
rlm@1
|
95 "emms\n"
|
rlm@1
|
96 : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (src3)
|
rlm@1
|
97 : "r" (count), "r" (qRGB_COLOR_MASK)
|
rlm@1
|
98 );
|
rlm@1
|
99 #else
|
rlm@1
|
100 __asm {
|
rlm@1
|
101 movq mm7, qword ptr [qRGB_COLOR_MASK];
|
rlm@1
|
102 mov eax, src0;
|
rlm@1
|
103 mov ebx, src1;
|
rlm@1
|
104 mov ecx, src2;
|
rlm@1
|
105 mov edx, src3;
|
rlm@1
|
106 mov edi, count;
|
rlm@1
|
107 label0:
|
rlm@1
|
108 movq mm0, qword ptr [eax]; // src0
|
rlm@1
|
109 movq mm1, qword ptr [ebx]; // src1
|
rlm@1
|
110 movq mm2, qword ptr [ecx]; // src2
|
rlm@1
|
111 movq mm3, qword ptr [edx]; // src3
|
rlm@1
|
112 movq qword ptr [edx], mm0; // src3 = src0
|
rlm@1
|
113 movq mm4, mm0;
|
rlm@1
|
114 movq mm5, mm1;
|
rlm@1
|
115 pcmpeqw mm5, mm2; // src1 == src2 (A)
|
rlm@1
|
116 pcmpeqw mm4, mm3; // src3 == src0 (B)
|
rlm@1
|
117 por mm4, mm5; // A | B
|
rlm@1
|
118 movq mm5, mm2;
|
rlm@1
|
119 pcmpeqw mm5, mm0; // src0 == src2 (C)
|
rlm@1
|
120 pcmpeqw mm3, mm1; // src1 == src3 (D)
|
rlm@1
|
121 por mm5, mm3; // C|D
|
rlm@1
|
122 pandn mm4, mm5; // (!(A|B))&(C|D)
|
rlm@1
|
123 movq mm2, mm0;
|
rlm@1
|
124 pand mm2, mm7; // color & colorMask
|
rlm@1
|
125 pand mm1, mm7; // src1 & colorMask
|
rlm@1
|
126 psrlw mm2, 1; // (color & colorMask) >> 1 (E)
|
rlm@1
|
127 psrlw mm1, 1; // (src & colorMask) >> 1 (F)
|
rlm@1
|
128 paddw mm1, mm2; // E+F
|
rlm@1
|
129 pand mm1, mm4; // (E+F) & res
|
rlm@1
|
130 pandn mm4, mm0; // color & !res
|
rlm@1
|
131
|
rlm@1
|
132 por mm4, mm1;
|
rlm@1
|
133 movq qword ptr [eax], mm4; // src0 = res
|
rlm@1
|
134
|
rlm@1
|
135 add eax, 8;
|
rlm@1
|
136 add ebx, 8;
|
rlm@1
|
137 add ecx, 8;
|
rlm@1
|
138 add edx, 8;
|
rlm@1
|
139
|
rlm@1
|
140 dec edi;
|
rlm@1
|
141 jnz label0;
|
rlm@1
|
142 mov src0, eax;
|
rlm@1
|
143 mov src1, ebx;
|
rlm@1
|
144 mov src2, ecx;
|
rlm@1
|
145 mov src3, edx;
|
rlm@1
|
146 emms;
|
rlm@1
|
147 }
|
rlm@1
|
148 #endif
|
rlm@1
|
149 src0 += 2;
|
rlm@1
|
150 src1 += 2;
|
rlm@1
|
151 src2 += 2;
|
rlm@1
|
152 src3 += 2;
|
rlm@1
|
153 }
|
rlm@1
|
154
|
rlm@1
|
155 /* Swap buffers around */
|
rlm@1
|
156 u8 *temp = frm1;
|
rlm@1
|
157 frm1 = frm3;
|
rlm@1
|
158 frm3 = frm2;
|
rlm@1
|
159 frm2 = temp;
|
rlm@1
|
160 }
|
rlm@1
|
161
|
rlm@1
|
162 #endif
|
rlm@1
|
163
|
rlm@1
|
164 void SmartIB(u8 *srcPtr, u32 srcPitch, int width, int height)
|
rlm@1
|
165 {
|
rlm@1
|
166 if (frm1 == NULL)
|
rlm@1
|
167 {
|
rlm@1
|
168 Init();
|
rlm@1
|
169 }
|
rlm@1
|
170 #ifdef MMX
|
rlm@1
|
171 if (cpu_mmx)
|
rlm@1
|
172 {
|
rlm@1
|
173 SmartIB_MMX(srcPtr, srcPitch, width, height);
|
rlm@1
|
174 return;
|
rlm@1
|
175 }
|
rlm@1
|
176 #endif
|
rlm@1
|
177
|
rlm@1
|
178 u16 colorMask = ~RGB_LOW_BITS_MASK;
|
rlm@1
|
179
|
rlm@1
|
180 u16 *src0 = (u16 *)srcPtr;
|
rlm@1
|
181 u16 *src1 = (u16 *)frm1;
|
rlm@1
|
182 u16 *src2 = (u16 *)frm2;
|
rlm@1
|
183 u16 *src3 = (u16 *)frm3;
|
rlm@1
|
184
|
rlm@1
|
185 int sPitch = srcPitch >> 1;
|
rlm@1
|
186
|
rlm@1
|
187 int pos = 0;
|
rlm@1
|
188 for (int j = 0; j < height; j++)
|
rlm@1
|
189 for (int i = 0; i < sPitch; i++)
|
rlm@1
|
190 {
|
rlm@1
|
191 u16 color = src0[pos];
|
rlm@1
|
192 src0[pos] =
|
rlm@1
|
193 (src1[pos] != src2[pos]) &&
|
rlm@1
|
194 (src3[pos] != color) &&
|
rlm@1
|
195 ((color == src2[pos]) || (src1[pos] == src3[pos]))
|
rlm@1
|
196 ? (((color & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1)) :
|
rlm@1
|
197 color;
|
rlm@1
|
198 src3[pos] = color; /* oldest buffer now holds newest frame */
|
rlm@1
|
199 pos++;
|
rlm@1
|
200 }
|
rlm@1
|
201
|
rlm@1
|
202 /* Swap buffers around */
|
rlm@1
|
203 u8 *temp = frm1;
|
rlm@1
|
204 frm1 = frm3;
|
rlm@1
|
205 frm3 = frm2;
|
rlm@1
|
206 frm2 = temp;
|
rlm@1
|
207 }
|
rlm@1
|
208
|
rlm@1
|
209 #ifdef MMX
|
rlm@1
|
210 static void SmartIB32_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
|
rlm@1
|
211 {
|
rlm@1
|
212 u32 *src0 = (u32 *)srcPtr;
|
rlm@1
|
213 u32 *src1 = (u32 *)frm1;
|
rlm@1
|
214 u32 *src2 = (u32 *)frm2;
|
rlm@1
|
215 u32 *src3 = (u32 *)frm3;
|
rlm@1
|
216
|
rlm@1
|
217 int count = width >> 1;
|
rlm@1
|
218
|
rlm@1
|
219 for (int i = 0; i < height; i++)
|
rlm@1
|
220 {
|
rlm@1
|
221 #ifdef __GNUC__
|
rlm@1
|
222 asm volatile (
|
rlm@1
|
223 "push %4\n"
|
rlm@1
|
224 "movq 0(%5), %%mm7\n" // colorMask
|
rlm@1
|
225 "0:\n"
|
rlm@1
|
226 "movq 0(%0), %%mm0\n" // src0
|
rlm@1
|
227 "movq 0(%1), %%mm1\n" // src1
|
rlm@1
|
228 "movq 0(%2), %%mm2\n" // src2
|
rlm@1
|
229 "movq 0(%3), %%mm3\n" // src3
|
rlm@1
|
230 "movq %%mm0, 0(%3)\n" // src3 = src0
|
rlm@1
|
231 "movq %%mm0, %%mm4\n"
|
rlm@1
|
232 "movq %%mm1, %%mm5\n"
|
rlm@1
|
233 "pcmpeqd %%mm2, %%mm5\n" // src1 == src2 (A)
|
rlm@1
|
234 "pcmpeqd %%mm3, %%mm4\n" // src3 == src0 (B)
|
rlm@1
|
235 "por %%mm5, %%mm4\n" // A | B
|
rlm@1
|
236 "movq %%mm2, %%mm5\n"
|
rlm@1
|
237 "pcmpeqd %%mm0, %%mm5\n" // src0 == src2 (C)
|
rlm@1
|
238 "pcmpeqd %%mm1, %%mm3\n" // src1 == src3 (D)
|
rlm@1
|
239 "por %%mm3, %%mm5\n" // C|D
|
rlm@1
|
240 "pandn %%mm5, %%mm4\n" // (!(A|B))&(C|D)
|
rlm@1
|
241 "movq %%mm0, %%mm2\n"
|
rlm@1
|
242 "pand %%mm7, %%mm2\n" // color & colorMask
|
rlm@1
|
243 "pand %%mm7, %%mm1\n" // src1 & colorMask
|
rlm@1
|
244 "psrld $1, %%mm2\n" // (color & colorMask) >> 1 (E)
|
rlm@1
|
245 "psrld $1, %%mm1\n" // (src & colorMask) >> 1 (F)
|
rlm@1
|
246 "paddd %%mm2, %%mm1\n" // E+F
|
rlm@1
|
247 "pand %%mm4, %%mm1\n" // (E+F) & res
|
rlm@1
|
248 "pandn %%mm0, %%mm4\n" // color& !res
|
rlm@1
|
249
|
rlm@1
|
250 "por %%mm1, %%mm4\n"
|
rlm@1
|
251 "movq %%mm4, 0(%0)\n" // src0 = res
|
rlm@1
|
252
|
rlm@1
|
253 "addl $8, %0\n"
|
rlm@1
|
254 "addl $8, %1\n"
|
rlm@1
|
255 "addl $8, %2\n"
|
rlm@1
|
256 "addl $8, %3\n"
|
rlm@1
|
257
|
rlm@1
|
258 "decl %4\n"
|
rlm@1
|
259 "jnz 0b\n"
|
rlm@1
|
260 "pop %4\n"
|
rlm@1
|
261 "emms\n"
|
rlm@1
|
262 : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (src3)
|
rlm@1
|
263 : "r" (count), "r" (qRGB_COLOR_MASK)
|
rlm@1
|
264 );
|
rlm@1
|
265 #else
|
rlm@1
|
266 __asm {
|
rlm@1
|
267 movq mm7, qword ptr [qRGB_COLOR_MASK];
|
rlm@1
|
268 mov eax, src0;
|
rlm@1
|
269 mov ebx, src1;
|
rlm@1
|
270 mov ecx, src2;
|
rlm@1
|
271 mov edx, src3;
|
rlm@1
|
272 mov edi, count;
|
rlm@1
|
273 label0:
|
rlm@1
|
274 movq mm0, qword ptr [eax]; // src0
|
rlm@1
|
275 movq mm1, qword ptr [ebx]; // src1
|
rlm@1
|
276 movq mm2, qword ptr [ecx]; // src2
|
rlm@1
|
277 movq mm3, qword ptr [edx]; // src3
|
rlm@1
|
278 movq qword ptr [edx], mm0; // src3 = src0
|
rlm@1
|
279 movq mm4, mm0;
|
rlm@1
|
280 movq mm5, mm1;
|
rlm@1
|
281 pcmpeqd mm5, mm2; // src1 == src2 (A)
|
rlm@1
|
282 pcmpeqd mm4, mm3; // src3 == src0 (B)
|
rlm@1
|
283 por mm4, mm5; // A | B
|
rlm@1
|
284 movq mm5, mm2;
|
rlm@1
|
285 pcmpeqd mm5, mm0; // src0 == src2 (C)
|
rlm@1
|
286 pcmpeqd mm3, mm1; // src1 == src3 (D)
|
rlm@1
|
287 por mm5, mm3; // C|D
|
rlm@1
|
288 pandn mm4, mm5; // (!(A|B))&(C|D)
|
rlm@1
|
289 movq mm2, mm0;
|
rlm@1
|
290 pand mm2, mm7; // color & colorMask
|
rlm@1
|
291 pand mm1, mm7; // src1 & colorMask
|
rlm@1
|
292 psrld mm2, 1; // (color & colorMask) >> 1 (E)
|
rlm@1
|
293 psrld mm1, 1; // (src & colorMask) >> 1 (F)
|
rlm@1
|
294 paddd mm1, mm2; // E+F
|
rlm@1
|
295 pand mm1, mm4; // (E+F) & res
|
rlm@1
|
296 pandn mm4, mm0; // color & !res
|
rlm@1
|
297
|
rlm@1
|
298 por mm4, mm1;
|
rlm@1
|
299 movq qword ptr [eax], mm4; // src0 = res
|
rlm@1
|
300
|
rlm@1
|
301 add eax, 8;
|
rlm@1
|
302 add ebx, 8;
|
rlm@1
|
303 add ecx, 8;
|
rlm@1
|
304 add edx, 8;
|
rlm@1
|
305
|
rlm@1
|
306 dec edi;
|
rlm@1
|
307 jnz label0;
|
rlm@1
|
308 mov src0, eax;
|
rlm@1
|
309 mov src1, ebx;
|
rlm@1
|
310 mov src2, ecx;
|
rlm@1
|
311 mov src3, edx;
|
rlm@1
|
312 emms;
|
rlm@1
|
313 }
|
rlm@1
|
314 #endif
|
rlm@1
|
315
|
rlm@1
|
316 src0++;
|
rlm@1
|
317 src1++;
|
rlm@1
|
318 src2++;
|
rlm@1
|
319 src3++;
|
rlm@1
|
320 }
|
rlm@1
|
321 /* Swap buffers around */
|
rlm@1
|
322 u8 *temp = frm1;
|
rlm@1
|
323 frm1 = frm3;
|
rlm@1
|
324 frm3 = frm2;
|
rlm@1
|
325 frm2 = temp;
|
rlm@1
|
326 }
|
rlm@1
|
327
|
rlm@1
|
328 #endif
|
rlm@1
|
329
|
rlm@1
|
330 void SmartIB32(u8 *srcPtr, u32 srcPitch, int width, int height)
|
rlm@1
|
331 {
|
rlm@1
|
332 if (frm1 == NULL)
|
rlm@1
|
333 {
|
rlm@1
|
334 Init();
|
rlm@1
|
335 }
|
rlm@1
|
336 #ifdef MMX
|
rlm@1
|
337 if (cpu_mmx)
|
rlm@1
|
338 {
|
rlm@1
|
339 SmartIB32_MMX(srcPtr, srcPitch, width, height);
|
rlm@1
|
340 return;
|
rlm@1
|
341 }
|
rlm@1
|
342 #endif
|
rlm@1
|
343
|
rlm@1
|
344 u32 *src0 = (u32 *)srcPtr;
|
rlm@1
|
345 u32 *src1 = (u32 *)frm1;
|
rlm@1
|
346 u32 *src2 = (u32 *)frm2;
|
rlm@1
|
347 u32 *src3 = (u32 *)frm3;
|
rlm@1
|
348
|
rlm@1
|
349 u32 colorMask = 0xfefefe;
|
rlm@1
|
350
|
rlm@1
|
351 int sPitch = srcPitch >> 2;
|
rlm@1
|
352 int pos = 0;
|
rlm@1
|
353
|
rlm@1
|
354 for (int j = 0; j < height; j++)
|
rlm@1
|
355 for (int i = 0; i < sPitch; i++)
|
rlm@1
|
356 {
|
rlm@1
|
357 u32 color = src0[pos];
|
rlm@1
|
358 src0[pos] =
|
rlm@1
|
359 (src1[pos] != src2[pos]) &&
|
rlm@1
|
360 (src3[pos] != color) &&
|
rlm@1
|
361 ((color == src2[pos]) || (src1[pos] == src3[pos]))
|
rlm@1
|
362 ? (((color & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1)) :
|
rlm@1
|
363 color;
|
rlm@1
|
364 src3[pos] = color; /* oldest buffer now holds newest frame */
|
rlm@1
|
365 pos++;
|
rlm@1
|
366 }
|
rlm@1
|
367
|
rlm@1
|
368 /* Swap buffers around */
|
rlm@1
|
369 u8 *temp = frm1;
|
rlm@1
|
370 frm1 = frm3;
|
rlm@1
|
371 frm3 = frm2;
|
rlm@1
|
372 frm2 = temp;
|
rlm@1
|
373 }
|
rlm@1
|
374
|
rlm@1
|
375 #ifdef MMX
|
rlm@1
|
376 static void MotionBlurIB_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
|
rlm@1
|
377 {
|
rlm@1
|
378 u16 *src0 = (u16 *)srcPtr;
|
rlm@1
|
379 u16 *src1 = (u16 *)frm1;
|
rlm@1
|
380
|
rlm@1
|
381 int count = width >> 2;
|
rlm@1
|
382
|
rlm@1
|
383 for (int i = 0; i < height; i++)
|
rlm@1
|
384 {
|
rlm@1
|
385 #ifdef __GNUC__
|
rlm@1
|
386 asm volatile (
|
rlm@1
|
387 "push %2\n"
|
rlm@1
|
388 "movq 0(%3), %%mm7\n" // colorMask
|
rlm@1
|
389 "0:\n"
|
rlm@1
|
390 "movq 0(%0), %%mm0\n" // src0
|
rlm@1
|
391 "movq 0(%1), %%mm1\n" // src1
|
rlm@1
|
392 "movq %%mm0, 0(%1)\n" // src1 = src0
|
rlm@1
|
393 "pand %%mm7, %%mm0\n" // color & colorMask
|
rlm@1
|
394 "pand %%mm7, %%mm1\n" // src1 & colorMask
|
rlm@1
|
395 "psrlw $1, %%mm0\n" // (color & colorMask) >> 1 (E)
|
rlm@1
|
396 "psrlw $1, %%mm1\n" // (src & colorMask) >> 1 (F)
|
rlm@1
|
397 "paddw %%mm1, %%mm0\n" // E+F
|
rlm@1
|
398
|
rlm@1
|
399 "movq %%mm0, 0(%0)\n" // src0 = res
|
rlm@1
|
400
|
rlm@1
|
401 "addl $8, %0\n"
|
rlm@1
|
402 "addl $8, %1\n"
|
rlm@1
|
403
|
rlm@1
|
404 "decl %2\n"
|
rlm@1
|
405 "jnz 0b\n"
|
rlm@1
|
406 "pop %2\n"
|
rlm@1
|
407 "emms\n"
|
rlm@1
|
408 : "+r" (src0), "+r" (src1)
|
rlm@1
|
409 : "r" (count), "r" (qRGB_COLOR_MASK)
|
rlm@1
|
410 );
|
rlm@1
|
411 #else
|
rlm@1
|
412 __asm {
|
rlm@1
|
413 movq mm7, qword ptr [qRGB_COLOR_MASK];
|
rlm@1
|
414 mov eax, src0;
|
rlm@1
|
415 mov ebx, src1;
|
rlm@1
|
416 mov edi, count;
|
rlm@1
|
417 label0:
|
rlm@1
|
418 movq mm0, qword ptr [eax]; // src0
|
rlm@1
|
419 movq mm1, qword ptr [ebx]; // src1
|
rlm@1
|
420 movq qword ptr [ebx], mm0; // src1 = src0
|
rlm@1
|
421 pand mm0, mm7; // color & colorMask
|
rlm@1
|
422 pand mm1, mm7; // src1 & colorMask
|
rlm@1
|
423 psrlw mm0, 1; // (color & colorMask) >> 1 (E)
|
rlm@1
|
424 psrlw mm1, 1; // (src & colorMask) >> 1 (F)
|
rlm@1
|
425 paddw mm0, mm1; // E+F
|
rlm@1
|
426
|
rlm@1
|
427 movq qword ptr [eax], mm0; // src0 = res
|
rlm@1
|
428
|
rlm@1
|
429 add eax, 8;
|
rlm@1
|
430 add ebx, 8;
|
rlm@1
|
431
|
rlm@1
|
432 dec edi;
|
rlm@1
|
433 jnz label0;
|
rlm@1
|
434 mov src0, eax;
|
rlm@1
|
435 mov src1, ebx;
|
rlm@1
|
436 emms;
|
rlm@1
|
437 }
|
rlm@1
|
438 #endif
|
rlm@1
|
439 src0 += 2;
|
rlm@1
|
440 src1 += 2;
|
rlm@1
|
441 }
|
rlm@1
|
442 }
|
rlm@1
|
443
|
rlm@1
|
444 #endif
|
rlm@1
|
445
|
rlm@1
|
446 void MotionBlurIB(u8 *srcPtr, u32 srcPitch, int width, int height)
|
rlm@1
|
447 {
|
rlm@1
|
448 if (frm1 == NULL)
|
rlm@1
|
449 {
|
rlm@1
|
450 Init();
|
rlm@1
|
451 }
|
rlm@1
|
452
|
rlm@1
|
453 #ifdef MMX
|
rlm@1
|
454 if (cpu_mmx)
|
rlm@1
|
455 {
|
rlm@1
|
456 MotionBlurIB_MMX(srcPtr, srcPitch, width, height);
|
rlm@1
|
457 return;
|
rlm@1
|
458 }
|
rlm@1
|
459 #endif
|
rlm@1
|
460
|
rlm@1
|
461 u16 colorMask = ~RGB_LOW_BITS_MASK;
|
rlm@1
|
462
|
rlm@1
|
463 u16 *src0 = (u16 *)srcPtr;
|
rlm@1
|
464 u16 *src1 = (u16 *)frm1;
|
rlm@1
|
465
|
rlm@1
|
466 int sPitch = srcPitch >> 1;
|
rlm@1
|
467
|
rlm@1
|
468 int pos = 0;
|
rlm@1
|
469 for (int j = 0; j < height; j++)
|
rlm@1
|
470 for (int i = 0; i < sPitch; i++)
|
rlm@1
|
471 {
|
rlm@1
|
472 u16 color = src0[pos];
|
rlm@1
|
473 src0[pos] =
|
rlm@1
|
474 (((color & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1));
|
rlm@1
|
475 src1[pos] = color;
|
rlm@1
|
476 pos++;
|
rlm@1
|
477 }
|
rlm@1
|
478 }
|
rlm@1
|
479
|
rlm@1
|
480 #ifdef MMX
|
rlm@1
|
481 static void MotionBlurIB32_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
|
rlm@1
|
482 {
|
rlm@1
|
483 u32 *src0 = (u32 *)srcPtr;
|
rlm@1
|
484 u32 *src1 = (u32 *)frm1;
|
rlm@1
|
485
|
rlm@1
|
486 int count = width >> 1;
|
rlm@1
|
487
|
rlm@1
|
488 for (int i = 0; i < height; i++)
|
rlm@1
|
489 {
|
rlm@1
|
490 #ifdef __GNUC__
|
rlm@1
|
491 asm volatile (
|
rlm@1
|
492 "push %2\n"
|
rlm@1
|
493 "movq 0(%3), %%mm7\n" // colorMask
|
rlm@1
|
494 "0:\n"
|
rlm@1
|
495 "movq 0(%0), %%mm0\n" // src0
|
rlm@1
|
496 "movq 0(%1), %%mm1\n" // src1
|
rlm@1
|
497 "movq %%mm0, 0(%1)\n" // src1 = src0
|
rlm@1
|
498 "pand %%mm7, %%mm0\n" // color & colorMask
|
rlm@1
|
499 "pand %%mm7, %%mm1\n" // src1 & colorMask
|
rlm@1
|
500 "psrld $1, %%mm0\n" // (color & colorMask) >> 1 (E)
|
rlm@1
|
501 "psrld $1, %%mm1\n" // (src & colorMask) >> 1 (F)
|
rlm@1
|
502 "paddd %%mm1, %%mm0\n" // E+F
|
rlm@1
|
503
|
rlm@1
|
504 "movq %%mm0, 0(%0)\n" // src0 = res
|
rlm@1
|
505
|
rlm@1
|
506 "addl $8, %0\n"
|
rlm@1
|
507 "addl $8, %1\n"
|
rlm@1
|
508
|
rlm@1
|
509 "decl %2\n"
|
rlm@1
|
510 "jnz 0b\n"
|
rlm@1
|
511 "pop %2\n"
|
rlm@1
|
512 "emms\n"
|
rlm@1
|
513 : "+r" (src0), "+r" (src1)
|
rlm@1
|
514 : "r" (count), "r" (qRGB_COLOR_MASK)
|
rlm@1
|
515 );
|
rlm@1
|
516 #else
|
rlm@1
|
517 __asm {
|
rlm@1
|
518 movq mm7, qword ptr [qRGB_COLOR_MASK];
|
rlm@1
|
519 mov eax, src0;
|
rlm@1
|
520 mov ebx, src1;
|
rlm@1
|
521 mov edi, count;
|
rlm@1
|
522 label0:
|
rlm@1
|
523 movq mm0, qword ptr [eax]; // src0
|
rlm@1
|
524 movq mm1, qword ptr [ebx]; // src1
|
rlm@1
|
525 movq qword ptr [ebx], mm0; // src1 = src0
|
rlm@1
|
526 pand mm0, mm7; // color & colorMask
|
rlm@1
|
527 pand mm1, mm7; // src1 & colorMask
|
rlm@1
|
528 psrld mm0, 1; // (color & colorMask) >> 1 (E)
|
rlm@1
|
529 psrld mm1, 1; // (src & colorMask) >> 1 (F)
|
rlm@1
|
530 paddd mm0, mm1; // E+F
|
rlm@1
|
531
|
rlm@1
|
532 movq qword ptr [eax], mm0; // src0 = res
|
rlm@1
|
533
|
rlm@1
|
534 add eax, 8;
|
rlm@1
|
535 add ebx, 8;
|
rlm@1
|
536
|
rlm@1
|
537 dec edi;
|
rlm@1
|
538 jnz label0;
|
rlm@1
|
539 mov src0, eax;
|
rlm@1
|
540 mov src1, ebx;
|
rlm@1
|
541 emms;
|
rlm@1
|
542 }
|
rlm@1
|
543 #endif
|
rlm@1
|
544 src0++;
|
rlm@1
|
545 src1++;
|
rlm@1
|
546 }
|
rlm@1
|
547 }
|
rlm@1
|
548
|
rlm@1
|
549 #endif
|
rlm@1
|
550
|
rlm@1
|
551 void MotionBlurIB32(u8 *srcPtr, u32 srcPitch, int width, int height)
|
rlm@1
|
552 {
|
rlm@1
|
553 if (frm1 == NULL)
|
rlm@1
|
554 {
|
rlm@1
|
555 Init();
|
rlm@1
|
556 }
|
rlm@1
|
557
|
rlm@1
|
558 #ifdef MMX
|
rlm@1
|
559 if (cpu_mmx)
|
rlm@1
|
560 {
|
rlm@1
|
561 MotionBlurIB32_MMX(srcPtr, srcPitch, width, height);
|
rlm@1
|
562 return;
|
rlm@1
|
563 }
|
rlm@1
|
564 #endif
|
rlm@1
|
565
|
rlm@1
|
566 u32 *src0 = (u32 *)srcPtr;
|
rlm@1
|
567 u32 *src1 = (u32 *)frm1;
|
rlm@1
|
568
|
rlm@1
|
569 u32 colorMask = 0xfefefe;
|
rlm@1
|
570
|
rlm@1
|
571 int sPitch = srcPitch >> 2;
|
rlm@1
|
572 int pos = 0;
|
rlm@1
|
573
|
rlm@1
|
574 for (int j = 0; j < height; j++)
|
rlm@1
|
575 for (int i = 0; i < sPitch; i++)
|
rlm@1
|
576 {
|
rlm@1
|
577 u32 color = src0[pos];
|
rlm@1
|
578 src0[pos] = (((color & colorMask) >> 1) +
|
rlm@1
|
579 ((src1[pos] & colorMask) >> 1));
|
rlm@1
|
580 src1[pos] = color;
|
rlm@1
|
581 pos++;
|
rlm@1
|
582 }
|
rlm@1
|
583 }
|
rlm@1
|
584
|
rlm@1
|
585 static int count = 0;
|
rlm@1
|
586
|
rlm@1
|
587 void InterlaceIB(u8 *srcPtr, u32 srcPitch, int width, int height)
|
rlm@1
|
588 {
|
rlm@1
|
589 if (frm1 == NULL)
|
rlm@1
|
590 {
|
rlm@1
|
591 Init();
|
rlm@1
|
592 }
|
rlm@1
|
593
|
rlm@1
|
594 u16 colorMask = ~RGB_LOW_BITS_MASK;
|
rlm@1
|
595
|
rlm@1
|
596 u16 *src0 = (u16 *)srcPtr;
|
rlm@1
|
597 u16 *src1 = (u16 *)frm1;
|
rlm@1
|
598
|
rlm@1
|
599 int sPitch = srcPitch >> 1;
|
rlm@1
|
600
|
rlm@1
|
601 int pos = 0;
|
rlm@1
|
602 for (int j = 0; j < height; j++)
|
rlm@1
|
603 {
|
rlm@1
|
604 bool render = count ? (j & 1) != 0 : (j & 1) == 0;
|
rlm@1
|
605 if (render)
|
rlm@1
|
606 {
|
rlm@1
|
607 for (int i = 0; i < sPitch; i++)
|
rlm@1
|
608 {
|
rlm@1
|
609 u16 color = src0[pos];
|
rlm@1
|
610 src0[pos] =
|
rlm@1
|
611 (((color & colorMask) >> 1) + ((((src1[pos] & colorMask) >> 1) & colorMask) >> 1));
|
rlm@1
|
612 src1[pos] = color;
|
rlm@1
|
613 pos++;
|
rlm@1
|
614 }
|
rlm@1
|
615 }
|
rlm@1
|
616 else
|
rlm@1
|
617 {
|
rlm@1
|
618 for (int i = 0; i < sPitch; i++)
|
rlm@1
|
619 {
|
rlm@1
|
620 u16 color = src0[pos];
|
rlm@1
|
621 src0[pos] =
|
rlm@1
|
622 (((((color & colorMask) >> 1) & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1));
|
rlm@1
|
623 src1[pos] = color;
|
rlm@1
|
624 pos++;
|
rlm@1
|
625 }
|
rlm@1
|
626 }
|
rlm@1
|
627 }
|
rlm@1
|
628 count = count ^ 1;
|
rlm@1
|
629 }
|
rlm@1
|
630
|