rlm@1
|
1 ;/*---------------------------------------------------------------------*
|
rlm@1
|
2 ; * The following (piece of) code, (part of) the 2xSaI engine, *
|
rlm@1
|
3 ; * copyright (c) 1999 - 2001 by Derek Liauw Kie Fa. *
|
rlm@1
|
4 ; * Non-Commercial use of this software is allowed and is encouraged, *
|
rlm@1
|
5 ; * provided that appropriate credit be given. *
|
rlm@1
|
6 ; * You may freely modify this code, but I request *
|
rlm@1
|
7 ; * that any improvements to the engine be submitted to me, so *
|
rlm@1
|
8 ; * that I can implement these improvements in newer versions of *
|
rlm@1
|
9 ; * the software. *
|
rlm@1
|
10 ; * If you need more information, have any comments or suggestions, *
|
rlm@1
|
11 ; * you can e-mail me. My e-mail: derek-liauw@usa.net. *
|
rlm@1
|
12 ; *---------------------------------------------------------------------*/
|
rlm@1
|
13
|
rlm@1
|
14 ;----------------------
|
rlm@1
|
15 ; 2xSaI version 0.59 WIP, soon to become version 0.60
|
rlm@1
|
16 ;----------------------
|
rlm@1
|
17
|
rlm@1
|
18 ;%define FAR_POINTER
|
rlm@1
|
19
|
rlm@1
|
20
|
rlm@1
|
21
|
rlm@1
|
; Assemble everything that follows as 32-bit code.
22 BITS 32
|
rlm@1
|
; Exported entry points. The DJGPP variant of each name carries one extra
; leading underscore compared with the default variant.
23 %ifdef __DJGPP__
|
rlm@1
|
24 GLOBAL __2xSaILine
|
rlm@1
|
25 GLOBAL __2xSaISuperEagleLine
|
rlm@1
|
26 GLOBAL __2xSaISuper2xSaILine
|
rlm@1
|
27 GLOBAL _Init_2xSaIMMX
|
rlm@1
|
28 %else
|
rlm@1
|
29 GLOBAL _2xSaILine
|
rlm@1
|
30 GLOBAL _2xSaISuperEagleLine
|
rlm@1
|
31 GLOBAL _2xSaISuper2xSaILine
|
rlm@1
|
32 GLOBAL Init_2xSaIMMX
|
rlm@1
|
33 %endif
|
rlm@1
|
34 SECTION .text ALIGN = 32
|
rlm@1
|
35
|
rlm@1
|
; NOTE(review): the prototypes below list srcPtr, srcPitch, width, dstPtr,
; dstPitch, but the stack offsets defined further down place a deltaPtr
; argument at [ebp+12], between srcPtr and srcPitch, and the routines read
; [ebp+deltaPtr]. The prototype comment appears to be missing that
; parameter -- confirm against the C-side declaration.
36 %ifdef FAR_POINTER
|
rlm@1
|
37 ;EXTERN_C void _2xSaILine (uint8 *srcPtr, uint32 srcPitch, uint32 width,
|
rlm@1
|
38 ; uint8 *dstPtr, uint32 dstPitch, uint16 dstSegment);
|
rlm@1
|
39 %else
|
rlm@1
|
40 ;EXTERN_C void _2xSaILine (uint8 *srcPtr, uint32 srcPitch, uint32 width,
|
rlm@1
|
41 ; uint8 *dstPtr, uint32 dstPitch);
|
rlm@1
|
42 %endif
|
rlm@1
|
43
|
rlm@1
|
; Offsets of the stack arguments relative to ebp, valid after the
; `push ebp` / `mov ebp, esp` prologue used by the routines in this file
; ([ebp+0] = saved ebp, [ebp+4] = return address, [ebp+8] = first argument).
44 srcPtr equ 8
|
rlm@1
|
45 deltaPtr equ 12
|
rlm@1
|
46 srcPitch equ 16
|
rlm@1
|
47 width equ 20
|
rlm@1
|
48 dstOffset equ 24
|
rlm@1
|
49 dstPitch equ 28
|
rlm@1
|
; dstSegment is only read when FAR_POINTER is defined.
50 dstSegment equ 32
|
rlm@1
|
51
|
rlm@1
|
52
|
rlm@1
|
53
|
rlm@1
|
54
|
rlm@1
|
; Byte offsets of neighbouring pixels within one source row. Adjacent names
; are 2 bytes apart (presumably 16-bit pixels -- confirm against the caller).
; With eax as the row base and ebx as the source pitch, the routines address
; the rows as:
;   colorB*            -> [eax + offset]
;   color4/5/6/S2      -> [eax + ebx + offset]
;   color1/2/3/S1      -> [eax + 2*ebx + offset]
;   colorA0..A3        -> [eax + 3*ebx + offset]
55 colorB0 equ -2
|
rlm@1
|
56 colorB1 equ 0
|
rlm@1
|
57 colorB2 equ 2
|
rlm@1
|
58 colorB3 equ 4
|
rlm@1
|
59
|
rlm@1
|
; color7/8/9 are not referenced in the portion of the file visible here.
60 color7 equ -2
|
rlm@1
|
61 color8 equ 0
|
rlm@1
|
62 color9 equ 2
|
rlm@1
|
63
|
rlm@1
|
64 color4 equ -2
|
rlm@1
|
65 color5 equ 0
|
rlm@1
|
66 color6 equ 2
|
rlm@1
|
67 colorS2 equ 4
|
rlm@1
|
68
|
rlm@1
|
69 color1 equ -2
|
rlm@1
|
70 color2 equ 0
|
rlm@1
|
71 color3 equ 2
|
rlm@1
|
72 colorS1 equ 4
|
rlm@1
|
73
|
rlm@1
|
74 colorA0 equ -2
|
rlm@1
|
75 colorA1 equ 0
|
rlm@1
|
76 colorA2 equ 2
|
rlm@1
|
77 colorA3 equ 4
|
rlm@1
|
78
|
rlm@1
|
79
|
rlm@1
|
80
|
rlm@1
|
81
|
rlm@1
|
;-------------------------------------------------------------------------
; _2xSaISuper2xSaILine (exported as __2xSaISuper2xSaILine under DJGPP)
;
; Scales one line of source pixels into two destination lines using MMX.
;
; Stack arguments (read through ebp): srcPtr, deltaPtr, srcPitch, width,
; dstOffset, dstPitch, and dstSegment when FAR_POINTER is defined.
;
; Register roles for the whole routine:
;   eax = source base, set to srcPtr - srcPitch (the row above srcPtr)
;   ebx = source pitch in bytes
;   edx = destination pointer for the first output line
;   ecx = scratch inside the loop; the remaining width is kept on the stack
;
; Each loop pass advances eax by 8 bytes, edx by 16 bytes and the width
; counter by 4, and writes 16 bytes to each of the two output lines
; ([edx] and [edx + dstPitch]).
;
; All general-purpose registers are saved with pushad and restored with
; popad; the MMX state is cleared with emms before returning. The deltaPtr
; argument slot on the stack is updated in place while the loop runs.
;-------------------------------------------------------------------------
82 %ifdef __DJGPP__
|
rlm@1
|
83 __2xSaISuper2xSaILine:
|
rlm@1
|
84 %else
|
rlm@1
|
85 _2xSaISuper2xSaILine:
|
rlm@1
|
86 %endif
|
rlm@1
|
87 ; Store some stuff
|
rlm@1
|
; Standard frame: arguments become reachable at [ebp+8] and upwards.
88 push ebp
|
rlm@1
|
89 mov ebp, esp
|
rlm@1
|
; Save every general-purpose register; the matching popad is in the epilogue.
90 pushad
|
rlm@1
|
91
|
rlm@1
|
92 ; Prepare the destination
|
rlm@1
|
93 %ifdef FAR_POINTER
|
rlm@1
|
94 ; Set the selector
|
rlm@1
|
; Only the low 16 bits of the dstSegment argument are loaded into fs;
; the destination stores later use an fs: override in this build.
95 mov eax, [ebp+dstSegment]
|
rlm@1
|
96 mov fs, ax
|
rlm@1
|
97 %endif
|
rlm@1
|
98 mov edx, [ebp+dstOffset] ; edx points to the screen
|
rlm@1
|
99 ; Prepare the source
|
rlm@1
|
100 ; eax points to colorA
|
rlm@1
|
101 mov eax, [ebp+srcPtr] ;eax points to colorA
|
rlm@1
|
102 mov ebx, [ebp+srcPitch] ;ebx contains the source pitch
|
rlm@1
|
103 mov ecx, [ebp+width] ;ecx contains the number of pixels to process
|
rlm@1
|
104 ; eax now points to colorB1
|
rlm@1
|
; Step back one row so that [eax] is the colorB row, [eax+ebx] the
; color4/5/6 row, [eax+2*ebx] the color1/2/3 row and [eax+3*ebx] the
; colorA row.
105 sub eax, ebx ;eax points to B1 which is the base
|
rlm@1
|
106
|
rlm@1
|
107 ; Main Loop
|
rlm@1
|
; Top of the per-group loop. The remaining width is parked on the stack so
; that ecx can be used as scratch; it is popped again at .SKIP_PROCESS.
108 .Loop: push ecx
|
rlm@1
|
109
|
rlm@1
|
110 ;-----Check Delta------------------
|
rlm@1
|
; Delta check: compare the source neighbourhood with the copy held in the
; delta buffer and skip all processing when nothing has changed.
111 mov ecx, [ebp+deltaPtr]
|
rlm@1
|
112
|
rlm@1
|
113
|
rlm@1
|
114 ;load source img
|
rlm@1
|
; Two quadwords per row (byte offsets -2 and +4) from each of the four rows.
115 movq mm0, [eax+colorB0]
|
rlm@1
|
116 movq mm1, [eax+colorB3]
|
rlm@1
|
117 movq mm2, [eax+ebx+color4]
|
rlm@1
|
118 movq mm3, [eax+ebx+colorS2]
|
rlm@1
|
119 movq mm4, [eax+ebx+ebx+color1]
|
rlm@1
|
120 movq mm5, [eax+ebx+ebx+colorS1]
|
rlm@1
|
; The fourth row needs base + 3*pitch, which a single effective address
; cannot express here, so eax is temporarily advanced by one pitch.
121 push eax
|
rlm@1
|
122 add eax, ebx
|
rlm@1
|
123 movq mm6, [eax+ebx+ebx+colorA0]
|
rlm@1
|
124 movq mm7, [eax+ebx+ebx+colorA3]
|
rlm@1
|
125 pop eax
|
rlm@1
|
126
|
rlm@1
|
127 ;compare to delta
|
rlm@1
|
; Word-wise equality against the delta buffer. The delta addresses carry an
; extra +2 byte bias relative to the source addresses, and the same pitch
; (ebx) is used to step between delta rows.
128 pcmpeqw mm0, [ecx+2+colorB0]
|
rlm@1
|
129 pcmpeqw mm1, [ecx+2+colorB3]
|
rlm@1
|
130 pcmpeqw mm2, [ecx+ebx+2+color4]
|
rlm@1
|
131 pcmpeqw mm3, [ecx+ebx+2+colorS2]
|
rlm@1
|
132 pcmpeqw mm4, [ecx+ebx+ebx+2+color1]
|
rlm@1
|
133 pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1]
|
rlm@1
|
; Same temporary +pitch adjustment as above, this time on the delta pointer.
134 add ecx, ebx
|
rlm@1
|
135 pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0]
|
rlm@1
|
136 pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3]
|
rlm@1
|
137 sub ecx, ebx
|
rlm@1
|
138
|
rlm@1
|
139
|
rlm@1
|
140 ;compose results
|
rlm@1
|
; AND all eight equality masks together: a word of mm0 stays all-ones only
; if that word matched in every one of the eight compares.
141 pand mm0, mm1
|
rlm@1
|
142 pand mm2, mm3
|
rlm@1
|
143 pand mm4, mm5
|
rlm@1
|
144 pand mm6, mm7
|
rlm@1
|
145 pand mm0, mm2
|
rlm@1
|
146 pand mm4, mm6
|
rlm@1
|
147 pxor mm7, mm7
|
rlm@1
|
148 pand mm0, mm4
|
rlm@1
|
149 movq mm6, [eax+colorB0]
|
rlm@1
|
; mm7 becomes all-ones in every word where the combined mask is zero,
; i.e. where at least one compare found a difference.
150 pcmpeqw mm7, mm0 ;did any compare give us a zero ?
|
rlm@1
|
151
|
rlm@1
|
; Refresh the delta buffer with the current colorB-row quadword. Only this
; one quadword is written back per pass.
152 movq [ecx+2+colorB0], mm6
|
rlm@1
|
153
|
rlm@1
|
; Pack the four word flags into the low dword so one integer test can tell
; whether any word differed.
154 packsswb mm7, mm7
|
rlm@1
|
155 movd ecx, mm7
|
rlm@1
|
156 test ecx, ecx
|
rlm@1
|
157 jz near .SKIP_PROCESS ;no, so we can skip
|
rlm@1
|
158
|
rlm@1
|
159 ;End Delta
|
rlm@1
|
160
|
rlm@1
|
161 ;---------------------------------
|
rlm@1
|
; Precompute blends of color5/color6. Every blend in this routine uses the
; same recipe on two inputs a and b:
;   ((a & colorMask) >> 1) + ((b & colorMask) >> 1) + (a & b & lowPixelMask)
; colorMask and lowPixelMask are defined outside the visible part of the
; file; presumably they clear / select the lowest bit of each colour channel
; so the halves can be added without carrying between channels -- confirm.
; mm4 and mm5 keep unmodified copies of color5 and color6 for later blends.
162 movq mm0, [eax+ebx+color5]
|
rlm@1
|
163 movq mm1, [eax+ebx+color6]
|
rlm@1
|
164 movq mm2, mm0
|
rlm@1
|
165 movq mm3, mm1
|
rlm@1
|
166 movq mm4, mm0
|
rlm@1
|
167 movq mm5, mm1
|
rlm@1
|
168
|
rlm@1
|
169 pand mm0, [colorMask]
|
rlm@1
|
170 pand mm1, [colorMask]
|
rlm@1
|
171
|
rlm@1
|
172 psrlw mm0, 1
|
rlm@1
|
173 psrlw mm1, 1
|
rlm@1
|
174
|
rlm@1
|
175 pand mm3, [lowPixelMask]
|
rlm@1
|
176 paddw mm0, mm1
|
rlm@1
|
177
|
rlm@1
|
178 pand mm3, mm2
|
rlm@1
|
179 paddw mm0, mm3 ;mm0 contains the interpolated values
|
rlm@1
|
; I56Pixel = blend(color5, color6); also kept in mm7 for the next two blends.
180 movq [I56Pixel], mm0
|
rlm@1
|
181 movq mm7, mm0
|
rlm@1
|
182
|
rlm@1
|
183 ;-------------------
|
rlm@1
|
; I5556Pixel = blend(I56Pixel, color5): weighted towards color5.
184 movq mm0, mm7
|
rlm@1
|
185 movq mm1, mm4 ;5,5,5,6
|
rlm@1
|
186 movq mm2, mm0
|
rlm@1
|
187 movq mm3, mm1
|
rlm@1
|
188
|
rlm@1
|
189 pand mm0, [colorMask]
|
rlm@1
|
190 pand mm1, [colorMask]
|
rlm@1
|
191
|
rlm@1
|
192 psrlw mm0, 1
|
rlm@1
|
193 psrlw mm1, 1
|
rlm@1
|
194
|
rlm@1
|
195 pand mm3, [lowPixelMask]
|
rlm@1
|
196 paddw mm0, mm1
|
rlm@1
|
197
|
rlm@1
|
198 pand mm3, mm2
|
rlm@1
|
199 paddw mm0, mm3 ;mm0 contains the interpolated values
|
rlm@1
|
200 movq [I5556Pixel], mm0
|
rlm@1
|
201 ;--------------------
|
rlm@1
|
202
|
rlm@1
|
; I5666Pixel = blend(I56Pixel, color6): weighted towards color6.
203 movq mm0, mm7
|
rlm@1
|
204 movq mm1, mm5 ;6,6,6,5
|
rlm@1
|
205 movq mm2, mm0
|
rlm@1
|
206 movq mm3, mm1
|
rlm@1
|
207
|
rlm@1
|
208 pand mm0, [colorMask]
|
rlm@1
|
209 pand mm1, [colorMask]
|
rlm@1
|
210
|
rlm@1
|
211 psrlw mm0, 1
|
rlm@1
|
212 psrlw mm1, 1
|
rlm@1
|
213
|
rlm@1
|
214 pand mm3, [lowPixelMask]
|
rlm@1
|
215 paddw mm0, mm1
|
rlm@1
|
216
|
rlm@1
|
217 pand mm3, mm2
|
rlm@1
|
218 paddw mm0, mm3
|
rlm@1
|
219 movq [I5666Pixel], mm0
|
rlm@1
|
220
|
rlm@1
|
221 ;-------------------------
|
rlm@1
|
222 ;-------------------------
|
rlm@1
|
; Same three blends for the next row down: color2/color3.
; mm4 and mm5 are reloaded with unmodified color2 and color3.
223 movq mm0, [eax+ebx+ebx+color2]
|
rlm@1
|
224 movq mm1, [eax+ebx+ebx+color3]
|
rlm@1
|
225 movq mm2, mm0
|
rlm@1
|
226 movq mm3, mm1
|
rlm@1
|
227 movq mm4, mm0
|
rlm@1
|
228 movq mm5, mm1
|
rlm@1
|
229
|
rlm@1
|
230 pand mm0, [colorMask]
|
rlm@1
|
231 pand mm1, [colorMask]
|
rlm@1
|
232
|
rlm@1
|
233 psrlw mm0, 1
|
rlm@1
|
234 psrlw mm1, 1
|
rlm@1
|
235
|
rlm@1
|
236 pand mm3, [lowPixelMask]
|
rlm@1
|
237 paddw mm0, mm1
|
rlm@1
|
238
|
rlm@1
|
239 pand mm3, mm2
|
rlm@1
|
240 paddw mm0, mm3
|
rlm@1
|
; I23Pixel = blend(color2, color3); also kept in mm7.
241 movq [I23Pixel], mm0
|
rlm@1
|
242 movq mm7, mm0
|
rlm@1
|
243
|
rlm@1
|
244 ;---------------------
|
rlm@1
|
; I2223Pixel = blend(I23Pixel, color2): weighted towards color2.
245 movq mm0, mm7
|
rlm@1
|
246 movq mm1, mm4 ;2,2,2,3
|
rlm@1
|
247 movq mm2, mm0
|
rlm@1
|
248 movq mm3, mm1
|
rlm@1
|
249
|
rlm@1
|
250 pand mm0, [colorMask]
|
rlm@1
|
251 pand mm1, [colorMask]
|
rlm@1
|
252
|
rlm@1
|
253 psrlw mm0, 1
|
rlm@1
|
254 psrlw mm1, 1
|
rlm@1
|
255
|
rlm@1
|
256 pand mm3, [lowPixelMask]
|
rlm@1
|
257 paddw mm0, mm1
|
rlm@1
|
258
|
rlm@1
|
259 pand mm3, mm2
|
rlm@1
|
260 paddw mm0, mm3
|
rlm@1
|
261 movq [I2223Pixel], mm0
|
rlm@1
|
262
|
rlm@1
|
263 ;----------------------
|
rlm@1
|
; I2333Pixel = blend(I23Pixel, color3): weighted towards color3.
264 movq mm0, mm7
|
rlm@1
|
265 movq mm1, mm5 ;3,3,3,2
|
rlm@1
|
266 movq mm2, mm0
|
rlm@1
|
267 movq mm3, mm1
|
rlm@1
|
268
|
rlm@1
|
269 pand mm0, [colorMask]
|
rlm@1
|
270 pand mm1, [colorMask]
|
rlm@1
|
271
|
rlm@1
|
272 psrlw mm0, 1
|
rlm@1
|
273 psrlw mm1, 1
|
rlm@1
|
274
|
rlm@1
|
275 pand mm3, [lowPixelMask]
|
rlm@1
|
276 paddw mm0, mm1
|
rlm@1
|
277
|
rlm@1
|
278 pand mm3, mm2
|
rlm@1
|
279 paddw mm0, mm3
|
rlm@1
|
280 movq [I2333Pixel], mm0
|
rlm@1
|
281
|
rlm@1
|
282
|
rlm@1
|
283 ;--------------------
|
rlm@1
|
284 ;////////////////////////////////
|
rlm@1
|
285 ; Decide which "branch" to take
|
rlm@1
|
286 ;--------------------------------
|
rlm@1
|
; Build per-word selection masks from the two diagonals of the 2x2 core.
; In the inline comments below, colorA/B/C/D correspond to the loads made
; here: A = color5, B = color6, C = color2, D = color3.
;   mm0 = (color5 == color3), mm1 = (color6 == color2), mm6 = (color5 == color6)
287 movq mm0, [eax+ebx+color5]
|
rlm@1
|
288 movq mm1, [eax+ebx+color6]
|
rlm@1
|
289 movq mm6, mm0
|
rlm@1
|
290 movq mm7, mm1
|
rlm@1
|
291 pcmpeqw mm0, [eax+ebx+ebx+color3]
|
rlm@1
|
292 pcmpeqw mm1, [eax+ebx+ebx+color2]
|
rlm@1
|
293 pcmpeqw mm6, mm7
|
rlm@1
|
294
|
rlm@1
|
295 movq mm2, mm0
|
rlm@1
|
296 movq mm3, mm0
|
rlm@1
|
297
|
rlm@1
|
298 pand mm0, mm1 ;colorA == colorD && colorB == colorC
|
rlm@1
|
299 pxor mm7, mm7
|
rlm@1
|
300
|
rlm@1
|
; Comparing a mask against zero inverts it (all-ones <-> zero per word).
301 pcmpeqw mm2, mm7
|
rlm@1
|
; mm6 = both diagonals match AND color5 == color6.
302 pand mm6, mm0
|
rlm@1
|
303 pand mm2, mm1 ;colorA != colorD && colorB == colorC
|
rlm@1
|
304
|
rlm@1
|
305 pcmpeqw mm1, mm7
|
rlm@1
|
306
|
rlm@1
|
307 pand mm1, mm3 ;colorA == colorD && colorB != colorC
|
rlm@1
|
; mm0 = both diagonals match but color5 != color6: the ambiguous words that
; need the neighbour vote below.
308 pxor mm0, mm6
|
rlm@1
|
; Words where both diagonals match and color5 == color6 are folded into the
; Mask35 side.
309 por mm1, mm6
|
rlm@1
|
310 movq mm7, mm0
|
rlm@1
|
311 movq [Mask26], mm2
|
rlm@1
|
312 packsswb mm7, mm7
|
rlm@1
|
313 movq [Mask35], mm1
|
rlm@1
|
314
|
rlm@1
|
; No ambiguous word in this group: the vote can be skipped.
315 movd ecx, mm7
|
rlm@1
|
316 test ecx, ecx
|
rlm@1
|
317 jz near .SKIP_GUESS
|
rlm@1
|
318
|
rlm@1
|
319 ;---------------------------------------------
|
rlm@1
|
; Neighbour vote for the ambiguous words.
;   mm6 = ambiguous-word mask ANDed with ONE (the per-word vote weight;
;         ONE is defined elsewhere, presumably 1 in each word -- confirm)
;   mm4, mm5 = the two reference quadwords from the [eax+ebx] row
;   mm7 = signed per-word vote accumulator, starting at zero
; The colorA/B/E/F/G/H/K/L/N/O offsets used in this section are not defined
; in the visible part of the file.
320 movq mm6, mm0
|
rlm@1
|
321 movq mm4, [eax+ebx+colorA]
|
rlm@1
|
322 movq mm5, [eax+ebx+colorB]
|
rlm@1
|
323 pxor mm7, mm7
|
rlm@1
|
324 pand mm6, [ONE]
|
rlm@1
|
325
|
rlm@1
|
; Vote 1 of 4. For a pair of neighbours (mm0, mm1):
;   mm0 <- weight * (number of the pair equal to mm4)
;   mm2 <- weight * (number of the pair equal to mm5)
; then, per ambiguous word, add the weight to mm7 unless both neighbours
; equal mm4, and subtract the weight unless both neighbours equal mm5.
326 movq mm0, [eax+colorE]
|
rlm@1
|
327 movq mm1, [eax+ebx+colorG]
|
rlm@1
|
328 movq mm2, mm0
|
rlm@1
|
329 movq mm3, mm1
|
rlm@1
|
330 pcmpeqw mm0, mm4
|
rlm@1
|
331 pcmpeqw mm1, mm4
|
rlm@1
|
332 pcmpeqw mm2, mm5
|
rlm@1
|
333 pcmpeqw mm3, mm5
|
rlm@1
|
334 pand mm0, mm6
|
rlm@1
|
335 pand mm1, mm6
|
rlm@1
|
336 pand mm2, mm6
|
rlm@1
|
337 pand mm3, mm6
|
rlm@1
|
338 paddw mm0, mm1
|
rlm@1
|
339 paddw mm2, mm3
|
rlm@1
|
340
|
rlm@1
|
341 pxor mm3, mm3
|
rlm@1
|
; count > weight is true only when both neighbours matched; the following
; compare with zero inverts that, and the AND turns it back into a weight.
342 pcmpgtw mm0, mm6
|
rlm@1
|
343 pcmpgtw mm2, mm6
|
rlm@1
|
344 pcmpeqw mm0, mm3
|
rlm@1
|
345 pcmpeqw mm2, mm3
|
rlm@1
|
346 pand mm0, mm6
|
rlm@1
|
347 pand mm2, mm6
|
rlm@1
|
348 paddw mm7, mm0
|
rlm@1
|
349 psubw mm7, mm2
|
rlm@1
|
350
|
rlm@1
|
; Vote 2 of 4: same procedure with the next pair of neighbours.
351 movq mm0, [eax+colorF]
|
rlm@1
|
352 movq mm1, [eax+ebx+colorK]
|
rlm@1
|
353 movq mm2, mm0
|
rlm@1
|
354 movq mm3, mm1
|
rlm@1
|
355 pcmpeqw mm0, mm4
|
rlm@1
|
356 pcmpeqw mm1, mm4
|
rlm@1
|
357 pcmpeqw mm2, mm5
|
rlm@1
|
358 pcmpeqw mm3, mm5
|
rlm@1
|
359 pand mm0, mm6
|
rlm@1
|
360 pand mm1, mm6
|
rlm@1
|
361 pand mm2, mm6
|
rlm@1
|
362 pand mm3, mm6
|
rlm@1
|
363 paddw mm0, mm1
|
rlm@1
|
364 paddw mm2, mm3
|
rlm@1
|
365
|
rlm@1
|
366 pxor mm3, mm3
|
rlm@1
|
367 pcmpgtw mm0, mm6
|
rlm@1
|
368 pcmpgtw mm2, mm6
|
rlm@1
|
369 pcmpeqw mm0, mm3
|
rlm@1
|
370 pcmpeqw mm2, mm3
|
rlm@1
|
371 pand mm0, mm6
|
rlm@1
|
372 pand mm2, mm6
|
rlm@1
|
373 paddw mm7, mm0
|
rlm@1
|
374 psubw mm7, mm2
|
rlm@1
|
375
|
rlm@1
|
; Votes 3 and 4 read from the rows at base + 2*pitch and base + 3*pitch, so
; eax is advanced by one pitch until the matching pop below.
376 push eax
|
rlm@1
|
377 add eax, ebx
|
rlm@1
|
; Vote 3 of 4.
378 movq mm0, [eax+ebx+colorH]
|
rlm@1
|
379 movq mm1, [eax+ebx+ebx+colorN]
|
rlm@1
|
380 movq mm2, mm0
|
rlm@1
|
381 movq mm3, mm1
|
rlm@1
|
382 pcmpeqw mm0, mm4
|
rlm@1
|
383 pcmpeqw mm1, mm4
|
rlm@1
|
384 pcmpeqw mm2, mm5
|
rlm@1
|
385 pcmpeqw mm3, mm5
|
rlm@1
|
386 pand mm0, mm6
|
rlm@1
|
387 pand mm1, mm6
|
rlm@1
|
388 pand mm2, mm6
|
rlm@1
|
389 pand mm3, mm6
|
rlm@1
|
390 paddw mm0, mm1
|
rlm@1
|
391 paddw mm2, mm3
|
rlm@1
|
392
|
rlm@1
|
393 pxor mm3, mm3
|
rlm@1
|
394 pcmpgtw mm0, mm6
|
rlm@1
|
395 pcmpgtw mm2, mm6
|
rlm@1
|
396 pcmpeqw mm0, mm3
|
rlm@1
|
397 pcmpeqw mm2, mm3
|
rlm@1
|
398 pand mm0, mm6
|
rlm@1
|
399 pand mm2, mm6
|
rlm@1
|
400 paddw mm7, mm0
|
rlm@1
|
401 psubw mm7, mm2
|
rlm@1
|
402
|
rlm@1
|
; Vote 4 of 4.
403 movq mm0, [eax+ebx+colorL]
|
rlm@1
|
404 movq mm1, [eax+ebx+ebx+colorO]
|
rlm@1
|
405 movq mm2, mm0
|
rlm@1
|
406 movq mm3, mm1
|
rlm@1
|
407 pcmpeqw mm0, mm4
|
rlm@1
|
408 pcmpeqw mm1, mm4
|
rlm@1
|
409 pcmpeqw mm2, mm5
|
rlm@1
|
410 pcmpeqw mm3, mm5
|
rlm@1
|
411 pand mm0, mm6
|
rlm@1
|
412 pand mm1, mm6
|
rlm@1
|
413 pand mm2, mm6
|
rlm@1
|
414 pand mm3, mm6
|
rlm@1
|
415 paddw mm0, mm1
|
rlm@1
|
416 paddw mm2, mm3
|
rlm@1
|
417
|
rlm@1
|
418 pxor mm3, mm3
|
rlm@1
|
419 pcmpgtw mm0, mm6
|
rlm@1
|
420 pcmpgtw mm2, mm6
|
rlm@1
|
421 pcmpeqw mm0, mm3
|
rlm@1
|
422 pcmpeqw mm2, mm3
|
rlm@1
|
423 pand mm0, mm6
|
rlm@1
|
424 pand mm2, mm6
|
rlm@1
|
425 paddw mm7, mm0
|
rlm@1
|
426 psubw mm7, mm2
|
rlm@1
|
427
|
rlm@1
|
428 pop eax
|
rlm@1
|
; Turn the signed vote into masks: mm7 = (vote > 0), mm0 = (vote < 0).
429 movq mm1, mm7
|
rlm@1
|
430 pxor mm0, mm0
|
rlm@1
|
431 pcmpgtw mm7, mm0
|
rlm@1
|
432 pcmpgtw mm0, mm1
|
rlm@1
|
433
|
rlm@1
|
; Positive votes join Mask35, negative votes join Mask26; a tie leaves the
; word in neither mask.
434 por mm7, [Mask35]
|
rlm@1
|
435 por mm0, [Mask26]
|
rlm@1
|
436 movq [Mask35], mm7
|
rlm@1
|
437 movq [Mask26], mm0
|
rlm@1
|
438
|
rlm@1
|
; Composition stage. Builds four result quadwords -- final1a / final1b for
; the first output line and final2a / final2b for the second -- by selecting
; per word between source pixels and the precomputed blends, then interleaves
; and stores them. Mask26 and Mask35 are the selection masks prepared earlier
; in the loop body.
439 .SKIP_GUESS:
|
rlm@1
|
440
|
rlm@1
|
441 ;Start the ASSEMBLY !!! eh... compose all the results together to form the final image...
|
rlm@1
|
442
|
rlm@1
|
443
|
rlm@1
|
; mm0 = blend(color5, color2), the vertical blend between the two centre
; rows. It must stay live in mm0 until final2a has been stored.
444 movq mm0, [eax+ebx+color5]
|
rlm@1
|
445 movq mm1, [eax+ebx+ebx+color2]
|
rlm@1
|
446 movq mm2, mm0
|
rlm@1
|
447 movq mm3, mm1
|
rlm@1
|
448 movq mm4, mm0
|
rlm@1
|
449 movq mm5, mm1
|
rlm@1
|
450
|
rlm@1
|
451 pand mm0, [colorMask]
|
rlm@1
|
452 pand mm1, [colorMask]
|
rlm@1
|
453
|
rlm@1
|
454 psrlw mm0, 1
|
rlm@1
|
455 psrlw mm1, 1
|
rlm@1
|
456
|
rlm@1
|
457 pand mm3, [lowPixelMask]
|
rlm@1
|
458 paddw mm0, mm1
|
rlm@1
|
459
|
rlm@1
|
460 pand mm3, mm2
|
rlm@1
|
461 paddw mm0, mm3 ;mm0 contains the interpolated values
|
rlm@1
|
462 ;---------------------------
|
rlm@1
|
463
|
rlm@1
|
464
|
rlm@1
|
465
|
rlm@1
|
; Reference pseudo-code for the 1a/2a selection that follows. The guard
; symbol looks deliberately meaningless, so this text is presumably never
; assembled.
466 %ifdef dfhsdfhsdahdsfhdsfh
|
rlm@1
|
467
|
rlm@1
|
468 if (color5 == color3 && color2 != color6 && color4 == color5 && color5 != colorA2)
|
rlm@1
|
469 product2a = INTERPOLATE (color2, color5);
|
rlm@1
|
470 else
|
rlm@1
|
471 if (color5 == color1 && color6 == color5 && color4 != color2 && color5 != colorA0)
|
rlm@1
|
472 product2a = INTERPOLATE(color2, color5);
|
rlm@1
|
473 else
|
rlm@1
|
474 product2a = color2;
|
rlm@1
|
475
|
rlm@1
|
476 if (color2 == color6 && color5 != color3 && color1 == color2 && color2 != colorB2)
|
rlm@1
|
477 product1a = INTERPOLATE (color2, color5);
|
rlm@1
|
478 else
|
rlm@1
|
479 if (color4 == color2 && color3 == color2 && color1 != color5 && color2 != colorB0)
|
rlm@1
|
480 product1a = INTERPOLATE(color2, color5);
|
rlm@1
|
481 else
|
rlm@1
|
482 product1a = color5;
|
rlm@1
|
483
|
rlm@1
|
484 %endif
|
rlm@1
|
485
|
rlm@1
|
486
|
rlm@1
|
; ---- final1a ----
; First condition: mm7 = Mask26 && (color1 == color2) && (colorB2 != color2).
487 movq mm7, [Mask26]
|
rlm@1
|
488 movq mm6, [eax+colorB2]
|
rlm@1
|
489 movq mm5, [eax+ebx+ebx+color2]
|
rlm@1
|
490 movq mm4, [eax+ebx+ebx+color1]
|
rlm@1
|
491 pcmpeqw mm4, mm5
|
rlm@1
|
492 pcmpeqw mm6, mm5
|
rlm@1
|
493 pxor mm5, mm5
|
rlm@1
|
494 pand mm7, mm4
|
rlm@1
|
; Compare with zero to invert the equality mask (== becomes !=).
495 pcmpeqw mm6, mm5
|
rlm@1
|
496 pand mm7, mm6
|
rlm@1
|
497
|
rlm@1
|
498
|
rlm@1
|
499
|
rlm@1
|
; Second condition, ORed into mm7:
;   (color3 == color2) && (color4 == color2) &&
;   (color5 != color1) && (colorB0 != color2)
500 movq mm6, [eax+ebx+ebx+color3]
|
rlm@1
|
501 movq mm5, [eax+ebx+ebx+color2]
|
rlm@1
|
502 movq mm4, [eax+ebx+ebx+color1]
|
rlm@1
|
503 movq mm2, [eax+ebx+color5]
|
rlm@1
|
504 movq mm1, [eax+ebx+color4]
|
rlm@1
|
505 movq mm3, [eax+colorB0]
|
rlm@1
|
506
|
rlm@1
|
507 pcmpeqw mm2, mm4
|
rlm@1
|
508 pcmpeqw mm6, mm5
|
rlm@1
|
509 pcmpeqw mm1, mm5
|
rlm@1
|
510 pcmpeqw mm3, mm5
|
rlm@1
|
511 pxor mm5, mm5
|
rlm@1
|
512 pcmpeqw mm2, mm5
|
rlm@1
|
513 pcmpeqw mm3, mm5
|
rlm@1
|
514 pand mm6, mm1
|
rlm@1
|
515 pand mm2, mm3
|
rlm@1
|
516 pand mm6, mm2
|
rlm@1
|
517 por mm7, mm6
|
rlm@1
|
518
|
rlm@1
|
519
|
rlm@1
|
; Per-word select: the vertical blend (mm0) where mm7 is set, color5
; elsewhere. mm5 is still zero here, so mm6 becomes the inverse of mm7.
520 movq mm6, mm7
|
rlm@1
|
521 pcmpeqw mm6, mm5
|
rlm@1
|
522 pand mm7, mm0
|
rlm@1
|
523
|
rlm@1
|
524 movq mm1, [eax+ebx+color5]
|
rlm@1
|
525 pand mm6, mm1
|
rlm@1
|
526 por mm7, mm6
|
rlm@1
|
527 movq [final1a], mm7 ;finished 1a
|
rlm@1
|
528
|
rlm@1
|
529
|
rlm@1
|
530
|
rlm@1
|
531 ;--------------------------------
|
rlm@1
|
532
|
rlm@1
|
; ---- final2a ----
; First condition: mm7 = Mask35 && (color4 == color5) && (colorA2 != color5).
533 movq mm7, [Mask35]
|
rlm@1
|
; colorA row lives at base + 3*pitch: temporarily advance eax by one pitch.
534 push eax
|
rlm@1
|
535 add eax, ebx
|
rlm@1
|
536 movq mm6, [eax+ebx+ebx+colorA2]
|
rlm@1
|
537 pop eax
|
rlm@1
|
538 movq mm5, [eax+ebx+color5]
|
rlm@1
|
539 movq mm4, [eax+ebx+color4]
|
rlm@1
|
540 pcmpeqw mm4, mm5
|
rlm@1
|
541 pcmpeqw mm6, mm5
|
rlm@1
|
542 pxor mm5, mm5
|
rlm@1
|
543 pand mm7, mm4
|
rlm@1
|
544 pcmpeqw mm6, mm5
|
rlm@1
|
545 pand mm7, mm6
|
rlm@1
|
546
|
rlm@1
|
547
|
rlm@1
|
548
|
rlm@1
|
; Second condition, ORed into mm7:
;   (color6 == color5) && (color1 == color5) &&
;   (color2 != color4) && (colorA0 != color5)
549 movq mm6, [eax+ebx+color6]
|
rlm@1
|
550 movq mm5, [eax+ebx+color5]
|
rlm@1
|
551 movq mm4, [eax+ebx+color4]
|
rlm@1
|
552 movq mm2, [eax+ebx+ebx+color2]
|
rlm@1
|
553 movq mm1, [eax+ebx+ebx+color1]
|
rlm@1
|
554 push eax
|
rlm@1
|
555 add eax, ebx
|
rlm@1
|
556 movq mm3, [eax+ebx+ebx+colorA0]
|
rlm@1
|
557 pop eax
|
rlm@1
|
558
|
rlm@1
|
559 pcmpeqw mm2, mm4
|
rlm@1
|
560 pcmpeqw mm6, mm5
|
rlm@1
|
561 pcmpeqw mm1, mm5
|
rlm@1
|
562 pcmpeqw mm3, mm5
|
rlm@1
|
563 pxor mm5, mm5
|
rlm@1
|
564 pcmpeqw mm2, mm5
|
rlm@1
|
565 pcmpeqw mm3, mm5
|
rlm@1
|
566 pand mm6, mm1
|
rlm@1
|
567 pand mm2, mm3
|
rlm@1
|
568 pand mm6, mm2
|
rlm@1
|
569 por mm7, mm6
|
rlm@1
|
570
|
rlm@1
|
571
|
rlm@1
|
; Per-word select: the vertical blend (mm0) where mm7 is set, color2
; elsewhere.
572 movq mm6, mm7
|
rlm@1
|
573 pcmpeqw mm6, mm5
|
rlm@1
|
574 pand mm7, mm0
|
rlm@1
|
575
|
rlm@1
|
576 movq mm1, [eax+ebx+ebx+color2]
|
rlm@1
|
577 pand mm6, mm1
|
rlm@1
|
578 por mm7, mm6
|
rlm@1
|
579 movq [final2a], mm7 ;finished 2a
|
rlm@1
|
580
|
rlm@1
|
581
|
rlm@1
|
582 ;--------------------------------------------
|
rlm@1
|
583
|
rlm@1
|
584
|
rlm@1
|
; Reference pseudo-code for the 1b/2b selection that follows; same
; never-defined guard symbol as above.
585 %ifdef dfhsdfhsdahdsfhdsfh
|
rlm@1
|
586 if (color6 == color3 && color3 == colorA1 && color2 != colorA2 && color3 != colorA0)
|
rlm@1
|
587 product2b = Q_INTERPOLATE (color3, color3, color3, color2);
|
rlm@1
|
588 else
|
rlm@1
|
589 if (color5 == color2 && color2 == colorA2 && colorA1 != color3 && color2 != colorA3)
|
rlm@1
|
590 product2b = Q_INTERPOLATE (color2, color2, color2, color3);
|
rlm@1
|
591 else
|
rlm@1
|
592 product2b = INTERPOLATE (color2, color3);
|
rlm@1
|
593
|
rlm@1
|
594 if (color6 == color3 && color6 == colorB1 && color5 != colorB2 && color6 != colorB0)
|
rlm@1
|
595 product1b = Q_INTERPOLATE (color6, color6, color6, color5);
|
rlm@1
|
596 else
|
rlm@1
|
597 if (color5 == color2 && color5 == colorB2 && colorB1 != color6 && color5 != colorB3)
|
rlm@1
|
598 product1b = Q_INTERPOLATE (color6, color5, color5, color5);
|
rlm@1
|
599 else
|
rlm@1
|
600 product1b = INTERPOLATE (color5, color6);
|
rlm@1
|
601 %endif
|
rlm@1
|
602
|
rlm@1
|
; ---- final2b ----
; Load the whole colorA row (base + 3*pitch) while eax is advanced by one
; pitch; mm2 (colorA2) and mm3 (colorA3) are reused by the second condition.
603 push eax
|
rlm@1
|
604 add eax, ebx
|
rlm@1
|
605 pxor mm7, mm7
|
rlm@1
|
606 movq mm0, [eax+ebx+ebx+colorA0]
|
rlm@1
|
607 movq mm1, [eax+ebx+ebx+colorA1]
|
rlm@1
|
608 movq mm2, [eax+ebx+ebx+colorA2]
|
rlm@1
|
609 movq mm3, [eax+ebx+ebx+colorA3]
|
rlm@1
|
610 pop eax
|
rlm@1
|
611 movq mm4, [eax+ebx+ebx+color2]
|
rlm@1
|
612 movq mm5, [eax+ebx+ebx+color3]
|
rlm@1
|
613 movq mm6, [eax+ebx+color6]
|
rlm@1
|
614
|
rlm@1
|
; Condition X (ends up in mm0):
;   (color6 == color3) && (colorA1 == color3) &&
;   (color2 != colorA2) && (colorA0 != color3)
615 pcmpeqw mm6, mm5
|
rlm@1
|
616 pcmpeqw mm1, mm5
|
rlm@1
|
617 pcmpeqw mm4, mm2
|
rlm@1
|
618 pcmpeqw mm0, mm5
|
rlm@1
|
619 pcmpeqw mm4, mm7
|
rlm@1
|
620 pcmpeqw mm0, mm7
|
rlm@1
|
621 pand mm0, mm4
|
rlm@1
|
622 pand mm6, mm1
|
rlm@1
|
623 pand mm0, mm6
|
rlm@1
|
624
|
rlm@1
|
625
|
rlm@1
|
; colorA1 was overwritten by its compare above, so it is reloaded.
626 push eax
|
rlm@1
|
627 add eax, ebx
|
rlm@1
|
628 movq mm1, [eax+ebx+ebx+colorA1]
|
rlm@1
|
629 pop eax
|
rlm@1
|
630 movq mm4, [eax+ebx+ebx+color2]
|
rlm@1
|
631 movq mm5, [eax+ebx+color5]
|
rlm@1
|
632 movq mm6, [eax+ebx+ebx+color3]
|
rlm@1
|
633
|
rlm@1
|
; Condition Y (ends up in mm1):
;   (color5 == color2) && (colorA2 == color2) &&
;   (colorA1 != color3) && (colorA3 != color2)
634 pcmpeqw mm5, mm4
|
rlm@1
|
635 pcmpeqw mm2, mm4
|
rlm@1
|
636 pcmpeqw mm1, mm6
|
rlm@1
|
637 pcmpeqw mm3, mm4
|
rlm@1
|
638 pcmpeqw mm1, mm7
|
rlm@1
|
639 pcmpeqw mm3, mm7
|
rlm@1
|
640 pand mm2, mm5
|
rlm@1
|
641 pand mm1, mm3
|
rlm@1
|
642 pand mm1, mm2
|
rlm@1
|
643
|
rlm@1
|
644
|
rlm@1
|
; mm7 = X | Y. Words already claimed by X or Y are removed from the working
; copies of Mask35 (mm4) and Mask26 (mm3) so each word has one source.
645 movq mm7, mm0
|
rlm@1
|
646 por mm7, mm1
|
rlm@1
|
647
|
rlm@1
|
648 movq mm4, [Mask35]
|
rlm@1
|
649 movq mm3, [Mask26]
|
rlm@1
|
650
|
rlm@1
|
651 movq mm6, mm4
|
rlm@1
|
652 pand mm6, mm7
|
rlm@1
|
653 pxor mm4, mm6
|
rlm@1
|
654
|
rlm@1
|
655 movq mm6, mm3
|
rlm@1
|
656 pand mm6, mm7
|
rlm@1
|
657 pxor mm3, mm6
|
rlm@1
|
658
|
rlm@1
|
; mm2 accumulates the union of all four masks so its inverse can pick the
; default value.
659 movq mm2, mm0
|
rlm@1
|
660 movq mm7, [I2333Pixel]
|
rlm@1
|
661 movq mm6, [I2223Pixel]
|
rlm@1
|
662 movq mm5, [I23Pixel]
|
rlm@1
|
663
|
rlm@1
|
664
|
rlm@1
|
; Per-word selection for final2b:
;   X -> I2333Pixel, Y -> I2223Pixel,
;   remaining Mask35 -> color3, remaining Mask26 -> color2,
;   none of the above -> I23Pixel
665 por mm2, mm4
|
rlm@1
|
666 pand mm4, [eax+ebx+ebx+color3]
|
rlm@1
|
667 por mm2, mm3
|
rlm@1
|
668 pand mm3, [eax+ebx+ebx+color2]
|
rlm@1
|
669 por mm2, mm1
|
rlm@1
|
670 pand mm0, mm7
|
rlm@1
|
671 pand mm1, mm6
|
rlm@1
|
672 pxor mm7, mm7
|
rlm@1
|
673 pcmpeqw mm2, mm7
|
rlm@1
|
674 por mm0, mm1
|
rlm@1
|
675 por mm3, mm4
|
rlm@1
|
676 pand mm2, mm5
|
rlm@1
|
677 por mm0, mm3
|
rlm@1
|
678 por mm0, mm2
|
rlm@1
|
679 movq [final2b], mm0
|
rlm@1
|
680
|
rlm@1
|
681 ;-----------------------------------
|
rlm@1
|
682
|
rlm@1
|
683
|
rlm@1
|
; ---- final1b ----
; Mirror of the final2b logic using the colorB row (at [eax]) and the
; color5/color6 blends.
684 pxor mm7, mm7
|
rlm@1
|
685 movq mm0, [eax+colorB0]
|
rlm@1
|
686 movq mm1, [eax+colorB1]
|
rlm@1
|
687 movq mm2, [eax+colorB2]
|
rlm@1
|
688 movq mm3, [eax+colorB3]
|
rlm@1
|
689 movq mm4, [eax+ebx+color5]
|
rlm@1
|
690 movq mm5, [eax+ebx+color6]
|
rlm@1
|
691 movq mm6, [eax+ebx+ebx+color3]
|
rlm@1
|
692
|
rlm@1
|
; Condition X (ends up in mm0):
;   (color3 == color6) && (colorB1 == color6) &&
;   (color5 != colorB2) && (colorB0 != color6)
693 pcmpeqw mm6, mm5
|
rlm@1
|
694 pcmpeqw mm1, mm5
|
rlm@1
|
695 pcmpeqw mm4, mm2
|
rlm@1
|
696 pcmpeqw mm0, mm5
|
rlm@1
|
697 pcmpeqw mm4, mm7
|
rlm@1
|
698 pcmpeqw mm0, mm7
|
rlm@1
|
699 pand mm0, mm4
|
rlm@1
|
700 pand mm6, mm1
|
rlm@1
|
701 pand mm0, mm6
|
rlm@1
|
702
|
rlm@1
|
703 movq mm1, [eax+colorB1]
|
rlm@1
|
704 movq mm4, [eax+ebx+color5]
|
rlm@1
|
705 movq mm5, [eax+ebx+ebx+color2]
|
rlm@1
|
706 movq mm6, [eax+ebx+color6]
|
rlm@1
|
707
|
rlm@1
|
; Condition Y (ends up in mm1):
;   (color2 == color5) && (colorB2 == color5) &&
;   (colorB1 != color6) && (colorB3 != color5)
708 pcmpeqw mm5, mm4
|
rlm@1
|
709 pcmpeqw mm2, mm4
|
rlm@1
|
710 pcmpeqw mm1, mm6
|
rlm@1
|
711 pcmpeqw mm3, mm4
|
rlm@1
|
712 pcmpeqw mm1, mm7
|
rlm@1
|
713 pcmpeqw mm3, mm7
|
rlm@1
|
714 pand mm2, mm5
|
rlm@1
|
715 pand mm1, mm3
|
rlm@1
|
716 pand mm1, mm2
|
rlm@1
|
717
|
rlm@1
|
718
|
rlm@1
|
; Remove words claimed by X or Y from the working copies of Mask35 / Mask26.
719 movq mm7, mm0
|
rlm@1
|
720 por mm7, mm1
|
rlm@1
|
721
|
rlm@1
|
722 movq mm4, [Mask35]
|
rlm@1
|
723 movq mm3, [Mask26]
|
rlm@1
|
724
|
rlm@1
|
725 movq mm6, mm4
|
rlm@1
|
726 pand mm6, mm7
|
rlm@1
|
727 pxor mm4, mm6
|
rlm@1
|
728
|
rlm@1
|
729 movq mm6, mm3
|
rlm@1
|
730 pand mm6, mm7
|
rlm@1
|
731 pxor mm3, mm6
|
rlm@1
|
732
|
rlm@1
|
733 movq mm2, mm0
|
rlm@1
|
734 movq mm7, [I5666Pixel]
|
rlm@1
|
735 movq mm6, [I5556Pixel]
|
rlm@1
|
736 movq mm5, [I56Pixel]
|
rlm@1
|
737
|
rlm@1
|
738
|
rlm@1
|
; Per-word selection for final1b:
;   X -> I5666Pixel, Y -> I5556Pixel,
;   remaining Mask35 -> color5, remaining Mask26 -> color6,
;   none of the above -> I56Pixel
739 por mm2, mm4
|
rlm@1
|
740 pand mm4, [eax+ebx+color5]
|
rlm@1
|
741 por mm2, mm3
|
rlm@1
|
742 pand mm3, [eax+ebx+color6]
|
rlm@1
|
743 por mm2, mm1
|
rlm@1
|
744 pand mm0, mm7
|
rlm@1
|
745 pand mm1, mm6
|
rlm@1
|
746 pxor mm7, mm7
|
rlm@1
|
747 pcmpeqw mm2, mm7
|
rlm@1
|
748 por mm0, mm1
|
rlm@1
|
749 por mm3, mm4
|
rlm@1
|
750 pand mm2, mm5
|
rlm@1
|
751 por mm0, mm3
|
rlm@1
|
752 por mm0, mm2
|
rlm@1
|
753 movq [final1b], mm0
|
rlm@1
|
754
|
rlm@1
|
755 ;---------
|
rlm@1
|
756
|
rlm@1
|
; ---- Interleave and store ----
; Each output line alternates "a" and "b" words: a0,b0,a1,b1 in the first
; quadword and a2,b2,a3,b3 in the second.
757 movq mm0, [final1a]
|
rlm@1
|
758 movq mm4, [final2a]
|
rlm@1
|
759 movq mm2, [final1b]
|
rlm@1
|
760 movq mm6, [final2b]
|
rlm@1
|
761
|
rlm@1
|
762
|
rlm@1
|
763 movq mm1, mm0
|
rlm@1
|
764 movq mm5, mm4
|
rlm@1
|
765
|
rlm@1
|
766
|
rlm@1
|
767 punpcklwd mm0, mm2
|
rlm@1
|
768 punpckhwd mm1, mm2
|
rlm@1
|
769
|
rlm@1
|
770 punpcklwd mm4, mm6
|
rlm@1
|
771 punpckhwd mm5, mm6
|
rlm@1
|
772
|
rlm@1
|
773
|
rlm@1
|
; Both variants write 16 bytes at edx (first output line) and 16 bytes at
; edx + dstPitch (second output line); edx itself is preserved by the
; push/pop. The FAR_POINTER variant goes through the fs segment loaded in
; the prologue.
774 %ifdef FAR_POINTER
|
rlm@1
|
775 movq [fs:edx], mm0
|
rlm@1
|
776 movq [fs:edx+8], mm1
|
rlm@1
|
777 push edx
|
rlm@1
|
778 add edx, [ebp+dstPitch]
|
rlm@1
|
779 movq [fs:edx], mm4
|
rlm@1
|
780 movq [fs:edx+8], mm5
|
rlm@1
|
781 pop edx
|
rlm@1
|
782 %else
|
rlm@1
|
783 movq [edx], mm0
|
rlm@1
|
784 movq [edx+8], mm1
|
rlm@1
|
785 push edx
|
rlm@1
|
786 add edx, [ebp+dstPitch]
|
rlm@1
|
787 movq [edx], mm4
|
rlm@1
|
788 movq [edx+8], mm5
|
rlm@1
|
789 pop edx
|
rlm@1
|
790 %endif
|
rlm@1
|
; Loop tail, reached both after a processed group and directly from the
; delta check when nothing changed. Advances all pointers to the next group
; and decides whether to iterate again.
791 .SKIP_PROCESS:
|
rlm@1
|
; The delta pointer lives in its stack argument slot, so it is advanced by
; 8 bytes in place rather than being held in a register.
792 mov ecx, [ebp+deltaPtr]
|
rlm@1
|
793 add ecx, 8
|
rlm@1
|
794 mov [ebp+deltaPtr], ecx
|
rlm@1
|
; Destination advances 16 bytes per pass, source 8 bytes: twice as many
; output bytes as input bytes on each line.
795 add edx, 16
|
rlm@1
|
796 add eax, 8
|
rlm@1
|
797
|
rlm@1
|
; Recover the width counter pushed at .Loop; each pass accounts for 4 units
; of width. The signed jg means a width that is not a multiple of 4 still
; gets one final full pass, and a width <= 0 on entry still runs one pass
; because the test is at the bottom of the loop.
798 pop ecx
|
rlm@1
|
799 sub ecx, 4
|
rlm@1
|
800 cmp ecx, 0
|
rlm@1
|
801 jg near .Loop
|
rlm@1
|
802
|
rlm@1
|
803 ; Restore some stuff
|
rlm@1
|
; Undo the prologue: restore all general registers, then the frame.
804 popad
|
rlm@1
|
805 mov esp, ebp
|
rlm@1
|
806 pop ebp
|
rlm@1
|
; Leave MMX state so the caller can use the x87 FPU again.
807 emms
|
rlm@1
|
808 ret
|
rlm@1
|
809
|
rlm@1
|
810
|
rlm@1
|
811 ;-------------------------------------------------------------------------
|
rlm@1
|
812 ;-------------------------------------------------------------------------
|
rlm@1
|
813 ;-------------------------------------------------------------------------
|
rlm@1
|
814 ;-------------------------------------------------------------------------
|
rlm@1
|
815 ;-------------------------------------------------------------------------
|
rlm@1
|
816 ;-------------------------------------------------------------------------
|
rlm@1
|
817 ;-------------------------------------------------------------------------
|
rlm@1
|
818
|
rlm@1
|
819
|
rlm@1
|
820
|
rlm@1
|
821 %ifdef __DJGPP__
|
rlm@1
|
822 __2xSaISuperEagleLine:
|
rlm@1
|
823 %else
|
rlm@1
|
824 _2xSaISuperEagleLine:
|
rlm@1
|
825 %endif
|
rlm@1
|
826 ; Store some stuff
|
rlm@1
|
827 push ebp
|
rlm@1
|
828 mov ebp, esp
|
rlm@1
|
829 pushad
|
rlm@1
|
830
|
rlm@1
|
831 ; Prepare the destination
|
rlm@1
|
832 %ifdef FAR_POINTER
|
rlm@1
|
833 ; Set the selector
|
rlm@1
|
834 mov eax, [ebp+dstSegment]
|
rlm@1
|
835 mov fs, ax
|
rlm@1
|
836 %endif
|
rlm@1
|
837 mov edx, [ebp+dstOffset] ; edx points to the screen
|
rlm@1
|
838 ; Prepare the source
|
rlm@1
|
839 ; eax points to colorA
|
rlm@1
|
840 mov eax, [ebp+srcPtr]
|
rlm@1
|
841 mov ebx, [ebp+srcPitch]
|
rlm@1
|
842 mov ecx, [ebp+width]
|
rlm@1
|
843 ; eax now points to colorB1
|
rlm@1
|
844 sub eax, ebx
|
rlm@1
|
845
|
rlm@1
|
846 ; Main Loop
|
rlm@1
|
847 .Loop: push ecx
|
rlm@1
|
848
|
rlm@1
|
849 ;-----Check Delta------------------
|
rlm@1
|
850 mov ecx, [ebp+deltaPtr]
|
rlm@1
|
851
|
rlm@1
|
852 movq mm0, [eax+colorB0]
|
rlm@1
|
853 movq mm1, [eax+colorB3]
|
rlm@1
|
854 movq mm2, [eax+ebx+color4]
|
rlm@1
|
855 movq mm3, [eax+ebx+colorS2]
|
rlm@1
|
856 movq mm4, [eax+ebx+ebx+color1]
|
rlm@1
|
857 movq mm5, [eax+ebx+ebx+colorS1]
|
rlm@1
|
858 push eax
|
rlm@1
|
859 add eax, ebx
|
rlm@1
|
860 movq mm6, [eax+ebx+ebx+colorA0]
|
rlm@1
|
861 movq mm7, [eax+ebx+ebx+colorA3]
|
rlm@1
|
862 pop eax
|
rlm@1
|
863
|
rlm@1
|
864 pcmpeqw mm0, [ecx+2+colorB0]
|
rlm@1
|
865 pcmpeqw mm1, [ecx+2+colorB3]
|
rlm@1
|
866 pcmpeqw mm2, [ecx+ebx+2+color4]
|
rlm@1
|
867 pcmpeqw mm3, [ecx+ebx+2+colorS2]
|
rlm@1
|
868 pcmpeqw mm4, [ecx+ebx+ebx+2+color1]
|
rlm@1
|
869 pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1]
|
rlm@1
|
870 add ecx, ebx
|
rlm@1
|
871 pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0]
|
rlm@1
|
872 pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3]
|
rlm@1
|
873 sub ecx, ebx
|
rlm@1
|
874
|
rlm@1
|
875
|
rlm@1
|
876 pand mm0, mm1
|
rlm@1
|
877 pand mm2, mm3
|
rlm@1
|
878 pand mm4, mm5
|
rlm@1
|
879 pand mm6, mm7
|
rlm@1
|
880 pand mm0, mm2
|
rlm@1
|
881 pand mm4, mm6
|
rlm@1
|
882 pxor mm7, mm7
|
rlm@1
|
883 pand mm0, mm4
|
rlm@1
|
884 movq mm6, [eax+colorB0]
|
rlm@1
|
885 pcmpeqw mm7, mm0
|
rlm@1
|
886
|
rlm@1
|
887 movq [ecx+2+colorB0], mm6
|
rlm@1
|
888
|
rlm@1
|
889 packsswb mm7, mm7
|
rlm@1
|
890 movd ecx, mm7
|
rlm@1
|
891 test ecx, ecx
|
rlm@1
|
892 jz near .SKIP_PROCESS
|
rlm@1
|
893
|
rlm@1
|
894 ;End Delta
|
rlm@1
|
895
|
rlm@1
|
896 ;---------------------------------
|
rlm@1
|
897 movq mm0, [eax+ebx+color5]
|
rlm@1
|
898 movq mm1, [eax+ebx+color6]
|
rlm@1
|
899 movq mm2, mm0
|
rlm@1
|
900 movq mm3, mm1
|
rlm@1
|
901 movq mm4, mm0
|
rlm@1
|
902 movq mm5, mm1
|
rlm@1
|
903
|
rlm@1
|
904 pand mm0, [colorMask]
|
rlm@1
|
905 pand mm1, [colorMask]
|
rlm@1
|
906
|
rlm@1
|
907 psrlw mm0, 1
|
rlm@1
|
908 psrlw mm1, 1
|
rlm@1
|
909
|
rlm@1
|
910 pand mm3, [lowPixelMask]
|
rlm@1
|
911 paddw mm0, mm1
|
rlm@1
|
912
|
rlm@1
|
913 pand mm3, mm2
|
rlm@1
|
914 paddw mm0, mm3 ;mm0 contains the interpolated values
|
rlm@1
|
915 movq [I56Pixel], mm0
|
rlm@1
|
916 movq mm7, mm0
|
rlm@1
|
917
|
rlm@1
|
918 ;-------------------
|
rlm@1
|
919 movq mm0, mm7
|
rlm@1
|
920 movq mm1, mm4 ;5,5,5,6
|
rlm@1
|
921 movq mm2, mm0
|
rlm@1
|
922 movq mm3, mm1
|
rlm@1
|
923
|
rlm@1
|
924 pand mm0, [colorMask]
|
rlm@1
|
925 pand mm1, [colorMask]
|
rlm@1
|
926
|
rlm@1
|
927 psrlw mm0, 1
|
rlm@1
|
928 psrlw mm1, 1
|
rlm@1
|
929
|
rlm@1
|
930 pand mm3, [lowPixelMask]
|
rlm@1
|
931 paddw mm0, mm1
|
rlm@1
|
932
|
rlm@1
|
933 pand mm3, mm2
|
rlm@1
|
934 paddw mm0, mm3 ;mm0 contains the interpolated values
|
rlm@1
|
935 movq [product1a], mm0
|
rlm@1
|
936 ;--------------------
|
rlm@1
|
937
|
rlm@1
|
938 movq mm0, mm7
|
rlm@1
|
939 movq mm1, mm5 ;6,6,6,5
|
rlm@1
|
940 movq mm2, mm0
|
rlm@1
|
941 movq mm3, mm1
|
rlm@1
|
942
|
rlm@1
|
943 pand mm0, [colorMask]
|
rlm@1
|
944 pand mm1, [colorMask]
|
rlm@1
|
945
|
rlm@1
|
946 psrlw mm0, 1
|
rlm@1
|
947 psrlw mm1, 1
|
rlm@1
|
948
|
rlm@1
|
949 pand mm3, [lowPixelMask]
|
rlm@1
|
950 paddw mm0, mm1
|
rlm@1
|
951
|
rlm@1
|
952 pand mm3, mm2
|
rlm@1
|
953 paddw mm0, mm3
|
rlm@1
|
954 movq [product1b], mm0
|
rlm@1
|
955
|
rlm@1
|
956 ;-------------------------
|
rlm@1
|
957 ;-------------------------
|
rlm@1
|
958 movq mm0, [eax+ebx+ebx+color2]
|
rlm@1
|
959 movq mm1, [eax+ebx+ebx+color3]
|
rlm@1
|
960 movq mm2, mm0
|
rlm@1
|
961 movq mm3, mm1
|
rlm@1
|
962 movq mm4, mm0
|
rlm@1
|
963 movq mm5, mm1
|
rlm@1
|
964
|
rlm@1
|
965 pand mm0, [colorMask]
|
rlm@1
|
966 pand mm1, [colorMask]
|
rlm@1
|
967
|
rlm@1
|
968 psrlw mm0, 1
|
rlm@1
|
969 psrlw mm1, 1
|
rlm@1
|
970
|
rlm@1
|
971 pand mm3, [lowPixelMask]
|
rlm@1
|
972 paddw mm0, mm1
|
rlm@1
|
973
|
rlm@1
|
974 pand mm3, mm2
|
rlm@1
|
975 paddw mm0, mm3
|
rlm@1
|
976 movq [I23Pixel], mm0
|
rlm@1
|
977 movq mm7, mm0
|
rlm@1
|
978
|
rlm@1
|
979 ;---------------------
|
rlm@1
|
980 movq mm0, mm7
|
rlm@1
|
981 movq mm1, mm4 ;2,2,2,3
|
rlm@1
|
982 movq mm2, mm0
|
rlm@1
|
983 movq mm3, mm1
|
rlm@1
|
984
|
rlm@1
|
985 pand mm0, [colorMask]
|
rlm@1
|
986 pand mm1, [colorMask]
|
rlm@1
|
987
|
rlm@1
|
988 psrlw mm0, 1
|
rlm@1
|
989 psrlw mm1, 1
|
rlm@1
|
990
|
rlm@1
|
991 pand mm3, [lowPixelMask]
|
rlm@1
|
992 paddw mm0, mm1
|
rlm@1
|
993
|
rlm@1
|
994 pand mm3, mm2
|
rlm@1
|
995 paddw mm0, mm3
|
rlm@1
|
996 movq [product2a], mm0
|
rlm@1
|
997
|
rlm@1
|
998 ;----------------------
|
rlm@1
|
999 movq mm0, mm7
|
rlm@1
|
1000 movq mm1, mm5 ;3,3,3,2
|
rlm@1
|
1001 movq mm2, mm0
|
rlm@1
|
1002 movq mm3, mm1
|
rlm@1
|
1003
|
rlm@1
|
1004 pand mm0, [colorMask]
|
rlm@1
|
1005 pand mm1, [colorMask]
|
rlm@1
|
1006
|
rlm@1
|
1007 psrlw mm0, 1
|
rlm@1
|
1008 psrlw mm1, 1
|
rlm@1
|
1009
|
rlm@1
|
1010 pand mm3, [lowPixelMask]
|
rlm@1
|
1011 paddw mm0, mm1
|
rlm@1
|
1012
|
rlm@1
|
1013 pand mm3, mm2
|
rlm@1
|
1014 paddw mm0, mm3
|
rlm@1
|
1015 movq [product2b], mm0
|
rlm@1
|
1016
|
rlm@1
|
1017
|
rlm@1
|
1018 ;////////////////////////////////
|
rlm@1
|
1019 ; Decide which "branch" to take
|
rlm@1
|
1020 ;--------------------------------
|
rlm@1
|
1021 movq mm4, [eax+ebx+color5]
|
rlm@1
|
1022 movq mm5, [eax+ebx+color6]
|
rlm@1
|
1023 movq mm6, [eax+ebx+ebx+color3]
|
rlm@1
|
1024 movq mm7, [eax+ebx+ebx+color2]
|
rlm@1
|
1025
|
rlm@1
|
1026 pxor mm3, mm3
|
rlm@1
|
1027 movq mm0, mm4
|
rlm@1
|
1028 movq mm1, mm5
|
rlm@1
|
1029
|
rlm@1
|
1030 pcmpeqw mm0, mm6
|
rlm@1
|
1031 pcmpeqw mm1, mm7
|
rlm@1
|
1032 pcmpeqw mm1, mm3
|
rlm@1
|
1033 pand mm0, mm1
|
rlm@1
|
1034 movq [Mask35], mm0
|
rlm@1
|
1035
|
rlm@1
|
1036 movq mm0, [eax+ebx+ebx+colorS1]
|
rlm@1
|
1037 movq mm1, [eax+ebx+color4]
|
rlm@1
|
1038 push eax
|
rlm@1
|
1039 add eax, ebx
|
rlm@1
|
1040 movq mm2, [eax+ebx+ebx+colorA2]
|
rlm@1
|
1041 pop eax
|
rlm@1
|
1042 movq mm3, [eax+colorB1]
|
rlm@1
|
1043 pcmpeqw mm0, mm4
|
rlm@1
|
1044 pcmpeqw mm1, mm4
|
rlm@1
|
1045 pcmpeqw mm2, mm4
|
rlm@1
|
1046 pcmpeqw mm3, mm4
|
rlm@1
|
1047 pand mm0, mm1
|
rlm@1
|
1048 pand mm2, mm3
|
rlm@1
|
1049 por mm0, mm2
|
rlm@1
|
1050 pand mm0, [Mask35]
|
rlm@1
|
1051 movq [Mask35b], mm0
|
rlm@1
|
1052
|
rlm@1
|
1053 ;-----------
|
rlm@1
|
1054 pxor mm3, mm3
|
rlm@1
|
1055 movq mm0, mm4
|
rlm@1
|
1056 movq mm1, mm5
|
rlm@1
|
1057
|
rlm@1
|
1058 pcmpeqw mm0, mm6
|
rlm@1
|
1059 pcmpeqw mm1, mm7
|
rlm@1
|
1060 pcmpeqw mm0, mm3
|
rlm@1
|
1061 pand mm0, mm1
|
rlm@1
|
1062 movq [Mask26], mm0
|
rlm@1
|
1063
|
rlm@1
|
1064 movq mm0, [eax+ebx+ebx+color1]
|
rlm@1
|
1065 movq mm1, [eax+ebx+colorS2]
|
rlm@1
|
1066 push eax
|
rlm@1
|
1067 add eax, ebx
|
rlm@1
|
1068 movq mm2, [eax+ebx+ebx+colorA1]
|
rlm@1
|
1069 pop eax
|
rlm@1
|
1070 movq mm3, [eax+colorB2]
|
rlm@1
|
1071 pcmpeqw mm0, mm5
|
rlm@1
|
1072 pcmpeqw mm1, mm5
|
rlm@1
|
1073 pcmpeqw mm2, mm5
|
rlm@1
|
1074 pcmpeqw mm3, mm5
|
rlm@1
|
1075 pand mm0, mm1
|
rlm@1
|
1076 pand mm2, mm3
|
rlm@1
|
1077 por mm0, mm2
|
rlm@1
|
1078 pand mm0, [Mask26]
|
rlm@1
|
1079 movq [Mask26b], mm0
|
rlm@1
|
1080
|
rlm@1
|
1081 ;--------------------
|
rlm@1
|
1082 movq mm0, mm4
|
rlm@1
|
1083 movq mm1, mm5
|
rlm@1
|
1084 movq mm2, mm0
|
rlm@1
|
1085
|
rlm@1
|
1086 pcmpeqw mm2, mm1
|
rlm@1
|
1087 pcmpeqw mm0, mm6
|
rlm@1
|
1088 pcmpeqw mm1, mm7
|
rlm@1
|
1089 pand mm0, mm1
|
rlm@1
|
1090 pand mm2, mm0
|
rlm@1
|
1091 pxor mm0, mm2
|
rlm@1
|
1092 movq mm7, mm0
|
rlm@1
|
1093
|
rlm@1
|
1094 ;------------------
|
rlm@1
|
1095 packsswb mm7, mm7
|
rlm@1
|
1096 movd ecx, mm7
|
rlm@1
|
1097 test ecx, ecx
|
rlm@1
|
1098 jz near .SKIP_GUESS
|
rlm@1
|
1099
|
rlm@1
|
1100 ;---------------------------------------------
|
rlm@1
|
1101 ; Map of the pixels: I|E F|J
|
rlm@1
|
1102 ; G|A B|K
|
rlm@1
|
1103 ; H|C D|L
|
rlm@1
|
1104 ; M|N O|P
|
rlm@1
|
1105 movq mm6, mm0
|
rlm@1
|
1106 movq mm4, [eax+ebx+color5]
|
rlm@1
|
1107 movq mm5, [eax+ebx+color6]
|
rlm@1
|
1108 pxor mm7, mm7
|
rlm@1
|
1109 pand mm6, [ONE]
|
rlm@1
|
1110
|
rlm@1
|
1111 movq mm0, [eax+colorB1]
|
rlm@1
|
1112 movq mm1, [eax+ebx+color4]
|
rlm@1
|
1113 movq mm2, mm0
|
rlm@1
|
1114 movq mm3, mm1
|
rlm@1
|
1115 pcmpeqw mm0, mm4
|
rlm@1
|
1116 pcmpeqw mm1, mm4
|
rlm@1
|
1117 pcmpeqw mm2, mm5
|
rlm@1
|
1118 pcmpeqw mm3, mm5
|
rlm@1
|
1119 pand mm0, mm6
|
rlm@1
|
1120 pand mm1, mm6
|
rlm@1
|
1121 pand mm2, mm6
|
rlm@1
|
1122 pand mm3, mm6
|
rlm@1
|
1123 paddw mm0, mm1
|
rlm@1
|
1124 paddw mm2, mm3
|
rlm@1
|
1125
|
rlm@1
|
1126 pxor mm3, mm3
|
rlm@1
|
1127 pcmpgtw mm0, mm6
|
rlm@1
|
1128 pcmpgtw mm2, mm6
|
rlm@1
|
1129 pcmpeqw mm0, mm3
|
rlm@1
|
1130 pcmpeqw mm2, mm3
|
rlm@1
|
1131 pand mm0, mm6
|
rlm@1
|
1132 pand mm2, mm6
|
rlm@1
|
1133 paddw mm7, mm0
|
rlm@1
|
1134 psubw mm7, mm2
|
rlm@1
|
1135
|
rlm@1
|
1136 movq mm0, [eax+colorB2]
|
rlm@1
|
1137 movq mm1, [eax+ebx+colorS2]
|
rlm@1
|
1138 movq mm2, mm0
|
rlm@1
|
1139 movq mm3, mm1
|
rlm@1
|
1140 pcmpeqw mm0, mm4
|
rlm@1
|
1141 pcmpeqw mm1, mm4
|
rlm@1
|
1142 pcmpeqw mm2, mm5
|
rlm@1
|
1143 pcmpeqw mm3, mm5
|
rlm@1
|
1144 pand mm0, mm6
|
rlm@1
|
1145 pand mm1, mm6
|
rlm@1
|
1146 pand mm2, mm6
|
rlm@1
|
1147 pand mm3, mm6
|
rlm@1
|
1148 paddw mm0, mm1
|
rlm@1
|
1149 paddw mm2, mm3
|
rlm@1
|
1150
|
rlm@1
|
1151 pxor mm3, mm3
|
rlm@1
|
1152 pcmpgtw mm0, mm6
|
rlm@1
|
1153 pcmpgtw mm2, mm6
|
rlm@1
|
1154 pcmpeqw mm0, mm3
|
rlm@1
|
1155 pcmpeqw mm2, mm3
|
rlm@1
|
1156 pand mm0, mm6
|
rlm@1
|
1157 pand mm2, mm6
|
rlm@1
|
1158 paddw mm7, mm0
|
rlm@1
|
1159 psubw mm7, mm2
|
rlm@1
|
1160
|
rlm@1
|
1161 push eax
|
rlm@1
|
1162 add eax, ebx
|
rlm@1
|
1163 movq mm0, [eax+ebx+color1]
|
rlm@1
|
1164 movq mm1, [eax+ebx+ebx+colorA1]
|
rlm@1
|
1165 movq mm2, mm0
|
rlm@1
|
1166 movq mm3, mm1
|
rlm@1
|
1167 pcmpeqw mm0, mm4
|
rlm@1
|
1168 pcmpeqw mm1, mm4
|
rlm@1
|
1169 pcmpeqw mm2, mm5
|
rlm@1
|
1170 pcmpeqw mm3, mm5
|
rlm@1
|
1171 pand mm0, mm6
|
rlm@1
|
1172 pand mm1, mm6
|
rlm@1
|
1173 pand mm2, mm6
|
rlm@1
|
1174 pand mm3, mm6
|
rlm@1
|
1175 paddw mm0, mm1
|
rlm@1
|
1176 paddw mm2, mm3
|
rlm@1
|
1177
|
rlm@1
|
1178 pxor mm3, mm3
|
rlm@1
|
1179 pcmpgtw mm0, mm6
|
rlm@1
|
1180 pcmpgtw mm2, mm6
|
rlm@1
|
1181 pcmpeqw mm0, mm3
|
rlm@1
|
1182 pcmpeqw mm2, mm3
|
rlm@1
|
1183 pand mm0, mm6
|
rlm@1
|
1184 pand mm2, mm6
|
rlm@1
|
1185 paddw mm7, mm0
|
rlm@1
|
1186 psubw mm7, mm2
|
rlm@1
|
1187
|
rlm@1
|
1188 movq mm0, [eax+ebx+colorS1]
|
rlm@1
|
1189 movq mm1, [eax+ebx+ebx+colorA2]
|
rlm@1
|
1190 movq mm2, mm0
|
rlm@1
|
1191 movq mm3, mm1
|
rlm@1
|
1192 pcmpeqw mm0, mm4
|
rlm@1
|
1193 pcmpeqw mm1, mm4
|
rlm@1
|
1194 pcmpeqw mm2, mm5
|
rlm@1
|
1195 pcmpeqw mm3, mm5
|
rlm@1
|
1196 pand mm0, mm6
|
rlm@1
|
1197 pand mm1, mm6
|
rlm@1
|
1198 pand mm2, mm6
|
rlm@1
|
1199 pand mm3, mm6
|
rlm@1
|
1200 paddw mm0, mm1
|
rlm@1
|
1201 paddw mm2, mm3
|
rlm@1
|
1202
|
rlm@1
|
1203 pxor mm3, mm3
|
rlm@1
|
1204 pcmpgtw mm0, mm6
|
rlm@1
|
1205 pcmpgtw mm2, mm6
|
rlm@1
|
1206 pcmpeqw mm0, mm3
|
rlm@1
|
1207 pcmpeqw mm2, mm3
|
rlm@1
|
1208 pand mm0, mm6
|
rlm@1
|
1209 pand mm2, mm6
|
rlm@1
|
1210 paddw mm7, mm0
|
rlm@1
|
1211 psubw mm7, mm2
|
rlm@1
|
1212
|
rlm@1
|
1213 pop eax
|
rlm@1
|
1214 movq mm1, mm7
|
rlm@1
|
1215 pxor mm0, mm0
|
rlm@1
|
1216 pcmpgtw mm7, mm0
|
rlm@1
|
1217 pcmpgtw mm0, mm1
|
rlm@1
|
1218
|
rlm@1
|
1219 por mm7, [Mask35]
|
rlm@1
|
1220 por mm0, [Mask26]
|
rlm@1
|
1221 movq [Mask35], mm7
|
rlm@1
|
1222 movq [Mask26], mm0
|
rlm@1
|
1223
|
rlm@1
|
1224 .SKIP_GUESS:
|
rlm@1
|
1225 ;Start the ASSEMBLY !!!
|
rlm@1
|
1226
|
rlm@1
|
1227 movq mm4, [Mask35]
|
rlm@1
|
1228 movq mm5, [Mask26]
|
rlm@1
|
1229 movq mm6, [Mask35b]
|
rlm@1
|
1230 movq mm7, [Mask26b]
|
rlm@1
|
1231
|
rlm@1
|
1232 movq mm0, [eax+ebx+color5]
|
rlm@1
|
1233 movq mm1, [eax+ebx+color6]
|
rlm@1
|
1234 movq mm2, [eax+ebx+ebx+color2]
|
rlm@1
|
1235 movq mm3, [eax+ebx+ebx+color3]
|
rlm@1
|
1236 pcmpeqw mm0, mm2
|
rlm@1
|
1237 pcmpeqw mm1, mm3
|
rlm@1
|
1238 movq mm2, mm4
|
rlm@1
|
1239 movq mm3, mm5
|
rlm@1
|
1240 por mm0, mm1
|
rlm@1
|
1241 por mm2, mm3
|
rlm@1
|
1242 pand mm2, mm0
|
rlm@1
|
1243 pxor mm0, mm2
|
rlm@1
|
1244 movq mm3, mm0
|
rlm@1
|
1245
|
rlm@1
|
1246 movq mm2, mm0
|
rlm@1
|
1247 pxor mm0, mm0
|
rlm@1
|
1248 por mm2, mm4
|
rlm@1
|
1249 pxor mm4, mm6
|
rlm@1
|
1250 por mm2, mm5
|
rlm@1
|
1251 pxor mm5, mm7
|
rlm@1
|
1252 pcmpeqw mm2, mm0
|
rlm@1
|
1253 ;----------------
|
rlm@1
|
1254
|
rlm@1
|
1255 movq mm0, [eax+ebx+color5]
|
rlm@1
|
1256 movq mm1, mm3
|
rlm@1
|
1257 por mm1, mm4
|
rlm@1
|
1258 por mm1, mm6
|
rlm@1
|
1259 pand mm0, mm1
|
rlm@1
|
1260 movq mm1, mm5
|
rlm@1
|
1261 pand mm1, [I56Pixel]
|
rlm@1
|
1262 por mm0, mm1
|
rlm@1
|
1263 movq mm1, mm7
|
rlm@1
|
1264 pand mm1, [product1b]
|
rlm@1
|
1265 por mm0, mm1
|
rlm@1
|
1266 movq mm1, mm2
|
rlm@1
|
1267 pand mm1, [product1a]
|
rlm@1
|
1268 por mm0, mm1
|
rlm@1
|
1269 movq [final1a], mm0
|
rlm@1
|
1270
|
rlm@1
|
1271 movq mm0, [eax+ebx+color6]
|
rlm@1
|
1272 movq mm1, mm3
|
rlm@1
|
1273 por mm1, mm5
|
rlm@1
|
1274 por mm1, mm7
|
rlm@1
|
1275 pand mm0, mm1
|
rlm@1
|
1276 movq mm1, mm4
|
rlm@1
|
1277 pand mm1, [I56Pixel]
|
rlm@1
|
1278 por mm0, mm1
|
rlm@1
|
1279 movq mm1, mm6
|
rlm@1
|
1280 pand mm1, [product1a]
|
rlm@1
|
1281 por mm0, mm1
|
rlm@1
|
1282 movq mm1, mm2
|
rlm@1
|
1283 pand mm1, [product1b]
|
rlm@1
|
1284 por mm0, mm1
|
rlm@1
|
1285 movq [final1b], mm0
|
rlm@1
|
1286
|
rlm@1
|
1287 movq mm0, [eax+ebx+ebx+color2]
|
rlm@1
|
1288 movq mm1, mm3
|
rlm@1
|
1289 por mm1, mm5
|
rlm@1
|
1290 por mm1, mm7
|
rlm@1
|
1291 pand mm0, mm1
|
rlm@1
|
1292 movq mm1, mm4
|
rlm@1
|
1293 pand mm1, [I23Pixel]
|
rlm@1
|
1294 por mm0, mm1
|
rlm@1
|
1295 movq mm1, mm6
|
rlm@1
|
1296 pand mm1, [product2b]
|
rlm@1
|
1297 por mm0, mm1
|
rlm@1
|
1298 movq mm1, mm2
|
rlm@1
|
1299 pand mm1, [product2a]
|
rlm@1
|
1300 por mm0, mm1
|
rlm@1
|
1301 movq [final2a], mm0
|
rlm@1
|
1302
|
rlm@1
|
1303 movq mm0, [eax+ebx+ebx+color3]
|
rlm@1
|
1304 movq mm1, mm3
|
rlm@1
|
1305 por mm1, mm4
|
rlm@1
|
1306 por mm1, mm6
|
rlm@1
|
1307 pand mm0, mm1
|
rlm@1
|
1308 movq mm1, mm5
|
rlm@1
|
1309 pand mm1, [I23Pixel]
|
rlm@1
|
1310 por mm0, mm1
|
rlm@1
|
1311 movq mm1, mm7
|
rlm@1
|
1312 pand mm1, [product2a]
|
rlm@1
|
1313 por mm0, mm1
|
rlm@1
|
1314 movq mm1, mm2
|
rlm@1
|
1315 pand mm1, [product2b]
|
rlm@1
|
1316 por mm0, mm1
|
rlm@1
|
1317 movq [final2b], mm0
|
rlm@1
|
1318
|
rlm@1
|
1319
|
rlm@1
|
1320 movq mm0, [final1a]
|
rlm@1
|
1321 movq mm2, [final1b]
|
rlm@1
|
1322 movq mm1, mm0
|
rlm@1
|
1323 movq mm4, [final2a]
|
rlm@1
|
1324 movq mm6, [final2b]
|
rlm@1
|
1325 movq mm5, mm4
|
rlm@1
|
1326 punpcklwd mm0, mm2
|
rlm@1
|
1327 punpckhwd mm1, mm2
|
rlm@1
|
1328 punpcklwd mm4, mm6
|
rlm@1
|
1329 punpckhwd mm5, mm6
|
rlm@1
|
1330
|
rlm@1
|
1331
|
rlm@1
|
1332
|
rlm@1
|
1333
|
rlm@1
|
1334 %ifdef FAR_POINTER
|
rlm@1
|
1335 movq [fs:edx], mm0
|
rlm@1
|
1336 movq [fs:edx+8], mm1
|
rlm@1
|
1337 push edx
|
rlm@1
|
1338 add edx, [ebp+dstPitch]
|
rlm@1
|
1339 movq [fs:edx], mm4
|
rlm@1
|
1340 movq [fs:edx+8], mm5
|
rlm@1
|
1341 pop edx
|
rlm@1
|
1342 %else
|
rlm@1
|
1343 movq [edx], mm0
|
rlm@1
|
1344 movq [edx+8], mm1
|
rlm@1
|
1345 push edx
|
rlm@1
|
1346 add edx, [ebp+dstPitch]
|
rlm@1
|
1347 movq [edx], mm4
|
rlm@1
|
1348 movq [edx+8], mm5
|
rlm@1
|
1349 pop edx
|
rlm@1
|
1350 %endif
|
rlm@1
|
1351 .SKIP_PROCESS:
|
rlm@1
|
1352 mov ecx, [ebp+deltaPtr]
|
rlm@1
|
1353 add ecx, 8
|
rlm@1
|
1354 mov [ebp+deltaPtr], ecx
|
rlm@1
|
1355 add edx, 16
|
rlm@1
|
1356 add eax, 8
|
rlm@1
|
1357
|
rlm@1
|
1358 pop ecx
|
rlm@1
|
1359 sub ecx, 4
|
rlm@1
|
1360 cmp ecx, 0
|
rlm@1
|
1361 jg near .Loop
|
rlm@1
|
1362
|
rlm@1
|
1363 ; Restore some stuff
|
rlm@1
|
1364 popad
|
rlm@1
|
1365 mov esp, ebp
|
rlm@1
|
1366 pop ebp
|
rlm@1
|
1367 emms
|
rlm@1
|
1368 ret
|
rlm@1
|
1369
|
rlm@1
|
1370
|
rlm@1
|
1371 ;-------------------------------------------------------------------------
|
rlm@1
|
1372 ;-------------------------------------------------------------------------
|
rlm@1
|
1373 ;-------------------------------------------------------------------------
|
rlm@1
|
1374 ;-------------------------------------------------------------------------
|
rlm@1
|
1375 ;-------------------------------------------------------------------------
|
rlm@1
|
1376 ;-------------------------------------------------------------------------
|
rlm@1
|
1377 ;-------------------------------------------------------------------------
|
rlm@1
|
1378
|
rlm@1
|
1379
|
rlm@1
|
;This is version 0.50
;
; Byte offsets of the 4x4 pixel neighbourhood used by the 2xSaI line routine.
; Pixels are 16 bits wide, so adjacent columns are 2 bytes apart.
;
;   Map of the pixels:          I|E F|J         row 0   [eax + offset]
;                               G|A B|K         row 1   [eax + ebx + offset]
;                               H|C D|L         row 2   [eax + ebx + ebx + offset]
;                               M|N O|P         row 3   (eax advanced by ebx, then as row 2)
;
; The four rows deliberately share the same column offsets; the row itself
; is selected by how many multiples of the pitch (ebx) are added to eax.

; --- row 0 ---------------------------------------------------------------
colorI          equ     -2
colorE          equ     0
colorF          equ     2
colorJ          equ     4

; --- row 1 ---------------------------------------------------------------
colorG          equ     -2
colorA          equ     0
colorB          equ     2
colorK          equ     4

; --- row 2 ---------------------------------------------------------------
colorH          equ     -2
colorC          equ     0
colorD          equ     2
colorL          equ     4

; --- row 3 ---------------------------------------------------------------
colorM          equ     -2
colorN          equ     0
colorO          equ     2
colorP          equ     4
|
rlm@1
|
1400
|
rlm@1
|
;-------------------------------------------------------------------------
; SAI_VOTE_PAIR mem1, mem2      (private helper for the 2xSaI line routine)
;
; Loads two neighbour quadwords and updates the per-lane vote counter in mm7.
;
; In:    mm4 = colorA pixels, mm5 = colorB pixels
;        mm6 = undecided-lane mask ANDed with [ONE]
;        mm7 = running signed vote (per 16-bit lane)
; Out:   mm7 += (mm6 where NOT both neighbours equal A)
;        mm7 -= (mm6 where NOT both neighbours equal B)
;        (the "both equal" test is "match count > mm6"; this reading
;         assumes [ONE] holds 1 in every word -- TODO confirm)
; Clobb: mm0, mm1, mm2, mm3
;-------------------------------------------------------------------------
%macro SAI_VOTE_PAIR 2
        movq    mm0, %1
        movq    mm1, %2
        movq    mm2, mm0
        movq    mm3, mm1
        pcmpeqw mm0, mm4                ; neighbour 1 == A
        pcmpeqw mm1, mm4                ; neighbour 2 == A
        pcmpeqw mm2, mm5                ; neighbour 1 == B
        pcmpeqw mm3, mm5                ; neighbour 2 == B
        pand    mm0, mm6
        pand    mm1, mm6
        pand    mm2, mm6
        pand    mm3, mm6
        paddw   mm0, mm1                ; per-lane count of matches with A
        paddw   mm2, mm3                ; per-lane count of matches with B

        pxor    mm3, mm3
        pcmpgtw mm0, mm6                ; count exceeds mm6
        pcmpgtw mm2, mm6
        pcmpeqw mm0, mm3                ; invert the comparison result
        pcmpeqw mm2, mm3
        pand    mm0, mm6
        pand    mm2, mm6
        paddw   mm7, mm0
        psubw   mm7, mm2
%endmacro

;-------------------------------------------------------------------------
; _2xSaILine  (exported as __2xSaILine when __DJGPP__ is defined)
;
; Scales one line of 16-bit pixels by 2x in both directions using the
; 2xSaI rules, writing two destination lines.
;
; Arguments are read from the stack through the equ offsets defined near
; the top of the file: srcPtr, deltaPtr, srcPitch, width, dstOffset,
; dstPitch (and dstSegment when FAR_POINTER is defined).
;
; Register roles inside the loop:
;   eax = source pointer, one row ABOVE the current row (srcPtr - srcPitch),
;         so [eax+ebx+colorA] is the current pixel
;   ebx = source pitch in bytes (also used as the row stride of the delta
;         buffer)
;   edx = destination pointer (first output row; second row is at
;         edx + dstPitch)
;   ecx = scratch; the remaining-width counter lives on the stack during
;         an iteration
;
; Each iteration handles 4 source pixels (eax += 8) and produces 8 pixels
; on each of two destination rows (edx += 16); the counter drops by 4.
;
; Side effects: updates the delta buffer, advances the deltaPtr argument
; slot on the stack in place, and uses the static quadwords Mask1, Mask2
; and ACPixel as scratch.  All general registers are preserved
; (pushad/popad); the MMX state is released with emms before returning.
;-------------------------------------------------------------------------
%ifdef __DJGPP__
__2xSaILine:
%else
_2xSaILine:
%endif
        ; Store some stuff
        push    ebp
        mov     ebp, esp
        pushad

        ; Prepare the destination
%ifdef FAR_POINTER
        ; Set the selector
        mov     eax, [ebp+dstSegment]
        mov     fs, ax
%endif
        mov     edx, [ebp+dstOffset]    ; edx points to the screen

        ; Prepare the source
        mov     eax, [ebp+srcPtr]       ; eax points to colorA
        mov     ebx, [ebp+srcPitch]
        mov     ecx, [ebp+width]
        sub     eax, ebx                ; eax now points to colorE

        ; Main Loop
.Loop:
        push    ecx                     ; keep remaining width across the body

        ;-----Check Delta------------------
        ; Compare 8 pixels on each of the 4 source rows with the delta
        ; buffer; if nothing differs the block is skipped.
        mov     ecx, [ebp+deltaPtr]

        movq    mm0, [eax+colorI]
        movq    mm1, [eax+colorJ]
        movq    mm2, [eax+ebx+colorG]
        movq    mm3, [eax+ebx+colorK]
        movq    mm4, [eax+ebx+ebx+colorH]
        movq    mm5, [eax+ebx+ebx+colorL]
        push    eax
        add     eax, ebx                ; reach row 3 (eax + 3*ebx)
        movq    mm6, [eax+ebx+ebx+colorM]
        movq    mm7, [eax+ebx+ebx+colorP]
        pop     eax

        pcmpeqw mm0, [ecx+2+colorI]
        pcmpeqw mm1, [ecx+2+colorJ]     ; row 0: J (same offset as colorK)
        pcmpeqw mm2, [ecx+ebx+2+colorG]
        pcmpeqw mm3, [ecx+ebx+2+colorK]
        pcmpeqw mm4, [ecx+ebx+ebx+2+colorH]
        pcmpeqw mm5, [ecx+ebx+ebx+2+colorL]
        add     ecx, ebx
        pcmpeqw mm6, [ecx+ebx+ebx+2+colorM]
        pcmpeqw mm7, [ecx+ebx+ebx+2+colorP]
        sub     ecx, ebx

        pand    mm0, mm1
        pand    mm2, mm3
        pand    mm4, mm5
        pand    mm6, mm7
        pand    mm0, mm2
        pand    mm4, mm6
        pxor    mm7, mm7
        pand    mm0, mm4                ; lane set = all 8 compares equal
        movq    mm6, [eax+colorI]
        pcmpeqw mm7, mm0                ; lane set = something changed

        movq    [ecx+2+colorI], mm6     ; refresh the delta buffer (row 0)

        packsswb mm7, mm7
        movd    ecx, mm7
        test    ecx, ecx
        jz      near .SKIP_PROCESS      ; no lane changed
        ;End Delta

        ;---------------------------------

        ;1
        ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorE) && (colorB == colorL)
        movq    mm0, [eax+ebx+colorA]   ;mm0 and mm1 contain colorA
        movq    mm2, [eax+ebx+colorB]   ;mm2 and mm3 contain colorB
        movq    mm1, mm0
        movq    mm3, mm2

        pcmpeqw mm0, [eax+ebx+ebx+colorD]
        pcmpeqw mm1, [eax+colorE]
        pcmpeqw mm2, [eax+ebx+ebx+colorL]
        pcmpeqw mm3, [eax+ebx+ebx+colorC]

        pand    mm0, mm1
        pxor    mm1, mm1
        pand    mm0, mm2
        pcmpeqw mm3, mm1                ; invert: colorB != colorC
        pand    mm0, mm3                ;result in mm0

        ;if ((colorA == colorC) && (colorB != colorE) && (colorA == colorF) && (colorB == colorJ)
        movq    mm4, [eax+ebx+colorA]   ;mm4 and mm5 contain colorA
        movq    mm6, [eax+ebx+colorB]   ;mm6 and mm7 contain colorB
        movq    mm5, mm4
        movq    mm7, mm6

        pcmpeqw mm4, [eax+ebx+ebx+colorC]
        pcmpeqw mm5, [eax+colorF]
        pcmpeqw mm6, [eax+colorJ]
        pcmpeqw mm7, [eax+colorE]

        pand    mm4, mm5
        pxor    mm5, mm5
        pand    mm4, mm6
        pcmpeqw mm7, mm5                ; invert: colorB != colorE
        pand    mm4, mm7                ;result in mm4

        por     mm0, mm4                ;combine the masks
        movq    [Mask1], mm0

        ;--------------------------------------------

        ;2
        ;if ((colorB == colorC) && (colorA != colorD) && (colorB == colorF) && (colorA == colorH)
        movq    mm0, [eax+ebx+colorB]   ;mm0 and mm1 contain colorB
        movq    mm2, [eax+ebx+colorA]   ;mm2 and mm3 contain colorA
        movq    mm1, mm0
        movq    mm3, mm2

        pcmpeqw mm0, [eax+ebx+ebx+colorC]
        pcmpeqw mm1, [eax+colorF]
        pcmpeqw mm2, [eax+ebx+ebx+colorH]
        pcmpeqw mm3, [eax+ebx+ebx+colorD]

        pand    mm0, mm1
        pxor    mm1, mm1
        pand    mm0, mm2
        pcmpeqw mm3, mm1                ; invert: colorA != colorD
        pand    mm0, mm3                ;result in mm0

        ;if ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI)
        movq    mm4, [eax+ebx+colorB]   ;mm4 and mm5 contain colorB
        movq    mm6, [eax+ebx+colorA]   ;mm6 and mm7 contain colorA
        movq    mm5, mm4
        movq    mm7, mm6

        pcmpeqw mm4, [eax+ebx+ebx+colorD]
        pcmpeqw mm5, [eax+colorE]
        pcmpeqw mm6, [eax+colorI]
        pcmpeqw mm7, [eax+colorF]

        pand    mm4, mm5
        pxor    mm5, mm5
        pand    mm4, mm6
        pcmpeqw mm7, mm5                ; invert: colorA != colorF
        pand    mm4, mm7                ;result in mm4

        por     mm0, mm4                ;combine the masks
        movq    [Mask2], mm0

        ;interpolate colorA and colorB
        ; (A & colorMask)/2 + (B & colorMask)/2 + (A & B & lowPixelMask)
        movq    mm0, [eax+ebx+colorA]
        movq    mm1, [eax+ebx+colorB]

        movq    mm2, mm0
        movq    mm3, mm1

        pand    mm0, [colorMask]
        pand    mm1, [colorMask]

        psrlw   mm0, 1
        psrlw   mm1, 1

        pand    mm3, [lowPixelMask]
        paddw   mm0, mm1

        pand    mm3, mm2
        paddw   mm0, mm3                ;mm0 contains the interpolated values

        ;assemble the pixels
        ; Mask1 lanes take colorA, Mask2 lanes take colorB, the remaining
        ; lanes take the interpolated value.
        movq    mm1, [eax+ebx+colorA]
        movq    mm2, [eax+ebx+colorB]

        movq    mm3, [Mask1]
        movq    mm5, mm1
        movq    mm4, [Mask2]
        movq    mm6, mm1

        pand    mm1, mm3
        por     mm3, mm4
        pxor    mm7, mm7
        pand    mm2, mm4

        pcmpeqw mm3, mm7                ; lanes where neither mask is set
        por     mm1, mm2
        pand    mm0, mm3

        por     mm0, mm1

        punpcklwd mm5, mm0              ; interleave colorA with the new pixels
        punpckhwd mm6, mm0

%ifdef FAR_POINTER
        movq    [fs:edx], mm5
        movq    [fs:edx+8], mm6
%else
        movq    [edx], mm5
        movq    [edx+8], mm6
%endif

        ;------------------------------------------------
        ;        Create the Nextline
        ;------------------------------------------------
        ;3
        ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorG) && (colorC == colorO)
        movq    mm0, [eax+ebx+colorA]           ;mm0 and mm1 contain colorA
        movq    mm2, [eax+ebx+ebx+colorC]       ;mm2 and mm3 contain colorC
        movq    mm1, mm0
        movq    mm3, mm2

        push    eax
        add     eax, ebx                ; eax now addresses row 1 directly
        pcmpeqw mm0, [eax+ebx+colorD]
        pcmpeqw mm1, [eax+colorG]
        pcmpeqw mm2, [eax+ebx+ebx+colorO]
        pcmpeqw mm3, [eax+colorB]
        pop     eax

        pand    mm0, mm1
        pxor    mm1, mm1
        pand    mm0, mm2
        pcmpeqw mm3, mm1                ; invert: colorC != colorB
        pand    mm0, mm3                ;result in mm0

        ;if ((colorA == colorB) && (colorG != colorC) && (colorA == colorH) && (colorC == colorM)
        movq    mm4, [eax+ebx+colorA]           ;mm4 and mm5 contain colorA
        movq    mm6, [eax+ebx+ebx+colorC]       ;mm6 and mm7 contain colorC
        movq    mm5, mm4
        movq    mm7, mm6

        push    eax
        add     eax, ebx                ; eax now addresses row 1 directly
        pcmpeqw mm4, [eax+ebx+colorH]
        pcmpeqw mm5, [eax+colorB]
        pcmpeqw mm6, [eax+ebx+ebx+colorM]
        pcmpeqw mm7, [eax+colorG]
        pop     eax

        pand    mm4, mm5
        pxor    mm5, mm5
        pand    mm4, mm6
        pcmpeqw mm7, mm5                ; invert: colorC != colorG
        pand    mm4, mm7                ;result in mm4

        por     mm0, mm4                ;combine the masks
        movq    [Mask1], mm0
        ;--------------------------------------------

        ;4
        ;if ((colorB == colorC) && (colorA != colorD) && (colorC == colorH) && (colorA == colorF)
        movq    mm0, [eax+ebx+ebx+colorC]       ;mm0 and mm1 contain colorC
        movq    mm2, [eax+ebx+colorA]           ;mm2 and mm3 contain colorA
        movq    mm1, mm0
        movq    mm3, mm2

        pcmpeqw mm0, [eax+ebx+colorB]
        pcmpeqw mm1, [eax+ebx+ebx+colorH]
        pcmpeqw mm2, [eax+colorF]
        pcmpeqw mm3, [eax+ebx+ebx+colorD]

        pand    mm0, mm1
        pxor    mm1, mm1
        pand    mm0, mm2
        pcmpeqw mm3, mm1                ; invert: colorA != colorD
        pand    mm0, mm3                ;result in mm0

        ;if ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI)
        movq    mm4, [eax+ebx+ebx+colorC]       ;mm4 and mm5 contain colorC
        movq    mm6, [eax+ebx+colorA]           ;mm6 and mm7 contain colorA
        movq    mm5, mm4
        movq    mm7, mm6

        pcmpeqw mm4, [eax+ebx+ebx+colorD]
        pcmpeqw mm5, [eax+ebx+colorG]
        pcmpeqw mm6, [eax+colorI]
        pcmpeqw mm7, [eax+ebx+ebx+colorH]

        pand    mm4, mm5
        pxor    mm5, mm5
        pand    mm4, mm6
        pcmpeqw mm7, mm5                ; invert: colorA != colorH
        pand    mm4, mm7                ;result in mm4

        por     mm0, mm4                ;combine the masks
        movq    [Mask2], mm0
        ;----------------------------------------------

        ;interpolate colorA and colorC
        movq    mm0, [eax+ebx+colorA]
        movq    mm1, [eax+ebx+ebx+colorC]

        movq    mm2, mm0
        movq    mm3, mm1

        pand    mm0, [colorMask]
        pand    mm1, [colorMask]

        psrlw   mm0, 1
        psrlw   mm1, 1

        pand    mm3, [lowPixelMask]
        paddw   mm0, mm1

        pand    mm3, mm2
        paddw   mm0, mm3                ;mm0 contains the interpolated values
        ;-------------

        ;assemble the pixels
        ; Mask1 lanes take colorA, Mask2 lanes take colorC, the remaining
        ; lanes take the interpolated value.
        movq    mm1, [eax+ebx+colorA]
        movq    mm2, [eax+ebx+ebx+colorC]

        movq    mm3, [Mask1]
        movq    mm4, [Mask2]

        pand    mm1, mm3
        pand    mm2, mm4

        por     mm3, mm4
        pxor    mm7, mm7
        por     mm1, mm2

        pcmpeqw mm3, mm7                ; lanes where neither mask is set
        pand    mm0, mm3
        por     mm0, mm1
        movq    [ACPixel], mm0          ; left pixel of the second output row

        ;////////////////////////////////
        ; Decide which "branch" to take
        ;--------------------------------
        movq    mm0, [eax+ebx+colorA]
        movq    mm1, [eax+ebx+colorB]
        movq    mm6, mm0
        movq    mm7, mm1
        pcmpeqw mm0, [eax+ebx+ebx+colorD]       ; colorA == colorD
        pcmpeqw mm1, [eax+ebx+ebx+colorC]       ; colorB == colorC
        pcmpeqw mm6, mm7                        ; colorA == colorB

        movq    mm2, mm0
        movq    mm3, mm0

        pand    mm0, mm1        ;colorA == colorD && colorB == colorC
        pxor    mm7, mm7

        pcmpeqw mm2, mm7
        pand    mm6, mm0                ; all four pixels equal
        pand    mm2, mm1        ;colorA != colorD && colorB == colorC

        pcmpeqw mm1, mm7

        pand    mm1, mm3        ;colorA == colorD && colorB != colorC
        pxor    mm0, mm6                ; both diagonals match but A != B
        por     mm1, mm6
        movq    mm7, mm0
        movq    [Mask2], mm2
        packsswb mm7, mm7
        movq    [Mask1], mm1

        movd    ecx, mm7
        test    ecx, ecx
        jz      near .SKIP_GUESS        ; no ambiguous lane -> no vote needed

        ;---------------------------------------------
        ; Map of the pixels:            I|E F|J
        ;                               G|A B|K
        ;                               H|C D|L
        ;                               M|N O|P
        ; Ambiguous lanes are settled by a vote over the outer neighbours.
        movq    mm6, mm0
        movq    mm4, [eax+ebx+colorA]
        movq    mm5, [eax+ebx+colorB]
        pxor    mm7, mm7                ; vote accumulator
        pand    mm6, [ONE]

        SAI_VOTE_PAIR [eax+colorE], [eax+ebx+colorG]
        SAI_VOTE_PAIR [eax+colorF], [eax+ebx+colorK]

        push    eax
        add     eax, ebx                ; rows 2 and 3 are now within reach
        SAI_VOTE_PAIR [eax+ebx+colorH], [eax+ebx+ebx+colorN]
        SAI_VOTE_PAIR [eax+ebx+colorL], [eax+ebx+ebx+colorO]
        pop     eax

        movq    mm1, mm7
        pxor    mm0, mm0
        pcmpgtw mm7, mm0                ; vote > 0
        pcmpgtw mm0, mm1                ; vote < 0

        por     mm7, [Mask1]
        por     mm0, [Mask2]
        movq    [Mask1], mm7
        movq    [Mask2], mm0

.SKIP_GUESS:
        ;----------------------------
        ;interpolate A, B, C and D
        ; Sum of the four pixels with their low two bits per channel masked
        ; off and shifted right by 2, plus the carry recovered from the
        ; summed low bits.
        movq    mm0, [eax+ebx+colorA]
        movq    mm1, [eax+ebx+colorB]
        movq    mm4, mm0
        movq    mm2, [eax+ebx+ebx+colorC]
        movq    mm5, mm1
        movq    mm3, [qcolorMask]
        movq    mm6, mm2
        movq    mm7, [qlowpixelMask]

        pand    mm0, mm3
        pand    mm1, mm3
        pand    mm2, mm3
        pand    mm3, [eax+ebx+ebx+colorD]

        psrlw   mm0, 2
        pand    mm4, mm7
        psrlw   mm1, 2
        pand    mm5, mm7
        psrlw   mm2, 2
        pand    mm6, mm7
        psrlw   mm3, 2
        pand    mm7, [eax+ebx+ebx+colorD]

        paddw   mm0, mm1
        paddw   mm2, mm3

        paddw   mm4, mm5
        paddw   mm6, mm7

        paddw   mm4, mm6
        paddw   mm0, mm2
        psrlw   mm4, 2
        pand    mm4, [qlowpixelMask]
        paddw   mm0, mm4        ;mm0 contains the interpolated value of A, B, C and D

        ;\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
        ;assemble the pixels
        ; Mask1 lanes take colorA, Mask2 lanes take colorB, the remaining
        ; lanes take the four-pixel average.
        movq    mm1, [Mask1]
        movq    mm2, [Mask2]
        movq    mm4, [eax+ebx+colorA]
        movq    mm5, [eax+ebx+colorB]
        pand    mm4, mm1
        pand    mm5, mm2

        pxor    mm7, mm7
        por     mm1, mm2
        por     mm4, mm5
        pcmpeqw mm1, mm7                ; lanes where neither mask is set
        pand    mm0, mm1
        por     mm4, mm0        ;mm4 contains the diagonal pixels

        movq    mm0, [ACPixel]
        movq    mm1, mm0
        punpcklwd mm0, mm4              ; interleave A/C pixels with diagonals
        punpckhwd mm1, mm4

        push    edx
        add     edx, [ebp+dstPitch]     ; second destination row

%ifdef FAR_POINTER
        movq    [fs:edx], mm0
        movq    [fs:edx+8], mm1
%else
        movq    [edx], mm0
        movq    [edx+8], mm1
%endif
        pop     edx

.SKIP_PROCESS:
        add     dword [ebp+deltaPtr], 8 ; delta advances 4 pixels
        add     edx, 16                 ; destination advances 8 pixels
        add     eax, 8                  ; source advances 4 pixels

        pop     ecx                     ; remaining width
        sub     ecx, 4
        test    ecx, ecx
        jg      near .Loop

        ; Restore some stuff
        popad
        mov     esp, ebp
        pop     ebp
        emms
        ret
|
rlm@1
|
1983
|
rlm@1
|
1984 ;-------------------------------------------------------------------------
|
rlm@1
|
1985 ;-------------------------------------------------------------------------
|
rlm@1
|
1986 ;-------------------------------------------------------------------------
|
rlm@1
|
1987 ;-------------------------------------------------------------------------
|
rlm@1
|
1988 ;-------------------------------------------------------------------------
|
rlm@1
|
1989 ;-------------------------------------------------------------------------
|
rlm@1
|
1990 ;-------------------------------------------------------------------------
|
rlm@1
|
1991
|
rlm@1
|
;-------------------------------------------------------------------------
; Init_2xSaIMMX  (exported as _Init_2xSaIMMX when __DJGPP__ is defined)
;
; Selects the pixel-format masks used by the MMX scaler routines.
;
; In:    [ebp+8] = PixelFormat, either 555 or 565 (decimal)
; Out:   eax = 0 when the masks were written,
;        eax = 1 when PixelFormat is neither 555 nor 565 (masks untouched)
; Clobb: eax, flags
;
; Writes the 8-byte variables colorMask, lowPixelMask, qcolorMask and
; qlowpixelMask (each mask is the same 16-bit pattern repeated four times):
;   colorMask      every channel with its lowest bit cleared
;   lowPixelMask   only the lowest bit of every channel
;   qcolorMask     every channel with its two lowest bits cleared
;   qlowpixelMask  only the two lowest bits of every channel
;-------------------------------------------------------------------------
%ifdef __DJGPP__
_Init_2xSaIMMX:
%else
Init_2xSaIMMX:
%endif
        push    ebp
        mov     ebp, esp

        ; MMX detection is disabled; the original author's note reads
        ; "Damn thing doesn't work".
        ; NOTE(review): cpuid also overwrites ebx and ecx, which this
        ; routine does not save -- possibly related; confirm before
        ; re-enabling.
;       mov     eax, 1
;       cpuid
;       test    edx, 0x00800000         ;test bit 23
;       jz      end2                    ;bit not set => no MMX detected

        mov     eax, [ebp+8]            ;PixelFormat
        cmp     eax, 555
        je      Bits555
        cmp     eax, 565
        je      Bits565
end2:
        mov     eax, 1                  ; unsupported format
        jmp     end3

Bits555:
        mov     dword [colorMask],       0x7BDE7BDE
        mov     dword [colorMask+4],     0x7BDE7BDE
        mov     dword [lowPixelMask],    0x04210421
        mov     dword [lowPixelMask+4],  0x04210421
        mov     dword [qcolorMask],      0x739C739C
        mov     dword [qcolorMask+4],    0x739C739C
        mov     dword [qlowpixelMask],   0x0C630C63
        mov     dword [qlowpixelMask+4], 0x0C630C63
        xor     eax, eax                ; success
        jmp     end3

Bits565:
        mov     dword [colorMask],       0xF7DEF7DE
        mov     dword [colorMask+4],     0xF7DEF7DE
        mov     dword [lowPixelMask],    0x08210821
        mov     dword [lowPixelMask+4],  0x08210821
        mov     dword [qcolorMask],      0xE79CE79C
        mov     dword [qcolorMask+4],    0xE79CE79C
        mov     dword [qlowpixelMask],   0x18631863
        mov     dword [qlowpixelMask+4], 0x18631863
        xor     eax, eax                ; success
                                        ; falls through to end3
end3:
        pop     ebp
        ret
|
rlm@1
|
2060
|
rlm@1
|
2061
|
rlm@1
|
2062 ;-------------------------------------------------------------------------
|
rlm@1
|
2063 ;-------------------------------------------------------------------------
|
rlm@1
|
2064 ;-------------------------------------------------------------------------
|
rlm@1
|
2065 ;-------------------------------------------------------------------------
|
rlm@1
|
2066 ;-------------------------------------------------------------------------
|
rlm@1
|
2067 ;-------------------------------------------------------------------------
|
rlm@1
|
2068 ;-------------------------------------------------------------------------
|
rlm@1
|
2069
|
rlm@1
|
SECTION .data ALIGN = 32
; 64-bit constants. Every entry is a single 16-bit pattern repeated in all
; four words of the quadword.
;
; Init_2xSaIMMX rewrites colorMask, lowPixelMask, qcolorMask and
; qlowpixelMask at run time; the values assembled here are the same ones it
; stores for pixel format 565.
colorMask:      times 4 dw 0xF7DE       ; bits 0, 5 and 11 clear, all others set
lowPixelMask:   times 4 dw 0x0821       ; bits 0, 5 and 11 only

qcolorMask:     times 4 dw 0xE79C       ; bits 0-1, 5-6 and 11-12 clear
qlowpixelMask:  times 4 dw 0x1863       ; bits 0-1, 5-6 and 11-12 only

darkenMask:     times 4 dw 0xC718       ; bits 3-4, 8-10 and 14-15
GreenMask:      times 4 dw 0x07E0       ; bits 5-10
RedBlueMask:    times 4 dw 0xF81F       ; bits 0-4 and 11-15

FALSE:          times 4 dw 0x0000       ; all bits clear
TRUE:           times 4 dw 0xFFFF       ; all bits set
ONE:            times 4 dw 0x0001       ; the value 1 in each word
|
rlm@1
|
2085
|
rlm@1
|
2086
|
rlm@1
|
SECTION .bss ALIGN = 32
; Uninitialised work variables, one quadword (8 bytes) each.
; The order below fixes their layout inside the section; keep it unchanged.
ACPixel:        resq 1
Mask1:          resq 1
Mask2:          resq 1

I56Pixel:       resq 1
I23Pixel:       resq 1
I5556Pixel:     resq 1
I2223Pixel:     resq 1
I5666Pixel:     resq 1
I2333Pixel:     resq 1
Mask26:         resq 1
Mask35:         resq 1
Mask26b:        resq 1
Mask35b:        resq 1
product1a:      resq 1
product1b:      resq 1
product2a:      resq 1
product2b:      resq 1
final1a:        resq 1
final1b:        resq 1
final2a:        resq 1
final2b:        resq 1
|