Mercurial > vba-linux
comparison src/win32/7zip/7z/C/BwtSort.c @ 1:f9f4f1b99eed
importing src directory
author | Robert McIntyre <rlm@mit.edu> |
---|---|
date | Sat, 03 Mar 2012 10:31:27 -0600 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:8ced16adf2e1 | 1:f9f4f1b99eed |
---|---|
1 /* BwtSort.c -- BWT block sorting | |
2 2008-08-17 | |
3 Igor Pavlov | |
4 Public domain */ | |
5 | |
6 #include "BwtSort.h" | |
7 #include "Sort.h" | |
8 | |
9 /* #define BLOCK_SORT_USE_HEAP_SORT */ | |
10 | |
11 #define NO_INLINE MY_FAST_CALL | |
12 | |
13 /* Don't change it !!! */ | |
14 #define kNumHashBytes 2 | |
15 #define kNumHashValues (1 << (kNumHashBytes * 8)) | |
16 | |
17 /* kNumRefBitsMax must be < (kNumHashBytes * 8) = 16 */ | |
18 #define kNumRefBitsMax 12 | |
19 | |
20 #define BS_TEMP_SIZE kNumHashValues | |
21 | |
22 #ifdef BLOCK_SORT_EXTERNAL_FLAGS | |
23 | |
24 /* 32 Flags in UInt32 word */ | |
25 #define kNumFlagsBits 5 | |
26 #define kNumFlagsInWord (1 << kNumFlagsBits) | |
27 #define kFlagsMask (kNumFlagsInWord - 1) | |
28 #define kAllFlags 0xFFFFFFFF | |
29 | |
30 #else | |
31 | |
32 #define kNumBitsMax 20 | |
33 #define kIndexMask ((1 << kNumBitsMax) - 1) | |
34 #define kNumExtraBits (32 - kNumBitsMax) | |
35 #define kNumExtra0Bits (kNumExtraBits - 2) | |
36 #define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1) | |
37 | |
38 #define SetFinishedGroupSize(p, size) \ | |
39 { *(p) |= ((((size) - 1) & kNumExtra0Mask) << kNumBitsMax); \ | |
40 if ((size) > (1 << kNumExtra0Bits)) { \ | |
41 *(p) |= 0x40000000; *((p) + 1) |= ((((size) - 1)>> kNumExtra0Bits) << kNumBitsMax); } } \ | |
42 | |
43 static void SetGroupSize(UInt32 *p, UInt32 size) | |
44 { | |
45 if (--size == 0) | |
46 return; | |
47 *p |= 0x80000000 | ((size & kNumExtra0Mask) << kNumBitsMax); | |
48 if (size >= (1 << kNumExtra0Bits)) | |
49 { | |
50 *p |= 0x40000000; | |
51 p[1] |= ((size >> kNumExtra0Bits) << kNumBitsMax); | |
52 } | |
53 } | |
54 | |
55 #endif | |
56 | |
57 /* | |
58 SortGroup - is recursive Range-Sort function with HeapSort optimization for small blocks | |
59 "range" is not real range. It's only for optimization. | |
60 returns: 1 - if there are groups, 0 - no more groups | |
61 */ | |
62 | |
63 UInt32 NO_INLINE SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices | |
64 #ifndef BLOCK_SORT_USE_HEAP_SORT | |
65 , UInt32 left, UInt32 range | |
66 #endif | |
67 ) | |
68 { | |
69 UInt32 *ind2 = Indices + groupOffset; | |
70 UInt32 *Groups; | |
71 if (groupSize <= 1) | |
72 { | |
73 /* | |
74 #ifndef BLOCK_SORT_EXTERNAL_FLAGS | |
75 SetFinishedGroupSize(ind2, 1); | |
76 #endif | |
77 */ | |
78 return 0; | |
79 } | |
80 Groups = Indices + BlockSize + BS_TEMP_SIZE; | |
81 if (groupSize <= ((UInt32)1 << NumRefBits) | |
82 #ifndef BLOCK_SORT_USE_HEAP_SORT | |
83 && groupSize <= range | |
84 #endif | |
85 ) | |
86 { | |
87 UInt32 *temp = Indices + BlockSize; | |
88 UInt32 j; | |
89 UInt32 mask, thereAreGroups, group, cg; | |
90 { | |
91 UInt32 gPrev; | |
92 UInt32 gRes = 0; | |
93 { | |
94 UInt32 sp = ind2[0] + NumSortedBytes; | |
95 if (sp >= BlockSize) sp -= BlockSize; | |
96 gPrev = Groups[sp]; | |
97 temp[0] = (gPrev << NumRefBits); | |
98 } | |
99 | |
100 for (j = 1; j < groupSize; j++) | |
101 { | |
102 UInt32 sp = ind2[j] + NumSortedBytes; | |
103 UInt32 g; | |
104 if (sp >= BlockSize) sp -= BlockSize; | |
105 g = Groups[sp]; | |
106 temp[j] = (g << NumRefBits) | j; | |
107 gRes |= (gPrev ^ g); | |
108 } | |
109 if (gRes == 0) | |
110 { | |
111 #ifndef BLOCK_SORT_EXTERNAL_FLAGS | |
112 SetGroupSize(ind2, groupSize); | |
113 #endif | |
114 return 1; | |
115 } | |
116 } | |
117 | |
118 HeapSort(temp, groupSize); | |
119 mask = ((1 << NumRefBits) - 1); | |
120 thereAreGroups = 0; | |
121 | |
122 group = groupOffset; | |
123 cg = (temp[0] >> NumRefBits); | |
124 temp[0] = ind2[temp[0] & mask]; | |
125 | |
126 { | |
127 #ifdef BLOCK_SORT_EXTERNAL_FLAGS | |
128 UInt32 *Flags = Groups + BlockSize; | |
129 #else | |
130 UInt32 prevGroupStart = 0; | |
131 #endif | |
132 | |
133 for (j = 1; j < groupSize; j++) | |
134 { | |
135 UInt32 val = temp[j]; | |
136 UInt32 cgCur = (val >> NumRefBits); | |
137 | |
138 if (cgCur != cg) | |
139 { | |
140 cg = cgCur; | |
141 group = groupOffset + j; | |
142 | |
143 #ifdef BLOCK_SORT_EXTERNAL_FLAGS | |
144 { | |
145 UInt32 t = group - 1; | |
146 Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); | |
147 } | |
148 #else | |
149 SetGroupSize(temp + prevGroupStart, j - prevGroupStart); | |
150 prevGroupStart = j; | |
151 #endif | |
152 } | |
153 else | |
154 thereAreGroups = 1; | |
155 { | |
156 UInt32 ind = ind2[val & mask]; | |
157 temp[j] = ind; | |
158 Groups[ind] = group; | |
159 } | |
160 } | |
161 | |
162 #ifndef BLOCK_SORT_EXTERNAL_FLAGS | |
163 SetGroupSize(temp + prevGroupStart, j - prevGroupStart); | |
164 #endif | |
165 } | |
166 | |
167 for (j = 0; j < groupSize; j++) | |
168 ind2[j] = temp[j]; | |
169 return thereAreGroups; | |
170 } | |
171 | |
172 /* Check that all strings are in one group (cannot sort) */ | |
173 { | |
174 UInt32 group, j; | |
175 UInt32 sp = ind2[0] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; | |
176 group = Groups[sp]; | |
177 for (j = 1; j < groupSize; j++) | |
178 { | |
179 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; | |
180 if (Groups[sp] != group) | |
181 break; | |
182 } | |
183 if (j == groupSize) | |
184 { | |
185 #ifndef BLOCK_SORT_EXTERNAL_FLAGS | |
186 SetGroupSize(ind2, groupSize); | |
187 #endif | |
188 return 1; | |
189 } | |
190 } | |
191 | |
192 #ifndef BLOCK_SORT_USE_HEAP_SORT | |
193 { | |
194 /* ---------- Range Sort ---------- */ | |
195 UInt32 i; | |
196 UInt32 mid; | |
197 for (;;) | |
198 { | |
199 UInt32 j; | |
200 if (range <= 1) | |
201 { | |
202 #ifndef BLOCK_SORT_EXTERNAL_FLAGS | |
203 SetGroupSize(ind2, groupSize); | |
204 #endif | |
205 return 1; | |
206 } | |
207 mid = left + ((range + 1) >> 1); | |
208 j = groupSize; | |
209 i = 0; | |
210 do | |
211 { | |
212 UInt32 sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; | |
213 if (Groups[sp] >= mid) | |
214 { | |
215 for (j--; j > i; j--) | |
216 { | |
217 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; | |
218 if (Groups[sp] < mid) | |
219 { | |
220 UInt32 temp = ind2[i]; ind2[i] = ind2[j]; ind2[j] = temp; | |
221 break; | |
222 } | |
223 } | |
224 if (i >= j) | |
225 break; | |
226 } | |
227 } | |
228 while (++i < j); | |
229 if (i == 0) | |
230 { | |
231 range = range - (mid - left); | |
232 left = mid; | |
233 } | |
234 else if (i == groupSize) | |
235 range = (mid - left); | |
236 else | |
237 break; | |
238 } | |
239 | |
240 #ifdef BLOCK_SORT_EXTERNAL_FLAGS | |
241 { | |
242 UInt32 t = (groupOffset + i - 1); | |
243 UInt32 *Flags = Groups + BlockSize; | |
244 Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); | |
245 } | |
246 #endif | |
247 | |
248 { | |
249 UInt32 j; | |
250 for (j = i; j < groupSize; j++) | |
251 Groups[ind2[j]] = groupOffset + i; | |
252 } | |
253 | |
254 { | |
255 UInt32 res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left); | |
256 return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left)); | |
257 } | |
258 | |
259 } | |
260 | |
261 #else | |
262 | |
263 /* ---------- Heap Sort ---------- */ | |
264 | |
265 { | |
266 UInt32 j; | |
267 for (j = 0; j < groupSize; j++) | |
268 { | |
269 UInt32 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; | |
270 ind2[j] = sp; | |
271 } | |
272 | |
273 HeapSortRef(ind2, Groups, groupSize); | |
274 | |
275 /* Write Flags */ | |
276 { | |
277 UInt32 sp = ind2[0]; | |
278 UInt32 group = Groups[sp]; | |
279 | |
280 #ifdef BLOCK_SORT_EXTERNAL_FLAGS | |
281 UInt32 *Flags = Groups + BlockSize; | |
282 #else | |
283 UInt32 prevGroupStart = 0; | |
284 #endif | |
285 | |
286 for (j = 1; j < groupSize; j++) | |
287 { | |
288 sp = ind2[j]; | |
289 if (Groups[sp] != group) | |
290 { | |
291 group = Groups[sp]; | |
292 #ifdef BLOCK_SORT_EXTERNAL_FLAGS | |
293 { | |
294 UInt32 t = groupOffset + j - 1; | |
295 Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); | |
296 } | |
297 #else | |
298 SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart); | |
299 prevGroupStart = j; | |
300 #endif | |
301 } | |
302 } | |
303 | |
304 #ifndef BLOCK_SORT_EXTERNAL_FLAGS | |
305 SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart); | |
306 #endif | |
307 } | |
308 { | |
309 /* Write new Groups values and Check that there are groups */ | |
310 UInt32 thereAreGroups = 0; | |
311 for (j = 0; j < groupSize; j++) | |
312 { | |
313 UInt32 group = groupOffset + j; | |
314 #ifndef BLOCK_SORT_EXTERNAL_FLAGS | |
315 UInt32 subGroupSize = ((ind2[j] & ~0xC0000000) >> kNumBitsMax); | |
316 if ((ind2[j] & 0x40000000) != 0) | |
317 subGroupSize += ((ind2[j + 1] >> kNumBitsMax) << kNumExtra0Bits); | |
318 subGroupSize++; | |
319 for (;;) | |
320 { | |
321 UInt32 original = ind2[j]; | |
322 UInt32 sp = original & kIndexMask; | |
323 if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes; | |
324 ind2[j] = sp | (original & ~kIndexMask); | |
325 Groups[sp] = group; | |
326 if (--subGroupSize == 0) | |
327 break; | |
328 j++; | |
329 thereAreGroups = 1; | |
330 } | |
331 #else | |
332 UInt32 *Flags = Groups + BlockSize; | |
333 for (;;) | |
334 { | |
335 UInt32 sp = ind2[j]; if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes; | |
336 ind2[j] = sp; | |
337 Groups[sp] = group; | |
338 if ((Flags[(groupOffset + j) >> kNumFlagsBits] & (1 << ((groupOffset + j) & kFlagsMask))) == 0) | |
339 break; | |
340 j++; | |
341 thereAreGroups = 1; | |
342 } | |
343 #endif | |
344 } | |
345 return thereAreGroups; | |
346 } | |
347 } | |
348 #endif | |
349 } | |
350 | |
351 /* conditions: blockSize > 0 */ | |
352 UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize) | |
353 { | |
354 UInt32 *counters = Indices + blockSize; | |
355 UInt32 i; | |
356 UInt32 *Groups; | |
357 #ifdef BLOCK_SORT_EXTERNAL_FLAGS | |
358 UInt32 *Flags; | |
359 #endif | |
360 | |
361 /* Radix-Sort for 2 bytes */ | |
362 for (i = 0; i < kNumHashValues; i++) | |
363 counters[i] = 0; | |
364 for (i = 0; i < blockSize - 1; i++) | |
365 counters[((UInt32)data[i] << 8) | data[i + 1]]++; | |
366 counters[((UInt32)data[i] << 8) | data[0]]++; | |
367 | |
368 Groups = counters + BS_TEMP_SIZE; | |
369 #ifdef BLOCK_SORT_EXTERNAL_FLAGS | |
370 Flags = Groups + blockSize; | |
371 { | |
372 UInt32 numWords = (blockSize + kFlagsMask) >> kNumFlagsBits; | |
373 for (i = 0; i < numWords; i++) | |
374 Flags[i] = kAllFlags; | |
375 } | |
376 #endif | |
377 | |
378 { | |
379 UInt32 sum = 0; | |
380 for (i = 0; i < kNumHashValues; i++) | |
381 { | |
382 UInt32 groupSize = counters[i]; | |
383 if (groupSize > 0) | |
384 { | |
385 #ifdef BLOCK_SORT_EXTERNAL_FLAGS | |
386 UInt32 t = sum + groupSize - 1; | |
387 Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); | |
388 #endif | |
389 sum += groupSize; | |
390 } | |
391 counters[i] = sum - groupSize; | |
392 } | |
393 | |
394 for (i = 0; i < blockSize - 1; i++) | |
395 Groups[i] = counters[((UInt32)data[i] << 8) | data[i + 1]]; | |
396 Groups[i] = counters[((UInt32)data[i] << 8) | data[0]]; | |
397 | |
398 for (i = 0; i < blockSize - 1; i++) | |
399 Indices[counters[((UInt32)data[i] << 8) | data[i + 1]]++] = i; | |
400 Indices[counters[((UInt32)data[i] << 8) | data[0]]++] = i; | |
401 | |
402 #ifndef BLOCK_SORT_EXTERNAL_FLAGS | |
403 { | |
404 UInt32 prev = 0; | |
405 for (i = 0; i < kNumHashValues; i++) | |
406 { | |
407 UInt32 prevGroupSize = counters[i] - prev; | |
408 if (prevGroupSize == 0) | |
409 continue; | |
410 SetGroupSize(Indices + prev, prevGroupSize); | |
411 prev = counters[i]; | |
412 } | |
413 } | |
414 #endif | |
415 } | |
416 | |
417 { | |
418 int NumRefBits; | |
419 UInt32 NumSortedBytes; | |
420 for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++); | |
421 NumRefBits = 32 - NumRefBits; | |
422 if (NumRefBits > kNumRefBitsMax) | |
423 NumRefBits = kNumRefBitsMax; | |
424 | |
425 for (NumSortedBytes = kNumHashBytes; ; NumSortedBytes <<= 1) | |
426 { | |
427 #ifndef BLOCK_SORT_EXTERNAL_FLAGS | |
428 UInt32 finishedGroupSize = 0; | |
429 #endif | |
430 UInt32 newLimit = 0; | |
431 for (i = 0; i < blockSize;) | |
432 { | |
433 UInt32 groupSize; | |
434 #ifdef BLOCK_SORT_EXTERNAL_FLAGS | |
435 | |
436 if ((Flags[i >> kNumFlagsBits] & (1 << (i & kFlagsMask))) == 0) | |
437 { | |
438 i++; | |
439 continue; | |
440 } | |
441 for (groupSize = 1; | |
442 (Flags[(i + groupSize) >> kNumFlagsBits] & (1 << ((i + groupSize) & kFlagsMask))) != 0; | |
443 groupSize++); | |
444 | |
445 groupSize++; | |
446 | |
447 #else | |
448 | |
449 groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax); | |
450 { | |
451 Bool finishedGroup = ((Indices[i] & 0x80000000) == 0); | |
452 if ((Indices[i] & 0x40000000) != 0) | |
453 { | |
454 groupSize += ((Indices[i + 1] >> kNumBitsMax) << kNumExtra0Bits); | |
455 Indices[i + 1] &= kIndexMask; | |
456 } | |
457 Indices[i] &= kIndexMask; | |
458 groupSize++; | |
459 if (finishedGroup || groupSize == 1) | |
460 { | |
461 Indices[i - finishedGroupSize] &= kIndexMask; | |
462 if (finishedGroupSize > 1) | |
463 Indices[i - finishedGroupSize + 1] &= kIndexMask; | |
464 { | |
465 UInt32 newGroupSize = groupSize + finishedGroupSize; | |
466 SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize); | |
467 finishedGroupSize = newGroupSize; | |
468 } | |
469 i += groupSize; | |
470 continue; | |
471 } | |
472 finishedGroupSize = 0; | |
473 } | |
474 | |
475 #endif | |
476 | |
477 if (NumSortedBytes >= blockSize) | |
478 { | |
479 UInt32 j; | |
480 for (j = 0; j < groupSize; j++) | |
481 { | |
482 UInt32 t = (i + j); | |
483 /* Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); */ | |
484 Groups[Indices[t]] = t; | |
485 } | |
486 } | |
487 else | |
488 if (SortGroup(blockSize, NumSortedBytes, i, groupSize, NumRefBits, Indices | |
489 #ifndef BLOCK_SORT_USE_HEAP_SORT | |
490 , 0, blockSize | |
491 #endif | |
492 ) != 0) | |
493 newLimit = i + groupSize; | |
494 i += groupSize; | |
495 } | |
496 if (newLimit == 0) | |
497 break; | |
498 } | |
499 } | |
500 #ifndef BLOCK_SORT_EXTERNAL_FLAGS | |
501 for (i = 0; i < blockSize;) | |
502 { | |
503 UInt32 groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax); | |
504 if ((Indices[i] & 0x40000000) != 0) | |
505 { | |
506 groupSize += ((Indices[i + 1] >> kNumBitsMax) << kNumExtra0Bits); | |
507 Indices[i + 1] &= kIndexMask; | |
508 } | |
509 Indices[i] &= kIndexMask; | |
510 groupSize++; | |
511 i += groupSize; | |
512 } | |
513 #endif | |
514 return Groups[0]; | |
515 } | |
516 |