view src/win32/7zip/7z/C/BwtSort.c @ 4:5f6f2134e8ce

apu appears to not be used
author Robert McIntyre <rlm@mit.edu>
date Sat, 03 Mar 2012 10:35:58 -0600
parents f9f4f1b99eed
children
line wrap: on
line source
1 /* BwtSort.c -- BWT block sorting
2 2008-08-17
3 Igor Pavlov
4 Public domain */
6 #include "BwtSort.h"
7 #include "Sort.h"
9 /* #define BLOCK_SORT_USE_HEAP_SORT */
11 #define NO_INLINE MY_FAST_CALL
13 /* Don't change it !!! */
14 #define kNumHashBytes 2
15 #define kNumHashValues (1 << (kNumHashBytes * 8))
17 /* kNumRefBitsMax must be < (kNumHashBytes * 8) = 16 */
18 #define kNumRefBitsMax 12
20 #define BS_TEMP_SIZE kNumHashValues
22 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
24 /* 32 Flags in UInt32 word */
25 #define kNumFlagsBits 5
26 #define kNumFlagsInWord (1 << kNumFlagsBits)
27 #define kFlagsMask (kNumFlagsInWord - 1)
28 #define kAllFlags 0xFFFFFFFF
30 #else
32 #define kNumBitsMax 20
33 #define kIndexMask ((1 << kNumBitsMax) - 1)
34 #define kNumExtraBits (32 - kNumBitsMax)
35 #define kNumExtra0Bits (kNumExtraBits - 2)
36 #define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1)
38 #define SetFinishedGroupSize(p, size) \
39 { *(p) |= ((((size) - 1) & kNumExtra0Mask) << kNumBitsMax); \
40 if ((size) > (1 << kNumExtra0Bits)) { \
41 *(p) |= 0x40000000; *((p) + 1) |= ((((size) - 1)>> kNumExtra0Bits) << kNumBitsMax); } } \
43 static void SetGroupSize(UInt32 *p, UInt32 size)
44 {
45 if (--size == 0)
46 return;
47 *p |= 0x80000000 | ((size & kNumExtra0Mask) << kNumBitsMax);
48 if (size >= (1 << kNumExtra0Bits))
49 {
50 *p |= 0x40000000;
51 p[1] |= ((size >> kNumExtra0Bits) << kNumBitsMax);
52 }
53 }
55 #endif
57 /*
58 SortGroup - is recursive Range-Sort function with HeapSort optimization for small blocks
59 "range" is not real range. It's only for optimization.
60 returns: 1 - if there are groups, 0 - no more groups
61 */
63 UInt32 NO_INLINE SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices
64 #ifndef BLOCK_SORT_USE_HEAP_SORT
65 , UInt32 left, UInt32 range
66 #endif
67 )
68 {
69 UInt32 *ind2 = Indices + groupOffset;
70 UInt32 *Groups;
71 if (groupSize <= 1)
72 {
73 /*
74 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
75 SetFinishedGroupSize(ind2, 1);
76 #endif
77 */
78 return 0;
79 }
80 Groups = Indices + BlockSize + BS_TEMP_SIZE;
81 if (groupSize <= ((UInt32)1 << NumRefBits)
82 #ifndef BLOCK_SORT_USE_HEAP_SORT
83 && groupSize <= range
84 #endif
85 )
86 {
87 UInt32 *temp = Indices + BlockSize;
88 UInt32 j;
89 UInt32 mask, thereAreGroups, group, cg;
90 {
91 UInt32 gPrev;
92 UInt32 gRes = 0;
93 {
94 UInt32 sp = ind2[0] + NumSortedBytes;
95 if (sp >= BlockSize) sp -= BlockSize;
96 gPrev = Groups[sp];
97 temp[0] = (gPrev << NumRefBits);
98 }
100 for (j = 1; j < groupSize; j++)
101 {
102 UInt32 sp = ind2[j] + NumSortedBytes;
103 UInt32 g;
104 if (sp >= BlockSize) sp -= BlockSize;
105 g = Groups[sp];
106 temp[j] = (g << NumRefBits) | j;
107 gRes |= (gPrev ^ g);
108 }
109 if (gRes == 0)
110 {
111 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
112 SetGroupSize(ind2, groupSize);
113 #endif
114 return 1;
115 }
116 }
118 HeapSort(temp, groupSize);
119 mask = ((1 << NumRefBits) - 1);
120 thereAreGroups = 0;
122 group = groupOffset;
123 cg = (temp[0] >> NumRefBits);
124 temp[0] = ind2[temp[0] & mask];
126 {
127 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
128 UInt32 *Flags = Groups + BlockSize;
129 #else
130 UInt32 prevGroupStart = 0;
131 #endif
133 for (j = 1; j < groupSize; j++)
134 {
135 UInt32 val = temp[j];
136 UInt32 cgCur = (val >> NumRefBits);
138 if (cgCur != cg)
139 {
140 cg = cgCur;
141 group = groupOffset + j;
143 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
144 {
145 UInt32 t = group - 1;
146 Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
147 }
148 #else
149 SetGroupSize(temp + prevGroupStart, j - prevGroupStart);
150 prevGroupStart = j;
151 #endif
152 }
153 else
154 thereAreGroups = 1;
155 {
156 UInt32 ind = ind2[val & mask];
157 temp[j] = ind;
158 Groups[ind] = group;
159 }
160 }
162 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
163 SetGroupSize(temp + prevGroupStart, j - prevGroupStart);
164 #endif
165 }
167 for (j = 0; j < groupSize; j++)
168 ind2[j] = temp[j];
169 return thereAreGroups;
170 }
172 /* Check that all strings are in one group (cannot sort) */
173 {
174 UInt32 group, j;
175 UInt32 sp = ind2[0] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
176 group = Groups[sp];
177 for (j = 1; j < groupSize; j++)
178 {
179 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
180 if (Groups[sp] != group)
181 break;
182 }
183 if (j == groupSize)
184 {
185 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
186 SetGroupSize(ind2, groupSize);
187 #endif
188 return 1;
189 }
190 }
192 #ifndef BLOCK_SORT_USE_HEAP_SORT
193 {
194 /* ---------- Range Sort ---------- */
195 UInt32 i;
196 UInt32 mid;
197 for (;;)
198 {
199 UInt32 j;
200 if (range <= 1)
201 {
202 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
203 SetGroupSize(ind2, groupSize);
204 #endif
205 return 1;
206 }
207 mid = left + ((range + 1) >> 1);
208 j = groupSize;
209 i = 0;
210 do
211 {
212 UInt32 sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
213 if (Groups[sp] >= mid)
214 {
215 for (j--; j > i; j--)
216 {
217 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
218 if (Groups[sp] < mid)
219 {
220 UInt32 temp = ind2[i]; ind2[i] = ind2[j]; ind2[j] = temp;
221 break;
222 }
223 }
224 if (i >= j)
225 break;
226 }
227 }
228 while (++i < j);
229 if (i == 0)
230 {
231 range = range - (mid - left);
232 left = mid;
233 }
234 else if (i == groupSize)
235 range = (mid - left);
236 else
237 break;
238 }
240 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
241 {
242 UInt32 t = (groupOffset + i - 1);
243 UInt32 *Flags = Groups + BlockSize;
244 Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
245 }
246 #endif
248 {
249 UInt32 j;
250 for (j = i; j < groupSize; j++)
251 Groups[ind2[j]] = groupOffset + i;
252 }
254 {
255 UInt32 res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left);
256 return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left));
257 }
259 }
261 #else
263 /* ---------- Heap Sort ---------- */
265 {
266 UInt32 j;
267 for (j = 0; j < groupSize; j++)
268 {
269 UInt32 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
270 ind2[j] = sp;
271 }
273 HeapSortRef(ind2, Groups, groupSize);
275 /* Write Flags */
276 {
277 UInt32 sp = ind2[0];
278 UInt32 group = Groups[sp];
280 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
281 UInt32 *Flags = Groups + BlockSize;
282 #else
283 UInt32 prevGroupStart = 0;
284 #endif
286 for (j = 1; j < groupSize; j++)
287 {
288 sp = ind2[j];
289 if (Groups[sp] != group)
290 {
291 group = Groups[sp];
292 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
293 {
294 UInt32 t = groupOffset + j - 1;
295 Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
296 }
297 #else
298 SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart);
299 prevGroupStart = j;
300 #endif
301 }
302 }
304 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
305 SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart);
306 #endif
307 }
308 {
309 /* Write new Groups values and Check that there are groups */
310 UInt32 thereAreGroups = 0;
311 for (j = 0; j < groupSize; j++)
312 {
313 UInt32 group = groupOffset + j;
314 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
315 UInt32 subGroupSize = ((ind2[j] & ~0xC0000000) >> kNumBitsMax);
316 if ((ind2[j] & 0x40000000) != 0)
317 subGroupSize += ((ind2[j + 1] >> kNumBitsMax) << kNumExtra0Bits);
318 subGroupSize++;
319 for (;;)
320 {
321 UInt32 original = ind2[j];
322 UInt32 sp = original & kIndexMask;
323 if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes;
324 ind2[j] = sp | (original & ~kIndexMask);
325 Groups[sp] = group;
326 if (--subGroupSize == 0)
327 break;
328 j++;
329 thereAreGroups = 1;
330 }
331 #else
332 UInt32 *Flags = Groups + BlockSize;
333 for (;;)
334 {
335 UInt32 sp = ind2[j]; if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes;
336 ind2[j] = sp;
337 Groups[sp] = group;
338 if ((Flags[(groupOffset + j) >> kNumFlagsBits] & (1 << ((groupOffset + j) & kFlagsMask))) == 0)
339 break;
340 j++;
341 thereAreGroups = 1;
342 }
343 #endif
344 }
345 return thereAreGroups;
346 }
347 }
348 #endif
349 }
351 /* conditions: blockSize > 0 */
352 UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize)
353 {
354 UInt32 *counters = Indices + blockSize;
355 UInt32 i;
356 UInt32 *Groups;
357 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
358 UInt32 *Flags;
359 #endif
361 /* Radix-Sort for 2 bytes */
362 for (i = 0; i < kNumHashValues; i++)
363 counters[i] = 0;
364 for (i = 0; i < blockSize - 1; i++)
365 counters[((UInt32)data[i] << 8) | data[i + 1]]++;
366 counters[((UInt32)data[i] << 8) | data[0]]++;
368 Groups = counters + BS_TEMP_SIZE;
369 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
370 Flags = Groups + blockSize;
371 {
372 UInt32 numWords = (blockSize + kFlagsMask) >> kNumFlagsBits;
373 for (i = 0; i < numWords; i++)
374 Flags[i] = kAllFlags;
375 }
376 #endif
378 {
379 UInt32 sum = 0;
380 for (i = 0; i < kNumHashValues; i++)
381 {
382 UInt32 groupSize = counters[i];
383 if (groupSize > 0)
384 {
385 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
386 UInt32 t = sum + groupSize - 1;
387 Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
388 #endif
389 sum += groupSize;
390 }
391 counters[i] = sum - groupSize;
392 }
394 for (i = 0; i < blockSize - 1; i++)
395 Groups[i] = counters[((UInt32)data[i] << 8) | data[i + 1]];
396 Groups[i] = counters[((UInt32)data[i] << 8) | data[0]];
398 for (i = 0; i < blockSize - 1; i++)
399 Indices[counters[((UInt32)data[i] << 8) | data[i + 1]]++] = i;
400 Indices[counters[((UInt32)data[i] << 8) | data[0]]++] = i;
402 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
403 {
404 UInt32 prev = 0;
405 for (i = 0; i < kNumHashValues; i++)
406 {
407 UInt32 prevGroupSize = counters[i] - prev;
408 if (prevGroupSize == 0)
409 continue;
410 SetGroupSize(Indices + prev, prevGroupSize);
411 prev = counters[i];
412 }
413 }
414 #endif
415 }
417 {
418 int NumRefBits;
419 UInt32 NumSortedBytes;
420 for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++);
421 NumRefBits = 32 - NumRefBits;
422 if (NumRefBits > kNumRefBitsMax)
423 NumRefBits = kNumRefBitsMax;
425 for (NumSortedBytes = kNumHashBytes; ; NumSortedBytes <<= 1)
426 {
427 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
428 UInt32 finishedGroupSize = 0;
429 #endif
430 UInt32 newLimit = 0;
431 for (i = 0; i < blockSize;)
432 {
433 UInt32 groupSize;
434 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
436 if ((Flags[i >> kNumFlagsBits] & (1 << (i & kFlagsMask))) == 0)
437 {
438 i++;
439 continue;
440 }
441 for (groupSize = 1;
442 (Flags[(i + groupSize) >> kNumFlagsBits] & (1 << ((i + groupSize) & kFlagsMask))) != 0;
443 groupSize++);
445 groupSize++;
447 #else
449 groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax);
450 {
451 Bool finishedGroup = ((Indices[i] & 0x80000000) == 0);
452 if ((Indices[i] & 0x40000000) != 0)
453 {
454 groupSize += ((Indices[i + 1] >> kNumBitsMax) << kNumExtra0Bits);
455 Indices[i + 1] &= kIndexMask;
456 }
457 Indices[i] &= kIndexMask;
458 groupSize++;
459 if (finishedGroup || groupSize == 1)
460 {
461 Indices[i - finishedGroupSize] &= kIndexMask;
462 if (finishedGroupSize > 1)
463 Indices[i - finishedGroupSize + 1] &= kIndexMask;
464 {
465 UInt32 newGroupSize = groupSize + finishedGroupSize;
466 SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize);
467 finishedGroupSize = newGroupSize;
468 }
469 i += groupSize;
470 continue;
471 }
472 finishedGroupSize = 0;
473 }
475 #endif
477 if (NumSortedBytes >= blockSize)
478 {
479 UInt32 j;
480 for (j = 0; j < groupSize; j++)
481 {
482 UInt32 t = (i + j);
483 /* Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); */
484 Groups[Indices[t]] = t;
485 }
486 }
487 else
488 if (SortGroup(blockSize, NumSortedBytes, i, groupSize, NumRefBits, Indices
489 #ifndef BLOCK_SORT_USE_HEAP_SORT
490 , 0, blockSize
491 #endif
492 ) != 0)
493 newLimit = i + groupSize;
494 i += groupSize;
495 }
496 if (newLimit == 0)
497 break;
498 }
499 }
500 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
501 for (i = 0; i < blockSize;)
502 {
503 UInt32 groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax);
504 if ((Indices[i] & 0x40000000) != 0)
505 {
506 groupSize += ((Indices[i + 1] >> kNumBitsMax) << kNumExtra0Bits);
507 Indices[i + 1] &= kIndexMask;
508 }
509 Indices[i] &= kIndexMask;
510 groupSize++;
511 i += groupSize;
512 }
513 #endif
514 return Groups[0];
515 }