Mercurial > vba-clojure
diff src/win32/7zip/7z/C/BwtSort.c @ 1:f9f4f1b99eed
importing src directory
author | Robert McIntyre <rlm@mit.edu> |
---|---|
date | Sat, 03 Mar 2012 10:31:27 -0600 |
parents | |
children |
line wrap: on
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/win32/7zip/7z/C/BwtSort.c Sat Mar 03 10:31:27 2012 -0600 1.3 @@ -0,0 +1,516 @@ 1.4 +/* BwtSort.c -- BWT block sorting 1.5 +2008-08-17 1.6 +Igor Pavlov 1.7 +Public domain */ 1.8 + 1.9 +#include "BwtSort.h" 1.10 +#include "Sort.h" 1.11 + 1.12 +/* #define BLOCK_SORT_USE_HEAP_SORT */ 1.13 + 1.14 +#define NO_INLINE MY_FAST_CALL 1.15 + 1.16 +/* Don't change it !!! */ 1.17 +#define kNumHashBytes 2 1.18 +#define kNumHashValues (1 << (kNumHashBytes * 8)) 1.19 + 1.20 +/* kNumRefBitsMax must be < (kNumHashBytes * 8) = 16 */ 1.21 +#define kNumRefBitsMax 12 1.22 + 1.23 +#define BS_TEMP_SIZE kNumHashValues 1.24 + 1.25 +#ifdef BLOCK_SORT_EXTERNAL_FLAGS 1.26 + 1.27 +/* 32 Flags in UInt32 word */ 1.28 +#define kNumFlagsBits 5 1.29 +#define kNumFlagsInWord (1 << kNumFlagsBits) 1.30 +#define kFlagsMask (kNumFlagsInWord - 1) 1.31 +#define kAllFlags 0xFFFFFFFF 1.32 + 1.33 +#else 1.34 + 1.35 +#define kNumBitsMax 20 1.36 +#define kIndexMask ((1 << kNumBitsMax) - 1) 1.37 +#define kNumExtraBits (32 - kNumBitsMax) 1.38 +#define kNumExtra0Bits (kNumExtraBits - 2) 1.39 +#define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1) 1.40 + 1.41 +#define SetFinishedGroupSize(p, size) \ 1.42 + { *(p) |= ((((size) - 1) & kNumExtra0Mask) << kNumBitsMax); \ 1.43 + if ((size) > (1 << kNumExtra0Bits)) { \ 1.44 + *(p) |= 0x40000000; *((p) + 1) |= ((((size) - 1)>> kNumExtra0Bits) << kNumBitsMax); } } \ 1.45 + 1.46 +static void SetGroupSize(UInt32 *p, UInt32 size) 1.47 +{ 1.48 + if (--size == 0) 1.49 + return; 1.50 + *p |= 0x80000000 | ((size & kNumExtra0Mask) << kNumBitsMax); 1.51 + if (size >= (1 << kNumExtra0Bits)) 1.52 + { 1.53 + *p |= 0x40000000; 1.54 + p[1] |= ((size >> kNumExtra0Bits) << kNumBitsMax); 1.55 + } 1.56 +} 1.57 + 1.58 +#endif 1.59 + 1.60 +/* 1.61 +SortGroup - is recursive Range-Sort function with HeapSort optimization for small blocks 1.62 + "range" is not real range. It's only for optimization. 1.63 +returns: 1 - if there are groups, 0 - no more groups 1.64 +*/ 1.65 + 1.66 +UInt32 NO_INLINE SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices 1.67 + #ifndef BLOCK_SORT_USE_HEAP_SORT 1.68 + , UInt32 left, UInt32 range 1.69 + #endif 1.70 + ) 1.71 +{ 1.72 + UInt32 *ind2 = Indices + groupOffset; 1.73 + UInt32 *Groups; 1.74 + if (groupSize <= 1) 1.75 + { 1.76 + /* 1.77 + #ifndef BLOCK_SORT_EXTERNAL_FLAGS 1.78 + SetFinishedGroupSize(ind2, 1); 1.79 + #endif 1.80 + */ 1.81 + return 0; 1.82 + } 1.83 + Groups = Indices + BlockSize + BS_TEMP_SIZE; 1.84 + if (groupSize <= ((UInt32)1 << NumRefBits) 1.85 + #ifndef BLOCK_SORT_USE_HEAP_SORT 1.86 + && groupSize <= range 1.87 + #endif 1.88 + ) 1.89 + { 1.90 + UInt32 *temp = Indices + BlockSize; 1.91 + UInt32 j; 1.92 + UInt32 mask, thereAreGroups, group, cg; 1.93 + { 1.94 + UInt32 gPrev; 1.95 + UInt32 gRes = 0; 1.96 + { 1.97 + UInt32 sp = ind2[0] + NumSortedBytes; 1.98 + if (sp >= BlockSize) sp -= BlockSize; 1.99 + gPrev = Groups[sp]; 1.100 + temp[0] = (gPrev << NumRefBits); 1.101 + } 1.102 + 1.103 + for (j = 1; j < groupSize; j++) 1.104 + { 1.105 + UInt32 sp = ind2[j] + NumSortedBytes; 1.106 + UInt32 g; 1.107 + if (sp >= BlockSize) sp -= BlockSize; 1.108 + g = Groups[sp]; 1.109 + temp[j] = (g << NumRefBits) | j; 1.110 + gRes |= (gPrev ^ g); 1.111 + } 1.112 + if (gRes == 0) 1.113 + { 1.114 + #ifndef BLOCK_SORT_EXTERNAL_FLAGS 1.115 + SetGroupSize(ind2, groupSize); 1.116 + #endif 1.117 + return 1; 1.118 + } 1.119 + } 1.120 + 1.121 + HeapSort(temp, groupSize); 1.122 + mask = ((1 << NumRefBits) - 1); 1.123 + thereAreGroups = 0; 1.124 + 1.125 + group = groupOffset; 1.126 + cg = (temp[0] >> NumRefBits); 1.127 + temp[0] = ind2[temp[0] & mask]; 1.128 + 1.129 + { 1.130 + #ifdef BLOCK_SORT_EXTERNAL_FLAGS 1.131 + UInt32 *Flags = Groups + BlockSize; 1.132 + #else 1.133 + UInt32 prevGroupStart = 0; 1.134 + #endif 1.135 + 1.136 + for (j = 1; j < groupSize; j++) 1.137 + { 1.138 + UInt32 val = temp[j]; 1.139 + UInt32 cgCur = (val >> NumRefBits); 1.140 + 1.141 + if (cgCur != cg) 1.142 + { 1.143 + cg = cgCur; 1.144 + group = groupOffset + j; 1.145 + 1.146 + #ifdef BLOCK_SORT_EXTERNAL_FLAGS 1.147 + { 1.148 + UInt32 t = group - 1; 1.149 + Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); 1.150 + } 1.151 + #else 1.152 + SetGroupSize(temp + prevGroupStart, j - prevGroupStart); 1.153 + prevGroupStart = j; 1.154 + #endif 1.155 + } 1.156 + else 1.157 + thereAreGroups = 1; 1.158 + { 1.159 + UInt32 ind = ind2[val & mask]; 1.160 + temp[j] = ind; 1.161 + Groups[ind] = group; 1.162 + } 1.163 + } 1.164 + 1.165 + #ifndef BLOCK_SORT_EXTERNAL_FLAGS 1.166 + SetGroupSize(temp + prevGroupStart, j - prevGroupStart); 1.167 + #endif 1.168 + } 1.169 + 1.170 + for (j = 0; j < groupSize; j++) 1.171 + ind2[j] = temp[j]; 1.172 + return thereAreGroups; 1.173 + } 1.174 + 1.175 + /* Check that all strings are in one group (cannot sort) */ 1.176 + { 1.177 + UInt32 group, j; 1.178 + UInt32 sp = ind2[0] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; 1.179 + group = Groups[sp]; 1.180 + for (j = 1; j < groupSize; j++) 1.181 + { 1.182 + sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; 1.183 + if (Groups[sp] != group) 1.184 + break; 1.185 + } 1.186 + if (j == groupSize) 1.187 + { 1.188 + #ifndef BLOCK_SORT_EXTERNAL_FLAGS 1.189 + SetGroupSize(ind2, groupSize); 1.190 + #endif 1.191 + return 1; 1.192 + } 1.193 + } 1.194 + 1.195 + #ifndef BLOCK_SORT_USE_HEAP_SORT 1.196 + { 1.197 + /* ---------- Range Sort ---------- */ 1.198 + UInt32 i; 1.199 + UInt32 mid; 1.200 + for (;;) 1.201 + { 1.202 + UInt32 j; 1.203 + if (range <= 1) 1.204 + { 1.205 + #ifndef BLOCK_SORT_EXTERNAL_FLAGS 1.206 + SetGroupSize(ind2, groupSize); 1.207 + #endif 1.208 + return 1; 1.209 + } 1.210 + mid = left + ((range + 1) >> 1); 1.211 + j = groupSize; 1.212 + i = 0; 1.213 + do 1.214 + { 1.215 + UInt32 sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; 1.216 + if (Groups[sp] >= mid) 1.217 + { 1.218 + for (j--; j > i; j--) 1.219 + { 1.220 + sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; 1.221 + if (Groups[sp] < mid) 1.222 + { 1.223 + UInt32 temp = ind2[i]; ind2[i] = ind2[j]; ind2[j] = temp; 1.224 + break; 1.225 + } 1.226 + } 1.227 + if (i >= j) 1.228 + break; 1.229 + } 1.230 + } 1.231 + while (++i < j); 1.232 + if (i == 0) 1.233 + { 1.234 + range = range - (mid - left); 1.235 + left = mid; 1.236 + } 1.237 + else if (i == groupSize) 1.238 + range = (mid - left); 1.239 + else 1.240 + break; 1.241 + } 1.242 + 1.243 + #ifdef BLOCK_SORT_EXTERNAL_FLAGS 1.244 + { 1.245 + UInt32 t = (groupOffset + i - 1); 1.246 + UInt32 *Flags = Groups + BlockSize; 1.247 + Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); 1.248 + } 1.249 + #endif 1.250 + 1.251 + { 1.252 + UInt32 j; 1.253 + for (j = i; j < groupSize; j++) 1.254 + Groups[ind2[j]] = groupOffset + i; 1.255 + } 1.256 + 1.257 + { 1.258 + UInt32 res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left); 1.259 + return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left)); 1.260 + } 1.261 + 1.262 + } 1.263 + 1.264 + #else 1.265 + 1.266 + /* ---------- Heap Sort ---------- */ 1.267 + 1.268 + { 1.269 + UInt32 j; 1.270 + for (j = 0; j < groupSize; j++) 1.271 + { 1.272 + UInt32 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; 1.273 + ind2[j] = sp; 1.274 + } 1.275 + 1.276 + HeapSortRef(ind2, Groups, groupSize); 1.277 + 1.278 + /* Write Flags */ 1.279 + { 1.280 + UInt32 sp = ind2[0]; 1.281 + UInt32 group = Groups[sp]; 1.282 + 1.283 + #ifdef BLOCK_SORT_EXTERNAL_FLAGS 1.284 + UInt32 *Flags = Groups + BlockSize; 1.285 + #else 1.286 + UInt32 prevGroupStart = 0; 1.287 + #endif 1.288 + 1.289 + for (j = 1; j < groupSize; j++) 1.290 + { 1.291 + sp = ind2[j]; 1.292 + if (Groups[sp] != group) 1.293 + { 1.294 + group = Groups[sp]; 1.295 + #ifdef BLOCK_SORT_EXTERNAL_FLAGS 1.296 + { 1.297 + UInt32 t = groupOffset + j - 1; 1.298 + Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); 1.299 + } 1.300 + #else 1.301 + SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart); 1.302 + prevGroupStart = j; 1.303 + #endif 1.304 + } 1.305 + } 1.306 + 1.307 + #ifndef BLOCK_SORT_EXTERNAL_FLAGS 1.308 + SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart); 1.309 + #endif 1.310 + } 1.311 + { 1.312 + /* Write new Groups values and Check that there are groups */ 1.313 + UInt32 thereAreGroups = 0; 1.314 + for (j = 0; j < groupSize; j++) 1.315 + { 1.316 + UInt32 group = groupOffset + j; 1.317 + #ifndef BLOCK_SORT_EXTERNAL_FLAGS 1.318 + UInt32 subGroupSize = ((ind2[j] & ~0xC0000000) >> kNumBitsMax); 1.319 + if ((ind2[j] & 0x40000000) != 0) 1.320 + subGroupSize += ((ind2[j + 1] >> kNumBitsMax) << kNumExtra0Bits); 1.321 + subGroupSize++; 1.322 + for (;;) 1.323 + { 1.324 + UInt32 original = ind2[j]; 1.325 + UInt32 sp = original & kIndexMask; 1.326 + if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes; 1.327 + ind2[j] = sp | (original & ~kIndexMask); 1.328 + Groups[sp] = group; 1.329 + if (--subGroupSize == 0) 1.330 + break; 1.331 + j++; 1.332 + thereAreGroups = 1; 1.333 + } 1.334 + #else 1.335 + UInt32 *Flags = Groups + BlockSize; 1.336 + for (;;) 1.337 + { 1.338 + UInt32 sp = ind2[j]; if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes; 1.339 + ind2[j] = sp; 1.340 + Groups[sp] = group; 1.341 + if ((Flags[(groupOffset + j) >> kNumFlagsBits] & (1 << ((groupOffset + j) & kFlagsMask))) == 0) 1.342 + break; 1.343 + j++; 1.344 + thereAreGroups = 1; 1.345 + } 1.346 + #endif 1.347 + } 1.348 + return thereAreGroups; 1.349 + } 1.350 + } 1.351 + #endif 1.352 +} 1.353 + 1.354 +/* conditions: blockSize > 0 */ 1.355 +UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize) 1.356 +{ 1.357 + UInt32 *counters = Indices + blockSize; 1.358 + UInt32 i; 1.359 + UInt32 *Groups; 1.360 + #ifdef BLOCK_SORT_EXTERNAL_FLAGS 1.361 + UInt32 *Flags; 1.362 + #endif 1.363 + 1.364 + /* Radix-Sort for 2 bytes */ 1.365 + for (i = 0; i < kNumHashValues; i++) 1.366 + counters[i] = 0; 1.367 + for (i = 0; i < blockSize - 1; i++) 1.368 + counters[((UInt32)data[i] << 8) | data[i + 1]]++; 1.369 + counters[((UInt32)data[i] << 8) | data[0]]++; 1.370 + 1.371 + Groups = counters + BS_TEMP_SIZE; 1.372 + #ifdef BLOCK_SORT_EXTERNAL_FLAGS 1.373 + Flags = Groups + blockSize; 1.374 + { 1.375 + UInt32 numWords = (blockSize + kFlagsMask) >> kNumFlagsBits; 1.376 + for (i = 0; i < numWords; i++) 1.377 + Flags[i] = kAllFlags; 1.378 + } 1.379 + #endif 1.380 + 1.381 + { 1.382 + UInt32 sum = 0; 1.383 + for (i = 0; i < kNumHashValues; i++) 1.384 + { 1.385 + UInt32 groupSize = counters[i]; 1.386 + if (groupSize > 0) 1.387 + { 1.388 + #ifdef BLOCK_SORT_EXTERNAL_FLAGS 1.389 + UInt32 t = sum + groupSize - 1; 1.390 + Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); 1.391 + #endif 1.392 + sum += groupSize; 1.393 + } 1.394 + counters[i] = sum - groupSize; 1.395 + } 1.396 + 1.397 + for (i = 0; i < blockSize - 1; i++) 1.398 + Groups[i] = counters[((UInt32)data[i] << 8) | data[i + 1]]; 1.399 + Groups[i] = counters[((UInt32)data[i] << 8) | data[0]]; 1.400 + 1.401 + for (i = 0; i < blockSize - 1; i++) 1.402 + Indices[counters[((UInt32)data[i] << 8) | data[i + 1]]++] = i; 1.403 + Indices[counters[((UInt32)data[i] << 8) | data[0]]++] = i; 1.404 + 1.405 + #ifndef BLOCK_SORT_EXTERNAL_FLAGS 1.406 + { 1.407 + UInt32 prev = 0; 1.408 + for (i = 0; i < kNumHashValues; i++) 1.409 + { 1.410 + UInt32 prevGroupSize = counters[i] - prev; 1.411 + if (prevGroupSize == 0) 1.412 + continue; 1.413 + SetGroupSize(Indices + prev, prevGroupSize); 1.414 + prev = counters[i]; 1.415 + } 1.416 + } 1.417 + #endif 1.418 + } 1.419 + 1.420 + { 1.421 + int NumRefBits; 1.422 + UInt32 NumSortedBytes; 1.423 + for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++); 1.424 + NumRefBits = 32 - NumRefBits; 1.425 + if (NumRefBits > kNumRefBitsMax) 1.426 + NumRefBits = kNumRefBitsMax; 1.427 + 1.428 + for (NumSortedBytes = kNumHashBytes; ; NumSortedBytes <<= 1) 1.429 + { 1.430 + #ifndef BLOCK_SORT_EXTERNAL_FLAGS 1.431 + UInt32 finishedGroupSize = 0; 1.432 + #endif 1.433 + UInt32 newLimit = 0; 1.434 + for (i = 0; i < blockSize;) 1.435 + { 1.436 + UInt32 groupSize; 1.437 + #ifdef BLOCK_SORT_EXTERNAL_FLAGS 1.438 + 1.439 + if ((Flags[i >> kNumFlagsBits] & (1 << (i & kFlagsMask))) == 0) 1.440 + { 1.441 + i++; 1.442 + continue; 1.443 + } 1.444 + for (groupSize = 1; 1.445 + (Flags[(i + groupSize) >> kNumFlagsBits] & (1 << ((i + groupSize) & kFlagsMask))) != 0; 1.446 + groupSize++); 1.447 + 1.448 + groupSize++; 1.449 + 1.450 + #else 1.451 + 1.452 + groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax); 1.453 + { 1.454 + Bool finishedGroup = ((Indices[i] & 0x80000000) == 0); 1.455 + if ((Indices[i] & 0x40000000) != 0) 1.456 + { 1.457 + groupSize += ((Indices[i + 1] >> kNumBitsMax) << kNumExtra0Bits); 1.458 + Indices[i + 1] &= kIndexMask; 1.459 + } 1.460 + Indices[i] &= kIndexMask; 1.461 + groupSize++; 1.462 + if (finishedGroup || groupSize == 1) 1.463 + { 1.464 + Indices[i - finishedGroupSize] &= kIndexMask; 1.465 + if (finishedGroupSize > 1) 1.466 + Indices[i - finishedGroupSize + 1] &= kIndexMask; 1.467 + { 1.468 + UInt32 newGroupSize = groupSize + finishedGroupSize; 1.469 + SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize); 1.470 + finishedGroupSize = newGroupSize; 1.471 + } 1.472 + i += groupSize; 1.473 + continue; 1.474 + } 1.475 + finishedGroupSize = 0; 1.476 + } 1.477 + 1.478 + #endif 1.479 + 1.480 + if (NumSortedBytes >= blockSize) 1.481 + { 1.482 + UInt32 j; 1.483 + for (j = 0; j < groupSize; j++) 1.484 + { 1.485 + UInt32 t = (i + j); 1.486 + /* Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); */ 1.487 + Groups[Indices[t]] = t; 1.488 + } 1.489 + } 1.490 + else 1.491 + if (SortGroup(blockSize, NumSortedBytes, i, groupSize, NumRefBits, Indices 1.492 + #ifndef BLOCK_SORT_USE_HEAP_SORT 1.493 + , 0, blockSize 1.494 + #endif 1.495 + ) != 0) 1.496 + newLimit = i + groupSize; 1.497 + i += groupSize; 1.498 + } 1.499 + if (newLimit == 0) 1.500 + break; 1.501 + } 1.502 + } 1.503 + #ifndef BLOCK_SORT_EXTERNAL_FLAGS 1.504 + for (i = 0; i < blockSize;) 1.505 + { 1.506 + UInt32 groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax); 1.507 + if ((Indices[i] & 0x40000000) != 0) 1.508 + { 1.509 + groupSize += ((Indices[i + 1] >> kNumBitsMax) << kNumExtra0Bits); 1.510 + Indices[i + 1] &= kIndexMask; 1.511 + } 1.512 + Indices[i] &= kIndexMask; 1.513 + groupSize++; 1.514 + i += groupSize; 1.515 + } 1.516 + #endif 1.517 + return Groups[0]; 1.518 +} 1.519 +