diff src/win32/7zip/7z/C/BwtSort.c @ 1:f9f4f1b99eed

importing src directory
author Robert McIntyre <rlm@mit.edu>
date Sat, 03 Mar 2012 10:31:27 -0600
parents
children
line wrap: on
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/win32/7zip/7z/C/BwtSort.c	Sat Mar 03 10:31:27 2012 -0600
     1.3 @@ -0,0 +1,516 @@
     1.4 +/* BwtSort.c -- BWT block sorting
     1.5 +2008-08-17
     1.6 +Igor Pavlov
     1.7 +Public domain */
     1.8 +
     1.9 +#include "BwtSort.h"
    1.10 +#include "Sort.h"
    1.11 +
    1.12 +/* #define BLOCK_SORT_USE_HEAP_SORT */
    1.13 +
    1.14 +#define NO_INLINE MY_FAST_CALL
    1.15 +
    1.16 +/* Don't change it !!! */
    1.17 +#define kNumHashBytes 2
    1.18 +#define kNumHashValues (1 << (kNumHashBytes * 8))
    1.19 +
    1.20 +/* kNumRefBitsMax must be < (kNumHashBytes * 8) = 16 */
    1.21 +#define kNumRefBitsMax 12
    1.22 +
    1.23 +#define BS_TEMP_SIZE kNumHashValues
    1.24 +
    1.25 +#ifdef BLOCK_SORT_EXTERNAL_FLAGS
    1.26 +
    1.27 +/* 32 Flags in UInt32 word */
    1.28 +#define kNumFlagsBits 5
    1.29 +#define kNumFlagsInWord (1 << kNumFlagsBits)
    1.30 +#define kFlagsMask (kNumFlagsInWord - 1)
    1.31 +#define kAllFlags 0xFFFFFFFF
    1.32 +
    1.33 +#else
    1.34 +
    1.35 +#define kNumBitsMax 20
    1.36 +#define kIndexMask ((1 << kNumBitsMax) - 1)
    1.37 +#define kNumExtraBits (32 - kNumBitsMax)
    1.38 +#define kNumExtra0Bits (kNumExtraBits - 2)
    1.39 +#define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1)
    1.40 +
    1.41 +#define SetFinishedGroupSize(p, size) \
    1.42 +  {  *(p) |= ((((size) - 1) & kNumExtra0Mask) << kNumBitsMax); \
    1.43 +    if ((size) > (1 << kNumExtra0Bits)) { \
    1.44 +    *(p) |= 0x40000000;  *((p) + 1) |= ((((size) - 1)>> kNumExtra0Bits) << kNumBitsMax); } } \
    1.45 +
    1.46 +static void SetGroupSize(UInt32 *p, UInt32 size)
    1.47 +{
    1.48 +  if (--size == 0)
    1.49 +    return;
    1.50 +  *p |= 0x80000000 | ((size & kNumExtra0Mask) << kNumBitsMax);
    1.51 +  if (size >= (1 << kNumExtra0Bits))
    1.52 +  {
    1.53 +    *p |= 0x40000000;
    1.54 +    p[1] |= ((size >> kNumExtra0Bits) << kNumBitsMax);
    1.55 +  }
    1.56 +}
    1.57 +
    1.58 +#endif
    1.59 +
    1.60 +/*
    1.61 +SortGroup - is recursive Range-Sort function with HeapSort optimization for small blocks
    1.62 +  "range" is not real range. It's only for optimization.
    1.63 +returns: 1 - if there are groups, 0 - no more groups
    1.64 +*/
    1.65 +
    1.66 +UInt32 NO_INLINE SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices
    1.67 +  #ifndef BLOCK_SORT_USE_HEAP_SORT
    1.68 +  , UInt32 left, UInt32 range
    1.69 +  #endif
    1.70 +  )
    1.71 +{
    1.72 +  UInt32 *ind2 = Indices + groupOffset;
    1.73 +  UInt32 *Groups;
    1.74 +  if (groupSize <= 1)
    1.75 +  {
    1.76 +    /*
    1.77 +    #ifndef BLOCK_SORT_EXTERNAL_FLAGS
    1.78 +    SetFinishedGroupSize(ind2, 1);
    1.79 +    #endif
    1.80 +    */
    1.81 +    return 0;
    1.82 +  }
    1.83 +  Groups = Indices + BlockSize + BS_TEMP_SIZE;
    1.84 +  if (groupSize <= ((UInt32)1 << NumRefBits)
    1.85 +      #ifndef BLOCK_SORT_USE_HEAP_SORT
    1.86 +      && groupSize <= range
    1.87 +      #endif
    1.88 +      )
    1.89 +  {
    1.90 +    UInt32 *temp = Indices + BlockSize;
    1.91 +    UInt32 j;
    1.92 +    UInt32 mask, thereAreGroups, group, cg;
    1.93 +    {
    1.94 +      UInt32 gPrev;
    1.95 +      UInt32 gRes = 0;
    1.96 +      {
    1.97 +        UInt32 sp = ind2[0] + NumSortedBytes;
    1.98 +        if (sp >= BlockSize) sp -= BlockSize;
    1.99 +        gPrev = Groups[sp];
   1.100 +        temp[0] = (gPrev << NumRefBits);
   1.101 +      }
   1.102 +      
   1.103 +      for (j = 1; j < groupSize; j++)
   1.104 +      {
   1.105 +        UInt32 sp = ind2[j] + NumSortedBytes;
   1.106 +        UInt32 g;
   1.107 +        if (sp >= BlockSize) sp -= BlockSize;
   1.108 +        g = Groups[sp];
   1.109 +        temp[j] = (g << NumRefBits) | j;
   1.110 +        gRes |= (gPrev ^ g);
   1.111 +      }
   1.112 +      if (gRes == 0)
   1.113 +      {
   1.114 +        #ifndef BLOCK_SORT_EXTERNAL_FLAGS
   1.115 +        SetGroupSize(ind2, groupSize);
   1.116 +        #endif
   1.117 +        return 1;
   1.118 +      }
   1.119 +    }
   1.120 +    
   1.121 +    HeapSort(temp, groupSize);
   1.122 +    mask = ((1 << NumRefBits) - 1);
   1.123 +    thereAreGroups = 0;
   1.124 +    
   1.125 +    group = groupOffset;
   1.126 +    cg = (temp[0] >> NumRefBits);
   1.127 +    temp[0] = ind2[temp[0] & mask];
   1.128 +
   1.129 +    {
   1.130 +    #ifdef BLOCK_SORT_EXTERNAL_FLAGS
   1.131 +    UInt32 *Flags = Groups + BlockSize;
   1.132 +    #else
   1.133 +    UInt32 prevGroupStart = 0;
   1.134 +    #endif
   1.135 +    
   1.136 +    for (j = 1; j < groupSize; j++)
   1.137 +    {
   1.138 +      UInt32 val = temp[j];
   1.139 +      UInt32 cgCur = (val >> NumRefBits);
   1.140 +      
   1.141 +      if (cgCur != cg)
   1.142 +      {
   1.143 +        cg = cgCur;
   1.144 +        group = groupOffset + j;
   1.145 +
   1.146 +        #ifdef BLOCK_SORT_EXTERNAL_FLAGS
   1.147 +        {
   1.148 +        UInt32 t = group - 1;
   1.149 +        Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
   1.150 +        }
   1.151 +        #else
   1.152 +        SetGroupSize(temp + prevGroupStart, j - prevGroupStart);
   1.153 +        prevGroupStart = j;
   1.154 +        #endif
   1.155 +      }
   1.156 +      else
   1.157 +        thereAreGroups = 1;
   1.158 +      {
   1.159 +      UInt32 ind = ind2[val & mask];
   1.160 +      temp[j] = ind;
   1.161 +      Groups[ind] = group;
   1.162 +      }
   1.163 +    }
   1.164 +
   1.165 +    #ifndef BLOCK_SORT_EXTERNAL_FLAGS
   1.166 +    SetGroupSize(temp + prevGroupStart, j - prevGroupStart);
   1.167 +    #endif
   1.168 +    }
   1.169 +
   1.170 +    for (j = 0; j < groupSize; j++)
   1.171 +      ind2[j] = temp[j];
   1.172 +    return thereAreGroups;
   1.173 +  }
   1.174 +
   1.175 +  /* Check that all strings are in one group (cannot sort) */
   1.176 +  {
   1.177 +    UInt32 group, j;
   1.178 +    UInt32 sp = ind2[0] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
   1.179 +    group = Groups[sp];
   1.180 +    for (j = 1; j < groupSize; j++)
   1.181 +    {
   1.182 +      sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
   1.183 +      if (Groups[sp] != group)
   1.184 +        break;
   1.185 +    }
   1.186 +    if (j == groupSize)
   1.187 +    {
   1.188 +      #ifndef BLOCK_SORT_EXTERNAL_FLAGS
   1.189 +      SetGroupSize(ind2, groupSize);
   1.190 +      #endif
   1.191 +      return 1;
   1.192 +    }
   1.193 +  }
   1.194 +
   1.195 +  #ifndef BLOCK_SORT_USE_HEAP_SORT
   1.196 +  {
   1.197 +  /* ---------- Range Sort ---------- */
   1.198 +  UInt32 i;
   1.199 +  UInt32 mid;
   1.200 +  for (;;)
   1.201 +  {
   1.202 +    UInt32 j;
   1.203 +    if (range <= 1)
   1.204 +    {
   1.205 +      #ifndef BLOCK_SORT_EXTERNAL_FLAGS
   1.206 +      SetGroupSize(ind2, groupSize);
   1.207 +      #endif
   1.208 +      return 1;
   1.209 +    }
   1.210 +    mid = left + ((range + 1) >> 1);
   1.211 +    j = groupSize;
   1.212 +    i = 0;
   1.213 +    do
   1.214 +    {
   1.215 +      UInt32 sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
   1.216 +      if (Groups[sp] >= mid)
   1.217 +      {
   1.218 +        for (j--; j > i; j--)
   1.219 +        {
   1.220 +          sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
   1.221 +          if (Groups[sp] < mid)
   1.222 +          {
   1.223 +            UInt32 temp = ind2[i]; ind2[i] = ind2[j]; ind2[j] = temp;
   1.224 +            break;
   1.225 +          }
   1.226 +        }
   1.227 +        if (i >= j)
   1.228 +          break;
   1.229 +      }
   1.230 +    }
   1.231 +    while (++i < j);
   1.232 +    if (i == 0)
   1.233 +    {
   1.234 +      range = range - (mid - left);
   1.235 +      left = mid;
   1.236 +    }
   1.237 +    else if (i == groupSize)
   1.238 +      range = (mid - left);
   1.239 +    else
   1.240 +      break;
   1.241 +  }
   1.242 +
   1.243 +  #ifdef BLOCK_SORT_EXTERNAL_FLAGS
   1.244 +  {
   1.245 +    UInt32 t = (groupOffset + i - 1);
   1.246 +    UInt32 *Flags = Groups + BlockSize;
   1.247 +    Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
   1.248 +  }
   1.249 +  #endif
   1.250 +
   1.251 +  {
   1.252 +    UInt32 j;
   1.253 +    for (j = i; j < groupSize; j++)
   1.254 +      Groups[ind2[j]] = groupOffset + i;
   1.255 +  }
   1.256 +
   1.257 +  {
   1.258 +  UInt32 res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left);
   1.259 +  return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left));
   1.260 +  }
   1.261 +
   1.262 +  }
   1.263 +
   1.264 +  #else
   1.265 +
   1.266 +  /* ---------- Heap Sort ---------- */
   1.267 +
   1.268 +  {
   1.269 +    UInt32 j;
   1.270 +    for (j = 0; j < groupSize; j++)
   1.271 +    {
   1.272 +      UInt32 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
   1.273 +      ind2[j] = sp;
   1.274 +    }
   1.275 +
   1.276 +    HeapSortRef(ind2, Groups, groupSize);
   1.277 +
   1.278 +    /* Write Flags */
   1.279 +    {
   1.280 +    UInt32 sp = ind2[0];
   1.281 +    UInt32 group = Groups[sp];
   1.282 +
   1.283 +    #ifdef BLOCK_SORT_EXTERNAL_FLAGS
   1.284 +    UInt32 *Flags = Groups + BlockSize;
   1.285 +    #else
   1.286 +    UInt32 prevGroupStart = 0;
   1.287 +    #endif
   1.288 +
   1.289 +    for (j = 1; j < groupSize; j++)
   1.290 +    {
   1.291 +      sp = ind2[j];
   1.292 +      if (Groups[sp] != group)
   1.293 +      {
   1.294 +        group = Groups[sp];
   1.295 +        #ifdef BLOCK_SORT_EXTERNAL_FLAGS
   1.296 +        {
   1.297 +        UInt32 t = groupOffset + j - 1;
   1.298 +        Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
   1.299 +        }
   1.300 +        #else
   1.301 +        SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart);
   1.302 +        prevGroupStart = j;
   1.303 +        #endif
   1.304 +      }
   1.305 +    }
   1.306 +
   1.307 +    #ifndef BLOCK_SORT_EXTERNAL_FLAGS
   1.308 +    SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart);
   1.309 +    #endif
   1.310 +    }
   1.311 +    {
   1.312 +    /* Write new Groups values and Check that there are groups */
   1.313 +    UInt32 thereAreGroups = 0;
   1.314 +    for (j = 0; j < groupSize; j++)
   1.315 +    {
   1.316 +      UInt32 group = groupOffset + j;
   1.317 +      #ifndef BLOCK_SORT_EXTERNAL_FLAGS
   1.318 +      UInt32 subGroupSize = ((ind2[j] & ~0xC0000000) >> kNumBitsMax);
   1.319 +      if ((ind2[j] & 0x40000000) != 0)
   1.320 +        subGroupSize += ((ind2[j + 1] >> kNumBitsMax) << kNumExtra0Bits);
   1.321 +      subGroupSize++;
   1.322 +      for (;;)
   1.323 +      {
   1.324 +        UInt32 original = ind2[j];
   1.325 +        UInt32 sp = original & kIndexMask;
   1.326 +        if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes;
   1.327 +        ind2[j] = sp | (original & ~kIndexMask);
   1.328 +        Groups[sp] = group;
   1.329 +        if (--subGroupSize == 0)
   1.330 +          break;
   1.331 +        j++;
   1.332 +        thereAreGroups = 1;
   1.333 +      }
   1.334 +      #else
   1.335 +      UInt32 *Flags = Groups + BlockSize;
   1.336 +      for (;;)
   1.337 +      {
   1.338 +        UInt32 sp = ind2[j]; if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes;
   1.339 +        ind2[j] = sp;
   1.340 +        Groups[sp] = group;
   1.341 +        if ((Flags[(groupOffset + j) >> kNumFlagsBits] & (1 << ((groupOffset + j) & kFlagsMask))) == 0)
   1.342 +          break;
   1.343 +        j++;
   1.344 +        thereAreGroups = 1;
   1.345 +      }
   1.346 +      #endif
   1.347 +    }
   1.348 +    return thereAreGroups;
   1.349 +    }
   1.350 +  }
   1.351 +  #endif
   1.352 +}
   1.353 +
   1.354 +/* conditions: blockSize > 0 */
   1.355 +UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize)
   1.356 +{
   1.357 +  UInt32 *counters = Indices + blockSize;
   1.358 +  UInt32 i;
   1.359 +  UInt32 *Groups;
   1.360 +  #ifdef BLOCK_SORT_EXTERNAL_FLAGS
   1.361 +  UInt32 *Flags;
   1.362 +  #endif
   1.363 +
   1.364 +  /* Radix-Sort for 2 bytes */
   1.365 +  for (i = 0; i < kNumHashValues; i++)
   1.366 +    counters[i] = 0;
   1.367 +  for (i = 0; i < blockSize - 1; i++)
   1.368 +    counters[((UInt32)data[i] << 8) | data[i + 1]]++;
   1.369 +  counters[((UInt32)data[i] << 8) | data[0]]++;
   1.370 +
   1.371 +  Groups = counters + BS_TEMP_SIZE;
   1.372 +  #ifdef BLOCK_SORT_EXTERNAL_FLAGS
   1.373 +  Flags = Groups + blockSize;
   1.374 +    {
   1.375 +      UInt32 numWords = (blockSize + kFlagsMask) >> kNumFlagsBits;
   1.376 +      for (i = 0; i < numWords; i++)
   1.377 +        Flags[i] = kAllFlags;
   1.378 +    }
   1.379 +  #endif
   1.380 +
   1.381 +  {
   1.382 +    UInt32 sum = 0;
   1.383 +    for (i = 0; i < kNumHashValues; i++)
   1.384 +    {
   1.385 +      UInt32 groupSize = counters[i];
   1.386 +      if (groupSize > 0)
   1.387 +      {
   1.388 +        #ifdef BLOCK_SORT_EXTERNAL_FLAGS
   1.389 +        UInt32 t = sum + groupSize - 1;
   1.390 +        Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
   1.391 +        #endif
   1.392 +        sum += groupSize;
   1.393 +      }
   1.394 +      counters[i] = sum - groupSize;
   1.395 +    }
   1.396 +
   1.397 +    for (i = 0; i < blockSize - 1; i++)
   1.398 +      Groups[i] = counters[((UInt32)data[i] << 8) | data[i + 1]];
   1.399 +    Groups[i] = counters[((UInt32)data[i] << 8) | data[0]];
   1.400 +
   1.401 +    for (i = 0; i < blockSize - 1; i++)
   1.402 +      Indices[counters[((UInt32)data[i] << 8) | data[i + 1]]++] = i;
   1.403 +    Indices[counters[((UInt32)data[i] << 8) | data[0]]++] = i;
   1.404 +    
   1.405 +    #ifndef BLOCK_SORT_EXTERNAL_FLAGS
   1.406 +    {
   1.407 +    UInt32 prev = 0;
   1.408 +    for (i = 0; i < kNumHashValues; i++)
   1.409 +    {
   1.410 +      UInt32 prevGroupSize = counters[i] - prev;
   1.411 +      if (prevGroupSize == 0)
   1.412 +        continue;
   1.413 +      SetGroupSize(Indices + prev, prevGroupSize);
   1.414 +      prev = counters[i];
   1.415 +    }
   1.416 +    }
   1.417 +    #endif
   1.418 +  }
   1.419 +
   1.420 +  {
   1.421 +  int NumRefBits;
   1.422 +  UInt32 NumSortedBytes;
   1.423 +  for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++);
   1.424 +  NumRefBits = 32 - NumRefBits;
   1.425 +  if (NumRefBits > kNumRefBitsMax)
   1.426 +    NumRefBits = kNumRefBitsMax;
   1.427 +
   1.428 +  for (NumSortedBytes = kNumHashBytes; ; NumSortedBytes <<= 1)
   1.429 +  {
   1.430 +    #ifndef BLOCK_SORT_EXTERNAL_FLAGS
   1.431 +    UInt32 finishedGroupSize = 0;
   1.432 +    #endif
   1.433 +    UInt32 newLimit = 0;
   1.434 +    for (i = 0; i < blockSize;)
   1.435 +    {
   1.436 +      UInt32 groupSize;
   1.437 +      #ifdef BLOCK_SORT_EXTERNAL_FLAGS
   1.438 +
   1.439 +      if ((Flags[i >> kNumFlagsBits] & (1 << (i & kFlagsMask))) == 0)
   1.440 +      {
   1.441 +        i++;
   1.442 +        continue;
   1.443 +      }
   1.444 +      for (groupSize = 1;
   1.445 +        (Flags[(i + groupSize) >> kNumFlagsBits] & (1 << ((i + groupSize) & kFlagsMask))) != 0;
   1.446 +        groupSize++);
   1.447 +      
   1.448 +      groupSize++;
   1.449 +
   1.450 +      #else
   1.451 +
   1.452 +      groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax);
   1.453 +      {
   1.454 +      Bool finishedGroup = ((Indices[i] & 0x80000000) == 0);
   1.455 +      if ((Indices[i] & 0x40000000) != 0)
   1.456 +      {
   1.457 +        groupSize += ((Indices[i + 1] >> kNumBitsMax) << kNumExtra0Bits);
   1.458 +        Indices[i + 1] &= kIndexMask;
   1.459 +      }
   1.460 +      Indices[i] &= kIndexMask;
   1.461 +      groupSize++;
   1.462 +      if (finishedGroup || groupSize == 1)
   1.463 +      {
   1.464 +        Indices[i - finishedGroupSize] &= kIndexMask;
   1.465 +        if (finishedGroupSize > 1)
   1.466 +          Indices[i - finishedGroupSize + 1] &= kIndexMask;
   1.467 +        {
   1.468 +        UInt32 newGroupSize = groupSize + finishedGroupSize;
   1.469 +        SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize);
   1.470 +        finishedGroupSize = newGroupSize;
   1.471 +        }
   1.472 +        i += groupSize;
   1.473 +        continue;
   1.474 +      }
   1.475 +      finishedGroupSize = 0;
   1.476 +      }
   1.477 +
   1.478 +      #endif
   1.479 +      
   1.480 +      if (NumSortedBytes >= blockSize)
   1.481 +      {
   1.482 +        UInt32 j;
   1.483 +        for (j = 0; j < groupSize; j++)
   1.484 +        {
   1.485 +          UInt32 t = (i + j);
   1.486 +          /* Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); */
   1.487 +          Groups[Indices[t]] = t;
   1.488 +        }
   1.489 +      }
   1.490 +      else
   1.491 +        if (SortGroup(blockSize, NumSortedBytes, i, groupSize, NumRefBits, Indices
   1.492 +          #ifndef BLOCK_SORT_USE_HEAP_SORT
   1.493 +          , 0, blockSize
   1.494 +          #endif
   1.495 +          ) != 0)
   1.496 +          newLimit = i + groupSize;
   1.497 +      i += groupSize;
   1.498 +    }
   1.499 +    if (newLimit == 0)
   1.500 +      break;
   1.501 +  }
   1.502 +  }
   1.503 +  #ifndef BLOCK_SORT_EXTERNAL_FLAGS
   1.504 +  for (i = 0; i < blockSize;)
   1.505 +  {
   1.506 +    UInt32 groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax);
   1.507 +    if ((Indices[i] & 0x40000000) != 0)
   1.508 +    {
   1.509 +      groupSize += ((Indices[i + 1] >> kNumBitsMax) << kNumExtra0Bits);
   1.510 +      Indices[i + 1] &= kIndexMask;
   1.511 +    }
   1.512 +    Indices[i] &= kIndexMask;
   1.513 +    groupSize++;
   1.514 +    i += groupSize;
   1.515 +  }
   1.516 +  #endif
   1.517 +  return Groups[0];
   1.518 +}
   1.519 +