rlm@1: /* BwtSort.c -- BWT block sorting rlm@1: 2008-08-17 rlm@1: Igor Pavlov rlm@1: Public domain */ rlm@1: rlm@1: #include "BwtSort.h" rlm@1: #include "Sort.h" rlm@1: rlm@1: /* #define BLOCK_SORT_USE_HEAP_SORT */ rlm@1: rlm@1: #define NO_INLINE MY_FAST_CALL rlm@1: rlm@1: /* Don't change it !!! */ rlm@1: #define kNumHashBytes 2 rlm@1: #define kNumHashValues (1 << (kNumHashBytes * 8)) rlm@1: rlm@1: /* kNumRefBitsMax must be < (kNumHashBytes * 8) = 16 */ rlm@1: #define kNumRefBitsMax 12 rlm@1: rlm@1: #define BS_TEMP_SIZE kNumHashValues rlm@1: rlm@1: #ifdef BLOCK_SORT_EXTERNAL_FLAGS rlm@1: rlm@1: /* 32 Flags in UInt32 word */ rlm@1: #define kNumFlagsBits 5 rlm@1: #define kNumFlagsInWord (1 << kNumFlagsBits) rlm@1: #define kFlagsMask (kNumFlagsInWord - 1) rlm@1: #define kAllFlags 0xFFFFFFFF rlm@1: rlm@1: #else rlm@1: rlm@1: #define kNumBitsMax 20 rlm@1: #define kIndexMask ((1 << kNumBitsMax) - 1) rlm@1: #define kNumExtraBits (32 - kNumBitsMax) rlm@1: #define kNumExtra0Bits (kNumExtraBits - 2) rlm@1: #define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1) rlm@1: rlm@1: #define SetFinishedGroupSize(p, size) \ rlm@1: { *(p) |= ((((size) - 1) & kNumExtra0Mask) << kNumBitsMax); \ rlm@1: if ((size) > (1 << kNumExtra0Bits)) { \ rlm@1: *(p) |= 0x40000000; *((p) + 1) |= ((((size) - 1)>> kNumExtra0Bits) << kNumBitsMax); } } \ rlm@1: rlm@1: static void SetGroupSize(UInt32 *p, UInt32 size) rlm@1: { rlm@1: if (--size == 0) rlm@1: return; rlm@1: *p |= 0x80000000 | ((size & kNumExtra0Mask) << kNumBitsMax); rlm@1: if (size >= (1 << kNumExtra0Bits)) rlm@1: { rlm@1: *p |= 0x40000000; rlm@1: p[1] |= ((size >> kNumExtra0Bits) << kNumBitsMax); rlm@1: } rlm@1: } rlm@1: rlm@1: #endif rlm@1: rlm@1: /* rlm@1: SortGroup - is recursive Range-Sort function with HeapSort optimization for small blocks rlm@1: "range" is not real range. It's only for optimization. rlm@1: returns: 1 - if there are groups, 0 - no more groups rlm@1: */ rlm@1: rlm@1: UInt32 NO_INLINE SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices rlm@1: #ifndef BLOCK_SORT_USE_HEAP_SORT rlm@1: , UInt32 left, UInt32 range rlm@1: #endif rlm@1: ) rlm@1: { rlm@1: UInt32 *ind2 = Indices + groupOffset; rlm@1: UInt32 *Groups; rlm@1: if (groupSize <= 1) rlm@1: { rlm@1: /* rlm@1: #ifndef BLOCK_SORT_EXTERNAL_FLAGS rlm@1: SetFinishedGroupSize(ind2, 1); rlm@1: #endif rlm@1: */ rlm@1: return 0; rlm@1: } rlm@1: Groups = Indices + BlockSize + BS_TEMP_SIZE; rlm@1: if (groupSize <= ((UInt32)1 << NumRefBits) rlm@1: #ifndef BLOCK_SORT_USE_HEAP_SORT rlm@1: && groupSize <= range rlm@1: #endif rlm@1: ) rlm@1: { rlm@1: UInt32 *temp = Indices + BlockSize; rlm@1: UInt32 j; rlm@1: UInt32 mask, thereAreGroups, group, cg; rlm@1: { rlm@1: UInt32 gPrev; rlm@1: UInt32 gRes = 0; rlm@1: { rlm@1: UInt32 sp = ind2[0] + NumSortedBytes; rlm@1: if (sp >= BlockSize) sp -= BlockSize; rlm@1: gPrev = Groups[sp]; rlm@1: temp[0] = (gPrev << NumRefBits); rlm@1: } rlm@1: rlm@1: for (j = 1; j < groupSize; j++) rlm@1: { rlm@1: UInt32 sp = ind2[j] + NumSortedBytes; rlm@1: UInt32 g; rlm@1: if (sp >= BlockSize) sp -= BlockSize; rlm@1: g = Groups[sp]; rlm@1: temp[j] = (g << NumRefBits) | j; rlm@1: gRes |= (gPrev ^ g); rlm@1: } rlm@1: if (gRes == 0) rlm@1: { rlm@1: #ifndef BLOCK_SORT_EXTERNAL_FLAGS rlm@1: SetGroupSize(ind2, groupSize); rlm@1: #endif rlm@1: return 1; rlm@1: } rlm@1: } rlm@1: rlm@1: HeapSort(temp, groupSize); rlm@1: mask = ((1 << NumRefBits) - 1); rlm@1: thereAreGroups = 0; rlm@1: rlm@1: group = groupOffset; rlm@1: cg = (temp[0] >> NumRefBits); rlm@1: temp[0] = ind2[temp[0] & mask]; rlm@1: rlm@1: { rlm@1: #ifdef BLOCK_SORT_EXTERNAL_FLAGS rlm@1: UInt32 *Flags = Groups + BlockSize; rlm@1: #else rlm@1: UInt32 prevGroupStart = 0; rlm@1: #endif rlm@1: rlm@1: for (j = 1; j < groupSize; j++) rlm@1: { rlm@1: UInt32 val = temp[j]; rlm@1: UInt32 cgCur = (val >> NumRefBits); rlm@1: rlm@1: if (cgCur != cg) rlm@1: { rlm@1: cg = cgCur; rlm@1: group = groupOffset + j; rlm@1: rlm@1: #ifdef BLOCK_SORT_EXTERNAL_FLAGS rlm@1: { rlm@1: UInt32 t = group - 1; rlm@1: Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); rlm@1: } rlm@1: #else rlm@1: SetGroupSize(temp + prevGroupStart, j - prevGroupStart); rlm@1: prevGroupStart = j; rlm@1: #endif rlm@1: } rlm@1: else rlm@1: thereAreGroups = 1; rlm@1: { rlm@1: UInt32 ind = ind2[val & mask]; rlm@1: temp[j] = ind; rlm@1: Groups[ind] = group; rlm@1: } rlm@1: } rlm@1: rlm@1: #ifndef BLOCK_SORT_EXTERNAL_FLAGS rlm@1: SetGroupSize(temp + prevGroupStart, j - prevGroupStart); rlm@1: #endif rlm@1: } rlm@1: rlm@1: for (j = 0; j < groupSize; j++) rlm@1: ind2[j] = temp[j]; rlm@1: return thereAreGroups; rlm@1: } rlm@1: rlm@1: /* Check that all strings are in one group (cannot sort) */ rlm@1: { rlm@1: UInt32 group, j; rlm@1: UInt32 sp = ind2[0] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; rlm@1: group = Groups[sp]; rlm@1: for (j = 1; j < groupSize; j++) rlm@1: { rlm@1: sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; rlm@1: if (Groups[sp] != group) rlm@1: break; rlm@1: } rlm@1: if (j == groupSize) rlm@1: { rlm@1: #ifndef BLOCK_SORT_EXTERNAL_FLAGS rlm@1: SetGroupSize(ind2, groupSize); rlm@1: #endif rlm@1: return 1; rlm@1: } rlm@1: } rlm@1: rlm@1: #ifndef BLOCK_SORT_USE_HEAP_SORT rlm@1: { rlm@1: /* ---------- Range Sort ---------- */ rlm@1: UInt32 i; rlm@1: UInt32 mid; rlm@1: for (;;) rlm@1: { rlm@1: UInt32 j; rlm@1: if (range <= 1) rlm@1: { rlm@1: #ifndef BLOCK_SORT_EXTERNAL_FLAGS rlm@1: SetGroupSize(ind2, groupSize); rlm@1: #endif rlm@1: return 1; rlm@1: } rlm@1: mid = left + ((range + 1) >> 1); rlm@1: j = groupSize; rlm@1: i = 0; rlm@1: do rlm@1: { rlm@1: UInt32 sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; rlm@1: if (Groups[sp] >= mid) rlm@1: { rlm@1: for (j--; j > i; j--) rlm@1: { rlm@1: sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; rlm@1: if (Groups[sp] < mid) rlm@1: { rlm@1: UInt32 temp = ind2[i]; ind2[i] = ind2[j]; ind2[j] = temp; rlm@1: break; rlm@1: } rlm@1: } rlm@1: if (i >= j) rlm@1: break; rlm@1: } rlm@1: } rlm@1: while (++i < j); rlm@1: if (i == 0) rlm@1: { rlm@1: range = range - (mid - left); rlm@1: left = mid; rlm@1: } rlm@1: else if (i == groupSize) rlm@1: range = (mid - left); rlm@1: else rlm@1: break; rlm@1: } rlm@1: rlm@1: #ifdef BLOCK_SORT_EXTERNAL_FLAGS rlm@1: { rlm@1: UInt32 t = (groupOffset + i - 1); rlm@1: UInt32 *Flags = Groups + BlockSize; rlm@1: Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); rlm@1: } rlm@1: #endif rlm@1: rlm@1: { rlm@1: UInt32 j; rlm@1: for (j = i; j < groupSize; j++) rlm@1: Groups[ind2[j]] = groupOffset + i; rlm@1: } rlm@1: rlm@1: { rlm@1: UInt32 res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left); rlm@1: return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left)); rlm@1: } rlm@1: rlm@1: } rlm@1: rlm@1: #else rlm@1: rlm@1: /* ---------- Heap Sort ---------- */ rlm@1: rlm@1: { rlm@1: UInt32 j; rlm@1: for (j = 0; j < groupSize; j++) rlm@1: { rlm@1: UInt32 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; rlm@1: ind2[j] = sp; rlm@1: } rlm@1: rlm@1: HeapSortRef(ind2, Groups, groupSize); rlm@1: rlm@1: /* Write Flags */ rlm@1: { rlm@1: UInt32 sp = ind2[0]; rlm@1: UInt32 group = Groups[sp]; rlm@1: rlm@1: #ifdef BLOCK_SORT_EXTERNAL_FLAGS rlm@1: UInt32 *Flags = Groups + BlockSize; rlm@1: #else rlm@1: UInt32 prevGroupStart = 0; rlm@1: #endif rlm@1: rlm@1: for (j = 1; j < groupSize; j++) rlm@1: { rlm@1: sp = ind2[j]; rlm@1: if (Groups[sp] != group) rlm@1: { rlm@1: group = Groups[sp]; rlm@1: #ifdef BLOCK_SORT_EXTERNAL_FLAGS rlm@1: { rlm@1: UInt32 t = groupOffset + j - 1; rlm@1: Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); rlm@1: } rlm@1: #else rlm@1: SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart); rlm@1: prevGroupStart = j; rlm@1: #endif rlm@1: } rlm@1: } rlm@1: rlm@1: #ifndef BLOCK_SORT_EXTERNAL_FLAGS rlm@1: SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart); rlm@1: #endif rlm@1: } rlm@1: { rlm@1: /* Write new Groups values and Check that there are groups */ rlm@1: UInt32 thereAreGroups = 0; rlm@1: for (j = 0; j < groupSize; j++) rlm@1: { rlm@1: UInt32 group = groupOffset + j; rlm@1: #ifndef BLOCK_SORT_EXTERNAL_FLAGS rlm@1: UInt32 subGroupSize = ((ind2[j] & ~0xC0000000) >> kNumBitsMax); rlm@1: if ((ind2[j] & 0x40000000) != 0) rlm@1: subGroupSize += ((ind2[j + 1] >> kNumBitsMax) << kNumExtra0Bits); rlm@1: subGroupSize++; rlm@1: for (;;) rlm@1: { rlm@1: UInt32 original = ind2[j]; rlm@1: UInt32 sp = original & kIndexMask; rlm@1: if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes; rlm@1: ind2[j] = sp | (original & ~kIndexMask); rlm@1: Groups[sp] = group; rlm@1: if (--subGroupSize == 0) rlm@1: break; rlm@1: j++; rlm@1: thereAreGroups = 1; rlm@1: } rlm@1: #else rlm@1: UInt32 *Flags = Groups + BlockSize; rlm@1: for (;;) rlm@1: { rlm@1: UInt32 sp = ind2[j]; if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes; rlm@1: ind2[j] = sp; rlm@1: Groups[sp] = group; rlm@1: if ((Flags[(groupOffset + j) >> kNumFlagsBits] & (1 << ((groupOffset + j) & kFlagsMask))) == 0) rlm@1: break; rlm@1: j++; rlm@1: thereAreGroups = 1; rlm@1: } rlm@1: #endif rlm@1: } rlm@1: return thereAreGroups; rlm@1: } rlm@1: } rlm@1: #endif rlm@1: } rlm@1: rlm@1: /* conditions: blockSize > 0 */ rlm@1: UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize) rlm@1: { rlm@1: UInt32 *counters = Indices + blockSize; rlm@1: UInt32 i; rlm@1: UInt32 *Groups; rlm@1: #ifdef BLOCK_SORT_EXTERNAL_FLAGS rlm@1: UInt32 *Flags; rlm@1: #endif rlm@1: rlm@1: /* Radix-Sort for 2 bytes */ rlm@1: for (i = 0; i < kNumHashValues; i++) rlm@1: counters[i] = 0; rlm@1: for (i = 0; i < blockSize - 1; i++) rlm@1: counters[((UInt32)data[i] << 8) | data[i + 1]]++; rlm@1: counters[((UInt32)data[i] << 8) | data[0]]++; rlm@1: rlm@1: Groups = counters + BS_TEMP_SIZE; rlm@1: #ifdef BLOCK_SORT_EXTERNAL_FLAGS rlm@1: Flags = Groups + blockSize; rlm@1: { rlm@1: UInt32 numWords = (blockSize + kFlagsMask) >> kNumFlagsBits; rlm@1: for (i = 0; i < numWords; i++) rlm@1: Flags[i] = kAllFlags; rlm@1: } rlm@1: #endif rlm@1: rlm@1: { rlm@1: UInt32 sum = 0; rlm@1: for (i = 0; i < kNumHashValues; i++) rlm@1: { rlm@1: UInt32 groupSize = counters[i]; rlm@1: if (groupSize > 0) rlm@1: { rlm@1: #ifdef BLOCK_SORT_EXTERNAL_FLAGS rlm@1: UInt32 t = sum + groupSize - 1; rlm@1: Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); rlm@1: #endif rlm@1: sum += groupSize; rlm@1: } rlm@1: counters[i] = sum - groupSize; rlm@1: } rlm@1: rlm@1: for (i = 0; i < blockSize - 1; i++) rlm@1: Groups[i] = counters[((UInt32)data[i] << 8) | data[i + 1]]; rlm@1: Groups[i] = counters[((UInt32)data[i] << 8) | data[0]]; rlm@1: rlm@1: for (i = 0; i < blockSize - 1; i++) rlm@1: Indices[counters[((UInt32)data[i] << 8) | data[i + 1]]++] = i; rlm@1: Indices[counters[((UInt32)data[i] << 8) | data[0]]++] = i; rlm@1: rlm@1: #ifndef BLOCK_SORT_EXTERNAL_FLAGS rlm@1: { rlm@1: UInt32 prev = 0; rlm@1: for (i = 0; i < kNumHashValues; i++) rlm@1: { rlm@1: UInt32 prevGroupSize = counters[i] - prev; rlm@1: if (prevGroupSize == 0) rlm@1: continue; rlm@1: SetGroupSize(Indices + prev, prevGroupSize); rlm@1: prev = counters[i]; rlm@1: } rlm@1: } rlm@1: #endif rlm@1: } rlm@1: rlm@1: { rlm@1: int NumRefBits; rlm@1: UInt32 NumSortedBytes; rlm@1: for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++); rlm@1: NumRefBits = 32 - NumRefBits; rlm@1: if (NumRefBits > kNumRefBitsMax) rlm@1: NumRefBits = kNumRefBitsMax; rlm@1: rlm@1: for (NumSortedBytes = kNumHashBytes; ; NumSortedBytes <<= 1) rlm@1: { rlm@1: #ifndef BLOCK_SORT_EXTERNAL_FLAGS rlm@1: UInt32 finishedGroupSize = 0; rlm@1: #endif rlm@1: UInt32 newLimit = 0; rlm@1: for (i = 0; i < blockSize;) rlm@1: { rlm@1: UInt32 groupSize; rlm@1: #ifdef BLOCK_SORT_EXTERNAL_FLAGS rlm@1: rlm@1: if ((Flags[i >> kNumFlagsBits] & (1 << (i & kFlagsMask))) == 0) rlm@1: { rlm@1: i++; rlm@1: continue; rlm@1: } rlm@1: for (groupSize = 1; rlm@1: (Flags[(i + groupSize) >> kNumFlagsBits] & (1 << ((i + groupSize) & kFlagsMask))) != 0; rlm@1: groupSize++); rlm@1: rlm@1: groupSize++; rlm@1: rlm@1: #else rlm@1: rlm@1: groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax); rlm@1: { rlm@1: Bool finishedGroup = ((Indices[i] & 0x80000000) == 0); rlm@1: if ((Indices[i] & 0x40000000) != 0) rlm@1: { rlm@1: groupSize += ((Indices[i + 1] >> kNumBitsMax) << kNumExtra0Bits); rlm@1: Indices[i + 1] &= kIndexMask; rlm@1: } rlm@1: Indices[i] &= kIndexMask; rlm@1: groupSize++; rlm@1: if (finishedGroup || groupSize == 1) rlm@1: { rlm@1: Indices[i - finishedGroupSize] &= kIndexMask; rlm@1: if (finishedGroupSize > 1) rlm@1: Indices[i - finishedGroupSize + 1] &= kIndexMask; rlm@1: { rlm@1: UInt32 newGroupSize = groupSize + finishedGroupSize; rlm@1: SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize); rlm@1: finishedGroupSize = newGroupSize; rlm@1: } rlm@1: i += groupSize; rlm@1: continue; rlm@1: } rlm@1: finishedGroupSize = 0; rlm@1: } rlm@1: rlm@1: #endif rlm@1: rlm@1: if (NumSortedBytes >= blockSize) rlm@1: { rlm@1: UInt32 j; rlm@1: for (j = 0; j < groupSize; j++) rlm@1: { rlm@1: UInt32 t = (i + j); rlm@1: /* Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); */ rlm@1: Groups[Indices[t]] = t; rlm@1: } rlm@1: } rlm@1: else rlm@1: if (SortGroup(blockSize, NumSortedBytes, i, groupSize, NumRefBits, Indices rlm@1: #ifndef BLOCK_SORT_USE_HEAP_SORT rlm@1: , 0, blockSize rlm@1: #endif rlm@1: ) != 0) rlm@1: newLimit = i + groupSize; rlm@1: i += groupSize; rlm@1: } rlm@1: if (newLimit == 0) rlm@1: break; rlm@1: } rlm@1: } rlm@1: #ifndef BLOCK_SORT_EXTERNAL_FLAGS rlm@1: for (i = 0; i < blockSize;) rlm@1: { rlm@1: UInt32 groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax); rlm@1: if ((Indices[i] & 0x40000000) != 0) rlm@1: { rlm@1: groupSize += ((Indices[i + 1] >> kNumBitsMax) << kNumExtra0Bits); rlm@1: Indices[i + 1] &= kIndexMask; rlm@1: } rlm@1: Indices[i] &= kIndexMask; rlm@1: groupSize++; rlm@1: i += groupSize; rlm@1: } rlm@1: #endif rlm@1: return Groups[0]; rlm@1: } rlm@1: