diff src/win32/7zip/7z/C/LzFindMt.c @ 1:f9f4f1b99eed

importing src directory
author Robert McIntyre <rlm@mit.edu>
date Sat, 03 Mar 2012 10:31:27 -0600
parents
children
line wrap: on
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/win32/7zip/7z/C/LzFindMt.c	Sat Mar 03 10:31:27 2012 -0600
     1.3 @@ -0,0 +1,793 @@
     1.4 +/* LzFindMt.c -- multithreaded Match finder for LZ algorithms
     1.5 +2008-10-04 : Igor Pavlov : Public domain */
     1.6 +
     1.7 +#include "LzHash.h"
     1.8 +
     1.9 +#include "LzFindMt.h"
    1.10 +
    1.11 +void MtSync_Construct(CMtSync *p)
    1.12 +{
    1.13 +  p->wasCreated = False;
    1.14 +  p->csWasInitialized = False;
    1.15 +  p->csWasEntered = False;
    1.16 +  Thread_Construct(&p->thread);
    1.17 +  Event_Construct(&p->canStart);
    1.18 +  Event_Construct(&p->wasStarted);
    1.19 +  Event_Construct(&p->wasStopped);
    1.20 +  Semaphore_Construct(&p->freeSemaphore);
    1.21 +  Semaphore_Construct(&p->filledSemaphore);
    1.22 +}
    1.23 +
    1.24 +void MtSync_GetNextBlock(CMtSync *p)
    1.25 +{
    1.26 +  if (p->needStart)
    1.27 +  {
    1.28 +    p->numProcessedBlocks = 1;
    1.29 +    p->needStart = False;
    1.30 +    p->stopWriting = False;
    1.31 +    p->exit = False;
    1.32 +    Event_Reset(&p->wasStarted);
    1.33 +    Event_Reset(&p->wasStopped);
    1.34 +
    1.35 +    Event_Set(&p->canStart);
    1.36 +    Event_Wait(&p->wasStarted);
    1.37 +  }
    1.38 +  else
    1.39 +  {
    1.40 +    CriticalSection_Leave(&p->cs);
    1.41 +    p->csWasEntered = False;
    1.42 +    p->numProcessedBlocks++;
    1.43 +    Semaphore_Release1(&p->freeSemaphore);
    1.44 +  }
    1.45 +  Semaphore_Wait(&p->filledSemaphore);
    1.46 +  CriticalSection_Enter(&p->cs);
    1.47 +  p->csWasEntered = True;
    1.48 +}
    1.49 +
    1.50 +/* MtSync_StopWriting must be called if Writing was started */
    1.51 +
    1.52 +void MtSync_StopWriting(CMtSync *p)
    1.53 +{
    1.54 +  UInt32 myNumBlocks = p->numProcessedBlocks;
    1.55 +  if (!Thread_WasCreated(&p->thread) || p->needStart)
    1.56 +    return;
    1.57 +  p->stopWriting = True;
    1.58 +  if (p->csWasEntered)
    1.59 +  {
    1.60 +    CriticalSection_Leave(&p->cs);
    1.61 +    p->csWasEntered = False;
    1.62 +  }
    1.63 +  Semaphore_Release1(&p->freeSemaphore);
    1.64 + 
    1.65 +  Event_Wait(&p->wasStopped);
    1.66 +
    1.67 +  while (myNumBlocks++ != p->numProcessedBlocks)
    1.68 +  {
    1.69 +    Semaphore_Wait(&p->filledSemaphore);
    1.70 +    Semaphore_Release1(&p->freeSemaphore);
    1.71 +  }
    1.72 +  p->needStart = True;
    1.73 +}
    1.74 +
    1.75 +void MtSync_Destruct(CMtSync *p)
    1.76 +{
    1.77 +  if (Thread_WasCreated(&p->thread))
    1.78 +  {
    1.79 +    MtSync_StopWriting(p);
    1.80 +    p->exit = True;
    1.81 +    if (p->needStart)
    1.82 +      Event_Set(&p->canStart);
    1.83 +    Thread_Wait(&p->thread);
    1.84 +    Thread_Close(&p->thread);
    1.85 +  }
    1.86 +  if (p->csWasInitialized)
    1.87 +  {
    1.88 +    CriticalSection_Delete(&p->cs);
    1.89 +    p->csWasInitialized = False;
    1.90 +  }
    1.91 +
    1.92 +  Event_Close(&p->canStart);
    1.93 +  Event_Close(&p->wasStarted);
    1.94 +  Event_Close(&p->wasStopped);
    1.95 +  Semaphore_Close(&p->freeSemaphore);
    1.96 +  Semaphore_Close(&p->filledSemaphore);
    1.97 +
    1.98 +  p->wasCreated = False;
    1.99 +}
   1.100 +
   1.101 +#define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
   1.102 +
   1.103 +static SRes MtSync_Create2(CMtSync *p, unsigned (MY_STD_CALL *startAddress)(void *), void *obj, UInt32 numBlocks)
   1.104 +{
   1.105 +  if (p->wasCreated)
   1.106 +    return SZ_OK;
   1.107 +
   1.108 +  RINOK_THREAD(CriticalSection_Init(&p->cs));
   1.109 +  p->csWasInitialized = True;
   1.110 +
   1.111 +  RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart));
   1.112 +  RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStarted));
   1.113 +  RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped));
   1.114 +  
   1.115 +  RINOK_THREAD(Semaphore_Create(&p->freeSemaphore, numBlocks, numBlocks));
   1.116 +  RINOK_THREAD(Semaphore_Create(&p->filledSemaphore, 0, numBlocks));
   1.117 +
   1.118 +  p->needStart = True;
   1.119 +  
   1.120 +  RINOK_THREAD(Thread_Create(&p->thread, startAddress, obj));
   1.121 +  p->wasCreated = True;
   1.122 +  return SZ_OK;
   1.123 +}
   1.124 +
   1.125 +static SRes MtSync_Create(CMtSync *p, unsigned (MY_STD_CALL *startAddress)(void *), void *obj, UInt32 numBlocks)
   1.126 +{
   1.127 +  SRes res = MtSync_Create2(p, startAddress, obj, numBlocks);
   1.128 +  if (res != SZ_OK)
   1.129 +    MtSync_Destruct(p);
   1.130 +  return res;
   1.131 +}
   1.132 +
   1.133 +void MtSync_Init(CMtSync *p) { p->needStart = True; }
   1.134 +
   1.135 +#define kMtMaxValForNormalize 0xFFFFFFFF
   1.136 +
   1.137 +#define DEF_GetHeads2(name, v, action) \
   1.138 +static void GetHeads ## name(const Byte *p, UInt32 pos, \
   1.139 +UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc) \
   1.140 +{ action; for (; numHeads != 0; numHeads--) { \
   1.141 +const UInt32 value = (v); p++; *heads++ = pos - hash[value]; hash[value] = pos++;  } }
   1.142 +
   1.143 +#define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;)
   1.144 +
   1.145 +DEF_GetHeads2(2,  (p[0] | ((UInt32)p[1] << 8)), hashMask = hashMask; crc = crc; )
   1.146 +DEF_GetHeads(3,  (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8)) & hashMask)
   1.147 +DEF_GetHeads(4,  (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5)) & hashMask)
   1.148 +DEF_GetHeads(4b, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ ((UInt32)p[3] << 16)) & hashMask)
   1.149 +DEF_GetHeads(5,  (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5) ^ (crc[p[4]] << 3)) & hashMask)
   1.150 +
   1.151 +void HashThreadFunc(CMatchFinderMt *mt)
   1.152 +{
   1.153 +  CMtSync *p = &mt->hashSync;
   1.154 +  for (;;)
   1.155 +  {
   1.156 +    UInt32 numProcessedBlocks = 0;
   1.157 +    Event_Wait(&p->canStart);
   1.158 +    Event_Set(&p->wasStarted);
   1.159 +    for (;;)
   1.160 +    {
   1.161 +      if (p->exit)
   1.162 +        return;
   1.163 +      if (p->stopWriting)
   1.164 +      {
   1.165 +        p->numProcessedBlocks = numProcessedBlocks;
   1.166 +        Event_Set(&p->wasStopped);
   1.167 +        break;
   1.168 +      }
   1.169 +
   1.170 +      {
   1.171 +        CMatchFinder *mf = mt->MatchFinder;
   1.172 +        if (MatchFinder_NeedMove(mf))
   1.173 +        {
   1.174 +          CriticalSection_Enter(&mt->btSync.cs);
   1.175 +          CriticalSection_Enter(&mt->hashSync.cs);
   1.176 +          {
   1.177 +            const Byte *beforePtr = MatchFinder_GetPointerToCurrentPos(mf);
   1.178 +            const Byte *afterPtr;
   1.179 +            MatchFinder_MoveBlock(mf);
   1.180 +            afterPtr = MatchFinder_GetPointerToCurrentPos(mf);
   1.181 +            mt->pointerToCurPos -= beforePtr - afterPtr;
   1.182 +            mt->buffer -= beforePtr - afterPtr;
   1.183 +          }
   1.184 +          CriticalSection_Leave(&mt->btSync.cs);
   1.185 +          CriticalSection_Leave(&mt->hashSync.cs);
   1.186 +          continue;
   1.187 +        }
   1.188 +
   1.189 +        Semaphore_Wait(&p->freeSemaphore);
   1.190 +
   1.191 +        MatchFinder_ReadIfRequired(mf);
   1.192 +        if (mf->pos > (kMtMaxValForNormalize - kMtHashBlockSize))
   1.193 +        {
   1.194 +          UInt32 subValue = (mf->pos - mf->historySize - 1);
   1.195 +          MatchFinder_ReduceOffsets(mf, subValue);
   1.196 +          MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, mf->hashMask + 1);
   1.197 +        }
   1.198 +        {
   1.199 +          UInt32 *heads = mt->hashBuf + ((numProcessedBlocks++) & kMtHashNumBlocksMask) * kMtHashBlockSize;
   1.200 +          UInt32 num = mf->streamPos - mf->pos;
   1.201 +          heads[0] = 2;
   1.202 +          heads[1] = num;
   1.203 +          if (num >= mf->numHashBytes)
   1.204 +          {
   1.205 +            num = num - mf->numHashBytes + 1;
   1.206 +            if (num > kMtHashBlockSize - 2)
   1.207 +              num = kMtHashBlockSize - 2;
   1.208 +            mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc);
   1.209 +            heads[0] += num;
   1.210 +          }
   1.211 +          mf->pos += num;
   1.212 +          mf->buffer += num;
   1.213 +        }
   1.214 +      }
   1.215 +
   1.216 +      Semaphore_Release1(&p->filledSemaphore);
   1.217 +    }
   1.218 +  }
   1.219 +}
   1.220 +
   1.221 +void MatchFinderMt_GetNextBlock_Hash(CMatchFinderMt *p)
   1.222 +{
   1.223 +  MtSync_GetNextBlock(&p->hashSync);
   1.224 +  p->hashBufPosLimit = p->hashBufPos = ((p->hashSync.numProcessedBlocks - 1) & kMtHashNumBlocksMask) * kMtHashBlockSize;
   1.225 +  p->hashBufPosLimit += p->hashBuf[p->hashBufPos++];
   1.226 +  p->hashNumAvail = p->hashBuf[p->hashBufPos++];
   1.227 +}
   1.228 +
   1.229 +#define kEmptyHashValue 0
   1.230 +
   1.231 +/* #define MFMT_GM_INLINE */
   1.232 +
   1.233 +#ifdef MFMT_GM_INLINE
   1.234 +
   1.235 +#define NO_INLINE MY_FAST_CALL
   1.236 +
   1.237 +Int32 NO_INLINE GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLzRef *son,
   1.238 +    UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
   1.239 +    UInt32 *_distances, UInt32 _maxLen, const UInt32 *hash, Int32 limit, UInt32 size, UInt32 *posRes)
   1.240 +{
   1.241 +  do
   1.242 +  {
   1.243 +  UInt32 *distances = _distances + 1;
   1.244 +  UInt32 curMatch = pos - *hash++;
   1.245 +
   1.246 +  CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1;
   1.247 +  CLzRef *ptr1 = son + (_cyclicBufferPos << 1);
   1.248 +  UInt32 len0 = 0, len1 = 0;
   1.249 +  UInt32 cutValue = _cutValue;
   1.250 +  UInt32 maxLen = _maxLen;
   1.251 +  for (;;)
   1.252 +  {
   1.253 +    UInt32 delta = pos - curMatch;
   1.254 +    if (cutValue-- == 0 || delta >= _cyclicBufferSize)
   1.255 +    {
   1.256 +      *ptr0 = *ptr1 = kEmptyHashValue;
   1.257 +      break;
   1.258 +    }
   1.259 +    {
   1.260 +      CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
   1.261 +      const Byte *pb = cur - delta;
   1.262 +      UInt32 len = (len0 < len1 ? len0 : len1);
   1.263 +      if (pb[len] == cur[len])
   1.264 +      {
   1.265 +        if (++len != lenLimit && pb[len] == cur[len])
   1.266 +          while (++len != lenLimit)
   1.267 +            if (pb[len] != cur[len])
   1.268 +              break;
   1.269 +        if (maxLen < len)
   1.270 +        {
   1.271 +          *distances++ = maxLen = len;
   1.272 +          *distances++ = delta - 1;
   1.273 +          if (len == lenLimit)
   1.274 +          {
   1.275 +            *ptr1 = pair[0];
   1.276 +            *ptr0 = pair[1];
   1.277 +            break;
   1.278 +          }
   1.279 +        }
   1.280 +      }
   1.281 +      if (pb[len] < cur[len])
   1.282 +      {
   1.283 +        *ptr1 = curMatch;
   1.284 +        ptr1 = pair + 1;
   1.285 +        curMatch = *ptr1;
   1.286 +        len1 = len;
   1.287 +      }
   1.288 +      else
   1.289 +      {
   1.290 +        *ptr0 = curMatch;
   1.291 +        ptr0 = pair;
   1.292 +        curMatch = *ptr0;
   1.293 +        len0 = len;
   1.294 +      }
   1.295 +    }
   1.296 +  }
   1.297 +  pos++;
   1.298 +  _cyclicBufferPos++;
   1.299 +  cur++;
   1.300 +  {
   1.301 +    UInt32 num = (UInt32)(distances - _distances);
   1.302 +    *_distances = num - 1;
   1.303 +    _distances += num;
   1.304 +    limit -= num;
   1.305 +  }
   1.306 +  }
   1.307 +  while (limit > 0 && --size != 0);
   1.308 +  *posRes = pos;
   1.309 +  return limit;
   1.310 +}
   1.311 +
   1.312 +#endif
   1.313 +
   1.314 +void BtGetMatches(CMatchFinderMt *p, UInt32 *distances)
   1.315 +{
   1.316 +  UInt32 numProcessed = 0;
   1.317 +  UInt32 curPos = 2;
   1.318 +  UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2);
   1.319 +  distances[1] = p->hashNumAvail;
   1.320 +  while (curPos < limit)
   1.321 +  {
   1.322 +    if (p->hashBufPos == p->hashBufPosLimit)
   1.323 +    {
   1.324 +      MatchFinderMt_GetNextBlock_Hash(p);
   1.325 +      distances[1] = numProcessed + p->hashNumAvail;
   1.326 +      if (p->hashNumAvail >= p->numHashBytes)
   1.327 +        continue;
   1.328 +      for (; p->hashNumAvail != 0; p->hashNumAvail--)
   1.329 +        distances[curPos++] = 0;
   1.330 +      break;
   1.331 +    }
   1.332 +    {
   1.333 +      UInt32 size = p->hashBufPosLimit - p->hashBufPos;
   1.334 +      UInt32 lenLimit = p->matchMaxLen;
   1.335 +      UInt32 pos = p->pos;
   1.336 +      UInt32 cyclicBufferPos = p->cyclicBufferPos;
   1.337 +      if (lenLimit >= p->hashNumAvail)
   1.338 +        lenLimit = p->hashNumAvail;
   1.339 +      {
   1.340 +        UInt32 size2 = p->hashNumAvail - lenLimit + 1;
   1.341 +        if (size2 < size)
   1.342 +          size = size2;
   1.343 +        size2 = p->cyclicBufferSize - cyclicBufferPos;
   1.344 +        if (size2 < size)
   1.345 +          size = size2;
   1.346 +      }
   1.347 +      #ifndef MFMT_GM_INLINE
   1.348 +      while (curPos < limit && size-- != 0)
   1.349 +      {
   1.350 +        UInt32 *startDistances = distances + curPos;
   1.351 +        UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++],
   1.352 +          pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
   1.353 +          startDistances + 1, p->numHashBytes - 1) - startDistances);
   1.354 +        *startDistances = num - 1;
   1.355 +        curPos += num;
   1.356 +        cyclicBufferPos++;
   1.357 +        pos++;
   1.358 +        p->buffer++;
   1.359 +      }
   1.360 +      #else
   1.361 +      {
   1.362 +        UInt32 posRes;
   1.363 +        curPos = limit - GetMatchesSpecN(lenLimit, pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
   1.364 +          distances + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos, (Int32)(limit - curPos) , size, &posRes);
   1.365 +        p->hashBufPos += posRes - pos;
   1.366 +        cyclicBufferPos += posRes - pos;
   1.367 +        p->buffer += posRes - pos;
   1.368 +        pos = posRes;
   1.369 +      }
   1.370 +      #endif
   1.371 +
   1.372 +      numProcessed += pos - p->pos;
   1.373 +      p->hashNumAvail -= pos - p->pos;
   1.374 +      p->pos = pos;
   1.375 +      if (cyclicBufferPos == p->cyclicBufferSize)
   1.376 +        cyclicBufferPos = 0;
   1.377 +      p->cyclicBufferPos = cyclicBufferPos;
   1.378 +    }
   1.379 +  }
   1.380 +  distances[0] = curPos;
   1.381 +}
   1.382 +
   1.383 +void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex)
   1.384 +{
   1.385 +  CMtSync *sync = &p->hashSync;
   1.386 +  if (!sync->needStart)
   1.387 +  {
   1.388 +    CriticalSection_Enter(&sync->cs);
   1.389 +    sync->csWasEntered = True;
   1.390 +  }
   1.391 +  
   1.392 +  BtGetMatches(p, p->btBuf + (globalBlockIndex & kMtBtNumBlocksMask) * kMtBtBlockSize);
   1.393 +
   1.394 +  if (p->pos > kMtMaxValForNormalize - kMtBtBlockSize)
   1.395 +  {
   1.396 +    UInt32 subValue = p->pos - p->cyclicBufferSize;
   1.397 +    MatchFinder_Normalize3(subValue, p->son, p->cyclicBufferSize * 2);
   1.398 +    p->pos -= subValue;
   1.399 +  }
   1.400 +
   1.401 +  if (!sync->needStart)
   1.402 +  {
   1.403 +    CriticalSection_Leave(&sync->cs);
   1.404 +    sync->csWasEntered = False;
   1.405 +  }
   1.406 +}
   1.407 +
   1.408 +void BtThreadFunc(CMatchFinderMt *mt)
   1.409 +{
   1.410 +  CMtSync *p = &mt->btSync;
   1.411 +  for (;;)
   1.412 +  {
   1.413 +    UInt32 blockIndex = 0;
   1.414 +    Event_Wait(&p->canStart);
   1.415 +    Event_Set(&p->wasStarted);
   1.416 +    for (;;)
   1.417 +    {
   1.418 +      if (p->exit)
   1.419 +        return;
   1.420 +      if (p->stopWriting)
   1.421 +      {
   1.422 +        p->numProcessedBlocks = blockIndex;
   1.423 +        MtSync_StopWriting(&mt->hashSync);
   1.424 +        Event_Set(&p->wasStopped);
   1.425 +        break;
   1.426 +      }
   1.427 +      Semaphore_Wait(&p->freeSemaphore);
   1.428 +      BtFillBlock(mt, blockIndex++);
   1.429 +      Semaphore_Release1(&p->filledSemaphore);
   1.430 +    }
   1.431 +  }
   1.432 +}
   1.433 +
   1.434 +void MatchFinderMt_Construct(CMatchFinderMt *p)
   1.435 +{
   1.436 +  p->hashBuf = 0;
   1.437 +  MtSync_Construct(&p->hashSync);
   1.438 +  MtSync_Construct(&p->btSync);
   1.439 +}
   1.440 +
   1.441 +void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAlloc *alloc)
   1.442 +{
   1.443 +  alloc->Free(alloc, p->hashBuf);
   1.444 +  p->hashBuf = 0;
   1.445 +}
   1.446 +
   1.447 +void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAlloc *alloc)
   1.448 +{
   1.449 +  MtSync_Destruct(&p->hashSync);
   1.450 +  MtSync_Destruct(&p->btSync);
   1.451 +  MatchFinderMt_FreeMem(p, alloc);
   1.452 +}
   1.453 +
   1.454 +#define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks)
   1.455 +#define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks)
   1.456 +
   1.457 +static unsigned MY_STD_CALL HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p);  return 0; }
   1.458 +static unsigned MY_STD_CALL BtThreadFunc2(void *p)
   1.459 +{
   1.460 +  Byte allocaDummy[0x180];
   1.461 +  int i = 0;
   1.462 +  for (i = 0; i < 16; i++)
   1.463 +    allocaDummy[i] = (Byte)i;
   1.464 +  BtThreadFunc((CMatchFinderMt *)p);
   1.465 +  return 0;
   1.466 +}
   1.467 +
   1.468 +SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
   1.469 +    UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAlloc *alloc)
   1.470 +{
   1.471 +  CMatchFinder *mf = p->MatchFinder;
   1.472 +  p->historySize = historySize;
   1.473 +  if (kMtBtBlockSize <= matchMaxLen * 4)
   1.474 +    return SZ_ERROR_PARAM;
   1.475 +  if (p->hashBuf == 0)
   1.476 +  {
   1.477 +    p->hashBuf = (UInt32 *)alloc->Alloc(alloc, (kHashBufferSize + kBtBufferSize) * sizeof(UInt32));
   1.478 +    if (p->hashBuf == 0)
   1.479 +      return SZ_ERROR_MEM;
   1.480 +    p->btBuf = p->hashBuf + kHashBufferSize;
   1.481 +  }
   1.482 +  keepAddBufferBefore += (kHashBufferSize + kBtBufferSize);
   1.483 +  keepAddBufferAfter += kMtHashBlockSize;
   1.484 +  if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc))
   1.485 +    return SZ_ERROR_MEM;
   1.486 +
   1.487 +  RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p, kMtHashNumBlocks));
   1.488 +  RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p, kMtBtNumBlocks));
   1.489 +  return SZ_OK;
   1.490 +}
   1.491 +
   1.492 +/* Call it after ReleaseStream / SetStream */
   1.493 +void MatchFinderMt_Init(CMatchFinderMt *p)
   1.494 +{
   1.495 +  CMatchFinder *mf = p->MatchFinder;
   1.496 +  p->btBufPos = p->btBufPosLimit = 0;
   1.497 +  p->hashBufPos = p->hashBufPosLimit = 0;
   1.498 +  MatchFinder_Init(mf);
   1.499 +  p->pointerToCurPos = MatchFinder_GetPointerToCurrentPos(mf);
   1.500 +  p->btNumAvailBytes = 0;
   1.501 +  p->lzPos = p->historySize + 1;
   1.502 +
   1.503 +  p->hash = mf->hash;
   1.504 +  p->fixedHashSize = mf->fixedHashSize;
   1.505 +  p->crc = mf->crc;
   1.506 +
   1.507 +  p->son = mf->son;
   1.508 +  p->matchMaxLen = mf->matchMaxLen;
   1.509 +  p->numHashBytes = mf->numHashBytes;
   1.510 +  p->pos = mf->pos;
   1.511 +  p->buffer = mf->buffer;
   1.512 +  p->cyclicBufferPos = mf->cyclicBufferPos;
   1.513 +  p->cyclicBufferSize = mf->cyclicBufferSize;
   1.514 +  p->cutValue = mf->cutValue;
   1.515 +}
   1.516 +
   1.517 +/* ReleaseStream is required to finish multithreading */
   1.518 +void MatchFinderMt_ReleaseStream(CMatchFinderMt *p)
   1.519 +{
   1.520 +  MtSync_StopWriting(&p->btSync);
   1.521 +  /* p->MatchFinder->ReleaseStream(); */
   1.522 +}
   1.523 +
   1.524 +void MatchFinderMt_Normalize(CMatchFinderMt *p)
   1.525 +{
   1.526 +  MatchFinder_Normalize3(p->lzPos - p->historySize - 1, p->hash, p->fixedHashSize);
   1.527 +  p->lzPos = p->historySize + 1;
   1.528 +}
   1.529 +
   1.530 +void MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
   1.531 +{
   1.532 +  UInt32 blockIndex;
   1.533 +  MtSync_GetNextBlock(&p->btSync);
   1.534 +  blockIndex = ((p->btSync.numProcessedBlocks - 1) & kMtBtNumBlocksMask);
   1.535 +  p->btBufPosLimit = p->btBufPos = blockIndex * kMtBtBlockSize;
   1.536 +  p->btBufPosLimit += p->btBuf[p->btBufPos++];
   1.537 +  p->btNumAvailBytes = p->btBuf[p->btBufPos++];
   1.538 +  if (p->lzPos >= kMtMaxValForNormalize - kMtBtBlockSize)
   1.539 +    MatchFinderMt_Normalize(p);
   1.540 +}
   1.541 +
   1.542 +const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p)
   1.543 +{
   1.544 +  return p->pointerToCurPos;
   1.545 +}
   1.546 +
   1.547 +#define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p);
   1.548 +
   1.549 +UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p)
   1.550 +{
   1.551 +  GET_NEXT_BLOCK_IF_REQUIRED;
   1.552 +  return p->btNumAvailBytes;
   1.553 +}
   1.554 +
   1.555 +Byte MatchFinderMt_GetIndexByte(CMatchFinderMt *p, Int32 index)
   1.556 +{
   1.557 +  return p->pointerToCurPos[index];
   1.558 +}
   1.559 +
   1.560 +UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances)
   1.561 +{
   1.562 +  UInt32 hash2Value, curMatch2;
   1.563 +  UInt32 *hash = p->hash;
   1.564 +  const Byte *cur = p->pointerToCurPos;
   1.565 +  UInt32 lzPos = p->lzPos;
   1.566 +  MT_HASH2_CALC
   1.567 +      
   1.568 +  curMatch2 = hash[hash2Value];
   1.569 +  hash[hash2Value] = lzPos;
   1.570 +
   1.571 +  if (curMatch2 >= matchMinPos)
   1.572 +    if (cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0])
   1.573 +    {
   1.574 +      *distances++ = 2;
   1.575 +      *distances++ = lzPos - curMatch2 - 1;
   1.576 +    }
   1.577 +  return distances;
   1.578 +}
   1.579 +
   1.580 +UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances)
   1.581 +{
   1.582 +  UInt32 hash2Value, hash3Value, curMatch2, curMatch3;
   1.583 +  UInt32 *hash = p->hash;
   1.584 +  const Byte *cur = p->pointerToCurPos;
   1.585 +  UInt32 lzPos = p->lzPos;
   1.586 +  MT_HASH3_CALC
   1.587 +
   1.588 +  curMatch2 = hash[                hash2Value];
   1.589 +  curMatch3 = hash[kFix3HashSize + hash3Value];
   1.590 +  
   1.591 +  hash[                hash2Value] =
   1.592 +  hash[kFix3HashSize + hash3Value] =
   1.593 +    lzPos;
   1.594 +
   1.595 +  if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0])
   1.596 +  {
   1.597 +    distances[1] = lzPos - curMatch2 - 1;
   1.598 +    if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2])
   1.599 +    {
   1.600 +      distances[0] = 3;
   1.601 +      return distances + 2;
   1.602 +    }
   1.603 +    distances[0] = 2;
   1.604 +    distances += 2;
   1.605 +  }
   1.606 +  if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0])
   1.607 +  {
   1.608 +    *distances++ = 3;
   1.609 +    *distances++ = lzPos - curMatch3 - 1;
   1.610 +  }
   1.611 +  return distances;
   1.612 +}
   1.613 +
   1.614 +/*
   1.615 +UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances)
   1.616 +{
   1.617 +  UInt32 hash2Value, hash3Value, hash4Value, curMatch2, curMatch3, curMatch4;
   1.618 +  UInt32 *hash = p->hash;
   1.619 +  const Byte *cur = p->pointerToCurPos;
   1.620 +  UInt32 lzPos = p->lzPos;
   1.621 +  MT_HASH4_CALC
   1.622 +      
   1.623 +  curMatch2 = hash[                hash2Value];
   1.624 +  curMatch3 = hash[kFix3HashSize + hash3Value];
   1.625 +  curMatch4 = hash[kFix4HashSize + hash4Value];
   1.626 +  
   1.627 +  hash[                hash2Value] =
   1.628 +  hash[kFix3HashSize + hash3Value] =
   1.629 +  hash[kFix4HashSize + hash4Value] =
   1.630 +    lzPos;
   1.631 +
   1.632 +  if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0])
   1.633 +  {
   1.634 +    distances[1] = lzPos - curMatch2 - 1;
   1.635 +    if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2])
   1.636 +    {
   1.637 +      distances[0] =  (cur[(ptrdiff_t)curMatch2 - lzPos + 3] == cur[3]) ? 4 : 3;
   1.638 +      return distances + 2;
   1.639 +    }
   1.640 +    distances[0] = 2;
   1.641 +    distances += 2;
   1.642 +  }
   1.643 +  if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0])
   1.644 +  {
   1.645 +    distances[1] = lzPos - curMatch3 - 1;
   1.646 +    if (cur[(ptrdiff_t)curMatch3 - lzPos + 3] == cur[3])
   1.647 +    {
   1.648 +      distances[0] = 4;
   1.649 +      return distances + 2;
   1.650 +    }
   1.651 +    distances[0] = 3;
   1.652 +    distances += 2;
   1.653 +  }
   1.654 +
   1.655 +  if (curMatch4 >= matchMinPos)
   1.656 +    if (
   1.657 +      cur[(ptrdiff_t)curMatch4 - lzPos] == cur[0] &&
   1.658 +      cur[(ptrdiff_t)curMatch4 - lzPos + 3] == cur[3]
   1.659 +      )
   1.660 +    {
   1.661 +      *distances++ = 4;
   1.662 +      *distances++ = lzPos - curMatch4 - 1;
   1.663 +    }
   1.664 +  return distances;
   1.665 +}
   1.666 +*/
   1.667 +
   1.668 +#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++;
   1.669 +
   1.670 +UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *distances)
   1.671 +{
   1.672 +  const UInt32 *btBuf = p->btBuf + p->btBufPos;
   1.673 +  UInt32 len = *btBuf++;
   1.674 +  p->btBufPos += 1 + len;
   1.675 +  p->btNumAvailBytes--;
   1.676 +  {
   1.677 +    UInt32 i;
   1.678 +    for (i = 0; i < len; i += 2)
   1.679 +    {
   1.680 +      *distances++ = *btBuf++;
   1.681 +      *distances++ = *btBuf++;
   1.682 +    }
   1.683 +  }
   1.684 +  INCREASE_LZ_POS
   1.685 +  return len;
   1.686 +}
   1.687 +
   1.688 +UInt32 MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *distances)
   1.689 +{
   1.690 +  const UInt32 *btBuf = p->btBuf + p->btBufPos;
   1.691 +  UInt32 len = *btBuf++;
   1.692 +  p->btBufPos += 1 + len;
   1.693 +
   1.694 +  if (len == 0)
   1.695 +  {
   1.696 +    if (p->btNumAvailBytes-- >= 4)
   1.697 +      len = (UInt32)(p->MixMatchesFunc(p, p->lzPos - p->historySize, distances) - (distances));
   1.698 +  }
   1.699 +  else
   1.700 +  {
   1.701 +    /* Condition: there are matches in btBuf with length < p->numHashBytes */
   1.702 +    UInt32 *distances2;
   1.703 +    p->btNumAvailBytes--;
   1.704 +    distances2 = p->MixMatchesFunc(p, p->lzPos - btBuf[1], distances);
   1.705 +    do
   1.706 +    {
   1.707 +      *distances2++ = *btBuf++;
   1.708 +      *distances2++ = *btBuf++;
   1.709 +    }
   1.710 +    while ((len -= 2) != 0);
   1.711 +    len  = (UInt32)(distances2 - (distances));
   1.712 +  }
   1.713 +  INCREASE_LZ_POS
   1.714 +  return len;
   1.715 +}
   1.716 +
   1.717 +#define SKIP_HEADER2  do { GET_NEXT_BLOCK_IF_REQUIRED
   1.718 +#define SKIP_HEADER(n) SKIP_HEADER2 if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash;
   1.719 +#define SKIP_FOOTER } INCREASE_LZ_POS p->btBufPos += p->btBuf[p->btBufPos] + 1; } while (--num != 0);
   1.720 +
   1.721 +void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num)
   1.722 +{
   1.723 +  SKIP_HEADER2 { p->btNumAvailBytes--;
   1.724 +  SKIP_FOOTER
   1.725 +}
   1.726 +
   1.727 +void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num)
   1.728 +{
   1.729 +  SKIP_HEADER(2)
   1.730 +      UInt32 hash2Value;
   1.731 +      MT_HASH2_CALC
   1.732 +      hash[hash2Value] = p->lzPos;
   1.733 +  SKIP_FOOTER
   1.734 +}
   1.735 +
   1.736 +void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num)
   1.737 +{
   1.738 +  SKIP_HEADER(3)
   1.739 +      UInt32 hash2Value, hash3Value;
   1.740 +      MT_HASH3_CALC
   1.741 +      hash[kFix3HashSize + hash3Value] =
   1.742 +      hash[                hash2Value] =
   1.743 +        p->lzPos;
   1.744 +  SKIP_FOOTER
   1.745 +}
   1.746 +
   1.747 +/*
   1.748 +void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)
   1.749 +{
   1.750 +  SKIP_HEADER(4)
   1.751 +      UInt32 hash2Value, hash3Value, hash4Value;
   1.752 +      MT_HASH4_CALC
   1.753 +      hash[kFix4HashSize + hash4Value] =
   1.754 +      hash[kFix3HashSize + hash3Value] =
   1.755 +      hash[                hash2Value] =
   1.756 +        p->lzPos;
   1.757 +  SKIP_FOOTER
   1.758 +}
   1.759 +*/
   1.760 +
   1.761 +void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable)
   1.762 +{
   1.763 +  vTable->Init = (Mf_Init_Func)MatchFinderMt_Init;
   1.764 +  vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinderMt_GetIndexByte;
   1.765 +  vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes;
   1.766 +  vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos;
   1.767 +  vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches;
   1.768 +  switch(p->MatchFinder->numHashBytes)
   1.769 +  {
   1.770 +    case 2:
   1.771 +      p->GetHeadsFunc = GetHeads2;
   1.772 +      p->MixMatchesFunc = (Mf_Mix_Matches)0;
   1.773 +      vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip;
   1.774 +      vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches;
   1.775 +      break;
   1.776 +    case 3:
   1.777 +      p->GetHeadsFunc = GetHeads3;
   1.778 +      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2;
   1.779 +      vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip;
   1.780 +      break;
   1.781 +    default:
   1.782 +    /* case 4: */
   1.783 +      p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads4b : GetHeads4;
   1.784 +      /* p->GetHeadsFunc = GetHeads4; */
   1.785 +      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3;
   1.786 +      vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip;
   1.787 +      break;
   1.788 +    /*
   1.789 +    default:
   1.790 +      p->GetHeadsFunc = GetHeads5;
   1.791 +      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4;
   1.792 +      vTable->Skip = (Mf_Skip_Func)MatchFinderMt4_Skip;
   1.793 +      break;
   1.794 +    */
   1.795 +  }
   1.796 +}