diff src/win32/7zip/7z/CPP/Common/UTFConvert.cpp @ 1:f9f4f1b99eed

importing src directory
author Robert McIntyre <rlm@mit.edu>
date Sat, 03 Mar 2012 10:31:27 -0600
parents
children
line wrap: on
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/win32/7zip/7z/CPP/Common/UTFConvert.cpp	Sat Mar 03 10:31:27 2012 -0600
     1.3 @@ -0,0 +1,145 @@
     1.4 +// UTFConvert.cpp
     1.5 +
     1.6 +#include "StdAfx.h"
     1.7 +
     1.8 +#include "UTFConvert.h"
     1.9 +#include "Types.h"
    1.10 +
// Lead-byte base values for multi-byte UTF-8 sequences, as used by the two
// converters in this file: kUtf8Limits[i] is the lowest lead byte that
// introduces a sequence with (i + 1) continuation bytes
// (0xC0 -> 1, 0xE0 -> 2, 0xF0 -> 3, 0xF8 -> 4, 0xFC -> 5).
// The decoder subtracts the entry to strip the length marker from the lead
// byte; the encoder adds the entry to the top payload bits to build it.
static const Byte kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
    1.12 +
    1.13 +static Bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, size_t srcLen)
    1.14 +{
    1.15 +  size_t destPos = 0, srcPos = 0;
    1.16 +  for (;;)
    1.17 +  {
    1.18 +    Byte c;
    1.19 +    int numAdds;
    1.20 +    if (srcPos == srcLen)
    1.21 +    {
    1.22 +      *destLen = destPos;
    1.23 +      return True;
    1.24 +    }
    1.25 +    c = (Byte)src[srcPos++];
    1.26 +
    1.27 +    if (c < 0x80)
    1.28 +    {
    1.29 +      if (dest)
    1.30 +        dest[destPos] = (wchar_t)c;
    1.31 +      destPos++;
    1.32 +      continue;
    1.33 +    }
    1.34 +    if (c < 0xC0)
    1.35 +      break;
    1.36 +    for (numAdds = 1; numAdds < 5; numAdds++)
    1.37 +      if (c < kUtf8Limits[numAdds])
    1.38 +        break;
    1.39 +    UInt32 value = (c - kUtf8Limits[numAdds - 1]);
    1.40 +
    1.41 +    do
    1.42 +    {
    1.43 +      Byte c2;
    1.44 +      if (srcPos == srcLen)
    1.45 +        break;
    1.46 +      c2 = (Byte)src[srcPos++];
    1.47 +      if (c2 < 0x80 || c2 >= 0xC0)
    1.48 +        break;
    1.49 +      value <<= 6;
    1.50 +      value |= (c2 - 0x80);
    1.51 +    }
    1.52 +    while (--numAdds != 0);
    1.53 +    
    1.54 +    if (value < 0x10000)
    1.55 +    {
    1.56 +      if (dest)
    1.57 +        dest[destPos] = (wchar_t)value;
    1.58 +      destPos++;
    1.59 +    }
    1.60 +    else
    1.61 +    {
    1.62 +      value -= 0x10000;
    1.63 +      if (value >= 0x100000)
    1.64 +        break;
    1.65 +      if (dest)
    1.66 +      {
    1.67 +        dest[destPos + 0] = (wchar_t)(0xD800 + (value >> 10));
    1.68 +        dest[destPos + 1] = (wchar_t)(0xDC00 + (value & 0x3FF));
    1.69 +      }
    1.70 +      destPos += 2;
    1.71 +    }
    1.72 +  }
    1.73 +  *destLen = destPos;
    1.74 +  return False;
    1.75 +}
    1.76 +
    1.77 +static Bool Utf16_To_Utf8(char *dest, size_t *destLen, const wchar_t *src, size_t srcLen)
    1.78 +{
    1.79 +  size_t destPos = 0, srcPos = 0;
    1.80 +  for (;;)
    1.81 +  {
    1.82 +    unsigned numAdds;
    1.83 +    UInt32 value;
    1.84 +    if (srcPos == srcLen)
    1.85 +    {
    1.86 +      *destLen = destPos;
    1.87 +      return True;
    1.88 +    }
    1.89 +    value = src[srcPos++];
    1.90 +    if (value < 0x80)
    1.91 +    {
    1.92 +      if (dest)
    1.93 +        dest[destPos] = (char)value;
    1.94 +      destPos++;
    1.95 +      continue;
    1.96 +    }
    1.97 +    if (value >= 0xD800 && value < 0xE000)
    1.98 +    {
    1.99 +      UInt32 c2;
   1.100 +      if (value >= 0xDC00 || srcPos == srcLen)
   1.101 +        break;
   1.102 +      c2 = src[srcPos++];
   1.103 +      if (c2 < 0xDC00 || c2 >= 0xE000)
   1.104 +        break;
   1.105 +      value = ((value - 0xD800) << 10) | (c2 - 0xDC00);
   1.106 +    }
   1.107 +    for (numAdds = 1; numAdds < 5; numAdds++)
   1.108 +      if (value < (((UInt32)1) << (numAdds * 5 + 6)))
   1.109 +        break;
   1.110 +    if (dest)
   1.111 +      dest[destPos] = (char)(kUtf8Limits[numAdds - 1] + (value >> (6 * numAdds)));
   1.112 +    destPos++;
   1.113 +    do
   1.114 +    {
   1.115 +      numAdds--;
   1.116 +      if (dest)
   1.117 +        dest[destPos] = (char)(0x80 + ((value >> (6 * numAdds)) & 0x3F));
   1.118 +      destPos++;
   1.119 +    }
   1.120 +    while (numAdds != 0);
   1.121 +  }
   1.122 +  *destLen = destPos;
   1.123 +  return False;
   1.124 +}
   1.125 +
   1.126 +bool ConvertUTF8ToUnicode(const AString &src, UString &dest)
   1.127 +{
   1.128 +  dest.Empty();
   1.129 +  size_t destLen = 0;
   1.130 +  Utf8_To_Utf16(NULL, &destLen, src, src.Length());
   1.131 +  wchar_t *p = dest.GetBuffer((int)destLen);
   1.132 +  Bool res = Utf8_To_Utf16(p, &destLen, src, src.Length());
   1.133 +  p[destLen] = 0;
   1.134 +  dest.ReleaseBuffer();
   1.135 +  return res ? true : false;
   1.136 +}
   1.137 +
   1.138 +bool ConvertUnicodeToUTF8(const UString &src, AString &dest)
   1.139 +{
   1.140 +  dest.Empty();
   1.141 +  size_t destLen = 0;
   1.142 +  Utf16_To_Utf8(NULL, &destLen, src, src.Length());
   1.143 +  char *p = dest.GetBuffer((int)destLen);
   1.144 +  Bool res = Utf16_To_Utf8(p, &destLen, src, src.Length());
   1.145 +  p[destLen] = 0;
   1.146 +  dest.ReleaseBuffer();
   1.147 +  return res ? true : false;
   1.148 +}