Mercurial > vba-clojure
diff src/win32/7zip/7z/CPP/Common/UTFConvert.cpp @ 1:f9f4f1b99eed
importing src directory
author | Robert McIntyre <rlm@mit.edu> |
---|---|
date | Sat, 03 Mar 2012 10:31:27 -0600 |
parents | |
children |
line wrap: on
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/win32/7zip/7z/CPP/Common/UTFConvert.cpp Sat Mar 03 10:31:27 2012 -0600 1.3 @@ -0,0 +1,145 @@ 1.4 +// UTFConvert.cpp 1.5 + 1.6 +#include "StdAfx.h" 1.7 + 1.8 +#include "UTFConvert.h" 1.9 +#include "Types.h" 1.10 + 1.11 +static const Byte kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; 1.12 + 1.13 +static Bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, size_t srcLen) 1.14 +{ 1.15 + size_t destPos = 0, srcPos = 0; 1.16 + for (;;) 1.17 + { 1.18 + Byte c; 1.19 + int numAdds; 1.20 + if (srcPos == srcLen) 1.21 + { 1.22 + *destLen = destPos; 1.23 + return True; 1.24 + } 1.25 + c = (Byte)src[srcPos++]; 1.26 + 1.27 + if (c < 0x80) 1.28 + { 1.29 + if (dest) 1.30 + dest[destPos] = (wchar_t)c; 1.31 + destPos++; 1.32 + continue; 1.33 + } 1.34 + if (c < 0xC0) 1.35 + break; 1.36 + for (numAdds = 1; numAdds < 5; numAdds++) 1.37 + if (c < kUtf8Limits[numAdds]) 1.38 + break; 1.39 + UInt32 value = (c - kUtf8Limits[numAdds - 1]); 1.40 + 1.41 + do 1.42 + { 1.43 + Byte c2; 1.44 + if (srcPos == srcLen) 1.45 + break; 1.46 + c2 = (Byte)src[srcPos++]; 1.47 + if (c2 < 0x80 || c2 >= 0xC0) 1.48 + break; 1.49 + value <<= 6; 1.50 + value |= (c2 - 0x80); 1.51 + } 1.52 + while (--numAdds != 0); 1.53 + 1.54 + if (value < 0x10000) 1.55 + { 1.56 + if (dest) 1.57 + dest[destPos] = (wchar_t)value; 1.58 + destPos++; 1.59 + } 1.60 + else 1.61 + { 1.62 + value -= 0x10000; 1.63 + if (value >= 0x100000) 1.64 + break; 1.65 + if (dest) 1.66 + { 1.67 + dest[destPos + 0] = (wchar_t)(0xD800 + (value >> 10)); 1.68 + dest[destPos + 1] = (wchar_t)(0xDC00 + (value & 0x3FF)); 1.69 + } 1.70 + destPos += 2; 1.71 + } 1.72 + } 1.73 + *destLen = destPos; 1.74 + return False; 1.75 +} 1.76 + 1.77 +static Bool Utf16_To_Utf8(char *dest, size_t *destLen, const wchar_t *src, size_t srcLen) 1.78 +{ 1.79 + size_t destPos = 0, srcPos = 0; 1.80 + for (;;) 1.81 + { 1.82 + unsigned numAdds; 1.83 + UInt32 value; 1.84 + if (srcPos == srcLen) 1.85 + { 1.86 + *destLen = destPos; 1.87 + return True; 1.88 + } 1.89 + value = src[srcPos++]; 1.90 + if (value < 0x80) 1.91 + { 1.92 + if (dest) 1.93 + dest[destPos] = (char)value; 1.94 + destPos++; 1.95 + continue; 1.96 + } 1.97 + if (value >= 0xD800 && value < 0xE000) 1.98 + { 1.99 + UInt32 c2; 1.100 + if (value >= 0xDC00 || srcPos == srcLen) 1.101 + break; 1.102 + c2 = src[srcPos++]; 1.103 + if (c2 < 0xDC00 || c2 >= 0xE000) 1.104 + break; 1.105 + value = ((value - 0xD800) << 10) | (c2 - 0xDC00); 1.106 + } 1.107 + for (numAdds = 1; numAdds < 5; numAdds++) 1.108 + if (value < (((UInt32)1) << (numAdds * 5 + 6))) 1.109 + break; 1.110 + if (dest) 1.111 + dest[destPos] = (char)(kUtf8Limits[numAdds - 1] + (value >> (6 * numAdds))); 1.112 + destPos++; 1.113 + do 1.114 + { 1.115 + numAdds--; 1.116 + if (dest) 1.117 + dest[destPos] = (char)(0x80 + ((value >> (6 * numAdds)) & 0x3F)); 1.118 + destPos++; 1.119 + } 1.120 + while (numAdds != 0); 1.121 + } 1.122 + *destLen = destPos; 1.123 + return False; 1.124 +} 1.125 + 1.126 +bool ConvertUTF8ToUnicode(const AString &src, UString &dest) 1.127 +{ 1.128 + dest.Empty(); 1.129 + size_t destLen = 0; 1.130 + Utf8_To_Utf16(NULL, &destLen, src, src.Length()); 1.131 + wchar_t *p = dest.GetBuffer((int)destLen); 1.132 + Bool res = Utf8_To_Utf16(p, &destLen, src, src.Length()); 1.133 + p[destLen] = 0; 1.134 + dest.ReleaseBuffer(); 1.135 + return res ? true : false; 1.136 +} 1.137 + 1.138 +bool ConvertUnicodeToUTF8(const UString &src, AString &dest) 1.139 +{ 1.140 + dest.Empty(); 1.141 + size_t destLen = 0; 1.142 + Utf16_To_Utf8(NULL, &destLen, src, src.Length()); 1.143 + char *p = dest.GetBuffer((int)destLen); 1.144 + Bool res = Utf16_To_Utf8(p, &destLen, src, src.Length()); 1.145 + p[destLen] = 0; 1.146 + dest.ReleaseBuffer(); 1.147 + return res ? true : false; 1.148 +}