Mercurial > vba-clojure
view src/win32/7zip/7z/CPP/Common/UTFConvert.cpp @ 1:f9f4f1b99eed
importing src directory
author | Robert McIntyre <rlm@mit.edu> |
---|---|
date | Sat, 03 Mar 2012 10:31:27 -0600 |
parents | |
children |
line wrap: on
line source
1 // UTFConvert.cpp3 #include "StdAfx.h"5 #include "UTFConvert.h"6 #include "Types.h"8 static const Byte kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };10 static Bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, size_t srcLen)11 {12 size_t destPos = 0, srcPos = 0;13 for (;;)14 {15 Byte c;16 int numAdds;17 if (srcPos == srcLen)18 {19 *destLen = destPos;20 return True;21 }22 c = (Byte)src[srcPos++];24 if (c < 0x80)25 {26 if (dest)27 dest[destPos] = (wchar_t)c;28 destPos++;29 continue;30 }31 if (c < 0xC0)32 break;33 for (numAdds = 1; numAdds < 5; numAdds++)34 if (c < kUtf8Limits[numAdds])35 break;36 UInt32 value = (c - kUtf8Limits[numAdds - 1]);38 do39 {40 Byte c2;41 if (srcPos == srcLen)42 break;43 c2 = (Byte)src[srcPos++];44 if (c2 < 0x80 || c2 >= 0xC0)45 break;46 value <<= 6;47 value |= (c2 - 0x80);48 }49 while (--numAdds != 0);51 if (value < 0x10000)52 {53 if (dest)54 dest[destPos] = (wchar_t)value;55 destPos++;56 }57 else58 {59 value -= 0x10000;60 if (value >= 0x100000)61 break;62 if (dest)63 {64 dest[destPos + 0] = (wchar_t)(0xD800 + (value >> 10));65 dest[destPos + 1] = (wchar_t)(0xDC00 + (value & 0x3FF));66 }67 destPos += 2;68 }69 }70 *destLen = destPos;71 return False;72 }74 static Bool Utf16_To_Utf8(char *dest, size_t *destLen, const wchar_t *src, size_t srcLen)75 {76 size_t destPos = 0, srcPos = 0;77 for (;;)78 {79 unsigned numAdds;80 UInt32 value;81 if (srcPos == srcLen)82 {83 *destLen = destPos;84 return True;85 }86 value = src[srcPos++];87 if (value < 0x80)88 {89 if (dest)90 dest[destPos] = (char)value;91 destPos++;92 continue;93 }94 if (value >= 0xD800 && value < 0xE000)95 {96 UInt32 c2;97 if (value >= 0xDC00 || srcPos == srcLen)98 break;99 c2 = src[srcPos++];100 if (c2 < 0xDC00 || c2 >= 0xE000)101 break;102 value = ((value - 0xD800) << 10) | (c2 - 0xDC00);103 }104 for (numAdds = 1; numAdds < 5; numAdds++)105 if (value < (((UInt32)1) << (numAdds * 5 + 6)))106 break;107 if (dest)108 dest[destPos] = (char)(kUtf8Limits[numAdds - 1] + (value >> (6 * numAdds)));109 destPos++;110 do111 {112 numAdds--;113 if (dest)114 dest[destPos] = (char)(0x80 + ((value >> (6 * numAdds)) & 0x3F));115 destPos++;116 }117 while (numAdds != 0);118 }119 *destLen = destPos;120 return False;121 }123 bool ConvertUTF8ToUnicode(const AString &src, UString &dest)124 {125 dest.Empty();126 size_t destLen = 0;127 Utf8_To_Utf16(NULL, &destLen, src, src.Length());128 wchar_t *p = dest.GetBuffer((int)destLen);129 Bool res = Utf8_To_Utf16(p, &destLen, src, src.Length());130 p[destLen] = 0;131 dest.ReleaseBuffer();132 return res ? true : false;133 }135 bool ConvertUnicodeToUTF8(const UString &src, AString &dest)136 {137 dest.Empty();138 size_t destLen = 0;139 Utf16_To_Utf8(NULL, &destLen, src, src.Length());140 char *p = dest.GetBuffer((int)destLen);141 Bool res = Utf16_To_Utf8(p, &destLen, src, src.Length());142 p[destLen] = 0;143 dest.ReleaseBuffer();144 return res ? true : false;145 }