rlm@1: // UTFConvert.cpp rlm@1: rlm@1: #include "StdAfx.h" rlm@1: rlm@1: #include "UTFConvert.h" rlm@1: #include "Types.h" rlm@1: rlm@1: static const Byte kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; rlm@1: rlm@1: static Bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, size_t srcLen) rlm@1: { rlm@1: size_t destPos = 0, srcPos = 0; rlm@1: for (;;) rlm@1: { rlm@1: Byte c; rlm@1: int numAdds; rlm@1: if (srcPos == srcLen) rlm@1: { rlm@1: *destLen = destPos; rlm@1: return True; rlm@1: } rlm@1: c = (Byte)src[srcPos++]; rlm@1: rlm@1: if (c < 0x80) rlm@1: { rlm@1: if (dest) rlm@1: dest[destPos] = (wchar_t)c; rlm@1: destPos++; rlm@1: continue; rlm@1: } rlm@1: if (c < 0xC0) rlm@1: break; rlm@1: for (numAdds = 1; numAdds < 5; numAdds++) rlm@1: if (c < kUtf8Limits[numAdds]) rlm@1: break; rlm@1: UInt32 value = (c - kUtf8Limits[numAdds - 1]); rlm@1: rlm@1: do rlm@1: { rlm@1: Byte c2; rlm@1: if (srcPos == srcLen) rlm@1: break; rlm@1: c2 = (Byte)src[srcPos++]; rlm@1: if (c2 < 0x80 || c2 >= 0xC0) rlm@1: break; rlm@1: value <<= 6; rlm@1: value |= (c2 - 0x80); rlm@1: } rlm@1: while (--numAdds != 0); rlm@1: rlm@1: if (value < 0x10000) rlm@1: { rlm@1: if (dest) rlm@1: dest[destPos] = (wchar_t)value; rlm@1: destPos++; rlm@1: } rlm@1: else rlm@1: { rlm@1: value -= 0x10000; rlm@1: if (value >= 0x100000) rlm@1: break; rlm@1: if (dest) rlm@1: { rlm@1: dest[destPos + 0] = (wchar_t)(0xD800 + (value >> 10)); rlm@1: dest[destPos + 1] = (wchar_t)(0xDC00 + (value & 0x3FF)); rlm@1: } rlm@1: destPos += 2; rlm@1: } rlm@1: } rlm@1: *destLen = destPos; rlm@1: return False; rlm@1: } rlm@1: rlm@1: static Bool Utf16_To_Utf8(char *dest, size_t *destLen, const wchar_t *src, size_t srcLen) rlm@1: { rlm@1: size_t destPos = 0, srcPos = 0; rlm@1: for (;;) rlm@1: { rlm@1: unsigned numAdds; rlm@1: UInt32 value; rlm@1: if (srcPos == srcLen) rlm@1: { rlm@1: *destLen = destPos; rlm@1: return True; rlm@1: } rlm@1: value = src[srcPos++]; rlm@1: if (value < 0x80) rlm@1: { rlm@1: if (dest) rlm@1: dest[destPos] = (char)value; rlm@1: destPos++; rlm@1: continue; rlm@1: } rlm@1: if (value >= 0xD800 && value < 0xE000) rlm@1: { rlm@1: UInt32 c2; rlm@1: if (value >= 0xDC00 || srcPos == srcLen) rlm@1: break; rlm@1: c2 = src[srcPos++]; rlm@1: if (c2 < 0xDC00 || c2 >= 0xE000) rlm@1: break; rlm@1: value = ((value - 0xD800) << 10) | (c2 - 0xDC00); rlm@1: } rlm@1: for (numAdds = 1; numAdds < 5; numAdds++) rlm@1: if (value < (((UInt32)1) << (numAdds * 5 + 6))) rlm@1: break; rlm@1: if (dest) rlm@1: dest[destPos] = (char)(kUtf8Limits[numAdds - 1] + (value >> (6 * numAdds))); rlm@1: destPos++; rlm@1: do rlm@1: { rlm@1: numAdds--; rlm@1: if (dest) rlm@1: dest[destPos] = (char)(0x80 + ((value >> (6 * numAdds)) & 0x3F)); rlm@1: destPos++; rlm@1: } rlm@1: while (numAdds != 0); rlm@1: } rlm@1: *destLen = destPos; rlm@1: return False; rlm@1: } rlm@1: rlm@1: bool ConvertUTF8ToUnicode(const AString &src, UString &dest) rlm@1: { rlm@1: dest.Empty(); rlm@1: size_t destLen = 0; rlm@1: Utf8_To_Utf16(NULL, &destLen, src, src.Length()); rlm@1: wchar_t *p = dest.GetBuffer((int)destLen); rlm@1: Bool res = Utf8_To_Utf16(p, &destLen, src, src.Length()); rlm@1: p[destLen] = 0; rlm@1: dest.ReleaseBuffer(); rlm@1: return res ? true : false; rlm@1: } rlm@1: rlm@1: bool ConvertUnicodeToUTF8(const UString &src, AString &dest) rlm@1: { rlm@1: dest.Empty(); rlm@1: size_t destLen = 0; rlm@1: Utf16_To_Utf8(NULL, &destLen, src, src.Length()); rlm@1: char *p = dest.GetBuffer((int)destLen); rlm@1: Bool res = Utf16_To_Utf8(p, &destLen, src, src.Length()); rlm@1: p[destLen] = 0; rlm@1: dest.ReleaseBuffer(); rlm@1: return res ? true : false; rlm@1: }