rlm@1
|
1 // UTFConvert.cpp
|
rlm@1
|
2
|
rlm@1
|
3 #include "StdAfx.h"
|
rlm@1
|
4
|
rlm@1
|
5 #include "UTFConvert.h"
|
rlm@1
|
6 #include "Types.h"
|
rlm@1
|
7
|
rlm@1
|
8 static const Byte kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
|
rlm@1
|
9
|
rlm@1
|
10 static Bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, size_t srcLen)
|
rlm@1
|
11 {
|
rlm@1
|
12 size_t destPos = 0, srcPos = 0;
|
rlm@1
|
13 for (;;)
|
rlm@1
|
14 {
|
rlm@1
|
15 Byte c;
|
rlm@1
|
16 int numAdds;
|
rlm@1
|
17 if (srcPos == srcLen)
|
rlm@1
|
18 {
|
rlm@1
|
19 *destLen = destPos;
|
rlm@1
|
20 return True;
|
rlm@1
|
21 }
|
rlm@1
|
22 c = (Byte)src[srcPos++];
|
rlm@1
|
23
|
rlm@1
|
24 if (c < 0x80)
|
rlm@1
|
25 {
|
rlm@1
|
26 if (dest)
|
rlm@1
|
27 dest[destPos] = (wchar_t)c;
|
rlm@1
|
28 destPos++;
|
rlm@1
|
29 continue;
|
rlm@1
|
30 }
|
rlm@1
|
31 if (c < 0xC0)
|
rlm@1
|
32 break;
|
rlm@1
|
33 for (numAdds = 1; numAdds < 5; numAdds++)
|
rlm@1
|
34 if (c < kUtf8Limits[numAdds])
|
rlm@1
|
35 break;
|
rlm@1
|
36 UInt32 value = (c - kUtf8Limits[numAdds - 1]);
|
rlm@1
|
37
|
rlm@1
|
38 do
|
rlm@1
|
39 {
|
rlm@1
|
40 Byte c2;
|
rlm@1
|
41 if (srcPos == srcLen)
|
rlm@1
|
42 break;
|
rlm@1
|
43 c2 = (Byte)src[srcPos++];
|
rlm@1
|
44 if (c2 < 0x80 || c2 >= 0xC0)
|
rlm@1
|
45 break;
|
rlm@1
|
46 value <<= 6;
|
rlm@1
|
47 value |= (c2 - 0x80);
|
rlm@1
|
48 }
|
rlm@1
|
49 while (--numAdds != 0);
|
rlm@1
|
50
|
rlm@1
|
51 if (value < 0x10000)
|
rlm@1
|
52 {
|
rlm@1
|
53 if (dest)
|
rlm@1
|
54 dest[destPos] = (wchar_t)value;
|
rlm@1
|
55 destPos++;
|
rlm@1
|
56 }
|
rlm@1
|
57 else
|
rlm@1
|
58 {
|
rlm@1
|
59 value -= 0x10000;
|
rlm@1
|
60 if (value >= 0x100000)
|
rlm@1
|
61 break;
|
rlm@1
|
62 if (dest)
|
rlm@1
|
63 {
|
rlm@1
|
64 dest[destPos + 0] = (wchar_t)(0xD800 + (value >> 10));
|
rlm@1
|
65 dest[destPos + 1] = (wchar_t)(0xDC00 + (value & 0x3FF));
|
rlm@1
|
66 }
|
rlm@1
|
67 destPos += 2;
|
rlm@1
|
68 }
|
rlm@1
|
69 }
|
rlm@1
|
70 *destLen = destPos;
|
rlm@1
|
71 return False;
|
rlm@1
|
72 }
|
rlm@1
|
73
|
rlm@1
|
74 static Bool Utf16_To_Utf8(char *dest, size_t *destLen, const wchar_t *src, size_t srcLen)
|
rlm@1
|
75 {
|
rlm@1
|
76 size_t destPos = 0, srcPos = 0;
|
rlm@1
|
77 for (;;)
|
rlm@1
|
78 {
|
rlm@1
|
79 unsigned numAdds;
|
rlm@1
|
80 UInt32 value;
|
rlm@1
|
81 if (srcPos == srcLen)
|
rlm@1
|
82 {
|
rlm@1
|
83 *destLen = destPos;
|
rlm@1
|
84 return True;
|
rlm@1
|
85 }
|
rlm@1
|
86 value = src[srcPos++];
|
rlm@1
|
87 if (value < 0x80)
|
rlm@1
|
88 {
|
rlm@1
|
89 if (dest)
|
rlm@1
|
90 dest[destPos] = (char)value;
|
rlm@1
|
91 destPos++;
|
rlm@1
|
92 continue;
|
rlm@1
|
93 }
|
rlm@1
|
94 if (value >= 0xD800 && value < 0xE000)
|
rlm@1
|
95 {
|
rlm@1
|
96 UInt32 c2;
|
rlm@1
|
97 if (value >= 0xDC00 || srcPos == srcLen)
|
rlm@1
|
98 break;
|
rlm@1
|
99 c2 = src[srcPos++];
|
rlm@1
|
100 if (c2 < 0xDC00 || c2 >= 0xE000)
|
rlm@1
|
101 break;
|
rlm@1
|
102 value = ((value - 0xD800) << 10) | (c2 - 0xDC00);
|
rlm@1
|
103 }
|
rlm@1
|
104 for (numAdds = 1; numAdds < 5; numAdds++)
|
rlm@1
|
105 if (value < (((UInt32)1) << (numAdds * 5 + 6)))
|
rlm@1
|
106 break;
|
rlm@1
|
107 if (dest)
|
rlm@1
|
108 dest[destPos] = (char)(kUtf8Limits[numAdds - 1] + (value >> (6 * numAdds)));
|
rlm@1
|
109 destPos++;
|
rlm@1
|
110 do
|
rlm@1
|
111 {
|
rlm@1
|
112 numAdds--;
|
rlm@1
|
113 if (dest)
|
rlm@1
|
114 dest[destPos] = (char)(0x80 + ((value >> (6 * numAdds)) & 0x3F));
|
rlm@1
|
115 destPos++;
|
rlm@1
|
116 }
|
rlm@1
|
117 while (numAdds != 0);
|
rlm@1
|
118 }
|
rlm@1
|
119 *destLen = destPos;
|
rlm@1
|
120 return False;
|
rlm@1
|
121 }
|
rlm@1
|
122
|
rlm@1
|
123 bool ConvertUTF8ToUnicode(const AString &src, UString &dest)
|
rlm@1
|
124 {
|
rlm@1
|
125 dest.Empty();
|
rlm@1
|
126 size_t destLen = 0;
|
rlm@1
|
127 Utf8_To_Utf16(NULL, &destLen, src, src.Length());
|
rlm@1
|
128 wchar_t *p = dest.GetBuffer((int)destLen);
|
rlm@1
|
129 Bool res = Utf8_To_Utf16(p, &destLen, src, src.Length());
|
rlm@1
|
130 p[destLen] = 0;
|
rlm@1
|
131 dest.ReleaseBuffer();
|
rlm@1
|
132 return res ? true : false;
|
rlm@1
|
133 }
|
rlm@1
|
134
|
rlm@1
|
135 bool ConvertUnicodeToUTF8(const UString &src, AString &dest)
|
rlm@1
|
136 {
|
rlm@1
|
137 dest.Empty();
|
rlm@1
|
138 size_t destLen = 0;
|
rlm@1
|
139 Utf16_To_Utf8(NULL, &destLen, src, src.Length());
|
rlm@1
|
140 char *p = dest.GetBuffer((int)destLen);
|
rlm@1
|
141 Bool res = Utf16_To_Utf8(p, &destLen, src, src.Length());
|
rlm@1
|
142 p[destLen] = 0;
|
rlm@1
|
143 dest.ReleaseBuffer();
|
rlm@1
|
144 return res ? true : false;
|
rlm@1
|
145 }
|