Mercurial > vba-linux
comparison src/win32/7zip/7z/CPP/Common/UTFConvert.cpp @ 1:f9f4f1b99eed
importing src directory
author | Robert McIntyre <rlm@mit.edu> |
---|---|
date | Sat, 03 Mar 2012 10:31:27 -0600 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:8ced16adf2e1 | 1:f9f4f1b99eed |
---|---|
1 // UTFConvert.cpp | |
2 | |
3 #include "StdAfx.h" | |
4 | |
5 #include "UTFConvert.h" | |
6 #include "Types.h" | |
7 | |
// UTF-8 lead-byte markers shared by both converters in this file.
// Entry n-1 is the marker of a lead byte that is followed by n continuation
// bytes (0xC0 -> 1, 0xE0 -> 2, 0xF0 -> 3, 0xF8 -> 4, 0xFC -> 5); entry n is
// the exclusive upper bound for such lead bytes.
8 static const Byte kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; | |
9 | |
10 static Bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, size_t srcLen) | |
11 { | |
12 size_t destPos = 0, srcPos = 0; | |
13 for (;;) | |
14 { | |
15 Byte c; | |
16 int numAdds; | |
17 if (srcPos == srcLen) | |
18 { | |
19 *destLen = destPos; | |
20 return True; | |
21 } | |
22 c = (Byte)src[srcPos++]; | |
23 | |
24 if (c < 0x80) | |
25 { | |
26 if (dest) | |
27 dest[destPos] = (wchar_t)c; | |
28 destPos++; | |
29 continue; | |
30 } | |
31 if (c < 0xC0) | |
32 break; | |
33 for (numAdds = 1; numAdds < 5; numAdds++) | |
34 if (c < kUtf8Limits[numAdds]) | |
35 break; | |
36 UInt32 value = (c - kUtf8Limits[numAdds - 1]); | |
37 | |
38 do | |
39 { | |
40 Byte c2; | |
41 if (srcPos == srcLen) | |
42 break; | |
43 c2 = (Byte)src[srcPos++]; | |
44 if (c2 < 0x80 || c2 >= 0xC0) | |
45 break; | |
46 value <<= 6; | |
47 value |= (c2 - 0x80); | |
48 } | |
49 while (--numAdds != 0); | |
50 | |
51 if (value < 0x10000) | |
52 { | |
53 if (dest) | |
54 dest[destPos] = (wchar_t)value; | |
55 destPos++; | |
56 } | |
57 else | |
58 { | |
59 value -= 0x10000; | |
60 if (value >= 0x100000) | |
61 break; | |
62 if (dest) | |
63 { | |
64 dest[destPos + 0] = (wchar_t)(0xD800 + (value >> 10)); | |
65 dest[destPos + 1] = (wchar_t)(0xDC00 + (value & 0x3FF)); | |
66 } | |
67 destPos += 2; | |
68 } | |
69 } | |
70 *destLen = destPos; | |
71 return False; | |
72 } | |
73 | |
74 static Bool Utf16_To_Utf8(char *dest, size_t *destLen, const wchar_t *src, size_t srcLen) | |
75 { | |
76 size_t destPos = 0, srcPos = 0; | |
77 for (;;) | |
78 { | |
79 unsigned numAdds; | |
80 UInt32 value; | |
81 if (srcPos == srcLen) | |
82 { | |
83 *destLen = destPos; | |
84 return True; | |
85 } | |
86 value = src[srcPos++]; | |
87 if (value < 0x80) | |
88 { | |
89 if (dest) | |
90 dest[destPos] = (char)value; | |
91 destPos++; | |
92 continue; | |
93 } | |
94 if (value >= 0xD800 && value < 0xE000) | |
95 { | |
96 UInt32 c2; | |
97 if (value >= 0xDC00 || srcPos == srcLen) | |
98 break; | |
99 c2 = src[srcPos++]; | |
100 if (c2 < 0xDC00 || c2 >= 0xE000) | |
101 break; | |
102 value = ((value - 0xD800) << 10) | (c2 - 0xDC00); | |
103 } | |
104 for (numAdds = 1; numAdds < 5; numAdds++) | |
105 if (value < (((UInt32)1) << (numAdds * 5 + 6))) | |
106 break; | |
107 if (dest) | |
108 dest[destPos] = (char)(kUtf8Limits[numAdds - 1] + (value >> (6 * numAdds))); | |
109 destPos++; | |
110 do | |
111 { | |
112 numAdds--; | |
113 if (dest) | |
114 dest[destPos] = (char)(0x80 + ((value >> (6 * numAdds)) & 0x3F)); | |
115 destPos++; | |
116 } | |
117 while (numAdds != 0); | |
118 } | |
119 *destLen = destPos; | |
120 return False; | |
121 } | |
122 | |
123 bool ConvertUTF8ToUnicode(const AString &src, UString &dest) | |
124 { | |
125 dest.Empty(); | |
126 size_t destLen = 0; | |
127 Utf8_To_Utf16(NULL, &destLen, src, src.Length()); | |
128 wchar_t *p = dest.GetBuffer((int)destLen); | |
129 Bool res = Utf8_To_Utf16(p, &destLen, src, src.Length()); | |
130 p[destLen] = 0; | |
131 dest.ReleaseBuffer(); | |
132 return res ? true : false; | |
133 } | |
134 | |
135 bool ConvertUnicodeToUTF8(const UString &src, AString &dest) | |
136 { | |
137 dest.Empty(); | |
138 size_t destLen = 0; | |
139 Utf16_To_Utf8(NULL, &destLen, src, src.Length()); | |
140 char *p = dest.GetBuffer((int)destLen); | |
141 Bool res = Utf16_To_Utf8(p, &destLen, src, src.Length()); | |
142 p[destLen] = 0; | |
143 dest.ReleaseBuffer(); | |
144 return res ? true : false; | |
145 } |