annotate src/win32/7zip/7z/CPP/Common/UTFConvert.cpp @ 1:f9f4f1b99eed

importing src directory
author Robert McIntyre <rlm@mit.edu>
date Sat, 03 Mar 2012 10:31:27 -0600 (2012-03-03)
parents
children
rev   line source
rlm@1 1 // UTFConvert.cpp
rlm@1 2
rlm@1 3 #include "StdAfx.h"
rlm@1 4
rlm@1 5 #include "UTFConvert.h"
rlm@1 6 #include "Types.h"
rlm@1 7
// UTF-8 lead-byte thresholds. Entry i is the smallest lead byte that introduces
// a sequence with (i + 1) continuation bytes: 0xC0 -> 1, 0xE0 -> 2, 0xF0 -> 3,
// 0xF8 -> 4, 0xFC -> 5. The decoder scans this table to find how many
// continuation bytes follow a lead byte and subtracts the matching entry to
// strip the marker bits; the encoder adds the matching entry to the top payload
// bits to form the lead byte.
rlm@1 8 static const Byte kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
rlm@1 9
rlm@1 10 static Bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, size_t srcLen)
rlm@1 11 {
rlm@1 12 size_t destPos = 0, srcPos = 0;
rlm@1 13 for (;;)
rlm@1 14 {
rlm@1 15 Byte c;
rlm@1 16 int numAdds;
rlm@1 17 if (srcPos == srcLen)
rlm@1 18 {
rlm@1 19 *destLen = destPos;
rlm@1 20 return True;
rlm@1 21 }
rlm@1 22 c = (Byte)src[srcPos++];
rlm@1 23
rlm@1 24 if (c < 0x80)
rlm@1 25 {
rlm@1 26 if (dest)
rlm@1 27 dest[destPos] = (wchar_t)c;
rlm@1 28 destPos++;
rlm@1 29 continue;
rlm@1 30 }
rlm@1 31 if (c < 0xC0)
rlm@1 32 break;
rlm@1 33 for (numAdds = 1; numAdds < 5; numAdds++)
rlm@1 34 if (c < kUtf8Limits[numAdds])
rlm@1 35 break;
rlm@1 36 UInt32 value = (c - kUtf8Limits[numAdds - 1]);
rlm@1 37
rlm@1 38 do
rlm@1 39 {
rlm@1 40 Byte c2;
rlm@1 41 if (srcPos == srcLen)
rlm@1 42 break;
rlm@1 43 c2 = (Byte)src[srcPos++];
rlm@1 44 if (c2 < 0x80 || c2 >= 0xC0)
rlm@1 45 break;
rlm@1 46 value <<= 6;
rlm@1 47 value |= (c2 - 0x80);
rlm@1 48 }
rlm@1 49 while (--numAdds != 0);
rlm@1 50
rlm@1 51 if (value < 0x10000)
rlm@1 52 {
rlm@1 53 if (dest)
rlm@1 54 dest[destPos] = (wchar_t)value;
rlm@1 55 destPos++;
rlm@1 56 }
rlm@1 57 else
rlm@1 58 {
rlm@1 59 value -= 0x10000;
rlm@1 60 if (value >= 0x100000)
rlm@1 61 break;
rlm@1 62 if (dest)
rlm@1 63 {
rlm@1 64 dest[destPos + 0] = (wchar_t)(0xD800 + (value >> 10));
rlm@1 65 dest[destPos + 1] = (wchar_t)(0xDC00 + (value & 0x3FF));
rlm@1 66 }
rlm@1 67 destPos += 2;
rlm@1 68 }
rlm@1 69 }
rlm@1 70 *destLen = destPos;
rlm@1 71 return False;
rlm@1 72 }
rlm@1 73
rlm@1 74 static Bool Utf16_To_Utf8(char *dest, size_t *destLen, const wchar_t *src, size_t srcLen)
rlm@1 75 {
rlm@1 76 size_t destPos = 0, srcPos = 0;
rlm@1 77 for (;;)
rlm@1 78 {
rlm@1 79 unsigned numAdds;
rlm@1 80 UInt32 value;
rlm@1 81 if (srcPos == srcLen)
rlm@1 82 {
rlm@1 83 *destLen = destPos;
rlm@1 84 return True;
rlm@1 85 }
rlm@1 86 value = src[srcPos++];
rlm@1 87 if (value < 0x80)
rlm@1 88 {
rlm@1 89 if (dest)
rlm@1 90 dest[destPos] = (char)value;
rlm@1 91 destPos++;
rlm@1 92 continue;
rlm@1 93 }
rlm@1 94 if (value >= 0xD800 && value < 0xE000)
rlm@1 95 {
rlm@1 96 UInt32 c2;
rlm@1 97 if (value >= 0xDC00 || srcPos == srcLen)
rlm@1 98 break;
rlm@1 99 c2 = src[srcPos++];
rlm@1 100 if (c2 < 0xDC00 || c2 >= 0xE000)
rlm@1 101 break;
rlm@1 102 value = ((value - 0xD800) << 10) | (c2 - 0xDC00);
rlm@1 103 }
rlm@1 104 for (numAdds = 1; numAdds < 5; numAdds++)
rlm@1 105 if (value < (((UInt32)1) << (numAdds * 5 + 6)))
rlm@1 106 break;
rlm@1 107 if (dest)
rlm@1 108 dest[destPos] = (char)(kUtf8Limits[numAdds - 1] + (value >> (6 * numAdds)));
rlm@1 109 destPos++;
rlm@1 110 do
rlm@1 111 {
rlm@1 112 numAdds--;
rlm@1 113 if (dest)
rlm@1 114 dest[destPos] = (char)(0x80 + ((value >> (6 * numAdds)) & 0x3F));
rlm@1 115 destPos++;
rlm@1 116 }
rlm@1 117 while (numAdds != 0);
rlm@1 118 }
rlm@1 119 *destLen = destPos;
rlm@1 120 return False;
rlm@1 121 }
rlm@1 122
rlm@1 123 bool ConvertUTF8ToUnicode(const AString &src, UString &dest)
rlm@1 124 {
rlm@1 125 dest.Empty();
rlm@1 126 size_t destLen = 0;
rlm@1 127 Utf8_To_Utf16(NULL, &destLen, src, src.Length());
rlm@1 128 wchar_t *p = dest.GetBuffer((int)destLen);
rlm@1 129 Bool res = Utf8_To_Utf16(p, &destLen, src, src.Length());
rlm@1 130 p[destLen] = 0;
rlm@1 131 dest.ReleaseBuffer();
rlm@1 132 return res ? true : false;
rlm@1 133 }
rlm@1 134
rlm@1 135 bool ConvertUnicodeToUTF8(const UString &src, AString &dest)
rlm@1 136 {
rlm@1 137 dest.Empty();
rlm@1 138 size_t destLen = 0;
rlm@1 139 Utf16_To_Utf8(NULL, &destLen, src, src.Length());
rlm@1 140 char *p = dest.GetBuffer((int)destLen);
rlm@1 141 Bool res = Utf16_To_Utf8(p, &destLen, src, src.Length());
rlm@1 142 p[destLen] = 0;
rlm@1 143 dest.ReleaseBuffer();
rlm@1 144 return res ? true : false;
rlm@1 145 }