70 |
70 |
// *****************************************************************************
|
71 |
71 |
// local declarations
|
72 |
72 |
namespace {
|
73 |
|
#if defined WIN32 && !defined __CYGWIN__
|
74 |
73 |
// Convert string charset with Windows functions.
|
75 |
|
bool convertStringCharsetWindows(std::string& str, const char* from, const char* to);
|
76 |
|
#endif
|
|
74 |
bool convertStringCharsetUni(std::string& str, const char* from, const char* to);
|
77 |
75 |
#if defined EXV_HAVE_ICONV
|
78 |
76 |
// Convert string charset with iconv.
|
79 |
77 |
bool convertStringCharsetIconv(std::string& str, const char* from, const char* to);
|
... | ... | |
1334 |
1332 |
bool ret = false;
|
1335 |
1333 |
#if defined EXV_HAVE_ICONV
|
1336 |
1334 |
ret = convertStringCharsetIconv(str, from, to);
|
1337 |
|
#elif defined WIN32 && !defined __CYGWIN__
|
1338 |
|
ret = convertStringCharsetWindows(str, from, to);
|
1339 |
1335 |
#else
|
1340 |
|
# ifndef SUPPRESS_WARNINGS
|
1341 |
|
EXV_WARNING << "Charset conversion required but no character mapping functionality available.\n";
|
1342 |
|
# endif
|
|
1336 |
ret = convertStringCharsetUni(str, from, to);
|
1343 |
1337 |
#endif
|
1344 |
1338 |
return ret;
|
1345 |
1339 |
}
|
... | ... | |
1350 |
1344 |
namespace {
|
1351 |
1345 |
|
1352 |
1346 |
using namespace Exiv2;
|
1353 |
|
|
1354 |
|
#if defined WIN32 && !defined __CYGWIN__
|
|
1347 |
|
1355 |
1348 |
bool swapBytes(std::string& str)
|
1356 |
1349 |
{
|
1357 |
1350 |
// Naive byte-swapping, I'm sure this can be done more efficiently
|
... | ... | |
1369 |
1362 |
return true;
|
1370 |
1363 |
}
|
1371 |
1364 |
|
1372 |
|
#if defined WIN32 && !defined __CYGWIN__
// Convert a multi-byte string to a wide character string, in place.
// Windows only: relies on MultiByteToWideChar().
//
// cp  - Windows code page identifier describing the encoding of str.
// str - in: the multi-byte text; out: the raw bytes of the resulting wide
//       character string (two bytes are reserved per wide character).
//
// Returns true on success (an empty string is left untouched) and false if
// MultiByteToWideChar() reports an error, in which case str is not modified.
bool mb2wc(UINT cp, std::string& str)
{
    if (str.empty()) return true;
    const int srcLen = (int)str.size();
    // With a null output buffer the call only reports how many wide
    // characters the conversion needs.
    const int len = MultiByteToWideChar(cp, 0, str.c_str(), srcLen, 0, 0);
    if (len == 0) {
#ifdef DEBUG
        EXV_DEBUG << "mb2wc: Failed to determine required size of output buffer.\n";
#endif
        return false;
    }
    // Byte buffer large enough for len wide characters of two bytes each.
    std::vector<std::string::value_type> out(len * 2);
    // The last argument is the buffer capacity in wide characters, not in
    // bytes, so it must be len rather than the byte count.
    const int ret = MultiByteToWideChar(cp, 0, str.c_str(), srcLen, (LPWSTR)&out[0], len);
    if (ret == 0) {
#ifdef DEBUG
        EXV_DEBUG << "mb2wc: Failed to convert the input string to a wide character string.\n";
#endif
        return false;
    }
    str.assign(out.begin(), out.end());
    return true;
}
#endif // defined WIN32 && !defined __CYGWIN__
|
1394 |
|
|
1395 |
|
#if defined WIN32 && !defined __CYGWIN__
// Convert a wide character string to a multi-byte string, in place.
// Windows only: relies on WideCharToMultiByte().
//
// cp  - Windows code page identifier of the target encoding.
// str - in: the raw bytes of a wide character string (two bytes per
//       character, so the size must be even); out: the multi-byte text.
//
// Returns true on success (an empty string is left untouched) and false if
// the size is odd or WideCharToMultiByte() reports an error; str is not
// modified on failure.
bool wc2mb(UINT cp, std::string& str)
{
    if (str.empty()) return true;
    if (str.size() & 1) {
#ifdef DEBUG
        EXV_DEBUG << "wc2mb: Size " << str.size() << " of input string is not even.\n";
#endif
        return false;
    }
    // With a null output buffer the call only reports the required size.
    int len = WideCharToMultiByte(cp, 0, (LPCWSTR)str.data(), (int)str.size() / 2, 0, 0, 0, 0);
    if (len == 0) {
#ifdef DEBUG
        EXV_DEBUG << "wc2mb: Failed to determine required size of output buffer.\n";
#endif
        return false;
    }
    std::vector<std::string::value_type> out;
    out.resize(len);
    int ret = WideCharToMultiByte(cp, 0, (LPCWSTR)str.data(), (int)str.size() / 2, (LPSTR)&out[0], len, 0, 0);
    if (ret == 0) {
#ifdef DEBUG
        EXV_DEBUG << "wc2mb: Failed to convert the input string to a multi byte string.\n";
#endif
        return false;
    }
    str.assign(out.begin(), out.end());
    return true;
}
#endif // defined WIN32 && !defined __CYGWIN__

// Convert a UTF-8 encoded string to UCS-2 little-endian, in place.
//
// Each decoded code point is written as two bytes, low byte first. Only
// one-, two- and three-byte UTF-8 sequences are accepted, because longer
// sequences encode code points that do not fit into a single 16-bit unit.
//
// Returns true on success. Returns false and leaves str unmodified if the
// input contains a stray continuation byte, an unsupported lead byte, a
// truncated sequence, or a sequence byte that is not a continuation byte.
bool utf8ToUcs2le(std::string& str)
{
    const std::string::size_type size = str.size();
    std::string ucs2;
    ucs2.reserve(size * 2);

    std::string::size_type i = 0;
    while (i < size) {
        const unsigned char lead = static_cast<unsigned char>(str[i]);
        unsigned int val = 0;
        std::string::size_type seqLen = 0;
        if (lead < 0x80) {
            val = lead;
            seqLen = 1;
        }
        else if ((lead & 0xE0) == 0xC0) {
            val = lead & 0x1F;
            seqLen = 2;
        }
        else if ((lead & 0xF0) == 0xE0) {
            val = lead & 0x0F;
            seqLen = 3;
        }
        else {
            // Continuation byte without a lead byte, or a lead byte of a
            // sequence longer than three bytes.
            return false;
        }
        // The whole sequence must be present in the input.
        if (size - i < seqLen) return false;
        for (std::string::size_type k = 1; k < seqLen; ++k) {
            const unsigned char cont = static_cast<unsigned char>(str[i + k]);
            if ((cont & 0xC0) != 0x80) return false;
            val = (val << 6) | (cont & 0x3F);
        }
        ucs2 += static_cast<char>(val & 0xFF);
        ucs2 += static_cast<char>((val >> 8) & 0xFF);
        // Advance only after the sequence has been read completely.
        i += seqLen;
    }
    str.swap(ucs2);
    return true;
}
|
|
1391 |
|
|
1392 |
// Convert a UCS-2 little-endian string to UTF-8, in place.
//
// The input is read as a sequence of 16-bit code units, low byte first.
//
// Returns true on success. Returns false and leaves str unmodified if the
// size of the input is odd (the last code unit would be incomplete) or if
// a code unit lies in the surrogate range 0xD800-0xDFFF.
bool ucs2leToUtf8(std::string& str)
{
    const std::string::size_type size = str.size();
    // A dangling byte cannot form a code unit; reject it instead of
    // pairing it with whatever follows the string data.
    if (size % 2 != 0) return false;

    std::string utf8;
    utf8.reserve(size);
    for (std::string::size_type i = 0; i < size; i += 2) {
        const unsigned int lo = static_cast<unsigned char>(str[i]);
        const unsigned int hi = static_cast<unsigned char>(str[i + 1]);
        const unsigned int c = lo | (hi << 8);
        if (c < 0x80) {
            utf8 += static_cast<char>(c);
        }
        else if (c < 0x800) {
            utf8 += static_cast<char>(0xC0 | (c >> 6));
            utf8 += static_cast<char>(0x80 | (c & 0x3F));
        }
        else if (c >= 0xD800 && c < 0xE000) {
            // Surrogate code units cannot be encoded on their own.
            return false;
        }
        else {
            utf8 += static_cast<char>(0xE0 | (c >> 12));
            utf8 += static_cast<char>(0x80 | ((c >> 6) & 0x3F));
            utf8 += static_cast<char>(0x80 | (c & 0x3F));
        }
    }
    str.swap(utf8);
    return true;
}
|
1423 |
1405 |
|
1424 |
1406 |
bool utf8ToUcs2be(std::string& str)
|
1425 |
1407 |
{
|
1426 |
|
bool ret = mb2wc(CP_UTF8, str);
|
|
1408 |
bool ret = utf8ToUcs2le(str);
|
1427 |
1409 |
if (ret) ret = swapBytes(str);
|
1428 |
1410 |
return ret;
|
1429 |
1411 |
}
|
1430 |
1412 |
|
1431 |
|
bool utf8ToUcs2le(std::string& str)
|
1432 |
|
{
|
1433 |
|
return mb2wc(CP_UTF8, str);
|
1434 |
|
}
|
1435 |
|
|
1436 |
1413 |
bool ucs2beToUtf8(std::string& str)
|
1437 |
1414 |
{
|
1438 |
1415 |
bool ret = swapBytes(str);
|
1439 |
|
if (ret) ret = wc2mb(CP_UTF8, str);
|
|
1416 |
if (ret) ret = ucs2leToUtf8(str);
|
1440 |
1417 |
return ret;
|
1441 |
1418 |
}
|
1442 |
1419 |
|
... | ... | |
1445 |
1422 |
return swapBytes(str);
|
1446 |
1423 |
}
|
1447 |
1424 |
|
1448 |
|
bool ucs2leToUtf8(std::string& str)
|
1449 |
|
{
|
1450 |
|
return wc2mb(CP_UTF8, str);
|
1451 |
|
}
|
1452 |
|
|
1453 |
1425 |
bool ucs2leToUcs2be(std::string& str)
|
1454 |
1426 |
{
|
1455 |
1427 |
return swapBytes(str);
|
... | ... | |
1457 |
1429 |
|
1458 |
1430 |
// Convert an ISO-8859-1 encoded string to UTF-8, in place.
//
// Every input byte is taken as the code point of the same value: bytes
// below 0x80 are copied as they are, all other bytes are written as a
// two-byte UTF-8 sequence. No platform specific conversion function is
// needed and no input byte can make the conversion fail.
//
// Always returns true.
bool iso88591ToUtf8(std::string& str)
{
    std::string utf8;
    utf8.reserve(str.size() * 2);
    for (std::string::size_type i = 0; i < str.size(); ++i) {
        const unsigned char c = static_cast<unsigned char>(str[i]);
        if (c < 0x80) {
            utf8 += static_cast<char>(c);
        }
        else {
            utf8 += static_cast<char>(0xC0 | (c >> 6));
            utf8 += static_cast<char>(0x80 | (c & 0x3F));
        }
    }
    str.swap(utf8);
    return true;
}
|
1464 |
1441 |
|
1465 |
1442 |
bool asciiToUtf8(std::string& /*str*/)
|
... | ... | |
1490 |
1467 |
// Update the convertStringCharset() documentation if you add more here!
|
1491 |
1468 |
};
|
1492 |
1469 |
|
1493 |
|
bool convertStringCharsetWindows(std::string& str, const char* from, const char* to)
|
|
1470 |
bool convertStringCharsetUni(std::string& str, const char* from, const char* to)
|
1494 |
1471 |
{
|
1495 |
1472 |
bool ret = false;
|
1496 |
1473 |
const ConvFctList* p = find(convFctList, std::make_pair(from, to));
|
... | ... | |
1498 |
1475 |
if (p) ret = p->convFct_(tmpstr);
|
1499 |
1476 |
#ifndef SUPPRESS_WARNINGS
|
1500 |
1477 |
else {
|
1501 |
|
EXV_WARNING << "No Windows function to map character string from " << from << " to " << to << " available.\n";
|
|
1478 |
EXV_WARNING << "No function to map character string from " << from << " to " << to << " available.\n";
|
1502 |
1479 |
}
|
1503 |
1480 |
#endif
|
1504 |
1481 |
if (ret) str = tmpstr;
|
1505 |
1482 |
return ret;
|
1506 |
1483 |
}
|
1507 |
1484 |
|
1508 |
|
#endif // defined WIN32 && !defined __CYGWIN__
|
1509 |
1485 |
#if defined EXV_HAVE_ICONV
|
1510 |
1486 |
bool convertStringCharsetIconv(std::string& str, const char* from, const char* to)
|
1511 |
1487 |
{
|