Project

General

Profile

Feature #902 » exiv2_convert.patch

Proposed patch to use internal charset encoding - Eric TRINH, 20 May 2013 01:51

View differences:

exiv2/src/convert.cpp 2013-05-20 07:35:27 +0000
70 70
// *****************************************************************************
71 71
// local declarations
72 72
namespace {
73
#if defined WIN32 && !defined __CYGWIN__
74 73
    // Convert string charset with Windows functions.
75
    bool convertStringCharsetWindows(std::string& str, const char* from, const char* to);
76
#endif
74
    bool convertStringCharsetUni(std::string& str, const char* from, const char* to);
77 75
#if defined EXV_HAVE_ICONV
78 76
    // Convert string charset with iconv.
79 77
    bool convertStringCharsetIconv(std::string& str, const char* from, const char* to);
......
1334 1332
        bool ret = false;
1335 1333
#if defined EXV_HAVE_ICONV
1336 1334
        ret = convertStringCharsetIconv(str, from, to);
1337
#elif defined WIN32 && !defined __CYGWIN__
1338
        ret = convertStringCharsetWindows(str, from, to);
1339 1335
#else
1340
# ifndef SUPPRESS_WARNINGS
1341
        EXV_WARNING << "Charset conversion required but no character mapping functionality available.\n";
1342
# endif
1336
        ret = convertStringCharsetUni(str, from, to);
1343 1337
#endif
1344 1338
        return ret;
1345 1339
    }
......
1350 1344
namespace {
1351 1345

  
1352 1346
    using namespace Exiv2;
1353

  
1354
#if defined WIN32 && !defined __CYGWIN__
1347
	
1355 1348
    bool swapBytes(std::string& str)
1356 1349
    {
1357 1350
        // Naive byte-swapping, I'm sure this can be done more efficiently
......
1369 1362
        return true;
1370 1363
    }
1371 1364

  
1372
    bool mb2wc(UINT cp, std::string& str)
1373
    {
1374
        if (str.empty()) return true;
1375
        int len = MultiByteToWideChar(cp, 0, str.c_str(), (int)str.size(), 0, 0);
1376
        if (len == 0) {
1377
#ifdef DEBUG
1378
            EXV_DEBUG << "mb2wc: Failed to determine required size of output buffer.\n";
1379
#endif
1380
            return false;
1381
        }
1382
        std::vector<std::string::value_type> out;
1383
        out.resize(len * 2);
1384
        int ret = MultiByteToWideChar(cp, 0, str.c_str(), (int)str.size(), (LPWSTR)&out[0], len * 2);
1385
        if (ret == 0) {
1386
#ifdef DEBUG
1387
            EXV_DEBUG << "mb2wc: Failed to convert the input string to a wide character string.\n";
1388
#endif
1389
            return false;
1390
        }
1391
        str.assign(out.begin(), out.end());
1392
        return true;
1393
    }
1394

  
1395
    bool wc2mb(UINT cp, std::string& str)
1365
    bool utf8ToUcs2le(std::string& str)
1396 1366
    {
1397
        if (str.empty()) return true;
1398
        if (str.size() & 1) {
1399
#ifdef DEBUG
1400
            EXV_DEBUG << "wc2mb: Size " << str.size() << " of input string is not even.\n";
1401
#endif
1402
            return false;
1403
        }
1404
        int len = WideCharToMultiByte(cp, 0, (LPCWSTR)str.data(), (int)str.size() / 2, 0, 0, 0, 0);
1405
        if (len == 0) {
1406
#ifdef DEBUG
1407
            EXV_DEBUG << "wc2mb: Failed to determine required size of output buffer.\n";
1408
#endif
1409
            return false;
1410
        }
1411
        std::vector<std::string::value_type> out;
1412
        out.resize(len);
1413
        int ret = WideCharToMultiByte(cp, 0, (LPCWSTR)str.data(), (int)str.size() / 2, (LPSTR)&out[0], len, 0, 0);
1414
        if (ret == 0) {
1415
#ifdef DEBUG
1416
            EXV_DEBUG << "wc2mb: Failed to convert the input string to a multi byte string.\n";
1417
#endif
1418
            return false;
1419
        }
1420
        str.assign(out.begin(), out.end());
1421
        return true;
1367
		try {
1368
			std::string _str;
1369
			for (unsigned int i = 0; i < str.size();) {	
1370
				int val = 0;
1371
				if ((unsigned char)str[i] < 0x80) val = str[i], ++i;
1372
				else if ((str[i] & 0xE0) == 0xE0) {
1373
					if (str.at(i + 1) == 0 || str.at(i + 2) == 0) return false;
1374
					i += 3;
1375
					val = (str.at(i) & 0x0F)<<12 | (str.at(i + 1) & 0x3F)<<6  | (str.at(i + 2) & 0x3F);
1376
				}
1377
				else if ((str[i] & 0xC0) == 0xC0) {
1378
					if (str.at(i + 1) == 0) return false;
1379
					i += 2;
1380
					val = (str.at(i) & 0x1F)<<6 | (str.at(i + 1) & 0x3F);
1381
				}
1382
				_str += (unsigned char)(val & 0xff), _str += (unsigned char)(val >> 8);				
1383
			}
1384
			str = _str;
1385
		}
1386
		catch (...) {
1387
			return false;
1388
		}
1389
		return true;
1390
    }
1391

  
1392
	bool ucs2leToUtf8(std::string& str)
1393
    {
1394
		std::string _str;
1395
		for (unsigned int i = 0; i < str.size(); i += 2) {
1396
			unsigned short c = (unsigned char)str[i] + (((unsigned short)str[i + 1]) << 8);
1397
			if (c<0x80) _str+=c;
1398
			else if (c<0x800) _str+=192+c/64, _str+=128+c%64;
1399
			else if (c-0xd800u<0x800) return false;
1400
			else _str+=224+c/4096, _str+=128+c/64%64, _str+=128+c%64;
1401
		}
1402
		str = _str;
1403
		return true;
1422 1404
    }
1423 1405

  
1424 1406
    bool utf8ToUcs2be(std::string& str)
1425 1407
    {
1426
        bool ret = mb2wc(CP_UTF8, str);
1408
        bool ret = utf8ToUcs2le(str);
1427 1409
        if (ret) ret = swapBytes(str);
1428 1410
        return ret;
1429 1411
    }
1430 1412

  
1431
    bool utf8ToUcs2le(std::string& str)
1432
    {
1433
        return mb2wc(CP_UTF8, str);
1434
    }
1435

  
1436 1413
    bool ucs2beToUtf8(std::string& str)
1437 1414
    {
1438 1415
        bool ret = swapBytes(str);
1439
        if (ret) ret = wc2mb(CP_UTF8, str);
1416
        if (ret) ret = ucs2leToUtf8(str);
1440 1417
        return ret;
1441 1418
    }
1442 1419

  
......
1445 1422
        return swapBytes(str);
1446 1423
    }
1447 1424

  
1448
    bool ucs2leToUtf8(std::string& str)
1449
    {
1450
        return wc2mb(CP_UTF8, str);
1451
    }
1452

  
1453 1425
    bool ucs2leToUcs2be(std::string& str)
1454 1426
    {
1455 1427
        return swapBytes(str);
......
1457 1429

  
1458 1430
    bool iso88591ToUtf8(std::string& str)
1459 1431
    {
1460
        bool ret = mb2wc(28591, str);
1461
        if (ret) ret = wc2mb(CP_UTF8, str);
1462
        return ret;
1432
		std::string mcstr;
1433
		mcstr.reserve(str.size() * 2);
1434
        for (unsigned int i = 0; i < str.size(); ++i) {
1435
			mcstr += str[i];
1436
			mcstr += '\0';
1437
        }
1438
		str = mcstr;
1439
        return ucs2leToUtf8(str);
1463 1440
    }
1464 1441

  
1465 1442
    bool asciiToUtf8(std::string& /*str*/)
......
1490 1467
        // Update the convertStringCharset() documentation if you add more here!
1491 1468
    };
1492 1469

  
1493
    bool convertStringCharsetWindows(std::string& str, const char* from, const char* to)
1470
    bool convertStringCharsetUni(std::string& str, const char* from, const char* to)
1494 1471
    {
1495 1472
        bool ret = false;
1496 1473
        const ConvFctList* p = find(convFctList, std::make_pair(from, to));
......
1498 1475
        if (p) ret = p->convFct_(tmpstr);
1499 1476
#ifndef SUPPRESS_WARNINGS
1500 1477
        else {
1501
            EXV_WARNING << "No Windows function to map character string from " << from << " to " << to << " available.\n";
1478
            EXV_WARNING << "No function to map character string from " << from << " to " << to << " available.\n";
1502 1479
        }
1503 1480
#endif
1504 1481
        if (ret) str = tmpstr;
1505 1482
        return ret;
1506 1483
    }
1507 1484

  
1508
#endif // defined WIN32 && !defined __CYGWIN__
1509 1485
#if defined EXV_HAVE_ICONV
1510 1486
    bool convertStringCharsetIconv(std::string& str, const char* from, const char* to)
1511 1487
    {
    (1-1/1)