Bug #662 » unicode_support.patch
| convert.cpp (working copy) | ||
|---|---|---|
| 1325 | 1325 |
std::string outstr; |
| 1326 | 1326 |
EXV_ICONV_CONST char *inptr = const_cast<char *>(str.c_str()); |
| 1327 | 1327 |
size_t inbytesleft = str.length(); |
| 1328 | ||
| 1328 |
int outbytesProduced = 0; |
|
| 1329 | 1329 |
while (inbytesleft) {
|
| 1330 | 1330 |
char outbuf[100]; |
| 1331 | 1331 |
char *outptr = outbuf; |
| ... | ... | |
| 1335 | 1335 |
&inbytesleft, |
| 1336 | 1336 |
&outptr, |
| 1337 | 1337 |
&outbytesleft); |
| 1338 |
outbytesProduced += sizeof(outbuf) - 1 - outbytesleft; |
|
| 1338 | 1339 |
if (rc == size_t(-1) && errno != E2BIG) {
|
| 1339 | 1340 |
#ifndef SUPPRESS_WARNINGS |
| 1340 | 1341 |
std::cerr << "Warning: iconv: " |
| ... | ... | |
| 1345 | 1346 |
break; |
| 1346 | 1347 |
} |
| 1347 | 1348 |
*outptr = '\0'; |
| 1348 |
outstr.append(outbuf);
|
|
| 1349 |
outstr.append(std::string(outbuf, outbytesProduced));
|
|
| 1349 | 1350 |
} |
| 1350 | 1351 |
if (cd != (iconv_t)(-1)) {
|
| 1351 | 1352 |
iconv_close(cd); |
| exiv2.cpp (working copy) | ||
|---|---|---|
| 41 | 41 |
#include "exiv2.hpp" |
| 42 | 42 |
#include "actions.hpp" |
| 43 | 43 |
#include "utils.hpp" |
| 44 |
#include "convert.hpp" |
|
| 44 | 45 |
#include "i18n.h" // NLS support. |
| 45 | 46 |
#include "xmp.hpp" |
| 46 | 47 | |
| ... | ... | |
| 117 | 118 |
*/ |
| 118 | 119 |
bool parseLine(ModifyCmd& modifyCmd, |
| 119 | 120 |
const std::string& line, int num); |
| 120 | ||
| 121 |
|
|
| 122 |
/*! |
|
| 123 |
@brief Parses a string containing backslash-escapes |
|
| 124 |
@param input Input string |
|
| 125 |
*/ |
|
| 126 |
std::string parseEscapes(const std::string& input); |
|
| 121 | 127 |
} |
| 122 | 128 | |
| 123 | 129 |
// ***************************************************************************** |
| ... | ... | |
| 981 | 987 |
catch (const Exiv2::AnyError& error) {
|
| 982 | 988 |
std::cerr << _("-M option") << " " << error << "\n";
|
| 983 | 989 |
return false; |
| 984 |
}
|
|
| 990 |
}
|
|
| 985 | 991 |
} // parseCmdLines |
| 986 | 992 | |
| 987 | 993 |
bool parseLine(ModifyCmd& modifyCmd, const std::string& line, int num) |
| ... | ... | |
| 1076 | 1082 |
} |
| 1077 | 1083 |
} |
| 1078 | 1084 | |
| 1079 |
value = line.substr(valStart, valEnd+1-valStart);
|
|
| 1085 |
value = parseEscapes(line.substr(valStart, valEnd+1-valStart));
|
|
| 1080 | 1086 |
std::string::size_type last = value.length()-1; |
| 1081 | 1087 |
if ( (value[0] == '"' && value[last] == '"') |
| 1082 | 1088 |
|| (value[0] == '\'' && value[last] == '\'')) {
|
| ... | ... | |
| 1107 | 1113 |
&& cmdIdAndString[i].cmdString_ != cmdString; ++i) {}
|
| 1108 | 1114 |
return cmdIdAndString[i].cmdId_; |
| 1109 | 1115 |
} |
| 1110 | ||
| 1116 |
|
|
| 1117 |
//! Return the value (0..15) of hexadecimal digit \em c, or -1 if \em c is not a hex digit.
static int hexDigitValue(char c)
{
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return -1;
}

//! Append the UTF-8 encoding of the BMP code point \em codePoint (U+0000..U+FFFF) to \em out.
static void appendBmpAsUtf8(std::string& out, unsigned int codePoint)
{
    if (codePoint < 0x80) {
        out.push_back(static_cast<char>(codePoint));
    } else if (codePoint < 0x800) {
        out.push_back(static_cast<char>(0xC0 | (codePoint >> 6)));
        out.push_back(static_cast<char>(0x80 | (codePoint & 0x3F)));
    } else {
        out.push_back(static_cast<char>(0xE0 | (codePoint >> 12)));
        out.push_back(static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F)));
        out.push_back(static_cast<char>(0x80 | (codePoint & 0x3F)));
    }
}

/*!
  @brief Parses a string containing backslash-escapes

  Recognised escapes are \\\\ (backslash), \\n (newline), \\t (tab) and
  \\uXXXX (four hexadecimal digits, emitted as the UTF-8 encoding of that
  code point). Every other byte of the input - including bytes of UTF-8
  multi-byte characters - is copied through unchanged. An unknown or
  malformed escape (unknown letter, fewer than four hex digits, a
  surrogate value, or a lone trailing backslash) is kept as literal text.

  @param input Input string
  @return The input with all recognised escapes replaced
 */
std::string parseEscapes(const std::string& input)
{
    const std::string::size_type len = input.length();
    std::string result;
    result.reserve(len);

    for (std::string::size_type i = 0; i < len; ++i) {
        const char ch = input[i];

        // Ordinary byte, or a backslash with nothing after it: copy verbatim.
        if (ch != '\\' || i + 1 >= len) {
            result.push_back(ch);
            continue;
        }

        const char esc = input[i + 1];
        if (esc == '\\' || esc == 'n' || esc == 't') {
            result.push_back(esc == 'n' ? '\n' : (esc == 't' ? '\t' : '\\'));
            ++i; // consume the escape letter as well
            continue;
        }

        // \uXXXX: the four digits live at i+2 .. i+5. The bound is written
        // as an addition so it cannot wrap around for short inputs.
        if (esc == 'u' && i + 5 < len) {
            unsigned int codeUnit = 0;
            bool valid = true;
            for (int j = 0; j < 4 && valid; ++j) {
                const int digit = hexDigitValue(input[i + 2 + j]);
                if (digit < 0) {
                    valid = false;
                } else {
                    codeUnit = (codeUnit << 4) | static_cast<unsigned int>(digit);
                }
            }
            // Surrogates are not characters on their own; leave them as text.
            if (valid && (codeUnit < 0xD800 || codeUnit > 0xDFFF)) {
                appendBmpAsUtf8(result, codeUnit);
                i += 5; // consume 'u' and the four digits
                continue;
            }
        }

        // Unknown or malformed escape: keep the backslash literally. The
        // characters after it are handled as ordinary input by the loop.
        result.push_back('\\');
    }

    return result;
}
|
| 1203 |
|
|
| 1111 | 1204 |
} |
| value.cpp (working copy) | ||
|---|---|---|
| 34 | 34 |
// included header files |
| 35 | 35 |
#include "value.hpp" |
| 36 | 36 |
#include "types.hpp" |
| 37 |
#include "convert.hpp" |
|
| 37 | 38 |
#include "error.hpp" |
| 38 | 39 | |
| 39 | 40 |
// + standard includes |
| ... | ... | |
| 457 | 458 |
int CommentValue::read(const std::string& comment) |
| 458 | 459 |
{
|
| 459 | 460 |
std::string c = comment; |
| 460 |
CharsetId charsetId = undefined;
|
|
| 461 |
this->_charsetId = undefined;
|
|
| 461 | 462 |
if (comment.length() > 8 && comment.substr(0, 8) == "charset=") {
|
| 462 | 463 |
std::string::size_type pos = comment.find_first_of(' ');
|
| 463 | 464 |
std::string name = comment.substr(8, pos-8); |
| 464 | 465 |
// Strip quotes (so you can also specify the charset without quotes) |
| 465 | 466 |
if (name[0] == '"') name = name.substr(1); |
| 466 | 467 |
if (name[name.length()-1] == '"') name = name.substr(0, name.length()-1); |
| 467 |
charsetId = CharsetInfo::charsetIdByName(name); |
|
| 468 |
if (charsetId == invalidCharsetId) {
|
|
| 468 |
this->_charsetId = CharsetInfo::charsetIdByName(name);
|
|
| 469 |
if (_charsetId == invalidCharsetId) {
|
|
| 469 | 470 |
#ifndef SUPPRESS_WARNINGS |
| 470 | 471 |
std::cerr << "Warning: " << Error(28, name) << "\n"; |
| 471 | 472 |
#endif |
| ... | ... | |
| 474 | 475 |
c.clear(); |
| 475 | 476 |
if (pos != std::string::npos) c = comment.substr(pos+1); |
| 476 | 477 |
} |
| 477 |
const std::string code(CharsetInfo::code(charsetId), 8); |
|
| 478 |
return StringValueBase::read(code + c); |
|
| 478 |
this->_comment = c; |
|
| 479 |
int ret = StringValueBase::read(comment); |
|
| 480 |
|
|
| 481 |
return ret; |
|
| 479 | 482 |
} |
| 483 |
|
|
| 484 |
int CommentValue::read(const byte* buf, long len, ByteOrder byteOrder) |
|
| 485 |
{
|
|
| 486 |
// byteOrder not needed |
|
| 487 |
if (buf) {
|
|
| 488 |
std::string rawValue = std::string(reinterpret_cast<const char*>(buf), len); |
|
| 489 |
if (rawValue.length() < 8) {
|
|
| 490 |
return 0; |
|
| 491 |
} |
|
| 492 |
this->_charsetId = CharsetInfo::charsetIdByCode(rawValue.substr(0, 8)); |
|
| 493 |
this->_comment = std::string(rawValue.substr(8)); |
|
| 494 |
if (byteOrder == littleEndian) {
|
|
| 495 |
Exiv2::convertStringCharset(this->_comment, "UCS-2LE", "UTF-8"); |
|
| 496 |
} else {
|
|
| 497 |
Exiv2::convertStringCharset(this->_comment, "UCS-2BE", "UTF-8"); |
|
| 498 |
} |
|
| 499 |
|
|
| 500 |
value_ = std::string("charset=\"") + CharsetInfo::name(charsetId()) +
|
|
| 501 |
"\" " + comment(); |
|
| 502 |
} |
|
| 503 |
return 0; |
|
| 504 |
} |
|
| 480 | 505 | |
| 481 | 506 |
std::ostream& CommentValue::write(std::ostream& os) const |
| 482 | 507 |
{
|
| ... | ... | |
| 486 | 511 |
} |
| 487 | 512 |
return os << comment(); |
| 488 | 513 |
} |
| 514 |
|
|
| 515 |
/*!
  @brief Write the encoded comment (8-byte character code followed by the
         comment text, as produced by encode()) to \em buf.

  @param buf       Destination buffer; must be large enough for the encoded
                   comment (see size()).
  @param byteOrder Byte order passed on to encode().
  @return Number of bytes written.
 */
long CommentValue::copy(byte* buf, ByteOrder byteOrder) const
{
    const std::string bytes = encode(byteOrder);
    const std::string::size_type numBytes = bytes.length();
    memcpy(buf, bytes.data(), numBytes);
    return numBytes;
}
|
| 521 |
|
|
| 522 |
long CommentValue::count() const |
|
| 523 |
{
|
|
| 524 |
return encode(littleEndian).length(); |
|
| 525 |
} |
|
| 526 |
|
|
| 527 |
long CommentValue::size() const |
|
| 528 |
{
|
|
| 529 |
return encode(littleEndian).length(); |
|
| 530 |
} |
|
| 531 |
|
|
| 532 |
/*!
  @brief Build the byte representation of the comment: the 8-byte character
         code of the current charset followed by the comment text.

  For the \c unicode charset the text is passed through
  Exiv2::convertStringCharset from UTF-8 to UCS-2 in the requested byte
  order before it is appended; for all other charsets the text is appended
  as stored.

  @param byteOrder Selects UCS-2LE or UCS-2BE for unicode comments.
  @return The encoded comment.
 */
std::string CommentValue::encode(ByteOrder byteOrder) const
{
    std::string encoded(CharsetInfo::code(charsetId()), 8);
    std::string text = comment();

    if (charsetId() == unicode) {
        if (byteOrder == littleEndian) {
            Exiv2::convertStringCharset(text, "UTF-8", "UCS-2LE");
        } else {
            Exiv2::convertStringCharset(text, "UTF-8", "UCS-2BE");
        }
    }

    encoded.append(text);
    return encoded;
}
|
| 489 | 556 | |
| 490 | 557 |
std::string CommentValue::comment() const |
| 491 | 558 |
{
|
| 492 |
if (value_.length() >= 8) return value_.substr(8); |
|
| 493 |
return ""; |
|
| 559 |
return _comment; |
|
| 494 | 560 |
} |
| 495 | 561 | |
| 496 | 562 |
CommentValue::CharsetId CommentValue::charsetId() const |
| 497 | 563 |
{
|
| 498 |
CharsetId charsetId = undefined; |
|
| 499 |
if (value_.length() >= 8) {
|
|
| 500 |
const std::string code = value_.substr(0, 8); |
|
| 501 |
charsetId = CharsetInfo::charsetIdByCode(code); |
|
| 502 |
} |
|
| 503 |
return charsetId; |
|
| 564 |
return this->_charsetId; |
|
| 504 | 565 |
} |
| 505 | 566 | |
| 506 | 567 |
CommentValue* CommentValue::clone_() const |
| value.hpp (working copy) | ||
|---|---|---|
| 581 | 581 |
*/ |
| 582 | 582 |
int read(const std::string& comment); |
| 583 | 583 |
//@} |
| 584 |
|
|
| 585 |
/*! |
|
| 586 |
@brief Read the value from a byte buffer |
|
| 587 |
*/ |
|
| 588 |
int read(const byte* buf, long len, ByteOrder /*byteOrder*/); |
|
| 584 | 589 | |
| 585 | 590 |
//! @name Accessors |
| 586 | 591 |
//@{
|
| ... | ... | |
| 590 | 595 |
read(const std::string& comment). |
| 591 | 596 |
*/ |
| 592 | 597 |
std::ostream& write(std::ostream& os) const; |
| 598 |
|
|
| 599 |
long copy(byte* buf, ByteOrder byteOrder) const; |
|
| 600 |
long count() const; |
|
| 601 |
long size() const; |
|
| 602 |
|
|
| 593 | 603 |
//! Return the comment (without a charset="..." prefix) |
| 594 | 604 |
std::string comment() const; |
| 595 | 605 |
//! Return the charset id of the comment |
| ... | ... | |
| 597 | 607 |
//@} |
| 598 | 608 | |
| 599 | 609 |
private: |
| 610 |
//! The character set of the comment string |
|
| 611 |
CharsetId _charsetId; |
|
| 612 |
|
|
| 613 |
//! the comment string |
|
| 614 |
std::string _comment; |
|
| 615 |
|
|
| 616 |
//! Encodes this value as an EXIF-comment |
|
| 617 |
std::string encode(ByteOrder byteOrder) const; |
|
| 618 |
|
|
| 600 | 619 |
//! Internal virtual copy constructor. |
| 601 | 620 |
EXV_DLLLOCAL virtual CommentValue* clone_() const; |
| 602 | 621 | |