Bug #662 » unicode_support.patch
convert.cpp (working copy) | ||
---|---|---|
1325 | 1325 |
std::string outstr; |
1326 | 1326 |
EXV_ICONV_CONST char *inptr = const_cast<char *>(str.c_str()); |
1327 | 1327 |
size_t inbytesleft = str.length(); |
1328 | ||
1328 |
int outbytesProduced = 0; |
|
1329 | 1329 |
while (inbytesleft) { |
1330 | 1330 |
char outbuf[100]; |
1331 | 1331 |
char *outptr = outbuf; |
... | ... | |
1335 | 1335 |
&inbytesleft, |
1336 | 1336 |
&outptr, |
1337 | 1337 |
&outbytesleft); |
1338 |
outbytesProduced += sizeof(outbuf) - 1 - outbytesleft; |
|
1338 | 1339 |
if (rc == size_t(-1) && errno != E2BIG) { |
1339 | 1340 |
#ifndef SUPPRESS_WARNINGS |
1340 | 1341 |
std::cerr << "Warning: iconv: " |
... | ... | |
1345 | 1346 |
break; |
1346 | 1347 |
} |
1347 | 1348 |
*outptr = '\0'; |
1348 |
outstr.append(outbuf);
|
|
1349 |
outstr.append(std::string(outbuf, outbytesProduced));
|
|
1349 | 1350 |
} |
1350 | 1351 |
if (cd != (iconv_t)(-1)) { |
1351 | 1352 |
iconv_close(cd); |
exiv2.cpp (working copy) | ||
---|---|---|
41 | 41 |
#include "exiv2.hpp" |
42 | 42 |
#include "actions.hpp" |
43 | 43 |
#include "utils.hpp" |
44 |
#include "convert.hpp" |
|
44 | 45 |
#include "i18n.h" // NLS support. |
45 | 46 |
#include "xmp.hpp" |
46 | 47 | |
... | ... | |
117 | 118 |
*/ |
118 | 119 |
bool parseLine(ModifyCmd& modifyCmd, |
119 | 120 |
const std::string& line, int num); |
120 | ||
121 |
|
|
122 |
/*! |
|
123 |
@brief Parses a string containing backslash-escapes |
|
124 |
@param input Input string |
|
125 |
*/ |
|
126 |
std::string parseEscapes(const std::string& input); |
|
121 | 127 |
} |
122 | 128 | |
123 | 129 |
// ***************************************************************************** |
... | ... | |
981 | 987 |
catch (const Exiv2::AnyError& error) { |
982 | 988 |
std::cerr << _("-M option") << " " << error << "\n"; |
983 | 989 |
return false; |
984 |
}
|
|
990 |
}
|
|
985 | 991 |
} // parseCmdLines |
986 | 992 | |
987 | 993 |
bool parseLine(ModifyCmd& modifyCmd, const std::string& line, int num) |
... | ... | |
1076 | 1082 |
} |
1077 | 1083 |
} |
1078 | 1084 | |
1079 |
value = line.substr(valStart, valEnd+1-valStart);
|
|
1085 |
value = parseEscapes(line.substr(valStart, valEnd+1-valStart));
|
|
1080 | 1086 |
std::string::size_type last = value.length()-1; |
1081 | 1087 |
if ( (value[0] == '"' && value[last] == '"') |
1082 | 1088 |
|| (value[0] == '\'' && value[last] == '\'')) { |
... | ... | |
1107 | 1113 |
&& cmdIdAndString[i].cmdString_ != cmdString; ++i) {} |
1108 | 1114 |
return cmdIdAndString[i].cmdId_; |
1109 | 1115 |
} |
1110 | ||
1116 |
|
|
1117 |
std::string parseEscapes(const std::string& input) |
|
1118 |
{ |
|
1119 |
std::string ucs2buffer = ""; |
|
1120 |
for (unsigned int i = 0; i < input.length(); ++i) { |
|
1121 |
char ch = input[i]; |
|
1122 |
if (ch == '\\') { |
|
1123 |
int escapeStart = i; |
|
1124 |
if (input.length() - 1 > i) { |
|
1125 |
++i; |
|
1126 |
ch = input[i]; |
|
1127 |
switch (ch) { |
|
1128 |
// Escaping of backslash |
|
1129 |
case '\\': |
|
1130 |
ucs2buffer.push_back('\0'); |
|
1131 |
ucs2buffer.push_back('\\'); |
|
1132 |
break; |
|
1133 |
|
|
1134 |
// Escaping of newline |
|
1135 |
case 'n': |
|
1136 |
ucs2buffer.push_back('\0'); |
|
1137 |
ucs2buffer.push_back('\n'); |
|
1138 |
break; |
|
1139 |
|
|
1140 |
// Escaping of tab |
|
1141 |
case 't': |
|
1142 |
ucs2buffer.push_back('\0'); |
|
1143 |
ucs2buffer.push_back('\n'); |
|
1144 |
break; |
|
1145 |
|
|
1146 |
// Escaping of unicode |
|
1147 |
case 'u': |
|
1148 |
if (input.length() - 4 > i) { |
|
1149 |
int acc = 0; |
|
1150 |
for (int j = 0; j < 4; ++j) { |
|
1151 |
++i; |
|
1152 |
acc <<= 4; |
|
1153 |
if (input[i] >= '0' && input[i] <= '9') { |
|
1154 |
acc |= input[i] - '0'; |
|
1155 |
} else if (input[i] >= 'a' && input[i] <= 'f') { |
|
1156 |
acc |= input[i] - 'a' + 10; |
|
1157 |
} else if (input[i] >= 'A' && input[i] <= 'F') { |
|
1158 |
acc |= input[i] - 'A' + 10; |
|
1159 |
} else { |
|
1160 |
acc = -1; |
|
1161 |
break; |
|
1162 |
} |
|
1163 |
} |
|
1164 |
if (acc == -1) { |
|
1165 |
ucs2buffer.push_back('\0'); |
|
1166 |
ucs2buffer.push_back('\\'); |
|
1167 |
i = escapeStart; |
|
1168 |
break; |
|
1169 |
} |
|
1170 |
|
|
1171 |
ucs2buffer.push_back((char) ((acc & 0xff00) >> 8)); |
|
1172 |
ucs2buffer.push_back((char) (acc & 0x00ff)); |
|
1173 |
} else { |
|
1174 |
ucs2buffer.push_back('\0'); |
|
1175 |
ucs2buffer.push_back('\\'); |
|
1176 |
ucs2buffer.push_back('\0'); |
|
1177 |
ucs2buffer.push_back(ch); |
|
1178 |
} |
|
1179 |
break; |
|
1180 |
|
|
1181 |
default: |
|
1182 |
ucs2buffer.push_back('\0'); |
|
1183 |
ucs2buffer.push_back('\\'); |
|
1184 |
ucs2buffer.push_back('\0'); |
|
1185 |
ucs2buffer.push_back(ch); |
|
1186 |
} |
|
1187 |
} else { |
|
1188 |
ucs2buffer.push_back('\0'); |
|
1189 |
ucs2buffer.push_back(ch); |
|
1190 |
} |
|
1191 |
} else { |
|
1192 |
ucs2buffer.push_back('\0'); |
|
1193 |
ucs2buffer.push_back(ch); |
|
1194 |
} |
|
1195 |
} |
|
1196 |
|
|
1197 |
if (Exiv2::convertStringCharset (ucs2buffer, "UCS-2BE", "UTF-8")) { |
|
1198 |
return ucs2buffer; |
|
1199 |
} else { |
|
1200 |
return input; |
|
1201 |
} |
|
1202 |
} |
|
1203 |
|
|
1111 | 1204 |
} |
value.cpp (working copy) | ||
---|---|---|
34 | 34 |
// included header files |
35 | 35 |
#include "value.hpp" |
36 | 36 |
#include "types.hpp" |
37 |
#include "convert.hpp" |
|
37 | 38 |
#include "error.hpp" |
38 | 39 | |
39 | 40 |
// + standard includes |
... | ... | |
457 | 458 |
int CommentValue::read(const std::string& comment) |
458 | 459 |
{ |
459 | 460 |
std::string c = comment; |
460 |
CharsetId charsetId = undefined;
|
|
461 |
this->_charsetId = undefined;
|
|
461 | 462 |
if (comment.length() > 8 && comment.substr(0, 8) == "charset=") { |
462 | 463 |
std::string::size_type pos = comment.find_first_of(' '); |
463 | 464 |
std::string name = comment.substr(8, pos-8); |
464 | 465 |
// Strip quotes (so you can also specify the charset without quotes) |
465 | 466 |
if (name[0] == '"') name = name.substr(1); |
466 | 467 |
if (name[name.length()-1] == '"') name = name.substr(0, name.length()-1); |
467 |
charsetId = CharsetInfo::charsetIdByName(name); |
|
468 |
if (charsetId == invalidCharsetId) { |
|
468 |
this->_charsetId = CharsetInfo::charsetIdByName(name);
|
|
469 |
if (_charsetId == invalidCharsetId) {
|
|
469 | 470 |
#ifndef SUPPRESS_WARNINGS |
470 | 471 |
std::cerr << "Warning: " << Error(28, name) << "\n"; |
471 | 472 |
#endif |
... | ... | |
474 | 475 |
c.clear(); |
475 | 476 |
if (pos != std::string::npos) c = comment.substr(pos+1); |
476 | 477 |
} |
477 |
const std::string code(CharsetInfo::code(charsetId), 8); |
|
478 |
return StringValueBase::read(code + c); |
|
478 |
this->_comment = c; |
|
479 |
int ret = StringValueBase::read(comment); |
|
480 |
|
|
481 |
return ret; |
|
479 | 482 |
} |
483 |
|
|
484 |
int CommentValue::read(const byte* buf, long len, ByteOrder byteOrder) |
|
485 |
{ |
|
486 |
// byteOrder not needed |
|
487 |
if (buf) { |
|
488 |
std::string rawValue = std::string(reinterpret_cast<const char*>(buf), len); |
|
489 |
if (rawValue.length() < 8) { |
|
490 |
return 0; |
|
491 |
} |
|
492 |
this->_charsetId = CharsetInfo::charsetIdByCode(rawValue.substr(0, 8)); |
|
493 |
this->_comment = std::string(rawValue.substr(8)); |
|
494 |
if (byteOrder == littleEndian) { |
|
495 |
Exiv2::convertStringCharset(this->_comment, "UCS-2LE", "UTF-8"); |
|
496 |
} else { |
|
497 |
Exiv2::convertStringCharset(this->_comment, "UCS-2BE", "UTF-8"); |
|
498 |
} |
|
499 |
|
|
500 |
value_ = std::string("charset=\"") + CharsetInfo::name(charsetId()) + |
|
501 |
"\" " + comment(); |
|
502 |
} |
|
503 |
return 0; |
|
504 |
} |
|
480 | 505 | |
481 | 506 |
std::ostream& CommentValue::write(std::ostream& os) const |
482 | 507 |
{ |
... | ... | |
486 | 511 |
} |
487 | 512 |
return os << comment(); |
488 | 513 |
} |
514 |
|
|
515 |
/*!
  @brief Write the encoded comment (as produced by encode()) to \em buf.

  @param buf       Destination buffer; the caller must provide room for the
                   whole encoded value.
  @param byteOrder Byte order passed on to encode().
  @return Number of bytes written.
 */
long CommentValue::copy(byte* buf, ByteOrder byteOrder) const
{
    const std::string bytes = encode(byteOrder);
    const std::string::size_type n = bytes.length();
    memcpy(buf, bytes.c_str(), n);
    return n;
}
|
521 |
|
|
522 |
long CommentValue::count() const |
|
523 |
{ |
|
524 |
return encode(littleEndian).length(); |
|
525 |
} |
|
526 |
|
|
527 |
long CommentValue::size() const |
|
528 |
{ |
|
529 |
return encode(littleEndian).length(); |
|
530 |
} |
|
531 |
|
|
532 |
/*!
  @brief Build the encoded form of the comment.

  The result is the 8-byte character code for charsetId() followed by the
  comment text. For the Unicode charset the text is first converted from
  UTF-8 to UCS-2 in the requested byte order; for every other charset it is
  appended unchanged.

  @param byteOrder Selects UCS-2LE or UCS-2BE for Unicode comments.
  @return The encoded bytes.
 */
std::string CommentValue::encode(ByteOrder byteOrder) const
{
    std::string payload = comment();
    if (charsetId() == unicode) {
        if (byteOrder == littleEndian) {
            Exiv2::convertStringCharset(payload, "UTF-8", "UCS-2LE");
        }
        else {
            Exiv2::convertStringCharset(payload, "UTF-8", "UCS-2BE");
        }
    }

    std::string encoded(CharsetInfo::code(charsetId()), 8);
    encoded.append(payload);
    return encoded;
}
|
489 | 556 | |
490 | 557 |
std::string CommentValue::comment() const |
491 | 558 |
{ |
492 |
if (value_.length() >= 8) return value_.substr(8); |
|
493 |
return ""; |
|
559 |
return _comment; |
|
494 | 560 |
} |
495 | 561 | |
496 | 562 |
CommentValue::CharsetId CommentValue::charsetId() const |
497 | 563 |
{ |
498 |
CharsetId charsetId = undefined; |
|
499 |
if (value_.length() >= 8) { |
|
500 |
const std::string code = value_.substr(0, 8); |
|
501 |
charsetId = CharsetInfo::charsetIdByCode(code); |
|
502 |
} |
|
503 |
return charsetId; |
|
564 |
return this->_charsetId; |
|
504 | 565 |
} |
505 | 566 | |
506 | 567 |
CommentValue* CommentValue::clone_() const |
value.hpp (working copy) | ||
---|---|---|
581 | 581 |
*/ |
582 | 582 |
int read(const std::string& comment); |
583 | 583 |
//@} |
584 |
|
|
585 |
/*! |
|
586 |
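@brief Read the value from a byte buffer: an 8-byte character code followed by the comment text. \em byteOrder selects the UCS-2 byte order used to decode the text. |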
|
587 |
*/ |
|
588 |
int read(const byte* buf, long len, ByteOrder /*byteOrder*/); |
|
584 | 589 | |
585 | 590 |
//! @name Accessors |
586 | 591 |
//@{ |
... | ... | |
590 | 595 |
read(const std::string& comment). |
591 | 596 |
*/ |
592 | 597 |
std::ostream& write(std::ostream& os) const; |
598 |
|
|
599 |
long copy(byte* buf, ByteOrder byteOrder) const; |
|
600 |
long count() const; |
|
601 |
long size() const; |
|
602 |
|
|
593 | 603 |
//! Return the comment (without a charset="..." prefix) |
594 | 604 |
std::string comment() const; |
595 | 605 |
//! Return the charset id of the comment |
... | ... | |
597 | 607 |
//@} |
598 | 608 | |
599 | 609 |
private: |
610 |
//! The character set of the comment string |
|
611 |
CharsetId _charsetId; |
|
612 |
|
|
613 |
//! the comment string |
|
614 |
std::string _comment; |
|
615 |
|
|
616 |
//! Encodes this value as an EXIF-comment |
|
617 |
std::string encode(ByteOrder byteOrder) const; |
|
618 |
|
|
600 | 619 |
//! Internal virtual copy constructor. |
601 | 620 |
EXV_DLLLOCAL virtual CommentValue* clone_() const; |
602 | 621 |