Project

General

Profile

Bug #662 » unicode_support.patch

Leo Sutic, 07 Jan 2010 09:52

View differences:

convert.cpp (working copy)
1325 1325
        std::string outstr;
1326 1326
        EXV_ICONV_CONST char *inptr = const_cast<char *>(str.c_str());
1327 1327
        size_t inbytesleft = str.length();
1328

  
1328
        int outbytesProduced = 0;
1329 1329
        while (inbytesleft) {
1330 1330
            char outbuf[100];
1331 1331
            char *outptr = outbuf;
......
1335 1335
                              &inbytesleft,
1336 1336
                              &outptr,
1337 1337
                              &outbytesleft);
1338
            outbytesProduced += sizeof(outbuf) - 1 - outbytesleft;
1338 1339
            if (rc == size_t(-1) && errno != E2BIG) {
1339 1340
#ifndef SUPPRESS_WARNINGS
1340 1341
                std::cerr << "Warning: iconv: "
......
1345 1346
                break;
1346 1347
            }
1347 1348
            *outptr = '\0';
1348
            outstr.append(outbuf);
1349
            outstr.append(std::string(outbuf, outbytesProduced));
1349 1350
        }
1350 1351
        if (cd != (iconv_t)(-1)) {
1351 1352
            iconv_close(cd);
exiv2.cpp (working copy)
41 41
#include "exiv2.hpp"
42 42
#include "actions.hpp"
43 43
#include "utils.hpp"
44
#include "convert.hpp"
44 45
#include "i18n.h"      // NLS support.
45 46
#include "xmp.hpp"
46 47

  
......
117 118
     */
118 119
    bool parseLine(ModifyCmd& modifyCmd,
119 120
                   const std::string& line, int num);
120

  
121
    
122
    /*!
123
      @brief Parses a string containing backslash-escapes
124
      @param input Input string
125
     */
126
    std::string parseEscapes(const std::string& input);
121 127
}
122 128

  
123 129
// *****************************************************************************
......
981 987
        catch (const Exiv2::AnyError& error) {
982 988
            std::cerr << _("-M option") << " " << error << "\n";
983 989
            return false;
984
	}
990
    }
985 991
    } // parseCmdLines
986 992

  
987 993
    bool parseLine(ModifyCmd& modifyCmd, const std::string& line, int num)
......
1076 1082
                }
1077 1083
            }
1078 1084

  
1079
            value = line.substr(valStart, valEnd+1-valStart);
1085
            value = parseEscapes(line.substr(valStart, valEnd+1-valStart));
1080 1086
            std::string::size_type last = value.length()-1;
1081 1087
            if (   (value[0] == '"' && value[last] == '"')
1082 1088
                || (value[0] == '\'' && value[last] == '\'')) {
......
1107 1113
                 && cmdIdAndString[i].cmdString_ != cmdString; ++i) {}
1108 1114
        return cmdIdAndString[i].cmdId_;
1109 1115
    }
1110

  
1116
    
1117
    std::string parseEscapes(const std::string& input) 
1118
    {
1119
        std::string ucs2buffer = "";
1120
        for (unsigned int i = 0; i < input.length(); ++i) {
1121
            char ch = input[i];
1122
            if (ch == '\\') {
1123
                int escapeStart = i;
1124
                if (input.length() - 1 > i) {
1125
                    ++i;
1126
                    ch = input[i];
1127
                    switch (ch) {
1128
                        // Escaping of backslash
1129
                        case '\\':
1130
                        ucs2buffer.push_back('\0');
1131
                        ucs2buffer.push_back('\\');
1132
                        break;
1133
                        
1134
                        // Escaping of newline
1135
                        case 'n':
1136
                        ucs2buffer.push_back('\0');
1137
                        ucs2buffer.push_back('\n');
1138
                        break;
1139
                        
1140
                        // Escaping of tab
1141
                        case 't':
1142
                        ucs2buffer.push_back('\0');
1143
                        ucs2buffer.push_back('\n');
1144
                        break;
1145
                        
1146
                        // Escaping of unicode
1147
                        case 'u':
1148
                        if (input.length() - 4 > i) {
1149
                            int acc = 0;
1150
                            for (int j = 0; j < 4; ++j) {
1151
                                ++i;
1152
                                acc <<= 4;
1153
                                if (input[i] >= '0' && input[i] <= '9') {
1154
                                    acc |= input[i] - '0';
1155
                                } else if (input[i] >= 'a' && input[i] <= 'f') {
1156
                                    acc |= input[i] - 'a' + 10;
1157
                                } else if (input[i] >= 'A' && input[i] <= 'F') {
1158
                                    acc |= input[i] - 'A' + 10;
1159
                                } else {
1160
                                    acc = -1;
1161
                                    break;
1162
                                }
1163
                            }
1164
                            if (acc == -1) {
1165
                                ucs2buffer.push_back('\0');
1166
                                ucs2buffer.push_back('\\');
1167
                                i = escapeStart;
1168
                                break;
1169
                            }
1170
                            
1171
                            ucs2buffer.push_back((char) ((acc & 0xff00) >> 8));
1172
                            ucs2buffer.push_back((char) (acc & 0x00ff));
1173
                        } else {
1174
                            ucs2buffer.push_back('\0');
1175
                            ucs2buffer.push_back('\\');
1176
                            ucs2buffer.push_back('\0');
1177
                            ucs2buffer.push_back(ch);
1178
                        }
1179
                        break;
1180
                        
1181
                        default:
1182
                        ucs2buffer.push_back('\0');
1183
                        ucs2buffer.push_back('\\');
1184
                        ucs2buffer.push_back('\0');
1185
                        ucs2buffer.push_back(ch);
1186
                    }
1187
                } else {
1188
                    ucs2buffer.push_back('\0');
1189
                    ucs2buffer.push_back(ch);
1190
                }
1191
            } else {
1192
                ucs2buffer.push_back('\0');
1193
                ucs2buffer.push_back(ch);
1194
            }
1195
        }
1196
        
1197
        if (Exiv2::convertStringCharset (ucs2buffer, "UCS-2BE", "UTF-8")) {
1198
            return ucs2buffer;
1199
        } else {
1200
            return input;
1201
        }
1202
    }
1203
    
1111 1204
}
value.cpp (working copy)
34 34
// included header files
35 35
#include "value.hpp"
36 36
#include "types.hpp"
37
#include "convert.hpp"
37 38
#include "error.hpp"
38 39

  
39 40
// + standard includes
......
457 458
    int CommentValue::read(const std::string& comment)
458 459
    {
459 460
        std::string c = comment;
460
        CharsetId charsetId = undefined;
461
        this->_charsetId = undefined;
461 462
        if (comment.length() > 8 && comment.substr(0, 8) == "charset=") {
462 463
            std::string::size_type pos = comment.find_first_of(' ');
463 464
            std::string name = comment.substr(8, pos-8);
464 465
            // Strip quotes (so you can also specify the charset without quotes)
465 466
            if (name[0] == '"') name = name.substr(1);
466 467
            if (name[name.length()-1] == '"') name = name.substr(0, name.length()-1);
467
            charsetId = CharsetInfo::charsetIdByName(name);
468
            if (charsetId == invalidCharsetId) {
468
            this->_charsetId = CharsetInfo::charsetIdByName(name);
469
            if (_charsetId == invalidCharsetId) {
469 470
#ifndef SUPPRESS_WARNINGS
470 471
                std::cerr << "Warning: " << Error(28, name) << "\n";
471 472
#endif
......
474 475
            c.clear();
475 476
            if (pos != std::string::npos) c = comment.substr(pos+1);
476 477
        }
477
        const std::string code(CharsetInfo::code(charsetId), 8);
478
        return StringValueBase::read(code + c);
478
        this->_comment = c;
479
        int ret = StringValueBase::read(comment);
480
        
481
        return ret;
479 482
    }
483
    
484
    int CommentValue::read(const byte* buf, long len, ByteOrder byteOrder)
485
    {
486
        // byteOrder not needed
487
        if (buf) {
488
            std::string rawValue = std::string(reinterpret_cast<const char*>(buf), len);
489
            if (rawValue.length() < 8) {
490
                return 0;
491
            }
492
            this->_charsetId = CharsetInfo::charsetIdByCode(rawValue.substr(0, 8));
493
            this->_comment = std::string(rawValue.substr(8));
494
            if (byteOrder == littleEndian) {
495
                Exiv2::convertStringCharset(this->_comment, "UCS-2LE", "UTF-8");
496
            } else {
497
                Exiv2::convertStringCharset(this->_comment, "UCS-2BE", "UTF-8");
498
            }
499
            
500
            value_ = std::string("charset=\"") + CharsetInfo::name(charsetId()) +
501
                "\" " + comment();
502
        }
503
        return 0;
504
    }
480 505

  
481 506
    std::ostream& CommentValue::write(std::ostream& os) const
482 507
    {
......
486 511
        }
487 512
        return os << comment();
488 513
    }
514
    
515
    /*!
      @brief Write the encoded comment (character code followed by the
             comment payload, as produced by encode()) to \em buf.
      @param buf Destination buffer; must have room for the encoded bytes.
      @param byteOrder Byte order passed on to encode().
      @return Number of bytes written.
     */
    long CommentValue::copy(byte* buf, ByteOrder byteOrder) const
    {
        const std::string bytes = encode(byteOrder);
        const std::string::size_type written = bytes.length();
        memcpy(buf, bytes.c_str(), written);
        return written;
    }
521
    
522
    long CommentValue::count() const
523
    {
524
        return encode(littleEndian).length();
525
    }
526
    
527
    long CommentValue::size() const 
528
    {
529
        return encode(littleEndian).length();
530
    }
531
    
532
    /*!
      @brief Encode this value as an EXIF comment: the 8-byte character code
             followed by the comment payload.

      For the \em unicode charset the comment is converted from UTF-8 to
      UCS-2 in the requested byte order; for every other charset the
      comment is appended unchanged.

      @param byteOrder Byte order used for a UCS-2 payload.
      @return The encoded bytes.
     */
    std::string CommentValue::encode(ByteOrder byteOrder) const
    {
        std::string encoded(CharsetInfo::code(charsetId()), 8);

        if (charsetId() != unicode) {
            // Non-unicode payloads are stored as they are
            encoded.append(comment());
            return encoded;
        }

        std::string payload = comment();
        if (byteOrder == littleEndian) {
            Exiv2::convertStringCharset(payload, "UTF-8", "UCS-2LE");
        } else {
            Exiv2::convertStringCharset(payload, "UTF-8", "UCS-2BE");
        }
        encoded.append(payload);
        return encoded;
    }
489 556

  
490 557
    std::string CommentValue::comment() const
491 558
    {
492
        if (value_.length() >= 8) return value_.substr(8);
493
        return "";
559
        return _comment;
494 560
    }
495 561

  
496 562
    CommentValue::CharsetId CommentValue::charsetId() const
497 563
    {
498
        CharsetId charsetId = undefined;
499
        if (value_.length() >= 8) {
500
            const std::string code = value_.substr(0, 8);
501
            charsetId = CharsetInfo::charsetIdByCode(code);
502
        }
503
        return charsetId;
564
        return this->_charsetId;
504 565
    }
505 566

  
506 567
    CommentValue* CommentValue::clone_() const
value.hpp (working copy)
581 581
        */
582 582
        int read(const std::string& comment);
583 583
        //@}
584
        
585
        /*!
586
          @brief Read the value from a byte buffer
587
         */
588
        int read(const byte* buf, long len, ByteOrder /*byteOrder*/);
584 589

  
585 590
        //! @name Accessors
586 591
        //@{
......
590 595
          read(const std::string& comment).
591 596
         */
592 597
        std::ostream& write(std::ostream& os) const;
598
        
599
        long copy(byte* buf, ByteOrder byteOrder) const;
600
        long count() const;
601
        long size() const;
602
        
593 603
        //! Return the comment (without a charset="..." prefix)
594 604
        std::string comment() const;
595 605
        //! Return the charset id of the comment
......
597 607
        //@}
598 608

  
599 609
    private:
610
        //! The character set of the comment string
611
        CharsetId _charsetId;
612
        
613
        //! the comment string
614
        std::string _comment;
615
        
616
        //! Encodes this value as an EXIF-comment
617
        std::string encode(ByteOrder byteOrder) const;
618
        
600 619
        //! Internal virtual copy constructor.
601 620
        EXV_DLLLOCAL virtual CommentValue* clone_() const;
602 621

  
(1-1/5)