Project

General

Profile

RE: Charset conversion » exiv2-iptc-charset.patch

Vladimir Nadvornik, 12 Sep 2009 03:07

View differences:

iptc.cpp (working copy)
229 229
        return iptcMetadata_.erase(pos);
230 230
    }
231 231

  
232
    const char *IptcData::detectCharset() const
233
    {
234
        const_iterator pos = findKey(IptcKey("Iptc.Envelope.CharacterSet"));
235
        if (pos != end()) {
236
            const std::string value = pos->toString();
237
            if (!pos->value().ok()) {
238
                if (value == "\033%G")
239
                    return "UTF-8";
240
                // other values are probably not practically relevant
241
            }
242
        }
243

  
244
        bool ascii = true;
245
        bool utf8 = true;
246

  
247
        for (pos = begin(); pos != end(); ++pos) {
248
            std::string value = pos->toString();
249
            if (pos->value().ok()) {
250
                int seqCount = 0;
251
                std::string::iterator i;
252
                for (i = value.begin(); i != value.end(); ++i) {
253
                    char c = *i;
254
                    if (seqCount) {
255
                        if ((c & 0xc0) != 0x80) {
256
                            utf8 = false;
257
                            break;
258
                        }
259
                        --seqCount;
260
                    }
261
                    else {
262
                        if (c & 0x80) ascii = false;
263
                        else continue; // ascii character
264

  
265
                        if      ((c & 0xe0) == 0xc0) seqCount = 1;
266
                        else if ((c & 0xf0) == 0xe0) seqCount = 2;
267
                        else if ((c & 0xf8) == 0xf0) seqCount = 3;
268
                        else if ((c & 0xfc) == 0xf8) seqCount = 4;
269
                        else if ((c & 0xfe) == 0xfc) seqCount = 5;
270
                        else {
271
                            utf8 = false;
272
                            break;
273
                        }
274
                    }
275
                }
276
                if (seqCount) utf8 = false; // unterminated seq
277
                if (!utf8) break;
278
            }
279
        }
280

  
281
        if (ascii) return "ASCII";
282
        if (utf8) return "UTF-8";
283
        return NULL;
284
    }
285
    
232 286
    const byte IptcParser::marker_ = 0x1C;          // Dataset marker
233 287

  
234 288
    int IptcParser::decode(
convert.hpp (working copy)
69 69
    EXIV2API void syncExifWithXmp(ExifData& exifData, XmpData& xmpData);
70 70

  
71 71
    //! Convert (copy) IPTC datasets to XMP properties.
72
    EXIV2API void copyIptcToXmp(const IptcData& iptcData, XmpData& xmpData);
72
    EXIV2API void copyIptcToXmp(const IptcData& iptcData, XmpData& xmpData, const char *iptcCharset = 0);
73 73
    //! Convert (move) IPTC datasets to XMP properties, remove converted IPTC datasets.
74
    EXIV2API void moveIptcToXmp(IptcData& iptcData, XmpData& xmpData);
74
    EXIV2API void moveIptcToXmp(IptcData& iptcData, XmpData& xmpData, const char *iptcCharset = 0);
75 75

  
76 76
    //! Convert (copy) XMP properties to IPTC datasets.
77 77
    EXIV2API void copyXmpToIptc(const XmpData& xmpData, IptcData& iptcData);
iptc.hpp (working copy)
304 304
          @brief Return the exact size of all contained IPTC metadata
305 305
         */
306 306
        long size() const;
307
        /*!
308
          @brief Return the metadata charset name or 0
309
         */
310
        const char *detectCharset() const;
307 311
        //@}
308 312

  
309 313
    private:
convert.cpp (working copy)
36 36
#include "exif.hpp"
37 37
#include "iptc.hpp"
38 38
#include "xmp.hpp"
39
#include "futils.hpp"
39 40
#include "convert.hpp"
40 41

  
41 42
// + standard includes
......
47 48
# define snprintf _snprintf
48 49
#endif
49 50

  
51
#ifdef EXV_HAVE_ICONV
52
# include <iconv.h>
53
# include <errno.h>
54
#endif
55

  
50 56
// Adobe XMP Toolkit
51 57
#ifdef EXV_HAVE_XMP_TOOLKIT
52 58
# define TXMP_STRING_TYPE std::string
......
66 72
      The return code indicates if the operation was successful.
67 73
     */
68 74
    bool getTextValue(std::string& value, const Exiv2::XmpData::iterator& pos);
75

  
76
    /*!
77
      @brief Convert string charset with iconv.
78
     */
79
    bool convertStringCharset(std::string &str, const char* from, const char* to);
80

  
69 81
}
70 82

  
71 83
// *****************************************************************************
......
97 109
        //! Constructor for Exif tags and XMP properties.
98 110
        Converter(ExifData& exifData, XmpData& xmpData);
99 111
        //! Constructor for Iptc tags and XMP properties.
100
        Converter(IptcData& iptcData, XmpData& xmpData);
112
        Converter(IptcData& iptcData, XmpData& xmpData, const char *iptcCharset = 0);
101 113
        //@}
102 114

  
103 115
        //! @name Manipulators
......
277 289
        ExifData *exifData_;
278 290
        IptcData *iptcData_;
279 291
        XmpData  *xmpData_;
292
        const char *iptcCharset_;
280 293

  
281 294
    }; // class Converter
282 295

  
......
411 424
    };
412 425

  
413 426
    Converter::Converter(ExifData& exifData, XmpData& xmpData)
414
        : erase_(false), overwrite_(true), exifData_(&exifData), iptcData_(0), xmpData_(&xmpData)
427
        : erase_(false), overwrite_(true), exifData_(&exifData), iptcData_(0), xmpData_(&xmpData), iptcCharset_(0)
415 428
    {
416 429
    }
417 430

  
418
    Converter::Converter(IptcData& iptcData, XmpData& xmpData)
419
        : erase_(false), overwrite_(true), exifData_(0), iptcData_(&iptcData), xmpData_(&xmpData)
431
    Converter::Converter(IptcData& iptcData, XmpData& xmpData, const char *iptcCharset)
432
        : erase_(false), overwrite_(true), exifData_(0), iptcData_(&iptcData), xmpData_(&xmpData), iptcCharset_(iptcCharset)
420 433
    {
421 434
    }
422 435

  
......
1056 1069
                    ++pos;
1057 1070
                    continue;
1058 1071
                }
1072
                if (iptcCharset_) convertStringCharset(value, iptcCharset_, "UTF-8");
1059 1073
                (*xmpData_)[to] = value;
1060 1074
                if (erase_) {
1061 1075
                    pos = iptcData_->erase(pos);
......
1208 1222
#endif
1209 1223
    }
1210 1224

  
1225

  
1211 1226
    // *************************************************************************
1212 1227
    // free functions
1213 1228
    void copyExifToXmp(const ExifData& exifData, XmpData& xmpData)
......
1242 1257
        converter.syncExifWithXmp();
1243 1258
    }
1244 1259

  
1245
    void copyIptcToXmp(const IptcData& iptcData, XmpData& xmpData)
1260
    void copyIptcToXmp(const IptcData& iptcData, XmpData& xmpData, const char *iptcCharset)
1246 1261
    {
1247
        Converter converter(const_cast<IptcData&>(iptcData), xmpData);
1262
        if (!iptcCharset) iptcCharset = iptcData.detectCharset();
1263
        if (!iptcCharset) iptcCharset = "ISO-8859-1";
1264

  
1265
        Converter converter(const_cast<IptcData&>(iptcData), xmpData, iptcCharset);
1248 1266
        converter.cnvToXmp();
1249 1267
    }
1250 1268

  
1251
    void moveIptcToXmp(IptcData& iptcData, XmpData& xmpData)
1269
    void moveIptcToXmp(IptcData& iptcData, XmpData& xmpData, const char *iptcCharset)
1252 1270
    {
1253
        Converter converter(const_cast<IptcData&>(iptcData), xmpData);
1271
        if (!iptcCharset) iptcCharset = iptcData.detectCharset();
1272
        if (!iptcCharset) iptcCharset = "ISO-8859-1";
1273
        Converter converter(const_cast<IptcData&>(iptcData), xmpData, iptcCharset);
1254 1274
        converter.setErase();
1255 1275
        converter.cnvToXmp();
1256 1276
    }
......
1259 1279
    {
1260 1280
        Converter converter(iptcData, const_cast<XmpData&>(xmpData));
1261 1281
        converter.cnvFromXmp();
1282
        iptcData["Iptc.Envelope.CharacterSet"] = "\033%G"; // indicate UTF-8 encoding
1262 1283
    }
1263 1284

  
1264 1285
    void moveXmpToIptc(XmpData& xmpData, IptcData& iptcData)
......
1266 1287
        Converter converter(iptcData, const_cast<XmpData&>(xmpData));
1267 1288
        converter.setErase();
1268 1289
        converter.cnvFromXmp();
1290
        iptcData["Iptc.Envelope.CharacterSet"] = "\033%G"; // indicate UTF-8 encoding
1269 1291
    }
1270 1292

  
1271 1293
}                                       // namespace Exiv2
......
1301 1323
        return pos->value().ok();
1302 1324
    }
1303 1325

  
1326
    /*!
      @brief Convert the character encoding of a string in place with iconv.

      @param str  String to convert. It is only modified if the whole
                  conversion succeeds.
      @param from Name of the source charset, as passed to iconv_open.
      @param to   Name of the target charset, as passed to iconv_open.
      @return true if the string was converted, or if \em from and \em to
              are the same name so nothing had to be done; false if a charset
              name is null, iconv support is not compiled in
              (EXV_HAVE_ICONV undefined), or iconv reports an error.
     */
    bool convertStringCharset(std::string &str, const char* from, const char* to)
    {
        if (!from || !to) return false; // no charset name to work with
        if (std::string(from) == to) return true; // nothing to do
#if defined EXV_HAVE_ICONV
        iconv_t cd = iconv_open(to, from);
        if (cd == (iconv_t)(-1)) {
#ifndef SUPPRESS_WARNINGS
            std::cerr << "Warning: iconv_open: " << Exiv2::strError() << "\n";
#endif
            return false;
        }
        bool ret = true;
        std::string outstr;
        char *inptr = const_cast<char *>(str.c_str());
        size_t inbytesleft = str.length();

        // Convert in fixed-size chunks; E2BIG only means the chunk buffer is
        // full and the call has to be repeated for the remaining input.
        while (inbytesleft) {
            char outbuf[100];
            char *outptr = outbuf;
            size_t outbytesleft = sizeof(outbuf);
            size_t rc = iconv(cd,
                              &inptr,
                              &inbytesleft,
                              &outptr,
                              &outbytesleft);
            if (rc == size_t(-1) && errno != E2BIG) {
#ifndef SUPPRESS_WARNINGS
                std::cerr << "Warning: iconv: "
                          << Exiv2::strError()
                          << " inbytesleft = " << inbytesleft << "\n";
#endif
                ret = false;
                break;
            }
            // Append by length, not as a C string: the converted bytes may
            // legitimately contain '\0' (e.g. UTF-16 output).
            outstr.append(outbuf, static_cast<size_t>(outptr - outbuf));
        }
        iconv_close(cd);

        if (ret) str = outstr;
        return ret;
#else // !EXV_HAVE_ICONV
        return false;
#endif // EXV_HAVE_ICONV
    }
1374

  
1304 1375
}
datasets.cpp (working copy)
141 141
                "invocation or designation of coded character sets. The control functions follow "
142 142
                "the ISO 2022 standard and may consist of the escape control "
143 143
                "character and one or more graphic characters."),
144
                false, false, 0, 32, Exiv2::undefined, IptcDataSets::envelope, ""),
144
                false, false, 0, 32, Exiv2::string, IptcDataSets::envelope, ""),
145 145
        DataSet(IptcDataSets::UNO, "UNO", N_("Unique Name Object"),
146 146
                N_("This tag provide a globally unique "
147 147
                "identification for objects as specified in the IIM, independent of "
    (1-1/1)