i18n.cpp

Go to the documentation of this file.
00001 /*
00002  * i18n.cpp
00003  *
00004  * Copyright (c) 2010 Thomas A. Vaughan
00005  * All rights reserved.
00006  *
00007  *
00008  * Redistribution and use in source and binary forms, with or without
00009  * modification, are permitted provided that the following conditions are met:
00010  *     * Redistributions of source code must retain the above copyright
00011  *       notice, this list of conditions and the following disclaimer.
00012  *     * Redistributions in binary form must reproduce the above copyright
00013  *       notice, this list of conditions and the following disclaimer in the
00014  *       documentation and/or other materials provided with the distribution.
00015  *     * Neither the name of the <organization> nor the
00016  *       names of its contributors may be used to endorse or promote products
00017  *       derived from this software without specific prior written permission.
00018  *
00019  * THIS SOFTWARE IS PROVIDED BY THOMAS A. VAUGHAN ''AS IS'' AND ANY
00020  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00021  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00022  * DISCLAIMED. IN NO EVENT SHALL THOMAS A. VAUGHAN BE LIABLE FOR ANY
00023  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
00024  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00025  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00026  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00027  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00028  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00029  *
00030  *
00031  * Basic localized string management.  See i18n.h
00032  */
00033 
00034 // includes --------------------------------------------------------------------
00035 #include "i18n.h"               // always include our own header first
00036 
00037 #include "common/wave_ex.h"
00038 #include "datahash/datahash_text.h"
00039 #include "datahash/datahash_util.h"
00040 #include "perf/perf.h"
00041 #include "resources/resources-internal.h"
00042 #include "util/parsing.h"
00043 #include "util/string-buffer.h"
00044 
00045 
// Forward declarations for the resource registration hooks.  Referencing
// them from this file forces the resources to be linked in.
void registerWaveResource_i18n_language_codes_txt() throw();
void registerWaveResource_i18n_country_codes_txt() throw();
00049 
00050 namespace i18n {
00051 
00052 // interface destructors
00053 Manager::~Manager(void) throw() { }
00054 
00055 
00056 struct two_character_map_t {
00057         enum eConstants {
00058                 eDimension      = 26,   //
00059                 eSize           = eDimension * eDimension
00060         };
00061 
00062         void clear(void) throw() {
00063                         base = 'a';
00064                         memset(map, 0, eSize);
00065                 }
00066 
00067         // WARNING: returns -1 for bad input
00068         int getIndex(IN const char * code) const throw() {
00069                         ASSERT(code, "null");
00070                         char c0 = *code;
00071                         char c1 = *(code + 1);
00072 
00073                         if (c0 < base || c0 >= base + eDimension) {
00074                                 DPRINTF("Bad first character: %s", code);
00075                                 return -1;
00076                         }
00077                         if (c1 < base || c1 >= base + eDimension) {
00078                                 DPRINTF("Bad second character: %s", code);
00079                                 return -1;
00080                         }
00081 
00082                         int offset = (c0 - base) * eDimension + (c1 - base);
00083                         return offset;
00084                 }
00085 
00086         bool isValid(IN const char * code) const throw() {
00087                         int index = this->getIndex(code);
00088                         if (index < 0 || !map[index]) {
00089                                 DPRINTF("Not a valid %s code: '%s'",
00090                                     type, code);
00091                                 return false;
00092                         }
00093                         return true;
00094                 }
00095 
00096         // data fields
00097         char            base;   // 'a' or 'A', typically
00098         byte_t          map[eSize];
00099         char            type[16];
00100 };
00101 
00102 
00103 static two_character_map_t s_countryCodes;
00104 static two_character_map_t s_languageCodes;
00105 
00106 static const char * s_resourceNamespace         = "i18n";
00107 
00108 
00109 ////////////////////////////////////////////////////////////////////////////////
00110 //
00111 //      static helper methods
00112 //
00113 ////////////////////////////////////////////////////////////////////////////////
00114 
00115 static void
00116 loadTable
00117 (
00118 IO two_character_map_t& table,
00119 IN char base,
00120 IN const char * name
00121 )
00122 {
00123         ASSERT(base, "bad character base");
00124         ASSERT(name, "null");
00125 
00126         table.clear();
00127         table.base = base;
00128 
00129         DPRINTF("Number of resource namespaces: %d",
00130             getResourceNamespaceCount());
00131         DPRINTF("Name of first namespace: %s",
00132             getResourceNamespaceName(0));
00133         DPRINTF("Namespace: %s", s_resourceNamespace);
00134         DPRINTF("  name: %s", name);
00135 
00136         int bytes = getResourceSize(s_resourceNamespace, name);
00137         ASSERT(bytes > 0, "null");
00138 
00139         const byte_t * raw = getResourceData(s_resourceNamespace, name);
00140         const char * value = (const char *) raw;
00141         ASSERT(value, "null");
00142 
00143 //      DPRINTF("Resource '%s':\n%s", name, value);
00144 
00145         // okay, now run through each line...
00146         while (bytes > 0) {
00147                 int offset = table.getIndex(value);
00148 //              DPRINTF("%c%c : index %d", value[0], value[1], offset);
00149 
00150                 table.map[offset] = 1;
00151 
00152                 // walk forward to end of line
00153                 while (*value && '\n' != *value && bytes > 0) {
00154                         ++value;
00155                         --bytes;
00156                 }
00157                 if (*value) {
00158                         ++value;        // skip newline character
00159                         --bytes;
00160                 }
00161         }
00162 }
00163 
00164 
00165 
00166 static void
00167 loadTables
00168 (
00169 void
00170 )
00171 throw()
00172 {
00173         // this static methods loads our static tables from linked-in resource
00174         //      files
00175         static bool s_loaded = false;
00176         if (s_loaded)
00177                 return; // already loaded!
00178         s_loaded = true;        // set flag
00179 
00180         // register our resources manually
00181         registerWaveResource_i18n_country_codes_txt();
00182         registerWaveResource_i18n_language_codes_txt();
00183 
00184         strcpy(s_countryCodes.type, "country");
00185         strcpy(s_languageCodes.type, "language");
00186 
00187         loadTable(s_countryCodes, 'A', "country-codes.txt");
00188         loadTable(s_languageCodes, 'a', "language-codes.txt");
00189 }
00190 
00191 
00192 
00193 ////////////////////////////////////////////////////////////////////////////////
00194 //
00195 //      locale_t -- method implementations
00196 //
00197 ////////////////////////////////////////////////////////////////////////////////
00198 
00199 bool
00200 locale_t::isValid
00201 (
00202 void
00203 )
00204 const
00205 throw()
00206 {
00207         char buffer[eBufferSize];
00208         this->getLanguageCode(buffer);
00209 //      DPRINTF("Language code: %s", buffer);
00210         if (!isValidLanguageCode(buffer)) {
00211                 DPRINTF("Language code not valid: '%s'", buffer);
00212                 return false;
00213         }
00214 
00215         this->getCountryCode(buffer);
00216 //      DPRINTF("Country code: %s", buffer);
00217         if (!isValidCountryCode(buffer)) {
00218                 DPRINTF("Country code not valid: '%s'", buffer);
00219                 return false;
00220         }
00221 
00222         this->getEncoding(buffer);
00223 //      DPRINTF("Encoding: %s", buffer);
00224         if (!isValidEncoding(buffer)) {
00225                 DPRINTF("Encoding not valid: '%s'", buffer);
00226                 DPRINTF("  Only 'UTF-8' is supported for now!");
00227                 return false;
00228         }
00229 
00230         // got here?  All elements must be valid!
00231         return true;
00232 }
00233 
00234 
00235 
00236 ////////////////////////////////////////////////////////////////////////////////
00237 //
00238 //      Mgr -- class that implements the i18n::Manager interface
00239 //
00240 ////////////////////////////////////////////////////////////////////////////////
00241 
00242 class Mgr : public Manager {
00243 public:
00244         // constructor, destructor ---------------------------------------------
00245         ~Mgr(void) throw() { }
00246 
00247         // public class methods ------------------------------------------------
00248         void initialize(IN const char * locale);
00249 
00250         // i18n::Manager class interface methods -------------------------------
00251         const char * getLocale(void) const throw() { return m_locale.string; }
00252         void parseStrings(IO nstream::Stream * stream);
00253         void parseFolder(IN nstream::Folder * folder,
00254                                 IN const SetString * extensions,
00255                                 IN const char * filter);
00256         const char * getString(IN const char * id) const throw();
00257 
00258 private:
00259         // private typedefs ----------------------------------------------------
00260         struct string_record_t {
00261                 int     valueOffset;    // localized string
00262                 int     filenameOffset; // pointer to filename
00263         };
00264 
00265         typedef std::map<std::string, string_record_t> string_map_t;
00266         typedef std::map<std::string, int> filename_map_t;
00267 
00268         // private helper methods ----------------------------------------------
00269         int getFilenameOffset(IN const char * filename);
00270         const string_record_t * findString(IN const char * id) const throw();
00271         static nstream::eIterationFlag parseEntry(IN nstream::Entry * entry,
00272                                 IN void * context);
00273 
00274         // private member data -------------------------------------------------
00275         locale_t                m_locale;
00276         filename_map_t          m_filenames;
00277         smart_ptr<StringBuffer> m_sbuf; // list of all strings
00278         string_map_t            m_map;
00279 };
00280 
00281 
00282 
00283 void
00284 Mgr::initialize
00285 (
00286 IN const char * locale
00287 )
00288 {
00289         ASSERT(locale, "null");
00290 
00291         getLocaleFromString(locale, m_locale);
00292         ASSERT_THROW(m_locale.isValid(), "Invalid locale: " << locale);
00293 
00294         m_sbuf = StringBuffer::create();
00295         ASSERT(m_sbuf, "out of memory");
00296 }
00297 
00298 
00299 
00300 ////////////////////////////////////////////////////////////////////////////////
00301 //
00302 //      Mgr -- i18n::Manager class interface methods
00303 //
00304 ////////////////////////////////////////////////////////////////////////////////
00305 
00306 void
00307 Mgr::parseStrings
00308 (
00309 IN nstream::Stream * stream
00310 )
00311 {
00312         ASSERT(m_sbuf, "null");
00313         ASSERT(stream, "null");
00314 
00315         // get the File object associated with this stream
00316         smart_ptr<nstream::File> file = stream->getFile();
00317         ASSERT(file, "null file associated with named stream");
00318         const char * name = file->getName();
00319         //DPRINTF("Parsing file for strings: %s", name);
00320         int filenameOffset = this->getFilenameOffset(name);
00321 
00322         std::istream& instream = stream->getStream();
00323         ASSERT_THROW(!instream.bad(), "bad stream?");
00324 
00325         // keep parsing until eof or we hit "localeInfo"
00326         // NOTE: this isn't a simple hash structure!  We require "localeInfo"
00327         // to be the first token on the line, and we won't pay attention to
00328         // nesting of brackets etc.
00329         std::string line;
00330         eParseBehavior parse = eParse_Strip;
00331         while (instream.good()) {
00332                 line = getNextLineFromStream(instream, parse);
00333                 if (strncmp(line.c_str(), "localeInfo", 10))
00334                         continue;       // skip this line
00335 
00336                 // okay this is a locale ...maybe
00337                 const char * p = line.c_str();
00338                 p = expectFromString(p, "localeInfo", parse);
00339                 p = expectFromString(p, "{", parse);
00340 
00341                 // okay, read the rest as a hash
00342                 smart_ptr<Datahash> hash =
00343                     readHashFromStream("localeInfo", instream);
00344                 ASSERT(hash, "null");
00345 
00346                 // extract locale
00347                 const char * locale = ::getString(hash, "locale");
00348                 ASSERT_THROW(locale,
00349                     "No 'locale' specified in 'localeInfo' block?");
00350                 if (strcmp(locale, m_locale.getString())) {
00351                         DPRINTF("Skipping locale '%s'", locale);
00352                         continue;       // skip this block--wrong locale
00353                 }
00354 
00355                 // okay, this is our locale!
00356                 smart_ptr<Datahash> strings =
00357                     getSubhash(hash, "localizedStrings");
00358                 if (!strings)
00359                         break;  // no localized strings--skip
00360 
00361                 Datahash::iterator_t i;
00362                 strings->getIterator(i);
00363                 std::string key;
00364                 const hash_value_t * phv;
00365                 while ((phv = strings->getNextElementUnsafe(i, key))) {
00366                         if (eHashDataType_String != phv->type)
00367                                 continue;       // not a string--skip
00368 
00369                         const char * val = phv->text.c_str();
00370                         //DPRINTF("string %s --> '%s'", key.c_str(), val);
00371 
00372                         // already seen this key?
00373                         const string_record_t * psr =
00374                             this->findString(key.c_str());
00375                         ASSERT_THROW(!psr, "Duplicate string definition!  " <<
00376                             "String id = '" << key << "'.  Previous definition "
00377                             << "in file " <<
00378                             m_sbuf->getString(psr->filenameOffset) << ", second"
00379                             << " definition in file " <<
00380                             m_sbuf->getString(filenameOffset));
00381 
00382                         string_record_t sr;
00383                         sr.filenameOffset = filenameOffset;
00384                         sr.valueOffset = m_sbuf->appendString(val);
00385                         m_map[key] = sr;
00386                 }
00387         }
00388         //DPRINTF("%d strings in map", (int) m_map.size());
00389 }
00390 
00391 
00392 
00393 void
00394 Mgr::parseFolder
00395 (
00396 IN nstream::Folder * folder,
00397 IN const SetString * extensions,
00398 IN const char * filter
00399 )
00400 {
00401         ASSERT(m_sbuf, "null");
00402         ASSERT(folder, "null");
00403         // ASSERT(extensions) -- can be null!
00404         // ASSERT(filter) -- can be null!
00405 
00406         // recursively walk the folder
00407         bool visitHidden = false; // skip entries beginning with '.'
00408         walkChildFolders(folder, parseEntry, this, extensions, filter,
00409             visitHidden);
00410 }
00411 
00412 
00413 
00414 const char *
00415 Mgr::getString
00416 (
00417 IN const char * id
00418 )
00419 const
00420 throw()
00421 {
00422         ASSERT(id, "null");
00423 
00424         const string_record_t * psr = this->findString(id);
00425         if (!psr)
00426                 return NULL;
00427 
00428         ASSERT(m_sbuf, "null");
00429         return m_sbuf->getString(psr->valueOffset);
00430 }
00431 
00432 
00433 
00434 ////////////////////////////////////////////////////////////////////////////////
00435 //
00436 //      Mgr -- private helper methods
00437 //
00438 ////////////////////////////////////////////////////////////////////////////////
00439 
00440 int
00441 Mgr::getFilenameOffset
00442 (
00443 IN const char * filename
00444 )
00445 {
00446         ASSERT(filename, "null");
00447 
00448         // is this already in our set?
00449         filename_map_t::iterator i = m_filenames.find(filename);
00450         if (m_filenames.end() != i) {
00451                 return i->second;
00452         }
00453 
00454         // not already encountered--add it!
00455         int offset = m_sbuf->appendString(filename);
00456         m_filenames[filename] = offset;
00457         return offset;
00458 }
00459 
00460 
00461 
00462 const Mgr::string_record_t *
00463 Mgr::findString
00464 (
00465 IN const char * id
00466 )
00467 const
00468 throw()
00469 {
00470         ASSERT(id, "null");
00471 
00472         string_map_t::const_iterator i = m_map.find(id);
00473         if (m_map.end() == i)
00474                 return NULL;    // no such string
00475         return &i->second;
00476 }
00477 
00478 
00479 
00480 nstream::eIterationFlag
00481 Mgr::parseEntry
00482 (
00483 IN nstream::Entry * entry,
00484 IN void * context
00485 )
00486 {
00487         ASSERT(entry, "null");
00488         Mgr * pThis = (Mgr *) context;
00489         ASSERT(pThis, "null context?");
00490 
00491         nstream::File * file = dynamic_cast<nstream::File *>(entry);
00492         if (!file)
00493                 return nstream::eIterate_Continue;
00494 
00495         // okay, this is a file we care about!
00496         smart_ptr<nstream::Stream> stream = file->openStream();
00497         ASSERT_THROW(stream, "failed to open stream");
00498 
00499         pThis->parseStrings(stream);
00500         return nstream::eIterate_Continue;
00501 }
00502 
00503 
00504 
00505 ////////////////////////////////////////////////////////////////////////////////
00506 //
00507 //      Public APIs
00508 //
00509 ////////////////////////////////////////////////////////////////////////////////
00510 
00511 smart_ptr<Manager>
00512 Manager::create
00513 (
00514 IN const char * locale
00515 )
00516 {
00517         ASSERT(locale, "null");
00518 
00519         smart_ptr<Mgr> local = new Mgr;
00520         ASSERT(local, "out of memory");
00521 
00522         local->initialize(locale);
00523 
00524         return local;
00525 }
00526 
00527 
00528 
00529 void
00530 getLocaleFromString
00531 (
00532 IN const char * localeString,
00533 OUT locale_t& locale
00534 )
00535 {
00536         ASSERT(localeString, "null");
00537         locale.clear();
00538 
00539         ASSERT_THROW(strlen(localeString) <= locale_t::eMaxLength,
00540             "Locale string is too long: " << localeString);
00541 
00542         strcpy(locale.string, localeString);
00543 
00544         ASSERT_THROW(locale.isValid(),
00545             "Locale string is not valid: " << localeString);
00546 }
00547 
00548 
00549 
00550 bool
00551 isValidCountryCode
00552 (
00553 IN const char * code
00554 )
00555 {
00556         ASSERT(code, "null");
00557 
00558         loadTables();
00559 
00560         return s_countryCodes.isValid(code);
00561 }
00562 
00563 
00564 
00565 bool
00566 isValidLanguageCode
00567 (
00568 IN const char * code
00569 )
00570 {
00571         ASSERT(code, "null");
00572 
00573         loadTables();
00574 
00575         return s_languageCodes.isValid(code);
00576 }
00577 
00578 
00579 
00580 bool
00581 isValidEncoding
00582 (
00583 IN const char * encoding
00584 )
00585 {
00586         ASSERT(encoding, "null");
00587 
00588         // only support 'UTF-8' for now!
00589         return (!strcmp(encoding, "UTF-8"));
00590 }
00591 
00592 
00593 
00594 const char *
00595 getString
00596 (
00597 IN const Manager * mgr,
00598 IN const char * id
00599 )
00600 {
00601         ASSERT(id, "null");
00602         ASSERT(mgr, "null");
00603 
00604         const char * val = mgr->getString(id);
00605         if (val)
00606                 return val;
00607 
00608         // string wasn't found!  Return a temp
00609         const int bufsize = 256;
00610         static char buffer[bufsize];
00611 
00612 #ifdef WIN32
00613         _snprintf(
00614 #else   // WIN32
00615         snprintf(
00616 #endif  // WIN32
00617             buffer, bufsize,
00618             "Missing a localized string (id='%s') for locale '%s'",
00619             id, mgr->getLocale());
00620 
00621         return buffer;
00622 }
00623 
00624 
00625 
00626 const char *
00627 getHostLocale
00628 (
00629 void
00630 )
00631 {
00632         // try environment
00633         const char * val = getenv("LANG");
00634         if (val)
00635                 return val;
00636 
00637 #ifdef WIN32
00638         // windows: make a win32 API call.
00639         // NOTE: this call only works for Vista+.  need to support XP?
00640         const int bufsize = LOCALE_NAME_MAX_LENGTH;
00641         WCHAR locale[bufsize];
00642         int nChars = GetUserDefaultLocaleName(locale, bufsize);
00643         ASSERT_THROW(nChars > 0, "Failed to get default locale name");
00644 
00645         // need to translate back to UTF-8
00646         UINT codePage = CP_UTF8;
00647         DWORD dwFlags = 0;
00648         LPCSTR defaultChar = NULL;
00649         LPBOOL usedDefault = NULL;
00650         const int maxLocaleSize = 2 * LOCALE_NAME_MAX_LENGTH;
00651         static char s_localeUTF8[maxLocaleSize];
00652         int nBytes = WideCharToMultiByte(codePage, dwFlags,
00653             locale, nChars, s_localeUTF8, maxLocaleSize,
00654             defaultChar, usedDefault);
00655         ASSERT_THROW(nBytes > 0, "Failed to translate locale to UTF8");
00656         // Windows uses "-" (hyphen) rather than "_" (underscore)
00657         // translate that too...  (assumes RFC639)
00658         if (nBytes > 2 && '-' == s_localeUTF8[2]) {
00659                 s_localeUTF8[2] = '_';
00660         }
00661 
00662         // mandate UTF8 encoding
00663         strcpy(s_localeUTF8 + nBytes - 1, ".UTF-8");
00664         DPRINTF("win32 locale: '%s'", s_localeUTF8);
00665 
00666         return s_localeUTF8;
00667 #endif  // WIN32
00668 
00669         // unable to determine locale!
00670         DPRINTF("Unable to determine host's locale!");
00671         return NULL;
00672 }
00673 
00674 
00675 
00676 
00677 };      // i18n namespace
00678