00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #include "parsing.h"
00036
00037 #include "common/wave_ex.h"
00038
00039
00040
00041 static const byte_t s_maskHigh = 0x80;
00042 static const byte_t s_maskTwoHigh = 0xC0;
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058 void
00059 utf8_char_t::setToReplacement
00060 (
00061 void
00062 )
00063 throw()
00064 {
00065
00066
00067
00068 nBytes = 3;
00069 value[0] = (char) 0xEF;
00070 value[1] = (char) 0xBF;
00071 value[2] = (char) 0xBD;
00072 value[3] = 0;
00073 }
00074
00075
00076
00077 int
00078 utf8_char_t::getValue
00079 (
00080 void
00081 )
00082 const
00083 throw()
00084 {
00085 if (1 == nBytes) {
00086 return value[0];
00087 } else if (nBytes > 1 && nBytes <= eMaxChars) {
00088 int nBits = 7 - nBytes;
00089 int retval = 0;
00090 for (int i = 0; i < nBytes; ++i) {
00091
00092 int mask = 1 << (nBits - 1);
00093 for (int j = 0; j < nBits; ++j) {
00094 retval *= 2;
00095 if (((byte_t) value[i]) & mask) {
00096 retval += 1;
00097 }
00098 mask /= 2;
00099 }
00100 nBits = 6;
00101 }
00102 return retval;
00103 } else {
00104 ASSERT(false, "Bad byte count for utf-8 character: %d", nBytes);
00105 }
00106
00107 ASSERT(false, "should never get here!");
00108 return 0;
00109 }
00110
00111
00112
00113 void
00114 utf8_char_t::dump
00115 (
00116 IN const char * title
00117 )
00118 const
00119 throw()
00120 {
00121 ASSERT(title, "null");
00122
00123 DPRINTF("%s (utf-8 char): %d bytes value=0x%04x '%s'",
00124 title, nBytes, getValue(), value);
00125 }
00126
00127
00128
00129 bool
00130 getUTF8CharacterFromStream
00131 (
00132 IN std::istream& stream,
00133 OUT utf8_char_t& c
00134 )
00135 {
00136 ASSERT_THROW(stream.good(), "bad stream reading UTF8 character");
00137 c.clear();
00138
00139
00140 char a;
00141 stream.read(&a, 1);
00142 if (stream.eof()) {
00143 return false;
00144 }
00145
00146
00147 if (isLeadingUTF8Byte(a)) {
00148 c.value[0] = a;
00149 c.nBytes = getUTF8ByteCount(a);
00150 if (c.nBytes < 1) {
00151 c.setToReplacement();
00152 return true;
00153 }
00154 } else {
00155 c.setToReplacement();
00156 return true;
00157 }
00158
00159
00160 for (int i = 1; i < c.nBytes; ++i) {
00161 ASSERT_THROW(stream.good(),
00162 "bad stream reading next byte of UTF8 character");
00163 stream.read(&a, 1);
00164 if (!isTrailingUTF8Byte(a)) {
00165 c.setToReplacement();
00166 return true;
00167 }
00168 c.value[i] = a;
00169 }
00170 c.value[c.nBytes] = 0;
00171 return true;
00172 }
00173
00174
00175
00176 const char *
00177 getUTF8CharacterFromString
00178 (
00179 IN const char * input,
00180 OUT utf8_char_t& c
00181 )
00182 {
00183 ASSERT(input, "null");
00184 c.clear();
00185
00186
00187 char a = *input;
00188 ++input;
00189
00190 if (isLeadingUTF8Byte(a)) {
00191 c.value[0] = a;
00192 c.nBytes = getUTF8ByteCount(a);
00193 if (c.nBytes < 1) {
00194 c.setToReplacement();
00195 return input;
00196 }
00197 } else {
00198 c.setToReplacement();
00199 return input;
00200 }
00201
00202
00203 for (int i = 1; i < c.nBytes; ++i) {
00204 char a = *input;
00205 ++input;
00206 if (!isTrailingUTF8Byte(a)) {
00207 c.setToReplacement();
00208 return input;
00209 }
00210 c.value[i] = a;
00211 }
00212 c.value[c.nBytes] = 0;
00213
00214
00215 return input;
00216 }
00217
00218
00219
00220 int
00221 getUTF8ByteCount
00222 (
00223 IN char a
00224 )
00225 {
00226 byte_t b = (byte_t) a;
00227
00228
00229 if (!(s_maskHigh & b))
00230 return 1;
00231
00232
00233 ASSERT_THROW((s_maskTwoHigh & b) == s_maskTwoHigh,
00234 "bad leading UTF8 character: " << (int) b);
00235
00236
00237 byte_t mask = 0x20;
00238 int count = 2;
00239 while (mask & b) {
00240 mask /= 2;
00241 ++count;
00242 }
00243 if (count > utf8_char_t::eMaxChars) {
00244 return -1;
00245 }
00246
00247 return count;
00248 }
00249
00250
00251
00252 std::string
00253 getNextLineFromStream
00254 (
00255 IN std::istream& stream,
00256 IN eParseBehavior behavior
00257 )
00258 {
00259 ASSERT(stream.good(), "bad stream?");
00260 ASSERT(!(eParse_Invalid & behavior), "bad behavior");
00261
00262
00263
00264 std::string output;
00265
00266 bool in_quote = false;
00267 bool in_comment = false;
00268
00269 while (true) {
00270 if (stream.fail() || !stream.good()) {
00271 WAVE_EX(wex);
00272 wex << "Failure reading line from stream.";
00273 }
00274
00275 char a;
00276 stream.read(&a, 1);
00277 if (stream.eof())
00278 break;
00279
00280 if ('\n' == a)
00281 break;
00282
00283 if ((eParse_RespectQuotes & behavior) &&
00284 '\"' == a)
00285 in_quote = !in_quote;
00286
00287 if (!in_quote &&
00288 '#' == a &&
00289 (eParse_StripComments & behavior))
00290 in_comment = true;
00291
00292 if (!in_comment) {
00293
00294 if (!(eParse_StripBogus & behavior) ||
00295 !isBogus(a)) {
00296 output += a;
00297 }
00298 }
00299 }
00300
00301 return output;
00302 }
00303
00304
00305
00306 const char *
00307 getNextTokenFromString
00308 (
00309 IN const char * input,
00310 OUT std::string& token,
00311 IN eParseBehavior behavior
00312 )
00313 {
00314 ASSERT(input, "null input buffer");
00315 ASSERT(!(eParse_Invalid & behavior), "invalid behavior");
00316 token.clear();
00317
00318
00319
00320 const char * p = input;
00321
00322
00323 while (*p && isSpace(*p)) {
00324 ++p;
00325 }
00326
00327
00328 if ((eParse_RespectQuotes & behavior) && '\"' == *p) {
00329 ++p;
00330 while (*p && '\"' != *p) {
00331 token += *p;
00332 ++p;
00333 }
00334 if ('"' == *p) {
00335 ++p;
00336 } else {
00337 DPRINTF("WARNING unterminated quote token: %s",
00338 token.c_str());
00339 }
00340 } else {
00341
00342
00343 while (*p && !isSpace(*p)) {
00344 token += *p;
00345 ++p;
00346 }
00347 }
00348
00349
00350 while (*p && !isSpace(*p)) {
00351 ++p;
00352 }
00353
00354
00355
00356
00357 return p;
00358 }
00359
00360
00361
00362 const char *
00363 getNextTokenFromString
00364 (
00365 IN const char * input,
00366 IO char * buffer,
00367 IN int buffer_size,
00368 IN eParseBehavior behavior,
00369 OUT int& chars
00370 )
00371 throw()
00372 {
00373 ASSERT(input, "null");
00374 ASSERT(buffer, "null");
00375 ASSERT(buffer_size > 0, "Bad buffer size: %d", buffer_size);
00376 ASSERT(eParse_None == behavior, "unsupported parse behavior!");
00377 chars = 0;
00378
00379 const char * p = input;
00380
00381
00382 for (; *p && isSpace(*p); ++p) { }
00383
00384
00385 for (; *p && !isSpace(*p); ++p) {
00386 if (chars < buffer_size) {
00387 buffer[chars] = *p;
00388 }
00389 ++chars;
00390 }
00391
00392
00393 if (chars < buffer_size) {
00394 buffer[chars] = 0;
00395 } else {
00396 buffer[buffer_size - 1] = 0;
00397 }
00398
00399
00400 return p;
00401 }
00402
00403
00404
00405 const char *
00406 expectFromString
00407 (
00408 IN const char * input,
00409 IN const char * expect,
00410 IN eParseBehavior parse
00411 )
00412 {
00413 ASSERT(input, "null");
00414 ASSERT(expect, "null");
00415
00416
00417 while (*input && isSpace(*input)) {
00418 ++input;
00419 }
00420
00421
00422 const char * p = expect;
00423 while (*p) {
00424 ASSERT_THROW(*input == *p,
00425 "Expected token '" << expect << "' not found");
00426 ++p;
00427 ++input;
00428 }
00429
00430
00431 ASSERT_THROW(!*input || isSpace(*input),
00432 "Expected token '" << expect << "' has trailing characters");
00433
00434
00435 return input;
00436 }
00437
00438
00439
00440 bool
00441 isBogus
00442 (
00443 IN char a
00444 )
00445 throw()
00446 {
00447 return (a == '\r');
00448 }
00449
00450
00451
00452 char *
00453 stripComments
00454 (
00455 IN char * line
00456 )
00457 throw()
00458 {
00459 ASSERT(line, "null");
00460
00461 bool in_quotes = false;
00462 char * p = line;
00463 while (*p) {
00464 if ('\"' == *p) {
00465 in_quotes = !in_quotes;
00466 } else if (!in_quotes && '#' == *p) {
00467 *p = 0;
00468 return line;
00469 }
00470 ++p;
00471 }
00472
00473
00474 return line;
00475 }
00476
00477
00478
00479 char *
00480 stripBogus
00481 (
00482 IN char * line
00483 )
00484 throw()
00485 {
00486 char * src = line;
00487 char * dst = line;
00488
00489 while (*src) {
00490 if (!isBogus(*src)) {
00491 *dst = *src;
00492 ++dst;
00493 }
00494 ++src;
00495 }
00496 *dst = 0;
00497 return line;
00498 }
00499
00500
00501
00502 char *
00503 lowercase
00504 (
00505 IN char * wext
00506 )
00507 throw()
00508 {
00509 ASSERT(wext, "null");
00510
00511 char * p = wext;
00512 while (*p) {
00513 *p = tolower(*p);
00514 ++p;
00515 }
00516
00517 return wext;
00518 }
00519
00520
00521
00522 void
00523 getDictionaryFromString
00524 (
00525 IN const char * cursor,
00526 IN const char * info,
00527 OUT dictionary_t& dict
00528 )
00529 {
00530 ASSERT(cursor, "null string passed to getDictionaryFromString()");
00531 ASSERT(info, "null debug info passed to getDictionaryFromString()");
00532 dict.clear();
00533
00534 std::string key, value;
00535
00536
00537 while (true) {
00538 cursor = getNextTokenFromString(cursor, key, eParse_None);
00539 if ("" == key) {
00540 break;
00541 }
00542
00543
00544 dictionary_t::iterator i = dict.find(key);
00545 if (dict.end() != i) {
00546 WAVE_EX(wex);
00547 wex << "Invalid key/value string for '" << info;
00548 wex << "'. Key '" << key << "' appears more than once.";
00549 }
00550
00551 cursor = getNextTokenFromString(cursor, value,
00552 eParse_RespectQuotes);
00553 dict[key] = value;
00554 }
00555 }
00556
00557
00558
00559 const char *
00560 getValue
00561 (
00562 IN const dictionary_t& data,
00563 IN const char * key
00564 )
00565 {
00566 dictionary_t::const_iterator i = data.find(key);
00567 if (data.end() == i)
00568 return NULL;
00569 return i->second.c_str();
00570 }
00571
00572
00573
00574 const char *
00575 getRequiredValue
00576 (
00577 IN const dictionary_t& data,
00578 IN const char * key
00579 )
00580 {
00581 ASSERT(key, "null key passed in to getRequiredValue()");
00582
00583 const char * value = getValue(data, key);
00584 if (!value) {
00585
00586
00587 WAVE_EX(wex);
00588 wex << "Missing a required parameter: '" << key << "'";
00589 }
00590
00591
00592 return value;
00593 }
00594
00595
00596
00597
00598 const char *
00599 getOptionalValue
00600 (
00601 IN const dictionary_t& data,
00602 IN const char * key,
00603 IN const char * default_value
00604 )
00605 throw()
00606 {
00607 ASSERT(key, "null key passed in to getOptionalValue()");
00608 ASSERT(default_value,
00609 "null default value passed in to getOptionalValue()");
00610
00611 const char * value = getValue(data, key);
00612 return (value) ? value : default_value;
00613 }
00614
00615
00616
00617 const char *
00618 getFloatFromString
00619 (
00620 IN const char * input,
00621 OUT float& x
00622 )
00623 throw()
00624 {
00625 ASSERT(input, "null");
00626 x = 0;
00627
00628 static const int s_maxSize = 15;
00629 static const int s_bufSize = s_maxSize + 1;
00630 char buffer[s_bufSize];
00631
00632 int nChars = 0;
00633 input = getNextTokenFromString(input, buffer, s_bufSize, eParse_None,
00634 nChars);
00635 if (nChars > s_maxSize) {
00636 DPRINTF("Warning: float was truncated! "
00637 "(read %d out of %d characters)", s_maxSize, nChars);
00638 }
00639 x = atof(buffer);
00640
00641 return input;
00642 }
00643
00644
00645
00646 int
00647 readFloatsFromString
00648 (
00649 IN const char * input,
00650 IN int nFloats,
00651 OUT float * output
00652 )
00653 throw()
00654 {
00655 ASSERT(input, "null");
00656 ASSERT(nFloats > 0, "Bad float count: %d", nFloats);
00657 ASSERT(output, "null");
00658
00659 for (int i = 0; i < nFloats; ++i, ++output) {
00660 if (!*input) {
00661 return i;
00662 }
00663
00664 input = getFloatFromString(input, *output);
00665 }
00666
00667
00668 return nFloats;
00669 }
00670
00671
00672
00673 bool
00674 getBooleanFromString
00675 (
00676 IN const char * input
00677 )
00678 throw()
00679 {
00680 ASSERT(input, "null");
00681
00682
00683 if (!*input)
00684 return false;
00685
00686
00687 if (!*(input + 1)) {
00688 if ('F' == *input || 'f' == *input || '0' == *input)
00689 return false;
00690 return true;
00691 }
00692
00693
00694 return (0 != strcasecmp("false", input));
00695 }
00696
00697
00698
00699 const char *
00700 getNextWord
00701 (
00702 IN const char * line,
00703 OUT std::string& word
00704 )
00705 {
00706 ASSERT(line, "null");
00707 word.clear();
00708
00709
00710 const char * p = line;
00711
00712
00713 for (; *p && '\n' != *p && isSpace(*p); ++p) { }
00714
00715
00716 if ('\n' == *p) {
00717 word = "\n";
00718 return p + 1;
00719 }
00720
00721
00722 for (; *p && '\n' != *p && !isSpace(*p); ++p) {
00723 word += *p;
00724 }
00725
00726 return p;
00727 }