00001
00002
00003
00004
00005
00006
00007
00008
00009
00012 #include "../stdafx.h"
00013 #include "../core/endian_func.hpp"
00014 #include "../string_func.h"
00015 #include "../strings_type.h"
00016 #include "../language.h"
00017 #include "../table/control_codes.h"
00018
00019 #include <stdarg.h>
00020
00021 #if (!defined(WIN32) && !defined(WIN64)) || defined(__CYGWIN__)
00022 #include <unistd.h>
00023 #include <sys/stat.h>
00024 #endif
00025
00026 #if defined WIN32 || defined __WATCOMC__
00027 #include <direct.h>
00028 #endif
00029
00030 #ifdef __MORPHOS__
00031 #ifdef stderr
00032 #undef stderr
00033 #endif
00034 #define stderr stdout
00035 #endif
00036
00037 #include "../table/strgen_tables.h"
00038
00039
00040
/** One case-specific variant of a translated string; the variants of a string form a singly linked list. */
struct Case {
	int caseidx;  ///< Case number as returned by ResolveCaseName().
	char *string; ///< Text of the string for this case (a strdup()'ed copy).
	Case *next;   ///< Next variant of the same string, or NULL at the end of the list.
};
00046
/* Global state of the string compiler. */
static bool _masterlang;                     ///< Set by main(): true when only the master file is compiled.
static bool _translated;                     ///< Set per string by WriteLangfile(): master language, or the text being written is not the English fallback.
static bool _translation;                    ///< Set by ParseFile(); while false CheckCommandsMatch() accepts everything.
static const char *_file = "(unknown file)"; ///< File name printed in diagnostics.
static FILE *_output_file = NULL;            ///< Output file currently being written, or NULL.
static const char *_output_filename = NULL;  ///< Name of the open output file; error() unlinks it before exiting.
static int _cur_line;                        ///< Line number printed in diagnostics.
static int _errors, _warnings, _show_todo;   ///< Error counter, warning counter, option bits (1: emit "<TODO> ", 2: warn about untranslated strings).

/** Size of the buffers that receive the parameter text of a command. */
static const ptrdiff_t MAX_COMMAND_PARAM_SIZE = 100;
00057
/** Everything known about a single string of the master file. */
struct LangString {
	char *name;            ///< Identifier of the string.
	char *english;         ///< Text from the master file.
	char *translated;      ///< Text from the translation, or NULL when not (yet) translated.
	uint16 hash_next;      ///< Next entry in the same hash chain, stored as index + 1 (0 terminates the chain).
	uint16 index;          ///< Position of this string in _strings.
	int line;              ///< Line the string was last read from; used for diagnostics.
	Case *translated_case; ///< Linked list of case-specific translations, or NULL.
};
00067
static LangString *_strings[65536]; ///< All known strings, indexed by string ID; unused slots are NULL.
static LanguagePackHeader _lang;    ///< Header of the language that is being parsed/written.


#define HASH_SIZE 32767 ///< Number of buckets of the name hash table.
static uint16 _hash_head[HASH_SIZE]; ///< Head of each hash chain, stored as string index + 1 (0 means empty).

static byte _put_buf[4096];  ///< Buffer collecting the encoded bytes of the string being written.
static uint _put_pos;        ///< Number of bytes currently in _put_buf.
static int _next_string_id;  ///< ID that the next string of the master file receives.

static uint32 _hash; ///< Hash computed by MakeHashOfStrings(); written out as the language pack version.

static const char *_cur_ident; ///< Name of the string currently being written; used in error messages.
00082
/** A non-consuming command together with the parameter text it was used with. */
struct CmdPair {
	const CmdStruct *a; ///< The command.
	const char *v;      ///< Its parameter text; "" when there was none.
};
00087
/** The commands found in one string, as collected by ExtractCommandString(). */
struct ParsedCommandStruct {
	uint np;                   ///< Number of used entries in #pairs.
	CmdPair pairs[32];         ///< Commands that consume no argument (and are not flagged C_DONTCOUNT).
	const CmdStruct *cmd[32];  ///< Argument-consuming commands, indexed by argument index; NULL when unused.
};
00093
00094
static ParsedCommandStruct _cur_pcs; ///< Commands of the English text of the string currently being written.
static int _cur_argidx;              ///< Argument index the next consuming command refers to while writing a string.
00097
00098 static uint HashStr(const char *s)
00099 {
00100 uint hash = 0;
00101 for (; *s != '\0'; s++) hash = ROL(hash, 3) ^ *s;
00102 return hash % HASH_SIZE;
00103 }
00104
00105 static void HashAdd(const char *s, LangString *ls)
00106 {
00107 uint hash = HashStr(s);
00108 ls->hash_next = _hash_head[hash];
00109 _hash_head[hash] = ls->index + 1;
00110 }
00111
00112 static LangString *HashFind(const char *s)
00113 {
00114 int idx = _hash_head[HashStr(s)];
00115
00116 while (--idx >= 0) {
00117 LangString *ls = _strings[idx];
00118
00119 if (strcmp(ls->name, s) == 0) return ls;
00120 idx = ls->hash_next;
00121 }
00122 return NULL;
00123 }
00124
/*
 * Format string for diagnostics. It takes, in this order, the file name (%s),
 * the line number (%d) and the message (%s); 's' is the severity label.
 * The _MSC_VER variant uses the "file (line): warning: ..." layout and puts
 * the severity label in parentheses behind the message.
 */
#ifdef _MSC_VER
# define LINE_NUM_FMT(s) "%s (%d): warning: %s (" s ")\n"
#else
# define LINE_NUM_FMT(s) "%s:%d: " s ": %s\n"
#endif
00130
00131 static void CDECL strgen_warning(const char *s, ...) WARN_FORMAT(1, 2);
00132
00133 static void CDECL strgen_warning(const char *s, ...)
00134 {
00135 char buf[1024];
00136 va_list va;
00137 va_start(va, s);
00138 vsnprintf(buf, lengthof(buf), s, va);
00139 va_end(va);
00140 fprintf(stderr, LINE_NUM_FMT("warning"), _file, _cur_line, buf);
00141 _warnings++;
00142 }
00143
00144 static void CDECL strgen_error(const char *s, ...) WARN_FORMAT(1, 2);
00145
00146 static void CDECL strgen_error(const char *s, ...)
00147 {
00148 char buf[1024];
00149 va_list va;
00150 va_start(va, s);
00151 vsnprintf(buf, lengthof(buf), s, va);
00152 va_end(va);
00153 fprintf(stderr, LINE_NUM_FMT("error"), _file, _cur_line, buf);
00154 _errors++;
00155 }
00156
00157 void NORETURN CDECL error(const char *s, ...)
00158 {
00159 char buf[1024];
00160 va_list va;
00161 va_start(va, s);
00162 vsnprintf(buf, lengthof(buf), s, va);
00163 va_end(va);
00164 fprintf(stderr, LINE_NUM_FMT("FATAL"), _file, _cur_line, buf);
00165 #ifdef _MSC_VER
00166 fprintf(stderr, LINE_NUM_FMT("warning"), _file, _cur_line, "language is not compiled");
00167 #endif
00168
00169 if (_output_file != NULL) {
00170 fclose(_output_file);
00171 unlink(_output_filename);
00172 }
00173 exit(1);
00174 }
00175
00176 static void PutByte(byte c)
00177 {
00178 if (_put_pos >= lengthof(_put_buf)) error("Put buffer too small");
00179 _put_buf[_put_pos++] = c;
00180 }
00181
00182
00183 static void PutUtf8(uint32 value)
00184 {
00185 if (value < 0x80) {
00186 PutByte(value);
00187 } else if (value < 0x800) {
00188 PutByte(0xC0 + GB(value, 6, 5));
00189 PutByte(0x80 + GB(value, 0, 6));
00190 } else if (value < 0x10000) {
00191 PutByte(0xE0 + GB(value, 12, 4));
00192 PutByte(0x80 + GB(value, 6, 6));
00193 PutByte(0x80 + GB(value, 0, 6));
00194 } else if (value < 0x110000) {
00195 PutByte(0xF0 + GB(value, 18, 3));
00196 PutByte(0x80 + GB(value, 12, 6));
00197 PutByte(0x80 + GB(value, 6, 6));
00198 PutByte(0x80 + GB(value, 0, 6));
00199 } else {
00200 strgen_warning("Invalid unicode value U+0x%X", value);
00201 }
00202 }
00203
00204
00205 size_t Utf8Validate(const char *s)
00206 {
00207 uint32 c;
00208
00209 if (!HasBit(s[0], 7)) {
00210
00211 return 1;
00212 } else if (GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
00213
00214 c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
00215 if (c >= 0x80) return 2;
00216 } else if (GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
00217
00218 c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
00219 if (c >= 0x800) return 3;
00220 } else if (GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
00221
00222 c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
00223 if (c >= 0x10000 && c <= 0x10FFFF) return 4;
00224 }
00225
00226 return 0;
00227 }
00228
00229
00230 static void EmitSingleChar(char *buf, int value)
00231 {
00232 if (*buf != '\0') strgen_warning("Ignoring trailing letters in command");
00233 PutUtf8(value);
00234 }
00235
00236
00237
00238
00239
00240
00241
00242
/**
 * Parse an optionally relative number, optionally followed by ":<offset>".
 *
 * Leading spaces and tabs are skipped. A number with a leading '+' or a
 * negative number is added to \a *value; any other number replaces it.
 * The number base is detected by strtol() (decimal, 0x hex, 0 octal).
 *
 * @param buf    [in,out] Text to parse; advanced past the parsed part on success only.
 * @param value  [in,out] Value to update. Note that it may already have been
 *               updated when parsing the offset fails afterwards.
 * @param offset [out] Receives the number after ':' when present; may be NULL,
 *               in which case a ':' is not consumed.
 * @return true when a number (and, if present, a valid offset) was parsed.
 */
bool ParseRelNum(char **buf, int *value, int *offset)
{
	const char *cursor = *buf;
	char *stop;

	/* Skip leading blanks. */
	for (; *cursor == ' ' || *cursor == '\t'; cursor++) {}

	const bool explicit_rel = (*cursor == '+');
	if (explicit_rel) cursor++;

	const int number = strtol(cursor, &stop, 0);
	if (stop == cursor) return false;

	*value = (explicit_rel || number < 0) ? *value + number : number;

	if (offset != NULL && *stop == ':') {
		/* Take the offset that follows the colon. */
		cursor = stop + 1;
		*offset = strtol(cursor, &stop, 0);
		if (stop == cursor) return false;
	}

	*buf = stop;
	return true;
}
00270
00271
/**
 * Split the next word off a text, modifying the text in place.
 *
 * Leading spaces and tabs are skipped. A word starting with a double quote
 * runs up to the closing quote (or the end of the text); any other word runs
 * up to the next space or tab. The terminating character is overwritten with
 * a NUL.
 *
 * @param buf [in,out] Text to parse; advanced past the returned word.
 * @return The word (without quotes), or NULL when only blanks were left.
 */
char *ParseWord(char **buf)
{
	char *cur = *buf;

	while (*cur == ' ' || *cur == '\t') cur++;
	if (*cur == '\0') return NULL;

	/* A leading quote changes the terminator from "blank" to "closing quote". */
	const bool quoted = (*cur == '"');
	if (quoted) cur++;

	char *word = cur;
	for (; *cur != '\0'; cur++) {
		const bool at_end = quoted ? (*cur == '"') : (*cur == ' ' || *cur == '\t');
		if (at_end) {
			*cur++ = '\0';
			break;
		}
	}

	*buf = cur;
	return word;
}
00305
00306
/* Forward declaration; the definition follows further down in this file. */
static int TranslateArgumentIdx(int arg, int offset = 0);
00308
00309 static void EmitWordList(const char * const *words, uint nw)
00310 {
00311 PutByte(nw);
00312 for (uint i = 0; i < nw; i++) PutByte(strlen(words[i]) + 1);
00313 for (uint i = 0; i < nw; i++) {
00314 for (uint j = 0; words[i][j] != '\0'; j++) PutByte(words[i][j]);
00315 PutByte(0);
00316 }
00317 }
00318
00319 static void EmitPlural(char *buf, int value)
00320 {
00321 int argidx = _cur_argidx;
00322 int offset = 0;
00323 const char *words[5];
00324 int nw = 0;
00325
00326
00327 if (!ParseRelNum(&buf, &argidx, &offset)) argidx--;
00328
00329
00330 for (nw = 0; nw < 5; nw++) {
00331 words[nw] = ParseWord(&buf);
00332 if (words[nw] == NULL) break;
00333 }
00334
00335 if (nw == 0) {
00336 error("%s: No plural words", _cur_ident);
00337 }
00338
00339 if (_plural_forms[_lang.plural_form].plural_count != nw) {
00340 if (_translated) {
00341 error("%s: Invalid number of plural forms. Expecting %d, found %d.", _cur_ident,
00342 _plural_forms[_lang.plural_form].plural_count, nw);
00343 } else {
00344 if ((_show_todo & 2) != 0) strgen_warning("'%s' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
00345 if (nw > _plural_forms[_lang.plural_form].plural_count) {
00346 nw = _plural_forms[_lang.plural_form].plural_count;
00347 } else {
00348 for (; nw < _plural_forms[_lang.plural_form].plural_count; nw++) {
00349 words[nw] = words[nw - 1];
00350 }
00351 }
00352 }
00353 }
00354
00355 PutUtf8(SCC_PLURAL_LIST);
00356 PutByte(_lang.plural_form);
00357 PutByte(TranslateArgumentIdx(argidx, offset));
00358 EmitWordList(words, nw);
00359 }
00360
00361
00362 static void EmitGender(char *buf, int value)
00363 {
00364 int argidx = _cur_argidx;
00365 int offset = 0;
00366 uint nw;
00367
00368 if (buf[0] == '=') {
00369 buf++;
00370
00371
00372 nw = _lang.GetGenderIndex(buf);
00373 if (nw >= MAX_NUM_GENDERS) error("G argument '%s' invalid", buf);
00374
00375
00376 PutUtf8(SCC_GENDER_INDEX);
00377 PutByte(nw);
00378 } else {
00379 const char *words[MAX_NUM_GENDERS];
00380
00381
00382
00383 if (!ParseRelNum(&buf, &argidx, &offset)) {}
00384
00385 const CmdStruct *cmd = _cur_pcs.cmd[argidx];
00386 if (cmd == NULL || (cmd->flags & C_GENDER) == 0) {
00387 error("Command '%s' can't have a gender", cmd == NULL ? "<empty>" : cmd->cmd);
00388 }
00389
00390 for (nw = 0; nw < MAX_NUM_GENDERS; nw++) {
00391 words[nw] = ParseWord(&buf);
00392 if (words[nw] == NULL) break;
00393 }
00394 if (nw != _lang.num_genders) error("Bad # of arguments for gender command");
00395
00396 assert(IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX));
00397 PutUtf8(SCC_GENDER_LIST);
00398 PutByte(TranslateArgumentIdx(argidx, offset));
00399 EmitWordList(words, nw);
00400 }
00401 }
00402
00403 static const CmdStruct *FindCmd(const char *s, int len)
00404 {
00405 for (const CmdStruct *cs = _cmd_structs; cs != endof(_cmd_structs); cs++) {
00406 if (strncmp(cs->cmd, s, len) == 0 && cs->cmd[len] == '\0') return cs;
00407 }
00408 return NULL;
00409 }
00410
00411 static uint ResolveCaseName(const char *str, uint len)
00412 {
00413
00414 char case_str[CASE_GENDER_LEN];
00415 len = min(lengthof(case_str) - 1, len);
00416 memcpy(case_str, str, len);
00417 case_str[len] = '\0';
00418
00419 uint8 case_idx = _lang.GetCaseIndex(case_str);
00420 if (case_idx >= MAX_NUM_CASES) error("Invalid case-name '%s'", case_str);
00421 return case_idx + 1;
00422 }
00423
00424
00425
00426
00427 static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei)
00428 {
00429 const char *s = *str, *start;
00430 char c;
00431
00432 *argno = -1;
00433 *casei = -1;
00434
00435
00436 for (; *s != '{'; s++) {
00437 if (*s == '\0') return NULL;
00438 }
00439 s++;
00440
00441 if (*s >= '0' && *s <= '9') {
00442 char *end;
00443
00444 *argno = strtoul(s, &end, 0);
00445 if (*end != ':') error("missing arg #");
00446 s = end + 1;
00447 }
00448
00449
00450 start = s;
00451 do {
00452 c = *s++;
00453 } while (c != '}' && c != ' ' && c != '=' && c != '.' && c != 0);
00454
00455 const CmdStruct *cmd = FindCmd(start, s - start - 1);
00456 if (cmd == NULL) {
00457 strgen_error("Undefined command '%.*s'", (int)(s - start - 1), start);
00458 return NULL;
00459 }
00460
00461 if (c == '.') {
00462 const char *casep = s;
00463
00464 if (!(cmd->flags & C_CASE)) {
00465 error("Command '%s' can't have a case", cmd->cmd);
00466 }
00467
00468 do {
00469 c = *s++;
00470 } while (c != '}' && c != ' ' && c != '\0');
00471 *casei = ResolveCaseName(casep, s - casep - 1);
00472 }
00473
00474 if (c == '\0') {
00475 strgen_error("Missing } from command '%s'", start);
00476 return NULL;
00477 }
00478
00479
00480 if (c != '}') {
00481 if (c == '=') s--;
00482
00483 start = s;
00484 for (;;) {
00485 c = *s++;
00486 if (c == '}') break;
00487 if (c == '\0') {
00488 strgen_error("Missing } from command '%s'", start);
00489 return NULL;
00490 }
00491 if (s - start == MAX_COMMAND_PARAM_SIZE) error("param command too long");
00492 *param++ = c;
00493 }
00494 }
00495 *param = '\0';
00496
00497 *str = s;
00498
00499 return cmd;
00500 }
00501
00502
00503 static void HandlePragma(char *str, bool master)
00504 {
00505 if (!memcmp(str, "id ", 3)) {
00506 _next_string_id = strtoul(str + 3, NULL, 0);
00507 } else if (!memcmp(str, "name ", 5)) {
00508 strecpy(_lang.name, str + 5, lastof(_lang.name));
00509 } else if (!memcmp(str, "ownname ", 8)) {
00510 strecpy(_lang.own_name, str + 8, lastof(_lang.own_name));
00511 } else if (!memcmp(str, "isocode ", 8)) {
00512 strecpy(_lang.isocode, str + 8, lastof(_lang.isocode));
00513 } else if (!memcmp(str, "plural ", 7)) {
00514 _lang.plural_form = atoi(str + 7);
00515 if (_lang.plural_form >= lengthof(_plural_forms)) {
00516 error("Invalid pluralform %d", _lang.plural_form);
00517 }
00518 } else if (!memcmp(str, "textdir ", 8)) {
00519 if (!memcmp(str + 8, "ltr", 3)) {
00520 _lang.text_dir = TD_LTR;
00521 } else if (!memcmp(str + 8, "rtl", 3)) {
00522 _lang.text_dir = TD_RTL;
00523 } else {
00524 error("Invalid textdir %s", str + 8);
00525 }
00526 } else if (!memcmp(str, "digitsep ", 9)) {
00527 str += 9;
00528 strecpy(_lang.digit_group_separator, strcmp(str, "{NBSP}") == 0 ? NBSP : str, lastof(_lang.digit_group_separator));
00529 } else if (!memcmp(str, "digitsepcur ", 12)) {
00530 str += 12;
00531 strecpy(_lang.digit_group_separator_currency, strcmp(str, "{NBSP}") == 0 ? NBSP : str, lastof(_lang.digit_group_separator_currency));
00532 } else if (!memcmp(str, "decimalsep ", 11)) {
00533 str += 11;
00534 strecpy(_lang.digit_decimal_separator, strcmp(str, "{NBSP}") == 0 ? NBSP : str, lastof(_lang.digit_decimal_separator));
00535 } else if (!memcmp(str, "winlangid ", 10)) {
00536 const char *buf = str + 10;
00537 long langid = strtol(buf, NULL, 16);
00538 if (langid > (long)UINT16_MAX || langid < 0) {
00539 error("Invalid winlangid %s", buf);
00540 }
00541 _lang.winlangid = (uint16)langid;
00542 } else if (!memcmp(str, "grflangid ", 10)) {
00543 const char *buf = str + 10;
00544 long langid = strtol(buf, NULL, 16);
00545 if (langid >= 0x7F || langid < 0) {
00546 error("Invalid grflangid %s", buf);
00547 }
00548 _lang.newgrflangid = (uint8)langid;
00549 } else if (!memcmp(str, "gender ", 7)) {
00550 if (master) error("Genders are not allowed in the base translation.");
00551 char *buf = str + 7;
00552
00553 for (;;) {
00554 const char *s = ParseWord(&buf);
00555
00556 if (s == NULL) break;
00557 if (_lang.num_genders >= MAX_NUM_GENDERS) error("Too many genders, max %d", MAX_NUM_GENDERS);
00558 strecpy(_lang.genders[_lang.num_genders], s, lastof(_lang.genders[_lang.num_genders]));
00559 _lang.num_genders++;
00560 }
00561 } else if (!memcmp(str, "case ", 5)) {
00562 if (master) error("Cases are not allowed in the base translation.");
00563 char *buf = str + 5;
00564
00565 for (;;) {
00566 const char *s = ParseWord(&buf);
00567
00568 if (s == NULL) break;
00569 if (_lang.num_cases >= MAX_NUM_CASES) error("Too many cases, max %d", MAX_NUM_CASES);
00570 strecpy(_lang.cases[_lang.num_cases], s, lastof(_lang.cases[_lang.num_cases]));
00571 _lang.num_cases++;
00572 }
00573 } else {
00574 error("unknown pragma '%s'", str);
00575 }
00576 }
00577
00578 static void ExtractCommandString(ParsedCommandStruct *p, const char *s, bool warnings)
00579 {
00580 char param[MAX_COMMAND_PARAM_SIZE];
00581 int argno;
00582 int argidx = 0;
00583 int casei;
00584
00585 memset(p, 0, sizeof(*p));
00586
00587 for (;;) {
00588
00589 const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei);
00590
00591 if (ar == NULL) break;
00592
00593
00594 if (argno != -1 && ar->consumes == 0) error("Non consumer param can't have a paramindex");
00595
00596 if (ar->consumes) {
00597 if (argno != -1) argidx = argno;
00598 if (argidx < 0 || (uint)argidx >= lengthof(p->cmd)) error("invalid param idx %d", argidx);
00599 if (p->cmd[argidx] != NULL && p->cmd[argidx] != ar) error("duplicate param idx %d", argidx);
00600
00601 p->cmd[argidx++] = ar;
00602 } else if (!(ar->flags & C_DONTCOUNT)) {
00603 if (p->np >= lengthof(p->pairs)) error("too many commands in string, max " PRINTF_SIZE, lengthof(p->pairs));
00604 p->pairs[p->np].a = ar;
00605 p->pairs[p->np].v = param[0] != '\0' ? strdup(param) : "";
00606 p->np++;
00607 }
00608 }
00609 }
00610
00611
00612 static const CmdStruct *TranslateCmdForCompare(const CmdStruct *a)
00613 {
00614 if (a == NULL) return NULL;
00615
00616 if (strcmp(a->cmd, "STRING1") == 0 ||
00617 strcmp(a->cmd, "STRING2") == 0 ||
00618 strcmp(a->cmd, "STRING3") == 0 ||
00619 strcmp(a->cmd, "STRING4") == 0 ||
00620 strcmp(a->cmd, "STRING5") == 0 ||
00621 strcmp(a->cmd, "RAW_STRING") == 0) {
00622 return FindCmd("STRING", 6);
00623 }
00624
00625 return a;
00626 }
00627
00628
00629 static bool CheckCommandsMatch(char *a, char *b, const char *name)
00630 {
00631
00632
00633
00634
00635 if (!_translation) return true;
00636
00637 ParsedCommandStruct templ;
00638 ParsedCommandStruct lang;
00639 bool result = true;
00640
00641 ExtractCommandString(&templ, b, true);
00642 ExtractCommandString(&lang, a, true);
00643
00644
00645 if (templ.np != lang.np) {
00646 strgen_warning("%s: template string and language string have a different # of commands", name);
00647 result = false;
00648 }
00649
00650 for (uint i = 0; i < templ.np; i++) {
00651
00652 bool found = false;
00653 for (uint j = 0; j < lang.np; j++) {
00654 if (templ.pairs[i].a == lang.pairs[j].a &&
00655 strcmp(templ.pairs[i].v, lang.pairs[j].v) == 0) {
00656
00657 lang.pairs[j].a = NULL;
00658 found = true;
00659 break;
00660 }
00661 }
00662
00663 if (!found) {
00664 strgen_warning("%s: command '%s' exists in template file but not in language file", name, templ.pairs[i].a->cmd);
00665 result = false;
00666 }
00667 }
00668
00669
00670
00671 for (uint i = 0; i < lengthof(templ.cmd); i++) {
00672 if (TranslateCmdForCompare(templ.cmd[i]) != lang.cmd[i]) {
00673 strgen_warning("%s: Param idx #%d '%s' doesn't match with template command '%s'", name, i,
00674 lang.cmd[i] == NULL ? "<empty>" : TranslateCmdForCompare(lang.cmd[i])->cmd,
00675 templ.cmd[i] == NULL ? "<empty>" : templ.cmd[i]->cmd);
00676 result = false;
00677 }
00678 }
00679
00680 return result;
00681 }
00682
00683 static void HandleString(char *str, bool master)
00684 {
00685 if (*str == '#') {
00686 if (str[1] == '#' && str[2] != '#') HandlePragma(str + 2, master);
00687 return;
00688 }
00689
00690
00691 if (*str == ';' || *str == ' ' || *str == '\0') return;
00692
00693 char *s = strchr(str, ':');
00694 if (s == NULL) {
00695 strgen_error("Line has no ':' delimiter");
00696 return;
00697 }
00698
00699 char *t;
00700
00701
00702 for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--) {}
00703 *t = 0;
00704 s++;
00705
00706
00707 const char *tmp;
00708 for (tmp = s; *tmp != '\0';) {
00709 size_t len = Utf8Validate(tmp);
00710 if (len == 0) error("Invalid UTF-8 sequence in '%s'", s);
00711
00712 WChar c;
00713 Utf8Decode(&c, tmp);
00714 if (c <= 0x001F ||
00715 (c >= 0xE000 && c <= 0xF8FF) ||
00716 (c >= 0xFFF0 && c <= 0xFFFF)) {
00717 error("Unwanted UTF-8 character U+%04X in sequence '%s'", c, s);
00718 }
00719
00720 tmp += len;
00721 }
00722
00723
00724
00725 char *casep = strchr(str, '.');
00726 if (casep) *casep++ = '\0';
00727
00728
00729 LangString *ent = HashFind(str);
00730
00731 if (master) {
00732 if (casep != NULL) {
00733 strgen_error("Cases in the base translation are not supported.");
00734 return;
00735 }
00736
00737 if (ent != NULL) {
00738 strgen_error("String name '%s' is used multiple times", str);
00739 return;
00740 }
00741
00742 if (_strings[_next_string_id]) {
00743 strgen_error("String ID 0x%X for '%s' already in use by '%s'", _next_string_id, str, _strings[_next_string_id]->name);
00744 return;
00745 }
00746
00747
00748 ent = CallocT<LangString>(1);
00749 _strings[_next_string_id] = ent;
00750 ent->index = _next_string_id++;
00751 ent->name = strdup(str);
00752 ent->line = _cur_line;
00753
00754 HashAdd(str, ent);
00755
00756 ent->english = strdup(s);
00757 } else {
00758 if (ent == NULL) {
00759 strgen_warning("String name '%s' does not exist in master file", str);
00760 return;
00761 }
00762
00763 if (ent->translated && casep == NULL) {
00764 strgen_error("String name '%s' is used multiple times", str);
00765 return;
00766 }
00767
00768
00769 if (!CheckCommandsMatch(s, ent->english, str)) return;
00770
00771 if (casep != NULL) {
00772 Case *c = MallocT<Case>(1);
00773
00774 c->caseidx = ResolveCaseName(casep, strlen(casep));
00775 c->string = strdup(s);
00776 c->next = ent->translated_case;
00777 ent->translated_case = c;
00778 } else {
00779 ent->translated = strdup(s);
00780
00781
00782
00783 ent->line = _cur_line;
00784 }
00785 }
00786 }
00787
00788
/**
 * Remove trailing carriage returns, newlines and spaces from a string, in place.
 * Tabs are left alone.
 * @param buf The string to trim.
 */
static void rstrip(char *buf)
{
	char *end = buf + strlen(buf);

	while (end > buf) {
		const char last = end[-1];
		if (last != '\r' && last != '\n' && last != ' ') break;
		end--;
	}
	*end = '\0';
}
00795
00796
00797 static void ParseFile(const char *file, bool english)
00798 {
00799 FILE *in;
00800 char buf[2048];
00801
00802
00803 const char *cur_file = strrchr(_file, PATHSEPCHAR);
00804 const char *next_file = strrchr(file, PATHSEPCHAR);
00805 _translation = next_file != NULL && cur_file != NULL && strcmp(cur_file, next_file) != 0;
00806 _file = file;
00807
00808
00809 MemSetT(&_lang, 0);
00810 strecpy(_lang.digit_group_separator, ",", lastof(_lang.digit_group_separator));
00811 strecpy(_lang.digit_group_separator_currency, ",", lastof(_lang.digit_group_separator_currency));
00812 strecpy(_lang.digit_decimal_separator, ".", lastof(_lang.digit_decimal_separator));
00813
00814 in = fopen(file, "r");
00815 if (in == NULL) error("Cannot open file");
00816 _cur_line = 1;
00817 while (fgets(buf, sizeof(buf), in) != NULL) {
00818 rstrip(buf);
00819 HandleString(buf, english);
00820 _cur_line++;
00821 }
00822 fclose(in);
00823
00824 if (StrEmpty(_lang.name) || StrEmpty(_lang.own_name) || StrEmpty(_lang.isocode)) {
00825 error("Language must include ##name, ##ownname and ##isocode");
00826 }
00827 }
00828
00829
00830 static uint32 MyHashStr(uint32 hash, const char *s)
00831 {
00832 for (; *s != '\0'; s++) {
00833 hash = ROL(hash, 3) ^ *s;
00834 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
00835 }
00836 return hash;
00837 }
00838
00839
00840
00841 static void MakeHashOfStrings()
00842 {
00843 uint32 hash = 0;
00844 uint i;
00845
00846 for (i = 0; i != lengthof(_strings); i++) {
00847 const LangString *ls = _strings[i];
00848
00849 if (ls != NULL) {
00850 const CmdStruct *cs;
00851 const char *s;
00852 char buf[MAX_COMMAND_PARAM_SIZE];
00853 int argno;
00854 int casei;
00855
00856 s = ls->name;
00857 hash ^= i * 0x717239;
00858 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
00859 hash = MyHashStr(hash, s + 1);
00860
00861 s = ls->english;
00862 while ((cs = ParseCommandString(&s, buf, &argno, &casei)) != NULL) {
00863 if (cs->flags & C_DONTCOUNT) continue;
00864
00865 hash ^= (cs - _cmd_structs) * 0x1234567;
00866 hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
00867 }
00868 }
00869 }
00870 _hash = hash;
00871 }
00872
00873
00874 static uint CountInUse(uint grp)
00875 {
00876 int i;
00877
00878 for (i = 0x800; --i >= 0;) if (_strings[(grp << 11) + i] != NULL) break;
00879 return i + 1;
00880 }
00881
00882
00883 bool CompareFiles(const char *n1, const char *n2)
00884 {
00885 FILE *f2 = fopen(n2, "rb");
00886 if (f2 == NULL) return false;
00887
00888 FILE *f1 = fopen(n1, "rb");
00889 if (f1 == NULL) error("can't open %s", n1);
00890
00891 size_t l1, l2;
00892 do {
00893 char b1[4096];
00894 char b2[4096];
00895 l1 = fread(b1, 1, sizeof(b1), f1);
00896 l2 = fread(b2, 1, sizeof(b2), f2);
00897
00898 if (l1 != l2 || memcmp(b1, b2, l1)) {
00899 fclose(f2);
00900 fclose(f1);
00901 return false;
00902 }
00903 } while (l1);
00904
00905 fclose(f2);
00906 fclose(f1);
00907 return true;
00908 }
00909
00910
00911 static void WriteStringsH(const char *filename)
00912 {
00913 int next = -1;
00914
00915 _output_filename = "tmp.xxx";
00916 _output_file = fopen(_output_filename, "w");
00917 if (_output_file == NULL) error("can't open tmp.xxx");
00918
00919 fprintf(_output_file, "/* This file is automatically generated. Do not modify */\n\n");
00920 fprintf(_output_file, "#ifndef TABLE_STRINGS_H\n");
00921 fprintf(_output_file, "#define TABLE_STRINGS_H\n");
00922
00923 for (int i = 0; i != lengthof(_strings); i++) {
00924 if (_strings[i] != NULL) {
00925 if (next != i) fprintf(_output_file, "\n");
00926 fprintf(_output_file, "static const StringID %s = 0x%X;\n", _strings[i]->name, i);
00927 next = i + 1;
00928 }
00929 }
00930
00931 fprintf(_output_file, "\nstatic const StringID STR_LAST_STRINGID = 0x%X;\n\n", next - 1);
00932
00933
00934 int max_plural_forms = 0;
00935 for (uint i = 0; i < lengthof(_plural_forms); i++) {
00936 max_plural_forms = max(max_plural_forms, _plural_forms[i].plural_count);
00937 }
00938
00939 fprintf(_output_file,
00940 "static const uint LANGUAGE_PACK_VERSION = 0x%X;\n"
00941 "static const uint LANGUAGE_MAX_PLURAL = %d;\n"
00942 "static const uint LANGUAGE_MAX_PLURAL_FORMS = %d;\n\n",
00943 (uint)_hash, (uint)lengthof(_plural_forms), max_plural_forms
00944 );
00945
00946 fprintf(_output_file, "#endif /* TABLE_STRINGS_H */\n");
00947
00948 fclose(_output_file);
00949 _output_file = NULL;
00950
00951 if (CompareFiles(_output_filename, filename)) {
00952
00953 unlink(_output_filename);
00954 } else {
00955
00956 #if defined(WIN32) || defined(WIN64)
00957 unlink(filename);
00958 #endif
00959 if (rename(_output_filename, filename) == -1) error("rename() failed");
00960 }
00961 _output_filename = NULL;
00962 }
00963
00964 static int TranslateArgumentIdx(int argidx, int offset)
00965 {
00966 int sum;
00967
00968 if (argidx < 0 || (uint)argidx >= lengthof(_cur_pcs.cmd)) {
00969 error("invalid argidx %d", argidx);
00970 }
00971 const CmdStruct *cs = _cur_pcs.cmd[argidx];
00972 if (cs != NULL && cs->consumes <= offset) {
00973 error("invalid argidx offset %d:%d", argidx, offset);
00974 }
00975
00976 if (_cur_pcs.cmd[argidx] == NULL) {
00977 error("no command for this argidx %d", argidx);
00978 }
00979
00980 for (int i = sum = 0; i < argidx; i++) {
00981 const CmdStruct *cs = _cur_pcs.cmd[i];
00982
00983 sum += (cs != NULL) ? cs->consumes : 1;
00984 }
00985
00986 return sum + offset;
00987 }
00988
00989 static void PutArgidxCommand()
00990 {
00991 PutUtf8(SCC_ARG_INDEX);
00992 PutByte(TranslateArgumentIdx(_cur_argidx));
00993 }
00994
00995
00996 static void PutCommandString(const char *str)
00997 {
00998 _cur_argidx = 0;
00999
01000 while (*str != '\0') {
01001
01002 if (*str != '{') {
01003 PutByte(*str++);
01004 continue;
01005 }
01006
01007 char param[MAX_COMMAND_PARAM_SIZE];
01008 int argno;
01009 int casei;
01010 const CmdStruct *cs = ParseCommandString(&str, param, &argno, &casei);
01011 if (cs == NULL) break;
01012
01013 if (casei != -1) {
01014 PutUtf8(SCC_SETCASE);
01015 PutByte(casei);
01016 }
01017
01018
01019 if (cs->consumes > 0) {
01020
01021 if (argno != -1 && argno != _cur_argidx) {
01022 _cur_argidx = argno;
01023 PutArgidxCommand();
01024 }
01025
01026
01027 cs = _cur_pcs.cmd[_cur_argidx++];
01028 if (cs == NULL) {
01029 error("%s: No argument exists at position %d", _cur_ident, _cur_argidx - 1);
01030 }
01031 }
01032
01033 cs->proc(param, cs->value);
01034 }
01035 }
01036
01037 static void WriteLength(FILE *f, uint length)
01038 {
01039 if (length < 0xC0) {
01040 fputc(length, f);
01041 } else if (length < 0x4000) {
01042 fputc((length >> 8) | 0xC0, f);
01043 fputc(length & 0xFF, f);
01044 } else {
01045 error("string too long");
01046 }
01047 }
01048
01049
01050 static void WriteLangfile(const char *filename)
01051 {
01052 uint in_use[32];
01053
01054 _output_filename = filename;
01055 _output_file = fopen(filename, "wb");
01056 if (_output_file == NULL) error("can't open %s", filename);
01057
01058 for (int i = 0; i != 32; i++) {
01059 uint n = CountInUse(i);
01060
01061 in_use[i] = n;
01062 _lang.offsets[i] = TO_LE16(n);
01063 }
01064
01065 _lang.ident = TO_LE32(LanguagePackHeader::IDENT);
01066 _lang.version = TO_LE32(_hash);
01067 _lang.winlangid = TO_LE16(_lang.winlangid);
01068
01069 fwrite(&_lang, sizeof(_lang), 1, _output_file);
01070
01071 for (int i = 0; i != 32; i++) {
01072 for (uint j = 0; j != in_use[i]; j++) {
01073 const LangString *ls = _strings[(i << 11) + j];
01074 const Case *casep;
01075 const char *cmdp;
01076
01077
01078 if (ls == NULL) {
01079 WriteLength(_output_file, 0);
01080 continue;
01081 }
01082
01083 _cur_ident = ls->name;
01084 _cur_line = ls->line;
01085
01086
01087 if (_show_todo > 0 && ls->translated == NULL) {
01088 if ((_show_todo & 2) != 0) {
01089 strgen_warning("'%s' is untranslated", ls->name);
01090 }
01091 if ((_show_todo & 1) != 0) {
01092 const char *s = "<TODO> ";
01093 while (*s != '\0') PutByte(*s++);
01094 }
01095 }
01096
01097
01098 ExtractCommandString(&_cur_pcs, ls->english, false);
01099
01100 if (ls->translated_case != NULL || ls->translated != NULL) {
01101 casep = ls->translated_case;
01102 cmdp = ls->translated;
01103 } else {
01104 casep = NULL;
01105 cmdp = ls->english;
01106 }
01107
01108 _translated = _masterlang || (cmdp != ls->english);
01109
01110 if (casep != NULL) {
01111 const Case *c;
01112 uint num;
01113
01114
01115
01116
01117
01118 PutUtf8(SCC_SWITCH_CASE);
01119
01120 for (num = 0, c = casep; c; c = c->next) num++;
01121 PutByte(num);
01122
01123
01124 for (c = casep; c != NULL; c = c->next) {
01125 uint pos;
01126
01127 PutByte(c->caseidx);
01128
01129 pos = _put_pos;
01130 PutByte(0);
01131 PutByte(0);
01132
01133 PutCommandString(c->string);
01134 PutByte(0);
01135
01136 _put_buf[pos + 0] = GB(_put_pos - (pos + 2), 8, 8);
01137 _put_buf[pos + 1] = GB(_put_pos - (pos + 2), 0, 8);
01138 }
01139 }
01140
01141 if (cmdp != NULL) PutCommandString(cmdp);
01142
01143 WriteLength(_output_file, _put_pos);
01144 fwrite(_put_buf, 1, _put_pos, _output_file);
01145 _put_pos = 0;
01146 }
01147 }
01148
01149 fputc(0, _output_file);
01150 fclose(_output_file);
01151
01152 _output_file = NULL;
01153 _output_filename = NULL;
01154 }
01155
01157 static inline void ottd_mkdir(const char *directory)
01158 {
01159 #if defined(WIN32) || defined(__WATCOMC__)
01160 mkdir(directory);
01161 #else
01162 mkdir(directory, 0755);
01163 #endif
01164 }
01165
01171 static inline char *mkpath(char *buf, size_t buflen, const char *path, const char *file)
01172 {
01173 ttd_strlcpy(buf, path, buflen);
01174
01175 char *p = strchr(buf, '\0');
01176 if (p[-1] != PATHSEPCHAR && (size_t)(p - buf) + 1 < buflen) *p++ = PATHSEPCHAR;
01177 ttd_strlcpy(p, file, buflen - (size_t)(p - buf));
01178 return buf;
01179 }
01180
#if !defined(__MINGW32__)
/**
 * Nothing to convert in this configuration.
 * @param s The path.
 * @return \a s, unchanged.
 */
static inline char *replace_pathsep(char *s) { return s; }
#else
/**
 * Replace every forward slash in a path by a backslash, in place.
 * @param s The path to convert.
 * @return \a s.
 */
static inline char *replace_pathsep(char *s)
{
	char *c = s;
	while (*c != '\0') {
		if (*c == '/') *c = '\\';
		c++;
	}
	return s;
}
#endif
01195
01196 int CDECL main(int argc, char *argv[])
01197 {
01198 char pathbuf[MAX_PATH];
01199 const char *src_dir = ".";
01200 const char *dest_dir = NULL;
01201
01202 while (argc > 1 && *argv[1] == '-') {
01203 if (strcmp(argv[1], "-v") == 0 || strcmp(argv[1], "--version") == 0) {
01204 puts("$Revision$");
01205 return 0;
01206 }
01207
01208 if (strcmp(argv[1], "-export-commands") == 0) {
01209 printf("args\tflags\tcommand\treplacement\n");
01210 for (const CmdStruct *cs = _cmd_structs; cs < endof(_cmd_structs); cs++) {
01211 char flags;
01212 switch (cs->value) {
01213 case 0x200E: case 0x200F:
01214 case 0x202A: case 0x202B: case 0x202C: case 0x202D: case 0x202E:
01215 case 0xA0:
01216 case '\n':
01217 case '{':
01218
01219 flags = 'i';
01220 break;
01221
01222 default:
01223 if (cs->proc == EmitGender) {
01224 flags = 'g';
01225 } else if (cs->proc == EmitPlural) {
01226 flags = 'p';
01227 } else {
01228 flags = '0';
01229 }
01230 }
01231 printf("%i\t%c\t\"%s\"\t\"%s\"\n", cs->consumes, flags, cs->cmd, strstr(cs->cmd, "STRING") ? "STRING" : cs->cmd);
01232 }
01233 return 0;
01234 }
01235
01236 if (strcmp(argv[1], "-export-plurals") == 0) {
01237 printf("count\tdescription\n");
01238 for (const PluralForm *pf = _plural_forms; pf < endof(_plural_forms); pf++) {
01239 printf("%i\t\"%s\"\n", pf->plural_count, pf->description);
01240 }
01241 return 0;
01242 }
01243
01244 if (strcmp(argv[1], "-export-pragmas") == 0) {
01245 printf("name\tflags\tdefault\tdescription\n");
01246 for (size_t i = 0; i < lengthof(_pragmas); i++) {
01247 printf("\"%s\"\t%s\t\"%s\"\t\"%s\"\n",
01248 _pragmas[i][0], _pragmas[i][1], _pragmas[i][2], _pragmas[i][3]);
01249 }
01250 return 0;
01251 }
01252
01253 if (strcmp(argv[1], "-t") == 0 || strcmp(argv[1], "--todo") == 0) {
01254 _show_todo |= 1;
01255 argc--, argv++;
01256 continue;
01257 }
01258
01259 if (strcmp(argv[1], "-w") == 0 || strcmp(argv[1], "--warning") == 0) {
01260 _show_todo |= 2;
01261 argc--, argv++;
01262 continue;
01263 }
01264
01265 if (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) {
01266 puts(
01267 "strgen - $Revision$\n"
01268 " -v | --version print version information and exit\n"
01269 " -t | --todo replace any untranslated strings with '<TODO>'\n"
01270 " -w | --warning print a warning for any untranslated strings\n"
01271 " -h | -? | --help print this help message and exit\n"
01272 " -s | --source_dir search for english.txt in the specified directory\n"
01273 " -d | --dest_dir put output file in the specified directory, create if needed\n"
01274 " -export-commands export all commands and exit\n"
01275 " -export-plurals export all plural forms and exit\n"
01276 " -export-pragmas export all pragmas and exit\n"
01277 " Run without parameters and strgen will search for english.txt and parse it,\n"
01278 " creating strings.h. Passing an argument, strgen will translate that language\n"
01279 " file using english.txt as a reference and output <language>.lng."
01280 );
01281 return 0;
01282 }
01283
01284 if (argc > 2 && (strcmp(argv[1], "-s") == 0 || strcmp(argv[1], "--source_dir") == 0)) {
01285 src_dir = replace_pathsep(argv[2]);
01286 argc -= 2, argv += 2;
01287 continue;
01288 }
01289
01290 if (argc > 2 && (strcmp(argv[1], "-d") == 0 || strcmp(argv[1], "--dest_dir") == 0)) {
01291 dest_dir = replace_pathsep(argv[2]);
01292 argc -= 2, argv += 2;
01293 continue;
01294 }
01295
01296 fprintf(stderr, "Invalid arguments\n");
01297 return 0;
01298 }
01299
01300 if (dest_dir == NULL) dest_dir = src_dir;
01301
01302
01303
01304
01305
01306 if (argc == 1) {
01307 mkpath(pathbuf, lengthof(pathbuf), src_dir, "english.txt");
01308
01309
01310 _masterlang = true;
01311 ParseFile(pathbuf, true);
01312 MakeHashOfStrings();
01313 if (_errors) return 1;
01314
01315
01316 ottd_mkdir(dest_dir);
01317 mkpath(pathbuf, lengthof(pathbuf), dest_dir, "strings.h");
01318 WriteStringsH(pathbuf);
01319 } else if (argc == 2) {
01320 char *r;
01321
01322 mkpath(pathbuf, lengthof(pathbuf), src_dir, "english.txt");
01323
01324
01325 _masterlang = false;
01326 ParseFile(pathbuf, true);
01327 MakeHashOfStrings();
01328 ParseFile(replace_pathsep(argv[1]), false);
01329 if (_errors) return 1;
01330
01331
01332 r = strrchr(argv[1], PATHSEPCHAR);
01333 mkpath(pathbuf, lengthof(pathbuf), dest_dir, (r != NULL) ? &r[1] : argv[1]);
01334
01335
01336 r = strrchr(pathbuf, '.');
01337 if (r == NULL || strcmp(r, ".txt") != 0) r = strchr(pathbuf, '\0');
01338 ttd_strlcpy(r, ".lng", (size_t)(r - pathbuf));
01339 WriteLangfile(pathbuf);
01340
01341
01342 if ((_show_todo & 2) != 0) {
01343 fprintf(stdout, "%d warnings and %d errors for %s\n", _warnings, _errors, pathbuf);
01344 }
01345 } else {
01346 fprintf(stderr, "Invalid arguments\n");
01347 }
01348
01349 return 0;
01350 }