The Battle for Wesnoth  1.19.4+dev
tstring.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2004 - 2024
3  by Guillaume Melquiond <guillaume.melquiond@gmail.com>
4  Copyright (C) 2004 by Philippe Plantier <ayin@anathas.org>
5  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
6 
7  This program is free software; you can redistribute it and/or modify
8  it under the terms of the GNU General Public License as published by
9  the Free Software Foundation; either version 2 of the License, or
10  (at your option) any later version.
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY.
13 
14  See the COPYING file for more details.
15 */
16 
17 /**
18  * @file
19  * Routines for translatable strings.
20  */
21 
22 #include "tstring.hpp"
23 
24 #include "gettext.hpp"
25 #include "log.hpp"
26 
27 #include <boost/multi_index/hashed_index.hpp>
28 
29 #include <map>
30 #include <vector>
31 
/** Log domain used by this file; its messages are emitted under the name "config". */
static lg::log_domain log_config("config");
/** Info-level log stream bound to the log_config domain. */
#define LOG_CF LOG_STREAM(info, log_config)
/** Error-level log stream bound to the log_config domain. */
#define ERR_CF LOG_STREAM(err, log_config)

/**
 * File-local counter, initially zero.
 * NOTE(review): presumably incremented when translations are reset and compared
 * against translation_timestamp_ to invalidate cached translations -- the code doing
 * so is not visible in this view; confirm before relying on it.
 */
static unsigned language_counter = 0;
37 
namespace
{
/**
 * Control bytes embedded in the raw value of a t_string_base to delimit its parts.
 *
 * Layouts used by the walker:
 *  - [TRANSLATABLE_PART]textdomain[TEXTDOMAIN_SEPARATOR]msgid
 *  - [ID_TRANSLATABLE_PART][2-byte textdomain ID]msgid
 *  - [UNTRANSLATABLE_PART]text
 *  - [PLURAL_PART][4-byte count]msgid_plural (directly after a translatable msgid)
 */
constexpr char TRANSLATABLE_PART = '\x01';
constexpr char UNTRANSLATABLE_PART = '\x02';
constexpr char TEXTDOMAIN_SEPARATOR = '\x03';
constexpr char ID_TRANSLATABLE_PART = '\x04';
constexpr char PLURAL_PART = '\x05';
} // namespace
46 
47 std::size_t t_string_base::hash_value() const
48 {
49  std::size_t seed = 0;
50  boost::hash_combine(seed, value_);
51  boost::hash_combine(seed, translatable_);
52  boost::hash_combine(seed, last_untranslatable_);
53  return seed;
54 }
55 
57  : string_(string.value_)
58  , begin_(0)
59  , end_(string_.size())
60  , textdomain_()
61  , translatable_(false)
62  , countable_(false)
63  , count_(0)
64 {
65  if(string.translatable_) {
66  update();
67  }
68 }
69 
70 static std::string mark = std::string(TRANSLATABLE_PART, 1) + UNTRANSLATABLE_PART + ID_TRANSLATABLE_PART + PLURAL_PART;
71 
73 {
74  unsigned int id;
75 
76  if(begin_ == string_.size()) {
77  return;
78  }
79 
80  switch(string_[begin_]) {
81  case TRANSLATABLE_PART: {
82  // Format: [TRANSLATABLE_PART]textdomain[TEXTDOMAIN_SEPARATOR]msgid[...]
83  std::string::size_type textdomain_end = string_.find(TEXTDOMAIN_SEPARATOR, begin_ + 1);
84 
85  if(textdomain_end == std::string::npos || textdomain_end >= string_.size() - 1) {
86  ERR_CF << "Error: invalid string: " << string_;
87  begin_ = string_.size();
88  return;
89  }
90 
91  end_ = string_.find_first_of(mark, textdomain_end + 1);
92  if(end_ == std::string::npos) {
93  end_ = string_.size();
94  }
95 
96  textdomain_ = std::string(string_, begin_ + 1, textdomain_end - begin_ - 1);
97  translatable_ = true;
98  begin_ = textdomain_end + 1;
99 
100  break;
101  }
102  case ID_TRANSLATABLE_PART:
103  // Format: [ID_TRANSLATABLE_PART][2-byte textdomain ID]msgid[...]
104  if(begin_ + 3 >= string_.size()) {
105  ERR_CF << "Error: invalid string: " << string_;
106  begin_ = string_.size();
107  return;
108  }
109 
110  end_ = string_.find_first_of(mark, begin_ + 3);
111  if(end_ == std::string::npos) {
112  end_ = string_.size();
113  }
114 
115  id = static_cast<unsigned char>(string_[begin_ + 1]) + static_cast<unsigned char>(string_[begin_ + 2]) * 256;
116  if(id >= id_to_textdomain.size()) {
117  ERR_CF << "Error: invalid string: " << string_;
118  begin_ = string_.size();
119  return;
120  }
121 
122  textdomain_ = id_to_textdomain[id];
123  begin_ += 3;
124  translatable_ = true;
125 
126  break;
127 
128  case UNTRANSLATABLE_PART:
129  end_ = string_.find_first_of(mark, begin_ + 1);
130  if(end_ == std::string::npos) {
131  end_ = string_.size();
132  }
133 
134  if(end_ <= begin_ + 1) {
135  ERR_CF << "Error: invalid string: " << string_;
136  begin_ = string_.size();
137  return;
138  }
139 
140  translatable_ = false;
141  textdomain_ = "";
142  begin_ += 1;
143  break;
144 
145  case PLURAL_PART:
146  begin_ = string_.find_first_of(mark, end_ + 5);
147  if(begin_ == std::string::npos) {
148  begin_ = string_.size();
149  }
150 
151  if(string_[begin_] == PLURAL_PART) {
152  ERR_CF << "Error: invalid string: " << string_;
153  begin_ = string_.size();
154  return;
155  }
156 
157  update();
158  break;
159 
160  default:
161  end_ = string_.size();
162  translatable_ = false;
163  textdomain_ = "";
164  break;
165  }
166 
167  if(translatable_ && string_[end_] == PLURAL_PART) {
168  // Format: [PLURAL_PART][4-byte count]msgid_plural[...]
169  if(end_ + 5 >= string_.size()) {
170  ERR_CF << "Error: invalid string: " << string_;
171  begin_ = string_.size();
172  return;
173  }
174 
175  std::string::size_type real_end = string_.find_first_of(mark, end_ + 6);
176  if(real_end < string_.size() && string_[real_end] == PLURAL_PART) {
177  ERR_CF << "Error: invalid string: " << string_;
178  begin_ = string_.size();
179  return;
180  }
181 
182  countable_ = true;
183 
184  union {
185  int32_t count;
186  char data[4];
187  } cvt;
188 
189  std::copy_n(string_.data() + end_ + 1, 4, cvt.data);
190  count_ = cvt.count;
191  } else {
192  countable_ = false;
193  count_ = 0;
194  }
195 }
196 
197 std::string::const_iterator t_string_base::walker::plural_begin() const
198 {
199  if(!countable_) {
200  return begin();
201  }
202 
203  return end() + 5;
204 }
205 
206 std::string::const_iterator t_string_base::walker::plural_end() const
207 {
208  if(!countable_) {
209  return end();
210  }
211 
212  std::string::size_type pl_end = string_.find_first_of(mark, end_ + 5);
213  if(pl_end == std::string::npos) {
214  pl_end = string_.size();
215  }
216 
217  return string_.begin() + pl_end;
218 }
219 
221  : value_()
224  , translatable_(false)
225  , last_untranslatable_(false)
226 {
227 }
228 
230 {
231 }
232 
234  : value_(string.value_)
235  , translated_value_(string.translated_value_)
236  , translation_timestamp_(string.translation_timestamp_)
237  , translatable_(string.translatable_)
238  , last_untranslatable_(string.last_untranslatable_)
239 {
240 }
241 
242 t_string_base::t_string_base(const std::string& string)
243  : value_(string)
244  , translated_value_()
245  , translation_timestamp_(0)
246  , translatable_(false)
247  , last_untranslatable_(false)
248 {
249 }
250 
251 t_string_base::t_string_base(const std::string& string, const std::string& textdomain)
252  : value_(1, ID_TRANSLATABLE_PART)
253  , translated_value_()
254  , translation_timestamp_(0)
255  , translatable_(true)
256  , last_untranslatable_(false)
257 {
258  if(string.empty()) {
259  value_.clear();
260  translatable_ = false;
261  return;
262  }
263 
264  std::map<std::string, unsigned int>::const_iterator idi = textdomain_to_id.find(textdomain);
265  unsigned int id;
266 
267  if(idi == textdomain_to_id.end()) {
268  id = id_to_textdomain.size();
269  textdomain_to_id[textdomain] = id;
270  id_to_textdomain.push_back(textdomain);
271  } else {
272  id = idi->second;
273  }
274 
275  value_ += static_cast<char>(id & 0xff);
276  value_ += static_cast<char>(id >> 8);
277  value_ += string;
278 }
279 
280 t_string_base::t_string_base(const std::string& sing, const std::string& pl, int count, const std::string& textdomain)
281  : value_(1, ID_TRANSLATABLE_PART)
282  , translated_value_()
283  , translation_timestamp_(0)
284  , translatable_(true)
285  , last_untranslatable_(false)
286 {
287  if(sing.empty() && pl.empty()) {
288  value_.clear();
289  translatable_ = false;
290  return;
291  }
292 
293  std::map<std::string, unsigned int>::const_iterator idi = textdomain_to_id.find(textdomain);
294  unsigned int id;
295 
296  if(idi == textdomain_to_id.end()) {
297  id = id_to_textdomain.size();
298  textdomain_to_id[textdomain] = id;
299  id_to_textdomain.push_back(textdomain);
300  } else {
301  id = idi->second;
302  }
303 
304  value_ += static_cast<char>(id & 0xff);
305  value_ += static_cast<char>(id >> 8);
306  value_ += sing;
307  value_ += PLURAL_PART;
308 
309  union {
310  int32_t count;
311  char data[4];
312  } cvt;
313 
314  cvt.count = count;
315  for(char c : cvt.data) {
316  value_ += c;
317  }
318 
319  value_ += pl;
320 }
321 
322 t_string_base::t_string_base(const char* string)
323  : value_(string)
324  , translated_value_()
325  , translation_timestamp_(0)
326  , translatable_(false)
327  , last_untranslatable_(false)
328 {
329 }
330 
332 {
333  t_string_base orig(string);
334 
335  if(!string.empty() && (string[0] == TRANSLATABLE_PART || string[0] == UNTRANSLATABLE_PART)) {
336  orig.translatable_ = true;
337  } else {
338  orig.translatable_ = false;
339  }
340 
341  t_string_base res;
342 
343  for(walker w(orig); !w.eos(); w.next()) {
344  std::string substr(w.begin(), w.end());
345 
346  if(w.translatable()) {
347  res += t_string_base(substr, w.textdomain());
348  } else {
349  res += substr;
350  }
351  }
352 
353  return res;
354 }
355 
356 std::string t_string_base::base_str() const
357 {
358  std::string res;
359  for(walker w(*this); !w.eos(); w.next()) {
360  res += std::string(w.begin(), w.end());
361  }
362 
363  return res;
364 }
365 
366 std::string t_string_base::to_serialized() const
367 {
368  t_string_base res;
369 
370  for(walker w(*this); !w.eos(); w.next()) {
371  t_string_base chunk;
372 
373  std::string substr(w.begin(), w.end());
374  if(w.translatable()) {
375  chunk.translatable_ = true;
376  chunk.last_untranslatable_ = false;
377  chunk.value_ = TRANSLATABLE_PART + w.textdomain() + TEXTDOMAIN_SEPARATOR + substr;
378  } else {
379  chunk.translatable_ = false;
380  chunk.value_ = substr;
381  }
382 
383  res += chunk;
384  }
385 
386  return res.value();
387 }
388 
390 {
391  value_ = string.value_;
392  translated_value_ = string.translated_value_;
393  translation_timestamp_ = string.translation_timestamp_;
394  translatable_ = string.translatable_;
395  last_untranslatable_ = string.last_untranslatable_;
396 
397  return *this;
398 }
399 
400 t_string_base& t_string_base::operator=(const std::string& string)
401 {
402  value_ = string;
403  translated_value_ = "";
405  translatable_ = false;
406  last_untranslatable_ = false;
407 
408  return *this;
409 }
410 
412 {
413  value_ = string;
414  translated_value_ = "";
416  translatable_ = false;
417  last_untranslatable_ = false;
418 
419  return *this;
420 }
421 
423 {
424  t_string_base res(*this);
425  res += string;
426  return res;
427 }
428 
429 t_string_base t_string_base::operator+(const std::string& string) const
430 {
431  t_string_base res(*this);
432  res += string;
433  return res;
434 }
435 
436 t_string_base t_string_base::operator+(const char* string) const
437 {
438  t_string_base res(*this);
439  res += string;
440  return res;
441 }
442 
444 {
445  if(string.value_.empty()) {
446  return *this;
447  }
448 
449  if(value_.empty()) {
450  *this = string;
451  return *this;
452  }
453 
454  if(translatable_ || string.translatable_) {
455  if(!translatable_) {
456  value_ = UNTRANSLATABLE_PART + value_;
457  translatable_ = true;
458  last_untranslatable_ = true;
459  } else {
460  translated_value_ = "";
461  }
462 
463  if(string.translatable_) {
464  if(last_untranslatable_ && string.value_[0] == UNTRANSLATABLE_PART) {
465  value_.append(string.value_.begin() + 1, string.value_.end());
466  } else {
467  value_ += string.value_;
468  }
469 
470  last_untranslatable_ = string.last_untranslatable_;
471  } else {
472  if(!last_untranslatable_) {
473  value_ += UNTRANSLATABLE_PART;
474  last_untranslatable_ = true;
475  }
476 
477  value_ += string.value_;
478  }
479  } else {
480  value_ += string.value_;
481  }
482 
483  return *this;
484 }
485 
486 t_string_base& t_string_base::operator+=(const std::string& string)
487 {
488  if(string.empty()) {
489  return *this;
490  }
491 
492  if(value_.empty()) {
493  *this = string;
494  return *this;
495  }
496 
497  if(translatable_) {
498  if(!last_untranslatable_) {
499  value_ += UNTRANSLATABLE_PART;
500  last_untranslatable_ = true;
501  }
502 
503  value_ += string;
504  translated_value_ = "";
505  } else {
506  value_ += string;
507  }
508 
509  return *this;
510 }
511 
513 {
514  if(string[0] == 0) {
515  return *this;
516  }
517 
518  if(value_.empty()) {
519  *this = string;
520  return *this;
521  }
522 
523  if(translatable_) {
524  if(!last_untranslatable_) {
525  value_ += UNTRANSLATABLE_PART;
526  last_untranslatable_ = true;
527  }
528 
529  value_ += string;
530  translated_value_ = "";
531  } else {
532  value_ += string;
533  }
534 
535  return *this;
536 }
537 
539 {
540  return that.translatable_ == translatable_ && that.value_ == value_;
541 }
542 
543 bool t_string_base::operator==(const std::string& that) const
544 {
545  return !translatable_ && value_ == that;
546 }
547 
548 bool t_string_base::operator==(const char* that) const
549 {
550  return !translatable_ && value_ == that;
551 }
552 
554 {
555  return value_ < that.value_;
556 }
557 
558 const std::string& t_string_base::str() const
559 {
560  if(!translatable_) {
561  return value_;
562  }
563 
565  return translated_value_;
566  }
567 
568  translated_value_.clear();
569 
570  for(walker w(*this); !w.eos(); w.next()) {
571  std::string part(w.begin(), w.end());
572 
573  if(w.translatable()) {
574  if(w.countable()) {
575  std::string plural(w.plural_begin(), w.plural_end());
577  translation::dsngettext(w.textdomain().c_str(), part.c_str(), plural.c_str(), w.count());
578  } else {
580  translation::dsgettext(w.textdomain().c_str(), part.c_str());
581  }
582  } else {
583  translated_value_ += part;
584  }
585  }
586 
588  return translated_value_;
589 }
590 
592  : val_(new base())
593 {
594 }
595 
597 {
598 }
599 
601  : val_(o.val_)
602 {
603 }
604 
606  : val_(new base(o))
607 {
608 }
609 
610 t_string::t_string(const char* o)
611  : val_(new base(o))
612 {
613 }
614 
615 t_string::t_string(const std::string& o)
616  : val_(new base(o))
617 {
618 }
619 
620 t_string::t_string(const std::string& o, const std::string& textdomain)
621  : val_(new base(o, textdomain))
622 {
623 }
624 
625 t_string::t_string(const std::string& s, const std::string& pl, int c, const std::string& textdomain)
626  : val_(new base(s, pl, c, textdomain))
627 {
628 }
629 
631 {
632  val_ = o.val_;
633  return *this;
634 }
635 
637 {
638  t_string o2(o);
639  swap(o2);
640  return *this;
641 }
642 
643 void t_string::add_textdomain(const std::string& name, const std::string& path)
644 {
645  LOG_CF << "Binding textdomain " << name << " to path " << path;
646 
647  // Register and (re-)bind this textdomain
648  translation::bind_textdomain(name.c_str(), path.c_str(), "UTF-8");
649 }
650 
652 {
654 }
655 
/**
 * Non-member swap for t_string; forwards to the member function t_string::swap.
 *
 * @param lhs First string; swapped with @a rhs.
 * @param rhs Second string; swapped with @a lhs.
 */
void swap(t_string& lhs, t_string& rhs)
{
	lhs.swap(rhs);
}
660 
661 std::ostream& operator<<(std::ostream& stream, const t_string_base& string)
662 {
663  stream << string.str();
664  return stream;
665 }
const route_iterator begin_
Definition: move.cpp:303
std::string::const_iterator plural_end() const
Definition: tstring.cpp:206
walker(const t_string_base &string)
Definition: tstring.cpp:56
std::string::const_iterator plural_begin() const
Definition: tstring.cpp:197
Helper class for translatable strings.
Definition: tstring.hpp:27
t_string_base & operator=(const t_string_base &)
Default implementation, but defined out-of-line for efficiency reasons.
Definition: tstring.cpp:389
bool last_untranslatable_
Definition: tstring.hpp:119
static std::map< std::string, unsigned int > textdomain_to_id
Definition: tstring.hpp:121
std::string translated_value_
Definition: tstring.hpp:117
t_string_base & operator+=(const t_string_base &)
Definition: tstring.cpp:443
std::string to_serialized() const
Definition: tstring.cpp:366
bool operator==(const t_string_base &) const
Definition: tstring.cpp:538
std::string value_
Definition: tstring.hpp:116
std::string base_str() const
Definition: tstring.cpp:356
const std::string & value() const
Definition: tstring.hpp:110
bool empty() const
Definition: tstring.hpp:100
bool operator<(const t_string_base &string) const
Definition: tstring.cpp:553
static std::vector< std::string > id_to_textdomain
Definition: tstring.hpp:120
~t_string_base()
Default implementation, but defined out-of-line for efficiency reasons.
Definition: tstring.cpp:229
unsigned translation_timestamp_
Definition: tstring.hpp:118
bool translatable_
Definition: tstring.hpp:119
std::size_t hash_value() const
Definition: tstring.cpp:47
const std::string & str() const
Definition: tstring.cpp:558
static t_string_base from_serialized(const std::string &string)
Definition: tstring.cpp:331
t_string_base operator+(const t_string_base &) const
Definition: tstring.cpp:422
static void reset_translations()
Definition: tstring.cpp:651
static void add_textdomain(const std::string &name, const std::string &path)
Definition: tstring.cpp:643
void swap(t_string &other)
Definition: tstring.hpp:200
~t_string()
Default implementation, but defined out-of-line for efficiency reasons.
Definition: tstring.cpp:596
std::shared_ptr< const t_string_base > val_
Definition: tstring.hpp:204
t_string & operator=(const t_string &)
Default implementation, but defined out-of-line for efficiency reasons.
Definition: tstring.cpp:630
t_string()
Default implementation, but defined out-of-line for efficiency reasons.
Definition: tstring.cpp:591
int w
std::string id
Text to match against addon_info.tags()
Definition: manager.cpp:198
Standard logging facilities (interface).
static void update()
std::string path
Definition: filesystem.cpp:90
void bind_textdomain(const char *domain, const char *directory, const char *)
Definition: gettext.cpp:479
std::string dsgettext(const char *domainname, const char *msgid)
Definition: gettext.cpp:434
std::string dsngettext(const char *domainname, const char *singular, const char *plural, int n)
Definition: gettext.cpp:464
std::size_t size(const std::string &str)
Length in characters of a UTF-8 string.
Definition: unicode.cpp:85
std::string_view data
Definition: picture.cpp:178
mock_char c
static map_location::direction s
static std::string mark
Definition: tstring.cpp:70
static unsigned language_counter
Definition: tstring.cpp:36
void swap(t_string &lhs, t_string &rhs)
Implement non-member swap function for std::swap (calls t_string::swap).
Definition: tstring.cpp:656
std::ostream & operator<<(std::ostream &stream, const t_string_base &string)
Definition: tstring.cpp:661
#define LOG_CF
Definition: tstring.cpp:33
#define ERR_CF
Definition: tstring.cpp:34
static lg::log_domain log_config("config")