The Battle for Wesnoth  1.17.23+dev
gettext.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2003 - 2023
3  by David White <dave@whitevine.net>
4  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY.
12 
13  See the COPYING file for more details.
14 */
15 #include "gettext.hpp"
16 #include "log.hpp"
17 #include "filesystem.hpp"
18 
19 #include <algorithm>
20 #include <iomanip>
21 #include <iterator>
22 #include <fstream>
23 #include <locale>
24 #include <map>
25 #include <boost/locale.hpp>
26 #include <set>
27 #include <type_traits>
28 
29 #if defined(__GNUC__)
30 #pragma GCC diagnostic push
31 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
32 #endif
33 #include "spirit_po/spirit_po.hpp"
34 #if defined(__GNUC__)
35 #pragma GCC diagnostic pop
36 #endif
37 
38 #define DBG_G LOG_STREAM(debug, lg::general())
39 #define LOG_G LOG_STREAM(info, lg::general())
40 #define WRN_G LOG_STREAM(warn, lg::general())
41 #define ERR_G LOG_STREAM(err, lg::general())
42 
43 namespace bl = boost::locale;
44 namespace
45 {
46 
47  class default_utf8_locale_name
48  {
49  public:
50  static const std::string& name()
51  {
52  //Use pointers because we don't want it to be destructed at program end.
53  static default_utf8_locale_name* lname = new default_utf8_locale_name();
54  return lname->name_;
55  }
56  private:
57  default_utf8_locale_name()
58  : name_()
59  {
60  LOG_G << "Generating default locale";
61  try
62  {
63  //NOTE: the default_locale objects needs to live as least as long as the locale_info object. Otherwise the program will segfault.
64  std::locale default_locale = bl::generator().generate("");
65  const bl::info& locale_info = std::use_facet<bl::info>(default_locale);
66  name_ += locale_info.language();
67  if(!locale_info.country().empty())
68  name_ += "_" + locale_info.country();
69  name_ += ".UTF-8";
70  if(!locale_info.variant().empty())
71  name_ += "@" + locale_info.variant();
72  }
73  catch(const std::exception& e)
74  {
75  ERR_G << "Failed to generate default locale string. message:" << e.what();
76  }
77  LOG_G << "Finished generating default locale, default is now '" << name_ << "'";
78  }
79 
80  std::string name_;
81  };
82  class wesnoth_message_format : public bl::message_format<char>
83  {
84  public:
85  wesnoth_message_format(std::locale base, const std::set<std::string>& domains, const std::set<std::string>& paths)
86  : base_loc_(base)
87  {
88  const bl::info& inf = std::use_facet<bl::info>(base);
89  if(inf.language() == "c") {
90  return;
91  }
92  std::string lang_name_short = inf.language();
93  std::string lang_name_long = lang_name_short;
94  if(!inf.country().empty()) {
95  lang_name_long += '_';
96  lang_name_long += inf.country();
97  }
98  if(!inf.variant().empty()) {
99  lang_name_long += '@';
100  lang_name_long += inf.variant();
101  lang_name_short += '@';
102  lang_name_short += inf.variant();
103  }
104  DBG_G << "Loading po files for language " << lang_name_long;
105  for(auto& domain : domains) {
106  DBG_G << "Searching for po files for domain " << domain;
107  std::string path;
108  for(auto base_path : paths) {
109  DBG_G << "Searching in dir " << base_path;
110  if(base_path[base_path.length()-1] != '/') {
111  base_path += '/';
112  }
113  base_path += domain;
114  base_path += '/';
115  path = base_path + lang_name_long + ".po";
116  DBG_G << " Trying path " << path;
118  break;
119  }
120  path = base_path + lang_name_short + ".po";
121  DBG_G << " Trying path " << path;
123  break;
124  }
125  }
127  continue;
128  }
129  LOG_G << "Loading language file from " << path;
130  try {
132  po_file->exceptions(std::ios::badbit);
133  const auto& cat = spirit_po::default_catalog::from_istream(*po_file);
134  extra_messages_.emplace(get_base().domain(domain), cat);
135  } catch(const spirit_po::catalog_exception& e) {
136  // Treat any parsing error in the same way as the file not existing - just leave
137  // this domain untranslated but continue to load other domains.
138  log_po_error(lang_name_long, domain, e.what());
139  } catch(const std::ios::failure&) {
140  log_po_error(lang_name_long, domain, strerror(errno));
141  }
142  }
143  }
144 
145  static void log_po_error(const std::string& lang, const std::string& dom, const std::string& detail) {
146  ERR_G << "Error opening language file for " << lang << ", textdomain " << dom
147  << ":\n " << detail;
148  }
149 
150  const char* get(int domain_id, const char* ctx, const char* msg_id) const override
151  {
152  auto& base = get_base();
153  const char* msg = base.get(domain_id, ctx, msg_id);
154  if(msg == nullptr) {
155  auto iter = extra_messages_.find(domain_id);
156  if(iter == extra_messages_.end()) {
157  return nullptr;
158  }
159  auto& catalog = iter->second;
160  const char* lookup = ctx ? catalog.pgettext(ctx, msg_id) : catalog.gettext(msg_id);
161  if(lookup != msg_id) {
162  // (p)gettext returns the input pointer if the string was not found
163  msg = lookup;
164  }
165  }
166  return msg;
167  }
168 
169 #if BOOST_VERSION < 108300
170  const char* get(int domain_id, const char* ctx, const char* sid, int n) const override
171 #else
172  const char* get(int domain_id, const char* ctx, const char* sid, bl::count_type n) const override
173 #endif
174  {
175  auto& base = get_base();
176  const char* msg = base.get(domain_id, ctx, sid, n);
177  if(msg == nullptr) {
178  auto iter = extra_messages_.find(domain_id);
179  if(iter == extra_messages_.end()) {
180  return nullptr;
181  }
182  auto& catalog = iter->second;
183  const char* lookup = ctx ? catalog.npgettext(ctx, sid, sid, n) : catalog.ngettext(sid, sid, n);
184  if(lookup != sid) {
185  // n(p)gettext returns one of the input pointers if the string was not found
186  msg = lookup;
187  }
188  }
189  return msg;
190  }
191 
192  int domain(const std::string& domain) const override
193  {
194  auto& base = get_base();
195  return base.domain(domain);
196  }
197 
198  const char* convert(const char* msg, std::string& buffer) const override
199  {
200  auto& base = get_base();
201  return base.convert(msg, buffer);
202  }
203  private:
204  const bl::message_format<char>& get_base() const
205  {
206  return std::use_facet<bl::message_format<char>>(base_loc_);
207  }
208 
209  std::locale base_loc_;
210  std::map<int, spirit_po::default_catalog> extra_messages_;
211  };
212  struct translation_manager
213  {
214  translation_manager()
215  : loaded_paths_()
216  , loaded_domains_()
217  , current_language_(default_utf8_locale_name::name())
218  , generator_()
219  , current_locale_()
220  , is_dirty_(true)
221  {
222  const bl::localization_backend_manager& g_mgr = bl::localization_backend_manager::global();
223  for(const std::string& name : g_mgr.get_all_backends())
224  {
225  LOG_G << "Found boost locale backend: '" << name << "'";
226  }
227 
228  generator_.use_ansi_encoding(false);
229 #if BOOST_VERSION < 108100
230  generator_.categories(bl::message_facet | bl::information_facet | bl::collation_facet | bl::formatting_facet | bl::convert_facet);
231  generator_.characters(bl::char_facet);
232 #else
233  generator_.categories(bl::category_t::message | bl::category_t::information | bl::category_t::collation | bl::category_t::formatting | bl::category_t::convert);
234  generator_.characters(bl::char_facet_t::char_f);
235 #endif
236  // We cannot have current_locale_ be a non boost-generated locale since it might not supply
237  // the bl::info facet. As soon as we add message paths, update_locale_internal might fail,
238  // for example because of invalid .mo files. So make sure we call it at least once before adding paths/domains
239  update_locale_internal();
240  }
241 
242  void add_messages_domain(const std::string& domain)
243  {
244  if(loaded_domains_.find(domain) != loaded_domains_.end())
245  {
246  return;
247  }
248 
249  if(domain.find('/') != std::string::npos)
250  {
251  // Forward slash has a specific meaning in Boost.Locale domain
252  // names, specifying the encoding. We use UTF-8 for everything
253  // so we can't possibly support that, and odds are it's a user
254  // mistake (as in bug #23839).
255  ERR_G << "illegal textdomain name '" << domain
256  << "', skipping textdomain";
257  return;
258  }
259 
260  generator_.add_messages_domain(domain);
261  loaded_domains_.insert(domain);
262  }
263 
264  void add_messages_path(const std::string& path)
265  {
266  if(loaded_paths_.find(path) != loaded_paths_.end())
267  {
268  return;
269  }
270  generator_.add_messages_path(path);
271  loaded_paths_.insert(path);
272  }
273 
274  void set_default_messages_domain(const std::string& domain)
275  {
276  generator_.set_default_messages_domain(domain);
277  update_locale();
278  }
279 
280  void set_language(const std::string& language)
281  {
282  std::string::size_type at_pos = language.rfind('@');
283  if(language.empty())
284  {
285  current_language_ = default_utf8_locale_name::name();
286  }
287  else if(at_pos != std::string::npos)
288  {
289  current_language_ = language.substr(0, at_pos) + ".UTF-8" + language.substr(at_pos);
290  }
291  else
292  {
293  current_language_ = language + ".UTF-8";
294  }
295  update_locale();
296  }
297 
298  void update_locale()
299  {
300  is_dirty_ = true;
301  }
302 
303  /* This is called three times: once during the constructor, before any .mo files' paths have
304  * been added to the generator, once after adding the mainline .mo files, and once more
305  * after adding all add-ons. Corrupt .mo files might make the called functions throw, and so
306  * this might fail as soon as we've added message paths.
307  *
308  * Throwing exceptions from here is (in 1.15.18) going to end up in wesnoth.cpp's "Caught
309  * general ... exception" handler, so the effect of letting an exception escape this
310  * function is an immediate exit. Given that, it doesn't seem useful to change the assert
311  * to a throw, at least not within the 1.16 branch.
312  *
313  * Postcondition: current_locale_ is a valid boost-generated locale, supplying the bl::info
314  * facet. If there are corrupt .mo files, the locale might have no translations loaded.
315  */
316  void update_locale_internal()
317  {
318  try
319  {
320  LOG_G << "attempting to generate locale by name '" << current_language_ << "'";
321  current_locale_ = generator_.generate(current_language_);
322  current_locale_ = std::locale(current_locale_, new wesnoth_message_format(current_locale_, loaded_domains_, loaded_paths_));
323  const bl::info& info = std::use_facet<bl::info>(current_locale_);
324  LOG_G << "updated locale to '" << current_language_ << "' locale is now '" << current_locale_.name() << "' ( "
325  << "name='" << info.name()
326  << "' country='" << info.country()
327  << "' language='" << info.language()
328  << "' encoding='" << info.encoding()
329  << "' variant='" << info.variant() << "')";
330  }
331  catch(const bl::conv::conversion_error&)
332  {
333  assert(std::has_facet<bl::info>(current_locale_));
334  const bl::info& info = std::use_facet<bl::info>(current_locale_);
335  ERR_G << "Failed to update locale due to conversion error, locale is now: "
336  << "name='" << info.name()
337  << "' country='" << info.country()
338  << "' language='" << info.language()
339  << "' encoding='" << info.encoding()
340  << "' variant='" << info.variant()
341  << "'";
342  }
343  catch(const std::runtime_error&)
344  {
345  assert(std::has_facet<bl::info>(current_locale_));
346  const bl::info& info = std::use_facet<bl::info>(current_locale_);
347  ERR_G << "Failed to update locale due to runtime error, locale is now: "
348  << "name='" << info.name()
349  << "' country='" << info.country()
350  << "' language='" << info.language()
351  << "' encoding='" << info.encoding()
352  << "' variant='" << info.variant()
353  << "'";
354  }
355  is_dirty_ = false;
356  }
357 
358  std::string debug_description()
359  {
360  std::stringstream res;
361  const bl::localization_backend_manager& g_mgr = bl::localization_backend_manager::global();
362  for(const std::string& name : g_mgr.get_all_backends())
363  {
364  res << "has backend: '" << name << "',";
365  }
366  if(std::has_facet<bl::info>(current_locale_)) {
367  const bl::info& info = std::use_facet<bl::info>(current_locale_);
368  res << " locale: (name='" << info.name()
369  << "' country='" << info.country()
370  << "' language='" << info.language()
371  << "' encoding='" << info.encoding()
372  << "' variant='" << info.variant()
373  << "'),";
374  }
375  if(std::has_facet<bl::collator<char>>(current_locale_)) {
376  res << "has bl::collator<char> facet, ";
377  }
378 #if BOOST_VERSION < 108100
379  res << "generator categories='" << generator_.categories() << "'";
380 #else
381  res << "generator categories='" <<
382  static_cast<std::underlying_type<bl::category_t>::type>(generator_.categories()) << "'";
383 #endif
384  return res.str();
385  }
386 
387  const std::locale& get_locale()
388  {
389  if(is_dirty_)
390  {
391  update_locale_internal();
392  }
393  return current_locale_;
394  }
395 
396  private:
397  std::set<std::string> loaded_paths_;
398  std::set<std::string> loaded_domains_;
399  std::string current_language_;
400  bl::generator generator_;
401  std::locale current_locale_;
402  bool is_dirty_;
403  };
404 
405  translation_manager& get_manager()
406  {
407  static translation_manager* mng = new translation_manager();
408  return *mng;
409  }
410 
/**
 * Returns a copy of @a str with the ASCII letters 'A'..'Z' lowercased.
 * All other bytes, including those of multi-byte UTF-8 sequences, are kept as they are.
 */
std::string ascii_to_lowercase(const std::string& str)
{
    std::string lowered(str);
    for(char& ch : lowered) {
        if('A' <= ch && ch <= 'Z') {
            // Setting bit 0x20 maps an ASCII uppercase letter to its lowercase form.
            ch = static_cast<char>(ch | 0x20);
        }
    }
    return lowered;
}
422 }
423 
424 namespace translation
425 {
426 
427 std::string dgettext(const char* domain, const char* msgid)
428 {
429  return bl::dgettext(domain, msgid, get_manager().get_locale());
430 }
431 std::string egettext(char const *msgid)
432 {
433  return msgid[0] == '\0' ? msgid : bl::gettext(msgid, get_manager().get_locale());
434 }
435 
436 std::string dsgettext (const char * domainname, const char *msgid)
437 {
438  std::string msgval = dgettext (domainname, msgid);
439  if (msgval == msgid) {
440  const char* firsthat = std::strchr (msgid, '^');
441  if (firsthat == nullptr)
442  msgval = msgid;
443  else
444  msgval = firsthat + 1;
445  }
446  return msgval;
447 }
448 
449 namespace {
450 
/**
 * Reports which of the two message ids, if any, @a str is equal to.
 *
 * @return @a singular if @a str equals it, otherwise @a plural if @a str
 *         equals that, otherwise nullptr.
 */
inline const char* is_unlocalized_string2(const std::string& str, const char* singular, const char* plural)
{
    return str == singular ? singular
         : str == plural   ? plural
         : nullptr;
}
463 
464 }
465 
466 std::string dsngettext (const char * domainname, const char *singular, const char *plural, int n)
467 {
468  std::string msgval = bl::dngettext(domainname, singular, plural, n, get_manager().get_locale());
469 
470  auto original = is_unlocalized_string2(msgval, singular, plural);
471  if (original) {
472  const char* firsthat = std::strchr (original, '^');
473  if (firsthat == nullptr)
474  msgval = original;
475  else
476  msgval = firsthat + 1;
477  }
478  return msgval;
479 }
480 
481 void bind_textdomain(const char* domain, const char* directory, const char* /*encoding*/)
482 {
483  LOG_G << "adding textdomain '" << domain << "' in directory '" << directory << "'";
484  get_manager().add_messages_domain(domain);
485  get_manager().add_messages_path(directory);
486  get_manager().update_locale();
487 }
488 
489 void set_default_textdomain(const char* domain)
490 {
491  LOG_G << "set_default_textdomain: '" << domain << "'";
492  get_manager().set_default_messages_domain(domain);
493 }
494 
495 
496 void set_language(const std::string& language, const std::vector<std::string>* /*alternates*/)
497 {
498  // why should we need alternates? which languages we support should only be related
499  // to which languages we ship with and not which the os supports
500  LOG_G << "setting language to '" << language << "'";
501  get_manager().set_language(language);
502 }
503 
504 int compare(const std::string& s1, const std::string& s2)
505 {
506 
507  try {
508  return std::use_facet<std::collate<char>>(get_manager().get_locale()).compare(s1.c_str(), s1.c_str() + s1.size(), s2.c_str(), s2.c_str() + s2.size());
509  } catch(const std::bad_cast&) {
510  static bool bad_cast_once = false;
511 
512  if(!bad_cast_once) {
513  ERR_G << "locale set-up for compare() is broken, falling back to std::string::compare()";
514  bad_cast_once = true;
515  }
516 
517  return s1.compare(s2);
518  }
519 }
520 
521 int icompare(const std::string& s1, const std::string& s2)
522 {
523  // todo: maybe we should replace this preprocessor check with a std::has_facet<bl::collator<char>> check?
524 #ifdef __APPLE__
525  // https://github.com/wesnoth/wesnoth/issues/2094
526  return compare(ascii_to_lowercase(s1), ascii_to_lowercase(s2));
527 #else
528 
529  try {
530 #if BOOST_VERSION < 108100
531  return std::use_facet<bl::collator<char>>(get_manager().get_locale()).compare(
532  bl::collator_base::secondary, s1, s2);
533 #else
534  return std::use_facet<bl::collator<char>>(get_manager().get_locale()).compare(
535  bl::collate_level::secondary, s1, s2);
536 #endif
537  } catch(const std::bad_cast&) {
538  static bool bad_cast_once = false;
539 
540  if(!bad_cast_once) {
541  ERR_G << "locale set-up for icompare() is broken, falling back to std::string::compare()";
542 
543  try { //just to be safe.
544  ERR_G << get_manager().debug_description();
545  } catch (const std::exception& e) {
546  ERR_G << e.what();
547  }
548  bad_cast_once = true;
549  }
550 
551  // Let's convert at least ASCII letters to lowercase to get a somewhat case-insensitive comparison.
552  return ascii_to_lowercase(s1).compare(ascii_to_lowercase(s2));
553  }
554 #endif
555 }
556 
557 std::string strftime(const std::string& format, const std::tm* time)
558 {
559  std::basic_ostringstream<char> dummy;
560  dummy.imbue(get_manager().get_locale()); // TODO: Calling imbue() with hard-coded locale appears to work with put_time in glibc, but not with get_locale()...
561  // Revert to use of boost (from 1.14) instead of std::put_time() because the latter does not appear to handle locale properly in Linux
562  dummy << bl::as::ftime(format) << mktime(const_cast<std::tm*>(time));
563 
564  return dummy.str();
565 }
566 
567 bool ci_search(const std::string& s1, const std::string& s2)
568 {
569  const std::locale& locale = get_manager().get_locale();
570 
571  std::string ls1 = bl::to_lower(s1, locale);
572  std::string ls2 = bl::to_lower(s2, locale);
573 
574  return std::search(ls1.begin(), ls1.end(),
575  ls2.begin(), ls2.end()) != ls1.end();
576 }
577 
579 {
580  return std::use_facet<boost::locale::info>(get_manager().get_locale());
581 }
582 }
static catalog from_istream(std::istream &is, warning_channel_type w=warning_channel_type())
Definition: catalog.hpp:269
Declarations for File-IO.
#define ERR_G
Definition: gettext.cpp:41
#define LOG_G
Definition: gettext.cpp:39
#define DBG_G
Definition: gettext.cpp:38
const language_def & get_locale()
Definition: language.cpp:329
Standard logging facilities (interface).
CURSOR_TYPE get()
Definition: cursor.cpp:216
filesystem::scoped_istream istream_file(const std::string &fname, bool treat_failure_as_error)
static bool file_exists(const bfs::path &fpath)
Definition: filesystem.cpp:321
std::unique_ptr< std::istream > scoped_istream
Definition: filesystem.hpp:50
std::string path
Definition: filesystem.cpp:86
static log_domain dom("general")
static domain_map * domains
Definition: log.cpp:221
logger & info()
Definition: log.cpp:238
std::string language()
Definition: general.cpp:535
rng * generator
This generator is automatically synced during synced context.
Definition: random.cpp:61
void set_language(const std::string &language, const std::vector< std::string > *)
Definition: gettext.cpp:496
std::string egettext(char const *msgid)
Definition: gettext.cpp:431
static std::string gettext(const char *str)
Definition: gettext.hpp:60
void bind_textdomain(const char *domain, const char *directory, const char *)
Definition: gettext.cpp:481
void set_default_textdomain(const char *domain)
Definition: gettext.cpp:489
int compare(const std::string &s1, const std::string &s2)
Case-sensitive lexicographical comparison.
Definition: gettext.cpp:504
int icompare(const std::string &s1, const std::string &s2)
Case-insensitive lexicographical comparison.
Definition: gettext.cpp:521
const boost::locale::info & get_effective_locale_info()
A facet that holds general information about the effective locale.
Definition: gettext.cpp:578
std::string strftime(const std::string &format, const std::tm *time)
Definition: gettext.cpp:557
std::string dgettext(const char *domain, const char *msgid)
Definition: gettext.cpp:427
bool ci_search(const std::string &s1, const std::string &s2)
Definition: gettext.cpp:567
std::string dsgettext(const char *domainname, const char *msgid)
Definition: gettext.cpp:436
std::string dsngettext(const char *domainname, const char *singular, const char *plural, int n)
Definition: gettext.cpp:466
static void msg(const char *act, debug_info &i, const char *to="", const char *result="")
Definition: debugger.cpp:110
mock_char c
static map_location::DIRECTION n
#define e