The Battle for Wesnoth  1.19.11+dev
simple_wml.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2008 - 2025
3  by David White <dave@whitevine.net>
4  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY.
12 
13  See the COPYING file for more details.
14 */
15 
16 #include <sstream>
17 
18 #include <boost/iostreams/copy.hpp>
19 #include <boost/iostreams/filtering_stream.hpp>
20 #include <boost/iostreams/filter/bzip2.hpp>
21 #include <boost/iostreams/filter/counter.hpp>
22 #include <boost/iostreams/filter/gzip.hpp>
23 
25 
26 #include "log.hpp"
28 #include "utils/general.hpp"
29 
// Log domain used by this file; the two macros below stream to it at
// error and info severity respectively.
30 static lg::log_domain log_config("config");
31 #define ERR_SWML LOG_STREAM(err, log_config)
32 #define LOG_SWML LOG_STREAM(info, log_config)
33 
34 namespace simple_wml {
35 
// Byte limit checked by uncompress_buffer() while inflating a document;
// exceeding it raises simple_wml::error.
36 std::size_t document::document_size_limit = 40000000;
37 
38 namespace {
39 
40 void debug_delete(node* n) {
41  delete n;
42 }
43 
44 char* uncompress_buffer(const string_span& input, string_span* span)
45 {
46  int nalloc = input.size();
47  int state = 0;
48  try {
49  std::istringstream stream(std::string(input.begin(), input.end()));
50  state = 1;
51  boost::iostreams::filtering_stream<boost::iostreams::input> filter;
52  state = 2;
53  if (!span->empty() && *span->begin() == 'B') {
54  filter.push(boost::iostreams::bzip2_decompressor());
55  } else {
56  filter.push(boost::iostreams::gzip_decompressor());
57  }
58  filter.push(stream);
59  state = 3;
60 
61  const std::size_t chunk_size = input.size() * 10;
62  nalloc = chunk_size;
63  std::vector<char> buf(chunk_size);
64  state = 4;
65  std::size_t len = 0;
66  std::size_t pos = 0;
67  while(filter.good() && (len = filter.read(&buf[pos], chunk_size).gcount()) == chunk_size) {
68  if(pos + chunk_size > document::document_size_limit) {
69  throw error("WML document exceeded size limit during decompression");
70  }
71 
72  pos += len;
73  buf.resize(pos + chunk_size);
74  len = 0;
75  }
76 
77  if(!filter.eof() && !filter.good()) {
78  throw error("failed to uncompress");
79  }
80 
81  pos += len;
82  state = 5;
83  nalloc = pos;
84 
85  buf.resize(pos);
86  state = 6;
87 
88  char* small_out = new char[pos+1];
89  memcpy(small_out, &buf[0], pos);
90  state = 7;
91 
92  small_out[pos] = 0;
93 
94  *span = string_span(small_out, pos);
95  state = 8;
96  return small_out;
97  } catch (const std::bad_alloc& e) {
98  ERR_SWML << "ERROR: bad_alloc caught in uncompress_buffer() state "
99  << state << " alloc bytes " << nalloc << " with input: '"
100  << input << "' " << e.what();
101  throw error("Bad allocation request in uncompress_buffer().");
102  }
103 }
104 
105 char* compress_buffer(const char* input, string_span* span, bool bzip2)
106 {
107  int nalloc = strlen(input);
108  int state = 0;
109  try {
110  std::string in(input);
111  state = 1;
112  std::istringstream istream(in);
113  state = 2;
114  boost::iostreams::filtering_stream<boost::iostreams::output> filter;
115  state = 3;
116  if (bzip2) {
117  filter.push(boost::iostreams::bzip2_compressor());
118  } else {
119  filter.push(boost::iostreams::gzip_compressor());
120  }
121  state = 4;
122  nalloc = in.size()*2 + 80;
123  std::vector<char> buf(nalloc);
124  boost::iostreams::array_sink out(&buf[0], buf.size());
125  filter.push(boost::iostreams::counter());
126  filter.push(out);
127 
128  state = 5;
129 
130  boost::iostreams::copy(istream, filter, buf.size());
131  const int len = filter.component<boost::iostreams::counter>(1)->characters();
132  assert(len < 128*1024*1024);
133  if((!filter.eof() && !filter.good()) || len == static_cast<int>(buf.size())) {
134  throw error("failed to compress");
135  }
136  state = 6;
137  nalloc = len;
138 
139  buf.resize(len);
140  state = 7;
141 
142  char* small_out = new char[len];
143  memcpy(small_out, &buf[0], len);
144  state = 8;
145 
146  *span = string_span(small_out, len);
147  assert(*small_out == (bzip2 ? 'B' : 31));
148  state = 9;
149  return small_out;
150  } catch (const std::bad_alloc& e) {
151  ERR_SWML << "ERROR: bad_alloc caught in compress_buffer() state "
152  << state << " alloc bytes " << nalloc << " with input: '"
153  << input << "' " << e.what();
154  throw error("Bad allocation request in compress_buffer().");
155  }
156 }
157 
158 } // namespace
159 
160 bool string_span::to_bool(bool default_value) const
161 {
162  if(empty()) {
163  return default_value;
164  }
165 
166  if (operator==("no") || operator==("off") || operator==("false") || operator==("0") || operator==("0.0"))
167  return false;
168 
169  return true;
170 }
171 
173 {
174  const int buf_size = 64;
175  if(size() >= buf_size) {
176  return 0;
177  }
178  char buf[64];
179  memcpy(buf, begin(), size());
180  buf[size()] = 0;
181  return atoi(buf);
182 }
183 
184 std::string string_span::to_string() const
185 {
186  return std::string(begin(), end());
187 }
188 
190 {
191  char* buf = new char[size() + 1];
192  memcpy(buf, begin(), size());
193  buf[size()] = 0;
194  return buf;
195 }
196 
197 error::error(const char* msg)
198  : game::error(msg)
199 {
200  ERR_SWML << "ERROR: '" << msg << "'";
201 }
202 
203 std::ostream& operator<<(std::ostream& o, const string_span& s)
204 {
205  o << std::string(s.begin(), s.end());
206  return o;
207 }
208 
209 node::node(document& doc, node* parent) :
210  doc_(&doc),
211  attr_(),
212  parent_(parent),
213  children_(),
214  ordered_children_(),
215  output_cache_()
216 {
217 }
218 
219 #ifdef _MSC_VER
220 #pragma warning (push)
221 #pragma warning (disable: 4706)
222 #endif
// Parsing constructor: reads WML text starting at *str and advances *str past
// everything it consumes. Attributes become entries of attr_, each [tag]
// recursively becomes a child node, and the raw text consumed is remembered in
// output_cache_. Throws simple_wml::error on malformed input or when nesting
// reaches a depth of 1000.
// NOTE(review): listing lines 259, 265 and 363 are absent from this extract
// (the embedded numbering skips them) — confirm against the original file.
223 node::node(document& doc, node* parent, const char** str, int depth) :
224  doc_(&doc),
225  attr_(),
226  parent_(parent),
227  children_(),
228  ordered_children_(),
229  output_cache_()
230 {
231  if(depth >= 1000) {
232  throw error("elements nested too deep");
233  }
234 
// s aliases the caller's cursor, so every advance below is visible to the caller.
235  const char*& s = *str;
236 
237  const char* const begin = s;
238  while(*s) {
239  switch(*s) {
240  case '[': {
// "[/" is a closing tag: this node ends here. Cache the text seen so far and
// leave the cursor just past the closing ']'.
241  if(s[1] == '/') {
242  output_cache_ = string_span(begin, s - begin);
243  s = strchr(s, ']');
244  if(s == nullptr) {
245  throw error("end element unterminated");
246  }
247 
248  ++s;
249  return;
250  }
251 
// Otherwise an opening tag: the name is the text between '[' and ']'.
252  ++s;
253  const char* end = strchr(s, ']');
254  if(end == nullptr) {
255  throw error("unterminated element");
256  }
257 
258  const int list_index = get_children(string_span(s, end - s));
260 
261  s = end + 1;
262 
// The child parses its own body through the same cursor (str), so s has
// already moved past the child's closing tag when the constructor returns.
263  children_[list_index].second.push_back(new node(doc, this, str, depth+1));
264  ordered_children_.emplace_back(list_index, children_[list_index].second.size() - 1);
266 
267  break;
268  }
269  case ' ':
270  case '\t':
271  case '\n':
272  ++s;
273  break;
274  case '#':
// Skip to the end of the line; the '\n' itself is consumed by the next iteration.
275  s = strchr(s, '\n');
276  if(s == nullptr) {
277  throw error("did not find newline after '#'");
278  }
279  break;
280  default: {
// Anything else starts an attribute: name '=' value.
281  const char* end = strchr(s, '=');
282  if(end == nullptr) {
283  ERR_SWML << "attribute: " << s;
284  throw error("did not find '=' after attribute");
285  }
286 
287  string_span name(s, end - s);
288  s = end + 1;
// A '_' directly after '=' is skipped up to the opening quote.
289  if(*s == '_') {
290  s = strchr(s, '"');
291  if(s == nullptr) {
292  throw error("did not find '\"' after '_'");
293  }
294  }
295 
// Unquoted value: runs to the end of the line and must not contain quotes.
296  if (*s != '"') {
297  end = strchr(s, '\n');
298  if (!end) {
299  ERR_SWML << "ATTR: '" << name << "' (((" << s << ")))";
300  throw error("did not find end of attribute");
301  }
302  if (memchr(s, '"', end - s))
303  throw error("found stray quotes in unquoted value");
304  goto read_attribute;
305  }
// Quoted value: s points at the opening quote. Each pass of the outer loop
// finds the closing quote of one quoted segment; segments may be chained
// with '+' at the end of a line.
306  end = s;
307  while(true)
308  {
309  // Read until the first single double quote.
// A doubled quote ("") is skipped rather than treated as the terminator.
310  while((end = strchr(end+1, '"')) && end[1] == '"') {
// NOTE(review): the `#pragma warning (pop)` matching the push above the
// constructor is placed here, inside the loop body.
311 #ifdef _MSC_VER
312 #pragma warning (pop)
313 #endif
314  ++end;
315  }
316  if(end == nullptr)
317  throw error("did not find end of attribute");
318 
319  // Stop if newline.
320  const char *endline = end + 1;
321  while (*endline == ' ') ++endline;
322  if (*endline == '\n') break;
323 
324  // Read concatenation marker.
325  if (*(endline++) != '+')
326  throw error("did not find newline after end of attribute");
327  if (*(endline++) != '\n')
328  throw error("did not find newline after '+'");
329 
330  // Read textdomain marker.
331  if (*endline == '#') {
332  endline = strchr(endline + 1, '\n');
333  if (!endline)
334  throw error("did not find newline after '#'");
335  ++endline;
336  }
337 
338  // Read indentation and start of string.
339  while (*endline == '\t') ++endline;
340  if (*endline == '_') ++endline;
341  if (*endline != '"')
342  throw error("did not find quotes after '+'");
// Continue scanning from the opening quote of the next segment.
343  end = endline;
344  }
345 
// Step over the opening quote so the value excludes it; end points at the
// final closing quote, which is excluded as well.
346  ++s;
347 
348  read_attribute:
349  string_span value(s, end - s);
// Keys must arrive in strictly increasing order; the lookups in this file
// use std::equal_range on attr_.
350  if(attr_.empty() == false && !(attr_.back().key < name)) {
351  ERR_SWML << "attributes: '" << attr_.back().key << "' < '" << name << "'";
352  throw error("attributes not in order");
353  }
354 
355  s = end + 1;
356 
357  attr_.emplace_back(name, value);
358  }
359  }
360  }
361 
// Reached only when the input ends without a closing tag for this node.
362  output_cache_ = string_span(begin, s - begin);
364 }
365 
367 {
368  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
369  for(child_list::iterator j = i->second.begin(); j != i->second.end(); ++j) {
370  debug_delete(*j);
371  }
372  }
373 }
374 
375 namespace {
376 struct string_span_pair_comparer
377 {
378  bool operator()(const string_span& a, const node::attribute& b) const {
379  return a < b.key;
380  }
381 
382  bool operator()(const node::attribute& a, const string_span& b) const {
383  return a.key < b;
384  }
385 
386  bool operator()(const node::attribute& a,
387  const node::attribute& b) const {
388  return a.key < b.key;
389  }
390 };
391 }
392 
393 const string_span& node::operator[](const char* key) const
394 {
395  static string_span empty("");
396  string_span span(key);
397  std::pair<attribute_list::const_iterator,
398  attribute_list::const_iterator> range = std::equal_range(attr_.begin(), attr_.end(), span, string_span_pair_comparer());
399  if(range.first != range.second) {
400  return range.first->value;
401  }
402 
403  return empty;
404 }
405 
406 bool node::has_attr(const char* key) const
407 {
408  string_span span(key);
409  std::pair<attribute_list::const_iterator,
410  attribute_list::const_iterator> range = std::equal_range(attr_.begin(), attr_.end(), span, string_span_pair_comparer());
411  return range.first != range.second;
412 }
413 
414 node& node::set_attr(const char* key, const char* value)
415 {
416  set_dirty();
417 
418  string_span span(key);
419  std::pair<attribute_list::iterator,
420  attribute_list::iterator> range = std::equal_range(attr_.begin(), attr_.end(), span, string_span_pair_comparer());
421  if(range.first != range.second) {
422  range.first->value = string_span(value);
423  } else {
424  attr_.insert(range.first, attribute(span, string_span(value)));
425  }
426 
427  return *this;
428 }
429 
430 node& node::set_attr_dup(const char* key, const char* value)
431 {
432  return set_attr(key, doc_->dup_string(value));
433 }
434 
// Copies `value` with string_span::duplicate() and stores the copy under `key`.
// NOTE(review): listing line 438 is absent from this extract (numbering jumps
// from 437 to 439), so whatever hands `buf` to an owner is not visible here —
// confirm against the original file.
435 node& node::set_attr_dup(const char* key, const string_span& value)
436 {
437  char* buf = value.duplicate();
439  return set_attr(key, buf);
440 }
441 
442 node& node::set_attr_esc(const char* key, string_span value)
443 {
444  return set_attr(key, doc_->esc_string(value));
445 }
446 
447 node& node::set_attr_int(const char* key, int value)
448 {
449  std::string temp = std::to_string(value);
450  return set_attr_dup(key, temp.c_str());
451 }
452 
// Inserts a new empty child called `name` at position `index` among the
// children of that name and returns it. An index past the end is clamped so
// the child is appended.
// NOTE(review): listing lines 463 and 467 are absent from this extract —
// confirm against the original file.
453 node& node::add_child_at(const char* name, std::size_t index)
454 {
455  set_dirty();
456 
457  const int list_index = get_children(name);
458  child_list& list = children_[list_index].second;
459  if(index > list.size()) {
460  index = list.size();
461  }
462 
464  list.insert(list.begin() + index, new node(*doc_, this));
// Keep the document-order record in step with the per-name list.
465  insert_ordered_child(list_index, index);
466 
468  return *list[index];
469 }
470 
471 
// Appends a new empty child called `name` (after all existing children, both
// in its per-name list and in document order) and returns it.
// NOTE(review): listing lines 477 and 481 are absent from this extract —
// confirm against the original file.
472 node& node::add_child(const char* name)
473 {
474  set_dirty();
475 
476  const int list_index = get_children(name);
478  child_list& list = children_[list_index].second;
479  list.push_back(new node(*doc_, this));
480  ordered_children_.emplace_back(list_index, list.size() - 1);
482  return *list.back();
483 }
484 
// Deletes the `index`-th child called `name`. Does nothing (beyond marking the
// node dirty) when there is no such name or the index is out of range. When
// the last child of a name is removed, the name's entry is dropped as well.
// NOTE(review): listing line 490, which must declare `itor`, is absent from
// this extract; presumably it looks `name` up in children_ — confirm against
// the original file.
485 void node::remove_child(const string_span& name, std::size_t index)
486 {
487  set_dirty();
488 
489  //if we don't already have a vector for this item we don't want to add one.
491  if(itor == children_.end()) {
492  return;
493  }
494 
495  child_list& list = itor->second;
496  if(index >= list.size()) {
497  return;
498  }
499 
// Fix up the document-order record before the list itself changes.
500  remove_ordered_child(std::distance(children_.begin(), itor), index);
501 
502  debug_delete(list[index]);
503  list.erase(list.begin() + index);
504 
505  if(list.empty()) {
// Entries after this one shift down by one; renumber before erasing.
506  remove_ordered_child_list(std::distance(children_.begin(), itor));
507  children_.erase(itor);
508  }
509 }
510 
511 void node::insert_ordered_child(int child_map_index, int child_list_index)
512 {
513  bool inserted = false;
515  while(i != ordered_children_.end()) {
516  if(i->child_map_index == child_map_index && i->child_list_index > child_list_index) {
517  i->child_list_index++;
518  } else if(i->child_map_index == child_map_index && i->child_list_index == child_list_index) {
519  inserted = true;
520  i->child_list_index++;
521  i = ordered_children_.insert(i, node_pos(child_map_index, child_list_index));
522  ++i;
523  }
524 
525  ++i;
526  }
527 
528  if(!inserted) {
529  ordered_children_.emplace_back(child_map_index, child_list_index);
530  }
531 }
532 
533 void node::remove_ordered_child(int child_map_index, int child_list_index)
534 {
535  int erase_count = 0;
537  while(i != ordered_children_.end()) {
538  if(i->child_map_index == child_map_index && i->child_list_index == child_list_index) {
539  i = ordered_children_.erase(i);
540  ++erase_count;
541  } else {
542  if(i->child_map_index == child_map_index && i->child_list_index > child_list_index) {
543  i->child_list_index--;
544  }
545  ++i;
546  }
547  }
548 
549  assert(erase_count == 1);
550 }
551 
552 void node::insert_ordered_child_list(int child_map_index)
553 {
555  while(i != ordered_children_.end()) {
556  if(i->child_map_index >= child_map_index) {
557  i->child_map_index++;
558  }
559  }
560 }
561 
562 void node::remove_ordered_child_list(int child_map_index)
563 {
565  while(i != ordered_children_.end()) {
566  if(i->child_map_index == child_map_index) {
567  assert(false);
568  i = ordered_children_.erase(i);
569  } else {
570  if(i->child_map_index > child_map_index) {
571  i->child_map_index--;
572  }
573 
574  ++i;
575  }
576  }
577 }
578 
580 {
581 // only define this symbol in debug mode to work out child ordering.
582 #ifdef CHECK_ORDERED_CHILDREN
583  std::vector<node_pos>::const_iterator i = ordered_children_.begin();
584  while(i != ordered_children_.end()) {
585  assert(i->child_map_index < children_.size());
586  assert(i->child_list_index < children_[i->child_map_index].second.size());
587  ++i;
588  }
589 
590  for(child_map::const_iterator j = children_.begin(); j != children_.end(); ++j) {
591  const unsigned short child_map_index = j - children_.begin();
592  for(child_list::const_iterator k = j->second.begin(); k != j->second.end(); ++k) {
593  const unsigned short child_list_index = k - j->second.begin();
594  bool found = false;
595  for(int n = 0; n != ordered_children_.size(); ++n) {
596  if(ordered_children_[n].child_map_index == child_map_index &&
597  ordered_children_[n].child_list_index == child_list_index) {
598  found = true;
599  break;
600  }
601  }
602 
603  assert(found);
604  }
605  }
606 #endif // CHECK_ORDERED_CHILDREN
607 }
608 
// Overload of remove_child taking the child name as a C string.
// NOTE(review): the only body line (listing line 611) is absent from this
// extract; presumably it forwards to remove_child(const string_span&,
// std::size_t) — confirm against the original file.
609 void node::remove_child(const char* name, std::size_t index)
610 {
612 }
613 
614 node* node::child(const char* name)
615 {
616  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
617  if(i->first == name) {
618  assert(i->second.empty() == false);
619  return i->second.front();
620  }
621  }
622 
623  return nullptr;
624 }
625 
626 const node* node::child(const char* name) const
627 {
628  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
629  if(i->first == name) {
630  if(i->second.empty()) {
631  return nullptr;
632  } else {
633  return i->second.front();
634  }
635  }
636  }
637 
638  return nullptr;
639 }
640 
641 node& node::child_or_add(const char* name)
642 {
643  if(node* res = child(name)) {
644  return *res;
645  }
646  return add_child(name);
647 }
648 
649 const node::child_list& node::children(const char* name) const
650 {
651  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
652  if(i->first == name) {
653  return i->second;
654  }
655  }
656 
657  static const node::child_list empty;
658  return empty;
659 }
660 
661 int node::get_children(const char* name)
662 {
663  return get_children(string_span(name));
664 }
665 
667 {
668  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
669  if(i->first == name) {
670  return std::distance(children_.begin(), i);
671  }
672  }
673 
674  children_.emplace_back(string_span(name), child_list());
675  return children_.size() - 1;
676 }
677 
678 node::child_map::const_iterator node::find_in_map(const child_map& m, const string_span& attr)
679 {
680  child_map::const_iterator i = m.begin();
681  for(; i != m.end(); ++i) {
682  if(i->first == attr) {
683  break;
684  }
685  }
686 
687  return i;
688 }
689 
691 {
692  child_map::iterator i = m.begin();
693  for(; i != m.end(); ++i) {
694  if(i->first == attr) {
695  break;
696  }
697  }
698 
699  return i;
700 }
701 
703 {
704  if(children_.empty()) {
705  static const string_span empty;
706  return empty;
707  }
708 
709  return children_.begin()->first;
710 }
711 
// Returns the number of bytes node::output() writes for this node: the cached
// size when a cache exists, otherwise the size computed from attributes and
// children.
// NOTE(review): listing line 714 is absent from this extract — confirm
// against the original file.
712 int node::output_size() const
713 {
715  if(output_cache_.empty() == false) {
716  return output_cache_.size();
717  }
718 
719  int res = 0;
720  for(attribute_list::const_iterator i = attr_.begin(); i != attr_.end(); ++i) {
// 4 extra bytes per attribute: '=', the two quotes and the newline.
721  res += i->key.size() + i->value.size() + 4;
722  }
723 
724  std::size_t count_children = 0;
725  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
726  for(child_list::const_iterator j = i->second.begin(); j != i->second.end(); ++j) {
// The name appears in both tags; 7 extra bytes cover "[", "]\n", "[/" and "]\n".
727  res += i->first.size()*2 + 7;
728  res += (*j)->output_size();
729  ++count_children;
730  }
731  }
732 
733  assert(count_children == ordered_children_.size());
734 
735  return res;
736 }
737 
// Rebases every span held by this node and its descendants (attribute keys
// and values, child names) by `offset` bytes, keeping each span's length.
// NOTE(review): listing line 741 — the body of the output_cache_ branch — is
// absent from this extract; confirm against the original file.
738 void node::shift_buffers(ptrdiff_t offset)
739 {
740  if(!output_cache_.empty()) {
742  }
743 
744  for(std::vector<attribute>::iterator i = attr_.begin(); i != attr_.end(); ++i) {
745  i->key = string_span(i->key.begin() + offset, i->key.size());
746  i->value = string_span(i->value.begin() + offset, i->value.size());
747  }
748 
749  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
750  string_span& key = i->first;
751  key = string_span(key.begin() + offset, key.size());
752  for(child_list::iterator j = i->second.begin(); j != i->second.end(); ++j) {
753  (*j)->shift_buffers(offset);
754  }
755  }
756 }
757 
// Serialises this node into `buf` and advances `buf` past the bytes written.
// The caller must provide at least output_size() bytes. With REFRESH_CACHE the
// node's spans are repointed at the freshly written text and output_cache_ is
// set to cover it.
// NOTE(review): listing line 763 — the body of the REFRESH_CACHE branch on the
// cached path — is absent from this extract; confirm against the original file.
758 void node::output(char*& buf, CACHE_STATUS cache_status)
759 {
// Fast path: a cached serialisation exists, copy it verbatim.
760  if(output_cache_.empty() == false) {
761  memcpy(buf, output_cache_.begin(), output_cache_.size());
762  if(cache_status == REFRESH_CACHE) {
764  }
765  buf += output_cache_.size();
766  return;
767  }
768 
769  char* begin = buf;
770 
// Attributes are written as key="value" followed by a newline.
771  for(std::vector<attribute>::iterator i = attr_.begin(); i != attr_.end(); ++i) {
772  memcpy(buf, i->key.begin(), i->key.size());
773  if(cache_status == REFRESH_CACHE) {
774  i->key = string_span(buf, i->key.size());
775  }
776  buf += i->key.size();
777  *buf++ = '=';
778  *buf++ = '"';
779  memcpy(buf, i->value.begin(), i->value.size());
780  if(cache_status == REFRESH_CACHE) {
781  i->value = string_span(buf, i->value.size());
782  }
783  buf += i->value.size();
784  *buf++ = '"';
785  *buf++ = '\n';
786  }
787 
// Children are written in document order as [name]\n ... [/name]\n.
788  for(std::vector<node_pos>::const_iterator i = ordered_children_.begin();
789  i != ordered_children_.end(); ++i) {
790  assert(i->child_map_index < children_.size());
791  assert(i->child_list_index < children_[i->child_map_index].second.size());
792  string_span& attr = children_[i->child_map_index].first;
793  *buf++ = '[';
794  memcpy(buf, attr.begin(), attr.size());
795  if(cache_status == REFRESH_CACHE) {
796  attr = string_span(buf, attr.size());
797  }
798  buf += attr.size();
799  *buf++ = ']';
800  *buf++ = '\n';
801  children_[i->child_map_index].second[i->child_list_index]->output(buf, cache_status);
802  *buf++ = '[';
803  *buf++ = '/';
804  memcpy(buf, attr.begin(), attr.size());
805  buf += attr.size();
806  *buf++ = ']';
807  *buf++ = '\n';
808  }
809 
810  if(cache_status == REFRESH_CACHE) {
811  output_cache_ = string_span(begin, buf - begin);
812  }
813 }
814 
815 std::string node_to_string(const node& n)
816 {
817  //calling output with status=DO_NOT_MODIFY_CACHE really doesn't modify the
818  //node, so we can do it safely
819  node& mutable_node = const_cast<node&>(n);
820  std::vector<char> v(mutable_node.output_size());
821  char* ptr = &v[0];
822  mutable_node.output(ptr, node::DO_NOT_MODIFY_CACHE);
823  assert(ptr == &v[0] + v.size());
824  return std::string(v.begin(), v.end());
825 }
826 
827 void node::copy_into(node& n) const
828 {
829  n.set_dirty();
830  for(attribute_list::const_iterator i = attr_.begin(); i != attr_.end(); ++i) {
831  char* key = i->key.duplicate();
832  char* value = i->value.duplicate();
833  n.doc_->take_ownership_of_buffer(key);
834  n.doc_->take_ownership_of_buffer(value);
835  n.set_attr(key, value);
836  }
837 
838  for(std::vector<node_pos>::const_iterator i = ordered_children_.begin();
839  i != ordered_children_.end(); ++i) {
840  assert(i->child_map_index < children_.size());
841  assert(i->child_list_index < children_[i->child_map_index].second.size());
842  char* buf = children_[i->child_map_index].first.duplicate();
843  n.doc_->take_ownership_of_buffer(buf);
844  children_[i->child_map_index].second[i->child_list_index]->copy_into(n.add_child(buf));
845  }
846 }
847 
// Applies a diff node to this node. The diff's children are read in this
// order: [insert] (attributes to set), [delete] (attributes to erase),
// [change_child], [insert_child] and [delete_child] (each carrying an "index"
// attribute and one or more named children).
// NOTE(review): listing lines 857, 858, 879 and 896 are absent from this
// extract; 879 must declare `itor`. Confirm against the original file.
848 void node::apply_diff(const node& diff)
849 {
850  set_dirty();
851  const node* inserts = diff.child("insert");
852  if(inserts != nullptr) {
853  for(attribute_list::const_iterator i = inserts->attr_.begin(); i != inserts->attr_.end(); ++i) {
// Duplicate key and value so the stored spans do not point into the diff.
854  char* name = i->key.duplicate();
855  char* value = i->value.duplicate();
856  set_attr(name, value);
859  }
860  }
861 
862  const node* deletes = diff.child("delete");
863  if(deletes != nullptr) {
864  for(attribute_list::const_iterator i = deletes->attr_.begin(); i != deletes->attr_.end(); ++i) {
865  std::pair<attribute_list::iterator,
866  attribute_list::iterator> range = std::equal_range(attr_.begin(), attr_.end(), i->key, string_span_pair_comparer());
867  if(range.first != range.second) {
868  attr_.erase(range.first);
869  }
870  }
871  }
872 
// [change_child]: recurse into the index-th existing child of each name given.
873  const child_list& child_changes = diff.children("change_child");
874  for(child_list::const_iterator i = child_changes.begin(); i != child_changes.end(); ++i) {
875  const std::size_t index = (**i)["index"].to_int();
876  for(child_map::const_iterator j = (*i)->children_.begin(); j != (*i)->children_.end(); ++j) {
877  const string_span& name = j->first;
878  for(child_list::const_iterator k = j->second.begin(); k != j->second.end(); ++k) {
880  if(itor != children_.end()) {
// Unknown names and out-of-range indexes are silently ignored.
881  if(index < itor->second.size()) {
882  itor->second[index]->apply_diff(**k);
883  }
884  }
885  }
886  }
887  }
888 
// [insert_child]: create a child at `index` and deep-copy the payload into it.
889  const child_list& child_inserts = diff.children("insert_child");
890  for(child_list::const_iterator i = child_inserts.begin(); i != child_inserts.end(); ++i) {
891  const std::size_t index = (**i)["index"].to_int();
892  for(child_map::const_iterator j = (*i)->children_.begin(); j != (*i)->children_.end(); ++j) {
893  const string_span& name = j->first;
894  for(child_list::const_iterator k = j->second.begin(); k != j->second.end(); ++k) {
895  char* buf = name.duplicate();
897  (*k)->copy_into(add_child_at(buf, index));
898  }
899  }
900  }
901 
// [delete_child]: remove the index-th child of each name that has a payload.
902  const child_list& child_deletes = diff.children("delete_child");
903  for(child_list::const_iterator i = child_deletes.begin(); i != child_deletes.end(); ++i) {
904  const std::size_t index = (**i)["index"].to_int();
905  for(child_map::const_iterator j = (*i)->children_.begin(); j != (*i)->children_.end(); ++j) {
906  if(j->second.empty()) {
907  continue;
908  }
909 
910  const string_span& name = j->first;
911  remove_child(name, index);
912  }
913  }
914 }
915 
917 {
918  doc_ = doc;
919 
920  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
921  for(child_list::iterator j = i->second.begin(); j != i->second.end(); ++j) {
922  (*j)->set_doc(doc);
923  }
924  }
925 }
926 
927 int node::nchildren() const
928 {
929  int res = 0;
930  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
931  for(child_list::const_iterator j = i->second.begin(); j != i->second.end(); ++j) {
932  ++res;
933  res += (*j)->nchildren();
934  }
935  }
936 
937  return res;
938 }
939 
941 {
942  int res = attr_.capacity();
943  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
944  for(child_list::const_iterator j = i->second.begin(); j != i->second.end(); ++j) {
945  res += (*j)->nattributes_recursive();
946  }
947  }
948 
949  return res;
950 }
951 
953 {
954  for(node* n = this; n != nullptr && n->output_cache_.is_null() == false; n = n->parent_) {
955  n->output_cache_ = string_span();
956  }
957 }
958 
960  compressed_buf_(),
961  output_(nullptr),
962  buffers_(),
963  root_(new node(*this, nullptr)),
964  prev_(nullptr),
965  next_(nullptr)
966 {
967  attach_list();
968 }
969 
971  compressed_buf_(),
972  output_(buf),
973  buffers_(),
974  root_(nullptr),
975  prev_(nullptr),
976  next_(nullptr)
977 {
978  if(control == INIT_TAKE_OWNERSHIP) {
979  buffers_.push_back(buf);
980  }
981  const char* cbuf = buf;
982  root_ = new node(*this, nullptr, &cbuf);
983 
984  attach_list();
985 }
986 
// Builds a document over caller-supplied text. Unless `state` is
// INIT_COMPRESSED the text is parsed into a node tree straight away; in
// either case the document is then linked into the global document list.
// NOTE(review): listing line 996 — the first statement of the INIT_COMPRESSED
// branch — is absent from this extract; confirm against the original file.
987 document::document(const char* buf, INIT_STATE state) :
988  compressed_buf_(),
989  output_(buf),
990  buffers_(),
991  root_(nullptr),
992  prev_(nullptr),
993  next_(nullptr)
994 {
995  if(state == INIT_COMPRESSED) {
997  output_ = nullptr;
998  } else {
999  root_ = new node(*this, nullptr, &buf);
1000  }
1001 
1002  attach_list();
1003 }
1004 
1006  compressed_buf_(compressed_buf),
1007  output_(nullptr),
1008  buffers_(),
1009  root_(nullptr),
1010  prev_(nullptr),
1011  next_(nullptr)
1012 {
1013  string_span uncompressed_buf;
1014  buffers_.push_back(uncompress_buffer(compressed_buf, &uncompressed_buf));
1015  output_ = uncompressed_buf.begin();
1016  const char* cbuf = output_;
1017  try {
1018  root_ = new node(*this, nullptr, &cbuf);
1019  } catch(...) {
1020  ERR_SWML << "Caught exception creating a new simple_wml node: " << utils::get_unknown_exception_type();
1021  delete [] buffers_.front();
1022  buffers_.clear();
1023  throw;
1024  }
1025 
1026  attach_list();
1027 }
1028 
1030 {
1031  for(std::vector<char*>::iterator i = buffers_.begin(); i != buffers_.end(); ++i) {
1032  delete [] *i;
1033  }
1034 
1035  buffers_.clear();
1036  debug_delete(root_);
1037 
1038  detach_list();
1039 }
1040 
1041 const char* document::dup_string(const char* str)
1042 {
1043  const int len = strlen(str);
1044  char* res = new char[len+1];
1045  memcpy(res, str, len + 1);
1046  buffers_.push_back(res);
1047  return res;
1048 }
1049 
1051 {
1052  char* res = string_span(utils::wml_escape_string(str)).duplicate();
1053  buffers_.push_back(res);
1054  return res;
1055 }
1056 
// Returns the document as NUL-terminated text, regenerating it only when
// needed. The returned pointer refers to a buffer held in buffers_.
// NOTE(review): listing lines 1071 and 1088 are absent from this extract;
// 1088 sits where the new buffer would be filled. Confirm against the
// original file.
1057 const char* document::output()
1058 {
// Existing text is still valid when there is no tree or the tree is clean.
1059  if(output_ && (!root_ || root_->is_dirty() == false)) {
1060  return output_;
1061  }
// No tree and no text: the document only exists in compressed form.
1062  if(!root_) {
1063  assert(compressed_buf_.empty() == false);
1064  string_span uncompressed_buf;
1065  buffers_.push_back(uncompress_buffer(compressed_buf_, &uncompressed_buf));
1066  output_ = uncompressed_buf.begin();
1067  return output_;
1068  }
1069 
1070  //we're dirty, so the compressed buf must also be dirty; clear it.
1072 
// Set the old buffers aside; they are freed only after the new text exists.
1073  std::vector<char*> bufs;
1074  bufs.swap(buffers_);
1075 
// One extra byte for the terminating NUL.
1076  const int buf_size = root_->output_size() + 1;
1077  char* buf;
1078  try {
1079  buf = new char[buf_size];
1080  } catch (const std::bad_alloc& e) {
1081  ERR_SWML << "ERROR: Trying to allocate " << buf_size << " bytes. "
1082  << e.what();
1083  throw error("Bad allocation request in output().");
1084  }
1085  buffers_.push_back(buf);
1086  output_ = buf;
1087 
1089  *buf++ = 0;
1090  assert(buf == output_ + buf_size);
1091 
1092  for(std::vector<char*>::iterator i = bufs.begin(); i != bufs.end(); ++i) {
1093  delete [] *i;
1094  }
1095 
1096  bufs.clear();
1097 
1098  return output_;
1099 }
1100 
1102 {
1103  if(compressed_buf_.empty() == false &&
1104  (root_ == nullptr || root_->is_dirty() == false)) {
1105  assert(*compressed_buf_.begin() == (bzip2 ? 'B' : 31));
1106  return compressed_buf_;
1107  }
1108 
1109  buffers_.push_back(compress_buffer(output(), &compressed_buf_, bzip2));
1110  assert(*compressed_buf_.begin() == (bzip2 ? 'B' : 31));
1111 
1112  return compressed_buf_;
1113 }
1114 
1116 {
1118  debug_delete(root_);
1119  root_ = nullptr;
1120  output_ = nullptr;
1121  std::vector<char*> new_buffers;
1122  for(std::vector<char*>::iterator i = buffers_.begin(); i != buffers_.end(); ++i) {
1123  if(*i != compressed_buf_.begin()) {
1124  delete [] *i;
1125  } else {
1126  new_buffers.push_back(*i);
1127  }
1128  }
1129 
1130  buffers_.swap(new_buffers);
1131  assert(buffers_.size() == 1);
1132 }
1133 
1135 {
1136  if(output_ == nullptr) {
1137  assert(compressed_buf_.empty() == false);
1138  string_span uncompressed_buf;
1139  buffers_.push_back(uncompress_buffer(compressed_buf_, &uncompressed_buf));
1140  output_ = uncompressed_buf.begin();
1141  }
1142 
1143  assert(root_ == nullptr);
1144  const char* cbuf = output_;
1145  root_ = new node(*this, nullptr, &cbuf);
1146 }
1147 
1148 std::unique_ptr<document> document::clone()
1149 {
1150  char* buf = new char[strlen(output())+1];
1151  strcpy(buf, output());
1152  return std::make_unique<document>(buf);
1153 }
1154 
1156 {
1159  buffers_.swap(o.buffers_);
1160  std::swap(root_, o.root_);
1161 
1162  root_->set_doc(this);
1163  o.root_->set_doc(&o);
1164 }
1165 
1167 {
1169  output_ = nullptr;
1170  debug_delete(root_);
1171  root_ = new node(*this, nullptr);
1172  for(std::vector<char*>::iterator i = buffers_.begin(); i != buffers_.end(); ++i) {
1173  delete [] *i;
1174  }
1175 
1176  buffers_.clear();
1177 }
1178 
1179 namespace {
// First element of the linked list of documents that document::stats() walks
// through each document's next_ pointer; nullptr when the list is empty.
1180 document* head_doc = nullptr;
1181 }
1182 
1184 {
1185  prev_ = nullptr;
1186  next_ = head_doc;
1187 
1188  if(next_) {
1189  next_->prev_ = this;
1190  }
1191  head_doc = this;
1192 }
1193 
1195 {
1196  if(head_doc == this) {
1197  head_doc = next_;
1198  }
1199 
1200  if(next_) {
1201  next_->prev_ = prev_;
1202  }
1203 
1204  if(prev_) {
1205  prev_->next_ = next_;
1206  }
1207  next_ = prev_ = nullptr;
1208 }
1209 
1210 std::string document::stats()
1211 {
1212  std::ostringstream s;
1213  int ndocs = 0;
1214  int ncompressed = 0;
1215  int compressed_size = 0;
1216  int ntext = 0;
1217  int text_size = 0;
1218  int nbuffers = 0;
1219  int nnodes = 0;
1220  int ndirty = 0;
1221  int nattributes = 0;
1222  for(document* d = head_doc; d != nullptr; d = d->next_) {
1223  ndocs++;
1224  nbuffers += d->buffers_.size();
1225 
1226  if(d->compressed_buf_.is_null() == false) {
1227  ++ncompressed;
1228  compressed_size += d->compressed_buf_.size();
1229  }
1230 
1231  if(d->output_) {
1232  ++ntext;
1233  text_size += strlen(d->output_);
1234  }
1235 
1236  if(d->root_) {
1237  nnodes += 1 + d->root_->nchildren();
1238  nattributes += d->root_->nattributes_recursive();
1239  }
1240 
1241  if(d->root_ && d->root_->is_dirty()) {
1242  ++ndirty;
1243  }
1244  }
1245 
1246  const int nodes_alloc = nnodes*(sizeof(node) + 12);
1247  const int attr_alloc = nattributes*(sizeof(string_span)*2);
1248  const int total_alloc = compressed_size + text_size + nodes_alloc + attr_alloc;
1249 
1250  s << "WML documents: " << ndocs << "\n"
1251  << "Dirty: " << ndirty << "\n"
1252  << "With compression: " << ncompressed << " (" << compressed_size
1253  << " bytes)\n"
1254  << "With text: " << ntext << " (" << text_size
1255  << " bytes)\n"
1256  << "Nodes: " << nnodes << " (" << nodes_alloc << " bytes)\n"
1257  << "Attr: " << nattributes << " (" << attr_alloc << " bytes)\n"
1258  << "Buffers: " << nbuffers << "\n"
1259  << "Total allocation: " << total_alloc << " bytes\n";
1260 
1261  return s.str();
1262 }
1263 
1264 void swap(document& lhs, document& rhs)
1265 {
1266  lhs.swap(rhs);
1267 }
1268 
1269 }
void take_ownership_of_buffer(char *buffer)
Definition: simple_wml.hpp:290
const char * output()
std::unique_ptr< document > clone()
static std::string stats()
static std::size_t document_size_limit
Definition: simple_wml.hpp:299
const char * esc_string(string_span str)
const char * output_
Definition: simple_wml.hpp:306
void swap(document &o)
std::vector< char * > buffers_
Definition: simple_wml.hpp:307
string_span output_compressed(bool bzip2=false)
const char * dup_string(const char *str)
string_span compressed_buf_
Definition: simple_wml.hpp:305
const string_span & attr(const char *key) const
Definition: simple_wml.hpp:132
int output_size() const
Definition: simple_wml.cpp:712
void set_doc(document *doc)
Definition: simple_wml.cpp:916
void insert_ordered_child(int child_map_index, int child_list_index)
Definition: simple_wml.cpp:511
void insert_ordered_child_list(int child_map_index)
Definition: simple_wml.cpp:552
void check_ordered_children() const
Definition: simple_wml.cpp:579
void remove_child(const char *name, std::size_t index)
Definition: simple_wml.cpp:609
int nchildren() const
Definition: simple_wml.cpp:927
bool is_dirty() const
Definition: simple_wml.hpp:169
node(document &doc, node *parent)
Definition: simple_wml.cpp:209
int get_children(const string_span &name)
Definition: simple_wml.cpp:666
document * doc_
Definition: simple_wml.hpp:198
const string_span & operator[](const char *key) const
Definition: simple_wml.cpp:393
const child_list & children(const char *name) const
Definition: simple_wml.cpp:649
void apply_diff(const node &diff)
Definition: simple_wml.cpp:848
attribute_list attr_
Definition: simple_wml.hpp:201
bool has_attr(const char *key) const
Definition: simple_wml.cpp:406
node & set_attr_int(const char *key, int value)
Definition: simple_wml.cpp:447
std::vector< child_pair > child_map
Definition: simple_wml.hpp:206
node * child(const char *name)
Definition: simple_wml.cpp:614
std::vector< node * > child_list
Definition: simple_wml.hpp:129
void remove_ordered_child_list(int child_map_index)
Definition: simple_wml.cpp:562
int nattributes_recursive() const
Definition: simple_wml.cpp:940
std::vector< node_pos > ordered_children_
Definition: simple_wml.hpp:224
const string_span & first_child() const
Definition: simple_wml.cpp:702
child_map children_
Definition: simple_wml.hpp:210
void output(char *&buf, CACHE_STATUS status=DO_NOT_MODIFY_CACHE)
Definition: simple_wml.cpp:758
node & add_child(const char *name)
Definition: simple_wml.cpp:472
node & child_or_add(const char *name)
Definition: simple_wml.cpp:641
node & set_attr_esc(const char *key, string_span value)
As above but convert value to a WML value
Definition: simple_wml.cpp:442
node & set_attr(const char *key, const char *value)
Definition: simple_wml.cpp:414
string_span output_cache_
Definition: simple_wml.hpp:233
node & add_child_at(const char *name, std::size_t index)
Definition: simple_wml.cpp:453
void copy_into(node &n) const
Definition: simple_wml.cpp:827
node & set_attr_dup(const char *key, const char *value)
Definition: simple_wml.cpp:430
void remove_ordered_child(int child_map_index, int child_list_index)
Definition: simple_wml.cpp:533
void shift_buffers(ptrdiff_t offset)
Definition: simple_wml.cpp:738
static child_map::const_iterator find_in_map(const child_map &m, const string_span &attr)
Definition: simple_wml.cpp:678
bool to_bool(bool default_value=false) const
Definition: simple_wml.cpp:160
std::string to_string() const
Definition: simple_wml.cpp:184
const char * begin() const
Definition: simple_wml.hpp:94
char * duplicate() const
Definition: simple_wml.cpp:189
const char * end() const
Definition: simple_wml.hpp:95
std::size_t i
Definition: function.cpp:1030
unsigned in
If equal to search_counter, the node is off the list.
Standard logging facilities (interface).
void swap(document &lhs, document &rhs)
Implement non-member swap function for std::swap (calls document::swap).
std::ostream & operator<<(std::ostream &o, const string_span &s)
Definition: simple_wml.cpp:203
std::string node_to_string(const node &n)
Definition: simple_wml.cpp:815
@ INIT_TAKE_OWNERSHIP
Definition: simple_wml.hpp:238
std::size_t index(std::string_view str, const std::size_t index)
Codepoint index corresponding to the nth character in a UTF-8 string.
Definition: unicode.cpp:70
constexpr auto filter
Definition: ranges.hpp:38
std::string get_unknown_exception_type()
Utility function for finding the type of thing caught with catch(...).
Definition: general.cpp:23
std::string wml_escape_string(std::string_view str)
Format str as a WML value
std::string::const_iterator iterator
Definition: tokenizer.hpp:25
static void msg(const char *act, debug_info &i, const char *to="", const char *result="")
Definition: debugger.cpp:109
#define ERR_SWML
Definition: simple_wml.cpp:31
static lg::log_domain log_config("config")
static map_location::direction n
static map_location::direction s
#define d
#define e
#define b