The Battle for Wesnoth  1.19.5+dev
simple_wml.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2008 - 2024
3  by David White <dave@whitevine.net>
4  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY.
12 
13  See the COPYING file for more details.
14 */
15 
16 #include <sstream>
17 
18 #include <boost/iostreams/copy.hpp>
19 #include <boost/iostreams/filtering_stream.hpp>
20 #include <boost/iostreams/filter/bzip2.hpp>
21 #include <boost/iostreams/filter/counter.hpp>
22 #include <boost/iostreams/filter/gzip.hpp>
23 
25 
26 #include "log.hpp"
27 #include "utils/general.hpp"
28 
29 static lg::log_domain log_config("config");
30 #define ERR_SWML LOG_STREAM(err, log_config)
31 #define LOG_SWML LOG_STREAM(info, log_config)
32 
33 namespace simple_wml {
34 
35 std::size_t document::document_size_limit = 40000000;
36 
37 namespace {
38 
39 void debug_delete(node* n) {
40  delete n;
41 }
42 
43 char* uncompress_buffer(const string_span& input, string_span* span)
44 {
45  int nalloc = input.size();
46  int state = 0;
47  try {
48  std::istringstream stream(std::string(input.begin(), input.end()));
49  state = 1;
50  boost::iostreams::filtering_stream<boost::iostreams::input> filter;
51  state = 2;
52  if (!span->empty() && *span->begin() == 'B') {
53  filter.push(boost::iostreams::bzip2_decompressor());
54  } else {
55  filter.push(boost::iostreams::gzip_decompressor());
56  }
57  filter.push(stream);
58  state = 3;
59 
60  const std::size_t chunk_size = input.size() * 10;
61  nalloc = chunk_size;
62  std::vector<char> buf(chunk_size);
63  state = 4;
64  std::size_t len = 0;
65  std::size_t pos = 0;
66  while(filter.good() && (len = filter.read(&buf[pos], chunk_size).gcount()) == chunk_size) {
67  if(pos + chunk_size > document::document_size_limit) {
68  throw error("WML document exceeded size limit during decompression");
69  }
70 
71  pos += len;
72  buf.resize(pos + chunk_size);
73  len = 0;
74  }
75 
76  if(!filter.eof() && !filter.good()) {
77  throw error("failed to uncompress");
78  }
79 
80  pos += len;
81  state = 5;
82  nalloc = pos;
83 
84  buf.resize(pos);
85  state = 6;
86 
87  char* small_out = new char[pos+1];
88  memcpy(small_out, &buf[0], pos);
89  state = 7;
90 
91  small_out[pos] = 0;
92 
93  *span = string_span(small_out, pos);
94  state = 8;
95  return small_out;
96  } catch (const std::bad_alloc& e) {
97  ERR_SWML << "ERROR: bad_alloc caught in uncompress_buffer() state "
98  << state << " alloc bytes " << nalloc << " with input: '"
99  << input << "' " << e.what();
100  throw error("Bad allocation request in uncompress_buffer().");
101  }
102 }
103 
104 char* compress_buffer(const char* input, string_span* span, bool bzip2)
105 {
106  int nalloc = strlen(input);
107  int state = 0;
108  try {
109  std::string in(input);
110  state = 1;
111  std::istringstream istream(in);
112  state = 2;
113  boost::iostreams::filtering_stream<boost::iostreams::output> filter;
114  state = 3;
115  if (bzip2) {
116  filter.push(boost::iostreams::bzip2_compressor());
117  } else {
118  filter.push(boost::iostreams::gzip_compressor());
119  }
120  state = 4;
121  nalloc = in.size()*2 + 80;
122  std::vector<char> buf(nalloc);
123  boost::iostreams::array_sink out(&buf[0], buf.size());
124  filter.push(boost::iostreams::counter());
125  filter.push(out);
126 
127  state = 5;
128 
129  boost::iostreams::copy(istream, filter, buf.size());
130  const int len = filter.component<boost::iostreams::counter>(1)->characters();
131  assert(len < 128*1024*1024);
132  if((!filter.eof() && !filter.good()) || len == static_cast<int>(buf.size())) {
133  throw error("failed to compress");
134  }
135  state = 6;
136  nalloc = len;
137 
138  buf.resize(len);
139  state = 7;
140 
141  char* small_out = new char[len];
142  memcpy(small_out, &buf[0], len);
143  state = 8;
144 
145  *span = string_span(small_out, len);
146  assert(*small_out == (bzip2 ? 'B' : 31));
147  state = 9;
148  return small_out;
149  } catch (const std::bad_alloc& e) {
150  ERR_SWML << "ERROR: bad_alloc caught in compress_buffer() state "
151  << state << " alloc bytes " << nalloc << " with input: '"
152  << input << "' " << e.what();
153  throw error("Bad allocation request in compress_buffer().");
154  }
155 }
156 
157 } // namespace
158 
159 bool string_span::to_bool(bool default_value) const
160 {
161  if(empty()) {
162  return default_value;
163  }
164 
165  if (operator==("no") || operator==("off") || operator==("false") || operator==("0") || operator==("0.0"))
166  return false;
167 
168  return true;
169 }
170 
172 {
173  const int buf_size = 64;
174  if(size() >= buf_size) {
175  return 0;
176  }
177  char buf[64];
178  memcpy(buf, begin(), size());
179  buf[size()] = 0;
180  return atoi(buf);
181 }
182 
183 std::string string_span::to_string() const
184 {
185  return std::string(begin(), end());
186 }
187 
189 {
190  char* buf = new char[size() + 1];
191  memcpy(buf, begin(), size());
192  buf[size()] = 0;
193  return buf;
194 }
195 
196 error::error(const char* msg)
197  : game::error(msg)
198 {
199  ERR_SWML << "ERROR: '" << msg << "'";
200 }
201 
202 std::ostream& operator<<(std::ostream& o, const string_span& s)
203 {
204  o << std::string(s.begin(), s.end());
205  return o;
206 }
207 
208 node::node(document& doc, node* parent) :
209  doc_(&doc),
210  attr_(),
211  parent_(parent),
212  children_(),
213  ordered_children_(),
214  output_cache_()
215 {
216 }
217 
218 #ifdef _MSC_VER
219 #pragma warning (push)
220 #pragma warning (disable: 4706)
221 #endif
222 node::node(document& doc, node* parent, const char** str, int depth) :
223  doc_(&doc),
224  attr_(),
225  parent_(parent),
226  children_(),
227  ordered_children_(),
228  output_cache_()
229 {
230  if(depth >= 1000) {
231  throw error("elements nested too deep");
232  }
233 
234  const char*& s = *str;
235 
236  const char* const begin = s;
237  while(*s) {
238  switch(*s) {
239  case '[': {
240  if(s[1] == '/') {
241  output_cache_ = string_span(begin, s - begin);
242  s = strchr(s, ']');
243  if(s == nullptr) {
244  throw error("end element unterminated");
245  }
246 
247  ++s;
248  return;
249  }
250 
251  ++s;
252  const char* end = strchr(s, ']');
253  if(end == nullptr) {
254  throw error("unterminated element");
255  }
256 
257  const int list_index = get_children(string_span(s, end - s));
259 
260  s = end + 1;
261 
262  children_[list_index].second.push_back(new node(doc, this, str, depth+1));
263  ordered_children_.emplace_back(list_index, children_[list_index].second.size() - 1);
265 
266  break;
267  }
268  case ' ':
269  case '\t':
270  case '\n':
271  ++s;
272  break;
273  case '#':
274  s = strchr(s, '\n');
275  if(s == nullptr) {
276  throw error("did not find newline after '#'");
277  }
278  break;
279  default: {
280  const char* end = strchr(s, '=');
281  if(end == nullptr) {
282  ERR_SWML << "attribute: " << s;
283  throw error("did not find '=' after attribute");
284  }
285 
286  string_span name(s, end - s);
287  s = end + 1;
288  if(*s == '_') {
289  s = strchr(s, '"');
290  if(s == nullptr) {
291  throw error("did not find '\"' after '_'");
292  }
293  }
294 
295  if (*s != '"') {
296  end = strchr(s, '\n');
297  if (!end) {
298  ERR_SWML << "ATTR: '" << name << "' (((" << s << ")))";
299  throw error("did not find end of attribute");
300  }
301  if (memchr(s, '"', end - s))
302  throw error("found stray quotes in unquoted value");
303  goto read_attribute;
304  }
305  end = s;
306  while(true)
307  {
308  // Read until the first single double quote.
309  while((end = strchr(end+1, '"')) && end[1] == '"') {
310 #ifdef _MSC_VER
311 #pragma warning (pop)
312 #endif
313  ++end;
314  }
315  if(end == nullptr)
316  throw error("did not find end of attribute");
317 
318  // Stop if newline.
319  const char *endline = end + 1;
320  while (*endline == ' ') ++endline;
321  if (*endline == '\n') break;
322 
323  // Read concatenation marker.
324  if (*(endline++) != '+')
325  throw error("did not find newline after end of attribute");
326  if (*(endline++) != '\n')
327  throw error("did not find newline after '+'");
328 
329  // Read textdomain marker.
330  if (*endline == '#') {
331  endline = strchr(endline + 1, '\n');
332  if (!endline)
333  throw error("did not find newline after '#'");
334  ++endline;
335  }
336 
337  // Read indentation and start of string.
338  while (*endline == '\t') ++endline;
339  if (*endline == '_') ++endline;
340  if (*endline != '"')
341  throw error("did not find quotes after '+'");
342  end = endline;
343  }
344 
345  ++s;
346 
347  read_attribute:
348  string_span value(s, end - s);
349  if(attr_.empty() == false && !(attr_.back().key < name)) {
350  ERR_SWML << "attributes: '" << attr_.back().key << "' < '" << name << "'";
351  throw error("attributes not in order");
352  }
353 
354  s = end + 1;
355 
356  attr_.emplace_back(name, value);
357  }
358  }
359  }
360 
361  output_cache_ = string_span(begin, s - begin);
363 }
364 
366 {
367  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
368  for(child_list::iterator j = i->second.begin(); j != i->second.end(); ++j) {
369  debug_delete(*j);
370  }
371  }
372 }
373 
374 namespace {
375 struct string_span_pair_comparer
376 {
377  bool operator()(const string_span& a, const node::attribute& b) const {
378  return a < b.key;
379  }
380 
381  bool operator()(const node::attribute& a, const string_span& b) const {
382  return a.key < b;
383  }
384 
385  bool operator()(const node::attribute& a,
386  const node::attribute& b) const {
387  return a.key < b.key;
388  }
389 };
390 }
391 
392 const string_span& node::operator[](const char* key) const
393 {
394  static string_span empty("");
395  string_span span(key);
396  std::pair<attribute_list::const_iterator,
397  attribute_list::const_iterator> range = std::equal_range(attr_.begin(), attr_.end(), span, string_span_pair_comparer());
398  if(range.first != range.second) {
399  return range.first->value;
400  }
401 
402  return empty;
403 }
404 
405 bool node::has_attr(const char* key) const
406 {
407  string_span span(key);
408  std::pair<attribute_list::const_iterator,
409  attribute_list::const_iterator> range = std::equal_range(attr_.begin(), attr_.end(), span, string_span_pair_comparer());
410  return range.first != range.second;
411 }
412 
413 node& node::set_attr(const char* key, const char* value)
414 {
415  set_dirty();
416 
417  string_span span(key);
418  std::pair<attribute_list::iterator,
419  attribute_list::iterator> range = std::equal_range(attr_.begin(), attr_.end(), span, string_span_pair_comparer());
420  if(range.first != range.second) {
421  range.first->value = string_span(value);
422  } else {
423  attr_.insert(range.first, attribute(span, string_span(value)));
424  }
425 
426  return *this;
427 }
428 
429 node& node::set_attr_dup(const char* key, const char* value)
430 {
431  return set_attr(key, doc_->dup_string(value));
432 }
433 
434 node& node::set_attr_dup(const char* key, const string_span& value)
435 {
436  char* buf = value.duplicate();
438  return set_attr(key, buf);
439 }
440 
441 node& node::set_attr_int(const char* key, int value)
442 {
443  std::string temp = std::to_string(value);
444  return set_attr_dup(key, temp.c_str());
445 }
446 
447 node& node::add_child_at(const char* name, std::size_t index)
448 {
449  set_dirty();
450 
451  const int list_index = get_children(name);
452  child_list& list = children_[list_index].second;
453  if(index > list.size()) {
454  index = list.size();
455  }
456 
458  list.insert(list.begin() + index, new node(*doc_, this));
459  insert_ordered_child(list_index, index);
460 
462  return *list[index];
463 }
464 
465 
466 node& node::add_child(const char* name)
467 {
468  set_dirty();
469 
470  const int list_index = get_children(name);
472  child_list& list = children_[list_index].second;
473  list.push_back(new node(*doc_, this));
474  ordered_children_.emplace_back(list_index, list.size() - 1);
476  return *list.back();
477 }
478 
479 void node::remove_child(const string_span& name, std::size_t index)
480 {
481  set_dirty();
482 
483  //if we don't already have a vector for this item we don't want to add one.
485  if(itor == children_.end()) {
486  return;
487  }
488 
489  child_list& list = itor->second;
490  if(index >= list.size()) {
491  return;
492  }
493 
494  remove_ordered_child(std::distance(children_.begin(), itor), index);
495 
496  debug_delete(list[index]);
497  list.erase(list.begin() + index);
498 
499  if(list.empty()) {
500  remove_ordered_child_list(std::distance(children_.begin(), itor));
501  children_.erase(itor);
502  }
503 }
504 
505 void node::insert_ordered_child(int child_map_index, int child_list_index)
506 {
507  bool inserted = false;
509  while(i != ordered_children_.end()) {
510  if(i->child_map_index == child_map_index && i->child_list_index > child_list_index) {
511  i->child_list_index++;
512  } else if(i->child_map_index == child_map_index && i->child_list_index == child_list_index) {
513  inserted = true;
514  i->child_list_index++;
515  i = ordered_children_.insert(i, node_pos(child_map_index, child_list_index));
516  ++i;
517  }
518 
519  ++i;
520  }
521 
522  if(!inserted) {
523  ordered_children_.emplace_back(child_map_index, child_list_index);
524  }
525 }
526 
527 void node::remove_ordered_child(int child_map_index, int child_list_index)
528 {
529  int erase_count = 0;
531  while(i != ordered_children_.end()) {
532  if(i->child_map_index == child_map_index && i->child_list_index == child_list_index) {
533  i = ordered_children_.erase(i);
534  ++erase_count;
535  } else {
536  if(i->child_map_index == child_map_index && i->child_list_index > child_list_index) {
537  i->child_list_index--;
538  }
539  ++i;
540  }
541  }
542 
543  assert(erase_count == 1);
544 }
545 
546 void node::insert_ordered_child_list(int child_map_index)
547 {
549  while(i != ordered_children_.end()) {
550  if(i->child_map_index >= child_map_index) {
551  i->child_map_index++;
552  }
553  }
554 }
555 
556 void node::remove_ordered_child_list(int child_map_index)
557 {
559  while(i != ordered_children_.end()) {
560  if(i->child_map_index == child_map_index) {
561  assert(false);
562  i = ordered_children_.erase(i);
563  } else {
564  if(i->child_map_index > child_map_index) {
565  i->child_map_index--;
566  }
567 
568  ++i;
569  }
570  }
571 }
572 
574 {
575 // only define this symbol in debug mode to work out child ordering.
576 #ifdef CHECK_ORDERED_CHILDREN
577  std::vector<node_pos>::const_iterator i = ordered_children_.begin();
578  while(i != ordered_children_.end()) {
579  assert(i->child_map_index < children_.size());
580  assert(i->child_list_index < children_[i->child_map_index].second.size());
581  ++i;
582  }
583 
584  for(child_map::const_iterator j = children_.begin(); j != children_.end(); ++j) {
585  const unsigned short child_map_index = j - children_.begin();
586  for(child_list::const_iterator k = j->second.begin(); k != j->second.end(); ++k) {
587  const unsigned short child_list_index = k - j->second.begin();
588  bool found = false;
589  for(int n = 0; n != ordered_children_.size(); ++n) {
590  if(ordered_children_[n].child_map_index == child_map_index &&
591  ordered_children_[n].child_list_index == child_list_index) {
592  found = true;
593  break;
594  }
595  }
596 
597  assert(found);
598  }
599  }
600 #endif // CHECK_ORDERED_CHILDREN
601 }
602 
603 void node::remove_child(const char* name, std::size_t index)
604 {
606 }
607 
608 node* node::child(const char* name)
609 {
610  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
611  if(i->first == name) {
612  assert(i->second.empty() == false);
613  return i->second.front();
614  }
615  }
616 
617  return nullptr;
618 }
619 
620 const node* node::child(const char* name) const
621 {
622  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
623  if(i->first == name) {
624  if(i->second.empty()) {
625  return nullptr;
626  } else {
627  return i->second.front();
628  }
629  }
630  }
631 
632  return nullptr;
633 }
634 
635 const node::child_list& node::children(const char* name) const
636 {
637  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
638  if(i->first == name) {
639  return i->second;
640  }
641  }
642 
643  static const node::child_list empty;
644  return empty;
645 }
646 
647 int node::get_children(const char* name)
648 {
649  return get_children(string_span(name));
650 }
651 
653 {
654  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
655  if(i->first == name) {
656  return std::distance(children_.begin(), i);
657  }
658  }
659 
660  children_.emplace_back(string_span(name), child_list());
661  return children_.size() - 1;
662 }
663 
664 node::child_map::const_iterator node::find_in_map(const child_map& m, const string_span& attr)
665 {
666  child_map::const_iterator i = m.begin();
667  for(; i != m.end(); ++i) {
668  if(i->first == attr) {
669  break;
670  }
671  }
672 
673  return i;
674 }
675 
677 {
678  child_map::iterator i = m.begin();
679  for(; i != m.end(); ++i) {
680  if(i->first == attr) {
681  break;
682  }
683  }
684 
685  return i;
686 }
687 
689 {
690  if(children_.empty()) {
691  static const string_span empty;
692  return empty;
693  }
694 
695  return children_.begin()->first;
696 }
697 
698 int node::output_size() const
699 {
701  if(output_cache_.empty() == false) {
702  return output_cache_.size();
703  }
704 
705  int res = 0;
706  for(attribute_list::const_iterator i = attr_.begin(); i != attr_.end(); ++i) {
707  res += i->key.size() + i->value.size() + 4;
708  }
709 
710  std::size_t count_children = 0;
711  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
712  for(child_list::const_iterator j = i->second.begin(); j != i->second.end(); ++j) {
713  res += i->first.size()*2 + 7;
714  res += (*j)->output_size();
715  ++count_children;
716  }
717  }
718 
719  assert(count_children == ordered_children_.size());
720 
721  return res;
722 }
723 
724 void node::shift_buffers(ptrdiff_t offset)
725 {
726  if(!output_cache_.empty()) {
728  }
729 
730  for(std::vector<attribute>::iterator i = attr_.begin(); i != attr_.end(); ++i) {
731  i->key = string_span(i->key.begin() + offset, i->key.size());
732  i->value = string_span(i->value.begin() + offset, i->value.size());
733  }
734 
735  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
736  string_span& key = i->first;
737  key = string_span(key.begin() + offset, key.size());
738  for(child_list::iterator j = i->second.begin(); j != i->second.end(); ++j) {
739  (*j)->shift_buffers(offset);
740  }
741  }
742 }
743 
744 void node::output(char*& buf, CACHE_STATUS cache_status)
745 {
746  if(output_cache_.empty() == false) {
747  memcpy(buf, output_cache_.begin(), output_cache_.size());
748  if(cache_status == REFRESH_CACHE) {
750  }
751  buf += output_cache_.size();
752  return;
753  }
754 
755  char* begin = buf;
756 
757  for(std::vector<attribute>::iterator i = attr_.begin(); i != attr_.end(); ++i) {
758  memcpy(buf, i->key.begin(), i->key.size());
759  if(cache_status == REFRESH_CACHE) {
760  i->key = string_span(buf, i->key.size());
761  }
762  buf += i->key.size();
763  *buf++ = '=';
764  *buf++ = '"';
765  memcpy(buf, i->value.begin(), i->value.size());
766  if(cache_status == REFRESH_CACHE) {
767  i->value = string_span(buf, i->value.size());
768  }
769  buf += i->value.size();
770  *buf++ = '"';
771  *buf++ = '\n';
772  }
773 
774  for(std::vector<node_pos>::const_iterator i = ordered_children_.begin();
775  i != ordered_children_.end(); ++i) {
776  assert(i->child_map_index < children_.size());
777  assert(i->child_list_index < children_[i->child_map_index].second.size());
778  string_span& attr = children_[i->child_map_index].first;
779  *buf++ = '[';
780  memcpy(buf, attr.begin(), attr.size());
781  if(cache_status == REFRESH_CACHE) {
782  attr = string_span(buf, attr.size());
783  }
784  buf += attr.size();
785  *buf++ = ']';
786  *buf++ = '\n';
787  children_[i->child_map_index].second[i->child_list_index]->output(buf, cache_status);
788  *buf++ = '[';
789  *buf++ = '/';
790  memcpy(buf, attr.begin(), attr.size());
791  buf += attr.size();
792  *buf++ = ']';
793  *buf++ = '\n';
794  }
795 
796  if(cache_status == REFRESH_CACHE) {
797  output_cache_ = string_span(begin, buf - begin);
798  }
799 }
800 
801 std::string node_to_string(const node& n)
802 {
803  //calling output with status=DO_NOT_MODIFY_CACHE really doesn't modify the
804  //node, so we can do it safely
805  node& mutable_node = const_cast<node&>(n);
806  std::vector<char> v(mutable_node.output_size());
807  char* ptr = &v[0];
808  mutable_node.output(ptr, node::DO_NOT_MODIFY_CACHE);
809  assert(ptr == &v[0] + v.size());
810  return std::string(v.begin(), v.end());
811 }
812 
813 void node::copy_into(node& n) const
814 {
815  n.set_dirty();
816  for(attribute_list::const_iterator i = attr_.begin(); i != attr_.end(); ++i) {
817  char* key = i->key.duplicate();
818  char* value = i->value.duplicate();
819  n.doc_->take_ownership_of_buffer(key);
820  n.doc_->take_ownership_of_buffer(value);
821  n.set_attr(key, value);
822  }
823 
824  for(std::vector<node_pos>::const_iterator i = ordered_children_.begin();
825  i != ordered_children_.end(); ++i) {
826  assert(i->child_map_index < children_.size());
827  assert(i->child_list_index < children_[i->child_map_index].second.size());
828  char* buf = children_[i->child_map_index].first.duplicate();
829  n.doc_->take_ownership_of_buffer(buf);
830  children_[i->child_map_index].second[i->child_list_index]->copy_into(n.add_child(buf));
831  }
832 }
833 
834 void node::apply_diff(const node& diff)
835 {
836  set_dirty();
837  const node* inserts = diff.child("insert");
838  if(inserts != nullptr) {
839  for(attribute_list::const_iterator i = inserts->attr_.begin(); i != inserts->attr_.end(); ++i) {
840  char* name = i->key.duplicate();
841  char* value = i->value.duplicate();
842  set_attr(name, value);
845  }
846  }
847 
848  const node* deletes = diff.child("delete");
849  if(deletes != nullptr) {
850  for(attribute_list::const_iterator i = deletes->attr_.begin(); i != deletes->attr_.end(); ++i) {
851  std::pair<attribute_list::iterator,
852  attribute_list::iterator> range = std::equal_range(attr_.begin(), attr_.end(), i->key, string_span_pair_comparer());
853  if(range.first != range.second) {
854  attr_.erase(range.first);
855  }
856  }
857  }
858 
859  const child_list& child_changes = diff.children("change_child");
860  for(child_list::const_iterator i = child_changes.begin(); i != child_changes.end(); ++i) {
861  const std::size_t index = (**i)["index"].to_int();
862  for(child_map::const_iterator j = (*i)->children_.begin(); j != (*i)->children_.end(); ++j) {
863  const string_span& name = j->first;
864  for(child_list::const_iterator k = j->second.begin(); k != j->second.end(); ++k) {
866  if(itor != children_.end()) {
867  if(index < itor->second.size()) {
868  itor->second[index]->apply_diff(**k);
869  }
870  }
871  }
872  }
873  }
874 
875  const child_list& child_inserts = diff.children("insert_child");
876  for(child_list::const_iterator i = child_inserts.begin(); i != child_inserts.end(); ++i) {
877  const std::size_t index = (**i)["index"].to_int();
878  for(child_map::const_iterator j = (*i)->children_.begin(); j != (*i)->children_.end(); ++j) {
879  const string_span& name = j->first;
880  for(child_list::const_iterator k = j->second.begin(); k != j->second.end(); ++k) {
881  char* buf = name.duplicate();
883  (*k)->copy_into(add_child_at(buf, index));
884  }
885  }
886  }
887 
888  const child_list& child_deletes = diff.children("delete_child");
889  for(child_list::const_iterator i = child_deletes.begin(); i != child_deletes.end(); ++i) {
890  const std::size_t index = (**i)["index"].to_int();
891  for(child_map::const_iterator j = (*i)->children_.begin(); j != (*i)->children_.end(); ++j) {
892  if(j->second.empty()) {
893  continue;
894  }
895 
896  const string_span& name = j->first;
897  remove_child(name, index);
898  }
899  }
900 }
901 
903 {
904  doc_ = doc;
905 
906  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
907  for(child_list::iterator j = i->second.begin(); j != i->second.end(); ++j) {
908  (*j)->set_doc(doc);
909  }
910  }
911 }
912 
913 int node::nchildren() const
914 {
915  int res = 0;
916  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
917  for(child_list::const_iterator j = i->second.begin(); j != i->second.end(); ++j) {
918  ++res;
919  res += (*j)->nchildren();
920  }
921  }
922 
923  return res;
924 }
925 
927 {
928  int res = attr_.capacity();
929  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
930  for(child_list::const_iterator j = i->second.begin(); j != i->second.end(); ++j) {
931  res += (*j)->nattributes_recursive();
932  }
933  }
934 
935  return res;
936 }
937 
939 {
940  for(node* n = this; n != nullptr && n->output_cache_.is_null() == false; n = n->parent_) {
941  n->output_cache_ = string_span();
942  }
943 }
944 
946  compressed_buf_(),
947  output_(nullptr),
948  buffers_(),
949  root_(new node(*this, nullptr)),
950  prev_(nullptr),
951  next_(nullptr)
952 {
953  attach_list();
954 }
955 
957  compressed_buf_(),
958  output_(buf),
959  buffers_(),
960  root_(nullptr),
961  prev_(nullptr),
962  next_(nullptr)
963 {
964  if(control == INIT_TAKE_OWNERSHIP) {
965  buffers_.push_back(buf);
966  }
967  const char* cbuf = buf;
968  root_ = new node(*this, nullptr, &cbuf);
969 
970  attach_list();
971 }
972 
973 document::document(const char* buf, INIT_STATE state) :
974  compressed_buf_(),
975  output_(buf),
976  buffers_(),
977  root_(nullptr),
978  prev_(nullptr),
979  next_(nullptr)
980 {
981  if(state == INIT_COMPRESSED) {
983  output_ = nullptr;
984  } else {
985  root_ = new node(*this, nullptr, &buf);
986  }
987 
988  attach_list();
989 }
990 
992  compressed_buf_(compressed_buf),
993  output_(nullptr),
994  buffers_(),
995  root_(nullptr),
996  prev_(nullptr),
997  next_(nullptr)
998 {
999  string_span uncompressed_buf;
1000  buffers_.push_back(uncompress_buffer(compressed_buf, &uncompressed_buf));
1001  output_ = uncompressed_buf.begin();
1002  const char* cbuf = output_;
1003  try {
1004  root_ = new node(*this, nullptr, &cbuf);
1005  } catch(...) {
1006  ERR_SWML << "Caught exception creating a new simple_wml node: " << utils::get_unknown_exception_type();
1007  delete [] buffers_.front();
1008  buffers_.clear();
1009  throw;
1010  }
1011 
1012  attach_list();
1013 }
1014 
1016 {
1017  for(std::vector<char*>::iterator i = buffers_.begin(); i != buffers_.end(); ++i) {
1018  delete [] *i;
1019  }
1020 
1021  buffers_.clear();
1022  debug_delete(root_);
1023 
1024  detach_list();
1025 }
1026 
1027 const char* document::dup_string(const char* str)
1028 {
1029  const int len = strlen(str);
1030  char* res = new char[len+1];
1031  memcpy(res, str, len + 1);
1032  buffers_.push_back(res);
1033  return res;
1034 }
1035 
1036 const char* document::output()
1037 {
1038  if(output_ && (!root_ || root_->is_dirty() == false)) {
1039  return output_;
1040  }
1041  if(!root_) {
1042  assert(compressed_buf_.empty() == false);
1043  string_span uncompressed_buf;
1044  buffers_.push_back(uncompress_buffer(compressed_buf_, &uncompressed_buf));
1045  output_ = uncompressed_buf.begin();
1046  return output_;
1047  }
1048 
1049  //we're dirty, so the compressed buf must also be dirty; clear it.
1051 
1052  std::vector<char*> bufs;
1053  bufs.swap(buffers_);
1054 
1055  const int buf_size = root_->output_size() + 1;
1056  char* buf;
1057  try {
1058  buf = new char[buf_size];
1059  } catch (const std::bad_alloc& e) {
1060  ERR_SWML << "ERROR: Trying to allocate " << buf_size << " bytes. "
1061  << e.what();
1062  throw error("Bad allocation request in output().");
1063  }
1064  buffers_.push_back(buf);
1065  output_ = buf;
1066 
1068  *buf++ = 0;
1069  assert(buf == output_ + buf_size);
1070 
1071  for(std::vector<char*>::iterator i = bufs.begin(); i != bufs.end(); ++i) {
1072  delete [] *i;
1073  }
1074 
1075  bufs.clear();
1076 
1077  return output_;
1078 }
1079 
1081 {
1082  if(compressed_buf_.empty() == false &&
1083  (root_ == nullptr || root_->is_dirty() == false)) {
1084  assert(*compressed_buf_.begin() == (bzip2 ? 'B' : 31));
1085  return compressed_buf_;
1086  }
1087 
1088  buffers_.push_back(compress_buffer(output(), &compressed_buf_, bzip2));
1089  assert(*compressed_buf_.begin() == (bzip2 ? 'B' : 31));
1090 
1091  return compressed_buf_;
1092 }
1093 
1095 {
1097  debug_delete(root_);
1098  root_ = nullptr;
1099  output_ = nullptr;
1100  std::vector<char*> new_buffers;
1101  for(std::vector<char*>::iterator i = buffers_.begin(); i != buffers_.end(); ++i) {
1102  if(*i != compressed_buf_.begin()) {
1103  delete [] *i;
1104  } else {
1105  new_buffers.push_back(*i);
1106  }
1107  }
1108 
1109  buffers_.swap(new_buffers);
1110  assert(buffers_.size() == 1);
1111 }
1112 
1114 {
1115  if(output_ == nullptr) {
1116  assert(compressed_buf_.empty() == false);
1117  string_span uncompressed_buf;
1118  buffers_.push_back(uncompress_buffer(compressed_buf_, &uncompressed_buf));
1119  output_ = uncompressed_buf.begin();
1120  }
1121 
1122  assert(root_ == nullptr);
1123  const char* cbuf = output_;
1124  root_ = new node(*this, nullptr, &cbuf);
1125 }
1126 
1127 std::unique_ptr<document> document::clone()
1128 {
1129  char* buf = new char[strlen(output())+1];
1130  strcpy(buf, output());
1131  return std::make_unique<document>(buf);
1132 }
1133 
1135 {
1138  buffers_.swap(o.buffers_);
1139  std::swap(root_, o.root_);
1140 
1141  root_->set_doc(this);
1142  o.root_->set_doc(&o);
1143 }
1144 
1146 {
1148  output_ = nullptr;
1149  debug_delete(root_);
1150  root_ = new node(*this, nullptr);
1151  for(std::vector<char*>::iterator i = buffers_.begin(); i != buffers_.end(); ++i) {
1152  delete [] *i;
1153  }
1154 
1155  buffers_.clear();
1156 }
1157 
1158 namespace {
1159 document* head_doc = nullptr;
1160 }
1161 
1163 {
1164  prev_ = nullptr;
1165  next_ = head_doc;
1166 
1167  if(next_) {
1168  next_->prev_ = this;
1169  }
1170  head_doc = this;
1171 }
1172 
1174 {
1175  if(head_doc == this) {
1176  head_doc = next_;
1177  }
1178 
1179  if(next_) {
1180  next_->prev_ = prev_;
1181  }
1182 
1183  if(prev_) {
1184  prev_->next_ = next_;
1185  }
1186  next_ = prev_ = nullptr;
1187 }
1188 
1189 std::string document::stats()
1190 {
1191  std::ostringstream s;
1192  int ndocs = 0;
1193  int ncompressed = 0;
1194  int compressed_size = 0;
1195  int ntext = 0;
1196  int text_size = 0;
1197  int nbuffers = 0;
1198  int nnodes = 0;
1199  int ndirty = 0;
1200  int nattributes = 0;
1201  for(document* d = head_doc; d != nullptr; d = d->next_) {
1202  ndocs++;
1203  nbuffers += d->buffers_.size();
1204 
1205  if(d->compressed_buf_.is_null() == false) {
1206  ++ncompressed;
1207  compressed_size += d->compressed_buf_.size();
1208  }
1209 
1210  if(d->output_) {
1211  ++ntext;
1212  text_size += strlen(d->output_);
1213  }
1214 
1215  if(d->root_) {
1216  nnodes += 1 + d->root_->nchildren();
1217  nattributes += d->root_->nattributes_recursive();
1218  }
1219 
1220  if(d->root_ && d->root_->is_dirty()) {
1221  ++ndirty;
1222  }
1223  }
1224 
1225  const int nodes_alloc = nnodes*(sizeof(node) + 12);
1226  const int attr_alloc = nattributes*(sizeof(string_span)*2);
1227  const int total_alloc = compressed_size + text_size + nodes_alloc + attr_alloc;
1228 
1229  s << "WML documents: " << ndocs << "\n"
1230  << "Dirty: " << ndirty << "\n"
1231  << "With compression: " << ncompressed << " (" << compressed_size
1232  << " bytes)\n"
1233  << "With text: " << ntext << " (" << text_size
1234  << " bytes)\n"
1235  << "Nodes: " << nnodes << " (" << nodes_alloc << " bytes)\n"
1236  << "Attr: " << nattributes << " (" << attr_alloc << " bytes)\n"
1237  << "Buffers: " << nbuffers << "\n"
1238  << "Total allocation: " << total_alloc << " bytes\n";
1239 
1240  return s.str();
1241 }
1242 
1243 void swap(document& lhs, document& rhs)
1244 {
1245  lhs.swap(rhs);
1246 }
1247 
1248 }
void take_ownership_of_buffer(char *buffer)
Definition: simple_wml.hpp:281
const char * output()
std::unique_ptr< document > clone()
static std::string stats()
static std::size_t document_size_limit
Definition: simple_wml.hpp:290
const char * output_
Definition: simple_wml.hpp:297
void swap(document &o)
std::vector< char * > buffers_
Definition: simple_wml.hpp:298
string_span output_compressed(bool bzip2=false)
const char * dup_string(const char *str)
string_span compressed_buf_
Definition: simple_wml.hpp:296
const string_span & attr(const char *key) const
Definition: simple_wml.hpp:128
int output_size() const
Definition: simple_wml.cpp:698
void set_doc(document *doc)
Definition: simple_wml.cpp:902
void insert_ordered_child(int child_map_index, int child_list_index)
Definition: simple_wml.cpp:505
void insert_ordered_child_list(int child_map_index)
Definition: simple_wml.cpp:546
void check_ordered_children() const
Definition: simple_wml.cpp:573
void remove_child(const char *name, std::size_t index)
Definition: simple_wml.cpp:603
int nchildren() const
Definition: simple_wml.cpp:913
bool is_dirty() const
Definition: simple_wml.hpp:161
node(document &doc, node *parent)
Definition: simple_wml.cpp:208
int get_children(const string_span &name)
Definition: simple_wml.cpp:652
document * doc_
Definition: simple_wml.hpp:190
const string_span & operator[](const char *key) const
Definition: simple_wml.cpp:392
const child_list & children(const char *name) const
Definition: simple_wml.cpp:635
void apply_diff(const node &diff)
Definition: simple_wml.cpp:834
attribute_list attr_
Definition: simple_wml.hpp:193
bool has_attr(const char *key) const
Definition: simple_wml.cpp:405
node & set_attr_int(const char *key, int value)
Definition: simple_wml.cpp:441
std::vector< child_pair > child_map
Definition: simple_wml.hpp:198
node * child(const char *name)
Definition: simple_wml.cpp:608
std::vector< node * > child_list
Definition: simple_wml.hpp:125
void remove_ordered_child_list(int child_map_index)
Definition: simple_wml.cpp:556
int nattributes_recursive() const
Definition: simple_wml.cpp:926
std::vector< node_pos > ordered_children_
Definition: simple_wml.hpp:216
const string_span & first_child() const
Definition: simple_wml.cpp:688
child_map children_
Definition: simple_wml.hpp:202
void output(char *&buf, CACHE_STATUS status=DO_NOT_MODIFY_CACHE)
Definition: simple_wml.cpp:744
node & add_child(const char *name)
Definition: simple_wml.cpp:466
node & set_attr(const char *key, const char *value)
Definition: simple_wml.cpp:413
string_span output_cache_
Definition: simple_wml.hpp:225
node & add_child_at(const char *name, std::size_t index)
Definition: simple_wml.cpp:447
void copy_into(node &n) const
Definition: simple_wml.cpp:813
node & set_attr_dup(const char *key, const char *value)
Definition: simple_wml.cpp:429
void remove_ordered_child(int child_map_index, int child_list_index)
Definition: simple_wml.cpp:527
void shift_buffers(ptrdiff_t offset)
Definition: simple_wml.cpp:724
static child_map::const_iterator find_in_map(const child_map &m, const string_span &attr)
Definition: simple_wml.cpp:664
bool to_bool(bool default_value=false) const
Definition: simple_wml.cpp:159
std::string to_string() const
Definition: simple_wml.cpp:183
const char * begin() const
Definition: simple_wml.hpp:90
char * duplicate() const
Definition: simple_wml.cpp:188
const char * end() const
Definition: simple_wml.hpp:91
std::size_t i
Definition: function.cpp:1023
unsigned in
If equal to search_counter, the node is off the list.
Standard logging facilities (interface).
void swap(document &lhs, document &rhs)
Implement non-member swap function for std::swap (calls document::swap).
std::ostream & operator<<(std::ostream &o, const string_span &s)
Definition: simple_wml.cpp:202
std::string node_to_string(const node &n)
Definition: simple_wml.cpp:801
@ INIT_TAKE_OWNERSHIP
Definition: simple_wml.hpp:230
std::size_t index(const std::string &str, const std::size_t index)
Codepoint index corresponding to the nth character in a UTF-8 string.
Definition: unicode.cpp:70
std::string get_unknown_exception_type()
Utility function for finding the type of thing caught with catch(...).
Definition: general.cpp:23
std::string::const_iterator iterator
Definition: tokenizer.hpp:25
static void msg(const char *act, debug_info &i, const char *to="", const char *result="")
Definition: debugger.cpp:109
#define ERR_SWML
Definition: simple_wml.cpp:30
static lg::log_domain log_config("config")
static map_location::direction n
static map_location::direction s
#define d
#define e
#define b