The Battle for Wesnoth  1.17.0-dev
simple_wml.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2008 - 2021
3  by David White <dave@whitevine.net>
4  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY.
12 
13  See the COPYING file for more details.
14 */
15 
16 #include <iostream>
17 #include <sstream>
18 
19 #include <boost/iostreams/copy.hpp>
20 #include <boost/iostreams/filtering_stream.hpp>
21 #include <boost/iostreams/filter/bzip2.hpp>
22 #include <boost/iostreams/filter/counter.hpp>
23 #include <boost/iostreams/filter/gzip.hpp>
24 
26 
27 #include "log.hpp"
28 
// Log domain named "config"; all diagnostics in this file are written through it.
29 static lg::log_domain log_config("config");
// Shorthand for an error-level log stream on log_config.
30 #define ERR_SWML LOG_STREAM(err, log_config)
31 
32 namespace simple_wml {
33 
// Upper bound compared against the running output size in uncompress_buffer();
// exceeding it aborts decompression with an error.
34 std::size_t document::document_size_limit = 40000000;
35 
36 namespace {
37 
// Destroys a node. The code in this file releases nodes through this helper
// rather than calling delete directly.
38 void debug_delete(node* n) {
39  delete n;
40 }
41 
42 char* uncompress_buffer(const string_span& input, string_span* span)
43 {
44  int nalloc = input.size();
45  int state = 0;
46  try {
47  std::istringstream stream(std::string(input.begin(), input.end()));
48  state = 1;
49  boost::iostreams::filtering_stream<boost::iostreams::input> filter;
50  state = 2;
51  if (!span->empty() && *span->begin() == 'B') {
52  filter.push(boost::iostreams::bzip2_decompressor());
53  } else {
54  filter.push(boost::iostreams::gzip_decompressor());
55  }
56  filter.push(stream);
57  state = 3;
58 
59  const std::size_t chunk_size = input.size() * 10;
60  nalloc = chunk_size;
61  std::vector<char> buf(chunk_size);
62  state = 4;
63  std::size_t len = 0;
64  std::size_t pos = 0;
65  while(filter.good() && (len = filter.read(&buf[pos], chunk_size).gcount()) == chunk_size) {
66  if(pos + chunk_size > document::document_size_limit) {
67  throw error("WML document exceeded size limit during decompression");
68  }
69 
70  pos += len;
71  buf.resize(pos + chunk_size);
72  len = 0;
73  }
74 
75  if(!filter.eof() && !filter.good()) {
76  throw error("failed to uncompress");
77  }
78 
79  pos += len;
80  state = 5;
81  nalloc = pos;
82 
83  buf.resize(pos);
84  state = 6;
85 
86  char* small_out = new char[pos+1];
87  memcpy(small_out, &buf[0], pos);
88  state = 7;
89 
90  small_out[pos] = 0;
91 
92  *span = string_span(small_out, pos);
93  state = 8;
94  return small_out;
95  } catch (const std::bad_alloc& e) {
96  ERR_SWML << "ERROR: bad_alloc caught in uncompress_buffer() state "
97  << state << " alloc bytes " << nalloc << " with input: '"
98  << input << "' " << e.what() << std::endl;
99  throw error("Bad allocation request in uncompress_buffer().");
100  }
101 }
102 
103 char* compress_buffer(const char* input, string_span* span, bool bzip2)
104 {
105  int nalloc = strlen(input);
106  int state = 0;
107  try {
108  std::string in(input);
109  state = 1;
110  std::istringstream istream(in);
111  state = 2;
112  boost::iostreams::filtering_stream<boost::iostreams::output> filter;
113  state = 3;
114  if (bzip2) {
115  filter.push(boost::iostreams::bzip2_compressor());
116  } else {
117  filter.push(boost::iostreams::gzip_compressor());
118  }
119  state = 4;
120  nalloc = in.size()*2 + 80;
121  std::vector<char> buf(nalloc);
122  boost::iostreams::array_sink out(&buf[0], buf.size());
123  filter.push(boost::iostreams::counter());
124  filter.push(out);
125 
126  state = 5;
127 
128  boost::iostreams::copy(istream, filter, buf.size());
129  const int len = filter.component<boost::iostreams::counter>(1)->characters();
130  assert(len < 128*1024*1024);
131  if((!filter.eof() && !filter.good()) || len == static_cast<int>(buf.size())) {
132  throw error("failed to compress");
133  }
134  state = 6;
135  nalloc = len;
136 
137  buf.resize(len);
138  state = 7;
139 
140  char* small_out = new char[len];
141  memcpy(small_out, &buf[0], len);
142  state = 8;
143 
144  *span = string_span(small_out, len);
145  assert(*small_out == (bzip2 ? 'B' : 31));
146  state = 9;
147  return small_out;
148  } catch (const std::bad_alloc& e) {
149  ERR_SWML << "ERROR: bad_alloc caught in compress_buffer() state "
150  << state << " alloc bytes " << nalloc << " with input: '"
151  << input << "' " << e.what() << std::endl;
152  throw error("Bad allocation request in compress_buffer().");
153  }
154 }
155 
156 } // namespace
157 
158 bool string_span::to_bool(bool default_value) const
159 {
160  if(empty()) {
161  return default_value;
162  }
163 
164  if (operator==("no") || operator==("off") || operator==("false") || operator==("0") || operator==("0.0"))
165  return false;
166 
167  return true;
168 }
169 
171 {
172  const int buf_size = 64;
173  if(size() >= buf_size) {
174  return 0;
175  }
176  char buf[64];
177  memcpy(buf, begin(), size());
178  buf[size()] = 0;
179  return atoi(buf);
180 }
181 
// Returns a std::string holding a copy of the characters in [begin(), end()).
182 std::string string_span::to_string() const
183 {
184  return std::string(begin(), end());
185 }
186 
188 {
189  char* buf = new char[size() + 1];
190  memcpy(buf, begin(), size());
191  buf[size()] = 0;
192  return buf;
193 }
194 
// Builds the exception from msg (forwarded to game::error) and also writes
// the message to the error log at construction time.
195 error::error(const char* msg)
196  : game::error(msg)
197 {
198  ERR_SWML << "ERROR: '" << msg << "'" << std::endl;
199 }
200 
201 std::ostream& operator<<(std::ostream& o, const string_span& s)
202 {
203  o << std::string(s.begin(), s.end());
204  return o;
205 }
206 
// Creates an empty node (no attributes, no children, no cached output)
// belonging to doc and attached below parent.
207 node::node(document& doc, node* parent) :
208  doc_(&doc),
209  attr_(),
210  parent_(parent),
211  children_(),
212  ordered_children_(),
213  output_cache_()
214 {
215 }
216 
217 #ifdef _MSC_VER
218 #pragma warning (push)
219 #pragma warning (disable: 4706)
220 #endif
221 node::node(document& doc, node* parent, const char** str, int depth) :
222  doc_(&doc),
223  attr_(),
224  parent_(parent),
225  children_(),
227  output_cache_()
228 {
229  if(depth >= 1000) {
230  throw error("elements nested too deep");
231  }
232 
233  const char*& s = *str;
234 
235  const char* const begin = s;
236  while(*s) {
237  switch(*s) {
238  case '[': {
239  if(s[1] == '/') {
240  output_cache_ = string_span(begin, s - begin);
241  s = strchr(s, ']');
242  if(s == nullptr) {
243  throw error("end element unterminated");
244  }
245 
246  ++s;
247  return;
248  }
249 
250  ++s;
251  const char* end = strchr(s, ']');
252  if(end == nullptr) {
253  throw error("unterminated element");
254  }
255 
256  const int list_index = get_children(string_span(s, end - s));
258 
259  s = end + 1;
260 
261  children_[list_index].second.push_back(new node(doc, this, str, depth+1));
262  ordered_children_.emplace_back(list_index, children_[list_index].second.size() - 1);
264 
265  break;
266  }
267  case ' ':
268  case '\t':
269  case '\n':
270  ++s;
271  break;
272  case '#':
273  s = strchr(s, '\n');
274  if(s == nullptr) {
275  throw error("did not find newline after '#'");
276  }
277  break;
278  default: {
279  const char* end = strchr(s, '=');
280  if(end == nullptr) {
281  ERR_SWML << "attribute: " << s << std::endl;
282  throw error("did not find '=' after attribute");
283  }
284 
285  string_span name(s, end - s);
286  s = end + 1;
287  if(*s == '_') {
288  s = strchr(s, '"');
289  if(s == nullptr) {
290  throw error("did not find '\"' after '_'");
291  }
292  }
293 
294  if (*s != '"') {
295  end = strchr(s, '\n');
296  if (!end) {
297  ERR_SWML << "ATTR: '" << name << "' (((" << s << ")))" << std::endl;
298  throw error("did not find end of attribute");
299  }
300  if (memchr(s, '"', end - s))
301  throw error("found stray quotes in unquoted value");
302  goto read_attribute;
303  }
304  end = s;
305  while(true)
306  {
307  // Read until the first single double quote.
308  while((end = strchr(end+1, '"')) && end[1] == '"') {
309 #ifdef _MSC_VER
310 #pragma warning (pop)
311 #endif
312  ++end;
313  }
314  if(end == nullptr)
315  throw error("did not find end of attribute");
316 
317  // Stop if newline.
318  const char *endline = end + 1;
319  while (*endline == ' ') ++endline;
320  if (*endline == '\n') break;
321 
322  // Read concatenation marker.
323  if (*(endline++) != '+')
324  throw error("did not find newline after end of attribute");
325  if (*(endline++) != '\n')
326  throw error("did not find newline after '+'");
327 
328  // Read textdomain marker.
329  if (*endline == '#') {
330  endline = strchr(endline + 1, '\n');
331  if (!endline)
332  throw error("did not find newline after '#'");
333  ++endline;
334  }
335 
336  // Read indentation and start of string.
337  while (*endline == '\t') ++endline;
338  if (*endline == '_') ++endline;
339  if (*endline != '"')
340  throw error("did not find quotes after '+'");
341  end = endline;
342  }
343 
344  ++s;
345 
346  read_attribute:
347  string_span value(s, end - s);
348  if(attr_.empty() == false && !(attr_.back().key < name)) {
349  ERR_SWML << "attributes: '" << attr_.back().key << "' < '" << name << "'" << std::endl;
350  throw error("attributes not in order");
351  }
352 
353  s = end + 1;
354 
355  attr_.emplace_back(name, value);
356  }
357  }
358  }
359 
360  output_cache_ = string_span(begin, s - begin);
362 }
363 
365 {
366  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
367  for(child_list::iterator j = i->second.begin(); j != i->second.end(); ++j) {
368  debug_delete(*j);
369  }
370  }
371 }
372 
373 namespace {
// Ordering predicate on attribute keys. The three overloads allow an attribute
// list to be searched with a bare key (string_span) on either side of the
// comparison, as std::equal_range requires.
374 struct string_span_pair_comparer
375 {
// key < attribute
376  bool operator()(const string_span& a, const node::attribute& b) const {
377  return a < b.key;
378  }
379 
// attribute < key
380  bool operator()(const node::attribute& a, const string_span& b) const {
381  return a.key < b;
382  }
383 
// attribute < attribute
384  bool operator()(const node::attribute& a,
385  const node::attribute& b) const {
386  return a.key < b.key;
387  }
388 };
389 }
390 
391 const string_span& node::operator[](const char* key) const
392 {
393  static string_span empty("");
394  string_span span(key);
395  std::pair<attribute_list::const_iterator,
396  attribute_list::const_iterator> range = std::equal_range(attr_.begin(), attr_.end(), span, string_span_pair_comparer());
397  if(range.first != range.second) {
398  return range.first->value;
399  }
400 
401  return empty;
402 }
403 
404 bool node::has_attr(const char* key) const
405 {
406  string_span span(key);
407  std::pair<attribute_list::const_iterator,
408  attribute_list::const_iterator> range = std::equal_range(attr_.begin(), attr_.end(), span, string_span_pair_comparer());
409  return range.first != range.second;
410 }
411 
412 node& node::set_attr(const char* key, const char* value)
413 {
414  set_dirty();
415 
416  string_span span(key);
417  std::pair<attribute_list::iterator,
418  attribute_list::iterator> range = std::equal_range(attr_.begin(), attr_.end(), span, string_span_pair_comparer());
419  if(range.first != range.second) {
420  range.first->value = string_span(value);
421  } else {
422  attr_.insert(range.first, attribute(span, string_span(value)));
423  }
424 
425  return *this;
426 }
427 
// Like set_attr(), but the value is first copied with document::dup_string(),
// so the caller's value string need not outlive the call. The key is NOT
// copied and is still referenced by the node.
428 node& node::set_attr_dup(const char* key, const char* value)
429 {
430  return set_attr(key, doc_->dup_string(value));
431 }
432 
433 node& node::set_attr_dup(const char* key, const string_span& value)
434 {
435  char* buf = value.duplicate();
437  return set_attr(key, buf);
438 }
439 
440 node& node::set_attr_int(const char* key, int value)
441 {
442  std::string temp = std::to_string(value);
443  return set_attr_dup(key, temp.c_str());
444 }
445 
446 node& node::add_child_at(const char* name, std::size_t index)
447 {
448  set_dirty();
449 
450  const int list_index = get_children(name);
451  child_list& list = children_[list_index].second;
452  if(index > list.size()) {
453  index = list.size();
454  }
455 
457  list.insert(list.begin() + index, new node(*doc_, this));
458  insert_ordered_child(list_index, index);
459 
461  return *list[index];
462 }
463 
464 
465 node& node::add_child(const char* name)
466 {
467  set_dirty();
468 
469  const int list_index = get_children(name);
471  child_list& list = children_[list_index].second;
472  list.push_back(new node(*doc_, this));
473  ordered_children_.emplace_back(list_index, list.size() - 1);
475  return *list.back();
476 }
477 
478 void node::remove_child(const string_span& name, std::size_t index)
479 {
480  set_dirty();
481 
482  //if we don't already have a vector for this item we don't want to add one.
484  if(itor == children_.end()) {
485  return;
486  }
487 
488  child_list& list = itor->second;
489  if(index >= list.size()) {
490  return;
491  }
492 
493  remove_ordered_child(std::distance(children_.begin(), itor), index);
494 
495  debug_delete(list[index]);
496  list.erase(list.begin() + index);
497 
498  if(list.empty()) {
499  remove_ordered_child_list(std::distance(children_.begin(), itor));
500  children_.erase(itor);
501  }
502 }
503 
504 void node::insert_ordered_child(int child_map_index, int child_list_index)
505 {
506  bool inserted = false;
508  while(i != ordered_children_.end()) {
509  if(i->child_map_index == child_map_index && i->child_list_index > child_list_index) {
510  i->child_list_index++;
511  } else if(i->child_map_index == child_map_index && i->child_list_index == child_list_index) {
512  inserted = true;
513  i->child_list_index++;
514  i = ordered_children_.insert(i, node_pos(child_map_index, child_list_index));
515  ++i;
516  }
517 
518  ++i;
519  }
520 
521  if(!inserted) {
522  ordered_children_.emplace_back(child_map_index, child_list_index);
523  }
524 }
525 
526 void node::remove_ordered_child(int child_map_index, int child_list_index)
527 {
528  int erase_count = 0;
530  while(i != ordered_children_.end()) {
531  if(i->child_map_index == child_map_index && i->child_list_index == child_list_index) {
532  i = ordered_children_.erase(i);
533  ++erase_count;
534  } else {
535  if(i->child_map_index == child_map_index && i->child_list_index > child_list_index) {
536  i->child_list_index--;
537  }
538  ++i;
539  }
540  }
541 
542  assert(erase_count == 1);
543 }
544 
545 void node::insert_ordered_child_list(int child_map_index)
546 {
548  while(i != ordered_children_.end()) {
549  if(i->child_map_index >= child_map_index) {
550  i->child_map_index++;
551  }
552  }
553 }
554 
555 void node::remove_ordered_child_list(int child_map_index)
556 {
558  while(i != ordered_children_.end()) {
559  if(i->child_map_index == child_map_index) {
560  assert(false);
561  i = ordered_children_.erase(i);
562  } else {
563  if(i->child_map_index > child_map_index) {
564  i->child_map_index--;
565  }
566 
567  ++i;
568  }
569  }
570 }
571 
573 {
574 // only define this symbol in debug mode to work out child ordering.
575 #ifdef CHECK_ORDERED_CHILDREN
576  std::vector<node_pos>::const_iterator i = ordered_children_.begin();
577  while(i != ordered_children_.end()) {
578  assert(i->child_map_index < children_.size());
579  assert(i->child_list_index < children_[i->child_map_index].second.size());
580  ++i;
581  }
582 
583  for(child_map::const_iterator j = children_.begin(); j != children_.end(); ++j) {
584  const unsigned short child_map_index = j - children_.begin();
585  for(child_list::const_iterator k = j->second.begin(); k != j->second.end(); ++k) {
586  const unsigned short child_list_index = k - j->second.begin();
587  bool found = false;
588  for(int n = 0; n != ordered_children_.size(); ++n) {
589  if(ordered_children_[n].child_map_index == child_map_index &&
590  ordered_children_[n].child_list_index == child_list_index) {
591  found = true;
592  break;
593  }
594  }
595 
596  assert(found);
597  }
598  }
599 #endif // CHECK_ORDERED_CHILDREN
600 }
601 
// Convenience overload: wraps the C string in a string_span and forwards to
// remove_child(const string_span&, std::size_t).
602 void node::remove_child(const char* name, std::size_t index)
603 {
604  remove_child(string_span(name), index);
605 }
606 
607 node* node::child(const char* name)
608 {
609  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
610  if(i->first == name) {
611  assert(i->second.empty() == false);
612  return i->second.front();
613  }
614  }
615 
616  return nullptr;
617 }
618 
619 const node* node::child(const char* name) const
620 {
621  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
622  if(i->first == name) {
623  if(i->second.empty()) {
624  return nullptr;
625  } else {
626  return i->second.front();
627  }
628  }
629  }
630 
631  return nullptr;
632 }
633 
634 const node::child_list& node::children(const char* name) const
635 {
636  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
637  if(i->first == name) {
638  return i->second;
639  }
640  }
641 
642  static const node::child_list empty;
643  return empty;
644 }
645 
// Convenience overload: wraps the C string in a string_span and forwards to
// get_children(const string_span&).
646 int node::get_children(const char* name)
647 {
648  return get_children(string_span(name));
649 }
650 
652 {
653  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
654  if(i->first == name) {
655  return std::distance(children_.begin(), i);
656  }
657  }
658 
659  children_.emplace_back(string_span(name), child_list());
660  return children_.size() - 1;
661 }
662 
663 node::child_map::const_iterator node::find_in_map(const child_map& m, const string_span& attr)
664 {
665  child_map::const_iterator i = m.begin();
666  for(; i != m.end(); ++i) {
667  if(i->first == attr) {
668  break;
669  }
670  }
671 
672  return i;
673 }
674 
676 {
677  child_map::iterator i = m.begin();
678  for(; i != m.end(); ++i) {
679  if(i->first == attr) {
680  break;
681  }
682  }
683 
684  return i;
685 }
686 
// NOTE(review): the declaration line of this function is missing from this
// listing; only the body is visible.
// Returns the name stored in the first entry of children_, or a shared empty
// span when the node has no children.
688 {
689  if(children_.empty()) {
690  static const string_span empty;
691  return empty;
692  }
693 
694  return children_.begin()->first;
695 }
696 
697 int node::output_size() const
698 {
700  if(output_cache_.empty() == false) {
701  return output_cache_.size();
702  }
703 
704  int res = 0;
705  for(attribute_list::const_iterator i = attr_.begin(); i != attr_.end(); ++i) {
706  res += i->key.size() + i->value.size() + 4;
707  }
708 
709  std::size_t count_children = 0;
710  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
711  for(child_list::const_iterator j = i->second.begin(); j != i->second.end(); ++j) {
712  res += i->first.size()*2 + 7;
713  res += (*j)->output_size();
714  ++count_children;
715  }
716  }
717 
718  assert(count_children == ordered_children_.size());
719 
720  return res;
721 }
722 
723 void node::shift_buffers(ptrdiff_t offset)
724 {
725  if(!output_cache_.empty()) {
727  }
728 
729  for(std::vector<attribute>::iterator i = attr_.begin(); i != attr_.end(); ++i) {
730  i->key = string_span(i->key.begin() + offset, i->key.size());
731  i->value = string_span(i->value.begin() + offset, i->value.size());
732  }
733 
734  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
735  string_span& key = i->first;
736  key = string_span(key.begin() + offset, key.size());
737  for(child_list::iterator j = i->second.begin(); j != i->second.end(); ++j) {
738  (*j)->shift_buffers(offset);
739  }
740  }
741 }
742 
743 void node::output(char*& buf, CACHE_STATUS cache_status)
744 {
745  if(output_cache_.empty() == false) {
746  memcpy(buf, output_cache_.begin(), output_cache_.size());
747  if(cache_status == REFRESH_CACHE) {
749  }
750  buf += output_cache_.size();
751  return;
752  }
753 
754  char* begin = buf;
755 
756  for(std::vector<attribute>::iterator i = attr_.begin(); i != attr_.end(); ++i) {
757  memcpy(buf, i->key.begin(), i->key.size());
758  i->key = string_span(buf, i->key.size());
759  buf += i->key.size();
760  *buf++ = '=';
761  *buf++ = '"';
762  memcpy(buf, i->value.begin(), i->value.size());
763  i->value = string_span(buf, i->value.size());
764  buf += i->value.size();
765  *buf++ = '"';
766  *buf++ = '\n';
767  }
768 
769  for(std::vector<node_pos>::const_iterator i = ordered_children_.begin();
770  i != ordered_children_.end(); ++i) {
771  assert(i->child_map_index < children_.size());
772  assert(i->child_list_index < children_[i->child_map_index].second.size());
773  string_span& attr = children_[i->child_map_index].first;
774  *buf++ = '[';
775  memcpy(buf, attr.begin(), attr.size());
776  attr = string_span(buf, attr.size());
777  buf += attr.size();
778  *buf++ = ']';
779  *buf++ = '\n';
780  children_[i->child_map_index].second[i->child_list_index]->output(buf, cache_status);
781  *buf++ = '[';
782  *buf++ = '/';
783  memcpy(buf, attr.begin(), attr.size());
784  buf += attr.size();
785  *buf++ = ']';
786  *buf++ = '\n';
787  }
788 
789  if(cache_status == REFRESH_CACHE) {
790  output_cache_ = string_span(begin, buf - begin);
791  }
792 }
793 
794 std::string node_to_string(const node& n)
795 {
796  //calling output with status=DO_NOT_MODIFY_CACHE really doesn't modify the
797  //node, so we can do it safely
798  node& mutable_node = const_cast<node&>(n);
799  std::vector<char> v(mutable_node.output_size());
800  char* ptr = &v[0];
801  mutable_node.output(ptr, node::DO_NOT_MODIFY_CACHE);
802  assert(ptr == &v[0] + v.size());
803  return std::string(v.begin(), v.end());
804 }
805 
806 void node::copy_into(node& n) const
807 {
808  n.set_dirty();
809  for(attribute_list::const_iterator i = attr_.begin(); i != attr_.end(); ++i) {
810  char* key = i->key.duplicate();
811  char* value = i->value.duplicate();
813  n.doc_->take_ownership_of_buffer(value);
814  n.set_attr(key, value);
815  }
816 
817  for(std::vector<node_pos>::const_iterator i = ordered_children_.begin();
818  i != ordered_children_.end(); ++i) {
819  assert(i->child_map_index < children_.size());
820  assert(i->child_list_index < children_[i->child_map_index].second.size());
821  char* buf = children_[i->child_map_index].first.duplicate();
823  children_[i->child_map_index].second[i->child_list_index]->copy_into(n.add_child(buf));
824  }
825 }
826 
827 void node::apply_diff(const node& diff)
828 {
829  set_dirty();
830  const node* inserts = diff.child("insert");
831  if(inserts != nullptr) {
832  for(attribute_list::const_iterator i = inserts->attr_.begin(); i != inserts->attr_.end(); ++i) {
833  char* name = i->key.duplicate();
834  char* value = i->value.duplicate();
835  set_attr(name, value);
838  }
839  }
840 
841  const node* deletes = diff.child("delete");
842  if(deletes != nullptr) {
843  for(attribute_list::const_iterator i = deletes->attr_.begin(); i != deletes->attr_.end(); ++i) {
844  std::pair<attribute_list::iterator,
845  attribute_list::iterator> range = std::equal_range(attr_.begin(), attr_.end(), i->key, string_span_pair_comparer());
846  if(range.first != range.second) {
847  attr_.erase(range.first);
848  }
849  }
850  }
851 
852  const child_list& child_changes = diff.children("change_child");
853  for(child_list::const_iterator i = child_changes.begin(); i != child_changes.end(); ++i) {
854  const std::size_t index = (**i)["index"].to_int();
855  for(child_map::const_iterator j = (*i)->children_.begin(); j != (*i)->children_.end(); ++j) {
856  const string_span& name = j->first;
857  for(child_list::const_iterator k = j->second.begin(); k != j->second.end(); ++k) {
859  if(itor != children_.end()) {
860  if(index < itor->second.size()) {
861  itor->second[index]->apply_diff(**k);
862  }
863  }
864  }
865  }
866  }
867 
868  const child_list& child_inserts = diff.children("insert_child");
869  for(child_list::const_iterator i = child_inserts.begin(); i != child_inserts.end(); ++i) {
870  const std::size_t index = (**i)["index"].to_int();
871  for(child_map::const_iterator j = (*i)->children_.begin(); j != (*i)->children_.end(); ++j) {
872  const string_span& name = j->first;
873  for(child_list::const_iterator k = j->second.begin(); k != j->second.end(); ++k) {
874  char* buf = name.duplicate();
876  (*k)->copy_into(add_child_at(buf, index));
877  }
878  }
879  }
880 
881  const child_list& child_deletes = diff.children("delete_child");
882  for(child_list::const_iterator i = child_deletes.begin(); i != child_deletes.end(); ++i) {
883  const std::size_t index = (**i)["index"].to_int();
884  for(child_map::const_iterator j = (*i)->children_.begin(); j != (*i)->children_.end(); ++j) {
885  if(j->second.empty()) {
886  continue;
887  }
888 
889  const string_span& name = j->first;
890  remove_child(name, index);
891  }
892  }
893 }
894 
896 {
897  doc_ = doc;
898 
899  for(child_map::iterator i = children_.begin(); i != children_.end(); ++i) {
900  for(child_list::iterator j = i->second.begin(); j != i->second.end(); ++j) {
901  (*j)->set_doc(doc);
902  }
903  }
904 }
905 
906 int node::nchildren() const
907 {
908  int res = 0;
909  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
910  for(child_list::const_iterator j = i->second.begin(); j != i->second.end(); ++j) {
911  ++res;
912  res += (*j)->nchildren();
913  }
914  }
915 
916  return res;
917 }
918 
920 {
921  int res = attr_.capacity();
922  for(child_map::const_iterator i = children_.begin(); i != children_.end(); ++i) {
923  for(child_list::const_iterator j = i->second.begin(); j != i->second.end(); ++j) {
924  res += (*j)->nattributes_recursive();
925  }
926  }
927 
928  return res;
929 }
930 
932 {
933  for(node* n = this; n != nullptr && n->output_cache_.is_null() == false; n = n->parent_) {
934  n->output_cache_ = string_span();
935  }
936 }
937 
939  compressed_buf_(),
940  output_(nullptr),
941  buffers_(),
942  root_(new node(*this, nullptr)),
943  prev_(nullptr),
944  next_(nullptr)
945 {
946  attach_list();
947 }
948 
// NOTE(review): the line that opens this constructor is missing from this
// listing; from the body it takes a text buffer `buf` and an ownership flag
// `control`.
// Parses the text in buf into a node tree and registers the document in the
// global list. The document keeps pointing at buf as its current output text.
950  compressed_buf_(),
951  output_(buf),
952  buffers_(),
953  root_(nullptr),
954  prev_(nullptr),
955  next_(nullptr)
956 {
// When asked to take ownership, the buffer joins buffers_ and is released
// with the document's other buffers.
957  if(control == INIT_TAKE_OWNERSHIP) {
958  buffers_.push_back(buf);
959  }
// The parser advances its cursor, so it gets a copy of the pointer.
960  const char* cbuf = buf;
961  root_ = new node(*this, nullptr, &cbuf);
962 
963  attach_list();
964 }
965 
966 document::document(const char* buf, INIT_STATE state) :
967  compressed_buf_(),
968  output_(buf),
969  buffers_(),
970  root_(nullptr),
971  prev_(nullptr),
972  next_(nullptr)
973 {
974  if(state == INIT_COMPRESSED) {
976  output_ = nullptr;
977  } else {
978  root_ = new node(*this, nullptr, &buf);
979  }
980 
981  attach_list();
982 }
983 
985  compressed_buf_(compressed_buf),
986  output_(nullptr),
987  buffers_(),
988  root_(nullptr),
989  prev_(nullptr),
990  next_(nullptr)
991 {
992  string_span uncompressed_buf;
993  buffers_.push_back(uncompress_buffer(compressed_buf, &uncompressed_buf));
994  output_ = uncompressed_buf.begin();
995  const char* cbuf = output_;
996  try {
997  root_ = new node(*this, nullptr, &cbuf);
998  } catch(...) {
999  delete [] buffers_.front();
1000  buffers_.clear();
1001  throw;
1002  }
1003 
1004  attach_list();
1005 }
1006 
1008 {
1009  for(std::vector<char*>::iterator i = buffers_.begin(); i != buffers_.end(); ++i) {
1010  delete [] *i;
1011  }
1012 
1013  buffers_.clear();
1014  debug_delete(root_);
1015 
1016  detach_list();
1017 }
1018 
1019 const char* document::dup_string(const char* str)
1020 {
1021  const int len = strlen(str);
1022  char* res = new char[len+1];
1023  memcpy(res, str, len + 1);
1024  buffers_.push_back(res);
1025  return res;
1026 }
1027 
1028 const char* document::output()
1029 {
1030  if(output_ && (!root_ || root_->is_dirty() == false)) {
1031  return output_;
1032  }
1033  if(!root_) {
1034  assert(compressed_buf_.empty() == false);
1035  string_span uncompressed_buf;
1036  buffers_.push_back(uncompress_buffer(compressed_buf_, &uncompressed_buf));
1037  output_ = uncompressed_buf.begin();
1038  return output_;
1039  }
1040 
1041  //we're dirty, so the compressed buf must also be dirty; clear it.
1043 
1044  std::vector<char*> bufs;
1045  bufs.swap(buffers_);
1046 
1047  const int buf_size = root_->output_size() + 1;
1048  char* buf;
1049  try {
1050  buf = new char[buf_size];
1051  } catch (const std::bad_alloc& e) {
1052  ERR_SWML << "ERROR: Trying to allocate " << buf_size << " bytes. "
1053  << e.what() << std::endl;
1054  throw error("Bad allocation request in output().");
1055  }
1056  buffers_.push_back(buf);
1057  output_ = buf;
1058 
1060  *buf++ = 0;
1061  assert(buf == output_ + buf_size);
1062 
1063  for(std::vector<char*>::iterator i = bufs.begin(); i != bufs.end(); ++i) {
1064  delete [] *i;
1065  }
1066 
1067  bufs.clear();
1068 
1069  return output_;
1070 }
1071 
1073 {
1074  if(compressed_buf_.empty() == false &&
1075  (root_ == nullptr || root_->is_dirty() == false)) {
1076  assert(*compressed_buf_.begin() == (bzip2 ? 'B' : 31));
1077  return compressed_buf_;
1078  }
1079 
1080  buffers_.push_back(compress_buffer(output(), &compressed_buf_, bzip2));
1081  assert(*compressed_buf_.begin() == (bzip2 ? 'B' : 31));
1082 
1083  return compressed_buf_;
1084 }
1085 
// NOTE(review): the declaration line and at least one statement at the top of
// this body are missing from this listing.
// Drops the node tree and the text form, keeping only the buffer that backs
// compressed_buf_.
1087 {
1089  debug_delete(root_);
1090  root_ = nullptr;
1091  output_ = nullptr;
1092  std::vector<char*> new_buffers;
1093  for(std::vector<char*>::iterator i = buffers_.begin(); i != buffers_.end(); ++i) {
// Free everything except the buffer holding the compressed data.
1094  if(*i != compressed_buf_.begin()) {
1095  delete [] *i;
1096  } else {
1097  new_buffers.push_back(*i);
1098  }
1099  }
1100 
1101  buffers_.swap(new_buffers);
// Exactly the compressed buffer is expected to remain.
1102  assert(buffers_.size() == 1);
1103 }
1104 
// NOTE(review): the declaration line of this function is missing from this
// listing; only the body is visible.
// Rebuilds the node tree for a document that currently has none, inflating
// the compressed buffer first when no text form is available.
1106 {
1107  if(output_ == nullptr) {
1108  assert(compressed_buf_.empty() == false);
1109  string_span uncompressed_buf;
1110  buffers_.push_back(uncompress_buffer(compressed_buf_, &uncompressed_buf));
1111  output_ = uncompressed_buf.begin();
1112  }
1113 
// Must only be called while there is no tree.
1114  assert(root_ == nullptr);
// The parser advances its cursor, so it gets a copy of the pointer.
1115  const char* cbuf = output_;
1116  root_ = new node(*this, nullptr, &cbuf);
1117 }
1118 
1119 std::unique_ptr<document> document::clone()
1120 {
1121  char* buf = new char[strlen(output())+1];
1122  strcpy(buf, output());
1123  return std::make_unique<document>(buf);
1124 }
1125 
1127 {
1130  buffers_.swap(o.buffers_);
1131  std::swap(root_, o.root_);
1132 
1133  root_->set_doc(this);
1134  o.root_->set_doc(&o);
1135 }
1136 
// NOTE(review): the declaration line and one statement at the top of this
// body are missing from this listing.
// Resets the document to an empty root node and frees every owned buffer.
// NOTE(review): the visible lines do not reset compressed_buf_, although the
// buffer behind it is freed below — confirm that the missing statement does.
1138 {
1140  output_ = nullptr;
1141  debug_delete(root_);
1142  root_ = new node(*this, nullptr);
1143  for(std::vector<char*>::iterator i = buffers_.begin(); i != buffers_.end(); ++i) {
1144  delete [] *i;
1145  }
1146 
1147  buffers_.clear();
1148 }
1149 
1150 namespace {
// Head of the doubly linked list (via prev_/next_) of documents;
// document::stats() walks it.
1151 document* head_doc = nullptr;
1152 }
1153 
1155 {
1156  prev_ = nullptr;
1157  next_ = head_doc;
1158 
1159  if(next_) {
1160  next_->prev_ = this;
1161  }
1162  head_doc = this;
1163 }
1164 
1166 {
1167  if(head_doc == this) {
1168  head_doc = next_;
1169  }
1170 
1171  if(next_) {
1172  next_->prev_ = prev_;
1173  }
1174 
1175  if(prev_) {
1176  prev_->next_ = next_;
1177  }
1178  next_ = prev_ = nullptr;
1179 }
1180 
1181 std::string document::stats()
1182 {
1183  std::ostringstream s;
1184  int ndocs = 0;
1185  int ncompressed = 0;
1186  int compressed_size = 0;
1187  int ntext = 0;
1188  int text_size = 0;
1189  int nbuffers = 0;
1190  int nnodes = 0;
1191  int ndirty = 0;
1192  int nattributes = 0;
1193  for(document* d = head_doc; d != nullptr; d = d->next_) {
1194  ndocs++;
1195  nbuffers += d->buffers_.size();
1196 
1197  if(d->compressed_buf_.is_null() == false) {
1198  ++ncompressed;
1199  compressed_size += d->compressed_buf_.size();
1200  }
1201 
1202  if(d->output_) {
1203  ++ntext;
1204  text_size += strlen(d->output_);
1205  }
1206 
1207  if(d->root_) {
1208  nnodes += 1 + d->root_->nchildren();
1209  nattributes += d->root_->nattributes_recursive();
1210  }
1211 
1212  if(d->root_ && d->root_->is_dirty()) {
1213  ++ndirty;
1214  }
1215  }
1216 
1217  const int nodes_alloc = nnodes*(sizeof(node) + 12);
1218  const int attr_alloc = nattributes*(sizeof(string_span)*2);
1219  const int total_alloc = compressed_size + text_size + nodes_alloc + attr_alloc;
1220 
1221  s << "WML documents: " << ndocs << "\n"
1222  << "Dirty: " << ndirty << "\n"
1223  << "With compression: " << ncompressed << " (" << compressed_size
1224  << " bytes)\n"
1225  << "With text: " << ntext << " (" << text_size
1226  << " bytes)\n"
1227  << "Nodes: " << nnodes << " (" << nodes_alloc << " bytes)\n"
1228  << "Attr: " << nattributes << " (" << attr_alloc << " bytes)\n"
1229  << "Buffers: " << nbuffers << "\n"
1230  << "Total allocation: " << total_alloc << " bytes\n";
1231 
1232  return s.str();
1233 }
1234 
// Free-function swap for documents; forwards to document::swap().
1235 void swap(document& lhs, document& rhs)
1236 {
1237  lhs.swap(rhs);
1238 }
1239 
1240 }
1241 
1242 #ifdef UNIT_TEST_SIMPLE_WML
1243 
1244 int main(int argc, char** argv)
1245 {
1246  char* doctext = strdup(
1247 "[test]\n"
1248 "a=\"blah\"\n"
1249 "b=\"blah\"\n"
1250 "c=\"\\\\\"\n"
1251 "d=\"\\\"\"\n"
1252 "[/test]");
1253  std::cerr << doctext << "\n";
1254  simple_wml::document doc(doctext);
1255 
1256  simple_wml::node& node = doc.root();
1257  simple_wml::node* test_node = node.child("test");
1258  assert(test_node);
1259  assert((*test_node)["a"] == "blah");
1260  assert((*test_node)["b"] == "blah");
1261  assert((*test_node)["c"] == "\\\\");
1262  assert((*test_node)["d"] == "\\\"");
1263 
1264  node.set_attr("blah", "blah");
1265  test_node->set_attr("e", "f");
1266  std::cerr << doc.output();
1267 }
1268 
1269 #endif
void remove_ordered_child(int child_map_index, int child_list_index)
Definition: simple_wml.cpp:526
node & add_child(const char *name)
Definition: simple_wml.cpp:465
const string_span & attr(const char *key) const
Definition: simple_wml.hpp:129
string_span compressed_buf_
Definition: simple_wml.hpp:297
string_span output_compressed(bool bzip2=false)
std::string to_string() const
Definition: simple_wml.cpp:182
std::ostream & operator<<(std::ostream &o, const string_span &s)
Definition: simple_wml.cpp:201
void apply_diff(const node &diff)
Definition: simple_wml.cpp:827
void shift_buffers(ptrdiff_t offset)
Definition: simple_wml.cpp:723
void output(char *&buf, CACHE_STATUS status=DO_NOT_MODIFY_CACHE)
Definition: simple_wml.cpp:743
bool has_attr(const char *key) const
Definition: simple_wml.cpp:404
node(document &doc, node *parent)
Definition: simple_wml.cpp:207
void insert_ordered_child(int child_map_index, int child_list_index)
Definition: simple_wml.cpp:504
static l_noret error(LoadState *S, const char *why)
Definition: lundump.cpp:40
#define a
string_span output_cache_
Definition: simple_wml.hpp:226
static child_map::const_iterator find_in_map(const child_map &m, const string_span &attr)
Definition: simple_wml.cpp:663
std::vector< char * > buffers_
Definition: simple_wml.hpp:299
char * duplicate() const
Definition: simple_wml.cpp:187
const char * end() const
Definition: simple_wml.hpp:92
void check_ordered_children() const
Definition: simple_wml.cpp:572
int nchildren() const
Definition: simple_wml.cpp:906
node & set_attr_int(const char *key, int value)
Definition: simple_wml.cpp:440
const char * dup_string(const char *str)
node & set_attr(const char *key, const char *value)
Definition: simple_wml.cpp:412
bool to_bool(bool default_value=false) const
Definition: simple_wml.cpp:158
int get_children(const string_span &name)
Definition: simple_wml.cpp:651
node & add_child_at(const char *name, std::size_t index)
Definition: simple_wml.cpp:446
static void msg(const char *act, debug_info &i, const char *to="", const char *result="")
Definition: debugger.cpp:110
attribute_list attr_
Definition: simple_wml.hpp:194
#define d
#define ERR_SWML
Definition: simple_wml.cpp:30
const child_list & children(const char *name) const
Definition: simple_wml.cpp:634
document * doc_
Definition: simple_wml.hpp:191
int output_size() const
Definition: simple_wml.cpp:697
int main(int argc, char **argv)
Definition: SDLMain.mm:115
std::vector< child_pair > child_map
Definition: simple_wml.hpp:199
void insert_ordered_child_list(int child_map_index)
Definition: simple_wml.cpp:545
const string_span & operator[](const char *key) const
Definition: simple_wml.cpp:391
#define b
bool is_dirty() const
Definition: simple_wml.hpp:162
std::unique_ptr< document > clone()
unsigned in
If equal to search_counter, the node is off the list.
const char * output()
node * child(const char *name)
Definition: simple_wml.cpp:607
const char * begin() const
Definition: simple_wml.hpp:91
void remove_ordered_child_list(int child_map_index)
Definition: simple_wml.cpp:555
child_map children_
Definition: simple_wml.hpp:203
static std::size_t document_size_limit
Definition: simple_wml.hpp:291
const string_span & first_child() const
Definition: simple_wml.cpp:687
void swap(document &o)
const char * output_
Definition: simple_wml.hpp:298
static std::string stats()
std::size_t i
Definition: function.cpp:967
node & set_attr_dup(const char *key, const char *value)
Definition: simple_wml.cpp:428
int nattributes_recursive() const
Definition: simple_wml.cpp:919
static map_location::DIRECTION s
void remove_child(const char *name, std::size_t index)
Definition: simple_wml.cpp:602
std::vector< node * > child_list
Definition: simple_wml.hpp:126
std::size_t index(const std::string &str, const std::size_t index)
Codepoint index corresponding to the nth character in a UTF-8 string.
Definition: unicode.cpp:72
Standard logging facilities (interface).
void set_doc(document *doc)
Definition: simple_wml.cpp:895
#define e
std::vector< node_pos > ordered_children_
Definition: simple_wml.hpp:217
void swap(document &lhs, document &rhs)
Implement non-member swap function for std::swap (calls document::swap).
void copy_into(node &n) const
Definition: simple_wml.cpp:806
static map_location::DIRECTION n
void take_ownership_of_buffer(char *buffer)
Definition: simple_wml.hpp:282
std::string::const_iterator iterator
Definition: tokenizer.hpp:25
std::string node_to_string(const node &n)
Definition: simple_wml.cpp:794
static lg::log_domain log_config("config")