serialization/parser.cpp

Go to the documentation of this file.
00001 /* $Id: parser.cpp 48153 2011-01-01 15:57:50Z mordante $ */
00002 /*
00003    Copyright (C) 2003 by David White <dave@whitevine.net>
00004    Copyright (C) 2005 by Guillaume Melquiond <guillaume.melquiond@gmail.com>
00005    Copyright (C) 2005 - 2011 by Philippe Plantier <ayin@anathas.org>
00006    Part of the Battle for Wesnoth Project http://www.wesnoth.org/
00007 
00008    This program is free software; you can redistribute it and/or modify
00009    it under the terms of the GNU General Public License as published by
00010    the Free Software Foundation; either version 2 of the License, or
00011    (at your option) any later version.
00012    This program is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY.
00014 
00015    See the COPYING file for more details.
00016 */
00017 
00018 /**
00019  * @file
00020  * Read/Write & analyze WML- and config-files.
00021  */
00022 
00023 
#include "serialization/parser.hpp"

#include "config.hpp"
#include "log.hpp"
#include "gettext.hpp"
#include "loadscreen.hpp"
#include "wesconfig.h"
#include "serialization/preprocessor.hpp"
#include "serialization/tokenizer.hpp"
#include "serialization/string_utils.hpp"
#include "foreach.hpp"

#include <limits>
#include <stack>

#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/algorithm/string/replace.hpp>
#include <boost/variant.hpp>
00042 
// Log domain object, constructed with the name "config", that the macros below stream to.
00043 static lg::log_domain log_config("config");
// Shorthands that expand to LOG_STREAM on log_config with the err / warn / info level arguments.
00044 #define ERR_CF LOG_STREAM(err, log_config)
00045 #define WRN_CF LOG_STREAM(warn, log_config)
00046 #define LOG_CF LOG_STREAM(info, log_config)
00047 
// write_internal() throws config::error once its nesting depth exceeds this value.
00048 static const size_t max_recursion_levels = 1000;
00049 
00050 namespace {
00051 
00052 class parser
00053 {
00054     parser();
00055     parser(const parser&);
00056     parser& operator=(const parser&);
00057 public:
00058     parser(config& cfg, std::istream& in);
00059     ~parser();
00060     void operator()();
00061 
00062 private:
00063     void parse_element();
00064     void parse_variable();
00065     std::string lineno_string(utils::string_map &map, std::string const &lineno,
00066         const char *error_string);
00067     void error(const std::string& message);
00068 
00069     config& cfg_;
00070     tokenizer *tok_;
00071 
00072     struct element {
00073         element(config *cfg, std::string const &name,
00074             int start_line = 0, const std::string &file = "") :
00075             cfg(cfg), name(name), start_line(start_line), file(file)
00076         {}
00077 
00078         config* cfg;
00079         std::string name;
00080         int start_line;
00081         std::string file;
00082     };
00083 
00084     std::stack<element> elements;
00085 };
00086 
00087 parser::parser(config &cfg, std::istream &in) :
00088         cfg_(cfg),
00089         tok_(new tokenizer(in)),
00090         elements()
00091 {
00092 }
00093 
00094 
00095 parser::~parser()
00096 {
00097     delete tok_;
00098 }
00099 
00100 void parser::operator()()
00101 {
00102     cfg_.clear();
00103     elements.push(element(&cfg_, ""));
00104 
00105     do {
00106         tok_->next_token();
00107 
00108         switch(tok_->current_token().type) {
00109         case token::LF:
00110             continue;
00111         case '[':
00112             parse_element();
00113             break;
00114         case token::STRING:
00115             parse_variable();
00116             break;
00117         default:
00118             if (static_cast<unsigned char>(tok_->current_token().value[0]) == 0xEF &&
00119                 static_cast<unsigned char>(tok_->next_token().value[0])    == 0xBB &&
00120                 static_cast<unsigned char>(tok_->next_token().value[0])    == 0xBF)
00121             {
00122                 ERR_CF << "Skipping over a utf8 BOM\n";
00123             } else {
00124                 error(_("Unexpected characters at line start"));
00125             }
00126             break;
00127         case token::END:
00128             break;
00129         }
00130         loadscreen::increment_progress();
00131     } while (tok_->current_token().type != token::END);
00132 
00133     // The main element should be there. If it is not, this is a parser error.
00134     assert(!elements.empty());
00135 
00136     if(elements.size() != 1) {
00137         utils::string_map i18n_symbols;
00138         i18n_symbols["tag"] = elements.top().name;
00139         std::stringstream ss;
00140         ss << elements.top().start_line << " " << elements.top().file;
00141         error(lineno_string(i18n_symbols, ss.str(),
00142                 N_("Missing closing tag for tag $tag at $pos")));
00143     }
00144 }
00145 
00146 void parser::parse_element()
00147 {
00148     tok_->next_token();
00149     std::string elname;
00150     config* current_element = NULL;
00151 
00152     switch(tok_->current_token().type) {
00153     case token::STRING: // [element]
00154         elname = tok_->current_token().value;
00155         if (tok_->next_token().type != ']')
00156             error(_("Unterminated [element] tag"));
00157 
00158         // Add the element
00159         current_element = &(elements.top().cfg->add_child(elname));
00160         elements.push(element(current_element, elname, tok_->get_start_line(), tok_->get_file()));
00161         break;
00162 
00163     case '+': // [+element]
00164         if (tok_->next_token().type != token::STRING)
00165             error(_("Invalid tag name"));
00166         elname = tok_->current_token().value;
00167         if (tok_->next_token().type != ']')
00168             error(_("Unterminated [+element] tag"));
00169 
00170         // Find the last child of the current element whose name is
00171         // element
00172         if (config &c = elements.top().cfg->child(elname, -1)) {
00173             current_element = &c;
00174         } else {
00175             current_element = &elements.top().cfg->add_child(elname);
00176         }
00177         elements.push(element(current_element, elname, tok_->get_start_line(), tok_->get_file()));
00178         break;
00179 
00180     case '/': // [/element]
00181         if(tok_->next_token().type != token::STRING)
00182             error(_("Invalid closing tag name"));
00183         elname = tok_->current_token().value;
00184         if(tok_->next_token().type != ']')
00185             error(_("Unterminated closing tag"));
00186         if(elements.size() <= 1)
00187             error(_("Unexpected closing tag"));
00188         if(elname != elements.top().name) {
00189             utils::string_map i18n_symbols;
00190             i18n_symbols["tag1"] = elements.top().name;
00191             i18n_symbols["tag2"] = elname;
00192             std::stringstream ss;
00193             ss << elements.top().start_line << " " << elements.top().file;
00194             error(lineno_string(i18n_symbols, ss.str(),
00195                     N_("Found invalid closing tag $tag2 for tag $tag1 (opened at $pos)")));
00196         }
00197 
00198         elements.pop();
00199         break;
00200     default:
00201         error(_("Invalid tag name"));
00202     }
00203 }
00204 
00205 void parser::parse_variable()
00206 {
00207     config& cfg = *elements.top().cfg;
00208     std::vector<std::string> variables;
00209     variables.push_back("");
00210 
00211     while (tok_->current_token().type != '=') {
00212         switch(tok_->current_token().type) {
00213         case token::STRING:
00214             if(!variables.back().empty())
00215                 variables.back() += ' ';
00216             variables.back() += tok_->current_token().value;
00217             break;
00218         case ',':
00219             if(variables.back().empty()) {
00220                 error(_("Empty variable name"));
00221             } else {
00222                 variables.push_back("");
00223             }
00224             break;
00225         default:
00226             error(_("Unexpected characters after variable name (expected , or =)"));
00227             break;
00228         }
00229         tok_->next_token();
00230     }
00231     if(variables.back().empty())
00232         error(_("Empty variable name"));
00233 
00234     t_string_base buffer;
00235 
00236     std::vector<std::string>::const_iterator curvar = variables.begin();
00237 
00238     bool ignore_next_newlines = false, previous_string = false;
00239     while(1) {
00240         tok_->next_token();
00241         assert(curvar != variables.end());
00242 
00243         switch (tok_->current_token().type) {
00244         case ',':
00245             if ((curvar+1) != variables.end()) {
00246                 if (buffer.translatable())
00247                     cfg[*curvar] = t_string(buffer);
00248                 else
00249                     cfg[*curvar] = buffer.value();
00250                 buffer = t_string_base();
00251                 ++curvar;
00252             } else {
00253                 buffer += ",";
00254             }
00255             break;
00256         case '_':
00257             tok_->next_token();
00258             switch (tok_->current_token().type) {
00259             case token::UNTERMINATED_QSTRING:
00260                 error(_("Unterminated quoted string"));
00261                 break;
00262             case token::QSTRING:
00263                 buffer += t_string_base(tok_->current_token().value, tok_->textdomain());
00264                 break;
00265             default:
00266                 buffer += "_";
00267                 buffer += tok_->current_token().value;
00268                 break;
00269             case token::END:
00270             case token::LF:
00271                 buffer += "_";
00272                 goto finish;
00273             }
00274             break;
00275         case '+':
00276             ignore_next_newlines = true;
00277             continue;
00278         case token::STRING:
00279             if (previous_string) buffer += " ";
00280             //nobreak
00281         default:
00282             buffer += tok_->current_token().value;
00283             break;
00284         case token::QSTRING:
00285             buffer += tok_->current_token().value;
00286             break;
00287         case token::UNTERMINATED_QSTRING:
00288             error(_("Unterminated quoted string"));
00289             break;
00290         case token::LF:
00291             if (ignore_next_newlines) continue;
00292             //nobreak
00293         case token::END:
00294             goto finish;
00295         }
00296 
00297         previous_string = tok_->current_token().type == token::STRING;
00298         ignore_next_newlines = false;
00299     }
00300 
00301     finish:
00302     if (buffer.translatable())
00303         cfg[*curvar] = t_string(buffer);
00304     else
00305         cfg[*curvar] = buffer.value();
00306     while (++curvar != variables.end()) {
00307         cfg[*curvar] = "";
00308     }
00309 }
00310 
00311 /**
00312  * This function is crap. Don't use it on a string_map with prefixes.
00313  */
00314 std::string parser::lineno_string(utils::string_map &i18n_symbols,
00315     std::string const &lineno, const char *error_string)
00316 {
00317     i18n_symbols["pos"] = ::lineno_string(lineno);
00318     std::string result = _(error_string);
00319     foreach(utils::string_map::value_type& var, i18n_symbols)
00320         boost::algorithm::replace_all(result, std::string("$") + var.first, std::string(var.second));
00321     return result;
00322 }
00323 
00324 void parser::error(const std::string& error_type)
00325 {
00326     utils::string_map i18n_symbols;
00327     i18n_symbols["error"] = error_type;
00328     i18n_symbols["value"] = tok_->current_token().value;
00329     std::stringstream ss;
00330     ss << tok_->get_start_line() << " " << tok_->get_file();
00331 #ifdef DEBUG
00332     i18n_symbols["previous_value"] = tok_->previous_token().value;
00333     throw config::error(
00334         lineno_string(i18n_symbols, ss.str(),
00335                       N_("$error, value '$value', previous '$previous_value' at $pos")));
00336 #else
00337     throw config::error(
00338         lineno_string(i18n_symbols, ss.str(),
00339                       N_("$error, value '$value' at $pos")));
00340 #endif
00341 }
00342 
00343 } // end anon namespace
00344 
00345 void read(config &cfg, std::istream &in)
00346 {
00347     parser(cfg, in)();
00348 }
00349 
00350 void read(config &cfg, std::string &in)
00351 {
00352     std::istringstream ss(in);
00353     parser(cfg, ss)();
00354 }
00355 
00356 void read_gz(config &cfg, std::istream &file)
00357 {
00358     //an empty gzip file seems to confuse boost on msvc
00359     //so return early if this is the case
00360     if (file.peek() == EOF) {
00361         return;
00362     }
00363     boost::iostreams::filtering_stream<boost::iostreams::input> filter;
00364     filter.push(boost::iostreams::gzip_decompressor());
00365     filter.push(file);
00366 
00367     parser(cfg, filter)();
00368 }
00369 
/**
 * Returns a copy of @a value in which every double quote is doubled, so the
 * result can be written between double quotes.
 *
 * The output is built in one pass over the input rather than by inserting
 * into the middle of the string for each quote found.
 *
 * @param value  Text to escape; may be empty.
 * @return The escaped copy.
 */
static std::string escaped_string(const std::string &value)
{
    std::string res;
    res.reserve(value.size());
    for (std::string::const_iterator it = value.begin(); it != value.end(); ++it) {
        res += *it;
        if (*it == '"') {
            res += '"';
        }
    }
    return res;
}
00380 
00381 struct write_key_val_visitor : boost::static_visitor<void>
00382 {
00383     std::ostream &out_;
00384     unsigned level_;
00385     std::string &textdomain_;
00386     const std::string &key_;
00387 
00388     write_key_val_visitor(std::ostream &out, unsigned level,
00389         std::string &textdomain, const std::string &key)
00390         : out_(out), level_(level), textdomain_(textdomain), key_(key)
00391     {}
00392 
00393     void operator()(boost::blank const &) const
00394     { out_ << "\"\""; }
00395     void operator()(bool b) const
00396     { out_ << (b ? "yes" : "no"); }
00397     void operator()(double d) const
00398     { int i = d; if (d == i) out_ << i; else out_ << d; }
00399     void operator()(std::string const &s) const
00400     { out_ << '"' << escaped_string(s) << '"'; }
00401     void operator()(t_string const &s) const;
00402 };
00403 
00404 /**
00405  * Writes all the parts of a translatable string.
00406  * @note If the first part is translatable and in the wrong textdomain,
00407  *       the textdomain change has to happen before the attribute name.
00408  *       That is the reason for not outputting the key beforehand and
00409  *       letting this function do it.
00410  */
00411 void write_key_val_visitor::operator()(t_string const &value) const
00412 {
00413     bool first = true;
00414 
00415     for (t_string::walker w(value); !w.eos(); w.next())
00416     {
00417         std::string part(w.begin(), w.end());
00418 
00419         if (!first)
00420             out_ << " +\n";
00421 
00422         if (w.translatable() && w.textdomain() != textdomain_) {
00423             textdomain_ = w.textdomain();
00424             out_ << "#textdomain " << textdomain_ << '\n';
00425         }
00426 
00427         for (unsigned i = 0; i < level_; ++i) out_ << '\t';
00428 
00429         if (first)
00430             out_ << key_ << '=';
00431         else
00432             out_ << '\t';
00433 
00434         if (w.translatable())
00435             out_ << '_';
00436 
00437         out_ << '"' << escaped_string(part) << '"';
00438         first = false;
00439     }
00440 }
00441 
00442 void write_key_val(std::ostream &out, const std::string &key,
00443     const config::attribute_value &value, unsigned level,
00444     std::string& textdomain)
00445 {
00446     if (!boost::get<t_string const>(&value.value)) {
00447         for (unsigned i = 0; i < level; ++i) out << '\t';
00448         out << key << '=';
00449     }
00450     boost::apply_visitor(write_key_val_visitor(out, level, textdomain, key), value.value);
00451     out << '\n';
00452 }
00453 
/** Writes the opening tag "[child]" on its own line, indented by @a level tabs. */
void write_open_child(std::ostream &out, const std::string &child, unsigned int level)
{
    const std::string indent(level, '\t');
    out << indent;
    out << '[' << child << "]\n";
}
00458 
/** Writes the closing tag "[/child]" on its own line, indented by @a level tabs. */
void write_close_child(std::ostream &out, const std::string &child, unsigned int level)
{
    const std::string indent(level, '\t');
    out << indent;
    out << "[/" << child << "]\n";
}
00463 
00464 static void write_internal(config const &cfg, std::ostream &out, std::string& textdomain, size_t tab = 0)
00465 {
00466     if (tab > max_recursion_levels)
00467         throw config::error("Too many recursion levels in config write");
00468 
00469     foreach (const config::attribute &i, cfg.attribute_range()) {
00470         write_key_val(out, i.first, i.second, tab, textdomain);
00471     }
00472 
00473     foreach (const config::any_child &item, cfg.all_children_range())
00474     {
00475         write_open_child(out, item.key, tab);
00476         write_internal(item.cfg, out, textdomain, tab + 1);
00477         write_close_child(out, item.key, tab);
00478     }
00479 }
00480 
00481 void write(std::ostream &out, config const &cfg, unsigned int level)
00482 {
00483     std::string textdomain = PACKAGE;
00484     write_internal(cfg, out, textdomain, level);
00485 }
00486 

Generated by doxygen 1.5.6 on Thu Feb 10 01:01:27 2011 for The Battle for Wesnoth
Gna! | Forum | Wiki | CIA | devdocs