serialization/tokenizer.hpp

Go to the documentation of this file.
00001 /* $Id: tokenizer.hpp 54102 2012-05-06 12:00:03Z mordante $ */
00002 /*
00003    Copyright (C) 2004 - 2009 by Philippe Plantier <ayin@anathas.org>
00004    Copyright (C) 2010 - 2012 by Guillaume Melquiond <guillaume.melquiond@gmail.com>
00005    Part of the Battle for Wesnoth Project http://www.wesnoth.org
00006 
00007    This program is free software; you can redistribute it and/or modify
00008    it under the terms of the GNU General Public License as published by
00009    the Free Software Foundation; either version 2 of the License, or
00010    (at your option) any later version.
00011    This program is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY.
00013 
00014    See the COPYING file for more details.
00015 */
00016 
00017 #ifndef TOKENIZER_H_INCLUDED
00018 #define TOKENIZER_H_INCLUDED
00019 
00020 #include "buffered_istream.hpp"
00021 
00022 #include <istream>
00023 #include <string>
00024 
00025 class config;
00026 
/**
 * A single lexical token: its kind plus the text attached to it.
 */
struct token
{
    /**
     * Kind of a token.
     *
     * The first four enumerators take the values 0 to 3. Each punctuation
     * token uses the code of the character it stands for as its value, and
     * END takes the value following UNDERSCORE ('_' + 1).
     */
    enum token_type
    {
        STRING,
        QSTRING,
        UNTERMINATED_QSTRING,
        MISC,

        LF = '\n',
        EQUALS = '=',
        COMMA = ',',
        PLUS = '+',
        SLASH = '/',
        OPEN_BRACKET = '[',
        CLOSE_BRACKET = ']',
        UNDERSCORE = '_',
        END
    };

    /** Builds an END token carrying an empty string. */
    token() : type(END), value() {}

    /** Kind of this token. */
    token_type type;

    /** Text associated with this token. */
    std::string value;
};
00055 
00056 /** Abstract baseclass for the tokenizer. */
00057 class tokenizer
00058 {
00059 public:
00060     tokenizer(std::istream& in);
00061     ~tokenizer();
00062 
00063     const token &next_token();
00064 
00065     const token &current_token() const
00066     {
00067         return token_;
00068     }
00069 
00070 #ifdef DEBUG
00071     const token &previous_token() const
00072     {
00073         return previous_token_;
00074     }
00075 #endif
00076 
00077     const std::string &textdomain() const
00078     {
00079         return textdomain_;
00080     }
00081 
00082     const std::string &get_file() const
00083     {
00084         return file_;
00085     }
00086 
00087     int get_start_line() const
00088     {
00089         return startlineno_;
00090     }
00091 
00092 private:
00093     tokenizer();
00094     int current_;
00095     int lineno_;
00096     int startlineno_;
00097 
00098     void next_char()
00099     {
00100         if (UNLIKELY(current_ == '\n'))
00101             ++lineno_;
00102         next_char_fast();
00103     }
00104 
00105     void next_char_fast()
00106     {
00107         do {
00108             current_ = in_.get();
00109         } while (UNLIKELY(current_ == '\r'));
00110 #if 0
00111             /// @todo disabled untill campaign server is fixed
00112             if(LIKELY(in_.good())) {
00113                 current_ = in_.get();
00114                 if (UNLIKELY(current_ == '\r'))
00115                 {
00116                     // we assume that there is only one '\r'
00117                     if(LIKELY(in_.good())) {
00118                         current_ = in_.get();
00119                     } else {
00120                         current_ = EOF;
00121                     }
00122                 }
00123             } else {
00124                 current_ = EOF;
00125             }
00126 #endif
00127     }
00128 
00129     int peek_char()
00130     {
00131         return in_.peek();
00132     }
00133 
00134     enum
00135     {
00136         TOK_SPACE = 1,
00137         TOK_NUMERIC = 2,
00138         TOK_ALPHA = 4
00139     };
00140 
00141     int char_type(unsigned c) const
00142     {
00143         return c < 128 ? char_types_[c] : 0;
00144     }
00145 
00146     bool is_space(int c) const
00147     {
00148         return char_type(c) & TOK_SPACE;
00149     }
00150 
00151     bool is_num(int c) const
00152     {
00153         return char_type(c) & TOK_NUMERIC;
00154     }
00155 
00156     bool is_alnum(int c) const
00157     {
00158         return char_type(c) & (TOK_ALPHA | TOK_NUMERIC);
00159     }
00160 
00161     void skip_comment();
00162 
00163     /**
00164      * Returns true if the next characters are the one from @a cmd
00165      * followed by a space. Skips all the matching characters.
00166      */
00167     bool skip_command(char const *cmd);
00168 
00169     std::string textdomain_;
00170     std::string file_;
00171     token token_;
00172 #ifdef DEBUG
00173     token previous_token_;
00174 #endif
00175     buffered_istream in_;
00176     char char_types_[128];
00177 };
00178 
00179 #endif
00180 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Generated by doxygen 1.7.1 on Fri May 25 2012 01:03:09 for The Battle for Wesnoth
Gna! | Forum | Wiki | CIA | devdocs