The Battle for Wesnoth  1.15.10+dev
tokenizer.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2004 - 2009 by Philippe Plantier <ayin@anathas.org>
3  Copyright (C) 2010 - 2018 by Guillaume Melquiond <guillaume.melquiond@gmail.com>
4  Part of the Battle for Wesnoth Project https://www.wesnoth.org
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY.
12 
13  See the COPYING file for more details.
14 */
15 
17 #include "wesconfig.h"
18 
// Constructs a tokenizer that reads from the stream `in`.
// Sets the initial state (no current character, line counter at 1, text
// domain taken from the PACKAGE macro), fills the char_types_ lookup table
// for character codes 0..127, and turns on exceptions for badbit on the
// stream returned by in_.stream().
19 tokenizer::tokenizer(std::istream& in) :
20  current_(EOF),
21  lineno_(1),
22  startlineno_(0),
23  textdomain_(PACKAGE),
24  file_(),
25  token_(),
26  in_(in)
27 {
 // Classify each of the 128 table entries: letters and underscore are
 // TOK_ALPHA, decimal digits are TOK_NUMERIC, space and tab are TOK_SPACE,
 // and everything else stays 0. The TOK_* constants are declared outside
 // this listing.
28  for (int c = 0; c < 128; ++c)
29  {
30  int t = 0;
31  if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_') {
32  t = TOK_ALPHA;
33  } else if (c >= '0' && c <= '9') {
34  t = TOK_NUMERIC;
35  } else if (c == ' ' || c == '\t') {
36  t = TOK_SPACE;
37  }
38  char_types_[c] = t;
39  }
 // Ask the underlying stream to throw when badbit gets set.
40  in_.stream().exceptions(std::ios_base::badbit);
 // NOTE(review): the listing jumps from source line 40 to 42, so one
 // statement of the original constructor is not shown here.
42 }
43 
45 {
 // NOTE(review): the header of this body (source line 44) is absent from
 // the listing; presumably this is the destructor - confirm against the
 // real file.
 // Reset the stream state to goodbit, then set the exception mask to
 // goodbit so the stream no longer throws on any state flag.
46  in_.stream().clear(std::ios_base::goodbit);
47  in_.stream().exceptions(std::ios_base::goodbit);
48 }
49 
51 {
 // Body of next_token(): the cross-reference list places its header at
 // source line 50, which this listing omits. It rebuilds token_ from the
 // input starting at current_ and returns a reference to it.
 // NOTE(review): many source lines are missing from this listing (61, 67,
 // 73, 77, 82-83, 86-87, 91, 95, 98, 103, 107, 124-125, 130-131, 138, 148),
 // so several branches below look empty. The comments describe only what
 // is visible.
52 #ifdef DEBUG_TOKENIZER
 // Debug builds keep a copy of the previously returned token.
53  previous_token_ = token_;
54 #endif
55  token_.value.clear();
56 
57  // Dump spaces and inlined comments
58  while(true)
59  {
 // The loop body (source line 61) is not shown; presumably it advances
 // to the next character - confirm.
60  while (is_space(current_)) {
62  }
 // Character code 254 introduces an inlined comment here; any other
 // character ends the skipping loop.
63  if (current_ != 254)
64  break;
65  skip_comment();
66  // skip the line end
68  }
69 
 // A hash sign starts a comment that is handed to skip_comment().
70  if (current_ == '#')
71  skip_comment();
72 
74 
 // Dispatch on the first significant character of the token.
75  switch(current_) {
76  case EOF:
78  break;
79 
 // A less-than sign followed by a second one opens a block that runs
 // until two consecutive greater-than signs or EOF. A single
 // less-than sign takes the short branch.
80  case '<':
81  if (peek_char() != '<') {
84  break;
85  }
88  for (;;) {
89  next_char();
90  if (current_ == EOF) {
92  break;
93  }
94  if (current_ == '>' && peek_char() == '>') {
96  break;
97  }
99  }
100  break;
101 
 // Quoted string: characters are appended to token_.value until a lone
 // double quote or EOF.
102  case '"':
104  for (;;) {
105  next_char();
106  if (current_ == EOF) {
108  break;
109  }
110  if (current_ == '"') {
 // A quote not followed by another quote closes the string. A doubled
 // quote is consumed as a pair and falls through to be appended once.
111  if (peek_char() != '"') break;
112  next_char_fast();
113  }
 // An inlined comment inside the string is skipped and contributes
 // nothing to the value. lineno_ is decremented afterwards; the reason is
 // not visible in this listing.
114  if (current_ == 254) {
115  skip_comment();
116  --lineno_;
117  continue;
118  }
119  token_.value += current_;
120  }
121  break;
122 
 // Single-character punctuation. The statements for this case (source
 // lines 124-125) are not shown.
123  case '[': case ']': case '/': case '\n': case '=': case ',': case '+':
126  break;
127 
 // An underscore not followed by an alphanumeric character is handled on
 // its own; otherwise it is treated like the start of a word below.
128  case '_':
129  if (!is_alnum(peek_char())) {
132  break;
133  }
134  [[fallthrough]];
135 
136  default:
 // Word: a run of alphanumeric characters and dollar signs. Inlined
 // comments (254) met between characters are skipped without ending the
 // word.
137  if (is_alnum(current_) || current_ == '$') {
139  do {
140  token_.value += current_;
141  next_char_fast();
142  while (current_ == 254) {
143  skip_comment();
144  next_char_fast();
145  }
146  } while (is_alnum(current_) || current_ == '$');
147  } else {
 // Any other character becomes a one-character token value.
149  token_.value += current_;
150  next_char();
151  }
 // Both paths above have already advanced past the token, so return
 // directly instead of reaching the shared next_char() after the switch.
152  return token_;
153  }
154 
 // Every case that left the switch via break still has its last
 // character in current_; step past it unless the input is exhausted.
155  if (current_ != EOF)
156  next_char();
157 
158  return token_;
159 }
160 
// Tries to consume the characters of cmd followed by one space character.
// The comparison starts with the character AFTER current_, because
// next_char_fast() is called before each check. The caller in this listing
// therefore passes the command without its first letter ("extdomain",
// "ine").
// cmd: NUL-terminated text to match.
// Returns true when every character of cmd matched and the character after
// it satisfied is_space(); current_ then holds the character following that
// space. Returns false at the first mismatch, leaving current_ on the
// offending character.
161 bool tokenizer::skip_command(char const *cmd)
162 {
163  for (; *cmd; ++cmd) {
164  next_char_fast();
165  if (current_ != *cmd) return false;
166  }
 // The command word must be terminated by a space-class character.
167  next_char_fast();
168  if (!is_space(current_)) return false;
 // Step over the separator so the caller starts at the argument.
169  next_char_fast();
170  return true;
171 }
172 
174 {
 // Body of skip_comment(): the cross-reference list places its header at
 // source line 173, which this listing omits.
 // It consumes the rest of the current line. Two directives are recognised
 // at the start of the comment: one beginning with t (matched as
 // "textdomain") stores the rest of the line in textdomain_, and one
 // beginning with l (matched as "line") parses a decimal line number into
 // lineno_ and stores the rest of the line in file_. Anything else is
 // discarded up to the newline or EOF.
175  next_char_fast();
 // Empty comment: nothing to parse.
176  if (current_ == '\n' || current_ == EOF) return;
 // Destination string for the directive argument, set by the branches
 // below.
177  std::string *dst = nullptr;
178 
179  if (current_ == 't')
180  {
 // The leading t is already in current_, so only the remainder of
 // the word is matched.
181  if (!skip_command("extdomain")) goto fail;
182  dst = &textdomain_;
183  }
184  else if (current_ == 'l')
185  {
186  if (!skip_command("ine")) goto fail;
 // Accumulate the decimal digits that follow into lineno_.
 // NOTE(review): lineno_ is an int and there is no overflow guard on
 // this accumulation - confirm the input is bounded upstream.
187  lineno_ = 0;
188  while (is_num(current_)) {
189  lineno_ = lineno_ * 10 + (current_ - '0');
190  next_char_fast();
191  }
 // A space-class character must separate the number from the file
 // name.
192  if (!is_space(current_)) goto fail;
193  next_char_fast();
194  dst = &file_;
195  }
196  else
197  {
 // Reached for an ordinary comment, and by goto from the branches
 // above when a directive is malformed: drop the rest of the line.
198  fail:
199  while (current_ != '\n' && current_ != EOF) {
200  next_char_fast();
201  }
202  return;
203  }
204 
 // Replace the destination with everything up to the end of the line.
 // The newline itself is not consumed here.
205  dst->clear();
206  while (current_ != '\n' && current_ != EOF) {
207  *dst += current_;
208  next_char_fast();
209  }
210 }
void skip_comment()
Definition: tokenizer.cpp:173
void next_char()
Definition: tokenizer.hpp:96
bool is_space(int c) const
Definition: tokenizer.hpp:145
int startlineno_
Definition: tokenizer.hpp:94
void next_char_fast()
Definition: tokenizer.hpp:103
const token & next_token()
Definition: tokenizer.cpp:50
unsigned in
If equal to search_counter, the node is off the list.
token_type
Definition: tokenizer.hpp:32
std::string file_
Definition: tokenizer.hpp:169
int lineno_
Definition: tokenizer.hpp:93
bool is_alnum(int c) const
Definition: tokenizer.hpp:155
#define PACKAGE
Definition: wesconfig.h:23
Some defines: VERSION, PACKAGE, MIN_SAVEGAME_VERSION.
char char_types_[128]
Definition: tokenizer.hpp:175
int peek_char()
Definition: tokenizer.hpp:127
double t
Definition: astarsearch.cpp:64
std::string textdomain_
Definition: tokenizer.hpp:168
int current_
Definition: tokenizer.hpp:92
bool is_num(int c) const
Definition: tokenizer.hpp:150
buffered_istream in_
Definition: tokenizer.hpp:174
std::istream & stream()
Returns the owned stream.
token token_
Definition: tokenizer.hpp:170
mock_char c
bool skip_command(char const *cmd)
Returns true if the next characters are the ones from cmd followed by a space.
Definition: tokenizer.cpp:161
std::string value
Definition: tokenizer.hpp:51
token_type type
Definition: tokenizer.hpp:50