The Battle for Wesnoth  1.15.7+dev
string_utils.hpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2003 by David White <dave@whitevine.net>
3  Copyright (C) 2005 - 2018 by Guillaume Melquiond <guillaume.melquiond@gmail.com>
4  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY.
12 
13  See the COPYING file for more details.
14 */
15 
16 #pragma once
17 
18 #include "font/constants.hpp"
20 
21 #include <algorithm>
22 #include <map>
23 #include <ostream>
24 #include <set>
25 #include <sstream>
26 #include <string>
27 #include <utility>
28 #include <vector>
29 
30 class t_string;
31 
32 namespace utils {
33 
34 using string_map = std::map<std::string, t_string>;
35 
36 bool isnewline(const char c);
37 bool portable_isspace(const char c);
38 bool notspace(char c);
39 
/** Flags accepted by the split functions; they are combined with bitwise OR. */
enum {
	REMOVE_EMPTY = 0x01, /**< Remove empty elements. */
	STRIP_SPACES = 0x02  /**< Strip leading and trailing blank spaces. */
};
44 
45 void trim(string_view& s);
46 
/**
 * Calls @a f once for every piece of @a s delimited by @a sep.
 *
 * An empty @a s produces no calls at all. Otherwise pieces are reported in
 * order, and empty pieces (between adjacent separators, or after a trailing
 * separator) are reported as well.
 *
 * @param s   Text to split. Taken by value; the local copy is consumed as
 *            pieces are handed out.
 * @param sep Separator character.
 * @param f   Callable invoked with a string_view for each piece.
 */
template<typename F>
void split_foreach_impl(string_view s, char sep, const F& f)
{
	if(s.empty()) {
		return;
	}

	while(true) {
		// Keep the position in find()'s own unsigned type. Narrowing it to int
		// truncates large offsets and makes the npos comparison depend on an
		// implementation-defined conversion.
		const string_view::size_type partend = s.find(sep);
		if(partend == string_view::npos) {
			break;
		}

		f(s.substr(0, partend));
		// Drop the piece just reported together with its separator.
		s.remove_prefix(partend + 1);
	}

	// Whatever follows the last separator is the final piece.
	f(s);
}
64 
65 template<typename F>
66 void split_foreach(string_view s, char sep, const int flags, const F& f)
67 {
68  split_foreach_impl(s, sep, [&](string_view item) {
69  if(flags & STRIP_SPACES) {
70  trim(item);
71  }
72  if(!(flags & REMOVE_EMPTY) || !item.empty()) {
73  f(item);
74  }
75  });
76 }
77 
78 
79 
80 /** Splits a (comma-)separated string into a vector of pieces. */
81 std::vector<std::string> split(string_view val, const char c = ',', const int flags = REMOVE_EMPTY | STRIP_SPACES);
82 std::set<std::string> split_set(string_view val, const char c = ',', const int flags = REMOVE_EMPTY | STRIP_SPACES);
83 
84 /**
85  * This function is identical to split(), except it does not split when it otherwise would if the
86  * previous character was identical to the parameter 'quote' (i.e. it does not split quoted commas).
87  * This method was added to make it possible to quote user input, particularly so commas in user input
88  * would not cause visual problems in menus.
89  *
90  * @todo Why not change split()? That would change the methods post condition.
91  */
92 std::vector<std::string> quoted_split(const std::string& val, char c= ',', int flags = REMOVE_EMPTY | STRIP_SPACES, char quote = '\\');
93 
94 /**
95  * Splits a (comma-)separated string into a set of pieces.
96  * See split() for the meanings of the parameters.
97  */
98 inline std::set<std::string> set_split(const std::string& val, const char c = ',', const int flags = REMOVE_EMPTY | STRIP_SPACES)
99 {
100  std::vector<std::string> vec_split = split(val, c, flags);
101  return std::set< std::string >(vec_split.begin(), vec_split.end());
102 }
103 
104 /**
105  * Splits a string based on two separators into a map.
106  *
107  * Major: the separator between elements of the map
108  * Minor: the separator between keys and values in one element
109  *
110  * For example, the string 'a:b,c:d,e:f' would be parsed into:
111  * a => b
112  * c => d
113  * e => f
114  */
115 std::map<std::string, std::string> map_split(
116  const std::string& val,
117  char major = ',',
118  char minor = ':',
119  int flags = REMOVE_EMPTY | STRIP_SPACES,
120  const std::string& default_value = "");
121 
122 /**
123  * Splits a string based either on a separator, except when the text appears within specified parenthesis.
124  *
125  * If the separator is "0" (default), it splits a string into an odd number of parts:
126  * - The part before the first '(',
127  * - the part between the first '('
128  * - and the matching right ')', etc ...
129  * and the remainder of the string.
130  *
131  * Note that one can use the same character for both the left and right parenthesis, which usually makes
132  * the most sense for this function.
133  *
134  * Note that this will find the first matching char in the left string and match against the corresponding
135  * char in the right string. A correctly processed string should return a vector with an odd number of
136  * elements. Empty elements are never removed as they are placeholders, hence REMOVE EMPTY only works for
137  * the separator split.
138  *
139  * INPUT: ("a(b)c{d}e(f{g})h", 0, "({", ")}")
140  * RETURNS: {"a", "b", "c", "d", "e", "f{g}", "h"}
141  */
142 std::vector< std::string > parenthetical_split(
143  const std::string& val,
144  const char separator = 0,
145  const std::string& left = "(",
146  const std::string& right = ")",
147  const int flags = REMOVE_EMPTY | STRIP_SPACES);
148 
149 /**
150  * Similar to parenthetical_split, but also expands embedded square brackets.
151  *
152  * Notes:
153  * - The Separator must be specified and number of entries in each square bracket must match in each section.
154  * - Leading zeros are preserved if specified between square brackets.
155  * - An asterisk as in [a*n] indicates to expand 'a' n times
156  *
157  * This is useful for expanding animation WML code.
158  *
159  * Examples:
160  *
161  * INPUT: ("a[1-3](1,[5,6,7]),b[8,9]", ",")
162  * RETURNS: {"a1(1,5)", "a2(1,6)", "a3(1,7)", "b8", "b9"}
163  *
164  * INPUT: ("abc[07-10]")
165  * RETURNS: {"abc07", "abc08", "abc09", "abc10"}
166  *
167  * INPUT: ("a[1,2]b[3-4]:c[5,6]")
168  * RETURNS: {"a1b3:c5", "a2b4:c6"}
169  *
170  * INPUT: ("abc[3-1].png")
171  * RETURNS: {"abc3.png", "abc2.png", "abc1.png"}
172  *
173  * INPUT: ("abc[de,xyz]")
174  * RETURNS: {"abcde", "abcxyz"}
175  *
176  * INPUT: ("abc[1*3]")
177  * RETURNS: {"abc1", "abc1", "abc1"}
178  */
179 std::vector<std::string> square_parenthetical_split(
180  const std::string& val,
181  const char separator = ',',
182  const std::string& left = "([",
183  const std::string& right = ")]",
184  const int flags = REMOVE_EMPTY | STRIP_SPACES);
185 
/**
 * Generates a new string joining container items in a list.
 *
 * Each element is written with operator<<; the delimiter is placed between
 * consecutive elements only, so an empty container yields an empty string
 * and there is never a leading or trailing delimiter.
 *
 * @param v A container with elements.
 * @param s List delimiter.
 */
template <typename T>
std::string join(const T& v, const std::string& s = ",")
{
	std::ostringstream out;

	// Emit the delimiter before every element except the first. This gives
	// the same output as looking ahead for the end, without needing std::next.
	bool first = true;
	for(const auto& item : v) {
		if(!first) {
			out << s;
		}
		first = false;

		out << item;
	}

	return out.str();
}
206 
/**
 * Generates a new string joining the key/value pairs of a map-like container.
 *
 * Each element is written as its @c first member, @a minor, then its
 * @c second member; @a major is placed between consecutive elements only.
 *
 * @param v     A container whose elements expose @c first and @c second.
 * @param major Delimiter between elements.
 * @param minor Delimiter between the key and the value of one element.
 */
template <typename T>
std::string join_map(
	const T& v,
	const std::string& major = ",",
	const std::string& minor = ":")
{
	std::ostringstream out;

	// Emit the element delimiter before every pair except the first.
	bool first = true;
	for(const auto& entry : v) {
		if(!first) {
			out << major;
		}
		first = false;

		out << entry.first << minor << entry.second;
	}

	return out.str();
}
224 
225 /**
226  * Generates a new string containing a bullet list.
227  *
228  * List items are preceded by the indentation blanks, a bullet string and
229  * another blank; all but the last item are followed by a newline.
230  *
231  * @param v A container with elements.
232  * @param indent Number of indentation blanks.
233  * @param bullet The leading bullet string.
234  */
235 template<typename T>
236 std::string bullet_list(const T& v, std::size_t indent = 4, const std::string& bullet = font::unicode_bullet)
237 {
238  std::ostringstream str;
239 
240  for(typename T::const_iterator i = v.begin(); i != v.end(); ++i) {
241  if(i != v.begin()) {
242  str << '\n';
243  }
244 
245  str << std::string(indent, ' ') << bullet << ' ' << *i;
246  }
247 
248  return str.str();
249 }
250 
251 /**
252  * Indent a block of text.
253  *
254  * Only lines with content are changed; empty lines are left intact. However,
255  * if @a string is an empty string itself, the indentation unit with the
256  * specified @a indent_size will be returned instead.
257  *
258  * @param string Text to indent.
259  * @param indent_size Number of indentation units to use.
260  */
261 std::string indent(const std::string& string, std::size_t indent_size = 4);
262 
263 std::pair<int, int> parse_range(const std::string& str);
264 
265 std::vector<std::pair<int, int>> parse_ranges(const std::string& str);
266 
267 int apply_modifier(const int number, const std::string &amount, const int minimum = 0);
268 
269 /** Add a "+" or replace the "-" par Unicode minus */
271 {
272  return mod[0] == '-' ? (font::unicode_minus + std::string(mod.begin() + 1, mod.end())) : ("+" + mod);
273 }
274 
275 /** Prepends a configurable set of characters with a backslash */
276 std::string escape(const std::string &str, const char *special_chars);
277 
278 /**
279  * Prepend all special characters with a backslash.
280  *
281  * Special characters are:
282  * #@{}+-,\*=
283  */
284 inline std::string escape(const std::string &str)
285 {
286  return escape(str, "#@{}+-,\\*=");
287 }
288 
289 /** Remove all escape characters (backslash) */
290 std::string unescape(const std::string &str);
291 
292 /** Percent-escape characters in a UTF-8 string intended to be part of a URL. */
293 std::string urlencode(const std::string &str);
294 
/**
 * Surround the string 'str' with double quotes.
 *
 * The content of @a str is copied unchanged; quotes already inside it are
 * not escaped.
 */
inline std::string quote(const std::string &str)
{
	return '"' + str + '"';
}
300 
301 /** Convert no, false, off, 0, 0.0 to false, empty to def, and others to true */
302 bool string_bool(const std::string& str,bool def=false);
303 
304 /** Converts a bool value to 'true' or 'false' */
305 std::string bool_string(const bool value);
306 
307 /** Convert into a signed value (using the Unicode "−" and +0 convention) */
308 std::string signed_value(int val);
309 
/** Sign with Unicode "−" if negative */
std::string half_signed_value(int val);
312 
313 /** Convert into a percentage (using the Unicode "−" and +0% convention */
314 inline std::string signed_percent(int val) {return signed_value(val) + "%";}
315 
316 /**
317  * Convert into a string with an SI-postfix.
318  *
319  * If the unit is to be translatable,
320  * a t_string should be passed as the third argument.
321  * _("unit_byte^B") is suggested as standard.
322  *
323  * There are no default values because they would not be translatable.
324  */
325 std::string si_string(double input, bool base2, const std::string& unit);
326 
327 /**
328  * Try to complete the last word of 'text' with the 'wordlist'.
329  *
330  * @param[in, out] text The parameter's usage is:
331  * - Input: Text where we try to complete the last word
332  * of.
333  * - Output: Text with completed last word.
334  * @param[in, out] wordlist
335  * The parameter's usage is:
335  * - Input: A vector of strings to complete against.
337  * - Output: A vector of strings that matched 'text'.
338  *
339  * @retval true iff text is just one word (no spaces)
340  */
341 bool word_completion(std::string& text, std::vector<std::string>& wordlist);
342 
343 /** Check if a message contains a word. */
344 bool word_match(const std::string& message, const std::string& word);
345 
346 /**
347  * Match using '*' as any number of characters (including none),
348  * '+' as one or more characters, and '?' as any one character.
349  */
350 bool wildcard_string_match(const std::string& str, const std::string& match);
351 
352 /**
353  * Check if the username contains only valid characters.
354  *
355  * (all alpha-numeric characters plus underscore and hyphen)
356  */
357 bool isvalid_username(const std::string& login);
358 
359 /**
360  * Check if the username pattern contains only valid characters.
361  *
362  * (all alpha-numeric characters plus underscore, hyphen,
363  * question mark and asterisk)
364  */
365 bool isvalid_wildcard(const std::string& login);
366 
367 /**
368  * Truncates a string to a given utf-8 character count and then appends an ellipsis.
369  */
370 void ellipsis_truncate(std::string& str, const std::size_t size);
371 
372 } // end namespace utils
bool isvalid_wildcard(const std::string &username)
Check if the username pattern contains only valid characters.
BOOST_CXX14_CONSTEXPR size_type find(basic_string_view s, size_type pos=0) const BOOST_NOEXCEPT
std::pair< int, int > parse_range(const std::string &str)
std::string join_map(const T &v, const std::string &major=",", const std::string &minor=":")
std::string urlencode(const std::string &str)
Percent-escape characters in a UTF-8 string intended to be part of a URL.
bool isvalid_username(const std::string &username)
Check if the username contains only valid characters.
std::map< std::string, t_string > string_map
This class represents a single unit of a specific type.
Definition: unit.hpp:129
void split_foreach_impl(string_view s, char sep, const F &f)
std::string join(const T &v, const std::string &s=",")
Generates a new string joining container items in a list.
BOOST_CXX14_CONSTEXPR void remove_prefix(size_type n)
std::string unescape(const std::string &str)
Remove all escape characters (backslash)
bool wildcard_string_match(const std::string &str, const std::string &match)
Match using '*' as any number of characters (including none), '+' as one or more characters, and '?' as any one character.
void ellipsis_truncate(std::string &str, const std::size_t size)
Truncates a string to a given utf-8 character count and then appends an ellipsis. ...
BOOST_CXX14_CONSTEXPR basic_string_view substr(size_type pos, size_type n=npos) const
bool notspace(const char c)
std::map< std::string, std::string > map_split(const std::string &val, char major, char minor, int flags, const std::string &default_value)
Splits a string based on two separators into a map.
std::string str
Definition: statement.cpp:110
std::string quote(const std::string &str)
Surround the string 'str' with double quotes.
void split_foreach(string_view s, char sep, const int flags, const F &f)
std::string half_signed_value(int val)
Sign with Unicode "−" if negative.
std::string bullet_list(const T &v, std::size_t indent=4, const std::string &bullet=font::unicode_bullet)
Generates a new string containing a bullet list.
const std::string unicode_minus
Definition: constants.cpp:38
std::vector< std::pair< int, int > > parse_ranges(const std::string &str)
std::string si_string(double input, bool base2, const std::string &unit)
Convert into a string with an SI-postfix.
void trim(string_view &s)
std::set< std::string > split_set(string_view s, char sep, const int flags)
BOOST_CONSTEXPR bool empty() const BOOST_NOEXCEPT
REMOVE_EMPTY: remove empty elements.
lu_byte right
Definition: lparser.cpp:1027
std::string escape(const std::string &str, const char *special_chars)
Prepends a configurable set of characters with a backslash.
std::string login()
std::size_t i
Definition: function.cpp:933
u64 size
Definition: statement.cpp:80
static map_location::DIRECTION s
std::vector< std::string > quoted_split(const std::string &val, char c, int flags, char quote)
This function is identical to split(), except it does not split when it otherwise would if the previo...
std::string signed_percent(int val)
Convert into a percentage (using the Unicode "−" and +0% convention.
std::string bool_string(const bool value)
Converts a bool value to 'true' or 'false'.
bool string_bool(const std::string &str, bool def)
Convert no, false, off, 0, 0.0 to false, empty to def, and others to true.
static BOOST_CONSTEXPR_OR_CONST size_type npos
Definition: string_view.hpp:84
const std::string unicode_bullet
Definition: constants.cpp:43
bool isnewline(const char c)
#define next(ls)
Definition: llex.cpp:32
#define f
std::vector< std::string > split(const config_attribute_value &val)
int apply_modifier(const int number, const std::string &amount, const int minimum)
lu_byte left
Definition: lparser.cpp:1026
std::string indent(const std::string &string, std::size_t indent_size)
Indent a block of text.
static const char * match(MatchState *ms, const char *s, const char *p)
Definition: lstrlib.cpp:425
std::set< std::string > set_split(const std::string &val, const char c=',', const int flags=REMOVE_EMPTY|STRIP_SPACES)
Splits a (comma-)separated string into a set of pieces.
bool portable_isspace(const char c)
std::string signed_value(int val)
Convert into a signed value (using the Unicode "−" and +0 convention.
mock_char c
std::string print_modifier(const std::string &mod)
Add a "+" or replace the "-" par Unicode minus.
bool word_completion(std::string &text, std::vector< std::string > &wordlist)
Try to complete the last word of 'text' with the 'wordlist'.
std::vector< std::string > square_parenthetical_split(const std::string &val, const char separator, const std::string &left, const std::string &right, const int flags)
Similar to parenthetical_split, but also expands embedded square brackets.
std::pair< std::string, unsigned > item
Definition: help_impl.hpp:384
bool word_match(const std::string &message, const std::string &word)
Check if a message contains a word.
std::vector< std::string > parenthetical_split(const std::string &val, const char separator, const std::string &left, const std::string &right, const int flags)
Splits a string based either on a separator, except then the text appears within specified parenthesi...