The Battle for Wesnoth  1.17.23+dev
string_utils.hpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2005 - 2023
3  by Philippe Plantier <ayin@anathas.org>
4  Copyright (C) 2005 by Guillaume Melquiond <guillaume.melquiond@gmail.com>
5  Copyright (C) 2003 by David White <dave@whitevine.net>
6  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
7 
8  This program is free software; you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation; either version 2 of the License, or
11  (at your option) any later version.
12  This program is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY.
14 
15  See the COPYING file for more details.
16 */
17 
18 #pragma once
19 
20 #include "font/constants.hpp"
21 
22 #include <algorithm>
23 #include <map>
24 #include <ostream>
25 #include <set>
26 #include <sstream>
27 #include <string>
28 #include <string_view>
29 #include <utility>
30 #include <vector>
31 
32 class t_string;
33 
34 namespace utils {
35 
36 using string_map = std::map<std::string, t_string>;
37 
/**
 * Keys that res_compare sorts ahead of everything else, in exactly this order.
 *
 * Declared `inline` so that every translation unit including this header shares one
 * object. A plain namespace-scope `const` has internal linkage, which would make the
 * inline res_compare::operator() below refer to a different object in each
 * translation unit.
 */
inline const std::vector<std::string> res_order = {"blade", "pierce", "impact", "fire", "cold", "arcane"};

/**
 * Strict weak ordering for string keys that honours res_order.
 *
 * Keys listed in res_order compare according to their position in that list and
 * precede every key that is not listed; unlisted keys are ordered lexicographically.
 */
struct res_compare {
	/** Returns whether a < b, considering res_order. */
	bool operator()(const std::string& a, const std::string& b) const {
		const auto pos_a = std::find(res_order.begin(), res_order.end(), a);
		const auto pos_b = std::find(res_order.begin(), res_order.end(), b);

		if(pos_a == res_order.end() && pos_b == res_order.end()) {
			// Neither key is listed: plain lexicographic fallback.
			return a < b;
		}

		// An unlisted key yields end(), which compares after every listed position,
		// so listed keys always come first and equal keys are never "less".
		return pos_a < pos_b;
	}
};
52 
53 using string_map_res = std::map<std::string, t_string, res_compare>;
54 
/** NOTE(review): body not visible in this header; presumably reports whether @a c is a line-break character — confirm. */
55 bool isnewline(const char c);
/** NOTE(review): presumably a platform-independent replacement for isspace() — confirm against the implementation. */
56 bool portable_isspace(const char c);
/** NOTE(review): presumably the negation of portable_isspace() — confirm against the implementation. */
57 bool notspace(char c);
58 
/** Bit flags for the @a flags parameter of the split functions; combine them with bitwise OR. */
59 enum {
60  REMOVE_EMPTY = 0x01, /**< REMOVE_EMPTY: remove empty elements. */
61  STRIP_SPACES = 0x02 /**< STRIP_SPACES: strips leading and trailing blank spaces. */
62 };
63 
/**
 * Trims @a s in place; the view is taken by non-const reference so the callee can adjust it.
 *
 * split_foreach() calls this on every piece when STRIP_SPACES is set, so it presumably
 * drops leading and trailing blank characters — confirm against the implementation.
 */
64 void trim(std::string_view& s);
65 
/**
 * Calls @a f once for every piece of @a s delimited by @a sep.
 *
 * Pieces are passed as views into @a s, in order, and may be empty (for example
 * between two adjacent separators or after a trailing one). An empty @a s yields
 * no calls at all.
 *
 * @param s   Text to cut into pieces.
 * @param sep Separator character.
 * @param f   Callable invoked with each piece as a std::string_view.
 */
template<typename F>
void split_foreach_impl(std::string_view s, char sep, const F& f)
{
	if(s.empty()) {
		return;
	}

	std::size_t piece_begin = 0;
	std::size_t sep_pos = s.find(sep, piece_begin);

	while(sep_pos != std::string_view::npos) {
		f(s.substr(piece_begin, sep_pos - piece_begin));
		piece_begin = sep_pos + 1;
		sep_pos = s.find(sep, piece_begin);
	}

	// Everything after the last separator (possibly nothing) is the final piece.
	f(s.substr(piece_begin));
}
83 
84 template<typename F>
85 void split_foreach(std::string_view s, char sep, const int flags, const F& f)
86 {
87  split_foreach_impl(s, sep, [&](std::string_view item) {
88  if(flags & STRIP_SPACES) {
89  trim(item);
90  }
91  if(!(flags & REMOVE_EMPTY) || !item.empty()) {
92  f(item);
93  }
94  });
95 }
96 
97 
98 
99 /**
 * Splits a (comma-)separated string into a vector of pieces.
 *
 * @param val   The string to split.
 * @param c     The separator character.
 * @param flags Bitwise OR of REMOVE_EMPTY and/or STRIP_SPACES.
 */
100 std::vector<std::string> split(std::string_view val, const char c = ',', const int flags = REMOVE_EMPTY | STRIP_SPACES);
/** Takes the same parameters as split(); NOTE(review): presumably returns the pieces as a std::set instead of a vector — confirm against the implementation. */
101 std::set<std::string> split_set(std::string_view val, const char c = ',', const int flags = REMOVE_EMPTY | STRIP_SPACES);
102 
103 /**
104  * This function is identical to split(), except it does not split when it otherwise would if the
105  * previous character was identical to the parameter 'quote' (i.e. it does not split quoted commas).
106  * This method was added to make it possible to quote user input, particularly so commas in user input
107  * would not cause visual problems in menus.
108  *
109  * @todo Why not change split()? That would change the methods post condition.
110  */
111 std::vector<std::string> quoted_split(const std::string& val, char c= ',', int flags = REMOVE_EMPTY | STRIP_SPACES, char quote = '\\');
112 
113 /**
114  * Splits a (comma-)separated string into a set of pieces.
115  * See split() for the meanings of the parameters.
116  */
117 inline std::set<std::string> set_split(const std::string& val, const char c = ',', const int flags = REMOVE_EMPTY | STRIP_SPACES)
118 {
119  std::vector<std::string> vec_split = split(val, c, flags);
120  return std::set< std::string >(vec_split.begin(), vec_split.end());
121 }
122 
123 /**
124  * Splits a string based on two separators into a map.
125  *
126  * Major: the separator between elements of the map
127  * Minor: the separator between keys and values in one element
128  *
129  * For example, the string 'a:b,c:d,e:f' would be parsed into:
130  * a => b
131  * c => d
132  * e => f
133  */
134 std::map<std::string, std::string> map_split(
135  const std::string& val,
136  char major = ',',
137  char minor = ':',
138  int flags = REMOVE_EMPTY | STRIP_SPACES,
139  const std::string& default_value = "");
140 
141 /**
142  * Splits a string based on a separator, except when the text appears within the specified parentheses.
143  *
144  * If the separator is 0 (the default), it splits a string into an odd number of parts:
145  * - The part before the first '(',
146  * - the part between the first '('
147  * - and the matching right ')', etc ...
148  * and the remainder of the string.
149  *
150  * Note that one can use the same character for both the left and right parenthesis, which usually makes
151  * the most sense for this function.
152  *
153  * Note that this will find the first matching char in the left string and match against the corresponding
154  * char in the right string. A correctly processed string should return a vector with an odd number of
155  * elements. Empty elements are never removed as they are placeholders, hence REMOVE_EMPTY only works for
156  * the separator split.
157  *
158  * INPUT: ("a(b)c{d}e(f{g})h", 0, "({", ")}")
159  * RETURNS: {"a", "b", "c", "d", "e", "f{g}", "h"}
160  */
161 std::vector< std::string > parenthetical_split(
162  const std::string& val,
163  const char separator = 0,
164  const std::string& left = "(",
165  const std::string& right = ")",
166  const int flags = REMOVE_EMPTY | STRIP_SPACES);
167 
168 /**
169  * Similar to parenthetical_split, but also expands embedded square brackets.
170  *
171  * Notes:
172  * - The Separator must be specified and number of entries in each square bracket must match in each section.
173  * - Leading zeros are preserved if specified between square brackets.
174  * - An asterisk as in [a*n] indicates to expand 'a' n times
175  *
176  * This is useful for expanding animation WML code.
177  *
178  * Examples:
179  *
180  * INPUT: ("a[1~3](1,[5,6,7]),b[8,9]", ",")
181  * RETURNS: {"a1(1,5)", "a2(1,6)", "a3(1,7)", "b8", "b9"}
182  *
183  * INPUT: ("abc[07~10]")
184  * RETURNS: {"abc07", "abc08", "abc09", "abc10"}
185  *
186  * INPUT: ("a[1,2]b[3~4]:c[5,6]")
187  * RETURNS: {"a1b3:c5", "a2b4:c6"}
188  *
189  * INPUT: ("abc[3~1].png")
190  * RETURNS: {"abc3.png", "abc2.png", "abc1.png"}
191  *
192  * INPUT: ("abc[3,1].png")
193  * RETURNS: {"abc3.png", "abc1.png"}
194  *
195  * INPUT: ("abc[de,xyz]")
196  * RETURNS: {"abcde", "abcxyz"}
197  *
198  * INPUT: ("abc[1*3]")
199  * RETURNS: {"abc1", "abc1", "abc1"}
200  */
201 std::vector<std::string> square_parenthetical_split(
202  const std::string& val,
203  const char separator = ',',
204  const std::string& left = "([",
205  const std::string& right = ")]",
206  const int flags = REMOVE_EMPTY | STRIP_SPACES);
207 
/**
 * Generates a new string joining container items in a list.
 *
 * Each element is written with operator<< and consecutive elements are separated
 * by @a s; nothing is emitted before the first or after the last element. An empty
 * container produces an empty string.
 *
 * Any range usable in a range-based for loop is accepted (standard containers,
 * built-in arrays, ...); the elements are traversed exactly once.
 *
 * @param v A container with elements.
 * @param s List delimiter.
 *
 * @returns The joined string.
 */
template <typename T>
std::string join(const T& v, const std::string& s = ",")
{
	std::ostringstream str;

	bool first = true;
	for(const auto& element : v) {
		// The delimiter goes between elements, so it is written before every element but the first.
		if(!first) {
			str << s;
		}
		first = false;

		str << element;
	}

	return str.str();
}
228 
/**
 * Generates a new string joining the key/value entries of a map-like container.
 *
 * Every entry is written as `first`, @a minor, `second` using operator<<, and
 * consecutive entries are separated by @a major. An empty container produces an
 * empty string.
 *
 * Any range usable in a range-based for loop whose elements expose `first` and
 * `second` is accepted; the elements are traversed exactly once.
 *
 * @param v     A container of pair-like entries.
 * @param major Delimiter between entries.
 * @param minor Delimiter between the key and the value of one entry.
 *
 * @returns The joined string.
 */
template <typename T>
std::string join_map(
	const T& v,
	const std::string& major = ",",
	const std::string& minor = ":")
{
	std::ostringstream str;

	bool first = true;
	for(const auto& entry : v) {
		// The major delimiter goes between entries, never before the first or after the last.
		if(!first) {
			str << major;
		}
		first = false;

		str << entry.first << minor << entry.second;
	}

	return str.str();
}
246 
247 /**
248  * Generates a new string containing a bullet list.
249  *
250  * List items are preceded by the indentation blanks, a bullet string and
251  * another blank; all but the last item are followed by a newline.
252  *
253  * @param v A container with elements.
254  * @param indent Number of indentation blanks.
255  * @param bullet The leading bullet string.
256  */
257 template<typename T>
258 std::string bullet_list(const T& v, std::size_t indent = 4, const std::string& bullet = font::unicode_bullet)
259 {
260  std::ostringstream str;
261 
262  for(typename T::const_iterator i = v.begin(); i != v.end(); ++i) {
263  if(i != v.begin()) {
264  str << '\n';
265  }
266 
267  str << std::string(indent, ' ') << bullet << ' ' << *i;
268  }
269 
270  return str.str();
271 }
272 
273 /**
274  * Indent a block of text.
275  *
276  * Only lines with content are changed; empty lines are left intact. However,
277  * if @a string is an empty string itself, the indentation unit with the
278  * specified @a indent_size will be returned instead.
279  *
280  * @param string Text to indent.
281  * @param indent_size Number of indentation units to use.
282  */
283 std::string indent(const std::string& string, std::size_t indent_size = 4);
284 
285 /**
286  * Recognises the following patterns, and returns a {min, max} pair.
287  *
288  * * "1" returns {1, 1}
289  * * "1-3" returns {1, 3}
290  * * "1-infinity" returns {1, maximum int}
291  * * "-1" returns {-1, -1}
292  * * "-3--1" returns {-3, -1}
293  *
294  * Note that:
295  *
296  * * "3-1" returns {3, 3} and does not log an error
297  * * "-1--3" returns {-1, -1} and does not log an error
298  * * Although "-infinity--1", "2-infinity" and "-infinity-infinity" are all supported,
299  *   ranges that can't match a reasonable number, e.g. "-infinity" or "infinity..infinity", may be treated as errors.
300  */
301 std::pair<int, int> parse_range(const std::string& str);
302 
303 /**
304  * Handles a comma-separated list of inputs to parse_range, in a context that does not expect
305  * negative values. Will return an empty list if any of the ranges have a minimum that's below
306  * zero.
307  */
308 std::vector<std::pair<int, int>> parse_ranges_unsigned(const std::string& str);
309 
310 /**
311  * Handles a comma-separated list of inputs to parse_range.
312  */
313 std::vector<std::pair<int, int>> parse_ranges_int(const std::string& str);
314 
315 /**
316  * Recognises similar patterns to parse_range, and returns a {min, max} pair.
317  *
318  * For this function, "infinity" results in std::numeric_limits<double>::infinity.
319  */
320 std::pair<double, double> parse_range_real(const std::string& str);
321 
322 std::vector<std::pair<double, double>> parse_ranges_real(const std::string& str);
323 
324 int apply_modifier(const int number, const std::string &amount, const int minimum = 0);
325 
326 /** Add a "+" or replace the "-" par Unicode minus */
327 inline std::string print_modifier(const std::string &mod)
328 {
329  return mod[0] == '-' ? (font::unicode_minus + std::string(mod.begin() + 1, mod.end())) : ("+" + mod);
330 }
331 
332 /** Prepends a configurable set of characters with a backslash */
333 std::string escape(const std::string &str, const char *special_chars);
334 
335 /**
336  * Prepend all special characters with a backslash.
337  *
338  * Special characters are:
339  * #@{}+-,\*=
340  */
341 inline std::string escape(const std::string &str)
342 {
343  return escape(str, "#@{}+-,\\*=");
344 }
345 
346 /** Remove all escape characters (backslash) */
347 std::string unescape(const std::string &str);
348 
349 /** Percent-escape characters in a UTF-8 string intended to be part of a URL. */
350 std::string urlencode(const std::string &str);
351 
/** Returns a copy of 'str' wrapped in double quotes; the content itself is not escaped. */
inline std::string quote(const std::string &str)
{
	std::string quoted;
	quoted.reserve(str.size() + 2);

	quoted += '"';
	quoted += str;
	quoted += '"';

	return quoted;
}
357 
358 /** Convert no, false, off, 0, 0.0 to false, empty to def, and others to true */
359 bool string_bool(const std::string& str,bool def=false);
360 
361 /** Converts a bool value to 'true' or 'false' */
362 std::string bool_string(const bool value);
363 
364 /** Convert into a signed value (using the Unicode "−" and +0 convention) */
365 std::string signed_value(int val);
366 
367 /** Sign with Unicode "−" if negative */
368 std::string half_signed_value(int val);
369 
370 /** Convert into a percentage (using the Unicode "−" and +0% convention */
371 inline std::string signed_percent(int val) {return signed_value(val) + "%";}
372 
373 /**
374  * Convert into a string with an SI-postfix.
375  *
376  * If the unit is to be translatable,
377  * a t_string should be passed as the third argument.
378  * _("unit_byte^B") is suggested as standard.
379  *
380  * There are no default values because they would not be translatable.
381  */
382 std::string si_string(double input, bool base2, const std::string& unit);
383 
384 /**
385  * Try to complete the last word of 'text' with the 'wordlist'.
386  *
387  * @param[in, out] text The parameter's usage is:
388  * - Input: Text where we try to complete the last word
389  * of.
390  * - Output: Text with completed last word.
391  * @param[in, out] wordlist
392  * The parameter's usage is:
393  * - Input: A vector of strings to complete against.
394  * - Output: A vector of strings that matched 'text'.
395  *
396  * @retval true iff text is just one word (no spaces)
397  */
398 bool word_completion(std::string& text, std::vector<std::string>& wordlist);
399 
400 /** Check if a message contains a word. */
401 bool word_match(const std::string& message, const std::string& word);
402 
403 /**
404  * Match using '*' as any number of characters (including none),
405  * '+' as one or more characters, and '?' as any one character.
406  */
407 bool wildcard_string_match(const std::string& str, const std::string& match);
408 
409 /**
410  * Converts '*' to '%' and optionally escapes '_'.
411  *
412  * @param str The original string.
413  * @param underscores Whether to escape underscore characters as well.
414  */
415 void to_sql_wildcards(std::string& str, bool underscores = true);
416 
417 /**
418  * Check if the username contains only valid characters.
419  *
420  * (all alpha-numeric characters plus underscore and hyphen)
421  */
422 bool isvalid_username(const std::string& login);
423 
424 /**
425  * Check if the username pattern contains only valid characters.
426  *
427  * (all alpha-numeric characters plus underscore, hyphen,
428  * question mark and asterisk)
429  */
430 bool isvalid_wildcard(const std::string& login);
431 
432 /**
433  * Truncates a string to a given utf-8 character count and then appends an ellipsis.
434  */
435 void ellipsis_truncate(std::string& str, const std::size_t size);
436 
437 } // end namespace utils
This class represents a single unit of a specific type.
Definition: unit.hpp:135
std::size_t i
Definition: function.cpp:968
const std::string unicode_bullet
Definition: constants.cpp:47
const std::string unicode_minus
Definition: constants.cpp:42
std::pair< std::string, unsigned > item
Definition: help_impl.hpp:414
std::string login()
std::size_t size(const std::string &str)
Length in characters of a UTF-8 string.
Definition: unicode.cpp:87
@ STRIP_SPACES
REMOVE_EMPTY: remove empty elements.
@ REMOVE_EMPTY
std::string si_string(double input, bool base2, const std::string &unit)
Convert into a string with an SI-postfix.
void trim(std::string_view &s)
std::string indent(const std::string &string, std::size_t indent_size)
Indent a block of text.
std::map< std::string, std::string > map_split(const std::string &val, char major, char minor, int flags, const std::string &default_value)
Splits a string based on two separators into a map.
bool isvalid_wildcard(const std::string &username)
Check if the username pattern contains only valid characters.
std::set< std::string > split_set(std::string_view s, char sep, const int flags)
std::vector< std::string > quoted_split(const std::string &val, char c, int flags, char quote)
This function is identical to split(), except it does not split when it otherwise would if the previo...
std::string join_map(const T &v, const std::string &major=",", const std::string &minor=":")
std::vector< std::pair< int, int > > parse_ranges_int(const std::string &str)
Handles a comma-separated list of inputs to parse_range.
std::string bullet_list(const T &v, std::size_t indent=4, const std::string &bullet=font::unicode_bullet)
Generates a new string containing a bullet list.
void split_foreach_impl(std::string_view s, char sep, const F &f)
std::set< std::string > set_split(const std::string &val, const char c=',', const int flags=REMOVE_EMPTY|STRIP_SPACES)
Splits a (comma-)separated string into a set of pieces.
std::pair< int, int > parse_range(const std::string &str)
Recognises the following patterns, and returns a {min, max} pair.
std::vector< std::string > parenthetical_split(const std::string &val, const char separator, const std::string &left, const std::string &right, const int flags)
Splits a string based either on a separator, except then the text appears within specified parenthesi...
const std::vector< std::string > res_order
std::string half_signed_value(int val)
Sign with Unicode "−" if negative.
std::string bool_string(const bool value)
Converts a bool value to 'true' or 'false'.
std::string urlencode(const std::string &str)
Percent-escape characters in a UTF-8 string intended to be part of a URL.
std::map< std::string, t_string, res_compare > string_map_res
std::string quote(const std::string &str)
Surround the string 'str' with double quotes.
void ellipsis_truncate(std::string &str, const std::size_t size)
Truncates a string to a given utf-8 character count and then appends an ellipsis.
std::vector< std::pair< int, int > > parse_ranges_unsigned(const std::string &str)
Handles a comma-separated list of inputs to parse_range, in a context that does not expect negative v...
void to_sql_wildcards(std::string &str, bool underscores)
Converts '*' to '%' and optionally escapes '_'.
void split_foreach(std::string_view s, char sep, const int flags, const F &f)
bool wildcard_string_match(const std::string &str, const std::string &match)
Match using '*' as any number of characters (including none), '+' as one or more characters,...
bool string_bool(const std::string &str, bool def)
Convert no, false, off, 0, 0.0 to false, empty to def, and others to true.
bool isvalid_username(const std::string &username)
Check if the username contains only valid characters.
std::string unescape(const std::string &str)
Remove all escape characters (backslash)
bool portable_isspace(const char c)
int apply_modifier(const int number, const std::string &amount, const int minimum)
std::vector< std::string > square_parenthetical_split(const std::string &val, const char separator, const std::string &left, const std::string &right, const int flags)
Similar to parenthetical_split, but also expands embedded square brackets.
bool isnewline(const char c)
bool notspace(const char c)
std::string join(const T &v, const std::string &s=",")
Generates a new string joining container items in a list.
std::string escape(const std::string &str, const char *special_chars)
Prepends a configurable set of characters with a backslash.
std::vector< std::pair< double, double > > parse_ranges_real(const std::string &str)
bool word_match(const std::string &message, const std::string &word)
Check if a message contains a word.
std::string signed_value(int val)
Convert into a signed value (using the Unicode "−" and +0 convention.
std::map< std::string, t_string > string_map
std::string signed_percent(int val)
Convert into a percentage (using the Unicode "−" and +0% convention.
std::vector< std::string > split(const config_attribute_value &val)
bool word_completion(std::string &text, std::vector< std::string > &wordlist)
Try to complete the last word of 'text' with the 'wordlist'.
std::string print_modifier(const std::string &mod)
Add a "+" or replace the "-" by Unicode minus.
std::pair< double, double > parse_range_real(const std::string &str)
Recognises similar patterns to parse_range, and returns a {min, max} pair.
bool operator()(const std::string &a, const std::string &b) const
Returns whether a < b, considering res_order.
mock_char c
static map_location::DIRECTION s
#define f
#define a
#define b