serialization/string_utils.hpp

Go to the documentation of this file.
00001 /* $Id: string_utils.hpp 54274 2012-05-21 00:48:07Z shadowmaster $ */
00002 /*
00003    Copyright (C) 2003 by David White <dave@whitevine.net>
00004    Copyright (C) 2005 - 2012 by Guillaume Melquiond <guillaume.melquiond@gmail.com>
00005    Part of the Battle for Wesnoth Project http://www.wesnoth.org/
00006 
00007    This program is free software; you can redistribute it and/or modify
00008    it under the terms of the GNU General Public License as published by
00009    the Free Software Foundation; either version 2 of the License, or
00010    (at your option) any later version.
00011    This program is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY.
00013 
00014    See the COPYING file for more details.
00015 */
00016 
00017 #ifndef SERIALIZATION_STRING_UTILS_HPP_INCLUDED
00018 #define SERIALIZATION_STRING_UTILS_HPP_INCLUDED
00019 
00020 #include <algorithm>
00021 #include <map>
00022 #include <sstream>
00023 #include <string>
00024 #include <vector>
00025 #include <boost/next_prior.hpp>
00026 
00027 #include "SDL_types.h"
00028 
00029 /** The type we use to represent Unicode strings. */
00030 typedef std::vector<wchar_t> wide_string;
00031 
00032 /** If we append a 0 to a ucs2_string, we can pass it to SDL_ttf as a const Uint16*. */
00033 typedef std::vector<Uint16> ucs2_string;
00034 typedef std::vector<Uint32> ucs4_string; /**< Vector of Uint32 elements; by its name, one UCS-4 value per element. */
00035 typedef std::string utf8_string;         /**< Plain std::string; by its name, meant to hold UTF-8 encoded text. */
00036 
00037 class t_string;
00038 
00039 namespace utils {
00040 
00041 extern const std::string unicode_minus;               /**< Replaces a leading '-' in print_modifier(); value defined outside this header. */
00042 extern const std::string unicode_en_dash;             /**< Value defined outside this header; presumably the en dash - TODO confirm. */
00043 extern const std::string unicode_em_dash;             /**< Value defined outside this header; presumably the em dash - TODO confirm. */
00044 extern const std::string unicode_figure_dash;         /**< Value defined outside this header; presumably the figure dash - TODO confirm. */
00045 extern const std::string unicode_multiplication_sign; /**< Value defined outside this header; presumably the multiplication sign - TODO confirm. */
00046 extern const std::string unicode_bullet;              /**< Default bullet string of bullet_list(); value defined outside this header. */
00047 
00048 bool isnewline(const char c);        /**< Character predicate; presumably true for line-break characters (definition not in this header). */
00049 bool portable_isspace(const char c); /**< Character predicate; presumably a locale-independent whitespace test - TODO confirm. */
00050 bool notspace(char c);               /**< Character predicate; presumably the negation of portable_isspace() - TODO confirm. */
00051 
00052 enum { REMOVE_EMPTY = 0x01, /**< REMOVE_EMPTY : remove empty elements. */
00053       STRIP_SPACES  = 0x02  /**< STRIP_SPACES : strips leading and trailing blank spaces. */
00054 };
00055 /** Splits val at the separator c (',' by default); flags is a bitmask of REMOVE_EMPTY and STRIP_SPACES, both set by default. */
00056 std::vector< std::string > split(std::string const &val, char c = ',', int flags = REMOVE_EMPTY | STRIP_SPACES);
00057 
00058 /**
00059  * Splits a string based either on a separator, where text within parentheses
00060  * is protected from splitting (note that one can use the same character for
00061  * both the left and right parenthesis; in this mode it usually only makes
00062  * sense to have one character for the left and right parenthesis),
00063  * or, if the separator == 0, it splits a string into an odd number of parts:
00064  * - The part before the first '(',
00065  * - the part between the first '('
00066  * - and the matching right ')', etc ...
00067  * and the remainder of the string.
00068  * Note that this will find the first matching char in the left string
00069  * and match against the corresponding char in the right string.
00070  * In this mode, a correctly processed string should return with
00071  * an odd number of elements in the vector, and
00072  * empty elements are never removed as they are placeholders;
00073  * hence REMOVE_EMPTY only works for the separator split.
00074  *
00075  * parenthetical_split("a(b)c{d}e(f{g})h",0,"({",")}") should return
00076  * a vector of <"a","b","c","d","e","f{g}","h">
00077  */
00078 std::vector< std::string > parenthetical_split(std::string const &val,
00079     const char separator = 0 , std::string const &left="(",
00080     std::string const &right=")",int flags = REMOVE_EMPTY | STRIP_SPACES);
00081 
/**
 * Generates a new string joining container items in a list.
 *
 * Each element is written with operator<< into a string stream. The
 * delimiter is emitted only between consecutive elements, so the result has
 * no leading or trailing delimiter and an empty container yields "".
 *
 * @param v A container with elements; it must provide begin(), end() and a
 *          const_iterator whose dereferenced value can be streamed.
 * @param s List delimiter.
 *
 * @return The joined string.
 */
template <typename T>
std::string join(T const &v, const std::string& s = ",")
{
    std::stringstream str;
    for(typename T::const_iterator i = v.begin(); i != v.end(); ++i) {
        // Write the delimiter in front of every element except the first.
        // This needs no look-ahead copy of the iterator (boost::next) and is
        // the same separator pattern bullet_list() uses.
        if(i != v.begin()) {
            str << s;
        }
        str << *i;
    }

    return str.str();
}
00100 
00101 /**
00102  * Generates a new string containing a bullet list.
00103  *
00104  * List items are preceded by the indentation blanks, a bullet string and
00105  * another blank; all but the last item are followed by a newline.
00106  *
00107  * @param v A container with elements.
00108  * @param indent Number of indentation blanks.
00109  * @param bullet The leading bullet string.
00110  */
00111 template<typename T>
00112 std::string bullet_list(const T& v, size_t indent = 4, const std::string& bullet = unicode_bullet)
00113 {
00114     std::ostringstream str;
00115 
00116     for(typename T::const_iterator i = v.begin(); i != v.end(); ++i) {
00117         if(i != v.begin()) {
00118             str << '\n';
00119         }
00120 
00121         str << std::string(indent, ' ') << bullet << ' ' << *i;
00122     }
00123 
00124     return str.str();
00125 }
00126 
00127 /**
00128  * This function is identical to split(), except that it does not split
00129  * where it otherwise would if the previous character is identical to the parameter 'quote',
00130  * i.e. it does not split quoted separators (with the defaults: a comma preceded by a backslash).
00131  * This function was added to make it possible to quote user input,
00132  * particularly so that commas in user input will not cause visual problems in menus.
00133  *
00134  * @todo Why not change split()? That would change the function's postcondition.
00135  */
00136 std::vector< std::string > quoted_split(std::string const &val, char c= ',',
00137                                         int flags = REMOVE_EMPTY | STRIP_SPACES, char quote = '\\');
00138 std::pair< int, int > parse_range(std::string const &str); /**< Parses str into a pair of ints; presumably the two ends of a range - format is defined by the implementation. */
00139 std::vector< std::pair< int, int > > parse_ranges(std::string const &str); /**< Parses str into a list of int pairs; presumably one parse_range() result per listed range - TODO confirm. */
00140 int apply_modifier( const int number, const std::string &amount, const int minimum = 0); /**< Presumably applies the textual modifier 'amount' to 'number' and bounds the result below by 'minimum' - TODO confirm. */
00141 
00142 /* add a "+" or replace the "-" par Unicode minus */
00143 inline std::string print_modifier(const std::string &mod)
00144 { return mod[0] == '-' ?
00145     (unicode_minus + std::string(mod.begin()+1, mod.end())) : ("+" + mod);}
00146 
/** Prepends a configurable set of characters with a backslash */
std::string escape(const std::string &str, const char *special_chars);

/**
 * Backslash-escapes the default set of special characters in str.
 *
 * Forwards to the two-argument escape() with the character set
 * #@{}+-,\*=
 */
inline std::string escape(const std::string &str)
{
    const char* const default_special_chars = "#@{}+-,\\*=";
    return escape(str, default_special_chars);
}
00158 
00159 /** Remove all escape characters (backslash) and return the result as a new string. */
00160 std::string unescape(const std::string &str);
00161 
00162 /** Remove whitespace from the front and back of the string 'str'; takes and returns a non-const reference (presumably 'str' itself, modified in place). */
00163 std::string &strip(std::string &str);
00164 
00165 /** Convert no, false, off, 0, 0.0 to false, an empty string to 'def', and everything else to true. */
00166 bool string_bool(const std::string& str,bool def=false);
00167 
/** Convert into a signed value (using the Unicode "−" and +0 convention) */
std::string signed_value(int val);

/** Sign with Unicode "−" if negative */
std::string half_signed_value(int val);

/** Convert into a percentage: signed_value(val) followed by "%" (Unicode "−" and +0% convention) */
inline std::string signed_percent(int val)
{
    std::string text = signed_value(val);
    text += "%";
    return text;
}
00176 
00177 /**
00178  * Convert a number into a string with an SI postfix.
00179  *
00180  * If the unit is to be translatable,
00181  * a t_string should be passed as the third argument.
00182  * _("unit_byte^B") is suggested as standard.
00183  *
00184  * There are no default values because they would not be translatable.
00185  */
00186 std::string si_string(double input, bool base2, std::string unit);
00187 
00188 /**
00189  * Try to complete the last word of 'text' with the 'wordlist'.
00190  *
00191  * @param[in,out] text      On input, the text whose last word we try to complete;
00192  *                          on output, the text with its last word completed.
00193  * @param[in,out] wordlist  On input, a vector of strings to complete against;
00194  *                          on output, the strings that matched 'text'.
00195  *
00196  * @return 'true' iff text is just one word (no spaces)
00197  */
00198 bool word_completion(std::string& text, std::vector<std::string>& wordlist);
00199 
00200 /** Check if a message contains a word. */
00201 bool word_match(const std::string& message, const std::string& word);
00202 
00203 /**
00204  * Match using '*' as any number of characters (including none), and '?' as any
00205  * one character. Presumably 'match' is the pattern and 'str' the text tested - TODO confirm.
00206  */
00207 bool wildcard_string_match(const std::string& str, const std::string& match);
00208 
00209 /**
00210  * Check if the username contains only valid characters.
00211  *
00212  * (all alphanumeric characters plus underscore and hyphen)
00213  */
00214 bool isvalid_username(const std::string &login);
00215 
00216 /**
00217  * Check if the username pattern contains only valid characters.
00218  *
00219  * (all alphanumeric characters plus underscore, hyphen,
00220  * question mark and asterisk)
00221  */
00222 bool isvalid_wildcard(const std::string &login);
00223 
00224 typedef std::map< std::string, t_string > string_map; /**< Maps a std::string key to a t_string value. */
00225 
00226 /**
00227  * Exception type for the functions below, which convert Unicode wide-char
00228  * strings to UTF-8 encoded strings, back and forth (presumably thrown on invalid UTF-8 input - TODO confirm).
00229  */
00230 class invalid_utf8_exception : public std::exception {
00231 };
00232 
/**
 * Input iterator over a std::string that yields one wchar_t per step.
 *
 * Only the interface is visible in this header: apart from operator!=, all
 * member functions are defined elsewhere.
 */
class utf8_iterator
{
public:
    typedef std::input_iterator_tag iterator_category;
    typedef wchar_t value_type;
    // Taken from std::string rather than the unqualified ::ptrdiff_t, which
    // none of the headers included here is required to declare. For
    // std::string this is std::ptrdiff_t, so the type itself is unchanged.
    typedef std::string::difference_type difference_type;
    typedef wchar_t* pointer;
    typedef wchar_t& reference;

    /** Constructs an iterator from a whole string. */
    utf8_iterator(const std::string& str);
    /** Constructs an iterator from an iterator range of a string. */
    utf8_iterator(std::string::const_iterator const &begin, std::string::const_iterator const &end);

    /** Presumably the iterator at the first character of str - TODO confirm. */
    static utf8_iterator begin(const std::string& str);
    /** Presumably the past-the-end iterator of str - TODO confirm. */
    static utf8_iterator end(const std::string& str);

    bool operator==(const utf8_iterator& a) const;
    /** Exact negation of operator==. */
    bool operator!=(const utf8_iterator& a) const { return !(*this == a); }
    utf8_iterator& operator++();
    /** Returns the character at the current position, by value. */
    wchar_t operator*() const;
    /** Presumably true when the next increment reaches the end; note that it is not const. */
    bool next_is_end();
    /** Returns a pair of string iterators; presumably the byte range of the current character. */
    const std::pair<std::string::const_iterator, std::string::const_iterator>& substr() const;
private:
    void update();

    wchar_t current_char;
    std::string::const_iterator string_end;
    std::pair<std::string::const_iterator, std::string::const_iterator> current_substr;
};
00261 
00262 std::string wstring_to_string(const wide_string &); /**< Wide-char string to UTF-8 encoded std::string. */
00263 wide_string string_to_wstring(const std::string &); /**< UTF-8 encoded std::string to wide-char string. */
00264 std::string wchar_to_string(const wchar_t);         /**< Single wide character to std::string; presumably its UTF-8 encoding - TODO confirm. */
00265 
00266 /** Returns a lowercased copy of the string; the argument is taken by const reference and left untouched. */
00267 utf8_string lowercase(const utf8_string&);
00268 
00269 /**
00270  * Truncates a string.
00271  *
00272  * If the given string has more than 'size' UTF-8 characters it will be
00273  * truncated to this size.
00274  * No assumptions can be made about the actual size of the string.
00275  *
00276  * @param[in,out] str  On input, a string which can be converted to UTF-8;
00277  *                     on output, a string containing at most 'size' UTF-8 characters.
00278  * @param size         The size to truncate at.
00279  */
00280 void truncate_as_wstring(std::string& str, const size_t size);
00281 
00282 /**
00283  * Truncates a string (in place) to a given UTF-8 character count and then appends an ellipsis.
00284  */
00285 void ellipsis_truncate(std::string& str, const size_t size);
00286 
00287 }
00288 
00289 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Generated by doxygen 1.7.1 on Fri May 25 2012 01:03:09 for The Battle for Wesnoth
Gna! | Forum | Wiki | CIA | devdocs