lua/lstrlib.c

Go to the documentation of this file.
00001 /*
00002 ** $Id: lstrlib.c,v 1.173 2011/11/30 18:24:56 roberto Exp $
00003 ** Standard library for string operations and pattern-matching
00004 ** See Copyright Notice in lua.h
00005 */
00006 
00007 
00008 #include <ctype.h>
00009 #include <stddef.h>
00010 #include <stdio.h>
00011 #include <stdlib.h>
00012 #include <string.h>
00013 
00014 #define lstrlib_c
00015 #define LUA_LIB
00016 
00017 #include "lua.h"
00018 
00019 #include "lauxlib.h"
00020 #include "lualib.h"
00021 
00022 
00023 /*
00024 ** maximum number of captures that a pattern can do during
00025 ** pattern-matching. This limit is arbitrary.
/* (it is the size of the 'capture' array inside MatchState) */
00026 */
00027 #if !defined(LUA_MAXCAPTURES)
00028 #define LUA_MAXCAPTURES     32
00029 #endif
00030 
00031 
00032 /* macro to `unsign' a character */
/* used in this file before handing a 'char' to the <ctype.h> functions
   and before comparing it with an 'int' character code */
00033 #define uchar(c)        ((unsigned char)(c))
00034 
00035 
00036 
00037 static int str_len (lua_State *L) {
00038   size_t l;
00039   luaL_checklstring(L, 1, &l);
00040   lua_pushinteger(L, (lua_Integer)l);
00041   return 1;
00042 }
00043 
00044 
/*
** Translate a relative string position into an absolute one.
** A non-negative 'pos' is returned unchanged. A negative 'pos' counts
** back from the end of a string with 'len' bytes (-1 is the last byte);
** if it reaches before the start of the string the result is 0.
*/
static size_t posrelat (ptrdiff_t pos, size_t len) {
  size_t back;  /* distance from the end (magnitude of a negative 'pos') */
  if (pos >= 0)
    return (size_t)pos;
  back = 0u - (size_t)pos;
  if (back > len)
    return 0;
  return len - back + 1;
}
00051 
00052 
00053 static int str_sub (lua_State *L) {
00054   size_t l;
00055   const char *s = luaL_checklstring(L, 1, &l);
00056   size_t start = posrelat(luaL_checkinteger(L, 2), l);
00057   size_t end = posrelat(luaL_optinteger(L, 3, -1), l);
00058   if (start < 1) start = 1;
00059   if (end > l) end = l;
00060   if (start <= end)
00061     lua_pushlstring(L, s + start - 1, end - start + 1);
00062   else lua_pushliteral(L, "");
00063   return 1;
00064 }
00065 
00066 
00067 static int str_reverse (lua_State *L) {
00068   size_t l, i;
00069   luaL_Buffer b;
00070   const char *s = luaL_checklstring(L, 1, &l);
00071   char *p = luaL_buffinitsize(L, &b, l);
00072   for (i = 0; i < l; i++)
00073     p[i] = s[l - i - 1];
00074   luaL_pushresultsize(&b, l);
00075   return 1;
00076 }
00077 
00078 
00079 static int str_lower (lua_State *L) {
00080   size_t l;
00081   size_t i;
00082   luaL_Buffer b;
00083   const char *s = luaL_checklstring(L, 1, &l);
00084   char *p = luaL_buffinitsize(L, &b, l);
00085   for (i=0; i<l; i++)
00086     p[i] = tolower(uchar(s[i]));
00087   luaL_pushresultsize(&b, l);
00088   return 1;
00089 }
00090 
00091 
00092 static int str_upper (lua_State *L) {
00093   size_t l;
00094   size_t i;
00095   luaL_Buffer b;
00096   const char *s = luaL_checklstring(L, 1, &l);
00097   char *p = luaL_buffinitsize(L, &b, l);
00098   for (i=0; i<l; i++)
00099     p[i] = toupper(uchar(s[i]));
00100   luaL_pushresultsize(&b, l);
00101   return 1;
00102 }
00103 
00104 
00105 /* reasonable limit to avoid arithmetic overflow */
00106 #define MAXSIZE     ((~(size_t)0) >> 1)
00107 
00108 static int str_rep (lua_State *L) {
00109   size_t l, lsep;
00110   const char *s = luaL_checklstring(L, 1, &l);
00111   int n = luaL_checkint(L, 2);
00112   const char *sep = luaL_optlstring(L, 3, "", &lsep);
00113   if (n <= 0) lua_pushliteral(L, "");
00114   else if (l + lsep < l || l + lsep >= MAXSIZE / n)  /* may overflow? */
00115     return luaL_error(L, "resulting string too large");
00116   else {
00117     size_t totallen = n * l + (n - 1) * lsep;
00118     luaL_Buffer b;
00119     char *p = luaL_buffinitsize(L, &b, totallen);
00120     while (n-- > 1) {  /* first n-1 copies (followed by separator) */
00121       memcpy(p, s, l * sizeof(char)); p += l;
00122       memcpy(p, sep, lsep * sizeof(char)); p += lsep;
00123     }
00124     memcpy(p, s, l * sizeof(char));  /* last copy (not followed by separator) */
00125     luaL_pushresultsize(&b, totallen);
00126   }
00127   return 1;
00128 }
00129 
00130 
00131 static int str_byte (lua_State *L) {
00132   size_t l;
00133   const char *s = luaL_checklstring(L, 1, &l);
00134   size_t posi = posrelat(luaL_optinteger(L, 2, 1), l);
00135   size_t pose = posrelat(luaL_optinteger(L, 3, posi), l);
00136   int n, i;
00137   if (posi < 1) posi = 1;
00138   if (pose > l) pose = l;
00139   if (posi > pose) return 0;  /* empty interval; return no values */
00140   n = (int)(pose -  posi + 1);
00141   if (posi + n <= pose)  /* (size_t -> int) overflow? */
00142     return luaL_error(L, "string slice too long");
00143   luaL_checkstack(L, n, "string slice too long");
00144   for (i=0; i<n; i++)
00145     lua_pushinteger(L, uchar(s[posi+i-1]));
00146   return n;
00147 }
00148 
00149 
00150 static int str_char (lua_State *L) {
00151   int n = lua_gettop(L);  /* number of arguments */
00152   int i;
00153   luaL_Buffer b;
00154   char *p = luaL_buffinitsize(L, &b, n);
00155   for (i=1; i<=n; i++) {
00156     int c = luaL_checkint(L, i);
00157     luaL_argcheck(L, uchar(c) == c, i, "value out of range");
00158     p[i - 1] = uchar(c);
00159   }
00160   luaL_pushresultsize(&b, n);
00161   return 1;
00162 }
00163 
00164 
00165 static int writer (lua_State *L, const void* b, size_t size, void* B) {
00166   (void)L;
00167   luaL_addlstring((luaL_Buffer*) B, (const char *)b, size);
00168   return 0;
00169 }
00170 
00171 
00172 static int str_dump (lua_State *L) {
00173   luaL_Buffer b;
00174   luaL_checktype(L, 1, LUA_TFUNCTION);
00175   lua_settop(L, 1);
00176   luaL_buffinit(L,&b);
00177   if (lua_dump(L, writer, &b) != 0)
00178     return luaL_error(L, "unable to dump given function");
00179   luaL_pushresult(&b);
00180   return 1;
00181 }
00182 
00183 
00184 
00185 /*
00186 ** {======================================================
00187 ** PATTERN MATCHING
00188 ** =======================================================
00189 */
00190 
00191 
/* special values stored in 'capture[].len' instead of a real length */
/* CAP_UNFINISHED: capture was opened but not closed yet */
00192 #define CAP_UNFINISHED  (-1)
/* CAP_POSITION: position capture `()'; pushed as an index, not a string */
00193 #define CAP_POSITION    (-2)
00194 
/* state shared by all functions taking part in one pattern match */
00195 typedef struct MatchState {
00196   const char *src_init;  /* init of source string */
00197   const char *src_end;  /* end ('\0') of source string */
00198   const char *p_end;  /* end ('\0') of pattern */
  /* Lua state used to raise errors and push results */
00199   lua_State *L;
00200   int level;  /* total number of captures (finished or unfinished) */
00201   struct {
    /* where the capture starts in the source string */
00202     const char *init;
    /* length of the capture, or CAP_UNFINISHED / CAP_POSITION */
00203     ptrdiff_t len;
00204   } capture[LUA_MAXCAPTURES];
00205 } MatchState;
00206 
00207 
/* escape character used in patterns and in gsub replacement strings */
00208 #define L_ESC       '%'
/* characters that make 'nospecials' reject a pattern for plain search */
00209 #define SPECIALS    "^$*+?.([%-"
00210 
00211 
00212 static int check_capture (MatchState *ms, int l) {
00213   l -= '1';
00214   if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED)
00215     return luaL_error(ms->L, "invalid capture index %%%d", l + 1);
00216   return l;
00217 }
00218 
00219 
00220 static int capture_to_close (MatchState *ms) {
00221   int level = ms->level;
00222   for (level--; level>=0; level--)
00223     if (ms->capture[level].len == CAP_UNFINISHED) return level;
00224   return luaL_error(ms->L, "invalid pattern capture");
00225 }
00226 
00227 
00228 static const char *classend (MatchState *ms, const char *p) {
00229   switch (*p++) {
00230     case L_ESC: {
00231       if (p == ms->p_end)
00232         luaL_error(ms->L, "malformed pattern (ends with " LUA_QL("%%") ")");
00233       return p+1;
00234     }
00235     case '[': {
00236       if (*p == '^') p++;
00237       do {  /* look for a `]' */
00238         if (p == ms->p_end)
00239           luaL_error(ms->L, "malformed pattern (missing " LUA_QL("]") ")");
00240         if (*(p++) == L_ESC && p < ms->p_end)
00241           p++;  /* skip escapes (e.g. `%]') */
00242       } while (*p != ']');
00243       return p+1;
00244     }
00245     default: {
00246       return p;
00247     }
00248   }
00249 }
00250 
00251 
/*
** Test character 'c' against the class letter 'cl' (the character that
** follows `%' in a pattern). A lower-case letter selects the <ctype.h>
** predicate below; the upper-case form gives the negated result. Any
** other 'cl' stands for itself and is compared with 'c' directly.
*/
static int match_class (int c, int cl) {
  int hit;
  switch (tolower(cl)) {
    case 'a': hit = isalpha(c); break;
    case 'c': hit = iscntrl(c); break;
    case 'd': hit = isdigit(c); break;
    case 'g': hit = isgraph(c); break;
    case 'l': hit = islower(c); break;
    case 'p': hit = ispunct(c); break;
    case 's': hit = isspace(c); break;
    case 'u': hit = isupper(c); break;
    case 'w': hit = isalnum(c); break;
    case 'x': hit = isxdigit(c); break;
    case 'z': hit = (c == 0); break;  /* deprecated option */
    default:
      return (cl == c);  /* not a class letter: literal comparison */
  }
  if (!islower(cl))  /* upper-case class letter: complement */
    hit = !hit;
  return hit;
}
00270 
00271 
00272 static int matchbracketclass (int c, const char *p, const char *ec) {
00273   int sig = 1;
00274   if (*(p+1) == '^') {
00275     sig = 0;
00276     p++;  /* skip the `^' */
00277   }
00278   while (++p < ec) {
00279     if (*p == L_ESC) {
00280       p++;
00281       if (match_class(c, uchar(*p)))
00282         return sig;
00283     }
00284     else if ((*(p+1) == '-') && (p+2 < ec)) {
00285       p+=2;
00286       if (uchar(*(p-2)) <= c && c <= uchar(*p))
00287         return sig;
00288     }
00289     else if (uchar(*p) == c) return sig;
00290   }
00291   return !sig;
00292 }
00293 
00294 
00295 static int singlematch (int c, const char *p, const char *ep) {
00296   switch (*p) {
00297     case '.': return 1;  /* matches any char */
00298     case L_ESC: return match_class(c, uchar(*(p+1)));
00299     case '[': return matchbracketclass(c, p, ep-1);
00300     default:  return (uchar(*p) == c);
00301   }
00302 }
00303 
00304 
00305 static const char *match (MatchState *ms, const char *s, const char *p);
00306 
00307 
00308 static const char *matchbalance (MatchState *ms, const char *s,
00309                                    const char *p) {
00310   if (p >= ms->p_end - 1)
00311     luaL_error(ms->L, "malformed pattern "
00312                       "(missing arguments to " LUA_QL("%%b") ")");
00313   if (*s != *p) return NULL;
00314   else {
00315     int b = *p;
00316     int e = *(p+1);
00317     int cont = 1;
00318     while (++s < ms->src_end) {
00319       if (*s == e) {
00320         if (--cont == 0) return s+1;
00321       }
00322       else if (*s == b) cont++;
00323     }
00324   }
00325   return NULL;  /* string ends out of balance */
00326 }
00327 
00328 
00329 static const char *max_expand (MatchState *ms, const char *s,
00330                                  const char *p, const char *ep) {
00331   ptrdiff_t i = 0;  /* counts maximum expand for item */
00332   while ((s+i)<ms->src_end && singlematch(uchar(*(s+i)), p, ep))
00333     i++;
00334   /* keeps trying to match with the maximum repetitions */
00335   while (i>=0) {
00336     const char *res = match(ms, (s+i), ep+1);
00337     if (res) return res;
00338     i--;  /* else didn't match; reduce 1 repetition to try again */
00339   }
00340   return NULL;
00341 }
00342 
00343 
00344 static const char *min_expand (MatchState *ms, const char *s,
00345                                  const char *p, const char *ep) {
00346   for (;;) {
00347     const char *res = match(ms, s, ep+1);
00348     if (res != NULL)
00349       return res;
00350     else if (s<ms->src_end && singlematch(uchar(*s), p, ep))
00351       s++;  /* try with one more repetition */
00352     else return NULL;
00353   }
00354 }
00355 
00356 
00357 static const char *start_capture (MatchState *ms, const char *s,
00358                                     const char *p, int what) {
00359   const char *res;
00360   int level = ms->level;
00361   if (level >= LUA_MAXCAPTURES) luaL_error(ms->L, "too many captures");
00362   ms->capture[level].init = s;
00363   ms->capture[level].len = what;
00364   ms->level = level+1;
00365   if ((res=match(ms, s, p)) == NULL)  /* match failed? */
00366     ms->level--;  /* undo capture */
00367   return res;
00368 }
00369 
00370 
00371 static const char *end_capture (MatchState *ms, const char *s,
00372                                   const char *p) {
00373   int l = capture_to_close(ms);
00374   const char *res;
00375   ms->capture[l].len = s - ms->capture[l].init;  /* close capture */
00376   if ((res = match(ms, s, p)) == NULL)  /* match failed? */
00377     ms->capture[l].len = CAP_UNFINISHED;  /* undo capture */
00378   return res;
00379 }
00380 
00381 
00382 static const char *match_capture (MatchState *ms, const char *s, int l) {
00383   size_t len;
00384   l = check_capture(ms, l);
00385   len = ms->capture[l].len;
00386   if ((size_t)(ms->src_end-s) >= len &&
00387       memcmp(ms->capture[l].init, s, len) == 0)
00388     return s+len;
00389   else return NULL;
00390 }
00391 
00392 
/*
** Core matcher: try to match pattern 'p' (up to ms->p_end) against the
** subject starting at 's'. Returns a pointer just past the matched text,
** or NULL if there is no match at this position. Captures are recorded
** in 'ms' through start_capture/end_capture.
** Tail positions are handled with 'goto init' instead of a recursive
** call; the other continuations (captures, `?', `*', `+', `-') recurse
** through helper functions or directly.
** NOTE(review): no recursion-depth limit is visible in this function or
** in the helpers, so nesting depth appears bounded only by the C stack.
*/
00393 static const char *match (MatchState *ms, const char *s, const char *p) {
00394   init: /* using goto's to optimize tail recursion */
00395   if (p == ms->p_end)  /* end of pattern? */
00396     return s;  /* match succeeded */
00397   switch (*p) {
00398     case '(': {  /* start capture */
00399       if (*(p+1) == ')')  /* position capture? */
00400         return start_capture(ms, s, p+2, CAP_POSITION);
00401       else
00402         return start_capture(ms, s, p+1, CAP_UNFINISHED);
00403     }
00404     case ')': {  /* end capture */
00405       return end_capture(ms, s, p+1);
00406     }
00407     case '$': {
00408       if ((p+1) == ms->p_end)  /* is the `$' the last char in pattern? */
00409         return (s == ms->src_end) ? s : NULL;  /* check end of string */
      /* a `$' anywhere else is treated as an ordinary character */
00410       else goto dflt;
00411     }
00412     case L_ESC: {  /* escaped sequences not in the format class[*+?-]? */
00413       switch (*(p+1)) {
00414         case 'b': {  /* balanced string? */
00415           s = matchbalance(ms, s, p+2);
00416           if (s == NULL) return NULL;
          /* skip `%', `b' and the two delimiter characters */
00417           p+=4; goto init;  /* else return match(ms, s, p+4); */
00418         }
00419         case 'f': {  /* frontier? */
00420           const char *ep; char previous;
00421           p += 2;
00422           if (*p != '[')
00423             luaL_error(ms->L, "missing " LUA_QL("[") " after "
00424                                LUA_QL("%%f") " in pattern");
00425           ep = classend(ms, p);  /* points to what is next */
          /* at the very start of the subject the previous char counts as '\0' */
00426           previous = (s == ms->src_init) ? '\0' : *(s-1);
          /* frontier holds only if previous char is outside the set and current char is inside */
00427           if (matchbracketclass(uchar(previous), p, ep-1) ||
00428              !matchbracketclass(uchar(*s), p, ep-1)) return NULL;
00429           p=ep; goto init;  /* else return match(ms, s, ep); */
00430         }
00431         case '0': case '1': case '2': case '3':
00432         case '4': case '5': case '6': case '7':
00433         case '8': case '9': {  /* capture results (%0-%9)? */
00434           s = match_capture(ms, s, uchar(*(p+1)));
00435           if (s == NULL) return NULL;
00436           p+=2; goto init;  /* else return match(ms, s, p+2) */
00437         }
00438         default: goto dflt;
00439       }
00440     }
00441     default: dflt: {  /* pattern class plus optional suffix */
00442       const char *ep = classend(ms, p);  /* points to what is next */
      /* 'm' tells whether the current subject char matches the single item [p, ep) */
00443       int m = s < ms->src_end && singlematch(uchar(*s), p, ep);
00444       switch (*ep) {
00445         case '?': {  /* optional */
00446           const char *res;
00447           if (m && ((res=match(ms, s+1, ep+1)) != NULL))
00448             return res;
00449           p=ep+1; goto init;  /* else return match(ms, s, ep+1); */
00450         }
00451         case '*': {  /* 0 or more repetitions */
00452           return max_expand(ms, s, p, ep);
00453         }
00454         case '+': {  /* 1 or more repetitions */
00455           return (m ? max_expand(ms, s+1, p, ep) : NULL);
00456         }
00457         case '-': {  /* 0 or more repetitions (minimum) */
00458           return min_expand(ms, s, p, ep);
00459         }
00460         default: {
00461           if (!m) return NULL;
00462           s++; p=ep; goto init;  /* else return match(ms, s+1, ep); */
00463         }
00464       }
00465     }
00466   }
00467 }
00468 
00469 
00470 
/*
** Find the first occurrence of the 'l2' bytes at 's2' inside the 'l1'
** bytes at 's1' (both may contain embedded zeros). Returns a pointer
** to the occurrence, 's1' itself for an empty 's2', or NULL when there
** is none.
*/
static const char *lmemfind (const char *s1, size_t l1,
                               const char *s2, size_t l2) {
  size_t tail;  /* bytes of 's2' after its first byte */
  size_t span;  /* candidate start positions still to examine */
  if (l2 == 0)
    return s1;  /* empty strings are everywhere */
  if (l2 > l1)
    return NULL;  /* needle longer than haystack */
  tail = l2 - 1;
  span = l1 - tail;  /* `s2' cannot start after that */
  while (span > 0) {
    const char *hit = (const char *)memchr(s1, *s2, span);
    if (hit == NULL)
      break;
    if (memcmp(hit + 1, s2 + 1, tail) == 0)  /* 1st byte already checked */
      return hit;
    /* resume the search just after this candidate */
    span -= (size_t)(hit + 1 - s1);
    s1 = hit + 1;
  }
  return NULL;  /* not found */
}
00491 
00492 
00493 static void push_onecapture (MatchState *ms, int i, const char *s,
00494                                                     const char *e) {
00495   if (i >= ms->level) {
00496     if (i == 0)  /* ms->level == 0, too */
00497       lua_pushlstring(ms->L, s, e - s);  /* add whole match */
00498     else
00499       luaL_error(ms->L, "invalid capture index");
00500   }
00501   else {
00502     ptrdiff_t l = ms->capture[i].len;
00503     if (l == CAP_UNFINISHED) luaL_error(ms->L, "unfinished capture");
00504     if (l == CAP_POSITION)
00505       lua_pushinteger(ms->L, ms->capture[i].init - ms->src_init + 1);
00506     else
00507       lua_pushlstring(ms->L, ms->capture[i].init, l);
00508   }
00509 }
00510 
00511 
00512 static int push_captures (MatchState *ms, const char *s, const char *e) {
00513   int i;
00514   int nlevels = (ms->level == 0 && s) ? 1 : ms->level;
00515   luaL_checkstack(ms->L, nlevels, "too many captures");
00516   for (i = 0; i < nlevels; i++)
00517     push_onecapture(ms, i, s, e);
00518   return nlevels;  /* number of strings pushed */
00519 }
00520 
00521 
00522 /* check whether pattern has no special characters */
00523 static int nospecials (const char *p, size_t l) {
00524   size_t upto = 0;
00525   do {
00526     if (strpbrk(p + upto, SPECIALS))
00527       return 0;  /* pattern has a special character */
00528     upto += strlen(p + upto) + 1;  /* may have more after \0 */
00529   } while (upto <= l);
00530   return 1;  /* no special chars found */
00531 }
00532 
00533 
00534 static int str_find_aux (lua_State *L, int find) {
00535   size_t ls, lp;
00536   const char *s = luaL_checklstring(L, 1, &ls);
00537   const char *p = luaL_checklstring(L, 2, &lp);
00538   size_t init = posrelat(luaL_optinteger(L, 3, 1), ls);
00539   if (init < 1) init = 1;
00540   else if (init > ls + 1) {  /* start after string's end? */
00541     lua_pushnil(L);  /* cannot find anything */
00542     return 1;
00543   }
00544   /* explicit request or no special characters? */
00545   if (find && (lua_toboolean(L, 4) || nospecials(p, lp))) {
00546     /* do a plain search */
00547     const char *s2 = lmemfind(s + init - 1, ls - init + 1, p, lp);
00548     if (s2) {
00549       lua_pushinteger(L, s2 - s + 1);
00550       lua_pushinteger(L, s2 - s + lp);
00551       return 2;
00552     }
00553   }
00554   else {
00555     MatchState ms;
00556     const char *s1 = s + init - 1;
00557     int anchor = (*p == '^');
00558     if (anchor) {
00559       p++; lp--;  /* skip anchor character */
00560     }
00561     ms.L = L;
00562     ms.src_init = s;
00563     ms.src_end = s + ls;
00564     ms.p_end = p + lp;
00565     do {
00566       const char *res;
00567       ms.level = 0;
00568       if ((res=match(&ms, s1, p)) != NULL) {
00569         if (find) {
00570           lua_pushinteger(L, s1 - s + 1);  /* start */
00571           lua_pushinteger(L, res - s);   /* end */
00572           return push_captures(&ms, NULL, 0) + 2;
00573         }
00574         else
00575           return push_captures(&ms, s1, res);
00576       }
00577     } while (s1++ < ms.src_end && !anchor);
00578   }
00579   lua_pushnil(L);  /* not found */
00580   return 1;
00581 }
00582 
00583 
00584 static int str_find (lua_State *L) {
00585   return str_find_aux(L, 1);
00586 }
00587 
00588 
00589 static int str_match (lua_State *L) {
00590   return str_find_aux(L, 0);
00591 }
00592 
00593 
00594 static int gmatch_aux (lua_State *L) {
00595   MatchState ms;
00596   size_t ls, lp;
00597   const char *s = lua_tolstring(L, lua_upvalueindex(1), &ls);
00598   const char *p = lua_tolstring(L, lua_upvalueindex(2), &lp);
00599   const char *src;
00600   ms.L = L;
00601   ms.src_init = s;
00602   ms.src_end = s+ls;
00603   ms.p_end = p + lp;
00604   for (src = s + (size_t)lua_tointeger(L, lua_upvalueindex(3));
00605        src <= ms.src_end;
00606        src++) {
00607     const char *e;
00608     ms.level = 0;
00609     if ((e = match(&ms, src, p)) != NULL) {
00610       lua_Integer newstart = e-s;
00611       if (e == src) newstart++;  /* empty match? go at least one position */
00612       lua_pushinteger(L, newstart);
00613       lua_replace(L, lua_upvalueindex(3));
00614       return push_captures(&ms, src, e);
00615     }
00616   }
00617   return 0;  /* not found */
00618 }
00619 
00620 
00621 static int gmatch (lua_State *L) {
00622   luaL_checkstring(L, 1);
00623   luaL_checkstring(L, 2);
00624   lua_settop(L, 2);
00625   lua_pushinteger(L, 0);
00626   lua_pushcclosure(L, gmatch_aux, 3);
00627   return 1;
00628 }
00629 
00630 
00631 static void add_s (MatchState *ms, luaL_Buffer *b, const char *s,
00632                                                    const char *e) {
00633   size_t l, i;
00634   const char *news = lua_tolstring(ms->L, 3, &l);
00635   for (i = 0; i < l; i++) {
00636     if (news[i] != L_ESC)
00637       luaL_addchar(b, news[i]);
00638     else {
00639       i++;  /* skip ESC */
00640       if (!isdigit(uchar(news[i]))) {
00641         if (news[i] != L_ESC)
00642           luaL_error(ms->L, "invalid use of " LUA_QL("%c")
00643                            " in replacement string", L_ESC);
00644         luaL_addchar(b, news[i]);
00645       }
00646       else if (news[i] == '0')
00647           luaL_addlstring(b, s, e - s);
00648       else {
00649         push_onecapture(ms, news[i] - '1', s, e);
00650         luaL_addvalue(b);  /* add capture to accumulated result */
00651       }
00652     }
00653   }
00654 }
00655 
00656 
00657 static void add_value (MatchState *ms, luaL_Buffer *b, const char *s,
00658                                        const char *e, int tr) {
00659   lua_State *L = ms->L;
00660   switch (tr) {
00661     case LUA_TFUNCTION: {
00662       int n;
00663       lua_pushvalue(L, 3);
00664       n = push_captures(ms, s, e);
00665       lua_call(L, n, 1);
00666       break;
00667     }
00668     case LUA_TTABLE: {
00669       push_onecapture(ms, 0, s, e);
00670       lua_gettable(L, 3);
00671       break;
00672     }
00673     default: {  /* LUA_TNUMBER or LUA_TSTRING */
00674       add_s(ms, b, s, e);
00675       return;
00676     }
00677   }
00678   if (!lua_toboolean(L, -1)) {  /* nil or false? */
00679     lua_pop(L, 1);
00680     lua_pushlstring(L, s, e - s);  /* keep original text */
00681   }
00682   else if (!lua_isstring(L, -1))
00683     luaL_error(L, "invalid replacement value (a %s)", luaL_typename(L, -1));
00684   luaL_addvalue(b);  /* add result to accumulator */
00685 }
00686 
00687 
00688 static int str_gsub (lua_State *L) {
00689   size_t srcl, lp;
00690   const char *src = luaL_checklstring(L, 1, &srcl);
00691   const char *p = luaL_checklstring(L, 2, &lp);
00692   int tr = lua_type(L, 3);
00693   size_t max_s = luaL_optinteger(L, 4, srcl+1);
00694   int anchor = (*p == '^');
00695   size_t n = 0;
00696   MatchState ms;
00697   luaL_Buffer b;
00698   luaL_argcheck(L, tr == LUA_TNUMBER || tr == LUA_TSTRING ||
00699                    tr == LUA_TFUNCTION || tr == LUA_TTABLE, 3,
00700                       "string/function/table expected");
00701   luaL_buffinit(L, &b);
00702   if (anchor) {
00703     p++; lp--;  /* skip anchor character */
00704   }
00705   ms.L = L;
00706   ms.src_init = src;
00707   ms.src_end = src+srcl;
00708   ms.p_end = p + lp;
00709   while (n < max_s) {
00710     const char *e;
00711     ms.level = 0;
00712     e = match(&ms, src, p);
00713     if (e) {
00714       n++;
00715       add_value(&ms, &b, src, e, tr);
00716     }
00717     if (e && e>src) /* non empty match? */
00718       src = e;  /* skip it */
00719     else if (src < ms.src_end)
00720       luaL_addchar(&b, *src++);
00721     else break;
00722     if (anchor) break;
00723   }
00724   luaL_addlstring(&b, src, ms.src_end-src);
00725   luaL_pushresult(&b);
00726   lua_pushinteger(L, n);  /* number of substitutions */
00727   return 2;
00728 }
00729 
00730 /* }====================================================== */
00731 
00732 
00733 
00734 /*
00735 ** {======================================================
00736 ** STRING FORMAT
00737 ** =======================================================
00738 */
00739 
00740 /*
00741 ** LUA_INTFRMLEN is the length modifier for integer conversions in
00742 ** 'string.format'; LUA_INTFRM_T is the integer type corresponding to
00743 ** the previous length
00744 */
00745 #if !defined(LUA_INTFRMLEN) /* { */
00746 #if defined(LUA_USE_LONGLONG)
00747 
00748 #define LUA_INTFRMLEN           "ll"
00749 #define LUA_INTFRM_T            long long
00750 
00751 #else
00752 
00753 #define LUA_INTFRMLEN           "l"
00754 #define LUA_INTFRM_T            long
00755 
00756 #endif
00757 #endif              /* } */
00758 
/*
** limits of LUA_INTFRM_T (and of its unsigned version) expressed as
** lua_Number; 'str_format' uses them to range-check a number before
** casting it for the integer conversions
*/
00759 #define MAX_UINTFRM ((lua_Number)(~(unsigned LUA_INTFRM_T)0))
00760 #define MAX_INTFRM  ((lua_Number)((~(unsigned LUA_INTFRM_T)0)/2))
00761 #define MIN_INTFRM  (-(lua_Number)((~(unsigned LUA_INTFRM_T)0)/2) - 1)
00762 
00763 /*
00764 ** LUA_FLTFRMLEN is the length modifier for float conversions in
00765 ** 'string.format'; LUA_FLTFRM_T is the float type corresponding to
00766 ** the previous length
00767 */
00768 #if !defined(LUA_FLTFRMLEN)
00769 
00770 #define LUA_FLTFRMLEN           ""
00771 #define LUA_FLTFRM_T            double
00772 
00773 #endif
00774 
00775 
00776 /* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */
/* (size of the buffer area 'str_format' reserves before each 'sprintf') */
00777 #define MAX_ITEM    512
00778 /* valid flags in a format specification */
00779 #define FLAGS   "-+ #0"
00780 /*
00781 ** maximum size of each format specification (such as '%-099.99d')
00782 ** (+10 accounts for %99.99x plus margin of error)
00783 */
00784 #define MAX_FORMAT  (sizeof(FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
00785 
00786 
00787 static void addquoted (lua_State *L, luaL_Buffer *b, int arg) {
00788   size_t l;
00789   const char *s = luaL_checklstring(L, arg, &l);
00790   luaL_addchar(b, '"');
00791   while (l--) {
00792     if (*s == '"' || *s == '\\' || *s == '\n') {
00793       luaL_addchar(b, '\\');
00794       luaL_addchar(b, *s);
00795     }
00796     else if (*s == '\0' || iscntrl(uchar(*s))) {
00797       char buff[10];
00798       if (!isdigit(uchar(*(s+1))))
00799         sprintf(buff, "\\%d", (int)uchar(*s));
00800       else
00801         sprintf(buff, "\\%03d", (int)uchar(*s));
00802       luaL_addstring(b, buff);
00803     }
00804     else
00805       luaL_addchar(b, *s);
00806     s++;
00807   }
00808   luaL_addchar(b, '"');
00809 }
00810 
00811 static const char *scanformat (lua_State *L, const char *strfrmt, char *form) {
00812   const char *p = strfrmt;
00813   while (*p != '\0' && strchr(FLAGS, *p) != NULL) p++;  /* skip flags */
00814   if ((size_t)(p - strfrmt) >= sizeof(FLAGS)/sizeof(char))
00815     luaL_error(L, "invalid format (repeated flags)");
00816   if (isdigit(uchar(*p))) p++;  /* skip width */
00817   if (isdigit(uchar(*p))) p++;  /* (2 digits at most) */
00818   if (*p == '.') {
00819     p++;
00820     if (isdigit(uchar(*p))) p++;  /* skip precision */
00821     if (isdigit(uchar(*p))) p++;  /* (2 digits at most) */
00822   }
00823   if (isdigit(uchar(*p)))
00824     luaL_error(L, "invalid format (width or precision too long)");
00825   *(form++) = '%';
00826   memcpy(form, strfrmt, (p - strfrmt + 1) * sizeof(char));
00827   form += p - strfrmt + 1;
00828   *form = '\0';
00829   return p;
00830 }
00831 
00832 
/*
** Insert the length modifier 'lenmod' into the format specification
** 'form', just before its final (conversion) character. 'form' must be
** non-empty and have room for the extra characters.
*/
static void addlenmod (char *form, const char *lenmod) {
  size_t formlen = strlen(form);
  size_t modlen = strlen(lenmod);
  char *tail = form + formlen - 1;  /* position of the conversion char */
  char conv = *tail;
  memcpy(tail, lenmod, modlen);
  tail[modlen] = conv;
  tail[modlen + 1] = '\0';
}
00844 
00845 
/*
** string.format (fmt, ...): builds the result in a luaL_Buffer. Plain
** characters are copied, `%%' yields a single `%', and every other `%'
** starts a format item that consumes the next argument. The item is
** copied into 'form' by 'scanformat' and formatted by 'sprintf' into
** buffer space reserved with luaL_prepbuffsize (MAX_ITEM bytes);
** `%q' and long `%s' strings are added to the buffer directly instead.
** Raises an error for a missing argument or an unknown conversion.
*/
00846 static int str_format (lua_State *L) {
00847   int top = lua_gettop(L);
00848   int arg = 1;
00849   size_t sfl;
00850   const char *strfrmt = luaL_checklstring(L, arg, &sfl);
00851   const char *strfrmt_end = strfrmt+sfl;
00852   luaL_Buffer b;
00853   luaL_buffinit(L, &b);
00854   while (strfrmt < strfrmt_end) {
00855     if (*strfrmt != L_ESC)
00856       luaL_addchar(&b, *strfrmt++);
00857     else if (*++strfrmt == L_ESC)
00858       luaL_addchar(&b, *strfrmt++);  /* %% */
00859     else { /* format item */
00860       char form[MAX_FORMAT];  /* to store the format (`%...') */
00861       char *buff = luaL_prepbuffsize(&b, MAX_ITEM);  /* to put formatted item */
00862       int nb = 0;  /* number of bytes in added item */
00863       if (++arg > top)
00864         luaL_argerror(L, arg, "no value");
00865       strfrmt = scanformat(L, strfrmt, form);
      /* dispatch on the conversion character, then step past it */
00866       switch (*strfrmt++) {
00867         case 'c': {
00868           nb = sprintf(buff, form, luaL_checkint(L, arg));
00869           break;
00870         }
00871         case 'd':  case 'i': {
00872           lua_Number n = luaL_checknumber(L, arg);
          /* the number must fit in LUA_INTFRM_T before it is cast below */
00873           luaL_argcheck(L, (MIN_INTFRM - 1) < n && n < (MAX_INTFRM + 1), arg,
00874                         "not a number in proper range");
00875           addlenmod(form, LUA_INTFRMLEN);
00876           nb = sprintf(buff, form, (LUA_INTFRM_T)n);
00877           break;
00878         }
00879         case 'o':  case 'u':  case 'x':  case 'X': {
00880           lua_Number n = luaL_checknumber(L, arg);
          /* unsigned conversions: reject negatives and values too large to cast */
00881           luaL_argcheck(L, 0 <= n && n < (MAX_UINTFRM + 1), arg,
00882                         "not a non-negative number in proper range");
00883           addlenmod(form, LUA_INTFRMLEN);
00884           nb = sprintf(buff, form, (unsigned LUA_INTFRM_T)n);
00885           break;
00886         }
00887         case 'e':  case 'E': case 'f':
00888 #if defined(LUA_USE_AFORMAT)
00889         case 'a': case 'A':
00890 #endif
00891         case 'g': case 'G': {
00892           addlenmod(form, LUA_FLTFRMLEN);
00893           nb = sprintf(buff, form, (LUA_FLTFRM_T)luaL_checknumber(L, arg));
00894           break;
00895         }
00896         case 'q': {
          /* addquoted appends to 'b' itself; 'buff' is not used and nb stays 0 */
00897           addquoted(L, &b, arg);
00898           break;
00899         }
00900         case 's': {
00901           size_t l;
00902           const char *s = luaL_tolstring(L, arg, &l);
00903           if (!strchr(form, '.') && l >= 100) {
00904             /* no precision and string is too long to be formatted;
00905                keep original string */
00906             luaL_addvalue(&b);
00907             break;
00908           }
00909           else {
00910             nb = sprintf(buff, form, s);
00911             lua_pop(L, 1);  /* remove result from 'luaL_tolstring' */
00912             break;
00913           }
00914         }
00915         default: {  /* also treat cases `pnLlh' */
00916           return luaL_error(L, "invalid option " LUA_QL("%%%c") " to "
00917                                LUA_QL("format"), *(strfrmt - 1));
00918         }
00919       }
      /* account for the bytes 'sprintf' wrote into the reserved area */
00920       luaL_addsize(&b, nb);
00921     }
00922   }
00923   luaL_pushresult(&b);
00924   return 1;
00925 }
00926 
00927 /* }====================================================== */
00928 
00929 
/*
** Functions exported by the string library: Lua-visible name paired
** with its C implementation, terminated by a {NULL, NULL} sentinel.
** Registered by 'luaopen_string'.
*/
00930 static const luaL_Reg strlib[] = {
00931   {"byte", str_byte},
00932   {"char", str_char},
00933   {"dump", str_dump},
00934   {"find", str_find},
00935   {"format", str_format},
00936   {"gmatch", gmatch},
00937   {"gsub", str_gsub},
00938   {"len", str_len},
00939   {"lower", str_lower},
00940   {"match", str_match},
00941   {"rep", str_rep},
00942   {"reverse", str_reverse},
00943   {"sub", str_sub},
00944   {"upper", str_upper},
00945   {NULL, NULL}
00946 };
00947 
00948 
00949 static void createmetatable (lua_State *L) {
00950   lua_createtable(L, 0, 1);  /* table to be metatable for strings */
00951   lua_pushliteral(L, "");  /* dummy string */
00952   lua_pushvalue(L, -2);  /* copy table */
00953   lua_setmetatable(L, -2);  /* set table as metatable for strings */
00954   lua_pop(L, 1);  /* pop dummy string */
00955   lua_pushvalue(L, -2);  /* get string library */
00956   lua_setfield(L, -2, "__index");  /* metatable.__index = string */
00957   lua_pop(L, 1);  /* pop metatable */
00958 }
00959 
00960 
00961 /*
00962 ** Open string library
** Creates the library table from 'strlib', installs it as "__index" of
** the string metatable (see 'createmetatable') and returns the table
** as the single result.
00963 */
00964 LUAMOD_API int luaopen_string (lua_State *L) {
00965   luaL_newlib(L, strlib);
00966   createmetatable(L);
00967   return 1;
00968 }
00969 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Generated by doxygen 1.7.1 on Fri May 25 2012 01:03:04 for The Battle for Wesnoth
Gna! | Forum | Wiki | CIA | devdocs