lua/llex.c

Go to the documentation of this file.
00001 /*
00002 ** $Id: llex.c,v 2.59 2011/11/30 12:43:51 roberto Exp $
00003 ** Lexical Analyzer
00004 ** See Copyright Notice in lua.h
00005 */
00006 
00007 
00008 #include <locale.h>
00009 #include <string.h>
00010 
00011 #define llex_c
00012 #define LUA_CORE
00013 
00014 #include "lua.h"
00015 
00016 #include "lctype.h"
00017 #include "ldo.h"
00018 #include "llex.h"
00019 #include "lobject.h"
00020 #include "lparser.h"
00021 #include "lstate.h"
00022 #include "lstring.h"
00023 #include "ltable.h"
00024 #include "lzio.h"
00025 
00026 
00027 
/*
** Advance the lexer by one character: store the result of zgetc on the
** lexer's input stream in 'current' and yield that value.
** Note: the argument is evaluated twice, so it must be free of side effects.
*/
#define next(ls) ((ls)->current = zgetc((ls)->z))



/*
** True when the current character is '\n' or '\r'.
** The argument is parenthesized so that any pointer expression (for
** instance '&state') can be passed; it is evaluated twice.
*/
#define currIsNewline(ls) \
  ((ls)->current == '\n' || (ls)->current == '\r')
00033 
00034 
/*
** Printable text of every multi-character token, indexed by
** (token - FIRST_RESERVED). The first NUM_RESERVED entries are the
** reserved words registered by luaX_init.
** ORDER RESERVED
*/
static const char *const luaX_tokens [] = {
  /* reserved words */
  "and", "break", "do", "else", "elseif", "end", "false", "for",
  "function", "goto", "if", "in", "local", "nil", "not", "or",
  "repeat", "return", "then", "true", "until", "while",
  /* other multi-character tokens */
  "..", "...", "==", ">=", "<=", "~=", "::",
  "<eof>", "<number>", "<name>", "<string>"
};
00044 
00045 
00046 #define save_and_next(ls) (save(ls, ls->current), next(ls))
00047 
00048 
00049 static l_noret lexerror (LexState *ls, const char *msg, int token);
00050 
00051 
00052 static void save (LexState *ls, int c) {
00053   Mbuffer *b = ls->buff;
00054   if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) {
00055     size_t newsize;
00056     if (luaZ_sizebuffer(b) >= MAX_SIZET/2)
00057       lexerror(ls, "lexical element too long", 0);
00058     newsize = luaZ_sizebuffer(b) * 2;
00059     luaZ_resizebuffer(ls->L, b, newsize);
00060   }
00061   b->buffer[luaZ_bufflen(b)++] = cast(char, c);
00062 }
00063 
00064 
00065 void luaX_init (lua_State *L) {
00066   int i;
00067   for (i=0; i<NUM_RESERVED; i++) {
00068     TString *ts = luaS_new(L, luaX_tokens[i]);
00069     luaS_fix(ts);  /* reserved words are never collected */
00070     ts->tsv.reserved = cast_byte(i+1);  /* reserved word */
00071   }
00072 }
00073 
00074 
00075 const char *luaX_token2str (LexState *ls, int token) {
00076   if (token < FIRST_RESERVED) {
00077     lua_assert(token == cast(unsigned char, token));
00078     return (lisprint(token)) ? luaO_pushfstring(ls->L, LUA_QL("%c"), token) :
00079                               luaO_pushfstring(ls->L, "char(%d)", token);
00080   }
00081   else {
00082     const char *s = luaX_tokens[token - FIRST_RESERVED];
00083     if (token < TK_EOS)
00084       return luaO_pushfstring(ls->L, LUA_QS, s);
00085     else
00086       return s;
00087   }
00088 }
00089 
00090 
00091 static const char *txtToken (LexState *ls, int token) {
00092   switch (token) {
00093     case TK_NAME:
00094     case TK_STRING:
00095     case TK_NUMBER:
00096       save(ls, '\0');
00097       return luaO_pushfstring(ls->L, LUA_QS, luaZ_buffer(ls->buff));
00098     default:
00099       return luaX_token2str(ls, token);
00100   }
00101 }
00102 
00103 
00104 static l_noret lexerror (LexState *ls, const char *msg, int token) {
00105   char buff[LUA_IDSIZE];
00106   luaO_chunkid(buff, getstr(ls->source), LUA_IDSIZE);
00107   msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg);
00108   if (token)
00109     luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token));
00110   luaD_throw(ls->L, LUA_ERRSYNTAX);
00111 }
00112 
00113 
00114 l_noret luaX_syntaxerror (LexState *ls, const char *msg) {
00115   lexerror(ls, msg, ls->t.token);
00116 }
00117 
00118 
00119 /*
00120 ** creates a new string and anchors it in function's table so that
00121 ** it will not be collected until the end of the function's compilation
00122 ** (by that time it should be anchored in function's prototype)
00123 */
00124 TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
00125   lua_State *L = ls->L;
00126   TValue *o;  /* entry for `str' */
00127   TString *ts = luaS_newlstr(L, str, l);  /* create new string */
00128   setsvalue2s(L, L->top++, ts);  /* temporarily anchor it in stack */
00129   o = luaH_set(L, ls->fs->h, L->top - 1);
00130   if (ttisnil(o)) {  /* not in use yet? (see 'addK') */
00131     /* boolean value does not need GC barrier;
00132        table has no metatable, so it does not need to invalidate cache */
00133     setbvalue(o, 1);  /* t[string] = true */
00134     luaC_checkGC(L);
00135   }
00136   L->top--;  /* remove string from stack */
00137   return ts;
00138 }
00139 
00140 
00141 /*
00142 ** increment line number and skips newline sequence (any of
00143 ** \n, \r, \n\r, or \r\n)
00144 */
00145 static void inclinenumber (LexState *ls) {
00146   int old = ls->current;
00147   lua_assert(currIsNewline(ls));
00148   next(ls);  /* skip `\n' or `\r' */
00149   if (currIsNewline(ls) && ls->current != old)
00150     next(ls);  /* skip `\n\r' or `\r\n' */
00151   if (++ls->linenumber >= MAX_INT)
00152     luaX_syntaxerror(ls, "chunk has too many lines");
00153 }
00154 
00155 
00156 void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source,
00157                     int firstchar) {
00158   ls->decpoint = '.';
00159   ls->L = L;
00160   ls->current = firstchar;
00161   ls->lookahead.token = TK_EOS;  /* no look-ahead token */
00162   ls->z = z;
00163   ls->fs = NULL;
00164   ls->linenumber = 1;
00165   ls->lastline = 1;
00166   ls->source = source;
00167   ls->envn = luaS_new(L, LUA_ENV);  /* create env name */
00168   luaS_fix(ls->envn);  /* never collect this name */
00169   luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
00170 }
00171 
00172 
00173 
00174 /*
00175 ** =======================================================
00176 ** LEXICAL ANALYZER
00177 ** =======================================================
00178 */
00179 
00180 
00181 
00182 static int check_next (LexState *ls, const char *set) {
00183   if (ls->current == '\0' || !strchr(set, ls->current))
00184     return 0;
00185   save_and_next(ls);
00186   return 1;
00187 }
00188 
00189 
00190 /*
00191 ** change all characters 'from' in buffer to 'to'
00192 */
00193 static void buffreplace (LexState *ls, char from, char to) {
00194   size_t n = luaZ_bufflen(ls->buff);
00195   char *p = luaZ_buffer(ls->buff);
00196   while (n--)
00197     if (p[n] == from) p[n] = to;
00198 }
00199 
00200 
/*
** First character of the current locale's decimal-point string.
** A definition supplied before this point takes precedence.
*/
#ifndef getlocaledecpoint
#define getlocaledecpoint() (localeconv()->decimal_point[0])
#endif


/*
** Hand the content of buffer 'b' (without its last character, which
** callers set to '\0' beforehand) to luaO_str2d, storing the number in
** '*e'. Callers treat a zero result as a format error.
*/
#define buff2d(b,e) \
  luaO_str2d(luaZ_buffer(b), luaZ_bufflen(b) - 1, e)
00207 
00208 /*
00209 ** in case of format error, try to change decimal point separator to
00210 ** the one defined in the current locale and check again
00211 */
00212 static void trydecpoint (LexState *ls, SemInfo *seminfo) {
00213   char old = ls->decpoint;
00214   ls->decpoint = getlocaledecpoint();
00215   buffreplace(ls, old, ls->decpoint);  /* try new decimal separator */
00216   if (!buff2d(ls->buff, &seminfo->r)) {
00217     /* format error with correct decimal point: no more options */
00218     buffreplace(ls, ls->decpoint, '.');  /* undo change (for error message) */
00219     lexerror(ls, "malformed number", TK_NUMBER);
00220   }
00221 }
00222 
00223 
00224 /* LUA_NUMBER */
00225 static void read_numeral (LexState *ls, SemInfo *seminfo) {
00226   lua_assert(lisdigit(ls->current));
00227   do {
00228     save_and_next(ls);
00229     if (check_next(ls, "EePp"))  /* exponent part? */
00230       check_next(ls, "+-");  /* optional exponent sign */
00231   } while (lislalnum(ls->current) || ls->current == '.');
00232   save(ls, '\0');
00233   buffreplace(ls, '.', ls->decpoint);  /* follow locale for decimal point */
00234   if (!buff2d(ls->buff, &seminfo->r))  /* format error? */
00235     trydecpoint(ls, seminfo); /* try to update decimal point separator */
00236 }
00237 
00238 
00239 /*
00240 ** skip a sequence '[=*[' or ']=*]' and return its number of '='s or
00241 ** -1 if sequence is malformed
00242 */
00243 static int skip_sep (LexState *ls) {
00244   int count = 0;
00245   int s = ls->current;
00246   lua_assert(s == '[' || s == ']');
00247   save_and_next(ls);
00248   while (ls->current == '=') {
00249     save_and_next(ls);
00250     count++;
00251   }
00252   return (ls->current == s) ? count : (-count) - 1;
00253 }
00254 
00255 
00256 static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {
00257   save_and_next(ls);  /* skip 2nd `[' */
00258   if (currIsNewline(ls))  /* string starts with a newline? */
00259     inclinenumber(ls);  /* skip it */
00260   for (;;) {
00261     switch (ls->current) {
00262       case EOZ:
00263         lexerror(ls, (seminfo) ? "unfinished long string" :
00264                                  "unfinished long comment", TK_EOS);
00265         break;  /* to avoid warnings */
00266       case ']': {
00267         if (skip_sep(ls) == sep) {
00268           save_and_next(ls);  /* skip 2nd `]' */
00269           goto endloop;
00270         }
00271         break;
00272       }
00273       case '\n': case '\r': {
00274         save(ls, '\n');
00275         inclinenumber(ls);
00276         if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
00277         break;
00278       }
00279       default: {
00280         if (seminfo) save_and_next(ls);
00281         else next(ls);
00282       }
00283     }
00284   } endloop:
00285   if (seminfo)
00286     seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),
00287                                      luaZ_bufflen(ls->buff) - 2*(2 + sep));
00288 }
00289 
00290 
00291 static void escerror (LexState *ls, int *c, int n, const char *msg) {
00292   int i;
00293   luaZ_resetbuffer(ls->buff);  /* prepare error message */
00294   save(ls, '\\');
00295   for (i = 0; i < n && c[i] != EOZ; i++)
00296     save(ls, c[i]);
00297   lexerror(ls, msg, TK_STRING);
00298 }
00299 
00300 
00301 static int readhexaesc (LexState *ls) {
00302   int c[3], i;  /* keep input for error message */
00303   int r = 0;  /* result accumulator */
00304   c[0] = 'x';  /* for error message */
00305   for (i = 1; i < 3; i++) {  /* read two hexa digits */
00306     c[i] = next(ls);
00307     if (!lisxdigit(c[i]))
00308       escerror(ls, c, i + 1, "hexadecimal digit expected");
00309     r = (r << 4) + luaO_hexavalue(c[i]);
00310   }
00311   return r;
00312 }
00313 
00314 
00315 static int readdecesc (LexState *ls) {
00316   int c[3], i;
00317   int r = 0;  /* result accumulator */
00318   for (i = 0; i < 3 && lisdigit(ls->current); i++) {  /* read up to 3 digits */
00319     c[i] = ls->current;
00320     r = 10*r + c[i] - '0';
00321     next(ls);
00322   }
00323   if (r > UCHAR_MAX)
00324     escerror(ls, c, i, "decimal escape too large");
00325   return r;
00326 }
00327 
00328 
00329 static void read_string (LexState *ls, int del, SemInfo *seminfo) {
00330   save_and_next(ls);  /* keep delimiter (for error messages) */
00331   while (ls->current != del) {
00332     switch (ls->current) {
00333       case EOZ:
00334         lexerror(ls, "unfinished string", TK_EOS);
00335         break;  /* to avoid warnings */
00336       case '\n':
00337       case '\r':
00338         lexerror(ls, "unfinished string", TK_STRING);
00339         break;  /* to avoid warnings */
00340       case '\\': {  /* escape sequences */
00341         int c;  /* final character to be saved */
00342         next(ls);  /* do not save the `\' */
00343         switch (ls->current) {
00344           case 'a': c = '\a'; goto read_save;
00345           case 'b': c = '\b'; goto read_save;
00346           case 'f': c = '\f'; goto read_save;
00347           case 'n': c = '\n'; goto read_save;
00348           case 'r': c = '\r'; goto read_save;
00349           case 't': c = '\t'; goto read_save;
00350           case 'v': c = '\v'; goto read_save;
00351           case 'x': c = readhexaesc(ls); goto read_save;
00352           case '\n': case '\r':
00353             inclinenumber(ls); c = '\n'; goto only_save;
00354           case '\\': case '\"': case '\'':
00355             c = ls->current; goto read_save;
00356           case EOZ: goto no_save;  /* will raise an error next loop */
00357           case 'z': {  /* zap following span of spaces */
00358             next(ls);  /* skip the 'z' */
00359             while (lisspace(ls->current)) {
00360               if (currIsNewline(ls)) inclinenumber(ls);
00361               else next(ls);
00362             }
00363             goto no_save;
00364           }
00365           default: {
00366             if (!lisdigit(ls->current))
00367               escerror(ls, &ls->current, 1, "invalid escape sequence");
00368             /* digital escape \ddd */
00369             c = readdecesc(ls);
00370             goto only_save;
00371           }
00372         }
00373        read_save: next(ls);  /* read next character */
00374        only_save: save(ls, c);  /* save 'c' */
00375        no_save: break;
00376       }
00377       default:
00378         save_and_next(ls);
00379     }
00380   }
00381   save_and_next(ls);  /* skip delimiter */
00382   seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
00383                                    luaZ_bufflen(ls->buff) - 2);
00384 }
00385 
00386 
00387 static int llex (LexState *ls, SemInfo *seminfo) {
00388   luaZ_resetbuffer(ls->buff);
00389   for (;;) {
00390     switch (ls->current) {
00391       case '\n': case '\r': {  /* line breaks */
00392         inclinenumber(ls);
00393         break;
00394       }
00395       case ' ': case '\f': case '\t': case '\v': {  /* spaces */
00396         next(ls);
00397         break;
00398       }
00399       case '-': {  /* '-' or '--' (comment) */
00400         next(ls);
00401         if (ls->current != '-') return '-';
00402         /* else is a comment */
00403         next(ls);
00404         if (ls->current == '[') {  /* long comment? */
00405           int sep = skip_sep(ls);
00406           luaZ_resetbuffer(ls->buff);  /* `skip_sep' may dirty the buffer */
00407           if (sep >= 0) {
00408             read_long_string(ls, NULL, sep);  /* skip long comment */
00409             luaZ_resetbuffer(ls->buff);  /* previous call may dirty the buff. */
00410             break;
00411           }
00412         }
00413         /* else short comment */
00414         while (!currIsNewline(ls) && ls->current != EOZ)
00415           next(ls);  /* skip until end of line (or end of file) */
00416         break;
00417       }
00418       case '[': {  /* long string or simply '[' */
00419         int sep = skip_sep(ls);
00420         if (sep >= 0) {
00421           read_long_string(ls, seminfo, sep);
00422           return TK_STRING;
00423         }
00424         else if (sep == -1) return '[';
00425         else lexerror(ls, "invalid long string delimiter", TK_STRING);
00426       }
00427       case '=': {
00428         next(ls);
00429         if (ls->current != '=') return '=';
00430         else { next(ls); return TK_EQ; }
00431       }
00432       case '<': {
00433         next(ls);
00434         if (ls->current != '=') return '<';
00435         else { next(ls); return TK_LE; }
00436       }
00437       case '>': {
00438         next(ls);
00439         if (ls->current != '=') return '>';
00440         else { next(ls); return TK_GE; }
00441       }
00442       case '~': {
00443         next(ls);
00444         if (ls->current != '=') return '~';
00445         else { next(ls); return TK_NE; }
00446       }
00447       case ':': {
00448         next(ls);
00449         if (ls->current != ':') return ':';
00450         else { next(ls); return TK_DBCOLON; }
00451       }
00452       case '"': case '\'': {  /* short literal strings */
00453         read_string(ls, ls->current, seminfo);
00454         return TK_STRING;
00455       }
00456       case '.': {  /* '.', '..', '...', or number */
00457         save_and_next(ls);
00458         if (check_next(ls, ".")) {
00459           if (check_next(ls, "."))
00460             return TK_DOTS;   /* '...' */
00461           else return TK_CONCAT;   /* '..' */
00462         }
00463         else if (!lisdigit(ls->current)) return '.';
00464         /* else go through */
00465       }
00466       case '0': case '1': case '2': case '3': case '4':
00467       case '5': case '6': case '7': case '8': case '9': {
00468         read_numeral(ls, seminfo);
00469         return TK_NUMBER;
00470       }
00471       case EOZ: {
00472         return TK_EOS;
00473       }
00474       default: {
00475         if (lislalpha(ls->current)) {  /* identifier or reserved word? */
00476           TString *ts;
00477           do {
00478             save_and_next(ls);
00479           } while (lislalnum(ls->current));
00480           ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
00481                                   luaZ_bufflen(ls->buff));
00482           seminfo->ts = ts;
00483           if (ts->tsv.reserved > 0)  /* reserved word? */
00484             return ts->tsv.reserved - 1 + FIRST_RESERVED;
00485           else {
00486             return TK_NAME;
00487           }
00488         }
00489         else {  /* single-char tokens (+ - / ...) */
00490           int c = ls->current;
00491           next(ls);
00492           return c;
00493         }
00494       }
00495     }
00496   }
00497 }
00498 
00499 
00500 void luaX_next (LexState *ls) {
00501   ls->lastline = ls->linenumber;
00502   if (ls->lookahead.token != TK_EOS) {  /* is there a look-ahead token? */
00503     ls->t = ls->lookahead;  /* use this one */
00504     ls->lookahead.token = TK_EOS;  /* and discharge it */
00505   }
00506   else
00507     ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
00508 }
00509 
00510 
00511 int luaX_lookahead (LexState *ls) {
00512   lua_assert(ls->lookahead.token == TK_EOS);
00513   ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
00514   return ls->lookahead.token;
00515 }
00516 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Generated by doxygen 1.7.1 on Fri May 25 2012 01:03:04 for The Battle for Wesnoth
Gna! | Forum | Wiki | CIA | devdocs