/* rpm 4.5 */
00001 /* 00002 ** $Id: llex.c,v 1.1 2004/03/16 21:58:30 niemeyer Exp $ 00003 ** Lexical Analyzer 00004 ** See Copyright Notice in lua.h 00005 */ 00006 00007 00008 #include <ctype.h> 00009 #include <string.h> 00010 00011 #define llex_c 00012 00013 #include "lua.h" 00014 00015 #include "ldo.h" 00016 #include "llex.h" 00017 #include "lobject.h" 00018 #include "lparser.h" 00019 #include "lstate.h" 00020 #include "lstring.h" 00021 #include "lzio.h" 00022 00023 00024 00025 #define next(LS) (LS->current = zgetc(LS->z)) 00026 00027 00028 00029 /* ORDER RESERVED */ 00030 /*@observer@*/ /*@unchecked@*/ 00031 static const char *const token2string [] = { 00032 "and", "break", "do", "else", "elseif", 00033 "end", "false", "for", "function", "if", 00034 "in", "local", "nil", "not", "or", "repeat", 00035 "return", "then", "true", "until", "while", "*name", 00036 "..", "...", "==", ">=", "<=", "~=", 00037 "*number", "*string", "<eof>" 00038 }; 00039 00040 00041 void luaX_init (lua_State *L) { 00042 int i; 00043 for (i=0; i<NUM_RESERVED; i++) { 00044 TString *ts = luaS_new(L, token2string[i]); 00045 luaS_fix(ts); /* reserved words are never collected */ 00046 lua_assert(strlen(token2string[i])+1 <= TOKEN_LEN); 00047 ts->tsv.reserved = cast(lu_byte, i+1); /* reserved word */ 00048 } 00049 } 00050 00051 00052 #define MAXSRC 80 00053 00054 00055 void luaX_checklimit (LexState *ls, int val, int limit, const char *msg) { 00056 if (val > limit) { 00057 msg = luaO_pushfstring(ls->L, "too many %s (limit=%d)", msg, limit); 00058 luaX_syntaxerror(ls, msg); 00059 } 00060 } 00061 00062 00063 void luaX_errorline (LexState *ls, const char *s, const char *token, int line) { 00064 lua_State *L = ls->L; 00065 char buff[MAXSRC]; 00066 luaO_chunkid(buff, getstr(ls->source), MAXSRC); 00067 luaO_pushfstring(L, "%s:%d: %s near `%s'", buff, line, s, token); 00068 luaD_throw(L, LUA_ERRSYNTAX); 00069 } 00070 00071 00072 static void luaX_error (LexState *ls, const char *s, const char *token) 00073 
/*@modifies ls @*/ 00074 { 00075 luaX_errorline(ls, s, token, ls->linenumber); 00076 } 00077 00078 00079 void luaX_syntaxerror (LexState *ls, const char *msg) { 00080 const char *lasttoken; 00081 switch (ls->t.token) { 00082 case TK_NAME: 00083 lasttoken = getstr(ls->t.seminfo.ts); 00084 break; 00085 case TK_STRING: 00086 case TK_NUMBER: 00087 lasttoken = luaZ_buffer(ls->buff); 00088 break; 00089 default: 00090 lasttoken = luaX_token2str(ls, ls->t.token); 00091 break; 00092 } 00093 luaX_error(ls, msg, lasttoken); 00094 } 00095 00096 00097 const char *luaX_token2str (LexState *ls, int token) { 00098 if (token < FIRST_RESERVED) { 00099 lua_assert(token == (unsigned char)token); 00100 return luaO_pushfstring(ls->L, "%c", token); 00101 } 00102 else 00103 return token2string[token-FIRST_RESERVED]; 00104 } 00105 00106 00107 static void luaX_lexerror (LexState *ls, const char *s, int token) 00108 /*@modifies ls @*/ 00109 { 00110 if (token == TK_EOS) 00111 luaX_error(ls, s, luaX_token2str(ls, token)); 00112 else 00113 luaX_error(ls, s, luaZ_buffer(ls->buff)); 00114 } 00115 00116 00117 static void inclinenumber (LexState *LS) 00118 /*@modifies LS @*/ 00119 { 00120 next(LS); /* skip `\n' */ 00121 ++LS->linenumber; 00122 luaX_checklimit(LS, LS->linenumber, MAX_INT, "lines in a chunk"); 00123 } 00124 00125 00126 void luaX_setinput (lua_State *L, LexState *LS, ZIO *z, TString *source) { 00127 LS->L = L; 00128 LS->lookahead.token = TK_EOS; /* no look-ahead token */ 00129 LS->z = z; 00130 LS->fs = NULL; 00131 LS->linenumber = 1; 00132 LS->lastline = 1; 00133 LS->source = source; 00134 next(LS); /* read first char */ 00135 if (LS->current == '#') { 00136 do { /* skip first line */ 00137 next(LS); 00138 } while (LS->current != '\n' && LS->current != EOZ); 00139 } 00140 } 00141 00142 00143 00144 /* 00145 ** ======================================================= 00146 ** LEXICAL ANALYZER 00147 ** ======================================================= 00148 */ 00149 00150 00151 /* 
use buffer to store names, literal strings and numbers */ 00152 00153 /* extra space to allocate when growing buffer */ 00154 #define EXTRABUFF 32 00155 00156 /* maximum number of chars that can be read without checking buffer size */ 00157 #define MAXNOCHECK 5 00158 00159 #define checkbuffer(LS, len) \ 00160 if (((len)+MAXNOCHECK)*sizeof(char) > luaZ_sizebuffer((LS)->buff)) \ 00161 luaZ_openspace((LS)->L, (LS)->buff, (len)+EXTRABUFF) 00162 00163 #define save(LS, c, l) \ 00164 (luaZ_buffer((LS)->buff)[l++] = cast(char, c)) 00165 #define save_and_next(LS, l) (save(LS, LS->current, l), next(LS)) 00166 00167 00168 static size_t readname (LexState *LS) 00169 /*@modifies LS @*/ 00170 { 00171 size_t l = 0; 00172 checkbuffer(LS, l); 00173 do { 00174 checkbuffer(LS, l); 00175 save_and_next(LS, l); 00176 } while (isalnum(LS->current) || LS->current == '_'); 00177 save(LS, '\0', l); 00178 return l-1; 00179 } 00180 00181 00182 /* LUA_NUMBER */ 00183 static void read_numeral (LexState *LS, int comma, SemInfo *seminfo) 00184 /*@modifies LS, seminfo @*/ 00185 { 00186 size_t l = 0; 00187 checkbuffer(LS, l); 00188 if (comma) save(LS, '.', l); 00189 while (isdigit(LS->current)) { 00190 checkbuffer(LS, l); 00191 save_and_next(LS, l); 00192 } 00193 if (LS->current == '.') { 00194 save_and_next(LS, l); 00195 if (LS->current == '.') { 00196 save_and_next(LS, l); 00197 save(LS, '\0', l); 00198 luaX_lexerror(LS, 00199 "ambiguous syntax (decimal point x string concatenation)", 00200 TK_NUMBER); 00201 } 00202 } 00203 while (isdigit(LS->current)) { 00204 checkbuffer(LS, l); 00205 save_and_next(LS, l); 00206 } 00207 if (LS->current == 'e' || LS->current == 'E') { 00208 save_and_next(LS, l); /* read `E' */ 00209 if (LS->current == '+' || LS->current == '-') 00210 save_and_next(LS, l); /* optional exponent sign */ 00211 while (isdigit(LS->current)) { 00212 checkbuffer(LS, l); 00213 save_and_next(LS, l); 00214 } 00215 } 00216 save(LS, '\0', l); 00217 if (!luaO_str2d(luaZ_buffer(LS->buff), 
&seminfo->r)) 00218 luaX_lexerror(LS, "malformed number", TK_NUMBER); 00219 } 00220 00221 00222 static void read_long_string (LexState *LS, /*@null@*/ SemInfo *seminfo) 00223 /*@modifies LS, seminfo @*/ 00224 { 00225 int cont = 0; 00226 size_t l = 0; 00227 checkbuffer(LS, l); 00228 save(LS, '[', l); /* save first `[' */ 00229 save_and_next(LS, l); /* pass the second `[' */ 00230 if (LS->current == '\n') /* string starts with a newline? */ 00231 inclinenumber(LS); /* skip it */ 00232 for (;;) { 00233 checkbuffer(LS, l); 00234 switch (LS->current) { 00235 case EOZ: 00236 save(LS, '\0', l); 00237 luaX_lexerror(LS, (seminfo) ? "unfinished long string" : 00238 "unfinished long comment", TK_EOS); 00239 break; /* to avoid warnings */ 00240 case '[': 00241 save_and_next(LS, l); 00242 if (LS->current == '[') { 00243 cont++; 00244 save_and_next(LS, l); 00245 } 00246 continue; 00247 case ']': 00248 save_and_next(LS, l); 00249 if (LS->current == ']') { 00250 if (cont == 0) goto endloop; 00251 cont--; 00252 save_and_next(LS, l); 00253 } 00254 continue; 00255 case '\n': 00256 save(LS, '\n', l); 00257 inclinenumber(LS); 00258 if (!seminfo) l = 0; /* reset buffer to avoid wasting space */ 00259 continue; 00260 default: 00261 save_and_next(LS, l); 00262 } 00263 } endloop: 00264 save_and_next(LS, l); /* skip the second `]' */ 00265 save(LS, '\0', l); 00266 if (seminfo) 00267 seminfo->ts = luaS_newlstr(LS->L, luaZ_buffer(LS->buff) + 2, l - 5); 00268 } 00269 00270 00271 static void read_string (LexState *LS, int del, SemInfo *seminfo) 00272 /*@modifies LS, seminfo @*/ 00273 { 00274 size_t l = 0; 00275 checkbuffer(LS, l); 00276 save_and_next(LS, l); 00277 while (LS->current != del) { 00278 checkbuffer(LS, l); 00279 switch (LS->current) { 00280 case EOZ: 00281 save(LS, '\0', l); 00282 luaX_lexerror(LS, "unfinished string", TK_EOS); 00283 break; /* to avoid warnings */ 00284 case '\n': 00285 save(LS, '\0', l); 00286 luaX_lexerror(LS, "unfinished string", TK_STRING); 00287 break; /* to 
avoid warnings */ 00288 case '\\': 00289 next(LS); /* do not save the `\' */ 00290 switch (LS->current) { 00291 case 'a': save(LS, '\a', l); next(LS); break; 00292 case 'b': save(LS, '\b', l); next(LS); break; 00293 case 'f': save(LS, '\f', l); next(LS); break; 00294 case 'n': save(LS, '\n', l); next(LS); break; 00295 case 'r': save(LS, '\r', l); next(LS); break; 00296 case 't': save(LS, '\t', l); next(LS); break; 00297 case 'v': save(LS, '\v', l); next(LS); break; 00298 case '\n': save(LS, '\n', l); inclinenumber(LS); break; 00299 case EOZ: break; /* will raise an error next loop */ 00300 default: { 00301 if (!isdigit(LS->current)) 00302 save_and_next(LS, l); /* handles \\, \", \', and \? */ 00303 else { /* \xxx */ 00304 int c = 0; 00305 int i = 0; 00306 do { 00307 c = 10*c + (LS->current-'0'); 00308 next(LS); 00309 } while (++i<3 && isdigit(LS->current)); 00310 if (c > UCHAR_MAX) { 00311 save(LS, '\0', l); 00312 luaX_lexerror(LS, "escape sequence too large", TK_STRING); 00313 } 00314 save(LS, c, l); 00315 } 00316 } 00317 } 00318 break; 00319 default: 00320 save_and_next(LS, l); 00321 } 00322 } 00323 save_and_next(LS, l); /* skip delimiter */ 00324 save(LS, '\0', l); 00325 seminfo->ts = luaS_newlstr(LS->L, luaZ_buffer(LS->buff) + 1, l - 3); 00326 } 00327 00328 00329 int luaX_lex (LexState *LS, SemInfo *seminfo) { 00330 for (;;) { 00331 switch (LS->current) { 00332 00333 case '\n': { 00334 inclinenumber(LS); 00335 continue; 00336 } 00337 case '-': { 00338 next(LS); 00339 if (LS->current != '-') return '-'; 00340 /* else is a comment */ 00341 next(LS); 00342 if (LS->current == '[' && (next(LS), LS->current == '[')) 00343 read_long_string(LS, NULL); /* long comment */ 00344 else /* short comment */ 00345 while (LS->current != '\n' && LS->current != EOZ) 00346 next(LS); 00347 continue; 00348 } 00349 case '[': { 00350 next(LS); 00351 if (LS->current != '[') return '['; 00352 else { 00353 read_long_string(LS, seminfo); 00354 return TK_STRING; 00355 } 00356 } 00357 case 
'=': { 00358 next(LS); 00359 if (LS->current != '=') return '='; 00360 else { next(LS); return TK_EQ; } 00361 } 00362 case '<': { 00363 next(LS); 00364 if (LS->current != '=') return '<'; 00365 else { next(LS); return TK_LE; } 00366 } 00367 case '>': { 00368 next(LS); 00369 if (LS->current != '=') return '>'; 00370 else { next(LS); return TK_GE; } 00371 } 00372 case '~': { 00373 next(LS); 00374 if (LS->current != '=') return '~'; 00375 else { next(LS); return TK_NE; } 00376 } 00377 case '"': 00378 case '\'': { 00379 read_string(LS, LS->current, seminfo); 00380 return TK_STRING; 00381 } 00382 case '.': { 00383 next(LS); 00384 if (LS->current == '.') { 00385 next(LS); 00386 if (LS->current == '.') { 00387 next(LS); 00388 return TK_DOTS; /* ... */ 00389 } 00390 else return TK_CONCAT; /* .. */ 00391 } 00392 else if (!isdigit(LS->current)) return '.'; 00393 else { 00394 read_numeral(LS, 1, seminfo); 00395 return TK_NUMBER; 00396 } 00397 } 00398 case EOZ: { 00399 return TK_EOS; 00400 } 00401 default: { 00402 if (isspace(LS->current)) { 00403 next(LS); 00404 continue; 00405 } 00406 else if (isdigit(LS->current)) { 00407 read_numeral(LS, 0, seminfo); 00408 return TK_NUMBER; 00409 } 00410 else if (isalpha(LS->current) || LS->current == '_') { 00411 /* identifier or reserved word */ 00412 size_t l = readname(LS); 00413 TString *ts = luaS_newlstr(LS->L, luaZ_buffer(LS->buff), l); 00414 if (ts->tsv.reserved > 0) /* reserved word? */ 00415 return ts->tsv.reserved - 1 + FIRST_RESERVED; 00416 seminfo->ts = ts; 00417 return TK_NAME; 00418 } 00419 else { 00420 int c = LS->current; 00421 if (iscntrl(c)) 00422 luaX_error(LS, "invalid control char", 00423 luaO_pushfstring(LS->L, "char(%d)", c)); 00424 next(LS); 00425 return c; /* single-char tokens (+ - / ...) */ 00426 } 00427 } 00428 } 00429 } 00430 } 00431 00432 #undef next