Mercurial > vba-clojure
diff src/lua/llex.c @ 11:27763b933818
raise lua sources up one level
author | Robert McIntyre <rlm@mit.edu> |
---|---|
date | Sat, 03 Mar 2012 11:07:39 -0600 |
parents | src/lua/src/llex.c@f9f4f1b99eed |
children |
line wrap: on
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/lua/llex.c Sat Mar 03 11:07:39 2012 -0600 1.3 @@ -0,0 +1,461 @@ 1.4 +/* 1.5 +** $Id: llex.c,v 2.20.1.1 2007/12/27 13:02:25 roberto Exp $ 1.6 +** Lexical Analyzer 1.7 +** See Copyright Notice in lua.h 1.8 +*/ 1.9 + 1.10 + 1.11 +#include <ctype.h> 1.12 +#include <locale.h> 1.13 +#include <string.h> 1.14 + 1.15 +#define llex_c 1.16 +#define LUA_CORE 1.17 + 1.18 +#include "lua.h" 1.19 + 1.20 +#include "ldo.h" 1.21 +#include "llex.h" 1.22 +#include "lobject.h" 1.23 +#include "lparser.h" 1.24 +#include "lstate.h" 1.25 +#include "lstring.h" 1.26 +#include "ltable.h" 1.27 +#include "lzio.h" 1.28 + 1.29 + 1.30 + 1.31 +#define next(ls) (ls->current = zgetc(ls->z)) 1.32 + 1.33 + 1.34 + 1.35 + 1.36 +#define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r') 1.37 + 1.38 + 1.39 +/* ORDER RESERVED */ 1.40 +const char *const luaX_tokens [] = { 1.41 + "and", "break", "do", "else", "elseif", 1.42 + "end", "false", "for", "function", "if", 1.43 + "in", "local", "nil", "not", "or", "repeat", 1.44 + "return", "then", "true", "until", "while", 1.45 + "..", "...", "==", ">=", "<=", "~=", 1.46 + "<number>", "<name>", "<string>", "<eof>", 1.47 + NULL 1.48 +}; 1.49 + 1.50 + 1.51 +#define save_and_next(ls) (save(ls, ls->current), next(ls)) 1.52 + 1.53 + 1.54 +static void save (LexState *ls, int c) { 1.55 + Mbuffer *b = ls->buff; 1.56 + if (b->n + 1 > b->buffsize) { 1.57 + size_t newsize; 1.58 + if (b->buffsize >= MAX_SIZET/2) 1.59 + luaX_lexerror(ls, "lexical element too long", 0); 1.60 + newsize = b->buffsize * 2; 1.61 + luaZ_resizebuffer(ls->L, b, newsize); 1.62 + } 1.63 + b->buffer[b->n++] = cast(char, c); 1.64 +} 1.65 + 1.66 + 1.67 +void luaX_init (lua_State *L) { 1.68 + int i; 1.69 + for (i=0; i<NUM_RESERVED; i++) { 1.70 + TString *ts = luaS_new(L, luaX_tokens[i]); 1.71 + luaS_fix(ts); /* reserved words are never collected */ 1.72 + lua_assert(strlen(luaX_tokens[i])+1 <= TOKEN_LEN); 1.73 + ts->tsv.reserved = cast_byte(i+1); /* reserved word */ 1.74 + } 1.75 +} 1.76 + 1.77 + 1.78 +#define MAXSRC 80 1.79 + 1.80 + 1.81 +const char *luaX_token2str (LexState *ls, int token) { 1.82 + if (token < FIRST_RESERVED) { 1.83 + lua_assert(token == cast(unsigned char, token)); 1.84 + return (iscntrl(token)) ? luaO_pushfstring(ls->L, "char(%d)", token) : 1.85 + luaO_pushfstring(ls->L, "%c", token); 1.86 + } 1.87 + else 1.88 + return luaX_tokens[token-FIRST_RESERVED]; 1.89 +} 1.90 + 1.91 + 1.92 +static const char *txtToken (LexState *ls, int token) { 1.93 + switch (token) { 1.94 + case TK_NAME: 1.95 + case TK_STRING: 1.96 + case TK_NUMBER: 1.97 + save(ls, '\0'); 1.98 + return luaZ_buffer(ls->buff); 1.99 + default: 1.100 + return luaX_token2str(ls, token); 1.101 + } 1.102 +} 1.103 + 1.104 + 1.105 +void luaX_lexerror (LexState *ls, const char *msg, int token) { 1.106 + char buff[MAXSRC]; 1.107 + luaO_chunkid(buff, getstr(ls->source), MAXSRC); 1.108 + msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg); 1.109 + if (token) 1.110 + luaO_pushfstring(ls->L, "%s near " LUA_QS, msg, txtToken(ls, token)); 1.111 + luaD_throw(ls->L, LUA_ERRSYNTAX); 1.112 +} 1.113 + 1.114 + 1.115 +void luaX_syntaxerror (LexState *ls, const char *msg) { 1.116 + luaX_lexerror(ls, msg, ls->t.token); 1.117 +} 1.118 + 1.119 + 1.120 +TString *luaX_newstring (LexState *ls, const char *str, size_t l) { 1.121 + lua_State *L = ls->L; 1.122 + TString *ts = luaS_newlstr(L, str, l); 1.123 + TValue *o = luaH_setstr(L, ls->fs->h, ts); /* entry for `str' */ 1.124 + if (ttisnil(o)) 1.125 + setbvalue(o, 1); /* make sure `str' will not be collected */ 1.126 + return ts; 1.127 +} 1.128 + 1.129 + 1.130 +static void inclinenumber (LexState *ls) { 1.131 + int old = ls->current; 1.132 + lua_assert(currIsNewline(ls)); 1.133 + next(ls); /* skip `\n' or `\r' */ 1.134 + if (currIsNewline(ls) && ls->current != old) 1.135 + next(ls); /* skip `\n\r' or `\r\n' */ 1.136 + if (++ls->linenumber >= MAX_INT) 1.137 + luaX_syntaxerror(ls, "chunk has too many lines"); 1.138 +} 1.139 + 1.140 + 1.141 +void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) { 1.142 + ls->decpoint = '.'; 1.143 + ls->L = L; 1.144 + ls->lookahead.token = TK_EOS; /* no look-ahead token */ 1.145 + ls->z = z; 1.146 + ls->fs = NULL; 1.147 + ls->linenumber = 1; 1.148 + ls->lastline = 1; 1.149 + ls->source = source; 1.150 + luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */ 1.151 + next(ls); /* read first char */ 1.152 +} 1.153 + 1.154 + 1.155 + 1.156 +/* 1.157 +** ======================================================= 1.158 +** LEXICAL ANALYZER 1.159 +** ======================================================= 1.160 +*/ 1.161 + 1.162 + 1.163 + 1.164 +static int check_next (LexState *ls, const char *set) { 1.165 + if (!strchr(set, ls->current)) 1.166 + return 0; 1.167 + save_and_next(ls); 1.168 + return 1; 1.169 +} 1.170 + 1.171 + 1.172 +static void buffreplace (LexState *ls, char from, char to) { 1.173 + size_t n = luaZ_bufflen(ls->buff); 1.174 + char *p = luaZ_buffer(ls->buff); 1.175 + while (n--) 1.176 + if (p[n] == from) p[n] = to; 1.177 +} 1.178 + 1.179 + 1.180 +static void trydecpoint (LexState *ls, SemInfo *seminfo) { 1.181 + /* format error: try to update decimal point separator */ 1.182 + struct lconv *cv = localeconv(); 1.183 + char old = ls->decpoint; 1.184 + ls->decpoint = (cv ? cv->decimal_point[0] : '.'); 1.185 + buffreplace(ls, old, ls->decpoint); /* try updated decimal separator */ 1.186 + if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) { 1.187 + /* format error with correct decimal point: no more options */ 1.188 + buffreplace(ls, ls->decpoint, '.'); /* undo change (for error message) */ 1.189 + luaX_lexerror(ls, "malformed number", TK_NUMBER); 1.190 + } 1.191 +} 1.192 + 1.193 + 1.194 +/* LUA_NUMBER */ 1.195 +static void read_numeral (LexState *ls, SemInfo *seminfo) { 1.196 + lua_assert(isdigit(ls->current)); 1.197 + do { 1.198 + save_and_next(ls); 1.199 + } while (isdigit(ls->current) || ls->current == '.'); 1.200 + if (check_next(ls, "Ee")) /* `E'? */ 1.201 + check_next(ls, "+-"); /* optional exponent sign */ 1.202 + while (isalnum(ls->current) || ls->current == '_') 1.203 + save_and_next(ls); 1.204 + save(ls, '\0'); 1.205 + buffreplace(ls, '.', ls->decpoint); /* follow locale for decimal point */ 1.206 + if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) /* format error? */ 1.207 + trydecpoint(ls, seminfo); /* try to update decimal point separator */ 1.208 +} 1.209 + 1.210 + 1.211 +static int skip_sep (LexState *ls) { 1.212 + int count = 0; 1.213 + int s = ls->current; 1.214 + lua_assert(s == '[' || s == ']'); 1.215 + save_and_next(ls); 1.216 + while (ls->current == '=') { 1.217 + save_and_next(ls); 1.218 + count++; 1.219 + } 1.220 + return (ls->current == s) ? count : (-count) - 1; 1.221 +} 1.222 + 1.223 + 1.224 +static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) { 1.225 + int cont = 0; 1.226 + (void)(cont); /* avoid warnings when `cont' is not used */ 1.227 + save_and_next(ls); /* skip 2nd `[' */ 1.228 + if (currIsNewline(ls)) /* string starts with a newline? */ 1.229 + inclinenumber(ls); /* skip it */ 1.230 + for (;;) { 1.231 + switch (ls->current) { 1.232 + case EOZ: 1.233 + luaX_lexerror(ls, (seminfo) ? "unfinished long string" : 1.234 + "unfinished long comment", TK_EOS); 1.235 + break; /* to avoid warnings */ 1.236 +#if defined(LUA_COMPAT_LSTR) 1.237 + case '[': { 1.238 + if (skip_sep(ls) == sep) { 1.239 + save_and_next(ls); /* skip 2nd `[' */ 1.240 + cont++; 1.241 +#if LUA_COMPAT_LSTR == 1 1.242 + if (sep == 0) 1.243 + luaX_lexerror(ls, "nesting of [[...]] is deprecated", '['); 1.244 +#endif 1.245 + } 1.246 + break; 1.247 + } 1.248 +#endif 1.249 + case ']': { 1.250 + if (skip_sep(ls) == sep) { 1.251 + save_and_next(ls); /* skip 2nd `]' */ 1.252 +#if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2 1.253 + cont--; 1.254 + if (sep == 0 && cont >= 0) break; 1.255 +#endif 1.256 + goto endloop; 1.257 + } 1.258 + break; 1.259 + } 1.260 + case '\n': 1.261 + case '\r': { 1.262 + save(ls, '\n'); 1.263 + inclinenumber(ls); 1.264 + if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */ 1.265 + break; 1.266 + } 1.267 + default: { 1.268 + if (seminfo) save_and_next(ls); 1.269 + else next(ls); 1.270 + } 1.271 + } 1.272 + } endloop: 1.273 + if (seminfo) 1.274 + seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep), 1.275 + luaZ_bufflen(ls->buff) - 2*(2 + sep)); 1.276 +} 1.277 + 1.278 + 1.279 +static void read_string (LexState *ls, int del, SemInfo *seminfo) { 1.280 + save_and_next(ls); 1.281 + while (ls->current != del) { 1.282 + switch (ls->current) { 1.283 + case EOZ: 1.284 + luaX_lexerror(ls, "unfinished string", TK_EOS); 1.285 + continue; /* to avoid warnings */ 1.286 + case '\n': 1.287 + case '\r': 1.288 + luaX_lexerror(ls, "unfinished string", TK_STRING); 1.289 + continue; /* to avoid warnings */ 1.290 + case '\\': { 1.291 + int c; 1.292 + next(ls); /* do not save the `\' */ 1.293 + switch (ls->current) { 1.294 + case 'a': c = '\a'; break; 1.295 + case 'b': c = '\b'; break; 1.296 + case 'f': c = '\f'; break; 1.297 + case 'n': c = '\n'; break; 1.298 + case 'r': c = '\r'; break; 1.299 + case 't': c = '\t'; break; 1.300 + case 'v': c = '\v'; break; 1.301 + case '\n': /* go through */ 1.302 + case '\r': save(ls, '\n'); inclinenumber(ls); continue; 1.303 + case EOZ: continue; /* will raise an error next loop */ 1.304 + default: { 1.305 + if (!isdigit(ls->current)) 1.306 + save_and_next(ls); /* handles \\, \", \', and \? */ 1.307 + else { /* \xxx */ 1.308 + int i = 0; 1.309 + c = 0; 1.310 + do { 1.311 + c = 10*c + (ls->current-'0'); 1.312 + next(ls); 1.313 + } while (++i<3 && isdigit(ls->current)); 1.314 + if (c > UCHAR_MAX) 1.315 + luaX_lexerror(ls, "escape sequence too large", TK_STRING); 1.316 + save(ls, c); 1.317 + } 1.318 + continue; 1.319 + } 1.320 + } 1.321 + save(ls, c); 1.322 + next(ls); 1.323 + continue; 1.324 + } 1.325 + default: 1.326 + save_and_next(ls); 1.327 + } 1.328 + } 1.329 + save_and_next(ls); /* skip delimiter */ 1.330 + seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1, 1.331 + luaZ_bufflen(ls->buff) - 2); 1.332 +} 1.333 + 1.334 + 1.335 +static int llex (LexState *ls, SemInfo *seminfo) { 1.336 + luaZ_resetbuffer(ls->buff); 1.337 + for (;;) { 1.338 + switch (ls->current) { 1.339 + case '\n': 1.340 + case '\r': { 1.341 + inclinenumber(ls); 1.342 + continue; 1.343 + } 1.344 + case '-': { 1.345 + next(ls); 1.346 + if (ls->current != '-') return '-'; 1.347 + /* else is a comment */ 1.348 + next(ls); 1.349 + if (ls->current == '[') { 1.350 + int sep = skip_sep(ls); 1.351 + luaZ_resetbuffer(ls->buff); /* `skip_sep' may dirty the buffer */ 1.352 + if (sep >= 0) { 1.353 + read_long_string(ls, NULL, sep); /* long comment */ 1.354 + luaZ_resetbuffer(ls->buff); 1.355 + continue; 1.356 + } 1.357 + } 1.358 + /* else short comment */ 1.359 + while (!currIsNewline(ls) && ls->current != EOZ) 1.360 + next(ls); 1.361 + continue; 1.362 + } 1.363 + case '[': { 1.364 + int sep = skip_sep(ls); 1.365 + if (sep >= 0) { 1.366 + read_long_string(ls, seminfo, sep); 1.367 + return TK_STRING; 1.368 + } 1.369 + else if (sep == -1) return '['; 1.370 + else luaX_lexerror(ls, "invalid long string delimiter", TK_STRING); 1.371 + } 1.372 + case '=': { 1.373 + next(ls); 1.374 + if (ls->current != '=') return '='; 1.375 + else { next(ls); return TK_EQ; } 1.376 + } 1.377 + case '<': { 1.378 + next(ls); 1.379 + if (ls->current != '=') return '<'; 1.380 + else { next(ls); return TK_LE; } 1.381 + } 1.382 + case '>': { 1.383 + next(ls); 1.384 + if (ls->current != '=') return '>'; 1.385 + else { next(ls); return TK_GE; } 1.386 + } 1.387 + case '~': { 1.388 + next(ls); 1.389 + if (ls->current != '=') return '~'; 1.390 + else { next(ls); return TK_NE; } 1.391 + } 1.392 + case '"': 1.393 + case '\'': { 1.394 + read_string(ls, ls->current, seminfo); 1.395 + return TK_STRING; 1.396 + } 1.397 + case '.': { 1.398 + save_and_next(ls); 1.399 + if (check_next(ls, ".")) { 1.400 + if (check_next(ls, ".")) 1.401 + return TK_DOTS; /* ... */ 1.402 + else return TK_CONCAT; /* .. */ 1.403 + } 1.404 + else if (!isdigit(ls->current)) return '.'; 1.405 + else { 1.406 + read_numeral(ls, seminfo); 1.407 + return TK_NUMBER; 1.408 + } 1.409 + } 1.410 + case EOZ: { 1.411 + return TK_EOS; 1.412 + } 1.413 + default: { 1.414 + if (isspace(ls->current)) { 1.415 + lua_assert(!currIsNewline(ls)); 1.416 + next(ls); 1.417 + continue; 1.418 + } 1.419 + else if (isdigit(ls->current)) { 1.420 + read_numeral(ls, seminfo); 1.421 + return TK_NUMBER; 1.422 + } 1.423 + else if (isalpha(ls->current) || ls->current == '_') { 1.424 + /* identifier or reserved word */ 1.425 + TString *ts; 1.426 + do { 1.427 + save_and_next(ls); 1.428 + } while (isalnum(ls->current) || ls->current == '_'); 1.429 + ts = luaX_newstring(ls, luaZ_buffer(ls->buff), 1.430 + luaZ_bufflen(ls->buff)); 1.431 + if (ts->tsv.reserved > 0) /* reserved word? */ 1.432 + return ts->tsv.reserved - 1 + FIRST_RESERVED; 1.433 + else { 1.434 + seminfo->ts = ts; 1.435 + return TK_NAME; 1.436 + } 1.437 + } 1.438 + else { 1.439 + int c = ls->current; 1.440 + next(ls); 1.441 + return c; /* single-char tokens (+ - / ...) */ 1.442 + } 1.443 + } 1.444 + } 1.445 + } 1.446 +} 1.447 + 1.448 + 1.449 +void luaX_next (LexState *ls) { 1.450 + ls->lastline = ls->linenumber; 1.451 + if (ls->lookahead.token != TK_EOS) { /* is there a look-ahead token? */ 1.452 + ls->t = ls->lookahead; /* use this one */ 1.453 + ls->lookahead.token = TK_EOS; /* and discharge it */ 1.454 + } 1.455 + else 1.456 + ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */ 1.457 +} 1.458 + 1.459 + 1.460 +void luaX_lookahead (LexState *ls) { 1.461 + lua_assert(ls->lookahead.token == TK_EOS); 1.462 + ls->lookahead.token = llex(ls, &ls->lookahead.seminfo); 1.463 +} 1.464 +