annotate src/lua/llex.c @ 25:bacd824b9e27

checkpoint
author Robert McIntyre <rlm@mit.edu>
date Sun, 04 Mar 2012 18:02:08 -0600
parents 27763b933818
children
rev   line source
rlm@1 1 /*
rlm@1 2 ** $Id: llex.c,v 2.20.1.1 2007/12/27 13:02:25 roberto Exp $
rlm@1 3 ** Lexical Analyzer
rlm@1 4 ** See Copyright Notice in lua.h
rlm@1 5 */
rlm@1 6
rlm@1 7
rlm@1 8 #include <ctype.h>
rlm@1 9 #include <locale.h>
rlm@1 10 #include <string.h>
rlm@1 11
rlm@1 12 #define llex_c
rlm@1 13 #define LUA_CORE
rlm@1 14
rlm@1 15 #include "lua.h"
rlm@1 16
rlm@1 17 #include "ldo.h"
rlm@1 18 #include "llex.h"
rlm@1 19 #include "lobject.h"
rlm@1 20 #include "lparser.h"
rlm@1 21 #include "lstate.h"
rlm@1 22 #include "lstring.h"
rlm@1 23 #include "ltable.h"
rlm@1 24 #include "lzio.h"
rlm@1 25
rlm@1 26
rlm@1 27
/*
** Read the next character from the input stream `ls->z' into
** `ls->current' and yield it. The argument is parenthesized so that any
** pointer expression (e.g. `&state') can be passed safely.
*/
#define next(ls) ((ls)->current = zgetc((ls)->z))
rlm@1 29
rlm@1 30
rlm@1 31
rlm@1 32
/*
** True when the current character is a line terminator (`\n' or `\r').
** The argument is parenthesized so that any pointer expression can be
** passed safely.
*/
#define currIsNewline(ls) ((ls)->current == '\n' || (ls)->current == '\r')
rlm@1 34
rlm@1 35
/*
** Printable names of all multi-character tokens, terminated by NULL.
** ORDER RESERVED: entries are indexed by `token - FIRST_RESERVED', so the
** order here must not change.
*/
const char *const luaX_tokens [] = {
  /* reserved words */
  "and", "break", "do",
  "else", "elseif", "end",
  "false", "for", "function",
  "if", "in", "local",
  "nil", "not", "or",
  "repeat", "return", "then",
  "true", "until", "while",
  /* other terminal symbols */
  "..", "...",
  "==", ">=", "<=", "~=",
  "<number>", "<name>", "<string>", "<eof>",
  NULL
};
rlm@1 46
rlm@1 47
/*
** Append the current character to the token buffer, then advance to the
** next input character. The argument is parenthesized where it is
** dereferenced so that any pointer expression can be passed.
*/
#define save_and_next(ls) (save(ls, (ls)->current), next(ls))
rlm@1 49
rlm@1 50
rlm@1 51 static void save (LexState *ls, int c) {
rlm@1 52 Mbuffer *b = ls->buff;
rlm@1 53 if (b->n + 1 > b->buffsize) {
rlm@1 54 size_t newsize;
rlm@1 55 if (b->buffsize >= MAX_SIZET/2)
rlm@1 56 luaX_lexerror(ls, "lexical element too long", 0);
rlm@1 57 newsize = b->buffsize * 2;
rlm@1 58 luaZ_resizebuffer(ls->L, b, newsize);
rlm@1 59 }
rlm@1 60 b->buffer[b->n++] = cast(char, c);
rlm@1 61 }
rlm@1 62
rlm@1 63
rlm@1 64 void luaX_init (lua_State *L) {
rlm@1 65 int i;
rlm@1 66 for (i=0; i<NUM_RESERVED; i++) {
rlm@1 67 TString *ts = luaS_new(L, luaX_tokens[i]);
rlm@1 68 luaS_fix(ts); /* reserved words are never collected */
rlm@1 69 lua_assert(strlen(luaX_tokens[i])+1 <= TOKEN_LEN);
rlm@1 70 ts->tsv.reserved = cast_byte(i+1); /* reserved word */
rlm@1 71 }
rlm@1 72 }
rlm@1 73
rlm@1 74
/* size of the buffer that holds the chunk id in error messages */
#define MAXSRC 80
rlm@1 76
rlm@1 77
rlm@1 78 const char *luaX_token2str (LexState *ls, int token) {
rlm@1 79 if (token < FIRST_RESERVED) {
rlm@1 80 lua_assert(token == cast(unsigned char, token));
rlm@1 81 return (iscntrl(token)) ? luaO_pushfstring(ls->L, "char(%d)", token) :
rlm@1 82 luaO_pushfstring(ls->L, "%c", token);
rlm@1 83 }
rlm@1 84 else
rlm@1 85 return luaX_tokens[token-FIRST_RESERVED];
rlm@1 86 }
rlm@1 87
rlm@1 88
rlm@1 89 static const char *txtToken (LexState *ls, int token) {
rlm@1 90 switch (token) {
rlm@1 91 case TK_NAME:
rlm@1 92 case TK_STRING:
rlm@1 93 case TK_NUMBER:
rlm@1 94 save(ls, '\0');
rlm@1 95 return luaZ_buffer(ls->buff);
rlm@1 96 default:
rlm@1 97 return luaX_token2str(ls, token);
rlm@1 98 }
rlm@1 99 }
rlm@1 100
rlm@1 101
rlm@1 102 void luaX_lexerror (LexState *ls, const char *msg, int token) {
rlm@1 103 char buff[MAXSRC];
rlm@1 104 luaO_chunkid(buff, getstr(ls->source), MAXSRC);
rlm@1 105 msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg);
rlm@1 106 if (token)
rlm@1 107 luaO_pushfstring(ls->L, "%s near " LUA_QS, msg, txtToken(ls, token));
rlm@1 108 luaD_throw(ls->L, LUA_ERRSYNTAX);
rlm@1 109 }
rlm@1 110
rlm@1 111
rlm@1 112 void luaX_syntaxerror (LexState *ls, const char *msg) {
rlm@1 113 luaX_lexerror(ls, msg, ls->t.token);
rlm@1 114 }
rlm@1 115
rlm@1 116
rlm@1 117 TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
rlm@1 118 lua_State *L = ls->L;
rlm@1 119 TString *ts = luaS_newlstr(L, str, l);
rlm@1 120 TValue *o = luaH_setstr(L, ls->fs->h, ts); /* entry for `str' */
rlm@1 121 if (ttisnil(o))
rlm@1 122 setbvalue(o, 1); /* make sure `str' will not be collected */
rlm@1 123 return ts;
rlm@1 124 }
rlm@1 125
rlm@1 126
rlm@1 127 static void inclinenumber (LexState *ls) {
rlm@1 128 int old = ls->current;
rlm@1 129 lua_assert(currIsNewline(ls));
rlm@1 130 next(ls); /* skip `\n' or `\r' */
rlm@1 131 if (currIsNewline(ls) && ls->current != old)
rlm@1 132 next(ls); /* skip `\n\r' or `\r\n' */
rlm@1 133 if (++ls->linenumber >= MAX_INT)
rlm@1 134 luaX_syntaxerror(ls, "chunk has too many lines");
rlm@1 135 }
rlm@1 136
rlm@1 137
rlm@1 138 void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) {
rlm@1 139 ls->decpoint = '.';
rlm@1 140 ls->L = L;
rlm@1 141 ls->lookahead.token = TK_EOS; /* no look-ahead token */
rlm@1 142 ls->z = z;
rlm@1 143 ls->fs = NULL;
rlm@1 144 ls->linenumber = 1;
rlm@1 145 ls->lastline = 1;
rlm@1 146 ls->source = source;
rlm@1 147 luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */
rlm@1 148 next(ls); /* read first char */
rlm@1 149 }
rlm@1 150
rlm@1 151
rlm@1 152
rlm@1 153 /*
rlm@1 154 ** =======================================================
rlm@1 155 ** LEXICAL ANALYZER
rlm@1 156 ** =======================================================
rlm@1 157 */
rlm@1 158
rlm@1 159
rlm@1 160
rlm@1 161 static int check_next (LexState *ls, const char *set) {
rlm@1 162 if (!strchr(set, ls->current))
rlm@1 163 return 0;
rlm@1 164 save_and_next(ls);
rlm@1 165 return 1;
rlm@1 166 }
rlm@1 167
rlm@1 168
rlm@1 169 static void buffreplace (LexState *ls, char from, char to) {
rlm@1 170 size_t n = luaZ_bufflen(ls->buff);
rlm@1 171 char *p = luaZ_buffer(ls->buff);
rlm@1 172 while (n--)
rlm@1 173 if (p[n] == from) p[n] = to;
rlm@1 174 }
rlm@1 175
rlm@1 176
rlm@1 177 static void trydecpoint (LexState *ls, SemInfo *seminfo) {
rlm@1 178 /* format error: try to update decimal point separator */
rlm@1 179 struct lconv *cv = localeconv();
rlm@1 180 char old = ls->decpoint;
rlm@1 181 ls->decpoint = (cv ? cv->decimal_point[0] : '.');
rlm@1 182 buffreplace(ls, old, ls->decpoint); /* try updated decimal separator */
rlm@1 183 if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) {
rlm@1 184 /* format error with correct decimal point: no more options */
rlm@1 185 buffreplace(ls, ls->decpoint, '.'); /* undo change (for error message) */
rlm@1 186 luaX_lexerror(ls, "malformed number", TK_NUMBER);
rlm@1 187 }
rlm@1 188 }
rlm@1 189
rlm@1 190
rlm@1 191 /* LUA_NUMBER */
rlm@1 192 static void read_numeral (LexState *ls, SemInfo *seminfo) {
rlm@1 193 lua_assert(isdigit(ls->current));
rlm@1 194 do {
rlm@1 195 save_and_next(ls);
rlm@1 196 } while (isdigit(ls->current) || ls->current == '.');
rlm@1 197 if (check_next(ls, "Ee")) /* `E'? */
rlm@1 198 check_next(ls, "+-"); /* optional exponent sign */
rlm@1 199 while (isalnum(ls->current) || ls->current == '_')
rlm@1 200 save_and_next(ls);
rlm@1 201 save(ls, '\0');
rlm@1 202 buffreplace(ls, '.', ls->decpoint); /* follow locale for decimal point */
rlm@1 203 if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) /* format error? */
rlm@1 204 trydecpoint(ls, seminfo); /* try to update decimal point separator */
rlm@1 205 }
rlm@1 206
rlm@1 207
rlm@1 208 static int skip_sep (LexState *ls) {
rlm@1 209 int count = 0;
rlm@1 210 int s = ls->current;
rlm@1 211 lua_assert(s == '[' || s == ']');
rlm@1 212 save_and_next(ls);
rlm@1 213 while (ls->current == '=') {
rlm@1 214 save_and_next(ls);
rlm@1 215 count++;
rlm@1 216 }
rlm@1 217 return (ls->current == s) ? count : (-count) - 1;
rlm@1 218 }
rlm@1 219
rlm@1 220
rlm@1 221 static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {
rlm@1 222 int cont = 0;
rlm@1 223 (void)(cont); /* avoid warnings when `cont' is not used */
rlm@1 224 save_and_next(ls); /* skip 2nd `[' */
rlm@1 225 if (currIsNewline(ls)) /* string starts with a newline? */
rlm@1 226 inclinenumber(ls); /* skip it */
rlm@1 227 for (;;) {
rlm@1 228 switch (ls->current) {
rlm@1 229 case EOZ:
rlm@1 230 luaX_lexerror(ls, (seminfo) ? "unfinished long string" :
rlm@1 231 "unfinished long comment", TK_EOS);
rlm@1 232 break; /* to avoid warnings */
rlm@1 233 #if defined(LUA_COMPAT_LSTR)
rlm@1 234 case '[': {
rlm@1 235 if (skip_sep(ls) == sep) {
rlm@1 236 save_and_next(ls); /* skip 2nd `[' */
rlm@1 237 cont++;
rlm@1 238 #if LUA_COMPAT_LSTR == 1
rlm@1 239 if (sep == 0)
rlm@1 240 luaX_lexerror(ls, "nesting of [[...]] is deprecated", '[');
rlm@1 241 #endif
rlm@1 242 }
rlm@1 243 break;
rlm@1 244 }
rlm@1 245 #endif
rlm@1 246 case ']': {
rlm@1 247 if (skip_sep(ls) == sep) {
rlm@1 248 save_and_next(ls); /* skip 2nd `]' */
rlm@1 249 #if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2
rlm@1 250 cont--;
rlm@1 251 if (sep == 0 && cont >= 0) break;
rlm@1 252 #endif
rlm@1 253 goto endloop;
rlm@1 254 }
rlm@1 255 break;
rlm@1 256 }
rlm@1 257 case '\n':
rlm@1 258 case '\r': {
rlm@1 259 save(ls, '\n');
rlm@1 260 inclinenumber(ls);
rlm@1 261 if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */
rlm@1 262 break;
rlm@1 263 }
rlm@1 264 default: {
rlm@1 265 if (seminfo) save_and_next(ls);
rlm@1 266 else next(ls);
rlm@1 267 }
rlm@1 268 }
rlm@1 269 } endloop:
rlm@1 270 if (seminfo)
rlm@1 271 seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),
rlm@1 272 luaZ_bufflen(ls->buff) - 2*(2 + sep));
rlm@1 273 }
rlm@1 274
rlm@1 275
rlm@1 276 static void read_string (LexState *ls, int del, SemInfo *seminfo) {
rlm@1 277 save_and_next(ls);
rlm@1 278 while (ls->current != del) {
rlm@1 279 switch (ls->current) {
rlm@1 280 case EOZ:
rlm@1 281 luaX_lexerror(ls, "unfinished string", TK_EOS);
rlm@1 282 continue; /* to avoid warnings */
rlm@1 283 case '\n':
rlm@1 284 case '\r':
rlm@1 285 luaX_lexerror(ls, "unfinished string", TK_STRING);
rlm@1 286 continue; /* to avoid warnings */
rlm@1 287 case '\\': {
rlm@1 288 int c;
rlm@1 289 next(ls); /* do not save the `\' */
rlm@1 290 switch (ls->current) {
rlm@1 291 case 'a': c = '\a'; break;
rlm@1 292 case 'b': c = '\b'; break;
rlm@1 293 case 'f': c = '\f'; break;
rlm@1 294 case 'n': c = '\n'; break;
rlm@1 295 case 'r': c = '\r'; break;
rlm@1 296 case 't': c = '\t'; break;
rlm@1 297 case 'v': c = '\v'; break;
rlm@1 298 case '\n': /* go through */
rlm@1 299 case '\r': save(ls, '\n'); inclinenumber(ls); continue;
rlm@1 300 case EOZ: continue; /* will raise an error next loop */
rlm@1 301 default: {
rlm@1 302 if (!isdigit(ls->current))
rlm@1 303 save_and_next(ls); /* handles \\, \", \', and \? */
rlm@1 304 else { /* \xxx */
rlm@1 305 int i = 0;
rlm@1 306 c = 0;
rlm@1 307 do {
rlm@1 308 c = 10*c + (ls->current-'0');
rlm@1 309 next(ls);
rlm@1 310 } while (++i<3 && isdigit(ls->current));
rlm@1 311 if (c > UCHAR_MAX)
rlm@1 312 luaX_lexerror(ls, "escape sequence too large", TK_STRING);
rlm@1 313 save(ls, c);
rlm@1 314 }
rlm@1 315 continue;
rlm@1 316 }
rlm@1 317 }
rlm@1 318 save(ls, c);
rlm@1 319 next(ls);
rlm@1 320 continue;
rlm@1 321 }
rlm@1 322 default:
rlm@1 323 save_and_next(ls);
rlm@1 324 }
rlm@1 325 }
rlm@1 326 save_and_next(ls); /* skip delimiter */
rlm@1 327 seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
rlm@1 328 luaZ_bufflen(ls->buff) - 2);
rlm@1 329 }
rlm@1 330
rlm@1 331
rlm@1 332 static int llex (LexState *ls, SemInfo *seminfo) {
rlm@1 333 luaZ_resetbuffer(ls->buff);
rlm@1 334 for (;;) {
rlm@1 335 switch (ls->current) {
rlm@1 336 case '\n':
rlm@1 337 case '\r': {
rlm@1 338 inclinenumber(ls);
rlm@1 339 continue;
rlm@1 340 }
rlm@1 341 case '-': {
rlm@1 342 next(ls);
rlm@1 343 if (ls->current != '-') return '-';
rlm@1 344 /* else is a comment */
rlm@1 345 next(ls);
rlm@1 346 if (ls->current == '[') {
rlm@1 347 int sep = skip_sep(ls);
rlm@1 348 luaZ_resetbuffer(ls->buff); /* `skip_sep' may dirty the buffer */
rlm@1 349 if (sep >= 0) {
rlm@1 350 read_long_string(ls, NULL, sep); /* long comment */
rlm@1 351 luaZ_resetbuffer(ls->buff);
rlm@1 352 continue;
rlm@1 353 }
rlm@1 354 }
rlm@1 355 /* else short comment */
rlm@1 356 while (!currIsNewline(ls) && ls->current != EOZ)
rlm@1 357 next(ls);
rlm@1 358 continue;
rlm@1 359 }
rlm@1 360 case '[': {
rlm@1 361 int sep = skip_sep(ls);
rlm@1 362 if (sep >= 0) {
rlm@1 363 read_long_string(ls, seminfo, sep);
rlm@1 364 return TK_STRING;
rlm@1 365 }
rlm@1 366 else if (sep == -1) return '[';
rlm@1 367 else luaX_lexerror(ls, "invalid long string delimiter", TK_STRING);
rlm@1 368 }
rlm@1 369 case '=': {
rlm@1 370 next(ls);
rlm@1 371 if (ls->current != '=') return '=';
rlm@1 372 else { next(ls); return TK_EQ; }
rlm@1 373 }
rlm@1 374 case '<': {
rlm@1 375 next(ls);
rlm@1 376 if (ls->current != '=') return '<';
rlm@1 377 else { next(ls); return TK_LE; }
rlm@1 378 }
rlm@1 379 case '>': {
rlm@1 380 next(ls);
rlm@1 381 if (ls->current != '=') return '>';
rlm@1 382 else { next(ls); return TK_GE; }
rlm@1 383 }
rlm@1 384 case '~': {
rlm@1 385 next(ls);
rlm@1 386 if (ls->current != '=') return '~';
rlm@1 387 else { next(ls); return TK_NE; }
rlm@1 388 }
rlm@1 389 case '"':
rlm@1 390 case '\'': {
rlm@1 391 read_string(ls, ls->current, seminfo);
rlm@1 392 return TK_STRING;
rlm@1 393 }
rlm@1 394 case '.': {
rlm@1 395 save_and_next(ls);
rlm@1 396 if (check_next(ls, ".")) {
rlm@1 397 if (check_next(ls, "."))
rlm@1 398 return TK_DOTS; /* ... */
rlm@1 399 else return TK_CONCAT; /* .. */
rlm@1 400 }
rlm@1 401 else if (!isdigit(ls->current)) return '.';
rlm@1 402 else {
rlm@1 403 read_numeral(ls, seminfo);
rlm@1 404 return TK_NUMBER;
rlm@1 405 }
rlm@1 406 }
rlm@1 407 case EOZ: {
rlm@1 408 return TK_EOS;
rlm@1 409 }
rlm@1 410 default: {
rlm@1 411 if (isspace(ls->current)) {
rlm@1 412 lua_assert(!currIsNewline(ls));
rlm@1 413 next(ls);
rlm@1 414 continue;
rlm@1 415 }
rlm@1 416 else if (isdigit(ls->current)) {
rlm@1 417 read_numeral(ls, seminfo);
rlm@1 418 return TK_NUMBER;
rlm@1 419 }
rlm@1 420 else if (isalpha(ls->current) || ls->current == '_') {
rlm@1 421 /* identifier or reserved word */
rlm@1 422 TString *ts;
rlm@1 423 do {
rlm@1 424 save_and_next(ls);
rlm@1 425 } while (isalnum(ls->current) || ls->current == '_');
rlm@1 426 ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
rlm@1 427 luaZ_bufflen(ls->buff));
rlm@1 428 if (ts->tsv.reserved > 0) /* reserved word? */
rlm@1 429 return ts->tsv.reserved - 1 + FIRST_RESERVED;
rlm@1 430 else {
rlm@1 431 seminfo->ts = ts;
rlm@1 432 return TK_NAME;
rlm@1 433 }
rlm@1 434 }
rlm@1 435 else {
rlm@1 436 int c = ls->current;
rlm@1 437 next(ls);
rlm@1 438 return c; /* single-char tokens (+ - / ...) */
rlm@1 439 }
rlm@1 440 }
rlm@1 441 }
rlm@1 442 }
rlm@1 443 }
rlm@1 444
rlm@1 445
rlm@1 446 void luaX_next (LexState *ls) {
rlm@1 447 ls->lastline = ls->linenumber;
rlm@1 448 if (ls->lookahead.token != TK_EOS) { /* is there a look-ahead token? */
rlm@1 449 ls->t = ls->lookahead; /* use this one */
rlm@1 450 ls->lookahead.token = TK_EOS; /* and discharge it */
rlm@1 451 }
rlm@1 452 else
rlm@1 453 ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */
rlm@1 454 }
rlm@1 455
rlm@1 456
rlm@1 457 void luaX_lookahead (LexState *ls) {
rlm@1 458 lua_assert(ls->lookahead.token == TK_EOS);
rlm@1 459 ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
rlm@1 460 }
rlm@1 461