diff src/lua/src/llex.c @ 1:f9f4f1b99eed

importing src directory
author Robert McIntyre <rlm@mit.edu>
date Sat, 03 Mar 2012 10:31:27 -0600
parents
children
line wrap: on
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/lua/src/llex.c	Sat Mar 03 10:31:27 2012 -0600
     1.3 @@ -0,0 +1,461 @@
     1.4 +/*
     1.5 +** $Id: llex.c,v 2.20.1.1 2007/12/27 13:02:25 roberto Exp $
     1.6 +** Lexical Analyzer
     1.7 +** See Copyright Notice in lua.h
     1.8 +*/
     1.9 +
    1.10 +
    1.11 +#include <ctype.h>
    1.12 +#include <locale.h>
    1.13 +#include <string.h>
    1.14 +
    1.15 +#define llex_c
    1.16 +#define LUA_CORE
    1.17 +
    1.18 +#include "lua.h"
    1.19 +
    1.20 +#include "ldo.h"
    1.21 +#include "llex.h"
    1.22 +#include "lobject.h"
    1.23 +#include "lparser.h"
    1.24 +#include "lstate.h"
    1.25 +#include "lstring.h"
    1.26 +#include "ltable.h"
    1.27 +#include "lzio.h"
    1.28 +
    1.29 +
    1.30 +
    1.31 +#define next(ls) (ls->current = zgetc(ls->z))
    1.32 +
    1.33 +
    1.34 +
    1.35 +
    1.36 +#define currIsNewline(ls)	(ls->current == '\n' || ls->current == '\r')
    1.37 +
    1.38 +
    1.39 +/* ORDER RESERVED */
    1.40 +const char *const luaX_tokens [] = {
    1.41 +    "and", "break", "do", "else", "elseif",
    1.42 +    "end", "false", "for", "function", "if",
    1.43 +    "in", "local", "nil", "not", "or", "repeat",
    1.44 +    "return", "then", "true", "until", "while",
    1.45 +    "..", "...", "==", ">=", "<=", "~=",
    1.46 +    "<number>", "<name>", "<string>", "<eof>",
    1.47 +    NULL
    1.48 +};
    1.49 +
    1.50 +
    1.51 +#define save_and_next(ls) (save(ls, ls->current), next(ls))
    1.52 +
    1.53 +
    1.54 +static void save (LexState *ls, int c) {
    1.55 +  Mbuffer *b = ls->buff;
    1.56 +  if (b->n + 1 > b->buffsize) {
    1.57 +    size_t newsize;
    1.58 +    if (b->buffsize >= MAX_SIZET/2)
    1.59 +      luaX_lexerror(ls, "lexical element too long", 0);
    1.60 +    newsize = b->buffsize * 2;
    1.61 +    luaZ_resizebuffer(ls->L, b, newsize);
    1.62 +  }
    1.63 +  b->buffer[b->n++] = cast(char, c);
    1.64 +}
    1.65 +
    1.66 +
    1.67 +void luaX_init (lua_State *L) {
    1.68 +  int i;
    1.69 +  for (i=0; i<NUM_RESERVED; i++) {
    1.70 +    TString *ts = luaS_new(L, luaX_tokens[i]);
    1.71 +    luaS_fix(ts);  /* reserved words are never collected */
    1.72 +    lua_assert(strlen(luaX_tokens[i])+1 <= TOKEN_LEN);
    1.73 +    ts->tsv.reserved = cast_byte(i+1);  /* reserved word */
    1.74 +  }
    1.75 +}
    1.76 +
    1.77 +
/* size of the buffer that luaX_lexerror uses for the printable chunk name */
#define MAXSRC          80
    1.79 +
    1.80 +
    1.81 +const char *luaX_token2str (LexState *ls, int token) {
    1.82 +  if (token < FIRST_RESERVED) {
    1.83 +    lua_assert(token == cast(unsigned char, token));
    1.84 +    return (iscntrl(token)) ? luaO_pushfstring(ls->L, "char(%d)", token) :
    1.85 +                              luaO_pushfstring(ls->L, "%c", token);
    1.86 +  }
    1.87 +  else
    1.88 +    return luaX_tokens[token-FIRST_RESERVED];
    1.89 +}
    1.90 +
    1.91 +
    1.92 +static const char *txtToken (LexState *ls, int token) {
    1.93 +  switch (token) {
    1.94 +    case TK_NAME:
    1.95 +    case TK_STRING:
    1.96 +    case TK_NUMBER:
    1.97 +      save(ls, '\0');
    1.98 +      return luaZ_buffer(ls->buff);
    1.99 +    default:
   1.100 +      return luaX_token2str(ls, token);
   1.101 +  }
   1.102 +}
   1.103 +
   1.104 +
   1.105 +void luaX_lexerror (LexState *ls, const char *msg, int token) {
   1.106 +  char buff[MAXSRC];
   1.107 +  luaO_chunkid(buff, getstr(ls->source), MAXSRC);
   1.108 +  msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg);
   1.109 +  if (token)
   1.110 +    luaO_pushfstring(ls->L, "%s near " LUA_QS, msg, txtToken(ls, token));
   1.111 +  luaD_throw(ls->L, LUA_ERRSYNTAX);
   1.112 +}
   1.113 +
   1.114 +
   1.115 +void luaX_syntaxerror (LexState *ls, const char *msg) {
   1.116 +  luaX_lexerror(ls, msg, ls->t.token);
   1.117 +}
   1.118 +
   1.119 +
   1.120 +TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
   1.121 +  lua_State *L = ls->L;
   1.122 +  TString *ts = luaS_newlstr(L, str, l);
   1.123 +  TValue *o = luaH_setstr(L, ls->fs->h, ts);  /* entry for `str' */
   1.124 +  if (ttisnil(o))
   1.125 +    setbvalue(o, 1);  /* make sure `str' will not be collected */
   1.126 +  return ts;
   1.127 +}
   1.128 +
   1.129 +
   1.130 +static void inclinenumber (LexState *ls) {
   1.131 +  int old = ls->current;
   1.132 +  lua_assert(currIsNewline(ls));
   1.133 +  next(ls);  /* skip `\n' or `\r' */
   1.134 +  if (currIsNewline(ls) && ls->current != old)
   1.135 +    next(ls);  /* skip `\n\r' or `\r\n' */
   1.136 +  if (++ls->linenumber >= MAX_INT)
   1.137 +    luaX_syntaxerror(ls, "chunk has too many lines");
   1.138 +}
   1.139 +
   1.140 +
   1.141 +void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) {
   1.142 +  ls->decpoint = '.';
   1.143 +  ls->L = L;
   1.144 +  ls->lookahead.token = TK_EOS;  /* no look-ahead token */
   1.145 +  ls->z = z;
   1.146 +  ls->fs = NULL;
   1.147 +  ls->linenumber = 1;
   1.148 +  ls->lastline = 1;
   1.149 +  ls->source = source;
   1.150 +  luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
   1.151 +  next(ls);  /* read first char */
   1.152 +}
   1.153 +
   1.154 +
   1.155 +
   1.156 +/*
   1.157 +** =======================================================
   1.158 +** LEXICAL ANALYZER
   1.159 +** =======================================================
   1.160 +*/
   1.161 +
   1.162 +
   1.163 +
   1.164 +static int check_next (LexState *ls, const char *set) {
   1.165 +  if (!strchr(set, ls->current))
   1.166 +    return 0;
   1.167 +  save_and_next(ls);
   1.168 +  return 1;
   1.169 +}
   1.170 +
   1.171 +
   1.172 +static void buffreplace (LexState *ls, char from, char to) {
   1.173 +  size_t n = luaZ_bufflen(ls->buff);
   1.174 +  char *p = luaZ_buffer(ls->buff);
   1.175 +  while (n--)
   1.176 +    if (p[n] == from) p[n] = to;
   1.177 +}
   1.178 +
   1.179 +
   1.180 +static void trydecpoint (LexState *ls, SemInfo *seminfo) {
   1.181 +  /* format error: try to update decimal point separator */
   1.182 +  struct lconv *cv = localeconv();
   1.183 +  char old = ls->decpoint;
   1.184 +  ls->decpoint = (cv ? cv->decimal_point[0] : '.');
   1.185 +  buffreplace(ls, old, ls->decpoint);  /* try updated decimal separator */
   1.186 +  if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) {
   1.187 +    /* format error with correct decimal point: no more options */
   1.188 +    buffreplace(ls, ls->decpoint, '.');  /* undo change (for error message) */
   1.189 +    luaX_lexerror(ls, "malformed number", TK_NUMBER);
   1.190 +  }
   1.191 +}
   1.192 +
   1.193 +
   1.194 +/* LUA_NUMBER */
   1.195 +static void read_numeral (LexState *ls, SemInfo *seminfo) {
   1.196 +  lua_assert(isdigit(ls->current));
   1.197 +  do {
   1.198 +    save_and_next(ls);
   1.199 +  } while (isdigit(ls->current) || ls->current == '.');
   1.200 +  if (check_next(ls, "Ee"))  /* `E'? */
   1.201 +    check_next(ls, "+-");  /* optional exponent sign */
   1.202 +  while (isalnum(ls->current) || ls->current == '_')
   1.203 +    save_and_next(ls);
   1.204 +  save(ls, '\0');
   1.205 +  buffreplace(ls, '.', ls->decpoint);  /* follow locale for decimal point */
   1.206 +  if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r))  /* format error? */
   1.207 +    trydecpoint(ls, seminfo); /* try to update decimal point separator */
   1.208 +}
   1.209 +
   1.210 +
   1.211 +static int skip_sep (LexState *ls) {
   1.212 +  int count = 0;
   1.213 +  int s = ls->current;
   1.214 +  lua_assert(s == '[' || s == ']');
   1.215 +  save_and_next(ls);
   1.216 +  while (ls->current == '=') {
   1.217 +    save_and_next(ls);
   1.218 +    count++;
   1.219 +  }
   1.220 +  return (ls->current == s) ? count : (-count) - 1;
   1.221 +}
   1.222 +
   1.223 +
/*
** Read the body of a long string or long comment whose delimiters have
** `sep' `=' characters. Both callers run skip_sep first, so on entry the
** buffer holds the first `[' plus the `='s and `ls->current' is the
** second `['.
** When `seminfo' is NULL the text is a comment: its characters are not
** kept. Otherwise the contents, without the delimiters, are stored in
** `seminfo->ts'.
** Raises a lexical error if the input ends before the closing delimiter.
*/
static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {
  int cont = 0;  /* nesting depth; only touched under LUA_COMPAT_LSTR */
  (void)(cont);  /* avoid warnings when `cont' is not used */
  save_and_next(ls);  /* skip 2nd `[' */
  if (currIsNewline(ls))  /* string starts with a newline? */
    inclinenumber(ls);  /* skip it */
  for (;;) {
    switch (ls->current) {
      case EOZ:
        luaX_lexerror(ls, (seminfo) ? "unfinished long string" :
                                   "unfinished long comment", TK_EOS);
        break;  /* to avoid warnings */
#if defined(LUA_COMPAT_LSTR)
      case '[': {
        /* compatibility mode: track nested opening delimiters of the
           same level */
        if (skip_sep(ls) == sep) {
          save_and_next(ls);  /* skip 2nd `[' */
          cont++;
#if LUA_COMPAT_LSTR == 1
          if (sep == 0)
            luaX_lexerror(ls, "nesting of [[...]] is deprecated", '[');
#endif
        }
        break;
      }
#endif
      case ']': {
        /* skip_sep saves the `]' and any `='; a result equal to `sep'
           means a closing delimiter of the right level is in sight */
        if (skip_sep(ls) == sep) {
          save_and_next(ls);  /* skip 2nd `]' */
#if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2
          /* nested [[...]] allowed: this only closes an inner level */
          cont--;
          if (sep == 0 && cont >= 0) break;
#endif
          goto endloop;
        }
        break;
      }
      case '\n':
      case '\r': {
        save(ls, '\n');  /* any line break is stored as a single '\n' */
        inclinenumber(ls);
        if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
        break;
      }
      default: {
        /* ordinary character: keep it only when reading a string */
        if (seminfo) save_and_next(ls);
        else next(ls);
      }
    }
  } endloop:
  /* the buffer holds both delimiters, each (2 + sep) characters long;
     strip them from the resulting string */
  if (seminfo)
    seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),
                                     luaZ_bufflen(ls->buff) - 2*(2 + sep));
}
   1.277 +
   1.278 +
/*
** Read a quoted string. On entry `ls->current' is the opening delimiter,
** which equals `del' (see the call in llex). Escape sequences are
** translated while reading; the resulting string, without its two
** delimiters, is stored in `seminfo->ts'.
** Raises a lexical error on end of input or an unescaped line break
** inside the string, and on a decimal escape larger than UCHAR_MAX.
*/
static void read_string (LexState *ls, int del, SemInfo *seminfo) {
  save_and_next(ls);  /* keep the opening delimiter (stripped at the end) */
  while (ls->current != del) {
    switch (ls->current) {
      case EOZ:
        luaX_lexerror(ls, "unfinished string", TK_EOS);
        continue;  /* to avoid warnings */
      case '\n':
      case '\r':
        luaX_lexerror(ls, "unfinished string", TK_STRING);
        continue;  /* to avoid warnings */
      case '\\': {
        int c;  /* character produced by the escape sequence */
        next(ls);  /* do not save the `\' */
        switch (ls->current) {
          /* single-letter escapes: fall out of the switch to the common
             save/next code below */
          case 'a': c = '\a'; break;
          case 'b': c = '\b'; break;
          case 'f': c = '\f'; break;
          case 'n': c = '\n'; break;
          case 'r': c = '\r'; break;
          case 't': c = '\t'; break;
          case 'v': c = '\v'; break;
          /* backslash followed by a real line break: store '\n' and
             count the line */
          case '\n':  /* go through */
          case '\r': save(ls, '\n'); inclinenumber(ls); continue;
          case EOZ: continue;  /* will raise an error next loop */
          default: {
            if (!isdigit(ls->current))
              save_and_next(ls);  /* handles \\, \", \', and \? */
            else {  /* \xxx */
              /* up to three decimal digits giving the character code */
              int i = 0;
              c = 0;
              do {
                c = 10*c + (ls->current-'0');
                next(ls);
              } while (++i<3 && isdigit(ls->current));
              if (c > UCHAR_MAX)
                luaX_lexerror(ls, "escape sequence too large", TK_STRING);
              save(ls, c);
            }
            continue;  /* input already advanced; back to the outer loop */
          }
        }
        /* common tail for the single-letter escapes */
        save(ls, c);
        next(ls);
        continue;
      }
      default:
        save_and_next(ls);
    }
  }
  save_and_next(ls);  /* skip delimiter */
  /* drop the opening and closing delimiters from the saved text */
  seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
                                   luaZ_bufflen(ls->buff) - 2);
}
   1.333 +
   1.334 +
/*
** Scan and return the next token from the input of `ls'.
** White space, line breaks and comments are skipped. Single-character
** tokens are returned as their own character code; other tokens use the
** TK_* codes. For TK_STRING, TK_NUMBER and TK_NAME the token's value is
** stored in `seminfo' (`ts' or `r').
** Returns TK_EOS at end of input.
*/
static int llex (LexState *ls, SemInfo *seminfo) {
  luaZ_resetbuffer(ls->buff);  /* start each token with an empty buffer */
  for (;;) {
    switch (ls->current) {
      case '\n':
      case '\r': {
        inclinenumber(ls);
        continue;
      }
      case '-': {
        next(ls);
        if (ls->current != '-') return '-';
        /* else is a comment */
        next(ls);
        if (ls->current == '[') {
          int sep = skip_sep(ls);
          luaZ_resetbuffer(ls->buff);  /* `skip_sep' may dirty the buffer */
          if (sep >= 0) {
            read_long_string(ls, NULL, sep);  /* long comment */
            luaZ_resetbuffer(ls->buff);
            continue;
          }
        }
        /* else short comment */
        while (!currIsNewline(ls) && ls->current != EOZ)
          next(ls);
        continue;
      }
      case '[': {
        int sep = skip_sep(ls);
        if (sep >= 0) {
          read_long_string(ls, seminfo, sep);
          return TK_STRING;
        }
        else if (sep == -1) return '[';  /* lone bracket, no `=' after it */
        /* NOTE(review): there is no break here; this relies on
           luaX_lexerror not returning (it ends in luaD_throw), otherwise
           control would fall into the '=' case -- confirm */
        else luaX_lexerror(ls, "invalid long string delimiter", TK_STRING);
      }
      case '=': {
        next(ls);
        if (ls->current != '=') return '=';
        else { next(ls); return TK_EQ; }
      }
      case '<': {
        next(ls);
        if (ls->current != '=') return '<';
        else { next(ls); return TK_LE; }
      }
      case '>': {
        next(ls);
        if (ls->current != '=') return '>';
        else { next(ls); return TK_GE; }
      }
      case '~': {
        next(ls);
        if (ls->current != '=') return '~';
        else { next(ls); return TK_NE; }
      }
      case '"':
      case '\'': {
        read_string(ls, ls->current, seminfo);
        return TK_STRING;
      }
      case '.': {
        /* `.', `..', `...' or a numeral that starts with a dot */
        save_and_next(ls);
        if (check_next(ls, ".")) {
          if (check_next(ls, "."))
            return TK_DOTS;   /* ... */
          else return TK_CONCAT;   /* .. */
        }
        else if (!isdigit(ls->current)) return '.';
        else {
          read_numeral(ls, seminfo);  /* the saved `.' is part of the numeral */
          return TK_NUMBER;
        }
      }
      case EOZ: {
        return TK_EOS;
      }
      default: {
        if (isspace(ls->current)) {
          /* line breaks were handled by the first case above */
          lua_assert(!currIsNewline(ls));
          next(ls);
          continue;
        }
        else if (isdigit(ls->current)) {
          read_numeral(ls, seminfo);
          return TK_NUMBER;
        }
        else if (isalpha(ls->current) || ls->current == '_') {
          /* identifier or reserved word */
          TString *ts;
          do {
            save_and_next(ls);
          } while (isalnum(ls->current) || ls->current == '_');
          ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
                                  luaZ_bufflen(ls->buff));
          /* luaX_init stores (table index + 1) in `reserved' for each
             reserved word; map it back to the token code */
          if (ts->tsv.reserved > 0)  /* reserved word? */
            return ts->tsv.reserved - 1 + FIRST_RESERVED;
          else {
            seminfo->ts = ts;
            return TK_NAME;
          }
        }
        else {
          int c = ls->current;
          next(ls);
          return c;  /* single-char tokens (+ - / ...) */
        }
      }
    }
  }
}
   1.447 +
   1.448 +
   1.449 +void luaX_next (LexState *ls) {
   1.450 +  ls->lastline = ls->linenumber;
   1.451 +  if (ls->lookahead.token != TK_EOS) {  /* is there a look-ahead token? */
   1.452 +    ls->t = ls->lookahead;  /* use this one */
   1.453 +    ls->lookahead.token = TK_EOS;  /* and discharge it */
   1.454 +  }
   1.455 +  else
   1.456 +    ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
   1.457 +}
   1.458 +
   1.459 +
   1.460 +void luaX_lookahead (LexState *ls) {
   1.461 +  lua_assert(ls->lookahead.token == TK_EOS);
   1.462 +  ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
   1.463 +}
   1.464 +