rlm@1: /* rlm@1: ** $Id: llex.c,v 2.20.1.1 2007/12/27 13:02:25 roberto Exp $ rlm@1: ** Lexical Analyzer rlm@1: ** See Copyright Notice in lua.h rlm@1: */ rlm@1: rlm@1: rlm@1: #include rlm@1: #include rlm@1: #include rlm@1: rlm@1: #define llex_c rlm@1: #define LUA_CORE rlm@1: rlm@1: #include "lua.h" rlm@1: rlm@1: #include "ldo.h" rlm@1: #include "llex.h" rlm@1: #include "lobject.h" rlm@1: #include "lparser.h" rlm@1: #include "lstate.h" rlm@1: #include "lstring.h" rlm@1: #include "ltable.h" rlm@1: #include "lzio.h" rlm@1: rlm@1: rlm@1: rlm@1: #define next(ls) (ls->current = zgetc(ls->z)) rlm@1: rlm@1: rlm@1: rlm@1: rlm@1: #define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r') rlm@1: rlm@1: rlm@1: /* ORDER RESERVED */ rlm@1: const char *const luaX_tokens [] = { rlm@1: "and", "break", "do", "else", "elseif", rlm@1: "end", "false", "for", "function", "if", rlm@1: "in", "local", "nil", "not", "or", "repeat", rlm@1: "return", "then", "true", "until", "while", rlm@1: "..", "...", "==", ">=", "<=", "~=", rlm@1: "", "", "", "", rlm@1: NULL rlm@1: }; rlm@1: rlm@1: rlm@1: #define save_and_next(ls) (save(ls, ls->current), next(ls)) rlm@1: rlm@1: rlm@1: static void save (LexState *ls, int c) { rlm@1: Mbuffer *b = ls->buff; rlm@1: if (b->n + 1 > b->buffsize) { rlm@1: size_t newsize; rlm@1: if (b->buffsize >= MAX_SIZET/2) rlm@1: luaX_lexerror(ls, "lexical element too long", 0); rlm@1: newsize = b->buffsize * 2; rlm@1: luaZ_resizebuffer(ls->L, b, newsize); rlm@1: } rlm@1: b->buffer[b->n++] = cast(char, c); rlm@1: } rlm@1: rlm@1: rlm@1: void luaX_init (lua_State *L) { rlm@1: int i; rlm@1: for (i=0; itsv.reserved = cast_byte(i+1); /* reserved word */ rlm@1: } rlm@1: } rlm@1: rlm@1: rlm@1: #define MAXSRC 80 rlm@1: rlm@1: rlm@1: const char *luaX_token2str (LexState *ls, int token) { rlm@1: if (token < FIRST_RESERVED) { rlm@1: lua_assert(token == cast(unsigned char, token)); rlm@1: return (iscntrl(token)) ? luaO_pushfstring(ls->L, "char(%d)", token) : rlm@1: luaO_pushfstring(ls->L, "%c", token); rlm@1: } rlm@1: else rlm@1: return luaX_tokens[token-FIRST_RESERVED]; rlm@1: } rlm@1: rlm@1: rlm@1: static const char *txtToken (LexState *ls, int token) { rlm@1: switch (token) { rlm@1: case TK_NAME: rlm@1: case TK_STRING: rlm@1: case TK_NUMBER: rlm@1: save(ls, '\0'); rlm@1: return luaZ_buffer(ls->buff); rlm@1: default: rlm@1: return luaX_token2str(ls, token); rlm@1: } rlm@1: } rlm@1: rlm@1: rlm@1: void luaX_lexerror (LexState *ls, const char *msg, int token) { rlm@1: char buff[MAXSRC]; rlm@1: luaO_chunkid(buff, getstr(ls->source), MAXSRC); rlm@1: msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg); rlm@1: if (token) rlm@1: luaO_pushfstring(ls->L, "%s near " LUA_QS, msg, txtToken(ls, token)); rlm@1: luaD_throw(ls->L, LUA_ERRSYNTAX); rlm@1: } rlm@1: rlm@1: rlm@1: void luaX_syntaxerror (LexState *ls, const char *msg) { rlm@1: luaX_lexerror(ls, msg, ls->t.token); rlm@1: } rlm@1: rlm@1: rlm@1: TString *luaX_newstring (LexState *ls, const char *str, size_t l) { rlm@1: lua_State *L = ls->L; rlm@1: TString *ts = luaS_newlstr(L, str, l); rlm@1: TValue *o = luaH_setstr(L, ls->fs->h, ts); /* entry for `str' */ rlm@1: if (ttisnil(o)) rlm@1: setbvalue(o, 1); /* make sure `str' will not be collected */ rlm@1: return ts; rlm@1: } rlm@1: rlm@1: rlm@1: static void inclinenumber (LexState *ls) { rlm@1: int old = ls->current; rlm@1: lua_assert(currIsNewline(ls)); rlm@1: next(ls); /* skip `\n' or `\r' */ rlm@1: if (currIsNewline(ls) && ls->current != old) rlm@1: next(ls); /* skip `\n\r' or `\r\n' */ rlm@1: if (++ls->linenumber >= MAX_INT) rlm@1: luaX_syntaxerror(ls, "chunk has too many lines"); rlm@1: } rlm@1: rlm@1: rlm@1: void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) { rlm@1: ls->decpoint = '.'; rlm@1: ls->L = L; rlm@1: ls->lookahead.token = TK_EOS; /* no look-ahead token */ rlm@1: ls->z = z; rlm@1: ls->fs = NULL; rlm@1: ls->linenumber = 1; rlm@1: ls->lastline = 1; rlm@1: ls->source = source; rlm@1: luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */ rlm@1: next(ls); /* read first char */ rlm@1: } rlm@1: rlm@1: rlm@1: rlm@1: /* rlm@1: ** ======================================================= rlm@1: ** LEXICAL ANALYZER rlm@1: ** ======================================================= rlm@1: */ rlm@1: rlm@1: rlm@1: rlm@1: static int check_next (LexState *ls, const char *set) { rlm@1: if (!strchr(set, ls->current)) rlm@1: return 0; rlm@1: save_and_next(ls); rlm@1: return 1; rlm@1: } rlm@1: rlm@1: rlm@1: static void buffreplace (LexState *ls, char from, char to) { rlm@1: size_t n = luaZ_bufflen(ls->buff); rlm@1: char *p = luaZ_buffer(ls->buff); rlm@1: while (n--) rlm@1: if (p[n] == from) p[n] = to; rlm@1: } rlm@1: rlm@1: rlm@1: static void trydecpoint (LexState *ls, SemInfo *seminfo) { rlm@1: /* format error: try to update decimal point separator */ rlm@1: struct lconv *cv = localeconv(); rlm@1: char old = ls->decpoint; rlm@1: ls->decpoint = (cv ? cv->decimal_point[0] : '.'); rlm@1: buffreplace(ls, old, ls->decpoint); /* try updated decimal separator */ rlm@1: if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) { rlm@1: /* format error with correct decimal point: no more options */ rlm@1: buffreplace(ls, ls->decpoint, '.'); /* undo change (for error message) */ rlm@1: luaX_lexerror(ls, "malformed number", TK_NUMBER); rlm@1: } rlm@1: } rlm@1: rlm@1: rlm@1: /* LUA_NUMBER */ rlm@1: static void read_numeral (LexState *ls, SemInfo *seminfo) { rlm@1: lua_assert(isdigit(ls->current)); rlm@1: do { rlm@1: save_and_next(ls); rlm@1: } while (isdigit(ls->current) || ls->current == '.'); rlm@1: if (check_next(ls, "Ee")) /* `E'? */ rlm@1: check_next(ls, "+-"); /* optional exponent sign */ rlm@1: while (isalnum(ls->current) || ls->current == '_') rlm@1: save_and_next(ls); rlm@1: save(ls, '\0'); rlm@1: buffreplace(ls, '.', ls->decpoint); /* follow locale for decimal point */ rlm@1: if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) /* format error? */ rlm@1: trydecpoint(ls, seminfo); /* try to update decimal point separator */ rlm@1: } rlm@1: rlm@1: rlm@1: static int skip_sep (LexState *ls) { rlm@1: int count = 0; rlm@1: int s = ls->current; rlm@1: lua_assert(s == '[' || s == ']'); rlm@1: save_and_next(ls); rlm@1: while (ls->current == '=') { rlm@1: save_and_next(ls); rlm@1: count++; rlm@1: } rlm@1: return (ls->current == s) ? count : (-count) - 1; rlm@1: } rlm@1: rlm@1: rlm@1: static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) { rlm@1: int cont = 0; rlm@1: (void)(cont); /* avoid warnings when `cont' is not used */ rlm@1: save_and_next(ls); /* skip 2nd `[' */ rlm@1: if (currIsNewline(ls)) /* string starts with a newline? */ rlm@1: inclinenumber(ls); /* skip it */ rlm@1: for (;;) { rlm@1: switch (ls->current) { rlm@1: case EOZ: rlm@1: luaX_lexerror(ls, (seminfo) ? "unfinished long string" : rlm@1: "unfinished long comment", TK_EOS); rlm@1: break; /* to avoid warnings */ rlm@1: #if defined(LUA_COMPAT_LSTR) rlm@1: case '[': { rlm@1: if (skip_sep(ls) == sep) { rlm@1: save_and_next(ls); /* skip 2nd `[' */ rlm@1: cont++; rlm@1: #if LUA_COMPAT_LSTR == 1 rlm@1: if (sep == 0) rlm@1: luaX_lexerror(ls, "nesting of [[...]] is deprecated", '['); rlm@1: #endif rlm@1: } rlm@1: break; rlm@1: } rlm@1: #endif rlm@1: case ']': { rlm@1: if (skip_sep(ls) == sep) { rlm@1: save_and_next(ls); /* skip 2nd `]' */ rlm@1: #if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2 rlm@1: cont--; rlm@1: if (sep == 0 && cont >= 0) break; rlm@1: #endif rlm@1: goto endloop; rlm@1: } rlm@1: break; rlm@1: } rlm@1: case '\n': rlm@1: case '\r': { rlm@1: save(ls, '\n'); rlm@1: inclinenumber(ls); rlm@1: if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */ rlm@1: break; rlm@1: } rlm@1: default: { rlm@1: if (seminfo) save_and_next(ls); rlm@1: else next(ls); rlm@1: } rlm@1: } rlm@1: } endloop: rlm@1: if (seminfo) rlm@1: seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep), rlm@1: luaZ_bufflen(ls->buff) - 2*(2 + sep)); rlm@1: } rlm@1: rlm@1: rlm@1: static void read_string (LexState *ls, int del, SemInfo *seminfo) { rlm@1: save_and_next(ls); rlm@1: while (ls->current != del) { rlm@1: switch (ls->current) { rlm@1: case EOZ: rlm@1: luaX_lexerror(ls, "unfinished string", TK_EOS); rlm@1: continue; /* to avoid warnings */ rlm@1: case '\n': rlm@1: case '\r': rlm@1: luaX_lexerror(ls, "unfinished string", TK_STRING); rlm@1: continue; /* to avoid warnings */ rlm@1: case '\\': { rlm@1: int c; rlm@1: next(ls); /* do not save the `\' */ rlm@1: switch (ls->current) { rlm@1: case 'a': c = '\a'; break; rlm@1: case 'b': c = '\b'; break; rlm@1: case 'f': c = '\f'; break; rlm@1: case 'n': c = '\n'; break; rlm@1: case 'r': c = '\r'; break; rlm@1: case 't': c = '\t'; break; rlm@1: case 'v': c = '\v'; break; rlm@1: case '\n': /* go through */ rlm@1: case '\r': save(ls, '\n'); inclinenumber(ls); continue; rlm@1: case EOZ: continue; /* will raise an error next loop */ rlm@1: default: { rlm@1: if (!isdigit(ls->current)) rlm@1: save_and_next(ls); /* handles \\, \", \', and \? */ rlm@1: else { /* \xxx */ rlm@1: int i = 0; rlm@1: c = 0; rlm@1: do { rlm@1: c = 10*c + (ls->current-'0'); rlm@1: next(ls); rlm@1: } while (++i<3 && isdigit(ls->current)); rlm@1: if (c > UCHAR_MAX) rlm@1: luaX_lexerror(ls, "escape sequence too large", TK_STRING); rlm@1: save(ls, c); rlm@1: } rlm@1: continue; rlm@1: } rlm@1: } rlm@1: save(ls, c); rlm@1: next(ls); rlm@1: continue; rlm@1: } rlm@1: default: rlm@1: save_and_next(ls); rlm@1: } rlm@1: } rlm@1: save_and_next(ls); /* skip delimiter */ rlm@1: seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1, rlm@1: luaZ_bufflen(ls->buff) - 2); rlm@1: } rlm@1: rlm@1: rlm@1: static int llex (LexState *ls, SemInfo *seminfo) { rlm@1: luaZ_resetbuffer(ls->buff); rlm@1: for (;;) { rlm@1: switch (ls->current) { rlm@1: case '\n': rlm@1: case '\r': { rlm@1: inclinenumber(ls); rlm@1: continue; rlm@1: } rlm@1: case '-': { rlm@1: next(ls); rlm@1: if (ls->current != '-') return '-'; rlm@1: /* else is a comment */ rlm@1: next(ls); rlm@1: if (ls->current == '[') { rlm@1: int sep = skip_sep(ls); rlm@1: luaZ_resetbuffer(ls->buff); /* `skip_sep' may dirty the buffer */ rlm@1: if (sep >= 0) { rlm@1: read_long_string(ls, NULL, sep); /* long comment */ rlm@1: luaZ_resetbuffer(ls->buff); rlm@1: continue; rlm@1: } rlm@1: } rlm@1: /* else short comment */ rlm@1: while (!currIsNewline(ls) && ls->current != EOZ) rlm@1: next(ls); rlm@1: continue; rlm@1: } rlm@1: case '[': { rlm@1: int sep = skip_sep(ls); rlm@1: if (sep >= 0) { rlm@1: read_long_string(ls, seminfo, sep); rlm@1: return TK_STRING; rlm@1: } rlm@1: else if (sep == -1) return '['; rlm@1: else luaX_lexerror(ls, "invalid long string delimiter", TK_STRING); rlm@1: } rlm@1: case '=': { rlm@1: next(ls); rlm@1: if (ls->current != '=') return '='; rlm@1: else { next(ls); return TK_EQ; } rlm@1: } rlm@1: case '<': { rlm@1: next(ls); rlm@1: if (ls->current != '=') return '<'; rlm@1: else { next(ls); return TK_LE; } rlm@1: } rlm@1: case '>': { rlm@1: next(ls); rlm@1: if (ls->current != '=') return '>'; rlm@1: else { next(ls); return TK_GE; } rlm@1: } rlm@1: case '~': { rlm@1: next(ls); rlm@1: if (ls->current != '=') return '~'; rlm@1: else { next(ls); return TK_NE; } rlm@1: } rlm@1: case '"': rlm@1: case '\'': { rlm@1: read_string(ls, ls->current, seminfo); rlm@1: return TK_STRING; rlm@1: } rlm@1: case '.': { rlm@1: save_and_next(ls); rlm@1: if (check_next(ls, ".")) { rlm@1: if (check_next(ls, ".")) rlm@1: return TK_DOTS; /* ... */ rlm@1: else return TK_CONCAT; /* .. */ rlm@1: } rlm@1: else if (!isdigit(ls->current)) return '.'; rlm@1: else { rlm@1: read_numeral(ls, seminfo); rlm@1: return TK_NUMBER; rlm@1: } rlm@1: } rlm@1: case EOZ: { rlm@1: return TK_EOS; rlm@1: } rlm@1: default: { rlm@1: if (isspace(ls->current)) { rlm@1: lua_assert(!currIsNewline(ls)); rlm@1: next(ls); rlm@1: continue; rlm@1: } rlm@1: else if (isdigit(ls->current)) { rlm@1: read_numeral(ls, seminfo); rlm@1: return TK_NUMBER; rlm@1: } rlm@1: else if (isalpha(ls->current) || ls->current == '_') { rlm@1: /* identifier or reserved word */ rlm@1: TString *ts; rlm@1: do { rlm@1: save_and_next(ls); rlm@1: } while (isalnum(ls->current) || ls->current == '_'); rlm@1: ts = luaX_newstring(ls, luaZ_buffer(ls->buff), rlm@1: luaZ_bufflen(ls->buff)); rlm@1: if (ts->tsv.reserved > 0) /* reserved word? */ rlm@1: return ts->tsv.reserved - 1 + FIRST_RESERVED; rlm@1: else { rlm@1: seminfo->ts = ts; rlm@1: return TK_NAME; rlm@1: } rlm@1: } rlm@1: else { rlm@1: int c = ls->current; rlm@1: next(ls); rlm@1: return c; /* single-char tokens (+ - / ...) */ rlm@1: } rlm@1: } rlm@1: } rlm@1: } rlm@1: } rlm@1: rlm@1: rlm@1: void luaX_next (LexState *ls) { rlm@1: ls->lastline = ls->linenumber; rlm@1: if (ls->lookahead.token != TK_EOS) { /* is there a look-ahead token? */ rlm@1: ls->t = ls->lookahead; /* use this one */ rlm@1: ls->lookahead.token = TK_EOS; /* and discharge it */ rlm@1: } rlm@1: else rlm@1: ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */ rlm@1: } rlm@1: rlm@1: rlm@1: void luaX_lookahead (LexState *ls) { rlm@1: lua_assert(ls->lookahead.token == TK_EOS); rlm@1: ls->lookahead.token = llex(ls, &ls->lookahead.seminfo); rlm@1: } rlm@1: