Mercurial > vba-clojure
view src/lua/llex.c @ 113:0831da75d2c5
completed frame-counting machine language program with dylan's help
author | Robert McIntyre <rlm@mit.edu> |
---|---|
date | Fri, 16 Mar 2012 00:43:28 -0500 |
parents | 27763b933818 |
children |
line wrap: on
line source
1 /*2 ** $Id: llex.c,v 2.20.1.1 2007/12/27 13:02:25 roberto Exp $3 ** Lexical Analyzer4 ** See Copyright Notice in lua.h5 */8 #include <ctype.h>9 #include <locale.h>10 #include <string.h>12 #define llex_c13 #define LUA_CORE15 #include "lua.h"17 #include "ldo.h"18 #include "llex.h"19 #include "lobject.h"20 #include "lparser.h"21 #include "lstate.h"22 #include "lstring.h"23 #include "ltable.h"24 #include "lzio.h"28 #define next(ls) (ls->current = zgetc(ls->z))33 #define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r')36 /* ORDER RESERVED */37 const char *const luaX_tokens [] = {38 "and", "break", "do", "else", "elseif",39 "end", "false", "for", "function", "if",40 "in", "local", "nil", "not", "or", "repeat",41 "return", "then", "true", "until", "while",42 "..", "...", "==", ">=", "<=", "~=",43 "<number>", "<name>", "<string>", "<eof>",44 NULL45 };48 #define save_and_next(ls) (save(ls, ls->current), next(ls))51 static void save (LexState *ls, int c) {52 Mbuffer *b = ls->buff;53 if (b->n + 1 > b->buffsize) {54 size_t newsize;55 if (b->buffsize >= MAX_SIZET/2)56 luaX_lexerror(ls, "lexical element too long", 0);57 newsize = b->buffsize * 2;58 luaZ_resizebuffer(ls->L, b, newsize);59 }60 b->buffer[b->n++] = cast(char, c);61 }64 void luaX_init (lua_State *L) {65 int i;66 for (i=0; i<NUM_RESERVED; i++) {67 TString *ts = luaS_new(L, luaX_tokens[i]);68 luaS_fix(ts); /* reserved words are never collected */69 lua_assert(strlen(luaX_tokens[i])+1 <= TOKEN_LEN);70 ts->tsv.reserved = cast_byte(i+1); /* reserved word */71 }72 }75 #define MAXSRC 8078 const char *luaX_token2str (LexState *ls, int token) {79 if (token < FIRST_RESERVED) {80 lua_assert(token == cast(unsigned char, token));81 return (iscntrl(token)) ? luaO_pushfstring(ls->L, "char(%d)", token) :82 luaO_pushfstring(ls->L, "%c", token);83 }84 else85 return luaX_tokens[token-FIRST_RESERVED];86 }89 static const char *txtToken (LexState *ls, int token) {90 switch (token) {91 case TK_NAME:92 case TK_STRING:93 case TK_NUMBER:94 save(ls, '\0');95 return luaZ_buffer(ls->buff);96 default:97 return luaX_token2str(ls, token);98 }99 }102 void luaX_lexerror (LexState *ls, const char *msg, int token) {103 char buff[MAXSRC];104 luaO_chunkid(buff, getstr(ls->source), MAXSRC);105 msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg);106 if (token)107 luaO_pushfstring(ls->L, "%s near " LUA_QS, msg, txtToken(ls, token));108 luaD_throw(ls->L, LUA_ERRSYNTAX);109 }112 void luaX_syntaxerror (LexState *ls, const char *msg) {113 luaX_lexerror(ls, msg, ls->t.token);114 }117 TString *luaX_newstring (LexState *ls, const char *str, size_t l) {118 lua_State *L = ls->L;119 TString *ts = luaS_newlstr(L, str, l);120 TValue *o = luaH_setstr(L, ls->fs->h, ts); /* entry for `str' */121 if (ttisnil(o))122 setbvalue(o, 1); /* make sure `str' will not be collected */123 return ts;124 }127 static void inclinenumber (LexState *ls) {128 int old = ls->current;129 lua_assert(currIsNewline(ls));130 next(ls); /* skip `\n' or `\r' */131 if (currIsNewline(ls) && ls->current != old)132 next(ls); /* skip `\n\r' or `\r\n' */133 if (++ls->linenumber >= MAX_INT)134 luaX_syntaxerror(ls, "chunk has too many lines");135 }138 void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) {139 ls->decpoint = '.';140 ls->L = L;141 ls->lookahead.token = TK_EOS; /* no look-ahead token */142 ls->z = z;143 ls->fs = NULL;144 ls->linenumber = 1;145 ls->lastline = 1;146 ls->source = source;147 luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */148 next(ls); /* read first char */149 }153 /*154 ** =======================================================155 ** LEXICAL ANALYZER156 ** =======================================================157 */161 static int check_next (LexState *ls, const char *set) {162 if (!strchr(set, ls->current))163 return 0;164 save_and_next(ls);165 return 1;166 }169 static void buffreplace (LexState *ls, char from, char to) {170 size_t n = luaZ_bufflen(ls->buff);171 char *p = luaZ_buffer(ls->buff);172 while (n--)173 if (p[n] == from) p[n] = to;174 }177 static void trydecpoint (LexState *ls, SemInfo *seminfo) {178 /* format error: try to update decimal point separator */179 struct lconv *cv = localeconv();180 char old = ls->decpoint;181 ls->decpoint = (cv ? cv->decimal_point[0] : '.');182 buffreplace(ls, old, ls->decpoint); /* try updated decimal separator */183 if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) {184 /* format error with correct decimal point: no more options */185 buffreplace(ls, ls->decpoint, '.'); /* undo change (for error message) */186 luaX_lexerror(ls, "malformed number", TK_NUMBER);187 }188 }191 /* LUA_NUMBER */192 static void read_numeral (LexState *ls, SemInfo *seminfo) {193 lua_assert(isdigit(ls->current));194 do {195 save_and_next(ls);196 } while (isdigit(ls->current) || ls->current == '.');197 if (check_next(ls, "Ee")) /* `E'? */198 check_next(ls, "+-"); /* optional exponent sign */199 while (isalnum(ls->current) || ls->current == '_')200 save_and_next(ls);201 save(ls, '\0');202 buffreplace(ls, '.', ls->decpoint); /* follow locale for decimal point */203 if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) /* format error? */204 trydecpoint(ls, seminfo); /* try to update decimal point separator */205 }208 static int skip_sep (LexState *ls) {209 int count = 0;210 int s = ls->current;211 lua_assert(s == '[' || s == ']');212 save_and_next(ls);213 while (ls->current == '=') {214 save_and_next(ls);215 count++;216 }217 return (ls->current == s) ? count : (-count) - 1;218 }221 static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {222 int cont = 0;223 (void)(cont); /* avoid warnings when `cont' is not used */224 save_and_next(ls); /* skip 2nd `[' */225 if (currIsNewline(ls)) /* string starts with a newline? */226 inclinenumber(ls); /* skip it */227 for (;;) {228 switch (ls->current) {229 case EOZ:230 luaX_lexerror(ls, (seminfo) ? "unfinished long string" :231 "unfinished long comment", TK_EOS);232 break; /* to avoid warnings */233 #if defined(LUA_COMPAT_LSTR)234 case '[': {235 if (skip_sep(ls) == sep) {236 save_and_next(ls); /* skip 2nd `[' */237 cont++;238 #if LUA_COMPAT_LSTR == 1239 if (sep == 0)240 luaX_lexerror(ls, "nesting of [[...]] is deprecated", '[');241 #endif242 }243 break;244 }245 #endif246 case ']': {247 if (skip_sep(ls) == sep) {248 save_and_next(ls); /* skip 2nd `]' */249 #if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2250 cont--;251 if (sep == 0 && cont >= 0) break;252 #endif253 goto endloop;254 }255 break;256 }257 case '\n':258 case '\r': {259 save(ls, '\n');260 inclinenumber(ls);261 if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */262 break;263 }264 default: {265 if (seminfo) save_and_next(ls);266 else next(ls);267 }268 }269 } endloop:270 if (seminfo)271 seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),272 luaZ_bufflen(ls->buff) - 2*(2 + sep));273 }276 static void read_string (LexState *ls, int del, SemInfo *seminfo) {277 save_and_next(ls);278 while (ls->current != del) {279 switch (ls->current) {280 case EOZ:281 luaX_lexerror(ls, "unfinished string", TK_EOS);282 continue; /* to avoid warnings */283 case '\n':284 case '\r':285 luaX_lexerror(ls, "unfinished string", TK_STRING);286 continue; /* to avoid warnings */287 case '\\': {288 int c;289 next(ls); /* do not save the `\' */290 switch (ls->current) {291 case 'a': c = '\a'; break;292 case 'b': c = '\b'; break;293 case 'f': c = '\f'; break;294 case 'n': c = '\n'; break;295 case 'r': c = '\r'; break;296 case 't': c = '\t'; break;297 case 'v': c = '\v'; break;298 case '\n': /* go through */299 case '\r': save(ls, '\n'); inclinenumber(ls); continue;300 case EOZ: continue; /* will raise an error next loop */301 default: {302 if (!isdigit(ls->current))303 save_and_next(ls); /* handles \\, \", \', and \? */304 else { /* \xxx */305 int i = 0;306 c = 0;307 do {308 c = 10*c + (ls->current-'0');309 next(ls);310 } while (++i<3 && isdigit(ls->current));311 if (c > UCHAR_MAX)312 luaX_lexerror(ls, "escape sequence too large", TK_STRING);313 save(ls, c);314 }315 continue;316 }317 }318 save(ls, c);319 next(ls);320 continue;321 }322 default:323 save_and_next(ls);324 }325 }326 save_and_next(ls); /* skip delimiter */327 seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,328 luaZ_bufflen(ls->buff) - 2);329 }332 static int llex (LexState *ls, SemInfo *seminfo) {333 luaZ_resetbuffer(ls->buff);334 for (;;) {335 switch (ls->current) {336 case '\n':337 case '\r': {338 inclinenumber(ls);339 continue;340 }341 case '-': {342 next(ls);343 if (ls->current != '-') return '-';344 /* else is a comment */345 next(ls);346 if (ls->current == '[') {347 int sep = skip_sep(ls);348 luaZ_resetbuffer(ls->buff); /* `skip_sep' may dirty the buffer */349 if (sep >= 0) {350 read_long_string(ls, NULL, sep); /* long comment */351 luaZ_resetbuffer(ls->buff);352 continue;353 }354 }355 /* else short comment */356 while (!currIsNewline(ls) && ls->current != EOZ)357 next(ls);358 continue;359 }360 case '[': {361 int sep = skip_sep(ls);362 if (sep >= 0) {363 read_long_string(ls, seminfo, sep);364 return TK_STRING;365 }366 else if (sep == -1) return '[';367 else luaX_lexerror(ls, "invalid long string delimiter", TK_STRING);368 }369 case '=': {370 next(ls);371 if (ls->current != '=') return '=';372 else { next(ls); return TK_EQ; }373 }374 case '<': {375 next(ls);376 if (ls->current != '=') return '<';377 else { next(ls); return TK_LE; }378 }379 case '>': {380 next(ls);381 if (ls->current != '=') return '>';382 else { next(ls); return TK_GE; }383 }384 case '~': {385 next(ls);386 if (ls->current != '=') return '~';387 else { next(ls); return TK_NE; }388 }389 case '"':390 case '\'': {391 read_string(ls, ls->current, seminfo);392 return TK_STRING;393 }394 case '.': {395 save_and_next(ls);396 if (check_next(ls, ".")) {397 if (check_next(ls, "."))398 return TK_DOTS; /* ... */399 else return TK_CONCAT; /* .. */400 }401 else if (!isdigit(ls->current)) return '.';402 else {403 read_numeral(ls, seminfo);404 return TK_NUMBER;405 }406 }407 case EOZ: {408 return TK_EOS;409 }410 default: {411 if (isspace(ls->current)) {412 lua_assert(!currIsNewline(ls));413 next(ls);414 continue;415 }416 else if (isdigit(ls->current)) {417 read_numeral(ls, seminfo);418 return TK_NUMBER;419 }420 else if (isalpha(ls->current) || ls->current == '_') {421 /* identifier or reserved word */422 TString *ts;423 do {424 save_and_next(ls);425 } while (isalnum(ls->current) || ls->current == '_');426 ts = luaX_newstring(ls, luaZ_buffer(ls->buff),427 luaZ_bufflen(ls->buff));428 if (ts->tsv.reserved > 0) /* reserved word? */429 return ts->tsv.reserved - 1 + FIRST_RESERVED;430 else {431 seminfo->ts = ts;432 return TK_NAME;433 }434 }435 else {436 int c = ls->current;437 next(ls);438 return c; /* single-char tokens (+ - / ...) */439 }440 }441 }442 }443 }446 void luaX_next (LexState *ls) {447 ls->lastline = ls->linenumber;448 if (ls->lookahead.token != TK_EOS) { /* is there a look-ahead token? */449 ls->t = ls->lookahead; /* use this one */450 ls->lookahead.token = TK_EOS; /* and discharge it */451 }452 else453 ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */454 }457 void luaX_lookahead (LexState *ls) {458 lua_assert(ls->lookahead.token == TK_EOS);459 ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);460 }