#include "./include/lexer.h"
#include "./include/macros.h"
#include "./include/token.h"
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Point `lexer` at `source` and rewind it to the first character.
 *
 * The lexer stores the pointer; it does not copy the text. `source` is
 * dereferenced immediately, so it must be a valid NUL-terminated string.
 * Positions start at row 1, column 1.
 */
void lexer_reset(lexer_t *lexer, char *source) {
  lexer->source = source;
  lexer->c = source[0];
  lexer->i = 0;
  lexer->row = 1;
  lexer->col = 1;
  lexer->finished = false;
}

/*
 * Allocate a lexer and initialise it over `source` (see lexer_reset).
 *
 * Calls die() if the allocation fails; die() is expected not to return
 * (NOTE(review): confirm against macros.h). The caller owns the result.
 */
lexer_t *init_lexer(char *source) {
  lexer_t *l = malloc(sizeof *l);
  if (l == NULL) {
    die("malloc on lexer");
  }
  lexer_reset(l, source);
  return l;
}

/*
 * Advance to the next character of the source.
 *
 * At the terminating NUL the lexer does not advance; it only sets
 * `finished`. Stepping onto a '\n' starts a new row with column 0, so the
 * character after the newline is column 1. Any other step advances the
 * column by one.
 */
void lexer_move(lexer_t *lexer) {
  if (lexer->c == '\0') {
    lexer->finished = true;
    return;
  }

  lexer->i++;
  lexer->c = lexer->source[lexer->i];
  if (lexer->c == '\n') {
    lexer->row++;
    lexer->col = 0;
  } else {
    lexer->col++;
  }
}

/* Skip over any run of whitespace at the current position. */
void lexer_ignore_whitespace(lexer_t *lexer) {
  /* <ctype.h> functions require a value representable as unsigned char. */
  while (isspace((unsigned char)lexer->c)) {
    lexer_move(lexer);
  }
}

/*
 * Advance to the end of the current line (the '\n' itself is not consumed)
 * or to the end of the input, whichever comes first.
 * Nothing in this file calls it.
 */
void lexer_skip_comment(lexer_t *lexer) {
  while (lexer->c != '\n' && lexer->c != '\0') {
    lexer_move(lexer);
  }
}

/*
 * Return true if `c` may appear in an identifier.
 *
 * Rejected: the punctuation that has its own token ( ) " ' # . , digits,
 * whitespace, and the terminating NUL. Rejecting NUL is what lets the
 * identifier loop terminate at end of input; rejecting whitespace keeps
 * the separator out of the identifier text.
 */
static bool is_valid_id_char(char c) {
  const unsigned char uc = (unsigned char)c;

  switch (c) {
  case '\0':
  case '(':
  case ')':
  case '"':
  case '\'':
  case '#':
  case '.':
    return false;
  default:
    return !isspace(uc) && !isdigit(uc);
  }
}

/*
 * Return a freshly allocated, NUL-terminated copy of the source text between
 * two lexer states: from `from->i` (inclusive) to `to->i` (exclusive).
 * Both states must refer to the same source, with `from` not after `to`.
 *
 * Calls die() on allocation failure, relying on it not to return, as
 * init_lexer does.
 */
static char *lexer_copy_span(const lexer_t *from, const lexer_t *to) {
  const size_t len = (size_t)(to->i - from->i);
  char *text = malloc(len + 1);

  if (text == NULL) {
    die("malloc on lexer");
  }
  memcpy(text, from->source + from->i, len);
  text[len] = '\0';
  return text;
}

/*
 * Lex a boolean literal. Must be called with the lexer on '#'.
 *
 * "#t" yields a TOKEN_BOOL with text "T", "#f" one with text "F"; both
 * characters are consumed and the token carries the position of the '#'.
 * For any other character after '#', only the '#' is consumed and NULL is
 * returned.
 */
token_t *lexer_collect_bool(lexer_t *lexer) {
  const lexer_t hash = *lexer; /* remembers where the literal started */

  lexer_move(lexer);
  if (lexer->c == 't') {
    lexer_move(lexer);
    return init_token(TOKEN_BOOL, "T", hash.row, hash.col);
  }
  if (lexer->c == 'f') {
    lexer_move(lexer);
    return init_token(TOKEN_BOOL, "F", hash.row, hash.col);
  }
  return NULL;
}

/*
 * Lex an identifier: the longest run of characters accepted by
 * is_valid_id_char. The token text is heap-allocated and the token carries
 * the position of the identifier's first character.
 */
token_t *lexer_collect_id(lexer_t *lexer) {
  const lexer_t start = *lexer;

  while (is_valid_id_char(lexer->c)) {
    lexer_move(lexer);
  }

  return init_token(TOKEN_ID, lexer_copy_span(&start, lexer), start.row,
                    start.col);
}

/*
 * Lex a number: a run of digits containing at most one '.'.
 *
 * Yields TOKEN_FLOAT if a '.' was consumed, TOKEN_INT otherwise. A second
 * '.' ends the number and is left for the next token. The token text is
 * heap-allocated and the token carries the position of the first digit.
 */
token_t *lexer_collect_num(lexer_t *lexer) {
  const lexer_t start = *lexer;
  bool is_float = false;

  while (isdigit((unsigned char)lexer->c) ||
         (lexer->c == '.' && !is_float)) {
    if (lexer->c == '.') {
      is_float = true;
    }
    lexer_move(lexer);
  }

  char *text = lexer_copy_span(&start, lexer);
  if (is_float) {
    return init_token(TOKEN_FLOAT, text, start.row, start.col);
  }
  return init_token(TOKEN_INT, text, start.row, start.col);
}

/*
 * Lex a string literal. Must be called with the lexer on the opening '"'.
 *
 * The token text is a heap-allocated copy of everything between the quotes,
 * taken verbatim (no escape sequences are interpreted). The token carries
 * the position of the opening quote, and the closing quote is consumed.
 *
 * If the input ends before a closing quote is found, the lexer is marked
 * finished and NULL is returned.
 */
token_t *lexer_collect_string(lexer_t *lexer) {
  const lexer_t open = *lexer;

  lexer_move(lexer); /* step past the opening quote */
  const lexer_t body = *lexer;

  while (lexer->c != '"' && lexer->c != '\0') {
    lexer_move(lexer);
  }

  if (lexer->c == '\0') {
    /* Unterminated literal: the input is exhausted, nothing to return. */
    lexer->finished = true;
    return NULL;
  }

  char *text = lexer_copy_span(&body, lexer);
  lexer_move(lexer); /* step past the closing quote */
  return init_token(TOKEN_STRING, text, open.row, open.col);
}

/* Advance the lexer by one character and hand back `token` unchanged. */
static token_t *lexer_move_with(lexer_t *lexer, token_t *token) {
  lexer_move(lexer);
  return token;
}

/*
 * Return the next token from the input.
 *
 * Leading whitespace is skipped first. At end of input the lexer is marked
 * finished and NULL is returned. NULL is also returned for a malformed
 * boolean or an unterminated string (see lexer_collect_bool and
 * lexer_collect_string).
 */
token_t *lexer_collect_next(lexer_t *lexer) {
  /* Skip before testing for end of input so trailing whitespace ends the
   * stream cleanly instead of reaching the token dispatch below. */
  lexer_ignore_whitespace(lexer);

  if (lexer->c == '\0') {
    lexer->finished = true;
    return NULL;
  }

  if (isdigit((unsigned char)lexer->c)) {
    return lexer_collect_num(lexer);
  }
  if (is_valid_id_char(lexer->c)) {
    return lexer_collect_id(lexer);
  }

  switch (lexer->c) {
  case '"':
    return lexer_collect_string(lexer);
  case '#':
    return lexer_collect_bool(lexer);
  case '(':
    return lexer_move_with(
        lexer, init_token(TOKEN_LPAREN, "(", lexer->row, lexer->col));
  case ')':
    return lexer_move_with(
        lexer, init_token(TOKEN_RPAREN, ")", lexer->row, lexer->col));
  case '\'':
    return lexer_move_with(
        lexer, init_token(TOKEN_QUOTE, "'", lexer->row, lexer->col));
  case '.':
    return lexer_move_with(
        lexer, init_token(TOKEN_PERIOD, ".", lexer->row, lexer->col));
  default:
    return NULL;
  }
}