summaryrefslogtreecommitdiff
path: root/src/lexer.c
diff options
context:
space:
mode:
authorPreston Pan <preston@nullring.xyz>2023-01-02 22:31:49 -0800
committerPreston Pan <preston@nullring.xyz>2023-01-02 22:31:49 -0800
commit64feef1b9ea72adf7ba32998e9dca7d507607498 (patch)
treea409e61877bb51aa6fb2477175dabbf3dbccf298 /src/lexer.c
a lot of stuff.
Diffstat (limited to 'src/lexer.c')
-rw-r--r--src/lexer.c150
1 files changed, 150 insertions, 0 deletions
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
index 0000000..02417a1
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,150 @@
+#include "./include/lexer.h"
+#include "./include/macros.h"
+#include "./include/token.h"
+
+#include <ctype.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Point an existing lexer at a new source string and rewind it.
+ *
+ * lexer:  lexer to (re)initialise; every field touched here is overwritten.
+ * source: text to scan. Only the pointer is stored (no copy is made) and its
+ *         first character is read immediately, so it must not be NULL.
+ */
+void lexer_reset(lexer_t *lexer, char *source) {
+  /* Bookkeeping first: end-of-input flag and position counters. */
+  lexer->finished = false;
+  lexer->row = 1;
+  lexer->col = 1;
+  lexer->i = 0;
+
+  /* Then attach the text and load its first character as the current one. */
+  lexer->source = source;
+  lexer->c = *source;
+}
+
+/*
+ * Allocate a lexer on the heap and start it at the beginning of `source`.
+ *
+ * Calls die("malloc on lexer") when the allocation fails. The returned lexer
+ * comes from malloc(); nothing in the visible code frees it.
+ */
+lexer_t *init_lexer(char *source) {
+  lexer_t *fresh = malloc(sizeof *fresh);
+  if (!fresh)
+    die("malloc on lexer");
+  lexer_reset(fresh, source);
+  return fresh;
+}
+
+/*
+ * Advance the lexer by one character.
+ *
+ * At end of input (current character is '\0') nothing is consumed; the lexer
+ * is only flagged as finished. Otherwise the current character is consumed
+ * and the 1-based row/col counters (see lexer_reset) are updated to describe
+ * the new current character:
+ *   - consuming '\n' moves to column 1 of the next row;
+ *   - consuming anything else moves one column to the right.
+ *
+ * The previous version never incremented col (it was only ever reset to 0),
+ * so column information was unusable.
+ */
+void lexer_move(lexer_t *lexer) {
+  if (lexer->c == '\0') {
+    lexer->finished = true;
+    return;
+  }
+
+  /* Update the position based on the character being left behind. */
+  if (lexer->c == '\n') {
+    lexer->row++;
+    lexer->col = 1;
+  } else {
+    lexer->col++;
+  }
+
+  lexer->i++;
+  lexer->c = lexer->source[lexer->i];
+}
+
+/*
+ * Consume characters for as long as the current one is whitespace according
+ * to isspace(). Stops at the first non-whitespace character, which includes
+ * the terminating '\0'.
+ */
+void lexer_ignore_whitespace(lexer_t *lexer) {
+  /* <ctype.h> functions are undefined for negative values other than EOF,
+   * so a plain char (possibly signed) is converted to unsigned char first. */
+  while (isspace((unsigned char)lexer->c))
+    lexer_move(lexer);
+}
+
+/*
+ * Advance until the current character is a newline or the terminating '\0'.
+ * The newline itself is not consumed. This function does not check for a
+ * comment-introducing character; it only skips to the end of the line.
+ */
+void lexer_skip_comment(lexer_t *lexer) {
+  for (;;) {
+    const char current = lexer->c;
+    if (current == '\n' || current == '\0')
+      return;
+    lexer_move(lexer);
+  }
+}
+
+/*
+ * Report whether `c` may appear inside an identifier.
+ *
+ * Rejected: the terminating '\0', any whitespace, decimal digits, and the
+ * punctuation that starts other tokens: ( ) " ' # .
+ * Everything else is accepted.
+ *
+ * '\0' and whitespace must be rejected here: callers loop "while the current
+ * character is a valid identifier character", and the lexer never advances
+ * past '\0', so accepting it makes that loop spin forever; accepting
+ * whitespace glues separate identifiers together.
+ */
+static bool is_valid_id_char(char c) {
+  /* <ctype.h> functions require a value representable as unsigned char. */
+  const unsigned char uc = (unsigned char)c;
+
+  if (c == '\0' || isspace(uc) || isdigit(uc))
+    return false;
+
+  switch (c) {
+  case '(':
+  case ')':
+  case '"':
+  case '\'':
+  case '#':
+  case '.':
+    return false;
+  default:
+    return true;
+  }
+}
+
+/*
+ * Build a freshly allocated one-character string holding `c`.
+ *
+ * Returns a malloc()ed, NUL-terminated buffer of two bytes; the caller owns
+ * it and must free() it. Allocation failure is reported through die(), the
+ * same way init_lexer reports it, instead of writing through a NULL pointer.
+ */
+static char *char_to_string(char c) {
+  char *s = malloc(2);
+  if (s == NULL)
+    die("malloc on char_to_string");
+  s[0] = c;
+  s[1] = '\0';
+  return s;
+}
+
+/*
+ * Lex a boolean literal. Expects the current character to be the '#' that
+ * introduces it.
+ *
+ * "#t" yields a TOKEN_BOOL with value "T", "#f" one with value "F"; the
+ * row/col passed to init_token are those of the letter. On success both the
+ * '#' and the letter are consumed. If any other character follows '#', NULL
+ * is returned and that character is left as the current one.
+ *
+ * The previous version returned without consuming the letter, so the next
+ * call to the lexer re-read the 't'/'f' as the start of an identifier.
+ */
+token_t *lexer_collect_bool(lexer_t *lexer) {
+  token_t *token;
+
+  lexer_move(lexer); /* step over '#' */
+
+  if (lexer->c == 't')
+    token = init_token(TOKEN_BOOL, "T", lexer->row, lexer->col);
+  else if (lexer->c == 'f')
+    token = init_token(TOKEN_BOOL, "F", lexer->row, lexer->col);
+  else
+    return NULL;
+
+  lexer_move(lexer); /* step over the 't' / 'f' that was just recognised */
+  return token;
+}
+
+/*
+ * Lex an identifier: the longest run of characters accepted by
+ * is_valid_id_char(), starting at the current character.
+ *
+ * Returns a TOKEN_ID whose value is a malloc()ed, NUL-terminated copy of the
+ * run (empty if the current character is not an identifier character). The
+ * row/col handed to init_token are the lexer position after the run.
+ * NOTE(review): the buffer is passed straight to init_token; presumably the
+ * token takes ownership of it -- confirm in token.c.
+ *
+ * Allocation failures are reported through die(), which is assumed not to
+ * return (TODO confirm in macros.h).
+ */
+token_t *lexer_collect_id(lexer_t *lexer) {
+  size_t len = 0; /* characters stored so far, excluding the terminator */
+  char *ret = malloc(1);
+  if (ret == NULL)
+    die("malloc on lexer");
+  ret[0] = '\0';
+
+  /* The explicit '\0' test guarantees termination at end of input, where
+   * lexer_move() no longer advances. */
+  while (lexer->c != '\0' && is_valid_id_char(lexer->c)) {
+    /* Room for the new character plus the terminator. The character is
+     * written directly instead of strcat()ing a temporary one-character
+     * string, which was leaked on every iteration. */
+    char *grown = realloc(ret, len + 2);
+    if (grown == NULL)
+      die("realloc on lexer");
+    ret = grown;
+    ret[len++] = lexer->c;
+    ret[len] = '\0';
+    lexer_move(lexer);
+  }
+  return init_token(TOKEN_ID, ret, lexer->row, lexer->col);
+}
+
+/*
+ * Lex a numeric literal: decimal digits with at most one '.'.
+ *
+ * Scanning stops at the first character that is neither a digit nor the
+ * first '.' seen. Returns a TOKEN_FLOAT if a '.' was consumed, otherwise a
+ * TOKEN_INT; the value is the malloc()ed, NUL-terminated text of the literal
+ * and the row/col handed to init_token are the lexer position after it.
+ * NOTE(review): the buffer is passed straight to init_token; presumably the
+ * token takes ownership of it -- confirm in token.c.
+ *
+ * Allocation failures are reported through die(), which is assumed not to
+ * return (TODO confirm in macros.h).
+ */
+token_t *lexer_collect_num(lexer_t *lexer) {
+  bool is_float = false;
+  size_t len = 0; /* characters stored so far, excluding the terminator */
+  char *ret = malloc(1);
+  if (ret == NULL)
+    die("malloc on lexer");
+  ret[0] = '\0';
+
+  /* isdigit() needs a value representable as unsigned char. */
+  while (isdigit((unsigned char)lexer->c) || (lexer->c == '.' && !is_float)) {
+    if (lexer->c == '.')
+      is_float = true;
+
+    /* Append in place; the old strcat(char_to_string()) leaked a
+     * two-byte allocation per character. */
+    char *grown = realloc(ret, len + 2);
+    if (grown == NULL)
+      die("realloc on lexer");
+    ret = grown;
+    ret[len++] = lexer->c;
+    ret[len] = '\0';
+    lexer_move(lexer);
+  }
+
+  if (is_float)
+    return init_token(TOKEN_FLOAT, ret, lexer->row, lexer->col);
+  return init_token(TOKEN_INT, ret, lexer->row, lexer->col);
+}
+
+/*
+ * Lex a string literal. Expects the current character to be the opening '"'.
+ *
+ * Everything up to the next '"' is copied verbatim (no escape sequences are
+ * interpreted) and both quotes are consumed. Returns a TOKEN_STRING whose
+ * value is the malloc()ed, NUL-terminated contents; the row/col handed to
+ * init_token are the lexer position after the closing quote.
+ * NOTE(review): the buffer is passed straight to init_token; presumably the
+ * token takes ownership of it -- confirm in token.c.
+ *
+ * If the input ends before a closing quote is found, the partial buffer is
+ * freed and NULL is returned. The previous version looped forever in that
+ * case because lexer_move() does not advance past '\0'.
+ *
+ * Allocation failures are reported through die(), which is assumed not to
+ * return (TODO confirm in macros.h).
+ */
+token_t *lexer_collect_string(lexer_t *lexer) {
+  size_t len = 0; /* characters stored so far, excluding the terminator */
+  char *ret = malloc(1);
+  if (ret == NULL)
+    die("malloc on lexer");
+  ret[0] = '\0';
+
+  lexer_move(lexer); /* step over the opening quote */
+
+  while (lexer->c != '"') {
+    if (lexer->c == '\0') {
+      /* Unterminated string literal. */
+      free(ret);
+      return NULL;
+    }
+
+    /* Append in place; the old strcat(char_to_string()) leaked a
+     * two-byte allocation per character. */
+    char *grown = realloc(ret, len + 2);
+    if (grown == NULL)
+      die("realloc on lexer");
+    ret = grown;
+    ret[len++] = lexer->c;
+    ret[len] = '\0';
+    lexer_move(lexer);
+  }
+
+  lexer_move(lexer); /* step over the closing quote */
+  return init_token(TOKEN_STRING, ret, lexer->row, lexer->col);
+}
+
+/*
+ * Advance the lexer by one character and hand back `token` unchanged.
+ * Lets callers build a token for the current character and step past that
+ * character in a single return expression.
+ */
+static token_t *lexer_move_with(lexer_t *lexer, token_t *token) {
+  token_t *const result = token;
+  lexer_move(lexer);
+  return result;
+}
+
+/*
+ * Produce the next token from the input.
+ *
+ * Leading whitespace is skipped first. Then, depending on the current
+ * character:
+ *   '"'        -> string literal   (lexer_collect_string)
+ *   '#'        -> boolean literal  (lexer_collect_bool)
+ *   ( ) ' .    -> single-character TOKEN_LPAREN / TOKEN_RPAREN /
+ *                 TOKEN_QUOTE / TOKEN_PERIOD, positioned at that character
+ *   digit      -> number           (lexer_collect_num)
+ *   identifier character -> identifier (lexer_collect_id)
+ *
+ * Returns NULL at end of input, in which case lexer->finished is set to
+ * true. NULL is also returned when the current character starts no token or
+ * when a sub-lexer rejects its input; callers can tell the cases apart by
+ * checking lexer->finished.
+ *
+ * End of input is tested after skipping whitespace, so trailing whitespace
+ * ends the token stream cleanly instead of reaching the identifier lexer
+ * with '\0' as the current character.
+ */
+token_t *lexer_collect_next(lexer_t *lexer) {
+  lexer_ignore_whitespace(lexer);
+
+  if (lexer->c == '\0') {
+    lexer->finished = true;
+    return NULL;
+  }
+
+  /* Characters that unambiguously start (or are) a token. None of them is
+   * a digit or an identifier character, so testing them first does not
+   * change which branch a given character takes. */
+  switch (lexer->c) {
+  case '"':
+    return lexer_collect_string(lexer);
+  case '#':
+    return lexer_collect_bool(lexer);
+  case '(':
+    return lexer_move_with(
+        lexer, init_token(TOKEN_LPAREN, "(", lexer->row, lexer->col));
+  case ')':
+    return lexer_move_with(
+        lexer, init_token(TOKEN_RPAREN, ")", lexer->row, lexer->col));
+  case '\'':
+    return lexer_move_with(
+        lexer, init_token(TOKEN_QUOTE, "'", lexer->row, lexer->col));
+  case '.':
+    return lexer_move_with(
+        lexer, init_token(TOKEN_PERIOD, ".", lexer->row, lexer->col));
+  default:
+    break;
+  }
+
+  /* isdigit() needs a value representable as unsigned char. */
+  if (isdigit((unsigned char)lexer->c))
+    return lexer_collect_num(lexer);
+  if (is_valid_id_char(lexer->c))
+    return lexer_collect_id(lexer);
+
+  return NULL;
+}