diff options
Diffstat (limited to 'source/forest.c')
| -rw-r--r-- | source/forest.c | 320 |
1 files changed, 260 insertions, 60 deletions
diff --git a/source/forest.c b/source/forest.c index ccec067..3e756fb 100644 --- a/source/forest.c +++ b/source/forest.c @@ -1,5 +1,6 @@ #include <assert.h> #include <inttypes.h> +#include <stdarg.h> #include <stdbool.h> #include <stdint.h> #include <string.h> @@ -9,6 +10,8 @@ #include <stdlib.h> #include <unistd.h> +#include <gmp.h> + #define ARENA_IMPLEMENTATION #include "arena.h" @@ -25,8 +28,8 @@ #define STB_C_LEX_C_DECIMAL_INTS Y // "0|[1-9][0-9]*" CLEX_intlit #define STB_C_LEX_C_HEX_INTS Y // "0x[0-9a-fA-F]+" CLEX_intlit #define STB_C_LEX_C_OCTAL_INTS Y // "[0-7]+" CLEX_intlit -#define STB_C_LEX_C_DECIMAL_FLOATS Y // "[0-9]*(.[0-9]*([eE][-+]?[0-9]+)?) CLEX_floatlit -#define STB_C_LEX_C99_HEX_FLOATS N // "0x{hex}+(.{hex}*)?[pP][-+]?{hex}+ CLEX_floatlit +#define STB_C_LEX_C_DECIMAL_FLOATS Y // "[0-9]*(.[0-9]*([eE][-+]?[0-9]+)?)" CLEX_floatlit +#define STB_C_LEX_C99_HEX_FLOATS N // "0x{hex}+(.{hex}*)?[pP][-+]?{hex}+" CLEX_floatlit #define STB_C_LEX_C_IDENTIFIERS Y // "[_a-zA-Z][_a-zA-Z0-9]*" CLEX_id #define STB_C_LEX_C_DQ_STRINGS Y // double-quote-delimited strings with escapes CLEX_dqstring #define STB_C_LEX_C_SQ_STRINGS N // single-quote-delimited strings with escapes CLEX_ssstring @@ -41,9 +44,9 @@ #define STB_C_LEX_EQUAL_ARROW N // "=>" CLEX_eqarrow #define STB_C_LEX_C_BITWISEEQ Y // "&=" CLEX_andeq "|=" CLEX_oreq "^=" CLEX_xoreq #define STB_C_LEX_C_ARITHEQ Y // "+=" CLEX_pluseq "-=" CLEX_minuseq - // "*=" CLEX_muleq "/=" CLEX_diveq "%=" CLEX_modeq - // if both STB_C_LEX_SHIFTS & STB_C_LEX_ARITHEQ: - // "<<=" CLEX_shleq ">>=" CLEX_shreq + // "*=" CLEX_muleq "/=" CLEX_diveq "%=" CLEX_modeq + // if both STB_C_LEX_SHIFTS & STB_C_LEX_ARITHEQ: + // "<<=" CLEX_shleq ">>=" CLEX_shreq #define STB_C_LEX_PARSE_SUFFIXES N // letters after numbers are parsed as part of those numbers, and must be in suffix list below #define STB_C_LEX_DECIMAL_SUFFIXES "" // decimal integer suffixes e.g. "uUlL" -- these are returned as-is in string storage @@ -60,67 +63,245 @@ #define STB_C_LEX_FLOAT_NO_DECIMAL Y // allow floats that have no decimal point if they have an exponent #define STB_C_LEX_DEFINE_ALL_TOKEN_NAMES N // if Y, all CLEX_ token names are defined, even if never returned - // leaving it as N should help you catch config bugs + // leaving it as N should help you catch config bugs #define STB_C_LEX_DISCARD_PREPROCESSOR Y // discard C-preprocessor directives (e.g. after prepocess - // still have #line, #pragma, etc) + // still have #line, #pragma, etc) -//#define STB_C_LEX_ISWHITE(str) ... // return length in bytes of whitespace characters if first char is whitespace + //#define STB_C_LEX_ISWHITE(str) ... // return length in bytes of whitespace characters if first char is whitespace #define STB_C_LEXER_DEFINITIONS // This line prevents the header file from replacing your definitions #define STB_C_LEXER_IMPLEMENTATION #include "stb_c_lexer.h" +static void +print_token(FILE *stream, stb_lexer *lexer) { + switch (lexer->token) { + case CLEX_id : fprintf(stream, "_%s", lexer->string) ; break; + case CLEX_eq : fprintf(stream, "==") ; break; + case CLEX_noteq : fprintf(stream, "!=") ; break; + case CLEX_lesseq : fprintf(stream, "<=") ; break; + case CLEX_greatereq : fprintf(stream, ">=") ; break; + case CLEX_andand : fprintf(stream, "&&") ; break; + case CLEX_oror : fprintf(stream, "||") ; break; + case CLEX_shl : fprintf(stream, "<<") ; break; + case CLEX_shr : fprintf(stream, ">>") ; break; + case CLEX_plusplus : fprintf(stream, "++") ; break; + case CLEX_minusminus: fprintf(stream, "--") ; break; + case CLEX_arrow : fprintf(stream, "->") ; break; + case CLEX_andeq : fprintf(stream, "&=") ; break; + case CLEX_oreq : fprintf(stream, "|=") ; break; + case CLEX_xoreq : fprintf(stream, "^=") ; break; + case CLEX_pluseq : fprintf(stream, "+=") ; break; + case CLEX_minuseq : fprintf(stream, "-=") ; break; + case CLEX_muleq : fprintf(stream, "*=") ; break; + case CLEX_diveq : fprintf(stream, "/=") ; break; + case CLEX_modeq : fprintf(stream, "%%=") ; break; + case CLEX_shleq : fprintf(stream, "<<=") ; break; + case CLEX_shreq : fprintf(stream, ">>=") ; break; + case CLEX_eqarrow : fprintf(stream, "=>") ; break; + case CLEX_dqstring : fprintf(stream, "\"%s\"", lexer->string) ; break; + case CLEX_sqstring : fprintf(stream, "'\"%s\"'", lexer->string); break; + case CLEX_charlit : fprintf(stream, "'%s'", lexer->string) ; break; +#if defined(STB__clex_int_as_double) && !defined(STB__CLEX_use_stdlib) + case CLEX_intlit : fprintf(stream, "#%g", lexer->real_number); break; +#else + case CLEX_intlit : fprintf(stream, "#%ld", lexer->int_number); break; +#endif + case CLEX_floatlit : fprintf(stream, "%g", lexer->real_number) ; break; + default: + if (lexer->token >= 0 && lexer->token < 256) + fprintf(stream, "%c", (int) lexer->token); + else { + fprintf(stream, "<<<UNKNOWN TOKEN %ld >>>\n", lexer->token); + } + break; + } +} + +//////////////////////////////////////////////////////////////// + +// TODO: GMP uses its own allocation hooks, so we may need custom allocators +// for arena-friendly ownership or debugging. Example: +// mp_set_memory_functions(my_alloc, my_realloc, my_free); +// See: https://gmplib.org/manual/Custom-Allocation +// +// TODO: We currently leak GMP-owned memory because we do not call the +// matching cleanup functions. Example: +// mpz_init(x); +// ... +// mpz_clear(x); + + +typedef enum { + Node_Type_Nil, + Node_Type_Integer, + Node_Type_Real, + Node_Type_String, + Node_Type_Symbol, + Node_Type_List, +} Node_Type; + +typedef struct Node Node; +struct Node { + Node_Type type; + union { + mpz_t integer; + String_View string; + String_View symbol; + + struct { + Node *head; + Node *tail; + } list; + } as; + Node *next; +}; + + +// TODO: Add diagnostics, symbol table, options, etc. +typedef struct Context Context; +struct Context { + Arena arena; +}; + + +typedef struct Parser Parser; +struct Parser { + Context *context; + stb_lexer lexer; + bool had_error; +}; + +static Node * +parser_parse(Parser *parser); +static Node * +parser_parse_expr(Parser *parser); +static Node * +parser_parse_list(Parser *parser); +static Node * +parser_parse_atom(Parser *parser); + +static bool +parser_accept(Parser *parser, long token); +static bool +parser_expect(Parser *parser, long token); +static void +parser_advance(Parser *parser); +static void +parser_error(Parser *parser, const char *message); + +static bool +parser_accept(Parser *parser, long token) { + if (parser->lexer.token == token) { + parser_advance(parser); + return true; + } + return false; +} + +static bool +parser_expect(Parser *parser, long token) { + bool ok = parser_accept(parser, token); + if (!ok) + parser_error(parser, "parser_expect"); + return ok; +} static void -print_token(stb_lexer *lexer) { - switch (lexer->token) { - case CLEX_id : printf("_%s", lexer->string); break; - case CLEX_eq : printf("=="); break; - case CLEX_noteq : printf("!="); break; - case CLEX_lesseq : printf("<="); break; - case CLEX_greatereq : printf(">="); break; - case CLEX_andand : printf("&&"); break; - case CLEX_oror : printf("||"); break; - case CLEX_shl : printf("<<"); break; - case CLEX_shr : printf(">>"); break; - case CLEX_plusplus : printf("++"); break; - case CLEX_minusminus: printf("--"); break; - case CLEX_arrow : printf("->"); break; - case CLEX_andeq : printf("&="); break; - case CLEX_oreq : printf("|="); break; - case CLEX_xoreq : printf("^="); break; - case CLEX_pluseq : printf("+="); break; - case CLEX_minuseq : printf("-="); break; - case CLEX_muleq : printf("*="); break; - case CLEX_diveq : printf("/="); break; - case CLEX_modeq : printf("%%="); break; - case CLEX_shleq : printf("<<="); break; - case CLEX_shreq : printf(">>="); break; - case CLEX_eqarrow : printf("=>"); break; - case CLEX_dqstring : printf("\"%s\"", lexer->string); break; - case CLEX_sqstring : printf("'\"%s\"'", lexer->string); break; - case CLEX_charlit : printf("'%s'", lexer->string); break; - #if defined(STB__clex_int_as_double) && !defined(STB__CLEX_use_stdlib) - case CLEX_intlit : printf("#%g", lexer->real_number); break; - #else - case CLEX_intlit : printf("#%ld", lexer->int_number); break; - #endif - case CLEX_floatlit : printf("%g", lexer->real_number); break; - default: - if (lexer->token >= 0 && lexer->token < 256) - printf("%c", (int) lexer->token); - else { - printf("<<<UNKNOWN TOKEN %ld >>>\n", lexer->token); - } - break; - } +parser_advance(Parser *parser) { + stb_c_lexer_get_token(&parser->lexer); +} + +// TODO: Error handling needs to be revised. +static void +parser_error(Parser *parser, const char *message) { + parser->had_error = true; + fprintf(stderr, "Error: %s\n", message); +} + +static Node * +parser_parse(Parser *parser) { + parser_advance(parser); + return parser_parse_expr(parser); +} + +static Node * +parser_parse_expr(Parser *parser) { + if (parser->lexer.token == '(') + return parser_parse_list(parser); + return parser_parse_atom(parser); } +static Node * +parser_parse_list(Parser *parser) { + // TODO: We can move the allocation and zeroing to a separate function that accepts the context. + Node *list = arena_alloc(&parser->context->arena, sizeof(Node)); + assert(list != NULL); + memset(list, 0, sizeof(Node)); + + list->type = Node_Type_List; + + parser_expect(parser, '('); + while (!parser_accept(parser, ')')) { + Node *expr = parser_parse_expr(parser); + + if (list->as.list.head == NULL) { + list->as.list.head = list->as.list.tail = expr; + } else { + list->as.list.tail = list->as.list.tail->next = expr; + } + } + + return list; +} + +static Node * +parser_parse_atom(Parser *parser) { + // TODO: We can move the allocation and zeroing to a separate function that accepts the context. + Node *atom = arena_alloc(&parser->context->arena, sizeof(Node)); + assert(atom != NULL); + memset(atom, 0, sizeof(Node)); + + switch (parser->lexer.token) { + case CLEX_intlit: + atom->type = Node_Type_Integer; + mpz_init(atom->as.integer); + mpz_set_si(atom->as.integer, parser->lexer.int_number); + parser_advance(parser); + break; + case CLEX_floatlit: + atom->type = Node_Type_Real; + // TODO: Store real numbers inside the AST. + assert(false && "Not implemented."); + (void) parser->lexer.real_number; + parser_advance(parser); + break; + case CLEX_dqstring: + atom->type = Node_Type_String; + atom->as.string = nob_sv_from_parts(parser->lexer.string, parser->lexer.string_len); + parser_advance(parser); + break; + case CLEX_id: + atom->type = Node_Type_Symbol; + atom->as.symbol = nob_sv_from_parts(parser->lexer.string, parser->lexer.string_len); + parser_advance(parser); + break; + default: + parser_error(parser, "parser_parse_atom"); + break; + } + + return atom; +} + +//////////////////////////////////////////////////////////////// int main(int argc, char **argv) { + fprintf(stdout, "GMP version: %s\n", gmp_version); + if (argc != 2) { assert(argc >= 1); fprintf(stderr, "Usage: %s <source>\n", argv[0]); @@ -134,20 +315,39 @@ main(int argc, char **argv) { char string_store[1024 * 1024]; int store_length = ARRAY_LEN(string_store); - stb_lexer lexer = {0}; + // stb_lexer lexer = {0}; + // const char *input_stream = sb.items; + // const char *input_stream_end = sb.items + sb.count; + // stb_c_lexer_init(&lexer, input_stream, input_stream_end, + // string_store, store_length); + + // while (stb_c_lexer_get_token(&lexer)) { + // if (lexer.token == CLEX_parse_error) { + // fprintf(stderr, "\n<<<PARSE ERROR>>>\n"); + // break; + // } + // print_token(stdout, &lexer); + // fprintf(stdout, " "); + // } + // fprintf(stdout, "\n"); + + Context context = { + .arena = {0}, + }; + + Parser parser = { + .context = &context, + .lexer = {0}, + .had_error = false, + }; + const char *input_stream = sb.items; const char *input_stream_end = sb.items + sb.count; - stb_c_lexer_init(&lexer, input_stream, input_stream_end, + stb_c_lexer_init(&parser.lexer, input_stream, input_stream_end, string_store, store_length); - while (stb_c_lexer_get_token(&lexer)) { - if (lexer.token == CLEX_parse_error) { - fprintf(stderr, "\n<<<PARSE ERROR>>>\n"); - break; - } - print_token(&lexer); - printf(" "); - } + Node *root = parser_parse(&parser); + (void) root; return 0; } |
