summaryrefslogtreecommitdiff
path: root/source
diff options
context:
space:
mode:
Diffstat (limited to 'source')
-rw-r--r--source/forest.c320
1 files changed, 260 insertions, 60 deletions
diff --git a/source/forest.c b/source/forest.c
index ccec067..3e756fb 100644
--- a/source/forest.c
+++ b/source/forest.c
@@ -1,5 +1,6 @@
#include <assert.h>
#include <inttypes.h>
+#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
@@ -9,6 +10,8 @@
#include <stdlib.h>
#include <unistd.h>
+#include <gmp.h>
+
#define ARENA_IMPLEMENTATION
#include "arena.h"
@@ -25,8 +28,8 @@
#define STB_C_LEX_C_DECIMAL_INTS Y // "0|[1-9][0-9]*" CLEX_intlit
#define STB_C_LEX_C_HEX_INTS Y // "0x[0-9a-fA-F]+" CLEX_intlit
#define STB_C_LEX_C_OCTAL_INTS Y // "[0-7]+" CLEX_intlit
-#define STB_C_LEX_C_DECIMAL_FLOATS Y // "[0-9]*(.[0-9]*([eE][-+]?[0-9]+)?) CLEX_floatlit
-#define STB_C_LEX_C99_HEX_FLOATS N // "0x{hex}+(.{hex}*)?[pP][-+]?{hex}+ CLEX_floatlit
+#define STB_C_LEX_C_DECIMAL_FLOATS Y // "[0-9]*(.[0-9]*([eE][-+]?[0-9]+)?)" CLEX_floatlit
+#define STB_C_LEX_C99_HEX_FLOATS N // "0x{hex}+(.{hex}*)?[pP][-+]?{hex}+" CLEX_floatlit
#define STB_C_LEX_C_IDENTIFIERS Y // "[_a-zA-Z][_a-zA-Z0-9]*" CLEX_id
#define STB_C_LEX_C_DQ_STRINGS Y // double-quote-delimited strings with escapes CLEX_dqstring
#define STB_C_LEX_C_SQ_STRINGS N // single-quote-delimited strings with escapes CLEX_ssstring
@@ -41,9 +44,9 @@
#define STB_C_LEX_EQUAL_ARROW N // "=>" CLEX_eqarrow
#define STB_C_LEX_C_BITWISEEQ Y // "&=" CLEX_andeq "|=" CLEX_oreq "^=" CLEX_xoreq
#define STB_C_LEX_C_ARITHEQ Y // "+=" CLEX_pluseq "-=" CLEX_minuseq
- // "*=" CLEX_muleq "/=" CLEX_diveq "%=" CLEX_modeq
- // if both STB_C_LEX_SHIFTS & STB_C_LEX_ARITHEQ:
- // "<<=" CLEX_shleq ">>=" CLEX_shreq
+ // "*=" CLEX_muleq "/=" CLEX_diveq "%=" CLEX_modeq
+ // if both STB_C_LEX_SHIFTS & STB_C_LEX_ARITHEQ:
+ // "<<=" CLEX_shleq ">>=" CLEX_shreq
#define STB_C_LEX_PARSE_SUFFIXES N // letters after numbers are parsed as part of those numbers, and must be in suffix list below
#define STB_C_LEX_DECIMAL_SUFFIXES "" // decimal integer suffixes e.g. "uUlL" -- these are returned as-is in string storage
@@ -60,67 +63,245 @@
#define STB_C_LEX_FLOAT_NO_DECIMAL Y // allow floats that have no decimal point if they have an exponent
#define STB_C_LEX_DEFINE_ALL_TOKEN_NAMES N // if Y, all CLEX_ token names are defined, even if never returned
- // leaving it as N should help you catch config bugs
+ // leaving it as N should help you catch config bugs
#define STB_C_LEX_DISCARD_PREPROCESSOR Y // discard C-preprocessor directives (e.g. after prepocess
- // still have #line, #pragma, etc)
+ // still have #line, #pragma, etc)
-//#define STB_C_LEX_ISWHITE(str) ... // return length in bytes of whitespace characters if first char is whitespace
+ //#define STB_C_LEX_ISWHITE(str) ... // return length in bytes of whitespace characters if first char is whitespace
#define STB_C_LEXER_DEFINITIONS // This line prevents the header file from replacing your definitions
#define STB_C_LEXER_IMPLEMENTATION
#include "stb_c_lexer.h"
+static void
+print_token(FILE *stream, stb_lexer *lexer) {
+ switch (lexer->token) {
+ case CLEX_id : fprintf(stream, "_%s", lexer->string) ; break;
+ case CLEX_eq : fprintf(stream, "==") ; break;
+ case CLEX_noteq : fprintf(stream, "!=") ; break;
+ case CLEX_lesseq : fprintf(stream, "<=") ; break;
+ case CLEX_greatereq : fprintf(stream, ">=") ; break;
+ case CLEX_andand : fprintf(stream, "&&") ; break;
+ case CLEX_oror : fprintf(stream, "||") ; break;
+ case CLEX_shl : fprintf(stream, "<<") ; break;
+ case CLEX_shr : fprintf(stream, ">>") ; break;
+ case CLEX_plusplus : fprintf(stream, "++") ; break;
+ case CLEX_minusminus: fprintf(stream, "--") ; break;
+ case CLEX_arrow : fprintf(stream, "->") ; break;
+ case CLEX_andeq : fprintf(stream, "&=") ; break;
+ case CLEX_oreq : fprintf(stream, "|=") ; break;
+ case CLEX_xoreq : fprintf(stream, "^=") ; break;
+ case CLEX_pluseq : fprintf(stream, "+=") ; break;
+ case CLEX_minuseq : fprintf(stream, "-=") ; break;
+ case CLEX_muleq : fprintf(stream, "*=") ; break;
+ case CLEX_diveq : fprintf(stream, "/=") ; break;
+ case CLEX_modeq : fprintf(stream, "%%=") ; break;
+ case CLEX_shleq : fprintf(stream, "<<=") ; break;
+ case CLEX_shreq : fprintf(stream, ">>=") ; break;
+ case CLEX_eqarrow : fprintf(stream, "=>") ; break;
+ case CLEX_dqstring : fprintf(stream, "\"%s\"", lexer->string) ; break;
+ case CLEX_sqstring : fprintf(stream, "'\"%s\"'", lexer->string); break;
+ case CLEX_charlit : fprintf(stream, "'%s'", lexer->string) ; break;
+#if defined(STB__clex_int_as_double) && !defined(STB__CLEX_use_stdlib)
+ case CLEX_intlit : fprintf(stream, "#%g", lexer->real_number); break;
+#else
+ case CLEX_intlit : fprintf(stream, "#%ld", lexer->int_number); break;
+#endif
+ case CLEX_floatlit : fprintf(stream, "%g", lexer->real_number) ; break;
+ default:
+ if (lexer->token >= 0 && lexer->token < 256)
+ fprintf(stream, "%c", (int) lexer->token);
+ else {
+ fprintf(stream, "<<<UNKNOWN TOKEN %ld >>>\n", lexer->token);
+ }
+ break;
+ }
+}
+
+////////////////////////////////////////////////////////////////
+
+// TODO: GMP uses its own allocation hooks, so we may need custom allocators
+// for arena-friendly ownership or debugging. Example:
+// mp_set_memory_functions(my_alloc, my_realloc, my_free);
+// See: https://gmplib.org/manual/Custom-Allocation
+//
+// TODO: We currently leak GMP-owned memory because we do not call the
+// matching cleanup functions. Example:
+// mpz_init(x);
+// ...
+// mpz_clear(x);
+
+
+typedef enum {
+ Node_Type_Nil,
+ Node_Type_Integer,
+ Node_Type_Real,
+ Node_Type_String,
+ Node_Type_Symbol,
+ Node_Type_List,
+} Node_Type;
+
+typedef struct Node Node;
+struct Node {
+ Node_Type type;
+ union {
+ mpz_t integer;
+ String_View string;
+ String_View symbol;
+
+ struct {
+ Node *head;
+ Node *tail;
+ } list;
+ } as;
+ Node *next;
+};
+
+
+// TODO: Add diagnostics, symbol table, options, etc.
+typedef struct Context Context;
+struct Context {
+ Arena arena;
+};
+
+
+typedef struct Parser Parser;
+struct Parser {
+ Context *context;
+ stb_lexer lexer;
+ bool had_error;
+};
+
+static Node *
+parser_parse(Parser *parser);
+static Node *
+parser_parse_expr(Parser *parser);
+static Node *
+parser_parse_list(Parser *parser);
+static Node *
+parser_parse_atom(Parser *parser);
+
+static bool
+parser_accept(Parser *parser, long token);
+static bool
+parser_expect(Parser *parser, long token);
+static void
+parser_advance(Parser *parser);
+static void
+parser_error(Parser *parser, const char *message);
+
+static bool
+parser_accept(Parser *parser, long token) {
+ if (parser->lexer.token == token) {
+ parser_advance(parser);
+ return true;
+ }
+ return false;
+}
+
+static bool
+parser_expect(Parser *parser, long token) {
+ bool ok = parser_accept(parser, token);
+ if (!ok)
+ parser_error(parser, "parser_expect");
+ return ok;
+}
static void
-print_token(stb_lexer *lexer) {
- switch (lexer->token) {
- case CLEX_id : printf("_%s", lexer->string); break;
- case CLEX_eq : printf("=="); break;
- case CLEX_noteq : printf("!="); break;
- case CLEX_lesseq : printf("<="); break;
- case CLEX_greatereq : printf(">="); break;
- case CLEX_andand : printf("&&"); break;
- case CLEX_oror : printf("||"); break;
- case CLEX_shl : printf("<<"); break;
- case CLEX_shr : printf(">>"); break;
- case CLEX_plusplus : printf("++"); break;
- case CLEX_minusminus: printf("--"); break;
- case CLEX_arrow : printf("->"); break;
- case CLEX_andeq : printf("&="); break;
- case CLEX_oreq : printf("|="); break;
- case CLEX_xoreq : printf("^="); break;
- case CLEX_pluseq : printf("+="); break;
- case CLEX_minuseq : printf("-="); break;
- case CLEX_muleq : printf("*="); break;
- case CLEX_diveq : printf("/="); break;
- case CLEX_modeq : printf("%%="); break;
- case CLEX_shleq : printf("<<="); break;
- case CLEX_shreq : printf(">>="); break;
- case CLEX_eqarrow : printf("=>"); break;
- case CLEX_dqstring : printf("\"%s\"", lexer->string); break;
- case CLEX_sqstring : printf("'\"%s\"'", lexer->string); break;
- case CLEX_charlit : printf("'%s'", lexer->string); break;
- #if defined(STB__clex_int_as_double) && !defined(STB__CLEX_use_stdlib)
- case CLEX_intlit : printf("#%g", lexer->real_number); break;
- #else
- case CLEX_intlit : printf("#%ld", lexer->int_number); break;
- #endif
- case CLEX_floatlit : printf("%g", lexer->real_number); break;
- default:
- if (lexer->token >= 0 && lexer->token < 256)
- printf("%c", (int) lexer->token);
- else {
- printf("<<<UNKNOWN TOKEN %ld >>>\n", lexer->token);
- }
- break;
- }
+parser_advance(Parser *parser) {
+ stb_c_lexer_get_token(&parser->lexer);
+}
+
+// TODO: Error handling needs to be revised.
+static void
+parser_error(Parser *parser, const char *message) {
+ parser->had_error = true;
+ fprintf(stderr, "Error: %s\n", message);
+}
+
+static Node *
+parser_parse(Parser *parser) {
+ parser_advance(parser);
+ return parser_parse_expr(parser);
+}
+
+static Node *
+parser_parse_expr(Parser *parser) {
+ if (parser->lexer.token == '(')
+ return parser_parse_list(parser);
+ return parser_parse_atom(parser);
}
+static Node *
+parser_parse_list(Parser *parser) {
+ // TODO: We can move the allocation and zeroing to a separate function that accepts the context.
+ Node *list = arena_alloc(&parser->context->arena, sizeof(Node));
+ assert(list != NULL);
+ memset(list, 0, sizeof(Node));
+
+ list->type = Node_Type_List;
+
+ parser_expect(parser, '(');
+ while (!parser_accept(parser, ')')) {
+ Node *expr = parser_parse_expr(parser);
+
+ if (list->as.list.head == NULL) {
+ list->as.list.head = list->as.list.tail = expr;
+ } else {
+ list->as.list.tail = list->as.list.tail->next = expr;
+ }
+ }
+
+ return list;
+}
+
+static Node *
+parser_parse_atom(Parser *parser) {
+ // TODO: We can move the allocation and zeroing to a separate function that accepts the context.
+ Node *atom = arena_alloc(&parser->context->arena, sizeof(Node));
+ assert(atom != NULL);
+ memset(atom, 0, sizeof(Node));
+
+ switch (parser->lexer.token) {
+ case CLEX_intlit:
+ atom->type = Node_Type_Integer;
+ mpz_init(atom->as.integer);
+ mpz_set_si(atom->as.integer, parser->lexer.int_number);
+ parser_advance(parser);
+ break;
+ case CLEX_floatlit:
+ atom->type = Node_Type_Real;
+ // TODO: Store real numbers inside the AST.
+ assert(false && "Not implemented.");
+ (void) parser->lexer.real_number;
+ parser_advance(parser);
+ break;
+ case CLEX_dqstring:
+ atom->type = Node_Type_String;
+ atom->as.string = nob_sv_from_parts(parser->lexer.string, parser->lexer.string_len);
+ parser_advance(parser);
+ break;
+ case CLEX_id:
+ atom->type = Node_Type_Symbol;
+ atom->as.symbol = nob_sv_from_parts(parser->lexer.string, parser->lexer.string_len);
+ parser_advance(parser);
+ break;
+ default:
+ parser_error(parser, "parser_parse_atom");
+ break;
+ }
+
+ return atom;
+}
+
+////////////////////////////////////////////////////////////////
int
main(int argc, char **argv) {
+ fprintf(stdout, "GMP version: %s\n", gmp_version);
+
if (argc != 2) {
assert(argc >= 1);
fprintf(stderr, "Usage: %s <source>\n", argv[0]);
@@ -134,20 +315,39 @@ main(int argc, char **argv) {
char string_store[1024 * 1024];
int store_length = ARRAY_LEN(string_store);
- stb_lexer lexer = {0};
+ // stb_lexer lexer = {0};
+ // const char *input_stream = sb.items;
+ // const char *input_stream_end = sb.items + sb.count;
+ // stb_c_lexer_init(&lexer, input_stream, input_stream_end,
+ // string_store, store_length);
+
+ // while (stb_c_lexer_get_token(&lexer)) {
+ // if (lexer.token == CLEX_parse_error) {
+ // fprintf(stderr, "\n<<<PARSE ERROR>>>\n");
+ // break;
+ // }
+ // print_token(stdout, &lexer);
+ // fprintf(stdout, " ");
+ // }
+ // fprintf(stdout, "\n");
+
+ Context context = {
+ .arena = {0},
+ };
+
+ Parser parser = {
+ .context = &context,
+ .lexer = {0},
+ .had_error = false,
+ };
+
const char *input_stream = sb.items;
const char *input_stream_end = sb.items + sb.count;
- stb_c_lexer_init(&lexer, input_stream, input_stream_end,
+ stb_c_lexer_init(&parser.lexer, input_stream, input_stream_end,
string_store, store_length);
- while (stb_c_lexer_get_token(&lexer)) {
- if (lexer.token == CLEX_parse_error) {
- fprintf(stderr, "\n<<<PARSE ERROR>>>\n");
- break;
- }
- print_token(&lexer);
- printf(" ");
- }
+ Node *root = parser_parse(&parser);
+ (void) root;
return 0;
}