From 4adade92f2b4f59ff09bbbd056f6ea2e366730ed Mon Sep 17 00:00:00 2001 From: Pedro Souza Date: Wed, 3 Apr 2024 06:34:09 -0300 Subject: done with bu-parser --- bu-parser.c | 134 +++++++++++++++++++++++++++++++++++++----------------------- bu-parser.h | 6 +-- 2 files changed, 84 insertions(+), 56 deletions(-) diff --git a/bu-parser.c b/bu-parser.c index 624bedc..b077938 100644 --- a/bu-parser.c +++ b/bu-parser.c @@ -1,4 +1,6 @@ #include "bu-parser.h" +#include "tree.h" +#include void free_node(node *n) { if (n->lhs) free_node(n->lhs); @@ -6,6 +8,18 @@ void free_node(node *n) { free(n); } +void fprint_stack(FILE *stream, struct expr_parser *ep) { + for (size_t i = 0; i < ep->stack_idx; i++) { + struct stackmember m = ep->stack[i]; + fprintf(stream, "%zu: %d ", i, m.id); + if (m.id == STACKMEMBER_EXPR) { + fprintTree(stream, m.expr, 0); + } else { + fprintf(stream, "%c\n", m.terminal); + } + } +} + struct expr_parser expr_parser_init(lexer_fn src, void* lex_data) { return (struct expr_parser){ .lex = src, @@ -25,9 +39,8 @@ void expr_parser_finish(struct expr_parser *ep) { } struct stackmember stack_pos(struct expr_parser *ep, size_t pos) { - ssize_t npos = ep->stack_idx - pos; + ssize_t npos = ep->stack_idx - pos - 1; if (npos < 0) { - fprintf(stderr, "bad stack idx"); return (struct stackmember) { .id = (enum stackmember_id)TOKEN_OOB, .expr = NULL }; } @@ -35,23 +48,28 @@ struct stackmember stack_pos(struct expr_parser *ep, size_t pos) { } void shift(struct expr_parser *ep) { + struct chartoken lahead = ep->lahead; ep->stack[ep->stack_idx] = (struct stackmember){ - .id = (enum stackmember_id)ep->lahead.id, - .terminal = ep->lahead.c, + .id = (enum stackmember_id)lahead.id, + .terminal = lahead.c, }; ep->stack_idx++; ep->lahead = ep->lex(ep->lex_data); } +#define BAIL_REDUCE_IF(COND, ERRMSG) \ + { if(COND) { \ + ep->st = PARSER_ERR; fprintf(stderr, "%s: " ERRMSG "\n", __func__); \ + expr_parser_debug_print(stderr, ep); \ + return; \ + }} + void var_expr(struct expr_parser *ep) { struct stackmember v = stack_pos(ep, 0); - if (v.id != TOKEN_VARIABLE) { - ep->st = PARSER_ERR; - fprintf(stderr, "%s: not a variable token", __func__); - return; - } + BAIL_REDUCE_IF(v.id != TOKEN_VARIABLE, "not a variable token"); node *n = malloc(sizeof(node)); + *n = (struct node){0}; n->type = LTTR; n->el = v.terminal; @@ -60,7 +78,7 @@ void var_expr(struct expr_parser *ep) { .expr = n, }; - ep->stack[ep->stack_idx] = newm; + ep->stack[ep->stack_idx - 1] = newm; } void binary_expr(struct expr_parser *ep) { @@ -68,16 +86,14 @@ void binary_expr(struct expr_parser *ep) { lhs = stack_pos(ep, 2); op = stack_pos(ep, 1); rhs = stack_pos(ep, 0); - if ( lhs.id != STACKMEMBER_EXPR + BAIL_REDUCE_IF( + lhs.id != STACKMEMBER_EXPR || op.id != TOKEN_BINARY_OPERATOR - || rhs.id != STACKMEMBER_EXPR - ) { - ep->st = PARSER_ERR; - fprintf(stderr, "%s: incorrect token pattern", __func__); - return; - } + || rhs.id != STACKMEMBER_EXPR, + "incorrect token sequence"); node *n = malloc(sizeof(node)); + *n = (struct node){0}; n->type = BIOP; n->el = op.terminal; n->lhs = lhs.expr; @@ -89,22 +105,20 @@ void binary_expr(struct expr_parser *ep) { }; ep->stack_idx -= 2; - ep->stack[ep->stack_idx] = newm; + ep->stack[ep->stack_idx - 1] = newm; } void unary_expr(struct expr_parser *ep) { struct stackmember op, val; op = stack_pos(ep, 1); val = stack_pos(ep, 0); - if ( op.id != TOKEN_UNARY_OPERATOR - || val.id != STACKMEMBER_EXPR - ) { - ep->st = PARSER_ERR; - fprintf(stderr, "%s: incorrect token pattern", __func__); - return; - } + BAIL_REDUCE_IF( + op.id != TOKEN_UNARY_OPERATOR + || val.id != STACKMEMBER_EXPR, + "incorrect token sequence"); node *n = malloc(sizeof(node)); + *n = (struct node){0}; n->type = UNOP; n->el = op.terminal; n->value = val.expr; @@ -115,39 +129,55 @@ void unary_expr(struct expr_parser *ep) { }; ep->stack_idx -= 1; - ep->stack[ep->stack_idx] = newm; + ep->stack[ep->stack_idx - 1] = newm; } int expr_parser_run(struct expr_parser *ep) { - while (ep->st != PARSER_ERR) { - shift(ep); + // prime the look-ahead + ep->lahead = ep->lex(ep->lex_data); + int noshift = 0; + while (ep->st != PARSER_ERR && ep->st != PARSER_ACCEPT) { + if (!noshift) { shift(ep); } + noshift = 0; struct stackmember top = stack_pos(ep, 0); - if (top.id == TOKEN_EOS) { - ep->st = PARSER_OK; - continue; - } - if (top.id == TOKEN_VARIABLE) { - var_expr(ep); - } - if (ep->st == PARSER_OR_OP || ep->st == PARSER_AND_OP) { - if (ep->st == PARSER_OR_OP) { - if (ep->lahead.c == '*') continue; - } - binary_expr(ep); - } - else if (ep->st == PARSER_NOT_OP) { - unary_expr(ep); - } - if (top.id == TOKEN_BINARY_OPERATOR || top.id == TOKEN_UNARY_OPERATOR) { - switch (top.terminal) { - case '+': ep->st = PARSER_OR_OP; break; - case '*': ep->st = PARSER_AND_OP; break; - case '!': ep->st = PARSER_NOT_OP; break; - default: break; - } - continue; + switch (top.id) { + case TOKEN_EOS: + // the result and the EOS token only + if (ep->stack_idx == 2) { + ep->st = PARSER_ACCEPT; + } else { + ep->st = PARSER_ERR; + } + break; + case TOKEN_VARIABLE: + var_expr(ep); + noshift = 1; // check if this enables any reductions first + break; + case STACKMEMBER_EXPR: { + struct stackmember op = stack_pos(ep, 1); + if (op.id == TOKEN_UNARY_OPERATOR) { + unary_expr(ep); + noshift = 1; + } else if (op.id == TOKEN_BINARY_OPERATOR) { + if (op.terminal == '+' && ep->lahead.c == '*') { + break; + } + binary_expr(ep); + noshift = 1; + } + } + break; + default: break; } } return ep->st; } +void expr_parser_debug_print(FILE *stream, struct expr_parser *ep) { + fprintf(stream, + "PARSER DEBUG DUMP:\n" + "state: %d\n" + "stack: \n", ep->st); + fprint_stack(stream, ep); +} + diff --git a/bu-parser.h b/bu-parser.h index 0aff194..7dc7034 100644 --- a/bu-parser.h +++ b/bu-parser.h @@ -23,10 +23,7 @@ enum parser_state { PARSER_ERR = -1, PARSER_READY = 0, - PARSER_OR_OP, - PARSER_AND_OP, - PARSER_NOT_OP, - PARSER_OK, + PARSER_ACCEPT, }; enum token_id { @@ -73,5 +70,6 @@ struct expr_parser { struct expr_parser expr_parser_init(lexer_fn src, void* lex_data); void expr_parser_finish(struct expr_parser *ep); int expr_parser_run(struct expr_parser *ep); +void expr_parser_debug_print(FILE *stream, struct expr_parser *ep); #endif // BU_PARSER_HG -- cgit v1.2.3