From 00ec031cb2ac7b9afda1cda97c91187a7a47e23a Mon Sep 17 00:00:00 2001 From: Pedro Souza Date: Tue, 2 Apr 2024 21:32:43 -0300 Subject: sync --- Makefile | 2 +- bu-parser.c | 153 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ bu-parser.h | 77 ++++++++++++++++++++++++++++++ main.c | 2 + tree.h | 14 +++++- 5 files changed, 246 insertions(+), 2 deletions(-) create mode 100644 bu-parser.c create mode 100644 bu-parser.h diff --git a/Makefile b/Makefile index 98af738..c06e73e 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ EXES := $(ARCFIND) CFLAGS := $(addprefix -I,$(SRCDIRS)) $(XTRAFLAGS) LDLIBS := -ARCFIND_O := $(addprefix ./bin/, main.o, tree.o) +ARCFIND_O := $(addprefix ./bin/, main.o tree.o bu-parser.o) all: $(EXES) .PHONY: all diff --git a/bu-parser.c b/bu-parser.c new file mode 100644 index 0000000..624bedc --- /dev/null +++ b/bu-parser.c @@ -0,0 +1,153 @@ +#include "bu-parser.h" + +void free_node(node *n) { + if (n->lhs) free_node(n->lhs); + if (n->rhs) free_node(n->rhs); + free(n); +} + +struct expr_parser expr_parser_init(lexer_fn src, void* lex_data) { + return (struct expr_parser){ + .lex = src, + .lex_data = lex_data, + .lahead = {0}, + .st = PARSER_READY, + .stack = {0}, + .stack_idx = 0, + }; +} + +void expr_parser_finish(struct expr_parser *ep) { + for (size_t i = 0; i < ep->stack_idx; i++) { + if (ep->stack[i].id == STACKMEMBER_EXPR) + free_node(ep->stack[i].expr); + } +} + +struct stackmember stack_pos(struct expr_parser *ep, size_t pos) { + ssize_t npos = ep->stack_idx - pos; + if (npos < 0) { + fprintf(stderr, "bad stack idx"); + return (struct stackmember) + { .id = (enum stackmember_id)TOKEN_OOB, .expr = NULL }; + } + return ep->stack[npos]; +} + +void shift(struct expr_parser *ep) { + ep->stack[ep->stack_idx] = (struct stackmember){ + .id = (enum stackmember_id)ep->lahead.id, + .terminal = ep->lahead.c, + }; + ep->stack_idx++; + ep->lahead = ep->lex(ep->lex_data); +} + +void var_expr(struct expr_parser *ep) { + struct stackmember v = stack_pos(ep, 0); + if (v.id != TOKEN_VARIABLE) { + ep->st = PARSER_ERR; + fprintf(stderr, "%s: not a variable token", __func__); + return; + } + + node *n = malloc(sizeof(node)); + n->type = LTTR; + n->el = v.terminal; + + struct stackmember newm = { + .id = STACKMEMBER_EXPR, + .expr = n, + }; + + ep->stack[ep->stack_idx] = newm; +} + +void binary_expr(struct expr_parser *ep) { + struct stackmember rhs, op, lhs; + lhs = stack_pos(ep, 2); + op = stack_pos(ep, 1); + rhs = stack_pos(ep, 0); + if ( lhs.id != STACKMEMBER_EXPR + || op.id != TOKEN_BINARY_OPERATOR + || rhs.id != STACKMEMBER_EXPR + ) { + ep->st = PARSER_ERR; + fprintf(stderr, "%s: incorrect token pattern", __func__); + return; + } + + node *n = malloc(sizeof(node)); + n->type = BIOP; + n->el = op.terminal; + n->lhs = lhs.expr; + n->rhs = rhs.expr; + + struct stackmember newm = { + .id = STACKMEMBER_EXPR, + .expr = n, + }; + + ep->stack_idx -= 2; + ep->stack[ep->stack_idx] = newm; +} + +void unary_expr(struct expr_parser *ep) { + struct stackmember op, val; + op = stack_pos(ep, 1); + val = stack_pos(ep, 0); + if ( op.id != TOKEN_UNARY_OPERATOR + || val.id != STACKMEMBER_EXPR + ) { + ep->st = PARSER_ERR; + fprintf(stderr, "%s: incorrect token pattern", __func__); + return; + } + + node *n = malloc(sizeof(node)); + n->type = UNOP; + n->el = op.terminal; + n->value = val.expr; + + struct stackmember newm = { + .id = STACKMEMBER_EXPR, + .expr = n, + }; + + ep->stack_idx -= 1; + ep->stack[ep->stack_idx] = newm; +} + +int expr_parser_run(struct expr_parser *ep) { + while (ep->st != PARSER_ERR) { + shift(ep); + struct stackmember top = stack_pos(ep, 0); + if (top.id == TOKEN_EOS) { + ep->st = PARSER_OK; + continue; + } + if (top.id == TOKEN_VARIABLE) { + var_expr(ep); + } + if (ep->st == PARSER_OR_OP || ep->st == PARSER_AND_OP) { + if (ep->st == PARSER_OR_OP) { + if (ep->lahead.c == '*') continue; + } + binary_expr(ep); + } + else if (ep->st == PARSER_NOT_OP) { + unary_expr(ep); + } + if (top.id == TOKEN_BINARY_OPERATOR || top.id == TOKEN_UNARY_OPERATOR) { + switch (top.terminal) { + case '+': ep->st = PARSER_OR_OP; break; + case '*': ep->st = PARSER_AND_OP; break; + case '!': ep->st = PARSER_NOT_OP; break; + default: break; + } + continue; + } + } + return ep->st; +} + diff --git a/bu-parser.h b/bu-parser.h new file mode 100644 index 0000000..0aff194 --- /dev/null +++ b/bu-parser.h @@ -0,0 +1,77 @@ +#ifndef BU_PARSER_HG +#define BU_PARSER_HG + +#include +#include +#include +#include "tree.h" + +/* PRODUCTION RULES + + ::= | | + ::= "*" | "+" + ::= "!" + ::= r#[a-z] + + */ + +/* POSSIBILITIES + a-z => var + + */ + +enum parser_state { + PARSER_ERR = -1, + PARSER_READY = 0, + PARSER_OR_OP, + PARSER_AND_OP, + PARSER_NOT_OP, + PARSER_OK, +}; + +enum token_id { + TOKEN_INVALID = 0, + TOKEN_BINARY_OPERATOR, + TOKEN_UNARY_OPERATOR, + TOKEN_VARIABLE, + TOKEN_EOS, + TOKEN_OOB, + TOKEN_MAX, +}; + +enum stackmember_id { + // all token ids + STACKMEMBER_INVALID = TOKEN_INVALID, + // ... + STACKMEMBER_EXPR = TOKEN_MAX + 1, + STACKMEMBER_MAX, +}; + +struct chartoken { enum token_id id; char c; }; +typedef struct chartoken(*lexer_fn)(void*); + +struct stackmember { + enum stackmember_id id; + union { + char terminal; + node *expr; + }; +}; + +struct expr_parser { + lexer_fn lex; + void *lex_data; + // since tokens are 1-to-1 maps to chars, + // we're fine making this type assumption + struct chartoken lahead; + enum parser_state st; + // (and 1024 oughta be enough, FIXME if not) + struct stackmember stack[1024]; + size_t stack_idx; +}; + +struct expr_parser expr_parser_init(lexer_fn src, void* lex_data); +void expr_parser_finish(struct expr_parser *ep); +int expr_parser_run(struct expr_parser *ep); + +#endif // BU_PARSER_HG diff --git a/main.c b/main.c index 566330c..86e6362 100644 --- a/main.c +++ b/main.c @@ -6,4 +6,6 @@ int main(){ printf("type in your tree(no spaces please): "); scanf("%[^\n]", str); printTree(parse(str),0); + + return 0; } diff --git a/tree.h b/tree.h index a58d270..8353a6d 100644 --- a/tree.h +++ b/tree.h @@ -1,3 +1,6 @@ +#ifndef TREE_H_HG +#define TREE_H_HG + enum nodeType { BIOP, UNOP, @@ -7,7 +10,16 @@ enum nodeType { typedef struct node { enum nodeType type; char el; - struct node *child[2]; + union { + struct node *child[2]; + struct { + struct node *lhs, *rhs; + }; + struct node *value; + }; } node; + node * parse(char *str); void printTree(node *root, int level); + +#endif -- cgit v1.2.3