package sexp import "common:name" import "core:container/intrusive/list" import "core:fmt" import "core:io" import "core:strconv" import "core:strings" import "core:testing" import "core:unicode" import "core:unicode/utf8" SEXP_Parser :: struct { data: string, pos: int, c: rune, } Ident :: distinct name.Name Tag :: distinct name.Name Atom :: union { Ident, Tag, string, f64, } Sexp_List_Node :: struct { expr: Sexp, node: list.Node, } Sexp_List :: distinct list.List Sexp :: union { Atom, Sexp_List, } to_ident :: proc(sexp: Sexp) -> (ident: Ident, ok: bool) { atom: Atom atom, ok = sexp.(Atom) if ok { ident, ok = atom.(Ident) } return } to_ident_checked :: proc(sexp: Sexp, expr := #caller_expression) -> Ident { ident, ok := to_ident(sexp) assert(ok, expr) return ident } is_ident_expr :: proc(sexp: Sexp) -> (ok: bool) { atom: Atom atom, ok = sexp.(Atom) if ok { _, ok = atom.(Ident) } return } to_string_temp_sexp :: proc(sexp: Sexp) -> string { builder := strings.builder_make(context.temp_allocator) w := strings.to_writer(&builder) print_sexp(sexp, w) return strings.to_string(builder) } to_string_temp_iter :: proc(it: List_Iterator) -> string { builder := strings.builder_make(context.temp_allocator) w := strings.to_writer(&builder) print_list(it, w) return strings.to_string(builder) } to_string_temp :: proc { to_string_temp_sexp, to_string_temp_iter, } expect :: proc(ctx: ^SEXP_Parser, expected: rune) -> Error { c, next := peek(ctx) or_return if c != expected { return make_error(ctx, fmt.tprintf("unexpected character {}, expected: {}", c, expected)) } ctx.pos = next return nil } is_ident :: proc(c: rune) -> bool { switch c { case 'a' ..= 'z', 'A' ..= 'Z', '0' ..= '9', '-', '_', '.': return true case: } return false } parse_atom :: proc(ctx: ^SEXP_Parser) -> (atom: Atom, error: Error) { c, next := peek(ctx) or_return switch c { case 'a' ..= 'z', 'A' ..= 'Z': start := ctx.pos for ctx.pos < len(ctx.data) { c, next = peek(ctx) or_return if !is_ident(c) { break } ctx.pos = next } return Ident(name.from_string(ctx.data[start:ctx.pos])), nil case ':': // skip it ctx.pos = next start := ctx.pos for ctx.pos < len(ctx.data) { c, next = peek(ctx) or_return if !is_ident(c) { break } ctx.pos = next } return Tag(name.from_string(ctx.data[start:ctx.pos])), nil case '"': // skip it ctx.pos = next start := ctx.pos loop: for { c, next = peek(ctx) or_return switch c { case '\\': _, next = peek(ctx) or_return ctx.pos = next case '"': break loop } ctx.pos = next } result := ctx.data[start:ctx.pos] ctx.pos = next return result, nil case '0' ..= '9', '-', '+': value, n, ok := strconv.parse_f64_prefix(ctx.data[ctx.pos:]) if !ok { return nil, make_error(ctx, "failed to parse number") } ctx.pos += n return value, nil } return nil, make_error(ctx, fmt.tprintf("unknown atom {}", c)) } parse_list :: proc(ctx: ^SEXP_Parser) -> (result: Sexp_List, error: Error) { skip_whitespace(ctx) or_return expect(ctx, '(') or_return for { skip_whitespace(ctx) or_return c, next := peek(ctx) or_return if c == ')' { ctx.pos = next break } else { sexp := parse_sexp(ctx) or_return node := new(Sexp_List_Node) node.expr = sexp list.push_back(cast(^list.List)&result, &node.node) } } return } parse_sexp :: proc(ctx: ^SEXP_Parser) -> (sexp: Sexp, error: Error) { skip_whitespace(ctx) or_return c, _ := peek(ctx) or_return switch c { case '(': return parse_list(ctx) case: return parse_atom(ctx) } } // Parses a top level list of s-expressions which are not enclosed by parens // No freeing procedure is provided, instead pass in an arena allocator and call free_all on it parse :: proc( ctx: ^SEXP_Parser, allocator := context.temp_allocator, ) -> ( sexp: Sexp_List, error: Error, ) { context.allocator = allocator sexp_list: Sexp_List for ctx.pos < len(ctx.data) { expr := parse_sexp(ctx) or_return node := new(Sexp_List_Node) node.expr = expr list.push_back(cast(^list.List)&sexp_list, &node.node) skip_whitespace(ctx) } return sexp_list, nil } List_Iterator :: list.Iterator(Sexp_List_Node) iterator_list :: proc(sexp_list: Sexp_List) -> List_Iterator { return list.iterator_head(list.List(sexp_list), Sexp_List_Node, "node") } iterator_next :: proc(it: ^List_Iterator) -> (^Sexp_List_Node, bool) { return list.iterate_next(it) } iterator_next_checked :: proc(it: ^List_Iterator) -> Sexp { node, ok := iterator_next(it) assert(ok) return node.expr } iterator_has_more :: proc(it: List_Iterator) -> bool { return it.curr != nil } iterator_expect_list :: proc(it: ^List_Iterator) -> (Sexp_List, bool) { next, ok := iterator_next(it) if !ok { return {}, false } list_expr: Sexp_List list_expr, ok = next.expr.(Sexp_List) return list_expr, ok } iterator_expect_atom :: proc(it: ^List_Iterator, $T: typeid) -> (T, bool) { next, ok := iterator_next(it) if !ok { return {}, false } atom_expr: Atom atom_expr, ok = next.expr.(Atom) if !ok { return {}, false } result: T result, ok = atom_expr.(T) return result, ok } print_list :: proc(it: List_Iterator, w: io.Writer) -> io.Error { it := it io.write_byte(w, '(') or_return first := true for sexp_node in list.iterate_next(&it) { if !first { io.write_byte(w, ' ') or_return } print_sexp(sexp_node.expr, w) or_return first = false } io.write_byte(w, ')') or_return return nil } print_sexp :: proc(sexp: Sexp, w: io.Writer) -> io.Error { switch s in sexp { case Atom: switch a in s { case Ident: io.write_string(w, string(name.to_string(name.Name(a)))) or_return case Tag: io.write_byte(w, ':') or_return io.write_string(w, string(name.to_string(name.Name(a)))) or_return case string: io.write_quoted_string(w, a, '"') or_return case f64: io.write_f64(w, a) or_return } case Sexp_List: it := list.iterator_head(list.List(s), Sexp_List_Node, "node") print_list(it, w) or_return } return nil } Error_Type :: struct { msg: string, pos: int, } Error :: union { Error_Type, } make_error :: proc(ctx: ^SEXP_Parser, err: string) -> Error { return Error_Type{msg = err, pos = ctx.pos} } get_line_column :: proc(str: string, pos: int) -> (line: int, column: int) { line, column = 0, 0 p := 0 for p < pos { c, num_bytes := utf8.decode_rune(str[p:]) p += num_bytes if c == '\n' { line += 1 column = 0 } else { column += 1 } } return line, column } get_line_string :: proc(str: string, line: int) -> string { cur_line := 0 p := 0 start := -1 for p < len(str) { c, num_bytes := utf8.decode_rune(str[p:]) if c == '\n' { cur_line += 1 } if cur_line == line + 1 { break } p += num_bytes if cur_line == line && start == -1 { start = p } } if start != -1 { return str[start:p] } else { return "" } } peek :: proc(ctx: ^SEXP_Parser) -> (c: rune, next: int, err: Error) { if ctx.pos < len(ctx.data) { num_bytes: int c, num_bytes = utf8.decode_rune(ctx.data[ctx.pos:]) if c == utf8.RUNE_ERROR { return c, ctx.pos, make_error(ctx, "invalid utf8 rune") } return c, ctx.pos + num_bytes, nil } return 0, 0, make_error(ctx, "unexpected EOF") } skip_whitespace :: proc(ctx: ^SEXP_Parser) -> Error { if ctx.pos == len(ctx.data) { return nil } for { c, next := peek(ctx) or_return if c == ';' { skip_until_newline(ctx) or_return } else if unicode.is_white_space(c) { ctx.pos = next } else { break } } return nil } skip_until_newline :: proc(ctx: ^SEXP_Parser) -> Error { for { c, next := peek(ctx) or_return ctx.pos = next if c == '\n' { break } } return nil } print_pretty_error :: proc(w: io.Writer, data: string, error: Error) -> io.Error { if error == nil { return nil } err := error.(Error_Type) line, column := get_line_column(data, err.pos) line_str := get_line_string(data, line) io.write_byte(w, '\n') offset: int io.write_int(w, line + 1, 10, &offset) or_return io.write_string(w, " | ") or_return io.write_string(w, line_str) or_return io.write_byte(w, '\n') or_return for _ in 0 ..< (offset + 3 + column) { io.write_byte(w, ' ') } io.write_string(w, "^ ") io.write_string(w, err.msg) return nil } // Prints pretty error to temp string temp_pretty_error :: proc(data: string, error: Error) -> string { builder: strings.Builder strings.builder_init(&builder, context.temp_allocator) writer := strings.to_writer(&builder) print_pretty_error(writer, data, error) return strings.to_string(builder) } @(test) test_parse :: proc(t: ^testing.T) { ctx: SEXP_Parser ctx.data = "ident (sexp with \"string\") (nested (sexp))" sexp, err := parse(&ctx) testing.expect_value(t, err, nil) builder: strings.Builder strings.builder_init(&builder, context.temp_allocator) writer := strings.to_writer(&builder) print_sexp(sexp, writer) printed := strings.to_string(builder) testing.expect_value(t, printed, "(ident (sexp with \"string\") (nested (sexp)))") } @(test) test_error :: proc(t: ^testing.T) { ctx: SEXP_Parser ctx.data = ` ident (sexp with "string"` _, err := parse(&ctx) testing.expect(t, err != nil) builder: strings.Builder strings.builder_init(&builder, context.temp_allocator) writer := strings.to_writer(&builder) print_pretty_error(writer, ctx.data, err) printed := strings.to_string(builder) testing.expect_value( t, printed, ` 3 | (sexp with "string" ^ unexpected EOF`, ) }