452 lines
8.3 KiB
Odin

package sexp
import "common:name"
import "core:container/intrusive/list"
import "core:fmt"
import "core:io"
import "core:strings"
import "core:testing"
import "core:unicode"
import "core:unicode/utf8"
SEXP_Parser :: struct {
data: string,
pos: int,
c: rune,
}
Ident :: distinct name.Name
Tag :: distinct name.Name
Atom :: union {
Ident,
Tag,
string,
f64,
}
Sexp_List_Node :: struct {
expr: Sexp,
node: list.Node,
}
Sexp_List :: distinct list.List
Sexp :: union {
Atom,
Sexp_List,
}
to_ident :: proc(sexp: Sexp) -> (ident: Ident, ok: bool) {
atom: Atom
atom, ok = sexp.(Atom)
if ok {
ident, ok = atom.(Ident)
}
return
}
to_ident_checked :: proc(sexp: Sexp, expr := #caller_expression) -> Ident {
ident, ok := to_ident(sexp)
assert(ok, expr)
return ident
}
is_ident_expr :: proc(sexp: Sexp) -> (ok: bool) {
atom: Atom
atom, ok = sexp.(Atom)
if ok {
_, ok = atom.(Ident)
}
return
}
to_string_temp_sexp :: proc(sexp: Sexp) -> string {
builder := strings.builder_make(context.temp_allocator)
w := strings.to_writer(&builder)
print_sexp(sexp, w)
return strings.to_string(builder)
}
to_string_temp_iter :: proc(it: List_Iterator) -> string {
builder := strings.builder_make(context.temp_allocator)
w := strings.to_writer(&builder)
print_list(it, w)
return strings.to_string(builder)
}
to_string_temp :: proc {
to_string_temp_sexp,
to_string_temp_iter,
}
expect :: proc(ctx: ^SEXP_Parser, expected: rune) -> Error {
c, next := peek(ctx) or_return
if c != expected {
return make_error(ctx, fmt.tprintf("unexpected character {}, expected: {}", c, expected))
}
ctx.pos = next
return nil
}
is_ident :: proc(c: rune) -> bool {
switch c {
case 'a' ..= 'z', 'A' ..= 'Z', '0' ..= '9', '-', '_', '.':
return true
case:
}
return false
}
parse_atom :: proc(ctx: ^SEXP_Parser) -> (atom: Atom, error: Error) {
c, next := peek(ctx) or_return
switch c {
case 'a' ..= 'z', 'A' ..= 'Z':
start := ctx.pos
for ctx.pos < len(ctx.data) {
c, next = peek(ctx) or_return
if !is_ident(c) {
break
}
ctx.pos = next
}
return Ident(name.from_string(ctx.data[start:ctx.pos])), nil
case ':':
// skip it
ctx.pos = next
start := ctx.pos
for ctx.pos < len(ctx.data) {
c, next = peek(ctx) or_return
if !is_ident(c) {
break
}
ctx.pos = next
}
return Tag(name.from_string(ctx.data[start:ctx.pos])), nil
case '"':
// skip it
ctx.pos = next
start := ctx.pos
loop: for {
c, next = peek(ctx) or_return
switch c {
case '\\':
_, next = peek(ctx) or_return
ctx.pos = next
case '"':
break loop
}
ctx.pos = next
}
result := ctx.data[start:ctx.pos]
ctx.pos = next
return result, nil
}
return nil, make_error(ctx, fmt.tprintf("unknown atom {}", c))
}
parse_list :: proc(ctx: ^SEXP_Parser) -> (result: Sexp_List, error: Error) {
skip_whitespace(ctx) or_return
expect(ctx, '(') or_return
for {
skip_whitespace(ctx) or_return
c, next := peek(ctx) or_return
if c == ')' {
ctx.pos = next
break
} else {
sexp := parse_sexp(ctx) or_return
node := new(Sexp_List_Node)
node.expr = sexp
list.push_back(cast(^list.List)&result, &node.node)
}
}
return
}
parse_sexp :: proc(ctx: ^SEXP_Parser) -> (sexp: Sexp, error: Error) {
skip_whitespace(ctx) or_return
c, _ := peek(ctx) or_return
switch c {
case '(':
return parse_list(ctx)
case:
return parse_atom(ctx)
}
}
// Parses a top level list of s-expressions which are not enclosed by parens
// No freeing procedure is provided, instead pass in an arena allocator and call free_all on it
parse :: proc(
ctx: ^SEXP_Parser,
allocator := context.temp_allocator,
) -> (
sexp: Sexp_List,
error: Error,
) {
context.allocator = allocator
sexp_list: Sexp_List
for ctx.pos < len(ctx.data) {
expr := parse_sexp(ctx) or_return
node := new(Sexp_List_Node)
node.expr = expr
list.push_back(cast(^list.List)&sexp_list, &node.node)
skip_whitespace(ctx)
}
return sexp_list, nil
}
List_Iterator :: list.Iterator(Sexp_List_Node)
iterator_list :: proc(sexp_list: Sexp_List) -> List_Iterator {
return list.iterator_head(list.List(sexp_list), Sexp_List_Node, "node")
}
iterator_next :: proc(it: ^List_Iterator) -> (^Sexp_List_Node, bool) {
return list.iterate_next(it)
}
iterator_next_checked :: proc(it: ^List_Iterator) -> Sexp {
node, ok := iterator_next(it)
assert(ok)
return node.expr
}
print_list :: proc(it: List_Iterator, w: io.Writer) -> io.Error {
it := it
io.write_byte(w, '(') or_return
first := true
for sexp_node in list.iterate_next(&it) {
if !first {
io.write_byte(w, ' ') or_return
}
print_sexp(sexp_node.expr, w) or_return
first = false
}
io.write_byte(w, ')') or_return
return nil
}
print_sexp :: proc(sexp: Sexp, w: io.Writer) -> io.Error {
switch s in sexp {
case Atom:
switch a in s {
case Ident:
io.write_string(w, string(name.to_string(name.Name(a)))) or_return
case Tag:
io.write_byte(w, ':') or_return
io.write_string(w, string(name.to_string(name.Name(a)))) or_return
case string:
io.write_quoted_string(w, a, '"') or_return
case f64:
io.write_f64(w, a) or_return
}
case Sexp_List:
it := list.iterator_head(list.List(s), Sexp_List_Node, "node")
print_list(it, w) or_return
}
return nil
}
Error_Type :: struct {
msg: string,
pos: int,
}
Error :: union {
Error_Type,
}
make_error :: proc(ctx: ^SEXP_Parser, err: string) -> Error {
return Error_Type{msg = err, pos = ctx.pos}
}
get_line_column :: proc(str: string, pos: int) -> (line: int, column: int) {
line, column = 0, 0
p := 0
for p < pos {
c, num_bytes := utf8.decode_rune(str[p:])
p += num_bytes
if c == '\n' {
line += 1
column = 0
} else {
column += 1
}
}
return line, column
}
get_line_string :: proc(str: string, line: int) -> string {
cur_line := 0
p := 0
start := -1
for p < len(str) {
c, num_bytes := utf8.decode_rune(str[p:])
if c == '\n' {
cur_line += 1
}
if cur_line == line + 1 {
break
}
p += num_bytes
if cur_line == line && start == -1 {
start = p
}
}
if start != -1 {
return str[start:p]
} else {
return ""
}
}
peek :: proc(ctx: ^SEXP_Parser) -> (c: rune, next: int, err: Error) {
if ctx.pos < len(ctx.data) {
num_bytes: int
c, num_bytes = utf8.decode_rune(ctx.data[ctx.pos:])
if c == utf8.RUNE_ERROR {
return c, ctx.pos, make_error(ctx, "invalid utf8 rune")
}
return c, ctx.pos + num_bytes, nil
}
return 0, 0, make_error(ctx, "unexpected EOF")
}
skip_whitespace :: proc(ctx: ^SEXP_Parser) -> Error {
if ctx.pos == len(ctx.data) {
return nil
}
for {
c, next := peek(ctx) or_return
if c == ';' {
skip_until_newline(ctx) or_return
} else if unicode.is_white_space(c) {
ctx.pos = next
} else {
break
}
}
return nil
}
skip_until_newline :: proc(ctx: ^SEXP_Parser) -> Error {
for {
c, next := peek(ctx) or_return
ctx.pos = next
if c == '\n' {
break
}
}
return nil
}
print_pretty_error :: proc(w: io.Writer, data: string, error: Error) -> io.Error {
if error == nil {
return nil
}
err := error.(Error_Type)
line, column := get_line_column(data, err.pos)
line_str := get_line_string(data, line)
io.write_byte(w, '\n')
offset: int
io.write_int(w, line + 1, 10, &offset) or_return
io.write_string(w, " | ") or_return
io.write_string(w, line_str) or_return
io.write_byte(w, '\n') or_return
for _ in 0 ..< (offset + 3 + column) {
io.write_byte(w, ' ')
}
io.write_string(w, "^ ")
io.write_string(w, err.msg)
return nil
}
@(test)
test_parse :: proc(t: ^testing.T) {
ctx: SEXP_Parser
ctx.data = "ident (sexp with \"string\") (nested (sexp))"
sexp, err := parse(&ctx)
testing.expect_value(t, err, nil)
builder: strings.Builder
strings.builder_init(&builder, context.temp_allocator)
writer := strings.to_writer(&builder)
print_sexp(sexp, writer)
printed := strings.to_string(builder)
testing.expect_value(t, printed, "(ident (sexp with \"string\") (nested (sexp)))")
}
@(test)
test_error :: proc(t: ^testing.T) {
ctx: SEXP_Parser
ctx.data = `
ident
(sexp with "string"`
_, err := parse(&ctx)
testing.expect(t, err != nil)
builder: strings.Builder
strings.builder_init(&builder, context.temp_allocator)
writer := strings.to_writer(&builder)
print_pretty_error(writer, ctx.data, err)
printed := strings.to_string(builder)
testing.expect_value(
t,
printed,
`
3 | (sexp with "string"
^ unexpected EOF`,
)
}