452 lines
8.3 KiB
Odin
452 lines
8.3 KiB
Odin
package sexp
|
|
|
|
import "common:name"
|
|
import "core:container/intrusive/list"
|
|
import "core:fmt"
|
|
import "core:io"
|
|
import "core:strings"
|
|
import "core:testing"
|
|
import "core:unicode"
|
|
import "core:unicode/utf8"
|
|
|
|
SEXP_Parser :: struct {
|
|
data: string,
|
|
pos: int,
|
|
c: rune,
|
|
}
|
|
|
|
Ident :: distinct name.Name
|
|
Tag :: distinct name.Name
|
|
|
|
Atom :: union {
|
|
Ident,
|
|
Tag,
|
|
string,
|
|
f64,
|
|
}
|
|
|
|
Sexp_List_Node :: struct {
|
|
expr: Sexp,
|
|
node: list.Node,
|
|
}
|
|
|
|
Sexp_List :: distinct list.List
|
|
|
|
Sexp :: union {
|
|
Atom,
|
|
Sexp_List,
|
|
}
|
|
|
|
to_ident :: proc(sexp: Sexp) -> (ident: Ident, ok: bool) {
|
|
atom: Atom
|
|
atom, ok = sexp.(Atom)
|
|
if ok {
|
|
ident, ok = atom.(Ident)
|
|
}
|
|
return
|
|
}
|
|
|
|
to_ident_checked :: proc(sexp: Sexp, expr := #caller_expression) -> Ident {
|
|
ident, ok := to_ident(sexp)
|
|
assert(ok, expr)
|
|
return ident
|
|
}
|
|
|
|
is_ident_expr :: proc(sexp: Sexp) -> (ok: bool) {
|
|
atom: Atom
|
|
atom, ok = sexp.(Atom)
|
|
if ok {
|
|
_, ok = atom.(Ident)
|
|
}
|
|
return
|
|
}
|
|
|
|
to_string_temp_sexp :: proc(sexp: Sexp) -> string {
|
|
builder := strings.builder_make(context.temp_allocator)
|
|
w := strings.to_writer(&builder)
|
|
print_sexp(sexp, w)
|
|
return strings.to_string(builder)
|
|
}
|
|
|
|
to_string_temp_iter :: proc(it: List_Iterator) -> string {
|
|
builder := strings.builder_make(context.temp_allocator)
|
|
w := strings.to_writer(&builder)
|
|
print_list(it, w)
|
|
return strings.to_string(builder)
|
|
}
|
|
|
|
to_string_temp :: proc {
|
|
to_string_temp_sexp,
|
|
to_string_temp_iter,
|
|
}
|
|
|
|
expect :: proc(ctx: ^SEXP_Parser, expected: rune) -> Error {
|
|
c, next := peek(ctx) or_return
|
|
|
|
if c != expected {
|
|
return make_error(ctx, fmt.tprintf("unexpected character {}, expected: {}", c, expected))
|
|
}
|
|
ctx.pos = next
|
|
|
|
return nil
|
|
}
|
|
|
|
is_ident :: proc(c: rune) -> bool {
|
|
switch c {
|
|
case 'a' ..= 'z', 'A' ..= 'Z', '0' ..= '9', '-', '_', '.':
|
|
return true
|
|
case:
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
parse_atom :: proc(ctx: ^SEXP_Parser) -> (atom: Atom, error: Error) {
|
|
c, next := peek(ctx) or_return
|
|
|
|
switch c {
|
|
case 'a' ..= 'z', 'A' ..= 'Z':
|
|
start := ctx.pos
|
|
for ctx.pos < len(ctx.data) {
|
|
c, next = peek(ctx) or_return
|
|
if !is_ident(c) {
|
|
break
|
|
}
|
|
|
|
ctx.pos = next
|
|
}
|
|
return Ident(name.from_string(ctx.data[start:ctx.pos])), nil
|
|
case ':':
|
|
// skip it
|
|
ctx.pos = next
|
|
|
|
start := ctx.pos
|
|
for ctx.pos < len(ctx.data) {
|
|
c, next = peek(ctx) or_return
|
|
if !is_ident(c) {
|
|
break
|
|
}
|
|
|
|
ctx.pos = next
|
|
}
|
|
return Tag(name.from_string(ctx.data[start:ctx.pos])), nil
|
|
case '"':
|
|
// skip it
|
|
ctx.pos = next
|
|
|
|
start := ctx.pos
|
|
loop: for {
|
|
c, next = peek(ctx) or_return
|
|
|
|
switch c {
|
|
case '\\':
|
|
_, next = peek(ctx) or_return
|
|
ctx.pos = next
|
|
case '"':
|
|
break loop
|
|
}
|
|
ctx.pos = next
|
|
}
|
|
|
|
result := ctx.data[start:ctx.pos]
|
|
ctx.pos = next
|
|
return result, nil
|
|
|
|
}
|
|
|
|
return nil, make_error(ctx, fmt.tprintf("unknown atom {}", c))
|
|
}
|
|
|
|
parse_list :: proc(ctx: ^SEXP_Parser) -> (result: Sexp_List, error: Error) {
|
|
skip_whitespace(ctx) or_return
|
|
expect(ctx, '(') or_return
|
|
|
|
for {
|
|
skip_whitespace(ctx) or_return
|
|
c, next := peek(ctx) or_return
|
|
|
|
if c == ')' {
|
|
ctx.pos = next
|
|
break
|
|
} else {
|
|
sexp := parse_sexp(ctx) or_return
|
|
node := new(Sexp_List_Node)
|
|
node.expr = sexp
|
|
list.push_back(cast(^list.List)&result, &node.node)
|
|
}
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
parse_sexp :: proc(ctx: ^SEXP_Parser) -> (sexp: Sexp, error: Error) {
|
|
skip_whitespace(ctx) or_return
|
|
c, _ := peek(ctx) or_return
|
|
|
|
switch c {
|
|
case '(':
|
|
return parse_list(ctx)
|
|
case:
|
|
return parse_atom(ctx)
|
|
}
|
|
}
|
|
|
|
// Parses a top level list of s-expressions which are not enclosed by parens
|
|
// No freeing procedure is provided, instead pass in an arena allocator and call free_all on it
|
|
parse :: proc(
|
|
ctx: ^SEXP_Parser,
|
|
allocator := context.temp_allocator,
|
|
) -> (
|
|
sexp: Sexp_List,
|
|
error: Error,
|
|
) {
|
|
context.allocator = allocator
|
|
|
|
sexp_list: Sexp_List
|
|
|
|
for ctx.pos < len(ctx.data) {
|
|
expr := parse_sexp(ctx) or_return
|
|
node := new(Sexp_List_Node)
|
|
node.expr = expr
|
|
list.push_back(cast(^list.List)&sexp_list, &node.node)
|
|
|
|
skip_whitespace(ctx)
|
|
}
|
|
|
|
return sexp_list, nil
|
|
}
|
|
|
|
List_Iterator :: list.Iterator(Sexp_List_Node)
|
|
|
|
iterator_list :: proc(sexp_list: Sexp_List) -> List_Iterator {
|
|
return list.iterator_head(list.List(sexp_list), Sexp_List_Node, "node")
|
|
}
|
|
|
|
iterator_next :: proc(it: ^List_Iterator) -> (^Sexp_List_Node, bool) {
|
|
return list.iterate_next(it)
|
|
}
|
|
|
|
iterator_next_checked :: proc(it: ^List_Iterator) -> Sexp {
|
|
node, ok := iterator_next(it)
|
|
assert(ok)
|
|
|
|
return node.expr
|
|
}
|
|
|
|
print_list :: proc(it: List_Iterator, w: io.Writer) -> io.Error {
|
|
it := it
|
|
io.write_byte(w, '(') or_return
|
|
first := true
|
|
for sexp_node in list.iterate_next(&it) {
|
|
if !first {
|
|
io.write_byte(w, ' ') or_return
|
|
}
|
|
print_sexp(sexp_node.expr, w) or_return
|
|
first = false
|
|
}
|
|
io.write_byte(w, ')') or_return
|
|
|
|
return nil
|
|
}
|
|
|
|
print_sexp :: proc(sexp: Sexp, w: io.Writer) -> io.Error {
|
|
switch s in sexp {
|
|
case Atom:
|
|
switch a in s {
|
|
case Ident:
|
|
io.write_string(w, string(name.to_string(name.Name(a)))) or_return
|
|
case Tag:
|
|
io.write_byte(w, ':') or_return
|
|
io.write_string(w, string(name.to_string(name.Name(a)))) or_return
|
|
case string:
|
|
io.write_quoted_string(w, a, '"') or_return
|
|
case f64:
|
|
io.write_f64(w, a) or_return
|
|
}
|
|
case Sexp_List:
|
|
it := list.iterator_head(list.List(s), Sexp_List_Node, "node")
|
|
print_list(it, w) or_return
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
Error_Type :: struct {
|
|
msg: string,
|
|
pos: int,
|
|
}
|
|
|
|
Error :: union {
|
|
Error_Type,
|
|
}
|
|
|
|
make_error :: proc(ctx: ^SEXP_Parser, err: string) -> Error {
|
|
return Error_Type{msg = err, pos = ctx.pos}
|
|
}
|
|
|
|
get_line_column :: proc(str: string, pos: int) -> (line: int, column: int) {
|
|
line, column = 0, 0
|
|
|
|
p := 0
|
|
|
|
for p < pos {
|
|
c, num_bytes := utf8.decode_rune(str[p:])
|
|
p += num_bytes
|
|
if c == '\n' {
|
|
line += 1
|
|
column = 0
|
|
} else {
|
|
column += 1
|
|
}
|
|
}
|
|
|
|
return line, column
|
|
}
|
|
|
|
get_line_string :: proc(str: string, line: int) -> string {
|
|
cur_line := 0
|
|
p := 0
|
|
|
|
start := -1
|
|
for p < len(str) {
|
|
c, num_bytes := utf8.decode_rune(str[p:])
|
|
if c == '\n' {
|
|
cur_line += 1
|
|
}
|
|
|
|
if cur_line == line + 1 {
|
|
break
|
|
}
|
|
|
|
p += num_bytes
|
|
if cur_line == line && start == -1 {
|
|
start = p
|
|
}
|
|
}
|
|
|
|
if start != -1 {
|
|
return str[start:p]
|
|
} else {
|
|
return ""
|
|
}
|
|
}
|
|
|
|
peek :: proc(ctx: ^SEXP_Parser) -> (c: rune, next: int, err: Error) {
|
|
if ctx.pos < len(ctx.data) {
|
|
num_bytes: int
|
|
c, num_bytes = utf8.decode_rune(ctx.data[ctx.pos:])
|
|
if c == utf8.RUNE_ERROR {
|
|
return c, ctx.pos, make_error(ctx, "invalid utf8 rune")
|
|
}
|
|
return c, ctx.pos + num_bytes, nil
|
|
}
|
|
|
|
return 0, 0, make_error(ctx, "unexpected EOF")
|
|
}
|
|
|
|
skip_whitespace :: proc(ctx: ^SEXP_Parser) -> Error {
|
|
if ctx.pos == len(ctx.data) {
|
|
return nil
|
|
}
|
|
|
|
for {
|
|
c, next := peek(ctx) or_return
|
|
|
|
if c == ';' {
|
|
skip_until_newline(ctx) or_return
|
|
} else if unicode.is_white_space(c) {
|
|
ctx.pos = next
|
|
} else {
|
|
break
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
skip_until_newline :: proc(ctx: ^SEXP_Parser) -> Error {
|
|
for {
|
|
c, next := peek(ctx) or_return
|
|
ctx.pos = next
|
|
if c == '\n' {
|
|
break
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
print_pretty_error :: proc(w: io.Writer, data: string, error: Error) -> io.Error {
|
|
if error == nil {
|
|
return nil
|
|
}
|
|
|
|
err := error.(Error_Type)
|
|
line, column := get_line_column(data, err.pos)
|
|
line_str := get_line_string(data, line)
|
|
|
|
io.write_byte(w, '\n')
|
|
|
|
offset: int
|
|
io.write_int(w, line + 1, 10, &offset) or_return
|
|
io.write_string(w, " | ") or_return
|
|
io.write_string(w, line_str) or_return
|
|
io.write_byte(w, '\n') or_return
|
|
|
|
for _ in 0 ..< (offset + 3 + column) {
|
|
io.write_byte(w, ' ')
|
|
}
|
|
io.write_string(w, "^ ")
|
|
io.write_string(w, err.msg)
|
|
|
|
return nil
|
|
}
|
|
|
|
@(test)
|
|
test_parse :: proc(t: ^testing.T) {
|
|
ctx: SEXP_Parser
|
|
ctx.data = "ident (sexp with \"string\") (nested (sexp))"
|
|
|
|
sexp, err := parse(&ctx)
|
|
|
|
testing.expect_value(t, err, nil)
|
|
|
|
builder: strings.Builder
|
|
strings.builder_init(&builder, context.temp_allocator)
|
|
|
|
writer := strings.to_writer(&builder)
|
|
print_sexp(sexp, writer)
|
|
|
|
printed := strings.to_string(builder)
|
|
|
|
testing.expect_value(t, printed, "(ident (sexp with \"string\") (nested (sexp)))")
|
|
}
|
|
|
|
@(test)
|
|
test_error :: proc(t: ^testing.T) {
|
|
ctx: SEXP_Parser
|
|
ctx.data = `
|
|
ident
|
|
(sexp with "string"`
|
|
|
|
|
|
_, err := parse(&ctx)
|
|
|
|
testing.expect(t, err != nil)
|
|
|
|
builder: strings.Builder
|
|
strings.builder_init(&builder, context.temp_allocator)
|
|
|
|
writer := strings.to_writer(&builder)
|
|
print_pretty_error(writer, ctx.data, err)
|
|
printed := strings.to_string(builder)
|
|
|
|
testing.expect_value(
|
|
t,
|
|
printed,
|
|
`
|
|
3 | (sexp with "string"
|
|
^ unexpected EOF`,
|
|
)
|
|
}
|