diff --git a/common/container/xarr/xarr.odin b/common/container/xarr/xarr.odin
index 2331e2f..bc3054a 100644
--- a/common/container/xarr/xarr.odin
+++ b/common/container/xarr/xarr.odin
@@ -2,16 +2,20 @@ package xarr
 
 import "base:builtin"
 import "base:intrinsics"
+import "common:relptr"
+import "core:mem"
 
 BASE_CHUNK_SIZE :: uint(64)
 BASE_CHUNK_SIZE_LOG2 :: intrinsics.constant_log2(BASE_CHUNK_SIZE)
 BASE_CHUNK_SHIFT :: BASE_CHUNK_SIZE_LOG2 - 1
-NUM_CHUNKS :: 30
+NUM_CHUNKS :: 30 when (size_of(uint) == 8) else 26 // on 32 bit systems max size is 0x80000000 which is about half the addressable space
 
 Xarr :: struct($T: typeid, $SOA := false) {
 	len: int,
 	allocated_chunks_mask: u32,
-	chunks: ([NUM_CHUNKS]#soa[]T when SOA else [NUM_CHUNKS][^]T),
+	chunks: ([NUM_CHUNKS]relptr.SOA_Slice(T) when SOA else [NUM_CHUNKS]relptr.Ptr(
+		T,
+	)),
 }
 
 UINT_BITS :: size_of(uint) * 8
@@ -28,18 +32,22 @@ chunk_size :: #force_inline proc "contextless" (chunk_idx: i32) -> uint {
 	return BASE_CHUNK_SIZE << intrinsics.saturating_sub(u32(chunk_idx), 1)
 }
 
-get_chunk_slice_scalar :: #force_inline proc "contextless" (
+// Returns chunk `chunk_idx` as a slice of `chunk_size(chunk_idx)` elements; the stored chunk pointer is dereferenced with `base`.
+get_chunk_slice_scalar :: #force_inline proc(
 	a: $T/Xarr($E, false),
 	chunk_idx: i32,
+	base := context.user_ptr,
 ) -> []E {
-	return a.chunks[chunk_idx][:chunk_size(chunk_idx)]
+	return relptr.deref_multi_ptr(a.chunks[chunk_idx], base)[:chunk_size(chunk_idx)]
 }
 
-get_chunk_slice_soa :: #force_inline proc "contextless" (
+// Returns chunk `chunk_idx` as an SOA slice; the stored chunk slice is dereferenced with `base`.
+get_chunk_slice_soa :: #force_inline proc(
 	a: $T/Xarr($E, true),
 	chunk_idx: i32,
+	base := context.user_ptr,
 ) -> #soa[]E {
-	return a.chunks[chunk_idx]
+	return relptr.deref_soa_slice(a.chunks[chunk_idx], base)
 }
 
 get_chunk_slice :: proc {
@@ -59,7 +67,18 @@ capacity :: #force_inline proc "contextless" (a: $T/Xarr($E, $SOA)) -> uint {
 	return capacity_from_allocated_mask(allocated_mask)
 }
 
-reserve :: proc(a: $T/^Xarr($E, $SOA), cap: int, allocator := context.allocator) #no_bounds_check {
+// Number of elements stored in `a`. This declaration shadows the builtin `len` inside the
+// package, so builtin slices and arrays are measured with `builtin.len` elsewhere in the file.
+len :: #force_inline proc "contextless" (a: $T/Xarr($E, $SOA)) -> int {
+	return a.len
+}
+
+reserve :: proc(
+	a: $T/^Xarr($E, $SOA),
+	cap: int,
+	allocator := context.allocator,
+	base := context.user_ptr,
+) #no_bounds_check {
 	allocated_mask := a.allocated_chunks_mask
 	current_chunk := msb(allocated_mask)
 
@@ -70,36 +89,66 @@ reserve :: proc(a: $T/^Xarr($E, $SOA), cap: int, allocator := context.allocator)
 	for i := current_chunk + 1; i < required_chunks; i += 1 {
 		when SOA {
 			chunk_slice := make_soa_slice(#soa[]E, chunk_size(i), allocator)
-			a.chunks[i] = chunk_slice
+			a.chunks[i] = relptr.from_soa_slice(chunk_slice, base)
 		} else {
 			chunk_slice := make([]E, chunk_size(i), allocator)
-			a.chunks[i] = raw_data(chunk_slice)
+			a.chunks[i] = relptr.from_multi_ptr(raw_data(chunk_slice), base)
 		}
 		a.allocated_chunks_mask |= u32(1) << u8(i)
 	}
 }
 
-append_elem :: proc(a: $T/^Xarr($E, $SOA), elem: E, allocator := context.allocator) {
-	if capacity(a^) <= uint(a.len + 1) {
-		reserve(a, a.len + 1)
+// Reserves chunks for `new_len` elements and then sets the length to `new_len`.
+// Slots exposed by growing are not written here; they keep whatever the chunk already holds.
+resize :: proc(
+	a: $T/^Xarr($E, $SOA),
+	new_len: int,
+	allocator := context.allocator,
+	base := context.user_ptr,
+) {
+	assert(new_len >= 0) // a negative length would break every later `idx < a.len` check
+	reserve(a, new_len, allocator, base)
+	a.len = new_len
+}
+
+// Appends `elem`, calling `reserve` first when the current capacity cannot hold one more element.
+append_elem :: proc(
+	a: $T/^Xarr($E, $SOA),
+	elem: E,
+	allocator := context.allocator,
+	base := context.user_ptr,
+) {
+	if capacity(a^) < uint(a.len + 1) {
+		reserve(a, a.len + 1, allocator, base)
 	}
 	#no_bounds_check {
 		chunk_idx, idx_within_chunk := translate_index(a.len)
-		a.chunks[chunk_idx][idx_within_chunk] = elem
+		when SOA {
+			slice := relptr.deref_soa_slice(a.chunks[chunk_idx], base)
+			slice[idx_within_chunk] = elem
+		} else {
+			relptr.deref_multi_ptr(a.chunks[chunk_idx], base)[idx_within_chunk] = elem
+		}
 	}
 	a.len += 1
 }
 
-append_elems :: proc(a: $T/^Xarr($E, $SOA), elems: ..E, allocator := context.allocator) {
-	if len(elems) == 0 {
+// Appends every element of `elems` in order, calling `reserve` once up front when needed; does nothing for an empty list.
+append_elems :: proc(
+	a: $T/^Xarr($E, $SOA),
+	elems: ..E,
+	allocator := context.allocator,
+	base := context.user_ptr,
+) {
+	if builtin.len(elems) == 0 {
 		return
 	}
 
-	if capacity(a^) < uint(a.len + len(elems)) {
-		reserve(a, a.len + len(elems))
+	if capacity(a^) < uint(a.len + builtin.len(elems)) {
+		reserve(a, a.len + builtin.len(elems), allocator, base)
 	}
-	set_elems_assume_allocated(a, elems)
-	a.len += len(elems)
+	set_elems_assume_allocated(a, elems, base)
+	a.len += builtin.len(elems)
 }
 
 append :: proc {
@@ -120,22 +169,22 @@ translate_index :: #force_inline proc "contextless" (
 }
 
+// Writes `elems` into the slots starting at index `a.len` without changing `a.len`.
+// No capacity check is made: the caller must already have reserved the target chunks.
 @(private = "file")
-set_elems_assume_allocated :: proc "contextless" (
+set_elems_assume_allocated :: proc(
 	a: $T/^Xarr($E, $SOA),
 	elems: []E,
+	base: rawptr,
 ) #no_bounds_check {
 	for &e, i in elems {
 		idx := a.len + i
 		chunk_idx, idx_within_chunk := translate_index(idx)
 
 		when SOA {
-			a.chunks[chunk_idx][idx_within_chunk] = e
+			slice := relptr.deref_soa_slice(a.chunks[chunk_idx], base)
+			slice[idx_within_chunk] = e
 		} else {
-			intrinsics.mem_copy_non_overlapping(
-				&a.chunks[chunk_idx][idx_within_chunk],
-				&e,
-				size_of(E),
-			)
+			relptr.deref_multi_ptr(a.chunks[chunk_idx], base)[idx_within_chunk] = e
 		}
 	}
 }
@@ -153,17 +202,32 @@ get :: proc(a: $T/Xarr($E, $SOA), #any_int idx: int) -> E {
 	return get_chunk_slice(a, chunk_idx)[idx_within_chunk]
 }
 
-get_ptr :: proc(a: $T/Xarr($E, $SOA), #any_int idx: int) -> ^E {
+// Pointer to element `idx` of a non-SOA array; asserts that `idx` lies in `[0, a.len)`.
+get_ptr_scalar :: proc(a: $T/^Xarr($E, false), #any_int idx: int) -> ^E {
 	assert(idx >= 0 && idx < a.len)
 
 	chunk_idx, idx_within_chunk := translate_index(idx)
-	return &get_chunk_slice(a, chunk_idx)[idx_within_chunk]
+	return &get_chunk_slice_scalar(a^, chunk_idx)[idx_within_chunk] // helper takes the array by value, so dereference
+}
+
+// SOA pointer to element `idx` of an SOA array; asserts that `idx` lies in `[0, a.len)`.
+get_ptr_soa :: proc(a: $T/^Xarr($E, true), #any_int idx: int) -> #soa^#soa[]E {
+	assert(idx >= 0 && idx < a.len)
+
+	chunk_idx, idx_within_chunk := translate_index(idx)
+	return &get_chunk_slice_soa(a^, chunk_idx)[idx_within_chunk] // helper takes the array by value, so dereference
+}
+
+// Overload set: picks the scalar or SOA variant from the array's `SOA` parameter.
+get_ptr :: proc {
+	get_ptr_scalar,
+	get_ptr_soa,
 }
 
 unordered_remove :: proc(a: $T/^Xarr($E, $SOA), #any_int idx: int) {
 	assert(idx >= 0 && idx < a.len)
 
-	get_ptr(a^, idx)^ = get(a^, a.len - 1)
+	get_ptr(a, idx)^ = get(a^, a.len - 1)
 	a.len -= 1
 }
 
@@ -172,7 +236,7 @@ clear :: proc "contextless" (a: $T/^Xarr($E, $SOA)) {
 }
 
 delete :: proc(a: $T/^Xarr($E, $SOA), allocator := context.allocator) {
-	for i in 0 ..< len(a.chunks) {
+	for i in 0 ..< builtin.len(a.chunks) {
 		builtin.delete(get_chunk_slice(a^, i32(i)), allocator)
 	}
 
@@ -193,7 +257,7 @@ iterator_next :: proc(it: ^Iterator($E, $SOA)) -> (e: ^E, idx: int, ok: bool) {
 		return nil, it.idx, false
 	}
 
-	e = get_ptr(it.xarr^, it.idx)
+	e = get_ptr(it.xarr, it.idx)
 	idx = it.idx
 	ok = true
 
@@ -225,7 +289,7 @@ chunk_iterator_next_scalar :: proc(
 	chunk = get_chunk_slice_scalar(it.xarr^, it.chunk_idx)
 	// Limit the chunk to the length so user code doesn't have to worry about this
 	base_element_idx = it.base_element_idx
-	chunk = chunk[:min(len(chunk), it.xarr.len - base_element_idx)]
+	chunk = chunk[:min(builtin.len(chunk), it.xarr.len - base_element_idx)]
 	ok = true
 
 	base_element_idx += int(chunk_size(it.chunk_idx))
@@ -240,17 +304,17 @@ chunk_iterator_next_soa :: proc(
 	base_element_idx: int,
 	ok: bool,
 ) {
-	if (it.xarr.allocated_chunks_mask & (u32(1) << it.chunk_idx)) == 0 {
+	if (it.xarr.allocated_chunks_mask & (u32(1) << u32(it.chunk_idx))) == 0 || it.base_element_idx >= it.xarr.len { // also stop at reserved chunks that hold no elements
 		return nil, 0, false
 	}
 
 	chunk = get_chunk_slice_soa(it.xarr^, it.chunk_idx)
 	// Limit the chunk to the length so user code doesn't have to worry about this
 	base_element_idx = it.base_element_idx
-	chunk = chunk[:min(len(chunk), it.xarr.len - base_element_idx)]
+	chunk = chunk[:min(builtin.len(chunk), it.xarr.len - base_element_idx)]
 	ok = true
 
-	base_element_idx += chunk_size(it.chunk_idx)
+	it.base_element_idx += int(chunk_size(it.chunk_idx)) // advance the iterator's own base, not the value being returned
 	it.chunk_idx += 1
 	return
 }