gc kinda almost there

This commit is contained in:
Mirek Kratochvil 2023-10-31 00:12:51 +01:00
parent 8504d72bda
commit 2bc3d9a381
9 changed files with 298 additions and 111 deletions

View file

@ -5,6 +5,7 @@
# || -> cont
.thunkcode main
needs_alloc $0120
thunkto %r11, $INT_code, $100
thunkto %r12, $INT_code, $23
thunkto %r13, $plus, $2, %r11, %r12

3
fibs.s
View file

@ -8,6 +8,7 @@
# | lag1 | lag2 | -> cont
.thunkcode fibs
needs_alloc $0140
# next value
thunkto %r11, $plus, $2, 020(%rbp), 030(%rbp)
@ -28,12 +29,14 @@
# || -> cont
.thunkcode fibs0
needs_alloc $0100
thunkto %r12, $INT_code, $1
thunkto %r11, $INT_code, $0
thunkto %rbp, $fibs, $2, %r11, %r12
enter_rbp
.thunkcode main
needs_alloc $0160
thunkto %r12, $fibs0, $0
thunkto %r11, $INT_code, $20
thunkto %r11, $list_int_index, $2, %r11, %r12

View file

@ -2,40 +2,104 @@
.ifndef _data_s_file
_data_s_file:
# Simple values and boxed machine integers
# | ptr | value |
CON_evacuate1:
retq # TODO
CON_scavenge1:
retq
# Format of the info tables:
# - code
# ----- code pointer
# - 8B helper information for eval/apply (generally this is 0, and only gets used for FUN/PAP)
# - 8B pointer to scavenge
# - 8B pointer to evacuate
#
# Evacuate interface:
# in: %rsi continuation, %rbp what to evacuate
# out: %rbp where the thing is now
# Notes:
# - IND thunks skip themselves on evacuate
# - checking whether stuff is already in the write region is handled by the _gc_evacuate prelude
#
# Scavenge interface:
# in: %rsi continuation, %rbp what to scavenge
# out: %rbp next thing to scavenge in memory
#
# Saved registers by evacuate and scavenge
# - _uskel_gc needs to preserve %rdi now; that might increase
# - scavenges use %r12-%r15
# - %rax-%rdx is scratch and evacuate use
# Simple values and boxed machine integers
# | ptr | value |
# Evacuate a boxed integer: copy the 2-cell object | ptr | value | by pushing
# it at the current %rsp.
# in:  %rbp object to evacuate, %rsi continuation
# out: %rbp address of the new copy
INT_evacuate:
pushq 010(%rbp) # value cell goes first, pushes grow towards lower addresses
pushq $INT_code # the header cell ends up at the lowest address of the copy
mov %rsp,%rbp # the copy starts at the current %rsp
jmp *%rsi
# Scavenge a boxed integer: no cell of it is evacuated, the object is only
# stepped over.
# in:  %rbp object to scavenge, %rsi continuation
# out: %rbp advanced by 020 bytes (2 cells) to the next object
INT_scavenge:
add $020, %rbp
jmp *%rsi
INT_info_table:
cell CON_evacuate1
cell CON_scavenge1
cell INT_evacuate
cell INT_scavenge
cell 0
INT_code:
continue
# Indirection
# | ptr | indptr |
# Evacuate an indirection: the IND object itself is not copied. The target
# pointer stored at 010(%rbp) replaces %rbp and is evacuated instead, so the
# continuation in %rsi receives the location of the target.
IND_evacuate:
mov 010(%rbp), %rbp
jmp _gc_evacuate
# IND_evacuate never copies an indirection, so scavenging one is not expected
# to happen. NOTE(review): the jump to address 0 is presumably meant to fault
# loudly if it ever does -- confirm.
IND_scavenge:
jmp 0 # thou shalt not scavenge here
IND_info:
cell IND_evacuate
cell IND_scavenge
cell 0
IND_code:
enter 010(%rbp)
# List
# | ptr | 0 |
# | ptr | 1 | a | b |
# | ptr | 0 | # [] case
# | ptr | 1 | a | b | # (a:b) case
LIST_evacuate:
# [] | a : b
retq #TODO
cmpq $0, 010(%rbp)
je LIST_evacuate_nil
pushq 030(%rbp)
pushq 020(%rbp)
pushq $1
pushq $LIST_code
mov %rsp, %rbp
jmp *%rsi
# [] case of LIST_evacuate: copy the 2-cell | ptr | 0 | object by pushing it
# and pass the address of the copy in %rbp to the continuation in %rsi.
LIST_evacuate_nil:
pushq $0 # constructor id 0 marks the [] case
pushq $LIST_code
mov %rsp, %rbp # the copy starts at the current %rsp
jmp *%rsi
LIST_scavenge:
mov 010(%rbp), %rax
shl $1, %rax
add $2, %rax
shl $3, %rax
add %rax, %rsi
retq
cmpq $0, 010(%rbp)
je LIST_scavenge_nil
mov %rbp, %r15
mov %rsi, %r14
mov $LIST_scavenge1, %rsi
mov 020(%r15), %rbp
jmp _gc_evacuate
# Continuation after the head (a) of a cons cell went through _gc_evacuate.
# %r15 = the cons cell being scavenged, %r14 = the continuation LIST_scavenge
# was entered with, %rbp = location of the head after evacuation.
LIST_scavenge1:
mov %rbp, 020(%r15) # write the head pointer back into the cell
mov $LIST_scavenge2, %rsi
mov 030(%r15), %rbp # next: the tail (b)
jmp _gc_evacuate
# Continuation after the tail went through _gc_evacuate; finishes the cell.
LIST_scavenge2:
mov %rbp, 030(%r15) # write the tail pointer back into the cell
mov %r15, %rbp
add $040, %rbp # step over the 4-cell cons object
jmp *%r14
# [] case of LIST_scavenge: nothing is evacuated, just step over the 2-cell
# object and continue at %rsi.
LIST_scavenge_nil:
add $020, %rbp
jmp *%rsi
LIST_info_table:
cell LIST_evacuate
cell LIST_scavenge
@ -46,13 +110,14 @@ LIST_code:
# FUN objects
# | ptr | thunkptr | args | arg[0] | arg[1] | ... | arg[args] |
FUN_evacuate:
retq #TODO
#TODO
FUN_scavenge:
mov 020(%rbp), %rax
add $3, %rax
shl $3, %rax
add %rax, %rsi
retq
#TODO
#mov 020(%rbp), %rax
#add $3, %rax
#shl $3, %rax
#add %rax, %rsi
#retq
# Info tables for FUN objects.
FUN0_info_table:
@ -89,39 +154,55 @@ FUN4_info_table:
cell 4
FUN4_code:
continue
# add more funN here as needed
# TODO: add more funN here as needed
# indirection (Q: how to recognize IND and THUNK on return?)
# | ptr | indptr |
IND_evacuate:
retq #TODO
IND_scavenge:
add $020,%rsi
retq
IND_info:
cell IND_evacuate
cell IND_scavenge
cell 0
IND_code:
enter 010(%rbp)
# THU objects (gc implementation only, actual THUs are defined by functions)
# THU objects (gc implementation only, actual THU data are created by functions)
# | ptr | args | arg[0] | arg[1] | ... | arg[args] |
# args wouldn't need to be here but let's keep them for gc simplicity
THU_evacuate:
retq #TODO
mov 010(%rbp), %rbx
mov %rbx,%rdx
mov %rbx,%rcx
add $2, %rdx
shl $3, %rdx
add %rbp, %rdx
# Copy loop of THU_evacuate: pushes the arguments from last to first.
# %rcx = number of arguments still to copy, %rdx = address just past the next
# argument to copy, %rbx = total argument count (all set up by THU_evacuate).
THU_evacuate_one:
dec %rcx
cmp $0, %rcx
jl THU_evacuate_fini # counter went negative: all arguments are pushed
sub $010, %rdx
pushq (%rdx)
jmp THU_evacuate_one
# Finish the copy by pushing the two header cells.
THU_evacuate_fini:
pushq %rbx # args cell
pushq 000(%rbp) # code pointer taken from the original object
mov %rsp, %rbp # result: the copy starts at the current %rsp
jmp *%rsi
THU_scavenge:
mov 010(%rbp), %rax
add $2,%rax
shl $3,%rax
add %rax,%rsi
retq
mov 010(%rbp), %r12
mov %rbp, %r15
mov %rsi, %r14
mov %r12,%r13
add $2, %r13
shl $3, %r13
add %r15, %r13
# Loop of THU_scavenge: sends every argument through _gc_evacuate, from last
# to first, and stores the returned pointer back into the argument slot.
# %r12 = arguments left, %r13 = address just past the next argument slot,
# %r15 = the object being scavenged, %r14 = the continuation THU_scavenge was
# entered with.
THU_scavenge_one:
sub $010, %r13
sub $1, %r12
jl THU_scavenge_fini # count went negative: no arguments left
mov (%r13), %rbp
mov $THU_scavenge_one_cont, %rsi
jmp _gc_evacuate
THU_scavenge_one_cont:
mov %rbp, (%r13) # write the argument's location back into its slot
jmp THU_scavenge_one
THU_scavenge_fini:
# return the address just past this object: %r15 + (args + 2) cells
mov %r15, %rbp
mov 010(%rbp), %r13
add $2, %r13
shl $3, %r13
add %r13, %rbp
jmp *%r14
.endif # _data_s_file
# evacuate and scavenge:
# - evacuate just copies the object
# - scavenge evacuates all children (to the new location IF they are in the old
# location), changes the pointer, and moves the scavenge pointer to the next
# object (because everything needs to be scavenged)

View file

@ -2,10 +2,53 @@
.ifndef _gc_s_file
_gc_s_file:
.macro prealloc name amount
mov %rsp,%rax
.section .bss
_write_region_start:
# begin of the active memory area
cell 0
_write_region_end:
# end of the active memory area (%rsp kinda starts here and goes down
# towards the start)
cell 0
_gc_trigger:
# point in memory where the gc will trigger (we don't necessarily wait for the write region to fill up!)
cell 0
_gc_last_size:
# how much data we evacuated last time
cell 0
_gc_min_alloc:
# minimum possible allocation
cell 0 # tunable constant
_gc_grow_ratio:
# 256th's of the minimal amount of memory increment compared to the
# last time. New minimal amount is compared as:
# (ratio * last size) >> 8
cell 0 # tunable constant
_gc_shrink_ratio:
# 256th's of the ratio of post-gc still-free to-space that should be considered for discarding
cell 0
_gc_region_start:
# in GC, this region is being evacuated and will eventually disappear
cell 0
_gc_region_end:
# end of the disappear region
cell 0
_gc_backup_thunk:
# backup of %rbp (the thunk that was running) so that we can use the register for other nonsense
cell 0
_gc_backup_cont:
# backup of %rsi (the continuation) for same reason
cell 0
.section .text
.macro needs_alloc amount
mov %rsp, %rax
sub _write_region_start, %rax
cmp $amount, %rax #TODO check direction
cmp \amount, %rax
jb _uskel_gc
.endm
@ -14,23 +57,24 @@ _uskel_alloc:
# calculate the desired size to %r14
mov _gc_min_alloc, %r14
#add _gc_region_end, %r14
#sub _gc_region_start, %r14
# check if the desired size isn't greater because of the last gc use
mov _gc_last_size, %rax
mul _gc_min_expect_ratio, %rax
shr 8, %rax
cmp %r14, %rax #TODO check direction
mulq _gc_grow_ratio
shr $8, %rax
add _gc_min_alloc, %rax
cmp %r14, %rax
cmova %rax, %r14
# check if we don't need even more space because we need to evacuate stuff
mov _gc_region_end, %rax
sub %rsp, %rax # trick -- if we counted from gc region start, allocated memory could never shrink
cmp %r14, %rax #TODO direction
cmp %r14, %rax
cmova %rax, %r14
#TODO add functionality to trigger the gc's a bit earlier than when
# they hit _write_region_start, to allow for faster compaction.
# maybe _write_region_trigger = 25%ish between _write_region_start and %rsp ?
alloc_goes_mmap:
mov $9, %rax # mmap
mov $0, %rdi # addr = NULL
mov %r14, %rsi # len = %r14
@ -48,27 +92,110 @@ _uskel_alloc:
jmp *%r15
# One-time GC setup: fill in the tunables and run the first allocation.
# in: %rsi continuation (kept in %r13 while _uskel_alloc runs)
_uskel_gc_init:
mov %rsi, %r13
movq $0x100, _gc_min_alloc
movq $0x180, _gc_grow_ratio # in 256ths, i.e. 1.5
movq $0x40, _gc_shrink_ratio # in 256ths, i.e. 0.25
mov $0, %rsp # fake original rsp for first alloc run
mov $_uskel_gc_init_cont, %rsi
jmp _uskel_alloc
_uskel_gc_init_cont:
# no collection has run yet, so the trigger starts at the region start
mov _write_region_start, %rax
mov %rax, _gc_trigger
jmp *%r13
_uskel_gc:
# save what we did before ending up here
mov %rbp, _gc_backup_thunk
mov %rsi, _gc_backup_cont
# first we need a new memory area
mov _write_region_start, _gc_region_start
mov _write_region_end, _gc_region_end
mov $_uskel_gc_evacuate, %r15
mov _write_region_start, %rbx
mov _write_region_end, %rcx
mov %rbx, _gc_region_start
mov %rcx, _gc_region_end
mov $_uskel_gc_evacuate, %rsi
jmp _uskel_alloc
# Entered as the continuation of _uskel_alloc (set up in _uskel_gc), i.e. once
# the new write region exists. Evacuates the two roots saved by _uskel_gc and
# then falls through into the scavenge loop.
_uskel_gc_evacuate:
# we may also need a scavenging queue! Or can that be solved by walking the write region?
# plan:
# evacuate thunk and cont, save them right away in _gc_backup_thunk and _cont
# scavenge from the top of the gc region, evacuating stuff en route
# (this is a little more complex because we grow down, so we have to
# scavenge up from %rsp to the last known scavenged position)
# finish when scavenge doesn't have anything more to scavenge
# point the writer to the new memory area
mov _write_region_end, %rsp
mov %rsp, %rdi # %rdi is "the last scavenged thing"
# start by evacuating the thunk and cont
mov _gc_backup_thunk, %rbp
mov $_uskel_gc_evacuate_cont_thunk, %rsi
jmp _gc_evacuate
_uskel_gc_evacuate_cont_thunk:
# %rbp = where the thunk lives now; remember it and evacuate the continuation
mov %rbp, _gc_backup_thunk
mov _gc_backup_cont, %rbp
mov $_uskel_gc_evacuate_cont_cont, %rsi
jmp _gc_evacuate
_uskel_gc_evacuate_cont_cont:
# %rbp = where the continuation lives now
mov %rbp, _gc_backup_cont
# Scavenge loop. Evacuated data grows downwards from _write_region_end, so
# each round walks upwards over the objects that were written since the
# previous round; scavenging them may evacuate (push) more objects below.
# Register roles:
#   %rsp  lowest address of evacuated data (moves down on every evacuate)
#   %rdi  everything at or above this address is already scavenged
#   %r8   value of %rsp at the start of the current round
#   %rbp  object being scavenged (walks from %r8 up to %rdi)
# Scavenge and evacuate routines must leave %rdi and %r8 untouched.
_uskel_gc_scavenge:
# start at what we wrote last, and remember where this round begins
mov %rsp, %rbp
mov %rsp, %r8
# if the thing is already scavenged, we didn't write anything, mark done.
cmp %rbp, %rdi
jbe _uskel_gc_scavenge_end
_uskel_gc_scavenge1:
# if all ok, scavenge one thing (moving %rbp) and recheck
mov $_uskel_gc_scavenge1_ret, %rsi
mov (%rbp), %rax
jmp *-020(%rax) # scavenge position in infotable
_uskel_gc_scavenge1_ret:
cmp %rbp, %rdi
ja _uskel_gc_scavenge1
# Everything from the start of this round upwards is now scavenged. Objects
# evacuated during the round lie below %r8 and still need a round of their
# own, so the boundary must be the round start and NOT the current %rsp
# (using %rsp here would end the loop with those objects unscavenged).
mov %r8, %rdi
jmp _uskel_gc_scavenge
_uskel_gc_scavenge_end:
# deallocate the old memory region
mov $11, %rax # munmap
mov _gc_region_end, %rsi
mov _gc_region_start, %rdi # addr = gc start
sub %rdi, %rsi # len = gc end - gc start
syscall
# recalculate the gc trigger point
mov %rsp, %rax
sub _write_region_start, %rax
mulq _gc_shrink_ratio
shr $8, %rax
add _write_region_start, %rax
mov %rax, _gc_trigger
# save how much data we actually had at this point
mov _write_region_end, _gc_last_size
sub %rsp, _gc_last_size
mov _write_region_end, %rax
sub %rsp, %rax
mov %rax, _gc_last_size
enter_rbp # restart the thunk for simplicity
# restore what we were doing
mov _gc_backup_thunk, %rbp
mov _gc_backup_cont, %rsi
enter_rbp # for simplicity just restart the thunk
# Common entry point for every evacuation.
# in:  %rbp object to evacuate, %rsi continuation
# out: %rbp (at the continuation) where the object lives now
# Objects inside [_write_region_start, _write_region_end) are left where they
# are; anything else is handed to the evacuate routine from its info table.
_gc_evacuate:
# check if we are really out of the target region
cmp _write_region_start, %rbp
jb _gc_evacuate_ok # below the write region
cmp _write_region_end, %rbp
jae _gc_evacuate_ok # at or above the end of the write region
# if not, let's just jump to cont and leave %rbp as result
jmp *%rsi
_gc_evacuate_ok:
# if we should evacuate, jump to the evac routine
mov (%rbp), %rax
jmp *-030(%rax) # evacuate pointer is stored 3 cells before the code
.endif #_gc_s_file

View file

@ -4,6 +4,7 @@ _io_s_file:
# | int | -> cont
.thunkcode print
needs_alloc $040
thunkto %rsi, $print_fini, $2, %rbp, %rsi
enter 020(%rbp)

View file

@ -6,6 +6,8 @@ _listops_s_file:
# | n | list | -> cont
.primop2 list_int_index
needs_alloc $060
mov 010(%rsi), %rdx # the list constructor id, must be 1
cmp $1, %rdx
jne list_int_index_not_found

View file

@ -9,6 +9,7 @@ _primops_s_file:
.macro .primop1 name
# | arg1 | -> cont
.thunkcode \name
needs_alloc $040
# push a thunk for collecting the first arg and set it as continuation
thunkto %rsi, $\name\()_fini, $2, %rbp, %rsi
enter 020(%rbp) # evaluate arg1
@ -39,6 +40,7 @@ _primops_s_file:
.macro .primop2 name
# | arg1 | arg2 | -> cont
.thunkcode \name
needs_alloc $050
# push a thunk for collecting the first arg and set it as continuation
thunkto %rsi, $\name\()_step1, $3, 030(%rbp), %rbp, %rsi
enter 020(%rbp) # evaluate arg1

View file

@ -8,47 +8,16 @@ _start:
.include "include/macros.s"
# this has globals
.section .bss
_unix_rsp:
# back-up of program entry rsp (aka the actual stack given by the
# actual OS; we might like to use it at some point, maybe)
cell 0
_write_region_start:
# begin of the active memory area
cell 0
_write_region_end:
# end of the active memory area (%rsp kinda starts here and goes down
# towards the start)
cell 0
_gc_last_size:
# how much data we evacuated last time
cell 0
_gc_min_alloc:
# minimum possible allocation
cell 0x100000 # tunable constant
_gc_min_expect_ratio:
# 256th's of the minimal amount of memory increment compared to the
# last time. New minimal amount is compared as:
# (ratio * last size) >> 8
cell 0x200 # tunable constant
_gc_region_start:
# in GC, this region is being evacuated and will eventually disappear
cell 0
_gc_region_end:
# end of the disappear region
cell 0
_gc_backup_thunk:
# backup of %rbp (the thunk that was running) so that we can use the register for other nonsense
cell 0
_gc_backup_cont:
# backup of %rsi (the continuation) for same reason
cell 0
.section .text
.include "include/gc.s"
.section .text
_uskel_start:
# we use the stack pointer for easy writing to the heap;
# back it up to memory state just if we ever needed it again.
@ -56,7 +25,7 @@ _uskel_start:
# allocate the initial chunk of memory
mov $_uskel_start_main, %rsi
jmp _uskel_alloc
jmp _uskel_gc_init
_uskel_start_main:
# push a thunk for main

View file

@ -7,6 +7,7 @@
# || -> cont
.thunkcode main
needs_alloc $0160
# push a new integer
thunkto %r11, $INT_code, $100