adjust the amount of diligence given into shooting stuff down

properly blackhole apply thunks
fix blackhole (partially), kill apply1
2023-11-04 16:15:13 +01:00 · 2023-11-04 14:09:29 +01:00 · 2023-11-03 21:28:40 +01:00 · 2023-11-01 18:25:26 +01:00 · 2023-11-01 18:25:06 +01:00 · 2023-10-29 23:04:01 +01:00
12 changed files with 197 additions and 100 deletions
--- a/fibs.s
+++ b/fibs.s
@ -32,8 +32,8 @@
 	needs_alloc $0100
 	thunkto %r12, $INT_code, $1
 	thunkto %r11, $INT_code, $0
-	thunkto %rbp, $fibs, $2, %r11, %r12
-	enter_rbp
+	thunk $fibs, $2, %r11, %r12
+	enter %rsp

 .thunkcode main
 	needs_alloc $0160
--- a/include/apply.s
+++ b/include/apply.s
@ -1,71 +1,26 @@

 .include "include/data.s"

-#TODO apply1 seems obsolete by generic apply
-
-# | fun | arg | -> cont
-.thunkcode apply1
-	needs_alloc $050
-	thunkto %rsi, $apply1_fini, $3, 030(%rbp), %rbp, %rsi
-	enter 020(%rbp) # evaluate fun
-
-# fun -> | arg | ret | cont |
-.thunkcode apply1_fini
-	# we now know that fun points to a FUN with at least one arg missing.
-	# we're certainly going to copy a lot of args.
-	mov 020(%rsi), %r11 # amount of args applied now
-
-	# prepare enough memory for the worst case alloc (make FUN from arg count + 3)
-	lea 030(,%r11,010), %r12
-	needs_alloc %r12
-
-	# the copying code is shared so let's do that first:
-	pushq 020(%rbp) #push the new arg
-	lea 030(%rsi), %rdx # the end (first arg)
-	lea (%rdx, %r11, 010), %rbx # address behind the last arg
-
-	cmp %rdx, %rbx
-	jbe apply1_fini_cont
-apply1_fini_copy:
-	sub $010, %rbx # iterate down
-	pushq (%rbx) # push what we have
-	cmp %rdx, %rbx # check if we are at the end
-	ja apply1_fini_copy # if not, continue
-apply1_fini_cont:
-	add $1, %r11
-	pushq %r11 # new number of args of fun/thunk
-	pushq 010(%rsi) # thunk code pointer
-
-	# copying of all args and their thunky header is now done, let's find
-	# out how we need to finish it.
-
-	mov (%rsi), %rdi # infotable for the original fun
-	mov -010(%rdi), %r12 # amount of args required to make the thunk
-	cmp %r11, %r12
-	ja apply1_fini_feed # not enough args, just make a bigger FUN
-
-	# if there was enough args, we simply have a thunk that we want to
-	# continue evaluating, so let's jump to it.
-	mov 030(%rbp), %rdi # load the original thunk
-	mov %rsp, 010(%rdi) # set indirect to the new thunk
-	movq $IND_code, 0(%rdi)
-	mov 040(%rbp), %rsi # set continuation to the original continuation
-	enter %rsp # evaluate the new thunk
-
-apply1_fini_feed:
-	# if there were not enough args, we push the function info and return
-	pushq (%rsi) # copy the function infoptr
-
-	mov 030(%rbp), %rdi # load the original thunk
-	mov %rsp, 010(%rdi) # set the indirect to the new FUN
-	movq $IND_code, 0(%rdi)
-	mov %rsp, %rsi # return the new FUN
-	enter 040(%rbp) # jump to the continuation
-
 # | fun | arg[1] | arg[2] | ... | arg[args-1] | -> cont
 .thunkcode apply
-	needs_alloc $040
-	thunkto %rsi, $apply_fini, $2, %rbp, %rsi
+	# Copies the args of this application thunk into an apply_fini
+	# continuation (together with the thunk pointer and the original
+	# continuation) and then enters the FUN slot to evaluate it.
+	#determine how much stuff we need
+	mov 010(%rbp), %rcx
+	dec %rcx # we don't move the FUN
+	lea 040(,%rcx,010), %rbx # all args (in rcx) + rbp+rsi + 2qw thunk header
+	needs_alloc %rbx

+	# push all closure args
+	mov %rcx, %rbx # backup arg count
+	cmp $0, %rcx
+	jz apply_copy_skip # nothing to copy; `loop` with rcx == 0 would wrap around
+	lea 030(%rbp, %rcx, 010), %rdx #point behind the thunk (this re-adds the FUN qw!)
+	apply_copy:
+	sub $010, %rdx
+	pushq (%rdx)
+	loop apply_copy
+	apply_copy_skip:

+	# push thunk header (+2 args for rbp/rsi) and continue evaluating the FUN
+	add $2, %rbx
+	thunkto %rsi, $apply_fini, %rbx, %rbp, %rsi
+	blackhole
 	enter 020(%rbp)

 # fun -> | ret (with args) | cont |
@ -74,11 +29,11 @@ apply1_fini_feed:
 	mov 020(%rbp), %r10 # the original thunk
 	mov 020(%rsi), %r11 # amount of args applied in the closure
 	mov -010(%r9), %r12 # amount of args required to make a thunk
-	mov 010(%r10), %r13 # amount of args in the original thunk
-	sub $1, %r13 # amount of args we want to apply (the 1st one is the FUN)
+	mov 010(%rbp), %r13 # amount of slots in this continuation (backup rbp + rsi + copied args)
+	sub $2, %r13 # amount of args we want to apply (the 2 extra ones are the backup rbp and rsi; apply did not copy the FUN)

 	lea (%r11, %r13), %r14 # total amount arguments we have
-	lea 050(%r14), %r15 # how much memory this needs in extreme
+	lea 050(,%r14,010), %r15 # bytes needed in extreme: 5 header qwords + 1 qword per arg
 	needs_alloc %r15
 	# worst-case memory is: we make a thunk (2 headers + some args) and a
 	# leftover closure (3 headers + rest of args)
@ -95,7 +50,7 @@ apply_fini_pt:
 	mov %r13, %rcx
 	cmp $0, %rcx
 	jz apply_fini_pt_thunk_skip
-	lea 030(%r10, %r13, 010), %rdx
+	lea 040(%rbp, %r13, 010), %rdx
 	apply_fini_pt_thunk_copy:
 	sub $010, %rdx
 	pushq (%rdx)
@ -114,26 +69,27 @@ apply_fini_pt:
 	apply_fini_pt_fun_skip:

 	# make a thunk
-	thunk 010(%rsi), %r14
+	thunk 010(%rsi), %r14 # thunk code (from FUN code) + amount of args
 	cmp %r12, %r14 # are we precisely at the right amount of arguments for a thunk?
-	je apply_fini_pt_thunk # if not, wrap a closure
-	apply_fini_pt_closure:
-	thunkto %rsi, %r9
-
-	# replace the original thunk with an indirect
-	mov %rsi, 010(%r10)
-	movq $IND_code, (%r10)
-	# return the closure (%rsi) to the original continuation
-	enter 030(%rbp)
+	jb apply_fini_pt_closure # if not, wrap a closure

 	apply_fini_pt_thunk:
-	# it is a thunk, point to it and start evaluating it
+	# we've made the exact thunk we want. Replace the original with an indirect
 	mov %rsp, 010(%r10)
 	movq $IND_code, (%r10)
-	# tell the thunk to evaluate into the original continuation
+	# and tell the new thunk to evaluate into the original continuation
 	mov 030(%rbp), %rsi
 	enter %rsp

+	apply_fini_pt_closure:
+	# if we still have an incomplete closure, rewrap it in the original FUN wrappage
+	thunkto %rsi, %r9
+	# replace the original thunk with an indirect
+	mov %rsp, 010(%r10)
+	movq $IND_code, (%r10)
+	# and return the closure (%rsi) to the original continuation as a result
+	enter 030(%rbp)
+
 apply_fini_o: #TODO needs to be tested
 	# too many args, we need to split off a bit
 	# first move just the right amount of args off the thunk
@ -141,14 +97,14 @@ apply_fini_o: #TODO needs to be tested
 	sub %r11, %rcx
 	cmp $0, %rcx
 	jz apply_fini_o_tc_skip
-	lea 030(%r10, %rcx, 010), %rdx
+	lea 040(%rbp, %rcx, 010), %rdx
 	apply_fini_o_tc_copy:
 	sub $010, %rdx
 	pushq (%rdx)
 	loop apply_fini_o_tc_copy
 	apply_fini_o_tc_skip:

-	# move all args from the closure
+	# now add all the args from the closure
 	mov %r11, %rcx
 	cmp $0, %rcx
 	jz apply_fini_o_fun_skip
@ -159,16 +115,17 @@ apply_fini_o: #TODO needs to be tested
 	loop apply_fini_o_fun_copy
 	apply_fini_o_fun_skip:

-	# make the thunk for the application that can be evaluated later
+	# make a thunk out of the successfully finished closure; it will be
+	# evaluated later
 	thunkto %r15, 010(%rsi), %r14

 	# now make a thunk with the rest of the stuff
 	mov %r14, %rcx
 	sub %r12, %rcx
-	mov %rcx, %r14 # backup leftover count for later
+	mov %rcx, %r14 # backup the leftover-args count for later
 	cmp $0, %rcx
 	jz apply_fini_o_tt_skip
-	lea 030(%r10, %r13, 010), %rdx
+	lea 040(%rbp, %r13, 010), %rdx
 	apply_fini_o_tt_copy:
 	sub $010, %rdx
 	pushq (%rdx)
@ -176,12 +133,12 @@ apply_fini_o: #TODO needs to be tested
 	apply_fini_o_tt_skip:

 	# finish the leftovers thunk
-	add $1, %r14 # (1 fun to apply to + args)
-	thunk $apply,%r14,%r15
+	add $1, %r14 # (1 FUN to apply to + leftover args)
+	thunk $apply, %r14, %r15 # push the 1st arg (FUN) + argcount

 	# replace the original thunk with an indirect
 	mov %rsp, 010(%r10)
 	movq $IND_code, (%r10)
-	# evaluate to the original continuation
+	# return the applied function to the original continuation
 	mov 030(%rbp), %rsi
 	enter %rsp
--- a/include/data.s
+++ b/include/data.s
@ -1,7 +1,7 @@

 .ifndef _data_s_file
 _data_s_file:
-	nop
+	nop # avoid confusing gdb

 # Format of the info tables:
 # - code
@ -64,6 +64,28 @@ IND_info:
 IND_code:
 	enter 010(%rbp)

+# Blackhole (contains the original thunkptr for debugging purposes)
+# | ptr | orig_thunkptr |
+# The `blackhole` macro fills the second slot with the first qword that the
+# thunk had before it was overwritten with BLE_code.
+BLE_evacuate:
+	pushq 010(%rbp) # copy the saved original pointer
+	pushq $BLE_code # copy the header
+	mov %rsp,%rbp # rbp now points at the fresh 2-qword copy
+	jmp _gc_evacuate_ret
+BLE_scavenge:
+	add $020, %rbp # step over the 2 qwords of the object; nothing is evacuated from it
+	jmp _gc_scavenge_ret

+BLE_info_table:
+	cell BLE_evacuate
+	cell BLE_scavenge
+	cell 0
+BLE_code:
+	# if we hit this, we've got a pure loop in a program, and it is never
+	# going to actually progress. So let's just shoot it down.
+	mov 0, %rax # no `$`: this is a load from absolute address 0, not an immediate (presumably meant to fault)
+	jmp BLE_code # in case the load above did not stop us, try again
+	# this might eventually generate an actual IO-style exception or something.
+
 # List
 # | ptr | 0 |           # [] case
 # | ptr | 1 | a | b |   # (a:b) case
@ -86,7 +108,7 @@ LIST_scavenge:
 	cmpq $0, 010(%rbp)
 	je LIST_scavenge_nil
 	mov %rbp, %r15
-	
+
 	mov $LIST_scavenge1, %rsi
 	mov 020(%r15), %rbp
 	jmp _gc_evacuate
--- a/include/gc.s
+++ b/include/gc.s
@ -96,7 +96,7 @@ _uskel_alloc:

 _uskel_gc_init:
 	mov %rsi, %r13
-	movq $0x100, _gc_min_alloc # must be higher than 2x the biggest thunk possible
+	movq $0x100000, _gc_min_alloc # must be higher than 2x the biggest thunk possible
 	movq $0x180, _gc_grow_ratio
 	movq $0x40, _gc_shrink_ratio
 	mov $0, %rsp # fake original rsp for first alloc run
@ -124,7 +124,7 @@ _uskel_gc:
 	# point the writer to the new memory area
 	mov _write_region_end, %rsp
 	mov %rsp, %r8 # % r8 is the "last thing that was scavenged"
-	
+
 	# start by evacuating the thunk and cont
 	mov _gc_backup_thunk, %rbp
 	mov $_uskel_gc_evacuate_cont_thunk, %rsi
@ -167,7 +167,7 @@ _uskel_gc:
 	mov _gc_region_start, %rdi # addr = gc start
 	sub %rdi, %rsi # len = gc end - gc start
 	syscall
-	
+
 	# recalculate the gc trigger point
 	mov %rsp, %rax
 	sub _write_region_start, %rax
--- a/include/intops.s
+++ b/include/intops.s
@ -5,13 +5,13 @@ _intops_s_file:
 .include "include/primops.s"

 .primop2 plus
-	mov 010(%rsi), %rax # arg 2 
+	mov 010(%rsi), %rax # arg 2
 	mov 020(%rbp), %rsi # location of arg1
 	add 010(%rsi), %rax # arg 1
 	primop2_ret_int %rax

 .primop2 mul
-	mov 010(%rsi), %rax # arg 2 
+	mov 010(%rsi), %rax # arg 2
 	mov 020(%rbp), %rsi # location of arg1
 	mulq 010(%rsi) # arg 1 (goes to %rax and %rdx)
 	primop2_ret_int %rax
@ -19,7 +19,7 @@ _intops_s_file:
 .primop2 sub
 	mov 020(%rbp), %rdi # location of arg1
 	mov 010(%rdi), %rax # arg 1 (read through %rdi loaded above; %rdx is never set here)
-	sub 010(%rsi), %rax # arg 2 
+	sub 010(%rsi), %rax # arg 2
 	primop2_ret_int %rax

 .endif # _intops_s_file
--- a/include/io.s
+++ b/include/io.s
@ -10,6 +10,7 @@ _io_s_file:

 # arg -> | ret | cont |
 .thunkcode print_fini
+	needs_alloc $0110 #64 bit characters + 8 backup
 	mov 010(%rsi), %rax

 	# make a string
@ -25,7 +26,7 @@ _io_s_file:
 	shr $1, %rax
 	jnz print_fini_loop

-	mov $0, %rdi #stdin
+	mov $1, %rdi #stdout
 	mov %rsp, %rdx
 	sub %r15, %rdx #size
 	mov %r15, %rsi #buf
--- a/include/macros.s
+++ b/include/macros.s
@ -51,4 +51,10 @@ _macros_s_file:
 	mov %rsp, \reg
 .endm

+.macro blackhole
+	# Turns the thunk at %rbp into a blackhole in place. Clobbers %rax.
+	mov (%rbp), %rax # original first qword of the thunk
+	mov %rax, 010(%rbp) # keep it in the 2nd slot (overwrites what was stored there)
+	movq $BLE_code, (%rbp) # entering this thunk again now lands in BLE_code
+.endm
+
 .endif # _macros_s_file
--- a/include/main_exit.s
+++ b/include/main_exit.s
@ -6,5 +6,8 @@ _main_exit_s_file:
 	mov 010(%rsi), %rdi # result INT goes to syscall exitcode
 	mov $60, %rax  # exit=60
 	syscall # exit %rdi
+	# syscall might also die; at that point let's die more elaborately
+	mov 0, %rax
+	jmp main_exit

 .endif # _main_exit_s_file
--- a/include/primops.s
+++ b/include/primops.s
@ -12,6 +12,7 @@ _primops_s_file:
 	needs_alloc $040
 	# push a thunk for collecting the first arg and set it as continuation
 	thunkto %rsi, $\name\()_fini, $2, %rbp, %rsi
+	blackhole
 	enter 020(%rbp) # evaluate arg1

 # arg1 -> | ret | cont |
@ -43,6 +44,7 @@ _primops_s_file:
 	needs_alloc $050
 	# push a thunk for collecting the first arg and set it as continuation
 	thunkto %rsi, $\name\()_step1, $3, 030(%rbp), %rbp, %rsi
+	blackhole
 	enter 020(%rbp) # evaluate arg1

 # arg1 -> | arg2 | ret | cont |
--- a/include/uskel.s
+++ b/include/uskel.s
@ -32,10 +32,10 @@ _uskel_start:
 	pushq $0
 	pushq $main

-	mov $0, %rsi # set continuation to exit
+	# loop the continuation to itself (prevents gc trouble, should never be reached)
+	mov %rsp, %rsi
 	enter %rsp # run the program
 	# Q: are there gonna be functions that have both the argument AND the cont?
-	# 
 	# A: No, stuff is either entered as return-continuation (takes res,
 	# cont has to be saved) or as forward call (takes cont)
 	#
--- a/sumac.s
+++ b/sumac.s
@ -0,0 +1,40 @@
+
+
+.include "include/uskel.s"
+
+.include "include/data.s"
+.include "include/io.s"
+.include "include/intops.s"
+
+.primop2 sumac
+	# Accumulating sum: when arg1 is 0 the value of arg2 is returned,
+	# otherwise this continues as `sumac (arg1 - 1) (arg2 + arg1)`.
+	needs_alloc $0100 # 2 INT boxes (2 qw each) + 1 sumac thunk (4 qw)

+	mov 020(%rbp), %rdi #1st arg
+	mov 010(%rdi), %rcx #1st arg val
+	mov 010(%rsi), %rax #2nd arg val

+	cmp $0, %rcx
+	jz sumac_ret

+	add %rcx, %rax # new accumulator = arg2 + arg1
+	dec %rcx # new counter = arg1 - 1
+	thunkto %r10, $INT_code, %rcx
+	thunkto %r11, $INT_code, %rax
+	thunkto %r10, $sumac, $2, %r10, %r11
+	primop2_cont_indirect %r10 # hand over to the new sumac thunk (macro defined elsewhere)

+	sumac_ret:
+	primop2_ret_int %rax
+
+.thunkcode main
+	# Builds `print (sumac 10000000 0)` and enters it with main_exit as the continuation.
+	needs_alloc $0150 # 2 INT boxes (2+2 qw) + sumac (4 qw) + print (3 qw) + main_exit (2 qw)

+	thunkto %r11, $INT_code, $10000000
+	thunkto %r12, $INT_code, $0
+	thunkto %r11, $sumac, $2, %r11, %r12

+	thunkto %r11, $print, $1, %r11
+	thunkto %rsi, $main_exit, $0
+	enter %r11
+
+.include "include/main_exit.s"
--- a/zipfib.s
+++ b/zipfib.s
@ -0,0 +1,66 @@
+
+.include "include/uskel.s"
+
+.include "include/listops.s"
+.include "include/intops.s"
+.include "include/io.s"
+.include "include/main_exit.s"
+.include "include/apply.s"
+
+.thunkcode zipWith
+	# | f | arg1 | arg2 | -> cont
+	# Saves everything into a zipWith_arg1 continuation and evaluates arg1 first.
+	needs_alloc $070 # 2 header qw + 5 saved slots
+	thunkto %rsi, $zipWith_arg1, $5, 020(%rbp), 030(%rbp), 040(%rbp), %rbp, %rsi
+	blackhole
+	enter 030(%rbp)
+
+.thunkcode zipWith_arg1
+	# arg1 -> | f | arg1 | arg2 | ret | cont |
+	# Reuses this continuation in place for the second step instead of allocating a new one.
+	movq $zipWith_fini, (%rbp) # retarget this continuation to the final step
+	mov %rsi, 030(%rbp) # store the value we were entered with over the arg1 slot
+	mov %rbp, %rsi # this (retargeted) thunk is the continuation for arg2
+	mov 040(%rbp), %rbp # arg2
+	enter_rbp
+
+.thunkcode zipWith_fini
+	# arg2 -> | f | arg1 | arg2 | ret | cont |
+	# Both lists are available now; builds either [] or
+	# `f (head arg1) (head arg2) : zipWith f (tail arg1) (tail arg2)`,
+	# overwrites the original zipWith thunk with an indirection to the
+	# result and passes the result to the saved continuation.
+	# Worst case: zipWith thunk (5 qw) + apply thunk (5 qw) + LIST cons (4 qw) = 14 qw.
+	needs_alloc $0160
+	mov 030(%rbp), %r8 # arg1
+	mov %rsi, %r9 # arg2
+	cmpq $0, 010(%r8)
+	je zipWith_null
+	cmpq $0, 010(%r9)
+	je zipWith_null

+	# f (head arg1) (head arg2) : zipWith f (tail arg1) (tail arg2)
+	thunkto %r10, $zipWith, $3, 020(%rbp), 030(%r8), 030(%r9)
+	thunkto %r11, $apply, $3, 020(%rbp), 020(%r8), 020(%r9)
+	thunkto %rsi, $LIST_code, $1, %r11, %r10

+	zipWith_ret:
+	mov 050(%rbp), %r8 # the original zipWith thunk
+	movq $IND_code, 000(%r8)
+	mov %rsi, 010(%r8)
+	mov 060(%rbp), %rbp # the original continuation
+	enter_rbp

+	zipWith_null:
+	thunkto %rsi, $LIST_code, $0
+	jmp zipWith_ret
+
+
+.thunkcode main
+	# Builds the self-referential list x, then `print (list_int_index 25 x)`,
+	# and enters it with main_exit as the continuation.
+	needs_alloc $0370
+	# x = 0 : 1 : zipWith plus x (tail x)
+	thunkto %r8, $FUN2_code, $plus, $0
+	thunkto %r8, $zipWith, $3, %r8, $0, $0 # both list args are placeholders, patched below
+	thunkto %r9, $INT_code, $1
+	thunkto %r9, $LIST_code, $1, %r9, %r8 # r9 = 1 : zipWith ...   (tail x)
+	thunkto %r10, $INT_code, $0
+	thunkto %r10, $LIST_code, $1, %r10, %r9 # r10 = 0 : r9          (x)
+	# recurse args!
+	mov %r10, 030(%r8) # zipWith arg1 = x
+	mov %r9, 040(%r8) # zipWith arg2 = tail x

+	thunkto %r8, $INT_code, $25
+	thunkto %r8, $list_int_index, $2, %r8, %r10
+	thunkto %r8, $print, $1, %r8
+	thunkto %rsi, $main_exit, $0
+	enter %r8
Author	SHA1	Message	Date
Mirek Kratochvil	efcf450182	adjust the amount of diligence given into shooting stuff down	2023-11-04 16:15:13 +01:00
Mirek Kratochvil	25be2fb0d4	properly blackhole apply thunks	2023-11-04 14:09:29 +01:00
Mirek Kratochvil	d7fcbcdbf0	fix blackhole (partially), kill apply1	2023-11-03 21:28:40 +01:00
Mirek Kratochvil	1909e97d46	Merge branch 'gc-attempt-1'	2023-11-01 18:25:26 +01:00
Mirek Kratochvil	970ffb4684	all looks ok	2023-11-01 18:25:06 +01:00
Mirek Kratochvil	32d71ef9f2	shorter	2023-10-29 23:04:01 +01:00