From 25be2fb0d4bc8095771e02db3f33ebe45aa4376e Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Sat, 4 Nov 2023 14:09:29 +0100 Subject: [PATCH] properly blackhole apply thunks --- include/apply.s | 72 ++++++++++++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 28 deletions(-) diff --git a/include/apply.s b/include/apply.s index a8d6768..638aac2 100644 --- a/include/apply.s +++ b/include/apply.s @@ -3,10 +3,24 @@ # | fun | arg[1] | arg[2] | ... | arg[args-1] | -> cont .thunkcode apply - needs_alloc $040 - thunkto %rsi, $apply_fini, $2, %rbp, %rsi - # TODO: this needs to be blackholed here, but we need to copy out all - # the args because the blackhole can't hold them + #determine how much stuff we need + mov 010(%rbp), %rcx + dec %rcx # we don't move the FUN + lea 040(,%rcx,010), %rbx # all args (in rcx) + rbp+rsi + 2qw thunk header + needs_alloc %rbx + + # push all closure args + mov %rcx, %rbx # backup arg count + lea 030(%rbp, %rcx, 010), %rdx #point behind the thunk (this re-adds the FUN qw!) 
+ apply_copy: + sub $010, %rdx + pushq (%rdx) + loop apply_copy + + # push thunk header (+2 args for rbp/rsi) and continue evaluating the FUN + add $2, %rbx + thunkto %rsi, $apply_fini, %rbx, %rbp, %rsi + blackhole enter 020(%rbp) # fun -> | ret (with args) | cont | @@ -15,11 +29,11 @@ mov 020(%rbp), %r10 # the original thunk mov 020(%rsi), %r11 # amount of args applied in the closure mov -010(%r9), %r12 # amount of args required to make a thunk - mov 010(%r10), %r13 # amount of args in the original thunk - sub $1, %r13 # amount of args we want to apply (the 1st one is the FUN) + mov 010(%rbp), %r13 # amount of args in this apply_fini thunk + sub $2, %r13 # amount of args we want to apply (the 2 extra ones are the backed-up rbp and rsi; the FUN is not copied here) lea (%r11, %r13), %r14 # total amount arguments we have - lea 050(%r14), %r15 # how much memory this needs in extreme + lea 050(%r14), %r15 # how much memory this needs in extreme #TODO: check this needs_alloc %r15 # worst-case memory is: we make a thunk (2 headers + some args) and a # leftover closure (3 headers + rest of args) @@ -36,7 +50,7 @@ apply_fini_pt: mov %r13, %rcx cmp $0, %rcx jz apply_fini_pt_thunk_skip - lea 030(%r10, %r13, 010), %rdx + lea 040(%rbp, %r13, 010), %rdx apply_fini_pt_thunk_copy: sub $010, %rdx pushq (%rdx) @@ -55,26 +69,27 @@ apply_fini_pt: apply_fini_pt_fun_skip: # make a thunk - thunk 010(%rsi), %r14 + thunk 010(%rsi), %r14 # thunk code (from FUN code) + amount of args cmp %r12, %r14 # are we precisely at the right amount of arguments for a thunk? - je apply_fini_pt_thunk # if not, wrap a closure - apply_fini_pt_closure: - thunkto %rsi, %r9 - - # replace the original thunk with an indirect - mov %rsi, 010(%r10) - movq $IND_code, (%r10) - # return the closure (%rsi) to the original continuation - enter 030(%rbp) + jb apply_fini_pt_closure # if not, wrap a closure apply_fini_pt_thunk: - # it is a thunk, point to it and start evaluating it + # we've made the exact thunk we want. 
Replace the original with an indirect mov %rsp, 010(%r10) movq $IND_code, (%r10) - # tell the thunk to evaluate into the original continuation + # and tell the new thunk to evaluate into the original continuation mov 030(%rbp), %rsi enter %rsp + apply_fini_pt_closure: + # if we still have an incomplete closure, rewrap it in the original FUN wrappage + thunkto %rsi, %r9 + # replace the original thunk with an indirect + mov %rsp, 010(%r10) + movq $IND_code, (%r10) + # and return the closure (%rsi) to the original continuation as a result + enter 030(%rbp) + apply_fini_o: #TODO needs to be tested # too many args, we need to split off a bit # first move just the right amount of args off the thunk @@ -82,14 +97,14 @@ apply_fini_o: #TODO needs to be tested sub %r11, %rcx cmp $0, %rcx jz apply_fini_o_tc_skip - lea 030(%r10, %rcx, 010), %rdx + lea 040(%rbp, %rcx, 010), %rdx apply_fini_o_tc_copy: sub $010, %rdx pushq (%rdx) loop apply_fini_o_tc_copy apply_fini_o_tc_skip: - # move all args from the closure + # now add all the args from the closure mov %r11, %rcx cmp $0, %rcx jz apply_fini_o_fun_skip @@ -100,16 +115,17 @@ apply_fini_o: #TODO needs to be tested loop apply_fini_o_fun_copy apply_fini_o_fun_skip: - # make the thunk for the application that can be evaluated later + # make a thunk out of the successfully finished closure; it will be + # evaluated later thunkto %r15, 010(%rsi), %r14 # now make a thunk with the rest of the stuff mov %r14, %rcx sub %r12, %rcx - mov %rcx, %r14 # backup leftover count for later + mov %rcx, %r14 # backup the leftover-args count for later cmp $0, %rcx jz apply_fini_o_tt_skip - lea 030(%r10, %r13, 010), %rdx + lea 040(%rbp, %r13, 010), %rdx apply_fini_o_tt_copy: sub $010, %rdx pushq (%rdx) @@ -117,12 +133,12 @@ apply_fini_o: #TODO needs to be tested apply_fini_o_tt_skip: # finish the leftovers thunk - add $1, %r14 # (1 fun to apply to + args) - thunk $apply,%r14,%r15 + add $1, %r14 # (1 FUN to apply to + leftover args) + thunk $apply, 
%r14, %r15 # push the 1st arg (FUN) + argcount # replace the original thunk with an indirect mov %rsp, 010(%r10) movq $IND_code, (%r10) - # evaluate to the original continuation + # return the applied function to the original continuation mov 030(%rbp), %rsi enter %rsp