From 74cad993376dc269e8389fbf150be9ecc36890c7 Mon Sep 17 00:00:00 2001
From: Mirek Kratochvil
Date: Sun, 6 Aug 2023 00:25:53 +0200
Subject: [PATCH] fibs work

---
 Makefile                     |  14 ++-
 exit_123.s                   |  48 ++------
 fibs.s                       |  81 ++++++++++++
 include/data.s               |  99 +++++++++++++++
 include/intops.s             |  25 ++++
 include/io.s                 |  44 +++++++
 include/listops.s            |  40 ++++++
 macros.s => include/macros.s |  16 ++-
 include/main_exit.s          |  10 ++
 include/primops.s            |  54 ++++++++
 include/uskel.s              |  54 ++++++++
 print_123.s                  |  42 +++++++
 uskel.s                      | 232 -----------------------------------
 13 files changed, 479 insertions(+), 280 deletions(-)
 create mode 100644 fibs.s
 create mode 100644 include/data.s
 create mode 100644 include/intops.s
 create mode 100644 include/io.s
 create mode 100644 include/listops.s
 rename macros.s => include/macros.s (69%)
 create mode 100644 include/main_exit.s
 create mode 100644 include/primops.s
 create mode 100644 include/uskel.s
 create mode 100644 print_123.s
 delete mode 100644 uskel.s

diff --git a/Makefile b/Makefile
index d75a9df..8866dc6 100644
--- a/Makefile
+++ b/Makefile
@@ -1,11 +1,17 @@
 
-all: uskel
+SYMSRCS=$(wildcard *.s)
+OBJS=$(SYMSRCS:.s=.o)
+PROGS=$(SYMSRCS:.s=)
+
+.SUFFIXES:
+
+all: $(PROGS)
 
 clean:
-	rm -f uskel.o uskel
+	rm -f $(OBJS) $(PROGS)
 
-uskel.o: uskel.s $(wildcard *.s)
+%.o: %.s $(wildcard include/*.s)
 	as $< -o $@
 
-uskel: uskel.o
+%: %.o
 	ld $@.o -o $@
diff --git a/exit_123.s b/exit_123.s
index 9fe9264..ed96ad1 100644
--- a/exit_123.s
+++ b/exit_123.s
@@ -1,6 +1,10 @@
 
+.include "include/uskel.s"
+
+.include "include/data.s"
+
 # || -> cont
-.func main
+.thunkcode main
 	# push a new integer
 	pushq $100
 	pushq $INT_code
@@ -27,44 +31,6 @@
 	mov %rsp, %rsi
 	enter %r13
 
-# exitcode -> | cont (unused, should be 0) |
-.func main_exit
-	mov 0x8(%rsi), %rdi # result to syscall exitcode
-	mov $0x3c, %rax # syscall 60
-	syscall # exit %rdi
-
-# | arg1 | arg2 | -> cont
-.func plus
-	# push a thunk for finishing the plus
-	push %rsi # cont
-	push %rbp # ret (self)
-	mov 0x18(%rbp), %rax
-	push %rax # arg2
-	pushq $3
-	pushq $plus_step1
-
-	mov %rsp, %rsi # continue to the new thunk
-	enter 0x10(%rbp) # evaluate arg1
-
-# arg1 -> | arg2 | ret | cont |
-.func plus_step1
-	# this is guaranteed to be entered only once (it's a cont), so we can rewrite the thunk in place
-	mov 0x10(%rbp), %rax
-	movq $plus_fini, 0x00(%rbp)
-	mov %rsi, 0x10(%rbp)
-
-	mov %rbp, %rsi # continue on the rewritten thunk
-	enter %rax # evaluate arg1
-
-# arg2 -> | arg1 | ret | cont |
-.func plus_fini
-	mov 0x8(%rsi), %rax # arg 2
-	mov 0x10(%rbp), %rsi # location of arg1
-	add 0x8(%rsi), %rax # arg 1
-
-	mov 0x18(%rbp), %rsi # save result to the original plus thunk
-	mov %rax, 0x08(%rsi)
-	movq $INT_code, 0x00(%rsi)
-
-	enter 0x20(%rbp) # eval cont, returning %rsi
+.include "include/main_exit.s"
+.include "include/intops.s"
 
diff --git a/fibs.s b/fibs.s
new file mode 100644
index 0000000..c32f424
--- /dev/null
+++ b/fibs.s
@@ -0,0 +1,81 @@
+
+.include "include/uskel.s"
+
+.include "include/listops.s"
+.include "include/intops.s"
+.include "include/io.s"
+.include "include/main_exit.s"
+
+# | lag1 | lag2 | -> cont
+.thunkcode fibs
+	# next value (thunk for lag1 + lag2)
+	pushq 030(%rbp) # lag2
+	pushq 020(%rbp) # lag1
+	pushq $2
+	pushq $plus
+	mov %rsp, %r11 # r11 = the plus thunk
+
+	# fib call with the next value, i.e. fibs(lag2, lag1 + lag2)
+	push %r11 # new lag2 = next value
+	pushq 030(%rbp) # new lag1 = old lag2
+	pushq $2
+	pushq $fibs
+	mov %rsp, %r12 # r12 = thunk for the rest of the list
+
+	# cons list with lag1
+	push %r12 # tail
+	push 020(%rbp) # head = lag1
+	pushq $1 # constructor id 1 (cons cell)
+	pushq $LIST_code
+	mov %rsp, %r13 # r13 = the cons cell
+
+	# replace self with IND pointing to the cons cell
+	mov %r13, 010(%rbp)
+	movq $IND_code, 0(%rbp)
+
+	mov %rsi, %rbp # the continuation gets entered next
+	mov %r13, %rsi # return the cons cell in %rsi
+	enter_rbp
+
+# || -> cont
+.thunkcode fibs0
+	pushq $1
+	pushq $INT_code
+	mov %rsp, %r12 # r12 = boxed 1
+	pushq $0
+	pushq $INT_code
+	mov %rsp, %r11 # r11 = boxed 0
+
+	push %r12 # lag2 = 1
+	push %r11 # lag1 = 0
+	pushq $2
+	pushq $fibs
+
+	enter %rsp # evaluate the freshly pushed fibs thunk
+
+.thunkcode main
+	pushq $0
+	pushq $fibs0
+	mov %rsp, %r12 # r12 = fibs0 thunk (the list)
+
+	pushq $20
+	pushq $INT_code
+	mov %rsp, %r11 # r11 = boxed index 20
+
+	push %r12 # list
+	push %r11 # n
+	pushq $2
+	pushq $list_int_index
+	mov %rsp, %r11 # r11 = list_int_index thunk
+
+	push %r11
+	pushq $1
+	pushq $print
+	mov %rsp, %r11 # r11 = print thunk
+
+	push %rsi # cont handed over to main_exit
+	pushq $1
+	pushq $main_exit
+
+	mov %rsp, %rsi # the main_exit thunk is the continuation
+	enter %r11 # evaluate the print thunk
diff --git a/include/data.s b/include/data.s
new file mode 100644
index 0000000..8261881
--- /dev/null
+++ b/include/data.s
@@ -0,0 +1,99 @@
+
+.ifndef _data_s_file
+_data_s_file:
+
+# Simple values and boxed machine integers
+# | ptr | value |
+CON_evacuate1:
+	retq
+CON_scavenge1:
+	add $0x10, %rsi
+	retq
+INT_info_table:
+	cell CON_evacuate1
+	cell CON_scavenge1
+	cell 0
+INT_code:
+	continue
+
+# List
+# | ptr | 0 |
+# | ptr | 1 | a | b |
+LIST_evacuate:
+	# [] | a : b
+	retq #TODO
+LIST_scavenge:
+	mov 0x8(%rbp), %rax
+	shl $1, %rax
+	add $2, %rax
+	shl $3, %rax
+	add %rax, %rsi
+	retq
+LIST_info_table:
+	cell LIST_evacuate
+	cell LIST_scavenge
+	cell 0
+LIST_code:
+	continue
+
+# FUN/PAP combo objects
+# | ptr | thunkptr | args | arg[0] | arg[1] | ... | arg[args] |
+FUN_evacuate:
+	retq #TODO
+FUN_scavenge:
+	mov 0x10(%rbp), %rax
+	add $3, %rax
+	shl $3, %rax
+	add %rax, %rsi
+	retq
+
+# Simple info for n-ary functions
+# TODO continue to add as required
+fun1_info_table:
+	cell FUN_evacuate
+	cell FUN_scavenge
+	cell 1
+fun1_code:
+	continue
+
+fun2_info_table:
+	cell FUN_evacuate
+	cell FUN_scavenge
+	cell 2
+fun2_code:
+	continue
+
+fun3_info_table:
+	cell FUN_evacuate
+	cell FUN_scavenge
+	cell 3
+fun3_code:
+	continue
+
+# indirection (Q: how to recognize IND and THUNK on return?)
+# | ptr | indptr |
+IND_evacuate:
+	retq #TODO
+IND_scavenge:
+	add $0x10,%rsi
+	retq
+IND_info:
+	cell IND_evacuate
+	cell IND_scavenge
+	cell 0
+IND_code:
+	enter 0x8(%rbp)
+
+# THU objects (gc implementation only, actual THUs are defined by functions)
+# | ptr | args | arg[0] | arg[1] | ... | arg[args] |
+# args wouldn't need to be here but let's keep them for gc simplicity
+THU_evacuate:
+	retq #TODO
+THU_scavenge:
+	mov 0x8(%rbp), %rax
+	add $2,%rax
+	shl $3,%rax
+	add %rax,%rsi
+	retq
+
+.endif # _data_s_file
diff --git a/include/intops.s b/include/intops.s
new file mode 100644
index 0000000..dbadb37
--- /dev/null
+++ b/include/intops.s
@@ -0,0 +1,25 @@
+
+.ifndef _intops_s_file
+_intops_s_file:
+
+.include "include/primops.s"
+
+.primop2 plus
+	mov 010(%rsi), %rax # arg 2
+	mov 020(%rbp), %rsi # location of arg1
+	add 010(%rsi), %rax # arg 1
+	primop2_ret_int %rax
+
+.primop2 mul
+	mov 010(%rsi), %rax # arg 2
+	mov 020(%rbp), %rsi # location of arg1
+	mulq 010(%rsi) # arg 1 (goes to %rax and %rdx)
+	primop2_ret_int %rax
+
+.primop2 sub
+	mov 020(%rbp), %rdi # location of arg1
+	mov 010(%rdi), %rax # arg 1 (read via %rdi loaded above)
+	sub 010(%rsi), %rax # arg 2
+	primop2_ret_int %rax
+
+.endif # _intops_s_file
diff --git a/include/io.s b/include/io.s
new file mode 100644
index 0000000..c348e7f
--- /dev/null
+++ b/include/io.s
@@ -0,0 +1,44 @@
+
+.ifndef _io_s_file
+_io_s_file:
+
+# | int | -> cont
+.thunkcode print
+	push %rsi # cont
+	push %rbp # ret (self)
+	pushq $2
+	pushq $print_fini
+
+	mov %rsp, %rsi # continue to the new print_fini thunk
+	enter 020(%rbp) # evaluate the argument
+
+# arg -> | ret | cont |
+.thunkcode print_fini
+	mov 010(%rsi), %rax # the integer value to print
+
+	# make a string (binary digits + newline) in the space just below %rsp
+	mov %rsp, %r15
+	sub $1, %r15
+	movb $0x0a, (%r15) # trailing newline
+	print_fini_next:
+	mov %al, %bl
+	and $1, %bl # lowest bit of the value
+	add $0x30, %bl # convert to ASCII '0' or '1'
+	sub $1, %r15 # the buffer is filled downwards
+	movb %bl, (%r15)
+	shr $1, %rax
+	jnz print_fini_next # loop until no bits remain
+
+	mov $1, %rdi #stdout
+	mov %rsp, %rdx
+	sub %r15, %rdx #size
+	mov %r15, %rsi #buf
+	mov $1, %rax #write
+	syscall
+
+	mov 020(%rbp), %rsi # the original print thunk
+	movq $0, 010(%rsi)
+	movq $INT_code, 0(%rsi) # rewrite it into a boxed integer 0
+	enter 030(%rbp) # eval cont, returning %rsi
+
+.endif # _io_s_file
diff --git a/include/listops.s b/include/listops.s
new file mode 100644
index 0000000..eb8f6cf
--- /dev/null
+++ b/include/listops.s
@@ -0,0 +1,40 @@
+
+.ifndef _listops_s_file
+_listops_s_file:
+
+.include "include/primops.s"
+
+# | n | list | -> cont
+.primop2 list_int_index
+	mov 010(%rsi), %rdx # the list constructor id, must be 1
+	cmp $1, %rdx
+	jne list_int_index_not_found
+
+	mov 020(%rbp), %rcx # location of n
+	mov 010(%rcx), %rcx # value of n
+	test %rcx, %rcx
+	jz list_int_index_found #we are taking 0, all happy, return it
+
+	#more probably we need to continue, make replacement thunks
+	sub $1, %rcx
+	pushq %rcx
+	pushq $INT_code
+	mov %rsp, %r11 # r11 = boxed n-1
+
+	pushq 030(%rsi) # tail
+	push %r11
+	pushq $2
+	pushq $list_int_index
+	mov %rsp, %r11 # r11 = thunk indexing the tail with n-1
+
+	primop2_cont_indirect %r11
+
+list_int_index_not_found:
+	movq 0, %rax #fault (reads address 0)
+
+list_int_index_found:
+	mov 020(%rsi), %rax #head
+	primop2_cont_indirect %rax
+
+
+.endif # _listops_s_file
diff --git a/macros.s b/include/macros.s
similarity index 69%
rename from macros.s
rename to include/macros.s
index 8bc05da..d915171 100644
--- a/macros.s
+++ b/include/macros.s
@@ -1,7 +1,14 @@
 
+.ifndef _macros_s_file
+_macros_s_file:
+
+.macro enter_rbp
+	jmp *(%rbp)
+.endm
+
 .macro enter x
 	mov \x, %rbp
-	jmp *(%rbp)
+	enter_rbp
 .endm
 
 .macro cell x
@@ -23,11 +30,14 @@
 	jmp *(%rbp)
 .endm
 
-.macro .makethunk name
+# this needs data.s
+.macro .thunkcode name
 .align 8
-	\name\()_info:
+	__\name\()_info:
 	cell THU_evacuate
 	cell THU_scavenge
 	cell 0
 \name:
 .endm
+
+.endif # _macros_s_file
diff --git a/include/main_exit.s b/include/main_exit.s
new file mode 100644
index 0000000..a38c5e5
--- /dev/null
+++ b/include/main_exit.s
@@ -0,0 +1,10 @@
+.ifndef _main_exit_s_file
+_main_exit_s_file:
+
+# exitcode -> | cont (unused, should be 0) |
+.thunkcode main_exit
+	mov 0x8(%rsi), %rdi # result to syscall exitcode
+	mov $0x3c, %rax # syscall 60
+	syscall # exit %rdi
+
+.endif # _main_exit_s_file
diff --git a/include/primops.s b/include/primops.s
new file mode 100644
index 0000000..bcb835f
--- /dev/null
+++ b/include/primops.s
@@ -0,0 +1,54 @@
+
+.ifndef _primops_s_file
+_primops_s_file:
+
+.include "include/data.s"
+
+.macro .primop2 name
+# | arg1 | arg2 | -> cont
+.thunkcode \name
+	# push a thunk for finishing the primop
+	push %rsi # cont
+	push %rbp # ret (self)
+	pushq 030(%rbp) # arg2
+	pushq $3
+	pushq $\name\()_step1
+
+	mov %rsp, %rsi # continue to the new thunk
+	enter 0x10(%rbp) # evaluate arg1
+
+# arg1 -> | arg2 | ret | cont |
+.thunkcode \name\()_step1
+	# this is guaranteed to be entered only once (it's a cont), so we can rewrite the thunk in place
+	mov 020(%rbp), %rax # arg2 (not evaluated yet)
+	movq $\name\()_fini, 0(%rbp)
+	mov %rsi, 020(%rbp) # store the evaluated arg1 in its place
+
+	mov %rbp, %rsi # continue on the rewritten thunk
+	enter %rax # evaluate arg2
+
+# arg2 -> | arg1 | ret | cont |
+.thunkcode \name\()_fini
+	# at this point,
+	# arg1 is pointed to by 020(%rbp)
+	# arg2 is pointed to by %rsi
+.endm
+
+.macro primop2_ret_int val
+	# the result should now be in %rax
+	mov 030(%rbp), %rsi # save result to the original primop thunk
+	movq \val, 010(%rsi)
+	movq $INT_code, 0(%rsi)
+
+	enter 040(%rbp) # eval cont, returning %rsi
+.endm
+
+.macro primop2_cont_indirect new
+	mov 030(%rbp), %rdi # load the original thunk
+	mov 040(%rbp), %rsi # set the continuation
+	movq \new, 010(%rdi) # set the indirect to the new thunk
+	movq $IND_code, 0(%rdi)
+	enter \new # continue evaluating the new thunk
+.endm
+
+.endif # _primops_s_file
diff --git a/include/uskel.s b/include/uskel.s
new file mode 100644
index 0000000..136b382
--- /dev/null
+++ b/include/uskel.s
@@ -0,0 +1,54 @@
+
+# uskel runtime and start; include this at the top.
+
+.section .init
+.global _start
+_start:
+	jmp _uskel_start
+
+.include "include/macros.s"
+
+.section .bss
+_memory_state:
+	cell 0 # bottom of allocation (grows down)
+	cell 0 # region start
+	cell 0 # region end
+	cell 0 # program entry rsp (aka the actual stack)
+
+.section .text
+
+_uskel_alloc_basic_mem:
+	mov $0x100000, %r15 # desired size
+
+	mov $0x9, %rax # mmap
+	mov $0, %rdi # addr = NULL
+	mov %r15, %rsi # len = %r15 (the desired size)
+	mov $0x3, %rdx # prot = PROT_READ 0x1 | PROT_WRITE 0x2
+	mov $0x22, %r10 # flags = MAP_PRIVATE 0x2 | MAP_ANONYMOUS 0x20
+	mov $-1, %r8 # fd = -1
+	mov $0, %r9 # off = 0
+	syscall
+	mov $_memory_state, %rdi
+	mov %rax, 010(%rdi) # region start = address returned by mmap
+	add %r15, %rax
+	mov %rax, (%rdi) # bottom of allocation starts at the region end
+	mov %rax, 020(%rdi) # region end = start + size
+	retq
+
+_uskel_start:
+	call _uskel_alloc_basic_mem
+	# use the stack pointer for easy writing to the heap,
+	# but back it up to memory state
+	mov $_memory_state, %rdi
+	mov %rsp, 030(%rdi) # save the program entry rsp
+	mov 0(%rdi), %rsp # rsp = bottom of allocation
+
+	# push a thunk for main
+	pushq $0
+	pushq $main
+
+	mov $0, %rsi # set continuation to exit
+	enter %rsp # run the program
+	# Q: are there gonna be functions that have both the argument AND the cont?
+	# A: No, stuff is either entered as return-continuation (takes res, cont has to be saved) or as forward call (takes cont)
+	# (needs validation)
diff --git a/print_123.s b/print_123.s
new file mode 100644
index 0000000..612b4c8
--- /dev/null
+++ b/print_123.s
@@ -0,0 +1,42 @@
+
+.include "include/uskel.s"
+
+.include "include/data.s"
+.include "include/io.s"
+.include "include/intops.s"
+
+# || -> cont
+.thunkcode main
+	# push a new integer
+	pushq $100
+	pushq $INT_code
+	mov %rsp, %r11 # backup first arg
+
+	# push another new integer
+	pushq $23
+	pushq $INT_code
+	mov %rsp, %r12 # backup second arg
+
+	# push the plus
+	push %r12
+	push %r11
+	pushq $2
+	pushq $plus
+	mov %rsp, %r11 # backup plus
+
+	# push the print
+	push %r11
+	pushq $1
+	pushq $print
+	mov %rsp, %r11 # backup print
+
+	# push a cont thunk for main_exit
+	push %rsi
+	pushq $1
+	pushq $main_exit
+
+	# evaluate into main_exit
+	mov %rsp, %rsi
+	enter %r11
+
+.include "include/main_exit.s"
diff --git a/uskel.s b/uskel.s
deleted file mode 100644
index f0978fc..0000000
--- a/uskel.s
+++ /dev/null
@@ -1,232 +0,0 @@
-
-.include "macros.s"
-
-.section .init
-.global _start
-_start:
-	jmp _uskel_start
-
-.section .bss
-_memory_state:
-	cell 0 # bottom of allocation (grows down)
-	cell 0 # region start
-	cell 0 # region end
-
-.section .text
-
-_uskel_alloc_basic_mem:
-	mov $0x100000, %r15 # desired size
-
-	mov $0x9, %rax # mmap
-	mov $0, %rdi # addr = NULL
-	mov %r15, %rsi # len = %rcx
-	mov $0x3, %rdx # prot = PROT_READ 0x1 | PROT_WRITE 0x2
-	mov $0x22, %r10 # flags = MAP_PRIVATE 0x2 | MAP_ANONYMOUS 0x20
-	mov $-1, %r8 # fd = -1
-	mov $0, %r9 # off = 0
-	syscall
-	mov $_memory_state, %rdi
-	mov %rax, (%rdi)
-	mov %rax, 0x8(%rdi)
-	add %r15, %rax
-	mov %rax, 0x10(%rdi)
-	retq
-
-_uskel_start:
-	call _uskel_alloc_basic_mem
-	mov _memory_state, %rdi
-	# push a thunk for main
-	mov %rdi, %r15 # backup main for later
-	movq $main, 0x00(%rdi)
-	movq $0, 0x08(%rdi)
-	add $0x10, %rdi
-	# save the memory ptr
-	mov %rdi, _memory_state
-
-	mov $0, %rsi # set continuation to exit
-	enter %r15 # run the program
-
-# Simple values and boxed machine integers
-# | ptr | value |
-CON_evacuate1:
-	retq
-CON_scavenge1:
-	add $0x10, %rsi
-	retq
-INT_info_table:
-	cell CON_evacuate1
-	cell CON_scavenge1
-	cell 0
-INT_code:
-	continue
-
-# List
-# | ptr | 0 |
-# | ptr | 1 | a | b |
-LIST_evacuate:
-	# [] | a : b
-	retq #TODO
-LIST_scavenge:
-	mov 0x8(%rbp), %rax
-	shl $1, %rax
-	add $2, %rax
-	shl $3, %rax
-	add %rax, %rsi
-	retq
-LIST_info_table:
-	cell LIST_evacuate
-	cell LIST_scavenge
-	cell 0
-LIST_code:
-	continue
-
-# FUN/PAP combo objects
-# | ptr | thunkptr | args | arg[0] | arg[1] | ... | arg[args] |
-FUN_evacuate:
-	retq #TODO
-FUN_scavenge:
-	mov 0x10(%rbp), %rax
-	add $3, %rax
-	shl $3, %rax
-	add %rax, %rsi
-	retq
-
-# Simple info for n-ary functions
-# TODO continue to add as required
-fun1_info_table:
-	cell FUN_evacuate
-	cell FUN_scavenge
-	cell 1
-fun1_code:
-	continue
-
-fun2_info_table:
-	cell FUN_evacuate
-	cell FUN_scavenge
-	cell 2
-fun2_code:
-	continue
-
-fun3_info_table:
-	cell FUN_evacuate
-	cell FUN_scavenge
-	cell 3
-fun3_code:
-	continue
-
-# indirection (Q: how to recognize IND and THUNK on return?)
-# | ptr | indptr |
-IND_evacuate:
-	retq #TODO
-IND_scavenge:
-	add $0x10,%rsi
-	retq
-IND_info:
-	cell IND_evacuate
-	cell IND_scavenge
-	cell 0
-IND_code:
-	enter 0x8(%rbp)
-
-# THU objects (gc implementation only, actual THUs are defined by functions)
-# | ptr | args | arg[0] | arg[1] | ... | arg[args] |
-# args wouldn't need to be here but let's keep them for gc simplicity
-THU_evacuate:
-	retq #TODO
-THU_scavenge:
-	mov 0x8(%rbp), %rax
-	add $2,%rax
-	shl $3,%rax
-	add %rax,%rsi
-	retq
-
-#
-# Actual code!
-#
-
-# || -> cont
-.makethunk main
-	mov _memory_state, %r15
-
-	# push a new integer
-	mov %r15, %r11 # backup first arg
-	movq $INT_code, 0x00(%r15)
-	movq $100, 0x08(%r15)
-	add $0x10, %r15
-
-	# push another new integer
-	mov %r15, %r12 # backup second arg
-	movq $INT_code, 0x00(%r15)
-	movq $23, 0x08(%r15)
-	add $0x10, %r15
-
-	# push the plus
-	mov %r15, %r13 # backup plus
-	movq $plus, 0x00(%r15)
-	movq $2, 0x08(%r15)
-	mov %r11, 0x10(%r15)
-	mov %r12, 0x18(%r15)
-	add $0x20, %r15
-
-	# push a cont thunk for main_exit
-	mov %r15, %r14 # backup cont thunk
-	movq $main_exit, 0x00(%r15)
-	movq $1, 0x08(%r15)
-	mov %rsi, 0x10(%r15)
-	add $0x18, %r15
-
-	mov %r15, _memory_state
-
-	# evaluate into main_exit
-	mov %r14, %rsi
-	enter %r13
-
-# exitcode -> | cont (unused, should be 0) |
-.makethunk main_exit
-	mov 0x8(%rsi), %rdi
-	mov $0x3c, %rax
-	syscall # exit %rdi
-
-# | arg1 | arg2 | -> cont
-.makethunk plus
-	# push a thunk for finishing the plus
-	mov _memory_state, %r15
-	mov %r15, %r14 # plus_step1 origin
-	movq $plus_step1, 0x00(%r15)
-	movq $3, 0x08(%r15)
-	mov 0x18(%rbp), %rax
-	mov %rax, 0x10(%r15)
-	mov %rbp, 0x18(%r15)
-	mov %rsi, 0x20(%r15)
-	add $0x28, %r15
-	mov %r15, _memory_state
-
-	# evaluate arg0
-	mov %r14, %rsi
-	enter 0x10(%rbp)
-
-# arg0 -> | arg1 | ret | cont |
-.makethunk plus_step1
-	# this is guaranteed to be entered only once (it's a cont), so we can rewrite the thunk in place
-	mov 0x10(%rbp), %rax
-	movq $plus_fini, 0x00(%rbp)
-	mov %rsi, 0x10(%rbp)
-
-	mov %rbp, %rsi # continue on the rewritten thunk
-	enter %rax # evaluate arg1
-
-# arg1 -> | arg0 | ret | cont |
-.makethunk plus_fini
-	mov 0x8(%rsi), %rax # arg1
-	mov 0x10(%rbp), %rsi
-	add 0x8(%rsi), %rax # + arg0
-
-	mov 0x18(%rbp), %rsi # rewrite the resulting thunk
-	movq $INT_code, 0x00(%rsi)
-	mov %rax, 0x08(%rsi)
-	# result is in rsi already
-	enter 0x20(%rbp)
-
-# Q: are there gonna be functions that have both the argument AND the cont?
-# A: No, either stuff is entered as return-continuation (takes res) or as forward call (takes cont)
-# (needs validation)