#lang scheme/base ;; Code to build structured assembly code graph from forth code. This ;; uses an extension to scat's 2-stack model to represent ;; concatenative macros with a Forth-style control stack. (require scheme/control scheme/match "../tools.ss" "../control.ss" ;; for 2stack parent class "../scat.ss" ;; for make-word "../target.ss" "postprocess.ss" ) (provide make-target-label ;; create a label make-target-split ;; label -> splitter macro target-compile-1 wrap-macro/mexit wrap-macro/postponed-word wrap-macro/postponed-variable state:compiler (struct-out compiler) name-directive? ;; for assembler ;; Parameters that depend on underlying language core: compile-exit compile-literal compile-word ;; These are Scat words present in the Scheme namespace. They are ;; used unquoted in label.ss terminate-chain ;; indicates that control flow has left current chain split-store ;; current state > ctrl + set new state combine-store ;; combine all chains in store to one merge-store ;; combine split store with current semicolon ) (define compile-literal (make-parameter #f)) (define compile-word (make-parameter #f)) (define (semicolon state) ((semi) state)) ;; Representation for different target-word structures. See ;; purrr/purrr-lang.ss for a usage example. ;; Return 3 values: ;; * a target wird struct (label) ;; * a referring macro ;; * a body code generator macro (define (wrap-macro/postponed-word name loc macro) (let ((label (new-target-word #:name name #:realm 'code #:srcloc loc #:postponed macro))) (values label ((compile-word) label) (compose macro (make-target-split label))))) (define (wrap-macro/postponed-variable name loc macro) (let ((label (new-target-word #:name name #:realm 'data #:srcloc loc))) (values label ((compile-literal) label) (compose ;; FIXME: data variables are not chained. This should be: ;; data variables are not chained with code chunks. terminate-chain macro (make-target-split label))))) ;; Macro representations need to be wrapped to implement 'mexit', ;; which jumps past the end of the code generated by macro. For ;; languages that do not use mexit, do not wrap the macro, as this ;; requires at least one call to mexit at the end. (define (wrap-macro/mexit name loc macro) (make-word (lambda (state) (parameterize ((semi (compile-mexit))) (or (mleave (macro (menter state))) (match loc ((list file l c p s) (error 'non-terminated-macro "~a:~a:~a: ~a" file l c name)))))))) ;; *** COMPILE *** ;; During compilation the assembly code (the result of instantiating ;; macros) is organized in the following hierarchy: ;; * A word is a single entry point, represented by a target-word ;; structure associated to a chunk, which is a list of consecutive ;; assembly code instructions. Code inside a word can only be ;; reached through a jump to its label, and is thus not observable ;; to the world. Words serve as the unit of code generation (and ;; recombination). Any operation on code that doesn't alter ;; semantics is legal within a chunk. ;; * A chain is a list of words (chunks) with implicit ;; fallthrough. Each word indicates a single entry point. Chains ;; are terminated by exit points. Chains are the unit of target ;; address allocation: each chain can be associated to an address ;; independent of other chains. Some chains have fixed addresse ;; (org). ;; ;; * The store is a set of recently constructed chains (implemented ;; as a stack) ;; So, chunks represent ENTRY points, chains represent EXIT ;; points. This hierarchy is necessary because Forth words can have ;; multiple entry and exit points. A Forth word then consists of ;; multipel chains, since a chain has a single EXIT point, but can ;; have multiple ENTRY points. ;; Organizing it this way gives maximum flexibility: the basic ;; operation is the jump, possibly conditional. The Forth-style ;; language is a thin layer on top of assembly code which sacrifices ;; no efficiency, and still allows for simple control flow analysis ;; that can re-arrange code in memory. (define-struct dict (current ;; current word label chain ;; list of words with fallthrough store)) ;; set (stack?) of fallthrough lists ;; Save code under label, but drop if there is no label, which means ;; the code is not reachable. (define (log-dead code) (log: (format "dead:~a\n" (apply string-append (map (lambda (ins) (format " ~a" (instruction->string ins))) (reverse code)))))) (define (dict-label d new-word code) (match d ((struct dict (current chain store)) (make-dict new-word (if current (cons (list current code) chain) (begin (unless (null? code) (log-dead code)) chain)) store)))) ;; Terminate current fallthrough chain by moving it to the store. (define dict-terminate (match-lambda ((struct dict (current chain store)) (make-dict current '() (if (null? chain) ;; drop empty chains store (cons chain store)))))) (define-struct (compiler 2stack) (dict ;; dictionary object: keeps track of label -> code bindings rs)) ;; 'return stack' for macros: a list of exit labels + refcount (define update-compiler (case-lambda ((state asm) (update-compiler state asm (2stack-ctrl-list state))) ((state asm ctrl) (update-compiler state asm ctrl (compiler-dict state) (compiler-rs state))) ((state asm ctrl dict rs) (make-state:compiler asm ctrl dict rs)))) (define (make-state:compiler ctrl asm dict rs) (make-compiler update-compiler ctrl asm dict rs)) (define (state:compiler) (make-state:compiler '() '() (make-dict #f '() '()) '())) ;; The 'split' state update function: save the word currently being ;; compiled on the word stack (dictionary) and continue with an empty ;; assembly stack and a new current word. (define (make-target-split new-word) (state-lambda compiler (asm ctrl dict rs) ;; => (update '() ctrl (dict-label dict new-word asm) rs))) ;; 'compile-forth' sets up the internal state for compilation of ;; Forth-like functionality. Compared to ordinary 'macro->code' which ;; operates only on the parameter stack and an extra assembly stack, ;; Forth code has access to: ;; * labels = entry points of target code chains ;; * fallthrough words ;; * macro return stack (local exit in macros) (define print-state (match-lambda ((struct compiler (update asm ctrl (struct dict (current chain store)) rs)) (printf "STATE:\n~a\n~a\n~a\n~a\n~a\n~a\n" asm ctrl current chain store rs)))) (define semi (make-parameter #f)) (define (compile-forth macro) (parameterize ((semi (compile-exit))) ;; Execute macro on empty state + terminate properly. (let ((state (terminate-chain (macro (state:compiler))))) ;; Type check (macros are not allowed to change the state type!) ;; and require empty compilation state. (unless (compiler? state) (error 'compile-state-type-error)) ;; (print-state state) (assert-empty-ctrl state) ;; Return the dict-store list. This is then passed to ;; target-post! and onward to the assembler. (dict-store (compiler-dict state))))) (define (make-target-label [name (next-label)]) (new-target-word #:name name)) ;; *** FALLTHROUGH CHAINS *** (define terminate-chain (compose ;; Terminate the fallthrough chain. (state-lambda compiler (asm ctrl dict rs) ;; => (update asm ctrl (dict-terminate dict) rs)) ;; Code following the jump is not reachable. (make-target-split #f))) ;; *** ORG *** ;; Save the compilation state to the control stack. ;; FIXME: Maybe this needs to split off only the store? (define split-store (state-lambda compiler (asm ctrl dict rs) ;; => (update '() (cons (list asm rs dict) ctrl) (make-dict #f '() '()) '()))) ;; FIXME: This is currrently only defined for empty asm and rs, and a ;; #f word (i.e. the state after a split) simply because there's no ;; need for this in the middle of a word compilation, and i don't want ;; to invent one to test it. (define merge-store (state-lambda compiler ('() ;; empty asm (list-rest (list asm rs (struct dict (current chain store))) ctrl) (struct dict (#f '() store+)) '()) ;; empty rs (update asm ctrl (make-dict current chain (append store+ store)) rs))) ;; Combine the current word chain into one. This is used in the ;; definition of org-pop, for example to ensure fallthrough for an ;; interrupt vector table. Probably only meaningful after ;; terminate-chain, which results in a store that has all chains. (define combine-store (state-lambda compiler (asm ctrl (struct dict (current chain store)) rs) ;; => (update asm ctrl (make-dict current chain (list (apply append store))) rs))) (define (name-directive? dir name) (let ((dir? (lambda (x) (eq? dir x)))) (match name ((list (? dir?) addr) #f) (_ #f)))) ;; *** MEXIT *** ;; Multiple exit points for macros. This mechanism makes it possible ;; to turn words into macros, as long as they do not modify the return ;; stack. (define-struct mexit (label refs)) ;; Patched later. (define compile-exit (make-parameter (lambda _ (error 'compile-exit-undefined)))) (define compile-mexit (make-parameter (state-lambda compiler (asm ctrl dict (list-rest (struct mexit (label refs)) rs+)) ;; => (update (cons `[jw ,label] asm) ;; compile jump ctrl dict (cons (make-mexit label (+ 1 refs)) rs+))))) ;; inc ref ;; push new label to rs (define menter (state-lambda compiler (asm ctrl dict rs) (update asm ctrl dict (cons (make-mexit (make-target-label) 0) rs)))) ;; check termination, drop last jump + split if there are more (define mleave (state-lambda compiler (asm ctrl dict (list-rest (struct mexit (label refs)) rs+)) (and (terminated? asm label) (let ((dropped (update (cdr asm) ctrl dict rs+))) (if (> refs 1) ((make-target-split label) dropped) dropped))))) ;; Check if the macro is properly terminated by looking at the ;; last instruction and the current exit label. (define (terminated? asm exit-label) (match asm ((list-rest [list 'jw label] _) (eq? label exit-label)) (else #f))) ;; Project word structs to the compiled state. (define (target-compile-1 words) (append (compile-forth (apply compose (filter procedure? (cons (lambda (x) x) words)))) (filter (lambda (x) (not (procedure? x))) words)))