#lang scheme/base
(require mzlib/string
scheme/port
scheme/match
(planet chongkai/sml/hamlet/parse-main))
(provide (rename-out (ml-read read)
(unparsed-read read-syntax)))
(define (unparsed-read source-name-v input-port)
(let-values (((line column position)
(port-next-location input-port)))
(let* ((source
(let ((sp (open-output-string)))
(copy-port input-port sp)
(get-output-string sp)))
(sexp (hamlet-parse source))
(port (open-input-string sexp)))
(ml-read-syntax input-port
source-name-v
port
position))))
(define (ml-read port)
(syntax->datum (unparsed-read #f port)))
(define (ml-read-syntax port source-name-v input-port pos)
(to-scheme-syntax port
source-name-v
(read-hamlet input-port pos)))
(define (read-hamlet p pos)
(let ((c (read-char p)))
(cond ((eof-object? c)
c)
((char=? c #\()
(read-hamlets p
(read p)
(read-location p pos)
pos))
((char-whitespace? c)
(read-hamlet p pos))
(else
(error "ml-read : got " c)))))
(define (bytes->number s)
(string->number (bytes->string/latin-1 s)))
(define (read-location p pos)
(let-values (((position line column
end-position end-line end-column)
(apply values
(map bytes->number
(cdr
(regexp-match (pregexp "(\\d*)\\((\\d*)\\.(\\d*)\\)-(\\d*)\\((\\d*)\\.(\\d*)\\)")
p))))))
(list line
column
(+ position pos)
(- end-position position))))
(define (read-ml-id p)
(let* ((raw (read-line p))
(id-str (substring raw 1 (sub1 (string-length raw)))))
(string->symbol id-str)))
(define (read-ml-string p)
(if (char=? (read-char p) #\")
(let ((sp (open-output-string)))
(write-char #\" sp)
(read-ml-string-help p sp))
(read-ml-string p)))
(define (read-ml-string-help p sp)
(let ((c (read-char p)))
(cond ((char=? c #\\)
(read-ml-string-dispach p sp))
((char=? c #\")
(write-char #\" sp)
(read-line p)
(read-from-string (get-output-string sp)))
(else
(write-char c sp)
(read-ml-string-help p sp)))))
(define (read-ml-string-dispach p sp)
(let ((c (read-char p)))
(cond ((char-numeric? c) (write-char #\\ sp)
(display (ddd->ooo (ddd->char c (read-char p) (read-char p))) sp)
(read-ml-string-help p sp))
((char=? c #\^) (write-char #\\ sp)
(display (ddd->ooo (- (char->integer (read-char p)) 64)) sp)
(read-ml-string-help p sp))
((char-formatting? c) (read-ml-string-f p sp))
(else (write-char #\\ sp)
(write-char c sp)
(read-ml-string-help p sp)))))
(define (char-formatting? c)
(memv c (list #\space #\tab #\return #\newline #\page)))
(define (d->i d)
(- (char->integer d) 48))
(define (dd->i d1 d2)
(+ (* d1 10) d2))
(define (ddd->char d1 d2 d3)
(dd->i (dd->i (d->i d1) (d->i d2)) (d->i d3)))
(define (ddd->ooo d)
(let* ((s (number->string d 8))
(l (string-length s)))
(if (= l 3)
s
(string-append (make-string (- 3 l) #\0) s))))
(define (read-ml-string-f p sp)
((if (char=? (read-char p) #\\)
read-ml-string-help
read-ml-string-f)
p sp))
(define (read-ml-char p)
(if (char=? (read-char p) #\#)
(string-ref (read-ml-string p) 0)
(read-ml-char p)))
(define (read-ml-number p)
(let ((raw (read p)))
(if (number? raw)
raw
(string->number (regexp-replace* "~" (symbol->string raw) "-")))))
(define (read-ml-word p)
(let ((raw (symbol->string (read p))))
(if (char=? (string-ref raw 2) #\0)
(string->number (substring raw 4))
(string->number (substring raw 3) 16))))
(define (read-hamlets p head loc pos)
(case head
((VId LongVId LongStrId StrId TyCon LongTyCon TyVar Lab SigId FunId)
(list head loc (read-ml-id p)))
((INTSCon REALSCon)
(begin0
(list head loc (read-ml-number p))
(read-char p)))
((STRINGSCon)
(list head loc (read-ml-string p)))
((CHARSCon)
(list head loc (read-ml-char p)))
((WORDSCon)
(begin0
(list head loc (read-ml-word p))
(read-char p)))
(else
(let lp ((acc '()))
(let ilp ((c (peek-char p)))
(cond ((eof-object? c)
(error "ml-read" head loc (map car acc)))
((char-whitespace? c)
(read-char p)
(ilp (peek-char p)))
((char=? c #\()
(lp (cons (read-hamlet p pos) acc)))
((char=? c #\))
(read-char p)
(list* head loc (reverse acc)))
(else
(error "ml-read" head loc c))))))))
(define no-ctxt-stx
(read-syntax #f (open-input-string "SML")))
(define (to-scheme-syntax port source-name-v sexp)
(let* ([p-name (object-name port)]
[name (if (path? p-name)
(let-values ([(base name dir?) (split-path p-name)])
(string->symbol (path->string (path-replace-suffix name #""))))
'page)]
[stx (to-syntax-object source-name-v sexp)])
(datum->syntax #f
`(module ,name (planet chongkai/sml)
(#%module-begin ,stx))
#f
no-ctxt-stx)))
(define (add: sym)
(symbol-append sym ":"))
(define (to-syntax-object source-name-v sexp)
(match sexp
((list (and head
(or 'VId 'LongVId 'TyVar 'Lab 'INTSCon 'REALSCon 'STRINGSCon 'CHARSCon 'WORDSCon))
loc id)
(datum->syntax #f
(list (add: head)
(datum->syntax #f
id
(cons source-name-v loc)
no-ctxt-stx))
(cons source-name-v loc)
no-ctxt-stx))
((list (and head (or 'TyCon 'LongTyCon)) loc id)
(datum->syntax #f
(list (add: head)
(datum->syntax #f
(symbol-append id "-type")
(cons source-name-v loc)
no-ctxt-stx))
(cons source-name-v loc)
no-ctxt-stx))
((list (and head (or 'LongStrId 'StrId)) loc id)
(datum->syntax #f
(list (add: head)
(datum->syntax #f
(symbol-append id "-struct")
(cons source-name-v loc)
no-ctxt-stx))
(cons source-name-v loc)
no-ctxt-stx))
((list 'SigId loc id)
(datum->syntax #f
(list 'SigId:
(datum->syntax #f
(symbol-append id "-sig")
(cons source-name-v loc)
no-ctxt-stx))
(cons source-name-v loc)
no-ctxt-stx))
((list 'FunId loc id)
(datum->syntax #f
(list 'FunId:
(datum->syntax #f
(symbol-append id "-functor")
(cons source-name-v loc)
no-ctxt-stx))
(cons source-name-v loc)
no-ctxt-stx))
((list-rest head loc p)
(datum->syntax #f
(cons (add: head)
(map (lambda (s)
(to-syntax-object source-name-v s))
p))
(cons source-name-v loc)
no-ctxt-stx))))
(define (symbol-append sym str)
(string->symbol
(string-append
(symbol->string sym)
str)))