lang/reader.ss
#lang scheme/base

(require mzlib/string
         scheme/port
         scheme/match
         (planet chongkai/sml/hamlet/parse-main))

(provide (rename-out (ml-read read)
                     (unparsed-read read-syntax)))

(define (unparsed-read source-name-v input-port)
  (let-values (((line column position)
                (port-next-location input-port)))
    (let* ((source
            (let ((sp (open-output-string)))
              (copy-port input-port sp)
              (get-output-string sp)))
           (sexp (hamlet-parse source))
           (port (open-input-string sexp)))
      (ml-read-syntax input-port
                      source-name-v
                      port
                      position))))

(define (ml-read port)
  (syntax->datum (unparsed-read #f port)))

(define (ml-read-syntax port source-name-v input-port pos)
  (to-scheme-syntax port
                    source-name-v
                    (read-hamlet input-port pos)))

(define (read-hamlet p pos)
  (let ((c (read-char p)))
    (cond ((eof-object? c)
           c)
          ((char=? c #\()
           (read-hamlets p
                         (read p)
                         (read-location p pos)
                         pos))
          ((char-whitespace? c)
           (read-hamlet p pos))
          (else
           (error "ml-read : got " c)))))

(define (bytes->number s)
  (string->number (bytes->string/latin-1 s)))

(define (read-location p pos)
  (let-values (((position line column
                          end-position end-line end-column)
                (apply values
                       (map bytes->number
                            (cdr
                             (regexp-match (pregexp "(\\d*)\\((\\d*)\\.(\\d*)\\)-(\\d*)\\((\\d*)\\.(\\d*)\\)")
                                           p))))))
    (list line
          column
          (+ position pos)
          (- end-position position))))


(define (read-ml-id p)
  (let* ((raw (read-line p))
         (id-str (substring raw 1 (sub1 (string-length raw)))))
    (string->symbol id-str)))

(define (read-ml-string p)
  (if (char=? (read-char p) #\")
      (let ((sp (open-output-string)))
        (write-char #\" sp)
        (read-ml-string-help p sp))
      (read-ml-string p)))

(define (read-ml-string-help p sp)
  (let ((c (read-char p)))
    (cond ((char=? c #\\)
           (read-ml-string-dispach p sp))
          ((char=? c #\")
           (write-char #\" sp)
           (read-line p)
           (read-from-string (get-output-string sp)))
          (else
           (write-char c sp)
           (read-ml-string-help p sp)))))

(define (read-ml-string-dispach p sp)
  (let ((c (read-char p)))
    (cond ((char-numeric? c) ;\ddd
           (write-char #\\ sp)
           (display (ddd->ooo (ddd->char c (read-char p) (read-char p))) sp)
           (read-ml-string-help p sp))
          ((char=? c #\^) ;\^c
           (write-char #\\ sp)
           (display (ddd->ooo (- (char->integer (read-char p)) 64)) sp)
           (read-ml-string-help p sp))
          ((char-formatting? c) ;\f...f\
           (read-ml-string-f p sp))
          (else ;scheme read will deal with other cases
           (write-char #\\ sp)
           (write-char c sp)
           (read-ml-string-help p sp)))))

(define (char-formatting? c)
  (memv c (list #\space #\tab #\return #\newline #\page)))

(define (d->i d)
  (- (char->integer d) 48))
(define (dd->i d1 d2)
  (+ (* d1 10) d2))
(define (ddd->char d1 d2 d3)
  (dd->i (dd->i (d->i d1) (d->i d2)) (d->i d3)))
(define (ddd->ooo d)
  (let* ((s (number->string d 8))
         (l (string-length s)))
    (if (= l 3)
        s
        (string-append (make-string (- 3 l) #\0) s))))

(define (read-ml-string-f p sp)
  ((if (char=? (read-char p) #\\)
       read-ml-string-help
       read-ml-string-f)
   p sp))      

(define (read-ml-char p)
  (if (char=? (read-char p) #\#)
      (string-ref (read-ml-string p) 0)
      (read-ml-char p)))

;~ in ML as - in Scheme
(define (read-ml-number p)
  (let ((raw (read p)))
    (if (number? raw)
        raw
        ;otherwise raw should be a symble with ~ in it
        (string->number (regexp-replace* "~" (symbol->string raw) "-")))))

(define (read-ml-word p)
  (let ((raw (symbol->string (read p))))
    (if (char=? (string-ref raw 2) #\0)
        ;0w0w[0-9]+
        (string->number (substring raw 4))
        ;0wx
        (string->number (substring raw 3) 16))))

(define (read-hamlets p head loc pos)
  (case head
    ;using the property that haMLet output is:
    ;space + id + end-parenthesis + newline
    ((VId LongVId LongStrId StrId TyCon LongTyCon TyVar Lab SigId FunId)
     (list head loc (read-ml-id p)))
    ((INTSCon REALSCon)
     (begin0
       (list head loc (read-ml-number p))
       ;read the #\)
       (read-char p)))
    ((STRINGSCon)
     (list head loc (read-ml-string p)))
    ((CHARSCon)
     (list head loc (read-ml-char p)))
    ((WORDSCon)
     (begin0
       (list head loc (read-ml-word p))
       (read-char p)))
    (else
     (let lp ((acc '()))
       (let ilp ((c (peek-char p)))
         (cond ((eof-object? c)
                (error "ml-read" head loc (map car acc)))
               ((char-whitespace? c)
                (read-char p)
                (ilp (peek-char p)))
               ((char=? c #\()
                (lp (cons (read-hamlet p pos) acc)))
               ((char=? c #\))
                (read-char p)
                (list* head loc (reverse acc)))
               (else
                (error "ml-read" head loc c))))))))

;for syntax-original?
(define no-ctxt-stx
  (read-syntax #f (open-input-string "SML")))

(define (to-scheme-syntax port source-name-v sexp)
  (let* ([p-name (object-name port)]
         [name (if (path? p-name)
                   (let-values ([(base name dir?) (split-path p-name)])
                     (string->symbol (path->string (path-replace-suffix name #""))))
                   'page)]
         [stx (to-syntax-object source-name-v sexp)])
    (datum->syntax #f
                   `(module ,name (planet chongkai/sml)
                      (#%module-begin ,stx))
                   #f
                   no-ctxt-stx)))

(define (add: sym)
  (symbol-append sym ":"))

;to-syntax-object return an syntax-object with location information and source-name-v
(define (to-syntax-object source-name-v sexp)
  (match sexp
    ((list (and head
                (or 'VId 'LongVId 'TyVar 'Lab 'INTSCon 'REALSCon 'STRINGSCon 'CHARSCon 'WORDSCon))
           loc id)
     (datum->syntax #f
                    (list (add: head) id)
                    (cons source-name-v loc)
                    no-ctxt-stx))
    ((list (and head (or 'TyCon 'LongTyCon)) loc id)
     (datum->syntax #f
                    (list (add: head) (symbol-append id "-type"))
                    (cons source-name-v loc)
                    no-ctxt-stx))
    ((list (and head (or 'LongStrId 'StrId)) loc id)
     (datum->syntax #f
                    (list (add: head) (symbol-append id "-struct"))
                    (cons source-name-v loc)
                    no-ctxt-stx))
    ((list 'SigId loc id)
     (datum->syntax #f
                    (list 'SigId: (symbol-append id "-sig"))
                    (cons source-name-v loc)
                    no-ctxt-stx))
    ((list 'FunId loc id)
     (datum->syntax #f
                    (list 'FunId: (symbol-append id "-functor"))
                    (cons source-name-v loc)
                    no-ctxt-stx))
    ((list-rest head loc p)
     (datum->syntax #f
                    (cons (add: head)
                          (map (lambda (s)
                                 (to-syntax-object source-name-v s))
                               p))
                    (cons source-name-v loc)
                    no-ctxt-stx))))

(define (symbol-append sym str)
  (string->symbol
   (string-append
    (symbol->string sym)
    str)))