private/src/grammar.ss
;;; grammar.ss -- datum grammar acceptors
;;;
;;; by chaynes@indiana.edu
;;;
;;; This program was written for Chez Scheme (version 5.0b), but could be
;;; ported without much effort.
;;;
;;; The syntax of the grammar form might be defined using itself as follows:
;;;
;;; (grammar grammar-expression
;;;   (datum (predicate (lambda (x) #t)))
;;;   (expression datum)
;;;   (grammar-expression
;;;     (report-if-bad 'grammar-expression
;;;       (lst 'grammar start (plus production))))
;;;   (start variable)
;;;   (variable (predicate symbol?))
;;;   (production (report-if-bad 'production (lst variable (star element))))
;;;   (element (alt terminal non-terminal))
;;;   (terminal (lst 'quote datum))
;;;   (non-terminal
;;;     (report-if-bad 'non-terminal
;;;       (alt variable
;;;         (lst 'alt (star element))
;;;         (lst 'seq (star element))
;;;         (lst 'lst (star element))
;;;         (lst 'star element)
;;;         (lst 'plus element)
;;;         (lst 'opt element)
;;;         (lst 'dot element element)
;;;         (lst 'predicate expression)
;;;         (lst 'cfa expression)
;;;         (lst 'report-if-bad datum non-terminal)))))
;;;
;;; A grammar expression returns a predicate that takes a datum and
;;; indicates whether it conforms to the given grammar.
;;;
;;; For more user documentation, see
;;; http://www.cs.indiana.edu/proglang/scheme/grammar.html.
;;;
;;; context-free acceptor cfa: [input] -> trial
;;; input, output: list of objects
;;; trial: (union #f #t output)
;;;   #f: the cfa failed to accept
;;;   #t: the cfa failed through report-if-bad
;;;   output: tail of list after cfa has accepted some prefix
;;; element: (union cfa datum)

(eval-when (compile load eval)

  (define grammar-input
    (lambda (x)
      (if (or (pair? x) (null? x))
	x
	#f)))
  
  (define grammar-try ; [element input cfa] -> trial
    (lambda (element input cfa)
      (if (procedure? element)
	(let ((trial (element input)))
	  (or (and (grammar-input trial) (cfa trial))
	    trial))
	(and (pair? input)
	  (equal? element (car input))
	  (cfa (cdr input))))))

  (define grammar-predicate ; [predicate] -> cfa
    (lambda (pred)
      (lambda (input)
	(and (pair? input)
	  (pred (car input))
	  (cdr input)))))

  (define grammar-alt ; (list element) -> cfa
    (lambda elements
      (lambda (input)
	(let loop ((elements elements))
	  (and (not (null? elements))
	    (or (grammar-try (car elements) input (lambda (x) x))
	      (loop (cdr elements))))))))

  (define grammar-seq ; (list element) -> cfa
    (lambda elements
      (let loop ((elements elements))
	(lambda (input)
	  (if (null? elements)
	    input
	    (grammar-try (car elements) input
	      (loop (cdr elements))))))))

  (define grammar-lst ; (list element) -> cfa
    (lambda elements
      (lambda (input)
	(and (pair? input)
	  (list? (car input))
	  (grammar-try (apply grammar-seq elements)
	    (car input)
	    (lambda (trial)
	      (and (null? trial) (cdr input))))))))

  (define grammar-star ; [element] -> cfa
    (lambda (element)
      (lambda (input)
	(if (null? input) input
	  (let ((trial (grammar-try element input (lambda (x) x))))
	    (if (grammar-input trial)
	      ((grammar-star element) trial)
	      (or trial input)))))))

  (define grammar-plus ; [element] -> cfa
    (lambda (element)
      (grammar-seq element (grammar-star element))))

  (define grammar-opt ; [element] -> cfa
    (lambda (element)
      (grammar-alt (grammar-seq element) (grammar-seq))))

  (define grammar-dot ; [element element] -> cfa
    (lambda (prefix-element suffix-element)
      (lambda (input)
	(and (pair? input)
	  (pair? (car input))
	  (not (list? (car input)))
	  (let loop ((object (car input)) (ls '()))
	    (if (pair? object)
	      (loop (cdr object) (cons (car object) ls))
	      (and ((grammar-seq prefix-element) (reverse ls))
		((grammar-seq suffix-element) (list object))
		(cdr input))))))))

  (define grammar-report-if-bad ; [symbol cfa] -> cfa
    (lambda (name cfa) ; assume cfa accepts (car input)
      (lambda (input)
	(or (grammar-input (cfa input))
	  (parameterize ((print-level 2))
	    (if (pair? input) (printf "Bad ~s: ~s~n" name (car input)))
	    #t)))))

  (define grammar-cfa->predicate
    (lambda (cfa)
      (lambda (object)
	(and (null? (cfa (list object))) #t))))
  )

(define-syntax grammar
  (let ()
    (define own-grammar
      (let ((variable (grammar-predicate symbol?))
	    (datum (grammar-predicate (lambda (object) #t)))
	    (delayed-non-terminal 'ignored))
	(let ((non-terminal (lambda (x) (delayed-non-terminal x)))
	      (terminal (grammar-lst 'quote datum)))
	  (let ((element (grammar-alt terminal non-terminal)))
	    (set! delayed-non-terminal
	      (grammar-report-if-bad 'non-terminal
		(grammar-alt variable
		  (grammar-lst 'alt (grammar-star element))
		  (grammar-lst 'seq (grammar-star element))
		  (grammar-lst 'lst (grammar-star element))
		  (grammar-lst 'star element)
		  (grammar-lst 'plus element)
		  (grammar-lst 'opt element)
		  (grammar-lst 'dot element element)
		  (grammar-lst 'predicate datum)
		  (grammar-lst 'cfa datum)
		  (grammar-lst 'report-if-bad datum non-terminal))))
	    (grammar-cfa->predicate
	      (grammar-report-if-bad 'grammar
		(grammar-lst 'grammar variable
		  (grammar-plus
		    (grammar-report-if-bad 'production
		      (grammar-lst variable (grammar-star element)))))))))))
    (lambda (x)
      (syntax-case x ()
	((_ start (i v ...) ...)
	 (and (memq (syntax-object->datum (syntax start))
		(syntax-object->datum (syntax (i ...))))
	   (own-grammar (syntax-object->datum x)))
	 (with-syntax
	   (((t ...) (generate-temporaries (syntax (i ...))))
	    ((id ...) (map (lambda (id)
			     (datum->syntax-object (syntax start) id))
			'(alt seq lst star plus opt dot predicate
			   cfa report-if-bad))))
	   (syntax ((lambda (id ...)
		      (let ((t #f) ...)
			(let ((i (lambda (x) (t x))) ...)
			  (set! t (grammar-seq v ...)) ...
			  (grammar-cfa->predicate start))))
		    grammar-alt grammar-seq grammar-lst grammar-star
		    grammar-plus grammar-opt grammar-dot grammar-predicate 
		    (lambda (cfa) cfa) grammar-report-if-bad))))))))