#lang scheme/base
(require "../main.ss"
mzlib/trace
)
(define regex-sof (zero-one (char= #\^) #\$))
(define regex-eof (zero-one (char= #\$) #\^))
(define regex-meta-chars '( #\. #\+ #\* #\? #\^ #\$ #\[ #\] #\( #\) #\{ #\} #\\))
(define regex-digit (seq "\\d" (return digit)))
(define regex-not-digit (seq "\\D" (return not-digit)))
(define regex-word (seq "\\w" (return word)))
(define regex-not-word (seq "\\W" (return not-word)))
(define regex-whitespace (seq "\\s" (return whitespace)))
(define regex-not-whitespace (seq "\\S" (return not-whitespace)))
(define regex-any-char (seq #\. (return any-char)))
(define regex-literal (choice regex-digit
regex-not-digit
regex-word
regex-not-word
regex-whitespace
regex-not-whitespace
regex-any-char
(seq v <- (choice e-newline
e-return
e-tab
(escaped-char #\\ any-char)
(char-not-in regex-meta-chars))
(return (char= v)))))
(define regex-atom (choice regex-literal
regex-group
regex-choice
))
(define regex-char-range (seq lc <- (char-not-in (cons #\- regex-meta-chars))
#\-
hc <- (char-not-in (cons #\- regex-meta-chars))
(return `(,char-between ,lc ,hc))))
(define regex-choice (seq #\[
literals <- (one-many (choice regex-char-range
regex-literal))
#\]
(return `(,one-of* ,@literals))))
(define regex-group (seq #\(
chars <- (one-many regex-atom)
#\)
(return `(,sequence* ,@chars))))
(define regex-zero-one (seq v <- regex-atom
#\?
(return `(,zero-one ,v))))
(define regex-zero-many (seq v <- regex-atom
#\*
(return `(,zero-many ,v))))
(define regex-one-many (seq v <- regex-atom
#\+
(return `(,one-many ,v))))
(define regex-range (seq v <- regex-atom
#\{
min <- (zero-one natural-number 0)
max <- (zero-one (seq #\,
max <- (zero-one natural-number +inf.0)
(return max))
min)
#\}
(return `(,repeat ,v ,min ,max))))
(define regex-exp (seq SOF
sof <- regex-sof
atoms <- (zero-many (choice regex-zero-one
regex-zero-many
regex-one-many
regex-range
regex-atom
))
eof <- regex-eof
EOF
(return `(,regex-parser* ,@(if (char=? sof #\^)
`(,SOF)
'())
,@atoms
,@(if (char=? eof #\$)
`(,EOF)
'())))))
(define (regex-parser parsers)
(let ((regexp (sequence parsers)))
(if (eq? (car parsers) SOF)
regexp
(seq v <- (choice regexp
(seq any-char (regex-parser parsers)))
(return v)))))
(define (regex-parser* parser . parsers)
(regex-parser (cons parser parsers)))
(define (make-regex-exp in)
(define (helper exp)
(cond ((list? exp) (apply (car exp) (map helper (cdr exp))))
(else exp)))
(let-values (((exp in)
(regex-exp (make-input in))))
(if (failed? exp)
(error 'make-regex-exp "the regular expression is invalid")
(lambda (in)
((helper exp) (make-input in))))))
(provide regex-parser
make-regex-exp
)