#lang racket/base
(require parser-tools/yacc
parser-tools/lex
racket/list
"rule-structs.rkt")
(provide tokens
token-LPAREN
token-RPAREN
token-LBRACKET
token-RBRACKET
token-PIPE
token-REPEAT
token-RULE_HEAD
token-ID
token-LIT
token-EOF
grammar-parser
current-source
current-parser-error-handler
[struct-out rule]
[struct-out lhs-id]
[struct-out pattern]
[struct-out pattern-id]
[struct-out pattern-lit]
[struct-out pattern-token]
[struct-out pattern-choice]
[struct-out pattern-repeat]
[struct-out pattern-maybe]
[struct-out pattern-seq])
(define-tokens tokens (LPAREN
RPAREN
LBRACKET
RBRACKET
PIPE
REPEAT
RULE_HEAD
ID
LIT
EOF))
(define grammar-parser
(parser
(tokens tokens)
(src-pos)
(start rules)
(end EOF)
(grammar
[rules
[(rules*) $1]]
[rules*
[(rule rules*)
(cons $1 $2)]
[()
'()]]
[rule
[(RULE_HEAD pattern)
(begin
(define trimmed (regexp-replace #px"\\s*:$" $1 ""))
(rule $1-start-pos
$2-end-pos
(lhs-id $1-start-pos
(position (+ (position-offset $1-start-pos)
(string-length trimmed))
(position-line $1-start-pos)
(position-col $1-start-pos))
trimmed)
$2))]]
[pattern
[(implicit-pattern-sequence PIPE pattern)
(if (pattern-choice? $3)
(pattern-choice $1-start-pos
$3-end-pos
(cons $1 (pattern-choice-vals $3)))
(pattern-choice $1-start-pos
$3-end-pos
(list $1 $3)))]
[(implicit-pattern-sequence)
$1]]
[implicit-pattern-sequence
[(repeatable-pattern implicit-pattern-sequence)
(if (pattern-seq? $2)
(pattern-seq $1-start-pos $2-end-pos (cons $1 (pattern-seq-vals $2)))
(pattern-seq $1-start-pos $2-end-pos (list $1 $2)))]
[(repeatable-pattern)
$1]]
[repeatable-pattern
[(atomic-pattern REPEAT)
(cond [(string=? $2 "*")
(pattern-repeat $1-start-pos $2-end-pos 0 $1)]
[(string=? $2 "+")
(pattern-repeat $1-start-pos $2-end-pos 1 $1)]
[else
(error 'grammar-parse "unknown repetition operator ~e" $2)])]
[(atomic-pattern)
$1]]
[atomic-pattern
[(LIT)
(pattern-lit $1-start-pos $1-end-pos
(substring $1 1 (sub1 (string-length $1))))]
[(ID)
(if (token-id? $1)
(pattern-token $1-start-pos $1-end-pos $1)
(pattern-id $1-start-pos $1-end-pos $1))]
[(LBRACKET pattern RBRACKET)
(pattern-maybe $1-start-pos $3-end-pos $2)]
[(LPAREN pattern RPAREN)
$2]])
(error (lambda (tok-ok? tok-name tok-value start-pos end-pos)
((current-parser-error-handler) tok-ok? tok-name tok-value start-pos end-pos)))))
(define (token-id? id)
(string=? (string-upcase id)
id))
(define current-source (make-parameter #f))
(struct exn:fail:parse-grammar exn:fail (srclocs)
#:transparent
#:property prop:exn:srclocs (lambda (instance)
(exn:fail:parse-grammar-srclocs instance)))
(define current-parser-error-handler
(make-parameter
(lambda (tok-ok? tok-name tok-value start-pos end-pos)
(raise (exn:fail:parse-grammar
(format "Error while parsing grammar near: ~e [line=~a, column~a, position=~a]"
tok-value
(position-line start-pos)
(position-col start-pos)
(position-offset start-pos))
(current-continuation-marks)
(list (srcloc (current-source)
(position-line start-pos)
(position-col start-pos)
(position-offset start-pos)
(if (and (number? (position-offset end-pos))
(number? (position-offset start-pos)))
(- (position-offset end-pos)
(position-offset start-pos))
#f))))))))