(module lexer mzscheme
(provide parse-css-port parse-css-file parse-css-string)
(require (lib "lex.ss" "parser-tools")
(prefix : (lib "lex-sre.ss" "parser-tools"))
(lib "cfg-parser.ss" "algol60")
(lib "yacc.ss" "parser-tools")
(lib "readerr.ss" "syntax"))
(define-lex-abbrevs
(atoz (:or (:/ #\a #\z) (:/ #\A #\Z)))
(atof (:or (:/ #\a #\f) (:/ #\A #\F)))
(digit (:/ #\0 #\9))
(hex (:or digit atof))
(non-ascii (:/ #\u0080 #\u00ff))
(unicode (:seq #\\ (:** 1 6 hex)
(:? (:or (:seq #\return #\newline)
#\space #\tab #\return #\newline #\page))))
(escape (:or unicode (:seq #\\ (:~ #\return #\newline #\page hex))))
(nmstart (:or #\_ atoz non-ascii escape))
(nmchar (:or #\_ atoz digit #\- non-ascii escape))
(string1 (:seq #\" (:* (:or (:~ #\newline #\return #\page #\")
(:seq #\\ nl)
escape))
#\"))
(string2 (:seq #\' (:* (:or (:~ #\newline #\return #\page #\')
(:seq #\\ nl)
escape))
#\'))
(invalid1 (:seq #\" (:* (:or (:~ #\newline #\return #\page #\")
(:seq #\\ nl)
escape))))
(invalid2 (:seq #\' (:* (:or (:~ #\newline #\return #\page #\')
(:seq #\\ nl)
escape))))
(ident (:seq (:? #\-) nmstart (:* nmchar)))
(name (:+ nmchar))
(num (:or (:+ digit)
(:seq (:* digit) #\. (:+ digit))))
(String (:or string1 string2))
(invalid (:or invalid1 invalid2))
(url (:* (:or #\! #\# #\$ #\% #\&
(char-range #\* #\~) non-ascii
escape)))
(s (:or #\space #\tab #\return #\newline #\page))
(w (:* s))
(nl (:or #\newline (:seq #\return #\newline) #\return #\page))
(A (:or #\a #\A (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\1) (:seq #\6 #\1)) (:? (:or (:seq #\return #\newline) s)))))
(B (:or #\b #\B (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\2) (:seq #\6 #\2)) (:? (:or (:seq #\return #\newline) s)))))
(C (:or #\c #\C (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\3) (:seq #\6 #\3)) (:? (:or (:seq #\return #\newline) s)))))
(D (:or #\d #\D (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\4) (:seq #\6 #\4)) (:? (:or (:seq #\return #\newline) s)))))
(E (:or #\e #\E (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\5) (:seq #\6 #\5)) (:? (:or (:seq #\return #\newline) s)))))
(F (:or #\f #\F (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\6) (:seq #\6 #\6)) (:? (:or (:seq #\return #\newline) s)))))
(G (:or #\g #\G (:seq #\\ #\\ #\g) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\7) (:seq #\6 #\7)) (:? (:or (:seq #\return #\newline) s)))))
(H (:or #\h #\H (:seq #\\ #\\ #\h) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\8) (:seq #\6 #\8)) (:? (:or (:seq #\return #\newline) s)))))
(I (:or #\i #\I (:seq #\\ #\\ #\i) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\9) (:seq #\6 #\9)) (:? (:or (:seq #\return #\newline) s)))))
(J (:or #\j #\J (:seq #\\ #\\ #\j) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\a) (:seq #\6 #\a)) (:? (:or (:seq #\return #\newline) s)))))
(K (:or #\k #\K (:seq #\\ #\\ #\k) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\b) (:seq #\6 #\b)) (:? (:or (:seq #\return #\newline) s)))))
(L (:or #\l #\L (:seq #\\ #\\ #\l) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\c) (:seq #\6 #\c)) (:? (:or (:seq #\return #\newline) s)))))
(M (:or #\m #\M (:seq #\\ #\\ #\m) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\d) (:seq #\6 #\d)) (:? (:or (:seq #\return #\newline) s)))))
(N (:or #\n #\N (:seq #\\ #\\ #\n) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\e) (:seq #\6 #\e)) (:? (:or (:seq #\return #\newline) s)))))
(O (:or #\o #\O (:seq #\\ #\\ #\o) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\f) (:seq #\6 #\f)) (:? (:or (:seq #\return #\newline) s)))))
(P (:or #\p #\P (:seq #\\ #\\ #\p) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\0) (:seq #\7 #\0)) (:? (:or (:seq #\return #\newline) s)))))
(Q (:or #\q #\Q (:seq #\\ #\\ #\q) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\1) (:seq #\7 #\1)) (:? (:or (:seq #\return #\newline) s)))))
(R (:or #\r #\R (:seq #\\ #\\ #\r) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\2) (:seq #\7 #\2)) (:? (:or (:seq #\return #\newline) s)))))
(S (:or #\s #\S (:seq #\\ #\\ #\s) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\3) (:seq #\7 #\3)) (:? (:or (:seq #\return #\newline) s)))))
(T (:or #\t #\T (:seq #\\ #\\ #\t) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\4) (:seq #\7 #\4)) (:? (:or (:seq #\return #\newline) s)))))
(U (:or #\u #\U (:seq #\\ #\\ #\u) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\5) (:seq #\7 #\5)) (:? (:or (:seq #\return #\newline) s)))))
(V (:or #\v #\V (:seq #\\ #\\ #\v) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\6) (:seq #\7 #\6)) (:? (:or (:seq #\return #\newline) s)))))
(W (:or #\w #\W (:seq #\\ #\\ #\w) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\7) (:seq #\7 #\7)) (:? (:or (:seq #\return #\newline) s)))))
(X (:or #\x #\X (:seq #\\ #\\ #\x) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\8) (:seq #\7 #\8)) (:? (:or (:seq #\return #\newline) s)))))
(Y (:or #\y #\Y (:seq #\\ #\\ #\y) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\9) (:seq #\7 #\9)) (:? (:or (:seq #\return #\newline) s)))))
(Z (:or #\z #\Z (:seq #\\ #\\ #\z) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\a) (:seq #\7 #\a)) (:? (:or (:seq #\return #\newline) s)))))
)
(define-tokens non-terminals (<angle> <dimension> <ems> <exs> <freq>
<function> <hash> <ident> <invalid>
<length> <number> <percentage>
<string> <time> <uri>
CDO CDC CHARSET-SYM
COLON
COMMA
DASHMATCH
DOT
EOF
EQUAL
IMPORT-SYM
IMPORTANT-SYM
INCLUDES
GREATER
LBRACE
LBRACKET
MEDIA-SYM
MINUS
PAGE-SYM
PLUS
S SLASH
RBRACE
RBRACKET
RPAREN
SEMICOLON
STAR
UNPARSEABLE))
(define stx-for-original-property (read-syntax #f (open-input-string "original")))
(define-syntax (token stx)
(syntax-case stx ()
[(_ name val)
(identifier? (syntax name))
(let ([name (syntax name)])
(with-syntax ([token-name (datum->syntax-object
name
(string->symbol
(format "token-~a" (syntax-e name))))]
[source-name (datum->syntax-object name 'source-name)]
[start-pos (datum->syntax-object name 'start-pos)]
[end-pos (datum->syntax-object name 'end-pos)])
(syntax
(token-name
(datum->syntax-object #f val
(list
source-name
(position-line start-pos)
(position-col start-pos)
(position-offset start-pos)
(- (position-offset end-pos)
(position-offset start-pos)))
stx-for-original-property)))))]))
(define-syntax (ttoken stx)
(syntax-case stx ()
[(_ name)
(identifier? (syntax name))
(syntax (token name 'name))]))
(define (lex source-name)
(letrec ([loop
(lexer
((:seq "url(" w String w ")") (token <uri> lexeme))
((:seq "url(" w url w ")") (token <uri> lexeme))
((:: "/*" (complement (:: any-string "*/" any-string)) "*/")
(loop input-port))
((:: (:+ s) "/*" (complement (:: any-string "*/" any-string)) "*/")
(token S lexeme))
((:seq "/*"
(:* (:~ #\*)) (:+ #\*)
(:* (:~ #\/ #\*) (:* (:~ #\*)) (:+ #\*))
"/")
(loop input-port))
((:seq (:+ s) "/*"
(:~ #\*) (:+ #\*)
(:* (:~ #\/ #\*) (:* (:~ #\*)) (:+ #\*))
"/") (token S lexeme))
((:+ s) (token S lexeme))
("<!--" (token CDO '<!--))
("--!>" (token CDC '--!>))
("~=" (token INCLUDES '~=))
("|=" (token DASHMATCH (string->symbol "|=")))
((:seq w #\{) (token LBRACE '|{|))
((:seq w #\}) (token RBRACE '|}|))
((:seq w #\+) (token PLUS '+))
((:seq w #\>) (token GREATER '>))
((:seq w #\,) (token COMMA '|,|))
(String (token <string> lexeme))
(invalid (token <invalid> lexeme))
(ident (token <ident> lexeme))
((:seq #\# name) (token <hash> lexeme))
("@import" (token IMPORT-SYM '@import))
("@page" (token PAGE-SYM '@page))
("@media" (token MEDIA-SYM '@media))
("@charset" (token CHARSET-SYM '@charset))
((:seq #\! w
"important") (token IMPORTANT-SYM '!important))
((:seq num E M) (token <ems> lexeme))
((:seq num E X) (token <exs> lexeme))
((:seq num P X) (token <length> lexeme))
((:seq num C M) (token <length> lexeme))
((:seq num M M) (token <length> lexeme))
((:seq num I N) (token <length> lexeme))
((:seq num P T) (token <length> lexeme))
((:seq num P C) (token <length> lexeme))
((:seq num D E G) (token <angle> lexeme))
((:seq num R A D) (token <angle> lexeme))
((:seq num G R A D) (token <angle> lexeme))
((:seq num M S) (token <time> lexeme))
((:seq num S) (token <time> lexeme))
((:seq num H Z) (token <freq> lexeme))
((:seq num K H Z) (token <freq> lexeme))
((:seq num ident) (token <dimension> lexeme))
((:seq num #\%) (token <percentage> lexeme))
((:seq num) (token <number> lexeme))
((:seq ident "(") (token <function> lexeme))
((eof) (ttoken EOF))
(#\; (token SEMICOLON '| (#\: (token COLON '|:|))
(#\/ (token SLASH '/))
(#\- (token MINUS '-))
(#\+ (token PLUS '+))
(#\> (token GREATER '>))
(#\. (token DOT '|.|))
(#\* (token STAR '*))
(#\[ (token LBRACKET '|[|))
(#\] (token RBRACKET '|]|))
(#\= (token EQUAL '=))
(#\) (token RPAREN '|)|))
(any-char (token UNPARSEABLE (string->symbol lexeme)))
)])
loop))
(define parse
(cfg-parser
(tokens non-terminals)
(start stylesheet)
(end EOF)
(error (lambda (a b stx)
(display stx)
(raise-read-error (format "parse error near ~a" (syntax-e stx))
(syntax-source stx)
(syntax-line stx)
(syntax-column stx)
(syntax-position stx)
(syntax-span stx))))
(suppress)
(grammar
(S* ((S* S) 'skip)
(() 'skip))
(S+ ((S+ S) 'skip)
((S) 'skip))
(stylesheet ((stylesheet1 stylesheet2* stylesheet3*) `(css ,$1 . ,(append (reverse $2) (reverse $3))))
(( stylesheet2* stylesheet3*) `(css . ,(append (reverse $1) (reverse $2)))))
(stylesheet1 ((CHARSET-SYM S* <string> S* SEMICOLON) `(@charset ,$3)))
(stylesheet2* ((s-cdo-cds* stylesheet22*) (reverse $2))) (s-cdo-cds ((S) 'S)
((CDO) 'CDO)
((CDC) 'CDC))
(s-cdo-cds* ((s-cdo-cds* s-cdo-cds) (cons $2 $1))
(() '()))
(stylesheet22 ((import s-cdo-cds*) $1)
((import) $1))
(stylesheet22* ((stylesheet22* stylesheet22) (cons $2 $1))
(() '()))
(stylesheet3 ((ruleset s-cdo-cds*) $1)
((media s-cdo-cds*) $1)
((page s-cdo-cds*) $1))
(stylesheet3* ((stylesheet3* stylesheet3) (cons $2 $1))
(() '()))
(import ((IMPORT-SYM S* string-or-uri S* import1 SEMICOLON S*) `((import $3) ,@(reverse $5)))
((IMPORT-SYM S* string-or-uri S* SEMICOLON S*) `((import $3))))
(string-or-uri ((<string>) $1)
((<uri>) $1))
(import1 ((medium comma-medium-list) (cons $1 $2)))
(comma-medium-list ((comma-medium-list COMMA S* medium) (cons $4 $1))
(() '()))
(media ((MEDIA-SYM S* medium comma-medium-list LBRACE S* ruleset-start RBRACE S*)
`(MEDIA ,(cons $3 (reverse $4)) ,(reverse $7))))
(medium ((<ident> S*) $1))
(page ((PAGE-SYM S* pseudo-page S* LBRACE S* declaration semi-declaration-star RBRACE S*)
`(PAGE ,$3 ,(cons $7 (reverse $8))))
((PAGE-SYM S* S* LBRACE S* declaration semi-declaration-star RBRACE S*)
`(PAGE ,(cons $6 (reverse $7)))))
(pseudo-page ((COLON <ident>) $2))
(operator ((SLASH S*) '/)
((COMMA S*) 'COMMA)
(() '()))
(combinator ((PLUS S*) '+)
((GREATER S*) '>)
((S+) 'SPACE-COMBINATOR))
(unary-operator ((MINUS) '-)
((PLUS) '+))
(property ((<ident> S*) $1))
(ruleset ((selector comma-selector-star LBRACE S* declaration semi-declaration-star RBRACE S*)
`(RULESET ,$1 ,$2 ,(cons $5 (reverse $6)))))
(comma-selector-star ((comma-selector-star COMMA S* selector) (cons $4 $1))
(() '()))
(semi-declaration-star ((semi-declaration-star SEMICOLON S* declaration) (cons $4 $1))
(() ()))
(ruleset-start ((ruleset-start ruleset) (cons $1 $2))
(() '()))
(selector ((simple-selector combinator-simple-selector-star) `(SELECTOR ,@(cons $1 (reverse $2)))))
(combinator-simple-selector-star ((combinator-simple-selector-star combinator simple-selector) (cons $3 (cons $2 $1)))
(() '()))
(simple-selector ((element-name HASH/class/attrib/pseudo-star) `(SIMPLE-SELECTOR ,$1 ,(reverse $2)))
((HASH/class/attrib/pseudo-plus) `(SIMPLE-SELECTOR ,(reverse $1))))
(HASH/class/attrib/pseudo ((<hash>) $1)
((class) $1)
((attrib) $1)
((pseudo) $1))
(HASH/class/attrib/pseudo-star ((HASH/class/attrib/pseudo-star HASH/class/attrib/pseudo) (cons $2 $1))
(() '()))
(HASH/class/attrib/pseudo-plus ((HASH/class/attrib/pseudo-star HASH/class/attrib/pseudo) (cons $2 $1))
((HASH/class/attrib/pseudo) (list $1)))
(class ((DOT <ident>) `(CLASS ,$2)))
(element-name ((<ident>) $1)
((STAR) '*))
(attrib ((LBRACKET <ident> S* RBRACKET) `(ATTRIB $2))
((LBRACKET <ident> S* attrib1 RBRACKET) `(ATTRIB $2 ,@$4)))
(attrib1 ((equal/includes/dashmatch S* ident/string S*) (list $1 $3)))
(equal/includes/dashmatch ((EQUAL) '=)
((INCLUDES) 'INCLUDES)
((DASHMATCH) 'DASHMATCH))
(ident/string ((<ident>) $1)
((<string>) $1))
(pseudo ((COLON <ident>) `(PSEUDO ,$2))
((COLON <function> S* RPAREN) `(PSEUDO ,$2))
((COLON <function> S* <ident> S* RPAREN) `(PSEUDO ,$2 ,$4)))
(declaration ((property COLON S* expr) `(DECLARATION ,$1 ,$4))
((property COLON S* expr prio) `(DECLARATION ,$1 ,$4 ,$5))
(() '()))
(prio ((IMPORTANT-SYM S*) 'IMPORTANT))
(expr ((term operator-term-star) (cons $1 (reverse $2))))
(operator-term-star ((operator-term-star operator term) (cons $3 (cons $2 $1)))
(() '()))
(term ((term2) `(TERM2 ,$1))
((unary-operator term1) `(TERM1 ,(cons $1 $2)))
((term1) `(TERM1 ,$1)))
(term1 ((<number> S*) $1) ((<percentage> S*) $1) ((<length> S*) $1) ((<ems> S*) $1) ((<exs> S*) $1)
((<angle> S*) $1) ((<time> S*) $1) ((<freq> S*) $1))
(term2 ((<string> S*) $1) ((<ident> S*) $1) ((<uri> S*) `(URI ,$1)) ((hexcolor) $1) ((function) $1))
(function ((<function> S* expr RPAREN S*) `(FUNCTION ,$1 ,$3)))
(hexcolor ((<hash> S*) `(HEXCOLOR ,$1))))))
(define (parse-css-port port file)
(let ([lexer (lex file)])
(port-count-lines! port)
(parse
(lambda ()
(let loop ()
(let ([v (lexer port)])
(if (void? v)
(loop)
v)))))))
(define (parse-css-file file)
(with-input-from-file file
(lambda ()
(parse-css-port (current-input-port)
(path->complete-path file)))))
(define (parse-css-string string)
(let ([port (open-input-string string)])
(parse-css-port port (object-name port))))
)