(module syntax-coloring mzscheme
(require (lib "lex.ss" "parser-tools")
(prefix : (lib "lex-sre.ss" "parser-tools")))
(define-lex-abbrevs
(atoz (:or (:/ #\a #\z) (:/ #\A #\Z)))
(atof (:or (:/ #\a #\f) (:/ #\A #\F)))
(digit (:/ #\0 #\9))
(hex (:or digit atof))
(non-ascii (:/ #\u0080 #\u00ff))
(unicode (:seq #\\ (:** 1 6 hex)
(:? (:or (:seq #\return #\newline)
#\space #\tab #\return #\newline #\page))))
(escape (:or unicode (:seq #\\ (:~ #\return #\newline #\page hex))))
(nmstart (:or #\_ atoz non-ascii escape))
(nmchar (:or #\_ atoz digit #\- non-ascii escape))
(string1 (:seq #\" (:* (:or (:~ #\newline #\return #\page #\")
(:seq #\\ nl)
escape))
#\"))
(string2 (:seq #\' (:* (:or (:~ #\newline #\return #\page #\')
(:seq #\\ nl)
escape))
#\'))
(invalid1 (:seq #\" (:* (:or (:~ #\newline #\return #\page #\")
(:seq #\\ nl)
escape))))
(invalid2 (:seq #\' (:* (:or (:~ #\newline #\return #\page #\')
(:seq #\\ nl)
escape))))
(ident (:seq (:? #\-) nmstart (:* nmchar)))
(name (:+ nmchar))
(num (:or (:+ digit)
(:seq (:* digit) #\. (:+ digit))))
(String (:or string1 string2))
(invalid (:or invalid1 invalid2))
(url (:* (:or #\! #\# #\$ #\% #\&
(char-range #\* #\~) non-ascii
escape)))
(s (:or #\space #\tab #\return #\newline #\page))
(w (:* s))
(nl (:or #\newline (:seq #\return #\newline) #\return #\page))
(A (:or #\a #\A (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\1) (:seq #\6 #\1)) (:? (:or (:seq #\return #\newline) s)))))
(B (:or #\b #\B (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\2) (:seq #\6 #\2)) (:? (:or (:seq #\return #\newline) s)))))
(C (:or #\c #\C (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\3) (:seq #\6 #\3)) (:? (:or (:seq #\return #\newline) s)))))
(D (:or #\d #\D (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\4) (:seq #\6 #\4)) (:? (:or (:seq #\return #\newline) s)))))
(E (:or #\e #\E (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\5) (:seq #\6 #\5)) (:? (:or (:seq #\return #\newline) s)))))
(F (:or #\f #\F (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\6) (:seq #\6 #\6)) (:? (:or (:seq #\return #\newline) s)))))
(G (:or #\g #\G (:seq #\\ #\\ #\g) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\7) (:seq #\6 #\7)) (:? (:or (:seq #\return #\newline) s)))))
(H (:or #\h #\H (:seq #\\ #\\ #\h) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\8) (:seq #\6 #\8)) (:? (:or (:seq #\return #\newline) s)))))
(I (:or #\i #\I (:seq #\\ #\\ #\i) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\9) (:seq #\6 #\9)) (:? (:or (:seq #\return #\newline) s)))))
(J (:or #\j #\J (:seq #\\ #\\ #\j) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\a) (:seq #\6 #\a)) (:? (:or (:seq #\return #\newline) s)))))
(K (:or #\k #\K (:seq #\\ #\\ #\k) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\b) (:seq #\6 #\b)) (:? (:or (:seq #\return #\newline) s)))))
(L (:or #\l #\L (:seq #\\ #\\ #\l) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\c) (:seq #\6 #\c)) (:? (:or (:seq #\return #\newline) s)))))
(M (:or #\m #\M (:seq #\\ #\\ #\m) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\d) (:seq #\6 #\d)) (:? (:or (:seq #\return #\newline) s)))))
(N (:or #\n #\N (:seq #\\ #\\ #\n) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\e) (:seq #\6 #\e)) (:? (:or (:seq #\return #\newline) s)))))
(O (:or #\o #\O (:seq #\\ #\\ #\o) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\f) (:seq #\6 #\f)) (:? (:or (:seq #\return #\newline) s)))))
(P (:or #\p #\P (:seq #\\ #\\ #\p) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\0) (:seq #\7 #\0)) (:? (:or (:seq #\return #\newline) s)))))
(Q (:or #\q #\Q (:seq #\\ #\\ #\q) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\1) (:seq #\7 #\1)) (:? (:or (:seq #\return #\newline) s)))))
(R (:or #\r #\R (:seq #\\ #\\ #\r) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\2) (:seq #\7 #\2)) (:? (:or (:seq #\return #\newline) s)))))
(S (:or #\s #\S (:seq #\\ #\\ #\s) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\3) (:seq #\7 #\3)) (:? (:or (:seq #\return #\newline) s)))))
(T (:or #\t #\T (:seq #\\ #\\ #\t) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\4) (:seq #\7 #\4)) (:? (:or (:seq #\return #\newline) s)))))
(U (:or #\u #\U (:seq #\\ #\\ #\u) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\5) (:seq #\7 #\5)) (:? (:or (:seq #\return #\newline) s)))))
(V (:or #\v #\V (:seq #\\ #\\ #\v) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\6) (:seq #\7 #\6)) (:? (:or (:seq #\return #\newline) s)))))
(W (:or #\w #\W (:seq #\\ #\\ #\w) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\7) (:seq #\7 #\7)) (:? (:or (:seq #\return #\newline) s)))))
(X (:or #\x #\X (:seq #\\ #\\ #\x) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\8) (:seq #\7 #\8)) (:? (:or (:seq #\return #\newline) s)))))
(Y (:or #\y #\Y (:seq #\\ #\\ #\y) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\9) (:seq #\7 #\9)) (:? (:or (:seq #\return #\newline) s)))))
(Z (:or #\z #\Z (:seq #\\ #\\ #\z) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\a) (:seq #\7 #\a)) (:? (:or (:seq #\return #\newline) s)))))
)
(define (syn-val lex a b c d)
(values lex a b (position-offset c) (position-offset d)))
(define (colorize-block-comment my-start-pos)
(define lxr
(lexer
[(:seq #\* #\/)
(syn-val "" 'comment #f my-start-pos end-pos)]
[(eof) (syn-val "" 'error #f my-start-pos end-pos)]
[any-char (lxr input-port)]))
lxr)
(define (colorize-string delimiter my-start-pos)
(define lxr
(lexer
[(:or #\' #\")
(if (string=? lexeme delimiter)
(syn-val "" 'string #f my-start-pos end-pos)
(lxr input-port))]
[(eof) (syn-val "" 'error #f my-start-pos end-pos)]
[(:seq #\\ (:or #\' #\")) (lxr input-port)]
[any-char (lxr input-port)]))
lxr)
(define get-syntax-token
(lexer
[(:seq (:? "#") num
(:? (:or (:seq E M) (:seq E X) (:seq P X)
(:seq C M) (:seq M M) (:seq I N)
(:seq P T) (:seq P C)
(:seq D E G) (:seq R A D) (:seq G R A D)
(:seq M S) (:seq S) (:seq H Z) ident "%")))
(syn-val lexeme 'literal #f start-pos end-pos)]
[(:seq "#" (:or (:= 3 hex) (:= 6 hex)))
(syn-val lexeme 'literal #f start-pos end-pos)]
[(:seq #\/ #\*)
((colorize-block-comment start-pos) input-port)]
[(:or "@import" "@page" "@media" "@charset"
(:seq "!" (:or #\ #\newline #\tab #\page) "important")
"url(")
(syn-val lexeme 'keyword #f start-pos end-pos)]
[(:or "," ":" ";" "=" "." "/" ">")
(syn-val lexeme 'default #f start-pos end-pos)]
[(:seq (:or "#" "." "") ident)
(syn-val lexeme 'identifier #f start-pos end-pos)]
[(:or #\' #\")
((colorize-string lexeme start-pos) input-port)]
[(:or "[" "]" "{" "}" "(" ")")
(syn-val lexeme 'parenthesis #f start-pos end-pos)]
[(:+ s)
(syn-val lexeme 'whitespace #f start-pos end-pos)]
[(eof)
(syn-val lexeme 'eof #f start-pos end-pos)]
[any-char
(syn-val lexeme 'error #f start-pos end-pos)]
))
(provide get-syntax-token))