syntax-coloring.scm
(module syntax-coloring mzscheme
  (require (lib "lex.ss" "parser-tools")
           (prefix : (lib "lex-sre.ss" "parser-tools")))
  
  (define-lex-abbrevs
    (atoz       (:or (:/ #\a #\z) (:/ #\A #\Z)))
    (atof       (:or (:/ #\a #\f) (:/ #\A #\F)))
    (digit      (:/ #\0 #\9))
    (hex        (:or digit atof))
    (non-ascii  (:/ #\u0080 #\u00ff))
    (unicode    (:seq #\\ (:** 1 6 hex)
                      (:? (:or (:seq #\return #\newline)
                               #\space #\tab #\return #\newline #\page))))
    (escape     (:or unicode  (:seq #\\ (:~ #\return #\newline #\page hex))))
    (nmstart    (:or #\_ atoz non-ascii escape))
    (nmchar     (:or #\_ atoz digit #\- non-ascii escape))
    (string1    (:seq #\" (:* (:or (:~ #\newline #\return #\page #\")
                                   (:seq #\\ nl)
                                   escape))
                      #\"))
    (string2    (:seq #\' (:* (:or (:~ #\newline #\return #\page #\')
                                   (:seq #\\ nl)
                                   escape))
                      #\'))
    (invalid1   (:seq #\" (:* (:or (:~ #\newline #\return #\page #\")
                                   (:seq #\\ nl)
                                   escape))))
    (invalid2   (:seq #\' (:* (:or (:~ #\newline #\return #\page #\')
                                   (:seq #\\ nl)
                                   escape))))
    (ident      (:seq (:? #\-) nmstart (:* nmchar)))
    (name       (:+ nmchar))
    (num        (:or (:+ digit)
                     (:seq (:* digit) #\. (:+ digit))))
    (String     (:or string1 string2))
    (invalid    (:or invalid1 invalid2))
    (url        (:* (:or #\! #\# #\$ #\% #\& 
                         (char-range #\* #\~)  ; this range contains . / digits lower and uper letter and more
                         non-ascii
                         escape)))
    (s          (:or #\space #\tab #\return #\newline #\page))
    (w          (:* s))
    (nl         (:or #\newline (:seq #\return #\newline) #\return #\page))
    
    (A (:or #\a #\A (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\1) (:seq #\6 #\1)) (:? (:or (:seq #\return #\newline) s)))))
    (B (:or #\b #\B (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\2) (:seq #\6 #\2)) (:? (:or (:seq #\return #\newline) s)))))
    (C (:or #\c #\C (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\3) (:seq #\6 #\3)) (:? (:or (:seq #\return #\newline) s)))))
    (D (:or #\d #\D (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\4) (:seq #\6 #\4)) (:? (:or (:seq #\return #\newline) s)))))
    (E (:or #\e #\E (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\5) (:seq #\6 #\5)) (:? (:or (:seq #\return #\newline) s)))))
    (F (:or #\f #\F (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\6) (:seq #\6 #\6)) (:? (:or (:seq #\return #\newline) s)))))
    (G (:or #\g #\G (:seq #\\ #\\ #\g) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\7) (:seq #\6 #\7)) (:? (:or (:seq #\return #\newline) s)))))
    (H (:or #\h #\H (:seq #\\ #\\ #\h) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\8) (:seq #\6 #\8)) (:? (:or (:seq #\return #\newline) s)))))
    (I (:or #\i #\I (:seq #\\ #\\ #\i) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\9) (:seq #\6 #\9)) (:? (:or (:seq #\return #\newline) s)))))
    (J (:or #\j #\J (:seq #\\ #\\ #\j) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\a) (:seq #\6 #\a)) (:? (:or (:seq #\return #\newline) s)))))
    (K (:or #\k #\K (:seq #\\ #\\ #\k) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\b) (:seq #\6 #\b)) (:? (:or (:seq #\return #\newline) s)))))
    (L (:or #\l #\L (:seq #\\ #\\ #\l) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\c) (:seq #\6 #\c)) (:? (:or (:seq #\return #\newline) s)))))
    (M (:or #\m #\M (:seq #\\ #\\ #\m) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\d) (:seq #\6 #\d)) (:? (:or (:seq #\return #\newline) s)))))
    (N (:or #\n #\N (:seq #\\ #\\ #\n) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\e) (:seq #\6 #\e)) (:? (:or (:seq #\return #\newline) s)))))
    (O (:or #\o #\O (:seq #\\ #\\ #\o) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\f) (:seq #\6 #\f)) (:? (:or (:seq #\return #\newline) s)))))
    (P (:or #\p #\P (:seq #\\ #\\ #\p) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\0) (:seq #\7 #\0)) (:? (:or (:seq #\return #\newline) s)))))
    (Q (:or #\q #\Q (:seq #\\ #\\ #\q) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\1) (:seq #\7 #\1)) (:? (:or (:seq #\return #\newline) s)))))
    (R (:or #\r #\R (:seq #\\ #\\ #\r) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\2) (:seq #\7 #\2)) (:? (:or (:seq #\return #\newline) s)))))
    (S (:or #\s #\S (:seq #\\ #\\ #\s) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\3) (:seq #\7 #\3)) (:? (:or (:seq #\return #\newline) s)))))
    (T (:or #\t #\T (:seq #\\ #\\ #\t) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\4) (:seq #\7 #\4)) (:? (:or (:seq #\return #\newline) s)))))
    (U (:or #\u #\U (:seq #\\ #\\ #\u) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\5) (:seq #\7 #\5)) (:? (:or (:seq #\return #\newline) s)))))
    (V (:or #\v #\V (:seq #\\ #\\ #\v) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\6) (:seq #\7 #\6)) (:? (:or (:seq #\return #\newline) s)))))
    (W (:or #\w #\W (:seq #\\ #\\ #\w) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\7) (:seq #\7 #\7)) (:? (:or (:seq #\return #\newline) s)))))
    (X (:or #\x #\X (:seq #\\ #\\ #\x) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\8) (:seq #\7 #\8)) (:? (:or (:seq #\return #\newline) s)))))
    (Y (:or #\y #\Y (:seq #\\ #\\ #\y) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\9) (:seq #\7 #\9)) (:? (:or (:seq #\return #\newline) s)))))
    (Z (:or #\z #\Z (:seq #\\ #\\ #\z) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\a) (:seq #\7 #\a)) (:? (:or (:seq #\return #\newline) s)))))
    )
  
  (define (syn-val lex a b c d)
    (values lex a b (position-offset c) (position-offset d)))

  (define (colorize-block-comment my-start-pos)
    (define lxr
      (lexer
       [(:seq #\* #\/)
        (syn-val "" 'comment #f my-start-pos end-pos)]
       [(eof) (syn-val "" 'error #f my-start-pos end-pos)]
       [any-char (lxr input-port)]))
    lxr)
  
  (define (colorize-string delimiter my-start-pos)
    (define lxr
      (lexer
       [(:or #\' #\")
        (if (string=? lexeme delimiter)
            (syn-val "" 'string #f my-start-pos end-pos)
            (lxr input-port))]
       [(eof) (syn-val "" 'error #f my-start-pos end-pos)]
       [(:seq #\\ (:or #\' #\")) (lxr input-port)]
       [any-char (lxr input-port)]))
    lxr)

  (define get-syntax-token
    (lexer
     ; numbers
     [(:seq (:? "#") num 
            (:? (:or (:seq E M) (:seq E X) (:seq P X) 
                     (:seq C M) (:seq M M) (:seq I N)
                     (:seq P T) (:seq P C)
                     (:seq D E G) (:seq R A D) (:seq G R A D)
                     (:seq M S) (:seq S) (:seq H Z) ident "%")))
      (syn-val lexeme 'literal #f start-pos end-pos)]
     ; color codes
     [(:seq "#" (:or (:= 3 hex) (:= 6 hex)))
      (syn-val lexeme 'literal #f start-pos end-pos)]
     ; comments
     [(:seq #\/ #\*)
      ((colorize-block-comment start-pos) input-port)]
     ;
     [(:or "@import" "@page" "@media" "@charset"
           (:seq "!" (:or #\ #\newline #\tab #\page) "important")
           "url(")
      (syn-val lexeme 'keyword #f start-pos end-pos)]
     [(:or "," ":" ";" "=" "." "/" ">")
      (syn-val lexeme 'default #f start-pos end-pos)]
     ;
     [(:seq (:or "#" "." "") ident)
      (syn-val lexeme 'identifier #f start-pos end-pos)]
     ; strings
     [(:or #\' #\")
      ((colorize-string lexeme start-pos) input-port)]
     ; parentheses
     [(:or "[" "]" "{" "}" "(" ")")
      (syn-val lexeme 'parenthesis #f start-pos end-pos)]
     ; whitespace
     [(:+ s)
      (syn-val lexeme 'whitespace #f start-pos end-pos)]
     [(eof)
      (syn-val lexeme 'eof #f start-pos end-pos)]
     [any-char
      (syn-val lexeme 'error #f start-pos end-pos)]
     ))

  (provide get-syntax-token))