geocoder.ss
(module geocoder mzscheme
  (require (planet "io.ss" ("dherman" "io.plt" 1 6)))
  (require (planet "xml.ss" ("jim" "webit.plt" 1 2)))
  (require (lib "string.ss" "srfi" "13"))
  (require (lib "head.ss" "net"))
  (require (lib "url.ss" "net"))
  (require (lib "ssax.ss" "ssax"))
  (require (lib "contract.ss"))
  (require (lib "match.ss"))
  (require (lib "string.ss"))

  (define current-geocoder-domain (make-parameter "rpc.geocoder.us"))
  (define current-geocoder-port (make-parameter 80))

  ;; ===========================================================================
  ;; DATA DEFINITIONS
  ;; ===========================================================================

  (define-struct (exn:fail:geocoder exn:fail) ())
  (define-struct (exn:fail:geocoder:request exn:fail:geocoder) (response))
  (define-syntax raise-geocoder-error
    (syntax-rules ()
      [(_ fmt-str args ...)
       (raise (make-exn:fail:geocoder
               (string->immutable-string
                (format "geocoder: ~a" (format fmt-str args ...)))
               (current-continuation-marks)))]))
  (define-syntax raise-request-error
    (syntax-rules ()
      [(_ lines)
       (let ([lines-v lines])
         (raise (make-exn:fail:geocoder:request
                 (string->immutable-string
                  (format "geocoder: failed request (~a ...)"
                          (car lines-v)))
                 (current-continuation-marks)
                 lines-v)))]))

  (define-struct content-type (type subtype attributes) #f)
  (define-struct geo-point (description longitude latitude) #f)

  ;; ===========================================================================
  ;; XML DATA DEFINITIONS
  ;; ===========================================================================

  (define-element (RDF http://www.w3.org/1999/02/22-rdf-syntax-ns#))
  (define-element (Point http://www.w3.org/2003/01/geo/wgs84_pos#))
  (define-element (lat http://www.w3.org/2003/01/geo/wgs84_pos#))
  (define-element (long http://www.w3.org/2003/01/geo/wgs84_pos#))
  (define-element (description http://purl.org/dc/elements/1.1/))

  (define *namespaces* null)
;  (define *namespaces* '((dc . "http://purl.org/dc/elements/1.1/")
;                         (geo . "http://www.w3.org/2003/01/geo/wgs84_pos#")
;                         (rdf . "http://www.w3.org/1999/02/22-rdf-syntax-ns#")))

  ;; ===========================================================================
  ;; RESPONSE PARSING
  ;; ===========================================================================

  ;; parse-content-type : (union string #f) -> content-type
  (define (parse-content-type content-type)
    (unless content-type
      (raise-geocoder-error "no Content-Type field received from host"))
    (match (regexp-split #rx"[ 	]*;[ 	]*" content-type)
      [(type-str extras ...)
       (match (regexp-split #rx"/" type-str)
         [(type subtype)
          (let ([extras* (map (lambda (pair)
                                (match (regexp-split #rx"[ 	]*=[ 	]*" pair)
                                  [(key value)
                                   (cons (string->symbol (string-downcase key))
                                         value)]
                                  [_ (raise-geocoder-error "invalid format: ~a"
                                                           content-type)]))
                              extras)])
            (make-content-type type subtype extras*))]
         [_ (raise-geocoder-error "invalid content type: ~a" content-type)])]
      [_ (raise-geocoder-error "invalid content type: ~a" content-type)]))

  ;; parse-geo-points : xml-document -> (listof geo-point)
  (define (parse-geo-points doc)
    (xml-match (xml-document-content doc)
      [(RDF ,{etc} ...)
       (list etc ...)]
      [(Point ,{description} ,{long} ,{lat})
       (make-geo-point description long lat)]
      [(description ,text) text]
      [(lat ,contents) (string->number contents)]
      [(long ,contents) (string->number contents)]
      [,otherwise (printf "~a~n" (xml-element-tag otherwise))]))

  ;; ===========================================================================
  ;; NETWORK PROCEDURES
  ;; ===========================================================================

  ;; read-headers : input-port -> (listof string)
  (define (read-headers in)
    (let loop ([headers null])
      (let ([line (read-line in 'any)])
        (if (or (eof-object? line) (string-null? line))
            (reverse headers)
            (loop (cons line headers))))))

  ;; string->http-path : string -> string
  (define (string->http-path street-address)
    (url->string
     (make-url #f #f #f #f
               (list "service" "rest")
               (list (cons 'address street-address))
               #f)))

  ;; download : string * string * string * string -> sxml
  (define (download street-address city state zip)
    (let ([path (string->http-path
                 (format "~a, ~a, ~a ~a" street-address city state zip))])
      (let-values ([(in out) (tcp-connect (current-geocoder-domain)
                                          (current-geocoder-port))])
        (dynamic-wind
         void
         (lambda ()
           (fprintf out "GET ~a HTTP/1.0\n" path)
           (fprintf out "Host: ~a:~a\n" (current-geocoder-domain)
                                        (current-geocoder-port))
           (fprintf out "\n")
           (tcp-abandon-port out)
           (let ([type (parse-content-type
                        (ormap (lambda (h) (extract-field "Content-Type" h))
                               (read-headers in)))])
             (match type
               [($ content-type "text" "rdf+xml" _)
                (ssax:xml->sxml in *namespaces*)]
               [($ content-type "text" "plain" attributes)
                (raise-request-error (read-lines in 'any))]
               [($ content-type t s _)
                (raise-geocoder-error "unsupported content type: ~a/~a"
                                      t s)])))
         (lambda ()
           (tcp-abandon-port in)
           (tcp-abandon-port out))))))

  ;; ===========================================================================
  ;; FRONT END
  ;; ===========================================================================

  ;; TODO: loosen the input format (just opt/lambda? or more combinations?)

  ;; geocode : string * string * string * string -> (listof geo-point)
  (define (geocode address city state zip)
    (parse-geo-points (download address city state zip)))

  (provide/contract [current-geocoder-domain parameter?]
                    [current-geocoder-port parameter?])
  (provide/contract [geocode (string? string? string? string? . -> . (listof geo-point?))])
  (provide/contract (struct exn:fail:geocoder ())
                    (struct exn:fail:geocoder:request ([response (listof string?)]))
                    (struct geo-point ([description string?]
                                       [longitude number?]
                                       [latitude number?]))))