(module xml mzscheme (require (lib "xml.ss" "xml")) (require (lib "pregexp.ss" "mzlib")) (require (lib "list.ss" "mzlib")) (provide read-xexpr write-xexpr xexpr-remove-whitespace xpath-xexpr xe-1 xe-1-more xe-0-more xexpr-sax xexpr-get-attr xexpr-elem xexpr-elems xexpr-attrs xexpr-attr ) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;=pod ;; ;;=head1 HO-Utils - XML Extensions ;; ;;=head2 Synopsys ;; ;;=syn scm,8 ;; ;; >(require (lib "xml.scm" "xml")) ;; >(require (planet "xml.scm" ("oesterholt" "ho-utils.plt" 1 0))) ;; ;; >(define p (open-input-port "some.xml")) ;; >(define e (read-xexpr p)) ;; >(set! e (xexpr-remove-whitespace e)) ;; >(xpath-xexpr "/sdf-styles/style[1]/name" e) ;; ("Style 1") ;; >(xpath-xexpr "/*/*/name" e) ;; ("Style 1" "Style 2" "Style 3") ;; >(xpath-xexpr "/*/*/name[2]" e) ;; ("Style 2") ;; >(xe-1 e "/*/*/fg-color[1]/r" (lambda (x) (error "HE!")) string->number) ;; 232 ;; >(xe-1 e "/*/*/qq-color[1]/r" (lambda (x) (error "HE!")) string->number) ;; HE! ;; >(xe-0-more e "/*/*/fg-color/r" string->number) ;; (2 232 4 23) ;; >(xe-1-more e "/*/*/tr-color" (lambda (x) (error "ZERO!")) string->number) ;; ZERO! ;; > (write-xexpr e "new.xml") ;; > (define e (read-xexpr "<?xml version=\"1.0\" encoding=\"UTF-8\" ?><a>Hello!<b><c>1</c><d>2</d></b><b>Hi</b></a>")) ;; > (xexpr-sax (lambda (element attributes value tree-level intermediate xexprs) ;; (display (format "~s:~s=~s (~s)~%" tree-level element value intermediate)) ;; #t) ;; e) ;; 0:a="Hello!" (#t) ;; 1:b="" (#t) ;; 2:c="1" (#f) ;; 2:d="2" (#f) ;; 1:b="Hi" (#f) ;; ;; >(define X (xexpr-elem 'sdf (xexpr-attrs (xexpr-attr 'version 1.0)) ;; (xexpr-elems (xexpr-elem 'fg-color "#434343") ;; (xexpr-elem 'bg-color "black")))) ;; ;; >(write-xexpr X) ;; ;;=head2 Reading/Writing XML Documents ;; ;;=head3 C<(read-xexpr port) : xexpr> ;; ;;Reads XML from port and converts it directly to an XExpression, removing ;;all whitespace only values. ;; ;;=head3 C<(write-xexpr xexpr . port) : void> ;; ;;Writes the xexpr as XML document to the default output port, or port. ;; ;; ;;=head2 Processing X-Expressions ;; ;;=head3 C<(xexpr-remove-whitespace xexpr) : xexpr> ;; ;;Removes all whitespace only values from xexpr. ;; ;;=head3 C<(xpath-xexpr xpath e) : xexpr> ;; ;;Evaluates xpath on e and returns the result set for this evaluation, ;;which is a list of XML subexpressions or (end-)values. If the expression ;;doesn't compute, the empty list is returned. If no elements are found, ;;also the empty list is returned. ;; ;;B<This function can't handle values for intermediate nodes yet!> ;; ;;B<precondition:> C<xpath> must be a valid XPath B<and> white space must ;;have been removed from C<xexpr> using C<xexpr-remove-whitespace>. ;; ;;=head3 C<(xe-1 xexpr xpath handler . convertor) : value> ;; ;;Expects 1 result in the result set of (xpath-xexpr xpath xexpr). ;;Returns (convertor applied to) this one result, or the result ;;of handler if 0 or more than 1 results have been returned. ;; ;;=head3 C<(xe-1-more xexpr xpath handler . convertor) : list of values> ;; ;;Expects 1 or more results in the result set of (xpath-xexpr xpath xexpr). ;;Returns (convertor applied to) this result-set (list of results), or the result ;;of handler if 0 results have been returned. ;; ;;=head3 C<(xe-0-more xexpr xpath . convertor) : list of values> ;; ;;Expects 0 or more results in the result set of (xpath-xexpr xpath xexpr). ;;Returns (convertor applied to) this result-set (list of results). ;; ;;=head3 C<(xexpr-get-attr attributes attribute . default) : string> ;; ;;Returns #f (or (car default)), if attribute is not found in attributes.E<lb> ;;Returns the associated value with attribute, otherwise.E<lb> ;; ;;Can be used in conjunction with the callback that must be provided for C<xexpr-sax>. ;; ;;=head3 C<(xexpr-sax callback xexpr) : boolean> ;; ;;Traverses the xexpr depth first, calling C<callback> for each element ;;with parameters C<element>, C<attributes>, C<value>, C<level>, C<intermediate-node> and C<xexprs>. ;; ;;If C<intermediate-node> is #f, this is and end node in the XML tree, otherwise, ;;this node contains other nodes. C<level> gives the current depth in the ;;XML tree, 0 being the top level.E<lb> ;; ;;=over 1 ;; ;;=over 1 ;; ;;=item C<element> ;; ;;is a symbol giving the element name of the current node. ;; ;;=item C<attributes> ;; ;;is a list of attributes C<(attribute-name:symbol value:string))>. ;; ;;=item C<value> ;; ;;is a string containing the value of the current node (works also for intermediate nodes). ;; ;;=item C<xexprs> ;; ;;is '(), if this is an end node. It is a list of all next level nodes, otherwise. ;; ;;=back ;; ;;=back ;; ;;I<Return value of callback:> if C<#f>, C<xexpr-sax> will not go depth first. ;;if C<#t>, C<xexpr-sax> will go depth first. This makes it possible to call ;;the sax parser again with a different C<callback> function on deeper level nodes. ;; ;;The basic behaviour of C<xexpr-sax> is O(xml-tags) (it traverses all xml-tags of ;;an XML document twice) (first pass assembling the value of a node and the next level nodes ;;of that node; second pass traversing the next level nodes). ;; ;;=head2 Creating X-Expressions ;; ;;=head3 C<(xexpr-elems first-elements . elements) : xexpr elements> ;; ;;Creates a sequence of xexpr XML elements. ;; ;;=head3 C<(xexpr-elem element . value|attributes|xexprs{0,3}) : xexpr element> ;; ;;Creates an xexpr element with optional value and/or attributes. ;; ;;=head3 C<(xexpr-attr attr value) : xexpr attribute> ;; ;;Creates an attribute. ;; ;;=head3 C<(xexpr-attrs . attributes) : xexpr attributes> ;; ;;Creates the attributes for an xexpression element ;; ;;=head2 Info ;; ;;Author: Hans Oesterholt-Dijkema, License: LGPL, (c) 2006. ;; ;;=cut ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; THIS IS WAY TO SLOW! ; (define reg-ws (pregexp "^[ \t\r\n]+$")) ; ; (define (is-space? str) ; (not (eq? (pregexp-match reg-ws str) #f))) ;;;;;;; ;; THIS IS ORDERS OF MAGNITUDE FASTER ; (define (is-space? str) ; (do ; ((i 0 (+ i 1)) ; (n (string-length str))) ; ((or (= i n) ; (not (char-whitespace? (string-ref str i)))) ; (= i n)) ; ())) ; (define (is-space-char? L) ; (if (null? L) ; #t ; (if (char-whitespace? (car L)) ; (is-space-char? (cdr L)) ; #f))) ; (define (is-space? str) ; (is-space-char? (string->list str))) ;;; THIS ONE SEEMS TO BE THE FASTEST ; (define re (regexp "^[ \t\n]*$")) ; (define (is-space? x) (regexp-match re x)) (define (is-space? x) (regexp-match #rx"^[ \t\n]*$" x)) ; (define (is-space? x) (andmap char-whitespace? (string->list x))) (define (xexpr-remove-whitespace xexpr) (define (filter L n) (if (null? L) L (if (string? (car L)) (if (null? (cdr L)) (if (is-space? (car L)) (if (= n 0) (list "") (list)) (cons (car L) (filter (cdr L) (+ n 1)))) (if (is-space? (car L)) (filter (cdr L) (+ n 1)) (cons (car L) (filter (cdr L) (+ n 1))))) (if (list? (car L)) (cons (xexpr-remove-whitespace (car L)) (filter (cdr L) (+ n 1))) (cons (car L) (filter (cdr L) (+ n 1))))))) (filter xexpr 0)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define re-slash (pregexp "[/]")) (define re-bracket-left (pregexp "\\[")) (define re-bracket-right (pregexp "\\]")) (define (xpath-xexpr path xexpr) (define (iterate-childs-for-all-symbols path-elem xexpressions) (if (null? xexpressions) (list) (if (or (eq? path-elem '*) (eq? (caar xexpressions) path-elem)) (cons (car xexpressions) (iterate-childs-for-all-symbols path-elem (cdr xexpressions))) (iterate-childs-for-all-symbols path-elem (cdr xexpressions))))) (define (iterate-childs-for-nth-of-symbol path-elem n xexpressions) (if (< n 1) (list) (let ((R (iterate-childs-for-all-symbols path-elem xexpressions))) (if (> n (length R)) (list) (list (list-ref R (- n 1))))))) (define (find-attribute attribute xexpressions) (define (find L attribute) (if (null? L) #f (if (eq? (caar L) attribute) (cadar L) (find (cdr L) attribute)))) (filter (lambda (e) (not (eq? e #f))) (map (lambda (xexpr) (if (list? xexpr) (find (cadr xexpr) attribute) #f)) xexpressions))) (define (calculate-for-xpath-element elem xexprs) (cond ((eq? (car elem) '@) (find-attribute (cadr elem) xexprs)) ((= (length elem) 2) (if (eq? (cadr elem) '*) (iterate-childs-for-all-symbols (car elem) xexprs) (iterate-childs-for-nth-of-symbol (car elem) (cadr elem) xexprs))) ((= (length elem) 1) (iterate-childs-for-all-symbols (car elem) xexprs)) (else (list)))) (define (xml-values result-set) (apply append (filter (lambda (e) (list? (car e))) (map (lambda (expr) (cddr expr)) result-set)))) (define (all-values result-set) (apply append (map (lambda (expr) (if (list? expr) (cddr expr) (list expr))) result-set))) (define (unroll-xpath xpath-elements xexpressions) ;(display (format "~s, ~s~%" xpath-elements xexpressions)) (if (null? xpath-elements) xexpressions (let* ((xpath-elem (car xpath-elements)) (result-set (calculate-for-xpath-element xpath-elem xexpressions))) ;(display (format "result-set: ~s~%" result-set)) (if (null? result-set) (list) (if (null? (cdr xpath-elements)) (all-values result-set) (if (eq? (caadr xpath-elements) '@) (unroll-xpath (cdr xpath-elements) result-set) (unroll-xpath (cdr xpath-elements) (xml-values result-set)))))))) (let ((path-elements (pregexp-split re-slash path))) ;(write path-elements)(newline) (unroll-xpath (filter (lambda (element) (not (eq? element '%nil%))) (map (lambda (element) (if (string=? element "") '%nil% (if (string=? (substring element 0 1) "@") (list '@ (string->symbol (substring element 1))) (let ((S (pregexp-split re-bracket-left element))) (if (= (length S) 2) (list (string->symbol (car S)) (string->number (pregexp-replace re-bracket-right (cadr S) ""))) (list (string->symbol element))))))) path-elements)) (list xexpr))) ) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define (read-xexpr port) (if (string? port) (let ((S (open-input-string port))) (read-xexpr S)) (xml->xexpr (document-element (read-xml port))))) (define (write-xexpr x . port) (let ((d (make-document (make-prolog (list (make-pi #f #f 'xml "version=\"1.0\" encoding=\"UTF-8\"")) #f) (if (list? x) (xexpr->xml x) x) '()))) (if (null? port) (write-xml d) (write-xml d (car port))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define (xe-1 e xpath handler . convertor) (let ((values (xpath-xexpr xpath e))) ;(display (format "~s ~s ~s~%" values handler convertor)) (if (null? values) (handler values) (if (null? (cdr values)) (if (null? convertor) (car values) ((car convertor) (car values))) (handler values))))) (define (xe-1-more e xpath handler . convertor) (let ((values (xpath-xexpr xpath e))) (if (null? values) (handler values) (if (null? convertor) values (map (car convertor) values))))) (define (xe-0-more e xpath . convertor) (let ((values (xpath-xexpr xpath e))) (if (null? convertor) values (map (car convertor) values)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define (xexpr-sax f xexpr) (define (traverse-xml-depth-first xexpr level) (define (assemble-element-value L intermediate) (if (null? L) (list "" (list)) (let ((v (assemble-element-value (cdr L) intermediate))) (if (string? (car L)) (cons (string-append (car L) (car v)) (cdr v)) (begin (set-car! intermediate #t) (list (car v) (cons (car L) (cadr v)))))))) (define (iterate xexpr) (if (null? xexpr) #t (let ((xexpr-element (car xexpr))) (if (list? xexpr-element) (let ((e (car xexpr-element)) (a (cadr xexpr-element)) (v (cddr xexpr-element)) (i (list #f))) (let ((value (assemble-element-value v i))) (let ((result (f e a (car value) level (car i) (cadr value)))) (if (car i) (if result (begin (traverse-xml-depth-first v (+ level 1)) (iterate (cdr xexpr))) (iterate (cdr xexpr))) (iterate (cdr xexpr)))))) (iterate (cdr xexpr)))))) (iterate xexpr)) (if (null? xexpr) #t (if (symbol? (car xexpr)) (traverse-xml-depth-first (list xexpr) 0) (traverse-xml-depth-first xexpr 0)))) (define (xexpr-get-attr attributes name . default) (let ((a (assq name attributes))) (if (eq? a #f) (if (null? default) #f (car default)) (cadr a)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Creating X expressions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define (xexpr-elems . args) (if (not (null? args)) (if (null? (cdr args)) (if (null? (car args)) (list '%xexpr-elems%) (if (list? (caar args)) (cons '%xexpr-elems% (car args)) (cons '%xexpr-elems% args))) (cons '%xexpr-elems% args)) (cons '%xexpr-elems% args))) (define (xexpr-elem e . args) (define (convert v) (if (list? v) v (format "~a" v))) (define (get-attrs v) (cdr v)) (define (get-xexpr v) (cdr v)) (define (attributes? v) (if (list? v) (if (null? v) #f (if (eq? (car v) '%attributes) #t #f)) #f)) (define (xexpr? v) (if (list? v) (if (null? v) #f (if (eq? (car v) '%xexpr-elems%) #t #f)) #f)) (let ((a #f) (v #f) (x #f)) (for-each (lambda (r) (if (attributes? r) (set! a (get-attrs r)) (if (xexpr? r) (set! x (get-xexpr r)) (set! v (convert r))))) args) ;(display (format "a=~s, v=~s, x=~s~%" a v x)) (cond ((and a v x) (cons e (cons a (cons v x)))) ((and a v) (list e a v)) ((and a x) (cons e (cons a x))) ((and v x) (cons e (cons '() (cons v x)))) (a (list e a)) (v (list e '() v)) (x (cons e (cons '() x))) (else (list e '()))))) (define (xexpr-attr a v) (list a (format "~a" v))) (define (xexpr-attrs . attrs) (cons '%attributes attrs)) ) ;; module