#cs(module modif mzscheme (require (planet "ssax.ss" ("lizorkin" "ssax.plt" 1 0))) (require "sxpathlib.ss") (require "sxml-tools.ss") (require "xpathlink.ss") (require "xpath-ast.ss") (require "ddo-txpath.ss") ;; A tool for making functional-style modifications to SXML documents ; ; This software is in Public Domain. ; IT IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND. ; ; Please send bug reports and comments to: ; lizorkin@hotbox.ru Dmitry Lizorkin ; ; The basics of modification language design was inspired by Patrick Lehti ; and his data manipulation processor for XML Query Language: ; http://www.ipsi.fraunhofer.de/~lehti/ ; However, with functional techniques we can do this better... ;========================================================================== ; Modification core ; Displays an error to stderr and returns #f (define (sxml:modification-error . text) (cerr "Modification error: ") (apply cerr text) (cerr nl) #f) ; Separates the list into two lists with respect to the predicate ; Returns: (values res-lst1 res-lst2) ; res-lst1 - contains all members from the input lst that satisfy the pred? ; res-lst2 - contains the remaining members of the input lst (define (sxml:separate-list pred? lst) (let loop ((lst lst) (satisfy '()) (rest '())) (cond ((null? lst) (values (reverse satisfy) (reverse rest))) ((pred? (car lst)) ; the first member satisfies the predicate (loop (cdr lst) (cons (car lst) satisfy) rest)) (else (loop (cdr lst) satisfy (cons (car lst) rest)))))) ;------------------------------------------------- ; Miscellaneous helpers ; Asserts that the given obj is a proper attribute node. ; If this is the case, returns #t. Otherwise, calls sxml:modification-error ; with the appropriate error message. ; Handles singular attributes correctly. In accordance with SXML 3.0, accepts ; aux lists as attribute nodes (define (sxml:assert-proper-attribute obj) (if (or (and (pair? obj) ; aux node - any content is acceptable (not (null? obj)) (eq? (car obj) '@)) (and (list? obj) ; '() is not a list (symbol? (car obj)) (or (null? (cdr obj)) ; singular attribute (null? (cddr obj))))) #t (sxml:modification-error "improper attribute node - " obj))) ; Unites a list of annot-attributes into a single annot-attributes. ; Ensures that every attribute is a proper one, and that there is no duplicate ; attributes ; annot-attributes-lst ::= (listof annot-attributes) ; In accordance with SXML specification, version 3.0: ; [3] <annot-attributes> ::= (@ <attribute>* <annotations>? ) ; In case of an error, returns #f. ; In the correct case, returns: annot-attributes (define (sxml:unite-annot-attributes-lists . annot-attributes-lst) (if (null? annot-attributes-lst) ; nothing to do '() (let iter-lst ((src annot-attributes-lst) (attrs '()) (annotations '())) (if (null? src) ; Recursion finished (if (null? annotations) (cons '@ (reverse attrs)) `(@ ,@(reverse attrs) (@ ,@annotations))) (let iter-annot-attrs ((annot-attrs (cdar src)) (attrs attrs) (annotations annotations)) (if (null? annot-attrs) ; proceed with the outer loop (iter-lst (cdr src) attrs annotations) (let ((curr (car annot-attrs))) (cond ((and (pair? curr) (not (null? curr)) (eq? (car curr) '@)) ; an annotation node (iter-annot-attrs (cdr annot-attrs) attrs (append annotations (cdr curr)))) ((sxml:assert-proper-attribute curr) (if (assq (car curr) attrs) ; duplicate attribute detected (sxml:modification-error "duplicate attribute - " (car curr)) (iter-annot-attrs (cdr annot-attrs) (cons curr attrs) annotations))) (else ; improper attribute #f))))))))) ;------------------------------------------------- ; The core function of document transformation into a new document ; doc - a source SXML document ; update-targets ::= (listof update-target) ; update-target ::= (list context handler base-node) ; context - context of the node selected by the location path ; handler ::= (lambda (node context base-node) ...) ; handler - specifies the required transformation over the node selected ; base-node - the node with respect to which the location path was evaluated ; ; Returns the new document. In case of a transformation that results to a ; non-well-formed document, returns #f and the error message is displayed to ; stderr as a side effect (define (sxml:transform-document doc update-targets) (letrec (; targets-alist ::= (listof (cons node-chain update-target)) ; node-chain - the chain of nodes, starting from the current node (tree-trans (lambda (curr-node targets-alist) (let-values* (((matched ; handlers which match this node targets-alist ; the rest ) (sxml:separate-list (lambda (pair) (null? (car pair))) targets-alist))) (and-let* ((after-subnodes ; curr-node after its subnodes are processed (if (or (not (pair? curr-node)) ; leaf node (null? targets-alist) ; no more handlers ) curr-node (let process-attrs ((targets-alist targets-alist) (src-attrs (sxml:attr-list curr-node)) (res-attrs '())) (if (null? src-attrs) ; all attributes processed ; Go to proceed child elements (if (null? targets-alist) ; children don't need to be processed (cons ; Constructing the result node (car curr-node) ; node name ((lambda (kids) (if (null? res-attrs) ; no attributes kids (cons (cons '@ (reverse res-attrs)) kids))) ((if (and (not (null? (cdr curr-node))) (pair? (cadr curr-node)) (eq? (caadr curr-node) '@)) cddr cdr) curr-node))) (let process-kids ((targets-alist targets-alist) (src-kids (cdr curr-node)) (res-kids '())) (cond ((null? src-kids) ; all kids processed (let-values* (((more-attrs kids) (sxml:separate-list (lambda (obj) (and (pair? obj) (not (null? obj)) (eq? (car obj) '@))) res-kids))) (cons ; Constructing the result node (car curr-node) ; node name (if (and (null? res-attrs) (null? more-attrs)) kids (and-let* ((overall-attrs (apply sxml:unite-annot-attributes-lists (cons (cons '@ (reverse res-attrs)) more-attrs)))) (cons overall-attrs kids)))))) ((and (pair? (car src-kids)) (eq? (caar src-kids) '@)) ; attribute node - already processed (process-kids targets-alist (cdr src-kids) res-kids)) (else (let ((kid-templates (filter (lambda (pair) (eq? (caar pair) (car src-kids))) targets-alist))) (if (null? kid-templates) ; this child node remains as is (process-kids targets-alist (cdr src-kids) (append res-kids (list (car src-kids)))) (and-let* ((new-kid (tree-trans (car src-kids) (map (lambda (pair) (cons (cdar pair) (cdr pair))) kid-templates)))) (process-kids (filter (lambda (pair) (not (eq? (caar pair) (car src-kids)))) targets-alist) (cdr src-kids) (append res-kids (if (nodeset? new-kid) new-kid (list new-kid))))))))))) (let* ((curr-attr (car src-attrs)) (attr-templates (filter (lambda (pair) (eq? (caar pair) curr-attr)) targets-alist))) (if (null? attr-templates) ; this attribute remains as is (process-attrs targets-alist (cdr src-attrs) (cons curr-attr res-attrs)) (let ((new-attr ; cannot produce error for attrs (tree-trans curr-attr (map (lambda (pair) (cons (cdar pair) (cdr pair))) attr-templates)))) (process-attrs (filter (lambda (pair) (not (eq? (caar pair) curr-attr))) targets-alist) (cdr src-attrs) (if (nodeset? new-attr) (append (reverse new-attr) res-attrs) (cons new-attr res-attrs))))))))))) (let process-this ((new-curr-node after-subnodes) (curr-handlers (map cdr matched))) (if (null? curr-handlers) new-curr-node (process-this (if (nodeset? new-curr-node) (map-union (lambda (node) (as-nodeset ((cadar curr-handlers) ; lambda node (caar curr-handlers) ; context (caddar curr-handlers) ; base-node ))) new-curr-node) ((cadar curr-handlers) ; lambda new-curr-node (caar curr-handlers) ; context (caddar curr-handlers) ; base-node )) (cdr curr-handlers))))))))) (let ((targets-alist (map-union (lambda (triple) (let ((node-path (reverse (sxml:context->content (car triple))))) (if (eq? (car node-path) doc) (list (cons (cdr node-path) triple)) '()))) update-targets))) (if (null? targets-alist) ; nothing to do doc (tree-trans doc targets-alist))))) ;========================================================================== ; Processing update-specifiers ; Evaluates lambda-upd-specifiers for the SXML document doc� ; Returns: ; update-targets ::= (listof update-target) ; update-target ::= (list context handler base-node) ; context - context of the node selected by the location path ; handler ::= (lambda (node context base-node) ...) ; handler - specifies the required transformation over the node selected ; base-node - the node with respect to which the location path was evaluated (define (sxml:lambdas-upd-specifiers->targets doc lambdas-upd-specifiers) (let ((doc-list (list doc))) (letrec ((construct-targets ; base-cntxtset - base context set for the current upd-specifier ; lambdas-upd-specifiers - is assumed to be non-null? (lambda (base-cntxtset lambdas-upd-specifiers) (let ((triple (car lambdas-upd-specifiers))) ; Iterates members of the base context-set ; new-base ::= (listof context-set) ; Each context-set is obtained by applying the txpath-lambda ; to the each member of base-cntxtset (let iter-base ((base-cntxtset base-cntxtset) (res '()) (new-base '())) (if (null? base-cntxtset) ; finished scanning base context-set (if (null? (cdr lambdas-upd-specifiers)) ; no more members res (append res (construct-targets (if (cadadr lambdas-upd-specifiers) ; following is relative (apply ddo:unite-multiple-context-sets new-base) doc-list) (cdr lambdas-upd-specifiers)))) (let* ((curr-base-context (car base-cntxtset)) (context-set ((car triple) (list curr-base-context) (cons 1 1) '() ; dummy var-binding ))) (iter-base (cdr base-cntxtset) (append res (map (lambda (context) (list context (caddr triple) ; handler (sxml:context->node curr-base-context))) context-set)) (cons context-set new-base))))))))) (if (null? lambdas-upd-specifiers) ; no transformation rules '() (construct-targets doc-list lambdas-upd-specifiers))))) ; "Precompiles" each of update-specifiers, by transforming location paths and ; update actions into lambdas. ; Returns: ; lambdas-upd-specifiers ::= (listof lambdas-upd-specifier) ; lambdas-upd-specifier ::= (list txpath-lambda relative? handler) ; txpath-lambda ::= (lambda (nodeset position+size var-binding) ...) ; txpath-lambda - full-argument implementation of a location path ; relative? - whether the txpath lambda is to be evaluated relatively to the ; node selected by the previous lambdas-upd-specifier, or with respect to ; the root of the document. For relative?=#t the base-node is the node ; selected by the previous lambdas-upd-specifier, otherwise the base node is ; the root of the document being transformed ; handler ::= (lambda (node context base-node) ...) (define (sxml:update-specifiers->lambdas update-specifiers) (let iter ((src update-specifiers) (res '())) (if (null? src) ; every specifier processed (reverse res) (let ((curr (car src))) (if (or (not (list? curr)) (null? (cdr curr))) (sxml:modification-error "improper update-specifier: " curr) (and-let* ; Convert Location path to XPath AST ((ast (txp:xpath->ast (car curr)))) (let-values* (((txpath-pair relative?) (if (eq? (car ast) 'absolute-location-path) (values (ddo:ast-relative-location-path (cons 'relative-location-path (cdr ast)) #f ; keep all ancestors #t ; on a single level, since a single node 0 ; zero predicate nesting ) #f) (values (ddo:ast-relative-location-path ast #f #t 0) (not (null? res)) ; absolute for the first rule )))) (if (not txpath-pair) ; semantic error txpath-pair ; propagate the error (let ((txpath-lambda (car txpath-pair)) (action (cadr curr))) (if (procedure? action) ; user-supplied handler (iter (cdr src) (cons (list txpath-lambda relative? action) res)) (case action ((delete delete-undeep) (iter (cdr src) (cons (list txpath-lambda relative? (cdr (assq action `((delete . ,modif:delete) (delete-undeep . ,modif:delete-undeep))))) res))) ((insert-into insert-following insert-preceding) (let ((params (cddr curr))) (iter (cdr src) (cons (list txpath-lambda relative? ((cdr (assq action `((insert-into . ,modif:insert-into) (insert-following . ,modif:insert-following) (insert-preceding . ,modif:insert-preceding)))) (lambda (context base-node) params))) res)))) ((replace) (let ((params (cddr curr))) (iter (cdr src) (cons (list txpath-lambda relative? (lambda (node context base-node) params)) res)))) ((rename) (if (or (null? (cddr curr)) ; no parameter supplied (not (symbol? (caddr curr)))) (sxml:modification-error "improper new name for the node to be renamed: " curr) (iter (cdr src) (cons (let ((new-name (caddr curr))) (list txpath-lambda relative? (modif:rename new-name))) res)))) ((move-into move-following move-preceding) (if (or (null? (cddr curr)) ; no lpath supplied (not (string? (caddr curr)))) (sxml:modification-error "improper destination location path for move action: " curr) (and-let* ((ast (txp:xpath->ast (caddr curr))) (txpath-pair (ddo:ast-location-path ast #f #t 0))) (iter (cdr src) (cons (list (car txpath-pair) #t ((cdr (assq action `((move-into . ,modif:insert-into) (move-following . ,modif:insert-following) (move-preceding . ,modif:insert-preceding)))) (lambda (context base-node) base-node))) (cons (list txpath-lambda relative? modif:delete) res)))))) (else (sxml:modification-error "unknown action: " curr))))))))))))) ;========================================================================== ; Several popular handlers ; Node insertion ; node-specifier ::= (lambda (context base-node) ...) ; The lambda specifies the node to be inserted (define (modif:insert-following node-specifier) (lambda (node context base-node) (cons node (as-nodeset (node-specifier context base-node))))) (define (modif:insert-preceding node-specifier) (lambda (node context base-node) (let ((new (node-specifier context base-node))) (if (nodeset? new) (append new (list node)) (list new node))))) (define (modif:insert-into node-specifier) (lambda (node context base-node) (if (not (pair? node)) ; can't insert into node (append node (as-nodeset (node-specifier context base-node)))))) ; Rename (define (modif:rename new-name) (lambda (node context base-node) (if (pair? node) ; named node (cons new-name (cdr node)) node))) ; Delete (define modif:delete (lambda (node context base-node) '())) (define modif:delete-undeep (lambda (node context base-node) (if (pair? node) (cdr node) '()))) ;========================================================================== ; Highest-level API function ; update-specifiers ::= (listof update-specifier) ; update-specifier ::= (list xpath-location-path action [action-parametes]) ; xpath-location-path - addresses the node(s) to be transformed, in the form of ; XPath location path. If the location path is absolute, it addresses the ; node(s) with respect to the root of the document being transformed. If the ; location path is relative, it addresses the node(s) with respect to the ; node selected by the previous update-specifier. The location path in the ; first update-specifier always addresses the node(s) with respect to the ; root of the document. We'll further refer to the node with respect of which ; the location path is evaluated as to the base-node for this location path. ; action - specifies the modification to be made over each of the node(s) ; addressed by the location path. Possible actions are described below. ; action-parameters - additional parameters supplied for the action. The number ; of parameters and their semantics depend on the definite action. ; ; action ::= 'delete | 'delete-undeep | ; 'insert-into | 'insert-following | 'insert-preceding | ; 'replace | ; 'move-into | 'move-following | 'move-preceding | ; handler ; 'delete - deletes the node. Expects no action-parameters ; 'delete-undeep - deletes the node, but keeps all its content (which thus ; moves to one level upwards in the document tree). Expects no ; action-parameters ; 'insert-into - inserts the new node(s) as the last children of the given ; node. The new node(s) are specified in SXML as action-parameters ; 'insert-following, 'insert-preceding - inserts the new node(s) after (before) ; the given node. Action-parameters are the same as for 'insert-into ; 'replace - replaces the given node with the new node(s). Action-parameters ; are the same as for 'insert-into ; 'rename - renames the given node. The node to be renamed must be a pair (i.e. ; not a text node). A single action-parameter is expected, which is to be ; a Scheme symbol to specify the new name of the given node ; 'move-into - moves the given node to a new location. The single ; action-parameter is the location path, which addresses the new location ; with respect to the given node as the base node. The given node becomes ; the last child of the node selected by the parameter location path. ; 'move-following, 'move-preceding - the given node is moved to the location ; respectively after (before) the node selected by the parameter location ; path ; handler ::= (lambda (node context base-node) ...) ; handler - specifies the required transformation. It is an arbitrary lambda ; that consumes the node and its context (the latter can be used for addressing ; the other node of the source document relative to the given node). The hander ; can return one of the following 2 things: a node or a nodeset. ; 1. If a node is returned, than it replaces the source node in the result ; document ; 2. If a nodeset is returned, than the source node is replaced by (multiple) ; nodes from this nodeset, in the same order in which they appear in the ; nodeset. In particular, if the empty nodeset is returned by the handler, the ; source node is removed from the result document and nothing is inserted ; instead. ; ; Returns either (lambda (doc) ...) or #f ; The latter signals of an error, an the error message is printed into stderr ; as a side effect. In the former case, the lambda can be applied to an SXML ; document and produces the new SXML document being the result of the ; modification specified. (define (sxml:modify . update-specifiers) (and-let* ((lambdas-upd-specifiers (sxml:update-specifiers->lambdas update-specifiers))) (lambda (doc) (sxml:transform-document doc (sxml:lambdas-upd-specifiers->targets doc lambdas-upd-specifiers))))) (provide (all-defined)))