#lang racket/base
(require (planet neil/mcfly:1:0))
(doc (section "Introduction")
(para (italic "Note: This package is in a state of active development, and
some interface changes, perhaps not backward-compatible,
are expected. Documentation is also in-progress."))
(para (hyperlink "http://pobox.com/~oleg/ftp/Scheme/SXML.html"
"SXML")
" is a representation for XML in Scheme, defined by Oleg
Kiselyov. ``SXML/xexp'' is the temporary name for a format for Racket that's
based on SXML and is mostly compatible with it. SXML/xexp is used for both
HTML and XML. The current plan is, hopefully, for the ``/xexp'' part of the
name to go away, and for SXML and SXML/xexp to merge. For now, Racket language
identifiers based on SXML/xexp will have ``"
(racketfont "xexp")
"''instead of ``"
(racketfont "sxml")
"'', because we do not want to call something ``SXML'' if it is not
strictly SXML. (And, historically, ``xexp'' was much more different from SXML,
while we experimented with unifying SXML, SHTML, and PLT "
(racketfont "xexpr")
", but we have decided to move back to as compatible with SXML as
practical.)"))
(doc (subsection "Differences with SXML")
(para "SXML/xexp can be defined as differences from SXML:")
(itemlist
(item (italic "xexp")
" syntax must be ordered as in SXML first normal form (1NF). For
example, any attributes list must precede child elements. SXML/xexp tools "
(italic "may")
" be permissive about accepting other orderings, but generally
should not emit any ordering but 1NF ordering.")
(item "The SXML keyword symbols, such as "
(racket *TOP*)
" may be in lowercase (e.g., "
(racket *top*)
")")
(item (italic "xexp")
" adds a special "
(racket &)
" syntax for character entity references. The syntax is "
(racket (& #,(italic "val")))
", where "
(racket #,(italic "val"))
" is the symbolic name of the character as a symbol, or an integer
with the numeric value of the character.")))
(doc (section "SXML and SXML/xexp Tools")
(para "Libraries using SXML/xexp include:")
(tabular
(list
(list (hyperlink "http://www.neilvandyke.org/racket-html-writing/"
"html-writing")
(list "Writing HTML from SXML/xexp."))
(list (hyperlink "http://www.neilvandyke.org/racket-html-template/"
"html-template")
(list "Writing HTML from SXML/xexp templates."))
(list (hyperlink "http://www.neilvandyke.org/racket-html-parsing/"
"html-parsing")
(list "Permissively parsing HTML to SXML/xexp."))
(list (hyperlink "http://www.neilvandyke.org/webscraperhelper/"
"WebScraperHelper")
(list "Example-based SXPath query generation for SXML/xexp."))))
(para "There are also some older libraries for SXML, which often can be
used for SXML/xexp:")
(tabular
(list
(list (hyperlink "http://okmij.org/ftp/Scheme/xml.html#SXPath"
"SXPath")
"XPath query language implementation for SXML, by Oleg Kiselyov.")
(list (hyperlink "http://celtic.benderweb.net/sxml-match/"
"sxml-match")
"Pattern-matching of SXML, by Jim Bender.")
(list (hyperlink "http://www196.pair.com/lisovsky/xml/ssax/"
"SSAX")
"Parsing of XML to SXML, by Oleg Kiselyov, and maintained by Kirill
Lisovsky."))))
(doc (section "Definitions")
(para "Some definitions used by many SXML/xexp packages..."))
(doc (subsection "Exceptions"))
(doc (defstruct (exn:fail:invalid-xexp exn:fail)
((expected string?)
(context-xexp any/c)
(invalid-xexp any/c))
#:transparent)
"!!!")
(provide (struct-out exn:fail:invalid-xexp))
(define-struct (exn:fail:invalid-xexp exn:fail)
(expected
context-xexp
invalid-xexp)
#:transparent)
(doc (defproc (make-invalid-xexp-exn
( sym symbol?)
(#:continuation-marks continuation-marks continuation-marks?)
(#:expected expected string?)
(#:invalid-xexp invalid-xexp any/c)
(#:context-xexp context-xexp any/c (void)))
fail:exn:invalid-xexp?
(para "Constructs a "
(racket fail:exn:invalid-xexp)
" exception object.")))
(provide make-invalid-xexp-exn)
(define (make-invalid-xexp-exn
sym
#:continuation-marks continuation-marks
#:expected expected
#:invalid-xexp invalid-xexp
#:context-xexp (context-xexp (void)))
(exn:fail:invalid-xexp
(if (void? context-xexp)
(format "~A: invalid xexp: expected ~A; got ~S"
sym
expected
invalid-xexp)
(format "~A: invalid xexp: expected ~A; got ~S in ~S"
sym
expected
invalid-xexp
context-xexp))
continuation-marks
expected
context-xexp
invalid-xexp))
(doc (defform (raise-invalid-xexp-exn !!!)
"!!!"))
(provide raise-invalid-xexp-exn)
(define-syntax raise-invalid-xexp-exn
(syntax-rules ()
((_ SYM
#:expected EXPECTED
#:invalid-xexp INVALID-XEXP
#:context-xexp CONTEXT-XEXP)
(raise (make-invalid-xexp-exn
SYM
#:continuation-marks (current-continuation-marks)
#:expected EXPECTED
#:invalid-xexp INVALID-XEXP
#:context-xexp CONTEXT-XEXP)))
((_ SYM
#:expected EXPECTED
#:invalid-xexp INVALID-XEXP)
(raise-invalid-xexp-exn
SYM
#:expected EXPECTED
#:invalid-xexp INVALID-XEXP
#:context-xexp (void)))))
(doc (subsection "Misc.")
(para "The following definitions are used by some "
(italic "xexp")
"-related libraries."))
(doc (defproc (make-xexp-char-ref (val symbol?))
xexp-char-ref?
"Yields an SXML/xexp "
(racket xexp)
" character entity reference for "
(racket val)
". For example:"
(racketinput (make-xexp-char-ref 'rArr)
#,(racketresult (& rArr)))
(racketinput (make-xexp-char-ref 151)
#,(racketresult (& 151)))))
(define (make-xexp-char-ref val)
(if (or (symbol? val) (integer? val))
`(& ,val)
(error 'make-xexp-char-ref
"invalid xexp reference value: ~S"
val)))
(define (%find-first-xexp-non-extraneous-list lst)
(let loop ((lst lst))
(cond ((null? lst) #f)
((pair? lst)
(loop (car lst))
(loop (cdr lst)))
(else lst))))
(define (%xexp:assert-only-xexp-extraneous-lists lst)
(cond ((%find-first-xexp-non-extraneous-list lst)
=> (lambda (x)
(raise-invalid-xexp-exn
'%xexp:assert-only-xexp-extraneous-lists
#:expected "nothing except extraneous lists and nulls"
#:invalid-xexp x)))))
(doc (defproc (xexp-char-ref-value (char-ref xexp-char-ref?))
(or/c symbol? integer?)
"Yields the symbol or integer value for SXML/xexp character reference "
(racket char-ref)
". Raises exception "
(racket exn:fail:invalid-xexp)
" on an error. For example:"
(racketinput (xexp-char-ref-value '(& nbsp))
#,(racketresult nbsp))
(racketinput (xexp-char-ref-value '(& 2000))
#,(racketresult 2000))))
(provide xexp-char-ref-value)
(define (xexp-char-ref-value char-ref)
(if (and (pair? char-ref)
(eq? '& (car char-ref)))
(let loop-find-symbol ((lst (cdr char-ref)))
(cond ((null? lst)
(raise-invalid-xexp-exn
'xexp-char-ref-value
#:expected "proper list in xexp-char-ref body"
#:invalid-xexp lst
#:context-xexp char-ref))
((pair? lst)
(let ((head (car lst)))
(cond ((symbol? head)
(%xexp:assert-only-xexp-extraneous-lists (cdr lst))
head)
((pair? head)
(cond ((loop-find-symbol head)
=> (lambda (found)
(%xexp:assert-only-xexp-extraneous-lists (cdr head))
(%xexp:assert-only-xexp-extraneous-lists (cdr lst))
found))
((loop-find-symbol (cdr head))
=> (lambda (found)
(%xexp:assert-only-xexp-extraneous-lists (cdr lst))
found))
(else (loop-find-symbol (cdr lst)))))
((null? head) (loop-find-symbol (cdr lst)))
(else (raise-invalid-xexp-exn
'xexp-char-ref-value
#:expected "xexp-char-ref body"
#:invalid-xexp head
#:context-xexp char-ref)))))
(else (raise-invalid-xexp-exn
'xexp-char-ref-value
#:expected "proper list in xexp-char-ref body"
#:invalid-xexp lst
#:context-xexp char-ref))))
(raise-invalid-xexp-exn 'xexp-char-ref-value
#:expected "xexp-char-ref"
#:invalid-xexp char-ref)))
(doc (defthing always-empty-html-elements
(list/c symbol?)
(para "Deprecated. This is a legacy definition from HtmlPrag that will
eventually disappear."))
(para "List of symbols for names of HTML elements that can never have content.
For example, the "
(code "br")
" element."))
(provide always-empty-html-elements)
(define always-empty-html-elements
'(area base br frame hr img input isindex keygen link meta object param
spacer wbr))
(define %xexp:whitespace-char-list
(list #\space
#\tab
#\newline
#\return))
(define (%xexp:whitespace-char? c)
(memv c %xexp:whitespace-char-list))
(doc history
(#:planet 2:0 #:date "2012-06-12"
(itemlist
(item "Somewhat heavy changes. (Previous version was labeled
as in-development, as is this one.)")
(item "Converted to McFly.")
(item "Removed the "
(code "typed/racket/base/no-check")
" for now.")))
(#:version "0.1" #:planet 1:0 #:date "2011-08-21"
(itemlist
(item
"Part of forked development from HtmlPrag."))))