#!r6rs
(library (bert)
(export bert-encode bert-decode)
(import (rnrs base)
(rnrs bytevectors)
(rnrs lists)
(rnrs hashtables)
(rnrs mutable-pairs)
(rnrs io ports)
(rnrs io simple)
(rnrs control)
(rename
(rnrs arithmetic bitwise)
(bitwise-and band)
(bitwise-ior bor)
(bitwise-arithmetic-shift-left bsl)
(bitwise-arithmetic-shift-right bsr))
(rnrs arithmetic fixnums)
(rnrs arithmetic flonums)
(rnrs r5rs)
(rnrs unicode)
(srfi :19))
(define ERL_SMALL_ERL_INT 97)
(define ERL_INT 98)
(define ERL_SMALL_BIGNUM 110)
(define ERL_LARGE_BIGNUM 111)
(define ERL_FLOAT 99)
(define ERL_ATOM 100)
(define ERL_SMALL_TUPLE 104)
(define ERL_LARGE_TUPLE 105)
(define ERL_NIL 106)
(define ERL_STRING 107)
(define ERL_LIST 108)
(define ERL_BIN 109)
(define ERL_MAGIC 131)
(define MAX_ERL_INT (- (bsl 1 31) 1))
(define MIN_ERL_INT (- (bsl 1 31)))
(define (dropwhile proc list)
(let recur ((proc proc)
(list list)
(acc '()))
(if (or (null? list) (not (proc (car list))))
(reverse acc)
(recur proc (cdr list) (cons (car list) acc)))))
(define (fold/and proc lst)
(cond
((null? lst)
#t)
((proc (car lst))
(fold/and proc (cdr lst)))
(else #f)))
(define (write-1 outpr byte)
(put-u8 outpr byte))
(define (write-2 outpr short)
(put-u8 outpr (bsr short 8))
(put-u8 outpr (band short #xff)))
(define (write-4 outpr long)
(let ((b (make-bytevector 4)))
(bytevector-s32-set! b 0 long (endianness big))
(put-bytevector outpr b)))
(define (write-binary outpr data)
(write-1 outpr ERL_BIN)
(let ((b (string->bytevector data (native-transcoder))))
(write-4 outpr (bytevector-length b))
(put-bytevector outpr b)))
(define (write-float outpr float)
(write-1 outpr ERL_FLOAT)
(let* ((str (fldigits float))
(len (string-length str)))
(write-str outpr str)
(let pad ((x (- 31 len)))
(if (zero? x) '()
(begin
(write-1 outpr 0)
(pad (- x 1)))))))
(define (write-str outpr str)
(put-bytevector
outpr
(string->bytevector str (native-transcoder))))
(define (write-atom outpr symbol)
(let* ((str (symbol->string symbol))
(len (string-length str)))
(if (< len 256)
(if (eq? #f (pregexp-match "^[a-zA-Z]+[a-zA-Z0-9_]*$" str))
(error "write-symbol"
"Syntax error, unexpected character.")
(begin
(write-1 outpr ERL_ATOM)
(write-2 outpr len)
(write-str outpr str)))
(error "write-symbol"
"Length error, should be in range [0,255]"))))
(define (write-list outpr data)
(cond
((null? data)
(write-1 outpr ERL_NIL))
((and (<= (length data) 65535)
(fold/and (lambda (x) (isbyte? x)) data))
(write-1 outpr ERL_STRING)
(write-2 outpr (length data))
(for-each (lambda (x) (write-1 outpr x)) data))
(else (write-1 outpr ERL_LIST)
(write-4 outpr (length data))
(for-each (lambda (x) (write-any-raw outpr x)) data)
(write-1 outpr ERL_NIL))))
(define (write-tuple outpr data)
(let ((len (vector-length data)))
(if (< len 256)
(begin
(write-1 outpr ERL_SMALL_TUPLE)
(write-1 outpr len))
(begin
(write-1 outpr ERL_LARGE_TUPLE)
(write-4 outpr len)))
(let vector-do ((position 0))
(unless (= position len)
(begin
(write-any-raw outpr (vector-ref data position))
(vector-do (+ position 1)))))))
(define (isbyte? x)
(and (integer? x) (>= x 0) (< x 256)))
(define (write-fixnum outpr num)
(cond
((isbyte? num)
(write-1 outpr ERL_SMALL_ERL_INT)
(write-1 outpr num))
((and (>= num MIN_ERL_INT) (<= num MAX_ERL_INT))
(write-1 outpr ERL_INT)
(write-4 outpr num))
(else (write-bignum outpr num))))
(define (write-bignum outpr num)
(let ((n (ceiling (/ (bitwise-length num) 8)))) bitwise-length
(if (< n 256)
(begin
(write-1 outpr ERL_SMALL_BIGNUM)
(write-1 outpr n))
(begin
(write-1 outpr ERL_LARGE_BIGNUM)
(write-4 outpr n)))
(write-bignum-guts outpr num)))
(define (write-bignum-guts outpr num)
(write-1 outpr (if (< num 0) 1 0)) (let wr-b ((numabs (abs num)))
(unless (zero? numabs)
(begin
(write-1 outpr (mod numabs 256))
(wr-b (bsr numabs 8))))))
(define (write-hash outpr hash)
(let-values (((a b) (hashtable-entries hash)))
(write-any-raw outpr
(vector 'bert 'dict
(vector->list
(vector-map
(lambda (k v)
(vector (convert k) (convert v)))
a b))))))
(define (write-any-raw outpr obj)
(cond
((list? obj)
(write-list outpr obj))
((hashtable? obj)
(write-hash outpr obj))
((vector? obj)
(write-tuple outpr obj))
((symbol? obj)
(write-atom outpr obj))
((flonum? obj) (write-float outpr obj))
((integer? obj)
(write-fixnum outpr obj))
((string? obj)
(write-binary outpr obj))
(else (error "write-any-raw" "Not implemented."))))
(define (write-any outpr obj)
(write-1 outpr ERL_MAGIC)
(write-any-raw outpr obj))
(define (convert obj)
(cond
((eq? obj #t)
(vector 'bert 'true))
((eq? obj #f)
(vector 'bert 'false))
((eqv? obj 'nil)
(vector 'bert 'nil))
((time? obj)
(let ((t (time-second obj)))
(vector 'bert
'time
(div t 1000000)
(mod t 1000000)
(div (time-nanosecond obj) 1000))))
(else obj)))
(define (bert-encode obj)
(call-with-bytevector-output-port
(lambda(outpr) (write-any outpr (convert obj)))))
(define (funpack float)
(let ((b (make-bytevector 8)))
(bytevector-ieee-double-native-set! b 0 float)
(values
(bitwise-bit-field (bytevector-u64-native-ref b 0) 63 64)
(bitwise-bit-field (bytevector-u64-native-ref b 0) 52 63)
(bitwise-bit-field (bytevector-u64-native-ref b 0) 0 52))))
(define (frexp float)
(let ((ERL_FLOAT_BIAS 1022))
(define (frac1 sign expon frac)
(let ((b (make-bytevector 8)))
(bytevector-u64-native-set!
b 0 (bor (bsl (bor (bsl sign 11) ERL_FLOAT_BIAS) 52) frac))
(bytevector-ieee-double-native-ref b 0)))
(let-values (((sign expon frac) (funpack float)))
(cond
((fold/and zero? (list sign expon frac))
(values 0 0))
((zero? expon)
(cons (frac1 sign exp (- frac 1))
(+ (- (- ERL_FLOAT_BIAS) 52) (bitwise-length frac))))
(else
(cons (frac1 sign expon frac)
(- expon ERL_FLOAT_BIAS)))))))
(define (insert-decimal-exp place s)
(let* ((len (string-length s))
(s0 (if (= len 1) "0"
(substring s 1 len)))
(e (if (< place 1) "e-" "e+"))
(t (- 20 (string-length s0)))
(s1 (if (not (zero? t))
(string-append s0 (make-string t #\0))
s0)))
(string-append (substring s 0 1) "." s1 e
(number->string (abs (- place 1))))))
(define (fldigits float)
(if (zero? float)
"0.0"
(let* ((frexpres (frexp float))
(f (car frexpres))
(e (cdr frexpres))
(e1 (- e 53))
(f1 (exact (truncate (* (abs f) (bsl 1 53)))))
(fdig (flonum->digits float f1 e1))
(place (car fdig))
(digits (cdr fdig))
(r (map (lambda(x)(integer->char (+ x #x30))) digits))
(strdigits (list->string r)))
(string-append (if (< float 0) "-" "")
(insert-decimal-exp place strdigits)))))
(define flonum->digits
(let ([min-e -1074]
[bp-1 (expt 2 52)])
(lambda (v f e)
(let ([round? (even? f)])
(if (>= e 0)
(if (not (= f bp-1))
(let ([be (expt 2 e)])
(scale (* f be 2) 2 be be 0 round? round? v))
(let ([be (expt 2 e)])
(scale (* f be 4) 4 (* be 2) be 0 round? round? v)))
(if (or (= e min-e) (not (= f bp-1)))
(scale (* f 2) (expt 2 (- 1 e)) 1 1 0 round? round? v)
(scale (* f 4) (expt 2 (- 2 e)) 2 1 0
round? round? v)))))))
(define scale
(lambda (r s m+ m- k low-ok? high-ok? v)
(newline)
(let ([est (exact (ceiling (- (log10 (abs v)) 1e-10)))])
(if (>= est 0)
(fixup r (* s (expt10 est)) m+ m- est low-ok? high-ok?)
(let ([scale (expt10 (- est))])
(fixup (* r scale) s (* m+ scale) (* m- scale)
est low-ok? high-ok?))))))
(define fixup
(lambda (r s m+ m- k low-ok? high-ok?)
(if ((if high-ok? >= >) (+ r m+) s) (cons (+ k 1) (generate r s m+ m- low-ok? high-ok?))
(cons k
(generate (* r 10) s (* m+ 10)
(* m- 10) low-ok? high-ok?)))))
(define generate
(lambda (r s m+ m- low-ok? high-ok?)
(let ([d (quotient r s)]
[r (remainder r s)])
(let ([tc1 ((if low-ok? <= <) r m-)]
[tc2 ((if high-ok? >= >) (+ r m+) s)])
(if (not tc1)
(if (not tc2)
(cons d (generate (* r 10) s (* m+ 10) (* m- 10)
low-ok? high-ok?))
(list (+ d 1)))
(if (not tc2)
(list d)
(if (< (* r 2) s) (list d) (list (+ d 1)))))))))
(define expt10
(let ([table (make-vector 326)])
(do ([k 0 (+ k 1)] [v 1 (* v 10)])
((= k 326))
(vector-set! table k v))
(lambda (k)
(vector-ref table k))))
(define log10
(let ([f (/ (log 10))])
(lambda (x)
(* (log x) f))))
(define (bert-decode bytevector)
(read-any (open-bytevector-input-port bytevector)))
(define (read-any inp)
(if (eq? (read-small-int inp) ERL_MAGIC)
(read-any-raw inp)
(error "read-any" "Bad Magic")))
(define (read-any-raw inp)
(let ((tag (read-small-int inp)))
(cond
((eq? tag ERL_ATOM) (read-atom inp))
((eq? tag ERL_SMALL_ERL_INT) (read-small-int inp))
((eq? tag ERL_INT) (read-int inp))
((eq? tag ERL_SMALL_BIGNUM) (read-bignum-t inp 'small))
((eq? tag ERL_LARGE_BIGNUM) (read-bignum-t inp 'large))
((eq? tag ERL_FLOAT) (read-float inp))
((eq? tag ERL_SMALL_TUPLE) (read-tuple-t inp 'small))
((eq? tag ERL_LARGE_TUPLE) (read-tuple-t inp 'large))
((eq? tag ERL_NIL) (read-nil inp))
((eq? tag ERL_STRING) (read-erl-string inp))
((eq? tag ERL_LIST) (read-list inp))
((eq? tag ERL_BIN) (read-bin inp))
(else
(error "read-any-raw"
(string-append
"Unknown term tag: "
(number->string tag)))))))
(define (read-n inp len)
(get-bytevector-n inp len))
(define (read-1 inp)
(read-n inp 1))
(define (read-2 inp)
(read-n inp 2))
(define (read-4 inp)
(read-n inp 4))
(define (read-short inp)
(bytevector-u16-ref (read-2 inp) 0 (endianness big)))
(define (read-small-int inp)
(bytevector-u8-ref (read-1 inp) 0))
(define (read-int inp)
(bytevector-s32-ref (read-4 inp) 0 (endianness big)))
(define (read-str inp length)
(bytevector->string (read-n inp length)
(native-transcoder)))
(define (read-atom inp)
(string->symbol (read-str inp (read-short inp))))
(define (read-bignum-t inp type)
(let* ((size (if (eq? type 'small)
(read-small-int inp)
(read-int inp)))
(sign (read-small-int inp))
(bytes (bytevector->u8-list (read-n inp size))))
(let recur ((i 0)
(bytes bytes)
(sum 0))
(if (null? bytes)
(if (zero? sign) sum (- sum))
(recur (+ i 1) (cdr bytes)
(+ sum (* (car bytes) (expt 256 i))))))))
(define (read-float inp)
(string->number
(bytevector->string
(u8-list->bytevector
(dropwhile (lambda(x) (> x 0))
(bytevector->u8-list (read-n inp 31))))
(native-transcoder))))
(define (read-tuple-t inp type)
(let ((arity (if (eq? type 'small)
(read-small-int inp)
(read-int inp))))
(if (> arity 0)
(let ((tag (read-any-raw inp)))
(if (eq? tag 'bert)
(read-complex-type inp arity)
(let ((v (make-vector arity)))
(vector-set! v 0 tag)
(let recur ((n 1))
(if (eq? n arity)
v
(begin
(vector-set! v n (read-any-raw inp))
(recur (+ n 1))))))))
(vector))))
(define (read-complex-type inp arity)
(let ((obj (read-any-raw inp)))
(cond
((eq? obj 'nil)
'nil)
((eq? obj 'true)
#t)
((eq? obj 'false)
#f)
((eq? obj 'time)
(let* ((a (read-any-raw inp))
(b (read-any-raw inp))
(c (read-any-raw inp))
(second (+ (* a 1000000) b))
(nanosecond (* c 1000)))
(make-time 'time-utc nanosecond second)))
((eq? obj 'dict)
(read-dict inp))
(else
'nil))))
(define (read-dict inp)
(let ((type (read-small-int inp))
(h (make-eq-hashtable)))
(cond
((eq? type ERL_LIST)
(let recur ((len (read-int inp)))
(if (zero? len)
(begin
(read-1 inp) h)
(let* ((pair (read-any-raw inp))
(k (vector-ref pair 0))
(v (vector-ref pair 1)))
(hashtable-set! h k v)
(recur (- len 1))))))
((eq? type ERL_NIL)
h)
(else
(error "read-dict"
"Invalid dict spec, not an erlang list"))))
)
(define (read-list inp)
(let ((len (read-int inp)))
(let recur ((i len)
(lst '()))
(if (zero? i)
(begin
(read-1 inp)
(reverse lst))
(recur (- i 1) (cons (read-any-raw inp) lst))))))
(define (read-nil inp)
'())
(define (read-erl-string inp)
(bytevector->u8-list (read-n inp (read-short inp))))
(define (read-bin inp)
(read-str inp (read-int inp)))
(define *pregexp-version* 20050502)
(define *pregexp-comment-char* #\;)
(define *pregexp-nul-char-int*
(- (char->integer #\a) 97))
(define *pregexp-return-char*
(integer->char
(+ 13 *pregexp-nul-char-int*)))
(define *pregexp-tab-char*
(integer->char
(+ 9 *pregexp-nul-char-int*)))
(define *pregexp-space-sensitive?* #t)
(define pregexp-reverse!
(lambda (s)
(let loop ((s s) (r '()))
(if (null? s) r
(let ((d (cdr s)))
(set-cdr! s r)
(loop d s))))))
(define pregexp-error
(lambda whatever
(display "Error:")
(for-each (lambda (x) (display #\space) (write x))
whatever)
(newline)
(error "pregexp-error" "OH SHI-")))
(define pregexp-read-pattern
(lambda (s i n)
(if (>= i n)
(list
(list ':or (list ':seq)) i)
(let loop ((branches '()) (i i))
(if (or (>= i n)
(char=? (string-ref s i) #\)))
(list (cons ':or (pregexp-reverse! branches)) i)
(let ((vv (pregexp-read-branch
s
(if (char=? (string-ref s i) #\|) (+ i 1) i) n)))
(loop (cons (car vv) branches) (cadr vv))))))))
(define pregexp-read-branch
(lambda (s i n)
(let loop ((pieces '()) (i i))
(cond ((>= i n)
(list (cons ':seq (pregexp-reverse! pieces)) i))
((let ((c (string-ref s i)))
(or (char=? c #\|)
(char=? c #\))))
(list (cons ':seq (pregexp-reverse! pieces)) i))
(else (let ((vv (pregexp-read-piece s i n)))
(loop (cons (car vv) pieces) (cadr vv))))))))
(define pregexp-read-piece
(lambda (s i n)
(let ((c (string-ref s i)))
(case c
((#\^) (list ':bos (+ i 1)))
((#\$) (list ':eos (+ i 1)))
((#\.) (pregexp-wrap-quantifier-if-any
(list ':any (+ i 1)) s n))
((#\[) (let ((i+1 (+ i 1)))
(pregexp-wrap-quantifier-if-any
(case (and (< i+1 n) (string-ref s i+1))
((#\^)
(let ((vv (pregexp-read-char-list s (+ i 2) n)))
(list (list ':neg-char (car vv)) (cadr vv))))
(else (pregexp-read-char-list s i+1 n)))
s n)))
((#\()
(pregexp-wrap-quantifier-if-any
(pregexp-read-subpattern s (+ i 1) n) s n))
((#\\ )
(pregexp-wrap-quantifier-if-any
(cond ((pregexp-read-escaped-number s i n) =>
(lambda (num-i)
(list (list ':backref (car num-i)) (cadr num-i))))
((pregexp-read-escaped-char s i n) =>
(lambda (char-i)
(list (car char-i) (cadr char-i))))
(else (pregexp-error 'pregexp-read-piece 'backslash)))
s n))
(else
(if (or *pregexp-space-sensitive?*
(and (not (char-whitespace? c))
(not (char=? c *pregexp-comment-char*))))
(pregexp-wrap-quantifier-if-any
(list c (+ i 1)) s n)
(let loop ((i i) (in-comment? #f))
(if (>= i n) (list ':empty i)
(let ((c (string-ref s i)))
(cond (in-comment?
(loop (+ i 1)
(not (char=? c #\newline))))
((char-whitespace? c)
(loop (+ i 1) #f))
((char=? c *pregexp-comment-char*)
(loop (+ i 1) #t))
(else (list ':empty i))))))))))))
(define pregexp-read-escaped-number
(lambda (s i n)
(and (< (+ i 1) n) (let ((c (string-ref s (+ i 1))))
(and (char-numeric? c)
(let loop ((i (+ i 2)) (r (list c)))
(if (>= i n)
(list (string->number
(list->string (pregexp-reverse! r))) i)
(let ((c (string-ref s i)))
(if (char-numeric? c)
(loop (+ i 1) (cons c r))
(list (string->number
(list->string (pregexp-reverse! r)))
i))))))))))
(define pregexp-read-escaped-char
(lambda (s i n)
(and (< (+ i 1) n)
(let ((c (string-ref s (+ i 1))))
(case c
((#\b) (list ':wbdry (+ i 2)))
((#\B) (list ':not-wbdry (+ i 2)))
((#\d) (list ':digit (+ i 2)))
((#\D) (list '(:neg-char :digit) (+ i 2)))
((#\n) (list #\newline (+ i 2)))
((#\r) (list *pregexp-return-char* (+ i 2)))
((#\s) (list ':space (+ i 2)))
((#\S) (list '(:neg-char :space) (+ i 2)))
((#\t) (list *pregexp-tab-char* (+ i 2)))
((#\w) (list ':word (+ i 2)))
((#\W) (list '(:neg-char :word) (+ i 2)))
(else (list c (+ i 2))))))))
(define pregexp-read-posix-char-class
(lambda (s i n)
(let ((neg? #f))
(let loop ((i i) (r (list #\:)))
(if (>= i n)
(pregexp-error 'pregexp-read-posix-char-class)
(let ((c (string-ref s i)))
(cond ((char=? c #\^)
(set! neg? #t)
(loop (+ i 1) r))
((char-alphabetic? c)
(loop (+ i 1) (cons c r)))
((char=? c #\:)
(if (or (>= (+ i 1) n)
(not (char=? (string-ref s (+ i 1)) #\])))
(pregexp-error 'pregexp-read-posix-char-class)
(let ((posix-class
(string->symbol
(list->string (pregexp-reverse! r)))))
(list (if neg? (list ':neg-char posix-class)
posix-class)
(+ i 2)))))
(else
(pregexp-error 'pregexp-read-posix-char-class)))))))))
(define pregexp-read-cluster-type
(lambda (s i n)
(let ((c (string-ref s i)))
(case c
((#\?)
(let ((i (+ i 1)))
(case (string-ref s i)
((#\:) (list '() (+ i 1)))
((#\=) (list '(:lookahead) (+ i 1)))
((#\!) (list '(:neg-lookahead) (+ i 1)))
((#\>) (list '(:no-backtrack) (+ i 1)))
((#\<)
(list (case (string-ref s (+ i 1))
((#\=) '(:lookbehind))
((#\!) '(:neg-lookbehind))
(else (pregexp-error 'pregexp-read-cluster-type)))
(+ i 2)))
(else (let loop ((i i) (r '()) (inv? #f))
(let ((c (string-ref s i)))
(case c
((#\-) (loop (+ i 1) r #t))
((#\i) (loop (+ i 1)
(cons (if inv? ':case-sensitive
':case-insensitive) r) #f))
((#\x)
(set! *pregexp-space-sensitive?* inv?)
(loop (+ i 1) r #f))
((#\:) (list r (+ i 1)))
(else (pregexp-error
'pregexp-read-cluster-type)))))))))
(else (list '(:sub) i))))))
(define pregexp-read-subpattern
(lambda (s i n)
(let* ((remember-space-sensitive? *pregexp-space-sensitive?*)
(ctyp-i (pregexp-read-cluster-type s i n))
(ctyp (car ctyp-i))
(i (cadr ctyp-i))
(vv (pregexp-read-pattern s i n)))
(set! *pregexp-space-sensitive?* remember-space-sensitive?)
(let ((vv-re (car vv))
(vv-i (cadr vv)))
(if (and (< vv-i n)
(char=? (string-ref s vv-i)
#\)))
(list
(let loop ((ctyp ctyp) (re vv-re))
(if (null? ctyp) re
(loop (cdr ctyp)
(list (car ctyp) re))))
(+ vv-i 1))
(pregexp-error 'pregexp-read-subpattern))))))
(define pregexp-wrap-quantifier-if-any
(lambda (vv s n)
(let ((re (car vv)))
(let loop ((i (cadr vv)))
(if (>= i n) vv
(let ((c (string-ref s i)))
(if (and (char-whitespace? c) (not *pregexp-space-sensitive?*))
(loop (+ i 1))
(case c
((#\* #\+ #\? #\{)
(let* ((new-re (list ':between 'minimal?
'at-least 'at-most re))
(new-vv (list new-re 'next-i)))
(case c
((#\*) (set-car! (cddr new-re) 0)
(set-car! (cdddr new-re) #f))
((#\+) (set-car! (cddr new-re) 1)
(set-car! (cdddr new-re) #f))
((#\?) (set-car! (cddr new-re) 0)
(set-car! (cdddr new-re) 1))
((#\{) (let ((pq (pregexp-read-nums s (+ i 1) n)))
(if (not pq)
(pregexp-error
'pregexp-wrap-quantifier-if-any
'left-brace-must-be-followed-by-number))
(set-car! (cddr new-re) (car pq))
(set-car! (cdddr new-re) (cadr pq))
(set! i (caddr pq)))))
(let loop ((i (+ i 1)))
(if (>= i n)
(begin (set-car! (cdr new-re) #f)
(set-car! (cdr new-vv) i))
(let ((c (string-ref s i)))
(cond ((and (char-whitespace? c)
(not *pregexp-space-sensitive?*))
(loop (+ i 1)))
((char=? c #\?)
(set-car! (cdr new-re) #t)
(set-car! (cdr new-vv) (+ i 1)))
(else (set-car! (cdr new-re) #f)
(set-car! (cdr new-vv) i))))))
new-vv))
(else vv)))))))))
(define pregexp-read-nums
(lambda (s i n)
(let loop ((p '()) (q '()) (k i) (reading 1))
(if (>= k n) (pregexp-error 'pregexp-read-nums))
(let ((c (string-ref s k)))
(cond ((char-numeric? c)
(if (= reading 1)
(loop (cons c p) q (+ k 1) 1)
(loop p (cons c q) (+ k 1) 2)))
((and (char-whitespace? c) (not *pregexp-space-sensitive?*))
(loop p q (+ k 1) reading))
((and (char=? c #\,) (= reading 1))
(loop p q (+ k 1) 2))
((char=? c #\})
(let ((p (string->number (list->string (pregexp-reverse! p))))
(q (string->number (list->string (pregexp-reverse! q)))))
(cond ((and (not p) (= reading 1)) (list 0 #f k))
((= reading 1) (list p p k))
(else (list p q k)))))
(else #f))))))
(define pregexp-invert-char-list
(lambda (vv)
(set-car! (car vv) ':none-of-chars)
vv))
(define pregexp-read-char-list
(lambda (s i n)
(let loop ((r '()) (i i))
(if (>= i n)
(pregexp-error 'pregexp-read-char-list
'character-class-ended-too-soon)
(let ((c (string-ref s i)))
(case c
((#\]) (if (null? r)
(loop (cons c r) (+ i 1))
(list (cons ':one-of-chars (pregexp-reverse! r))
(+ i 1))))
((#\\ )
(let ((char-i (pregexp-read-escaped-char s i n)))
(if char-i (loop (cons (car char-i) r) (cadr char-i))
(pregexp-error 'pregexp-read-char-list 'backslash))))
((#\-) (if (or (null? r)
(let ((i+1 (+ i 1)))
(and (< i+1 n)
(char=? (string-ref s i+1) #\]))))
(loop (cons c r) (+ i 1))
(let ((c-prev (car r)))
(if (char? c-prev)
(loop (cons (list ':char-range c-prev
(string-ref s (+ i 1))) (cdr r))
(+ i 2))
(loop (cons c r) (+ i 1))))))
((#\[) (if (char=? (string-ref s (+ i 1)) #\:)
(let ((posix-char-class-i
(pregexp-read-posix-char-class s (+ i 2) n)))
(loop (cons (car posix-char-class-i) r)
(cadr posix-char-class-i)))
(loop (cons c r) (+ i 1))))
(else (loop (cons c r) (+ i 1)))))))))
(define pregexp-string-match
(lambda (s1 s i n sk fk)
(let ((n1 (string-length s1)))
(if (> n1 n) (fk)
(let loop ((j 0) (k i))
(cond ((>= j n1) (sk k))
((>= k n) (fk))
((char=? (string-ref s1 j) (string-ref s k))
(loop (+ j 1) (+ k 1)))
(else (fk))))))))
(define pregexp-char-word?
(lambda (c)
(or (char-alphabetic? c)
(char-numeric? c)
(char=? c #\_))))
(define pregexp-at-word-boundary?
(lambda (s i n)
(or (= i 0) (>= i n)
(let ((c/i (string-ref s i))
(c/i-1 (string-ref s (- i 1))))
(let ((c/i/w? (pregexp-check-if-in-char-class?
c/i ':word))
(c/i-1/w? (pregexp-check-if-in-char-class?
c/i-1 ':word)))
(or (and c/i/w? (not c/i-1/w?))
(and (not c/i/w?) c/i-1/w?)))))))
(define pregexp-check-if-in-char-class?
(lambda (c char-class)
(case char-class
((:any) (not (char=? c #\newline)))
((:alnum) (or (char-alphabetic? c) (char-numeric? c)))
((:alpha) (char-alphabetic? c))
((:ascii) (< (char->integer c) 128))
((:blank) (or (char=? c #\space) (char=? c *pregexp-tab-char*)))
((:cntrl) (< (char->integer c) 32))
((:digit) (char-numeric? c))
((:graph) (and (>= (char->integer c) 32)
(not (char-whitespace? c))))
((:lower) (char-lower-case? c))
((:print) (>= (char->integer c) 32))
((:punct) (and (>= (char->integer c) 32)
(not (char-whitespace? c))
(not (char-alphabetic? c))
(not (char-numeric? c))))
((:space) (char-whitespace? c))
((:upper) (char-upper-case? c))
((:word) (or (char-alphabetic? c)
(char-numeric? c)
(char=? c #\_)))
((:xdigit) (or (char-numeric? c)
(char-ci=? c #\a) (char-ci=? c #\b)
(char-ci=? c #\c) (char-ci=? c #\d)
(char-ci=? c #\e) (char-ci=? c #\f)))
(else (pregexp-error 'pregexp-check-if-in-char-class?)))))
(define pregexp-list-ref
(lambda (s i)
(let loop ((s s) (k 0))
(cond ((null? s) #f)
((= k i) (car s))
(else (loop (cdr s) (+ k 1)))))))
(define pregexp-make-backref-list
(lambda (re)
(let sub ((re re))
(if (pair? re)
(let ((car-re (car re))
(sub-cdr-re (sub (cdr re))))
(if (eqv? car-re ':sub)
(cons (cons re #f) sub-cdr-re)
(append (sub car-re) sub-cdr-re)))
'()))))
(define pregexp-match-positions-aux
(lambda (re s sn start n i)
(let ((identity (lambda (x) x))
(backrefs (pregexp-make-backref-list re))
(case-sensitive? #t))
(let sub ((re re) (i i) (sk identity) (fk (lambda () #f)))
(cond ((eqv? re ':bos)
(if (= i start) (sk i) (fk))
)
((eqv? re ':eos)
(if (>= i n) (sk i) (fk))
)
((eqv? re ':empty)
(sk i))
((eqv? re ':wbdry)
(if (pregexp-at-word-boundary? s i n)
(sk i)
(fk)))
((eqv? re ':not-wbdry)
(if (pregexp-at-word-boundary? s i n)
(fk)
(sk i)))
((and (char? re) (< i n))
(if ((if case-sensitive? char=? char-ci=?)
(string-ref s i) re)
(sk (+ i 1)) (fk)))
((and (not (pair? re)) (< i n))
(if (pregexp-check-if-in-char-class?
(string-ref s i) re)
(sk (+ i 1)) (fk)))
((and (pair? re) (eqv? (car re) ':char-range) (< i n))
(let ((c (string-ref s i)))
(if (let ((c< (if case-sensitive? char<=? char-ci<=?)))
(and (c< (cadr re) c)
(c< c (caddr re))))
(sk (+ i 1)) (fk))))
((pair? re)
(case (car re)
((:char-range)
(if (>= i n) (fk)
(pregexp-error 'pregexp-match-positions-aux)))
((:one-of-chars)
(if (>= i n) (fk)
(let loup-one-of-chars ((chars (cdr re)))
(if (null? chars) (fk)
(sub (car chars) i sk
(lambda ()
(loup-one-of-chars (cdr chars))))))))
((:neg-char)
(if (>= i n) (fk)
(sub (cadr re) i
(lambda (i1) (fk))
(lambda () (sk (+ i 1))))))
((:seq)
(let loup-seq ((res (cdr re)) (i i))
(if (null? res) (sk i )
(sub (car res) i
(lambda (i1 )
(loup-seq (cdr res) i1 ))
fk))))
((:or)
(let loup-or ((res (cdr re)))
(if (null? res) (fk)
(sub (car res) i
(lambda (i1 )
(or (sk i1 )
(loup-or (cdr res))))
(lambda () (loup-or (cdr res)))))))
((:backref)
(let* ((c (pregexp-list-ref backrefs (cadr re)))
(backref
(cond (c => cdr)
(else
(pregexp-error 'pregexp-match-positions-aux
'non-existent-backref re)
#f))))
(if backref
(pregexp-string-match
(substring s (car backref) (cdr backref))
s i n (lambda (i) (sk i)) fk)
(sk i))))
((:sub)
(sub (cadr re) i
(lambda (i1)
(set-cdr! (assv re backrefs) (cons i i1))
(sk i1)) fk))
((:lookahead)
(let ((found-it?
(sub (cadr re) i
identity (lambda () #f))))
(if found-it? (sk i) (fk))))
((:neg-lookahead)
(let ((found-it?
(sub (cadr re) i
identity (lambda () #f))))
(if found-it? (fk) (sk i))))
((:lookbehind)
(let ((n-actual n) (sn-actual sn))
(set! n i) (set! sn i)
(let ((found-it?
(sub (list ':seq '(:between #f 0 #f :any)
(cadr re) ':eos) 0
identity (lambda () #f))))
(set! n n-actual) (set! sn sn-actual)
(if found-it? (sk i) (fk)))))
((:neg-lookbehind)
(let ((n-actual n) (sn-actual sn))
(set! n i) (set! sn i)
(let ((found-it?
(sub (list ':seq '(:between #f 0 #f :any)
(cadr re) ':eos) 0
identity (lambda () #f))))
(set! n n-actual) (set! sn sn-actual)
(if found-it? (fk) (sk i)))))
((:no-backtrack)
(let ((found-it? (sub (cadr re) i
identity (lambda () #f))))
(if found-it?
(sk found-it?)
(fk))))
((:case-sensitive :case-insensitive)
(let ((old case-sensitive?))
(set! case-sensitive?
(eqv? (car re) ':case-sensitive))
(sub (cadr re) i
(lambda (i1)
(set! case-sensitive? old)
(sk i1))
(lambda ()
(set! case-sensitive? old)
(fk)))))
((:between)
(let* ((maximal? (not (cadr re)))
(p (caddr re))
(q (cadddr re))
(could-loop-infinitely? (and maximal? (not q)))
(re (car (cddddr re))))
(let loup-p ((k 0) (i i) )
(if (< k p)
(sub re i
(lambda (i1 )
(if (and could-loop-infinitely?
(= i1 i))
(pregexp-error
'pregexp-match-positions-aux
'greedy-quantifier-operand-could-be-empty))
(loup-p (+ k 1) i1 ))
fk)
(let ((q (and q (- q p))))
(let loup-q ((k 0) (i i))
(let ((fk (lambda ()
(sk i ))))
(if (and q (>= k q)) (fk)
(if maximal?
(sub re i
(lambda (i1)
(if (and could-loop-infinitely?
(= i1 i))
(pregexp-error
'pregexp-match-positions-aux
'greedy-quantifier-operand-could-be-empty))
(or (loup-q (+ k 1) i1)
(fk)))
fk)
(or (fk)
(sub re i
(lambda (i1)
(loup-q (+ k 1) i1))
fk)))))))))))
(else (pregexp-error 'pregexp-match-positions-aux))))
((>= i n) (fk))
(else (pregexp-error 'pregexp-match-positions-aux))))
(let ((backrefs (map cdr backrefs)))
(and (car backrefs) backrefs)))))
(define pregexp-replace-aux
(lambda (str ins n backrefs)
(let loop ((i 0) (r ""))
(if (>= i n) r
(let ((c (string-ref ins i)))
(if (char=? c #\\ )
(let* ((br-i (pregexp-read-escaped-number ins i n))
(br (if br-i (car br-i)
(if (char=? (string-ref ins (+ i 1)) #\&) 0
#f)))
(i (if br-i (cadr br-i)
(if br (+ i 2)
(+ i 1)))))
(if (not br)
(let ((c2 (string-ref ins i)))
(loop (+ i 1)
(if (char=? c2 #\$) r
(string-append r (string c2)))))
(loop i
(let ((backref (pregexp-list-ref backrefs br)))
(if backref
(string-append r
(substring str (car backref) (cdr backref)))
r)))))
(loop (+ i 1) (string-append r (string c)))))))))
(define pregexp
(lambda (s)
(set! *pregexp-space-sensitive?* #t) (list ':sub (car (pregexp-read-pattern s 0 (string-length s))))))
(define pregexp-match-positions
(lambda (pat str . opt-args)
(cond ((string? pat) (set! pat (pregexp pat)))
((pair? pat) #t)
(else (pregexp-error 'pregexp-match-positions
'pattern-must-be-compiled-or-string-regexp
pat)))
(let* ((str-len (string-length str))
(start (if (null? opt-args) 0
(let ((start (car opt-args)))
(set! opt-args (cdr opt-args))
start)))
(end (if (null? opt-args) str-len
(car opt-args))))
(let loop ((i start))
(and (<= i end)
(or (pregexp-match-positions-aux
pat str str-len start end i)
(loop (+ i 1))))))))
(define pregexp-match
(lambda (pat str . opt-args)
(let ((ix-prs (apply pregexp-match-positions pat str opt-args)))
(and ix-prs
(map
(lambda (ix-pr)
(and ix-pr
(substring str (car ix-pr) (cdr ix-pr))))
ix-prs)))))
(define pregexp-split
(lambda (pat str)
(let ((n (string-length str)))
(let loop ((i 0) (r '()) (picked-up-one-undelimited-char? #f))
(cond ((>= i n) (pregexp-reverse! r))
((pregexp-match-positions pat str i n)
=>
(lambda (y)
(let ((jk (car y)))
(let ((j (car jk)) (k (cdr jk)))
(cond ((= j k)
(loop (+ k 1)
(cons (substring str i (+ j 1)) r) #t))
((and (= j i) picked-up-one-undelimited-char?)
(loop k r #f))
(else
(loop k (cons (substring str i j) r) #f)))))))
(else (loop n (cons (substring str i n) r) #f)))))))
(define pregexp-replace
(lambda (pat str ins)
(let* ((n (string-length str))
(pp (pregexp-match-positions pat str 0 n)))
(if (not pp) str
(let ((ins-len (string-length ins))
(m-i (caar pp))
(m-n (cdar pp)))
(string-append
(substring str 0 m-i)
(pregexp-replace-aux str ins ins-len pp)
(substring str m-n n)))))))
(define pregexp-replace*
(lambda (pat str ins)
(let ((pat (if (string? pat) (pregexp pat) pat))
(n (string-length str))
(ins-len (string-length ins)))
(let loop ((i 0) (r ""))
(if (>= i n) r
(let ((pp (pregexp-match-positions pat str i n)))
(if (not pp)
(if (= i 0)
str
(string-append
r (substring str i n)))
(loop (cdar pp)
(string-append
r
(substring str i (caar pp))
(pregexp-replace-aux str ins ins-len pp))))))))))
(define pregexp-quote
(lambda (s)
(let loop ((i (- (string-length s) 1)) (r '()))
(if (< i 0) (list->string r)
(loop (- i 1)
(let ((c (string-ref s i)))
(if (memv c '(#\\ #\. #\? #\* #\+ #\| #\^ #\$
#\[ #\] #\{ #\} #\( #\)))
(cons #\\ (cons c r))
(cons c r)))))))))