#lang racket/base
(require "label.rkt"
"structs.rkt"
"debug.rkt")
(debug-add-hooks ukkonen2
(debug enable-ukkonen-debug-messages
disable-ukkonen-debug-messages))
(provide enable-ukkonen-debug-messages)
(provide disable-ukkonen-debug-messages)
(define current-node->id
(make-parameter
(let ((hash (make-weak-hasheq)) (n 0)
(sema (make-semaphore 1)))
(hash-set! hash #f "#f")
(lambda (node)
(call-with-semaphore sema
(lambda ()
(hash-ref hash node
(lambda ()
(hash-set! hash node
(number->string n))
(set! n (add1 n))
(hash-ref hash node)))))))))
(provide skip-count)
(define skip-count
(lambda (node label)
(skip-count-helper node label 0 (label-length label))))
(define (skip-count-helper node label k N)
(define (loop node k)
(let* ((child (node-find-child node (label-ref label k)))
(child-label (node-up-label child))
(child-label-length (label-length child-label))
(rest-of-chars-left-to-skip (- N k)))
(if (> rest-of-chars-left-to-skip child-label-length)
(loop child
(+ k child-label-length))
(values child rest-of-chars-left-to-skip))))
(if (>= k N)
(values node (label-length (node-up-label node)))
(loop node k)))
(provide jump-to-suffix)
(define (jump-to-suffix node)
(cond ((node-root? node)
(values node #f))
((node-suffix-link node)
(begin (debug "following suffix link from ~a to ~a"
((current-node->id) node)
((current-node->id) (node-suffix-link node)))
(values (node-suffix-link node) 0)))
((node-root? (node-parent node))
(values (node-parent node) #f))
(else
(values (node-suffix-link (node-parent node))
(label-length (node-up-label node))))))
(provide try-to-set-suffix-edge!)
(define (try-to-set-suffix-edge! from-node to-node)
(when (not (node-suffix-link from-node))
(debug "setting suffix link from ~a to ~a"
((current-node->id) from-node)
((current-node->id) to-node))
(set-node-suffix-link! from-node to-node)))
(provide find-next-extension-point/add-suffix-link!)
(define (find-next-extension-point/add-suffix-link! node label initial-i j)
(define (fixed-start suffix-offset)
(if suffix-offset (- initial-i suffix-offset) j))
(let*-values
(((suffix-node suffix-offset) (jump-to-suffix node))
((K N) (values (fixed-start suffix-offset)
(label-length label))))
(letrec
[
(loop-first
(lambda (i)
(loop-general i (lambda (skipped-node skip-offset)
(when (node-position-at-end?
skipped-node skip-offset)
(try-to-set-suffix-edge!
node skipped-node))))))
(loop-rest
(lambda (i)
(loop-general i (lambda (skipped-node skip-offset)
(void)))))
(loop-general
(lambda (i first-shot)
(if (>= i N)
(values #f #f #f)
(let-values
(((skipped-node skipped-offset)
(skip-count-helper suffix-node label K i)))
(first-shot skipped-node skipped-offset)
(if (node-position-at-end? skipped-node skipped-offset)
(find-extension-at-end!
skipped-node skipped-offset i)
(find-extension-in-edge
skipped-node skipped-offset i))))))
(find-extension-in-edge
(lambda (skipped-node skip-offset i)
(if (label-element-equal?
(label-ref label i)
(label-ref (node-up-label skipped-node)
skip-offset))
(loop-rest (add1 i))
(values skipped-node skip-offset i))))
(find-extension-at-end!
(lambda (skipped-node skip-offset i)
(if (node-find-child skipped-node (label-ref label i))
(loop-rest (add1 i))
(values skipped-node skip-offset i))))
]
(loop-first initial-i))))
(provide extend-at-point!)
(define extend-at-point!
(letrec [
(main-logic
(lambda (node offset label i)
(if (should-extend-as-leaf? node offset)
(attach-as-leaf! node label i)
(splice-with-internal-node! node offset label i))))
(should-extend-as-leaf?
(lambda (node offset)
(node-position-at-end? node offset)))
(attach-as-leaf!
(lambda (node label i)
(debug "adding ~S as leaf off of ~A"
(label->string (sublabel label i))
((current-node->id) node))
(let ((leaf (node-add-leaf! node (sublabel label i))))
(debug "leaf ~A added" ((current-node->id) leaf))
node)))
(splice-with-internal-node!
(lambda (node offset label i)
(debug "adding ~S within edge above ~A between ~S and ~S"
(label->string (sublabel label i))
((current-node->id) node)
(label->string (sublabel (node-up-label node) 0 offset))
(label->string (sublabel (node-up-label node) offset)))
(let-values (((split-node leaf)
(node-up-splice-leaf!
node offset (sublabel label i))))
(debug "spliced ~A with leaf ~A"
((current-node->id) split-node)
((current-node->id) leaf))
split-node)))
]
main-logic))
(provide suffix-tree-add!)
(define suffix-tree-add!
(letrec
[
(do-construction!
(lambda (tree label)
(debug "Starting construction for ~S" (label->string label))
(debug "Root node is ~A"
((current-node->id) (suffix-tree-root tree)))
(let-values (((starting-node starting-offset)
(add-first-suffix! tree label)))
(add-rest-suffixes! label starting-node starting-offset)
(debug "finished construction"))))
(add-first-suffix!
(let
[
(matched-at-node
(lambda (node)
(report-implicit-tree-constructed)))
(matched-in-node
(lambda (node offset)
(report-implicit-tree-constructed)))
(mismatched-at-node
(lambda (node label label-offset)
(let ((leaf (node-add-leaf!
node (sublabel label label-offset))))
(debug "adding leaf ~A with label ~S"
((current-node->id) leaf)
(label->string (node-up-label leaf)))
(values node label-offset))))
(mismatched-in-node
(lambda (node offset label label-offset)
(let-values (((joint leaf)
(node-up-splice-leaf!
node offset
(sublabel label label-offset))))
(debug "spliced leaf ~A with label ~S"
((current-node->id) leaf)
(label->string (node-up-label leaf)))
(values joint label-offset))))
]
(lambda (tree label)
(node-follow/k
(suffix-tree-root tree) label
matched-at-node
matched-in-node
mismatched-at-node
mismatched-in-node))))
(add-rest-suffixes!
(lambda (label starting-node starting-offset)
(add-rest-suffixes-loop!
label
(label-length label)
(max starting-offset 1)
1
starting-node)))
(add-rest-suffixes-loop!
(lambda (label N i j active-node)
(when (< j N)
(debug "At node ~a (i=~a, j=~a)"
((current-node->id) active-node) i j)
(let-values (((next-extension-node next-extension-offset i*)
(find-next-extension-point/add-suffix-link!
active-node label i j)))
(if i*
(begin
(let ((new-active-node
(extend-at-point! next-extension-node
next-extension-offset
label i*)))
(try-to-set-suffix-edge! active-node new-active-node)
(add-rest-suffixes-loop!
label N
(max i* (add1 j)) (add1 j) new-active-node)))
(begin
(report-implicit-tree-constructed)))))))
(report-implicit-tree-constructed
(lambda ()
(debug "Implicit tree constructed")
(void)))
]
do-construction!))