histogram.ss
;;; PLT Scheme Science Collection
;;; histogram.ss
;;; Copyright (c) 2004 M. Douglas Williams
;;;
;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public
;;; License as published by the Free Software Foundation; either
;;; version 2.1 of the License, or (at your option) any later version.
;;;
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; Lesser General Public License for more details.
;;;
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with this library; if not, write to the Free
;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
;;; 02111-1307 USA.
;;;
;;; -------------------------------------------------------------------
;;;
;;; This code implements histograms for the PLT Scheme Science
;;; Collection.  It is loosely based on the histograms provided by the
;;; GNU Scientific Library (GSL).
;;;
;;; Version  Date      Description
;;; 0.1.0    08/06/04  This is the initial release of the histogram
;;;                    routines. (Doug Williams)
;;; 0.2.0    08/28/04  Added make-histogram-with-ranges-uniform and
;;;                    improved the histogram-increment! and
;;;                    histogram-accumulate! more efficient by
;;;                    computing the bin for histograms with uniform
;;;                    ranges.  (Doug Williams)
;;; 1.0.0    09/28/04  Added contracts for functions.  Marked as ready
;;;                    for Release 1.0.  (Doug Williams)

(module histogram mzscheme
  
  ;; Data definition
  ;;
  ;; Histogram structure with 4 fields:
  ;;   0 - n, number of bins
  ;;   1 - ranges, vector defining the bin ranges (length is n+1)
  ;;   2 - bins, vector containing the bin values
  ;;   3 - ranges-uniform?, boolean
  
  (define-values (struct:histogram
                  histogram-constructor
                  histogram?
                  histogram-field-ref
                  set-histogram-field!)
    (make-struct-type 'histogram #f 4 0))
  
  ;; Contracts
  
  (require (lib "contract.ss"))
  
  (provide/contract
   (histogram?
    (-> any/c boolean?))
   (make-histogram
    (-> (integer-in 1 +inf.0) histogram?))
   (make-histogram-with-ranges-uniform
    (->r ((n (integer-in 1 +inf.0))
          (min real?)
          (max (>/c min)))
         histogram?))
   (histogram-n
    (-> histogram? (integer-in 1 +inf.0)))
   (histogram-ranges
    (-> histogram? (vectorof real?)))
   (set-histogram-ranges!
    (-> histogram? (vectorof real?) void?))
   (set-histogram-ranges-uniform!
    (->r ((h histogram?)
          (min real?)
          (max (>/c min)))
         void?))
   (histogram-bins
    (-> histogram? (vectorof real?)))
   (histogram-increment!
    (-> histogram? real? void?))
   (histogram-accumulate!
    (-> histogram? real? (>=/c 0.0) void?))
   (histogram-get
    (-> histogram? natural-number/c (>=/c 0.0)))
   (histogram-get-range
    (-> histogram? natural-number/c (values real? real?)))
   (histogram-max
    (-> histogram? (>=/c 0.0)))
   (histogram-min
    (-> histogram? (>=/c 0.0)))
   (histogram-mean
    (-> histogram? real?))
   (histogram-sigma
    (-> histogram? (>=/c 0.0)))
   (histogram-sum
    (-> histogram? (>=/c 0.0))))
  
  ;; make-histogram: integer -> histogram
  ;;
  ;; The procedure make-histogram returns a new histogram with the
  ;; specified number of bins.
  
  (define (make-histogram n)
    (if (not (> n 0))
        (error 'make-histogram 
               "number of bins must be greater than 0"))
    (histogram-constructor n 
                           (make-vector (+ n 1)) 
                           (make-vector n)
                           #f))
  
  ;; make-histogram-with-ranges-uniform: integer x real x real ->
  ;;                                     histogram
  ;;
  ;; This function returns a histogram with the specified number
  ;; of uniform sized bins over the specified range.
  
  (define (make-histogram-with-ranges-uniform n min max)
    (if (not (> n 0))
        (error 'make-histogram-with-ranges-uniform
               "number of bins must be greater than 0"))
    (let ((h (make-histogram n)))
      (set-histogram-ranges-uniform! h min max)
      h))
  
  ;; histogram-n: histogram -> integer
  
  (define histogram-n
    (make-struct-field-accessor histogram-field-ref 0 'n))
  
  ;; histogram-ranges: histogram -> vector
  
  (define histogram-ranges
    (make-struct-field-accessor histogram-field-ref 1 'ranges))
  
  ;; set-histogram-ranges: histogram x vector -> void
  
  (define (set-histogram-ranges! h ranges)
    (if (not (= (vector-length ranges)
                (+ (histogram-n h) 1)))
        (error 'set-histogram-ranges
               "size of ranges vector must match size of histogram"))
    ;; Set ranges.
    (do ((i 0 (+ i 1))) 
        ((> i (histogram-n h)) (void))
      (vector-set! (histogram-ranges h) i
                   (vector-ref ranges i)))
    ;; Re-initialize the bins.
    (do ((i 0 (+ i 1)))
        ((= i (histogram-n h)) (void))
      (vector-set! (histogram-bins h) i 0))
    ;; Clear ranges-uniform?
    (set-histogram-ranges-uniform?! h #f)
    (void))
  
  ;; set-histogram-ranges-uniform!: histogram x real x real -> void
  
  (define (set-histogram-ranges-uniform! h min max)
    (let* ((n (histogram-n h))
           (ranges (histogram-ranges h))
           (bin-size (/ (- max min) n)))
      (do ((i 0 (+ i 1)))
          ((= i n) (void))
        (vector-set! ranges i (+ min (* i bin-size))))
      (vector-set! ranges n max))
    ;; Re-initialize the bins.
    (do ((i 0 (+ i 1)))
      ((= i (histogram-n h)) (void))
      (vector-set! (histogram-bins h) i 0))
    ;; Set uniform-ranges?
    (set-histogram-ranges-uniform?! h #t)
    (void))
  
  ;; histogram-bins: histogram -> vector
  
  (define histogram-bins
    (make-struct-field-accessor histogram-field-ref 2 'bins))
  
  ;; histogram-ranges-uniform?: histogram -> boolean
  
  (define histogram-ranges-uniform?
    (make-struct-field-accessor histogram-field-ref 3
                                'ranges-uniform?))
  
  ;; set-histogram-ranges-uniform?! histogram x boolean -> void
  
  (define set-histogram-ranges-uniform?!
    (make-struct-field-mutator set-histogram-field! 3
                               'ranges-uniform?))
  
  ;; histogram-increment!: histogram x real -> void
  ;;
  ;; Increment the bin corresponding to the x value by one.
  
  (define (histogram-increment! h x)
    (let ((n (histogram-n h))
          (ranges (histogram-ranges h))
          (bins (histogram-bins h))
          (uniform-ranges? (histogram-ranges-uniform? h)))
      (if uniform-ranges?
          ;; Compute bin
          (let ((i (inexact->exact
                    (floor (/ (- x (vector-ref ranges 0))
                              (/ (- (vector-ref ranges n)
                                    (vector-ref ranges 0))
                                 n))))))
            (if (<= 0 i (- n 1))
                (vector-set! bins i
                             (+ (vector-ref bins i) 1))))
          ;; Search for bin
          (let/ec exit
            (if (< x (vector-ref ranges 0))
                (exit))
            (do ((i 0 (+ i 1)))
              ((= i n) (void))
              (if (< x (vector-ref ranges (+ i 1)))
                  (begin
                    (vector-set! bins i
                                 (+ (vector-ref bins i) 1))
                    (exit))))))))
  
  ;; histogram-accumulate!: histogram x real x real -> void
  ;;
  ;; Increment the bin corresponding to the x value by the weight.
  
  (define (histogram-accumulate! h x weight)
    (let ((n (histogram-n h))
          (ranges (histogram-ranges h))
          (bins (histogram-bins h))
          (uniform-ranges? (histogram-ranges-uniform? h)))
      (if uniform-ranges?
          ;; Compute bin
          (let ((i (inexact->exact
                    (floor (/ (- x (vector-ref ranges 0))
                              (/ (- (vector-ref ranges n)
                                    (vector-ref ranges 0))
                                 n))))))
            (if (<= 0 i (- n 1))
                (vector-set! bins i
                             (+ (vector-ref bins i) weight))))
          ;; Search for bin
          (let/ec exit
            (if (< x (vector-ref ranges 0))
                (exit))
            (do ((i 0 (+ i 1)))
              ((= i n) (void))
              (if (< x (vector-ref ranges (+ i 1)))
                  (begin
                    (vector-set! bins i
                                 (+ (vector-ref bins i) weight))
                    (exit))))))))
  
  ;; histogram-get: histogram x integer -> real
  
  (define (histogram-get h i)
    (vector-ref (histogram-bins h) i))
  
  ;; histogram-range: histogram x integer -> real x real
  (define (histogram-get-range h i)
    (values (vector-ref (histogram-ranges h) i)
            (vector-ref (histogram-ranges h) (+ i 1))))
  
  ;;; Histogram Statistics
  
  ;; histogram-max: histogram -> real
  
  (define (histogram-max h)
    (let ((n (histogram-n h))
          (bins (histogram-bins h))
          ;; Initialize max to the first bin value
          (max (vector-ref (histogram-bins h) 0)))
      (do ((i 0 (+ i 1)))
          ((= i n) max)
        (if (> (vector-ref bins i) max)
            (set! max (vector-ref bins i))))))
  
  ;; histogram-min: histogram -> real
  
  (define (histogram-min h)
    (let ((n (histogram-n h))
          (bins (histogram-bins h))
          ;; Initialize min to the first bin value.
          (min (vector-ref (histogram-bins h) 0)))
      (do ((i 0 (+ i 1)))
          ((= i n) min)
        (if (< (vector-ref bins i) min)
            (set! min (vector-ref bins i))))))
  
  ;; histogram-mean: histogram -> real
  
  (define (histogram-mean h)
    (let ((n (histogram-n h))
          (ranges (histogram-ranges h))
          (bins (histogram-bins h))
          (wmean 0.0)
          (W 0.0))
      (do ((i 0 (+ i 1)))
          ((= i n) wmean)
        (let ((xi (/ (+ (vector-ref ranges (+ i 1))
                        (vector-ref ranges i))
                     2.0))
              (wi (vector-ref bins i)))
          (if (> wi 0.0)
              (begin
                (set! W (+ W wi))
                (set! wmean (+ wmean (* (- xi wmean) (/ wi W))))))))))
  
  ;; histogram-sigma: histogram -> real
  
  (define (histogram-sigma h)
    (let ((n (histogram-n h))
          (ranges (histogram-ranges h))
          (bins (histogram-bins h))
          (wvariance 0.0)
          (wmean 0.0)
          (W 0))
      (do ((i 0 (+ i 1)))
          ((= i n) wmean)
        (let ((xi (/ (+ (vector-ref ranges (+ i 1))
                        (vector-ref ranges i))
                     2.0))
              (wi (vector-ref bins i)))
          (if (> wi 0.0)
              (begin
                (set! W (+ W wi))
                (set! wmean (+ wmean (* (- xi wmean) (/ wi W))))))))
      (set! W 0.0)
      (do ((i 0 (+ i 1)))
          ((= i n) (sqrt wvariance))
        (let ((xi (/ (+ (vector-ref ranges (+ i 1))
                        (vector-ref ranges i))
                     2.0))
              (wi (vector-ref bins i)))
          (if (> wi 0.0)
              (let ((delta (- xi wmean)))
                (set! W (+ W wi))
                (set! wvariance (+ wvariance 
                                  (* (- (* delta delta) wvariance)
                                     (/ wi W))))))))))
  
  ;; histogram-sum: histogram -> real
  
  (define (histogram-sum h)
    (let ((n (histogram-n h))
          (bins (histogram-bins h))
          (sum 0.0))
      (do ((i 0 (+ i 1)))
          ((= i n) sum)
        (set! sum (+ sum (vector-ref bins i))))))
          
)