annotate src/clojure/contrib/probabilities/finite_distributions.clj @ 10:ef7dbbd6452c

added clojure source goodness
author Robert McIntyre <rlm@mit.edu>
date Sat, 21 Aug 2010 06:25:44 -0400
parents
children
rev   line source
rlm@10 1 ;; Finite probability distributions
rlm@10 2
rlm@10 3 ;; by Konrad Hinsen
rlm@10 4 ;; last updated January 8, 2010
rlm@10 5
rlm@10 6 ;; Copyright (c) Konrad Hinsen, 2009-2010. All rights reserved. The use
rlm@10 7 ;; and distribution terms for this software are covered by the Eclipse
rlm@10 8 ;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
rlm@10 9 ;; which can be found in the file epl-v10.html at the root of this
rlm@10 10 ;; distribution. By using this software in any fashion, you are
rlm@10 11 ;; agreeing to be bound by the terms of this license. You must not
rlm@10 12 ;; remove this notice, or any other, from this software.
rlm@10 13
rlm@10 14 (ns
rlm@10 15 ^{:author "Konrad Hinsen"
rlm@10 16 :doc "Finite probability distributions
rlm@10 17 This library defines a monad for combining finite probability
rlm@10 18 distributions."}
rlm@10 19 clojure.contrib.probabilities.finite-distributions
rlm@10 20 (:use [clojure.contrib.monads
rlm@10 21 :only (defmonad domonad with-monad maybe-t m-lift m-chain)]
rlm@10 22 [clojure.contrib.def :only (defvar)]))
rlm@10 23
rlm@10 24 ; The probability distribution monad. It is limited to finite probability
rlm@10 25 ; distributions (e.g. there is a finite number of possible value), which
rlm@10 26 ; are represented as maps from values to probabilities.
rlm@10 27
rlm@10 28 (defmonad dist-m
rlm@10 29 "Monad describing computations on fuzzy quantities, represented by a finite
rlm@10 30 probability distribution for the possible values. A distribution is
rlm@10 31 represented by a map from values to probabilities."
rlm@10 32 [m-result (fn m-result-dist [v]
rlm@10 33 {v 1})
rlm@10 34 m-bind (fn m-bind-dist [mv f]
rlm@10 35 (reduce (partial merge-with +)
rlm@10 36 (for [[x p] mv [y q] (f x)]
rlm@10 37 {y (* q p)})))
rlm@10 38 ])
rlm@10 39
rlm@10 40 ; Applying the monad transformer maybe-t to the basic dist monad results
rlm@10 41 ; in the cond-dist monad that can handle invalid values. The total probability
rlm@10 42 ; for invalid values ends up as the probability of m-zero (which is nil).
rlm@10 43 ; The function normalize takes this probability out of the distribution and
rlm@10 44 ; re-distributes its weight over the valid values.
rlm@10 45
rlm@10 46 (defvar cond-dist-m
rlm@10 47 (maybe-t dist-m)
rlm@10 48 "Variant of the dist monad that can handle undefined values.")
rlm@10 49
rlm@10 50 ; Normalization
rlm@10 51
rlm@10 52 (defn- scale-by
rlm@10 53 "Multiply each entry in dist by the scale factor s and remove zero entries."
rlm@10 54 [dist s]
rlm@10 55 (into {}
rlm@10 56 (for [[val p] dist :when (> p 0)]
rlm@10 57 [val (* p s)])))
rlm@10 58
rlm@10 59 (defn normalize-cond [cdist]
rlm@10 60 "Normalize a probability distribution resulting from a computation in
rlm@10 61 the cond-dist monad by re-distributing the weight of the invalid values
rlm@10 62 over the valid ones."
rlm@10 63 (let [missing (get cdist nil 0)
rlm@10 64 dist (dissoc cdist nil)]
rlm@10 65 (cond (zero? missing) dist
rlm@10 66 (= 1 missing) {}
rlm@10 67 :else (let [scale (/ 1 (- 1 missing))]
rlm@10 68 (scale-by dist scale)))))
rlm@10 69
rlm@10 70 (defn normalize
rlm@10 71 "Convert a weight map (e.g. a map of counter values) to a distribution
rlm@10 72 by multiplying with a normalization factor. If the map has a key
rlm@10 73 :total, its value is assumed to be the sum over all the other values and
rlm@10 74 it is used for normalization. Otherwise, the sum is calculated
rlm@10 75 explicitly. The :total key is removed from the resulting distribution."
rlm@10 76 [weights]
rlm@10 77 (let [total (:total weights)
rlm@10 78 w (dissoc weights :total)
rlm@10 79 s (/ 1 (if (nil? total) (reduce + (vals w)) total))]
rlm@10 80 (scale-by w s)))
rlm@10 81
rlm@10 82 ; Functions that construct distributions
rlm@10 83
rlm@10 84 (defn uniform
rlm@10 85 "Return a distribution in which each of the elements of coll
rlm@10 86 has the same probability."
rlm@10 87 [coll]
rlm@10 88 (let [n (count coll)
rlm@10 89 p (/ 1 n)]
rlm@10 90 (into {} (for [x (seq coll)] [x p]))))
rlm@10 91
rlm@10 92 (defn choose
rlm@10 93 "Construct a distribution from an explicit list of probabilities
rlm@10 94 and values. They are given in the form of a vector of probability-value
rlm@10 95 pairs. In the last pair, the probability can be given by the keyword
rlm@10 96 :else, which stands for 1 minus the total of the other probabilities."
rlm@10 97 [& choices]
rlm@10 98 (letfn [(add-choice [dist [p v]]
rlm@10 99 (cond (nil? p) dist
rlm@10 100 (= p :else)
rlm@10 101 (let [total-p (reduce + (vals dist))]
rlm@10 102 (assoc dist v (- 1 total-p)))
rlm@10 103 :else (assoc dist v p)))]
rlm@10 104 (reduce add-choice {} (partition 2 choices))))
rlm@10 105
rlm@10 106 (defn bernoulli
rlm@10 107 [p]
rlm@10 108 "Returns the Bernoulli distribution for probability p."
rlm@10 109 (choose p 1 :else 0))
rlm@10 110
rlm@10 111 (defn- bc
rlm@10 112 [n]
rlm@10 113 "Returns the binomial coefficients for a given n."
rlm@10 114 (let [r (inc n)]
rlm@10 115 (loop [c 1
rlm@10 116 f (list 1)]
rlm@10 117 (if (> c n)
rlm@10 118 f
rlm@10 119 (recur (inc c) (cons (* (/ (- r c) c) (first f)) f))))))
rlm@10 120
rlm@10 121 (defn binomial
rlm@10 122 [n p]
rlm@10 123 "Returns the binomial distribution, which is the distribution of the
rlm@10 124 number of successes in a series of n experiments whose individual
rlm@10 125 success probability is p."
rlm@10 126 (let [q (- 1 p)
rlm@10 127 n1 (inc n)
rlm@10 128 k (range n1)
rlm@10 129 pk (take n1 (iterate #(* p %) 1))
rlm@10 130 ql (reverse (take n1 (iterate #(* q %) 1)))
rlm@10 131 f (bc n)]
rlm@10 132 (into {} (map vector k (map * f pk ql)))))
rlm@10 133
rlm@10 134 (defn make-distribution
rlm@10 135 "Returns the distribution in which each element x of the collection
rlm@10 136 has a probability proportional to (f x)"
rlm@10 137 [coll f]
rlm@10 138 (normalize (into {} (for [k coll] [k (f k)]))))
rlm@10 139
rlm@10 140 (defn zipf
rlm@10 141 "Returns the Zipf distribution in which the numbers k=1..n have
rlm@10 142 probabilities proportional to 1/k^s."
rlm@10 143 [s n]
rlm@10 144 (make-distribution (range 1 (inc n)) #(/ (java.lang.Math/pow % s))))
rlm@10 145
rlm@10 146 (defn certainly
rlm@10 147 "Returns a distribution in which the single value v has probability 1."
rlm@10 148 [v]
rlm@10 149 {v 1})
rlm@10 150
rlm@10 151 (with-monad dist-m
rlm@10 152
rlm@10 153 (defn join-with
rlm@10 154 "Returns the distribution of (f x y) with x from dist1 and y from dist2."
rlm@10 155 [f dist1 dist2]
rlm@10 156 ((m-lift 2 f) dist1 dist2))
rlm@10 157
rlm@10 158 )
rlm@10 159
rlm@10 160 (with-monad cond-dist-m
rlm@10 161 (defn cond-prob
rlm@10 162 "Returns the conditional probability for the values in dist that satisfy
rlm@10 163 the predicate pred."
rlm@10 164 [pred dist]
rlm@10 165 (normalize-cond
rlm@10 166 (domonad
rlm@10 167 [v dist
rlm@10 168 :when (pred v)]
rlm@10 169 v))))
rlm@10 170
rlm@10 171 ; Select (with equal probability) N items from a sequence
rlm@10 172
rlm@10 173 (defn- nth-and-rest [n xs]
rlm@10 174 "Return a list containing the n-th value of xs and the sequence
rlm@10 175 obtained by removing the n-th value from xs."
rlm@10 176 (let [[h t] (split-at n xs)]
rlm@10 177 (list (first t) (concat h (rest t)))))
rlm@10 178
rlm@10 179 (with-monad dist-m
rlm@10 180
rlm@10 181 (defn- select-n [n xs]
rlm@10 182 (letfn [(select-1 [[s xs]]
rlm@10 183 (uniform (for [i (range (count xs))]
rlm@10 184 (let [[nth rest] (nth-and-rest i xs)]
rlm@10 185 (list (cons nth s) rest)))))]
rlm@10 186 ((m-chain (replicate n select-1)) (list '() xs))))
rlm@10 187
rlm@10 188 (defn select [n xs]
rlm@10 189 "Return the distribution for all possible ordered selections of n elements
rlm@10 190 out of xs."
rlm@10 191 ((m-lift 1 first) (select-n n xs)))
rlm@10 192
rlm@10 193 )
rlm@10 194
rlm@10 195 ; Find the probability that a given predicate is satisfied
rlm@10 196
rlm@10 197 (defn prob
rlm@10 198 "Return the probability that the predicate pred is satisfied in the
rlm@10 199 distribution dist, i.e. the sum of the probabilities of the values
rlm@10 200 that satisfy pred."
rlm@10 201 [pred dist]
rlm@10 202 (apply + (for [[x p] dist :when (pred x)] p)))
rlm@10 203