diff src/clojure/contrib/probabilities/finite_distributions.clj @ 10:ef7dbbd6452c

added clojure source goodness
author Robert McIntyre <rlm@mit.edu>
date Sat, 21 Aug 2010 06:25:44 -0400
parents
children
line wrap: on
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/clojure/contrib/probabilities/finite_distributions.clj	Sat Aug 21 06:25:44 2010 -0400
     1.3 @@ -0,0 +1,203 @@
     1.4 +;; Finite probability distributions
     1.5 +
     1.6 +;; by Konrad Hinsen
     1.7 +;; last updated January 8, 2010
     1.8 +
     1.9 +;; Copyright (c) Konrad Hinsen, 2009-2010. All rights reserved.  The use
    1.10 +;; and distribution terms for this software are covered by the Eclipse
    1.11 +;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
    1.12 +;; which can be found in the file epl-v10.html at the root of this
    1.13 +;; distribution.  By using this software in any fashion, you are
    1.14 +;; agreeing to be bound by the terms of this license.  You must not
    1.15 +;; remove this notice, or any other, from this software.
    1.16 +
    1.17 +(ns
    1.18 +  ^{:author "Konrad Hinsen"
    1.19 +     :doc "Finite probability distributions
    1.20 +           This library defines a monad for combining finite probability
    1.21 +           distributions."}
    1.22 +  clojure.contrib.probabilities.finite-distributions
    1.23 +  (:use [clojure.contrib.monads
    1.24 +	 :only (defmonad domonad with-monad maybe-t m-lift m-chain)]
    1.25 +	 [clojure.contrib.def :only (defvar)]))
    1.26 +
    1.27 +; The probability distribution monad. It is limited to finite probability
    1.28 +; distributions (e.g. there is a finite number of possible value), which
    1.29 +; are represented as maps from values to probabilities.
    1.30 +
    1.31 +(defmonad dist-m
    1.32 +  "Monad describing computations on fuzzy quantities, represented by a finite
    1.33 +   probability distribution for the possible values. A distribution is
    1.34 +   represented by a map from values to probabilities."
    1.35 +  [m-result (fn m-result-dist [v]
    1.36 +	      {v 1})
    1.37 +   m-bind   (fn m-bind-dist [mv f]
    1.38 +	      (reduce (partial merge-with +)
    1.39 +		      (for [[x p] mv  [y q] (f x)]
    1.40 +			{y (* q p)})))
    1.41 +   ])
    1.42 +
    1.43 +; Applying the monad transformer maybe-t to the basic dist monad results
    1.44 +; in the cond-dist monad that can handle invalid values. The total probability
    1.45 +; for invalid values ends up as the probability of m-zero (which is nil).
    1.46 +; The function normalize takes this probability out of the distribution and
    1.47 +; re-distributes its weight over the valid values.
    1.48 +
    1.49 +(defvar cond-dist-m
    1.50 +  (maybe-t dist-m)
    1.51 +  "Variant of the dist monad that can handle undefined values.")
    1.52 +
    1.53 +; Normalization
    1.54 +
    1.55 +(defn- scale-by
    1.56 +  "Multiply each entry in dist by the scale factor s and remove zero entries."
    1.57 +  [dist s]
    1.58 +  (into {}
    1.59 +	(for [[val p] dist :when (> p 0)]
    1.60 +	  [val (* p s)])))
    1.61 +
    1.62 +(defn normalize-cond [cdist]
    1.63 +  "Normalize a probability distribution resulting from a computation in
    1.64 +   the cond-dist monad by re-distributing the weight of the invalid values
    1.65 +   over the valid ones."
    1.66 +  (let [missing (get cdist nil 0)
    1.67 +	dist    (dissoc cdist nil)]
    1.68 +    (cond (zero? missing) dist
    1.69 +	  (= 1 missing)   {}
    1.70 +	  :else (let [scale  (/ 1 (- 1 missing))]
    1.71 +		  (scale-by dist scale)))))
    1.72 +
    1.73 +(defn normalize
    1.74 +  "Convert a weight map (e.g. a map of counter values) to a distribution
    1.75 +   by multiplying with a normalization factor. If the map has a key
    1.76 +   :total, its value is assumed to be the sum over all the other values and
    1.77 +   it is used for normalization. Otherwise, the sum is calculated
    1.78 +   explicitly. The :total key is removed from the resulting distribution."
    1.79 +  [weights]
    1.80 +  (let [total (:total weights)
    1.81 +	w (dissoc weights :total)
    1.82 +	s (/ 1 (if (nil? total) (reduce + (vals w)) total))]
    1.83 +    (scale-by w s)))
    1.84 +
    1.85 +; Functions that construct distributions
    1.86 +
    1.87 +(defn uniform
    1.88 +  "Return a distribution in which each of the elements of coll
    1.89 +   has the same probability."
    1.90 +  [coll]
    1.91 +  (let [n (count coll)
    1.92 +	p (/ 1 n)]
    1.93 +    (into {} (for [x (seq coll)] [x p]))))
    1.94 +
    1.95 +(defn choose
    1.96 +  "Construct a distribution from an explicit list of probabilities
    1.97 +   and values. They are given in the form of a vector of probability-value
    1.98 +   pairs. In the last pair, the probability can be given by the keyword
    1.99 +   :else, which stands for 1 minus the total of the other probabilities."
   1.100 +  [& choices]
   1.101 +  (letfn [(add-choice [dist [p v]]
   1.102 +	    (cond (nil? p) dist
   1.103 +		  (= p :else)
   1.104 +		        (let [total-p (reduce + (vals dist))]
   1.105 +			  (assoc dist v (- 1 total-p)))
   1.106 +		  :else (assoc dist v p)))]
   1.107 +    (reduce add-choice {} (partition 2 choices))))
   1.108 +
   1.109 +(defn bernoulli
   1.110 +  [p]
   1.111 +  "Returns the Bernoulli distribution for probability p."
   1.112 +  (choose p 1 :else 0))
   1.113 +
   1.114 +(defn- bc
   1.115 +  [n]
   1.116 +  "Returns the binomial coefficients for a given n."
   1.117 +  (let [r (inc n)]
   1.118 +     (loop [c 1
   1.119 +	    f (list 1)]
   1.120 +       (if (> c n)
   1.121 +	 f
   1.122 +	 (recur (inc c) (cons (* (/ (- r c) c) (first f)) f))))))
   1.123 +
   1.124 +(defn binomial
   1.125 +  [n p]
   1.126 +  "Returns the binomial distribution, which is the distribution of the
   1.127 +   number of successes in a series of n experiments whose individual
   1.128 +   success probability is p."
   1.129 +  (let [q (- 1 p)
   1.130 +	n1 (inc n)
   1.131 +	k (range n1)
   1.132 +	pk (take n1 (iterate #(* p %) 1))
   1.133 +	ql (reverse (take n1 (iterate #(* q %) 1)))
   1.134 +	f (bc n)]
   1.135 +    (into {} (map vector k (map * f pk ql)))))
   1.136 +
   1.137 +(defn make-distribution
   1.138 +  "Returns the distribution in which each element x of the collection
   1.139 +   has a probability proportional to (f x)"
   1.140 +  [coll f]
   1.141 +  (normalize (into {} (for [k coll] [k (f k)]))))
   1.142 +
   1.143 +(defn zipf
   1.144 +  "Returns the Zipf distribution in which the numbers k=1..n have
   1.145 +   probabilities proportional to 1/k^s."
   1.146 +  [s n]
   1.147 +  (make-distribution (range 1 (inc n)) #(/ (java.lang.Math/pow % s))))
   1.148 +
   1.149 +(defn certainly
   1.150 +  "Returns a distribution in which the single value v has probability 1."
   1.151 +  [v]
   1.152 +  {v 1})
   1.153 +
   1.154 +(with-monad dist-m
   1.155 +
   1.156 +  (defn join-with
   1.157 +    "Returns the distribution of (f x y) with x from dist1 and y from dist2."
   1.158 +    [f dist1 dist2]
   1.159 +    ((m-lift 2 f) dist1 dist2))
   1.160 +
   1.161 +)
   1.162 +
   1.163 +(with-monad cond-dist-m
   1.164 +  (defn cond-prob
   1.165 +    "Returns the conditional probability for the values in dist that satisfy
   1.166 +     the predicate pred."
   1.167 +    [pred dist]
   1.168 +    (normalize-cond
   1.169 +      (domonad
   1.170 +        [v dist
   1.171 +	 :when (pred v)]
   1.172 +	v))))
   1.173 +
   1.174 +; Select (with equal probability) N items from a sequence
   1.175 +
   1.176 +(defn- nth-and-rest [n xs]
   1.177 +  "Return a list containing the n-th value of xs and the sequence
   1.178 +   obtained by removing the n-th value from xs."
   1.179 +  (let [[h t] (split-at n xs)]
   1.180 +    (list (first t) (concat h (rest t)))))
   1.181 +
   1.182 +(with-monad dist-m
   1.183 +
   1.184 +  (defn- select-n [n xs]
   1.185 +    (letfn [(select-1 [[s xs]]
   1.186 +	      (uniform (for [i (range (count xs))]
   1.187 +			 (let [[nth rest] (nth-and-rest i xs)]
   1.188 +			   (list (cons nth s) rest)))))]
   1.189 +      ((m-chain (replicate n select-1)) (list '() xs))))
   1.190 +
   1.191 +  (defn select [n xs]
   1.192 +    "Return the distribution for all possible ordered selections of n elements
   1.193 +     out of xs."
   1.194 +    ((m-lift 1 first) (select-n n xs)))
   1.195 +
   1.196 +)
   1.197 +
   1.198 +; Find the probability that a given predicate is satisfied
   1.199 +
   1.200 +(defn prob
   1.201 +  "Return the probability that the predicate pred is satisfied in the
   1.202 +   distribution dist, i.e. the sum of the probabilities of the values
   1.203 +   that satisfy pred."
   1.204 +  [pred dist]
   1.205 +  (apply + (for [[x p] dist :when (pred x)] p)))
   1.206 +