view src/clojure/contrib/probabilities/finite_distributions.clj @ 10:ef7dbbd6452c

added clojure source goodness
author Robert McIntyre <rlm@mit.edu>
date Sat, 21 Aug 2010 06:25:44 -0400
parents
children
line wrap: on
line source
1 ;; Finite probability distributions
3 ;; by Konrad Hinsen
4 ;; last updated January 8, 2010
6 ;; Copyright (c) Konrad Hinsen, 2009-2010. All rights reserved. The use
7 ;; and distribution terms for this software are covered by the Eclipse
8 ;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
9 ;; which can be found in the file epl-v10.html at the root of this
10 ;; distribution. By using this software in any fashion, you are
11 ;; agreeing to be bound by the terms of this license. You must not
12 ;; remove this notice, or any other, from this software.
14 (ns
15 ^{:author "Konrad Hinsen"
16 :doc "Finite probability distributions
17 This library defines a monad for combining finite probability
18 distributions."}
19 clojure.contrib.probabilities.finite-distributions
20 (:use [clojure.contrib.monads
21 :only (defmonad domonad with-monad maybe-t m-lift m-chain)]
22 [clojure.contrib.def :only (defvar)]))
24 ; The probability distribution monad. It is limited to finite probability
25 ; distributions (e.g. there is a finite number of possible value), which
26 ; are represented as maps from values to probabilities.
28 (defmonad dist-m
29 "Monad describing computations on fuzzy quantities, represented by a finite
30 probability distribution for the possible values. A distribution is
31 represented by a map from values to probabilities."
32 [m-result (fn m-result-dist [v]
33 {v 1})
34 m-bind (fn m-bind-dist [mv f]
35 (reduce (partial merge-with +)
36 (for [[x p] mv [y q] (f x)]
37 {y (* q p)})))
38 ])
40 ; Applying the monad transformer maybe-t to the basic dist monad results
41 ; in the cond-dist monad that can handle invalid values. The total probability
42 ; for invalid values ends up as the probability of m-zero (which is nil).
43 ; The function normalize takes this probability out of the distribution and
44 ; re-distributes its weight over the valid values.
46 (defvar cond-dist-m
47 (maybe-t dist-m)
48 "Variant of the dist monad that can handle undefined values.")
50 ; Normalization
52 (defn- scale-by
53 "Multiply each entry in dist by the scale factor s and remove zero entries."
54 [dist s]
55 (into {}
56 (for [[val p] dist :when (> p 0)]
57 [val (* p s)])))
59 (defn normalize-cond [cdist]
60 "Normalize a probability distribution resulting from a computation in
61 the cond-dist monad by re-distributing the weight of the invalid values
62 over the valid ones."
63 (let [missing (get cdist nil 0)
64 dist (dissoc cdist nil)]
65 (cond (zero? missing) dist
66 (= 1 missing) {}
67 :else (let [scale (/ 1 (- 1 missing))]
68 (scale-by dist scale)))))
70 (defn normalize
71 "Convert a weight map (e.g. a map of counter values) to a distribution
72 by multiplying with a normalization factor. If the map has a key
73 :total, its value is assumed to be the sum over all the other values and
74 it is used for normalization. Otherwise, the sum is calculated
75 explicitly. The :total key is removed from the resulting distribution."
76 [weights]
77 (let [total (:total weights)
78 w (dissoc weights :total)
79 s (/ 1 (if (nil? total) (reduce + (vals w)) total))]
80 (scale-by w s)))
82 ; Functions that construct distributions
84 (defn uniform
85 "Return a distribution in which each of the elements of coll
86 has the same probability."
87 [coll]
88 (let [n (count coll)
89 p (/ 1 n)]
90 (into {} (for [x (seq coll)] [x p]))))
92 (defn choose
93 "Construct a distribution from an explicit list of probabilities
94 and values. They are given in the form of a vector of probability-value
95 pairs. In the last pair, the probability can be given by the keyword
96 :else, which stands for 1 minus the total of the other probabilities."
97 [& choices]
98 (letfn [(add-choice [dist [p v]]
99 (cond (nil? p) dist
100 (= p :else)
101 (let [total-p (reduce + (vals dist))]
102 (assoc dist v (- 1 total-p)))
103 :else (assoc dist v p)))]
104 (reduce add-choice {} (partition 2 choices))))
106 (defn bernoulli
107 [p]
108 "Returns the Bernoulli distribution for probability p."
109 (choose p 1 :else 0))
111 (defn- bc
112 [n]
113 "Returns the binomial coefficients for a given n."
114 (let [r (inc n)]
115 (loop [c 1
116 f (list 1)]
117 (if (> c n)
118 f
119 (recur (inc c) (cons (* (/ (- r c) c) (first f)) f))))))
121 (defn binomial
122 [n p]
123 "Returns the binomial distribution, which is the distribution of the
124 number of successes in a series of n experiments whose individual
125 success probability is p."
126 (let [q (- 1 p)
127 n1 (inc n)
128 k (range n1)
129 pk (take n1 (iterate #(* p %) 1))
130 ql (reverse (take n1 (iterate #(* q %) 1)))
131 f (bc n)]
132 (into {} (map vector k (map * f pk ql)))))
134 (defn make-distribution
135 "Returns the distribution in which each element x of the collection
136 has a probability proportional to (f x)"
137 [coll f]
138 (normalize (into {} (for [k coll] [k (f k)]))))
140 (defn zipf
141 "Returns the Zipf distribution in which the numbers k=1..n have
142 probabilities proportional to 1/k^s."
143 [s n]
144 (make-distribution (range 1 (inc n)) #(/ (java.lang.Math/pow % s))))
146 (defn certainly
147 "Returns a distribution in which the single value v has probability 1."
148 [v]
149 {v 1})
151 (with-monad dist-m
153 (defn join-with
154 "Returns the distribution of (f x y) with x from dist1 and y from dist2."
155 [f dist1 dist2]
156 ((m-lift 2 f) dist1 dist2))
158 )
160 (with-monad cond-dist-m
161 (defn cond-prob
162 "Returns the conditional probability for the values in dist that satisfy
163 the predicate pred."
164 [pred dist]
165 (normalize-cond
166 (domonad
167 [v dist
168 :when (pred v)]
169 v))))
171 ; Select (with equal probability) N items from a sequence
173 (defn- nth-and-rest [n xs]
174 "Return a list containing the n-th value of xs and the sequence
175 obtained by removing the n-th value from xs."
176 (let [[h t] (split-at n xs)]
177 (list (first t) (concat h (rest t)))))
179 (with-monad dist-m
181 (defn- select-n [n xs]
182 (letfn [(select-1 [[s xs]]
183 (uniform (for [i (range (count xs))]
184 (let [[nth rest] (nth-and-rest i xs)]
185 (list (cons nth s) rest)))))]
186 ((m-chain (replicate n select-1)) (list '() xs))))
188 (defn select [n xs]
189 "Return the distribution for all possible ordered selections of n elements
190 out of xs."
191 ((m-lift 1 first) (select-n n xs)))
193 )
195 ; Find the probability that a given predicate is satisfied
197 (defn prob
198 "Return the probability that the predicate pred is satisfied in the
199 distribution dist, i.e. the sum of the probabilities of the values
200 that satisfy pred."
201 [pred dist]
202 (apply + (for [[x p] dist :when (pred x)] p)))