rlm@10
|
1 ;; Finite probability distributions
|
rlm@10
|
2
|
rlm@10
|
3 ;; by Konrad Hinsen
|
rlm@10
|
4 ;; last updated January 8, 2010
|
rlm@10
|
5
|
rlm@10
|
6 ;; Copyright (c) Konrad Hinsen, 2009-2010. All rights reserved. The use
|
rlm@10
|
7 ;; and distribution terms for this software are covered by the Eclipse
|
rlm@10
|
8 ;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
|
rlm@10
|
9 ;; which can be found in the file epl-v10.html at the root of this
|
rlm@10
|
10 ;; distribution. By using this software in any fashion, you are
|
rlm@10
|
11 ;; agreeing to be bound by the terms of this license. You must not
|
rlm@10
|
12 ;; remove this notice, or any other, from this software.
|
rlm@10
|
13
|
rlm@10
|
14 (ns
|
rlm@10
|
15 ^{:author "Konrad Hinsen"
|
rlm@10
|
16 :doc "Finite probability distributions
|
rlm@10
|
17 This library defines a monad for combining finite probability
|
rlm@10
|
18 distributions."}
|
rlm@10
|
19 clojure.contrib.probabilities.finite-distributions
|
rlm@10
|
20 (:use [clojure.contrib.monads
|
rlm@10
|
21 :only (defmonad domonad with-monad maybe-t m-lift m-chain)]
|
rlm@10
|
22 [clojure.contrib.def :only (defvar)]))
|
rlm@10
|
23
|
rlm@10
|
24 ; The probability distribution monad. It is limited to finite probability
|
rlm@10
|
25 ; distributions (e.g. there is a finite number of possible value), which
|
rlm@10
|
26 ; are represented as maps from values to probabilities.
|
rlm@10
|
27
|
rlm@10
|
28 (defmonad dist-m
|
rlm@10
|
29 "Monad describing computations on fuzzy quantities, represented by a finite
|
rlm@10
|
30 probability distribution for the possible values. A distribution is
|
rlm@10
|
31 represented by a map from values to probabilities."
|
rlm@10
|
32 [m-result (fn m-result-dist [v]
|
rlm@10
|
33 {v 1})
|
rlm@10
|
34 m-bind (fn m-bind-dist [mv f]
|
rlm@10
|
35 (reduce (partial merge-with +)
|
rlm@10
|
36 (for [[x p] mv [y q] (f x)]
|
rlm@10
|
37 {y (* q p)})))
|
rlm@10
|
38 ])
|
rlm@10
|
39
|
rlm@10
|
40 ; Applying the monad transformer maybe-t to the basic dist monad results
|
rlm@10
|
41 ; in the cond-dist monad that can handle invalid values. The total probability
|
rlm@10
|
42 ; for invalid values ends up as the probability of m-zero (which is nil).
|
rlm@10
|
43 ; The function normalize takes this probability out of the distribution and
|
rlm@10
|
44 ; re-distributes its weight over the valid values.
|
rlm@10
|
45
|
rlm@10
|
46 (defvar cond-dist-m
|
rlm@10
|
47 (maybe-t dist-m)
|
rlm@10
|
48 "Variant of the dist monad that can handle undefined values.")
|
rlm@10
|
49
|
rlm@10
|
50 ; Normalization
|
rlm@10
|
51
|
rlm@10
|
52 (defn- scale-by
|
rlm@10
|
53 "Multiply each entry in dist by the scale factor s and remove zero entries."
|
rlm@10
|
54 [dist s]
|
rlm@10
|
55 (into {}
|
rlm@10
|
56 (for [[val p] dist :when (> p 0)]
|
rlm@10
|
57 [val (* p s)])))
|
rlm@10
|
58
|
rlm@10
|
59 (defn normalize-cond [cdist]
|
rlm@10
|
60 "Normalize a probability distribution resulting from a computation in
|
rlm@10
|
61 the cond-dist monad by re-distributing the weight of the invalid values
|
rlm@10
|
62 over the valid ones."
|
rlm@10
|
63 (let [missing (get cdist nil 0)
|
rlm@10
|
64 dist (dissoc cdist nil)]
|
rlm@10
|
65 (cond (zero? missing) dist
|
rlm@10
|
66 (= 1 missing) {}
|
rlm@10
|
67 :else (let [scale (/ 1 (- 1 missing))]
|
rlm@10
|
68 (scale-by dist scale)))))
|
rlm@10
|
69
|
rlm@10
|
70 (defn normalize
|
rlm@10
|
71 "Convert a weight map (e.g. a map of counter values) to a distribution
|
rlm@10
|
72 by multiplying with a normalization factor. If the map has a key
|
rlm@10
|
73 :total, its value is assumed to be the sum over all the other values and
|
rlm@10
|
74 it is used for normalization. Otherwise, the sum is calculated
|
rlm@10
|
75 explicitly. The :total key is removed from the resulting distribution."
|
rlm@10
|
76 [weights]
|
rlm@10
|
77 (let [total (:total weights)
|
rlm@10
|
78 w (dissoc weights :total)
|
rlm@10
|
79 s (/ 1 (if (nil? total) (reduce + (vals w)) total))]
|
rlm@10
|
80 (scale-by w s)))
|
rlm@10
|
81
|
rlm@10
|
82 ; Functions that construct distributions
|
rlm@10
|
83
|
rlm@10
|
84 (defn uniform
|
rlm@10
|
85 "Return a distribution in which each of the elements of coll
|
rlm@10
|
86 has the same probability."
|
rlm@10
|
87 [coll]
|
rlm@10
|
88 (let [n (count coll)
|
rlm@10
|
89 p (/ 1 n)]
|
rlm@10
|
90 (into {} (for [x (seq coll)] [x p]))))
|
rlm@10
|
91
|
rlm@10
|
92 (defn choose
|
rlm@10
|
93 "Construct a distribution from an explicit list of probabilities
|
rlm@10
|
94 and values. They are given in the form of a vector of probability-value
|
rlm@10
|
95 pairs. In the last pair, the probability can be given by the keyword
|
rlm@10
|
96 :else, which stands for 1 minus the total of the other probabilities."
|
rlm@10
|
97 [& choices]
|
rlm@10
|
98 (letfn [(add-choice [dist [p v]]
|
rlm@10
|
99 (cond (nil? p) dist
|
rlm@10
|
100 (= p :else)
|
rlm@10
|
101 (let [total-p (reduce + (vals dist))]
|
rlm@10
|
102 (assoc dist v (- 1 total-p)))
|
rlm@10
|
103 :else (assoc dist v p)))]
|
rlm@10
|
104 (reduce add-choice {} (partition 2 choices))))
|
rlm@10
|
105
|
rlm@10
|
106 (defn bernoulli
|
rlm@10
|
107 [p]
|
rlm@10
|
108 "Returns the Bernoulli distribution for probability p."
|
rlm@10
|
109 (choose p 1 :else 0))
|
rlm@10
|
110
|
rlm@10
|
111 (defn- bc
|
rlm@10
|
112 [n]
|
rlm@10
|
113 "Returns the binomial coefficients for a given n."
|
rlm@10
|
114 (let [r (inc n)]
|
rlm@10
|
115 (loop [c 1
|
rlm@10
|
116 f (list 1)]
|
rlm@10
|
117 (if (> c n)
|
rlm@10
|
118 f
|
rlm@10
|
119 (recur (inc c) (cons (* (/ (- r c) c) (first f)) f))))))
|
rlm@10
|
120
|
rlm@10
|
121 (defn binomial
|
rlm@10
|
122 [n p]
|
rlm@10
|
123 "Returns the binomial distribution, which is the distribution of the
|
rlm@10
|
124 number of successes in a series of n experiments whose individual
|
rlm@10
|
125 success probability is p."
|
rlm@10
|
126 (let [q (- 1 p)
|
rlm@10
|
127 n1 (inc n)
|
rlm@10
|
128 k (range n1)
|
rlm@10
|
129 pk (take n1 (iterate #(* p %) 1))
|
rlm@10
|
130 ql (reverse (take n1 (iterate #(* q %) 1)))
|
rlm@10
|
131 f (bc n)]
|
rlm@10
|
132 (into {} (map vector k (map * f pk ql)))))
|
rlm@10
|
133
|
rlm@10
|
134 (defn make-distribution
|
rlm@10
|
135 "Returns the distribution in which each element x of the collection
|
rlm@10
|
136 has a probability proportional to (f x)"
|
rlm@10
|
137 [coll f]
|
rlm@10
|
138 (normalize (into {} (for [k coll] [k (f k)]))))
|
rlm@10
|
139
|
rlm@10
|
140 (defn zipf
|
rlm@10
|
141 "Returns the Zipf distribution in which the numbers k=1..n have
|
rlm@10
|
142 probabilities proportional to 1/k^s."
|
rlm@10
|
143 [s n]
|
rlm@10
|
144 (make-distribution (range 1 (inc n)) #(/ (java.lang.Math/pow % s))))
|
rlm@10
|
145
|
rlm@10
|
146 (defn certainly
|
rlm@10
|
147 "Returns a distribution in which the single value v has probability 1."
|
rlm@10
|
148 [v]
|
rlm@10
|
149 {v 1})
|
rlm@10
|
150
|
rlm@10
|
151 (with-monad dist-m
|
rlm@10
|
152
|
rlm@10
|
153 (defn join-with
|
rlm@10
|
154 "Returns the distribution of (f x y) with x from dist1 and y from dist2."
|
rlm@10
|
155 [f dist1 dist2]
|
rlm@10
|
156 ((m-lift 2 f) dist1 dist2))
|
rlm@10
|
157
|
rlm@10
|
158 )
|
rlm@10
|
159
|
rlm@10
|
160 (with-monad cond-dist-m
|
rlm@10
|
161 (defn cond-prob
|
rlm@10
|
162 "Returns the conditional probability for the values in dist that satisfy
|
rlm@10
|
163 the predicate pred."
|
rlm@10
|
164 [pred dist]
|
rlm@10
|
165 (normalize-cond
|
rlm@10
|
166 (domonad
|
rlm@10
|
167 [v dist
|
rlm@10
|
168 :when (pred v)]
|
rlm@10
|
169 v))))
|
rlm@10
|
170
|
rlm@10
|
171 ; Select (with equal probability) N items from a sequence
|
rlm@10
|
172
|
rlm@10
|
173 (defn- nth-and-rest [n xs]
|
rlm@10
|
174 "Return a list containing the n-th value of xs and the sequence
|
rlm@10
|
175 obtained by removing the n-th value from xs."
|
rlm@10
|
176 (let [[h t] (split-at n xs)]
|
rlm@10
|
177 (list (first t) (concat h (rest t)))))
|
rlm@10
|
178
|
rlm@10
|
179 (with-monad dist-m
|
rlm@10
|
180
|
rlm@10
|
181 (defn- select-n [n xs]
|
rlm@10
|
182 (letfn [(select-1 [[s xs]]
|
rlm@10
|
183 (uniform (for [i (range (count xs))]
|
rlm@10
|
184 (let [[nth rest] (nth-and-rest i xs)]
|
rlm@10
|
185 (list (cons nth s) rest)))))]
|
rlm@10
|
186 ((m-chain (replicate n select-1)) (list '() xs))))
|
rlm@10
|
187
|
rlm@10
|
188 (defn select [n xs]
|
rlm@10
|
189 "Return the distribution for all possible ordered selections of n elements
|
rlm@10
|
190 out of xs."
|
rlm@10
|
191 ((m-lift 1 first) (select-n n xs)))
|
rlm@10
|
192
|
rlm@10
|
193 )
|
rlm@10
|
194
|
rlm@10
|
195 ; Find the probability that a given predicate is satisfied
|
rlm@10
|
196
|
rlm@10
|
197 (defn prob
|
rlm@10
|
198 "Return the probability that the predicate pred is satisfied in the
|
rlm@10
|
199 distribution dist, i.e. the sum of the probabilities of the values
|
rlm@10
|
200 that satisfy pred."
|
rlm@10
|
201 [pred dist]
|
rlm@10
|
202 (apply + (for [[x p] dist :when (pred x)] p)))
|
rlm@10
|
203
|