rlm@10
|
1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
rlm@10
|
2 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
rlm@10
|
3 ;;
|
rlm@10
|
4 ;; Probability distribution application examples
|
rlm@10
|
5 ;;
|
rlm@10
|
6 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
rlm@10
|
7 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
rlm@10
|
8
|
rlm@10
|
9 (ns
|
rlm@10
|
10 #^{:author "Konrad Hinsen"
|
rlm@10
|
11 :skip-wiki true
|
rlm@10
|
12 :doc "Examples for finite probability distribution"}
|
rlm@10
|
13 clojure.contrib.probabilities.examples-finite-distributions
|
rlm@10
|
14 (:use [clojure.contrib.probabilities.finite-distributions
|
rlm@10
|
15 :only (uniform prob cond-prob join-with dist-m choose
|
rlm@10
|
16 normalize certainly cond-dist-m normalize-cond)])
|
rlm@10
|
17 (:use [clojure.contrib.monads
|
rlm@10
|
18 :only (domonad with-monad m-seq m-chain m-lift)])
|
rlm@10
|
19 (:require clojure.contrib.accumulators))
|
rlm@10
|
20
|
rlm@10
|
21 ;; Simple examples using dice
|
rlm@10
|
22
|
rlm@10
|
23 ; A single die is represented by a uniform distribution over the
|
rlm@10
|
24 ; six possible outcomes.
|
rlm@10
|
25 (def die (uniform #{1 2 3 4 5 6}))
|
rlm@10
|
26
|
rlm@10
|
27 ; The probability that the result is odd...
|
rlm@10
|
28 (prob odd? die)
|
rlm@10
|
29 ; ... or greater than four.
|
rlm@10
|
30 (prob #(> % 4) die)
|
rlm@10
|
31
|
rlm@10
|
32 ; The sum of two dice
|
rlm@10
|
33 (def two-dice (join-with + die die))
|
rlm@10
|
34 (prob #(> % 6) two-dice)
|
rlm@10
|
35
|
rlm@10
|
36 ; The sum of two dice using a monad comprehension
|
rlm@10
|
37 (assert (= two-dice
|
rlm@10
|
38 (domonad dist-m
|
rlm@10
|
39 [d1 die
|
rlm@10
|
40 d2 die]
|
rlm@10
|
41 (+ d1 d2))))
|
rlm@10
|
42
|
rlm@10
|
43 ; The two values separately, but as an ordered pair
|
rlm@10
|
44 (domonad dist-m
|
rlm@10
|
45 [d1 die
|
rlm@10
|
46 d2 die]
|
rlm@10
|
47 (if (< d1 d2) (list d1 d2) (list d2 d1)))
|
rlm@10
|
48
|
rlm@10
|
49 ; The conditional probability for two dice yielding X if X is odd:
|
rlm@10
|
50 (cond-prob odd? two-dice)
|
rlm@10
|
51
|
rlm@10
|
52 ; A two-step experiment: throw a die, and then add 1 with probability 1/2
|
rlm@10
|
53 (domonad dist-m
|
rlm@10
|
54 [d die
|
rlm@10
|
55 x (choose (/ 1 2) d
|
rlm@10
|
56 :else (inc d))]
|
rlm@10
|
57 x)
|
rlm@10
|
58
|
rlm@10
|
59 ; The sum of n dice
|
rlm@10
|
60 (defn dice [n]
|
rlm@10
|
61 (domonad dist-m
|
rlm@10
|
62 [ds (m-seq (replicate n die))]
|
rlm@10
|
63 (apply + ds)))
|
rlm@10
|
64
|
rlm@10
|
65 (assert (= two-dice (dice 2)))
|
rlm@10
|
66
|
rlm@10
|
67 (dice 3)
|
rlm@10
|
68
|
rlm@10
|
69
|
rlm@10
|
70 ;; Construct an empirical distribution from counters
|
rlm@10
|
71
|
rlm@10
|
72 ; Using an ordinary counter:
|
rlm@10
|
73 (def dist1
|
rlm@10
|
74 (normalize
|
rlm@10
|
75 (clojure.contrib.accumulators/add-items
|
rlm@10
|
76 clojure.contrib.accumulators/empty-counter
|
rlm@10
|
77 (for [_ (range 1000)] (rand-int 5)))))
|
rlm@10
|
78
|
rlm@10
|
79 ; Or, more efficiently, using a counter that already keeps track of its total:
|
rlm@10
|
80 (def dist2
|
rlm@10
|
81 (normalize
|
rlm@10
|
82 (clojure.contrib.accumulators/add-items
|
rlm@10
|
83 clojure.contrib.accumulators/empty-counter-with-total
|
rlm@10
|
84 (for [_ (range 1000)] (rand-int 5)))))
|
rlm@10
|
85
|
rlm@10
|
86
|
rlm@10
|
87 ;; The Monty Hall game
|
rlm@10
|
88 ;; (see http://en.wikipedia.org/wiki/Monty_Hall_problem for a description)
|
rlm@10
|
89
|
rlm@10
|
90 ; The set of doors. In the classical variant, there are three doors,
|
rlm@10
|
91 ; but the code can also work with more than three doors.
|
rlm@10
|
92 (def doors #{:A :B :C})
|
rlm@10
|
93
|
rlm@10
|
94 ; A simulation of the game, step by step:
|
rlm@10
|
95 (domonad dist-m
|
rlm@10
|
96 [; The prize is hidden behind one of the doors.
|
rlm@10
|
97 prize (uniform doors)
|
rlm@10
|
98 ; The player make his initial choice.
|
rlm@10
|
99 choice (uniform doors)
|
rlm@10
|
100 ; The host opens a door which is neither the prize door nor the
|
rlm@10
|
101 ; one chosen by the player.
|
rlm@10
|
102 opened (uniform (disj doors prize choice))
|
rlm@10
|
103 ; If the player stays with his initial choice, the game ends and the
|
rlm@10
|
104 ; following line should be commented out. It describes the switch from
|
rlm@10
|
105 ; the initial choice to a door that is neither the opened one nor
|
rlm@10
|
106 ; his original choice.
|
rlm@10
|
107 choice (uniform (disj doors opened choice))
|
rlm@10
|
108 ]
|
rlm@10
|
109 ; If the chosen door has the prize behind it, the player wins.
|
rlm@10
|
110 (if (= choice prize) :win :loose))
|
rlm@10
|
111
|
rlm@10
|
112
|
rlm@10
|
113 ;; Tree growth simulation
|
rlm@10
|
114 ;; Adapted from the code in:
|
rlm@10
|
115 ;; Martin Erwig and Steve Kollmansberger,
|
rlm@10
|
116 ;; "Probabilistic Functional Programming in Haskell",
|
rlm@10
|
117 ;; Journal of Functional Programming, Vol. 16, No. 1, 21-34, 2006
|
rlm@10
|
118 ;; http://web.engr.oregonstate.edu/~erwig/papers/abstracts.html#JFP06a
|
rlm@10
|
119
|
rlm@10
|
120 ; A tree is represented by two attributes: its state (alive, hit, fallen),
|
rlm@10
|
121 ; and its height (an integer). A new tree starts out alive and with zero height.
|
rlm@10
|
122 (def new-tree {:state :alive, :height 0})
|
rlm@10
|
123
|
rlm@10
|
124 ; An evolution step in the simulation modifies alive trees only. They can
|
rlm@10
|
125 ; either grow by one (90% probability), be hit by lightning and then stop
|
rlm@10
|
126 ; growing (4% probability), or fall down (6% probability).
|
rlm@10
|
127 (defn evolve-1 [tree]
|
rlm@10
|
128 (let [{s :state h :height} tree]
|
rlm@10
|
129 (if (= s :alive)
|
rlm@10
|
130 (choose 0.9 (assoc tree :height (inc (:height tree)))
|
rlm@10
|
131 0.04 (assoc tree :state :hit)
|
rlm@10
|
132 :else {:state :fallen, :height 0})
|
rlm@10
|
133 (certainly tree))))
|
rlm@10
|
134
|
rlm@10
|
135 ; Multiple evolution steps can be chained together with m-chain,
|
rlm@10
|
136 ; since each step's input is the output of the previous step.
|
rlm@10
|
137 (with-monad dist-m
|
rlm@10
|
138 (defn evolve [n tree]
|
rlm@10
|
139 ((m-chain (replicate n evolve-1)) tree)))
|
rlm@10
|
140
|
rlm@10
|
141 ; Try it for zero, one, or two steps.
|
rlm@10
|
142 (evolve 0 new-tree)
|
rlm@10
|
143 (evolve 1 new-tree)
|
rlm@10
|
144 (evolve 2 new-tree)
|
rlm@10
|
145
|
rlm@10
|
146 ; We can also get a distribution of the height only:
|
rlm@10
|
147 (with-monad dist-m
|
rlm@10
|
148 ((m-lift 1 :height) (evolve 2 new-tree)))
|
rlm@10
|
149
|
rlm@10
|
150
|
rlm@10
|
151
|
rlm@10
|
152 ;; Bayesian inference
|
rlm@10
|
153 ;;
|
rlm@10
|
154 ;; Suppose someone has three dice, one with six faces, one with eight, and
|
rlm@10
|
155 ;; one with twelve. This person throws one die and gives us the number,
|
rlm@10
|
156 ;; but doesn't tell us which die it was. What are the Bayesian probabilities
|
rlm@10
|
157 ;; for each of the three dice, given the observation we have?
|
rlm@10
|
158
|
rlm@10
|
159 ; A function that returns the distribution of a dice with n faces.
|
rlm@10
|
160 (defn die-n [n] (uniform (range 1 (inc n))))
|
rlm@10
|
161
|
rlm@10
|
162 ; The three dice in the game with their distributions. With this map, we
|
rlm@10
|
163 ; can easily calculate the probability for an observation under the
|
rlm@10
|
164 ; condition that a particular die was used.
|
rlm@10
|
165 (def dice {:six (die-n 6)
|
rlm@10
|
166 :eight (die-n 8)
|
rlm@10
|
167 :twelve (die-n 12)})
|
rlm@10
|
168
|
rlm@10
|
169 ; The only prior knowledge is that one of the three dice is used, so we
|
rlm@10
|
170 ; have no better than a uniform distribution to start with.
|
rlm@10
|
171 (def prior (uniform (keys dice)))
|
rlm@10
|
172
|
rlm@10
|
173 ; Add a single observation to the information contained in the
|
rlm@10
|
174 ; distribution. Adding an observation consists of
|
rlm@10
|
175 ; 1) Draw a die from the prior distribution.
|
rlm@10
|
176 ; 2) Draw an observation from the distribution of that die.
|
rlm@10
|
177 ; 3) Eliminate (replace by nil) the trials that do not match the observation.
|
rlm@10
|
178 ; 4) Normalize the distribution for the non-nil values.
|
rlm@10
|
179 (defn add-observation [prior observation]
|
rlm@10
|
180 (normalize-cond
|
rlm@10
|
181 (domonad cond-dist-m
|
rlm@10
|
182 [die prior
|
rlm@10
|
183 number (get dice die)
|
rlm@10
|
184 :when (= number observation) ]
|
rlm@10
|
185 die)))
|
rlm@10
|
186
|
rlm@10
|
187 ; Add one observation.
|
rlm@10
|
188 (add-observation prior 1)
|
rlm@10
|
189
|
rlm@10
|
190 ; Add three consecutive observations.
|
rlm@10
|
191 (-> prior (add-observation 1)
|
rlm@10
|
192 (add-observation 3)
|
rlm@10
|
193 (add-observation 7))
|
rlm@10
|
194
|
rlm@10
|
195 ; We can also add multiple observations in a single trial, but this
|
rlm@10
|
196 ; is slower because more combinations have to be taken into account.
|
rlm@10
|
197 ; With Bayesian inference, it is most efficient to eliminate choices
|
rlm@10
|
198 ; as early as possible.
|
rlm@10
|
199 (defn add-observations [prior observations]
|
rlm@10
|
200 (with-monad cond-dist-m
|
rlm@10
|
201 (let [n-nums #(m-seq (replicate (count observations) (get dice %)))]
|
rlm@10
|
202 (normalize-cond
|
rlm@10
|
203 (domonad
|
rlm@10
|
204 [die prior
|
rlm@10
|
205 nums (n-nums die)
|
rlm@10
|
206 :when (= nums observations)]
|
rlm@10
|
207 die)))))
|
rlm@10
|
208
|
rlm@10
|
209 (add-observations prior [1 3 7])
|