annotate src/clojure/contrib/test_contrib/probabilities/examples_finite_distributions.clj @ 10:ef7dbbd6452c

added clojure source goodness
author Robert McIntyre <rlm@mit.edu>
date Sat, 21 Aug 2010 06:25:44 -0400
parents
children
rev   line source
rlm@10 1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
rlm@10 2 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
rlm@10 3 ;;
rlm@10 4 ;; Probability distribution application examples
rlm@10 5 ;;
rlm@10 6 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
rlm@10 7 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
rlm@10 8
rlm@10 9 (ns
rlm@10 10 #^{:author "Konrad Hinsen"
rlm@10 11 :skip-wiki true
rlm@10 12 :doc "Examples for finite probability distribution"}
rlm@10 13 clojure.contrib.probabilities.examples-finite-distributions
rlm@10 14 (:use [clojure.contrib.probabilities.finite-distributions
rlm@10 15 :only (uniform prob cond-prob join-with dist-m choose
rlm@10 16 normalize certainly cond-dist-m normalize-cond)])
rlm@10 17 (:use [clojure.contrib.monads
rlm@10 18 :only (domonad with-monad m-seq m-chain m-lift)])
rlm@10 19 (:require clojure.contrib.accumulators))
rlm@10 20
rlm@10 21 ;; Simple examples using dice
rlm@10 22
rlm@10 23 ; A single die is represented by a uniform distribution over the
rlm@10 24 ; six possible outcomes.
rlm@10 25 (def die (uniform #{1 2 3 4 5 6}))
rlm@10 26
rlm@10 27 ; The probability that the result is odd...
rlm@10 28 (prob odd? die)
rlm@10 29 ; ... or greater than four.
rlm@10 30 (prob #(> % 4) die)
rlm@10 31
rlm@10 32 ; The sum of two dice
rlm@10 33 (def two-dice (join-with + die die))
rlm@10 34 (prob #(> % 6) two-dice)
rlm@10 35
rlm@10 36 ; The sum of two dice using a monad comprehension
rlm@10 37 (assert (= two-dice
rlm@10 38 (domonad dist-m
rlm@10 39 [d1 die
rlm@10 40 d2 die]
rlm@10 41 (+ d1 d2))))
rlm@10 42
rlm@10 43 ; The two values separately, but as an ordered pair
rlm@10 44 (domonad dist-m
rlm@10 45 [d1 die
rlm@10 46 d2 die]
rlm@10 47 (if (< d1 d2) (list d1 d2) (list d2 d1)))
rlm@10 48
rlm@10 49 ; The conditional probability for two dice yielding X if X is odd:
rlm@10 50 (cond-prob odd? two-dice)
rlm@10 51
rlm@10 52 ; A two-step experiment: throw a die, and then add 1 with probability 1/2
rlm@10 53 (domonad dist-m
rlm@10 54 [d die
rlm@10 55 x (choose (/ 1 2) d
rlm@10 56 :else (inc d))]
rlm@10 57 x)
rlm@10 58
rlm@10 59 ; The sum of n dice
rlm@10 60 (defn dice [n]
rlm@10 61 (domonad dist-m
rlm@10 62 [ds (m-seq (replicate n die))]
rlm@10 63 (apply + ds)))
rlm@10 64
rlm@10 65 (assert (= two-dice (dice 2)))
rlm@10 66
rlm@10 67 (dice 3)
rlm@10 68
rlm@10 69
rlm@10 70 ;; Construct an empirical distribution from counters
rlm@10 71
rlm@10 72 ; Using an ordinary counter:
rlm@10 73 (def dist1
rlm@10 74 (normalize
rlm@10 75 (clojure.contrib.accumulators/add-items
rlm@10 76 clojure.contrib.accumulators/empty-counter
rlm@10 77 (for [_ (range 1000)] (rand-int 5)))))
rlm@10 78
rlm@10 79 ; Or, more efficiently, using a counter that already keeps track of its total:
rlm@10 80 (def dist2
rlm@10 81 (normalize
rlm@10 82 (clojure.contrib.accumulators/add-items
rlm@10 83 clojure.contrib.accumulators/empty-counter-with-total
rlm@10 84 (for [_ (range 1000)] (rand-int 5)))))
rlm@10 85
rlm@10 86
rlm@10 87 ;; The Monty Hall game
rlm@10 88 ;; (see http://en.wikipedia.org/wiki/Monty_Hall_problem for a description)
rlm@10 89
rlm@10 90 ; The set of doors. In the classical variant, there are three doors,
rlm@10 91 ; but the code can also work with more than three doors.
rlm@10 92 (def doors #{:A :B :C})
rlm@10 93
rlm@10 94 ; A simulation of the game, step by step:
rlm@10 95 (domonad dist-m
rlm@10 96 [; The prize is hidden behind one of the doors.
rlm@10 97 prize (uniform doors)
rlm@10 98 ; The player make his initial choice.
rlm@10 99 choice (uniform doors)
rlm@10 100 ; The host opens a door which is neither the prize door nor the
rlm@10 101 ; one chosen by the player.
rlm@10 102 opened (uniform (disj doors prize choice))
rlm@10 103 ; If the player stays with his initial choice, the game ends and the
rlm@10 104 ; following line should be commented out. It describes the switch from
rlm@10 105 ; the initial choice to a door that is neither the opened one nor
rlm@10 106 ; his original choice.
rlm@10 107 choice (uniform (disj doors opened choice))
rlm@10 108 ]
rlm@10 109 ; If the chosen door has the prize behind it, the player wins.
rlm@10 110 (if (= choice prize) :win :loose))
rlm@10 111
rlm@10 112
rlm@10 113 ;; Tree growth simulation
rlm@10 114 ;; Adapted from the code in:
rlm@10 115 ;; Martin Erwig and Steve Kollmansberger,
rlm@10 116 ;; "Probabilistic Functional Programming in Haskell",
rlm@10 117 ;; Journal of Functional Programming, Vol. 16, No. 1, 21-34, 2006
rlm@10 118 ;; http://web.engr.oregonstate.edu/~erwig/papers/abstracts.html#JFP06a
rlm@10 119
rlm@10 120 ; A tree is represented by two attributes: its state (alive, hit, fallen),
rlm@10 121 ; and its height (an integer). A new tree starts out alive and with zero height.
rlm@10 122 (def new-tree {:state :alive, :height 0})
rlm@10 123
rlm@10 124 ; An evolution step in the simulation modifies alive trees only. They can
rlm@10 125 ; either grow by one (90% probability), be hit by lightning and then stop
rlm@10 126 ; growing (4% probability), or fall down (6% probability).
rlm@10 127 (defn evolve-1 [tree]
rlm@10 128 (let [{s :state h :height} tree]
rlm@10 129 (if (= s :alive)
rlm@10 130 (choose 0.9 (assoc tree :height (inc (:height tree)))
rlm@10 131 0.04 (assoc tree :state :hit)
rlm@10 132 :else {:state :fallen, :height 0})
rlm@10 133 (certainly tree))))
rlm@10 134
rlm@10 135 ; Multiple evolution steps can be chained together with m-chain,
rlm@10 136 ; since each step's input is the output of the previous step.
rlm@10 137 (with-monad dist-m
rlm@10 138 (defn evolve [n tree]
rlm@10 139 ((m-chain (replicate n evolve-1)) tree)))
rlm@10 140
rlm@10 141 ; Try it for zero, one, or two steps.
rlm@10 142 (evolve 0 new-tree)
rlm@10 143 (evolve 1 new-tree)
rlm@10 144 (evolve 2 new-tree)
rlm@10 145
rlm@10 146 ; We can also get a distribution of the height only:
rlm@10 147 (with-monad dist-m
rlm@10 148 ((m-lift 1 :height) (evolve 2 new-tree)))
rlm@10 149
rlm@10 150
rlm@10 151
rlm@10 152 ;; Bayesian inference
rlm@10 153 ;;
rlm@10 154 ;; Suppose someone has three dice, one with six faces, one with eight, and
rlm@10 155 ;; one with twelve. This person throws one die and gives us the number,
rlm@10 156 ;; but doesn't tell us which die it was. What are the Bayesian probabilities
rlm@10 157 ;; for each of the three dice, given the observation we have?
rlm@10 158
rlm@10 159 ; A function that returns the distribution of a dice with n faces.
rlm@10 160 (defn die-n [n] (uniform (range 1 (inc n))))
rlm@10 161
rlm@10 162 ; The three dice in the game with their distributions. With this map, we
rlm@10 163 ; can easily calculate the probability for an observation under the
rlm@10 164 ; condition that a particular die was used.
rlm@10 165 (def dice {:six (die-n 6)
rlm@10 166 :eight (die-n 8)
rlm@10 167 :twelve (die-n 12)})
rlm@10 168
rlm@10 169 ; The only prior knowledge is that one of the three dice is used, so we
rlm@10 170 ; have no better than a uniform distribution to start with.
rlm@10 171 (def prior (uniform (keys dice)))
rlm@10 172
rlm@10 173 ; Add a single observation to the information contained in the
rlm@10 174 ; distribution. Adding an observation consists of
rlm@10 175 ; 1) Draw a die from the prior distribution.
rlm@10 176 ; 2) Draw an observation from the distribution of that die.
rlm@10 177 ; 3) Eliminate (replace by nil) the trials that do not match the observation.
rlm@10 178 ; 4) Normalize the distribution for the non-nil values.
rlm@10 179 (defn add-observation [prior observation]
rlm@10 180 (normalize-cond
rlm@10 181 (domonad cond-dist-m
rlm@10 182 [die prior
rlm@10 183 number (get dice die)
rlm@10 184 :when (= number observation) ]
rlm@10 185 die)))
rlm@10 186
rlm@10 187 ; Add one observation.
rlm@10 188 (add-observation prior 1)
rlm@10 189
rlm@10 190 ; Add three consecutive observations.
rlm@10 191 (-> prior (add-observation 1)
rlm@10 192 (add-observation 3)
rlm@10 193 (add-observation 7))
rlm@10 194
rlm@10 195 ; We can also add multiple observations in a single trial, but this
rlm@10 196 ; is slower because more combinations have to be taken into account.
rlm@10 197 ; With Bayesian inference, it is most efficient to eliminate choices
rlm@10 198 ; as early as possible.
rlm@10 199 (defn add-observations [prior observations]
rlm@10 200 (with-monad cond-dist-m
rlm@10 201 (let [n-nums #(m-seq (replicate (count observations) (get dice %)))]
rlm@10 202 (normalize-cond
rlm@10 203 (domonad
rlm@10 204 [die prior
rlm@10 205 nums (n-nums die)
rlm@10 206 :when (= nums observations)]
rlm@10 207 die)))))
rlm@10 208
rlm@10 209 (add-observations prior [1 3 7])