diff src/clojure/contrib/test_contrib/probabilities/examples_finite_distributions.clj @ 10:ef7dbbd6452c

added clojure source goodness
author Robert McIntyre <rlm@mit.edu>
date Sat, 21 Aug 2010 06:25:44 -0400
parents
children
line wrap: on
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/clojure/contrib/test_contrib/probabilities/examples_finite_distributions.clj	Sat Aug 21 06:25:44 2010 -0400
     1.3 @@ -0,0 +1,209 @@
     1.4 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     1.5 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     1.6 +;;
     1.7 +;; Probability distribution application examples
     1.8 +;;
     1.9 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    1.10 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    1.11 +
    1.12 +(ns
    1.13 +  #^{:author "Konrad Hinsen"
    1.14 +     :skip-wiki true
    1.15 +     :doc "Examples for finite probability distribution"}
    1.16 +  clojure.contrib.probabilities.examples-finite-distributions
    1.17 +  (:use [clojure.contrib.probabilities.finite-distributions
    1.18 +	 :only (uniform prob cond-prob join-with dist-m choose
    1.19 +	        normalize certainly cond-dist-m normalize-cond)])
    1.20 +  (:use [clojure.contrib.monads
    1.21 +	 :only (domonad with-monad m-seq m-chain m-lift)])
    1.22 +  (:require clojure.contrib.accumulators))
    1.23 +
    1.24 +;; Simple examples using dice
    1.25 +
    1.26 +; A single die is represented by a uniform distribution over the
    1.27 +; six possible outcomes.
    1.28 +(def die (uniform #{1 2 3 4 5 6}))
    1.29 +
    1.30 +; The probability that the result is odd...
    1.31 +(prob odd? die)
    1.32 +; ... or greater than four.
    1.33 +(prob #(> % 4) die)
    1.34 +
    1.35 +; The sum of two dice
    1.36 +(def two-dice (join-with + die die))
    1.37 +(prob #(> % 6) two-dice)
    1.38 +
    1.39 +; The sum of two dice using a monad comprehension
    1.40 +(assert (= two-dice
    1.41 +	   (domonad dist-m
    1.42 +		    [d1 die
    1.43 +		     d2 die]
    1.44 +		    (+ d1 d2))))
    1.45 +
    1.46 +; The two values separately, but as an ordered pair
    1.47 +(domonad dist-m
    1.48 +  [d1 die
    1.49 +   d2 die]
    1.50 +  (if (< d1 d2) (list d1 d2) (list d2 d1)))
    1.51 +
    1.52 +; The conditional probability for two dice yielding X if X is odd:
    1.53 +(cond-prob odd? two-dice)
    1.54 +
    1.55 +; A two-step experiment: throw a die, and then add 1 with probability 1/2
    1.56 +(domonad dist-m
    1.57 +  [d die
    1.58 +   x (choose (/ 1 2)  d
    1.59 +	     :else    (inc d))]
    1.60 +  x)
    1.61 +
    1.62 +; The sum of n dice
    1.63 +(defn dice [n]
    1.64 +   (domonad dist-m
    1.65 +      [ds (m-seq (replicate n die))]
    1.66 +      (apply + ds)))
    1.67 +
    1.68 +(assert (= two-dice (dice 2)))
    1.69 +
    1.70 +(dice 3)
    1.71 +
    1.72 +
    1.73 +;; Construct an empirical distribution from counters
    1.74 +
    1.75 +; Using an ordinary counter:
    1.76 +(def dist1
    1.77 +  (normalize
    1.78 +    (clojure.contrib.accumulators/add-items
    1.79 +      clojure.contrib.accumulators/empty-counter
    1.80 +      (for [_ (range 1000)] (rand-int 5)))))
    1.81 +
    1.82 +; Or, more efficiently, using a counter that already keeps track of its total:
    1.83 +(def dist2
    1.84 +  (normalize
    1.85 +    (clojure.contrib.accumulators/add-items
    1.86 +      clojure.contrib.accumulators/empty-counter-with-total
    1.87 +      (for [_ (range 1000)] (rand-int 5)))))
    1.88 +
    1.89 +
    1.90 +;; The Monty Hall game
    1.91 +;; (see http://en.wikipedia.org/wiki/Monty_Hall_problem for a description)
    1.92 +
    1.93 +; The set of doors. In the classical variant, there are three doors,
    1.94 +; but the code can also work with more than three doors.
    1.95 +(def doors #{:A :B :C})
    1.96 +
    1.97 +; A simulation of the game, step by step:
    1.98 +(domonad dist-m
    1.99 +  [; The prize is hidden behind one of the doors.
   1.100 +   prize  (uniform doors)
   1.101 +   ; The player make his initial choice.
   1.102 +   choice (uniform doors)
   1.103 +   ; The host opens a door which is neither the prize door nor the
   1.104 +   ; one chosen by the player.
   1.105 +   opened (uniform (disj doors prize choice))
   1.106 +   ; If the player stays with his initial choice, the game ends and the
   1.107 +   ; following line should be commented out. It describes the switch from
   1.108 +   ; the initial choice to a door that is neither the opened one nor
   1.109 +   ; his original choice.
   1.110 +   choice (uniform (disj doors opened choice))
   1.111 +   ]
   1.112 +  ; If the chosen door has the prize behind it, the player wins.
   1.113 +  (if (= choice prize) :win :loose))
   1.114 +
   1.115 +
   1.116 +;; Tree growth simulation
   1.117 +;; Adapted from the code in:
   1.118 +;; Martin Erwig and Steve Kollmansberger,
   1.119 +;; "Probabilistic Functional Programming in Haskell",
   1.120 +;; Journal of Functional Programming, Vol. 16, No. 1, 21-34, 2006
   1.121 +;; http://web.engr.oregonstate.edu/~erwig/papers/abstracts.html#JFP06a
   1.122 +
   1.123 +; A tree is represented by two attributes: its state (alive, hit, fallen),
   1.124 +; and its height (an integer). A new tree starts out alive and with zero height.
   1.125 +(def new-tree {:state :alive, :height 0})
   1.126 +
   1.127 +; An evolution step in the simulation modifies alive trees only. They can
   1.128 +; either grow by one (90% probability), be hit by lightning and then stop
   1.129 +; growing (4% probability), or fall down (6% probability).
   1.130 +(defn evolve-1 [tree]
   1.131 +  (let [{s :state h :height} tree]
   1.132 +    (if (= s :alive)
   1.133 +      (choose 0.9   (assoc tree :height (inc (:height tree)))
   1.134 +	      0.04  (assoc tree :state :hit) 
   1.135 +	      :else {:state :fallen, :height 0})
   1.136 +      (certainly tree))))
   1.137 +
   1.138 +; Multiple evolution steps can be chained together with m-chain,
   1.139 +; since each step's input is the output of the previous step.
   1.140 +(with-monad dist-m
   1.141 +  (defn evolve [n tree]
   1.142 +    ((m-chain (replicate n evolve-1)) tree)))
   1.143 +
   1.144 +; Try it for zero, one, or two steps.
   1.145 +(evolve 0 new-tree)
   1.146 +(evolve 1 new-tree)
   1.147 +(evolve 2 new-tree)
   1.148 +
   1.149 +; We can also get a distribution of the height only:
   1.150 +(with-monad dist-m
   1.151 +  ((m-lift 1 :height) (evolve 2 new-tree)))
   1.152 +
   1.153 +
   1.154 +
   1.155 +;; Bayesian inference
   1.156 +;;
   1.157 +;; Suppose someone has three dice, one with six faces, one with eight, and
   1.158 +;; one with twelve. This person throws one die and gives us the number,
   1.159 +;; but doesn't tell us which die it was. What are the Bayesian probabilities
   1.160 +;; for each of the three dice, given the observation we have?
   1.161 +
   1.162 +; A function that returns the distribution of a dice with n faces.
   1.163 +(defn die-n [n] (uniform (range 1 (inc n))))
   1.164 +
   1.165 +; The three dice in the game with their distributions. With this map, we
   1.166 +; can easily calculate the probability for an observation under the
   1.167 +; condition that a particular die was used.
   1.168 +(def dice {:six     (die-n 6)
   1.169 +	   :eight   (die-n 8)
   1.170 +	   :twelve  (die-n 12)})
   1.171 +
   1.172 +; The only prior knowledge is that one of the three dice is used, so we
   1.173 +; have no better than a uniform distribution to start with.
   1.174 +(def prior (uniform (keys dice)))
   1.175 +
   1.176 +; Add a single observation to the information contained in the
   1.177 +; distribution. Adding an observation consists of
   1.178 +; 1) Draw a die from the prior distribution.
   1.179 +; 2) Draw an observation from the distribution of that die.
   1.180 +; 3) Eliminate (replace by nil) the trials that do not match the observation.
   1.181 +; 4) Normalize the distribution for the non-nil values.
   1.182 +(defn add-observation [prior observation]
   1.183 +  (normalize-cond
   1.184 +    (domonad cond-dist-m
   1.185 +      [die    prior
   1.186 +       number (get dice die)
   1.187 +       :when  (= number observation) ]
   1.188 +      die)))
   1.189 +
   1.190 +; Add one observation.
   1.191 +(add-observation prior 1)
   1.192 +
   1.193 +; Add three consecutive observations.
   1.194 +(-> prior (add-observation 1)
   1.195 +          (add-observation 3)
   1.196 +	  (add-observation 7))
   1.197 +
   1.198 +; We can also add multiple observations in a single trial, but this
   1.199 +; is slower because more combinations have to be taken into account.
   1.200 +; With Bayesian inference, it is most efficient to eliminate choices
   1.201 +; as early as possible.
   1.202 +(defn add-observations [prior observations]
   1.203 +  (with-monad cond-dist-m
   1.204 +    (let [n-nums #(m-seq (replicate (count observations) (get dice %)))]
   1.205 +      (normalize-cond
   1.206 +        (domonad
   1.207 +          [die    prior
   1.208 +           nums   (n-nums die)
   1.209 +	   :when  (= nums observations)]
   1.210 +	  die)))))
   1.211 +
   1.212 +(add-observations prior [1 3 7])