lasercutter: src/clojure/contrib/probabilities/finite

annotate src/clojure/contrib/probabilities/finite_distributions.clj @ 10:ef7dbbd6452c

added clojure source goodness

author	Robert McIntyre <rlm@mit.edu>
date	Sat, 21 Aug 2010 06:25:44 -0400
parents
children

rev	line source
rlm@10	1 ;; Finite probability distributions
rlm@10	2
rlm@10	3 ;; by Konrad Hinsen
rlm@10	4 ;; last updated January 8, 2010
rlm@10	5
rlm@10	6 ;; Copyright (c) Konrad Hinsen, 2009-2010. All rights reserved. The use
rlm@10	7 ;; and distribution terms for this software are covered by the Eclipse
rlm@10	8 ;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
rlm@10	9 ;; which can be found in the file epl-v10.html at the root of this
rlm@10	10 ;; distribution. By using this software in any fashion, you are
rlm@10	11 ;; agreeing to be bound by the terms of this license. You must not
rlm@10	12 ;; remove this notice, or any other, from this software.
rlm@10	13
rlm@10	14 (ns
rlm@10	15 ^{:author "Konrad Hinsen"
rlm@10	16 :doc "Finite probability distributions
rlm@10	17 This library defines a monad for combining finite probability
rlm@10	18 distributions."}
rlm@10	19 clojure.contrib.probabilities.finite-distributions
rlm@10	20 (:use [clojure.contrib.monads
rlm@10	21 :only (defmonad domonad with-monad maybe-t m-lift m-chain)]
rlm@10	22 [clojure.contrib.def :only (defvar)]))
rlm@10	23
rlm@10	24 ; The probability distribution monad. It is limited to finite probability
rlm@10	25 ; distributions (e.g. there is a finite number of possible value), which
rlm@10	26 ; are represented as maps from values to probabilities.
rlm@10	27
rlm@10	28 (defmonad dist-m
rlm@10	29 "Monad describing computations on fuzzy quantities, represented by a finite
rlm@10	30 probability distribution for the possible values. A distribution is
rlm@10	31 represented by a map from values to probabilities."
rlm@10	32 [m-result (fn m-result-dist [v]
rlm@10	33 {v 1})
rlm@10	34 m-bind (fn m-bind-dist [mv f]
rlm@10	35 (reduce (partial merge-with +)
rlm@10	36 (for [[x p] mv [y q] (f x)]
rlm@10	37 {y (* q p)})))
rlm@10	38 ])
rlm@10	39
rlm@10	40 ; Applying the monad transformer maybe-t to the basic dist monad results
rlm@10	41 ; in the cond-dist monad that can handle invalid values. The total probability
rlm@10	42 ; for invalid values ends up as the probability of m-zero (which is nil).
rlm@10	43 ; The function normalize takes this probability out of the distribution and
rlm@10	44 ; re-distributes its weight over the valid values.
rlm@10	45
rlm@10	46 (defvar cond-dist-m
rlm@10	47 (maybe-t dist-m)
rlm@10	48 "Variant of the dist monad that can handle undefined values.")
rlm@10	49
rlm@10	50 ; Normalization
rlm@10	51
rlm@10	52 (defn- scale-by
rlm@10	53 "Multiply each entry in dist by the scale factor s and remove zero entries."
rlm@10	54 [dist s]
rlm@10	55 (into {}
rlm@10	56 (for [[val p] dist :when (> p 0)]
rlm@10	57 [val (* p s)])))
rlm@10	58
rlm@10	59 (defn normalize-cond [cdist]
rlm@10	60 "Normalize a probability distribution resulting from a computation in
rlm@10	61 the cond-dist monad by re-distributing the weight of the invalid values
rlm@10	62 over the valid ones."
rlm@10	63 (let [missing (get cdist nil 0)
rlm@10	64 dist (dissoc cdist nil)]
rlm@10	65 (cond (zero? missing) dist
rlm@10	66 (= 1 missing) {}
rlm@10	67 :else (let [scale (/ 1 (- 1 missing))]
rlm@10	68 (scale-by dist scale)))))
rlm@10	69
rlm@10	70 (defn normalize
rlm@10	71 "Convert a weight map (e.g. a map of counter values) to a distribution
rlm@10	72 by multiplying with a normalization factor. If the map has a key
rlm@10	73 :total, its value is assumed to be the sum over all the other values and
rlm@10	74 it is used for normalization. Otherwise, the sum is calculated
rlm@10	75 explicitly. The :total key is removed from the resulting distribution."
rlm@10	76 [weights]
rlm@10	77 (let [total (:total weights)
rlm@10	78 w (dissoc weights :total)
rlm@10	79 s (/ 1 (if (nil? total) (reduce + (vals w)) total))]
rlm@10	80 (scale-by w s)))
rlm@10	81
rlm@10	82 ; Functions that construct distributions
rlm@10	83
rlm@10	84 (defn uniform
rlm@10	85 "Return a distribution in which each of the elements of coll
rlm@10	86 has the same probability."
rlm@10	87 [coll]
rlm@10	88 (let [n (count coll)
rlm@10	89 p (/ 1 n)]
rlm@10	90 (into {} (for [x (seq coll)] [x p]))))
rlm@10	91
rlm@10	92 (defn choose
rlm@10	93 "Construct a distribution from an explicit list of probabilities
rlm@10	94 and values. They are given in the form of a vector of probability-value
rlm@10	95 pairs. In the last pair, the probability can be given by the keyword
rlm@10	96 :else, which stands for 1 minus the total of the other probabilities."
rlm@10	97 [& choices]
rlm@10	98 (letfn [(add-choice [dist [p v]]
rlm@10	99 (cond (nil? p) dist
rlm@10	100 (= p :else)
rlm@10	101 (let [total-p (reduce + (vals dist))]
rlm@10	102 (assoc dist v (- 1 total-p)))
rlm@10	103 :else (assoc dist v p)))]
rlm@10	104 (reduce add-choice {} (partition 2 choices))))
rlm@10	105
rlm@10	106 (defn bernoulli
rlm@10	107 [p]
rlm@10	108 "Returns the Bernoulli distribution for probability p."
rlm@10	109 (choose p 1 :else 0))
rlm@10	110
rlm@10	111 (defn- bc
rlm@10	112 [n]
rlm@10	113 "Returns the binomial coefficients for a given n."
rlm@10	114 (let [r (inc n)]
rlm@10	115 (loop [c 1
rlm@10	116 f (list 1)]
rlm@10	117 (if (> c n)
rlm@10	118 f
rlm@10	119 (recur (inc c) (cons (* (/ (- r c) c) (first f)) f))))))
rlm@10	120
rlm@10	121 (defn binomial
rlm@10	122 [n p]
rlm@10	123 "Returns the binomial distribution, which is the distribution of the
rlm@10	124 number of successes in a series of n experiments whose individual
rlm@10	125 success probability is p."
rlm@10	126 (let [q (- 1 p)
rlm@10	127 n1 (inc n)
rlm@10	128 k (range n1)
rlm@10	129 pk (take n1 (iterate #(* p %) 1))
rlm@10	130 ql (reverse (take n1 (iterate #(* q %) 1)))
rlm@10	131 f (bc n)]
rlm@10	132 (into {} (map vector k (map * f pk ql)))))
rlm@10	133
rlm@10	134 (defn make-distribution
rlm@10	135 "Returns the distribution in which each element x of the collection
rlm@10	136 has a probability proportional to (f x)"
rlm@10	137 [coll f]
rlm@10	138 (normalize (into {} (for [k coll] [k (f k)]))))
rlm@10	139
rlm@10	140 (defn zipf
rlm@10	141 "Returns the Zipf distribution in which the numbers k=1..n have
rlm@10	142 probabilities proportional to 1/k^s."
rlm@10	143 [s n]
rlm@10	144 (make-distribution (range 1 (inc n)) #(/ (java.lang.Math/pow % s))))
rlm@10	145
rlm@10	146 (defn certainly
rlm@10	147 "Returns a distribution in which the single value v has probability 1."
rlm@10	148 [v]
rlm@10	149 {v 1})
rlm@10	150
rlm@10	151 (with-monad dist-m
rlm@10	152
rlm@10	153 (defn join-with
rlm@10	154 "Returns the distribution of (f x y) with x from dist1 and y from dist2."
rlm@10	155 [f dist1 dist2]
rlm@10	156 ((m-lift 2 f) dist1 dist2))
rlm@10	157
rlm@10	158 )
rlm@10	159
rlm@10	160 (with-monad cond-dist-m
rlm@10	161 (defn cond-prob
rlm@10	162 "Returns the conditional probability for the values in dist that satisfy
rlm@10	163 the predicate pred."
rlm@10	164 [pred dist]
rlm@10	165 (normalize-cond
rlm@10	166 (domonad
rlm@10	167 [v dist
rlm@10	168 :when (pred v)]
rlm@10	169 v))))
rlm@10	170
rlm@10	171 ; Select (with equal probability) N items from a sequence
rlm@10	172
rlm@10	173 (defn- nth-and-rest [n xs]
rlm@10	174 "Return a list containing the n-th value of xs and the sequence
rlm@10	175 obtained by removing the n-th value from xs."
rlm@10	176 (let [[h t] (split-at n xs)]
rlm@10	177 (list (first t) (concat h (rest t)))))
rlm@10	178
rlm@10	179 (with-monad dist-m
rlm@10	180
rlm@10	181 (defn- select-n [n xs]
rlm@10	182 (letfn [(select-1 [[s xs]]
rlm@10	183 (uniform (for [i (range (count xs))]
rlm@10	184 (let [[nth rest] (nth-and-rest i xs)]
rlm@10	185 (list (cons nth s) rest)))))]
rlm@10	186 ((m-chain (replicate n select-1)) (list '() xs))))
rlm@10	187
rlm@10	188 (defn select [n xs]
rlm@10	189 "Return the distribution for all possible ordered selections of n elements
rlm@10	190 out of xs."
rlm@10	191 ((m-lift 1 first) (select-n n xs)))
rlm@10	192
rlm@10	193 )
rlm@10	194
rlm@10	195 ; Find the probability that a given predicate is satisfied
rlm@10	196
rlm@10	197 (defn prob
rlm@10	198 "Return the probability that the predicate pred is satisfied in the
rlm@10	199 distribution dist, i.e. the sum of the probabilities of the values
rlm@10	200 that satisfy pred."
rlm@10	201 [pred dist]
rlm@10	202 (apply + (for [[x p] dist :when (pred x)] p)))
rlm@10	203

Mercurial > lasercutter

annotate src/clojure/contrib/probabilities/finite_distributions.clj @ 10:ef7dbbd6452c