rlm@10: ;; Finite probability distributions rlm@10: rlm@10: ;; by Konrad Hinsen rlm@10: ;; last updated January 8, 2010 rlm@10: rlm@10: ;; Copyright (c) Konrad Hinsen, 2009-2010. All rights reserved. The use rlm@10: ;; and distribution terms for this software are covered by the Eclipse rlm@10: ;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) rlm@10: ;; which can be found in the file epl-v10.html at the root of this rlm@10: ;; distribution. By using this software in any fashion, you are rlm@10: ;; agreeing to be bound by the terms of this license. You must not rlm@10: ;; remove this notice, or any other, from this software. rlm@10: rlm@10: (ns rlm@10: ^{:author "Konrad Hinsen" rlm@10: :doc "Finite probability distributions rlm@10: This library defines a monad for combining finite probability rlm@10: distributions."} rlm@10: clojure.contrib.probabilities.finite-distributions rlm@10: (:use [clojure.contrib.monads rlm@10: :only (defmonad domonad with-monad maybe-t m-lift m-chain)] rlm@10: [clojure.contrib.def :only (defvar)])) rlm@10: rlm@10: ; The probability distribution monad. It is limited to finite probability rlm@10: ; distributions (e.g. there is a finite number of possible value), which rlm@10: ; are represented as maps from values to probabilities. rlm@10: rlm@10: (defmonad dist-m rlm@10: "Monad describing computations on fuzzy quantities, represented by a finite rlm@10: probability distribution for the possible values. A distribution is rlm@10: represented by a map from values to probabilities." rlm@10: [m-result (fn m-result-dist [v] rlm@10: {v 1}) rlm@10: m-bind (fn m-bind-dist [mv f] rlm@10: (reduce (partial merge-with +) rlm@10: (for [[x p] mv [y q] (f x)] rlm@10: {y (* q p)}))) rlm@10: ]) rlm@10: rlm@10: ; Applying the monad transformer maybe-t to the basic dist monad results rlm@10: ; in the cond-dist monad that can handle invalid values. The total probability rlm@10: ; for invalid values ends up as the probability of m-zero (which is nil). rlm@10: ; The function normalize takes this probability out of the distribution and rlm@10: ; re-distributes its weight over the valid values. rlm@10: rlm@10: (defvar cond-dist-m rlm@10: (maybe-t dist-m) rlm@10: "Variant of the dist monad that can handle undefined values.") rlm@10: rlm@10: ; Normalization rlm@10: rlm@10: (defn- scale-by rlm@10: "Multiply each entry in dist by the scale factor s and remove zero entries." rlm@10: [dist s] rlm@10: (into {} rlm@10: (for [[val p] dist :when (> p 0)] rlm@10: [val (* p s)]))) rlm@10: rlm@10: (defn normalize-cond [cdist] rlm@10: "Normalize a probability distribution resulting from a computation in rlm@10: the cond-dist monad by re-distributing the weight of the invalid values rlm@10: over the valid ones." rlm@10: (let [missing (get cdist nil 0) rlm@10: dist (dissoc cdist nil)] rlm@10: (cond (zero? missing) dist rlm@10: (= 1 missing) {} rlm@10: :else (let [scale (/ 1 (- 1 missing))] rlm@10: (scale-by dist scale))))) rlm@10: rlm@10: (defn normalize rlm@10: "Convert a weight map (e.g. a map of counter values) to a distribution rlm@10: by multiplying with a normalization factor. If the map has a key rlm@10: :total, its value is assumed to be the sum over all the other values and rlm@10: it is used for normalization. Otherwise, the sum is calculated rlm@10: explicitly. The :total key is removed from the resulting distribution." rlm@10: [weights] rlm@10: (let [total (:total weights) rlm@10: w (dissoc weights :total) rlm@10: s (/ 1 (if (nil? total) (reduce + (vals w)) total))] rlm@10: (scale-by w s))) rlm@10: rlm@10: ; Functions that construct distributions rlm@10: rlm@10: (defn uniform rlm@10: "Return a distribution in which each of the elements of coll rlm@10: has the same probability." rlm@10: [coll] rlm@10: (let [n (count coll) rlm@10: p (/ 1 n)] rlm@10: (into {} (for [x (seq coll)] [x p])))) rlm@10: rlm@10: (defn choose rlm@10: "Construct a distribution from an explicit list of probabilities rlm@10: and values. They are given in the form of a vector of probability-value rlm@10: pairs. In the last pair, the probability can be given by the keyword rlm@10: :else, which stands for 1 minus the total of the other probabilities." rlm@10: [& choices] rlm@10: (letfn [(add-choice [dist [p v]] rlm@10: (cond (nil? p) dist rlm@10: (= p :else) rlm@10: (let [total-p (reduce + (vals dist))] rlm@10: (assoc dist v (- 1 total-p))) rlm@10: :else (assoc dist v p)))] rlm@10: (reduce add-choice {} (partition 2 choices)))) rlm@10: rlm@10: (defn bernoulli rlm@10: [p] rlm@10: "Returns the Bernoulli distribution for probability p." rlm@10: (choose p 1 :else 0)) rlm@10: rlm@10: (defn- bc rlm@10: [n] rlm@10: "Returns the binomial coefficients for a given n." rlm@10: (let [r (inc n)] rlm@10: (loop [c 1 rlm@10: f (list 1)] rlm@10: (if (> c n) rlm@10: f rlm@10: (recur (inc c) (cons (* (/ (- r c) c) (first f)) f)))))) rlm@10: rlm@10: (defn binomial rlm@10: [n p] rlm@10: "Returns the binomial distribution, which is the distribution of the rlm@10: number of successes in a series of n experiments whose individual rlm@10: success probability is p." rlm@10: (let [q (- 1 p) rlm@10: n1 (inc n) rlm@10: k (range n1) rlm@10: pk (take n1 (iterate #(* p %) 1)) rlm@10: ql (reverse (take n1 (iterate #(* q %) 1))) rlm@10: f (bc n)] rlm@10: (into {} (map vector k (map * f pk ql))))) rlm@10: rlm@10: (defn make-distribution rlm@10: "Returns the distribution in which each element x of the collection rlm@10: has a probability proportional to (f x)" rlm@10: [coll f] rlm@10: (normalize (into {} (for [k coll] [k (f k)])))) rlm@10: rlm@10: (defn zipf rlm@10: "Returns the Zipf distribution in which the numbers k=1..n have rlm@10: probabilities proportional to 1/k^s." rlm@10: [s n] rlm@10: (make-distribution (range 1 (inc n)) #(/ (java.lang.Math/pow % s)))) rlm@10: rlm@10: (defn certainly rlm@10: "Returns a distribution in which the single value v has probability 1." rlm@10: [v] rlm@10: {v 1}) rlm@10: rlm@10: (with-monad dist-m rlm@10: rlm@10: (defn join-with rlm@10: "Returns the distribution of (f x y) with x from dist1 and y from dist2." rlm@10: [f dist1 dist2] rlm@10: ((m-lift 2 f) dist1 dist2)) rlm@10: rlm@10: ) rlm@10: rlm@10: (with-monad cond-dist-m rlm@10: (defn cond-prob rlm@10: "Returns the conditional probability for the values in dist that satisfy rlm@10: the predicate pred." rlm@10: [pred dist] rlm@10: (normalize-cond rlm@10: (domonad rlm@10: [v dist rlm@10: :when (pred v)] rlm@10: v)))) rlm@10: rlm@10: ; Select (with equal probability) N items from a sequence rlm@10: rlm@10: (defn- nth-and-rest [n xs] rlm@10: "Return a list containing the n-th value of xs and the sequence rlm@10: obtained by removing the n-th value from xs." rlm@10: (let [[h t] (split-at n xs)] rlm@10: (list (first t) (concat h (rest t))))) rlm@10: rlm@10: (with-monad dist-m rlm@10: rlm@10: (defn- select-n [n xs] rlm@10: (letfn [(select-1 [[s xs]] rlm@10: (uniform (for [i (range (count xs))] rlm@10: (let [[nth rest] (nth-and-rest i xs)] rlm@10: (list (cons nth s) rest)))))] rlm@10: ((m-chain (replicate n select-1)) (list '() xs)))) rlm@10: rlm@10: (defn select [n xs] rlm@10: "Return the distribution for all possible ordered selections of n elements rlm@10: out of xs." rlm@10: ((m-lift 1 first) (select-n n xs))) rlm@10: rlm@10: ) rlm@10: rlm@10: ; Find the probability that a given predicate is satisfied rlm@10: rlm@10: (defn prob rlm@10: "Return the probability that the predicate pred is satisfied in the rlm@10: distribution dist, i.e. the sum of the probabilities of the values rlm@10: that satisfy pred." rlm@10: [pred dist] rlm@10: (apply + (for [[x p] dist :when (pred x)] p))) rlm@10: