annotate src/clojure/contrib/accumulators.clj @ 10:ef7dbbd6452c

added clojure source goodness
author Robert McIntyre <rlm@mit.edu>
date Sat, 21 Aug 2010 06:25:44 -0400
parents
children
rev   line source
rlm@10 1 ;; Accumulators
rlm@10 2
rlm@10 3 ;; by Konrad Hinsen
rlm@10 4 ;; last updated May 19, 2009
rlm@10 5
rlm@10 6 ;; This module defines various accumulators (list, vector, map,
rlm@10 7 ;; sum, product, counter, and combinations thereof) with a common
rlm@10 8 ;; interface defined by the multimethods add and combine.
rlm@10 9 ;; For each accumulator type, its empty value is defined in this module.
rlm@10 10 ;; Applications typically use this as a starting value and add data
rlm@10 11 ;; using the add multimethod.
rlm@10 12
rlm@10 13 ;; Copyright (c) Konrad Hinsen, 2009. All rights reserved. The use
rlm@10 14 ;; and distribution terms for this software are covered by the Eclipse
rlm@10 15 ;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
rlm@10 16 ;; which can be found in the file epl-v10.html at the root of this
rlm@10 17 ;; distribution. By using this software in any fashion, you are
rlm@10 18 ;; agreeing to be bound by the terms of this license. You must not
rlm@10 19 ;; remove this notice, or any other, from this software.
rlm@10 20
rlm@10 21 (ns
rlm@10 22 ^{:author "Konrad Hinsen"
rlm@10 23 :doc "A generic accumulator interface and implementations of various
rlm@10 24 accumulators."}
rlm@10 25 clojure.contrib.accumulators
rlm@10 26 (:refer-clojure :exclude (deftype))
rlm@10 27 (:use [clojure.contrib.types :only (deftype)])
rlm@10 28 (:use [clojure.contrib.def :only (defvar defvar- defmacro-)])
rlm@10 29 (:require [clojure.contrib.generic.arithmetic :as ga]))
rlm@10 30
rlm@10 31 (defmulti add
rlm@10 32 "Add item to the accumulator acc. The exact meaning of adding an
rlm@10 33 an item depends on the type of the accumulator."
rlm@10 34 {:arglists '([acc item])}
rlm@10 35 (fn [acc item] (type acc)))
rlm@10 36
rlm@10 37 (defn add-items
rlm@10 38 "Add all elements of a collection coll to the accumulator acc."
rlm@10 39 [acc items]
rlm@10 40 (reduce add acc items))
rlm@10 41
rlm@10 42 (defmulti combine
rlm@10 43 "Combine the values of the accumulators acc1 and acc2 into a
rlm@10 44 single accumulator of the same type."
rlm@10 45 {:arglists '([& accs])}
rlm@10 46 (fn [& accs] (type (first accs))))
rlm@10 47
rlm@10 48 ;
rlm@10 49 ; An ::accumulator type tag is attached to tbe built-in types
rlm@10 50 ; when used as accumulators, and new types are derived from it.
rlm@10 51 ; Multimethods add and combine for ::accumulator sub-dispatch on class.
rlm@10 52 ; We also define generic addition as the combine operation.
rlm@10 53 ;
rlm@10 54 (let [meta-map {:type ::accumulator}]
rlm@10 55 (defn- with-acc-tag
rlm@10 56 [x]
rlm@10 57 (with-meta x meta-map)))
rlm@10 58
rlm@10 59 (defmethod add ::accumulator
rlm@10 60 [a e]
rlm@10 61 ((get-method add (class a)) a e))
rlm@10 62
rlm@10 63 (defmethod combine ::accumulator
rlm@10 64 [& as]
rlm@10 65 (apply (get-method combine (class (first as))) as))
rlm@10 66
rlm@10 67 (defmethod ga/+ ::accumulator
rlm@10 68 [x y]
rlm@10 69 (combine x y))
rlm@10 70
rlm@10 71 ;
rlm@10 72 ; Vector accumulator
rlm@10 73 ;
rlm@10 74 (defvar empty-vector (with-acc-tag [])
rlm@10 75 "An empty vector accumulator. Adding an item appends it at the end.")
rlm@10 76
rlm@10 77 (defmethod combine clojure.lang.IPersistentVector
rlm@10 78 [& vs]
rlm@10 79 (with-acc-tag (vec (apply concat vs))))
rlm@10 80
rlm@10 81 (defmethod add clojure.lang.IPersistentVector
rlm@10 82 [v e]
rlm@10 83 (with-acc-tag (conj v e)))
rlm@10 84
rlm@10 85 ;
rlm@10 86 ; List accumulator
rlm@10 87 ;
rlm@10 88 (defvar empty-list (with-acc-tag '())
rlm@10 89 "An empty list accumulator. Adding an item appends it at the beginning.")
rlm@10 90
rlm@10 91 (defmethod combine clojure.lang.IPersistentList
rlm@10 92 [& vs]
rlm@10 93 (with-acc-tag (apply concat vs)))
rlm@10 94
rlm@10 95 (defmethod add clojure.lang.IPersistentList
rlm@10 96 [v e]
rlm@10 97 (with-acc-tag (conj v e)))
rlm@10 98
rlm@10 99 ;
rlm@10 100 ; Queue accumulator
rlm@10 101 ;
rlm@10 102 (defvar empty-queue (with-acc-tag clojure.lang.PersistentQueue/EMPTY)
rlm@10 103 "An empty queue accumulator. Adding an item appends it at the end.")
rlm@10 104
rlm@10 105 (defmethod combine clojure.lang.PersistentQueue
rlm@10 106 [& vs]
rlm@10 107 (add-items (first vs) (apply concat (rest vs))))
rlm@10 108
rlm@10 109 (defmethod add clojure.lang.PersistentQueue
rlm@10 110 [v e]
rlm@10 111 (with-acc-tag (conj v e)))
rlm@10 112
rlm@10 113 ;
rlm@10 114 ; Set accumulator
rlm@10 115 ;
rlm@10 116 (defvar empty-set (with-acc-tag #{})
rlm@10 117 "An empty set accumulator.")
rlm@10 118
rlm@10 119 (defmethod combine (class empty-set)
rlm@10 120 [& vs]
rlm@10 121 (with-acc-tag (apply clojure.set/union vs)))
rlm@10 122
rlm@10 123 (defmethod add (class empty-set)
rlm@10 124 [v e]
rlm@10 125 (with-acc-tag (conj v e)))
rlm@10 126
rlm@10 127 ;
rlm@10 128 ; String accumulator
rlm@10 129 ;
rlm@10 130 (defvar empty-string ""
rlm@10 131 "An empty string accumulator. Adding an item (string or character)
rlm@10 132 appends it at the end.")
rlm@10 133
rlm@10 134 (defmethod combine java.lang.String
rlm@10 135 [& vs]
rlm@10 136 (apply str vs))
rlm@10 137
rlm@10 138 (defmethod add java.lang.String
rlm@10 139 [v e]
rlm@10 140 (str v e))
rlm@10 141
rlm@10 142 ;
rlm@10 143 ; Map accumulator
rlm@10 144 ;
rlm@10 145 (defvar empty-map (with-acc-tag {})
rlm@10 146 "An empty map accumulator. Items to be added must be [key value] pairs.")
rlm@10 147
rlm@10 148 (defmethod combine clojure.lang.IPersistentMap
rlm@10 149 [& vs]
rlm@10 150 (with-acc-tag (apply merge vs)))
rlm@10 151
rlm@10 152 (defmethod add clojure.lang.IPersistentMap
rlm@10 153 [v e]
rlm@10 154 (with-acc-tag (conj v e)))
rlm@10 155
rlm@10 156 ;
rlm@10 157 ; Numerical accumulators: sum, product, minimum, maximum
rlm@10 158 ;
rlm@10 159 (defmacro- defacc
rlm@10 160 [name op empty doc-string]
rlm@10 161 (let [type-tag (keyword (str *ns*) (str name))
rlm@10 162 empty-symbol (symbol (str "empty-" name))]
rlm@10 163 `(let [op# ~op]
rlm@10 164 (deftype ~type-tag ~name
rlm@10 165 (fn [~'x] {:value ~'x})
rlm@10 166 (fn [~'x] (list (:value ~'x))))
rlm@10 167 (derive ~type-tag ::accumulator)
rlm@10 168 (defvar ~empty-symbol (~name ~empty) ~doc-string)
rlm@10 169 (defmethod combine ~type-tag [& vs#]
rlm@10 170 (~name (apply op# (map :value vs#))))
rlm@10 171 (defmethod add ~type-tag [v# e#]
rlm@10 172 (~name (op# (:value v#) e#))))))
rlm@10 173
rlm@10 174 (defacc sum + 0
rlm@10 175 "An empty sum accumulator. Only numbers can be added.")
rlm@10 176
rlm@10 177 (defacc product * 1
rlm@10 178 "An empty sum accumulator. Only numbers can be added.")
rlm@10 179
rlm@10 180 ; The empty maximum accumulator should have value -infinity.
rlm@10 181 ; This is represented by nil and taken into account in an
rlm@10 182 ; adapted max function. In the minimum accumulator, nil is
rlm@10 183 ; similarly used to represent +infinity.
rlm@10 184
rlm@10 185 (defacc maximum (fn [& xs]
rlm@10 186 (when-let [xs (seq (filter identity xs))]
rlm@10 187 (apply max xs)))
rlm@10 188 nil
rlm@10 189 "An empty maximum accumulator. Only numbers can be added.")
rlm@10 190
rlm@10 191 (defacc minimum (fn [& xs]
rlm@10 192 (when-let [xs (seq (filter identity xs))]
rlm@10 193 (apply min xs)))
rlm@10 194 nil
rlm@10 195 "An empty minimum accumulator. Only numbers can be added.")
rlm@10 196
rlm@10 197 ;
rlm@10 198 ; Numeric min-max accumulator
rlm@10 199 ; (combination of minimum and maximum)
rlm@10 200 ;
rlm@10 201 (deftype ::min-max min-max
rlm@10 202 (fn [min max] {:min min :max max})
rlm@10 203 (fn [mm] (list (:min mm) (:max mm))))
rlm@10 204
rlm@10 205 (derive ::min-max ::accumulator)
rlm@10 206
rlm@10 207 (defvar empty-min-max (min-max nil nil)
rlm@10 208 "An empty min-max accumulator, combining minimum and maximum.
rlm@10 209 Only numbers can be added.")
rlm@10 210
rlm@10 211 (defmethod combine ::min-max
rlm@10 212 [& vs]
rlm@10 213 (let [total-min (apply min (map :min vs))
rlm@10 214 total-max (apply max (map :max vs))]
rlm@10 215 (min-max total-min total-max)))
rlm@10 216
rlm@10 217 (defmethod add ::min-max
rlm@10 218 [v e]
rlm@10 219 (let [min-v (:min v)
rlm@10 220 max-v (:max v)
rlm@10 221 new-min (if (nil? min-v) e (min min-v e))
rlm@10 222 new-max (if (nil? max-v) e (max max-v e))]
rlm@10 223 (min-max new-min new-max)))
rlm@10 224
rlm@10 225 ;
rlm@10 226 ; Mean and variance accumulator
rlm@10 227 ;
rlm@10 228 (deftype ::mean-variance mean-variance)
rlm@10 229
rlm@10 230 (derive ::mean-variance ::accumulator)
rlm@10 231
rlm@10 232 (defvar empty-mean-variance (mean-variance {:n 0 :mean 0 :variance 0})
rlm@10 233 "An empty mean-variance accumulator, combining sample mean and
rlm@10 234 sample variance. Only numbers can be added.")
rlm@10 235
rlm@10 236 (defmethod combine ::mean-variance
rlm@10 237 ([mv]
rlm@10 238 mv)
rlm@10 239
rlm@10 240 ([mv1 mv2]
rlm@10 241 (let [{n1 :n mean1 :mean var1 :variance} mv1
rlm@10 242 {n2 :n mean2 :mean var2 :variance} mv2
rlm@10 243 n (+ n1 n2)
rlm@10 244 mean (/ (+ (* n1 mean1) (* n2 mean2)) n)
rlm@10 245 sq #(* % %)
rlm@10 246 c (+ (* n1 (sq (- mean mean1))) (* n2 (sq (- mean mean2))))
rlm@10 247 var (if (< n 2)
rlm@10 248 0
rlm@10 249 (/ (+ c (* (dec n1) var1) (* (dec n2) var2)) (dec n)))]
rlm@10 250 (mean-variance {:n n :mean mean :variance var})))
rlm@10 251
rlm@10 252 ([mv1 mv2 & mvs]
rlm@10 253 (reduce combine (combine mv1 mv2) mvs)))
rlm@10 254
rlm@10 255 (defmethod add ::mean-variance
rlm@10 256 [mv x]
rlm@10 257 (let [{n :n mean :mean var :variance} mv
rlm@10 258 n1 (inc n)
rlm@10 259 d (- x mean)
rlm@10 260 new-mean (+ mean (/ d n1))
rlm@10 261 new-var (if (zero? n) 0 (/ (+ (* (dec n) var) (* d (- x new-mean))) n))]
rlm@10 262 (mean-variance {:n n1 :mean new-mean :variance new-var})))
rlm@10 263
rlm@10 264 ;
rlm@10 265 ; Counter accumulator
rlm@10 266 ;
rlm@10 267 (deftype ::counter counter)
rlm@10 268
rlm@10 269 (derive ::counter ::accumulator)
rlm@10 270
rlm@10 271 (defvar empty-counter (counter {})
rlm@10 272 "An empty counter accumulator. Its value is a map that stores for
rlm@10 273 every item the number of times it was added.")
rlm@10 274
rlm@10 275 (defmethod combine ::counter
rlm@10 276 [v & vs]
rlm@10 277 (letfn [(add-item [cntr [item n]]
rlm@10 278 (assoc cntr item (+ n (get cntr item 0))))
rlm@10 279 (add-two [c1 c2] (reduce add-item c1 c2))]
rlm@10 280 (reduce add-two v vs)))
rlm@10 281
rlm@10 282 (defmethod add ::counter
rlm@10 283 [v e]
rlm@10 284 (assoc v e (inc (get v e 0))))
rlm@10 285
rlm@10 286 ;
rlm@10 287 ; Counter accumulator with total count
rlm@10 288 ;
rlm@10 289 (deftype ::counter-with-total counter-with-total)
rlm@10 290 (derive ::counter-with-total ::counter)
rlm@10 291
rlm@10 292 (defvar empty-counter-with-total
rlm@10 293 (counter-with-total {:total 0})
rlm@10 294 "An empty counter-with-total accumulator. It works like the counter
rlm@10 295 accumulator, except that the total number of items added is stored as the
rlm@10 296 value of the key :total.")
rlm@10 297
rlm@10 298 (defmethod add ::counter-with-total
rlm@10 299 [v e]
rlm@10 300 (assoc v e (inc (get v e 0))
rlm@10 301 :total (inc (:total v))))
rlm@10 302
rlm@10 303 ;
rlm@10 304 ; Accumulator n-tuple
rlm@10 305 ;
rlm@10 306 (deftype ::tuple acc-tuple)
rlm@10 307
rlm@10 308 (derive ::tuple ::accumulator)
rlm@10 309
rlm@10 310 (defn empty-tuple
rlm@10 311 "Returns an accumulator tuple with the supplied empty-accumulators
rlm@10 312 as its value. Accumulator tuples consist of several accumulators that
rlm@10 313 work in parallel. Added items must be sequences whose number of elements
rlm@10 314 matches the number of sub-accumulators."
rlm@10 315 [empty-accumulators]
rlm@10 316 (acc-tuple (into [] empty-accumulators)))
rlm@10 317
rlm@10 318 (defmethod combine ::tuple
rlm@10 319 [& vs]
rlm@10 320 (acc-tuple (vec (map combine vs))))
rlm@10 321
rlm@10 322 (defmethod add ::tuple
rlm@10 323 [v e]
rlm@10 324 (acc-tuple (vec (map add v e))))