rlm@10: ;; Accumulators rlm@10: rlm@10: ;; by Konrad Hinsen rlm@10: ;; last updated May 19, 2009 rlm@10: rlm@10: ;; This module defines various accumulators (list, vector, map, rlm@10: ;; sum, product, counter, and combinations thereof) with a common rlm@10: ;; interface defined by the multimethods add and combine. rlm@10: ;; For each accumulator type, its empty value is defined in this module. rlm@10: ;; Applications typically use this as a starting value and add data rlm@10: ;; using the add multimethod. rlm@10: rlm@10: ;; Copyright (c) Konrad Hinsen, 2009. All rights reserved. The use rlm@10: ;; and distribution terms for this software are covered by the Eclipse rlm@10: ;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) rlm@10: ;; which can be found in the file epl-v10.html at the root of this rlm@10: ;; distribution. By using this software in any fashion, you are rlm@10: ;; agreeing to be bound by the terms of this license. You must not rlm@10: ;; remove this notice, or any other, from this software. rlm@10: rlm@10: (ns rlm@10: ^{:author "Konrad Hinsen" rlm@10: :doc "A generic accumulator interface and implementations of various rlm@10: accumulators."} rlm@10: clojure.contrib.accumulators rlm@10: (:refer-clojure :exclude (deftype)) rlm@10: (:use [clojure.contrib.types :only (deftype)]) rlm@10: (:use [clojure.contrib.def :only (defvar defvar- defmacro-)]) rlm@10: (:require [clojure.contrib.generic.arithmetic :as ga])) rlm@10: rlm@10: (defmulti add rlm@10: "Add item to the accumulator acc. The exact meaning of adding an rlm@10: an item depends on the type of the accumulator." rlm@10: {:arglists '([acc item])} rlm@10: (fn [acc item] (type acc))) rlm@10: rlm@10: (defn add-items rlm@10: "Add all elements of a collection coll to the accumulator acc." rlm@10: [acc items] rlm@10: (reduce add acc items)) rlm@10: rlm@10: (defmulti combine rlm@10: "Combine the values of the accumulators acc1 and acc2 into a rlm@10: single accumulator of the same type." rlm@10: {:arglists '([& accs])} rlm@10: (fn [& accs] (type (first accs)))) rlm@10: rlm@10: ; rlm@10: ; An ::accumulator type tag is attached to tbe built-in types rlm@10: ; when used as accumulators, and new types are derived from it. rlm@10: ; Multimethods add and combine for ::accumulator sub-dispatch on class. rlm@10: ; We also define generic addition as the combine operation. rlm@10: ; rlm@10: (let [meta-map {:type ::accumulator}] rlm@10: (defn- with-acc-tag rlm@10: [x] rlm@10: (with-meta x meta-map))) rlm@10: rlm@10: (defmethod add ::accumulator rlm@10: [a e] rlm@10: ((get-method add (class a)) a e)) rlm@10: rlm@10: (defmethod combine ::accumulator rlm@10: [& as] rlm@10: (apply (get-method combine (class (first as))) as)) rlm@10: rlm@10: (defmethod ga/+ ::accumulator rlm@10: [x y] rlm@10: (combine x y)) rlm@10: rlm@10: ; rlm@10: ; Vector accumulator rlm@10: ; rlm@10: (defvar empty-vector (with-acc-tag []) rlm@10: "An empty vector accumulator. Adding an item appends it at the end.") rlm@10: rlm@10: (defmethod combine clojure.lang.IPersistentVector rlm@10: [& vs] rlm@10: (with-acc-tag (vec (apply concat vs)))) rlm@10: rlm@10: (defmethod add clojure.lang.IPersistentVector rlm@10: [v e] rlm@10: (with-acc-tag (conj v e))) rlm@10: rlm@10: ; rlm@10: ; List accumulator rlm@10: ; rlm@10: (defvar empty-list (with-acc-tag '()) rlm@10: "An empty list accumulator. Adding an item appends it at the beginning.") rlm@10: rlm@10: (defmethod combine clojure.lang.IPersistentList rlm@10: [& vs] rlm@10: (with-acc-tag (apply concat vs))) rlm@10: rlm@10: (defmethod add clojure.lang.IPersistentList rlm@10: [v e] rlm@10: (with-acc-tag (conj v e))) rlm@10: rlm@10: ; rlm@10: ; Queue accumulator rlm@10: ; rlm@10: (defvar empty-queue (with-acc-tag clojure.lang.PersistentQueue/EMPTY) rlm@10: "An empty queue accumulator. Adding an item appends it at the end.") rlm@10: rlm@10: (defmethod combine clojure.lang.PersistentQueue rlm@10: [& vs] rlm@10: (add-items (first vs) (apply concat (rest vs)))) rlm@10: rlm@10: (defmethod add clojure.lang.PersistentQueue rlm@10: [v e] rlm@10: (with-acc-tag (conj v e))) rlm@10: rlm@10: ; rlm@10: ; Set accumulator rlm@10: ; rlm@10: (defvar empty-set (with-acc-tag #{}) rlm@10: "An empty set accumulator.") rlm@10: rlm@10: (defmethod combine (class empty-set) rlm@10: [& vs] rlm@10: (with-acc-tag (apply clojure.set/union vs))) rlm@10: rlm@10: (defmethod add (class empty-set) rlm@10: [v e] rlm@10: (with-acc-tag (conj v e))) rlm@10: rlm@10: ; rlm@10: ; String accumulator rlm@10: ; rlm@10: (defvar empty-string "" rlm@10: "An empty string accumulator. Adding an item (string or character) rlm@10: appends it at the end.") rlm@10: rlm@10: (defmethod combine java.lang.String rlm@10: [& vs] rlm@10: (apply str vs)) rlm@10: rlm@10: (defmethod add java.lang.String rlm@10: [v e] rlm@10: (str v e)) rlm@10: rlm@10: ; rlm@10: ; Map accumulator rlm@10: ; rlm@10: (defvar empty-map (with-acc-tag {}) rlm@10: "An empty map accumulator. Items to be added must be [key value] pairs.") rlm@10: rlm@10: (defmethod combine clojure.lang.IPersistentMap rlm@10: [& vs] rlm@10: (with-acc-tag (apply merge vs))) rlm@10: rlm@10: (defmethod add clojure.lang.IPersistentMap rlm@10: [v e] rlm@10: (with-acc-tag (conj v e))) rlm@10: rlm@10: ; rlm@10: ; Numerical accumulators: sum, product, minimum, maximum rlm@10: ; rlm@10: (defmacro- defacc rlm@10: [name op empty doc-string] rlm@10: (let [type-tag (keyword (str *ns*) (str name)) rlm@10: empty-symbol (symbol (str "empty-" name))] rlm@10: `(let [op# ~op] rlm@10: (deftype ~type-tag ~name rlm@10: (fn [~'x] {:value ~'x}) rlm@10: (fn [~'x] (list (:value ~'x)))) rlm@10: (derive ~type-tag ::accumulator) rlm@10: (defvar ~empty-symbol (~name ~empty) ~doc-string) rlm@10: (defmethod combine ~type-tag [& vs#] rlm@10: (~name (apply op# (map :value vs#)))) rlm@10: (defmethod add ~type-tag [v# e#] rlm@10: (~name (op# (:value v#) e#)))))) rlm@10: rlm@10: (defacc sum + 0 rlm@10: "An empty sum accumulator. Only numbers can be added.") rlm@10: rlm@10: (defacc product * 1 rlm@10: "An empty sum accumulator. Only numbers can be added.") rlm@10: rlm@10: ; The empty maximum accumulator should have value -infinity. rlm@10: ; This is represented by nil and taken into account in an rlm@10: ; adapted max function. In the minimum accumulator, nil is rlm@10: ; similarly used to represent +infinity. rlm@10: rlm@10: (defacc maximum (fn [& xs] rlm@10: (when-let [xs (seq (filter identity xs))] rlm@10: (apply max xs))) rlm@10: nil rlm@10: "An empty maximum accumulator. Only numbers can be added.") rlm@10: rlm@10: (defacc minimum (fn [& xs] rlm@10: (when-let [xs (seq (filter identity xs))] rlm@10: (apply min xs))) rlm@10: nil rlm@10: "An empty minimum accumulator. Only numbers can be added.") rlm@10: rlm@10: ; rlm@10: ; Numeric min-max accumulator rlm@10: ; (combination of minimum and maximum) rlm@10: ; rlm@10: (deftype ::min-max min-max rlm@10: (fn [min max] {:min min :max max}) rlm@10: (fn [mm] (list (:min mm) (:max mm)))) rlm@10: rlm@10: (derive ::min-max ::accumulator) rlm@10: rlm@10: (defvar empty-min-max (min-max nil nil) rlm@10: "An empty min-max accumulator, combining minimum and maximum. rlm@10: Only numbers can be added.") rlm@10: rlm@10: (defmethod combine ::min-max rlm@10: [& vs] rlm@10: (let [total-min (apply min (map :min vs)) rlm@10: total-max (apply max (map :max vs))] rlm@10: (min-max total-min total-max))) rlm@10: rlm@10: (defmethod add ::min-max rlm@10: [v e] rlm@10: (let [min-v (:min v) rlm@10: max-v (:max v) rlm@10: new-min (if (nil? min-v) e (min min-v e)) rlm@10: new-max (if (nil? max-v) e (max max-v e))] rlm@10: (min-max new-min new-max))) rlm@10: rlm@10: ; rlm@10: ; Mean and variance accumulator rlm@10: ; rlm@10: (deftype ::mean-variance mean-variance) rlm@10: rlm@10: (derive ::mean-variance ::accumulator) rlm@10: rlm@10: (defvar empty-mean-variance (mean-variance {:n 0 :mean 0 :variance 0}) rlm@10: "An empty mean-variance accumulator, combining sample mean and rlm@10: sample variance. Only numbers can be added.") rlm@10: rlm@10: (defmethod combine ::mean-variance rlm@10: ([mv] rlm@10: mv) rlm@10: rlm@10: ([mv1 mv2] rlm@10: (let [{n1 :n mean1 :mean var1 :variance} mv1 rlm@10: {n2 :n mean2 :mean var2 :variance} mv2 rlm@10: n (+ n1 n2) rlm@10: mean (/ (+ (* n1 mean1) (* n2 mean2)) n) rlm@10: sq #(* % %) rlm@10: c (+ (* n1 (sq (- mean mean1))) (* n2 (sq (- mean mean2)))) rlm@10: var (if (< n 2) rlm@10: 0 rlm@10: (/ (+ c (* (dec n1) var1) (* (dec n2) var2)) (dec n)))] rlm@10: (mean-variance {:n n :mean mean :variance var}))) rlm@10: rlm@10: ([mv1 mv2 & mvs] rlm@10: (reduce combine (combine mv1 mv2) mvs))) rlm@10: rlm@10: (defmethod add ::mean-variance rlm@10: [mv x] rlm@10: (let [{n :n mean :mean var :variance} mv rlm@10: n1 (inc n) rlm@10: d (- x mean) rlm@10: new-mean (+ mean (/ d n1)) rlm@10: new-var (if (zero? n) 0 (/ (+ (* (dec n) var) (* d (- x new-mean))) n))] rlm@10: (mean-variance {:n n1 :mean new-mean :variance new-var}))) rlm@10: rlm@10: ; rlm@10: ; Counter accumulator rlm@10: ; rlm@10: (deftype ::counter counter) rlm@10: rlm@10: (derive ::counter ::accumulator) rlm@10: rlm@10: (defvar empty-counter (counter {}) rlm@10: "An empty counter accumulator. Its value is a map that stores for rlm@10: every item the number of times it was added.") rlm@10: rlm@10: (defmethod combine ::counter rlm@10: [v & vs] rlm@10: (letfn [(add-item [cntr [item n]] rlm@10: (assoc cntr item (+ n (get cntr item 0)))) rlm@10: (add-two [c1 c2] (reduce add-item c1 c2))] rlm@10: (reduce add-two v vs))) rlm@10: rlm@10: (defmethod add ::counter rlm@10: [v e] rlm@10: (assoc v e (inc (get v e 0)))) rlm@10: rlm@10: ; rlm@10: ; Counter accumulator with total count rlm@10: ; rlm@10: (deftype ::counter-with-total counter-with-total) rlm@10: (derive ::counter-with-total ::counter) rlm@10: rlm@10: (defvar empty-counter-with-total rlm@10: (counter-with-total {:total 0}) rlm@10: "An empty counter-with-total accumulator. It works like the counter rlm@10: accumulator, except that the total number of items added is stored as the rlm@10: value of the key :total.") rlm@10: rlm@10: (defmethod add ::counter-with-total rlm@10: [v e] rlm@10: (assoc v e (inc (get v e 0)) rlm@10: :total (inc (:total v)))) rlm@10: rlm@10: ; rlm@10: ; Accumulator n-tuple rlm@10: ; rlm@10: (deftype ::tuple acc-tuple) rlm@10: rlm@10: (derive ::tuple ::accumulator) rlm@10: rlm@10: (defn empty-tuple rlm@10: "Returns an accumulator tuple with the supplied empty-accumulators rlm@10: as its value. Accumulator tuples consist of several accumulators that rlm@10: work in parallel. Added items must be sequences whose number of elements rlm@10: matches the number of sub-accumulators." rlm@10: [empty-accumulators] rlm@10: (acc-tuple (into [] empty-accumulators))) rlm@10: rlm@10: (defmethod combine ::tuple rlm@10: [& vs] rlm@10: (acc-tuple (vec (map combine vs)))) rlm@10: rlm@10: (defmethod add ::tuple rlm@10: [v e] rlm@10: (acc-tuple (vec (map add v e))))