annotate src/clojure/contrib/dataflow.clj @ 10:ef7dbbd6452c

added clojure source goodness
author Robert McIntyre <rlm@mit.edu>
date Sat, 21 Aug 2010 06:25:44 -0400
parents
children
rev   line source
rlm@10 1 ;; Copyright (c) Jeffrey Straszheim. All rights reserved. The use and
rlm@10 2 ;; distribution terms for this software are covered by the Eclipse Public
rlm@10 3 ;; License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) which can
rlm@10 4 ;; be found in the file epl-v10.html at the root of this distribution. By
rlm@10 5 ;; using this software in any fashion, you are agreeing to be bound by the
rlm@10 6 ;; terms of this license. You must not remove this notice, or any other,
rlm@10 7 ;; from this software.
rlm@10 8 ;;
rlm@10 9 ;; dataflow.clj
rlm@10 10 ;;
rlm@10 11 ;; A Library to Support a Dataflow Model of State
rlm@10 12 ;;
rlm@10 13 ;; straszheimjeffrey (gmail)
rlm@10 14 ;; Created 10 March 2009
rlm@10 15
rlm@10 16
rlm@10 17 (ns
rlm@10 18 ^{:author "Jeffrey Straszheim",
rlm@10 19 :doc "A library to support a dataflow model of state"}
rlm@10 20 clojure.contrib.dataflow
rlm@10 21 (:use [clojure.set :only (union intersection difference)])
rlm@10 22 (:use [clojure.contrib.graph :only (directed-graph
rlm@10 23 reverse-graph
rlm@10 24 dependency-list
rlm@10 25 get-neighbors)])
rlm@10 26 (:use [clojure.walk :only (postwalk)])
rlm@10 27 (:use [clojure.contrib.except :only (throwf)])
rlm@10 28 (:import java.io.Writer))
rlm@10 29
rlm@10 30
rlm@10 31 ;;; Chief Data Structures
rlm@10 32
rlm@10 33
rlm@10 34 ;; Source Cell
rlm@10 35
rlm@10 36 ; The data of a source cell is directly set by a calling function. It
rlm@10 37 ; never depends on other cells.
rlm@10 38
rlm@10 39 (defstruct source-cell
rlm@10 40 :name ; The name, a symbol
rlm@10 41 :value ; Its value, a Ref
rlm@10 42 :cell-type) ; Should be ::source-cell
rlm@10 43
rlm@10 44 ;; Cell
rlm@10 45
rlm@10 46 ; A standard cell that computes its value from other cells.
rlm@10 47
rlm@10 48 (defstruct standard-cell
rlm@10 49 :name ; The name, a symbol
rlm@10 50 :value ; Its value, a Ref
rlm@10 51 :dependents ; The names of cells on which this depends, a collection
rlm@10 52 :fun ; A closure that computes the value, given an environment
rlm@10 53 :display ; The original expression for display
rlm@10 54 :cell-type) ; Should be ::cell
rlm@10 55
rlm@10 56 (derive ::cell ::dependent-cell) ; A cell that has a dependents field
rlm@10 57
rlm@10 58 ;; Validator
rlm@10 59
rlm@10 60 ; A cell that has no value, but can throw an exception when run
rlm@10 61
rlm@10 62 (defstruct validator-cell
rlm@10 63 :name ; Always ::validator
rlm@10 64 :dependents ; The names of cells on which this depends, a collection
rlm@10 65 :fun ; A clojure that can throw an exception
rlm@10 66 :display ; The original exprssion for display
rlm@10 67 :cell-type) ; Should be ::validator-cell
rlm@10 68
rlm@10 69 (derive ::validator-cell ::dependent-cell)
rlm@10 70
rlm@10 71
rlm@10 72 ;; A sentinal value
rlm@10 73
rlm@10 74 (def *empty-value* (java.lang.Object.))
rlm@10 75
rlm@10 76
rlm@10 77 ;; Dataflow
rlm@10 78
rlm@10 79 ; A collection of cells and dependency information
rlm@10 80
rlm@10 81 (defstruct dataflow
rlm@10 82 :cells ; A set of all cells
rlm@10 83 :cells-map ; A map of cell names (symbols) to collections of cells
rlm@10 84 :fore-graph ; The inverse of the dependency graph, nodes are cells
rlm@10 85 :topological) ; A vector of sets of independent nodes -- orders the computation
rlm@10 86
rlm@10 87
rlm@10 88 ;;; Environment Access
rlm@10 89
rlm@10 90 (defn get-cells
rlm@10 91 "Get all the cells named by name"
rlm@10 92 [df name]
rlm@10 93 ((:cells-map @df) name))
rlm@10 94
rlm@10 95 (defn get-cell
rlm@10 96 "Get the single cell named by name"
rlm@10 97 [df name]
rlm@10 98 (let [cells (get-cells df name)]
rlm@10 99 (cond
rlm@10 100 (= (count cells) 1) (first cells)
rlm@10 101 (> (count cells) 1) (throwf Exception "Cell %s has multiple instances" name)
rlm@10 102 :otherwise (throwf Exception "Cell %s is undefined" name))))
rlm@10 103
rlm@10 104 (defn source-cell?
rlm@10 105 "Is this cell a source cell?"
rlm@10 106 [cell]
rlm@10 107 (isa? (:cell-type cell) ::source-cell))
rlm@10 108
rlm@10 109 (defn get-source-cells
rlm@10 110 "Returns a collection of source cells from the dataflow"
rlm@10 111 [df]
rlm@10 112 (for [cell (:cells @df)
rlm@10 113 :when (source-cell? cell)]
rlm@10 114 cell))
rlm@10 115
rlm@10 116 (defn get-value
rlm@10 117 "Gets a value from the df matching the passed symbol.
rlm@10 118 Signals an error if the name is not present, or if it not a single
rlm@10 119 value."
rlm@10 120 [df name]
rlm@10 121 (let [cell (get-cell df name)
rlm@10 122 result @(:value cell)]
rlm@10 123 (do (when (= *empty-value* result)
rlm@10 124 (throwf Exception "Cell named %s empty" name))
rlm@10 125 result)))
rlm@10 126
rlm@10 127 (defn get-values
rlm@10 128 "Gets a collection of values from the df by name"
rlm@10 129 [df name]
rlm@10 130 (let [cells (get-cells df name)
rlm@10 131 results (map #(-> % :value deref) cells)]
rlm@10 132 (do
rlm@10 133 (when (some #(= % *empty-value*) results)
rlm@10 134 (throwf Exception "At least one empty cell named %s found" name))
rlm@10 135 results)))
rlm@10 136
rlm@10 137 (defn get-old-value
rlm@10 138 "Looks up an old value"
rlm@10 139 [df env name]
rlm@10 140 (if (contains? env name)
rlm@10 141 (env name)
rlm@10 142 (get-value df name)))
rlm@10 143
rlm@10 144 (defn get-value-from-cell
rlm@10 145 "Given a cell, get its value"
rlm@10 146 [cell]
rlm@10 147 (-> cell :value deref))
rlm@10 148
rlm@10 149 ;;; Build Dataflow Structure
rlm@10 150
rlm@10 151 (defn- build-cells-map
rlm@10 152 "Given a collection of cells, build a name->cells-collection map
rlm@10 153 from it."
rlm@10 154 [cs]
rlm@10 155 (let [step (fn [m c]
rlm@10 156 (let [n (:name c)
rlm@10 157 o (get m n #{})
rlm@10 158 s (conj o c)]
rlm@10 159 (assoc m n s)))]
rlm@10 160 (reduce step {} cs)))
rlm@10 161
rlm@10 162 (defn- build-back-graph
rlm@10 163 "Builds the backward dependency graph from the cells map. Each
rlm@10 164 node of the graph is a cell."
rlm@10 165 [cells cells-map]
rlm@10 166 (let [step (fn [n]
rlm@10 167 (apply union (for [dep-name (:dependents n)]
rlm@10 168 (cells-map dep-name))))
rlm@10 169 neighbors (zipmap cells (map step cells))]
rlm@10 170 (struct-map directed-graph
rlm@10 171 :nodes cells
rlm@10 172 :neighbors neighbors)))
rlm@10 173
rlm@10 174 (defn- build-dataflow*
rlm@10 175 "Builds the dataflow structure"
rlm@10 176 [cs]
rlm@10 177 (let [cells (set cs)
rlm@10 178 cells-map (build-cells-map cs)
rlm@10 179 back-graph (build-back-graph cells cells-map)
rlm@10 180 fore-graph (reverse-graph back-graph)]
rlm@10 181 (struct-map dataflow
rlm@10 182 :cells cells
rlm@10 183 :cells-map cells-map
rlm@10 184 :fore-graph fore-graph
rlm@10 185 :topological (dependency-list back-graph))))
rlm@10 186
rlm@10 187 (def initialize)
rlm@10 188
rlm@10 189 (defn build-dataflow
rlm@10 190 "Given a collection of cells, build and return a dataflow object"
rlm@10 191 [cs]
rlm@10 192 (dosync
rlm@10 193 (let [df (ref (build-dataflow* cs))]
rlm@10 194 (initialize df)
rlm@10 195 df)))
rlm@10 196
rlm@10 197
rlm@10 198 ;;; Displaying a dataflow
rlm@10 199
rlm@10 200 (defn print-dataflow
rlm@10 201 "Prints a dataflow, one cell per line"
rlm@10 202 [df]
rlm@10 203 (println)
rlm@10 204 (let [f (fn [cell] (-> cell :name str))]
rlm@10 205 (doseq [cell (sort-by f (:cells @df))]
rlm@10 206 (prn cell))))
rlm@10 207
rlm@10 208
rlm@10 209 ;;; Modifying a Dataflow
rlm@10 210
rlm@10 211 (defn add-cells
rlm@10 212 "Given a collection of cells, add them to the dataflow."
rlm@10 213 [df cells]
rlm@10 214 (dosync
rlm@10 215 (let [new-cells (union (set cells) (:cells @df))]
rlm@10 216 (ref-set df (build-dataflow* new-cells))
rlm@10 217 (initialize df))))
rlm@10 218
rlm@10 219 (defn remove-cells
rlm@10 220 "Given a collection of cells, remove them from the dataflow."
rlm@10 221 [df cells]
rlm@10 222 (dosync
rlm@10 223 (let [new-cells (difference (:cells @df) (set cells))]
rlm@10 224 (ref-set df (build-dataflow* new-cells))
rlm@10 225 (initialize df))))
rlm@10 226
rlm@10 227
rlm@10 228 ;;; Cell building
rlm@10 229
rlm@10 230 (def *meta* {:type ::dataflow-cell})
rlm@10 231
rlm@10 232 (defn build-source-cell
rlm@10 233 "Builds a source cell"
rlm@10 234 [name init]
rlm@10 235 (with-meta (struct source-cell name (ref init) ::source-cell)
rlm@10 236 *meta*))
rlm@10 237
rlm@10 238 (defn- is-col-var?
rlm@10 239 [symb]
rlm@10 240 (let [name (name symb)]
rlm@10 241 (and (= \? (first name))
rlm@10 242 (= \* (second name)))))
rlm@10 243
rlm@10 244 (defn- is-old-var?
rlm@10 245 [symb]
rlm@10 246 (let [name (name symb)]
rlm@10 247 (and (= \? (first name))
rlm@10 248 (= \- (second name)))))
rlm@10 249
rlm@10 250 (defn- is-var?
rlm@10 251 [symb]
rlm@10 252 (let [name (name symb)]
rlm@10 253 (and (= \? (first name))
rlm@10 254 (-> symb is-col-var? not)
rlm@10 255 (-> symb is-old-var? not))))
rlm@10 256
rlm@10 257 (defn- cell-name
rlm@10 258 [symb]
rlm@10 259 `(quote ~(cond (is-var? symb) (-> symb name (.substring 1) symbol)
rlm@10 260 (or (is-col-var? symb)
rlm@10 261 (is-old-var? symb)) (-> symb name (.substring 2) symbol))))
rlm@10 262
rlm@10 263 (defn- replace-symbol
rlm@10 264 "Walk the from replacing the ?X forms with the needed calls"
rlm@10 265 [dfs ov form]
rlm@10 266 (cond
rlm@10 267 (-> form symbol? not) form
rlm@10 268 (is-var? form) `(get-value ~dfs ~(cell-name form))
rlm@10 269 (is-col-var? form) `(get-values ~dfs ~(cell-name form))
rlm@10 270 (is-old-var? form) `(get-old-value ~dfs ~ov ~(cell-name form))
rlm@10 271 :otherwise form))
rlm@10 272
rlm@10 273 (defn- build-fun
rlm@10 274 "Build the closure needed to compute a cell"
rlm@10 275 [form]
rlm@10 276 (let [dfs (gensym "df_")
rlm@10 277 ov (gensym "old_")]
rlm@10 278 `(fn [~dfs ~ov] ~(postwalk (partial replace-symbol dfs ov) form))))
rlm@10 279
rlm@10 280 (defn- get-deps
rlm@10 281 "Get the names of the dependent cells"
rlm@10 282 [form]
rlm@10 283 (let [step (fn [f]
rlm@10 284 (cond
rlm@10 285 (coll? f) (apply union f)
rlm@10 286 (-> f symbol? not) nil
rlm@10 287 (is-var? f) #{(cell-name f)}
rlm@10 288 (is-col-var? f) #{(cell-name f)}
rlm@10 289 (is-old-var? f) #{(cell-name f)}
rlm@10 290 :otherwise nil))]
rlm@10 291 (postwalk step form)))
rlm@10 292
rlm@10 293 (defn build-standard-cell
rlm@10 294 "Builds a standard cell"
rlm@10 295 [name deps fun expr]
rlm@10 296 (with-meta (struct standard-cell name (ref *empty-value*) deps fun expr ::cell)
rlm@10 297 *meta*))
rlm@10 298
rlm@10 299 (defn build-validator-cell
rlm@10 300 "Builds a validator cell"
rlm@10 301 [deps fun expr]
rlm@10 302 (with-meta (struct validator-cell ::validator deps fun expr ::validator-cell)
rlm@10 303 *meta*))
rlm@10 304
rlm@10 305 (defmacro cell
rlm@10 306 "Build a standard cell, like this:
rlm@10 307
rlm@10 308 (cell fred
rlm@10 309 (* ?mary ?joe))
rlm@10 310
rlm@10 311 Which creates a cell named fred that is the product of a cell mary and cell joe
rlm@10 312
rlm@10 313 Or:
rlm@10 314
rlm@10 315 (cell joe
rlm@10 316 (apply * ?*sally))
rlm@10 317
rlm@10 318 Which creates a cell that applies * to the collection of all cells named sally
rlm@10 319
rlm@10 320 Or:
rlm@10 321
rlm@10 322 (cell :source fred 0)
rlm@10 323
rlm@10 324 Which builds a source cell fred with initial value 0
rlm@10 325
rlm@10 326 Or:
rlm@10 327
rlm@10 328 (cell :validator (when (< ?fred ?sally)
rlm@10 329 (throwf \"%s must be greater than %s\" ?fred ?sally))
rlm@10 330
rlm@10 331 Which will perform the validation"
rlm@10 332 [type & data]
rlm@10 333 (cond
rlm@10 334 (symbol? type) (let [name type ; No type for standard cell
rlm@10 335 expr (first data) ; we ignore extra data!
rlm@10 336 deps (get-deps expr)
rlm@10 337 fun (build-fun expr)]
rlm@10 338 `(build-standard-cell '~name ~deps ~fun '~expr))
rlm@10 339 (= type :source) (let [[name init] data]
rlm@10 340 `(build-source-cell '~name ~init))
rlm@10 341 (= type :validator) (let [[expr] data
rlm@10 342 deps (get-deps expr)
rlm@10 343 fun (build-fun expr)]
rlm@10 344 `(build-validator-cell ~deps ~fun '~expr))))
rlm@10 345
rlm@10 346
rlm@10 347 ;;; Cell Display
rlm@10 348
rlm@10 349 (defmulti display-cell
rlm@10 350 "A 'readable' form of the cell"
rlm@10 351 :cell-type)
rlm@10 352
rlm@10 353 (defmethod display-cell ::source-cell
rlm@10 354 [cell]
rlm@10 355 (list 'cell :source (:name cell) (-> cell :value deref)))
rlm@10 356
rlm@10 357 (defmethod display-cell ::cell
rlm@10 358 [cell]
rlm@10 359 (list 'cell (:name cell) (:display cell) (-> cell :value deref)))
rlm@10 360
rlm@10 361 (defmethod display-cell ::validator-cell
rlm@10 362 [cell]
rlm@10 363 (list 'cell :validator (:display cell)))
rlm@10 364
rlm@10 365 (defmethod print-method ::dataflow-cell
rlm@10 366 [f ^Writer w]
rlm@10 367 (binding [*out* w]
rlm@10 368 (pr (display-cell f))))
rlm@10 369
rlm@10 370
rlm@10 371 ;;; Evaluation
rlm@10 372
rlm@10 373 (defmulti eval-cell
rlm@10 374 "Evaluate a dataflow cell. Return [changed, old val]"
rlm@10 375 (fn [df data old cell] (:cell-type cell)))
rlm@10 376
rlm@10 377 (defmethod eval-cell ::source-cell
rlm@10 378 [df data old cell]
rlm@10 379 (let [name (:name cell)
rlm@10 380 val (:value cell)
rlm@10 381 ov @val]
rlm@10 382 (if (contains? data name)
rlm@10 383 (let [new-val (data name)]
rlm@10 384 (if (not= ov new-val)
rlm@10 385 (do (ref-set val new-val)
rlm@10 386 [true ov])
rlm@10 387 [false ov]))
rlm@10 388 [false ov])))
rlm@10 389
rlm@10 390 (defmethod eval-cell ::cell
rlm@10 391 [df data old cell]
rlm@10 392 (let [val (:value cell)
rlm@10 393 old-val @val
rlm@10 394 new-val ((:fun cell) df old)]
rlm@10 395 (if (not= old-val new-val)
rlm@10 396 (do (ref-set val new-val)
rlm@10 397 [true old-val])
rlm@10 398 [false old-val])))
rlm@10 399
rlm@10 400 (defmethod eval-cell ::validator-cell
rlm@10 401 [df data old cell]
rlm@10 402 (do ((:fun cell) df old)
rlm@10 403 [false nil]))
rlm@10 404
rlm@10 405 (defn- perform-flow
rlm@10 406 "Evaluate the needed cells (a set) from the given dataflow. Data is
rlm@10 407 a name-value mapping of new values for the source cells"
rlm@10 408 [df data needed]
rlm@10 409 (loop [needed needed
rlm@10 410 tops (:topological @df)
rlm@10 411 old {}]
rlm@10 412 (let [now (first tops) ; Now is a set of nodes
rlm@10 413 new-tops (next tops)]
rlm@10 414 (when (and (-> needed empty? not)
rlm@10 415 (-> now empty? not))
rlm@10 416 (let [step (fn [[needed old] cell]
rlm@10 417 (let [[changed ov] (try
rlm@10 418 (eval-cell df data old cell)
rlm@10 419 (catch Exception e
rlm@10 420 (throw (Exception. (str cell) e))))
rlm@10 421 nn (disj needed cell)]
rlm@10 422 (if changed
rlm@10 423 [(union nn (get-neighbors (:fore-graph @df) cell))
rlm@10 424 (assoc old (:name cell) ov)]
rlm@10 425 [nn old])))
rlm@10 426 [new-needed new-old] (reduce step
rlm@10 427 [needed old]
rlm@10 428 (intersection now needed))]
rlm@10 429 (recur new-needed new-tops new-old))))))
rlm@10 430
rlm@10 431 (defn- validate-update
rlm@10 432 "Ensure that all the updated cells are source cells"
rlm@10 433 [df names]
rlm@10 434 (let [scns (set (map :name (get-source-cells df)))]
rlm@10 435 (doseq [name names]
rlm@10 436 (when (-> name scns not)
rlm@10 437 (throwf Exception "Cell %n is not a source cell" name)))))
rlm@10 438
rlm@10 439 (defn update-values
rlm@10 440 "Given a dataflow, and a map of name-value pairs, update the
rlm@10 441 dataflow by binding the new values. Each name must be of a source
rlm@10 442 cell"
rlm@10 443 [df data]
rlm@10 444 (dosync
rlm@10 445 (validate-update df (keys data))
rlm@10 446 (let [needed (apply union (for [name (keys data)]
rlm@10 447 (set ((:cells-map @df) name))))]
rlm@10 448 (perform-flow df data needed))))
rlm@10 449
rlm@10 450 (defn- initialize
rlm@10 451 "Apply all the current source cell values. Useful for a new
rlm@10 452 dataflow, or one that has been updated with new cells"
rlm@10 453 [df]
rlm@10 454 (let [needed (:cells @df)
rlm@10 455 fg (:fore-graph @df)]
rlm@10 456 (perform-flow df {} needed)))
rlm@10 457
rlm@10 458
rlm@10 459 ;;; Watchers
rlm@10 460
rlm@10 461 (defn add-cell-watcher
rlm@10 462 "Adds a watcher to a cell to respond to changes of value. The is a
rlm@10 463 function of 4 values: a key, the cell, its old value, its new
rlm@10 464 value. This is implemented using Clojure's add-watch to the
rlm@10 465 underlying ref, and shared its sematics"
rlm@10 466 [cell key fun]
rlm@10 467 (let [val (:value cell)]
rlm@10 468 (add-watch val key (fn [key _ old-v new-v]
rlm@10 469 (fun key cell old-v new-v)))))
rlm@10 470
rlm@10 471
rlm@10 472 (comment
rlm@10 473
rlm@10 474 (def df
rlm@10 475 (build-dataflow
rlm@10 476 [(cell :source fred 1)
rlm@10 477 (cell :source mary 0)
rlm@10 478 (cell greg (+ ?fred ?mary))
rlm@10 479 (cell joan (+ ?fred ?mary))
rlm@10 480 (cell joan (* ?fred ?mary))
rlm@10 481 (cell sally (apply + ?*joan))
rlm@10 482 (cell :validator (when (number? ?-greg)
rlm@10 483 (when (<= ?greg ?-greg)
rlm@10 484 (throwf Exception "Non monotonic"))))]))
rlm@10 485
rlm@10 486 (do (println)
rlm@10 487 (print-dataflow df))
rlm@10 488
rlm@10 489 (add-cell-watcher (get-cell df 'sally)
rlm@10 490 nil
rlm@10 491 (fn [key cell o n]
rlm@10 492 (printf "sally changed from %s to %s\n" o n)))
rlm@10 493
rlm@10 494 (update-values df {'fred 1 'mary 1})
rlm@10 495 (update-values df {'fred 5 'mary 1})
rlm@10 496 (update-values df {'fred 0 'mary 0})
rlm@10 497
rlm@10 498 (get-value df 'fred)
rlm@10 499 (get-values df 'joan)
rlm@10 500 (get-value df 'sally)
rlm@10 501 (get-value df 'greg)
rlm@10 502
rlm@10 503 (use :reload 'clojure.contrib.dataflow)
rlm@10 504 (use 'clojure.stacktrace) (e)
rlm@10 505 (use 'clojure.contrib.trace)
rlm@10 506 )
rlm@10 507
rlm@10 508
rlm@10 509 ;; End of file