annotate src/clojure/contrib/datalog/database.clj @ 10:ef7dbbd6452c

added clojure source goodness
author Robert McIntyre <rlm@mit.edu>
date Sat, 21 Aug 2010 06:25:44 -0400
parents
children
rev   line source
rlm@10 1 ;; Copyright (c) Jeffrey Straszheim. All rights reserved. The use and
rlm@10 2 ;; distribution terms for this software are covered by the Eclipse Public
rlm@10 3 ;; License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) which can
rlm@10 4 ;; be found in the file epl-v10.html at the root of this distribution. By
rlm@10 5 ;; using this software in any fashion, you are agreeing to be bound by the
rlm@10 6 ;; terms of this license. You must not remove this notice, or any other,
rlm@10 7 ;; from this software.
rlm@10 8 ;;
rlm@10 9 ;; database.clj
rlm@10 10 ;;
rlm@10 11 ;; A Clojure implementation of Datalog -- Support for in-memory database
rlm@10 12 ;;
rlm@10 13 ;; straszheimjeffrey (gmail)
rlm@10 14 ;; Created 21 Feburary 2009
rlm@10 15
rlm@10 16
rlm@10 17 (ns clojure.contrib.datalog.database
rlm@10 18 (:use clojure.contrib.datalog.util)
rlm@10 19 (:use clojure.contrib.def)
rlm@10 20 (:use [clojure.set :only (union intersection difference)])
rlm@10 21 (:use [clojure.contrib.except :only (throwf)])
rlm@10 22 (:import java.io.Writer))
rlm@10 23
rlm@10 24
rlm@10 25 (defstruct relation
rlm@10 26 :schema ; A set of key names
rlm@10 27 :data ; A set of tuples
rlm@10 28 :indexes) ; A map key names to indexes (in turn a map of value to tuples)
rlm@10 29
rlm@10 30
rlm@10 31 ;;; DDL
rlm@10 32
rlm@10 33 (defmethod print-method ::datalog-database
rlm@10 34 [db ^Writer writer]
rlm@10 35 (binding [*out* writer]
rlm@10 36 (do
rlm@10 37 (println "(datalog-database")
rlm@10 38 (println "{")
rlm@10 39 (doseq [key (keys db)]
rlm@10 40 (println)
rlm@10 41 (println key)
rlm@10 42 (print-method (db key) writer))
rlm@10 43 (println "})"))))
rlm@10 44
rlm@10 45 (defn datalog-database
rlm@10 46 [rels]
rlm@10 47 (with-meta rels {:type ::datalog-database}))
rlm@10 48
rlm@10 49 (def empty-database (datalog-database {}))
rlm@10 50
rlm@10 51 (defmethod print-method ::datalog-relation
rlm@10 52 [rel ^Writer writer]
rlm@10 53 (binding [*out* writer]
rlm@10 54 (do
rlm@10 55 (println "(datalog-relation")
rlm@10 56 (println " ;; Schema")
rlm@10 57 (println " " (:schema rel))
rlm@10 58 (println)
rlm@10 59 (println " ;; Data")
rlm@10 60 (println " #{")
rlm@10 61 (doseq [tuple (:data rel)]
rlm@10 62 (println " " tuple))
rlm@10 63 (println " }")
rlm@10 64 (println)
rlm@10 65 (println " ;; Indexes")
rlm@10 66 (println " {")
rlm@10 67 (doseq [key (-> rel :indexes keys)]
rlm@10 68 (println " " key)
rlm@10 69 (println " {")
rlm@10 70 (doseq [val (keys ((:indexes rel) key))]
rlm@10 71 (println " " val)
rlm@10 72 (println " " (get-in rel [:indexes key val])))
rlm@10 73 (println " }"))
rlm@10 74 (println " })"))))
rlm@10 75
rlm@10 76 (defn datalog-relation
rlm@10 77 "Creates a relation"
rlm@10 78 [schema data indexes]
rlm@10 79 (with-meta (struct relation schema data indexes) {:type ::datalog-relation}))
rlm@10 80
rlm@10 81 (defn add-relation
rlm@10 82 "Adds a relation to the database"
rlm@10 83 [db name keys]
rlm@10 84 (assoc db name (datalog-relation (set keys) #{} {})))
rlm@10 85
rlm@10 86 (defn add-index
rlm@10 87 "Adds an index to an empty relation named name"
rlm@10 88 [db name key]
rlm@10 89 (assert (empty? (:data (db name))))
rlm@10 90 (let [rel (db name)
rlm@10 91 inx (assoc (:indexes rel) key {})]
rlm@10 92 (assoc db name (datalog-relation (:schema rel)
rlm@10 93 (:data rel)
rlm@10 94 inx))))
rlm@10 95
rlm@10 96 (defn ensure-relation
rlm@10 97 "If the database lacks the named relation, add it"
rlm@10 98 [db name keys indexes]
rlm@10 99 (if-let [rel (db name)]
rlm@10 100 (do
rlm@10 101 (assert (= (:schema rel) (set keys)))
rlm@10 102 db)
rlm@10 103 (let [db1 (add-relation db name keys)]
rlm@10 104 (reduce (fn [db key] (add-index db name key))
rlm@10 105 db1
rlm@10 106 indexes))))
rlm@10 107
rlm@10 108
rlm@10 109 (defmacro make-database
rlm@10 110 "Makes a database, like this
rlm@10 111 (make-database
rlm@10 112 (relation :fred [:mary :sue])
rlm@10 113 (index :fred :mary)
rlm@10 114 (relation :sally [:jen :becky])
rlm@10 115 (index :sally :jen)
rlm@10 116 (index :sally :becky))"
rlm@10 117 [& commands]
rlm@10 118 (let [wrapper (fn [cur new]
rlm@10 119 (let [cmd (first new)
rlm@10 120 body (next new)]
rlm@10 121 (assert (= 2 (count body)))
rlm@10 122 (cond
rlm@10 123 (= cmd 'relation)
rlm@10 124 `(add-relation ~cur ~(first body) ~(fnext body))
rlm@10 125 (= cmd 'index)
rlm@10 126 `(add-index ~cur ~(first body) ~(fnext body))
rlm@10 127 :otherwise (throwf "%s not recognized" new))))]
rlm@10 128 (reduce wrapper `empty-database commands)))
rlm@10 129
rlm@10 130 (defn get-relation
rlm@10 131 "Get a relation object by name"
rlm@10 132 [db rel-name]
rlm@10 133 (db rel-name))
rlm@10 134
rlm@10 135 (defn replace-relation
rlm@10 136 "Add or replace a fully constructed relation object to the database."
rlm@10 137 [db rel-name rel]
rlm@10 138 (assoc db rel-name rel))
rlm@10 139
rlm@10 140
rlm@10 141 ;;; DML
rlm@10 142
rlm@10 143
rlm@10 144 (defn database-counts
rlm@10 145 "Returns a map with the count of elements in each relation."
rlm@10 146 [db]
rlm@10 147 (map-values #(-> % :data count) db))
rlm@10 148
rlm@10 149 (defn- modify-indexes
rlm@10 150 "Perform f on the indexed tuple-set. f should take a set and tuple,
rlm@10 151 and return the new set."
rlm@10 152 [idxs tuple f]
rlm@10 153 (into {} (for [ik (keys idxs)]
rlm@10 154 (let [im (idxs ik)
rlm@10 155 iv (tuple ik)
rlm@10 156 os (get im iv #{})
rlm@10 157 ns (f os tuple)]
rlm@10 158 [ik (if (empty? ns)
rlm@10 159 (dissoc im iv)
rlm@10 160 (assoc im iv (f os tuple)))]))))
rlm@10 161
rlm@10 162 (defn- add-to-indexes
rlm@10 163 "Adds the tuple to the appropriate keys in the index map"
rlm@10 164 [idxs tuple]
rlm@10 165 (modify-indexes idxs tuple conj))
rlm@10 166
rlm@10 167 (defn- remove-from-indexes
rlm@10 168 "Removes the tuple from the appropriate keys in the index map"
rlm@10 169 [idxs tuple]
rlm@10 170 (modify-indexes idxs tuple disj))
rlm@10 171
rlm@10 172 (defn add-tuple
rlm@10 173 "Two forms:
rlm@10 174
rlm@10 175 [db relation-name tuple] adds tuple to the named relation. Returns
rlm@10 176 the new database.
rlm@10 177
rlm@10 178 [rel tuple] adds to the relation object. Returns the new relation."
rlm@10 179 ([db rel-name tuple]
rlm@10 180 (assert (= (-> tuple keys set) (-> rel-name db :schema)))
rlm@10 181 (assoc db rel-name (add-tuple (db rel-name) tuple)))
rlm@10 182 ([rel tuple]
rlm@10 183 (let [data (:data rel)
rlm@10 184 new-data (conj data tuple)]
rlm@10 185 (if (identical? data new-data) ; optimization hack!
rlm@10 186 rel
rlm@10 187 (let [idxs (add-to-indexes (:indexes rel) tuple)]
rlm@10 188 (assoc rel :data new-data :indexes idxs))))))
rlm@10 189
rlm@10 190 (defn remove-tuple
rlm@10 191 "Two forms:
rlm@10 192
rlm@10 193 [db relation-name tuple] removes the tuple from the named relation,
rlm@10 194 returns a new database.
rlm@10 195
rlm@10 196 [rel tuple] removes the tuple from the relation. Returns the new
rlm@10 197 relation."
rlm@10 198 ([db rel-name tuple] (assoc db rel-name (remove-tuple (db rel-name) tuple)))
rlm@10 199 ([rel tuple]
rlm@10 200 (let [data (:data rel)
rlm@10 201 new-data (disj data tuple)]
rlm@10 202 (if (identical? data new-data)
rlm@10 203 rel
rlm@10 204 (let [idxs (remove-from-indexes (:indexes rel) tuple)]
rlm@10 205 (assoc rel :data new-data :indexes idxs))))))
rlm@10 206
rlm@10 207 (defn add-tuples
rlm@10 208 "Adds a collection of tuples to the db, as
rlm@10 209 (add-tuples db
rlm@10 210 [:rel-name :key-1 1 :key-2 2]
rlm@10 211 [:rel-name :key-1 2 :key-2 3])"
rlm@10 212 [db & tupls]
rlm@10 213 (reduce #(add-tuple %1 (first %2) (apply hash-map (next %2))) db tupls))
rlm@10 214
rlm@10 215 (defn- find-indexes
rlm@10 216 "Given a map of indexes and a partial tuple, return the sets of full tuples"
rlm@10 217 [idxs pt]
rlm@10 218 (if (empty? idxs)
rlm@10 219 nil
rlm@10 220 (filter identity (for [key (keys pt)]
rlm@10 221 (if-let [idx-map (idxs key)]
rlm@10 222 (get idx-map (pt key) #{})
rlm@10 223 nil)))))
rlm@10 224
rlm@10 225 (defn- match?
rlm@10 226 "Is m2 contained in m1?"
rlm@10 227 [m1 m2]
rlm@10 228 (let [compare (fn [key]
rlm@10 229 (and (contains? m1 key)
rlm@10 230 (= (m1 key) (m2 key))))]
rlm@10 231 (every? compare (keys m2))))
rlm@10 232
rlm@10 233 (defn- scan-space
rlm@10 234 "Computes a stream of tuples from relation rn matching partial tuple (pt)
rlm@10 235 and applies fun to each"
rlm@10 236 [fun db rn pt]
rlm@10 237 (let [rel (db rn)
rlm@10 238 idxs (find-indexes (:indexes rel) pt)
rlm@10 239 space (if (empty? idxs)
rlm@10 240 (:data rel) ; table scan :(
rlm@10 241 (reduce intersection idxs))]
rlm@10 242 (trace-datalog (when (empty? idxs)
rlm@10 243 (println (format "Table scan of %s: %s rows!!!!!"
rlm@10 244 rn
rlm@10 245 (count space)))))
rlm@10 246 (fun #(match? % pt) space)))
rlm@10 247
rlm@10 248 (defn select
rlm@10 249 "finds all matching tuples to the partial tuple (pt) in the relation named (rn)"
rlm@10 250 [db rn pt]
rlm@10 251 (scan-space filter db rn pt))
rlm@10 252
rlm@10 253 (defn any-match?
rlm@10 254 "Finds if there are any matching records for the partial tuple"
rlm@10 255 [db rn pt]
rlm@10 256 (if (= (-> pt keys set) (:schema (db rn)))
rlm@10 257 (contains? (:data (db rn)) pt)
rlm@10 258 (scan-space some db rn pt)))
rlm@10 259
rlm@10 260
rlm@10 261 ;;; Merge
rlm@10 262
rlm@10 263 (defn merge-indexes
rlm@10 264 [idx1 idx2]
rlm@10 265 (merge-with (fn [h1 h2] (merge-with union h1 h2)) idx1 idx2))
rlm@10 266
rlm@10 267 (defn merge-relations
rlm@10 268 "Merges two relations"
rlm@10 269 [r1 r2]
rlm@10 270 (assert (= (:schema r1) (:schema r2)))
rlm@10 271 (let [merged-indexes (merge-indexes (:indexes r1)
rlm@10 272 (:indexes r2))
rlm@10 273 merged-data (union (:data r1)
rlm@10 274 (:data r2))]
rlm@10 275 (assoc r1 :data merged-data :indexes merged-indexes)))
rlm@10 276
rlm@10 277 (defn database-merge
rlm@10 278 "Merges databases together"
rlm@10 279 [dbs]
rlm@10 280 (apply merge-with merge-relations dbs))
rlm@10 281
rlm@10 282 (defn database-merge-parallel
rlm@10 283 "Merges databases together in parallel"
rlm@10 284 [dbs]
rlm@10 285 (preduce merge-relations dbs))
rlm@10 286
rlm@10 287
rlm@10 288 ;; End of file