rlm@10
|
1 ;; Copyright (c) Jeffrey Straszheim. All rights reserved. The use and
|
rlm@10
|
2 ;; distribution terms for this software are covered by the Eclipse Public
|
rlm@10
|
3 ;; License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) which can
|
rlm@10
|
4 ;; be found in the file epl-v10.html at the root of this distribution. By
|
rlm@10
|
5 ;; using this software in any fashion, you are agreeing to be bound by the
|
rlm@10
|
6 ;; terms of this license. You must not remove this notice, or any other,
|
rlm@10
|
7 ;; from this software.
|
rlm@10
|
8 ;;
|
rlm@10
|
9 ;; database.clj
|
rlm@10
|
10 ;;
|
rlm@10
|
11 ;; A Clojure implementation of Datalog -- Support for in-memory database
|
rlm@10
|
12 ;;
|
rlm@10
|
13 ;; straszheimjeffrey (gmail)
|
rlm@10
|
14 ;; Created 21 Feburary 2009
|
rlm@10
|
15
|
rlm@10
|
16
|
rlm@10
|
17 (ns clojure.contrib.datalog.database
|
rlm@10
|
18 (:use clojure.contrib.datalog.util)
|
rlm@10
|
19 (:use clojure.contrib.def)
|
rlm@10
|
20 (:use [clojure.set :only (union intersection difference)])
|
rlm@10
|
21 (:use [clojure.contrib.except :only (throwf)])
|
rlm@10
|
22 (:import java.io.Writer))
|
rlm@10
|
23
|
rlm@10
|
24
|
rlm@10
|
25 (defstruct relation
|
rlm@10
|
26 :schema ; A set of key names
|
rlm@10
|
27 :data ; A set of tuples
|
rlm@10
|
28 :indexes) ; A map key names to indexes (in turn a map of value to tuples)
|
rlm@10
|
29
|
rlm@10
|
30
|
rlm@10
|
31 ;;; DDL
|
rlm@10
|
32
|
rlm@10
|
33 (defmethod print-method ::datalog-database
|
rlm@10
|
34 [db ^Writer writer]
|
rlm@10
|
35 (binding [*out* writer]
|
rlm@10
|
36 (do
|
rlm@10
|
37 (println "(datalog-database")
|
rlm@10
|
38 (println "{")
|
rlm@10
|
39 (doseq [key (keys db)]
|
rlm@10
|
40 (println)
|
rlm@10
|
41 (println key)
|
rlm@10
|
42 (print-method (db key) writer))
|
rlm@10
|
43 (println "})"))))
|
rlm@10
|
44
|
rlm@10
|
45 (defn datalog-database
|
rlm@10
|
46 [rels]
|
rlm@10
|
47 (with-meta rels {:type ::datalog-database}))
|
rlm@10
|
48
|
rlm@10
|
49 (def empty-database (datalog-database {}))
|
rlm@10
|
50
|
rlm@10
|
51 (defmethod print-method ::datalog-relation
|
rlm@10
|
52 [rel ^Writer writer]
|
rlm@10
|
53 (binding [*out* writer]
|
rlm@10
|
54 (do
|
rlm@10
|
55 (println "(datalog-relation")
|
rlm@10
|
56 (println " ;; Schema")
|
rlm@10
|
57 (println " " (:schema rel))
|
rlm@10
|
58 (println)
|
rlm@10
|
59 (println " ;; Data")
|
rlm@10
|
60 (println " #{")
|
rlm@10
|
61 (doseq [tuple (:data rel)]
|
rlm@10
|
62 (println " " tuple))
|
rlm@10
|
63 (println " }")
|
rlm@10
|
64 (println)
|
rlm@10
|
65 (println " ;; Indexes")
|
rlm@10
|
66 (println " {")
|
rlm@10
|
67 (doseq [key (-> rel :indexes keys)]
|
rlm@10
|
68 (println " " key)
|
rlm@10
|
69 (println " {")
|
rlm@10
|
70 (doseq [val (keys ((:indexes rel) key))]
|
rlm@10
|
71 (println " " val)
|
rlm@10
|
72 (println " " (get-in rel [:indexes key val])))
|
rlm@10
|
73 (println " }"))
|
rlm@10
|
74 (println " })"))))
|
rlm@10
|
75
|
rlm@10
|
76 (defn datalog-relation
|
rlm@10
|
77 "Creates a relation"
|
rlm@10
|
78 [schema data indexes]
|
rlm@10
|
79 (with-meta (struct relation schema data indexes) {:type ::datalog-relation}))
|
rlm@10
|
80
|
rlm@10
|
81 (defn add-relation
|
rlm@10
|
82 "Adds a relation to the database"
|
rlm@10
|
83 [db name keys]
|
rlm@10
|
84 (assoc db name (datalog-relation (set keys) #{} {})))
|
rlm@10
|
85
|
rlm@10
|
86 (defn add-index
|
rlm@10
|
87 "Adds an index to an empty relation named name"
|
rlm@10
|
88 [db name key]
|
rlm@10
|
89 (assert (empty? (:data (db name))))
|
rlm@10
|
90 (let [rel (db name)
|
rlm@10
|
91 inx (assoc (:indexes rel) key {})]
|
rlm@10
|
92 (assoc db name (datalog-relation (:schema rel)
|
rlm@10
|
93 (:data rel)
|
rlm@10
|
94 inx))))
|
rlm@10
|
95
|
rlm@10
|
96 (defn ensure-relation
|
rlm@10
|
97 "If the database lacks the named relation, add it"
|
rlm@10
|
98 [db name keys indexes]
|
rlm@10
|
99 (if-let [rel (db name)]
|
rlm@10
|
100 (do
|
rlm@10
|
101 (assert (= (:schema rel) (set keys)))
|
rlm@10
|
102 db)
|
rlm@10
|
103 (let [db1 (add-relation db name keys)]
|
rlm@10
|
104 (reduce (fn [db key] (add-index db name key))
|
rlm@10
|
105 db1
|
rlm@10
|
106 indexes))))
|
rlm@10
|
107
|
rlm@10
|
108
|
rlm@10
|
109 (defmacro make-database
|
rlm@10
|
110 "Makes a database, like this
|
rlm@10
|
111 (make-database
|
rlm@10
|
112 (relation :fred [:mary :sue])
|
rlm@10
|
113 (index :fred :mary)
|
rlm@10
|
114 (relation :sally [:jen :becky])
|
rlm@10
|
115 (index :sally :jen)
|
rlm@10
|
116 (index :sally :becky))"
|
rlm@10
|
117 [& commands]
|
rlm@10
|
118 (let [wrapper (fn [cur new]
|
rlm@10
|
119 (let [cmd (first new)
|
rlm@10
|
120 body (next new)]
|
rlm@10
|
121 (assert (= 2 (count body)))
|
rlm@10
|
122 (cond
|
rlm@10
|
123 (= cmd 'relation)
|
rlm@10
|
124 `(add-relation ~cur ~(first body) ~(fnext body))
|
rlm@10
|
125 (= cmd 'index)
|
rlm@10
|
126 `(add-index ~cur ~(first body) ~(fnext body))
|
rlm@10
|
127 :otherwise (throwf "%s not recognized" new))))]
|
rlm@10
|
128 (reduce wrapper `empty-database commands)))
|
rlm@10
|
129
|
rlm@10
|
130 (defn get-relation
|
rlm@10
|
131 "Get a relation object by name"
|
rlm@10
|
132 [db rel-name]
|
rlm@10
|
133 (db rel-name))
|
rlm@10
|
134
|
rlm@10
|
135 (defn replace-relation
|
rlm@10
|
136 "Add or replace a fully constructed relation object to the database."
|
rlm@10
|
137 [db rel-name rel]
|
rlm@10
|
138 (assoc db rel-name rel))
|
rlm@10
|
139
|
rlm@10
|
140
|
rlm@10
|
141 ;;; DML
|
rlm@10
|
142
|
rlm@10
|
143
|
rlm@10
|
144 (defn database-counts
|
rlm@10
|
145 "Returns a map with the count of elements in each relation."
|
rlm@10
|
146 [db]
|
rlm@10
|
147 (map-values #(-> % :data count) db))
|
rlm@10
|
148
|
rlm@10
|
149 (defn- modify-indexes
|
rlm@10
|
150 "Perform f on the indexed tuple-set. f should take a set and tuple,
|
rlm@10
|
151 and return the new set."
|
rlm@10
|
152 [idxs tuple f]
|
rlm@10
|
153 (into {} (for [ik (keys idxs)]
|
rlm@10
|
154 (let [im (idxs ik)
|
rlm@10
|
155 iv (tuple ik)
|
rlm@10
|
156 os (get im iv #{})
|
rlm@10
|
157 ns (f os tuple)]
|
rlm@10
|
158 [ik (if (empty? ns)
|
rlm@10
|
159 (dissoc im iv)
|
rlm@10
|
160 (assoc im iv (f os tuple)))]))))
|
rlm@10
|
161
|
rlm@10
|
162 (defn- add-to-indexes
|
rlm@10
|
163 "Adds the tuple to the appropriate keys in the index map"
|
rlm@10
|
164 [idxs tuple]
|
rlm@10
|
165 (modify-indexes idxs tuple conj))
|
rlm@10
|
166
|
rlm@10
|
167 (defn- remove-from-indexes
|
rlm@10
|
168 "Removes the tuple from the appropriate keys in the index map"
|
rlm@10
|
169 [idxs tuple]
|
rlm@10
|
170 (modify-indexes idxs tuple disj))
|
rlm@10
|
171
|
rlm@10
|
172 (defn add-tuple
|
rlm@10
|
173 "Two forms:
|
rlm@10
|
174
|
rlm@10
|
175 [db relation-name tuple] adds tuple to the named relation. Returns
|
rlm@10
|
176 the new database.
|
rlm@10
|
177
|
rlm@10
|
178 [rel tuple] adds to the relation object. Returns the new relation."
|
rlm@10
|
179 ([db rel-name tuple]
|
rlm@10
|
180 (assert (= (-> tuple keys set) (-> rel-name db :schema)))
|
rlm@10
|
181 (assoc db rel-name (add-tuple (db rel-name) tuple)))
|
rlm@10
|
182 ([rel tuple]
|
rlm@10
|
183 (let [data (:data rel)
|
rlm@10
|
184 new-data (conj data tuple)]
|
rlm@10
|
185 (if (identical? data new-data) ; optimization hack!
|
rlm@10
|
186 rel
|
rlm@10
|
187 (let [idxs (add-to-indexes (:indexes rel) tuple)]
|
rlm@10
|
188 (assoc rel :data new-data :indexes idxs))))))
|
rlm@10
|
189
|
rlm@10
|
190 (defn remove-tuple
|
rlm@10
|
191 "Two forms:
|
rlm@10
|
192
|
rlm@10
|
193 [db relation-name tuple] removes the tuple from the named relation,
|
rlm@10
|
194 returns a new database.
|
rlm@10
|
195
|
rlm@10
|
196 [rel tuple] removes the tuple from the relation. Returns the new
|
rlm@10
|
197 relation."
|
rlm@10
|
198 ([db rel-name tuple] (assoc db rel-name (remove-tuple (db rel-name) tuple)))
|
rlm@10
|
199 ([rel tuple]
|
rlm@10
|
200 (let [data (:data rel)
|
rlm@10
|
201 new-data (disj data tuple)]
|
rlm@10
|
202 (if (identical? data new-data)
|
rlm@10
|
203 rel
|
rlm@10
|
204 (let [idxs (remove-from-indexes (:indexes rel) tuple)]
|
rlm@10
|
205 (assoc rel :data new-data :indexes idxs))))))
|
rlm@10
|
206
|
rlm@10
|
207 (defn add-tuples
|
rlm@10
|
208 "Adds a collection of tuples to the db, as
|
rlm@10
|
209 (add-tuples db
|
rlm@10
|
210 [:rel-name :key-1 1 :key-2 2]
|
rlm@10
|
211 [:rel-name :key-1 2 :key-2 3])"
|
rlm@10
|
212 [db & tupls]
|
rlm@10
|
213 (reduce #(add-tuple %1 (first %2) (apply hash-map (next %2))) db tupls))
|
rlm@10
|
214
|
rlm@10
|
215 (defn- find-indexes
|
rlm@10
|
216 "Given a map of indexes and a partial tuple, return the sets of full tuples"
|
rlm@10
|
217 [idxs pt]
|
rlm@10
|
218 (if (empty? idxs)
|
rlm@10
|
219 nil
|
rlm@10
|
220 (filter identity (for [key (keys pt)]
|
rlm@10
|
221 (if-let [idx-map (idxs key)]
|
rlm@10
|
222 (get idx-map (pt key) #{})
|
rlm@10
|
223 nil)))))
|
rlm@10
|
224
|
rlm@10
|
225 (defn- match?
|
rlm@10
|
226 "Is m2 contained in m1?"
|
rlm@10
|
227 [m1 m2]
|
rlm@10
|
228 (let [compare (fn [key]
|
rlm@10
|
229 (and (contains? m1 key)
|
rlm@10
|
230 (= (m1 key) (m2 key))))]
|
rlm@10
|
231 (every? compare (keys m2))))
|
rlm@10
|
232
|
rlm@10
|
233 (defn- scan-space
|
rlm@10
|
234 "Computes a stream of tuples from relation rn matching partial tuple (pt)
|
rlm@10
|
235 and applies fun to each"
|
rlm@10
|
236 [fun db rn pt]
|
rlm@10
|
237 (let [rel (db rn)
|
rlm@10
|
238 idxs (find-indexes (:indexes rel) pt)
|
rlm@10
|
239 space (if (empty? idxs)
|
rlm@10
|
240 (:data rel) ; table scan :(
|
rlm@10
|
241 (reduce intersection idxs))]
|
rlm@10
|
242 (trace-datalog (when (empty? idxs)
|
rlm@10
|
243 (println (format "Table scan of %s: %s rows!!!!!"
|
rlm@10
|
244 rn
|
rlm@10
|
245 (count space)))))
|
rlm@10
|
246 (fun #(match? % pt) space)))
|
rlm@10
|
247
|
rlm@10
|
248 (defn select
|
rlm@10
|
249 "finds all matching tuples to the partial tuple (pt) in the relation named (rn)"
|
rlm@10
|
250 [db rn pt]
|
rlm@10
|
251 (scan-space filter db rn pt))
|
rlm@10
|
252
|
rlm@10
|
253 (defn any-match?
|
rlm@10
|
254 "Finds if there are any matching records for the partial tuple"
|
rlm@10
|
255 [db rn pt]
|
rlm@10
|
256 (if (= (-> pt keys set) (:schema (db rn)))
|
rlm@10
|
257 (contains? (:data (db rn)) pt)
|
rlm@10
|
258 (scan-space some db rn pt)))
|
rlm@10
|
259
|
rlm@10
|
260
|
rlm@10
|
261 ;;; Merge
|
rlm@10
|
262
|
rlm@10
|
263 (defn merge-indexes
|
rlm@10
|
264 [idx1 idx2]
|
rlm@10
|
265 (merge-with (fn [h1 h2] (merge-with union h1 h2)) idx1 idx2))
|
rlm@10
|
266
|
rlm@10
|
267 (defn merge-relations
|
rlm@10
|
268 "Merges two relations"
|
rlm@10
|
269 [r1 r2]
|
rlm@10
|
270 (assert (= (:schema r1) (:schema r2)))
|
rlm@10
|
271 (let [merged-indexes (merge-indexes (:indexes r1)
|
rlm@10
|
272 (:indexes r2))
|
rlm@10
|
273 merged-data (union (:data r1)
|
rlm@10
|
274 (:data r2))]
|
rlm@10
|
275 (assoc r1 :data merged-data :indexes merged-indexes)))
|
rlm@10
|
276
|
rlm@10
|
277 (defn database-merge
|
rlm@10
|
278 "Merges databases together"
|
rlm@10
|
279 [dbs]
|
rlm@10
|
280 (apply merge-with merge-relations dbs))
|
rlm@10
|
281
|
rlm@10
|
282 (defn database-merge-parallel
|
rlm@10
|
283 "Merges databases together in parallel"
|
rlm@10
|
284 [dbs]
|
rlm@10
|
285 (preduce merge-relations dbs))
|
rlm@10
|
286
|
rlm@10
|
287
|
rlm@10
|
288 ;; End of file
|