diff src/clojure/contrib/datalog/database.clj @ 10:ef7dbbd6452c

added clojure source goodness
author Robert McIntyre <rlm@mit.edu>
date Sat, 21 Aug 2010 06:25:44 -0400
parents
children
line wrap: on
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/clojure/contrib/datalog/database.clj	Sat Aug 21 06:25:44 2010 -0400
     1.3 @@ -0,0 +1,288 @@
     1.4 +;;  Copyright (c) Jeffrey Straszheim. All rights reserved.  The use and
     1.5 +;;  distribution terms for this software are covered by the Eclipse Public
     1.6 +;;  License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) which can
     1.7 +;;  be found in the file epl-v10.html at the root of this distribution.  By
     1.8 +;;  using this software in any fashion, you are agreeing to be bound by the
     1.9 +;;  terms of this license.  You must not remove this notice, or any other,
    1.10 +;;  from this software.
    1.11 +;;
    1.12 +;;  database.clj
    1.13 +;;
    1.14 +;;  A Clojure implementation of Datalog -- Support for in-memory database
    1.15 +;;
    1.16 +;;  straszheimjeffrey (gmail)
    1.17 +;;  Created 21 Feburary 2009
    1.18 +
    1.19 +
    1.20 +(ns clojure.contrib.datalog.database
    1.21 +  (:use clojure.contrib.datalog.util)
    1.22 +  (:use clojure.contrib.def)
    1.23 +  (:use [clojure.set :only (union intersection difference)])
    1.24 +  (:use [clojure.contrib.except :only (throwf)])
    1.25 +  (:import java.io.Writer))
    1.26 +
    1.27 +
    1.28 +(defstruct relation
    1.29 +  :schema           ; A set of key names
    1.30 +  :data             ; A set of tuples
    1.31 +  :indexes)         ; A map key names to indexes (in turn a map of value to tuples)
    1.32 +
    1.33 +
    1.34 +;;; DDL
    1.35 +
    1.36 +(defmethod print-method ::datalog-database
    1.37 +  [db ^Writer writer]
    1.38 +  (binding [*out* writer]
    1.39 +    (do
    1.40 +      (println "(datalog-database")
    1.41 +      (println "{")
    1.42 +      (doseq [key (keys db)]
    1.43 +        (println)
    1.44 +        (println key)
    1.45 +        (print-method (db key) writer))
    1.46 +      (println "})"))))
    1.47 +
    1.48 +(defn datalog-database
    1.49 +  [rels]
    1.50 +  (with-meta rels {:type ::datalog-database}))
    1.51 +
    1.52 +(def empty-database (datalog-database {}))
    1.53 +
    1.54 +(defmethod print-method ::datalog-relation
    1.55 +  [rel ^Writer writer]
    1.56 +  (binding [*out* writer]
    1.57 +    (do
    1.58 +      (println "(datalog-relation")
    1.59 +      (println " ;; Schema")
    1.60 +      (println " " (:schema rel))
    1.61 +      (println)
    1.62 +      (println " ;; Data")
    1.63 +      (println " #{")
    1.64 +      (doseq [tuple (:data rel)]
    1.65 +        (println "  " tuple))
    1.66 +      (println " }")
    1.67 +      (println)
    1.68 +      (println " ;; Indexes")
    1.69 +      (println "  {")
    1.70 +      (doseq [key (-> rel :indexes keys)]
    1.71 +        (println "  " key)
    1.72 +        (println "    {")
    1.73 +        (doseq [val (keys ((:indexes rel) key))]
    1.74 +          (println "      " val)
    1.75 +          (println "        " (get-in rel [:indexes key val])))
    1.76 +        (println "    }"))
    1.77 +      (println "  })"))))
    1.78 +
    1.79 +(defn datalog-relation
    1.80 +  "Creates a relation"
    1.81 +  [schema data indexes]
    1.82 +  (with-meta (struct relation schema data indexes) {:type ::datalog-relation}))
    1.83 +
    1.84 +(defn add-relation
    1.85 +  "Adds a relation to the database"
    1.86 +  [db name keys]
    1.87 +  (assoc db name (datalog-relation (set keys) #{} {})))
    1.88 +
    1.89 +(defn add-index
    1.90 +  "Adds an index to an empty relation named name"
    1.91 +  [db name key]
    1.92 +  (assert (empty? (:data (db name))))
    1.93 +  (let [rel (db name)
    1.94 +        inx (assoc (:indexes rel) key {})]
    1.95 +    (assoc db name (datalog-relation (:schema rel)
    1.96 +                                     (:data rel)
    1.97 +                                     inx))))
    1.98 +
    1.99 +(defn ensure-relation
   1.100 +  "If the database lacks the named relation, add it"
   1.101 +  [db name keys indexes]
   1.102 +  (if-let [rel (db name)]
   1.103 +    (do
   1.104 +      (assert (= (:schema rel) (set keys)))
   1.105 +      db)
   1.106 +    (let [db1 (add-relation db name keys)]
   1.107 +      (reduce (fn [db key] (add-index db name key))
   1.108 +              db1
   1.109 +              indexes))))
   1.110 +    
   1.111 +
   1.112 +(defmacro make-database
   1.113 +  "Makes a database, like this
   1.114 +   (make-database
   1.115 +     (relation :fred [:mary :sue])
   1.116 +     (index :fred :mary)
   1.117 +     (relation :sally [:jen :becky])
   1.118 +     (index :sally :jen)
   1.119 +     (index :sally :becky))"
   1.120 +  [& commands]
   1.121 +  (let [wrapper (fn [cur new]
   1.122 +                  (let [cmd (first new)
   1.123 +                        body (next new)]
   1.124 +                    (assert (= 2 (count body)))
   1.125 +                    (cond
   1.126 +                     (= cmd 'relation)
   1.127 +                       `(add-relation ~cur ~(first body) ~(fnext body))
   1.128 +                     (= cmd 'index)
   1.129 +                       `(add-index ~cur ~(first body) ~(fnext body))
   1.130 +                     :otherwise (throwf "%s not recognized" new))))]
   1.131 +    (reduce wrapper `empty-database commands)))
   1.132 +
   1.133 +(defn get-relation
   1.134 +  "Get a relation object by name"
   1.135 +  [db rel-name]
   1.136 +  (db rel-name))
   1.137 +
   1.138 +(defn replace-relation
   1.139 +  "Add or replace a fully constructed relation object to the database."
   1.140 +  [db rel-name rel]
   1.141 +  (assoc db rel-name rel))
   1.142 +
   1.143 +
   1.144 +;;; DML
   1.145 +
   1.146 +
   1.147 +(defn database-counts
   1.148 +  "Returns a map with the count of elements in each relation."
   1.149 +  [db]
   1.150 +  (map-values #(-> % :data count) db))
   1.151 +
   1.152 +(defn- modify-indexes
   1.153 +  "Perform f on the indexed tuple-set.  f should take a set and tuple,
   1.154 +   and return the new set."
   1.155 +  [idxs tuple f]
   1.156 +  (into {} (for [ik (keys idxs)]
   1.157 +             (let [im (idxs ik)
   1.158 +                   iv (tuple ik)
   1.159 +                   os (get im iv #{})
   1.160 +                   ns (f os tuple)]
   1.161 +               [ik (if (empty? ns)
   1.162 +                     (dissoc im iv)
   1.163 +                     (assoc im iv (f os tuple)))]))))
   1.164 +
   1.165 +(defn- add-to-indexes
   1.166 +  "Adds the tuple to the appropriate keys in the index map"
   1.167 +  [idxs tuple]
   1.168 +  (modify-indexes idxs tuple conj))
   1.169 +
   1.170 +(defn- remove-from-indexes
   1.171 +  "Removes the tuple from the appropriate keys in the index map"
   1.172 +  [idxs tuple]
   1.173 +  (modify-indexes idxs tuple disj))
   1.174 +
   1.175 +(defn add-tuple
   1.176 +  "Two forms:
   1.177 +
   1.178 +   [db relation-name tuple] adds tuple to the named relation.  Returns
   1.179 +   the new database.
   1.180 +
   1.181 +   [rel tuple] adds to the relation object.  Returns the new relation."
   1.182 +  ([db rel-name tuple]
   1.183 +     (assert (= (-> tuple keys set) (-> rel-name db :schema)))
   1.184 +     (assoc db rel-name (add-tuple (db rel-name) tuple)))
   1.185 +  ([rel tuple]
   1.186 +     (let [data (:data rel)
   1.187 +           new-data (conj data tuple)]
   1.188 +       (if (identical? data new-data) ; optimization hack!
   1.189 +         rel
   1.190 +         (let [idxs (add-to-indexes (:indexes rel) tuple)]
   1.191 +           (assoc rel :data new-data :indexes idxs))))))
   1.192 +
   1.193 +(defn remove-tuple
   1.194 +  "Two forms:
   1.195 +
   1.196 +   [db relation-name tuple] removes the tuple from the named relation,
   1.197 +   returns a new database.
   1.198 +
   1.199 +   [rel tuple] removes the tuple from the relation.  Returns the new
   1.200 +   relation."
   1.201 +  ([db rel-name tuple] (assoc db rel-name (remove-tuple (db rel-name) tuple)))
   1.202 +  ([rel tuple]
   1.203 +     (let [data (:data rel)
   1.204 +           new-data (disj data tuple)]
   1.205 +       (if (identical? data new-data)
   1.206 +         rel
   1.207 +         (let [idxs (remove-from-indexes (:indexes rel) tuple)]
   1.208 +           (assoc rel :data new-data :indexes idxs))))))
   1.209 +                      
   1.210 +(defn add-tuples
   1.211 +  "Adds a collection of tuples to the db, as
   1.212 +   (add-tuples db
   1.213 +      [:rel-name :key-1 1 :key-2 2]
   1.214 +      [:rel-name :key-1 2 :key-2 3])"
   1.215 +  [db & tupls]
   1.216 +  (reduce #(add-tuple %1 (first %2) (apply hash-map (next %2))) db tupls))
   1.217 +
   1.218 +(defn- find-indexes
   1.219 +  "Given a map of indexes and a partial tuple, return the sets of full tuples"
   1.220 +  [idxs pt]
   1.221 +  (if (empty? idxs)
   1.222 +    nil
   1.223 +    (filter identity (for [key (keys pt)]
   1.224 +                       (if-let [idx-map (idxs key)]
   1.225 +                         (get idx-map (pt key) #{})
   1.226 +                         nil)))))
   1.227 +
   1.228 +(defn- match?
   1.229 +  "Is m2 contained in m1?"
   1.230 +  [m1 m2]
   1.231 +  (let [compare (fn [key]
   1.232 +                  (and (contains? m1 key)
   1.233 +                       (= (m1 key) (m2 key))))]
   1.234 +  (every? compare (keys m2))))
   1.235 +
   1.236 +(defn- scan-space
   1.237 +  "Computes a stream of tuples from relation rn matching partial tuple (pt)
   1.238 +   and applies fun to each"
   1.239 +  [fun db rn pt]
   1.240 +  (let [rel (db rn)
   1.241 +        idxs (find-indexes (:indexes rel) pt)
   1.242 +        space (if (empty? idxs)
   1.243 +                (:data rel) ; table scan :(
   1.244 +                (reduce intersection idxs))]
   1.245 +    (trace-datalog (when (empty? idxs)
   1.246 +                     (println (format "Table scan of %s: %s rows!!!!!"
   1.247 +                                      rn
   1.248 +                                      (count space)))))
   1.249 +    (fun #(match? % pt) space)))
   1.250 +    
   1.251 +(defn select
   1.252 +  "finds all matching tuples to the partial tuple (pt) in the relation named (rn)"
   1.253 +  [db rn pt]
   1.254 +  (scan-space filter db rn pt))
   1.255 +
   1.256 +(defn any-match?
   1.257 +  "Finds if there are any matching records for the partial tuple"
   1.258 +  [db rn pt]
   1.259 +  (if (= (-> pt keys set) (:schema (db rn)))
   1.260 +    (contains? (:data (db rn)) pt)
   1.261 +    (scan-space some db rn pt)))
   1.262 +
   1.263 +
   1.264 +;;; Merge
   1.265 +
   1.266 +(defn merge-indexes
   1.267 +  [idx1 idx2]
   1.268 +  (merge-with (fn [h1 h2] (merge-with union h1 h2)) idx1 idx2))
   1.269 +  
   1.270 +(defn merge-relations
   1.271 +  "Merges two relations"
   1.272 +  [r1 r2]
   1.273 +  (assert (= (:schema r1) (:schema r2)))
   1.274 +  (let [merged-indexes (merge-indexes (:indexes r1)
   1.275 +                                      (:indexes r2))
   1.276 +        merged-data (union (:data r1)
   1.277 +                           (:data r2))]
   1.278 +    (assoc r1 :data merged-data :indexes merged-indexes)))
   1.279 +    
   1.280 +(defn database-merge
   1.281 +  "Merges databases together"
   1.282 +  [dbs]
   1.283 +  (apply merge-with merge-relations dbs))
   1.284 +
   1.285 +(defn database-merge-parallel
   1.286 +  "Merges databases together in parallel"
   1.287 +  [dbs]
   1.288 +  (preduce merge-relations dbs))
   1.289 +
   1.290 +
   1.291 +;; End of file